diff --git a/.checkpatch.conf b/.checkpatch.conf new file mode 100644 index 00000000..0c92ffeb --- /dev/null +++ b/.checkpatch.conf @@ -0,0 +1,25 @@ +# This is not the kernel so don't expect a Linux tree. +--no-tree + +# This is not the kernel so signoff means nothing +--no-signoff + +# Makes it easier to parse the output +--show-types +--emacs +--quiet + +# We love small terminals +--max-line-length 80 + +# We have different standards from the kernel +--ignore FILE_PATH_CHANGES +--ignore LINE_SPACING +--ignore GIT_COMMIT_ID +--ignore SPLIT_STRING +--ignore PREFER_PRINTF +--ignore BOOL_BITFIELD +--ignore SSCANF_TO_KSTRTO + +# This is not the kernel so licenses aren't relevant +--ignore SPDX_LICENSE_TAG diff --git a/.clang-format b/.clang-format new file mode 100644 index 00000000..303ab379 --- /dev/null +++ b/.clang-format @@ -0,0 +1,123 @@ +--- +Language: Cpp +# BasedOnStyle: LLVM +AccessModifierOffset: -2 +AlignAfterOpenBracket: Align +AlignConsecutiveAssignments: false +AlignConsecutiveDeclarations: false +AlignEscapedNewlines: Right +AlignOperands: true +AlignTrailingComments: true +AllowAllParametersOfDeclarationOnNextLine: true +AllowShortBlocksOnASingleLine: false +AllowShortCaseLabelsOnASingleLine: false +AllowShortFunctionsOnASingleLine: All +AllowShortIfStatementsOnASingleLine: false +AllowShortLoopsOnASingleLine: false +AlwaysBreakAfterDefinitionReturnType: None +AlwaysBreakAfterReturnType: None +AlwaysBreakBeforeMultilineStrings: false +AlwaysBreakTemplateDeclarations: MultiLine +BinPackArguments: true +BinPackParameters: true +BraceWrapping: + AfterClass: false + AfterControlStatement: false + AfterEnum: false + AfterFunction: true + AfterNamespace: true + AfterObjCDeclaration: false + AfterStruct: false + AfterUnion: false + AfterExternBlock: false + BeforeCatch: false + BeforeElse: false + IndentBraces: false + SplitEmptyFunction: true + SplitEmptyRecord: true + SplitEmptyNamespace: true +BreakBeforeBinaryOperators: None +BreakBeforeBraces: Custom 
+BreakBeforeInheritanceComma: false +BreakInheritanceList: BeforeColon +BreakBeforeTernaryOperators: true +BreakConstructorInitializersBeforeComma: false +BreakConstructorInitializers: BeforeColon +BreakAfterJavaFieldAnnotations: false +BreakStringLiterals: true +ColumnLimit: 80 +CommentPragmas: '^ IWYU pragma:' +CompactNamespaces: false +ConstructorInitializerAllOnOneLineOrOnePerLine: false +ConstructorInitializerIndentWidth: 8 +ContinuationIndentWidth: 8 +Cpp11BracedListStyle: true +DerivePointerAlignment: false +DisableFormat: false +ExperimentalAutoDetectBinPacking: false +FixNamespaceComments: true +ForEachMacros: + - foreach + - Q_FOREACH + - BOOST_FOREACH + - cds_list_for_each + - cds_list_for_each_entry + - cds_list_for_each_entry_rcu + - cds_list_for_each_entry_safe + - cds_lfht_for_each_entry +IncludeBlocks: Preserve +IncludeCategories: + - Regex: '^"(llvm|llvm-c|clang|clang-c)/' + Priority: 2 + - Regex: '^(<|"(gtest|gmock|isl|json)/)' + Priority: 3 + - Regex: '.*' + Priority: 1 +IncludeIsMainRegex: '(Test)?$' +IndentCaseLabels: false +IndentPPDirectives: None +IndentWidth: 8 +IndentWrappedFunctionNames: false +JavaScriptQuotes: Leave +JavaScriptWrapImports: true +KeepEmptyLinesAtTheStartOfBlocks: true +MacroBlockBegin: '' +MacroBlockEnd: '' +MaxEmptyLinesToKeep: 1 +NamespaceIndentation: None +ObjCBinPackProtocolList: Auto +ObjCBlockIndentWidth: 8 +ObjCSpaceAfterProperty: false +ObjCSpaceBeforeProtocolList: true +PenaltyBreakAssignment: 2 +PenaltyBreakBeforeFirstCallParameter: 19 +PenaltyBreakComment: 300 +PenaltyBreakFirstLessLess: 120 +PenaltyBreakString: 1000 +PenaltyBreakTemplateDeclaration: 10 +PenaltyExcessCharacter: 1000000 +PenaltyReturnTypeOnItsOwnLine: 60 +PointerAlignment: Right +ReflowComments: true +SortIncludes: true +SortUsingDeclarations: true +SpaceAfterCStyleCast: false +SpaceAfterTemplateKeyword: true +SpaceBeforeAssignmentOperators: true +SpaceBeforeCpp11BracedList: false +SpaceBeforeCtorInitializerColon: true 
+SpaceBeforeInheritanceColon: true +SpaceBeforeParens: ControlStatements +SpaceBeforeRangeBasedForLoopColon: true +SpaceInEmptyParentheses: false +SpacesBeforeTrailingComments: 1 +SpacesInAngles: false +SpacesInContainerLiterals: true +SpacesInCStyleCastParentheses: false +SpacesInParentheses: false +SpacesInSquareBrackets: false +Standard: Cpp11 +TabWidth: 8 +UseTab: Always +... + diff --git a/.clang-tidy b/.clang-tidy new file mode 100644 index 00000000..faf9fcf9 --- /dev/null +++ b/.clang-tidy @@ -0,0 +1,44 @@ +--- +Checks: 'bugprone-*, + -bugprone-suspicious-missing-comma, + clang-diagnostic-*, + -clang-diagnostic-address-of-packed-member, + -clang-diagnostic-warning, + clang-analyzer-*, + -clang-analyzer-valist.Uninitialized, + -clang-analyzer-optin.performance.Padding, + -clang-analyzer-unix.Malloc, + -clang-analyzer-core.uninitialized.Assign, + -clang-analyzer-security.insecureAPI.strcpy, + -clang-analyzer-core.UndefinedBinaryOperatorResult, + -clang-analyzer-core.uninitialized.UndefReturn, + -clang-analyzer-core.NullDereference, + misc-*, + -misc-unused-parameters, + performance-*, + portability-*, + readability-*, + -readability-braces-around-statements, + -readability-redundant-member-init, + -readability-implicit-bool-conversion' +WarningsAsErrors: '' +HeaderFilterRegex: '' +AnalyzeTemporaryDtors: false +FormatStyle: file +User: abuild +CheckOptions: + - key: google-readability-braces-around-statements.ShortStatementLines + value: '2' + - key: readability-braces-around-statements.ShortStatementLines + value: '2' + - key: readability-function-size.StatementThreshold + value: '1540' + - key: readability-function-size.BranchThreshold + value: '98' + - key: readability-function-size.ParameterThreshold + value: '22' + - key: readability-function-size.NestingThreshold + value: '7' + - key: readability-function-size.VariableThreshold + value: '293' +... 
diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 00000000..7b37e8c0 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,18 @@ +# Located at the project root +root = true + +# Base settings for most files +[*] +charset = utf-8 +end_of_line = lf +insert_final_newline = true +trim_trailing_whitespace = true + +indent_style = tab +indent_size = 8 + +max_line_length = 80 + +[meson.build] +indent_style = space +indent_size = 8 diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 00000000..5d425843 --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +debian/changelog merge=dpkg-mergechangelogs diff --git a/.github/workflow/jrgitlint.yml b/.github/workflow/jrgitlint.yml new file mode 100644 index 00000000..460ef477 --- /dev/null +++ b/.github/workflow/jrgitlint.yml @@ -0,0 +1,18 @@ +name: JR GitLint + +on: [pull_request] + +jobs: + gitlint: + + runs-on: ubuntu-latest + + container: + image: jorisroovers/gitlint + + steps: + - name: Checkout + uses: actions/checkout@v2 + + - name: Gitlint + run: gitlint --commits ${GITHUB_BASE_REF}..${GITHUB_HEAD_REF} diff --git a/.gitignore b/.gitignore index 9c2b406e..838011ef 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,9 @@ /dataplane_test # Build results /obj-* +# Protobuf generated files +protobuf/*.pb.h +protobuf/*.pb-c.[ch] # Debian rules build-stamp # RTE rules @@ -28,9 +31,15 @@ Makefile.in /*.log ylwrap debian/debhelper-build-stamp +debian/golang-github-danos-vyatta-dataplane-protobuf-dev/ +debian/libvyatta-dataplane-proto-dev/ +debian/libvyatta-dataplane-proto-support/ +debian/libvyatta-dataplane-proto1/ debian/libvyattafal-dev/ debian/vyatta-dataplane-dev/ debian/vyatta-dataplane-sample-plugin/ +debian/vyatta-dataplane-test/ +debian/libvyatta-jsonw1/ ltmain.sh m4/libtool.m4 m4/ltoptions.m4 @@ -50,6 +59,8 @@ mini_debuginfo *.ko *.so *.so.dbg +*.lo +*.la *.mod.c *.i *.lst @@ -85,12 +96,24 @@ cscope.po.out # git files that we don't want to ignore even it they are dot-files !.gitignore 
+!.gitlint + +# don't ignore tool config files +!.checkpatch.conf +!.clang-tidy +!.clang-format +!.editorconfig *.orig *.rej *~ \#*# +#UT executables +/tests/per_file/*_test + #UT coverage files /*-coverage.info /*-coverage/ +#cpputest log files +tests/per_file/cpputest_*.xml diff --git a/.gitlint b/.gitlint new file mode 100644 index 00000000..fcb17a77 --- /dev/null +++ b/.gitlint @@ -0,0 +1,93 @@ +# All these sections are optional, edit this file as you like. +[general] +# Ignore certain rules, you can reference them by their id or by their full name +ignore=body-is-missing + +# verbosity should be a value between 1 and 3, the commandline -v flags take precedence over this +# verbosity = 2 + +# By default gitlint will ignore merge commits. Set to 'false' to disable. +# ignore-merge-commits=true + +# By default gitlint will ignore fixup commits. Set to 'false' to disable. +# ignore-fixup-commits=true + +# By default gitlint will ignore squash commits. Set to 'false' to disable. +# ignore-squash-commits=true + +# Ignore any data sent to gitlint via stdin +# ignore-stdin=true + +# Enable debug mode (prints more output). Disabled by default. +# debug=true + +# Enable community contributed rules +# See http://jorisroovers.github.io/gitlint/contrib_rules for details +# contrib=contrib-title-conventional-commits,CC1 + +# Set the extra-path where gitlint will search for user defined rules +# See http://jorisroovers.github.io/gitlint/user_defined_rules for details +# extra-path=examples/ + +[title-max-length] +line-length=72 + +[title-must-not-contain-word] +# Comma-separated list of words that should not occur in the title. Matching is case +# insensitive. It's fine if the keyword occurs as part of a larger word (so "WIPING" +# will not cause a violation, but "WIP: my title" will). +words=wip,rfc + +# [title-match-regex] +# python like regex (https://docs.python.org/2/library/re.html) that the +# commit-msg title must be matched to. 
+# Note that the regex can contradict with other rules if not used correctly +# (e.g. title-must-not-contain-word). +# regex=^US[0-9]* + +[body-max-line-length] +line-length=80 + +[body-min-length] +# If present, allow body to consist of only a Jira-ID :-( +min-length=11 + +# [body-is-missing] +# Whether to ignore this rule on merge commits (which typically only have a title) +# default = True +# ignore-merge-commits=false + +# [body-changed-file-mention] +# List of files that need to be explicitly mentioned in the body when they are changed +# This is useful for when developers often erroneously edit certain files or git submodules. +# By specifying this rule, developers can only change the file when they explicitly reference +# it in the commit message. +# files=gitlint/rules.py,README.md + +# [author-valid-email] +# python like regex (https://docs.python.org/2/library/re.html) that the +# commit author email address should be matched to +# For example, use the following regex if you only want to allow email addresses from foo.com +# regex=[^@]+@foo.com + +# [ignore-by-title] +# Ignore certain rules for commits of which the title matches a regex +# E.g. Match commit titles that start with "Release" +# regex=^Release(.*) +# +# Ignore certain rules, you can reference them by their id or by their full name +# Use 'all' to ignore all rules +# ignore=T1,body-min-length + +# [ignore-by-body] +# Ignore certain rules for commits of which the body has a line that matches a regex +# E.g. Match bodies that have a line that contains "release" +# regex=(.*)release(.*) +# +# Ignore certain rules, you can reference them by their id or by their full name +# Use 'all' to ignore all rules +# ignore=T1,body-min-length + +# [contrib-title-conventional-commits] +# Specify allowed commit types. 
For details see: https://www.conventionalcommits.org/ +# types = bugfix,user-story,epic diff --git a/CODEOWNERS b/CODEOWNERS new file mode 100644 index 00000000..69c1f6ef --- /dev/null +++ b/CODEOWNERS @@ -0,0 +1,19 @@ +# This is a comment. +# Each line is a file pattern followed by one or more owners. + +# Ordering is important! The last matching file pattern has the highest precedence. + +# Do not use @user, @@group or @org/team syntax as these are either Github or Bitbucket specific +# Use only email addresses + +/src/npf_shim.* ian.wilson@intl.att.com paitken@vyatta.att-mail.com dfawcus+atlassian@vyatta.att-mail.com gsheare@vyatta.att-mail.com +/src/npf/ ian.wilson@intl.att.com paitken@vyatta.att-mail.com dfawcus+atlassian@vyatta.att-mail.com gsheare@vyatta.att-mail.com +/src/npf/app_group/ paitken@vyatta.att-mail.com dfawcus+atlassian@vyatta.att-mail.com gsheare@vyatta.att-mail.com +/src/npf/dpi/ paitken@vyatta.att-mail.com dfawcus+atlassian@vyatta.att-mail.com gsheare@vyatta.att-mail.com + +src/*qos* aroberts@vyatta.att-mail.com adewar@vyatta.att-mail.com ag474u@vyatta.att-mail.com + +/src/session/ ian.wilson@intl.att.com paitken@vyatta.att-mail.com dfawcus+atlassian@vyatta.att-mail.com gsheare@vyatta.att-mail.com + +meson.build nick.brown@att.com +/Jenkinsfile nick.brown@att.com diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index df5234be..5cca2b17 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -27,18 +27,13 @@ be dynamically invoked in a runtime constructed graph. Nodes should never be called directly (currently through their *_fused() generated entry points). -## Unit Testing - -Vyatta-dataplane unit tests are done in a harness that builds the whole -dataplane. +## Whole dataplane unit tests The unit tests are executed as part of the default package build and **must** be kept passing with every commit. You should consider adding unit tests for any new functionality being add. 
-### Whole dataplane tests - The majority of the dataplane is built into a process and APIs are provided to inject state/traffic and verify the processing of the state, using [libcheck][5]. @@ -60,17 +55,17 @@ during code review. ## Static Analysis -[Cppcheck][3] can be used to check the code for Static Analysis warnings. - -`cppcheck` should be in `$PATH` and `scripts/cppcheck_wrapper.sh` used -to check the files modified by a set of changes with something like: +[clang-tidy][3] should be used to check the code for Static Analysis warnings. -`./scripts/cppcheck_wrapper.sh origin/master bugfix/foo` +``` shell +apt install clang-tidy clang +export CC=clang CXX=clang++ +meson build && cd build && ninja clang-tidy +``` You **must not** introduce any new warnings. [1]: https://www.kernel.org/doc/Documentation/CodingStyle "Linux Kernel Coding Style" [2]: https://github.com/torvalds/linux/blob/master/scripts/checkpatch.pl "checkpatch script" -[3]: http://cppcheck.sourceforge.net/ "Cppcheck Static Analyser" -[4]: http://cpputest.github.io/ "Cpputest Unit Test Framework" +[3]: https://clang.llvm.org/extra/clang-tidy/ "Clang-Tidy Code Checker" [5]: http://libcheck.github.io/check/ "Check Unit Test Framework" diff --git a/Jenkinsfile b/Jenkinsfile index adfe07a7..fab5bd23 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -23,21 +23,22 @@ def cancelPreviousBuilds() { pipeline { agent any - parameters { booleanParam(name: 'FORCE_VALGRIND', defaultValue: false, description: 'Execute Valgrind even for a PR branch') } - environment { - OBS_TARGET_PROJECT = 'VR:Dartmouth' + OBS_INSTANCE = 'build-release' + OBS_TARGET_PROJECT = 'DANOS:Shipping:2105' OBS_TARGET_REPO = 'standard' OBS_TARGET_ARCH = 'x86_64' - // # Replace : with _ in project name, as osc-buildpkg does - OSC_BUILD_ROOT = "${WORKSPACE}" + '/build-root/' + "${env.OBS_TARGET_PROJECT.replace(':','_')}" + '-' + "${env.OBS_TARGET_REPO}" + '-' + "${OBS_TARGET_ARCH}" - DH_VERBOSE = 1 - DH_QUIET = 0 - DEB_BUILD_OPTIONS ='verbose' + 
// Replace : with _ in project name so mountable paths can be used. + BUILD_ROOT_RELATIVE = 'build-root/' + "${env.OBS_TARGET_PROJECT.replace(':','_')}" + '-' + "${env.OBS_TARGET_REPO}" + '-' + "${OBS_TARGET_ARCH}" + // Workspace specific chroot location used instead of /var/tmp allows parallel builds between jobs + OSC_BUILD_ROOT = "${WORKSPACE}" + '/' + "${env.BUILD_ROOT_RELATIVE}" + // CHANGE_TARGET is set for PRs. + // When CHANGE_TARGET is not set it's a regular build so we use BRANCH_NAME. + REF_BRANCH = "${env.CHANGE_TARGET != null ? env.CHANGE_TARGET : env.BRANCH_NAME}" } options { - timeout(time: 180, unit: 'MINUTES') // Hopefully maximum even when Valgrind is included! + timeout(time: 60, unit: 'MINUTES') checkoutToSubdirectory("vyatta-dataplane") quietPeriod(90) // Wait 90 seconds in case there are more SCM pushes/PR merges coming } @@ -45,146 +46,73 @@ pipeline { stages { // A work around, until this feature is implemented: https://issues.jenkins-ci.org/browse/JENKINS-47503 - stage('Cancel older builds') { steps { script { - cancelPreviousBuilds() - }}} - - stage('OSC config') { - steps { - sh 'printenv' - // Build scripts with tasks to perform in the chroot - sh """ -cat < osc-buildpackage_buildscript_default -export BUILD_ID=\"${BUILD_ID}\" -export JENKINS_NODE_COOKIE=\"${JENKINS_NODE_COOKIE}\" -dpkg-buildpackage -jauto -us -uc -b -EOF -""" - sh """ -cat < osc-buildpackage_buildscript_scan_build -export BUILD_ID=\"${BUILD_ID}\" -export JENKINS_NODE_COOKIE=\"${JENKINS_NODE_COOKIE}\" -scan-build --status-bugs --use-cc clang --use-c++ clang++ -o clangScanBuildReports -maxloop 64 dpkg-buildpackage -jauto -us -uc -b -EOF -""" - } - } - - // Workspace specific chroot location used instead of /var/tmp - // Allows parallel builds between jobs, but not between stages in a single job - // TODO: Enhance osc-buildpkg to support parallel builds from the same pkg_srcdir - // TODO: probably by allowing it to accept a .conf file from a location other than pkg_srcdir 
+ stage('Cancel older builds') { + when { allOf { + // Only if this is a Pull Request + expression { env.CHANGE_ID != null } + expression { env.CHANGE_TARGET != null } + }} + steps { script { + cancelPreviousBuilds() + }}} stage('OSC Build') { steps { dir('vyatta-dataplane') { - sh """ -cat < .osc-buildpackage.conf -OSC_BUILDPACKAGE_TMP=\"${WORKSPACE}\" -OSC_BUILDPACKAGE_BUILDSCRIPT=\"${WORKSPACE}/osc-buildpackage_buildscript_default\" -EOF -""" - sh "osc-buildpkg -v -g -T -P ${env.OBS_TARGET_PROJECT} ${env.OBS_TARGET_REPO} -- --trust-all-projects --build-uid='caller'" - } - } - } - - stage('clang Static Analysis') { - environment { - CC = 'clang' - CXX ='clang++' - DEB_BUILD_OPTIONS = 'nocheck' - } - steps { - dir('vyatta-dataplane') { - sh """ -cat < .osc-buildpackage.conf -OSC_BUILDPACKAGE_TMP=\"${WORKSPACE}\" -OSC_BUILDPACKAGE_BUILDSCRIPT=\"${WORKSPACE}/osc-buildpackage_buildscript_scan_build\" -EOF -""" - sh "osc-buildpkg -v -g -T -P ${env.OBS_TARGET_PROJECT} ${env.OBS_TARGET_REPO} -- --trust-all-projects --build-uid='caller' --extra-pkgs='clang' --extra-pkgs='llvm-dev'" + sh "gbp buildpackage --git-verbose --git-ignore-branch -S --no-check-builddeps -us -uc" } + writeFile file: 'build.script', + text: """\ + export BUILD_ID=\"${BUILD_ID}\" + export JENKINS_NODE_COOKIE=\"${JENKINS_NODE_COOKIE}\" + export DH_VERBOSE=1 DH_QUIET=0 + export DEB_BUILD_OPTIONS='verbose all_tests sanitizer' + dpkg-buildpackage -jauto -us -uc -b + """.stripIndent() + sh "osc -v -A ${env.OBS_INSTANCE} build --download-api-only --local-package --no-service --trust-all-projects --build-uid=caller --alternative-project=${env.OBS_TARGET_PROJECT} ${env.OBS_TARGET_REPO} ${env.OBS_TARGET_ARCH}" } post { - failure { - echo 'clang analyzer found issues' - dir('clangScanBuildReports'){ - sh "cp ${env.OSC_BUILD_ROOT}/usr/src/packages/BUILD/clangScanBuildReports/*/* ." 
+ always { + sh """ + mkdir junit_results + for file in ${env.OSC_BUILD_ROOT}/usr/src/packages/BUILD/build/tests/whole_dp/*.xml + do + xsltproc --output junit_results/\$(basename \$file) vyatta-dataplane/tests/whole_dp/XML_for_JUnit.xsl \$file || true + done + """ + + junit 'junit_results/*.xml' } - publishHTML target: [ - allowMissing: false, - alwaysLinkToLastBuild: false, - keepAll: false, - reportDir: 'clangScanBuildReports', - reportFiles: 'index.html', - reportTitles: 'clang scan-build Static Analysis', - reportName: 'clang scan-build Static Analysis Report' - ] - } } } - stage('Valgrind') { - when { anyOf { - expression { env.CHANGE_ID == null } // If this is not a Pull Request - expression { return params.FORCE_VALGRIND } // Or if forced - }} - environment { - DEB_BUILD_PROFILES = 'pkg.vyatta-dataplane.valgrind' - } + stage('Code Stats') { + when {expression { env.CHANGE_ID == null }} // Not when this is a Pull Request steps { - dir('vyatta-dataplane') { - sh """ -cat < .osc-buildpackage.conf -OSC_BUILDPACKAGE_TMP=\"${WORKSPACE}\" -OSC_BUILDPACKAGE_BUILDSCRIPT=\"${WORKSPACE}/osc-buildpackage_buildscript_default\" -EOF -""" - sh "osc-buildpkg -v -g -T -P ${env.OBS_TARGET_PROJECT} ${env.OBS_TARGET_REPO} -- --trust-all-projects --build-uid='caller' --extra-pkgs='valgrind'" - } + sh 'sloccount --duplicates --wide --details vyatta-dataplane > sloccount.sc' + sloccountPublish pattern: '**/sloccount.sc' } } - stage('cppcheck Static Analysis') { - when {expression { env.CHANGE_ID == null }} // Not when this is a Pull Request - environment { - extra_cppcheck_parameters = '--xml-version=2 --error-exitcode=0' - } - steps { - dir('vyatta-dataplane') { - sh "./scripts/cppcheck_wrapper.sh 2> ${WORKSPACE}/cppcheck.xml" - } - // TODO: Currently this doesn't cause a failure - // TODO: Fail if the number of cppcheck errors is above some threshold. 
- // TODO: Better yet would for there to be none and then remove - // --error-exitcode=0 above so that it fails on any reported error. - sh 'cppcheck-htmlreport --title="Vyatta Dataplane" --file=cppcheck.xml --report-dir=cppcheck_reports --source-dir=vyatta-dataplane' - } - post { - success { - publishHTML target: [ - allowMissing: false, - alwaysLinkToLastBuild: false, - keepAll: false, - reportDir: 'cppcheck_reports', - reportFiles: 'index.html', - reportTitles: 'cppcheck Static Analysis', - reportName: 'cppcheck Static Analysis Report' - ] + stage('checkpatch') { + when { + allOf { + // Only if this is a Pull Request + expression { env.CHANGE_ID != null } + expression { env.CHANGE_TARGET != null } } } - } - - stage('Code Stats') { - when {expression { env.CHANGE_ID == null }} // Not when this is a Pull Request steps { - sh 'sloccount --duplicates --wide --details vyatta-dataplane > sloccount.sc' - sloccountPublish pattern: '**/sloccount.sc' + catchError(buildResult: 'FAILURE', stageResult: 'FAILURE') { + dir('vyatta-dataplane') { + //TODO: Path to checkpatch.pl should not be hardcoded! + sh "PATH=~/linux-vyatta/scripts:$PATH ./scripts/checkpatch_wrapper.sh upstream/${env.CHANGE_TARGET} origin/${env.BRANCH_NAME}" + } + } } } - stage('checkpatch') { + stage('gitlint') { when { allOf { // Only if this is a Pull Request @@ -192,14 +120,49 @@ EOF expression { env.CHANGE_TARGET != null } } } + agent { + docker { image 'jorisroovers/gitlint' + args '--entrypoint=""' + reuseNode true + } + } steps { - dir('vyatta-dataplane') { - //TODO: Path to checkpatch.pl should not be hardcoded! 
- sh "PATH=~/linux-vyatta/scripts:$PATH ./scripts/checkpatch_wrapper.sh upstream/${env.CHANGE_TARGET} origin/${env.BRANCH_NAME}" + catchError(buildResult: 'FAILURE', stageResult: 'FAILURE') { + dir('vyatta-dataplane') { + sh "gitlint --commits upstream/${env.CHANGE_TARGET}..origin/${env.BRANCH_NAME}" + } } } } + stage('Code Static Analysis') { + steps { + dir('vyatta-dataplane') { + sh "gbp buildpackage --git-verbose --git-ignore-branch -S --no-check-builddeps -us -uc" + } + writeFile file: 'build.script', + text: """\ + export BUILD_ID=\"${BUILD_ID}\" + export JENKINS_NODE_COOKIE=\"${JENKINS_NODE_COOKIE}\" + export CC=clang CXX=clang++ + meson builddir && cd builddir + ninja clang-tidy >& clang-tidy.log + sed -i 's|/usr/src/packages/BUILD|${WORKSPACE}/vyatta-dataplane|g' clang-tidy.log + """.stripIndent() + sh "osc -v -A ${env.OBS_INSTANCE} build --download-api-only --local-package --no-service --trust-all-projects --build-uid=caller --nochecks --extra-pkgs='clang-tidy' --extra-pkgs='clang' --alternative-project=${env.OBS_TARGET_PROJECT} ${env.OBS_TARGET_REPO} ${env.OBS_TARGET_ARCH}" + } + post { + always { + archiveArtifacts artifacts: "${env.BUILD_ROOT_RELATIVE}/usr/src/packages/BUILD/builddir/clang-tidy.log" + recordIssues enabledForFailure: true, + tool: clangTidy(pattern: "${env.BUILD_ROOT_RELATIVE}/usr/src/packages/BUILD/builddir/clang-tidy.log"), + sourceDirectory: 'vyatta-dataplane', + referenceJobName: "DANOS/vyatta-dataplane/${env.REF_BRANCH}", + qualityGates: [[type: 'NEW', threshold: 1]] + } + } + } + } // stages post { diff --git a/Makefile.am b/Makefile.am deleted file mode 100644 index 4adeb19e..00000000 --- a/Makefile.am +++ /dev/null @@ -1,592 +0,0 @@ -include $(top_srcdir)/common.mk - -ACLOCAL_AMFLAGS=-I m4 - -OTHER_LINKER_FLAGS = -rdynamic # Needed for backtraces -# Fix the start of the hot_text section in an attempt to provide more -# stability between builds of forwarding performance. 
-# -# This isn't a scientifically chosen value, but merely one that was -# chosen as the address of ether_input before it was put into a -# section with a fixed start address and where it was observed to have -# good performance. -OTHER_LINKER_FLAGS += -Wl,--section-start,.text=429e00 -OTHER_LINKER_FLAGS += -Wl,--section-ordering-file,$(srcdir)/dataplane.section-ordering - -# Needed for -flto -AR = gcc-ar -RANLIB = gcc-ranlib - -CRYTPO_FILES = \ - src/crypto/crypto.c \ - src/crypto/crypto_engine.c \ - src/crypto/crypto_policy.c \ - src/crypto/crypto_sadb.c \ - src/crypto/esp.c \ - src/crypto/vti.c \ - src/crypto/crypto_pmd.c - -IPV6_FILES = \ - src/netinet6/in6.c \ - src/netinet6/ip6_commands.c \ - src/netinet6/ip6_forward.c \ - src/netinet6/ip6_icmp.c \ - src/netinet6/ip6_main.c \ - src/netinet6/ip6_mroute.c \ - src/netinet6/ip6_options.c \ - src/netinet6/nd6_nbr.c \ - src/netinet6/route_v6.c \ - src/netinet6/scope6.c - -MPLS_FILES = \ - src/mpls/mpls_commands.c \ - src/mpls/mpls_forward.c \ - src/mpls/mpls_label_table.c \ - src/mpls/mpls_netlink.c - -PIPELINE_FILES = \ - src/pipeline/pl_commands.c \ - src/pipeline/pl_fused_gen.c \ - src/pipeline/pl_node.c \ - src/pipeline/pl_node_boot.c \ - src/pipeline/pl_plugin.c \ - $(PIPELINE_NODE_FILES) - -PIPELINE_NODE_FILES = \ - src/pipeline/nodes/l2_bridge_in.c \ - src/pipeline/nodes/l2_capture.c \ - src/pipeline/nodes/cross_connect/l2_cross_connect_node.c \ - src/pipeline/nodes/cross_connect/l2_cross_connect_cmd.c \ - src/pipeline/nodes/cross_connect/cross_connect.c \ - src/pipeline/nodes/l2_ether_in.c \ - src/pipeline/nodes/l2_ether_forward.c \ - src/pipeline/nodes/l2_ether_lookup.c \ - src/pipeline/nodes/l2_hw_hdr.c \ - src/pipeline/nodes/l2_portmonitor.c \ - src/pipeline/nodes/l2_portmonitor_hw.c \ - src/pipeline/nodes/l2_vlan_mod_ingress.c \ - src/pipeline/nodes/pppoe/l2_pppoe_node.c \ - src/pipeline/nodes/pppoe/l2_pppoe_cmd.c \ - src/pipeline/nodes/pppoe/pppoe.c \ - src/pipeline/nodes/l2_sw_vlan.c \ - 
src/pipeline/nodes/l3_acl.c \ - src/pipeline/nodes/l3_arp.c \ - src/pipeline/nodes/l3_fw_in.c \ - src/pipeline/nodes/l3_fw_out.c \ - src/pipeline/nodes/l3_pbr.c \ - src/pipeline/nodes/l3_tcp_mss.c \ - src/pipeline/nodes/l3_v4_cgnat.c \ - src/pipeline/nodes/l3_v4_defrag.c \ - src/pipeline/nodes/l3_v4_encap.c \ - src/pipeline/nodes/l3_v4_ipsec.c \ - src/pipeline/nodes/l3_v4_l4.c \ - src/pipeline/nodes/l3_v4_no_address.c \ - src/pipeline/nodes/l3_v4_no_forwarding.c \ - src/pipeline/nodes/l3_v4_out.c \ - src/pipeline/nodes/l3_v4_post_route_lookup.c \ - src/pipeline/nodes/l3_v4_route_lookup.c \ - src/pipeline/nodes/l3_v4_rpf.c \ - src/pipeline/nodes/l3_v4_val.c \ - src/pipeline/nodes/l3_v6_defrag.c \ - src/pipeline/nodes/l3_v6_encap.c \ - src/pipeline/nodes/l3_v6_ipsec.c \ - src/pipeline/nodes/l3_v6_no_address.c \ - src/pipeline/nodes/l3_v6_no_forwarding.c \ - src/pipeline/nodes/l3_v6_out.c \ - src/pipeline/nodes/l3_v6_post_route_lookup.c \ - src/pipeline/nodes/l3_v6_route_lookup.c \ - src/pipeline/nodes/l3_v6_val.c \ - src/pipeline/nodes/l3_v6_nptv6.c \ - src/pipeline/nodes/term.c - -SESSION_FILES = \ - src/session/session.c \ - src/session/session_cmds.c \ - src/session/session_feature.c - -CORE_FILES = \ - src/arp.c \ - src/backplane.c \ - src/bpf_filter.c \ - src/bridge.c \ - src/bridge_netlink.c \ - src/bridge_port.c \ - src/bridge_vlan_set.c \ - src/commands.c \ - src/protobuf.c \ - src/protobuf_util.c \ - src/config.c \ - src/control.c \ - src/cpp_rate_limiter.c \ - src/dealer.c \ - src/devinfo.c \ - src/dpdk_eth_if.c \ - src/dp_event.c \ - src/ecmp.c \ - src/ether.c \ - src/event.c \ - src/fal.c \ - src/gre.c \ - src/hotplug.c \ - src/if.c \ - src/if_ether.c \ - src/if_feat.c \ - src/if_llatbl.c \ - src/in.c \ - src/in_cksum.c \ - src/ipip_tunnel.c \ - src/ip_forward.c \ - src/ip_commands.c \ - src/ip_icmp.c \ - src/ip_mcast.c \ - src/ip_mcast_fal_interface.c \ - src/ip_netlink.c \ - src/ip_options.c \ - src/ip_output.c \ - src/iptun_common.c \ - 
src/json_writer.c \ - src/l2_rx_fltr.c \ - src/l2tp/l2tpeth_decap.c \ - src/l2tp/l2tpeth_dp.c \ - src/l2tp/l2tpeth_netlink.c \ - src/lag.c \ - src/linkwatch.c \ - src/log.c \ - src/loopback.c \ - src/lpm/lpm.c \ - src/lpm/lpm6.c \ - src/macvlan.c \ - src/main.c \ - src/master.c \ - src/mstp.c \ - src/netinet/ip_mroute.c \ - src/netlink.c \ - src/nsh.c \ - src/pd_show.c \ - src/pktmbuf.c \ - src/pathmonitor/pathmonitor_cmds.c \ - src/portmonitor/portmonitor_cmds.c \ - src/portmonitor/portmonitor_dp.c \ - src/power.c \ - src/ptp.c \ - src/qos_sched.c \ - src/qos_dpdk.c \ - src/qos_hw.c \ - src/qos_hw_show.c \ - src/qos_obj_db.c \ - src/route.c \ - src/route_broker.c \ - src/rt_commands.c \ - src/rt_tracker.c \ - src/storm_ctl.c \ - src/sfp.c \ - src/switch.c \ - src/switchport.c \ - src/udp_handler.c \ - src/util.c \ - src/vhost.c \ - src/vlan_if.c \ - src/vlan_if_netlink.c \ - src/vlan_modify.c \ - src/vrf.c \ - src/vxlan.c \ - src/shadow.c \ - src/zmq_dp.c - -FILES_NOT_FOR_TEST = \ - src/capture.c \ - src/ip_id.c \ - src/team.c \ - src/shadow_receive.c - -NPF_FILES = \ - src/npf/alg/npf_alg_private.c \ - src/npf/alg/npf_alg_public.c \ - src/npf/alg/npf_alg_tftp.c \ - src/npf/alg/npf_alg_sip.c \ - src/npf/alg/npf_alg_ftp.c \ - src/npf/alg/npf_alg_rpc.c \ - src/npf/apm/apm.c \ - src/npf/cgnat/cgn.c \ - src/npf/cgnat/cgn_cmd_cfg.c \ - src/npf/cgnat/cgn_cmd_op.c \ - src/npf/cgnat/cgn_if.c \ - src/npf/cgnat/cgn_log.c \ - src/npf/cgnat/cgn_map.c \ - src/npf/cgnat/cgn_mbuf.c \ - src/npf/cgnat/cgn_policy.c \ - src/npf/cgnat/cgn_session.c \ - src/npf/cgnat/cgn_sess2.c \ - src/npf/cgnat/cgn_sess_state.c \ - src/npf/cgnat/cgn_source.c \ - src/npf/config/npf_attach_point.c \ - src/npf/config/npf_auto_attach.c \ - src/npf/config/npf_config.c \ - src/npf/config/npf_config_state.c \ - src/npf/config/npf_dump.c \ - src/npf/config/npf_gen_ruleset.c \ - src/npf/config/npf_rule_group.c \ - src/npf/config/npf_ruleset_type.c \ - src/npf/config/pmf_rule.c \ - src/npf/config/pmf_parse.c 
\ - src/npf/config/pmf_dump.c \ - src/npf/config/pmf_att_rlgrp.c \ - src/npf/config/pmf_hw.c \ - src/npf/fragment/ipv4_frag_tbl.c \ - src/npf/fragment/ipv4_rsmbl.c \ - src/npf/fragment/ipv6_rsmbl.c \ - src/npf/fragment/ipv6_rsmbl_tbl.c \ - src/npf/nat/nat_cmd_cfg.c \ - src/npf/nat/nat_cmd_op.c \ - src/npf/nat/nat_pool.c \ - src/npf/nat/nat_pool_event.c \ - src/npf/grouper2.c \ - src/npf/npf_nat64.c \ - src/npf/npf_addrgrp.c \ - src/npf/npf_apm.c \ - src/npf/npf_cache.c \ - src/npf/npf_cidr_util.c \ - src/npf/npf_cmd.c \ - src/npf/npf_cmd_cfg.c \ - src/npf/npf_cmd_op.c \ - src/npf/npf_dataplane_session.c \ - src/npf/npf_disassemble.c \ - src/npf/npf_event.c \ - src/npf/npf_icmp.c \ - src/npf/npf_if.c \ - src/npf/npf_if_feat.c \ - src/npf/npf_instr.c \ - src/npf/npf_mbuf.c \ - src/npf/npf_nat.c \ - src/npf/npf_ncgen.c \ - src/npf/npf_processor.c \ - src/npf/npf_ptree.c \ - src/npf/npf_rule_gen.c \ - src/npf/npf_ruleset.c \ - src/npf/npf_session.c \ - src/npf/npf_state.c \ - src/npf/npf_state_tcp.c \ - src/npf/npf_tblset.c \ - src/npf/npf_timeouts.c \ - src/npf/npf_vrf.c \ - src/npf/rproc/npf_ext_action_group.c \ - src/npf/rproc/npf_ext_counter.c \ - src/npf/rproc/npf_ext_ctrdef.c \ - src/npf/rproc/npf_ext_ctrref.c \ - src/npf/rproc/npf_ext_log.c \ - src/npf/rproc/npf_ext_mark.c \ - src/npf/rproc/npf_ext_nat64.c \ - src/npf/rproc/npf_ext_nptv6.c \ - src/npf/rproc/npf_ext_policer.c \ - src/npf/rproc/npf_ext_pathmon.c \ - src/npf/rproc/npf_ext_session_limit.c \ - src/npf/rproc/npf_ext_setvrf.c \ - src/npf/rproc/npf_ext_tag.c \ - src/npf/rproc/npf_rproc.c \ - src/npf_shim.c - -if USE_DPI -DPI_FILES = src/npf/dpi/dpi.c src/npf/rproc/npf_ext_dpi.c \ - src/npf/dpi/dpi_public.c src/npf/rproc/npf_ext_appfw.c \ - src/npf/dpi/app_cmds.c \ - src/npf/rproc/npf_ext_app.c -PIPELINE_NODE_FILES += src/pipeline/nodes/l3_dpi.c -else -DPI_FILES = src/npf/dpi/dpi_stubs.c -endif - -PL_GEN_FUSED_OPTS = \ - --include pl_fused_gen.h \ - --include nodes/pl_nodes_common.h \ - --include 
pl_fused.h \ - --entry vyatta:ether-forward \ - --entry vyatta:ether-in \ - --entry vyatta:arp-in-nothot \ - --entry vyatta:ipv4-validate \ - --entry vyatta:ipv6-validate \ - --entry vyatta:ipv4-route-lookup \ - --entry vyatta:ipv4-out \ - --entry vyatta:ipv6-out \ - --entry vyatta:ipv4-defrag-out-spath \ - --entry vyatta:ipv6-defrag-out-spath \ - --entry vyatta:ipv4-encap \ - --entry vyatta:ipv6-encap \ - --entry vyatta:ipv4-encap-only \ - --entry vyatta:ipv6-encap-only \ - --feature-point vyatta:ether-lookup \ - --feature-point vyatta:ipv4-validate \ - --feature-point vyatta:ipv4-route-lookup \ - --feature-point vyatta:ipv4-out \ - --feature-point vyatta:ipv4-encap \ - --feature-point vyatta:ipv6-validate \ - --feature-point vyatta:ipv6-route-lookup \ - --feature-point vyatta:ipv6-out \ - --feature-point vyatta:ipv6-encap - -src/pipeline/pl_fused_gen.h: $(PIPELINE_NODE_FILES) $(srcdir)/scripts/pl_gen_fused - $(AM_V_GEN)$(srcdir)/scripts/pl_gen_fused $(PL_GEN_FUSED_OPTS) --header-out $@ $(PIPELINE_NODE_FILES:%=$(srcdir)/%) -src/pipeline/pl_fused_gen.c: $(PIPELINE_NODE_FILES) $(srcdir)/scripts/pl_gen_fused src/pipeline/pl_fused_gen.h - $(AM_V_GEN)$(srcdir)/scripts/pl_gen_fused $(PL_GEN_FUSED_OPTS) --impl-out $@ $(PIPELINE_NODE_FILES:%=$(srcdir)/%) - -BUILT_SOURCES = src/pipeline/pl_fused_gen.h $(PROTO_FILES:.proto=.pb-c.h) $(SAMPLE_PROTO_FILES:.proto=.pb-c.h) -CLEANFILES = src/pipeline/pl_fused_gen.h src/pipeline/pl_fused_gen.c -CLEANFILES += $(PROTO_FILES:.proto=.pb-c.c) $(PROTO_FILES:.proto=.pb-c.h) -CLEANFILES += $(SAMPLE_PROTO_FILES:.proto=.pb-c.c) $(SAMPLE_PROTO_FILES:.proto=.pb-c.h) - -PROTO_FILES = \ - protobuf/DataplaneEnvelope.proto \ - protobuf/XConnectConfig.proto \ - protobuf/PPPOEConfig.proto \ - protobuf/TCPMSSConfig.proto \ - protobuf/PipelineStatsConfig.proto \ - protobuf/CryptoPolicyConfig.proto \ - protobuf/IPAddress.proto \ - protobuf/VFPSetConfig.proto \ - protobuf/cpp_rl.proto - -SAMPLE_PROTO_FILES = 
src/pipeline/nodes/sample/SampleFeatConfig.proto - -%.pb-c.c %.pb-c.h: %.proto - $(AM_V_GEN)@PROTOC_C@ -I=$(dir $^) --c_out=$(dir $@) $^ - -%_pb2.py: %.proto - $(AM_V_GEN)protoc -I=$(dir $^) --python_out=$(dir $@) $^ - -%.pm: %.proto - $(AM_V_GEN)$(srcdir)/scripts/vyatta-generate-pb-perl.pl $^ $(dir $@) - -protopythonmodulesdir=/usr/lib/python3/dist-packages/vyatta/proto -protopythonmodules_DATA = $(PROTO_FILES:%.proto=%_pb2.py) - -protoperlmodulesdir=/usr/share/perl5/vyatta/proto -protoperlmodules_DATA = $(PROTO_FILES:%.proto=%.pm) - -sbin_PROGRAMS = dataplane - -dataplane_SOURCES = $(CORE_FILES) $(FILES_NOT_FOR_TEST) $(CRYTPO_FILES) $(IPV6_FILES) $(NPF_FILES) $(MPLS_FILES) $(DPI_FILES) $(PIPELINE_FILES) $(SESSION_FILES) $(PROTO_FILES:.proto=.pb-c.c) - -dataplane_CPPFLAGS = $(AM_CPPFLAGS) -dataplane_CPPFLAGS += -I./src \ - -I$(srcdir)/include \ - -I$(srcdir)/src \ - -I$(srcdir)/src/npf \ - -I$(srcdir)/src/netinet6 \ - -I$(srcdir)/src/pipeline \ - -I$(builddir)/src/pipeline - -dataplane_CFLAGS = $(EXTRA_CFLAGS) $(DPDK_CFLAGS) $(DPI_CFLAGS) $(LIBMNL_CFLAGS) $(LIBCAP_CFLAGS) $(LIBCZMQ_CFLAGS) $(LIBZMQ_CFLAGS) $(LIBURCU_QSBR_CFLAGS) $(LIBURCU_CDS_CFLAGS) $(LIBOSIP2_CFLAGS) $(LIBCRYPTO_CFLAGS) -O3 $(DATAPLANE_LTO_FLAG) -DNDEBUG -DFUSED_MODE $(PTHREAD_CFLAGS) $(SYSTEMD_CFLAGS) -ffunction-sections - -dataplane_LDFLAGS = $(EXTRA_LDFLAGS) $(OTHER_LINKER_FLAGS) -dataplane_LDADD = $(DPDK_LIBS) $(DPI_LIBS) $(LIBMNL_LIBS) $(LIBCAP_LIBS) $(LIBCZMQ_LIBS) $(LIBZMQ_LIBS) $(LIBURCU_QSBR_LIBS) $(LIBURCU_CDS_LIBS) $(LIBOSIP2_LIBS) $(LIBCRYPTO_LIBS) $(PTHREAD_LIBS) $(SYSTEMD_LIBS) -ldl $(PROTOBUF_C_LIBS) -EXTRA_dataplane_DEPENDENCIES = $(srcdir)/dataplane.section-ordering - - -dataplane_test_SOURCES = $(CORE_FILES) $(CRYTPO_FILES) $(IPV6_FILES) $(NPF_FILES) $(MPLS_FILES) $(DPI_FILES) $(PIPELINE_FILES) $(SESSION_FILES) $(PROTO_FILES:.proto=.pb-c.c) $(SAMPLE_PROTO_FILES:.proto=.pb-c.c) -dataplane_test_SOURCES += \ - src/pipeline/nodes/sample/sample.c \ - 
tests/common/src/dp_test_pktmbuf_lib.c \ - tests/common/src/dp_test_crypto_lib.c \ - tests/whole_dp/src/dp_test.c \ - tests/whole_dp/src/dp_test_arp.c \ - tests/whole_dp/src/dp_test_bitmask.c \ - tests/whole_dp/src/dp_test_bridge.c \ - tests/whole_dp/src/dp_test_bridge_vlan_filter.c \ - tests/whole_dp/src/dp_test_bridge_n.c \ - tests/whole_dp/src/dp_test_cmd_check.c \ - tests/whole_dp/src/dp_test_cmd_state.c \ - tests/whole_dp/src/dp_test_console.c \ - tests/whole_dp/src/dp_test_controller.c \ - tests/whole_dp/src/dp_test_cpp_lim.c \ - tests/whole_dp/src/dp_test_cpp_lim_fal.c \ - tests/whole_dp/src/dp_test_cross_connect.c \ - tests/whole_dp/src/dp_test_crypto_block_policy.c \ - tests/whole_dp/src/dp_test_crypto_multi_tunnel.c \ - tests/whole_dp/src/dp_test_crypto_policy.c \ - tests/whole_dp/src/dp_test_crypto_site_to_site.c \ - tests/whole_dp/src/dp_test_crypto_site_to_site_passthru.c \ - tests/whole_dp/src/dp_test_crypto_utils.c \ - tests/whole_dp/src/dp_test_esp.c \ - tests/whole_dp/src/dp_test_fails.c \ - tests/whole_dp/src/dp_test_gre.c \ - tests/whole_dp/src/dp_test_gre6.c \ - tests/whole_dp/src/dp_test_if_config.c \ - tests/whole_dp/src/dp_test_intf_incomplete.c \ - tests/whole_dp/src/dp_test_ip.c \ - tests/whole_dp/src/dp_test_ip_arp.c \ - tests/whole_dp/src/dp_test_ip_n.c \ - tests/whole_dp/src/dp_test_ip6.c \ - tests/whole_dp/src/dp_test_ip6_icmp.c \ - tests/whole_dp/src/dp_test_ip6_neigh.c \ - tests/whole_dp/src/dp_test_ip_icmp.c \ - tests/whole_dp/src/dp_test_ip_multicast.c \ - tests/whole_dp/src/dp_test_json_utils.c \ - tests/whole_dp/src/dp_test_lib.c \ - tests/whole_dp/src/dp_test_lib_cmd.c \ - tests/whole_dp/src/dp_test_lib_exp.c \ - tests/whole_dp/src/dp_test_lib_intf.c \ - tests/whole_dp/src/dp_test_lib_pkt.c \ - tests/whole_dp/src/dp_test_lib_portmonitor.c \ - tests/whole_dp/src/dp_test_lib_tcp.c \ - tests/whole_dp/src/dp_test_missed_netlink.c \ - tests/whole_dp/src/dp_test_mpls.c \ - tests/whole_dp/src/dp_test_mstp_cmds.c \ - 
tests/whole_dp/src/dp_test_mstp_fwd.c \ - tests/whole_dp/src/dp_test_nat.c \ - tests/whole_dp/src/dp_test_netlink_state.c \ - tests/whole_dp/src/dp_test_npf_commands.c \ - tests/whole_dp/src/dp_test_npf_feat.c \ - tests/whole_dp/src/dp_test_npf_defrag.c \ - tests/whole_dp/src/dp_test_npf_golden.c \ - tests/whole_dp/src/dp_test_npf_bridge.c \ - tests/whole_dp/src/dp_test_npf_cgnat.c \ - tests/whole_dp/src/dp_test_npf_dscp.c \ - tests/whole_dp/src/dp_test_npf_tblset.c \ - tests/whole_dp/src/dp_test_npf_addrgrp.c \ - tests/whole_dp/src/dp_test_npf_fw.c \ - tests/whole_dp/src/dp_test_npf_fw_ipv6.c \ - tests/whole_dp/src/dp_test_npf_fw_lib.c \ - tests/whole_dp/src/dp_test_npf_hairpin.c \ - tests/whole_dp/src/dp_test_npf_icmp.c \ - tests/whole_dp/src/dp_test_npf_lib.c \ - tests/whole_dp/src/dp_test_npf_local.c \ - tests/whole_dp/src/dp_test_npf_mbuf.c \ - tests/whole_dp/src/dp_test_npf_ptree.c \ - tests/whole_dp/src/dp_test_npf_nat.c \ - tests/whole_dp/src/dp_test_npf_nat64.c \ - tests/whole_dp/src/dp_test_npf_nat_lib.c \ - tests/whole_dp/src/dp_test_npf_nptv6.c \ - tests/whole_dp/src/dp_test_npf_alg_ftp.c \ - tests/whole_dp/src/dp_test_npf_alg_lib.c \ - tests/whole_dp/src/dp_test_npf_alg_rpc.c \ - tests/whole_dp/src/dp_test_npf_alg_tftp.c \ - tests/whole_dp/src/dp_test_npf_qos.c \ - tests/whole_dp/src/dp_test_npf_portmap_lib.c \ - tests/whole_dp/src/dp_test_npf_prot_group.c \ - tests/whole_dp/src/dp_test_npf_ruleset_state.c \ - tests/whole_dp/src/dp_test_npf_sess_lib.c \ - tests/whole_dp/src/dp_test_npf_session_limit.c \ - tests/whole_dp/src/dp_test_npf_snat_overrun.c \ - tests/whole_dp/src/dp_test_npf_tcp.c \ - tests/whole_dp/src/dp_test_pbr.c \ - tests/whole_dp/src/dp_test_pipeline.c \ - tests/whole_dp/src/dp_test_poe_cmds.c \ - tests/whole_dp/src/dp_test_portmonitor_commands.c \ - tests/whole_dp/src/dp_test_portmonitor.c \ - tests/whole_dp/src/dp_test_ppp.c \ - tests/whole_dp/src/dp_test_ptp.c \ - tests/whole_dp/src/dp_test_qos_basic.c \ - 
tests/whole_dp/src/dp_test_qos_lib.c \ - tests/whole_dp/src/dp_test_route_broker.c \ - tests/whole_dp/src/dp_test_route_tracker.c \ - tests/whole_dp/src/dp_test_slow_path.c \ - tests/whole_dp/src/dp_test_session_lib.c \ - tests/whole_dp/src/dp_test_session.c \ - tests/whole_dp/src/dp_test_session_cmds.c \ - tests/whole_dp/src/dp_test_sfp.c \ - tests/whole_dp/src/dp_test_storm_ctl.c \ - tests/whole_dp/src/dp_test_str.c \ - tests/whole_dp/src/dp_test_stubs.c \ - tests/whole_dp/src/dp_test_stubs_linux.c \ - tests/whole_dp/src/dp_test_switch.c \ - tests/whole_dp/src/dp_test_switch_vlan.c \ - tests/whole_dp/src/dp_test_tcp_mss_clamp.c \ - tests/whole_dp/src/dp_test_vrf.c \ - tests/whole_dp/src/dp_test_vti.c \ - tests/whole_dp/src/dp_test_vxlan.c \ - tests/whole_dp/src/dp_test_wrapped_funcs.c \ - tests/whole_dp/src/dp_test_xfrm.c - -fal_plugin_test_la_SOURCES = \ - tests/whole_dp/src/fal_plugin_test.c \ - tests/whole_dp/src/fal_plugin_sw_port.c \ - tests/whole_dp/src/fal_plugin_framer.c \ - tests/whole_dp/src/fal_plugin_qos.c \ - tests/whole_dp/src/fal_plugin_pm.c \ - tests/whole_dp/src/fal_plugin_policer.c \ - tests/whole_dp/src/fal_plugin_cpp_limiter.c \ - tests/whole_dp/src/fal_plugin_ptp.c - -pkginclude_HEADERS = \ - include/json_writer.h \ - include/pl_common.h \ - include/pl_node.h \ - include/compiler.h \ - include/protobuf.h \ - include/fal_plugin.h \ - include/bridge_flags.h \ - include/bridge_vlan_set.h - -pkgplugindir = $(pkglibdir)/pipeline/plugins -pkgplugin_LTLIBRARIES = sample_plugin.la -sample_plugin_la_SOURCES = src/pipeline/nodes/sample/sample.c $(SAMPLE_PROTO_FILES:.proto=.pb-c.c) -sample_plugin_la_CPPFLAGS = -I$(srcdir)/include -I$(builddir)/src/pipeline/nodes/sample $(DPDK_CFLAGS) -I$(builddir)/src/pipeline/nodes/sample -sample_plugin_la_LDFLAGS = -avoid-version -module -shared -export-dynamic - -@CODE_COVERAGE_RULES@ - -@VALGRIND_CHECK_RULES@ -VALGRIND_FLAGS = --trace-children=yes -VALGRIND_SUPPRESSIONS_FILES = 
$(srcdir)/tests/whole_dp/valgrind_suppressions -EXTRA_DIST = $(VALGRIND_SUPPRESSIONS_FILES) - -dataplane_test_CPPFLAGS = $(dataplane_CPPFLAGS) $(JSON_C_CFLAGS) -dataplane_test_CPPFLAGS += -I$(srcdir)/tests/whole_dp/src -I$(srcdir)/tests/common/inc -dataplane_test_CPPFLAGS += $(sample_plugin_la_CPPFLAGS) - -dataplane_test_CFLAGS = $(dataplane_CFLAGS) -fno-lto -UNDEBUG -O0 -g $(CODE_COVERAGE_CFLAGS) -dataplane_test_CFLAGS += -Wno-unused-parameter - -dataplane_test_LDADD = $(dataplane_LDADD) $(CHECK_LIBS) $(JSON_C_LIBS) $(CODE_COVERAGE_LIBS) - -dataplane_test_LDFLAGS = $(dataplane_LDFLAGS) -dataplane_test_LDFLAGS += -Wl,-wrap,main -dataplane_test_LDFLAGS += -Wl,-wrap,RAND_bytes -dataplane_test_LDFLAGS += -Wl,-wrap,rte_pktmbuf_pool_create -dataplane_test_LDFLAGS += -Wl,-wrap,rte_mempool_create -dataplane_test_LDFLAGS += -Wl,-wrap,rte_eal_init -dataplane_test_LDFLAGS += -Wl,-wrap,popen -dataplane_test_LDFLAGS += -Wl,-wrap,pclose -EXTRA_dataplane_test_DEPENDENCIES = $(EXTRA_dataplane_DEPENDENCIES) tests/whole_dp/dummyfs/run/dataplane/platform.conf - -dist_check_SCRIPTS = tests/whole_dp/dataplane_test.sh -fal_plugin_test_la_CPPFLAGS = $(dataplane_CPPFLAGS) $(JSON_C_CFLAGS) -fal_plugin_test_la_CPPFLAGS += -I$(srcdir)/tests/whole_dp/src -I$(srcdir)/tests/common/inc $(SWPORT_CFLAGS) - -fal_plugin_test_la_CFLAGS = $(dataplane_CFLAGS) -fPIC -UNDEBUG -O0 -g $(CODE_COVERAGE_CFLAGS) -fal_plugin_test_la_CFLAGS += -Wno-unused-parameter - -fal_plugin_test_la_LIBADD = $(dataplane_LDADD) $(CHECK_LIBS) $(JSON_C_LIBS) $(CODE_COVERAGE_LIBS) - -fal_plugin_test_la_LDFLAGS = $(EXTRA_LDFLAGS) $(SWPORT_LIBS) -fal_plugin_test_la_LDFLAGS += -avoid-version -module -shared -export-dynamic -fal_plugin_test_la_LDFLAGS += -export-symbols $(srcdir)/tests/whole_dp/src/fal_plugin_test.sym - -tests/whole_dp/dummyfs/run/dataplane/platform.conf: - mkdir -p `dirname $@` - echo '[dataplane]' > $@ - echo 'fal_plugin = .libs/fal_plugin_test.so' >> $@ - -if WHOLE_DP_TEST -pkglib_LTLIBRARIES = 
fal_plugin_test.la -check_PROGRAMS = dataplane_test -TESTS = $(dist_check_SCRIPTS) -endif - -DATAPLANE_TEST_ARGS = $(__dataplane_test_args_@AM_V@) -__dataplane_test_args_ = $(__dataplane_test_args_@AM_DEFAULT_V@) -__dataplane_test_args_0 = "-d0" -__dataplane_test_args_1 = "-d2" - -dataplane_test_run: dataplane_test - catchsegv ./dataplane_test $(DATAPLANE_TEST_ARGS) - -.PHONY: dataplane_test_run - -vyattasysconfdir=$(sysconfdir)/vyatta -vyattasysconf_DATA = dataplane-drivers.conf - -vyattadatadir=$(datadir)/vyatta -vyattadata_DATA = dataplane-drivers-default.conf - -vyattadpdatadir=$(datadir)/vyatta-dataplane/protobuf -vyattadpdata_DATA = $(PROTO_FILES) diff --git a/README.md b/README.md index c774e2c4..0d1f74d0 100644 --- a/README.md +++ b/README.md @@ -8,3 +8,47 @@ fast path by use of the [Intel DPDK][0]. Please see [CONTRIBUTING.md](CONTRIBUTING.md) for details of coding requirements. [0]: http://dpdk.org/ "Data Plane Development Kit" + +## Package Maintenance + +### Changelog Merging +When merging branches, particularly master -> master-next, conflicts can often be +generated due to differing changelog entries. The `dpkg-mergechangelogs` tool can +automatically resolve conflicts in debian/changelog and ensure consistent ordering +of changelog entries. + +This repository is configured to use `dpkg-mergechangelogs` to resolve conflicts in +debian/changelog. However to make use of it, a maintainer must install the tool and +enable it. 
From `dpkg-mergechangelogs(1)`: + +> INTEGRATION WITH GIT +> +> If you want to use this program to merge Debian changelog files in a +> git repository, you have first to register a new merge driver in +> .git/config or ~/.gitconfig: +> +> [merge "dpkg-mergechangelogs"] +> name = debian/changelog merge driver +> driver = dpkg-mergechangelogs -m %O %A %B %A + +## Source Structure + +| Directory | Description | +| --------------- | ----------- | +| include | Header files which form part of the public API | +| protobuf | Google Protocol Buffers message formats which form part of the public API | +| scripts | Development and build scripts | +| src/crypto | IPSec crypto processing | +| src/if/bridge | Bridge/switch interface type implementation | +| src/if/dpdk-eth | DPDK ethernet interface type implementation | +| src/l2tp | L2TP interface and processing | +| src/session | L4 Session Manager | +| src/mpls | MultiProtocol Label Switching processing | +| src/npf | Firewall, NAT, QoS classification & L3 ACL features | +| src/netinet | IPv4 protocol processing | +| src/netinet6 | IPv6 protocol processing | +| src/pathmonitor | Path monitoring feature | +| src/pipeline | Forwarding pipeline infrastructure | +| src/portmonitor | Port monitoring feature (packet mirroring) | +| tests/whole_dp | Grey-box testing of the dataplane as a unit | +| tools | Scripts that are installed to help the dataplane service | diff --git a/autogen.sh b/autogen.sh deleted file mode 100755 index 58e4c017..00000000 --- a/autogen.sh +++ /dev/null @@ -1,2 +0,0 @@ -#! 
/bin/sh -autoreconf --install || exit 1 diff --git a/common.mk b/common.mk deleted file mode 100644 index e1eed960..00000000 --- a/common.mk +++ /dev/null @@ -1,5 +0,0 @@ -AM_CPPFLAGS = \ - -DVYATTA_SYSCONF_DIR='"$(sysconfdir)/vyatta"' \ - -DVYATTA_DATA_DIR='"$(datadir)/vyatta"' \ - -DPKGLIB_DIR='"$(pkglibdir)"' \ - -include build_config.h # Details of the build configuration diff --git a/configure.ac b/configure.ac deleted file mode 100644 index ea97be21..00000000 --- a/configure.ac +++ /dev/null @@ -1,423 +0,0 @@ - -# -*- Autoconf -*- -# Process this file with autoconf to produce a configure script. - -AC_PREREQ([2.69]) -# NOTE: We don't make use of AC_PACKAGE_VERSION so always set to 1.0.0 -AC_INIT([Vyatta-Dataplane], [m4_esyscmd_s([dpkg-parsechangelog -S Version])]) -AC_CONFIG_MACRO_DIR([m4]) -AC_CONFIG_HEADERS([build_config.h]) -AM_INIT_AUTOMAKE([1.13.2 subdir-objects foreign -Wall tar-ustar]) -AM_SILENT_RULES([yes]) - -AC_USE_SYSTEM_EXTENSIONS - -# Checks for programs. -AC_GNU_SOURCE -AC_PROG_CC -# not explicitly required since AM 1.14 but kept for backwards compatibility -AM_PROG_CC_C_O -AX_CHECK_COMPILE_FLAG([-std=gnu11], [CFLAGS+=" -std=gnu11"], [AC_PROG_CC_C99]) -AC_PROG_CPP -AC_PROG_CXX -AX_CXX_COMPILE_STDCXX(11) -AM_PROG_AR -LT_INIT([dlopen]) -AC_PROG_MAKE_SET -AX_PTHREAD - -# Exactly match the architecture flags used by dpdk or inlining errors during build -AC_CANONICAL_HOST -AS_CASE([$host_cpu], - [x86_64], [EXTRA_CFLAGS="$EXTRA_CFLAGS -m64 -march=corei7" EXTRA_CXXFLAGS="$EXTRA_CXXFLAGS -m64 -march=corei7"], - [AC_MSG_ERROR([unsupported CPU architecture: $host_cpu])] - ) - -COMMON_WARNINGS="-Wall -Wextra - -Wundef - -Wwrite-strings - -Wpointer-arith - -Wmissing-declarations - -Wredundant-decls - -Wno-missing-field-initializers - -Wformat=2 - -Wformat-nonliteral - -Wformat-security - -Wsign-compare - -Wstrict-aliasing - -Wmissing-format-attribute - -Wmissing-noreturn - -Winit-self - -Wredundant-decls - -Wmissing-include-dirs - -Wunused-but-set-variable 
- -Warray-bounds - -Wreturn-type - -Wlogical-op - -Wno-deprecated-declarations - -Wno-suggest-attribute=format" - -AC_LANG_PUSH([C]) -AX_CHECK_COMPILE_FLAG([-Werror=unknown-warning-option], -[extra_test_flag="-Werror=unknown-warning-option"], -[extra_test_flag=""]) -AX_APPEND_COMPILE_FLAGS([ $COMMON_WARNINGS \ - -Wnested-externs \ - -Wmissing-prototypes \ - -Wstrict-prototypes \ - -Wold-style-definition \ - -Wimplicit-function-declaration \ - -Wno-stringop-overflow \ - -Wno-stringop-truncation \ - -Wno-format-truncation \ - -Wno-format-nonliteral \ - -Wno-format-overflow \ - -Wno-tautological-constant-out-of-range-compare \ - ], EXTRA_CFLAGS, [$extra_test_flag]) -AX_APPEND_FLAG([-Werror], EXTRA_CFLAGS) -AC_SUBST(EXTRA_CFLAGS) -AC_LANG_POP([C]) - -AC_LANG_PUSH([C++]) -AX_CHECK_COMPILE_FLAG([-Werror=unknown-warning-option], -[extra_test_flag="-Werror=unknown-warning-option"], -[extra_test_flag=""]) -AX_APPEND_COMPILE_FLAGS([ $COMMON_WARNINGS \ - -Wno-overloaded-virtual \ - -Wno-deprecated-register \ - ], EXTRA_CXXFLAGS, [$extra_test_flag]) -AX_APPEND_FLAG([-Werror], EXTRA_CXXFLAGS) -AC_SUBST(EXTRA_CXXFLAGS) -AC_LANG_POP([C++]) - -AX_CODE_COVERAGE - -AX_VALGRIND_DFLT([sgcheck], [off]) -AX_VALGRIND_DFLT([helgrind], [off]) -AX_VALGRIND_DFLT([drd], [off]) -AX_VALGRIND_DFLT([memcheck], [on]) -AX_VALGRIND_CHECK - -# Checks for libraries. 
-PKG_CHECK_MODULES(CPPUTEST, [cpputest >= 3.8], , [ - PKG_CHECK_MODULES(CPPUTEST, [cpputest >= 3.4], - AC_DEFINE([CPPUTEST_LT_3_8], 1, [CppUtest < 3.8 detected]) - ) -]) -PKG_CHECK_MODULES(LIBCAP, [libcap], , [ - AC_SEARCH_LIBS([cap_get_proc], [cap], , [ - AC_MSG_ERROR([No library 'libcap' found]) - ]) -]) - -AC_SEARCH_LIBS([ini_parse_file], [inih], , [ - AC_MSG_ERROR([unable to find the ini_parse_file() function])]) -AC_CHECK_HEADER([ini.h]) - -PKG_CHECK_MODULES(CHECK, [check]) -PKG_CHECK_MODULES(JSON_C, [json-c]) -PKG_CHECK_MODULES(LIBCRYPTO, [libcrypto]) -PKG_CHECK_MODULES(LIBCZMQ, [libczmq >= 3.0.2]) -PKG_CHECK_MODULES(LIBZMQ, [libzmq >= 4.0.4]) -PKG_CHECK_MODULES(LIBURCU_CDS, [liburcu-cds >= 0.8.0]) -PKG_CHECK_MODULES(LIBURCU_QSBR, [liburcu-qsbr >= 0.8.0]) -PKG_CHECK_MODULES(LIBMNL, [libmnl]) -PKG_CHECK_MODULES(LIBOSIP2, [libosip2]) -PKG_CHECK_MODULES_STATIC(DPDK, [libdpdk >= 17.05]) -PKG_CHECK_MODULES(SWPORT, [vyatta-dpdk-swport >= 0.1.3]) -PKG_CHECK_MODULES([PROTOBUF_C], [libprotobuf-c >= 1.0.0]) - -AC_ARG_VAR([PROTOC_C], [protobuf compiler command]) -AC_PATH_PROG([PROTOC_C], [protoc-c], [], - [`$PKG_CONFIG --variable=exec_prefix libprotobuf-c`/bin:$PATH]) -if test -z "$PROTOC_C"; then - AC_MSG_ERROR([Could not find protoc-c]) -fi - -AC_ARG_WITH([dpi], AS_HELP_STRING([--without-dpi], [Build without qosmos-dpi library (default: test)])) - -AS_IF([test "x$with_dpi" != "xno"], [ - PKG_CHECK_MODULES(DPI, [libqosmos-dpi >= 5.1.0] , [HAVE_DPI=1] , [HAVE_DPI=0] - AC_MSG_WARN([DPI engine not found])) -]) - -AM_CONDITIONAL([USE_DPI], [test "$HAVE_DPI" = 1]) - -# Always use the Gold linker so we can specify text section locations with/without LTO -AX_CHECK_LINK_FLAG([-fuse-ld=gold], [LDFLAGS+=" -fuse-ld=gold"], - [AC_MSG_ERROR([Gold linker required])]) - -AC_ARG_WITH([lto], AS_HELP_STRING([--without-lto], [Build without link time optimsation])) -AS_IF([test "x$with_lto" != "xno"], - # Do parallel LTO if the compiler supports it as it's quicker - 
[AX_CHECK_COMPILE_FLAG([-Werror -flto=1], [AC_SUBST(DATAPLANE_LTO_FLAG, '-flto=$(shell nproc)')], - [AC_SUBST(DATAPLANE_LTO_FLAG, '-flto')]) -]) - -AC_CHECK_HEADERS(valgrind/memcheck.h) -AC_CHECK_HEADERS(openssl/hmac.h) -AC_CHECK_DECLS([HMAC_CTX_new],,,[#include]) - -AC_CHECK_MEMBERS([struct bpf_aux_data.vlan_tag], [], - [AC_MSG_ERROR([No library 'libpcap' with 'struct bpf_aux_data' found])], - [[#include ]]) - -CFLAGS_BACK=$CFLAGS -CFLAGS+=" $DPDK_CFLAGS -Werror" -AC_MSG_CHECKING([whether DPDK's port IDs are uint16_t]) -AC_COMPILE_IFELSE( - [AC_LANG_PROGRAM( - [[ -#include -int foo(uint16_t a, enum rte_eth_event_type b, void *c, void *d) -{ - return 0; -} - ]], - [[ -rte_eth_dev_callback_register(0, 0, foo, NULL); - ]]) - ],[ - AC_MSG_RESULT([yes]) - AC_DEFINE(HAVE_RTE_PORT_ID_16_BITS, [1], - [whether DPDK's port IDs are uint16_t]) - ],[ - AC_MSG_RESULT([no]) -]) -CFLAGS=$CFLAGS_BACK - -CFLAGS_BACK=$CFLAGS -CFLAGS+=" $DPDK_CFLAGS -Werror" -AC_MSG_CHECKING([whether rte_mempool_generic_get/put have a flags argument]) -AC_COMPILE_IFELSE( - [AC_LANG_PROGRAM( - [[ -#include - ]], - [[ -rte_mempool_generic_get(NULL, NULL, 0, NULL, 0); - ]]) - ],[ - AC_MSG_RESULT([yes]) - AC_DEFINE(HAVE_RTE_MEMPOOL_GENERIC_FLAGS, [1], - [whether rte_mempool_generic_get/put have a flags argument]) - ],[ - AC_MSG_RESULT([no]) -]) -CFLAGS=$CFLAGS_BACK - -CFLAGS_BACK=$CFLAGS -CFLAGS+=" $DPDK_CFLAGS -Werror" -AC_MSG_CHECKING([whether rte_eth_dev_reset has non-blocking flag]) -AC_COMPILE_IFELSE( - [AC_LANG_PROGRAM( - [[ -#include - ]], - [[ -rte_eth_dev_reset(0, 0); - ]]) - ],[ - AC_MSG_RESULT([yes]) - AC_DEFINE(HAVE_RTE_ETH_DEV_RESET_2_ARGS, [1], - [whether rte_eth_dev_reset has non-blocking flag]) - ],[ - AC_MSG_RESULT([no]) -]) -CFLAGS=$CFLAGS_BACK - -CFLAGS_BACK=$CFLAGS -CFLAGS+=" $DPDK_CFLAGS -Werror" -AC_MSG_CHECKING([whether rte_mempool_virt2iova is available]) -AC_COMPILE_IFELSE( - [AC_LANG_PROGRAM( - [[ -#include - ]], - [[ -rte_mempool_virt2iova(0); - ]]) - ],[ - AC_MSG_RESULT([yes]) 
- AC_DEFINE(HAVE_RTE_MEMPOOL_VIRT2IOVA, [1], - [whether rte_mempool_virt2iova is available]) - ],[ - AC_MSG_RESULT([no]) -]) -CFLAGS=$CFLAGS_BACK - -CFLAGS_BACK=$CFLAGS -CFLAGS+=" $DPDK_CFLAGS -Werror" -AC_MSG_CHECKING([whether rte_eal_cleanup is available]) -AC_COMPILE_IFELSE( - [AC_LANG_PROGRAM( - [[ -#include - ]], - [[ -rte_eal_cleanup(); - ]]) - ],[ - AC_MSG_RESULT([yes]) - AC_DEFINE(HAVE_RTE_EAL_CLEANUP, [1], - [whether rte_eal_cleanup is available]) - ],[ - AC_MSG_RESULT([no]) -]) -CFLAGS=$CFLAGS_BACK - -CFLAGS_BACK=$CFLAGS -CFLAGS+=" $DPDK_CFLAGS -Werror" -AC_MSG_CHECKING([whether rte_eth_dev_count_avail is available]) -AC_COMPILE_IFELSE( - [AC_LANG_PROGRAM( - [[ -#include - ]], - [[ -rte_eth_dev_count_avail(); - ]]) - ],[ - AC_MSG_RESULT([yes]) - AC_DEFINE(HAVE_RTE_ETH_DEV_COUNT_AVAIL, [1], - [whether rte_eth_dev_count_avail is available]) - ],[ - AC_MSG_RESULT([no]) -]) -CFLAGS=$CFLAGS_BACK - -CFLAGS_BACK=$CFLAGS -CFLAGS+=" $DPDK_CFLAGS -Werror -Wno-deprecated-declarations" -AC_MSG_CHECKING([whether rte_devargs_add is available]) -AC_COMPILE_IFELSE( - [AC_LANG_PROGRAM( - [[ -#include - ]], - [[ -rte_devargs_add(0, 0); - ]]) - ],[ - AC_MSG_RESULT([yes]) - AC_DEFINE(HAVE_RTE_DEVARGS_ADD, [1], - [whether rte_devargs_add is available]) - ],[ - AC_MSG_RESULT([no]) -]) -CFLAGS=$CFLAGS_BACK - -CFLAGS_BACK=$CFLAGS -CFLAGS+=" $DPDK_CFLAGS -Werror -Wno-deprecated-declarations" -AC_MSG_CHECKING([whether rte_meter_srtcm_profile_config is available]) -AC_COMPILE_IFELSE( - [AC_LANG_PROGRAM( - [[ -#include - ]], - [[ -rte_meter_srtcm_profile_config(0, 0); - ]]) - ],[ - AC_MSG_RESULT([yes]) - AC_DEFINE(HAVE_RTE_METER_SRTCM_PROFILE_CONFIG, [1], - [whether rte_meter_srtcm_profile_config is available]) - ],[ - AC_MSG_RESULT([no]) -]) -CFLAGS=$CFLAGS_BACK - -CFLAGS_BACK=$CFLAGS -CFLAGS+=" $DPDK_CFLAGS -Werror -Wno-deprecated-declarations" -AC_MSG_CHECKING([whether rte_dev_remove is available]) -AC_COMPILE_IFELSE( - [AC_LANG_PROGRAM( - [[ -#include - ]], - [[ 
-rte_dev_remove(0); - ]]) - ],[ - AC_MSG_RESULT([yes]) - AC_DEFINE(HAVE_RTE_DEV_REMOVE, [1], - [whether rte_dev_remove is available]) - ],[ - AC_MSG_RESULT([no]) -]) -CFLAGS=$CFLAGS_BACK - -CFLAGS_BACK=$CFLAGS -CFLAGS+=" $DPDK_CFLAGS -Werror -Wno-deprecated-declarations" -AC_MSG_CHECKING([whether rte_dev_probe is available]) -AC_COMPILE_IFELSE( - [AC_LANG_PROGRAM( - [[ -#include - ]], - [[ -rte_dev_probe(0); - ]]) - ],[ - AC_MSG_RESULT([yes]) - AC_DEFINE(HAVE_RTE_DEV_PROBE, [1], - [whether rte_dev_probe is available]) - ],[ - AC_MSG_RESULT([no]) -]) -CFLAGS=$CFLAGS_BACK - -CFLAGS_BACK=$CFLAGS -CFLAGS+=" $DPDK_CFLAGS -Werror" -AC_CHECK_HEADERS(rte_bus_pci.h) -AC_CHECK_HEADERS(rte_ethdev_driver.h) -CFLAGS=$CFLAGS_BACK - -CFLAGS_BACK=$CFLAGS -CFLAGS+=" $DPDK_CFLAGS -Werror" -AC_CHECK_MEMBER([struct rte_eth_dev_info.device], [CFLAGS_BACK="$CFLAGS_BACK -DHAVE_RTE_ETH_DEV_INFO_DEVICE"], [], [[#include ]]) -CFLAGS=$CFLAGS_BACK - -AC_SUBST(COPYRIGHT_YEAR, [`date +%Y`]) - -AC_ARG_WITH([systemd], - AS_HELP_STRING([--without-systemd], [Ignore presence of systemd and disable it])) - -AS_IF([test "x$with_systemd" != "xno"], - [PKG_CHECK_MODULES(SYSTEMD, [libsystemd], [have_systemd=yes], [have_systemd=no])], - [have_systemd=no]) - -AS_IF([test "x$have_systemd" = "xyes"], - [AC_DEFINE(HAVE_SYSTEMD, 1, [Have systemd])], - [AS_IF([test "x$with_systemd" = "xyes"], - [AC_MSG_ERROR([systemd requested but not found]) - ]) -]) - -# Allow whole_dp unit tests to be disabled as they have issues -# running in a chroot without /proc and /sys, which is the case when -# running dpkg-buildpackage in OBS. 
-AC_ARG_ENABLE([whole_dp_test], -AS_HELP_STRING([--disable-whole_dp_test], [disable whole_dp unit tests]), -[case "${enableval}" in - yes) whole_dp_test=true ;; - no) whole_dp_test=false ;; - *) AC_MSG_ERROR([bad value ${enableval} for --enable-whole_dp_test]) ;; -esac],[whole_dp_test=true]) -AM_CONDITIONAL([WHOLE_DP_TEST], [test "x$whole_dp_test" = "xtrue"]) - -# Needed so dataplane_test can find it's hardcoded config -# files during a VPATH build. -AC_CONFIG_LINKS(dataplane-drivers-default.conf:dataplane-drivers-default.conf) -AC_CONFIG_LINKS(tests/whole_dp/dummyfs/proc/cpuinfo:tests/whole_dp/dummyfs/proc/cpuinfo) -AC_CONFIG_LINKS(tests/whole_dp/dummyfs/sys/bus/pci/devices/.dummy:tests/whole_dp/dummyfs/sys/bus/pci/devices/.dummy) -AC_CONFIG_LINKS(tests/whole_dp/dummyfs/sys/devices/system/cpu/online:tests/whole_dp/dummyfs/sys/devices/system/cpu/online) -AC_CONFIG_LINKS(tests/whole_dp/dummyfs/sys/devices/system/cpu/cpu0/topology/core_id:tests/whole_dp/dummyfs/sys/devices/system/cpu/cpu0/topology/core_id) -AC_CONFIG_LINKS(tests/whole_dp/dummyfs/sys/devices/system/cpu/cpu0/topology/physical_package_id:tests/whole_dp/dummyfs/sys/devices/system/cpu/cpu0/topology/physical_package_id) -AC_CONFIG_LINKS(tests/whole_dp/dummyfs/sys/devices/system/cpu/cpu0/topology/thread_siblings:tests/whole_dp/dummyfs/sys/devices/system/cpu/cpu0/topology/thread_siblings) -AC_CONFIG_LINKS(tests/whole_dp/dummyfs/sys/devices/system/cpu/cpu1/topology/core_id:tests/whole_dp/dummyfs/sys/devices/system/cpu/cpu1/topology/core_id) -AC_CONFIG_LINKS(tests/whole_dp/dummyfs/sys/devices/system/cpu/cpu1/topology/physical_package_id:tests/whole_dp/dummyfs/sys/devices/system/cpu/cpu1/topology/physical_package_id) -AC_CONFIG_LINKS(tests/whole_dp/dummyfs/sys/devices/system/cpu/cpu1/topology/thread_siblings:tests/whole_dp/dummyfs/sys/devices/system/cpu/cpu1/topology/thread_siblings) -AC_CONFIG_LINKS(tests/whole_dp/dummyfs/sys/module/.dummy:tests/whole_dp/dummyfs/sys/module/.dummy) - -AC_CONFIG_FILES([Makefile 
src/version.h]) -AC_OUTPUT diff --git a/dataplane-drivers-default.conf b/dataplane-drivers-default.conf index 803c8cbe..873733b8 100644 --- a/dataplane-drivers-default.conf +++ b/dataplane-drivers-default.conf @@ -95,17 +95,22 @@ tx_desc=128 [mlx5_100] max_rxq=16 rx_desc=2048 -tx_desc=128 +tx_desc=1024 +tx_desc_vm_multiplier=4 +rx_offloads=keep_crc +tx_offloads=!dev_tx_offload_vlan_insert [mlx5_40] max_rxq=4 rx_desc=2048 tx_desc=128 +tx_offloads=!dev_tx_offload_vlan_insert [mlx5] max_rxq=2 rx_desc=2048 tx_desc=128 +tx_offloads=!dev_tx_offload_vlan_insert [nicvf] max_rxq=4 @@ -117,10 +122,16 @@ max_rxq=2 rx_desc=2048 tx_desc=128 +# The ixgbe LSC interrupt tells the driver that the link status +# is changing. The current DPDK driver schedules an event in the +# near future to check the status. However, empirical testing has +# shown that the delay is too short. Instead of trying to pick +# some ideal delay time, use the polling behavior instead. [ixgbe] max_rxq=2 rx_desc=2048 tx_desc=512 +dev_flags=!rte_eth_dev_intr_lsc [bnx2x] max_rxq=2 @@ -146,6 +157,7 @@ rx_desc=256 tx_desc=256 virtual=yes limit-txq=yes +rx_mq_mode=eth_mq_rx_none [igb] max_rxq=1 @@ -168,6 +180,10 @@ use_all_txq=yes # 8192+512(tx_desc)+2048(PKT_RING_SIZE), with # DATAPLANE_SLAVE_MULTIPLIER, mbufs is 18944 and aligns # to the next power of 2, it is 32768 mbufs. +# +# net_bonding can't advertise DEV_TX_OFFLOAD_MULTI_SEGS +# until after the first member is added. However, if we +# are able to configure the member it doesn't matter. [bonding] max_rxq=2 rx_desc=2048 @@ -175,14 +191,35 @@ tx_desc=512 extra=2048 virtual=yes limit-txq=yes +tx_offloads=!dev_tx_offload_multi_segs +# limit-txq must be configured to yes because the unaccelerated +# version of the netvsc PMD assumes you will always have the +# same number of RX and TX queues. [netvsc] -max_rxq=1 -rx_desc=320 -tx_desc=320 +max_rxq=2 +rx_desc=2048 +tx_desc=2048 virtual=yes limit-txq=yes +# unit testing only; no tx_offloads necessary. 
+[net_ring] +max_rxq=2 +rx_desc=512 +tx_desc=128 +tx_offloads=!dev_tx_offload_multi_segs,!dev_tx_offload_vlan_insert + +[cxgbe] +max_rxq=2 +rx_desc=2048 +tx_desc=512 + +[ena] +max_rxq=4 +rx_desc=2048 +tx_desc=512 + [default] max_rxq=2 rx_desc=512 diff --git a/debian/bin/debug_strip b/debian/bin/debug_strip index 20053317..e377a299 100755 --- a/debian/bin/debug_strip +++ b/debian/bin/debug_strip @@ -9,11 +9,11 @@ if [[ $# -lt 1 ]] then - echo "Usage: $0 [--dbg-package=package]" + echo "Usage: $0 [--dbg-package=package] [--auto-dbgsym]" exit 1 fi -LONGOPTS="-l dbg-package: -l help -l verbose" +LONGOPTS="-l dbg-package: -l help -l verbose -l auto-dbgsym" SHORTOPTS="vh" ARGS=$(getopt "$LONGOPTS" -- "$SHORTOPTS" "$@") if [ $? -ne 0 ] @@ -30,6 +30,7 @@ while [ $# -gt 0 ]; do --help|-h) shift; echo -e "$0 [options]\n"; exit 0;; --verbose|-v) shift; VERBOSE=1;; --dbg-package) shift; DBGPKG="$1"; shift;; + --auto-dbgsym) shift; AUTODBGSYM=1;; *) break;; esac done @@ -68,26 +69,58 @@ generate_minidebug_and_debug() # Attach the mini debuginfo xz mini_debuginfo objcopy --add-section .gnu_debugdata=mini_debuginfo.xz ${binary} + + rm mini_debuginfo.xz +} + +write_buildid_file() +{ + local package=$1 + local buildids=$2 + # See dh_gencontrol + local dir=debian/.debhelper/$package + + mkdir -p "$dir" + echo "$buildids" | tr ' ' '\n' | sort | tr '\n' ' ' > "$dir/dbgsym-build-ids" } for PKG in $(awk '/^Package:/ { print $2 }' < debian/control) do - for PROG in $(find debian/$PKG -path '*/usr/*' -type f -a \( -executable -o -name '*.so' \) ) + buildids="" + for PROG in $(find debian/$PKG -path '*/usr/*' -type f -a \( -executable -o -name '*.so*' \) ) do - if [ -n "$DBGPKG" ]; then + if [ -n "$DBGPKG" -o -n "$AUTODBGSYM" ]; then build=$(readelf -n $PROG | awk '/Build ID:/ { print $3}') [ -n "$VERBOSE" ] && echo "build-id $PROG $build" dir=${build:0:2} - debug_dir=debian/$DBGPKG/usr/lib/debug/.build-id/$dir - debug_path=$debug_dir/${build:2}.debug - - install -d $debug_dir + if [ -n 
"$AUTODBGSYM" ]; then + # Location that dh_gencontrol expects to find files in + # so it can generate debug packages automatically + debugtmp="debian/.debhelper/$PKG/dbgsym-root" + + # Make sure changelog, copyright etc are picked up from + # the main package + dbgsym_docdir="$debugtmp/usr/share/doc"; + doc_symlink="$dbgsym_docdir/$PKG-dbgsym"; + mkdir -p "$dbgsym_docdir" + ln -s "$PKG" "$doc_symlink" + else + debugtmp="debian/$DBGPKG" + fi + debug_dir="$debugtmp/usr/lib/debug/.build-id/$dir" + debug_path="$debug_dir/${build:2}.debug" + + install -d "$debug_dir" generate_minidebug_and_debug "$PROG" "$debug_path" - chmod 644 $debug_path - objcopy --add-gnu-debuglink $debug_path $PROG + chmod 644 "$debug_path" + objcopy --add-gnu-debuglink "$debug_path" "$PROG" + buildids="$buildids $build" else generate_minidebug_and_debug "$PROG" "$PROG.debug" fi done + if [ -n "$buildids" ]; then + write_buildid_file "$PKG" "$buildids" + fi done diff --git a/debian/changelog b/debian/changelog index c8676261..eb2cba44 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,4286 @@ +vyatta-dataplane (3.12.48) unstable; urgency=medium + + [ Alan Dewar ] + * GPC: check ifp->fal_l3 rather than ifp->if_created + * GPC: delay FAL programming until interface ready (Fixes: VRVDR-54766) + * GPC: remove unnecessary casting + + [ Srinivas Narayan ] + * crypto: Flow cache invalidation should occur from main thread + * Revert "crypto: Move sa unbinding from PMD to rcu callback" + * crypto: Rework SA cleanup sequence + * crypto: Ensure that the correct PMD is picked always + + -- Srinivas Narayan Tue, 27 Apr 2021 16:47:49 +0100 + +vyatta-dataplane (3.12.47) unstable; urgency=medium + + [ Shubham Shrivastava ] + * DPI: Avoid concurrent access of ndpi_flow_struct by multiple lcores + * DPI: Max packets check and detection giveup for ndpi flow + + [ Simon Barber ] + * CRYPTO:Ensure SA return codes valid + * CRYPTO: Fix policy return codes + + -- Srinivas Narayan Thu, 22 Apr 2021 14:51:42 
+0100 + +vyatta-dataplane (3.12.46) unstable; urgency=medium + + [ Nicholas Brown ] + * Add support for dpkg-mergechangelogs + + [ Srinivas Narayan ] + * crypto: Move sa unbinding from PMD to rcu callback + + -- Srinivas Narayan Fri, 16 Apr 2021 14:20:41 +0100 + +vyatta-dataplane (3.12.45) unstable; urgency=medium + + [ Ian Wilson ] + * npf: Ensure NAT policy is ref-counted when stored in rule or session + + [ Gavin Shearer ] + * gpc: add request for hardware commit after processing GPC protobuf msg + + [ Alan Dewar ] + * GPC: prevent crash when optimisation is off (Fixes: VRVDR-54885) + + -- Srinivas Narayan Thu, 15 Apr 2021 12:33:39 +0100 + +vyatta-dataplane (3.12.44) unstable; urgency=medium + + [ Ian Wilson ] + * npf: Allow unit-tests to reset session ID to 0 + * ut: Test connsync packing and restoration of SNAT session + * ut: Verify the SNAT session json ssync3 test + * npf: Fix SNAT session restoration from connsync buffer + + [ Mark Gillott ] + * capture: ensure final cleanup occurs after interface cleanup + (Fixes: VRVDR-54831) + + [ Srinivas Narayan ] + * Move crypto & UDP teardown to a later point + + [ Charles (Chas) Williams ] + * fal: add FAL_RET_PLUGIN_CONSUMED framer type (Bugfix: VRVDR-53937) + * pktmbuf: add public API to set vrf (Bugfix: VRVDR-54447) + + -- Srinivas Narayan Mon, 12 Apr 2021 13:11:48 +0100 + +vyatta-dataplane (3.12.43) unstable; urgency=medium + + [ Nicholas Brown ] + * properly export env var in Jenkinsfile + + [ aroberts ] + * Add Aidan Gallagher to the reviewers for qos files + + [ Paul Aitken ] + * DPI: ensure nDPI is init'd on all cores + + [ Alan Dewar ] + * GPC: add initial counter support + * GPC: retrieve a policer's red packet count + * GPC: add support for resettable counters + + -- Srinivas Narayan Wed, 07 Apr 2021 15:28:36 +0100 + +vyatta-dataplane (3.12.42) unstable; urgency=medium + + [ Ian Wilson ] + * ut: Rename npf alg ftp unit-tests + * ut: Add npf alg ftp11 tests for source ports outside trans range + * ut: Add 
npf alg ftp12 test for source ports outside trans range + * ut: Change 5 of the npf alg ftp unit-tests to DP_START_TEST_FULL_RUN + + [ Nicholas Brown ] + * Use osc in Jenkinsfile + + [ Simon Barber ] + * CRYPTO: Minor tidyup of policy update + * CRYPTO: Ensuring correct pr->flags handling during an update + + [ Ian Wilson ] + * ut: Add npf ALG test to destroy ftp session before ftp ALG tuple + * alg: Delete tuples created by a session when the session is destroyed + * alg: Only iterate over tuple tables if count is != 0 + + [ Ramkumar Ganapathysubramanian ] + * Fixed crash in QoS when removing egress map + + [ Frédéric Perrin ] + * icmp6_redirect: set hlim to 255 + + [ Simon Barber ] + + -- Simon Barber Wed, 31 Mar 2021 22:15:50 +0100 + +vyatta-dataplane (3.12.41) unstable; urgency=medium + + [ Gavin Shearer ] + * ippf/gpc: add missing refcount decrements when counters released + + [ Nicholas Brown ] + * Use custom_target instead of generator for sample + * Add a build target to run clang-tidy + + -- Srinivas Narayan Mon, 22 Mar 2021 17:05:24 +0000 + +vyatta-dataplane (3.12.40) unstable; urgency=medium + + [ Mark Gillott ] + * capture: on cleanup serialise access to console socket (Fixes: VRVDR-53851) + + [ Charles (Chas) Williams ] + * nh_common: avoid route/neighbor update races (Bugfix: VRVDR-53960) + + -- Srinivas Narayan Wed, 17 Mar 2021 14:27:36 +0000 + +vyatta-dataplane (3.12.39) unstable; urgency=medium + + [ Alan Dewar ] + * GPC: add utility functions for later use + * GPC: add GPC structs and enums + * GPC: give gpc_config.c a better name + * GPC: add feature and table parsing + * GPC: add rule and counter parsing + * GPC: add match and action parsing + * GPC: add more utility functions + * GPC: add config walk functions + * GPC: add initial no-op op-mode code + * GPC: complete initial op-mode code + * GPC: add rule action code and empty match code + * GPC: fill in most empty rule match functions + * GPC: add ip-prefix and icmp parse functions + * UT: add 
initial protobuf utility functions + * UT: extend the GPC unit-test to test something + * GPC: create a FAL policer when necessary + * GPC: call new gpc_db APIs to instantiate config + * GPC: add new QoS actions to pmf_rule struct + * GPC: add zeroed counters to op-mode json + + -- Srinivas Narayan Tue, 16 Mar 2021 08:57:07 +0000 + +vyatta-dataplane (3.12.38) unstable; urgency=medium + + [ Simon Barber ] + * PERF: Restore ALWAYS_INLINE to input NAT64 processing + + [ Derek Fawcus ] + * ACL: Only process ACL GPC groups + * ACL: Simplify by removing pmf_rlset_ext + * GPC: Move function location within file + * GPC: Add counters header files + * GPC: Add counters code + * GPC: Adjust GPC DB for counters + * ACL: Use counter field in GPC rule + * ACL: Simplify by removing pmf_attrl + * ACL: Rename shimmed cntr accessors + * ACL: Add new counter accessors + * ACL: Store old cntr in rule owner + * ACL: Adjust use of counters as rules modified + * ACL: Control existence of cntg as rules altered + * ACL: Add counter hardware notification mechanism + * ACL: Notify hardware of counters + * ACL: Debug dump of old and new counters + * ACL: Switch to new counters facility + * ACL: Remove old counter support code + * ACL: Prepare to move op-mode support + * ACL: Move the ACL structure dump command + * ACL: Move the ACL counter show command + * ACL: Move the ACL counter clear command + * GPC: Rename routines pmf_hw_* -> gpc_hw_* + * GPC: Rename files pmf_hw.[ch] -> gpc_hw.[ch] + * ACL: Silence clang-tidy about string comparison + + -- Srinivas Narayan Mon, 15 Mar 2021 15:29:27 +0000 + +vyatta-dataplane (3.12.37) unstable; urgency=medium + + [ Nicholas Brown ] + * fix gcc 10 compiler error + + [ Srinivas Narayan ] + * Include rcu.h in vyatta-dataplane-dev + + -- Srinivas Narayan Sat, 13 Mar 2021 12:14:03 +0000 + +vyatta-dataplane (3.12.36) unstable; urgency=medium + + [ Daniel Gollub ] + * debian: bump DPDK version dependency for rte-acl API changes + + -- Srinivas Narayan Fri, 12
Mar 2021 16:51:24 +0000 + +vyatta-dataplane (3.12.35) unstable; urgency=medium + + [ Daniel Gollub ] + * cgnat: include userspace RCU header + * rcu: introduce generic rcu header + * rcu: prefer generic rcu header include + * rcu: refactor existing RCU code + * rcu: prepare unified RCU API for general use + * rcu: perform unified RCU thread registration + * rcu: cutover to dp_rcu_thread_online/offline API + * controller: use dp_rcu_ wrapper for quiescent state + * rcu: introduce dp_rcu_barrier() wrapper + * rcu: introduce dp_rcu_synchronize + * rcu: introduce dp_rcu_read_(un)lock API + * npf_rte_acl: report thread id to the RCU QS variable + + -- Srinivas Narayan Fri, 12 Mar 2021 16:39:17 +0000 + +vyatta-dataplane (3.12.34) unstable; urgency=medium + + [ Nicholas Brown ] + * Add support for libcheck 0.15 + + -- Srinivas Narayan Fri, 12 Mar 2021 16:15:09 +0000 + +vyatta-dataplane (3.12.33) unstable; urgency=medium + + [ Charles (Chas) Williams ] + * flow: ensure flow cache entry is zeroed for hashing (Bugfix: VRVDR-54681) + + [ Paul Aitken ] + * DPI: change app group deletion from RCU to GC + * DPI: change app group DB deletion from RCU to GC + + -- Srinivas Narayan Fri, 12 Mar 2021 10:52:02 +0000 + +vyatta-dataplane (3.12.32) unstable; urgency=medium + + * crypto : Fix up ICV + + -- Srinivas Narayan Wed, 10 Mar 2021 11:50:07 +0000 + +vyatta-dataplane (3.12.31) unstable; urgency=medium + + [ Simon Barber ] + * Perf: Limit the nat64 processing pulled into the core pipeline + + -- Srinivas Narayan Mon, 08 Mar 2021 09:57:03 +0000 + +vyatta-dataplane (3.12.30) unstable; urgency=medium + + [ Nicholas Brown ] + * Tweak how Jenkins deals with rebuilding branches + * Build against DANOS:Shipping:2105 in Jenkinsfile + + [ Thomas Kiely ] + * Populate sci_vlan for L3 VIFs + * Remove incorrect error message + + -- Srinivas Narayan Thu, 04 Mar 2021 14:45:11 +0000 + +vyatta-dataplane (3.12.29) unstable; urgency=medium + + [ Paul Aitken ] + * pl_gen_fused: fix E302 + * 
pl_gen_fused: fix E305 + * pl_gen_fused: fix E703 + * pl_gen_fused: fix E128 + * pl_gen_fused: fix E251 + * pl_gen_fused: fix E241 + * pl_gen_fused: fix F841 + * pl_gen_fused: fix F523 + * pl_gen_fused: fix F524 + * pl_gen_fused: fix E712 + * pl_gen_fused: fix E713 + * pl_gen_fused: fix C0325 + * pl_gen_fused: fix R1710 + * DPI: call ndpi_finalize_initalization + + [ Nachiketa Prachanda ] + * npf: use rcu safe null check for sentry + + [ Simon Barber ] + + -- Simon Barber Wed, 03 Mar 2021 10:08:00 +0000 + +vyatta-dataplane (3.12.28) unstable; urgency=medium + + [ Nicholas Brown ] + * Consistently use compiler hot/cold defines/macros + * Consistently use ALWAYS_INLINE compiler macro + * make lpm_tbl24_get_next_hop_idx static + * pl_gen_fused should use compiler defines/macros + * Consistently use compiler __used define + * Consistently use compiler __noinline define + + -- Srinivas Narayan Tue, 23 Feb 2021 15:52:15 +0000 + +vyatta-dataplane (3.12.27) unstable; urgency=medium + + [ Nicholas Brown ] + * Fix static analysis failures in alg sip nat tests + + -- Srinivas Narayan Tue, 23 Feb 2021 10:28:32 +0000 + +vyatta-dataplane (3.12.26) unstable; urgency=medium + + [ Gavin Shearer ] + * nat: don't panic system on APM sanity failure + + [ Nicholas Brown ] + * Add a fused_mode build option + * ability to disable fused_mode from package options + * Mark Static Analysis stage failed in quality gate + + [ Ian Wilson ] + * ut: Check for session struct size is not needed + * cgnat: Re-arrange objects in 'struct cgn_sess2' + * cgnat: Change sub-session table init parameters + * cgnat: Add separate fwd and back hash table nodes for sub-session + * cgnat: Add direction param to sub-session hash table key + * cgnat: sub-session hash table changed to use forw and back sentries + * cgnat: Change sub-session iterators to iterate over 'out' sentries + * cgnat: Add two convenience sub-session key comparison functions + * cgnat: Replace defines of s2_addr and s2_port with functions + 
* cgnat: Replace define s2_expired with function + * ut: Changes to cgnat unit-test to pass line number into helper functions + + [ Charles (Chas) Williams ] + * tests: fix path redirection checks (Bugfix: VRVDR-54471) + * vplane-uio: refactor supported devices into module + * vplane-mlx-setup: configure all supported adapters + * Remove xen support (Bugfix: VRVDR-54443) + * vplane-mlx-setup: perl cleanup (Bugfix: VRVDR-54443) + + [ Paul Aitken ] + * NPF: only call session_set_app when the session exists + + [ Nicholas Brown ] + * master-next branch targets Vyatta:Master + * Revert "master-next branch targets Vyatta:Master" + + [ Daniel Gollub ] + * crypto: avoid dereferencing bad vrf_ctx pointer + + [ Thomas Kiely ] + * Add kbits/bytes conversion macros + + [ Srinivas Narayan ] + * crypto : Always flush flow cache after committing ruleset + + [ Ian Wilson ] + * ut: Remove old SIP unit-test files + * npf: Add a "trans-port-alloc" optional param to NAT ruleset cstore cmd + * npf: Changed rule parsing action_keys array to be in alphabetic order + * ut: Add port_alloc object to struct 'dp_test_npf_nat_rule_t' + * ut: Add option to pass in payload parameters to dpt_udp + * ut: Add test sip_nat10 and data set 1 to dp_test_npf_alg_sip_nat.c + * ut: Add test sip_nat11 to dp_test_npf_alg_sip_nat.c + * ut: Add test sip_nat12 to dp_test_npf_alg_sip_nat.c + * ut: Add test sip_nat20 to dp_test_npf_alg_sip_nat.c + * ut: Add test sip_nat21 to dp_test_npf_alg_sip_nat.c + * ut: Add SIP data set #3 + * ut: Add test sip_nat30 to dp_test_npf_alg_sip_nat.c + * ut: Add SIP data set #4 + * ut: Add test sip_nat40 to dp_test_npf_alg_sip_nat.c + * Added SSCANF_TO_KSTRTO to check patch ignore list + + [ Charles (Chas) Williams ] + * main: split port allocations out of port_conf (Bugfix: VRVDR-54440) + * main: fix overflow of buffers counter (Bugfix: VRVDR-54440) + * main: document struct offsets and sizes (Bugfix: VRVDR-54440) + + -- Srinivas Narayan Mon, 22 Feb 2021 21:48:18 +0000 +
+vyatta-dataplane (3.12.25) unstable; urgency=medium + + [ Srinivas Narayan ] + * crypto : broaden error checking + + [ Ramkumar Ganapathysubramanian ] + * Creating new structures to hold QoS egress map information + * Qos changes to support hierarchical egress map + + [ Daniel Gollub ] + * crypto: reset XFRM/nl seq counter on flush + + [ Gavin Shearer ] + * fw: improve comment at "result:" in npf_hook_track() + * fw: skip NAT processing with session in wrong direction + * nat: split map_rcu_free sanity check into a different function + * nat: only call map_rcu_free sanity if from GC function + + -- Srinivas Narayan Fri, 05 Feb 2021 15:40:07 +0000 + +vyatta-dataplane (3.12.24) unstable; urgency=medium + + * Revert "GPC: add zeroed counters to op-mode json" + * Revert "GPC: temporary commit to stop crashes" + * Revert "GPC: add new QoS actions to pmf_rule struct" + * Revert "GPC: call new gpc_db APIs to instantiate config" + * Revert "GPC: create a FAL policer when necessary" + * Revert "UT: extend the GPC unit-test to test something" + * Revert "UT: add initial protobuf utility functions" + * Revert "GPC: add ip-prefix and icmp parse functions" + * Revert "GPC: fill in most empty rule match functions" + * Revert "GPC: add rule action code and empty match code" + * Revert "GPC: complete initial op-mode code" + * Revert "GPC: add config walk functions" + * Revert "GPC: add more utility functions" + * Revert "GPC: add match and action parsing" + * Revert "GPC: add rule and counter parsing" + * Revert "GPC: add feature and table parsing" + * Revert "GPC: give gpc_config.c a better name" + * Revert "GPC: add GPC structs and enums" + * Revert "GPC: add utility functions for later use" + + -- Simon Barber Tue, 02 Feb 2021 12:17:31 +0000 + +vyatta-dataplane (3.12.23) unstable; urgency=medium + + [ Alan Dewar ] + * GPC: add utility functions for later use + * GPC: add GPC structs and enums + * GPC: give gpc_config.c a better name + * GPC: add feature and table parsing + * 
GPC: add rule and counter parsing + * GPC: add match and action parsing + * GPC: add more utility functions + * GPC: add config walk functions + * GPC: add initial no-op op-mode code + * GPC: complete initial op-mode code + * GPC: add rule action code and empty match code + * GPC: fill in most empty rule match functions + * GPC: add ip-prefix and icmp parse functions + * UT: add initial protobuf utility functions + * UT: extend the GPC unit-test to test something + * GPC: create a FAL policer when necessary + * GPC: call new gpc_db APIs to instantiate config + * GPC: add new QoS actions to pmf_rule struct + * GPC: temporary commit to stop crashes + * GPC: add zeroed counters to op-mode json + + [ Simon Barber ] + + -- Simon Barber Tue, 26 Jan 2021 14:03:18 +0000 + +vyatta-dataplane (3.12.22) unstable; urgency=medium + + [ Srinivas Narayan ] + * ut: Add calls to cleanup NPF state + * ut: Add call to cleanup NPF state between tests + + [ Daniel Gollub ] + * npf: expose npf_masklen_to_grouper_mask to allow usage in rldb + * npf_rte_acl: Add npf_rte_acl_del_rule method + * npf_rte_acl: Stage transaction implementation + * npf_rte_acl: cutover to transactions + * npf_rte_acl: more fine-grain return value for match + + [ Srinivas Narayan ] + * Initial version of NPF rule database API + + [ Daniel Gollub ] + * rldb: base implementation + * rldb: implement rldb dump methods + * rldb: implement stats methods + * rldb: implement rldb_rule_handle helpers + * rldb: implement rldb_find_rule method + * rldb: implement transaction methods + * rldb: implement rldb ACL rule marshaling helpers + * rldb: implement rldb_del_rule method + * rldb: implement rldb_add_rule method + * rldb: implement rldb_match method + * npf_rte_acl: enable rte_acl hashtable usage + * npf_rte_acl: setup rte_acl with counter-id + * npf: init rldb + * main: register DPDK's RCU QSBR variable + * rldb: enable rte_acl rcu support + * ut: initial rldb API tests + * ut: avoid priority value 0 in crypto tests + * 
ut: rule number becomes mandatory for crypto policies + * ut: crypto policy rldb cutover preparation + * ut: send update policy XFRM message + * crypto: introduce policy_rule flags + * crypto: create rldb instances per AF/VRF/dir + * crypto: avoid rule_tag_ht in feat_attach_by_reqid + * crypto: prepare policy_rule for rldb cutover + * crypto: stage policy_prepare_rldb_rule + * crypto: stage policy_rule_get_rldb + * crypto: stage policy_rule_add_to_rldb + * crypto: stage policy_rule_update_rldb + * crypto: stage policy_rule_remove_from_rldb + * crypto: stage crypto_policy_rldb_commit + * crypto: introduce policy_rule pending_add flag + * crypto: cutover from npf to rldb + * crypto: drop unused NPF code + * crypto: discontinue policy-rule tags + * crypto: drop unused PR-TAG code + + [ Simon Barber ] + + -- Simon Barber Tue, 26 Jan 2021 11:33:18 +0000 + +vyatta-dataplane (3.12.21) unstable; urgency=medium + + [ Charles (Chas) Williams ] + * clang: fix variable length array size warnings (Bugfix: VRVDR-54147) + * npf: avoid returning success during failure (Bugfix: VRVDR-54155) + * clang: normalize brace usage (Bugfix: VRVDR-54146) + * rcu: fix compiler errors with pointers and integers (Bugfix: VRVDR-54202) + + [ Derek Fawcus ] + * ACL: Add new summary bits for actions + * ACL: Adjust FAL message code for readability + * ACL: Use new summary bits for encoding new actions + * GPC: Add representation of parsed mark action + * ACL: Add helper routines for parsed marking + * ACL: Parse the qos mark actions + * GPC: Add QoS actions - set colour / designation + + [ Daniel Gollub ] + * crypto: avoid dereferencing bad sa pointer + + [ Simon Barber ] + + -- Simon Barber Fri, 22 Jan 2021 19:20:49 +0000 + +vyatta-dataplane (3.12.20) unstable; urgency=medium + + [ Srinivas Narayan ] + * crypto: destroy session in driver from RCU callback + + [ Simon Barber ] + * Allow for V6 route delete num_paths == 0 + + [ Paul Aitken ] + * DPI: fix nDPI 3.4 + + [ Simon Barber ] + + -- Simon
Barber Mon, 18 Jan 2021 08:13:13 +0000 + +vyatta-dataplane (3.12.19) unstable; urgency=medium + + [ Ian Wilson ] + * Uninitialised variable in nat64_out_process_common + * Revert "Update soft_ticks using clock_gettime" + * Update soft_ticks using clock_gettime (re-factored) + * cgnat: Change 3-tuple sess and subscriber structs to use unix_epoch_us + * cgnat: Change port-block start and end times to use unix_epoch_us + * cgnat: Change sub-sessions to use unix_epoch_us + * cgnat: Change the clear-session log messages to use unix_epoch_us + * cgnat: Change max subscriber session timestamps to use unix_epoch_us + * cgnat: Remove last uses of the cgnat specific time functions + + [ Charles (Chas) Williams ] + * lag: preserve started state across reconfigure (Bugfix: VRVDR-53928) + * dpdk_eth_if: add some additional state debugging (Bugfix: VRVDR-53928) + + [ Karthik Murugesan ] + * Queue depth and WRED threshold configuration enhancement + * Added range check for time to byte conversion + + [ Paul Aitken ] + * DPI: engines_len inconsistency + + [ Charles (Chas) Williams ] + * ip_rt: reject bad rib updates + * dp_test_mpls: ensure no division by zero + * clang: re-enable clang checks + + [ Gavin Shearer ] + * gpc: fix issue of calling create fn instead of delete (Fixes: VRVDR-53959) + + [ Simon Barber ] + * CRYPTO: Do not set xfrm_direct true + * CRYPTO: Undo IPSEC breakage caused by clang fix + + -- Simon Barber Wed, 13 Jan 2021 20:21:22 +0000 + +vyatta-dataplane (3.12.18) unstable; urgency=medium + + [ Charles (Chas) Williams ] + * clang: fix readability-named-parameter + * clang: fix readability-inconsistent-declaration-parameter-name + * tests: fix readability-inconsistent-declaration-parameter-name + * clang: fix else after return + + [ Nicholas Brown ] + * Re-enable some clang-tidy checks + * Remove old CODEOWNERS user + + [ Charles (Chas) Williams ] + * tests: gre: fix clang-tidy CallAndMessage issues + * crypto: ensure ctx.context is set to some value + * clang: 
quiet warnings about uninitialized usage + * qos: fix possibly uninitialized values + * tests: lib: make sure we have a valid mbuf pointer + * tests: npf: ensure ptree_string2key returns something + * clang-tidy: re-enable clang-analyzer-core.CallAndMessage + + [ Ian Wilson ] + * cgnat: Move cgn_addrstr to cgn_log_rte.c and make static + * cgnat: Move all 'return code' code into cgn_rc.h and cgn_rc.c + * cgnat: Move time related functions into cgn_time.c and cgn_time.h + * cgnat: Move cgn_arg_to_int to cgn_cmd_cfg.c + * cgnat: Replace include of cgn.h with cgn_public.h in ip_icmp.c + * cgnat: Move function that only exist for unit-tests into cgn_test.c + * cgnat: Move global variables to cgn.c + * cgnat: Sessions are only created in outbound context + * cgnat: Replace 'enum cgn_flow' with 'enum cgn_dir' + * cgnat: Use 'enum cgn_dir' instead of 'int' for the direction variable + * cgnat: Remove cgnat cache object cpk_hlen and add cpk_l4_len + * cgnat: Use a 'cgnat map info' struct to pass data to/from mapping fns + * cgnat: Use correct type for 'enum nat_proto' vars and params + * cgnat: Remove direction parameter from session create path + * cgnat: Change cgn_session_establish to use 'struct cgn_map' + * cgnat: Move test for sub-session enable out of cgn_session_establish + * cgnat: Sub-sessions are only created in the outbound context + * cgnat: Rename cgn_sess2_activate and deactivate to cgn_sess2_add and del + * Fixup copyright dates for 2021 + + [ Charles (Chas) Williams ] + * ip_rt_protobuf: fix clang deadstore warning + * controller: check return from add port parse + * esp: fix deadstore in esp_input + * qos_sched: fix clang deadstore in parsing + * npf: remove dead store before enumerated switch + * pipeline: remove checks in tear downs + * esp: remove dead store before enumerated switch + * tests: check ret to avoid dead stores in qos tests + * tests: fix deadstore in unit tests + + [ Mike Manning ] + * ARP and ND: entry should not be valid after failure 
notification + * ARP: restrict handling of notifications from kernel + * ARP: remove entry after failure notification + + [ Ian Wilson ] + * Update soft_ticks using clock_gettime + + [ Derek Fawcus ] + * GPC: Add policer action to the ACL at FAL API + + [ Nicholas Brown ] + * command: fix clang deadstore warning + * dp_test: fix clang deadstore warning + * npf test: fix clang deadstore warning + * npf test: remove spurious return + * clang: Re-enable clang-analyzer-deadcode.DeadStores + + [ Charles (Chas) Williams ] + * debian: remove libxen dependency + + [ Simon Barber ] + + -- Simon Barber Mon, 11 Jan 2021 14:12:50 +0000 + +vyatta-dataplane (3.12.17) unstable; urgency=medium + + [ Charles (Chas) Williams ] + * micro bfd: allow micro BFD packets when no address (Bugfix: VRVDR-46131) + * main: change to warning about VLAN insertion + + [ Nicholas Brown ] + * Add .clang-format file + * Add .editorconfig file + + [ Robert Shearman ] + * include: update comment comparing SAI with FAL + * include: add new FAL APIs for using VRF objects with routes + * fal: don't signal routes with non-main tables to FAL plugins + * vrf: create VRF FAL object before initialising VRF + * fal: use new FAL route APIs if supported + * include: add new FAL APIs for using rtr-intf objects with neighs + * fal: use new FAL IP neighbor APIs if supported (Fixes: VRVDR-53924) + * vrf: track and display PD programming state of VRFs + + [ Simon Barber ] + + -- Simon Barber Mon, 04 Jan 2021 17:08:39 +0000 + +vyatta-dataplane (3.12.16) unstable; urgency=medium + + [ Nicholas Brown ] + * Jenkinsfile: clang-tidy robustness + + [ Paul Atkins ] + * clang: turn off all warnings that are still found when running clang + + [ Charles (Chas) Williams ] + * clang: fix bugprone-suspicious-string-compare + * clang: eliminate bzero + + [ Mandeep Rohilla ] + * LAG: Check to see if min links is configured + + [ Charles (Chas) Williams ] + * tests: add new macro to fail unit tests + * tests: convert some static 
asserts to abort + + [ Nicholas Brown ] + * Re-enable clang-tidy warnings that are now fixed + * Format .clang-tidy file using yaml continuations + + [ Charles (Chas) Williams ] + * mlx4: configure default steering mode + * clang: fix bugprone-narrowing-conversions + + [ Nicholas Brown ] + * Re-enable bugprone-suspicious-missing-comma + + [ Simon Barber ] + + -- Simon Barber Mon, 04 Jan 2021 16:09:52 +0000 + +vyatta-dataplane (3.12.15) unstable; urgency=medium + + [ Nicholas Brown ] + * Add NPF codeowners + + [ Ian Wilson ] + * npf: Split npf_config_default_alloc_free into separate alloc and free + * npf: Check npf_conf is not NULL before calling rcu callback to free it + * npf: Free ruleset attach point memory when it is deleted + + [ Derek Fawcus ] + * GPC: Use ruleset ifname accessor + * GPC: Add new accessors for rules and groups + * GPC: Change the ACL FAL layer to the new accessors + * GPC: Add and use ifp helper functions + * GPC: Add and use ruleset helper functions + * ACL: Move ruleset database to GPC layer + * ACL: Move group database to GPC layer + * GPC: Add and use rule helpers + * GPC: Adjust rule notifier helpers + * ACL: Move rule database to GPC layer + * GPC: Add per feature logging + * GPC: Per feature attach/detach + * GPC: Add attach/detach for ingress QoS + + [ Paul Atkins ] + * ut: call proper free function to free multicast route + * ip6_mroute: init the mroute6 stats before populating them + + [ Mandeep Rohilla ] + * LAG: Distinguish between a member being usable and enabled + * LAG: Check if the given interface is a member port + * FAL LAG: Determine if the port is a member of a LAG + * LAG: Check for member port with the registered lag ops + * IFOP: Set the usability of an interface + * IF OP: Set usability of an interface + * LAG: Set usability of a LAG member + * FAL LAG: Set LAG member's usability + * DPDK ETH OP: Set usability of ETH type interface + * IF STATE: Mark an interface as unusable + * FAL LAG: Remove member from tx hash
when micro BFD down + + [ Simon Barber ] + * CRYPTO: Add missing return codes on failure paths in sadb + * UT:CRYPTO Fix timing race between SA and interface creation + + -- Simon Barber Fri, 18 Dec 2020 15:44:02 +0000 + +vyatta-dataplane (3.12.14) unstable; urgency=medium + + [ Alan Dewar ] + * GPC: add debug constants and flag + * GPC: dummy protobuf handler + + [ Paul Atkins ] + * ut: tidies to the whole_dp readme file + * gre: check v6 addresses are equal using the v6 member of the union + * vxlan: move the FDB functions to allow removal of forward declarations + * lpm: remove old clang analyzer tags as they are not helpful + * if: allow multiple node instance callback to be set + + [ Simon Barber ] + * CRYPTO: Block sending of xfrm acks if the old control is still active + + [ Derek Fawcus ] + * NPF: Static analysis fixes + + [ Gavin Shearer ] + * npf: set cache-empty flag after an ICMP host redirect + + [ Simon Barber ] + + -- Simon Barber Wed, 16 Dec 2020 18:03:40 +0000 + +vyatta-dataplane (3.12.13) unstable; urgency=medium + + [ Nicholas Brown ] + * Don't run perform Static Analysis on generated pipeline code + * enable function complexity checks + + [ Simon Barber ] + + -- Simon Barber Tue, 15 Dec 2020 11:02:28 +0000 + +vyatta-dataplane (3.12.12) unstable; urgency=medium + + [ Mark Gillott ] + * if: track LAG members when updating multicast promiscuous mode + (Fixes: VRVDR-53559) + + [ Gavin Shearer ] + * npf: change ipv6-route show firewall output + + [ Nicholas Brown ] + * Add a test setup for easy use of valgrind + + [ Robert Shearman ] + * if: generate events on admin-status changes + * ip_mcast: make use of dp events to improve modularity + * bridge: remove admin-down ports from VLANs in the FAL (Fixes: VRVDR-53674) + + [ Nicholas Brown ] + * ignore corrupt libcheck results files + + [ Paul Atkins ] + * main: fix memleak in check_broken_firmware + * ut: free xfrm server endpoints after use + + [ Simon Barber ] + * CRYPTO: Handle SA netlink messages 
straight from IKE control-plane + * CRYPTO: Pass a ptr to a container to rtnl_process_xfrm_sa + * CRYPTO: Add support for XFRM_GETSA stats service + * UT:CRYPTO: Add code to test request and process xfrm SA stats + * CRYPTO: Add SA expire xfrm message generation + * UT:CRYPTO: Handle xfrm direct SA expire messages + + [ Paul Atkins ] + + -- Paul Atkins Mon, 14 Dec 2020 14:35:45 +0000 + +vyatta-dataplane (3.12.11) unstable; urgency=medium + + [ Mandeep Rohilla ] + * IF EVT: Allow intf type specific context to be passed + * IF EVT: Add customised initialisation for different int type + + [ Simon Barber ] + * CRYPTO: Disable xfrm_direct path if xfrms received from vplaned + * CRYPTO: Pass xfrm_client_aux_data to rtnl_process_xfrm_sa + * CRYPTO: Add xfrm client flush and commits commands + * UT:CRYPTO: Add support to crypto for flush & commit cmd from xfrm_server + * UT:CRYPTO Test new flush command + + [ Mike Manning ] + * Provide config params for ARP aging time and maximum number of entries + * Check for ARP cache limit when maximum number of entries exceeded + * Modify GArp protobuf command from cmd_arp_cfg to garp + * Convert configuration command for ND to using protobufs + * Allow configuration of ARP aging time and maximum number of entries + * ARP dynamic local entries should not be removed due to kernel deletions + * Extend time to expiry only for locally created proxy entries + * Minor improvements to lladdr_update() for use by pipelined arp + + [ Paul Atkins ] + + -- Paul Atkins Thu, 10 Dec 2020 14:09:00 +0000 + +vyatta-dataplane (3.12.10) unstable; urgency=medium + + [ Paul Atkins ] + * 3.11.72 + + [ Charles (Chas) Williams ] + * api: clean up public LAG event API + * lag: handle checks against non-DPDK interfaces + + [ Srinivas Narayan ] + * Fix bug in bitmask parsing + + -- Srinivas Narayan Wed, 09 Dec 2020 13:31:08 +0000 + +vyatta-dataplane (3.12.9) unstable; urgency=medium + + [ Srinivas Narayan ] + * util: Add API to construct a bitmask from a stream 
of bytes + * protobuf: Add protobuf definition for feature affinity + * affinity: Add dummy command handler for feature affinity + * crypto: migrate crypto engine set command handling to protobuf + * crypto: Add protobuf handling for setting crypto-fwd cpus + * crypto: Emit debug message for forwarding cores being set + * crypto: Convert fwd queue to multi-consumer + + [ Robert Shearman ] + * nh_common: ignore path state updates for deleted interfaces + (Fixes: VRVDR-53541) + * include: add FAL_INVALID_VRF_ID define + + [ Gavin Shearer ] + * fal: add FAL attribute for configuring a colour aware policer + * fal: change COLOR to COLOUR in FAL API enums for consistency + * fal: change color to colour in fal_qos_map_params_t + * fal: use decimal instead of hex for enum values + * fal: remove commas after an "end" or "max" enum value + + [ Charles (Chas) Williams ] + * event: expose if_link_change (Bugfix: VRVDR-53783) + + [ Paul Atkins ] + * bridge: move bridge_timer and related funcs to avoid prototype + * bridge: remove redundant return + * protobuf_util: malloc ipv6 addr data as a multiple of bytes + * mstp: replace assert with static_assert + * qos: replace assert with static_assert + * cgn_cmd_cfg: replace assert with static_assert + * cgn_sess_state: replace assert with static_assert + * ipv6_rsmbl: replace assert with static_assert + * cgn_sess2: replace assert with static_assert + * cgn_session: replace assert with static_assert + * npf_cache: replace assert with static_assert + * npf_state: replace assert with static_assert + * npf_rc: replace assert with static_assert + * npf_state_tcp: replace assert with static_assert + * npf_ptree: replace assert with static_assert + * npf_rproc: replace assert with static_assert + * l3_tcp_mss: remove unnecessary asserts + + [ Charles (Chas) Williams ] + * lag: handle first set of min links (Bugfix: VRVDR-53788) + + [ Nicholas Brown ] + * Jenkinsfile: expose libcheck test result to Jenkins + + [ Dewi Morgan ] + *
mpls: payload_type needed for ip frag handling + + [ Paul Atkins ] + * dpdk_lag: make pointers that are not modified const + * iptun_common: make pointers that are not modified const + * lpm6: make pointers that are not modified const + * qos_hw: make pointers that are not modified const + * sfp: make pointers that are not modified const + * main: make pointers that are not modified const + * dpi: make pointers that are not modified const + * ip6_options: make pointers that are not modified const + * alg_rpc: make pointers that are not modified const + * cgn_policy: make pointers that are not modified const + * alg_apt: make pointers that are not modified const + * npf_nat64: make pointers that are not modified const + * npf_cidr_util: make pointers that are not modified const + * npf_rte_acl: make pointers that are not modified const + * npf_addrgrp: make pointers that are not modified const + * ut:dp_test_npf_addrgrp: make pointers that are not modified const + * ut:dp_test_npf_cgnat: make pointers that are not modified const + * session: make pointers that are not modified const + * ut:dp_test_lib_exp: make pointers that are not modified const + * ut:dp_test_lib_intf: make pointers that are not modified const + * ut:dp_test_pktmbuf_lib: make pointers that are not modified const + + [ Simon Barber ] + * CRYPTO: Retrieve and store the XFRM clients pull and push socket info + * CRYPTO:Add Dataplane xfrm client + * CRYPTO: Add hooks for xfrm_client init + * UT:CRYPTO Create xfrm server sockets + * UT:CRYPTO Convert xfrm netlink messages to use xfrm server + * UT:CRYPTO Set message sequence number in xfrm netlink hdr + * UT:CRYPTO Add xfrm_server ack receive processing + * UT:CRYPTO Check the number of xfrm acks received + * CRYPTO: Add support for XFRM ACKs when NPF policies are updated + * UT:CRYPTO Handle NACKs being returned for incomplete policies + * CRYPTO:Introduce batch updates from the server + * UT:CRYPTO: Add msg header to xfrm server messages + *
UT:CRYPTO: Build xfrm UT messages in heap memory + * UT:CRYPTO: Add a public delete_sa_verify accessor + + [ Charles (Chas) Williams ] + * clang: remove redundant control flow + + [ Brian Russell ] + * gpc: add colour awareness to protobuf + + [ Paul Atkins ] + + -- Paul Atkins Tue, 08 Dec 2020 14:49:50 +0000 + +vyatta-dataplane (3.12.8) unstable; urgency=medium + + [ Paul Atkins ] + * 3.11.70 + + [ Charles (Chas) Williams ] + * api: add public api for LAG member FAL object ID (Bugfix: VRVDR-53723) + + [ Nicholas Brown ] + * UT: Add comment about split in test files + * Add support for DEB_BUILD_PROFILES="nocheck" + * Update CODEOWNERS for new meson.build files + + [ Sharmila Podury ] + * On reboot, disabled bond has members in u/u link state + + [ Paul Atkins ] + * pl_gen_fused: move print inside of debug check + * backplane: explicitly compare strcmp result + * dealer: explicitly compare strcmp result + * commands: explicitly compare strcmp result + * control: explicitly compare strcmp result + * switch: explicitly compare strcmp result + * fal_lag: explicitly compare strcmp result + * if: explicitly compare strcmp result + * mac_limit: explicitly compare strcmp result + * pd_show: explicitly compare strcmp result + * pktmbuf: explicitly compare strcmp result + * qos_sched: explicitly compare strcmp result + * route_broker: explicitly compare strcmp result + * rt_tracker: explicitly compare strcmp result + * storm_ctrl: explicitly compare strcmp result + * vlan_modify: explicitly compare strcmp result + * shadow: explicitly compare strcmp result + * nd6_nbr: explicitly compare strcmp result + * alg_ftp: explicitly compare strcmp result + * alg_tftp: explicitly compare strcmp result + * alg_sip: explicitly compare strcmp result + * sip_parse: explicitly compare strcmp result + * sip_translate: explicitly compare strcmp result + * cgn_cmd_cfg: explicitly compare strcmp result + * npf_cmd_cfg: explicitly compare strcmp result + * npf_zone_private: explicitly compare 
strcmp result + * l3_v4_route_lookup: explicitly compare strcmp result + * l3_v6_route_lookup: explicitly compare strcmp result + * capture: explicitly compare strcmp result + * ut:dp_test_mac_limit: explicitly compare strcmp result + * ut:dp_test_npf_nat_lib: explicitly compare strcmp result + * ut:dp_test_route_broker: explicitly compare strcmp result + * ut:dp_test_npf_portmap_lib: explicitly compare strcmp result + + [ Srinivas Narayan ] + * crypto: only emit non-zero counters + * crypto: Define APIs for post-processing core selection + * crypto: Allocate/free forwarding core for each SA + * crypto: allocate/free post-processing queues + * crypto: Fix bug in index handling + * crypto: redirect packets to forwarding cores + * crypto: Emit post crypto forwarding packet stats + * crypto: redirect packets in batches + * crypto: set up flag for presence of post-crypto workload + + [ Sharmila Podury ] + * Refactor code that checks if device is started + + [ Paul Atkins ] + * mpls_forward: don't use 'else' after an 'if' call returns + * alg: don't use 'else' after an 'if' call returns + * sip_parse: don't use 'else' after an 'if' call returns + * npf_auto_attach: don't use 'else' after an 'if' call returns + * cgn_session: don't use 'else' after an 'if' call returns + * npf_cidr_util.c: don't use 'else' after an 'if' call returns + * npf_addrgrp.c: don't use 'else' after an 'if' call returns + * npf_cache: don't use 'else' after an 'if' call returns + * npf_cmd_cfg: don't use 'else' after an 'if' call returns + * npf_instr: don't use 'else' after an 'if' call returns + * npf_unpack: don't use 'else' after an 'if' call returns + * npf_ruleset: don't use 'else' after an 'if' call returns + * ls_cross_connect_cmd: don't use 'else' after an 'if' call returns + * l2_ether_forward: don't use 'else' after an 'if' call returns + * l3_pbr: don't use 'else' after an 'if' call returns + * l3_v4_gre: don't use 'else' after an 'if' call returns + * l3_v4_ipsec: don't use 'else' 
after an 'if' call returns + * l3_v4_l2tpv3: don't use 'else' after an 'if' call returns + * l3_v4_out: don't use 'else' after an 'if' call returns + * l3_v4_post_route_lookup: don't use 'else' after an 'if' call returns + * l3_v4_udp: don't use 'else' after an 'if' call returns + * l3_v6_l4: don't use 'else' after an 'if' call returns + * l3_v6_out: don't use 'else' after an 'if' call returns + * l3_v6_port_route_lookup: don't use 'else' after an 'if' call returns + * l3_v6_udp: don't use 'else' after an 'if' call returns + * capture: don't use 'else' after an 'if' call returns + * ut:dp_test_npf_addrgroup: don't use 'else' after an 'if' call returns + * ut:dp_test: don't use 'else' after an 'if' call returns + * ut:dp_test_json_utils: don't use 'else' after an 'if' call returns + * ut:dp_test_lib_exp: don't use 'else' after an 'if' call returns + * ut:dp_test_lib: don't use 'else' after an 'if' call returns + * ut:dp_test_pktmbuf_lib: don't use 'else' after an 'if' call returns + * 3.11.71 + + [ Simon Barber ] + * CRYPTO: Tidy Static Analysis Warning + * CRYPTO: Remove the need for a second peer lookup during SA insertion + * CRYPTO:UT: Change SADB tests to use incrementing spi and req_id + * CRYPTO: Change SADB to use tunnel reqid instead of the dest addr + * Crypto: Rename SADB peer struct + * CRYPTO: Add peer dest address check into SA lookup + + [ Mandeep Rohilla ] + * IF API: Api to transmit an L2 frame out of an interface + * IF API: Api to get the ether address of an interface + + [ Charles (Chas) Williams ] + * clang: prevent zero length allocation + * clang: fix redundant expression + * clang: misplaced widening cast + * clang: fix possible loss of precision + * clang: fix uninitialized use of variable + * clang: fix misc-non-copyable-objects + * clang: fix promotion to double + * clang: remove explicit casting + * clang: fix signed versus unsigned comparisons + * clang: fix logical not usage + * clang: remove const qualifier in declaration + + [ Paul
Atkins ] + + -- Paul Atkins Wed, 02 Dec 2020 11:24:52 +0000 + +vyatta-dataplane (3.12.7) unstable; urgency=medium + + [ Paul Atkins ] + * json_writer: use same parameter names in prototype and definition + * config: use same parameter names in prototype and definition + * event: use same parameter names in prototype and definition + * flow_cache: use same parameter names in prototype and definition + * fal_bfd: use same parameter names in prototype and definition + * fal: use same parameter names in prototype and definition + * bridge_port: use same parameter names in prototype and definition + * gre: use same parameter names in prototype and definition + * if: use same parameter names in prototype and definition + * in_cksum: use same parameter names in prototype and definition + * lag: use same parameter names in prototype and definition + * lpm6: use same parameter names in prototype and definition + * controller: use same parameter names in prototype and definition + * main: use same parameter names in prototype and definition + * qos: use same parameter names in prototype and definition + * route: use same parameter names in prototype and definition + * rt_tracker: use same parameter names in prototype and definition + * vrf: use same parameter names in prototype and definition + * pl_commands: use same parameter names in prototype and definition + * pl_node_boot: use same parameter names in prototype and definition + * crypto_engine: use same parameter names in prototype and definition + * crypto_pmd: use same parameter names in prototype and definition + * crypto_policy: use same parameter names in prototype and definition + * vti: use same parameter names in prototype and definition + * dpi: use same parameter names in prototype and definition + * in6: use same parameter names in prototype and definition + * ip6_icmp: use same parameter names in prototype and definition + * alg_npf: use same parameter names in prototype and definition + * app_group_dp.c: 
don't use 'else' after an 'if' call returns + * app_group_cmd.c: don't use 'else' after an 'if' call returns + * protobuf_util: don't use 'else' after an 'if' call returns + * commands: don't use 'else' after an 'if' call returns + * debug: don't use 'else' after an 'if' call returns + * config: don't use 'else' after an 'if' call returns + * cpp_rate_limiter: don't use 'else' after an 'if' call returns + * ecmp: don't use 'else' after an 'if' call returns + * fal: don't use 'else' after an 'if' call returns + * bridge: don't use 'else' after an 'if' call returns + * dpdk_eth_linkwatch: don't use 'else' after an 'if' call returns + * gre: don't use 'else' after an 'if' call returns + * vxlan: don't use 'else' after an 'if' call returns + * if: don't use 'else' after an 'if' call returns + * lag: don't use 'else' after an 'if' call returns + * l2tpeth_netlink: don't use 'else' after an 'if' call returns + * lpm: don't use 'else' after an 'if' call returns + * lpm6: don't use 'else' after an 'if' call returns + * ip_mroute: don't use 'else' after an 'if' call returns + * nh_common: don't use 'else' after an 'if' call returns + * netlink: don't use 'else' after an 'if' call returns + * qos_obj_db: don't use 'else' after an 'if' call returns + * qos_ext_buf_monitor: don't use 'else' after an 'if' call returns + * qos_dpdk: don't use 'else' after an 'if' call returns + * qos_sched: don't use 'else' after an 'if' call returns + * route: don't use 'else' after an 'if' call returns + * rt_commands: don't use 'else' after an 'if' call returns + * util: don't use 'else' after an 'if' call returns + * udp_handler: don't use 'else' after an 'if' call returns + * storm_ctl: don't use 'else' after an 'if' call returns + * crypto: don't use 'else' after an 'if' call returns + * crypto_sadb: don't use 'else' after an 'if' call returns + * vti: don't use 'else' after an 'if' call returns + * npf_appdb: don't use 'else' after an 'if' call returns + * npf_typedb: don't use 'else' 
after an 'if' call returns + * ip6_mroute: don't use 'else' after an 'if' call returns + * scope6: don't use 'else' after an 'if' call returns + * nd6_nbr: don't use 'else' after an 'if' call returns + * mpls: don't use 'else' after an 'if' call returns + * route_v6: don't use 'else' after an 'if' call returns + * mpls_forward: don't use 'else' after an 'if' call returns + + [ Robert Shearman ] + * storm_control: don't create duplicate policers when reacting to events + (Fixes: VRVDR-53623) + + [ Paul Atkins ] + * 3.11.69 + + [ Charles (Chas) Williams ] + * ut: dp_ifnet_iana_type can fail when unit testing + + [ Brian Russell ] + * gpc: add gpc protobuf + + [ Nicholas Brown ] + * Jenkinsfile: Build target is now the Halifax + + [ aroberts ] + * Update dependency on DPDK version for dataplane + + [ Paul Atkins ] + * Jenkinsfile: Build target is now master for master-next branch + * alg_npf: use same parameter names in prototype and definition + * alg: use same parameter names in prototype and definition + * cgn_mbuf: use same parameter names in prototype and definition + * cgn_sess_state: use same parameter names in prototype and definition + * npf_attach_point: use same parameter names in prototype and definition + * cgn_session: use same parameter names in prototype and definition + * pmf_parse: use same parameter names in prototype and definition + * npf_apm: use same parameter names in prototype and definition + * npf_instr: use same parameter names in prototype and definition + * npf_match: use same parameter names in prototype and definition + * npf_ruleset: use same parameter names in prototype and definition + * npf_session: use same parameter names in prototype and definition + * npf_state: use same parameter names in prototype and definition + * npf_zone_private: use same parameter names in prototype and definition + * npf_match: use same parameter names in prototype and definition + * npf_ext_action_grp: use same parameter names in prototype and 
definition + * npf_rproc: use same parameter names in prototype and definition + * session: use same parameter names in prototype and definition + * shadow: use same parameter names in prototype and definition + * ut:dp_test_cmd_check: use same parameter names in prototype and definition + * ut:dp_test_cmd_state: use same parameter names in prototype and definition + * ut:dp_test_crypto_utils: use same parameter names in prototype and definition + * ut:dp_test_json_utils: use same parameter names in prototype and definition + * ut:dp_test_lib: use same parameter names in prototype and definition + * ut:dp_test_lib_exp: use same parameter names in prototype and definition + * ut:dp_test_libintf: use same parameter names in prototype and definition + * ut:npf_alg_sip_call: use same parameter names in prototype and definition + * ut:npf_alg_sip_lib: use same parameter names in prototype and definition + * ut:firewall_lib: use same parameter names in prototype and definition + * ut:dp_test_npf_lib: use same parameter names in prototype and definition + * ut:npf_nat_lib: use same parameter names in prototype and definition + * ut:dp_test_pktmbuf_lib: use same parameter names in prototype and definition + * ut:dp_test_qos_lib: use same parameter names in prototype and definition + * ut:dp_test_session_lib: use same parameter names in prototype and definition + * ut:fal_plugin_pm: use same parameter names in prototype and definition + * ut:fal_plugin_test: use same parameter names in prototype and definition + * ut:fal_plugin_qos: use same parameter names in prototype and definition + * ut:fal_plugin_ptp: use same parameter names in prototype and definition + + [ Ramkumar Ganapathysubramanian ] + * New FAL plugin to get L3 Interface attribute + * Dataplane changes for QoS DSCP remarking + + [ Paul Atkins ] + + -- Paul Atkins Wed, 25 Nov 2020 09:19:14 +0000 + +vyatta-dataplane (3.12.6) unstable; urgency=medium + + [ Paul Atkins ] + * 3.11.67 + + [ aroberts ] + * QOS: Don't
drawback TC layer shape rate + + [ Nicholas Brown ] + * ut: remove unused CLI options + * ut: expose to testharness the core list dataplane_test passes to DPDK + * ut: remove cpu related stuff from dummyfs used by dataplane_test + + [ Paul Atkins ] + * 3.11.68 + * bridge: increment counters for ucast/nucast on vlans for local traffic + + [ Ian Wilson ] + * Move next_arg utility function to util.c + * Rename print_pl_feats to pl_print_feats and move to pl_commands.c + * Add function to write json for an interface-based pipeline feature + * Add pipeline show feature commands to l3_v4_out and l3_v6_out + * Add pipeline show feature commands to l3_v4_out_spath and l3_v6_out_spath + * Add pipeline show feature commands to l3_v4_encap and l3_v6_encap + * Add pipeline show feature commands to l3_v4_val and l3_v6_val + * Add pipeline show feature commands to l2_consume, l2_ether_lookup and l2_local + * Add pipeline show feature commands to l3_v4_route_lookup and l3_v6_route_lookup + * Add pipeline show feature commands to global nodes - drop, l4, and udp_in + * ut: Tests local ICMP pkts with egress ACLs and the originate fw + * Only pass an output interface pointer to icmp_send_no_route + * Change ICMP pkts to use ipv4-out-spath pipeline to filter pkts + + [ Robert Shearman ] + * storm_control: don't create duplicate policers when reacting to events + (Fixes: VRVDR-53623) + + [ Paul Atkins ] + + -- Paul Atkins Fri, 20 Nov 2020 12:00:03 +0000 + +vyatta-dataplane (3.12.5) unstable; urgency=medium + + [ Thomas Kiely ] + * Originated traffic via l2tpv3/ipsec is dropped + * Remove function which is no longer used + + [ Nicholas Brown ] + * Install Go protobuf files in correct location + + [ Charles (Chas) Williams ] + * ptp: allow ports to not exist (yet) (Bugfix: VRVDR-53511) + * ptp: refactor ptp_find_peer (Bugfix: VRVDR-53511) + + [ Paul Atkins ] + * 3.11.65 + + [ Nicholas Brown ] + * Removed 'unused-override' warnings + * Static Analysis as last Jenkins step + * Allow longer
timeout for the slow tests + * Update docs for meson instead of make + + [ Charles (Chas) Williams ] + * ptp: rework the peer select logic (Bugfix: VRVDR-53538) + * ptp: refactor cmd_ptp_op (Bugfix: VRVDR-53538) + * ptp: add resolver op mode commands (Bugfix: VRVDR-53538) + * ptp: basic resolver unit test (Bugfix: VRVDR-53538) + * ptp: routed resolver unit test (Bugfix: VRVDR-53538) + * ptp: test edge cases in resolver (Bugfix: VRVDR-53538) + * ptp: ensure sufficient buffer space (Bugfix: VRVDR-53538) + + [ Paul Atkins ] + * bpf_filter: add parentheses around macro arguments + * lpm: add parentheses around macro arguments + * portmonitor_cmds: don't use a macro to determine if a sess is erspan + * dpi: add parentheses around macro arguments + * ip6_icmp: add parentheses around macro arguments + * alg_rpc: add parentheses around macro arguments + * npf_cidr_util: add parentheses around macro arguments + * npf_instr: add parentheses around macro arguments + * npf_nat: add parentheses around macro arguments + * ut:arp: add parentheses around macro arguments + * ut:crypto: add parentheses around macro arguments + * ut:ip6: add parentheses around macro arguments + * ut:mstp_fwd: add parentheses around macro arguments + * ut:npf: add parentheses around macro arguments + * ut:fal_plugin_framer: add parentheses around macro arguments + * ut:fal_plugin_test: add parentheses around macro arguments + + [ Nicholas Brown ] + * Don't run clang-tidy on generated protobuf files + * Add missing dependency on generated files + * Increase UT timeout + + [ Paul Atkins ] + * if: rename cmd_pause_show to show_eth_info + * if: move the showing of the pause state into its own function + * fal: add api to allow dumping of L2 port state + * fal_plugin: add a new fal plugin API to dump bfd session state + + [ Daniel Gollub ] + * l2tpeth: restore VLAN functionality on L2TPv3 interfaces + + [ Paul Atkins ] + * 3.11.66 + + [ Alan Dewar ] + * QOS: period keyword changed to microseconds units
(Fixes: VRVDR-53324) + + [ Nicholas Brown ] + * Add more details on building the UT + + [ Robert Shearman ] + * storm_ctl: add support for configuring on VLAN subinterfaces + (Closes: VRVDR-53561) + + [ Charles (Chas) Williams ] + * lag: add protobuf (Bugfix: VRVDR-52496) + * lag: add min links support (Bugfix: VRVDR-52496) + * lag: add events (Bugfix: VRVDR-52496) + * event: expose if_create/if_delete events (Bugfix: VRVDR-52496) + * lag: add IANA type (Bugfix: VRVDR-52496) + + [ Ethan Li ] + * Define Micro BFD FAL attribute + + [ Gavin Shearer ] + * gpc: add a Provides to ensure FAL builds against correct dataplane + * fal qos: Add attributes to attach QoS IPv4/IPv6 GPC to i/f + * fal policer: add support for trTCM policers + * fal gpc: add actions to set designation, colour, and policer + + [ Paul Atkins ] + + -- Paul Atkins Fri, 20 Nov 2020 08:57:30 +0000 + +vyatta-dataplane (3.12.4) unstable; urgency=medium + + [ Charles (Chas) Williams ] + * ptp: avoid dereferencing bad interface pointers (Bugfix: VRVDR-53517) + + [ Paul Atkins ] + * 3.11.60 + + [ Robert Shearman ] + * nh_common: fix hash key during nexthop hash del & add + * route_flags: ignore RTF_UNUSABLE for nexthop comparison (Fixes: VRVDR-53512) + + [ Derek Fawcus ] + * NPF: Remove some stale code, to allow flexibility + * pipeline: use 'out node' for IPv4 mcast ethernet + * pipeline: use 'out node' for IPv6 mcast ethernet + * multicast: Clean up punt handling for unsup tuns + * multicast: Use 'out node' for IPv4/IPv6 VTI output + * multicast: Use 'out node' for IPv4/IPv6 P2P GRE tx + * multicast: Move TTL decrement during forward + * multicast: Move OIL replication counts + * multicast: Rename an interface variable + * multicast: Do not replicate to a DOWN interface + * multicast: Move pipeline walk logic + + [ Ian Wilson ] + * ut: Test egress ACLs with IPv4 and IPv6 multicast forwarding + + [ Paul Atkins ] + * 3.11.61 + + [ Nicholas Brown ] + * platform.conf can be static + + [ Paul Atkins ] + * 
3.11.62 + + [ Ian Wilson ] + * ut: Export functions to build and tear-down a GRE tunnel + * ut: Test sw egress ACLs with GRE tunnels + + [ Robert Shearman ] + * ecmp: remove unused "max-paths" option (Fixes: VRVDR-52393) + + [ Paul Atkins ] + * 3.11.63 + + [ Ian Wilson ] + * ut: Add function to wait for multicast route + + [ Paul Atkins ] + * 3.11.64 + + [ Mike Manning ] + * Dynamic entries in ND cache should not be deleted unless stale + + [ Nicholas Brown ] + * Meson build support (Closes: VRVDR-52941) + * Change hard-coded UT paths + * Add clang-tidy Static Analysis support to Jenkinsfile + * Remove autotools build support + * Define individual meson tests for each CK test suite + + [ Paul Atkins ] + + -- Paul Atkins Mon, 09 Nov 2020 13:31:07 +0000 + +vyatta-dataplane (3.12.3) unstable; urgency=medium + + [ Nicholas Brown ] + * Clarify UT path redirection code + + [ Ian Wilson ] + * npf: Rename 'typedef enum TCP_STATES' to 'enum tcp_session state' + * npf: Rename npf_tcpstate_t to struct npf_tcp_window + * npf: Replace nst_tcpst with nst_tcp_win in npf_state_t + * npf: Pack enum dp_session_state so that it uses 1 byte instead of 2 + * npf: Add npf_state_tcp2gen to determine generic state from tcp state + * npf: Pack enum npf_proto_idx.
Use the enum instead of a uint8_t + * npf: Add separate log functions for TCP and everything else + * npf: Add separate state change functions for TCP and everything else + * npf: Add separate state pack update functions for TCP and other + * npf: Add separate functions for connsync update/restore of session state + * npf: Move spinlocks into npf_state_npf_pack_update_xxx fns + * npf: Use separate state inspect functions for TCP, ICMP, and other + * npf: Use separate connsync state update functions for TCP and other + * npf: Add a union to npf_state_t for the different state types + * npf: Add separate function for session close for TCP and other + * npf: Rename npf_state_tcp_state_set and npf_state_generic_state_set + * npf: Remove TCP session states that were outside the enum + * npf: Change TCP sessions to use nst_tcp_state instead of nst_state + * npf: Change TCP strict FSM to be a lookup table to return a boolean + * npf: npf_state_update_session_state replaced + * npf: npf_timeout_get replaced + * npf: Session state stats defines replaced with inline functions + * npf: npf_map_str_to_generic_state replaced with dp_session_name2state + * npf: vrf creation moved from npf_timeout_set to cmd_npf_global_timeout + * npf: non-TCP sessions changed to use nst_gen_state instead of nst_state + * npf: union of nst_tcp_state and nst_gen_state removed from npf_state_t + * npf: Changes to sess limit rproc to use generic session state + * npf: Defines NPF_SET_SESSION_LOG_FLAG etc. 
replaced with functions + * npf: Renamed functions that return session state name + * npf: npf_pack_session_state changed to include union of state types + * npf: 'struct session' pointer passed into npf_session_update_state + * npf: npf_session_t pointer passed into npf_state_inspect + * npf: npf_tcp_state_is_closed fn removed + * npf: Changes to connsync functions to pack and update session state + + [ Nachiketa Prachanda ] + * fix use after free on events unregistration + * api: interface event notifications to plugins + + [ Paul Atkins ] + * 3.11.44 + * portmonitor_cmds: remove the code to handle out of order cfg + + [ Charles (Chas) Williams ] + * ptp: handle unavailable interface vlan mappings (Bugfix: VRVDR-53372) + + [ Paul Atkins ] + * 3.11.45 + + [ Gavin Shearer ] + * fal acl: use rule number attr instead of priority in rules + + [ Paul Atkins ] + * 3.11.46 + + [ Srinivas Narayan ] + * Include shadow ring in buffer count calculations + * Increase slowpath receive ring size + + [ Paul Atkins ] + * 3.11.46 + + [ Srinivas Narayan ] + * crypto: store out_ethertype in context + * crypto: store out_hdr_len in context + * crypto: re-order fields in crypto_pkt_ctx + * crypto: store udp encap length in SA to simplify code + * crypto: include DPDK driver stats in output + * crypto: Pass packet burst to ESP + * crypto: Move bad mbufs to end of context array + * crypto: Pass a batch of packets to the PMD + * crypto: Allocate crypto ops when crypto pkt buffer is set up + * crypto: pass batches of packets to ESP encrypt functions + * crypto: Pass batches of packets to ESP decrypt functions + * crypto: Remove debug error messages in some places + * crypto: pass errors from PMD operations to higher layers + * crypto: remove unnecessary check & error + * crypto: Add error counters for failures + * crypto: Set action to drop if packet has not been processed + * crypto: Prefetch batches of context pointers for processing + * crypto: Fetch data in first mbuf into L2 cache + 
* crypto: Prefetch contexts into L2 cache + * crypto: add inline implementation to grow buffer + * crypto: Invoke rte_pktmbuf_lastseg only for multi-seg pkts + + [ Paul Atkins ] + * 3.11.47 + + [ Ian Wilson ] + * ut: Verify that ipv4-orig-feat feature is enabled + * npf: NPF_RS_FLAG_FEAT_GBL renamed to NPF_RS_FLAG_FEAT_INTF_ALL + * npf: Move check of NPF_RS_FLAG_FEAT_INTF_ALL from npf_gbl_rs_count_incr + * npf: Enable feature on all interfaces for rulesets attached to global + * npf: Separate enabling of defrag-out and defrag-out-spath + + [ Paul Atkins ] + * 3.11.48 + + [ Ian Wilson ] + * ut: Tests egress ACL on a pppoe interface + * ut: Tests egress ACL on a bridge interface + + [ Paul Atkins ] + * 3.11.49 + + [ Nicholas Brown ] + * Move IGNORE_SANITIZER to compiler.h + + [ Paul Atkins ] + * 3.11.50 + + [ Thomas Kiely ] + * Update S2S UT infra for multiple V4 policies + * Add V4 test to ensure correct policy match for proto + * ACL V4 rule setup does not handle discrete protocol + * Update S2S UT infra for multiple V6 policies + * Add V6 test to ensure correct policy match for proto + * ACL V6 rule setup does not handle discrete protocol + + [ Paul Atkins ] + * 3.11.51 + + [ Srinivas Narayan ] + * crypto: save bytes from each batch for use as IVs + * crypto: add return value check for engine init + * crypto: remove unused functions + + [ Paul Atkins ] + * 3.11.52 + * lpm: change lpm walker to take a struct of params + * lpm: allow the callers of lpm_walk to kick the trackers for a rule + * route: call the route trackers after updating the fal l3 state + * lpm6: change lpm6 walker to take a struct of params + * lpm6: allow the callers of lpm6_walk to kick the trackers for a rule + * route6: call the route trackers after updating the fal l3 state + * 3.11.53 + + [ Ian Wilson ] + * ut: Add functions to enable multicast fwding and to add multicast route + * ut: Add test for multicast forwarding in the dataplane + + [ Paul Atkins ] + * 3.11.54 + + [ Srinivas Narayan ] 
+ * crypto: tune op and session pool sizes + + [ Paul Atkins ] + * 3.11.55 + + [ Srinivas Narayan ] + * crypto: prefetch fields in ctx in encrypt path + * crypto: Include current index in prefetch + * crypto: prefetch IVs to be used in encryption + * crypto: prefetch ops + + [ Paul Atkins ] + * 3.11.56 + + [ Ian Wilson ] + * ut: Add test for IPv6 multicast forwarding in the dataplane + + [ Paul Atkins ] + * 3.11.57 + + [ Derek Fawcus ] + * NPF: Rename npc_next_proto to npc_proto_final + * NPF: Rename 'PROTO' opcode to 'PROTO_FINAL' + * NPF: Add matching against IP header protocol field + * NPF: Report proto-final in rule output + + [ Nicholas Brown ] + * Depend on librte-meta-allpmds + + [ Paul Atkins ] + * 3.11.58 + + [ Ian Wilson ] + * npf: Local traffic, including IPv6 ND/NA, may be dropped by zone fw + * ut: Test IPv6 nbr egress in the presence of zones firewall + + [ Paul Atkins ] + * 3.11.59 + + -- Paul Atkins Tue, 03 Nov 2020 11:51:01 +0000 + +vyatta-dataplane (3.12.2) unstable; urgency=medium + + [ Nicholas Brown ] + * Remove no_extra_tests DEB_BUILD_OPTIONS + * Add debian packaging directory to .gitignore + * More specific include path libvyattafal pkgconfig + + [ Charles (Chas) Williams ] + * ptp: refactor into ptp_peer_dst_lookup (Bugfix: VRVDR-53302) + * ptp: refactor into ptp_peer_dst_resolve (Bugfix: VRVDR-53302) + * ptp: group peers by IP address (Bugfix: VRVDR-53302) + * ptp: refactor into ptp_peer_find_nexthop (Bugfix: VRVDR-53302) + * ptp: select best route for peer (Bugfix: VRVDR-53302) + + [ Paul Atkins ] + * 3.11.40 + * vhost: remove the code to handle out of order cfg for vhost + * 3.11.41 + + [ Ian Wilson ] + * npf: Check ingress and egress features when deciding to return ACL stats + * acl: Egress ACL in s/w path will not match dp originated IPv6 ND traffic + * ipv6_originate_filter made static + + [ Paul Atkins ] + * 3.11.42 + + [ Charles (Chas) Williams ] + * ptp: get sibling ifp and nexthop during iteration (Bugfix: VRVDR-53302) + * ptp: 
prefer peers with reachability (Bugfix: VRVDR-53302) + + [ Paul Atkins ] + * commands: remove the code to handle out of order cfg for poe + * 3.11.43 + + -- Paul Atkins Wed, 14 Oct 2020 10:02:17 +0100 + +vyatta-dataplane (3.12.1) unstable; urgency=medium + + [ Robert Shearman ] + * protobuf: add Path message fields for recursive labels + + [ Paul Atkins ] + + -- Paul Atkins Mon, 12 Oct 2020 09:20:52 +0100 + +vyatta-dataplane (3.11.72) unstable; urgency=medium + + [ Robert Shearman ] + * nh_common: ignore path state updates for deleted interfaces + (Fixes: VRVDR-53541) + + [ Paul Atkins ] + + -- Paul Atkins Mon, 07 Dec 2020 08:29:00 +0000 + +vyatta-dataplane (3.11.71) unstable; urgency=medium + + [ Sharmila Podury ] + * On reboot, disabled bond has members in u/u link state + * Refactor code that checks if device is started + + [ Paul Atkins ] + + -- Paul Atkins Tue, 01 Dec 2020 13:50:58 +0000 + +vyatta-dataplane (3.11.70) unstable; urgency=medium + + [ Nicholas Brown ] + * Jenkinsfile: Build target is now the Halifax + + [ aroberts ] + * Update dependency on DPDK version for dataplane + + [ Paul Atkins ] + + -- Paul Atkins Tue, 24 Nov 2020 09:53:18 +0000 + +vyatta-dataplane (3.11.69) unstable; urgency=medium + + [ Robert Shearman ] + * storm_control: don't create duplicate policers when reacting to events + (Fixes: VRVDR-53623) + + [ Paul Atkins ] + + -- Paul Atkins Mon, 23 Nov 2020 08:22:52 +0000 + +vyatta-dataplane (3.11.68) unstable; urgency=medium + + [ aroberts ] + * QOS: Don't drawback TC layer shape rate + + [ Paul Atkins ] + + -- Paul Atkins Fri, 20 Nov 2020 09:08:32 +0000 + +vyatta-dataplane (3.11.67) unstable; urgency=medium + + [ Alan Dewar ] + * QOS: period keyword changed to microseconds units (Fixes: VRVDR-53324) + + [ Paul Atkins ] + + -- Paul Atkins Tue, 17 Nov 2020 10:16:43 +0000 + +vyatta-dataplane (3.11.66) unstable; urgency=medium + + [ Daniel Gollub ] + * l2tpeth: restore VLAN functionality on L2TPv3 interfaces + + [ Paul Atkins ] + + -- Paul
Atkins Tue, 17 Nov 2020 09:20:52 +0000 + +vyatta-dataplane (3.11.65) unstable; urgency=medium + + [ Thomas Kiely ] + * Originated traffic via l2tpv3/ipsec is dropped + * Remove function which is no longer used + + [ Charles (Chas) Williams ] + * ptp: allow ports to not exist (yet) (Bugfix: VRVDR-53511) + * ptp: refactor ptp_find_peer (Bugfix: VRVDR-53511) + + [ Paul Atkins ] + + -- Paul Atkins Tue, 10 Nov 2020 09:03:48 +0000 + +vyatta-dataplane (3.11.64) unstable; urgency=medium + + [ Ian Wilson ] + * ut: Add function to wait for multicast route + + [ Paul Atkins ] + + -- Paul Atkins Thu, 05 Nov 2020 13:56:12 +0000 + +vyatta-dataplane (3.11.63) unstable; urgency=medium + + [ Ian Wilson ] + * ut: Export functions to build and tear-down a GRE tunnel + * ut: Test sw egress ACLs with GRE tunnels + + [ Paul Atkins ] + + -- Paul Atkins Thu, 05 Nov 2020 13:29:08 +0000 + +vyatta-dataplane (3.11.62) unstable; urgency=medium + + [ Nicholas Brown ] + * platform.conf can be static + + [ Paul Atkins ] + + -- Paul Atkins Wed, 04 Nov 2020 12:39:24 +0000 + +vyatta-dataplane (3.11.61) unstable; urgency=medium + + [ Robert Shearman ] + * nh_common: fix hash key during nexthop hash del & add + * route_flags: ignore RTF_UNUSABLE for nexthop comparison (Fixes: VRVDR-53512) + + [ Derek Fawcus ] + * NPF: Remove some stale code, to allow flexibility + * pipeline: use 'out node' for IPv4 mcast ethernet + * pipeline: use 'out node' for IPv6 mcast ethernet + * multicast: Clean up punt handling for unsup tuns + * multicast: Use 'out node' for IPv4/IPv6 VTI output + * multicast: Use 'out node' for IPv4/IPv6 P2P GRE tx + * multicast: Move TTL decrement during forward + * multicast: Move OIL replication counts + * multicast: Rename an interface variable + * multicast: Do not replicate to a DOWN interface + * multicast: Move pipeline walk logic + + [ Ian Wilson ] + * ut: Test egress ACLs with IPv4 and IPv6 multicast forwarding + + [ Paul Atkins ] + + -- Paul Atkins Wed, 04 Nov 2020 12:37:28 +0000 
+ +vyatta-dataplane (3.11.60) unstable; urgency=medium + + [ Charles (Chas) Williams ] + * ptp: avoid dereferencing bad interface pointers (Bugfix: VRVDR-53517) + + [ Paul Atkins ] + + -- Paul Atkins Tue, 03 Nov 2020 13:00:25 +0000 + +vyatta-dataplane (3.11.59) unstable; urgency=medium + + [ Ian Wilson ] + * npf: Local traffic, including IPv6 ND/NA, may be dropped by zone fw + * ut: Test IPv6 nbr egress in the presence of zones firewall + + [ Paul Atkins ] + + -- Paul Atkins Tue, 03 Nov 2020 09:06:14 +0000 + +vyatta-dataplane (3.11.58) unstable; urgency=medium + + [ Derek Fawcus ] + * NPF: Rename npc_next_proto to npc_proto_final + * NPF: Rename 'PROTO' opcode to 'PROTO_FINAL' + * NPF: Add matching against IP header protocol field + * NPF: Report proto-final in rule output + + [ Nicholas Brown ] + * Depend on librte-meta-allpmds + + [ Paul Atkins ] + + -- Paul Atkins Mon, 02 Nov 2020 15:41:08 +0000 + +vyatta-dataplane (3.11.57) unstable; urgency=medium + + [ Ian Wilson ] + * ut: Add test for IPv6 multicast forwarding in the dataplane + + [ Paul Atkins ] + + -- Paul Atkins Thu, 29 Oct 2020 08:54:47 +0000 + +vyatta-dataplane (3.11.56) unstable; urgency=medium + + [ Srinivas Narayan ] + * crypto: prefetch fields in ctx in encrypt path + * crypto: Include current index in prefetch + * crypto: prefetch IVs to be used in encryption + * crypto: prefetch ops + + [ Paul Atkins ] + + -- Paul Atkins Wed, 28 Oct 2020 11:51:25 +0000 + +vyatta-dataplane (3.11.55) unstable; urgency=medium + + [ Srinivas Narayan ] + * crypto: tune op and session pool sizes + + [ Paul Atkins ] + + -- Paul Atkins Wed, 28 Oct 2020 08:11:57 +0000 + +vyatta-dataplane (3.11.54) unstable; urgency=medium + + [ Ian Wilson ] + * ut: Add functions to enable multicast fwding and to add multicast route + * ut: Add test for multicast forwarding in the dataplane + + [ Paul Atkins ] + + -- Paul Atkins Tue, 27 Oct 2020 08:35:24 +0000 + +vyatta-dataplane (3.11.53) unstable; urgency=medium + + * lpm: change lpm 
walker to take a struct of params + * lpm: allow the callers of lpm_walk to kick the trackers for a rule + * route: call the route trackers after updating the fal l3 state + * lpm6: change lpm6 walker to take a struct of params + * lpm6: allow the callers of lpm6_walk to kick the trackers for a rule + * route6: call the route trackers after updating the fal l3 state + + -- Paul Atkins Mon, 26 Oct 2020 13:21:18 +0000 + +vyatta-dataplane (3.11.52) unstable; urgency=medium + + [ Srinivas Narayan ] + * crypto: save bytes from each batch for use as IVs + * crypto: add return value check for engine init + * crypto: remove unused functions + + [ Paul Atkins ] + + -- Paul Atkins Mon, 26 Oct 2020 09:45:57 +0000 + +vyatta-dataplane (3.11.51) unstable; urgency=medium + + [ Thomas Kiely ] + * Update S2S UT infra for multiple V4 policies + * Add V4 test to ensure correct policy match for proto + * ACL V4 rule setup does not handle discrete protocol + * Update S2S UT infra for multiple V6 policies + * Add V6 test to ensure correct policy match for proto + * ACL V6 rule setup does not handle discrete protocol + + [ Paul Atkins ] + + -- Paul Atkins Fri, 23 Oct 2020 10:09:01 +0100 + +vyatta-dataplane (3.11.50) unstable; urgency=medium + + [ Nicholas Brown ] + * Move IGNORE_SANITIZER to compiler.h + + [ Paul Atkins ] + + -- Paul Atkins Fri, 23 Oct 2020 09:05:58 +0100 + +vyatta-dataplane (3.11.49) unstable; urgency=medium + + [ Ian Wilson ] + * ut: Tests egress ACL on a pppoe interface + * ut: Tests egress ACL on a bridge interface + + [ Paul Atkins ] + + -- Paul Atkins Thu, 22 Oct 2020 08:18:20 +0100 + +vyatta-dataplane (3.11.48) unstable; urgency=medium + + [ Ian Wilson ] + * ut: Verify that ipv4-orig-feat feature is enabled + * npf: NPF_RS_FLAG_FEAT_GBL renamed to NPF_RS_FLAG_FEAT_INTF_ALL + * npf: Move check of NPF_RS_FLAG_FEAT_INTF_ALL from npf_gbl_rs_count_incr + * npf: Enable feature on all interfaces for rulesets attached to global + * npf: Separate enabling of defrag-out and 
defrag-out-spath + + [ Paul Atkins ] + + -- Paul Atkins Wed, 21 Oct 2020 16:34:35 +0100 + +vyatta-dataplane (3.11.47) unstable; urgency=medium + + [ Srinivas Narayan ] + * crypto: store out_ethertype in context + * crypto: store out_hdr_len in context + * crypto: re-order fields in crypto_pkt_ctx + * crypto: store udp encap length in SA to simplify code + * crypto: include DPDK driver stats in output + * crypto: Pass packet burst to ESP + * crypto: Move bad mbufs to end of context array + * crypto: Pass a batch of packets to the PMD + * crypto: Allocate crypto ops when crypto pkt buffer is set up + * crypto: pass batches of packets to ESP encrypt functions + * crypto: Pass batches of packets to ESP decrypt functions + * crypto: Remove debug error messages in some places + * crypto: pass errors from PMD operations to higher layers + * crypto: remove unnecessary check & error + * crypto: Add error counters for failures + * crypto: Set action to drop if packet has not been processed + * crypto: Prefetch batches of context pointers for processing + * crypto: Fetch data in first mbuf into L2 cache + * crypto: Prefetch contexts into L2 cache + * crypto: add inline implementation to grow buffer + * crypto: Invoke rte_pktmbuf_lastseg only for multi-seg pkts + + [ Paul Atkins ] + + -- Paul Atkins Wed, 21 Oct 2020 09:14:31 +0100 + +vyatta-dataplane (3.11.46) unstable; urgency=medium + + [ Gavin Shearer ] + * fal acl: use rule number attr instead of priority in rules + + [ Paul Atkins ] + * Include shadow ring in buffer count calculations + * Increase slowpath receive ring size + + [ Paul Atkins ] + + -- Paul Atkins Tue, 20 Oct 2020 10:07:06 +0100 + +vyatta-dataplane (3.11.45) unstable; urgency=medium + + [ Paul Atkins ] + * portmonitor_cmds: remove the code to handle out of order cfg + + [ Charles (Chas) Williams ] + * ptp: handle unavailable interface vlan mappings (Bugfix: VRVDR-53372) + + [ Paul Atkins ] + + -- Paul Atkins Fri, 16 Oct 2020 09:52:15 +0100 + 
+vyatta-dataplane (3.11.44) unstable; urgency=medium + + [ Nicholas Brown ] + * Clarify UT path redirection code + + [ Ian Wilson ] + * npf: Rename 'typedef enum TCP_STATES' to 'enum tcp_session state' + * npf: Rename npf_tcpstate_t to struct npf_tcp_window + * npf: Replace nst_tcpst with nst_tcp_win in npf_state_t + * npf: Pack enum dp_session_state so that it uses 1 byte instead of 2 + * npf: Add npf_state_tcp2gen to determine generic state from tcp state + * npf: Pack enum npf_proto_idx. Use the enum instead of a uint8_t + * npf: Add separate log functions for TCP and everything else + * npf: Add separate state change functions for TCP and everything else + * npf: Add separate state pack update functions for TCP and other + * npf: Add separate functions for connsync update/restore of session state + * npf: Move spinlocks into npf_state_npf_pack_update_xxx fns + * npf: Use separate state inspect functions for TCP, ICMP, and other + * npf: Use separate connsync state update functions for TCP and other + * npf: Add a union to npf_state_t for the different state types + * npf: Add separate function for session close for TCP and other + * npf: Rename npf_state_tcp_state_set and npf_state_generic_state_set + * npf: Remove TCP session states that were outside the enum + * npf: Change TCP sessions to use nst_tcp_state instead of nst_state + * npf: Change TCP strict FSM to be a lookup table to return a boolean + * npf: npf_state_update_session_state replaced + * npf: npf_timeout_get replaced + * npf: Session state stats defines replaced with inline functions + * npf: npf_map_str_to_generic_state replaced with dp_session_name2state + * npf: vrf creation moved from npf_timeout_set to cmd_npf_global_timeout + * npf: non-TCP sessions changed to use nst_gen_state instead of nst_state + * npf: union of nst_tcp_state and nst_gen_state removed from npf_state_t + * npf: Changes to sess limit rproc to use generic session state + * npf: Defines NPF_SET_SESSION_LOG_FLAG etc. 
replaced with functions + * npf: Renamed functions that return session state name + * npf: npf_pack_session_state changed to include union of state types + * npf: 'struct session' pointer passed into npf_session_update_state + * npf: npf_session_t pointer passed into npf_state_inspect + * npf: npf_tcp_state_is_closed fn removed + * npf: Changes to connsync functions to pack and update session state + + [ Nachiketa Prachanda ] + * fix use after free on events unregistration + * api: interface event notifications to plugins + + [ Paul Atkins ] + + -- Paul Atkins Wed, 14 Oct 2020 15:50:34 +0100 + +vyatta-dataplane (3.11.43) unstable; urgency=medium + + [ Charles (Chas) Williams ] + * ptp: get sibling ifp and nexthop during iteration (Bugfix: VRVDR-53302) + * ptp: prefer peers with reachability (Bugfix: VRVDR-53302) + + [ Paul Atkins ] + * commands: remove the code to handle out of order cfg for poe + + -- Paul Atkins Wed, 14 Oct 2020 08:31:05 +0100 + +vyatta-dataplane (3.11.42) unstable; urgency=medium + + [ Ian Wilson ] + * npf: Check ingress and egress features when deciding to return ACL stats + * acl: Egress ACL in s/w path will not match dp originated IPv6 ND traffic + * ipv6_originate_filter made static + + [ Paul Atkins ] + + -- Paul Atkins Tue, 13 Oct 2020 14:12:35 +0100 + +vyatta-dataplane (3.11.41) unstable; urgency=medium + + * vhost: remove the code to handle out of order cfg for vhost + + -- Paul Atkins Tue, 13 Oct 2020 10:34:16 +0100 + +vyatta-dataplane (3.11.40) unstable; urgency=medium + + [ Nicholas Brown ] + * Remove no_extra_tests DEB_BUILD_OPTIONS + * Add debian packaging directory to .gitignore + * More specific include path libvyattafal pkgconfig + + [ Charles (Chas) Williams ] + * ptp: refactor into ptp_peer_dst_lookup (Bugfix: VRVDR-53302) + * ptp: refactor into ptp_peer_dst_resolve (Bugfix: VRVDR-53302) + * ptp: group peers by IP address (Bugfix: VRVDR-53302) + * ptp: refactor into ptp_peer_find_nexthop (Bugfix: VRVDR-53302) + * ptp: select 
best route for peer (Bugfix: VRVDR-53302) + + [ Paul Atkins ] + + -- Paul Atkins Tue, 13 Oct 2020 09:56:41 +0100 + +vyatta-dataplane (3.11.39) unstable; urgency=medium + + [ Ramesh Devarajan ] + * capture: Print dropped frames count + + [ Paul Atkins ] + * capture: compare fal obj against FAL_NULL_OBJECT_ID instead of NULL + + -- Paul Atkins Thu, 08 Oct 2020 11:30:41 +0100 + +vyatta-dataplane (3.11.38) unstable; urgency=medium + + [ Charles (Chas) Williams ] + * dpdk: add ConnectX-6 support to vplane-uio + + [ Paul Atkins ] + * backplane: add comment about why command replay is needed + * backplane: check that the FILE ptr is set in the command handler + * rt_commands: remove the code to handle out of order cfg for garp + * commands: remove the code to handle out of order cfg for switchport + + -- Paul Atkins Wed, 07 Oct 2020 08:54:45 +0100 + +vyatta-dataplane (3.11.37) unstable; urgency=medium + + [ Nicholas Brown ] + * Update exported symbols from fal test plugin + * Use visibility attribute for symbols + * Use __FOR_EXPORT instead of __externally_visible + + [ Robert Shearman ] + * mpls: fix crash when displaying PD subset data + * pd_show: allow showing objects in state full as well + * mpls: fix output of PD subset state (Fixes: VRVDR-53208) + + [ Ian Wilson ] + * ut: Add test for sw acl with fragmented IPv4 pkt on output + * ut: Add test for sw acl with fragmented IPv6 pkt on output + * npf: Set address family in ruleset when grouper is not initialized + + [ Paul Atkins ] + + -- Paul Atkins Fri, 02 Oct 2020 14:54:03 +0100 + +vyatta-dataplane (3.11.36) unstable; urgency=medium + + [ Ian Wilson ] + * ut: dpt_udp and dpt_tcp enhanced to support IPv6 + * ut: Basic ingress software ACL tests enhanced to include TCP and UDP + * ut: Add egress software ACL tests + * ut: Add tests for sw ACLs on the ip_lookup_and_originate output path + * ut: Add tests for sw ACLs for pkts originated from the router (spath) + * Create ipv4-out-spath pipeline node + * Create 
ipv6-out-spath pipeline node + + [ Paul Atkins ] + + -- Paul Atkins Thu, 01 Oct 2020 08:34:29 +0100 + +vyatta-dataplane (3.11.35) unstable; urgency=medium + + * Revert "dp_test: remove undefined functions from headers" + * Revert "ut: remove unused code in dp_test_npf_sess_lib" + + -- Paul Atkins Tue, 29 Sep 2020 11:28:22 +0100 + +vyatta-dataplane (3.11.34) unstable; urgency=medium + + [ Nicholas Brown ] + * Treat ndpi dependency like other dependencies + * Use pkgconfig macro variables for protobuf + * Move all test code to single directory + + [ Nachiketa Prachanda ] + * dp_test: remove undefined functions from headers + * rename dpt_session_counters + * dp_test: export dpt_session_counters function + + [ Paul Atkins ] + * npf: remove unused code from alg/alg_apt.c + * npf: remove unused code from cgnat/cgn_session.c + * npf: remove unused code from cgnat/cgn_sess_state.c + * npf: remove unused code from cgnat/cgn_log.c + * npf: remove unused code from cgnat/cgn_policy.c + * npf: remove unused code from config/npf_attach_point.c + * npf: remove unused code from config/pmf_att_rlgrp.c + * npf: remove unused code from config/pmf_parse.c + * npf: remove unused code from dpi/npf_typedb.c + * npf: remove unused code from nat/nat_pool_event.c + * npf: remove unused code from zones/npf_zone_private.c + * npf: remove unused code from npf_vrf.c + * npf: remove unused code from npf_tblset.c + * npf: remove unused code from npf_cache.c + * npf: remove unused code from npf_nat.c + * npf: remove unused code from npf_nat64.c + * npf: remove unused code from npf_processor.c + * npf: remove unused code from npf_state_tcp.c + * npf: remove unused code from npf_disassemble.c + * npf: remove unused code from npf_session.c + + -- Paul Atkins Tue, 29 Sep 2020 09:07:41 +0100 + +vyatta-dataplane (3.11.33) unstable; urgency=medium + + [ Nicholas Brown ] + * remove config for cpputest + * Don't check DPDK port size + * Require a recent openssl version + * Don't set cpu arch in attempt to 
match DPDK + * PACKAGE_VERSION is already defined in build_config.h + + [ Paul Atkins ] + + -- Paul Atkins Mon, 28 Sep 2020 10:12:48 +0100 + +vyatta-dataplane (3.11.32) unstable; urgency=medium + + [ Charles (Chas) Williams ] + * debian: cleanup .postinst script (Bugfix: VRVDR-53193) + + [ Paul Atkins ] + + -- Paul Atkins Fri, 25 Sep 2020 08:14:15 +0100 + +vyatta-dataplane (3.11.31) unstable; urgency=medium + + [ Robert Shearman ] + * if: remove unnecessary name hash insert during hwport init + * if: move allocation of DPDK ethernet interfaces to dpdk_eth_if.c + (Closes: VRVDR-53058) + + [ Paul Atkins ] + + -- Paul Atkins Thu, 24 Sep 2020 15:29:41 +0100 + +vyatta-dataplane (3.11.30) unstable; urgency=medium + + [ Srinivas Narayan ] + * crypto: set ICV offset correctly for multi-segment packets + * crypto: Initialize status to avoid spurious drops + + [ Paul Atkins ] + + -- Paul Atkins Thu, 24 Sep 2020 15:05:14 +0100 + +vyatta-dataplane (3.11.29) unstable; urgency=medium + + [ Paul Aitken ] + * DPI: fix appFW "ten packets" functionality + * DPI: make appFW cognisant of engine ID + + [ Ian Wilson ] + * npf: Initialisation of config ht moved out of npf_make_rule + * npf: Parse special ACL group-attribute rule if present + * npf: Skip ruleset inspection for address-family if not enabled on group + * ut: Add simple software ACL tests + + [ Paul Atkins ] + + -- Paul Atkins Mon, 21 Sep 2020 07:55:22 +0100 + +vyatta-dataplane (3.11.28) unstable; urgency=medium + + [ Paul Atkins ] + * ipv6: remove unused code from nd6_nbr.c + * if: remove unused code from if.c + * if: remove unused code from bridge/bridge_port.c + * crypto: remove unused code from crypto_policy.c + * crypto: remove unused code from crypto_engine.c + * commands: remove unused code from commands.c + * arp: remove unused code from arp.c + + [ Ian Wilson ] + * npf: Filter and sort sessions by NAT translation address or port + + [ Paul Atkins ] + + -- Paul Atkins Fri, 18 Sep 2020 16:43:48 +0100 + 
+vyatta-dataplane (3.11.27) unstable; urgency=medium + + * commands: remove the code to handle out of order cfg for speed + * storm_ctl: remove the code to handle out of order cfg + + -- Paul Atkins Thu, 17 Sep 2020 13:11:51 +0100 + +vyatta-dataplane (3.11.26) unstable; urgency=medium + + * ut: remove dp_test_lib_cmd as the code in it is not used + * ut: remove unused code in dp_test_netlink_state.c + * ut: remove unused code in dp_test_qos_lib.c + * ut: remove unused code in dp_test_str.c + * ut: remove unused code in dp_test_console.c + * ut: remove unused code in dp_test_lib.c + * ut: remove unused code in dp_test_lib_intf.c + + -- Paul Atkins Wed, 16 Sep 2020 09:40:24 +0100 + +vyatta-dataplane (3.11.25) unstable; urgency=medium + + * npf: remove the code in src/npf/alg/apt as it is not used + * ut: remove unused code in dp_test_npf_alg_sip_lib + * ut: remove unused code in dp_test_npf_alg_sip_call + * ut: remove unused code in dp_test_npf_alg_sip_parse + * ut: remove unused code in dp_test_npf_alg_lib + * ut: remove unused code in dp_test_npf_lib + * ut: remove unused code in dp_test_npf_sess_lib + * ut: remove unused code in dp_test_session_internal_lib + * ut: remove unused code in dp_test_npf_nat_lib + * ut: remove unused code in dp_test_npf_fw_lib + * ut: remove unused code in dp_test_npf_portmap_lib + + -- Paul Atkins Tue, 15 Sep 2020 13:41:22 +0100 + +vyatta-dataplane (3.11.24) unstable; urgency=medium + + [ Paul Aitken ] + * NPF: prevent possible null deref + + [ Gavin Shearer ] + * cgn: add locking when sending logs ZMQ + + [ Paul Atkins ] + + -- Paul Atkins Tue, 15 Sep 2020 13:37:16 +0100 + +vyatta-dataplane (3.11.23) unstable; urgency=medium + + * zmq_dp: remove unused code + * dealer: remove unused code + * ut: remove unused code in dp_test_crypto_utils + * ut: remove unused code in dp_test_lib_intf + * ut: remove unused code in dp_test_netlink_state + * ut: remove unused code in dp_test_qos_lib + * vxlan: remove unused code + * vti: remove unused 
code + * qos_sched: remove unused code + * nsh: remove unused code + + -- Paul Atkins Mon, 14 Sep 2020 14:14:45 +0100 + +vyatta-dataplane (3.11.22) unstable; urgency=medium + + [ Gavin Shearer ] + * npf: ensure cache ptr set if grouper/rule processing + * npf: check IPv4/v6 proto before accessing cached protocol + * npf: add check to npf_remark_dscp for non-IP packets + * npf: update npf_cache_all() to not convert failures to success + * npf: init cache IP addresses pointer to NULL for embedded pkts + * npf: enable test for a bad embedded ICMP error packet + * npf: remove double space from "from npf" + + [ Philip Downey ] + * MCAST Avoid dereferencing uninitialised fal RFP object (Fixes: VRVDR-46304) + + [ Thomas Kiely ] + * Debug keyword "flow-cache" missing + * Make MAC limit debugs conditional + + [ Paul Atkins ] + * mpls: track pd state when updating label table entry update + * ut: add mpls test for sending existing route update + + -- Paul Atkins Mon, 14 Sep 2020 13:42:54 +0100 + +vyatta-dataplane (3.11.21) unstable; urgency=medium + + [ Paul Atkins ] + * netlink: vrf_link_create should return NULL not false on failure + + [ Robert Shearman ] + * fal: fix generation of next-hop router interface attribute + (Fixes: VRVDR-52948) + + [ Ian Wilson ] + * npf: Unused function npf_json_nat_session removed + * npf: Return rule details in firewall and NAT session json + + [ Paul Atkins ] + + -- Paul Atkins Fri, 11 Sep 2020 08:37:49 +0100 + +vyatta-dataplane (3.11.20) unstable; urgency=medium + + [ Paul Atkins ] + * crypto: policy_rule_find_by_tag should return NULL not false + + [ Gavin Shearer ] + * cpp: request that the burst rate is set to 100ms + + [ Manohar Rapeti ] + * qos: enhanced "qos show platform" (Bugfix: VRVDR-52788) + * qos: possible accessing of freed pointer (Bugfix: VRVDR-52788) + * qos: Indentation fix (Bugfix: VRVDR-52788) + + [ Ian Wilson ] + * ut: Renamed npf ICMP tests + * ut: Added test for ICMP pkts with corrupted embedded packet + + [ Paul Atkins 
] + + -- Paul Atkins Thu, 10 Sep 2020 09:36:33 +0100 + +vyatta-dataplane (3.11.19) unstable; urgency=medium + + * 3.10.70 + * ut: remove mac limit tests that removes profile while in use + * ut: dp_test_npf_json_get_portmap_port leaking json array + * controller: cleanup all requests on shutdown + * ut: fix mem leak in the qos class tests + * ut: free the packet descriptors in the session tests + * ut: cleanup json array after use in sess lib + + -- Paul Atkins Wed, 09 Sep 2020 08:43:39 +0100 + +vyatta-dataplane (3.11.18) unstable; urgency=medium + + [ aroberts ] + * Don't allow a child shaper to exceed 99.6% of parent rate + + [ Manohar Rapeti ] + * qos: mem leak in dataplane UT (Bugfix: VRVDR-49730) + * qos: uninitialised value (Bugfix: VRVDR-49730) + + [ Paul Atkins ] + + -- Paul Atkins Tue, 08 Sep 2020 08:38:25 +0100 + +vyatta-dataplane (3.11.17) unstable; urgency=medium + + [ aroberts ] + * Add 100G support for Qos shaper commands + * Add dependency with 64bit qos structure in dpdk + + [ Paul Atkins ] + + -- Paul Atkins Fri, 04 Sep 2020 09:54:23 +0100 + +vyatta-dataplane (3.11.16) unstable; urgency=medium + + [ Robert Shearman ] + * dpdk-eth: avoid duplicate link up/link down logs (Fixes: VRVDR-52606) + + [ Paul Atkins ] + + -- Paul Atkins Thu, 03 Sep 2020 16:15:43 +0100 + +vyatta-dataplane (3.11.15) unstable; urgency=medium + + [ Shweta Choudaha ] + * portmonitor: add and replay multiple cfg command + + [ Ian Wilson ] + * npf: Rename npf_pack_npf_state to npf_pack_session_state + * npf: Rename npf_pack_session_stats to npf_pack_dp_sess_stats + + [ Charles (Chas) Williams ] + * ptp: add additional-path support (Bugfix: VRVDR-48480) + + [ Ian Wilson ] + * npf: Rename struct npf_pack_npf_nat to struct npf_pack_nat + * npf: Change prefix in npf_pack_dp_session from 'dps_' to 'pds_' + * npf: Rename npf_pack_sentry to npf_pack_sentry_packet + * npf: Use 'pns' for prefix and pointer variable for npf_pack_npf_session + * npf: Naturally align npf_tcpstate_t and 
npf_state_t + * npf: Rename npf_pack_npf_nat64 to npf_pack_nat64 + * npf: Prefix npf_pack_message_hdr objects with 'pmh_' + * npf: Add packed attribute to 'enum session_pack_type' + * npf: Prefix npf_pack_session_hdr objects with 'psh_' + * session: Pack enum session_feature_type and re-arrange session_feature + * ut: Add function to fetch session counters + * ut: Tests connsync for a firewall UDP session + * ut: Tests connsync for a firewall TCP session with TCP strict enabled + + [ Robert Shearman ] + * 3.10.69 + + [ Paul Atkins ] + * flow_cache: initialise the flow_cache_hash_key to 0 + * ut: modify the stack trace for the urcu resize valgrind suppression + * ut: return -EOPNOTSUPP in the test fal code in ...l2_get_attrs + * rte_acl: free the name when calling npf_rte_acl_destroy + * ut: add a valgrind suppression for rte_cpu_get_flag_enabled + * ut: add more wildcards to the grouper suppression to catch all calls + * ut: free the contexts created in the npf_apt tests + + [ Robert Shearman ] + * include: standardise FAL interface index parameter naming + * include: standardise FAL object parameter naming (Fixes: VRVDR-52849) + + [ Paul Atkins ] + + -- Paul Atkins Thu, 03 Sep 2020 13:36:47 +0100 + +vyatta-dataplane (3.11.14) unstable; urgency=medium + + [ Mandeep Rohilla ] + * BFD: Switch attribute to get Local Discriminator shift + + [ Srinivas Narayan ] + * crypto: cut over non-combined ciphers to rte infra + * crypto: Handle failures in session setup gracefully + * crypto: set up correct default for aead_algo + * crypto: remove references to cipher_name + * crypto: remove references to md_name + * crypto: Remove unnecessary storage of auth alg name + * crypto: Define separate structure for openssl info + * crypto: pass openssl setup decision to SA setup function + * crypto: Remove openssl implementation for aes-gcm + * crypto: define block sizes + * crypto: Make iv generation and storage common operations + * crypto: Move openssl fields to separate structure + 
* crypto: include dpdk device id and name in vplsh output + * crypto: Add error messages in failure path + * crypto: reset pmd id in array of ids by dev type + * crypto: Make dev-id value check specific + * crypto: separate definitions for cipher & digest key sizes + + [ Karthik Murugesan ] + * vxlan: Added null check to avoid null-pointer dereference + + [ Charles (Chas) Williams ] + * Fix potentially offensive language + * Fix potentially offensive language in CGNAT + + -- Robert Shearman Mon, 24 Aug 2020 17:46:44 +0100 + +vyatta-dataplane (3.11.13) unstable; urgency=medium + + [ Vinicius Soares ] + * npf: Moved parser auxiliary static functions to the top of the file + * npf: Added argument to auxiliary parser function that specifies a delimiter + * npf: Added support for 'auto-per-action' counter type for ACL rules. + * npf: Added support for 'auto-per-action' counter type for ACL rules. + + [ Paul Atkins ] + + -- Paul Atkins Fri, 21 Aug 2020 18:13:12 +0100 + +vyatta-dataplane (3.11.12) unstable; urgency=medium + + [ Paul Atkins ] + * 3.10.67 + + [ Paul Aitken ] + * DPI: app groups: avoid null deref + + [ Paul Atkins ] + * 3.10.68 + + [ Robert Shearman ] + * route_v6: fix nexthop retrieval for promotion during route delete + * route: fix nexthop retrieval for promotion during route delete + (Fixes: VRVDR-52609) + + [ Srinivas Narayan ] + * crypto: store crypto device id in SA for faster access + * crypto: create session pools and queue pairs for PMD + * crypto: set up session in driver + * crypto: Pass crypto op using packet metadata + * crypto: Make openssl encrypt/decrypt function public + * crypto: set up infrastructure to invoke PMD + * crypto: cut over to rte PMD infra + * crypto: set up crypto op for AES-GCM + + [ Paul Atkins ] + * debian: fix wording in changelog + + [ Srinivas Narayan ] + * crypto: minimize dependency on SA + * crypto: set session direction at the time of SA creation + * crypto: Make crypto_pkt_ctx visible to other crypto modules + * 
crypto: Add packet metadata fields to crypto_pkt_ctx + * crypto: store SA in crypto packet context + * crypto: store bytes processed in crypto packet context + * crypto: refactor esp_input_inner + * crypto: re-factor esp_output_inner + * crypto: consolidate esp input functions + * crypto: consolidate esp_output functions + * crypto: increment output error only on encrypt op + * crypto: streamline post-decrypt processing + * crypto: Refactor post-decrypt VTI handling + * crypto: re-factor post-decrypt VFP handling + * crypto: refactor post-decrypt overlay VRF handling + * crypto: refactor all post-decrypt handling + * crypto: enable burst processing for esp_input + * crypto: Enable burst processing for esp_output + + [ Paul Atkins ] + + -- Paul Atkins Fri, 21 Aug 2020 17:51:18 +0100 + +vyatta-dataplane (3.11.11) unstable; urgency=medium + + [ Paul Atkins ] + * gre: don't double free mbuf if using a gre tunnel to ourself + + [ Ian Wilson ] + * npf: Change npf to use dataplane session states for UDP etc. 
+ * npf: Store generic session state in the session + * npf: Set the alg bit in ALG parent sessions + * npf: Add the se_alg feature boolean to the connsync structure + * npf: Change the feature uint8_ts to bits in npf_pack_dp_session + * npf: Replace se_nat bit with se_snat and se_dnat bits in the session + * npf: Add se_in and se_out bits to the session + * npf: Add se_app bit to the session to mark dpi sessions + * npf: Add function to calculate session time-to-expire for json + * npf: Return generic ALG json for each session + * npf: Return specific ALG json for each session + * npf: Naturally align struct npf_pack_dp_session + * npf: Remove se_etime from connsync session structure + * ut: Add test functions to create and send udp, tcp, or icmp NATd pkts + * session: Add command to return list of items from the dataplane sessions + * npf: Add address family filter to session list command + * npf: Add filters to session list command for ID, proto, intf and direction + * npf: Add filters to session list cmd for src and dest addr and port + * npf: Add filters to session list cmd for features + * npf: Separate the existing session show filter from the json + * npf: Add handler for "show dataplane sessions" command + * npf: Add handler for "show dataplane sessions summary" command + * npf: Add handler for "clear dataplane sessions" command + * ut: Rename the npf snat test cases + * npf: Add a flag to the session to denote that it is being used as a fw session + + [ Paul Atkins ] + * flow_cache: move flow_cache_empty_table higher in file + * flow_cache: teardown the flow cache on shutdown + * flow_cache: don't init the flow cache multiple times per core + * ut: don't leak the ifname in the addport_request + + [ aroberts ] + * Reinstall a mark-map if a resource group changes + + [ Paul Atkins ] + + -- Paul Atkins Wed, 19 Aug 2020 09:54:14 +0100 + +vyatta-dataplane (3.11.10) unstable; urgency=medium + + [ Srinivas Narayan ] + * crypto: Convert lengths to uint8_t in 
preparation for move + * crypto: Avoid dependency on openssl definitions + * crypto: Re-arrange fields for better performance + * crypto: create DPDK infrastructure pools + * crypto: Add DPDK versions of encryption & auth algorithms + * crypto: clean up openssl functions + * crypto: determine PMD type based on algorithms + * crypto: provide function to determine next crypto core + * crypto: Create DPDK crypto PMD + + [ Paul Atkins ] + + -- Paul Atkins Tue, 18 Aug 2020 08:29:42 +0100 + +vyatta-dataplane (3.11.9) unstable; urgency=medium + + [ aroberts ] + * Don't allow a child shaper to exceed 99.6% of parent rate + + [ Paul Atkins ] + * 3.10.63 + + [ Ian Wilson ] + * npf: Increment session stats if session matched, and the pkt is not dropped + + [ Paul Atkins ] + * 3.10.64 + + [ Gavin Shearer ] + * ippf: fix issue of corrupted first byte of IPv6 address to match + + [ Paul Atkins ] + * 3.10.65 + + [ Robert Shearman ] + * if: move vlan_if_get_stats to common interface code + * gre: add support for retrieving FAL stats + + [ Paul Atkins ] + * 3.10.66 + + [ Brian Russell ] + * qos: specify designator for the priority local queue + * qos: uprev protocol versions + + [ Paul Atkins ] + + -- Paul Atkins Mon, 17 Aug 2020 15:44:18 +0100 + +vyatta-dataplane (3.11.8) unstable; urgency=medium + + [ Robert Shearman ] + * fal_plugin: add object model for MPLS + * fal_plugin: add FAL object for VRFs + * fal_plugin: add attributes for MPLS TTL mode (Closes: VRVDR-52435) + * fal: add next hop label attributes + * nh_common: add FAL programming helpers + * route: make use of FAL nh_common helpers + * route_v6: make use of FAL nh_common helpers + * mpls: signal FAL create/update/delete of MPLS routes + * fal: signal use of next hop groups + * fal: translate IPv6 nexthops with IPv4 mapped addresses to IPv4 nexthop + * tests: test IPv6 labeled routes via attached IPv4 nexthops + * vrf: create and use FAL VRF object + * fal: support MPLS deagg routes + * mpls: add support for dumping 
one MPLS route + * mpls: add support for signalling change to TTL mode to FAL + (Closes: VRVDR-52436) + + [ Paul Atkins ] + + -- Paul Atkins Thu, 13 Aug 2020 15:09:49 +0100 + +vyatta-dataplane (3.11.7) unstable; urgency=medium + + [ Robert Shearman ] + * controller: extend timeout for adding a port (Fixes: VRVDR-52458) + + [ Paul Atkins ] + * 3.10.58 + + [ Karthik Murugesan ] + * vxlan: Added null check to avoid null-pointer dereference + + [ Paul Atkins ] + * 3.10.59 + + [ Paul Aitken ] + * DPI: add application group database + * DPI: application resource groups + * DPI: application resource group parsing + * DPI: new APIs for application resource groups + * DPI: add application resource groups to app firewall + * DPI: add application resource groups to Makefile + + [ Paul Atkins ] + * 3.10.60 + + [ Karthik Murugesan ] + * ptp: Added support for G.8275.1 profiles + + [ Shweta Choudaha ] + * Mirror: Add support for source vlan config + * Mirror: Add support to show vlan info for src intf + + [ Manohar Rapeti ] + * if: Spurious error logs messages (Bugfix: VRVDR-52346) + + [ Brian Russell ] + * qos: don't enable dscp mapping if designation in use + * qos: allocate a priority local designation + + [ Paul Atkins ] + * 3.10.61 + + [ Brian Russell ] + * qos: extend mark-map to include dp + + [ Paul Atkins ] + * 3.10.62 + + -- Paul Atkins Thu, 13 Aug 2020 14:29:20 +0100 + +vyatta-dataplane (3.11.6) unstable; urgency=medium + + [ Gavin Shearer ] + * cpp: add support for PIM and IP multicast + + [ Paul Atkins ] + + -- Paul Atkins Wed, 05 Aug 2020 16:23:14 +0100 + +vyatta-dataplane (3.11.5) unstable; urgency=medium + + [ Brian Russell ] + * qos: restore protocol version 10 + + [ Rishi Narain ] + * SyncE: Allowing ESMC frame flow from vyatta-dataplane + * SyncE: New feature support + + [ Ramkumar Ganapathysubramanian ] + * Removed unnecessary error logs in L3 Interface attribute set + + [ Paul Atkins ] + * dp_event: reorder some of the dp_events in the enum + * event: add a 
public event notifier for vrf create/delete events + * 3.10.56 + + [ Charles (Chas) Williams ] + * netvsc: increase driver limits (Bugfix: VRVDR-52360) + * ptp: check switch nexthop interface is reachable (Bugfix: VRVDR-52447) + + [ Paul Atkins ] + * 3.10.57 + + -- Paul Atkins Wed, 05 Aug 2020 10:01:32 +0100 + +vyatta-dataplane (3.11.4) unstable; urgency=medium + + [ Paul Atkins ] + * 3.10.54 + * Jenkins: change master branch target to be DANOS:Glasgow + + [ Simon Barber ] + * Convert return string of vplsh led blink cmd to json + + [ Paul Aitken ] + * dataplane abort due to short string in rte_jhash + + [ Ramkumar Ganapathysubramanian ] + * Adding support for DSCP or PCP value in QoS egress-map + + [ Paul Aitken ] + * DPI: Inconsistent use of protocol in DPI output + * Avoid buffer overrun in sip_alg_hash + + [ Robert Shearman ] + * 3.10.55 + + -- Robert Shearman Tue, 28 Jul 2020 09:43:59 +0100 + +vyatta-dataplane (3.11.3) unstable; urgency=medium + + [ Robert Shearman ] + * fal: check for conditions that violate FAL route API contract + * lpm: pass pd_state by reference in walk callback + * route: update dependent routes when interface FAL L3 state changes + * route6: update dependent routes when interface FAL L3 state changes + (Fixes: VRVDR-50303) + + [ Paul Atkins ] + * 3.10.52 + + [ Robert Shearman ] + * nh_common: skip over backup next-hops when fixing up protected tracking + (Fixes: VRVDR-52257) + * ut: add symbols for router-interface FAL object in test FAL + * ut: test PIC edge with a gateway being used by both primary & backup + + [ Ian Wilson ] + * npf: Add npf return code counters + * npf: Add commands to show return code counters + * npf: Add mechanism to filter return code show output dependent on type + * npf: Add commands to clear return code counters + * ut: Add function to fetch and print npf return code counts + * npf: Change nat64 to use nat64_decision_t instead of npf_decision_t + * npf: Increment return code counters in npf_hook_track + * 
npf: Change npf_cache_all to return either a 0 or a negative return code + * npf: Change the function used by npf_cache_all to return a return code + * npf: Add return codes to npf_cache_all + * npf: Add param to npf_get_cache to allow return code to be returned + * npf: Add return codes to npf state functions + * npf: Add return codes to npf_hook_track calls to session functions + * npf: Add return codes to the function to rewrite l3 and l4 fields + * npf: Add return code to npf_icmpv4_err_nat + * npf: Change nat64 common functions to have a single return point + * npf: Add return codes to the nat64 map and convert functions + * npf: Add return codes to nat64 + * npf: Add optional pointer, rcp, to npf_hook_notrack parameters + * npf: Increment l2 return code counters in bridging + * npf: Add single return point in local firewall functions + * npf: Increment return code counts for local firewall + * npf: Increment return code counts for ACL firewall + * npf: Return select detailed return code stats for nat64 + + [ Paul Atkins ] + * 3.10.53 + + [ Robert Shearman ] + * storm_ctl: fix write to heap after free when deleting an instance + (Fixes: VRVDR-52115) + * tests: add test for storm-control out of order profile delete + * Makefile.am: reduce dependencies of fal_plugin_test_la + + [ Charles (Chas) Williams ] + * main: handle sparse port configurations (Bugfix: VRVDR-49805) + * netvsc: set more reasonable queue lengths (Bugfix: VRVDR-49805) + + [ Paul Atkins ] + + -- Paul Atkins Fri, 24 Jul 2020 16:13:59 +0100 + +vyatta-dataplane (3.11.2) unstable; urgency=medium + + [ bs775m ] + * fal_plugin :add attributes for enb/dis pause frame + * vyatta-datapath:add support for enb/dis pauseframe + + [ Charles (Chas) Williams ] + * Correct enumeration declaration + + [ Paul Atkins ] + + -- Paul Atkins Fri, 24 Jul 2020 09:12:29 +0100 + +vyatta-dataplane (3.11.1) unstable; urgency=medium + + [ Mandeep Rohilla ] + * BFD: Add support for querying max interval values supported + + 
[ Paul Atkins ] + + -- Paul Atkins Mon, 20 Jul 2020 21:45:59 +0100 + +vyatta-dataplane (3.10.70) unstable; urgency=medium + + [ aroberts ] + * Don't allow a child shaper to exceed 99.6% of parent rate + + [ Paul Atkins ] + + -- Paul Atkins Fri, 04 Sep 2020 10:15:04 +0100 + +vyatta-dataplane (3.10.69) unstable; urgency=medium + + [ Charles (Chas) Williams ] + * ptp: add additional-path support (Bugfix: VRVDR-48480) + + -- Robert Shearman Tue, 01 Sep 2020 11:42:50 +0100 + +vyatta-dataplane (3.10.68) unstable; urgency=medium + + [ Paul Aitken ] + * DPI: app groups: avoid null deref + + [ Paul Atkins ] + + -- Paul Atkins Wed, 19 Aug 2020 11:43:55 +0100 + +vyatta-dataplane (3.10.67) unstable; urgency=medium + + * gre: don't double free mbuf if using a gre tunnel to ourself + + -- Paul Atkins Wed, 19 Aug 2020 08:37:29 +0100 + +vyatta-dataplane (3.10.66) unstable; urgency=medium + + [ Robert Shearman ] + * if: move vlan_if_get_stats to common interface code + * gre: add support for retrieving FAL stats + + [ Paul Atkins ] + + -- Paul Atkins Mon, 17 Aug 2020 10:14:51 +0100 + +vyatta-dataplane (3.10.65) unstable; urgency=medium + + [ Gavin Shearer ] + * ippf: fix issue of corrupted first byte of IPv6 address to match + + [ Paul Atkins ] + + -- Paul Atkins Mon, 17 Aug 2020 09:44:06 +0100 + +vyatta-dataplane (3.10.64) unstable; urgency=medium + + [ Ian Wilson ] + * npf: Increment session stats if session matched, and the pkt is not dropped + + [ Paul Atkins ] + + -- Paul Atkins Fri, 14 Aug 2020 10:29:11 +0100 + +vyatta-dataplane (3.10.63) unstable; urgency=medium + + [ aroberts ] + * Don't allow a child shaper to exceed 99.6% of parent rate + + [ Paul Atkins ] + + -- Paul Atkins Fri, 14 Aug 2020 10:26:41 +0100 + +vyatta-dataplane (3.10.62) unstable; urgency=medium + + [ Brian Russell ] + * qos: extend mark-map to include dp + + [ Paul Atkins ] + + -- Paul Atkins Thu, 13 Aug 2020 10:17:11 +0100 + +vyatta-dataplane (3.10.61) unstable; urgency=medium + + [ Brian Russell ] + * 
qos: don't enable dscp mapping if designation in use + * qos: allocate a priority local designation + + [ Paul Atkins ] + + -- Paul Atkins Mon, 10 Aug 2020 14:46:43 +0100 + +vyatta-dataplane (3.10.60) unstable; urgency=medium + + [ Paul Aitken ] + * DPI: add application group database + * DPI: application resource groups + * DPI: application resource group parsing + * DPI: new APIs for application resource groups + * DPI: add application resource groups to app firewall + * DPI: add application resource groups to Makefile + + [ Paul Atkins ] + + -- Paul Atkins Thu, 06 Aug 2020 13:15:51 +0100 + +vyatta-dataplane (3.10.59) unstable; urgency=medium + + [ Karthik Murugesan ] + * vxlan: Added null check to avoid null-pointer dereference + + [ Paul Atkins ] + + -- Paul Atkins Thu, 06 Aug 2020 08:46:48 +0100 + +vyatta-dataplane (3.10.58) unstable; urgency=medium + + [ Robert Shearman ] + * controller: extend timeout for adding a port (Fixes: VRVDR-52458) + + [ Paul Atkins ] + + -- Paul Atkins Wed, 05 Aug 2020 16:25:22 +0100 + +vyatta-dataplane (3.10.57) unstable; urgency=medium + + [ Charles (Chas) Williams ] + * ptp: check switch nexthop interface is reachable (Bugfix: VRVDR-52447) + + [ Paul Atkins ] + + -- Paul Atkins Wed, 05 Aug 2020 09:26:54 +0100 + +vyatta-dataplane (3.10.56) unstable; urgency=medium + + * dp_event: reorder some of the dp_events in the enum + * event: add a public event notifier for vrf create/delete events + + -- Paul Atkins Tue, 04 Aug 2020 15:41:08 +0100 + +vyatta-dataplane (3.10.55) unstable; urgency=medium + + [ Paul Atkins ] + * Jenkins: change master branch target to be DANOS:Glasgow + + [ Simon Barber ] + * Convert return string of vplsh led blink cmd to json + + [ Paul Aitken ] + * dataplane abort due to short string in rte_jhash + * DPI: Inconsistent use of protocol in DPI output + * Avoid buffer overrun in sip_alg_hash + + -- Robert Shearman Tue, 28 Jul 2020 09:31:32 +0100 + +vyatta-dataplane (3.10.54) unstable; urgency=medium + + [ Robert 
Shearman ] + * storm_ctl: fix write to heap after free when deleting an instance + (Fixes: VRVDR-52115) + * tests: add test for storm-control out of order profile delete + * Makefile.am: reduce dependencies of fal_plugin_test_la + + [ Paul Atkins ] + + -- Paul Atkins Fri, 24 Jul 2020 09:51:57 +0100 + +vyatta-dataplane (3.10.53) unstable; urgency=medium + + [ Robert Shearman ] + * nh_common: skip over backup next-hops when fixing up protected tracking + (Fixes: VRVDR-52257) + * ut: add symbols for router-interface FAL object in test FAL + * ut: test PIC edge with a gateway being used by both primary & backup + + [ Ian Wilson ] + * npf: Add npf return code counters + * npf: Add commands to show return code counters + * npf: Add mechanism to filter return code show output dependent on type + * npf: Add commands to clear return code counters + * ut: Add function to fetch and print npf return code counts + * npf: Change nat64 to use nat64_decision_t instead of npf_decision_t + * npf: Increment return code counters in npf_hook_track + * npf: Change npf_cache_all to return either a 0 or a negative return code + * npf: Change the function used by npf_cache_all to return a return code + * npf: Add return codes to npf_cache_all + * npf: Add param to npf_get_cache to allow return code to be returned + * npf: Add return codes to npf state functions + * npf: Add return codes to npf_hook_track calls to session functions + * npf: Add return codes to the function to rewrite l3 and l4 fields + * npf: Add return code to npf_icmpv4_err_nat + * npf: Change nat64 common functions to have a single return point + * npf: Add return codes to the nat64 map and convert functions + * npf: Add return codes to nat64 + * npf: Add optional pointer, rcp, to npf_hook_notrack parameters + * npf: Increment l2 return code counters in bridging + * npf: Add single return point in local firewall functions + * npf: Increment return code counts for local firewall + * npf: Increment return code counts for 
ACL firewall + * npf: Return select detailed return code stats for nat64 + + [ Paul Atkins ] + + -- Paul Atkins Wed, 22 Jul 2020 10:10:23 +0100 + +vyatta-dataplane (3.10.52) unstable; urgency=medium + + [ Robert Shearman ] + * fal: check for conditions that violate FAL route API contract + * lpm: pass pd_state by reference in walk callback + * route: update dependent routes when interface FAL L3 state changes + * route6: update dependent routes when interface FAL L3 state changes + (Fixes: VRVDR-50303) + + [ Paul Atkins ] + + -- Paul Atkins Tue, 21 Jul 2020 07:47:56 +0100 + +vyatta-dataplane (3.10.51) unstable; urgency=medium + + [ Mandeep Rohilla ] + * BFD: Add support for querying max interval values supported + + [ Paul Atkins ] + + -- Paul Atkins Mon, 20 Jul 2020 21:23:09 +0100 + +vyatta-dataplane (3.10.50) unstable; urgency=medium + + * sanitizer: set ASAN_OPTIONS when sanitizer is used + + -- Paul Atkins Mon, 20 Jul 2020 13:36:11 +0100 + +vyatta-dataplane (3.10.49) unstable; urgency=medium + + [ Robert Shearman ] + * shadow: remove the shadow port handler when an interface is freed + (Fixes: VRVDR-52193) + * shadow: use events for init/destroy + * sample: fix the visit_after node declaration + + [ Paul Atkins ] + + -- Paul Atkins Mon, 20 Jul 2020 13:18:35 +0100 + +vyatta-dataplane (3.10.48) unstable; urgency=medium + + [ Robert Shearman ] + * config: factor out and simplify PCI address parsing for backplane ports + * config: parse management_port platform.conf attribute + * dpdk-eth: add management port attribute to interface information + + [ Paul Atkins ] + + -- Paul Atkins Mon, 20 Jul 2020 08:46:22 +0100 + +vyatta-dataplane (3.10.47) unstable; urgency=medium + + [ Thomas Kiely ] + * mac_limit: Remove temporary show keyword + + [ Charles (Chas) Williams ] + * coverity: fix handling when an error is returned (Bugfix: VRVDR-52191) + + [ Robert Shearman ] + * dpdk-eth: only remove a LAG port after the ifp using it has been freed + * main: only close the ports 
after cleaning up interfaces (Fixes: VRVDR-52220) + + [ Paul Atkins ] + * fal: provide alternate name for FAL_BFD_HW_MODE_CP_INDEPENDENT + + -- Paul Atkins Thu, 16 Jul 2020 12:02:27 +0100 + +vyatta-dataplane (3.10.46) unstable; urgency=medium + + [ Mike Manning ] + * L2TPv3: Fails to be ping across tunnel using L2TPv3 + + [ Robert Shearman ] + * if: fix cleanup of DPDK ethernet interfaces (Fixes: VRVDR-52145) + + [ Paul Atkins ] + * ut: add vars that tests can use to pass state to the fal + + -- Paul Atkins Fri, 10 Jul 2020 13:59:35 +0100 + +vyatta-dataplane (3.10.45) unstable; urgency=medium + + [ Nicholas Brown ] + * Add a CODEOWNERS file + + [ Charles (Chas) Williams ] + * vhost: fix netlink races with hotplug (Bugfix: VRVDR-50960) + + [ Paul Atkins ] + + -- Paul Atkins Wed, 08 Jul 2020 15:21:32 +0100 + +vyatta-dataplane (3.10.44) unstable; urgency=medium + + * main: allow the user to specify the platform_file location + * ut: allow user to specify platform conf file + + -- Paul Atkins Tue, 07 Jul 2020 13:02:45 +0100 + +vyatta-dataplane (3.10.43) unstable; urgency=medium + + [ Nicholas Brown ] + * Remove last mentions of valgrind build + * Enable the address sanitizer as part of the jenkins build + * use .checkpatch.conf + + [ Robert Shearman ] + * dpdk-eth: check that ifp exists in linkwatch_change_mark_state + * dpdk-eth: don't require ifp for updating queue state (Fixes: VRVDR-52109) + + [ Paul Atkins ] + + -- Paul Atkins Mon, 06 Jul 2020 13:29:50 +0100 + +vyatta-dataplane (3.10.42) unstable; urgency=medium + + [ Charles (Chas) Williams ] + * lag: remove potentially offensive language (Bugfix: VRVDR-51820) + * main: remove potentially offensive language (Bugfix: VRVDR-51820) + * vrf: remove potentially offensive language (Bugfix: VRVDR-51820) + * bridge: remove potentially offensive language (Bugfix: VRVDR-51820) + * tests: remove potentially offensive language (Bugfix: VRVDR-51820) + * session: remove potentially offensive language (Bugfix: VRVDR-51820) + + 
[ Paul Atkins ] + + -- Paul Atkins Fri, 03 Jul 2020 15:49:00 +0100 + +vyatta-dataplane (3.10.41) unstable; urgency=medium + + [ Robert Shearman ] + * if: issue feature event for interface being created + * mstp: defer creation of STP object until after bridge created in FAL + (Fixes: VRVDR-52083) + * bridge: fix duplicate FAL br_new_port notification (Fixes: VRVDR-52084) + * ut: validate FAL contract for bridge-port objects + + [ Thomas Kiely ] + * mac_limit: Rename "mac-count" command to "limit status" + + [ Brian Russell ] + * qos: update fal global map when resource group changes + + [ Paul Atkins ] + + -- Paul Atkins Fri, 03 Jul 2020 14:22:05 +0100 + +vyatta-dataplane (3.10.40) unstable; urgency=medium + + [ harios ] + * Fix done for nexthop as IPv4 mapped IPv6 address + + [ Robert Shearman ] + * main: swap order of checks on closing ports (Fixes: VRVDR-52095) + * if: make promiscuity apply to VLANs as well as MAC addresses + (Fixes: VRVDR-52049) + * capture: remove interface-type check for setting promiscuity + + [ Paul Atkins ] + + -- Paul Atkins Fri, 03 Jul 2020 09:06:51 +0100 + +vyatta-dataplane (3.10.39) unstable; urgency=medium + + [ Robert Shearman ] + * ut: add dp1 prefix to switchports + * devinfo: change if_port_info to not require an ifp + * master: avoid needing ifp present when adding/deleting ports + * if: classify backplane ports as dataplane interfaces + * if: clean up life-cycle of DPDK ethernet interface objects + (Closes: VRVDR-51844) + * if: remove missed link & unspec address handling + * if: remove missed IP address & netconf handling (Closes: VRVDR-51845) + * if: remove unused hwport incomplete infra + + [ Paul Atkins ] + + -- Paul Atkins Thu, 02 Jul 2020 08:52:24 +0100 + +vyatta-dataplane (3.10.38) unstable; urgency=medium + + [ Robert Shearman ] + * shadow: remove superfluous interface netlink state management + + [ Paul Atkins ] + + -- Paul Atkins Wed, 01 Jul 2020 13:24:13 +0100 + +vyatta-dataplane (3.10.37) unstable; urgency=medium + 
+ [ Paul Aitken ] + * DPI: add nDPI debugging + * DPI: load optional nDPI protocols and categories + + [ Charles (Chas) Williams ] + * unit tests: fix mbuf debugging (Bugfix: VRVDR-51987) + * crypto: fix mbuf debugging (Bugfix: VRVDR-51987) + * mpls: reject short packets (Bugfix: VRVDR-51987) + + [ Paul Atkins ] + + -- Paul Atkins Wed, 01 Jul 2020 09:22:12 +0100 + +vyatta-dataplane (3.10.36) unstable; urgency=medium + + * control: add a comment to request new commands in protobuf format + * dpdk_eth_if: don't dump ports that have been unplugged + * if: in ifnet_byethname skip over unplugged interfaces + * hotplug: mark the interface as unplugged at the start of processing + * dpdk_eth_if: don't assume that info.driver_name is valid + + -- Paul Atkins Wed, 24 Jun 2020 09:07:45 +0100 + +vyatta-dataplane (3.10.35) unstable; urgency=medium + + [ Shweta Choudaha ] + * Backplane:Shut DPDK bkplane ports post fal cleanup + + [ Paul Atkins ] + + -- Paul Atkins Fri, 19 Jun 2020 16:18:37 +0100 + +vyatta-dataplane (3.10.34) unstable; urgency=medium + + [ Ethan Li ] + * bfd-hw: add FAL attribute for BFD hw running mode + + [ Nicholas Brown ] + * sample plugin and test code only using public API + * The test code for the sample plugin should also be a plugin + + [ Ian Wilson ] + * npf: Set custom timeout in dataplane session after session is created + * npf: Add option to cache pkt without updating the cache grouper data + * npf: Move _npf_cache_all_at in order to avoid forward reference + + [ Thomas Kiely ] + * Avoid unnecessary unapply of mac limit feature + + [ Mandeep Rohilla ] + * BR_VLAN_SET: Api to determine if vlan set is empty + * BR_VLAN_SET UT: unit tests for the vlan set empty api + + [ Paul Atkins ] + * ut: func to verify state based on pb show should use void * + + -- Paul Atkins Fri, 19 Jun 2020 09:02:21 +0100 + +vyatta-dataplane (3.10.33) unstable; urgency=medium + + [ Ian Wilson ] + * cgnat: Several small cosmetic changes to cgnat + * cgnat: Add 2-tuple sessn to
hash table if it fails to be added directly + * cgnat: Move 2-tuple session inspection code into separate function + * cgnat: Block outbound flow if max-dest-per-session reached + * cgnat: Block inbound packets if max-dest-per-session reached + * cgnat: Increase maximum configurable max-dest-per-session to 128 + * ut: Fixup cgnat25 to expect an ICMP error + * cgnat: Remove interface config store and replay mechanism + * ut: Remove cgnat14 test for interface store and replay mechanism + + [ Paul Atkins ] + + -- Paul Atkins Tue, 16 Jun 2020 10:32:46 +0100 + +vyatta-dataplane (3.10.32) unstable; urgency=medium + + [ Mandeep Rohilla ] + * DP_EVENT: Add new event for MTU change notifications + * MTU: Register QoS's interest in MTU change + * MTU: Don't bounce the port when changing the MTU + + [ Paul Atkins ] + + -- Paul Atkins Fri, 12 Jun 2020 11:54:49 +0100 + +vyatta-dataplane (3.10.31) unstable; urgency=medium + + [ Ian Wilson ] + * npf: Simplify and enhance the api for fetching address group json + + [ Paul Atkins ] + + -- Paul Atkins Fri, 12 Jun 2020 11:14:46 +0100 + +vyatta-dataplane (3.10.30) unstable; urgency=medium + + [ Robert Shearman ] + * debian: don't suppress changelog generation + * Pull JSON writer code out into a shared library (Fixes: VRVDR-51389) + + [ Paul Atkins ] + + -- Paul Atkins Tue, 09 Jun 2020 09:24:36 +0100 + +vyatta-dataplane (3.10.29) unstable; urgency=medium + + [ Srinivas Narayan ] + * Check if next hop is non-NULL before de-referencing it + + [ Paul Atkins ] + + -- Paul Atkins Tue, 09 Jun 2020 08:40:38 +0100 + +vyatta-dataplane (3.10.28) unstable; urgency=medium + + [ Charles (Chas) Williams ] + * conf: allow dev_flags to be filtered (Bugfix: VRVDR-48438) + * ixgbe: do not use the LSC interrupt (Bugfix: VRVDR-48438) + * ifconfig: allow inspection of the lsc status + * vhost: do not wait forever for QMP (Bugfix: VRVDR-51099) + + [ Paul Atkins ] + + -- Paul Atkins Fri, 05 Jun 2020 10:16:49 +0100 + +vyatta-dataplane (3.10.27) unstable;
urgency=medium + + [ Ian Wilson ] + * nat64: Only dereference the session sentry once + * nat64: Free memory before returning in nat64_create error case + + [ Robert Shearman ] + * storm_ctl: avoid redundant FAL update when adding threshold for new type + * storm_ctl: move fal_policer_modify_profile function down + * storm_ctl: make threshold removal FAL state symmetric (Fixes: VRVDR-51406) + + [ Paul Atkins ] + * lpm6: fix check for depth when removing /24 + + -- Paul Atkins Thu, 04 Jun 2020 12:28:06 +0100 + +vyatta-dataplane (3.10.26) unstable; urgency=medium + + [ Mark Gillott ] + * if: migrate hardware port completion to separate function + * if: replace NEWPORT with INIPORT & ADDPORT (Fixes: VRVDR-46511) + * if: postpone snapshot request until port initialisation complete + (Fixes: VRVDR-46511) + + [ Paul Atkins ] + + -- Paul Atkins Fri, 29 May 2020 14:15:43 +0100 + +vyatta-dataplane (3.10.25) unstable; urgency=medium + + [ Paul Aitken ] + * DPI: fix memleaks in dpi_ctor + * DPI: make initialisation return errno + + [ Robert Shearman ] + * ip_rt_protobuf: fix coverity resource leak reports (Fixes: VRVDR-51284) + + [ Ian Wilson ] + * cgnat: Return NAT pool "full" (np_full) json to control plane + + [ ak487r ] + * tests: remove ipv6 ND Solicitation tests for originating firewall + + [ Ian Wilson ] + * cgnat: Max dest per session only allows powers of two + + [ Tom Kiely ] + * MAC Limit: Add a new attr for mac limit on port/vlan + * MAC Limit: Add a new log type flag for mac limiting feat + * MAC Limit: Add support for creating MAC limiting profiles + * MAC Limit: Add support mac limit entry based on port/vlan + * MAC Limit: Apply mac limiting in the FAL + * MAC Limit: Add hooks for applying mac limiting based on dp events + * MAC Limit: Add support for show command + * Add Unit Tests for MAC limiting feature. 
+ + [ Paul Atkins ] + + -- Paul Atkins Fri, 29 May 2020 12:13:11 +0100 + +vyatta-dataplane (3.10.24) unstable; urgency=medium + + [ Paul Atkins ] + * main: add a wrapper to unregister a thread with rcu + * ip_forward: remove references to next_hop_v6 + + [ Robert Shearman ] + * route: set some missing address family types + * route_v6: set some missing address family types + * mpls: set some missing address family types + + [ Paul Atkins ] + * main: make ASSERT_MASTER public + + -- Paul Atkins Fri, 22 May 2020 11:01:09 +0100 + +vyatta-dataplane (3.10.23) unstable; urgency=medium + + [ Paul Atkins ] + * debian: define prefix in the libvyattafal .pc file + + [ Paul Carson ] + * Forward PPP CHAP traffic to PPP (Fixes: VRVDR-49231) + + [ Derek Fawcus ] + * NPF: Use ICMP opcode defines, not magic values + * NPF: Adjust ICMP opcode generation + * NPF: Enable matching of ICMP classes in ncode + * Update checkpatch warnings in use + + [ Paul Atkins ] + + -- Paul Atkins Thu, 21 May 2020 10:40:12 +0100 + +vyatta-dataplane (3.10.22) unstable; urgency=medium + + [ Robert Shearman ] + * debug_strip: add build-ids to package metadata + * Generate -dbgsym packages per binary package (Fixes: VRVDR-50948) + + [ Shweta Choudaha ] + * flow_cache: Use get_lcore_max to get max lcoreid + + [ Paul Atkins ] + + -- Paul Atkins Tue, 19 May 2020 10:59:05 +0100 + +vyatta-dataplane (3.10.21) unstable; urgency=medium + + [ Paul Atkins ] + * nd6_nbr: when storing the v6 addr use sockaddr_storage not sockaddr + + [ Gavin Shearer ] + * nat64: check rule group name of rule is set before accessing it + + [ Paul Atkins ] + * main: make sure that we don't register rcu thread twice + + [ Charles (Chas) Williams ] + * dpdk: get dev_info before closing (Bugfix: VRVDR-51041) + + [ Paul Atkins ] + + -- Paul Atkins Mon, 18 May 2020 19:29:08 +0100 + +vyatta-dataplane (3.10.20) unstable; urgency=medium + + [ Paul Aitken ] + * DPI: new APIs to support application name database + * DPI: new APIs to support
application type database + * DPI: new APIs for user-defined applications + * DPI: new APIs to allow packet processing by nDPI + * DPI: change --without-dpi to build without nDPI + * DPI: add new files to makefile + * DPI: remove redundant files + * DPI: don't need to include DPI + * DPI: remove Qosmos from dpi_internal.h, add new APIs + * DPI: update app_cmds.c includes + * DPI: dpi_public.c to call the engine-based APIs + * DPI: update DPI rprocs to engine-based APIs + * DPI: remove app database from npf_ext_app.c + * DPI: update app FW DPI rprocs to use engine-based APIs + * DPI: update L3 DPI pipeline to use engine-based APIs + * DPI: add new engine-based APIs to dpi.c + * DPI: add libndpi-dev build dependency + + [ Paul Atkins ] + + -- Paul Atkins Mon, 18 May 2020 15:32:27 +0100 + +vyatta-dataplane (3.10.19) unstable; urgency=medium + + [ ak487r ] + * npf: add originating firewall + * tests: add ipv4 tcp slowpath tests for originating firewall + * tests: add ipv6 tcp slowpath tests for originating firewall + * tests: add ipv4 icmp packet too big tests for originating firewall + * tests: add ipv6 icmp packet too big tests for originating firewall + * tests: add ipv6 ND Advertisement tests for originating firewall + * tests: add ipv6 ND Solicitation tests for originating firewall + * tests: add ipv4 echo reply by cgnat for originating firewall + + [ Paul Atkins ] + * ut: fix issue with buffer size when comparing one-of + * nh_common: change 1 to 1ull when using it to shift 64 bit numbers + * nh_common: when updating map get the count from the bitmap + * nh_common: on nh_map init leave space for unusable primaries + * nh_common: use CMM macros when reading/modifying the usability flag + * nh_common: reinit nh map contents if collisions when marking unusable + * nh_common: let paths be marked usable and unusable + * ut: tests for pic edge where a path is made usable + * route6: display the next hop map for v6 routes + * ut: remove extra whitespace in pic edge tests
+ * ut: add an ipv6 pic edge test + * route: register path_state functions with event infra for cleanup + + -- Paul Atkins Mon, 18 May 2020 13:56:55 +0100 + +vyatta-dataplane (3.10.18) unstable; urgency=medium + + [ Sanjay Iyer ] + * hw-bfd: Add additional FAL attributes (Fixes: VRVDR-50399) + + [ Paul Atkins ] + + -- Paul Atkins Fri, 15 May 2020 17:24:01 +0100 + +vyatta-dataplane (3.10.17) unstable; urgency=medium + + [ Paul Atkins ] + * nh_common: check if a nh is unusable before marking as unusable + * nh_common: don't put an unusable nh into the map at init time + * nh_common: add a bitmask to track usable next_hops + * ut: allow for checking for one of many expected strings + * ut: further tests for pic edge + + [ Gavin Shearer ] + * nat: add support in apm for multiple port maps based on protocol + * nat: rename fields in port_prot structure + * nat: add passing in IP proto to apm port request fns + * nat: update per-rule nat statistics to be per-protocol + * nat: pass back in JSON the per-protocol rule used counts + * nat: add warning if changing port pool for separate ICMP pool + + [ Paul Atkins ] + + -- Paul Atkins Fri, 15 May 2020 13:40:54 +0100 + +vyatta-dataplane (3.10.16) unstable; urgency=medium + + [ Robert Shearman ] + * if: fix typo in ifop_uninit comment + * bridge: issuing FAL delport notifications for members on bridge delete + (Fixes: VRVDR-51123) + + [ Srinivas Narayan ] + * npf: Add a flag to enable hash table linkage for rules + * npf: add hash table linkage for rules + + [ Paul Atkins ] + * nh_common: change api to mark path unusable to also allow usable + + -- Paul Atkins Fri, 15 May 2020 11:53:04 +0100 + +vyatta-dataplane (3.10.15) unstable; urgency=medium + + [ Dewi Morgan ] + * dataplane: add dp_ifnet_admin_status api + + [ Paul Atkins ] + * nh_common: when a next_hop is marked unusable update the fal + * urcu: add an API to allow a thread to register with urcu + * dpdk_linkwatch: on link down mark paths unusable + + [ Robert Shearman ] 
+ * fal: signal backup paths + + [ Paul Atkins ] + + -- Paul Atkins Wed, 13 May 2020 16:27:47 +0100 + +vyatta-dataplane (3.10.14) unstable; urgency=medium + + [ Srinivas Narayan ] + * npf: Add flags to skip stats maintenance + * npf: Skip stats allocation if NO_STATS flag is set + * npf: Add NULL checks for rule stats where necessary + * npf: Skip stats allocation for IPsec rulesets + * npf: Use NO_STATS flag to optimize high level ops + + [ Simon Barber ] + * Set the l3 length of Site-2-Site Packets for TX on spath + + [ Paul Atkins ] + * ut: move the pic edge tests into dp_test_ip_pic_edge.c + * nh_common: change the nexthop to have a struct_ip addr + * nh_common: rename nexthop_create_copy + * mpls: make nh_outlabels_copy copy all labels + * nh_common: add a function to copy a next_hop and use instead of memcpy + * nh_common: provide apis to help do a modify of an active next_hop_list + * nh_common: store the number of primary paths in the next_hop_list + * nh_common: store the protected next_hops in a 2 level hash + * nh_common: use a nh_map when a next_hop_list has backup paths + * route: Add an api to allow plugins to provide path state + * nh_common: add a ptr back from the NH to the NH list + * nh_common: update sw forwarding state when a NH becomes unusable + * ut: ip pic edge tests + * ut: ip pic edge tests with traffic + * nh_common: fix typos in comments + * nh_common: when tracking nexthops with backups, skip those with no ifp + + -- Paul Atkins Wed, 13 May 2020 08:43:08 +0100 + +vyatta-dataplane (3.10.13) unstable; urgency=medium + + [ Nicholas Brown ] + * update autoconf dpdk check to check for 19.11 + + [ Robert Shearman ] + * protobuf: fix typo in description of mpls_labels field + + [ Srinivas Narayan ] + * flow-cache: Add API prototypes for flow-cache + * flow-cache: Move crypto pkt buffer definition + * flow-cache: Rename pr_cache* to flow_cache* + * flow-cache: Rename variable used for flow_cache_entry + * flow-cache: Move address union
definition to make it re-usable + * flow-cache: Migrate addresses to common definition + * flow-cache: Enable support for IPv6 + * flow-cache: refactor common code invoking flow_cache_add + * flow-cache: Decouple flow cache from crypto per-core block + * flow-cache: use accessors for rule and context + * flow-cache: add a bit to identify packets not matching any rule + * flow-cache: Add support for caching negative matches + * flow-cache: Create cache entries for cleartext packets + * flow-cache: Skip further processing on cache match for cleartext packet + * flow-cache: Avoid de-referencing policy rule unless present + * flow-cache: Use rss hash if present in buffer + * flow-cache: Add support for aging + * flow-cache: refactor code to dump cache + * flow-cache: Move flow-cache infra to separate module + * flow-cache: Emit hit counts + * flow-cache: split flow cache dump into smaller functions + * flow-cache: Update comments to use 'flow cache' + * flow-cache: rename crypto specific function & macro + * flow-cache: Reorder fields to remove holes in structure + + [ Shweta Choudaha ] + * backplane: use device max_mtu for backplane intf + + [ Paul Atkins ] + + -- Paul Atkins Tue, 12 May 2020 12:13:28 +0100 + +vyatta-dataplane (3.10.12) unstable; urgency=medium + + * debian: update dependency on vyatta-dpdk-swport + + -- Paul Atkins Mon, 11 May 2020 14:31:12 +0100 + +vyatta-dataplane (3.10.11) unstable; urgency=medium + + [ Brian Russell ] + * qos: fix legacy map show + + [ Nicholas Brown ] + * git ignore more debian package install directories + * dataplane-dev does not have a dependency on the test binary + + [ Robert Shearman ] + * route_v6: move handle_route6 function to ip_netlink.c + * if: rename incomplete_route_add function + * protobuf: add definition for route updates from the RIB + * ip_rt_protobuf: add support for decoding protobuf route updates + * tests: add support for protobuf route messages + * ip_rt_protobuf: add support for installing backup paths + 
* tests: add test for routes with backup paths + * ecmp: move netlink handling to ip_netlink.c + * ip_rt_protobuf: preserve display behaviour for MPLS deagg routes + + [ Paul Atkins ] + + -- Paul Atkins Mon, 11 May 2020 11:49:36 +0100 + +vyatta-dataplane (3.10.10) unstable; urgency=medium + + [ Mike Larson ] + * Typo in include guard + + [ Shweta Choudaha ] + * Add support for I40E X722 device + + [ Charles (Chas) Williams ] + * dpdk: 19.11: struct ether_addr to struct rte_ether_addr + (Bugfix: VRVDR-45636) + * dpdk: 19.11: is_*_ether_addr to rte_is_*_ether_addr (Bugfix: VRVDR-45636) + * dpdk: 19.11: e_RTE_METER_COLORS to RTE_COLORS (Bugfix: VRVDR-45636) + * dpdk: 19.11: rename struct *_hdr to struct rte_*_hdr (Bugfix: VRVDR-45636) + * dpdk: 19.11: ETHER_* to RTE_ETHER_* (Bugfix: VRVDR-45636) + * dpdk: 19.11: add alignment to packed structs (Bugfix: VRVDR-45636) + * dpdk: 19.11: fix swport unit tests (Bugfix: VRVDR-45636) + * dpdk: 19.11: update build dependencies (Bugfix: VRVDR-45636) + + [ Paul Atkins ] + + -- Paul Atkins Mon, 11 May 2020 09:13:58 +0100 + +vyatta-dataplane (3.10.9) unstable; urgency=medium + + [ Gavin Shearer ] + * alg: ensure parent session active before linking child + + [ Paul Atkins ] + * 3.9.108 + + [ Charles (Chas) Williams ] + * crypto: count burst buffer full as drops (Bugfix: VRVDR-50279) + * crypto: count most errors as proto drops (Bugfix: VRVDR-50279) + * crypto: eliminate macro usage (Bugfix: VRVDR-50279) + * crypto: count packets against tunnel interface (Bugfix: VRVDR-50279) + * crypto: do not count failed packets (Bugfix: VRVDR-50279) + + [ Paul Atkins ] + * 3.9.109 + + [ Mark Gillott ] + * Register event operations only on first use (Fixes: VRVDR-50621) + * Capture portmonitor replay errors (Fixes: VRVDR-50621) + + [ Paul Atkins ] + * 3.9.110 + + [ Charles (Chas) Williams ] + * vhost: fix QMP communication (Bugfix: VRVDR-50745) + + [ Paul Atkins ] + * 3.9.111 + + [ Nicholas Brown ] + * Add new public API
dp_pipeline_is_feature_enabled_by_inst() + + [ Paul Atkins ] + * 3.9.112 + + [ Mark Gillott ] + * pipeline: add initialiser to declaration of storage_ctx + + [ Nicholas Brown ] + * Install test headers into the correct path from Makefile + * dataplane_test in it's own package + + [ Gavin Shearer ] + * nat64: make per-rule 'used' count be decremented + * nat64: set full range for overload start/stop ports + + [ Mark Gillott ] + * pcap: serialise access to capture console socket (Fixes: VRVDR-50937) + + [ Paul Atkins ] + + -- Paul Atkins Wed, 06 May 2020 14:13:12 +0100 + +vyatta-dataplane (3.10.8) unstable; urgency=medium + + [ Mike Larson ] + * Add helper functions for protobuf + + [ Paul Atkins ] + * nh_common: use a common version of nh_get_lables + * nh_common: use a common version of nh_get_flags + * mpls: use dp_nh_get_ifp instead of nh_get_if + * nh_common: use a common version of nexthop_mp_select + * route: add a family parameter to nexthop_select + * route6: add a family parameter to nexthop6_select + * crypto: crypto_policy_handle_packet_outbound_checks to use common nh + * crypto: pr_feat_attach should use a single nh instead of a union + * nh_common: use a common version of nexthop_select + * nh: change nh_select so that it takes a family not a nh_type + * nh: use nexthop_select instead of nh_select + * mpls: make mpls_label_table_lookup return a next_hop ptr + * mpls: make mpls_unlabeled_input return a struct next_hop * + * mpls: modify mpls_oam_v4_lookup to use a next_hop instead of a union + * mpls: change mpls_label_table_ins_lbl_internal to take a next_hop ptr + * mpls: change mpls_label_table_insert_label to take a next_hop ptr + * mpls: change mpls_label_table_add_reserved_labels to use a next_hop ptr + * mpls: nh_fwd_mpls should take a struct next_hop ptr + * mpls: change nh_eth_output_mpls to take a struct next_hop ptr + * mpls: change nh_mpls_ip_fragment to take a struct next_hop ptr + * mpls: change nh_mpls_forward to take a struct next_hop 
ptr + * mpls: change mpls_labeled_forward to use a struct next_hop ptr + * mpls: change mpls_unlabeled_forward to use a struct next_hop ptr + * ecmp: change ecmp_mpls_create to return a struct next_hop ptr + * mpls: change mpls_route_change to use a struct next_hop ptr + * crypto: make crypto_policy_check_outbound take a struct next_hop ** + * l3_v4_ipsec: use struct next_hop instead of the nh union + * l3_v6_ipsec: use struct next_hop instead of the nh union + * shadow: use struct next_hop instead of the nh union nin spath_reader + * nh: remove union next_hop_v4_or_v6_ptr as it is no longer used + * crypto: tidy up code in crypto_policy_check_outbound + * crypto: remove common code in crypto_policy_feat_attach_by_reqid + * crypto: remove common code in policy_bind_feat_attach + * crypto: remove common code in policy_rule_to_json + * shadow: commonise next_hop code in spath_reader + * nh_common: make some of the nexthop functions static + * nh: move the final funcs from nh.c and delete the file + * mpls: move nh_fwd_ret into mpls_forward.c as that is the only user + * nh: remove definition of NH_STRING_MAX as it is unused + * mpls: remove duplicate code in mpls_route_change + * nh_common: move enum nh_type into nh_common.h + * nh_common: include mpls.h as it uses the outlabels defined there + * nh_common: include ip_addr.h as it uses the ip_addr defined there + * route: include if_llatbl.h as it use symbols defined there + * l3_v4_encap: include if_llatbl.h as it use symbols defined there + * nh: move enum nh_type to nh_common.h and remove nh.h + * nh_common: rename next_hop_u to next_hop_list + + -- Paul Atkins Fri, 01 May 2020 10:11:41 +0100 + +vyatta-dataplane (3.10.7) unstable; urgency=medium + + [ Nicholas Brown ] + * Git Ignore generated library files + + [ Robert Shearman ] + * vlan_modify: fix json writing unwinding for no interfaces + (Fixes: VRVDR-50839) + * fal: add FAL next-hop attributes for PIC Edge (Closes: VRVDR-50739) + * fal: add memory management 
functions + * if: use RCU for interface fal_l3 field + * storm_control: use RCU for instance sci_fal_obj array + * tests: use FAL memory helpers in test plugin + + [ Paul Atkins ] + + -- Paul Atkins Fri, 01 May 2020 08:19:56 +0100 + +vyatta-dataplane (3.10.6) unstable; urgency=medium + + [ Paul Atkins ] + * ut: change the size of the rings on the tx/rx interfaces + * ut: provide apis for injecting and getting tx'ed packets + * ut: add a new test for qos bursts + + [ Robert Shearman ] + * vlan_modify: don't create filter chain unless there's an action we handle + (Fixes: VRVDR-50709) + * vlan_modify: remove some noisy log messages (Fixes: VRVDR-50711) + + [ Mike Larson ] + * Protobuf support files need to be exported for plug-in + * Move install location for protobuf generated files + * Export more UT functions/headers + * Update pkg-config path for dev + * Set up dependency correctly for proto projects + + [ Paul Atkins ] + * route: make the gateway in a next hop a union of v4/v6 + * route: change order of fields in struct next_hop_u + * mpls: include stdbool.h as the header file uses bool + * nh_common: add a new nh_common file to contain core nh code + * route6: make the v6 route code use the common 'struct next_hop' + * nh_common: move struct next_hop_u into nh_common.h + * nh_common: remove struct next_hop_v6_u, use the v4/v6 version + * nh_common: move the struct nexthop_hash_key to nh_common header file + * route6: use the common nexthop_hash_key structure + * nh_common: move struct nexthop_table into nh_common.h + * route6: use the common nexthop table definition + * nh_common: add code to allow registration per AF + * nh_common: add common funcs to get/set ifp from nh + * route6: move route_v6_init and route_v6_uninit lower in file + * route: register hash functions with nh_common + * route6: register hash functions with nh_common + * route: don't use global nh_tbl var from nexthop_new + * route: modify the debug in nexthop_reuse to add the af + * 
nh_common: make the nexthop_lookup function common + * route6: use the common nexthop_lookup function + * fal: use a common version of next_hop_to_packet_action + * fal: use a common version of next_hop_group_packet_action + * fal: use a common version of next_hop_to_attr_list + * fal: use a common version of fal_ip_new_next_hops + * nh_common: use a common version of nexthop_reuse + * nh_common: use a common version of nexthop_hash_insert + * route6: pass an address family into nexthop6_new + * route6: use NEXTHOP_HASH_TBL_SIZE instead of the v6 specific version + * nh_common: use a common version of nexthop_alloc + * nh_common: use a common version of nexthop_destroy + * route6: change nexthop6_new to take a proto field + * nh_common: use a common version of nexthop_new + * route: modify nexthop_create to take a struct ip_addr for the gateway + * route6: modify nexthop6_create to take a struct ip_addr for the gateway + * nh_common: use a common version of nexthop_create + * fal: use a common version of fal_ip_del_next_hops + * nh_common: use a common version of nh_is_neigh_present + * nh_common: use a common version of nh_is_neigh_created + * nh_common: use a common version of nh_get_lle + * route: add a family parameter to nexthop_put + * route6: add a family parameter to nexthop6_put + * nh_common: use a common version of nexthop_put + * nh_common: use a common version of nexthop_create_copy + * route: add a family parameter to nexthop_hash_del_add + * route6: add a family parameter to nexthop6_hash_del_add + * nh_common: use a common version of nexthop_hash_del_add + * nh_common: use a common version of nh_is_connected + * nh_common: use a common version of nh_is_local + * nh_common: use a common version of nh_is_gw + * route: add a family parameter to nh4_set_neigh_present + * route6: add a family parameter to nh6_set_neigh_present + * nh_common: use a common version of nh_set_neigh_present + * route: add a family parameter to nh4_clear_neigh_present + * 
route6: add a family parameter to nh6_clear_neigh_present + * nh_common: use a common version of nh_clear_neigh_present + * route: add a family parameter to nh4_set_neigh_created + * route6: add a family parameter to nh6_set_neigh_created + * nh_common: use a common version of nh_set_neigh_created + * route: add a family parameter to nh4_clear_neigh_created + * route6: add a family parameter to nh6_clear_neigh_created + * nh_common: use a common version of nh_clear_neigh_created + * nh_common: use a common version of nextu_nc_count + * nh_common: use a common version of nextu_find_path_using_ifp + * nh_common: use a common version of nextu_is_any_connected + * route: add a family parameter to route_nh_replace + * route: add a family parameter to route6_nh_replace + * fal: move next_hop_group_packet_action higher in file + * fal: use next_hop_group_packet_action when creating new ip nhs + + -- Paul Atkins Wed, 29 Apr 2020 08:42:25 +0100 + +vyatta-dataplane (3.10.5) unstable; urgency=medium + + [ Srinivas Narayan ] + * Refactor use of grouper2 functions into separate module + * rte-acl: determine ruleset size at creation time + * rte-acl: Add packet matching abstraction + * rte-acl: migrate ruleset to packet match abstraction API + * rte-acl: Rename npf_grouper_cb_data and make it public + * rte-acl: Add rte-acl based implementation of packet matching callbacks + * rte-acl: Set up crypto callbacks for using rte-acl + * rte-acl: Pass rule group as part of the context to match function + * rte-acl: Add API to find rule in a group + * rte-acl: Add API to determine if ruleset uses cache + * rte-acl: Update crypto callback for match API + * rte-acl: Use NPF cache only if ruleset requires it + * rte-acl: skip using npf-cache if ruleset doesn't rely on it + * rte-acl: Only invoke classifier for non-empty rulesets + * rte-acl: Streamline call flow in npf_ruleset_inspect + + [ Paul Atkins ] + + -- Paul Atkins Mon, 27 Apr 2020 08:09:24 +0100 + +vyatta-dataplane (3.10.4) 
unstable; urgency=medium + + * Revert "Protobuf support files need to be exported for plug-in" + * Revert "Move install location for protobuf generated files" + * Revert "Export more UT functions/headers" + * Revert "Update pkg-config path for dev" + + -- Paul Atkins Fri, 24 Apr 2020 09:18:28 +0100 + +vyatta-dataplane (3.10.3) unstable; urgency=medium + + [ Paul Atkins ] + * ut: mark dp_test_crypto_perf_scale tests as DONT_RUN + + [ Mike Larson ] + * Protobuf support files need to be exported for plug-in + * Move install location for protobuf generated files + * Export more UT functions/headers + * Update pkg-config path for dev + + [ Paul Atkins ] + + -- Paul Atkins Fri, 24 Apr 2020 08:21:26 +0100 + +vyatta-dataplane (3.10.2) unstable; urgency=medium + + [ Mark Gillott ] + * pcap: run FAL updates on master thread (Fixes: VRVDR-50581) + + [ Paul Atkins ] + * 3.9.105 + + [ Nicholas Brown ] + * master branch is targeting 2005 release + * Restore .gitlint file + * Identify hidden files that should not be ignored + + [ Gavin Shearer ] + * l3acl: don't commit rules to HW on event IF_FEAT_MODE_EVENT_L3_ENABLED + + [ Ian Wilson ] + * cgnat: Obsolete some error counts, and add echo-req count to summary + + [ Nicholas Brown ] + * Remove copyright and license assertion output + + [ Paul Atkins ] + * ipv4_rsmbl: If we detect duplicate fragments then clean up properly + * ipv4_rsmbl: check all previous frags to determine duplicates + + [ Srinivas Narayan ] + * ipsec-ut: Update crypto UTs to support policy count verification + * ipsec-ut: Force NPF cleanup at the end of s2s suites + * ipsec-ut: Add NPF cleanup calls to multi-tunnel tests + * ipsec-ut: Add a test to measure time to setup/teardown 500 tunnels + * ipsec-ut: Increase poll interval for crypto policy display + * crypto: Add 'brief' option to 'ipsec spd' command + * crypto: Add total and live policy counts + * ipsec-ut: Update UT to use 'brief' cmd and live policy count + * UT: Add API to specify polling interval for 
json state + * ipsec-ut: Update polling interval and count based on new API + + [ Paul Atkins ] + * ipv4_rsmbl: drop fragment if it includes previously rx'ed bytes + * ut: enhance the ipv4 duplicate fragment tests + * ipv6_rsmbl: If we detect duplicate fragments then clean up properly + * ipv6_rsmbl: check all previous frags to determine duplicates + * ipv6_rsmbl: drop fragment if it includes previously rx'ed bytes + * if: make if_output_features always inline + * l2_vlan_mod: rename the vlan_mod pipline feature file + * l2_vlan_mod: add a new pipeline node for egress vlan modify + * portmonitor: add a new pipeline node for output portmonitor + * capture: add a new pipeline node for output capture + * if: split if_output into an internal and external version + * if: remove if_output_features and call the feat point directly + * if: make the pipeline call if_output_internal + * 3.9.106 + + [ Ian Wilson ] + * npf: Optimal address-group show output including host addresses + + [ Robert Shearman ] + * pipeline: use correct ifp for egress vlan modify feature + (Fixes: VRVDR-50708) + + [ Paul Atkins ] + * 3.9.107 + + -- Paul Atkins Tue, 21 Apr 2020 09:40:00 +0100 + +vyatta-dataplane (3.10.1) unstable; urgency=medium + + [ Nicholas Brown ] + * master-next branch is targeting danos project + + [ Srinivas Narayan ] + * crypto: Increase force commit count to 2000 + + [ Paul Atkins ] + + -- Paul Atkins Wed, 08 Apr 2020 13:36:41 +0100 + +vyatta-dataplane (3.9.112) unstable; urgency=medium + + [ Nicholas Brown ] + * Add new public API dp_pipeline_is_feature_enabled_by_inst() + + [ Paul Atkins ] + + -- Paul Atkins Wed, 06 May 2020 09:18:57 +0100 + +vyatta-dataplane (3.9.111) unstable; urgency=medium + + [ Charles (Chas) Williams ] + * vhost: fix QMP communication (Bugfix: VRVDR-50745) + + [ Paul Atkins ] + + -- Paul Atkins Wed, 22 Apr 2020 16:21:13 +0100 + +vyatta-dataplane (3.9.110) unstable; urgency=medium + + [ Mark Gillott ] + * Register event operations only on first use 
(Fixes: VRVDR-50621) + * Capture portmonitor replay errors (Fixes: VRVDR-50621) + + [ Paul Atkins ] + + -- Paul Atkins Tue, 21 Apr 2020 07:45:42 +0100 + +vyatta-dataplane (3.9.109) unstable; urgency=medium + + [ Charles (Chas) Williams ] + * crypto: count burst buffer full as drops (Bugfix: VRVDR-50279) + * crypto: count most errors as proto drops (Bugfix: VRVDR-50279) + * crypto: eliminate macro usage (Bugfix: VRVDR-50279) + * crypto: count packets against tunnel interface (Bugfix: VRVDR-50279) + * crypto: do not count failed packets (Bugfix: VRVDR-50279) + + [ Paul Atkins ] + + -- Paul Atkins Tue, 21 Apr 2020 07:43:38 +0100 + +vyatta-dataplane (3.9.108) unstable; urgency=medium + + [ Gavin Shearer ] + * alg: ensure parent session active before linking child + + [ Paul Atkins ] + + -- Paul Atkins Tue, 21 Apr 2020 07:42:23 +0100 + +vyatta-dataplane (3.9.107) unstable; urgency=medium + + [ Ian Wilson ] + * npf: Optimal address-group show output including host addresses + + [ Robert Shearman ] + * pipeline: use correct ifp for egress vlan modify feature + (Fixes: VRVDR-50708) + + [ Paul Atkins ] + + -- Paul Atkins Fri, 17 Apr 2020 11:20:45 +0100 + +vyatta-dataplane (3.9.106) unstable; urgency=medium + + [ Nicholas Brown ] + * master branch is targeting 2005 release + * Restore .gitlint file + * Identify hidden files that should not be ignored + + [ Gavin Shearer ] + * l3acl: don't commit rules to HW on event IF_FEAT_MODE_EVENT_L3_ENABLED + + [ Ian Wilson ] + * cgnat: Obsolete some error counts, and add echo-req count to summary + + [ Nicholas Brown ] + * Remove copyright and license assertion output + + [ Paul Atkins ] + * ipv4_rsmbl: If we detect duplicate fragments then clean up properly + * ipv4_rsmbl: check all previous frags to determine duplicates + * ipv4_rsmbl: drop fragment if it includes previously rx'ed bytes + * ut: enhance the ipv4 duplicate fragment tests + * ipv6_rsmbl: If we detect duplicate fragments then clean up properly + * ipv6_rsmbl: check all 
previous frags to determine duplicates + * ipv6_rsmbl: drop fragment if it includes previously rx'ed bytes + * if: make if_output_features always inline + * l2_vlan_mod: rename the vlan_mod pipline feature file + * l2_vlan_mod: add a new pipeline node for egress vlan modify + * portmonitor: add a new pipeline node for output portmonitor + * capture: add a new pipeline node for output capture + * if: split if_output into an internal and external version + * if: remove if_output_features and call the feat point directly + * if: make the pipeline call if_output_internal + + -- Paul Atkins Thu, 16 Apr 2020 12:34:30 +0100 + +vyatta-dataplane (3.9.105) unstable; urgency=medium + + [ Mark Gillott ] + * pcap: run FAL updates on master thread (Fixes: VRVDR-50581) + + [ Paul Atkins ] + + -- Paul Atkins Wed, 08 Apr 2020 07:11:28 +0100 + +vyatta-dataplane (3.9.104) unstable; urgency=medium + + * DANOS Import master + + -- Nicholas Brown Tue, 07 Apr 2020 13:26:26 +0100 + vyatta-dataplane (3.7.86.1.4) unstable; urgency=medium * DANOS Import diff --git a/debian/control b/debian/control index f13a3fe4..f233451e 100644 --- a/debian/control +++ b/debian/control @@ -3,47 +3,106 @@ Section: non-free/net Priority: optional Maintainer: Vyatta Package Maintainers Standards-Version: 3.9.8 -Build-Depends: debhelper (>= 9), dh-autoreconf, pkg-config, - libdpdk-dev (>= 18.11-0vyatta4), libnuma-dev, - libinih-dev, - libmnl-dev, libcap-dev, liburcu-dev (>= 0.8.0), - libzmq3-dev (>= 4.0.4), libbsd-dev, libczmq-dev, - libxen-dev, libosip2-dev, - cpputest, check, lcov, - libpcap-dev (>= 1.7.0), libjson-c-dev, - libssl-dev, debhelper (>= 9.20160709) | dh-systemd (>= 10.2.4), - autoconf-archive (>= 20150925), - libsystemd-dev, libvyatta-dpdk-swport-dev (>= 0.1.15), - valgrind , - python3, protobuf-c-compiler, libprotobuf-c-dev, - protobuf-compiler, perl (>= 5.8.8), - libgoogle-protocolbuffers-perl, - bvnos-linux-libc-dev-vyatta (>> 4.19.16-0vyatta6), +Build-Depends: meson, + 
bvnos-linux-libc-dev-vyatta (>> 4.19.16-0vyatta6), + check , + debhelper (>= 9.20160709), + dh-exec, + golang-goprotobuf-dev, + lcov , + libbsd-dev, + libcap-dev, + libczmq-dev, + libdpdk-dev (>= 19.11.4-0vyatta16), + libgoogle-protocolbuffers-perl, + libinih-dev, + libjson-c-dev , + libmnl-dev, + libndpi-dev (>= 3.4), + libnuma-dev, + libosip2-dev, + libpcap-dev (>= 1.7.0), + libprotobuf-c-dev, + libprotobuf-dev, + libssl-dev, + libsystemd-dev, + liburcu-dev (>= 0.8.0), + libvyatta-dpdk-swport-dev (>= 0.1.23), + libzmq3-dev (>= 4.0.4), + perl (>= 5.8.8), + pkg-config, + protobuf-c-compiler, + protobuf-compiler, + python3, Package: vyatta-dataplane Section: non-free/net Architecture: any Recommends: dpdk-modules -Depends: libconfig-tiny-perl, libfile-slurp-perl, - adduser, dmidecode [amd64], - lsb-base (>= 3.0-6), rsyslog, vyatta-cpu-shield (>> 0.2.6), - vyatta-platform-util, - ${perl:Depends}, ${shlibs:Depends}, ${misc:Depends} -Provides: fal-policer-clear-stats, - fal-router-interface, - fal-vlan-feat-create, - fal-port-attr-hw-mirroring, +Depends: adduser, + dmidecode [amd64], + libconfig-tiny-perl, + libfile-slurp-perl, + librte-meta-allpmds, + lsb-base (>= 3.0-6), + mstflint, + rsyslog, + rte-acl-rebuild-support, + vyatta-cpu-shield (>> 0.2.6), + vyatta-platform-util, + libvyatta-vplane, + ${misc:Depends}, + ${perl:Depends}, + ${shlibs:Depends} +Breaks: vplane-controller (<< 3.6.8), + vyatta-poe (<< 0.3), + vyatta-service-portmonitor-v1-yang (<< 3.8) +Provides: fal-acl, + fal-acl-commit, fal-backplane, - fal-qos-hw-info, - fal-tun, + fal-bfd-hw, + fal-bfd-hw-nego, + fal-bfd-hw-mode, + fal-br-walk-neigh, + fal-buffer-errors, + fal-cmd-ret, + fal-ipmc, + fal-l2-upd-port-status, + fal-lag, + fal-nh-router-intf, + fal-pause, + fal-packet-capture, + fal-policer-clear-stats, + fal-port-attr-hw-mirroring, fal-ptp, + fal-ptp-additional-path, + fal-ptp-g8275p2, + fal-ptp-g8275p1, fal-ptp-peer-resolution, - fal-acl, - fal-ipmc, - fal-acl-commit, - fal-cmd-ret, - 
fal-br-walk-neigh, + fal-synce, + fal-qos-hw-info, + fal-qos-mark-map-dp, + fal-qos-queue-designation, + fal-rif-stats, + fal-route-walk, + fal-router-interface, + fal-tun, fal-tun-dscp, + fal-vlan-feat-create, + fal-deferred-free, + fal-mac-limit, + fal-br-vlan-set-empty, + fal-bfd-max-interval-cnt, + fal-qos-egressmap, + fal-cpp-pim-ip-mc, + fal-mirror-vlan, + fal-bfd-sess-id, + fal-acl-rule-no, + fal-bfd-session-dump, + fal-qos-dscp-egressmap, + fal-route-vrf-obj, + fal-neigh-rtr-intf-obj, + fal-ret-plugin-consumed, Description: Vyatta optimized dataplane Vyatta dataplane is the set of tools to provide performance optimized routing and forwarding. It supports IPv4, IPv6, firewalling, bridging and more. @@ -51,10 +110,29 @@ Description: Vyatta optimized dataplane Package: vyatta-dataplane-dev Section: non-free/devel Architecture: any -Depends: ${misc:Depends} +Depends: check, + libdpdk-dev (>= 19.11.4-0vyatta16), + libjson-c-dev, + libvyattafal-dev, + libvyatta-dataplane-proto-dev, + libprotobuf-c-dev, + libvyatta-jsonw1 (= ${binary:Version}), + ${misc:Depends}, + ${shlibs:Depends} +Provides: dp-pktmbuf-set-vrf, Description: Vyatta dataplane pipeline node build support Set of headers required for dataplane compilation +Package: vyatta-dataplane-test +Section: non-free/devel +Architecture: any +Build-Profiles: +Depends: librte-meta-allpmds, + ${misc:Depends}, + ${shlibs:Depends} +Description: Vyatta dataplane test binary + A test binary used for running unit tests + Package: libvyatta-dataplane-proto-support Section: non-free/devel Architecture: all @@ -65,7 +143,10 @@ Description: Vyatta dataplane protocol buffer files and support Package: vyatta-dataplane-sample-plugin Section: non-free/net Architecture: any -Depends: vyatta-dataplane, ${shlibs:Depends}, ${misc:Depends} +Build-Profiles: +Depends: vyatta-dataplane | vyatta-dataplane-test, + ${misc:Depends}, + ${shlibs:Depends} Description: Vyatta dataplane sample plugin library A sample pipeline plugin for the 
Vyatta dataplane that illustrates how a plugin can work. @@ -73,33 +154,62 @@ Description: Vyatta dataplane sample plugin library Package: libvyattafal-dev Section: non-free/libdevel Architecture: any -Depends: ${misc:Depends}, libdpdk-dev -Provides: fal-dev-policer-clear-stats, - fal-dev-router-interface, - fal-dev-vlan-feat-create, - fal-dev-vlan-rx-stats-range, - fal-dev-port-attr-hw-mirroring, +Depends: libdpdk-dev (>= 19.11.4-0vyatta16), ${misc:Depends} +Provides: fal-dev-acl, fal-dev-backplane, - fal-dev-qos-hw-info, + fal-dev-bfd-hw, + fal-dev-bfd-hw-nego, + fal-dev-bfd-hw-mode, + fal-dev-br-walk-neigh, + fal-dev-cmd-ret, fal-dev-cpp-rl, + fal-dev-ipmc, + fal-dev-gpc, + fal-dev-l2-upd-port-status, + fal-dev-l3-dump, + fal-dev-lag, + fal-dev-nh-router-intf, + fal-dev-packet-capture, + fal-dev-pause, + fal-dev-policer-clear-stats, + fal-dev-policer-colour-aware, + fal-dev-port-attr-hw-mirroring, + fal-dev-ptp, + fal-dev-ptp-additional-path, + fal-dev-ptp-g8275p2, + fal-dev-ptp-g8275p1, + fal-dev-synce, + fal-dev-qos-hw-info, fal-dev-qos-incremental-stats, + fal-dev-qos-mark-map-dp, + fal-dev-qos-queue-designation, + fal-dev-rif-stats, + fal-dev-route-walk, + fal-dev-router-interface, fal-dev-tun, - fal-dev-ptp, - fal-dev-acl, - fal-dev-ipmc, - fal-dev-cmd-ret, - fal-dev-br-walk-neigh, fal-dev-tun-dscp, + fal-dev-vlan-feat-create, + fal-dev-vlan-rx-stats-range, + fal-dev-l3-nh-usability, + fal-dev-deferred-free, + fal-dev-mac-limit, + fal-dev-br-vlan-set-empty, + fal-dev-bfd-max-interval-cnt, + fal-dev-qos-egressmap, + fal-dev-cpp-pim-ip-mc, + fal-dev-mirror-vlan, + fal-dev-bfd-sess-id, + fal-dev-capture-get-stats, + fal-dev-l2-port-dump, + fal-dev-bfd-session-dump, + fal-dev-qos-dscp-egressmap, + fal-dev-invalid-vrf-id, + fal-dev-route-vrf-obj, + fal-dev-neigh-rtr-intf-obj, + fal-dev-ret-plugin-consumed, Description: Forwarding Abstraction Library plugin development files An API for dataplane FAL plugins -Package: vyatta-dataplane-dbg -Section: non-free/debug 
-Architecture: any -Depends: vyatta-dataplane (= ${binary:Version}), ${misc:Depends} -Description: Vyatta dataplane debug symbols - Debug symbols for dataplane - Package: vyatta-dataplane-protocols-versions Architecture: all Depends: ${misc:Depends} @@ -108,3 +218,36 @@ Description: dataplane runtime protocols versions tracking meta-package This meta-package will provide a virtual package for each of the runtime protocols spoken by vyatta-dataplane, including their versions. + +Package: golang-github-danos-vyatta-dataplane-protobuf-dev +Architecture: all +Depends: golang-goprotobuf-dev, ${misc:Depends} +Description: Provides Go language bindings for the dataplane API. + Go bindings for the dataplane API. + +Package: libvyatta-dataplane-proto1 +Architecture: any +Depends: ${misc:Depends}, ${shlibs:Depends} +Multi-Arch: same +Description: Provides C++ language bindings for the dataplane API. + C++ bindings for the dataplane API. + +Package: libvyatta-vplane +Architecture: any +Depends: ${misc:Depends}, ${perl:Depends}, ${shlibs:Depends} +Multi-Arch: same +Description: Provides Vyatta Vplane.pm modules + Perl modules that support system initialization for the dataplane. + +Package: libvyatta-dataplane-proto-dev +Section: non-free/libdevel +Architecture: any +Depends: libvyatta-dataplane-proto1 (= ${binary:Version}), ${misc:Depends} +Description: Provides C++ headers for the dataplane API. + C++ headers for the dataplane API. + +Package: libvyatta-jsonw1 +Architecture: any +Depends: ${misc:Depends}, ${shlibs:Depends} +Description: Lightweight JSON writer library. + Lightweight, simple JSON writer library. 
diff --git a/debian/copyright b/debian/copyright index 165b9424..f218782a 100644 --- a/debian/copyright +++ b/debian/copyright @@ -15,7 +15,7 @@ Copyright: 2012 Paolo Borelli 2012 Dan Winship 2015 Bastien ROUCARIES -License: LGPLv2.1+ +License: LGPL-2.1+ This package is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; @@ -32,20 +32,6 @@ License: LGPLv2.1+ On Debian systems, the complete text of the GNU Lesser General Public License can be found in "/usr/share/common-licenses/LGPL-2.1". -Files: - m4/ax_cxx_compile_stdcxx.m4 - m4/ax_valgrind_check.m4 -Copyright: - 2008 Benjamin Kosnik - 2012 Zack Weinberg - 2013 Roy Stogner - 2014, 2015 Google Inc.; contributed by Alexey Sokolov - 2014, 2015, 2016 Philip Withnall - 2015 Paul Norman - 2015 Moritz Klammler -License: FSFAP - Copying and distribution of this file, with or without modification, are permitted in any medium without royalty provided the copyright notice and this notice are preserved. This file is offered as-is, without any warranty. - Files: src/ip_id.c src/npf/alg/npf_alg_private.c @@ -142,6 +128,11 @@ Copyright: 2017-2019, AT&T Intellectual Property. License: LGPL-2.1-only and BSD-3-Clause +Files: + protobuf/go/*.go +Copyright: + 2019 AT&T Intellectual Property. All rights reserved. +License: MPL-2.0 License: LGPL-2.1-only This package is free software; you can redistribute it and/or @@ -202,3 +193,283 @@ License: BSD-3-Clause 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +License: MPL-2.0 + Mozilla Public License Version 2.0 + . + 1. Definitions + 1.1. "Contributor" means each individual or legal entity that creates, + contributes to the creation of, or owns Covered Software. + 1.2. "Contributor Version" means the combination of the Contributions + of others (if any) used by a Contributor and that particular Contributor's + Contribution. + 1.3. "Contribution" means Covered Software of a particular + Contributor. + 1.4. "Covered Software" means Source Code Form to which the initial + Contributor has attached the notice in Exhibit A, the Executable Form of such + Source Code Form, and Modifications of such Source Code Form, in each case + including portions thereof. + 1.5. "Incompatible With Secondary Licenses" means + (a) that the initial Contributor has attached the notice described + in Exhibit B to the Covered Software; or + (b) that the Covered Software was made available under the terms + of version 1.1 or earlier of the License, but not also under the terms of a + Secondary License. + 1.6. "Executable Form" means any form of the work other than Source + Code Form. + 1.7. 
"Larger Work" means a work that combines Covered Software with + other material, in a separate file or files, that is not Covered Software. + 1.8. "License" means this document. + 1.9. "Licensable" means having the right to grant, to the maximum + extent possible, whether at the time of the initial grant or subsequently, any + and all of the rights conveyed by this License. + 1.10. "Modifications" means any of the following: + (a) any file in Source Code Form that results from an addition to, + deletion from, or modification of the contents of Covered Software; or + (b) any new file in Source Code Form that contains any Covered + Software. + 1.11. "Patent Claims" of a Contributor means any patent claim(s), + including without limitation, method, process, and apparatus claims, in any + patent Licensable by such Contributor that would be infringed, but for the + grant of the License, by the making, using, selling, offering for sale, having + made, import, or transfer of either its Contributions or its Contributor + Version. + 1.12. "Secondary License" means either the GNU General Public License, + Version 2.0, the GNU Lesser General Public License, Version 2.1, the GNU + Affero General Public License, Version 3.0, or any later versions of those + licenses. + 1.13. "Source Code Form" means the form of the work preferred for + making modifications. + 1.14. "You" (or "Your") means an individual or a legal entity + exercising rights under this License. For legal entities, "You" includes any + entity that controls, is controlled by, or is under common control with You. + For purposes of this definition, "control" means (a) the power, direct or + indirect, to cause the direction or management of such entity, whether by + contract or otherwise, or (b) ownership of more than fifty percent (50%) of + the outstanding shares or beneficial ownership of such entity. + 2. License Grants and Conditions + 2.1. Grants + . 
+ Each Contributor hereby grants You a world-wide, royalty-free, + non-exclusive license: + (a) under intellectual property rights (other than patent or + trademark) Licensable by such Contributor to use, reproduce, make available, + modify, display, perform, distribute, and otherwise exploit its Contributions, + either on an unmodified basis, with Modifications, or as part of a Larger + Work; and + (b) under Patent Claims of such Contributor to make, use, sell, + offer for sale, have made, import, and otherwise transfer either its + Contributions or its Contributor Version. + 2.2. Effective Date + . + The licenses granted in Section 2.1 with respect to any Contribution + become effective for each Contribution on the date the Contributor first + distributes such Contribution. + 2.3. Limitations on Grant Scope + . + The licenses granted in this Section 2 are the only rights granted + under this License. No additional rights or licenses will be implied from the + distribution or licensing of Covered Software under this License. + Notwithstanding Section 2.1(b) above, no patent license is granted by a + Contributor: + (a) for any code that a Contributor has removed from Covered + Software; or + (b) for infringements caused by: (i) Your and any other third + party's modifications of Covered Software, or (ii) the combination of its + Contributions with other software (except as part of its Contributor Version); + or + (c) under Patent Claims infringed by Covered Software in the + absence of its Contributions. + . + This License does not grant any rights in the trademarks, service + marks, or logos of any Contributor (except as may be necessary to comply with + the notice requirements in Section 3.4). + 2.4. Subsequent Licenses + . 
+ No Contributor makes additional grants as a result of Your choice to + distribute the Covered Software under a subsequent version of this License + (see Section 10.2) or under the terms of a Secondary License (if permitted + under the terms of Section 3.3). + 2.5. Representation + . + Each Contributor represents that the Contributor believes its + Contributions are its original creation(s) or it has sufficient rights to + grant the rights to its Contributions conveyed by this License. + 2.6. Fair Use + . + This License is not intended to limit any rights You have under + applicable copyright doctrines of fair use, fair dealing, or other + equivalents. + 2.7. Conditions + . + Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted + in Section 2.1. + 3. Responsibilities + 3.1. Distribution of Source Form + . + All distribution of Covered Software in Source Code Form, including + any Modifications that You create or to which You contribute, must be under + the terms of this License. You must inform recipients that the Source Code + Form of the Covered Software is governed by the terms of this License, and how + they can obtain a copy of this License. You may not attempt to alter or + restrict the recipients' rights in the Source Code Form. + 3.2. Distribution of Executable Form + . + If You distribute Covered Software in Executable Form then: + (a) such Covered Software must also be made available in Source + Code Form, as described in Section 3.1, and You must inform recipients of the + Executable Form how they can obtain a copy of such Source Code Form by + reasonable means in a timely manner, at a charge no more than the cost of + distribution to the recipient; and + (b) You may distribute such Executable Form under the terms of + this License, or sublicense it under different terms, provided that the + license for the Executable Form does not attempt to limit or alter the + recipients' rights in the Source Code Form under this License. + 3.3. 
Distribution of a Larger Work + . + You may create and distribute a Larger Work under terms of Your + choice, provided that You also comply with the requirements of this License + for the Covered Software. If the Larger Work is a combination of Covered + Software with a work governed by one or more Secondary Licenses, and the + Covered Software is not Incompatible With Secondary Licenses, this License + permits You to additionally distribute such Covered Software under the terms + of such Secondary License(s), so that the recipient of the Larger Work may, at + their option, further distribute the Covered Software under the terms of + either this License or such Secondary License(s). + 3.4. Notices + . + You may not remove or alter the substance of any license notices + (including copyright notices, patent notices, disclaimers of warranty, or + limitations of liability) contained within the Source Code Form of the Covered + Software, except that You may alter any license notices to the extent required + to remedy known factual inaccuracies. + 3.5. Application of Additional Terms + . + You may choose to offer, and to charge a fee for, warranty, support, + indemnity or liability obligations to one or more recipients of Covered + Software. However, You may do so only on Your own behalf, and not on behalf of + any Contributor. You must make it absolutely clear that any such warranty, + support, indemnity, or liability obligation is offered by You alone, and You + hereby agree to indemnify every Contributor for any liability incurred by such + Contributor as a result of warranty, support, indemnity or liability terms You + offer. You may include additional disclaimers of warranty and limitations of + liability specific to any jurisdiction. + 4. Inability to Comply Due to Statute or Regulation + . 
+ If it is impossible for You to comply with any of the terms of this + License with respect to some or all of the Covered Software due to statute, + judicial order, or regulation then You must: (a) comply with the terms of this + License to the maximum extent possible; and (b) describe the limitations and + the code they affect. Such description must be placed in a text file included + with all distributions of the Covered Software under this License. Except to + the extent prohibited by statute or regulation, such description must be + sufficiently detailed for a recipient of ordinary skill to be able to + understand it. + 5. Termination + 5.1. The rights granted under this License will terminate + automatically if You fail to comply with any of its terms. However, if You + become compliant, then the rights granted under this License from a particular + Contributor are reinstated (a) provisionally, unless and until such + Contributor explicitly and finally terminates Your grants, and (b) on an + ongoing basis, if such Contributor fails to notify You of the non-compliance + by some reasonable means prior to 60 days after You have come back into + compliance. Moreover, Your grants from a particular Contributor are reinstated + on an ongoing basis if such Contributor notifies You of the non-compliance by + some reasonable means, this is the first time You have received notice of + non-compliance with this License from such Contributor, and You become + compliant prior to 30 days after Your receipt of the notice. + 5.2. If You initiate litigation against any entity by asserting a + patent infringement claim (excluding declaratory judgment actions, + counter-claims, and cross-claims) alleging that a Contributor Version directly + or indirectly infringes any patent, then the rights granted to You by any and + all Contributors for the Covered Software under Section 2.1 of this License + shall terminate. + 5.3. 
In the event of termination under Sections 5.1 or 5.2 above, all + end user license agreements (excluding distributors and resellers) which have + been validly granted by You or Your distributors under this License prior to + termination shall survive termination. + 6. Disclaimer of Warranty + . + Covered Software is provided under this License on an "as is" basis, + without warranty of any kind, either expressed, implied, or statutory, + including, without limitation, warranties that the Covered Software is free of + defects, merchantable, fit for a particular purpose or non-infringing. The + entire risk as to the quality and performance of the Covered Software is with + You. Should any Covered Software prove defective in any respect, You (not any + Contributor) assume the cost of any necessary servicing, repair, or + correction. This disclaimer of warranty constitutes an essential part of this + License. No use of any Covered Software is authorized under this License + except under this disclaimer. + 7. Limitation of Liability + . + Under no circumstances and under no legal theory, whether tort (including + negligence), contract, or otherwise, shall any Contributor, or anyone who + distributes Covered Software as permitted above, be liable to You for any + direct, indirect, special, incidental, or consequential damages of any + character including, without limitation, damages for lost profits, loss of + goodwill, work stoppage, computer failure or malfunction, or any and all other + commercial damages or losses, even if such party shall have been informed of + the possibility of such damages. This limitation of liability shall not apply + to liability for death or personal injury resulting from such party's + negligence to the extent applicable law prohibits such limitation. Some + jurisdictions do not allow the exclusion or limitation of incidental or + consequential damages, so this exclusion and limitation may not apply to You. + 8. Litigation + . 
+ Any litigation relating to this License may be brought only in the courts + of a jurisdiction where the defendant maintains its principal place of + business and such litigation shall be governed by laws of that jurisdiction, + without reference to its conflict-of-law provisions. Nothing in this Section + shall prevent a party's ability to bring cross-claims or counter-claims. + 9. Miscellaneous + . + This License represents the complete agreement concerning the subject + matter hereof. If any provision of this License is held to be unenforceable, + such provision shall be reformed only to the extent necessary to make it + enforceable. Any law or regulation which provides that the language of a + contract shall be construed against the drafter shall not be used to construe + this License against a Contributor. + 10. Versions of the License + 10.1. New Versions + . + Mozilla Foundation is the license steward. Except as provided in + Section 10.3, no one other than the license steward has the right to modify or + publish new versions of this License. Each version will be given a + distinguishing version number. + 10.2. Effect of New Versions + . + You may distribute the Covered Software under the terms of the version + of the License under which You originally received the Covered Software, or + under the terms of any subsequent version published by the license steward. + 10.3. Modified Versions + . + If you create software not governed by this License, and you want to + create a new license for such software, you may create and use a modified + version of this License if you rename the license and remove any references to + the name of the license steward (except to note that such modified license + differs from this License). + 10.4. Distributing Source Code Form that is Incompatible With + Secondary Licenses + . 
+ If You choose to distribute Source Code Form that is Incompatible With + Secondary Licenses under the terms of this version of the License, the notice + described in Exhibit B of this License must be attached. + . + Exhibit A - Source Code Form License Notice + . + This Source Code Form is subject to the terms of the Mozilla Public License, + v. 2.0. If a copy of the MPL was not distributed with this file, You can + obtain one at http://mozilla.org/MPL/2.0/. + . + If it is not possible or desirable to put the notice in a particular file, + then You may include the notice in a location (such as a LICENSE file in a + relevant directory) where a recipient would be likely to look for such a + notice. + . + You may add additional accurate notices of copyright ownership. + . + Exhibit B - "Incompatible With Secondary Licenses" Notice + . + This Source Code Form is "Incompatible With Secondary Licenses", as defined by + the Mozilla Public License, v. 2.0. diff --git a/debian/golang-github-danos-vyatta-dataplane-protobuf-dev.install b/debian/golang-github-danos-vyatta-dataplane-protobuf-dev.install new file mode 100644 index 00000000..653271ba --- /dev/null +++ b/debian/golang-github-danos-vyatta-dataplane-protobuf-dev.install @@ -0,0 +1 @@ +/usr/share/gocode/src diff --git a/debian/libvyatta-dataplane-proto-dev.install b/debian/libvyatta-dataplane-proto-dev.install new file mode 100755 index 00000000..a68651e1 --- /dev/null +++ b/debian/libvyatta-dataplane-proto-dev.install @@ -0,0 +1,5 @@ +#!/usr/bin/dh-exec + +usr/include/vyatta-dataplane/protobuf +usr/lib/${DEB_HOST_MULTIARCH}/libvyatta-dataplane-proto.so +usr/lib/${DEB_HOST_MULTIARCH}/pkgconfig/vyatta-dataplane-proto.pc diff --git a/debian/libvyatta-dataplane-proto-support.install b/debian/libvyatta-dataplane-proto-support.install index 6e29c8d4..0ee33c06 100644 --- a/debian/libvyatta-dataplane-proto-support.install +++ b/debian/libvyatta-dataplane-proto-support.install @@ -1,4 +1,3 @@ 
usr/lib/python3/dist-packages/vyatta/proto usr/share/perl5/vyatta/proto -usr/share/vyatta-dataplane/protobuf/DataplaneEnvelope.proto -usr/share/vyatta-dataplane/protobuf/cpp_rl.proto +usr/share/vyatta-dataplane/protobuf diff --git a/debian/libvyatta-dataplane-proto-support.lintian-overrides b/debian/libvyatta-dataplane-proto-support.lintian-overrides index eaaa1b83..134bf1aa 100644 --- a/debian/libvyatta-dataplane-proto-support.lintian-overrides +++ b/debian/libvyatta-dataplane-proto-support.lintian-overrides @@ -1,2 +1 @@ -libvyatta-dataplane-proto-support: changelog-file-missing-in-native-package libvyatta-dataplane-proto-support: python-package-missing-depends-on-python diff --git a/debian/libvyatta-dataplane-proto1.install b/debian/libvyatta-dataplane-proto1.install new file mode 100755 index 00000000..da099c60 --- /dev/null +++ b/debian/libvyatta-dataplane-proto1.install @@ -0,0 +1,3 @@ +#!/usr/bin/dh-exec + +usr/lib/${DEB_HOST_MULTIARCH}/libvyatta-dataplane-proto.so.* diff --git a/debian/libvyatta-jsonw1.install b/debian/libvyatta-jsonw1.install new file mode 100644 index 00000000..a412213c --- /dev/null +++ b/debian/libvyatta-jsonw1.install @@ -0,0 +1 @@ +usr/lib/*/libvyatta-jsonw.so.* diff --git a/debian/libvyatta-vplane.install b/debian/libvyatta-vplane.install new file mode 100644 index 00000000..901865e0 --- /dev/null +++ b/debian/libvyatta-vplane.install @@ -0,0 +1 @@ +lib/Vyatta/Vplane.pm opt/vyatta/share/perl5/Vyatta diff --git a/debian/libvyattafal-dev.install b/debian/libvyattafal-dev.install old mode 100644 new mode 100755 index ef876285..a3bd6d62 --- a/debian/libvyattafal-dev.install +++ b/debian/libvyattafal-dev.install @@ -1,3 +1,7 @@ +#! 
/usr/bin/dh-exec + usr/include/vyatta-dataplane/fal_plugin.h usr/include/vyatta-dataplane/bridge_flags.h usr/include/vyatta-dataplane/bridge_vlan_set.h + +/usr/lib/${DEB_HOST_MULTIARCH}/pkgconfig/libvyattafal.pc diff --git a/debian/libvyattafal-dev.lintian-overrides b/debian/libvyattafal-dev.lintian-overrides deleted file mode 100644 index 0004e516..00000000 --- a/debian/libvyattafal-dev.lintian-overrides +++ /dev/null @@ -1 +0,0 @@ -libvyattafal-dev: changelog-file-missing-in-native-package diff --git a/debian/lintian-overrides b/debian/lintian-overrides index df4a574d..2582ad52 100644 --- a/debian/lintian-overrides +++ b/debian/lintian-overrides @@ -1,4 +1 @@ vyatta-dataplane: binary-without-manpage -vyatta-dataplane: new-package-should-close-itp-bug -vyatta-dataplane: changelog-file-missing-in-native-package -vyatta-dataplane: unstripped-binary-or-object usr/sbin/dataplane diff --git a/debian/rules b/debian/rules index 9d9f7a07..81697dc9 100755 --- a/debian/rules +++ b/debian/rules @@ -4,29 +4,37 @@ #export DH_VERBOSE=1 ifneq (,$(findstring coverage,$(DEB_BUILD_OPTIONS))) - CONFIGURE_COVERAGE_FLAGS = "--enable-code-coverage" -else - CONFIGURE_COVERAGE_FLAGS = "" + CONFIGURE_COVERAGE_FLAGS = "-Db_coverage=true" endif -ifneq (,$(filter no_extra_tests,$(DEB_BUILD_OPTIONS))) - WHOLE_DP_TEST="--disable-whole_dp_test" -else - WHOLE_DP_TEST="" +ifneq (,$(filter all_tests,$(DEB_BUILD_OPTIONS))) + WHOLE_DP_ALL_TESTS="-Dall_tests=true" endif -ifneq (,$(filter no_dpi,$(DEB_BUILD_OPTIONS))) - DATAPLANE_DPI="--without-dpi" -else - DATAPLANE_DPI="" +ifneq (,$(filter sanitizer,$(DEB_BUILD_OPTIONS))) + DATAPLANE_SANITIZER="-Db_sanitize=address" endif ifneq (,$(filter no_lto,$(DEB_BUILD_OPTIONS))) - DATAPLANE_LTO="--without-lto" -else - DATAPLANE_LTO="" + DATAPLANE_LTO="-Db_lto=false" endif +ifneq (,$(filter nofused,$(DEB_BUILD_OPTIONS))) + DATAPLANE_FUSED="-Dfused_mode=disabled" +endif + +ifneq (,$(filter nocheck,$(DEB_BUILD_PROFILES))) +
DATAPLANE_WITH_TESTS="-Dwith_tests=disabled" +endif + +DATAPLANE_CONFIGURE_FLAGS= \ + $(CONFIGURE_COVERAGE_FLAGS) \ + $(WHOLE_DP_ALL_TESTS) \ + $(DATAPLANE_SANITIZER) \ + $(DATAPLANE_LTO) \ + $(DATAPLANE_FUSED) \ + $(DATAPLANE_WITH_TESTS) + # Don't override the optimisation flags as they differ for different # dataplane make targets export DEB_CFLAGS_MAINT_STRIP = -O2 @@ -39,39 +47,24 @@ export DEB_BUILD_MAINT_OPTIONS=hardening=+format,-fortify,-stackprotector,+relro # Build in a separate directory %: - dh $@ --builddirectory=build --parallel --with autoreconf,systemd + dh $@ --builddirectory=build --parallel --with systemd --buildsystem=meson+ninja -# --enable-dependency-tracking needed if using --builddirectory until Debian bug #807050 is fixed override_dh_auto_configure: - dh_auto_configure -- --enable-dependency-tracking \ - $(CONFIGURE_COVERAGE_FLAGS) \ - $(WHOLE_DP_TEST) \ - $(DATAPLANE_DPI) \ - $(DATAPLANE_LTO) - -override_dh_auto_test: -ifneq (,$(filter pkg.vyatta-dataplane.valgrind,$(DEB_BUILD_PROFILES))) -# -j1 is need until Debian bug 822105 is fixed, so that 'check' depends on 'dataplane_test' - VERBOSE=1 dh_auto_test -- -j1 check-valgrind -else - VERBOSE=1 dh_auto_test -endif + dh_auto_configure -- $(DATAPLANE_CONFIGURE_FLAGS) override_dh_strip: - debian/bin/debug_strip --dbg-package=vyatta-dataplane-dbg - -override_dh_installchangelogs: + debian/bin/debug_strip --auto-dbgsym # _ are replaced with - as the former are not allowed in package names. 
# The generated virtual packages have format: # vyatta-dataplane--- -override_dh_gencontrol: OP_VERSIONS = $(shell build/dataplane --list_cmd_versions \ +override_dh_gencontrol: OP_VERSIONS = $(shell ASAN_OPTIONS=verify_asan_link_order=0:detect_leaks=0 build/src/dataplane --list_cmd_versions \ | sed "s/_/-/g ; \ s/ /-/ ; \ s/$$/,/ ; \ s/^/vyatta-dataplane-op-/" \ | tr '\n' ' ') -override_dh_gencontrol: CFG_VERSIONS = $(shell build/dataplane --list_msg_versions \ +override_dh_gencontrol: CFG_VERSIONS = $(shell ASAN_OPTIONS=verify_asan_link_order=0:detect_leaks=0 build/src/dataplane --list_msg_versions \ | sed "s/_/-/g ; \ s/ /-/ ; \ s/$$/,/ ; \ diff --git a/debian/vyatta-dataplane-dbg.lintian-overrides b/debian/vyatta-dataplane-dbg.lintian-overrides deleted file mode 100644 index 2ae777c4..00000000 --- a/debian/vyatta-dataplane-dbg.lintian-overrides +++ /dev/null @@ -1,2 +0,0 @@ -vyatta-dataplane-dbg: changelog-file-missing-in-native-package - diff --git a/debian/vyatta-dataplane-dev.install b/debian/vyatta-dataplane-dev.install old mode 100644 new mode 100755 index 177fa59b..09fbaae8 --- a/debian/vyatta-dataplane-dev.install +++ b/debian/vyatta-dataplane-dev.install @@ -1,5 +1,34 @@ -usr/include/vyatta-dataplane/json_writer.h -usr/include/vyatta-dataplane/pl_common.h -usr/include/vyatta-dataplane/pl_node.h +#! 
/usr/bin/dh-exec + usr/include/vyatta-dataplane/compiler.h -usr/include/vyatta-dataplane/protobuf.h +usr/include/vyatta-dataplane/config.h +usr/include/vyatta-dataplane/debug.h +usr/include/vyatta-dataplane/dpi.h +usr/include/vyatta-dataplane/dp_test/dp_test_cmd_check.h +usr/include/vyatta-dataplane/dp_test/dp_test_lib.h +usr/include/vyatta-dataplane/dp_test/dp_test_lib_intf.h +usr/include/vyatta-dataplane/dp_test/dp_test_firewall_lib.h +usr/include/vyatta-dataplane/dp_test/dp_test_macros.h +usr/include/vyatta-dataplane/dp_test/dp_test_netlink_state.h +usr/include/vyatta-dataplane/dp_test/dp_test_pktmbuf_lib.h +usr/include/vyatta-dataplane/dp_test/dp_test_session_lib.h +usr/include/vyatta-dataplane/event.h +usr/include/vyatta-dataplane/fal_bfd.h +usr/include/vyatta-dataplane/dp_session.h +usr/include/vyatta-dataplane/feature_commands.h +usr/include/vyatta-dataplane/feature_plugin.h +usr/include/vyatta-dataplane/interface.h +usr/include/vyatta-dataplane/ip.h +usr/include/vyatta-dataplane/ip_checksum.h +usr/include/vyatta-dataplane/ip_forward.h +usr/include/vyatta-dataplane/json_writer.h +usr/include/vyatta-dataplane/lcore_sched.h +usr/include/vyatta-dataplane/pipeline.h +usr/include/vyatta-dataplane/protobuf_util.h +usr/include/vyatta-dataplane/pktmbuf.h +usr/include/vyatta-dataplane/rcu.h +usr/include/vyatta-dataplane/urcu.h +usr/include/vyatta-dataplane/vrf.h + +/usr/lib/${DEB_HOST_MULTIARCH}/pkgconfig/vyatta-dataplane-dev.pc +usr/lib/${DEB_HOST_MULTIARCH}/libvyatta-jsonw.so diff --git a/debian/vyatta-dataplane-dev.lintian-overrides b/debian/vyatta-dataplane-dev.lintian-overrides deleted file mode 100644 index bf503f78..00000000 --- a/debian/vyatta-dataplane-dev.lintian-overrides +++ /dev/null @@ -1,2 +0,0 @@ -vyatta-dataplane-dev: changelog-file-missing-in-native-package - diff --git a/debian/vyatta-dataplane-protocols-versions.lintian-overrides b/debian/vyatta-dataplane-protocols-versions.lintian-overrides deleted file mode 100644 index 3ea22d64..00000000 --- 
a/debian/vyatta-dataplane-protocols-versions.lintian-overrides +++ /dev/null @@ -1 +0,0 @@ -vyatta-dataplane-protocols-versions: changelog-file-missing-in-native-package diff --git a/debian/vyatta-dataplane-sample-plugin.lintian-overrides b/debian/vyatta-dataplane-sample-plugin.lintian-overrides deleted file mode 100644 index 95ade2e6..00000000 --- a/debian/vyatta-dataplane-sample-plugin.lintian-overrides +++ /dev/null @@ -1,2 +0,0 @@ -vyatta-dataplane-sample-plugin: changelog-file-missing-in-native-package -vyatta-dataplane-sample-plugin: unstripped-binary-or-object diff --git a/debian/vyatta-dataplane-test.install b/debian/vyatta-dataplane-test.install new file mode 100755 index 00000000..ba54801e --- /dev/null +++ b/debian/vyatta-dataplane-test.install @@ -0,0 +1,7 @@ +#! /usr/bin/dh-exec + +usr/bin/dataplane_test +/usr/lib/${DEB_HOST_MULTIARCH}/vyatta-dataplane/fal_plugin_test.so +tests/whole_dp/dummyfs/* usr/share/vyatta-dataplane/tests/whole_dp/dummyfs +usr/share/vyatta/dataplane-drivers-default.conf usr/share/vyatta-dataplane/tests +tests/whole_dp/src/platform.conf usr/share/vyatta-dataplane/tests/whole_dp/dummyfs/run/dataplane/ diff --git a/debian/vyatta-dataplane-test.lintian-overrides b/debian/vyatta-dataplane-test.lintian-overrides new file mode 100644 index 00000000..b8c9ed7d --- /dev/null +++ b/debian/vyatta-dataplane-test.lintian-overrides @@ -0,0 +1 @@ +vyatta-dataplane-test: binary-without-manpage diff --git a/debian/vyatta-dataplane.init.d b/debian/vyatta-dataplane.init.d index 9ea5edd4..48b379a2 100755 --- a/debian/vyatta-dataplane.init.d +++ b/debian/vyatta-dataplane.init.d @@ -106,7 +106,7 @@ start() { rm -f $DATAPLANE_SOCKET start-stop-daemon --start --oknodo --quiet --name dataplane \ --pidfile $DATAPLANE_PID --make-pidfile --startas $DATAPLANE \ - --background --nicelevel -20 -- $DATAPLANE_ARGS -- $DPDK_ARGS + --background --nicelevel -10 -- $DATAPLANE_ARGS -- $DPDK_ARGS wait_for_dataplane diff --git a/debian/vyatta-dataplane.install 
b/debian/vyatta-dataplane.install index dbf6cc01..ad8a138c 100644 --- a/debian/vyatta-dataplane.install +++ b/debian/vyatta-dataplane.install @@ -1,5 +1,6 @@ -usr/sbin/dataplane -tools/* lib/vplane debian/vfio_iommu_type1.conf etc/modprobe.d -usr/share/vyatta/dataplane-drivers-default.conf +etc/modprobe.d/mlx4_core.conf etc/vyatta/dataplane-drivers.conf +tools/* lib/vplane +usr/sbin/dataplane +usr/share/vyatta/dataplane-drivers-default.conf diff --git a/debian/vyatta-dataplane.postinst b/debian/vyatta-dataplane.postinst index 85b5630f..d02cef5a 100644 --- a/debian/vyatta-dataplane.postinst +++ b/debian/vyatta-dataplane.postinst @@ -5,18 +5,10 @@ set -e if [ "x$1" = xconfigure ]; then - if [ ! `getent group dataplane >/dev/null` ]; then - deluser --quiet --system dataplane - fi adduser --quiet --system --group --home /var/run/dataplane dataplane # Required for route-broker interaction usermod dataplane -a -G routeadm - - log_dir=/var/log/dataplane - if [ -d $log_dir ]; then - rm -rf $log_dir - fi fi #DEBHELPER# diff --git a/debian/vyatta-dataplane.service b/debian/vyatta-dataplane.service index 16eb1190..1273e342 100644 --- a/debian/vyatta-dataplane.service +++ b/debian/vyatta-dataplane.service @@ -11,12 +11,12 @@ ConditionKernelCommandLine=!no-dataplane Type=notify Restart=always LimitNOFILE=10000 -Nice=-20 +Nice=-10 Slice=dataplane.slice Environment=TMP_EAL=/run/dataplane/eal-args Environment=HUGEPAGES=0 Environment="DATAPLANE_ARGS=-u dataplane -g adm" -Environment="DPDK_ARGS=--syslog local6 --log-level 7" +Environment="DPDK_ARGS=--syslog local6 --log-level 8" EnvironmentFile=-/etc/default/dataplane EnvironmentFile=/run/dataplane/eal-args ExecStartPre=/bin/rm -f /run/dataplane/platform.conf diff --git a/include/bridge_flags.h b/include/bridge_flags.h index 04073ae0..fcfebe70 100644 --- a/include/bridge_flags.h +++ b/include/bridge_flags.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2018, AT&T Intellectual Property. 
+ * Copyright (c) 2017-2020, AT&T Intellectual Property. * All rights reserved. * Copyright (c) 2016 by Brocade Communications Systems, Inc. * All rights reserved. @@ -7,8 +7,8 @@ * SPDX-License-Identifier: LGPL-2.1-only */ -#ifndef BRIDGE_FLAGS_H -#define BRIDGE_FLAGS_H +#ifndef VYATTA_DATAPLANE_BRIDGE_FLAGS_H +#define VYATTA_DATAPLANE_BRIDGE_FLAGS_H /* Bridge STP port states */ enum bridge_ifstate { @@ -30,4 +30,4 @@ enum bridge_ifstate { #define STP_INST_MAX (STP_INST_COUNT - 1) #define STP_INST_IST 0 -#endif /* BRIDGE_FLAGS_H */ +#endif /* VYATTA_DATAPLANE_BRIDGE_FLAGS_H */ diff --git a/include/bridge_vlan_set.h b/include/bridge_vlan_set.h index 0159f89e..d9048341 100644 --- a/include/bridge_vlan_set.h +++ b/include/bridge_vlan_set.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017, AT&T Intellectual Property. + * Copyright (c) 2017,2019-2020, AT&T Intellectual Property. * All rights reserved. * Copyright (c) 2017 by Brocade Communications Systems, Inc. * All rights reserved. @@ -7,8 +7,8 @@ * SPDX-License-Identifier: LGPL-2.1-only */ -#ifndef BRIDGE_VLAN_SET_H -#define BRIDGE_VLAN_SET_H +#ifndef VYATTA_DATAPLANE_BRIDGE_VLAN_SET_H +#define VYATTA_DATAPLANE_BRIDGE_VLAN_SET_H #include #include @@ -53,6 +53,11 @@ bool bridge_vlan_set_is_member(struct bridge_vlan_set *set, uint16_t vlan); */ void bridge_vlan_set_clear(struct bridge_vlan_set *set); +/* + * Checks if the VLAN set is empty. + */ +bool bridge_vlan_set_is_empty(struct bridge_vlan_set *set); + /* * Callback for synchronization algorithm, takes a vlan id and the cb_data * that is passed to the synchronize function. 
@@ -70,4 +75,4 @@ void bridge_vlan_set_synchronize(struct bridge_vlan_set *old, bridge_vlan_synchronize_cb remove_cb, void *cb_data); -#endif /* BRIDGE_VLAN_SET_H */ +#endif /* VYATTA_DATAPLANE_BRIDGE_VLAN_SET_H */ diff --git a/include/compiler.h b/include/compiler.h index 0e74c70b..73319e3b 100644 --- a/include/compiler.h +++ b/include/compiler.h @@ -1,5 +1,10 @@ -#ifndef COMPILER_H -#define COMPILER_H +/* + * Copyright (c) 2018-2020, AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ +#ifndef VYATTA_DATAPLANE_COMPILER_H +#define VYATTA_DATAPLANE_COMPILER_H /* For CLANG vs GCC differences */ #ifdef __clang__ @@ -12,8 +17,9 @@ #define __unroll_loops #define __hot_data __attribute__((section("hot"))) #define __unused __attribute__ ((unused)) - #define __externally_visible #define expect_hint(expr, c) __builtin_expect(expr, c) + #define __FOR_EXPORT __attribute__((visibility("default"))) + #define IGNORE_SANITIZER __attribute__((no_sanitize_address)) #else /* ! __clang__ */ # ifdef __GNUC__ #define __cold_label __attribute__((cold)) @@ -25,8 +31,9 @@ #define __unroll_loops __attribute__((optimize("unroll-loops"))) #define __hot_data __attribute__((section("hot"))) #define __unused __attribute__ ((unused)) - #define __externally_visible __attribute__ ((externally_visible)) #define expect_hint(expr, c) __builtin_expect(expr, c) + #define __FOR_EXPORT __attribute__((visibility("default"))) + #define IGNORE_SANITIZER __attribute__((no_sanitize_address)) # else /* ! 
__GNUC__ */ #define __cold_label #define __cold_func @@ -37,8 +44,9 @@ #define __unroll_loops #define __hot_data #define __unused - #define __externally_visible #define expect_hint(expr, c) expr + #define __FOR_EXPORT + #define IGNORE_SANITIZER # endif #endif /* __clang__ */ @@ -50,4 +58,4 @@ #define unlikely(expr) expect_hint((expr), 0) #endif /* unlikely */ -#endif /* COMPILER_H */ +#endif /* VYATTA_DATAPLANE_COMPILER_H */ diff --git a/include/config.h b/include/config.h new file mode 100644 index 00000000..12fecf36 --- /dev/null +++ b/include/config.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2020, AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ +#ifndef VYATTA_DATAPLANE_CONFIG_H +#define VYATTA_DATAPLANE_CONFIG_H + +/* + * Callback from config parser, in inih lib format for each name value. + * + * @param[in, out] arg Argument passed through from the call to + * dp_parse_config_files(). + * @param[in] section The section within the config file that the given + * name is in. + * @param[in] name The name of the field within the config file. + * @param[in] value The value for the given field. + * + * @return 0 on error + * @return 1 on success. + */ +typedef int (dp_parse_config_fn)(void *arg, const char *section, + const char *name, const char *value); + +/* + * Walk through the config files with a user provided parse function. This + * allows plugins to have access to the config files, without the core + * dataplane code needing to understand every line in the config file. + * + * @param[in] fn The callback function to call for each line in the + * config file.
+ * @param[in, out] arg Argument passed through to the callback function + * + * @return 0 on success + * @return -ve for failure + */ +int dp_parse_config_files(dp_parse_config_fn *fn, + void *arg); + +#endif /* VYATTA_DATAPLANE_CONFIG_H */ diff --git a/include/debug.h b/include/debug.h new file mode 100644 index 00000000..33ed0992 --- /dev/null +++ b/include/debug.h @@ -0,0 +1,130 @@ +/* + * Copyright (c) 2020, AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ + +#ifndef VYATTA_DATAPLANE_DEBUG_H +#define VYATTA_DATAPLANE_DEBUG_H + +#include +#include + +/* + * Intro + * ===== + * + * The dataplane debug is built on top of the dpdk rte_log infra. + * That provides different users of logging, and features can add + * themselves to those groups. + * + * Logging + * ======= + * + * The default logging level is RTE_LOG_INFO, and this can be changed + * by calling dpdk APIs. + * + * The dataplane uses 5 of the dpdk user types as well as the built in + * dpdk types. + * + * RTE_LOGTYPE_USER1 for infrastructure debugs + * RTE_LOGTYPE_USER2 for layered devices, bridges, tunnels, etc + * RTE_LOGTYPE_USER3 for routing, arp etc. + * RTE_LOGTYPE_USER4 for features + * RTE_LOGTYPE_USER5 for crypto + * + * Features can add their own defined user type: For example: + * + * #define RTE_LOGTYPE_MY_FEATURE RTE_LOGTYPE_USER4 + * + * Features can then log by calling the dpdk log function + * rte_log() and using their own defined user type. If the + * log level specified is equal to or higher than the configured + * logging level then the log will be shown. + * There are 2 configured logging levels, a global one and a + * per type one. Logs are shown if both levels are high enough. + * + * For example: + * rte_log(RTE_LOG_INFO, RTE_LOGTYPE_MY_FEATURE, + * "my feature debug %s", "is on"); + * + * + * + * Debug + * ===== + * + * A debug service is also offered where debug for individual 'events' can + * be registered and enabled/disabled.
If the debug bit for the 'event' is + * not set then there is no call to the log. To make debug arrive at the + * log the logging level needs to be changed to RTE_LOG_DEBUG. + */ + +/* + * The log type for general dataplane debugs/logs. + */ +#define RTE_LOGTYPE_DATAPLANE RTE_LOGTYPE_USER1 + +/* + * Register an event type. Debug for this event type can then be turned on/off + * and the ID returned can then be used in calls to the debug macro. If that + * event is enabled then the debug will be generated, otherwise it will not be. + * In the case when the debug is not enabled the only cost will be the check + * to see if that debug type is enabled. + * + * This must be called on the main thread. + * + * @param[in] event_type A string representing a debug event type. + * @return A value that has a single bit set. This can then be used as the + * event_id in the DP_DEBUG_EVENT macro + * 0 for failure + */ +uint64_t dp_debug_register(const char *event_type); + +/* + * Enable the given debug type. The type must already be registered. + * + * @param[in] event_type An already registered event type. + * @return 0 for success + * -ve for failure + */ +int dp_debug_enable(const char *event_type); + +/* + * Disable the given debug type. The type must already be registered. + * + * @param[in] event_type An already registered event type. + * @return 0 for success + * -ve for failure + */ +int dp_debug_disable(const char *event_type); + +/* + * Is the given debug event id enabled. + * + * @param[in] event_id An event id returned when registering an event type. + * + * @return true if the debug type is enabled + * false if the debug type is not enabled + */ +bool dp_debug_is_enabled(uint64_t event_id); + +/* + * Macro to selectively enable logging by feature. If the given debug event + * is enabled then generate a debug message.
This message will only make it + * to the log if the level specified is equal to or higher than the + * configured logging levels (the global level and the level for the type). + * + * @param[in] event_id The event id returned when registering an event type + * @param[in] level The level is between EMERG and DEBUG + * (note does not include the RTE_LOG at the start). + * @param[in] type The type should be the type defined for this feature, + * for example MY_FEATURE based on the definition of + * RTE_LOGTYPE_MY_FEATURE. + */ +#define DP_DEBUG_EVENT(event_id, level, type, fmt, args...) do { \ + if (unlikely(dp_debug_is_enabled(event_id))) \ + rte_log(RTE_LOG_##level, RTE_LOGTYPE_##type, \ + #type ": " fmt, ## args); \ + } while (0) + +#endif /* VYATTA_DATAPLANE_DEBUG_H */ diff --git a/include/dp_session.h b/include/dp_session.h new file mode 100644 index 00000000..8201da2e --- /dev/null +++ b/include/dp_session.h @@ -0,0 +1,312 @@ +/* + * Copyright (c) 2020, AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ +#ifndef VYATTA_DATAPLANE_DP_SESSION_H +#define VYATTA_DATAPLANE_DP_SESSION_H + +/** + * This file provides declarations for accessing firewall sessions. + * + * The dataplane firewall creates and maintains states for flows matching + * stateful firewall NAT rules in the session structure (struct session). + * A session changes as the packets matching the session are processed by + * the dataplane firewall. + * + * These functions provide a way to register a callback function to be + * called when a passing packet affects a session's state. They also provide + * a set of utility functions that allow retrieval of session data. + */ + +#include +#include +#include +#include +#include + +/** + * Session types + * Bit masks for sessions.
+ */ +enum dp_session_type { + SESSION_TYPE_NONE = 0, + SESSION_TYPE_FW = 1, /**< A stateful firewall session */ + SESSION_TYPE_NAT = (1 << 1), /**< The session is natted */ + SESSION_TYPE_NAT64 = (1 << 2), /**< IPv6 to IPv4 nat session */ + SESSION_TYPE_NAT46 = (1 << 3), /**< IPv4 to IPv6 nat */ + SESSION_TYPE_ALG = (1 << 4), /**< Session is for an ALG */ +}; + +#define dp_is_session_type(flag, t) ((flag) & SESSION_TYPE_ ## t) + +/** + * Session event Hooks. + * + * A session_watch callback function is called on occurrence of one of + * the following session events. + */ +enum dp_session_hook { + SESSION_ACTIVATE, /**< A session is being activated */ + SESSION_STATE_CHANGE, /**< Session state has changed */ + SESSION_STATS_UPDATE, /**< Session stats are updated */ + SESSION_EXPIRE, /**< Session has expired - may be deleted */ + SESSION_MAX, +}; + +/** + * Sessions protocol states. + */ +enum dp_session_state { + SESSION_STATE_NONE = 0, + SESSION_STATE_NEW, + SESSION_STATE_ESTABLISHED, + SESSION_STATE_TERMINATING, + SESSION_STATE_CLOSED, +} __attribute__ ((__packed__)); + +#define SESSION_STATE_FIRST SESSION_STATE_NONE +#define SESSION_STATE_LAST SESSION_STATE_CLOSED +#define SESSION_STATE_SIZE (SESSION_STATE_LAST + 1) + +static inline bool +dp_session_state_is_valid(enum dp_session_state state) +{ + static_assert(SESSION_STATE_FIRST == 0, + "SESSION_STATE_FIRST != 0"); + return state <= SESSION_STATE_LAST; +} + +static inline const char * +dp_session_state_name(enum dp_session_state state, bool upper) +{ + switch (state) { + case SESSION_STATE_NEW: + return upper ? "OPENING" : "opening"; + case SESSION_STATE_ESTABLISHED: + return upper ? "ESTABLISHED" : "established"; + case SESSION_STATE_TERMINATING: + return upper ? "CLOSING" : "closing"; + case SESSION_STATE_CLOSED: + return upper ? "CLOSED" : "closed"; + case SESSION_STATE_NONE: + break; + }; + return upper ? 
"NONE" : "none"; +} + +static inline enum dp_session_state dp_session_name2state(const char *name) +{ + if (!strcmp(name, "new") || !strcmp(name, "opening")) + return SESSION_STATE_NEW; + else if (!strcmp(name, "established")) + return SESSION_STATE_ESTABLISHED; + else if (!strcmp(name, "terminating") || !strcmp(name, "closing")) + return SESSION_STATE_TERMINATING; + else if (!strcmp(name, "closed")) + return SESSION_STATE_CLOSED; + else + return SESSION_STATE_NONE; +} + +/** + * Session packing type. + * + * Used in dp_session_pack to indicate type of packing needed. + */ +enum session_pack_type { + SESSION_PACK_NONE = 0, /**< packing type not set */ + SESSION_PACK_FULL, /**< pack full session for later restoration */ + SESSION_PACK_UPDATE, /**< pack only session states and stats */ +} __attribute__ ((__packed__)); + +/** Forward declaration for session handle */ +struct session; + +/** + * Typedef for session watch callback function. + * + * Session watch function called by the dataplane session + * management code as packets causes changes to the session. + * This function call is dp_rcu_read_lock() protected. + * + * @param[in] session - pointer to the affected session. + * The existence of the session pointer is guaranteed only + * @param [in] hook - reason for which the call back is called. + * @param [in] data - pointer to the context passed at the time of + * registration. + * + * session watch callback should never block as this function is + * called from dataplane forwarding path and can affect forwarding + * performance. + */ +typedef void (session_watch_fn_t) (struct session *session, + enum dp_session_hook hook, void *data); + +/** + * typedef for session walk callback. + */ +typedef int (dp_session_walk_t)(struct session *session, void *data); + +/** + * A structure used for registering a session watcher callback. 
+ */ +struct session_watch { + session_watch_fn_t *fn; /**< callback function */ + unsigned int types; /**< bitwise or of SESSION_TYPE_* to watch */ + void *data; /**< callback data */ + const char *name; /**< Session watcher name used for logging */ +}; + +/** + * Register a session watcher. Only one session watcher may be + * registered at a time. + * + * @param [in] se_watch - a filled up struct session_watch. + * + * @return - non-negative watcher id on success, + * -EBUSY if another watcher is already registered. + * -errno other errors. + */ +int dp_session_watch_register(struct session_watch *se_watch); + +/** + * unregister a previously registered watcher. + * + * @param [in] watcher_id - session watcher to unregister. + * + * @return - 0 on success + * - ENOENT - No such watch registered. + */ +int dp_session_watch_unregister(int watcher_id); + +/** + * get an id for use with set/get private data. + * + * @return - non-negative integer id on success. + * - EBUSY if other user data is already registered. + */ +int dp_session_user_data_register(void); + +/** + * indicate that private data is no longer used. + * + * @return - non-negative integer id on success. + * - ENOENT for for invalid id + */ +int dp_session_user_data_unregister(int id); + +/** + * attach private data to a session. + * + * if data pointer is non NULL it is set by atomic compare and exchange + * with NULL. + * If the data pointer is NULL, the old data pointer is cleared. + * + * @param [in] session - the affected session + * @param [in] data - pointer to private data. + * + * @return - 1 if the data can be set correctly. + * - 0 if the data can't be set + */ +bool dp_session_set_private(int id, struct session *session, void *data); + +/** + * get a session's attached private data + * + * @param [in] session - session with the private data attached + * + * @return - the pointer to attached private data. 
+ */ +void *dp_session_get_private(int id, const struct session *session); + +/** + * Run a function over all sessions. + * Session walk gets terminated if the callback returns nonzero. + * + * @param fn - callback function. + * @param data - pointer to data or NULL + * @param types - bit mask of dp_session_types to walk + * + * @return - return of callback function. + */ +int dp_session_table_walk(dp_session_walk_t *fn, void *data, + unsigned int types); + +/** + * Get a session's unique id. + * + * @param [in] session + */ +uint64_t dp_session_unique_id(const struct session *session); + +/** + * Get a session's generic protocol state + * + * @param [in] session + */ +enum dp_session_state dp_session_get_state(const struct session *session); + +/** + * Get a session's generic protocol state name + * + * @param [in] session + * @param [in] upper + */ +const char *dp_session_get_state_name(const struct session *session, + bool upper); + +/** + * is session in an expired state? + * + * @param [in] session + */ +bool dp_session_is_expired(const struct session *session); + +/** + * is session in established state? + * + * @param [in] session + */ +bool dp_session_is_established(const struct session *session); + +/** + * Get maximum buffer size required to pack a session. + * + * @return - maximum session buffer size required to pack a session. + */ +uint32_t dp_session_buf_size_max(void); + +/** + * Serialize a session into the buffer. + * + * Packed sessions are used to restore the session on a different router. + * Two different packing functions are provided, the first packs complete + * session information that can be used to fully recreate a session later using + * dp_session_restore(). dp_session_pack() may also be used + * to pack only stats and states of sessions and later the packed data may be + * used to update an already restored session. + * + * @param [in] session - session to be packed + * @param [in, out] buf - session buffer pointer.
+ * @param [in] size - size of buffer, + * @param [in] spt - SESSION_PACK_FULL, SESSION_PACK_UPDATE + * + * @return - packed length on success + * -errno on error. + */ +int dp_session_pack(struct session *session, void *buf, uint32_t size, + enum session_pack_type spt, struct session **session_peer); + +/* + * restore a session from the packed data or update its state. + * If the buf contains a SESSION_PACK_FULL payload any old session + * with same session key will be deleted. + * + * @param [in] buf - buffer to be restored + * @param [in] size_t size - length of buffer + * @param [out] spt - pack type. + */ +int dp_session_restore(void *buf, uint32_t size, enum session_pack_type *spt); + +#endif diff --git a/tests/whole_dp/src/dp_test_cmd_check.h b/include/dp_test/dp_test_cmd_check.h similarity index 80% rename from tests/whole_dp/src/dp_test_cmd_check.h rename to include/dp_test/dp_test_cmd_check.h index 41b5c53c..5c479c25 100644 --- a/tests/whole_dp/src/dp_test_cmd_check.h +++ b/include/dp_test/dp_test_cmd_check.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015-2016 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -14,8 +14,17 @@ #include #include +#include "../vrf.h" #include "dp_test_lib.h" +#define DP_TEST_POLL_INTERVAL 1 /* ms */ +#define DP_TEST_POLL_TOTAL_TIME 2000 /* ms */ +#define DP_TEST_POLL_COUNT (DP_TEST_POLL_TOTAL_TIME / DP_TEST_POLL_INTERVAL) + +#define DP_TEST_WAIT_SEC_DEFAULT 1 + +extern uint32_t dp_test_wait_sec; + #define TEST_MAX_CMD_LEN 1000 #define TEST_MAX_REPLY_LEN 10000 @@ -29,7 +38,8 @@ typedef enum dp_test_check_str_type_ { struct dp_test_cmd_check { const char *cmd; - const char *expected; + int exp_count; + const char **expected; char *actual; bool exp_err; bool negate_match; @@ -62,6 +72,15 @@ _dp_test_check_state_show(const char *file, int line, const char *cmd, _dp_test_check_state_show(__FILE__, __LINE__, cmd, expected, \ print, DP_TEST_CHECK_STR_SUBSET) +void +_dp_test_check_state_show_one_of(const char *file, int line, const char *cmd, + int exp_count, const char **expected, + bool print, dp_test_check_str_type type); +#define dp_test_check_state_show_one_of(cmd, exp_count, expected, print) \ + _dp_test_check_state_show_one_of(__FILE__, __LINE__, cmd, \ + exp_count, expected, \ + print, DP_TEST_CHECK_STR_SUBSET) + void _dp_test_check_state_gone_show(const char *file, int line, const char *cmd, const char *expected, bool print, @@ -97,6 +116,34 @@ _dp_test_check_json_poll_state(const char *cmd_str, gone, poll_cnt, \ __FILE__, __func__, __LINE__) +void +_dp_test_check_json_poll_state_interval(const char *cmd_str, + json_object * expected_json, + json_object * filter_json, + enum dp_test_check_json_mode mode, + bool negate_match, int poll_cnt, + unsigned int poll_interval, + const char *file, const char *func, + int line); +#define dp_test_check_json_poll_state_interval(cmd, expected, mode, gone, \ + poll_cnt, poll_interval) \ + _dp_test_check_json_poll_state_interval(cmd, expected, NULL, mode, \ + gone, poll_cnt, poll_interval, \ + __FILE__, __func__, __LINE__) + +typedef bool (*dp_test_state_pb_cb)(void *data, int len, void *arg); + 
+void +_dp_test_check_pb_poll_state(void *cmd, int len, + dp_test_state_pb_cb cb, + void *arg, + int poll_cnt, + const char *file, const char *func, int line); +#define dp_test_check_pb_poll_state(cmd, len, cb, arg, gone, poll_cnt) \ + _dp_test_check_pb_poll_state(cmd, len, cb, arg, \ + poll_cnt, \ + __FILE__, __func__, __LINE__) + void _dp_test_check_json_state(const char *cmd_str, json_object *expected_json, json_object *filter_json, @@ -108,6 +155,16 @@ _dp_test_check_json_state(const char *cmd_str, json_object *expected_json, _dp_test_check_json_state(cmd_str, expected_json, NULL, mode, \ gone, __FILE__, __func__, __LINE__) +void +_dp_test_check_pb_state(void *buf, int len, + dp_test_state_pb_cb cb, + void *arg, + const char *file, const char *func, + int line); +#define dp_test_check_pb_state(buf, len, cb, arg) \ + _dp_test_check_pb_state(buf, len, cb, arg, \ + __FILE__, __func__, __LINE__) + void _dp_test_wait_for_route(const char *route_string, bool match_nh, bool all, const char *file, const char *func, int line); #define dp_test_wait_for_route(route_string, match_nh) \ @@ -207,7 +264,8 @@ void _dp_test_verify_route_no_neigh_present(const char *route, _dp_test_verify_route_no_neigh_present(route, \ __FILE__, __func__, __LINE__) -void _dp_test_verify_route_neigh_present(const char *route, const char *ifp, +void _dp_test_verify_route_neigh_present(const char *route, + const char *interface, bool set, const char *file, const char *func, int line); #define dp_test_verify_route_neigh_present(route, interface, set) \ @@ -230,7 +288,8 @@ void _dp_test_verify_route_no_neigh_created(const char *route, _dp_test_verify_route_no_neigh_created(route, \ __FILE__, __func__, __LINE__) -void _dp_test_verify_route_neigh_created(const char *route, const char *ifp, +void _dp_test_verify_route_neigh_created(const char *route, + const char *interface, bool set, const char *file, const char *func, int line); #define dp_test_verify_route_neigh_created(route, interface, set)
\ diff --git a/include/dp_test/dp_test_firewall_lib.h b/include/dp_test/dp_test_firewall_lib.h new file mode 100644 index 00000000..d5939f89 --- /dev/null +++ b/include/dp_test/dp_test_firewall_lib.h @@ -0,0 +1,149 @@ +/* + * Copyright (c) 2020, AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + * + * Whole dataplane test firewall library + */ +#ifndef __DP_TEST_FIREWALL_LIB_H__ +#define __DP_TEST_FIREWALL_LIB_H__ + +/* + * A firewall comprises a single firewall group structure and one or + * more firewall rule structures. + * + * First step is to create a firewall rules array, terminated with NULL_RULE: + * + * struct dp_test_fw_rule_t rules[] = { + * { + * .rule = "10", + * .pass = PASS, + * .stateful = STATELESS, + * .npf = "pass proto 6"}, + * RULE_DEF_BLOCK, + * NULL_RULE }; + * + * There are some predefined rules below, e.g. RULE_DEF_BLOCK is the same as + * configuring the default action to 'block' + * + * Second step is to create the firewall group: + * + * struct dp_test_fw_ruleset_t fw = { + * .rstype = "fw-in", + * .name = "FW1_IN", + * .enable = 1, + * .attach_point = "dp2T1", + * .fwd = FWD, + * .dir = "in", + * .rules = rules + * }; + * + * The firewall group is added to the dataplane and assigned to an interface + * by calling: + * + * dp_test_fw_ruleset_add(&fw, "fw", false) + */ + +/* + * Simple, *short* definitions that make a firewall test matrix + * more readable. + */ +#define STATELESS false +#define STATEFUL true + +#define BLOCK false +#define PASS true + +#define ASSIGN true +#define REMOVE false + +#define FORWARDS false +#define REVERSE true + +#define FWD true +#define REV false + + + +/* + * Firewall rule + */ +struct dp_test_fw_rule_t { + const char *rule; /* Rule number e.g. "10" */ + bool pass; /* BLOCK or PASS */ + bool stateful; /* STATELESS or STATEFUL */ + const char *npf; /* Actual rule e.g.
"pass proto 6" */ +}; + +/* + * npf ruleset + * + * If 'attach_point' is non-NULL then the ruleset is attached to that + * attach_point when dp_test_fw_ruleset_add is called. + * + * 'fwd' is a convenience variable to describe if the ruleset is used in the + * forwards or reverse packet flow for a particular test. It is not used by + * any library code. Currently it is only used by the test arrays in + * dp_test_npf_fw.c. Other users may ignore it. + */ +struct dp_test_fw_ruleset_t { + const char *rstype; /* Feature name e.g. "fw-in" */ + const char *name; /* Ruleset name e.g. "FW1" */ + bool enable; + const char *attach_point; /* Attach point e.g. interface name */ + bool fwd; /* true for forwards direction */ + const char *dir; /* "in" or "out" */ + /* + * Array of rules, terminated by a rule with NULL_RULE + */ + struct dp_test_fw_rule_t *rules; +}; + +/* + * Note, the dataplane has changed to only accept protocol numbers. and + * not strings + */ +#define FW_PROTO_TCP "proto-final=6" +#define FW_PROTO_UDP "proto-final=17" + +/* + * firewall rule (struct dp_test_fw_rule_t) templates + */ +#define NULL_RULE {NULL, BLOCK, STATELESS, NULL} +#define RULE_DEF_PASS {"10000", PASS, STATELESS, ""} +#define RULE_DEF_BLOCK {"10000", BLOCK, STATELESS, ""} + +/* struct dp_test_fw_ruleset_t templates */ +#define NULL_FW {NULL, NULL, 0, NULL, 0, "-", NULL} +/* + * Add a firewall ruleset. Attach to attach_point if rset->attach_point is set. + * + * If 'verify' is set we check the ruleset has been added to the dataplane. + */ +void +_dp_test_fw_ruleset_add(struct dp_test_fw_ruleset_t *rset, + const char *class, bool debug, bool verify, + const char *file, int line); + +#define dp_test_fw_ruleset_add(rs, class, debug) \ + _dp_test_fw_ruleset_add(rs, class, debug, \ + true, __FILE__, __LINE__) + + +/* + * Remove a firewall ruleset from and interface and delete it + * + * If 'verify' is set we check the ruleset has been removed from the + * dataplane. 
+ */ +void +_dp_test_fw_ruleset_del(struct dp_test_fw_ruleset_t *rset, + const char *class, bool debug, bool verify, + const char *file, int line); + +#define dp_test_fw_ruleset_del(fw, debug) \ + _dp_test_fw_ruleset_del(fw, "fw", debug, \ + true, __FILE__, __LINE__) + + +#endif diff --git a/tests/whole_dp/src/dp_test_lib.h b/include/dp_test/dp_test_lib.h similarity index 67% rename from tests/whole_dp/src/dp_test_lib.h rename to include/dp_test/dp_test_lib.h index 978c365a..1f133b18 100644 --- a/tests/whole_dp/src/dp_test_lib.h +++ b/include/dp_test/dp_test_lib.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015-2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -16,32 +16,17 @@ #include #include - -#include "if_var.h" +#include #include "dp_test_pktmbuf_lib.h" +#include "protobuf/IPAddress.pb-c.h" + #define DP_TEST_MAX_NHS 32 #define DP_TEST_MAX_LBLS 16 -#define ETHER_TYPE_MPLS 0x8847 +#define RTE_ETHER_TYPE_MPLS 0x8847 typedef uint32_t label_t; -extern int spath_pipefd[2]; -extern int shadow_pipefd[DATAPLANE_MAX_PORTS]; - -/* Packet for read/readv. 
This can contain the user provided iov's */ -struct dp_read_pkt { - struct rte_mbuf *pkt; - portid_t port; - struct meta { - uint32_t ifindex; - uint16_t flags; - } m; - struct pi { - uint16_t proto; - } p; -}; - struct dp_test_prefix { struct dp_test_addr addr; uint8_t len; @@ -54,6 +39,7 @@ struct dp_test_nh { label_t labels[DP_TEST_MAX_LBLS]; bool neigh_created; bool neigh_present; + bool backup; }; struct dp_test_route { @@ -116,40 +102,6 @@ dp_test_set_tcphdr(struct rte_mbuf *m, uint16_t src_port, uint16_t dst_port); uint16_t dp_test_calc_udptcp_chksum(struct rte_mbuf *m); -/* - * Forwarding Result enum - */ -enum dp_test_fwd_result_e { - DP_TEST_FWD_LOCAL, - DP_TEST_FWD_DROPPED, - DP_TEST_FWD_CONSUMED, - DP_TEST_FWD_FORWARDED, - DP_TEST_FWD_UNDEFINED, -}; - -struct dp_test_expected; - -typedef void (*validate_cb)(struct rte_mbuf *pak, - struct ifnet *ifp, - struct dp_test_expected *expected, - enum dp_test_fwd_result_e fwd_result); - -/* - * Helper function to allow an idiom where we keep extending a string - * into a fixed size buffer with printf style calls and keep a running - * total of the number of non-null chars written. - * - * We return the number of characters in the string that results from - * the printf unless the string with its null exactly fills the - * remaining space at which point were return the remaining space. So - * subsequent calls will be given remaining == 0. - */ -int spush(char *s, size_t remaining, const char *format, ...) - __attribute__ ((__format__(printf, 3, 4))); - -void -dp_test_str_trim(char *str, uint16_t start_trim, uint16_t end_trim); - /* * Tests often need to check the result of forwarding a packet. 
These functions * allocate a structure that is used to build an expected state, which can @@ -176,7 +128,7 @@ dp_test_str_trim(char *str, uint16_t start_trim, uint16_t end_trim); struct dp_test_expected * dp_test_exp_create(struct rte_mbuf *test_pak); struct dp_test_expected * -dp_test_exp_create_m(struct rte_mbuf *test_pak, int m); +dp_test_exp_create_m(struct rte_mbuf *test_pak, int count); void dp_test_exp_append_m(struct dp_test_expected *exp, struct rte_mbuf *test_pak, int count); @@ -186,7 +138,7 @@ dp_test_exp_append_m(struct dp_test_expected *exp, struct rte_mbuf *test_pak, * packet that we expect to see. */ struct dp_test_expected * -dp_test_exp_create_with_packet(struct rte_mbuf *test_pak); +dp_test_exp_create_with_packet(struct rte_mbuf *exp_pak); void dp_test_exp_delete(struct dp_test_expected *exp); @@ -198,7 +150,7 @@ void dp_test_exp_set_check_start(struct dp_test_expected *exp, uint32_t start); void -dp_test_exp_set_fwd_status(struct dp_test_expected *exp, int); +dp_test_exp_set_fwd_status(struct dp_test_expected *exp, int status); void dp_test_exp_set_fwd_status_m(struct dp_test_expected *exp, @@ -236,11 +188,8 @@ void dp_test_exp_set_pak_m(struct dp_test_expected *exp, unsigned int packet, struct rte_mbuf *m); -validate_cb dp_test_exp_get_validate_cb(struct dp_test_expected *); - -validate_cb dp_test_exp_set_validate_cb(struct dp_test_expected *, validate_cb); -void *dp_test_exp_get_validate_ctx(struct dp_test_expected *); +void *dp_test_exp_get_validate_ctx(struct dp_test_expected *exp); void *dp_test_exp_set_validate_ctx(struct dp_test_expected *exp, void *ctx, bool auto_free); @@ -305,63 +254,85 @@ dp_test_pak_inject(struct rte_mbuf **paks_to_send, uint32_t num_paks, struct dp_test_expected *expected, const char *test_type); void dp_test_intf_wait_until_processed(struct rte_ring *ring); + +extern struct dp_test_expected *dp_test_global_expected; + +enum dp_test_fwd_result_e { + DP_TEST_FWD_LOCAL, + DP_TEST_FWD_DROPPED, + DP_TEST_FWD_CONSUMED, + 
DP_TEST_FWD_FORWARDED, + DP_TEST_FWD_UNDEFINED, +}; + /* - * Simulate injection of packet into the dataplane from the kernel + * Create a fully formatted dataplane protobuf config message. + * + * @param str [in] The string representing the type of the data message + * @param data [in, out] A formatted protobuf message for a feature. This + * will be freed by this function once sent. + * @param data_len [in] The length of the formatted protobuf data + * + * Create a dataplane envelope (that all the protobuf messages are packed into). + * The contents of this are the string which is used to determine the handler + * at the dataplane and the actual data. + * + * @return No return value as it asserts if there is a failure. */ +void dp_test_lib_pb_wrap_and_send_pb(const char *str, + void *data, size_t data_len); +void dp_test_send_config(const char *cmd_fmt_str, ...); +char *dp_test_console_request(const char *request, bool print); -void _dp_test_send_slowpath_pkt(struct rte_mbuf *pak, - struct dp_test_expected *expected, - const char *file, const char *func, int line); +int dp_ut_plugin_init(const char **name); -#define dp_test_send_slowpath_pkt(pak, expected) \ - _dp_test_send_slowpath_pkt(pak, expected, \ - __FILE__, __func__, __LINE__) +int dp_test_zmsg_popu32(zmsg_t *msg, uint32_t *p); -/* Inject packet on .spath interface from kernel */ -void _dp_test_send_spath_pkt(struct rte_mbuf *pak, const char *virt_oif_name, - struct dp_test_expected *expected, - const char *file, const char *func, int line); +typedef unsigned int (dp_test_event_msg_hdlr)(const char *event, zmsg_t *msg); -#define dp_test_send_spath_pkt(pak, virt_oif_name, expected) \ - _dp_test_send_spath_pkt(pak, virt_oif_name, expected, \ - __FILE__, __func__, __LINE__) +void dp_test_register_event_msg(dp_test_event_msg_hdlr handler); +void dp_test_unregister_event_msg(void); -extern struct dp_test_expected *dp_test_global_expected; +#define DP_MAX_EXTRA_CFG_LINES 100 +/* + * Some features need to add 
lines to the platform config file to allow + * proper testing. For example a feature that would typically create a + * tcp session via normal config might want to have code to allow that + * to be overridden in tests and to create an ipc connections to the + * test harness. This API allows the features to add platform config + * within the 'dataplane' section of the config file. Plugins that need + * this API should call it as part of dp_ut_plugin_init(). + * + * At most DP_MAX_EXTRA_CFG_LINES can be added. + * + * @param [in] argc Count of the number of lines in argv + * @param [in] argv Array of size argc that contains the lines that the + * feature wants to add. They should be of the form + * = + * + * @return 0 on success + * -ve on failure + */ +int dp_test_add_to_cfg_file(int argc, char **argv); -struct ifnet; -void -dp_test_pak_verify(struct rte_mbuf *pak, struct ifnet *ifp, - struct dp_test_expected *expected, - enum dp_test_fwd_result_e fwd_result); - -/* Read packet context processing functions */ -void dp_test_inject_pkt_slow_path(struct rte_mbuf *buf, portid_t port, - uint32_t ifindex, uint16_t flags, uint16_t proto); -struct rte_mbuf *dp_test_get_read_pkt(void); -uint8_t dp_test_get_read_port(void); -uint16_t dp_test_get_read_meta_flags(void); -uint32_t dp_test_get_read_meta_iif(void); -uint16_t dp_test_get_read_proto(void); -bool dp_test_read_pkt_available(void); +/* Helpers to manage interactions with protobufs */ /* - * Internal error in test framework - will crash notifying the line - * number that we are currently at. Do NOT use this for normal test - * conditions - for those use dp_fail_unless and other services in - * dp_test_macros.h. This is solely for internal unrecoverable errors - * in the test infra. + * Given an ip address (either v4 or v6) in string format, populate + * the protobuf formatted addr. + * + * @param addr [out] The protobuf address structure to be populated. 
+ * @param str [in] The address, formatted as a string that is to be + * populated into the address. + * @param data [out] A scratch buffer of at least 16 bytes that is + * used in the case when the string is a V6 address + * as the addr needs space to store the address. + * + * Populate the addr with the address in the string, using the 'data' as the + * storage for this in the case of an IPv6 address. This is done to avoid + * having this function doing a malloc for the data and the requirement to + * then free it. */ -#define dp_test_assert_internal(expr) \ - ({ \ - if (!(expr)) { \ - printf("Internal error: %s:%d\n", \ - __func__, __LINE__); \ - } \ - assert(expr); \ - }) - -/* override soft-ticks time for tests that want to do timer dependent stuff. */ -void dp_test_enable_soft_tick_override(void); -void dp_test_disable_soft_tick_override(void); +void dp_test_lib_pb_set_ip_addr(IPAddress *addr, const char *str, void *data); #endif /*_DP_TEST_LIB_H_ */ diff --git a/include/dp_test/dp_test_lib_intf.h b/include/dp_test/dp_test_lib_intf.h new file mode 100644 index 00000000..023a61cc --- /dev/null +++ b/include/dp_test/dp_test_lib_intf.h @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2015-2016 by Brocade Communications Systems, Inc. + * All rights reserved. 
+ * + * SPDX-License-Identifier: LGPL-2.1-only + * + * dataplane UT Interface helpers + */ + +#ifndef _DP_TEST_LIB_INTF_H_ +#define _DP_TEST_LIB_INTF_H_ + +#include +#include +#include /* conflicts with linux/if_bridge.h */ + +#include "../vrf.h" +#include "dp_test_pktmbuf_lib.h" + +#define DP_TEST_INTF_DEF_SRC_MAC "00:00:a6:00:00:01" + +int dp_test_intf_name2index(const char *if_name); +char *dp_test_intf_name2mac_str(const char *if_name); +void dp_test_intf_name2addr(const char *if_name, struct dp_test_addr *addr); +void dp_test_intf_name2addr_str(const char *if_name, int family, + char *addr_str, int buf_len); + +char *dp_test_intf_real(const char *test_name, char *real_name); + +vrfid_t _dp_test_translate_vrf_id(vrfid_t vrf_id, const char *file, + int line); + +#define dp_test_translate_vrf_id(vrf_id) \ + _dp_test_translate_vrf_id(vrf_id, __FILE__, __LINE__) + +/* Create / Delete bridge interfaces */ +void _dp_test_intf_bridge_create(const char *br_name, + const char *file, const char *func, + int line); +#define dp_test_intf_bridge_create(br_name) \ + _dp_test_intf_bridge_create(br_name, \ + __FILE__, __func__, __LINE__) + +void _dp_test_intf_bridge_del(const char *br_name, + const char *file, const char *func, + int line); +#define dp_test_intf_bridge_del(br_name) \ + _dp_test_intf_bridge_del(br_name, \ + __FILE__, __func__, __LINE__) + +void _dp_test_intf_bridge_add_port(const char *br_name, const char *if_name, + const char *file, const char *func, + int line); +#define dp_test_intf_bridge_add_port(br_name, if_name) \ + _dp_test_intf_bridge_add_port(br_name, if_name, \ + __FILE__, __func__, __LINE__) + +void _dp_test_intf_bridge_remove_port(const char *br_name, const char *if_name, + const char *file, const char *func, + int line); +#define dp_test_intf_bridge_remove_port(br_name, if_name) \ + _dp_test_intf_bridge_remove_port(br_name, if_name, \ + __FILE__, __func__, __LINE__) + +#endif /* _DP_TEST_LIB_INTF_H_ */ diff --git 
a/tests/whole_dp/src/dp_test_macros.h b/include/dp_test/dp_test_macros.h similarity index 79% rename from tests/whole_dp/src/dp_test_macros.h rename to include/dp_test/dp_test_macros.h index 332c5429..3401f9d6 100644 --- a/tests/whole_dp/src/dp_test_macros.h +++ b/include/dp_test/dp_test_macros.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015-2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -29,7 +29,9 @@ #include -#include "dp_test.h" +#include "dp_test_cmd_check.h" + +Suite * dp_test_get_suite(const char *filename); /* Private helper macro */ #define _DP_TEST_FNAME_ (strrchr(__FILE__, '/') ? \ @@ -96,6 +98,36 @@ * been declared using DP_DECL_TEST_CASE above. * TEST - is the name of the individual test being applied. */ + #if ((CHECK_MAJOR_VERSION > 0) || (CHECK_MINOR_VERSION >= 13)) + +#define _DP_START_TEST(TESTCASE, TEST, CONSTRUCT) \ + void dp_test_##TESTCASE##_##TEST##_register(void); \ + \ + static void dp_test_##TESTCASE##_##TEST##_fn(int); \ + \ + static const TTest dp_test_##TESTCASE##_##TEST##_ttest \ + = {"dp_test_" #TESTCASE "_" #TEST, \ + dp_test_##TESTCASE##_##TEST##_fn, \ + __FILE__, __LINE__}; \ + static const TTest * dp_test_##TESTCASE##_##TEST = \ + & dp_test_##TESTCASE##_##TEST##_ttest; \ + \ + CONSTRUCT \ + void \ + dp_test_##TESTCASE##_##TEST##_register(void) \ + { \ + TCase *tc = get_tcase_##TESTCASE(); \ + tcase_add_test(tc, dp_test_##TESTCASE##_##TEST); \ + } \ + \ + static void dp_test_##TESTCASE##_##TEST##_fn \ + (int _i CK_ATTRIBUTE_UNUSED) \ + { \ + bool _do_clean_check = \ + !dp_test_##TESTCASE##_teardown_fn; \ + +#else + #define _DP_START_TEST(TESTCASE, TEST, CONSTRUCT) \ void dp_test_##TESTCASE##_##TEST##_register(void); \ \ @@ -114,6 +146,8 @@ bool _do_clean_check = \ !dp_test_##TESTCASE##_teardown_fn; \ +#endif + /* * Start defining a test function.
* @@ -130,6 +164,17 @@ #define DP_START_TEST_DONT_RUN(TESTCASE, TEST) \ _DP_START_TEST(TESTCASE, TEST, DONT_RUN) +/* + * Only run the FULL_RUN tests if FULL_RUN is defined + */ +#ifdef DP_TEST_FULL_RUN +#define DP_START_TEST_FULL_RUN(TESTCASE, TEST) \ + _DP_START_TEST(TESTCASE, TEST, __attribute__((constructor))) +#else +#define DP_START_TEST_FULL_RUN(TESTCASE, TEST) \ + _DP_START_TEST(TESTCASE, TEST, DONT_RUN) +#endif + @@ -187,7 +232,9 @@ static inline void _dp_test_fail_unless(bool condition, const char *file, abort(); } -#if ((CHECK_MAJOR_VERSION > 0) || (CHECK_MINOR_VERSION > 9) || \ +#if ((CHECK_MAJOR_VERSION > 0) || (CHECK_MINOR_VERSION >= 15)) + _ck_assert_failed(file, line, "", "%s", tmp_str); +#elif ((CHECK_MAJOR_VERSION > 0) || (CHECK_MINOR_VERSION > 9) || \ (CHECK_MICRO_VERSION >= 13)) _ck_assert_failed(file, line, "%s", tmp_str); #elif ((CHECK_MAJOR_VERSION > 0) || (CHECK_MINOR_VERSION > 9) || \ diff --git a/include/dp_test/dp_test_netlink_state.h b/include/dp_test/dp_test_netlink_state.h new file mode 100644 index 00000000..9379a95c --- /dev/null +++ b/include/dp_test/dp_test_netlink_state.h @@ -0,0 +1,152 @@ +/*- + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2015-2016 by Brocade Communications Systems, Inc. + * All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + * + * A test controller/console for the dataplane test harness. + * This file provides a minimal implementation of a controller + * and the console so that the dataplane can be programmed and + * queried.
+ */ + +#ifndef _DP_TEST_NETLINK_STATE_H_ +#define _DP_TEST_NETLINK_STATE_H_ + +#include +#include + +#include +#include +#include + +#include "../vrf.h" + +void _dp_test_netlink_add_neigh(const char *ifname, + const char *nh_addr_str, + const char *mac_str, + bool verify, + const char *file, const char *func, + int line); +#define dp_test_netlink_add_neigh(ifname, nh_addr_str, mac_str) \ + _dp_test_netlink_add_neigh(ifname, nh_addr_str, mac_str, \ + true, \ + __FILE__, __func__, __LINE__) + +void _dp_test_netlink_del_neigh(const char *ifname, + const char *nh_addr_str, + const char *mac_str, bool verify, + const char *file, const char *func, + int line); +#define dp_test_netlink_del_neigh(ifname, nh_addr_str, mac_str) \ + _dp_test_netlink_del_neigh(ifname, nh_addr_str, mac_str, \ + true, \ + __FILE__, __func__, __LINE__) + +void _dp_test_verify_neigh(const char *ifname, + const char *ipaddr, + const char *mac_str, + bool negate_match, + const char *file, const char *func, + int line); +#define dp_test_verify_neigh(ifname, nh_addr_str, mac_str, negate_match) \ + _dp_test_verify_neigh(ifname, nh_addr_str, mac_str, \ + negate_match, \ + __FILE__, __func__, __LINE__) + +/* VRF creation / deletion macros */ +void _dp_test_netlink_add_vrf(uint32_t vrf_id, uint32_t expected_ref_cnt, + const char *file, int line); + +#define dp_test_netlink_add_vrf(vrf_id, expected_ref_cnt) \ + _dp_test_netlink_add_vrf(vrf_id, expected_ref_cnt, \ + __FILE__, __LINE__) + +void _dp_test_netlink_del_vrf(uint32_t vrf_id, uint32_t expected_ref_cnt, + const char *file, int line); + +#define dp_test_netlink_del_vrf(vrf_id, expected_ref_cnt) \ + _dp_test_netlink_del_vrf(vrf_id, expected_ref_cnt, \ + __FILE__, __LINE__) +/* + * Bind interface to VRF (note sets MTU to default value). 
+ */ +void _dp_test_netlink_set_interface_vrf(const char *name, uint32_t vrf_id, + bool verify, + const char *file, const char *func, + int line); +#define dp_test_netlink_set_interface_vrf(name, vrf_id) \ + _dp_test_netlink_set_interface_vrf(name, vrf_id, true, \ + __FILE__, __func__, __LINE__) +/** + * @brief Adds L3 address and adds route for the attached network + * + * @param [in] intf Name of the interface + * @param [in] addr IPv4 or IPv6 address string for the interface, of the + * form "addr/prefix" + */ +#define dp_test_nl_add_ip_addr_and_connected(intf, addr) \ + _dp_test_nl_add_ip_addr_and_connected(intf, addr, \ + VRF_DEFAULT_ID, __FILE__, \ + __func__, __LINE__) + +#define dp_test_nl_add_ip_addr_and_connected_vrf(intf, addr, vrf_id) \ + _dp_test_nl_add_ip_addr_and_connected(intf, addr, vrf_id, \ + __FILE__, __func__, \ + __LINE__) + +void +_dp_test_nl_add_ip_addr_and_connected(const char *intf, const char *addr, + vrfid_t vrf_id, const char *file, + const char *func, int line); + +/** + * @brief Remove interface address and attached network route + * + * @param [in] intf Name of the interface + * @param [in] addr IPv4 or IPv6 address string for the interface, of the + * form "addr/prefix" + */ +#define dp_test_nl_del_ip_addr_and_connected(intf, addr) \ + _dp_test_nl_del_ip_addr_and_connected(intf, addr, \ + VRF_DEFAULT_ID, __FILE__, \ + __func__, __LINE__) + +#define dp_test_nl_del_ip_addr_and_connected_vrf(intf, addr, vrf_id) \ + _dp_test_nl_del_ip_addr_and_connected(intf, addr, vrf_id, \ + __FILE__, __func__, \ + __LINE__) + +void +_dp_test_nl_del_ip_addr_and_connected(const char *intf, const char *addr, + vrfid_t vrf_id, const char *file, + const char *func, int line); + + +/* + * Add a route + * + * @param [in] route_str The route expressed as a string + */ +#define dp_test_netlink_add_route(route_str) \ + _dp_test_netlink_add_route(route_str, true, false, \ + __FILE__, __func__, __LINE__) + +void _dp_test_netlink_add_route(const char *route_str, 
bool verify, + bool incomplete, const char *file, + const char *func, int line); + +/* + * Delete a route + * + * @param [in] route_str The route expressed as a string + */ +#define dp_test_netlink_del_route(route_str) \ + _dp_test_netlink_del_route(route_str, true, \ + __FILE__, __func__, __LINE__) + +void _dp_test_netlink_del_route(const char *route_str, bool verify, + const char *file, const char *func, int line); + +#endif /* _DP_TEST_NETLINK_STATE_H_ */ diff --git a/include/dp_test/dp_test_pktmbuf_lib.h b/include/dp_test/dp_test_pktmbuf_lib.h new file mode 100644 index 00000000..d09b94a0 --- /dev/null +++ b/include/dp_test/dp_test_pktmbuf_lib.h @@ -0,0 +1,273 @@ +/* + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2011-2017 by Brocade Communications Systems, Inc. + * All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + * + * Library of functions for packet creation and handling + */ +#ifndef __DP_PKTMBUF_LIB_H__ +#define __DP_PKTMBUF_LIB_H__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct dp_test_addr { + int family; + union { + in_addr_t ipv4; + struct in6_addr ipv6; + uint32_t mpls; + } addr; +}; +/** + * Create and initialise a UDP IPv4 packet + * + * @param saddr [in] Source address string, e.g. "10.0.1.0" + * @param daddr [in] Dest address string + * @param n [in] Number of mbufs + * @param len [in] Array of per-mbuf payload lengths + * + * @return pak Pointer to mbuf if successful, else NULL + */ +struct rte_mbuf * +dp_test_create_ipv4_pak(const char *saddr, const char *daddr, + int n, const int *len); + +/** + * Create and initialise a raw IPv4 packet with the given protocol. + * + * @param saddr [in] Source address string, e.g. "10.0.1.0" + * @param daddr [in] Dest address string + * @param protocol [in] Protocol, e.g. 
IPPROTO_UDP + * @param n [in] Number of mbufs + * @param len [in] Array of 'n' per-mbuf payload lengths + * + * @return pak Pointer to mbuf if successful, else NULL + */ +struct rte_mbuf * +dp_test_create_raw_ipv4_pak(const char *saddr, const char *daddr, + uint8_t protocol, int n, const int *len); + +/** + * Create and initialise an IPv4 UDP packet + * + * @param saddr [in] Source address string, e.g. "10.0.1.0" + * @param daddr [in] Dest address string + * @param sport [in] UDP source port + * @param dport [in] UDP dest port + * @param n [in] Number of mbufs + * @param len [in] Array of per-mbuf payload lengths + * + * @return pak Pointer to mbuf if successful, else NULL + */ +struct rte_mbuf * +dp_test_create_udp_ipv4_pak(const char *saddr, const char *daddr, + uint16_t sport, uint16_t dport, + int n, const int *len); + +/** + * Create and initialise an IPv6 packet with no protocol. + * + * @param saddr [in] Source address string, e.g. "2001:101:8::1" + * @param daddr [in] Dest address string + * @param n [in] Number of mbufs + * @param len [in] Array of 'n' per-mbuf payload lengths + * + * @return pak Pointer to mbuf if successful, else NULL + */ +struct rte_mbuf * +dp_test_create_ipv6_pak(const char *saddr, const char *daddr, + int n, const int *len); + +/** + * Create and initialise a raw IPv6 packet with the given protocol. + * + * @param saddr [in] Source address string, e.g. "2001:101:8::1" + * @param daddr [in] Dest address string + * @param protocol [in] Protocol, e.g. IPPROTO_UDP + * @param n [in] Number of mbufs + * @param len [in] Array of 'n' per-mbuf payload lengths + * + * @return pak Pointer to mbuf if successful, else NULL + */ +struct rte_mbuf * +dp_test_create_raw_ipv6_pak(const char *saddr, const char *daddr, + uint8_t protocol, int n, const int *len); + +/** + * Create and initialise an IPv6 UDP packet + * + * @param saddr [in] Source address string, e.g. 
"2001:101:8::1" + * @param daddr [in] Dest address string + * @param sport [in] UDP source port (host order) + * @param dport [in] UDP dest port + * @param n [in] Number of mbufs + * @param len [in] Array of 'n' per-mbuf payload lengths + * + * @return pak Pointer to mbuf if successful, else NULL + */ +struct rte_mbuf * +dp_test_create_udp_ipv6_pak(const char *saddr, const char *daddr, + uint16_t sport, uint16_t dport, + int n, const int *len); +/** + * Create and initialise an IPv4 TCP packet + * + * @param saddr [in] Source address string, e.g. "10.0.1.0" + * @param daddr [in] Dest address string + * @param sport [in] TCP source port + * @param dport [in] TCP dest port + * @param flags [in] TCP header flags + * @param seq [in] TCP sequence number + * @param ack [in] TCP acknowledgment number + * @param win [in] TCP window value (host order) + * @param opts [in] Byte array of TCP options. See below. + * @param n [in] Number of mbufs + * @param len [in] Array of 'n' per-mbuf payload lengths + * + * @return pak Pointer to mbuf if successful, else NULL + * + * TCP options - type (1 byte), length (1 byte), value (length-2 bytes), e.g. + * + * uint8_t opts[] = { + * 2, 4, 0x18, 0x02, + * 1, + * 3, 3, 1, + * 0 + * }; + * + * The options list is terminated when 'type' is 0 (EOL). Note that when + * 'type' is 1 (NOP) then there is no length value. This is commonly used to + * separate options in a header. + */ +struct rte_mbuf * +dp_test_create_tcp_ipv4_pak(const char *saddr, const char *daddr, + uint16_t sport, uint16_t dport, uint8_t flags, + uint32_t seq, uint32_t ack, uint16_t win, + const uint8_t *opts, int n, const int *len); + +/** + * Create and initialise an IPv6 TCP packet + * + * @param saddr [in] Source address string, e.g. 
"2001:101:8::1" + * @param daddr [in] Dest address string + * @param sport [in] TCP source port (host order) + * @param dport [in] TCP dest port + * @param flags [in] TCP header flags + * @param seq [in] TCP sequence number + * @param ack [in] TCP acknowledgment number + * @param win [in] TCP window value (host order) + * @param opts [in] Byte array of TCP options. See above. + * @param n [in] Number of mbufs + * @param len [in] Array of 'n' per-mbuf payload lengths + * + * @return pak Pointer to mbuf if successful, else NULL + */ +struct rte_mbuf * +dp_test_create_tcp_ipv6_pak(const char *saddr, const char *daddr, + uint16_t sport, uint16_t dport, uint8_t flags, + uint32_t seq, uint32_t ack, uint16_t win, + const uint8_t *opts, int n, const int *len); + +/** + * Calculate IPv4 UDP or TCP checksum. + * + * The IPv4 header should not contain options. The layer 4 checksum + * must be set to 0 in the packet by the caller. The l4 header must be + * in the first mbuf. + * + * @param m [in] Pointer to the mbuf chain + * @param ip [in] Pointer to the contiguous IP header. + * @param l4_hdr [in] Pointer to the beginning of the L4 header + * @return + * The complemented checksum to set in the IPv4 UDP/TCP header + */ +uint16_t +dp_test_ipv4_udptcp_cksum(const struct rte_mbuf *m, const struct iphdr *ip, + void *l4_hdr); + +/** + * Calculate IPv6 UDP or TCP checksum. + * + * The layer 4 checksum must be set to 0 in the packet by the caller. + * + * @param ip6 + * The pointer to the contiguous IPv6 header. + * @param l4_hdr + * The pointer to the beginning of the L4 header (must be in first mbuf). + * @return + * The complemented checksum to set in the IPv6 UDP/TCP header + */ +uint16_t +dp_test_ipv6_udptcp_cksum(const struct rte_mbuf *m, + const struct ip6_hdr *ip6, + const void *l4_hdr); + + +/* + * API to allow us to set a given field within the ip header in a buffer.
+ */ +enum dp_test_pak_field_ { + DP_TEST_SET_VERSION, + DP_TEST_SET_SRC_ADDR_IPV4, + DP_TEST_SET_DST_ADDR_IPV4, + DP_TEST_SET_IP_ECN, + DP_TEST_SET_DF, + DP_TEST_SET_FRAG_MORE, + DP_TEST_SET_FRAG_OFFSET, + DP_TEST_SET_TOS, + DP_TEST_SET_PROTOCOL, + DP_TEST_SET_TTL, + DP_TEST_SET_IP_ID, +}; + +/** + * Initialize ethernet hdr. If l2_len is 0, prepend 14 bytes and set + * m->l2_len to 14. + * + * @param m [in] Pointer to packet mbuf + * @param d_addr [in] Dest mac string, e.g. "aa:bb:cc:dd:ee:ff" + * @param s_addr [in] Source mac string + * @param ether_type [in] Ethernet type (host order), may be 0 + * + * @return Pointer to eth header if successful, else NULL + * + * To just check and set the mbuf l2_len: + * (void)dp_test_pktmbuf_eth_init(m, NULL, NULL, 0); + * + * To just check and set the mbuf l2_len and ether type: + * (void)dp_test_pktmbuf_eth_init(m, NULL, NULL, RTE_ETHER_TYPE_IPV4); + */ +struct rte_ether_hdr * +dp_test_pktmbuf_eth_init(struct rte_mbuf *m, + const char *d_addr, + const char *s_addr, + uint16_t ether_type); + +void +dp_test_set_pak_ip_field(struct iphdr *ip, + enum dp_test_pak_field_ field, + uint32_t val); +void +dp_test_set_pak_ip6_field(struct ip6_hdr *ip, + enum dp_test_pak_field_ field, + uint32_t val); + +void +dp_test_ipv4_decrement_ttl(struct rte_mbuf *m); + +void +dp_test_ipv6_decrement_ttl(struct rte_mbuf *m); +#endif /* __DP_PKTMBUF_LIB_H__ */ diff --git a/include/dp_test/dp_test_session_lib.h b/include/dp_test/dp_test_session_lib.h new file mode 100644 index 00000000..ef363100 --- /dev/null +++ b/include/dp_test/dp_test_session_lib.h @@ -0,0 +1,191 @@ +/* + * Copyright (c) 2020, AT&T Intellectual Property. All rights reserved. 
+ * + * SPDX-License-Identifier: LGPL-2.1-only + * + * session UT test lib + */ + +#ifndef __DP_TEST_SESSION_LIB_H__ +#define __DP_TEST_SESSION_LIB_H__ + +#define DP_TEST_MAX_TEST_SESSIONS 5 +#define DP_TEST_MAX_PKTS_PER_SESSION 20 + +/** + * Used to keep a list of sessions + */ +struct dp_test_session { + uint64_t se_id; /**< session_id retrieved from packed session */ + bool completed; /**< Is the received packed message complete */ +}; + +void _dp_test_session_msg_valid(void *msg, uint32_t size, + const char *file, int line); +#define dp_test_session_msg_valid(msg, size) \ + _dp_test_session_msg_valid(msg, size, __FILE__, __LINE__) + +bool _dp_test_session_msg_full(void *msg, const char *file, int line); +#define dp_test_session_msg_full(msg) \ + _dp_test_session_msg_full(msg, __FILE__, __LINE__) + +bool _dp_test_session_msg_update(void *msg, const char *file, int line); +#define dp_test_session_msg_update(msg) \ + _dp_test_session_msg_update(msg, __FILE__, __LINE__) + +uint64_t _dp_test_session_msg_get_id(void *msg, const char *file, int line); +#define dp_test_session_msg_get_id(msg) \ + _dp_test_session_msg_get_id(msg, __FILE__, __LINE__) + +void _dp_test_session_msg_check_rcvd(void *msg, + uint64_t pkts_per_session, + struct dp_test_session sess[], + const char *file, int line); +#define dp_test_session_msg_check_rcvd(msg, pkts_per_session, sess) \ + _dp_test_session_msg_check_rcvd(msg, pkts_per_session, sess, \ + __FILE__, __LINE__) + +bool _dp_test_session_msg_pulled_all(void *msg, + uint64_t pkts_per_session, + struct dp_test_session sess[], + const char *file, int line); +#define dp_test_session_msg_pulled_all(msg, pkts_per_session, sess) \ + _dp_test_session_msg_pulled_all(msg, pkts_per_session, \ + sess, __FILE__, __LINE__) + + +/* Count and clear sessions */ + +#define SC_WARN_ONLY true +#define SC_FAIL false + +/* + * sessions flags - verify the presence/absence of a session. 
+ */ +#define SE_ACTIVE 0x0004 +#define SE_PASS 0x0008 +#define SE_EXPIRE 0x0010 +#define SE_GC_PASS_TWO 0x0020 +#define SE_BYPASS 0x0040 + +#define SE_FLAGS_MASK (SE_ACTIVE | SE_PASS | SE_EXPIRE | SE_BYPASS) +#define SE_FLAGS_AE (SE_ACTIVE | SE_EXPIRE) + +/* Clear all npf sessions. */ +void +dp_test_sessions_clear(void); + +/** + * Verify the presence/absence of a session + * + * @param desc [in] Optional text to be prepended to any error message + * @param saddr [in] Source address string + * @param src_id [in] Source ID in host order (TCP port, ICMP id) + * @param daddr [in] Dest address string + * @param dst_id [in] Dest ID in host order (TCP port, ICMP id) + * @param proto [in] IP protocol + * @param intf [in] Interface string, e.g. "dp2T1" + * @param exp_flags [in] Expected flags, e.g. SE_ACTIVE | SE_PASS + * @param flags_mask [in] Flags mask, e.g. SE_FLAGS_MASK + * @param exists [in] true if we expect to find the session + * + * @return true if found + **/ +bool _dp_test_session_verify(char *desc, + const char *saddr, uint16_t src_id, + const char *daddr, uint16_t dst_id, + uint8_t proto, + const char *intf, + uint32_t exp_flags, uint32_t flags_mask, + bool exists, const char *file, int line); + +#define dp_test_session_verify(desc, saddr, src_id, daddr, dst_id, proto, \ + intf, flgs, msk, exists) \ + _dp_test_session_verify(desc, saddr, src_id, daddr, dst_id, \ + proto, intf, flgs, msk, exists, \ + __FILE__, __LINE__) + +/* + * Verify the presence/absence of an npf session. The counts must match as + * well as the values identifying the session. Poll for a matching session + * for the standard poll delay and record a test failure if not found.
+ * + * @param desc [in] Optional text to be prepended to any error message + * @param saddr [in] Source address string + * @param src_id [in] Source ID in host order (TCP port, ICMP id) + * @param daddr [in] Dest address string + * @param dst_id [in] Dest ID in host order (TCP port, ICMP id) + * @param proto [in] IP protocol + * @param intf [in] Interface string, e.g. "dp2T1" + * @param exp_flags [in] Expected flags, e.g. SE_ACTIVE | SE_PASS + * @param flags_mask [in] Flags mask, e.g. SE_FLAGS_MASK + * @param pkts_in [in] expected count, as an int due to json limitations + * @param bytes_in [in] expected count, as an int due to json limitations + * @param pkts_out [in] expected count, as an int due to json limitations + * @param bytes_out [in] expected count, as an int due to json limitations + * + * @return none; a test failure is recorded if not found + */ +void _dp_test_session_verify_count(char *desc, + const char *saddr, uint16_t src_id, + const char *daddr, uint16_t dst_id, + uint8_t proto, + const char *intf, + uint32_t exp_flags, uint32_t flags_mask, + int pkts_in, int bytes_in, + int pkts_out, int bytes_out, + const char *file, int line); + +#define dp_test_session_verify_count(desc, saddr, src_id, daddr, dst_id, \ + proto, intf, flgs, msk, \ + pkts_in, bytes_in, pkts_out, \ + bytes_out) \ + _dp_test_session_verify_count(desc, saddr, src_id, daddr, dst_id, \ + proto, intf, flgs, msk, \ + pkts_in, bytes_in, pkts_out, \ + bytes_out, \ + __FILE__, __LINE__) + +/* + * Verify the global session count + */ +void +_dp_test_session_count_verify(uint exp_count, bool warn, + const char *file, const char *func, int line); + +#define dp_test_session_count_verify(count) \ + _dp_test_session_count_verify(count, SC_FAIL, \ + __FILE__, __func__, __LINE__) + +/* + * Verify the global UDP session count + */ +void +_dp_test_session_udp_count_verify(uint exp_count, bool warn, + const char *file, int line); + +#define dp_test_session_udp_count_verify(count) \ + _dp_test_session_udp_count_verify(count,
SC_FAIL, \ + __FILE__, __LINE__) + +/* + * Return counters for one session. Session filter should be fully specified, + * e.g. + * + * uint32_t pkts_in = 0, pkts_out = 0; + * uint32_t bytes_in = 0, bytes_out = 0; + * uint32_t sess_id = 0; + * + * dp_test_session_counters("start 0 count 1 " + * "src-addr 192.0.2.103 src-port 10000 " + * "dst-addr 203.0.113.203 dst-port 60000 " + * "proto 17 dir out intf dpT21", + * &pkts_in, &pkts_out, &bytes_in, &bytes_out, + * &sess_id); + */ +int dp_test_session_counters(const char *options, + uint32_t *pkts_in, uint32_t *pkts_out, + uint32_t *bytes_in, uint32_t *bytes_out, + uint32_t *sess_id); + +#endif /* __DP_TEST_SESSION_LIB_H__ */ diff --git a/src/dpi_public.h b/include/dpi.h similarity index 76% rename from src/dpi_public.h rename to include/dpi.h index 1993d80e..5ff4ab5c 100644 --- a/src/dpi_public.h +++ b/include/dpi.h @@ -1,14 +1,14 @@ /* * Public APIs for DPI. * - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2017 by Brocade Communications Systems, Inc. * All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only */ -#ifndef DPI_PUBLIC_H -#define DPI_PUBLIC_H +#ifndef VYATTA_DATAPLANE_DPI_H +#define VYATTA_DATAPLANE_DPI_H #include #include @@ -23,7 +23,7 @@ struct ifnet; * * The call returns 0 on success an a negative errno on failure. */ -int dpi_enable(struct ifnet *ifp); +int dp_dpi_enable(struct ifnet *ifp); /* The caller no nonger requires DPI to run on the given interface. @@ -31,18 +31,18 @@ int dpi_enable(struct ifnet *ifp); * When no more clients require DPI, the engine will be stopped. * * Note that this must only be called if the caller previously - * called dpi_enable() and it returned success. + * called dp_dpi_enable() and it returned success. * * The call returns 0 on success an a negative errno on failure. 
*/ -int dpi_disable(struct ifnet *ifp); +int dp_dpi_disable(struct ifnet *ifp); /* Returns true if DPI is enabled on any interface, else false. * * NB DPI state is not tracked per interface. */ -bool dpi_is_enabled(void); +bool dp_dpi_is_enabled(void); /* Return the L7 DPI application ID for the given packet. @@ -52,6 +52,6 @@ bool dpi_is_enabled(void); * can result in DPI_APP_ERROR being returned. If processing * is not yet complete DPI_APP_UNDETERMINED can be returned. */ -uint32_t dpi_get_app_id(struct rte_mbuf *mbuf); +uint32_t dp_dpi_get_app_id(struct rte_mbuf *mbuf); -#endif /* DPI_PUBLIC_H */ +#endif /* VYATTA_DATAPLANE_DPI_H */ diff --git a/include/event.h b/include/event.h new file mode 100644 index 00000000..409fc1ab --- /dev/null +++ b/include/event.h @@ -0,0 +1,152 @@ +/* + * Copyright (c) 2020, AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ + +#ifndef VYATTA_DATAPLANE_EVENT_H +#define VYATTA_DATAPLANE_EVENT_H + +#include +#include "vrf.h" + +struct ifnet; + +/* + * Callback function to process events received on sockets. + * + * @param[in, out] arg Argument that was passed when the socket + * was registered. + * + * @return 0 for success + * @return -ve for a failure. + */ +typedef int (*ev_callback_t)(void *arg); + +/* + * Register a socket and a callback function to process messages on that + * socket. Messages received on the socket will be retrieved and passed + * to the handler for processing. + * + * The processing is always done on the main thread. The main use of + * this function is to allow events to be sent to the main thread where + * they will then be processed in turn. No guarantees are given about + * how quickly these events are processed. Typically the main thread + * will pull all messages out of a socket before moving on to the next one + * and so there may be significant delay during busy periods.
+ * + * If there is a failure processing the message (callback returns a + * -ve number) then no more messages will be read from that socket + * and it will be unregistered. + * + * @param[in] socket The socket to pull events from + * @param[in] callback The callback function that processes the events + * @param[in] arg Argument passed through to the callback + * + * @return 0 if successful + * @return -ve for a failure + */ +int dp_register_event_socket(void *socket, ev_callback_t callback, void *arg); + +/* + * Unregister a previously registered socket + * + * @param[in] socket The socket to stop listening to + * + * @return 0 if successful + * @return -ve for a failure + */ +int dp_unregister_event_socket(void *socket); + +/* + * Send an event to vplaned. This function will push appropriate headers + * and send it to vplaned where it will be processed. + * + * @param [in] msg The event message to send. + * + * @return 0 if successful + * @return -ve for a failure + */ +int dp_send_event_to_vplaned(zmsg_t *msg); + +/* + * The set of events that users can register for notification of. + */ +enum dp_event { + DP_EVENT_VRF_CREATE = 1, + DP_EVENT_VRF_DELETE, + DP_EVENT_IF_RENAME, + DP_EVENT_IF_VRF_SET, + DP_EVENT_IF_ADDR_ADD, + DP_EVENT_IF_ADDR_DEL, + DP_EVENT_IF_CREATE, + DP_EVENT_IF_DELETE, + DP_EVENT_IF_LAG_CHANGE, + DP_EVENT_IF_LAG_ADD_MEMBER, + DP_EVENT_IF_LAG_DELETE_MEMBER, +}; + +enum dp_if_lag_event { + DP_IF_LAG_EVENT_MIN_LINKS_CHANGE, +}; + +/* + * Structure that users can use to register callbacks for certain types of + * events.
+ */ +struct dp_events_ops { + /* DP_EVENT_VRF_CREATE */ + void (*vrf_create)(struct vrf *vrf); + /* DP_EVENT_VRF_DELETE */ + void (*vrf_delete)(struct vrf *vrf); + /* DP_EVENT_IF_CREATE */ + void (*if_create)(struct ifnet *ifp); + /* DP_EVENT_IF_DELETE */ + void (*if_delete)(struct ifnet *ifp); + /* DP_EVENT_IF_RENAME */ + void (*if_rename)(struct ifnet *ifp, const char *old_name); + /* DP_EVENT_IF_VRF_SET */ + void (*if_vrf_set)(struct ifnet *ifp); + /* DP_EVENT_IF_ADDR_ADD */ + void (*if_addr_add)(struct ifnet *ifp, uint32_t ifindex, int af, + const void *addr); + /* DP_EVENT_IF_ADDR_DEL */ + void (*if_addr_delete)(struct ifnet *ifp, + uint32_t ifindex, int af, const void *addr); + /* DP_EVENT_IF_LAG_CHANGE */ + void (*if_lag_change)(struct ifnet *ifp, enum dp_if_lag_event event); + /* DP_EVENT_IF_LAG_ADD_MEMBER */ + void (*if_lag_add_member)(struct ifnet *team, + struct ifnet *ifp); + /* DP_EVENT_IF_LAG_DELETE_MEMBER */ + void (*if_lag_delete_member)(struct ifnet *team, + struct ifnet *ifp); + /* DP_EVENT_IF_LINK_CHANGE */ + void (*if_link_change)(struct ifnet *ifp, bool up, uint32_t speed); +}; + +/* + * Register an event ops structure with callbacks that will be called for + * each of the given event types. + * + * @param [in] ops The set of callbacks. If a callback is provided then it + * will be called for each event of that type. Entries + * can be set to NULL if the caller is not interested in + * some of the events. + * + * @return 0 if successful + * @return -ve for a failure + */ +int dp_events_register(const struct dp_events_ops *ops); + +/* + * Unregister a previously registered set of callbacks. 
+ * + * @param [in] ops The set of callbacks to unregister + * + * @return 0 if successful + * @return -ve for a failure + */ +int dp_events_unregister(const struct dp_events_ops *ops); + +#endif /* VYATTA_DATAPLANE_EVENT_H */ diff --git a/include/fal_bfd.h b/include/fal_bfd.h new file mode 100644 index 00000000..0576b08c --- /dev/null +++ b/include/fal_bfd.h @@ -0,0 +1,116 @@ +/* + * FAL APIs for Bidirectional Forwarding Detection + * + * Copyright (c) 2020, AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ + +#ifndef VYATTA_DATAPLANE_FAL_BFD_H +#define VYATTA_DATAPLANE_FAL_BFD_H + +/* types defined in fal_plugin.h */ +#ifndef fal_object_t +typedef uintptr_t fal_object_t; +#endif +struct fal_attribute_t; + +/** + * @brief Create BFD session. + * + * @param[out] bfd_session_id BFD session id + * @param[in] attr_count Number of attributes + * @param[in] attr_list Value of attributes + * + * @return 0 if operation is successful otherwise a different + * error code is returned. + */ +int dp_fal_bfd_create_session(fal_object_t *bfd_session_id, + uint32_t attr_count, const struct fal_attribute_t *attr_list); + +/** + * @brief Delete BFD session. + * + * @param[in] bfd_session_id BFD session id + * + * @return 0 if operation is successful otherwise a different + * error code is returned. + */ +int dp_fal_bfd_delete_session(fal_object_t bfd_session_id); + +/** + * @brief Set BFD session attributes. + * + * @param[in] bfd_session_id BFD session id + * @param[in] attr_count Number of attributes + * @param[in] attr_list Value of attributes + * + * @return 0 if operation is successful otherwise a different + * error code is returned. + */ +int dp_fal_bfd_set_session_attribute(fal_object_t bfd_session_id, + uint32_t attr_count, const struct fal_attribute_t *attr_list); + +/** + * @brief Get BFD session attributes. 
+ * + * @param[in] bfd_session_id BFD session id + * @param[in] attr_count Number of attributes + * @param[inout] attr_list Value of attribute + * + * @return 0 if operation is successful otherwise a different + * error code is returned. + */ +int dp_fal_bfd_get_session_attribute(fal_object_t bfd_session_id, + uint32_t attr_count, struct fal_attribute_t *attr_list); + +/** + * @brief Get BFD session statistics counters. + * + * @param[in] bfd_session_id BFD session id + * @param[in] num_of_counters Number of counters in the array + * @param[in] counter_ids Specifies the array of counter ids + * @param[out] counters Array of resulting counter values. + * + * @return 0 on success, failure status code on error + */ +int dp_fal_bfd_get_session_stats(fal_object_t bfd_session_id, + uint32_t num_of_counters, + const enum fal_bfd_session_stat_t *counter_ids, + uint64_t *counters); + +/** + * @brief Get BFD switch attributes. + * + * @param[in] attr_count Number of attributes + * @param[inout] attr_list Value of attribute + * + * @return 0 if operation is successful otherwise a different + * error code is returned. + */ +int dp_fal_bfd_get_switch_attrs(uint32_t attr_count, + struct fal_attribute_t *attr_list); + +/** + * @brief Set BFD switch attributes. + * + * @param[in] attr Value of attribute + * + * @return 0 if operation is successful otherwise a different + * error code is returned. + */ +int dp_fal_bfd_set_switch_attr(const struct fal_attribute_t *attr); + +/** + * @brief Dump BFD session state + * + * @param[in] bfd_session_id BFD session id. + * @param[inout] wr JSON writer to dump the state into. + * + * @return 0 if operation is successful otherwise a different + * error code is returned.
+ */ +int dp_fal_bfd_dump_session(fal_object_t bfd_session_id, + json_writer_t *wr); + +#endif /* VYATTA_DATAPLANE_FAL_BFD_H */ diff --git a/include/fal_plugin.h b/include/fal_plugin.h index d49e220e..7902e000 100644 --- a/include/fal_plugin.h +++ b/include/fal_plugin.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019, AT&T Intellectual Property. + * Copyright (c) 2017-2020, AT&T Intellectual Property. * All rights reserved. * Copyright (c) 2016-2017 by Brocade Communication Systems, Inc. * All rights reserved. @@ -257,19 +257,10 @@ * have involved a fair amount of extra code, especially translating * between DPDK interface APIs and SAI port attributes. * - * In addition, there are a number of objects and attributes that - * don't fit well with the way the dataplane is currently - * designed. Such objects include the router interface (there is no - * distinction between L2 and L3 interface in the dataplane), and not - * all interface types are represented in SAI and some of the - * interface types have special handling in certain places (tunnels - * use different attributes in L3 nexthops vs other interfaces), - * whereas it simplifies the dataplane side not to have to deal with - * such differences, even if the interface types are not fully modeled - * in the FAL yet. Another difference is that the SAI model does not - * account for having a multipath connected route (which could be - * present in certain use cases, e.g. VRRP) and it also requires the - * neighbour to be created before a nexthop can refer to it. + * In addition, the SAI model does not account for having a multipath + * connected route (which could be present in certain use cases, + * e.g. VRRP) and it also requires the neighbour to be created before + * a nexthop can refer to it. * * However, the biggest reason for not using SAI at this point is so * we can move quickly and diverge from the API where using SAI would @@ -280,15 +271,15 @@ * conflicts in a later version of SAI. 
*/ -#ifndef FAL_PLUGIN_H -#define FAL_PLUGIN_H +#ifndef VYATTA_DATAPLANE_FAL_PLUGIN_H +#define VYATTA_DATAPLANE_FAL_PLUGIN_H #include #include #include #include #include -#include +#include "json_writer.h" #define PLATFORM_FILE "/run/dataplane/platform.conf" @@ -318,6 +309,32 @@ enum fal_traffic_type { FAL_TRAFFIC_MAX }; +/* Off the chip external packet bundle buffer counters to be + * stored in an array. + */ +enum fal_qos_external_buf_counters { + FAL_QOS_EXTERNAL_BUFFER_DESC_FREE = 0, + FAL_QOS_EXTERNAL_BUFFER_PKT_REJECT, + + /* Add J2 QOS external buffer counters */ + FAL_QOS_EXTERNAL_BUFFER_MAX_COUNTER +}; + +/* Off the chip external packet bundle buffer counter ids for FAL to + * retrieve from ASIC. + */ +enum fal_qos_external_buf_counter_ids { + FAL_QOS_EXTERNAL_BUFFER_COUNTER_ID = 0, + FAL_QOS_EXTERNAL_BUFFER_PKT_REJECT_COUNTER_ID, + + /* Add J2 QOS external buffer counter ids */ + FAL_QOS_EXTERNAL_BUFFER_MAX_ID +}; + +int fal_plugin_qos_get_counters(const uint32_t *cntr_ids, + uint32_t num_cntrs, + uint64_t *cntrs); + /* * Context handle used for FAL plugins to store state against a given * object type. 
@@ -331,6 +348,11 @@ struct fal_object_list_t { fal_object_t list[0]; }; +struct fal_u32_list_t { + uint32_t count; + uint32_t list[0]; +}; + /* * modeled after sai_packet_color_t * used to set actions based on packet colour @@ -339,6 +361,7 @@ enum fal_packet_colour { FAL_PACKET_COLOUR_GREEN, FAL_PACKET_COLOUR_YELLOW, FAL_PACKET_COLOUR_RED, + FAL_NUM_PACKET_COLOURS }; /* @@ -354,10 +377,13 @@ struct fal_qos_map_params_t { uint8_t dscp; uint8_t dot1p; uint8_t tc; + uint8_t des; uint8_t wrr; + uint8_t des_used; union { int dp; /* deprecated */ - enum fal_packet_colour color; + enum fal_packet_colour colour; + enum fal_packet_colour color; /* deprecated */ }; }; @@ -368,8 +394,13 @@ struct fal_qos_map_t { #define FAL_QOS_MAP_DSCP_VALUES 64 #define FAL_QOS_MAP_PCP_VALUES 8 +#define FAL_QOS_MAP_DESIGNATION_VALUES 8 + +#define FAL_QOS_MAP_DES_DP_VALUES \ + (FAL_QOS_MAP_DESIGNATION_VALUES * FAL_NUM_PACKET_COLOURS) struct fal_qos_map_list_t { + uint8_t des_used; uint32_t count; struct fal_qos_map_t list[FAL_QOS_MAP_DSCP_VALUES]; }; @@ -377,6 +408,14 @@ struct fal_qos_map_list_t { struct fal_acl_field_data_t; struct fal_acl_action_data_t; +struct fal_ptp_port_path_t { + uint16_t vlan_id; /** VLAN ID */ + uint32_t ifindex; /** Underlying interface */ +}; + +/* VRF ID value that will never be used for a forwarding VRF */ +#define FAL_INVALID_VRF_ID 0 + /* An attribute */ union fal_attribute_value_t { @@ -385,10 +424,11 @@ union fal_attribute_value_t { int8_t i8; uint16_t u16; uint32_t u32; + int32_t i32; uint64_t u64; fal_object_t objid; const void *ptr; - struct ether_addr mac; + struct rte_ether_addr mac; struct fal_ip_address_t ipaddr; struct fal_object_list_t *objlist; struct fal_qos_map_list_t *maplist; @@ -396,6 +436,8 @@ union fal_attribute_value_t { struct fal_acl_action_data_t *aclaction; char if_name[IFNAMSIZ]; uint8_t eui64[8]; + struct fal_u32_list_t *u32list; + struct fal_ptp_port_path_t ptp_port_path; }; struct fal_attribute_t { @@ -408,6 +450,37 @@ struct 
fal_attribute_t { */ extern int fal_port_byifindex(int ifindex, uint16_t *portid); +/** + * Allocate a block of memory that can be freed in a deferred manner + * + * The memory must be freed by fal_free_deferred(). + * + * @param[in] size Size of block of memory to be allocated + * @return Block of memory allocated or NULL if out of memory or some + * other error. + */ +void *fal_malloc(size_t size); + +/** + * Allocate an array of memory that can be freed in a deferred manner + * + * The memory must be freed by fal_free_deferred(). + * + * @param[in] nmemb Number of members of array to be allocated + * @param[in] size Size of array element to be allocated + * @return Block of zero'd memory allocated or NULL if out of memory + * or some other error. + */ +void *fal_calloc(int nmemb, size_t size); + +/** + * Free in a deferred manner some memory + * + * The memory must have been allocated by either fal_malloc() or fal_calloc(). + * + * @param[in] ptr Pointer to memory to be freed in a deferred manner. 
+ */ +void fal_free_deferred(void *ptr); /* * All of the functions for plugins are optional, if one is not @@ -475,6 +548,24 @@ typedef enum _fal_port_poe_class_t { } fal_port_poe_class_t; + +/** + * @brief Attribute data for #FAL_PORT_ATTR_GLOBAL_FLOW_CONTROL_MODE + */ +enum fal_port_flow_control_mode_t { + /** Disable flow control for both tx and rx */ + FAL_PORT_FLOW_CONTROL_MODE_DISABLE, + + /** Enable flow control for rx only */ + FAL_PORT_FLOW_CONTROL_MODE_RX_ONLY, + + /** Enable flow control for tx only */ + FAL_PORT_FLOW_CONTROL_MODE_TX_ONLY, + + /** Enable flow control for both tx and rx */ + FAL_PORT_FLOW_CONTROL_MODE_BOTH_ENABLE, +}; + /* Layer 2 operations */ enum fal_port_attr_t { @@ -484,7 +575,7 @@ enum fal_port_attr_t { FAL_PORT_ATTR_VRF_ID, /* .u32 -- VRF id - deprecated */ FAL_PORT_ATTR_DPDK_PORT, /* .u8 -- port */ FAL_PORT_ATTR_VLAN_ID, /* .u16 -- VLAN ID - deprecated */ - FAL_PORT_ATTR_PARENT_IFINDEX, /* .u32 -- if_index */ + FAL_PORT_ATTR_PARENT_IFINDEX, /* .u32 -- ifindex */ FAL_PORT_ATTR_MTU, /* .u16 -- MTU */ FAL_PORT_ATTR_HW_SWITCH_MODE, /* .u8 - enable/disable */ FAL_PORT_ATTR_MAC_ADDRESS, /* .mac -- primary MAC address */ @@ -555,7 +646,24 @@ enum fal_port_attr_t { * @default empty */ FAL_PORT_ATTR_EGRESS_MIRROR_SESSION, - + /** + * @brief Ingress Mirror vlan list + * Ingress mirroring vlan list + * Delete all vlans for Ingress if count 0 in objlist. + * @type fal_object_list_t + * @flags CREATE_AND_SET + * @default empty + */ + FAL_PORT_ATTR_INGRESS_MIRROR_VLAN, + /** + * @brief Egress Mirror vlan list + * Egress(Tx) mirroring vlan list + * Delete all vlans for Egress if count 0 in objlist. 
+ * @type fal_object_list_t + * @flags CREATE_AND_SET + * @default empty + */ + FAL_PORT_ATTR_EGRESS_MIRROR_VLAN, /** * @brief Is mirroring in hardware enabled * True means mirroring will be done in hardware, false @@ -609,6 +717,70 @@ enum fal_port_attr_t { * @default 0 */ FAL_PORT_ATTR_FDB_AGING_TIME, + + /** + * @brief Enable ingress QoS classification on port + * + * Set map id = FAL_NULL_OBJECT_ID to remove map + * @type fal_object_t + * @flags CREATE_AND_SET + * @default FAL_NULL_OBJECT_ID + * + */ + FAL_PORT_ATTR_QOS_INGRESS_MAP_ID, + + /** + * @brief Enable (bind) or disable (unbind) packet capture on this port + * + * Pass a capture object to enable packet capture, pass + * FAL_NULL_OBJECT_ID to disable packet capture. + * + * @type fal_object_t + * @flags CREATE_AND_SET + * @default FAL_NULL_OBJECT_ID + */ + FAL_PORT_ATTR_CAPTURE_BIND, + + /** + * @brief Is hardware packet capture enabled on this port + * + * @type bool + * @flags READ_ONLY + */ + FAL_PORT_ATTR_HW_CAPTURE, + + /** + * @brief Global pause-frame flow control on Interface. 
+ * @type fal_port_flow_control_mode_t + * @flags CREATE_AND_SET + * @default FAL_PORT_FLOW_CONTROL_MODE_DISABLE + **/ + FAL_PORT_ATTR_GLOBAL_FLOW_CONTROL_MODE, + + /** + * @brief Query Remote port Advertised flow control mode + * @type fal_port_flow_control_mode_t + * @flags READ_ONLY + **/ + FAL_PORT_ATTR_REMOTE_ADVERTISED_FLOW_CONTROL_MODE, + + /** + * @brief Enable egress QoS marking on port + * + * Set map id = FAL_NULL_OBJECT_ID to remove map + * @type fal_object_t + * @flags CREATE_AND_SET + * @default FAL_NULL_OBJECT_ID + * + */ + FAL_PORT_ATTR_QOS_EGRESS_MAP_ID, + + /** @brief Enable/Disable SyncE on interface + * + * @type u8 - disable/enable + * @default - FAL_PORT_SYNCE_DISABLE + */ + FAL_PORT_ATTR_SYNCE_ADMIN_STATUS, }; enum fal_port_hw_switching_t { @@ -616,14 +788,19 @@ enum fal_port_hw_switching_t { FAL_PORT_HW_SWITCHING_ENABLE }; -void fal_plugin_l2_new_port(unsigned int if_index, +enum fal_port_synce_admin_status_t { + FAL_PORT_SYNCE_DISABLE, + FAL_PORT_SYNCE_ENABLE +}; + +void fal_plugin_l2_new_port(unsigned int ifindex, uint32_t attr_count, const struct fal_attribute_t *attr_list); /** - * @brief Get port attributes from interface if_index. + * @brief Get port attributes from interface ifindex. * - * @param[in] if_index The if_index of the interface + * @param[in] ifindex The ifindex of the interface * @param[in] attr_count Number of attributes * @param[inout] attr_list Array of attributes * @@ -631,43 +808,51 @@ void fal_plugin_l2_new_port(unsigned int if_index, * unsupported by the FAL plugin, it should return * an error. 
*/ -int fal_plugin_l2_get_attrs(unsigned int if_index, +int fal_plugin_l2_get_attrs(unsigned int ifindex, uint32_t attr_count, struct fal_attribute_t *attr_list); /* - * Update the attributes on interface if_index + * Update the attributes on interface ifindex */ -void fal_plugin_l2_upd_port(unsigned int if_index, - struct fal_attribute_t *attr); +int fal_plugin_l2_upd_port(unsigned int ifindex, + struct fal_attribute_t *attr); /* - * Delete the interface if_index + * Delete the interface ifindex + */ +void fal_plugin_l2_del_port(unsigned int ifindex); + +/** + * @brief Dump port + * + * @param[in] ifindex The ifindex of the interface + * @param[inout] json JSON writer object */ -void fal_plugin_l2_del_port(unsigned int if_index); +void fal_plugin_l2_dump_port(unsigned int ifindex, json_writer_t *wr); /* No attributes */ /* - * Add the address to the interface if_index + * Add the address to the interface ifindex */ -void fal_plugin_l2_new_addr(unsigned int if_index, - const struct ether_addr *addr, +void fal_plugin_l2_new_addr(unsigned int ifindex, + const struct rte_ether_addr *addr, uint32_t attr_count, const struct fal_attribute_t *attr_list); /* - * Update the addr on the interface if_index + * Update the addr on the interface ifindex */ -void fal_plugin_l2_upd_addr(unsigned int if_index, - const struct ether_addr *addr, +void fal_plugin_l2_upd_addr(unsigned int ifindex, + const struct rte_ether_addr *addr, struct fal_attribute_t *attr); /* - * Delete the address on the interface if_index + * Delete the address on the interface ifindex */ -void fal_plugin_l2_del_addr(unsigned int if_index, - const struct ether_addr *addr); +void fal_plugin_l2_del_addr(unsigned int ifindex, + const struct rte_ether_addr *addr); /* Router interface operations */ @@ -689,10 +874,19 @@ enum fal_router_interface_attr_t { /** * @brief VRF ID * + * Deprecated in favour of FAL_ROUTER_INTERFACE_ATTR_VRF_OBJ. 
+ * * @flags MANDATORY_ON_CREATE * @type uint32_t */ FAL_ROUTER_INTERFACE_ATTR_VRF_ID, + /** + * @brief VRF object bound to + * + * @flags MANDATORY_ON_CREATE + * @type fal_object_t + */ + FAL_ROUTER_INTERFACE_ATTR_VRF_OBJ, /** * @brief Associated Vlan * @@ -810,6 +1004,34 @@ enum fal_router_interface_attr_t { * @default FAL_NULL_OBJECT_ID */ FAL_ROUTER_INTERFACE_ATTR_V6_EGRESS_ACL, + /** + * @brief Bind point for IPv4 ingress QoS object + * + * Bind an ingress IPv4 QoS table to (or remove it from) an + * L3 interface. Ingress QoS handling is enabled (or updated) + * by assigning a valid QoS table; similarly ingress QoS handling + * is disabled by assigning FAL_NULL_OBJECT_ID + * + * @type fal_acl_table_t + * @flags CREATE_AND_SET + * @allownull true + * @default FAL_NULL_OBJECT_ID + */ + FAL_ROUTER_INTERFACE_ATTR_V4_INGRESS_QOS, + /** + * @brief Bind point for IPv6 ingress QoS object + * + * Bind an ingress IPv6 QoS table to (or remove it from) an + * L3 interface. Ingress QoS handling is enabled (or updated) + * by assigning a valid QoS table; similarly ingress QoS handling + * is disabled by assigning FAL_NULL_OBJECT_ID + * + * @type fal_acl_table_t + * @flags CREATE_AND_SET + * @allownull true + * @default FAL_NULL_OBJECT_ID + */ + FAL_ROUTER_INTERFACE_ATTR_V6_INGRESS_QOS, /** * @brief IPv4 mcast enable * @@ -839,9 +1061,46 @@ enum fal_router_interface_attr_t { */ FAL_ROUTER_INTERFACE_ATTR_V6_MCAST_ENABLE, + /** + * @brief Egress QOS Marking map + * + * If an egress map is applied on a L3 interface then the + * traffic sent out of the interface is subjected to egress + * marking and will be sent out with the remarked values + * corresponding to the egress map. 
+ * + * @type fal_object_t + * @flags CREATE_AND_SET + * @default FAL_NULL_OBJECT_ID + */ + FAL_ROUTER_INTERFACE_ATTR_EGRESS_QOS_MAP, FAL_ROUTER_INTERFACE_ATTR_MAX }; +/** + * @brief Router interface stat counter IDs in + * fal_plugin_get_router_interface_stats() call + */ +enum fal_router_interface_stat_t { + FAL_ROUTER_INTERFACE_STAT_MIN, + + /** Ingress byte stat count */ + FAL_ROUTER_INTERFACE_STAT_IN_OCTETS = FAL_ROUTER_INTERFACE_STAT_MIN, + + /** Ingress packet stat count */ + FAL_ROUTER_INTERFACE_STAT_IN_PACKETS, + + FAL_ROUTER_INTERFACE_STAT_IN_MAX, + + /** Egress byte stat count */ + FAL_ROUTER_INTERFACE_STAT_OUT_OCTETS = FAL_ROUTER_INTERFACE_STAT_IN_MAX, + + /** Egress packet stat count */ + FAL_ROUTER_INTERFACE_STAT_OUT_PACKETS, + + FAL_ROUTER_INTERFACE_STAT_MAX +}; + /** * @brief Create router interface * @@ -876,6 +1135,42 @@ int fal_plugin_set_router_interface_attr(fal_object_t obj, const struct fal_attribute_t *attr_list); +/** + * @brief Get attributes of the router interface + * + * @param[in] obj Object id for router intf + * @param[in] attr Array of Attribute + * + * @return 0 on success. 
+ */ +int +fal_plugin_get_router_interface_attr(fal_object_t obj, + uint32_t attr_count, + struct fal_attribute_t *attr_list); + +/** + * @brief Get router interface stats + * + * @param[in] obj Router interface object ID + * @param[in] cntr_count Number of counters in the array + * @param[in] cntr_ids Specifies the array of counter IDs + * @param[out] cntrs Counters array of resulting counter values + * + * @return 0 on success, error code for failure + */ +int fal_plugin_get_router_interface_stats( + fal_object_t obj, uint32_t cntr_count, + const enum fal_router_interface_stat_t *cntr_ids, + uint64_t *cntrs); + +/** + * @brief Dump router interface + * + * @param[in] obj Object id for router intf + * @param[inout] json JSON writer object + */ +void fal_plugin_dump_router_interface(fal_object_t obj, json_writer_t *wr); + /* Tunnel operations */ /** @@ -912,7 +1207,6 @@ enum fal_tunnel_ttl_mode_t { * field of inner header remains the same on decapsulation. */ FAL_TUNNEL_TTL_MODE_PIPE_MODEL - }; /** @@ -970,7 +1264,7 @@ enum fal_tunnel_attr_t { FAL_TUNNEL_ATTR_TYPE, /** - * @brief Tunnel underlay interface if_index + * @brief Tunnel underlay interface ifindex * * Underlay interface to provide transport reachability for the tunnel. * @@ -990,7 +1284,7 @@ enum fal_tunnel_attr_t { FAL_TUNNEL_ATTR_NEXTHOP, /** - * @brief Tunnel overlay interface if_index + * @brief Tunnel overlay interface ifindex * * Overlay interface is router interface. 
* @@ -1102,81 +1396,248 @@ void fal_plugin_br_upd_port(unsigned int child_ifindex, void fal_plugin_br_del_port(unsigned int bridge_ifindex, unsigned int child_ifindex); +/* LAG operations */ -/* - * Bridge neighbor operations - */ +enum fal_lag_attr_t { + /** + * @brief Start of LAG attributes + */ + FAL_LAG_ATTR_START, -enum fal_br_neigh_entry_attr_t { - FAL_BRIDGE_NEIGH_ATTR_STATE, /* .u16 */ - FAL_BRIDGE_NEIGH_ATTR_AGEING, /* .u32 */ + /** + * @brief LAG port list + * + * @flags READ_ONLY + * @type fal_object_list_t + * @objects FAL_OBJECT_TYPE_LAG_MEMBER + */ + FAL_LAG_ATTR_PORT_LIST = FAL_LAG_ATTR_START, + + FAL_LAG_ATTR_MAX }; -/* - * Add layer 2 destination to the interface child_ifindex and vlanid. - */ -void fal_plugin_br_new_neigh(unsigned int child_ifindex, - uint16_t vlanid, - const struct ether_addr *dst, - uint32_t attr_count, - const struct fal_attribute_t *attr_list); +enum fal_lag_member_attr_t { + /** + * @brief Start of LAG member attributes + */ + FAL_LAG_MEMBER_ATTR_START, -/* - * Update attribute layer 2 destination on the interface child_ifindex - * and vlanid. - */ -void fal_plugin_br_upd_neigh(unsigned int child_ifindex, - uint16_t vlanid, - const struct ether_addr *dst, - struct fal_attribute_t *attr); + /** + * @brief LAG ID + * + * @type fal_object_id_t + * @flags MANDATORY_ON_CREATE | CREATE_ONLY + * @objects FAL_OBJECT_TYPE_LAG + */ + FAL_LAG_MEMBER_ATTR_LAG_ID = FAL_LAG_MEMBER_ATTR_START, -/* - * Delete layer 2 destination on the interface child_ifindex and vlanid. 
+ /** + * @brief LAG member port IF Index + * + * @type uint32_t + * @flags MANDATORY_ON_CREATE + */ + FAL_LAG_MEMBER_ATTR_IFINDEX, + + /** + * @brief Disable traffic distribution to this port as part of LAG + * + * @flags CREATE_AND_SET + * @type bool + * @default false + */ + FAL_LAG_MEMBER_ATTR_EGRESS_DISABLE, + + /** + * @brief Disable traffic collection from this port as part of LAG + * + * @flags CREATE_AND_SET + * @type bool + * @default false + */ + FAL_LAG_MEMBER_ATTR_INGRESS_DISABLE, + + FAL_LAG_MEMBER_ATTR_MAX +}; + +/** + * @brief Create a LAG interface + * + * @param[in] attr_count Number of attributes + * @param[in] attr_list Array of attributes + * @param[out] obj Object id for LAG intf, non-zero on success + * + * @return 0 on success, error code for failure */ -void fal_plugin_br_del_neigh(unsigned int child_ifindex, - uint16_t vlanid, - const struct ether_addr *dst); +int fal_plugin_create_lag(uint32_t attr_count, + struct fal_attribute_t *attr_list, + fal_object_t *obj); /** - * @brief Iterator function for walk of bridge neighbours + * @brief Delete a LAG interface * - * @param[in] vlanid VLAN - * @param[in] dst Address - * @param[in] child_ifindex Interface index - * @param[in] attr_count Number of attributes - * @param[in] attr_list List of all available attributes - * @param[in] arg Opaque caller context + * @param[in] obj Object id for the LAG * - * @return 0 on success. 
Negative errno on failure, terminating walk + * @return 0 on success, error code for failure */ -typedef int (*fal_br_walk_neigh_fn)(uint16_t vlanid, - const struct ether_addr *dst, - unsigned int child_ifindex, - uint32_t attr_count, - const struct fal_attribute_t *attr_list, - void *arg); +int fal_plugin_delete_lag(fal_object_t obj); /** - * @brief Walk selected number of bridge neighbours + * @brief Set attributes on the LAG * - * @param[in] bridge_ifindex Index of bridge interface to walk neighbours for - * @param[in] vlanid VLAN to match - * @param[in] dst Destination address to match - * @param[in] child_ifindex Index of child interface to match - * @param[in] cb Callback function called for each neighbour walked - * @param[in] arg Opaque caller context + * @param[in] obj Object id for the LAG + * @param[in] nattrs Number of attributes + * @param[in] attr_list Array of attributes * - * @return 0 on success. Negative errno on failure. + * @return 0 on success. */ -int fal_plugin_br_walk_neigh(unsigned int bridge_ifindex, - uint16_t vlanid, - const struct ether_addr *dst, - unsigned int child_ifindex, - fal_br_walk_neigh_fn cb, - void *arg); +int +fal_plugin_set_lag_attr(fal_object_t obj, uint32_t nattrs, + const struct fal_attribute_t *attr_list); -enum fal_br_fdb_flush_entry_type_t { - FAL_BRIDGE_FDB_FLUSH_TYPE_ALL, +/** + * @brief Get attributes of the LAG + * + * @param[in] obj Object id for the LAG + * @param[in] nattrs Number of attributes + * @param[inout] attr_list Array of attributes + * + * @return 0 on success.
+ */ +int +fal_plugin_get_lag_attr(fal_object_t obj, uint32_t nattrs, + struct fal_attribute_t *attr_list); + +/** + * @brief Dump LAG interface + * + * @param[in] obj Object id for the LAG + * @param[inout] json JSON writer object + */ +void fal_plugin_dump_lag(fal_object_t obj, json_writer_t *wr); + +/* LAG member functions */ + +/** + * @brief Create a LAG member + * + * @param[in] attr_count Number of attributes + * @param[in] attr_list Array of attributes + * @param[out] obj Object id for LAG member + * + * @return 0 on success, error code for failure + */ +int fal_plugin_create_lag_member(uint32_t attr_count, + struct fal_attribute_t *attr_list, + fal_object_t *obj); + +/** + * @brief Delete a LAG member + * + * @param[in] obj Object id for LAG member + * + * @return 0 on success, error code for failure + */ +int fal_plugin_delete_lag_member(fal_object_t obj); + +/** + * @brief Set LAG member's attribute + * + * @param[in] obj Object id of the LAG member + * @param[in] attr Attribute to be updated + * + * @return 0 on success, error code for failure + */ +int fal_plugin_set_lag_member_attr(fal_object_t obj, + const struct fal_attribute_t *attr); + +/** + * @brief Get LAG member's attributes + * + * @param[in] obj Object id of the LAG member + * @param[in] attr_count Number of attributes + * @param[inout] attr_list List of attributes + * + * @return 0 on success, error code for failure + */ +int fal_plugin_get_lag_member_attr(fal_object_t obj, + uint32_t attr_count, + struct fal_attribute_t *attr_list); + +/* + * Bridge neighbor operations + */ + +enum fal_br_neigh_entry_attr_t { + FAL_BRIDGE_NEIGH_ATTR_STATE, /* .u16 */ + FAL_BRIDGE_NEIGH_ATTR_AGEING, /* .u32 */ +}; + +/* + * Add layer 2 destination to the interface child_ifindex and vlanid.
+ */ +void fal_plugin_br_new_neigh(unsigned int child_ifindex, + uint16_t vlanid, + const struct rte_ether_addr *dst, + uint32_t attr_count, + const struct fal_attribute_t *attr_list); + +/* + * Update attribute layer 2 destination on the interface child_ifindex + * and vlanid. + */ +void fal_plugin_br_upd_neigh(unsigned int child_ifindex, + uint16_t vlanid, + const struct rte_ether_addr *dst, + struct fal_attribute_t *attr); + +/* + * Delete layer 2 destination on the interface child_ifindex and vlanid. + */ +void fal_plugin_br_del_neigh(unsigned int child_ifindex, + uint16_t vlanid, + const struct rte_ether_addr *dst); + +/** + * @brief Iterator function for walk of bridge neighbours + * + * @param[in] vlanid VLAN + * @param[in] dst Address + * @param[in] child_ifindex Interface index + * @param[in] attr_count Number of attributes + * @param[in] attr_list List of all available attributes + * @param[in] arg Opaque caller context + * + * @return 0 on success. Negative errno on failure, terminating walk + */ +typedef int (*fal_br_walk_neigh_fn)(uint16_t vlanid, + const struct rte_ether_addr *dst, + unsigned int child_ifindex, + uint32_t attr_count, + const struct fal_attribute_t *attr_list, + void *arg); + +/** + * @brief Walk selected number of bridge neighbours + * + * @param[in] bridge_ifindex Index of bridge interface to walk neighbours for + * @param[in] vlanid VLAN to match + * @param[in] dst Destination address to match + * @param[in] child_ifindex Index of child interface to match + * @param[in] cb Callback function called for each neighbour walked + * @param[in] arg Opaque caller context + * + * @return 0 on success. Negative errno on failure. 
+ */ +int fal_plugin_br_walk_neigh(unsigned int bridge_ifindex, + uint16_t vlanid, + const struct rte_ether_addr *dst, + unsigned int child_ifindex, + fal_br_walk_neigh_fn cb, + void *arg); + +enum fal_br_fdb_flush_entry_type_t { + FAL_BRIDGE_FDB_FLUSH_TYPE_ALL, FAL_BRIDGE_FDB_FLUSH_TYPE_DYNAMIC, FAL_BRIDGE_FDB_FLUSH_TYPE_STATIC }; @@ -1320,6 +1781,40 @@ int fal_plugin_stp_get_port_attribute(unsigned int child_ifindex, /* Global switch operations */ +/** + * MPLS TTL modes + * + * See RFC 3443 for further details. + */ +enum fal_mpls_ttl_mode { + /** + * Uniform mode + * + * On pushing a label, inherit MPLS TTL from MPLS, IPv4 or + * IPv6 packet, decrementing it by 1. + * + * On popping a label, copy MPLS outermost TTL into next + * outermost MPLS TTL, if available, or into the IPv4 or IPv6 + * packet TTL. + */ + FAL_MPLS_TTL_MODE_UNIFORM, + /** + * Pipe mode + * + * On pushing a label, set TTL to configured value, + * decrementing payload TTL by 1. + * + * On popping a label, preserve the outermost MPLS TTL, if + * available, or the IPv4 or IPv6 packet TTL, but decrement by + * 1. + * + * On swapping for an implicit-null, preserve the outermost + * MPLS TTL, if available, or the IPv4 or IPv6 packet TTL, but + * don't decrement. + */ + FAL_MPLS_TTL_MODE_PIPE, +}; + enum fal_switch_attr_t { /** * @brief Action for Packets that result in ICMP Redirect @@ -1356,6 +1851,133 @@ enum fal_switch_attr_t { * @default false */ FAL_SWITCH_ATTR_PUNT_PVST, + + /** + * @brief Maximum allocated bundle buffer descriptors + * + * @type .u32 + * @flags READ_ONLY + * @default false + */ + FAL_SWITCH_ATTR_MAX_BUF_DESCRIPTOR, + + /** + * @brief The maximum burst size supported by the platform + * + * @type .u32 + * @flags READ_ONLY + * @default 0 + */ + FAL_SWITCH_ATTR_MAX_BURST_SIZE, + + /** + * @brief Set Switch BFD session state change event notification + * callback function passed to the adapter. 
+ * + * Use fal_bfd_session_state_change_notification_fn as notification + * function. + * + * @type .ptr + * @flags CREATE_AND_SET + * @default NULL + */ + FAL_SWITCH_ATTR_BFD_SESSION_STATE_NOTIFY, + + /** + * @brief Max number of BFD IPv4 session supported in on-chip BFD + * + * @type .u32 + * @flags READ_ONLY + */ + FAL_SWITCH_ATTR_MAX_BFD_IPV4_SESSION, + + /** + * @brief Max number of BFD IPv6 session supported in on-chip BFD + * + * @type .u32 + * @flags READ_ONLY + */ + FAL_SWITCH_ATTR_MAX_BFD_IPV6_SESSION, + + /** + * @brief Max number of UDP source ports supported in on-chip IPv4 BFD + * + * @type .u32 + * @flags READ_ONLY + */ + FAL_SWITCH_ATTR_MAX_BFD_IPV4_UDP_SRC_PORT_CNT, + + /** + * @brief Max number of UDP source ports supported in on-chip IPv6 BFD + * + * @type .u32 + * @flags READ_ONLY + */ + FAL_SWITCH_ATTR_MAX_BFD_IPV6_UDP_SRC_PORT_CNT, + + /** + * @brief BFD IPv4 hw session running mode + * + * @type enum fal_bfd_hw_mode + * @flags READ_ONLY + */ + FAL_SWITCH_ATTR_BFD_IPV4_HW_MODE, + + /** + * @brief BFD IPv6 hw session running mode + * + * @type enum fal_bfd_hw_mode + * @flags READ_ONLY + */ + FAL_SWITCH_ATTR_BFD_IPV6_HW_MODE, + + /** + * @brief Max number of unique interval values supported in the HW + * for BFD sessions + * + * @type .u32 + * @flags READ_ONLY + */ + FAL_SWITCH_ATTR_MAX_BFD_INTERVAL_CNT, + + /** + * @brief Local discriminator requirements in the HW for BFD sessions. + * Some HW require the Local discriminator to be multiple of 2/4 etc. + * due to their Session DB management. 
In such cases the local + * discriminator is also used as a session ID + * + * @type .u32 + * @flags READ_ONLY + */ + FAL_SWITCH_ATTR_BFD_LOCAL_DISCRIMINATOR_SHIFT, + + /** + * @brief SyncE Lock clock to interface + * + * @type u32 - ifindex for interface for clk lock + * @default 0 + */ + FAL_SWITCH_ATTR_SYNCE_CLOCK_SOURCE_PORT, + + /** + * @brief How to treat TTL for MPLS encap and decap of IPv4 + * and IPv6 packets + * + * @type enum fal_mpls_ttl_mode + * @flags CREATE_AND_SET + * @default FAL_MPLS_TTL_MODE_UNIFORM + */ + FAL_SWITCH_ATTR_MPLS_IP_TTL_MODE, + + /** + * @brief TTL for MPLS encap of IPv4 and IPv6 packets when in + * pipe TTL mode + * + * @type .u8 + * @flags CREATE_AND_SET + * @default 255 + */ + FAL_SWITCH_ATTR_MPLS_PIPE_TTL, }; /* @@ -1378,26 +2000,26 @@ enum fal_address_entry_attr_t { }; /* - * Add the IP address to the interface if_index + * Add the IP address to the interface ifindex */ -void fal_plugin_ip_new_addr(unsigned int if_index, +void fal_plugin_ip_new_addr(unsigned int ifindex, struct fal_ip_address_t *ipaddr, uint8_t prefixlen, uint32_t attr_count, const struct fal_attribute_t *attr_list); /* - * Update the IP address on the interface if_index + * Update the IP address on the interface ifindex */ -void fal_plugin_ip_upd_addr(unsigned int if_index, +void fal_plugin_ip_upd_addr(unsigned int ifindex, struct fal_ip_address_t *ipaddr, uint8_t prefixlen, struct fal_attribute_t *attr); /* - * Delete the IP address on the interface if_index + * Delete the IP address on the interface ifindex */ -void fal_plugin_ip_del_addr(unsigned int if_index, +void fal_plugin_ip_del_addr(unsigned int ifindex, struct fal_ip_address_t *ipaddr, uint8_t prefixlen); @@ -1429,17 +2051,86 @@ enum fal_neighbor_entry_attr_t { FAL_NEIGH_ENTRY_ATTR_USED, }; +struct fal_neighbor_entry_t { + /* + * Router interface that the neighbor is on + */ + fal_object_t router_intf_obj; + + /* + * Address of the neighbor + */ + struct fal_ip_address_t ip_addr; +}; + +/** + * 
@brief Create an IP neighbor for address on router interface + * + * @param[in] neigh_entry Key for the neighbor + * @param[in] attr_count Count of the attributes + * @param[in] attr_list List of attributes + * + * @return 0 on success. Negative errno on failure. + */ +int fal_plugin_create_ip_neigh(const struct fal_neighbor_entry_t *neigh_entry, + uint32_t attr_count, + const struct fal_attribute_t *attr_list); + +/** + * @brief Update an IP neighbor + * + * @param[in] neigh_entry Key for the neighbor + * @param[in] attr Attribute to update + * + * @return 0 on success. Negative errno on failure. + */ +int fal_plugin_set_ip_neigh_attr(const struct fal_neighbor_entry_t *neigh_entry, + const struct fal_attribute_t *attr); + +/** + * @brief Query attributes for an IP neighbor + * + * @param[in] neigh_entry Key for the neighbor + * @param[in] attr_count Count of the attributes + * @param[in] attr_list List of attributes to query + * + * @return 0 on success. Negative errno on failure. + */ +int fal_plugin_get_ip_neigh_attrs( + const struct fal_neighbor_entry_t *neigh_entry, + uint32_t attr_count, struct fal_attribute_t *attr_list); + +/** + * @brief Delete an IP neighbor for address on router interface + * + * @param[in] neigh_entry Key for the neighbor + * + * @return 0 on success. Negative errno on failure. + */ +int fal_plugin_delete_ip_neigh(const struct fal_neighbor_entry_t *neigh_entry); + +/** + * @brief Dump info for an IP neighbor + * + * @param[in] neigh_entry Key for the neighbor + * @param[inout] wr json writer object + */ +void fal_plugin_dump_ip_neigh(const struct fal_neighbor_entry_t *neigh_entry, + json_writer_t *wr); + /** - * @brief Create an IP neighbor for address on interface if_index + * @brief Create an IP neighbor for address on interface ifindex + * + * Deprecated in favour of fal_plugin_create_ip_neigh. 
* - * @param[in] if_index Index of interface to add neighbour to + * @param[in] ifindex Index of interface to add neighbour to * @param[in] ipaddr Address of neighbour to add * @param[in] attr_count Count of the attributes * @param[in] attr_list List of attributes * * @return 0 on success. Negative errno on failure. */ -int fal_plugin_ip_new_neigh(unsigned int if_index, +int fal_plugin_ip_new_neigh(unsigned int ifindex, struct fal_ip_address_t *ipaddr, uint32_t attr_count, const struct fal_attribute_t *attr_list); @@ -1447,42 +2138,61 @@ int fal_plugin_ip_new_neigh(unsigned int if_index, /** * @brief Update an IP neighbor * - * @param[in] if_index Index of interface to update neighbour on + * Deprecated in favour of fal_plugin_set_ip_neigh_attr. + * + * @param[in] ifindex Index of interface to update neighbour on * @param[in] ipaddr Address of neighbour to update * @param[in] attr Attribute to update * * @return 0 on success. Negative errno on failure. */ -int fal_plugin_ip_upd_neigh(unsigned int if_index, +int fal_plugin_ip_upd_neigh(unsigned int ifindex, struct fal_ip_address_t *ipaddr, struct fal_attribute_t *attr); /** * @brief Query attributes for an IP neighbor * - * @param[in] if_index Index of interface for neighbour to query + * Deprecated in favour of fal_plugin_get_ip_neigh_attrs. + * + * @param[in] ifindex Index of interface for neighbour to query * @param[in] ipaddr Address of neighbour to query * @param[in] attr_count Count of the attributes * @param[in] attr_list List of attributes to query * * @return 0 on success. Negative errno on failure. 
*/ -int fal_plugin_ip_get_neigh_attrs(unsigned int if_index, +int fal_plugin_ip_get_neigh_attrs(unsigned int ifindex, struct fal_ip_address_t *ipaddr, uint32_t attr_count, struct fal_attribute_t *attr_list); /** - * @brief Delete an IP neighbor for address on interface if_index + * @brief Delete an IP neighbor for address on interface ifindex * - * @param[in] if_index Index of interface to delete neighbour on + * Deprecated in favour of fal_plugin_delete_ip_neigh. + * + * @param[in] ifindex Index of interface to delete neighbour on * @param[in] ipaddr Address of neighbour to delete * * @return 0 on success. Negative errno on failure. */ -int fal_plugin_ip_del_neigh(unsigned int if_index, +int fal_plugin_ip_del_neigh(unsigned int ifindex, struct fal_ip_address_t *ipaddr); +/** + * @brief Dump info for an IP neighbor + * + * Deprecated in favour of fal_plugin_dump_ip_neigh. + * + * @param[in] ifindex Index of interface to dump neighbour on + * @param[in] ipaddr Address of neighbour to dump + * @param[inout] json writer object + */ +void fal_plugin_ip_dump_neigh(unsigned int ifindex, + struct fal_ip_address_t *ipaddr, + json_writer_t *wr); + /* * IP Route operations */ @@ -1500,32 +2210,201 @@ enum fal_packet_action_t { }; enum fal_route_entry_attr_t { - FAL_ROUTE_ENTRY_ATTR_NEXT_HOP_GROUP, /* .objid */ - FAL_ROUTE_ENTRY_ATTR_PACKET_ACTION, /* .u32 - fal_packet_action_t */ -}; - -int fal_plugin_ip_new_route(unsigned int vrf_id, - struct fal_ip_address_t *ipaddr, + /** + * @brief Next hop group id + * + * This attribute only takes effect when ATTR_PACKET_ACTION is set to + * FORWARD. 
+ * + * @type fal_object_id_t + * @flags CREATE_AND_SET + * @default FAL_NULL_OBJECT_ID + * @validonly FAL_ROUTE_ENTRY_ATTR_PACKET_ACTION == + * FAL_PACKET_ACTION_FORWARD + */ + FAL_ROUTE_ENTRY_ATTR_NEXT_HOP_GROUP, /* .objid */ + /** + * @brief Packet action + * + * @type fal_packet_action_t + * @flags MANDATORY_ON_CREATE | CREATE_AND_SET + */ + FAL_ROUTE_ENTRY_ATTR_PACKET_ACTION, /* .u32 - fal_packet_action_t */ +}; + +struct fal_route_entry_t { + /* + * The VRF that the route belongs to + */ + fal_object_t vrf_obj; + + /* + * The address of the prefix for the route + */ + struct fal_ip_address_t ip_addr; + + /* + * The length of the prefix for the route + */ + uint8_t prefix_len; +}; + +/* Route walk type enum */ +enum fal_route_walk_type_t { + FAL_ROUTE_WALK_TYPE_ALL, +}; + +/* + * Route walk attributes + */ + +enum fal_route_walk_attr_t { + FAL_ROUTE_WALK_ATTR_VRFID, /* .u32 - Vrf id */ + FAL_ROUTE_WALK_ATTR_TABLEID, /* .u32 - Table id */ + FAL_ROUTE_WALK_ATTR_CNT, /* .u32 - count */ + FAL_ROUTE_WALK_ATTR_FAMILY, /* .u32 - fal_ip_addr_family_t */ + FAL_ROUTE_WALK_ATTR_TYPE, /* .u32 - fal_route_walk_type_t */ +}; + +/** + * @brief Create an IP route entry + * + * @param[in] route Key identifying the route + * @param[in] attr_count Count of the attributes + * @param[in] attr_list List of attributes to create the route with + * + * @return 0 on success. Negative errno on failure. + */ +int fal_plugin_create_route_entry(const struct fal_route_entry_t *route, + uint32_t attr_count, + const struct fal_attribute_t *attr_list); + +/** + * @brief Set an IP route entry's attribute + * + * @param[in] route Key identifying the route + * @param[in] attr Attributes to set for the route + * + * @return 0 on success. Negative errno on failure. + */ +int fal_plugin_set_route_entry_attr(const struct fal_route_entry_t *route, + struct fal_attribute_t *attr); + +/** + * @brief Delete an IP route entry + * + * @param[in] route Key identifying the route + * + * @return 0 on success. 
Negative errno on failure. + */ +int fal_plugin_delete_route_entry(const struct fal_route_entry_t *route); + +/** + * @brief Query attributes for an IP route entry + * + * @param[in] route Key identifying the route + * @param[in] attr_count Count of the attributes + * @param[inout] attr_list List of attributes to query + * + * @return 0 on success. Negative errno on failure. + */ +int fal_plugin_get_route_entry_attrs(const struct fal_route_entry_t *route, + uint32_t attr_count, + struct fal_attribute_t *attr_list); + +/* deprecated in favour of fal_plugin_create_route_entry */ +int fal_plugin_ip_new_route(unsigned int vrf_id, + struct fal_ip_address_t *ipaddr, uint8_t prefixlen, uint32_t tableid, uint32_t attr_count, const struct fal_attribute_t *attr_list); +/* deprecated in favour of fal_plugin_set_route_entry_attr */ int fal_plugin_ip_upd_route(unsigned int vrf_id, struct fal_ip_address_t *ipaddr, uint8_t prefixlen, uint32_t tableid, struct fal_attribute_t *attr); +/* deprecated in favour of fal_plugin_delete_route_entry */ int fal_plugin_ip_del_route(unsigned int vrf_id, struct fal_ip_address_t *ipaddr, uint8_t prefixlen, uint32_t tableid); +/** + * @brief Query attributes for a route + * + * Deprecated in favour of fal_plugin_get_route_entry_attrs. + * + * @param[in] vrf_id VRF ID of the route to be queried + * @param[in] ipaddr Network address of the route to be queried + * @param[in] prefixlen Prefix length of the route to be queried + * @param[in] tableid Table ID of the route to be queried + * @param[in] attr_count Count of the attributes + * @param[inout] attr_list List of attributes to query + * + * @return 0 on success. Negative errno on failure. 
+ */ +int fal_plugin_ip_get_route_attrs(unsigned int vrf_id, + struct fal_ip_address_t *ipaddr, + uint8_t prefixlen, + uint32_t tableid, + uint32_t attr_count, + struct fal_attribute_t *attr_list); + +/** + * @brief Iterator function for walk of routes + * + * @param[in] pfx Route prefix address + * @param[in] prefixlen Route prefix length + * @param[in] attr_count attribute counts + * @param[in] attr_list List of attributes + * @param[in] arg Arg passed to the walker function + * @return 0 on success. Negative errno on failure + */ +typedef int (*fal_plugin_route_walk_fn)(const struct fal_ip_address_t *pfx, + uint8_t prefixlen, + uint32_t attr_count, + const struct + fal_attribute_t *attr_list, + void *arg); + +/** + * @brief Walk routes + * @param[in] attr_cnt number of fal attributes + * @param[in] attr_list list of FAL attributes + * @param[in] arg Caller-supplied argument (presumably handed to the walker callback) + */ +int fal_plugin_ip_walk_routes(fal_plugin_route_walk_fn cb, + uint32_t attr_cnt, + struct fal_attribute_t *attr_list, + void *arg); + /* * IP Nexthop Group operations */ +enum fal_next_hop_group_use { + /** + * @brief The next hop group will be used for IP routing + * + * Give a hint that the next hop group won't be linked to from + * MPLS label routes. + */ + FAL_NHG_USE_IP, + /** + * @brief The next hop group will be used for MPLS label switching. + * + * Give a hint that the next hop group will only be used for + * MPLS label switching, i.e. that the packet already has at + * least one label on it when it is subjected to this + * forwarding action. 
+ */ + FAL_NHG_USE_MPLS_LABEL_SWITCH, +}; + /** * @brief Create a next hop group object * @@ -1559,6 +2438,93 @@ int fal_plugin_ip_upd_next_hop_group(fal_object_t obj, */ int fal_plugin_ip_del_next_hop_group(fal_object_t obj); +/** + * @brief Dump info for a next hop group object + * + * @param[in] obj Object ID of the next-hop-group to be dumped + * @param[inout] json writer object + */ +void fal_plugin_ip_dump_next_hop_group(fal_object_t obj, + json_writer_t *wr); + +enum fal_next_hop_group_attr_t { + /** + * @brief Next hop group next hop count + * + * @type uint32_t + * @flags READ_ONLY + */ + FAL_NEXT_HOP_GROUP_ATTR_NEXTHOP_COUNT, /* .u32 */ + /** + * @brief Next hop group next hop object + * + * @type fal_object_id_t + * @flags READ_ONLY + */ + FAL_NEXT_HOP_GROUP_ATTR_NEXTHOP_OBJECT, /* .objid */ + /** + * @brief Hint for how the next-hop-group will be used + * + * @type fal_next_hop_group_use + * @default FAL_NHG_USE_IP, + * @flags CREATE_ONLY + */ + FAL_NEXT_HOP_GROUP_ATTR_USE, +}; + +/** + * @brief Query attributes for a next hop group object + * + * @param[in] obj Object ID of the next-hop-group to be queried + * @param[in] attr_count Count of the attributes + * @param[inout] attr_list List of attributes to query + * + * @return 0 on success. Negative errno on failure. + */ +int fal_plugin_ip_get_next_hop_group_attrs(fal_object_t obj, + uint32_t attr_count, + struct fal_attribute_t *attr_list); + +enum fal_next_hop_configured_role { + /** + * @brief Next hop is primary + * + * The next hop is a primary next hop and by default will + * contribute to forwarding. + */ + FAL_NEXT_HOP_CONFIGURED_ROLE_PRIMARY, + + /** + * @brief Next hop is standby + * + * The next hop is a standby next hop and won't contribute to + * forwarding, unless the corresponding primary next hop(s) + * become unusable. For PIC Edge primary/standbies the standby + * next hop(s) should only be used if all primary next hops + * are unusable. 
+ */ + FAL_NEXT_HOP_CONFIGURED_ROLE_STANDBY, +}; + +enum fal_next_hop_usability { + /** + * @brief Next hop is usable + * + * The next hop is usable and if a primary next hop can + * contribute to forwarding. + */ + FAL_NEXT_HOP_USABLE, + /** + * @brief Next hop is unusable + * + * The next hop is not usable and shouldn't contribute to + * forwarding. If there is a backup next hop then forwarding + * should cut over to that if there are no usable primary + * nexthops. + */ + FAL_NEXT_HOP_UNUSABLE, +}; + /* * IP Nexthop operations */ @@ -1571,12 +2537,24 @@ enum fal_next_hop_attr_t { */ FAL_NEXT_HOP_ATTR_NEXT_HOP_GROUP, /* .objid */ /** - * @brief Next hop interface + * @brief Next hop interface's if index + * + * Deprecated in favour of FAL_NEXT_HOP_ATTR_ROUTER_INTF. * * @type uint32_t * @flags MANDATORY_ON_CREATE | CREATE_ONLY */ FAL_NEXT_HOP_ATTR_INTF, /* .u32 */ + /** + * @brief Next hop router interface + * + * Mutually exclusive with FAL_NEXT_HOP_ATTR_VRF_LOOKUP + * + * @type fal_object_t + * @flags CREATE_ONLY + * @default FAL_NULL_OBJECT_ID + */ + FAL_NEXT_HOP_ATTR_ROUTER_INTF, /* .objid */ /** * @brief Next hop IP address * @@ -1584,6 +2562,52 @@ enum fal_next_hop_attr_t { * @flags CREATE_ONLY */ FAL_NEXT_HOP_ATTR_IP, /* .ipaddr */ + /** + * @brief Configured role for this next hop + * + * A next-hop group must not consist of only + * FAL_NEXT_HOP_CONFIGURED_ROLE_STANDBY nexthop(s). + * + * @type enum fal_next_hop_configured_role + * @flags CREATE_ONLY + * @default FAL_NEXT_HOP_CONFIGURED_ROLE_PRIMARY + */ + FAL_NEXT_HOP_ATTR_CONFIGURED_ROLE, /* .i32 */ + /** + * @brief Next hop usability + * + * @type enum fal_next_hop_usability + * @flags CREATE_AND_SET + */ + FAL_NEXT_HOP_ATTR_USABILITY, /* .i32 */ + /** + * @brief Next hop outgoing MPLS labels + * + * This gives the label stack to apply to the packet in bottom + * to top (inner-most to outer-most) ordering in host byte + * ordering. 
The following well-known labels have specific + * semantics: + * + * * implicit-NULL - won't appear on the wire, but will + * indicate a penultimate-hop pop. Not valid for next-hop + * groups linked to by IP routes. Must be the one and only + * label. + * + * @type fal_u32_list_t + * @flags CREATE_ONLY + */ + FAL_NEXT_HOP_ATTR_MPLS_LABELSTACK, /* .u32list */ + /** + * @brief VRF to perform lookup in + * + * Mutually exclusive with FAL_NEXT_HOP_ATTR_ROUTER_INTF and + * label-stack must be empty. + * + * @type fal_object_t + * @flags CREATE_ONLY + * @default FAL_NULL_OBJECT_ID + */ + FAL_NEXT_HOP_ATTR_VRF_LOOKUP, /* .objid */ }; /** @@ -1628,6 +2652,28 @@ int fal_plugin_ip_upd_next_hop(fal_object_t obj, int fal_plugin_ip_del_next_hops(uint32_t nh_count, const fal_object_t *obj_list); +/** + * @brief Query attributes for a next hop object + * + * @param[in] obj Object ID of the next-hop to be queried + * @param[in] attr_count Count of the attributes + * @param[inout] attr_list List of attributes to query + * + * @return 0 on success. Negative errno on failure. 
+ */ +int fal_plugin_ip_get_next_hop_attrs(fal_object_t obj, + uint32_t attr_count, + struct fal_attribute_t *attr_list); + +/** + * @brief Dump info for a next hop object + * + * @param[in] obj Object ID of the next-hop to be dumped + * @param[inout] json writer object + */ +void fal_plugin_ip_dump_next_hop(fal_object_t obj, + json_writer_t *wr); + /* * IP Multicast Route operations */ @@ -1641,7 +2687,6 @@ enum fal_ipmc_entry_type_t { /** IPMC entry with type (*,G) */ FAL_IPMC_ENTRY_TYPE_XG, - }; /** @@ -1691,7 +2736,7 @@ enum fal_ipmc_entry_attr_t { /** * @brief End of attributes */ - FAL_IPMC_ENTRY_ATTR_END, + FAL_IPMC_ENTRY_ATTR_END }; /** @@ -1822,7 +2867,7 @@ enum fal_ipmc_group_attr_t { /** * @brief End of attributes */ - FAL_IPMC_GROUP_ATTR_END, + FAL_IPMC_GROUP_ATTR_END }; enum fal_ipmc_group_member_attr_t { @@ -1853,7 +2898,7 @@ enum fal_ipmc_group_member_attr_t { /** * @brief End of attributes */ - FAL_IPMC_GROUP_MEMBER_ATTR_END, + FAL_IPMC_GROUP_MEMBER_ATTR_END }; /** @@ -1979,7 +3024,7 @@ enum fal_rpf_group_attr_t { /** * @brief End of attributes */ - FAL_RPF_GROUP_ATTR_END, + FAL_RPF_GROUP_ATTR_END }; enum fal_rpf_group_member_attr_t { @@ -2010,7 +3055,7 @@ enum fal_rpf_group_member_attr_t { /** * @brief End of attributes */ - FAL_RPF_GROUP_MEMBER_ATTR_END, + FAL_RPF_GROUP_MEMBER_ATTR_END }; /** @@ -2131,13 +3176,15 @@ union fal_pkt_feature_info { */ enum fal_feat_framer_ret_value { FAL_RET_ETHER_INPUT, - FAL_RET_PORTMONITOR_HW_INPUT + FAL_RET_PORTMONITOR_HW_INPUT, + FAL_RET_CAPTURE_HW_INPUT, + FAL_RET_PLUGIN_CONSUMED, }; /* * Queue mbufs from the fal plugin directly to a tx port, typically * used to queue mbufs to a backplane port. - * NOT safe to call from any threads on the master core. + * NOT safe to call from any threads on the main core. * * Returns: Number of mbufs queued, Unqueued mbufs are returned to the * caller. 
@@ -2192,6 +3239,12 @@ enum fal_policer_stat_type { /** accepted bytes */ FAL_POLICER_STAT_GREEN_BYTES, + /** exceeded packets */ + FAL_POLICER_STAT_YELLOW_PACKETS, + + /** exceeded bytes */ + FAL_POLICER_STAT_YELLOW_BYTES, + /** dropped packets */ FAL_POLICER_STAT_RED_PACKETS, @@ -2216,12 +3269,13 @@ int fal_plugin_policer_clear_stats(fal_object_t obj, enum fal_policer_meter_type { FAL_POLICER_METER_TYPE_PACKETS, FAL_POLICER_METER_TYPE_BYTES, - FAL_POLICER_METER_TYPE + FAL_POLICER_METER_TYPE_MAX }; enum fal_policer_mode_type { FAL_POLICER_MODE_STORM_CTL, FAL_POLICER_MODE_CPP, + FAL_POLICER_MODE_INGRESS, FAL_POLICER_MODE_MAX }; @@ -2230,6 +3284,16 @@ enum fal_stats_mode { FAL_STATS_MODE_READ_AND_CLEAR, }; +enum fal_policer_colour_source { + /* previous colour is ignored ("color-blind" in RFC4115) */ + FAL_POLICER_COLOUR_SOURCE_UNAWARE, + + /* previous colour is taken into account */ + FAL_POLICER_COLOUR_SOURCE_AWARE, + + FAL_POLICER_COLOUR_SOURCE_MAX +}; + /** * @brief FAL attributes for policers */ @@ -2239,31 +3303,50 @@ enum fal_policer_attr_t { * @type fal_policer_meter_type * @flags CREATE_AND_SET */ - FAL_POLICER_ATTR_METER_TYPE = 0x00000001, + FAL_POLICER_ATTR_METER_TYPE = 1, /** * @brief Policer mode * @type fal_policer_mode_type * @flags MANDATORY_ON_CREATE | CREATE_ONLY */ - FAL_POLICER_ATTR_MODE = 0x00000002, + FAL_POLICER_ATTR_MODE = 2, /** * @brief Committed burst size/packets * @type uint64_t * @flags CREATE_AND_SET */ - FAL_POLICER_ATTR_CBS = 0x00000003, + FAL_POLICER_ATTR_CBS = 3, /** * @brief Committed information rate BPS/PPS * @type uint64_t * @flags CREATE_AND_SET */ - FAL_POLICER_ATTR_CIR = 0x00000004, + FAL_POLICER_ATTR_CIR = 4, + /** + * @brief Excess burst size/packets + * @type uint64_t + * @flags CREATE_AND_SET + */ + FAL_POLICER_ATTR_EBS = 5, + /** + * @brief Excess information rate BPS/PPS + * @type uint64_t + * @flags CREATE_AND_SET + */ + FAL_POLICER_ATTR_EIR = 6, /** * @brief Action to take for RED colour packets * @type 
fal_packet_action_t * @flags CREATE_AND_SET */ - FAL_POLICER_ATTR_RED_PACKET_ACTION = 0x00000005, + FAL_POLICER_ATTR_RED_PACKET_ACTION = 7, + /** + * @brief Policer colour source + * @type enum fal_policer_colour_source + * @flags CREATE_ONLY + * @default FAL_POLICER_COLOUR_SOURCE_UNAWARE + */ + FAL_POLICER_ATTR_COLOUR_SOURCE = 8, }; /** @@ -2343,16 +3426,16 @@ void fal_plugin_policer_dump(fal_object_t obj, */ enum fal_qos_queue_type_t { /** H/w Queue for all types of traffic */ - FAL_QOS_QUEUE_TYPE_ALL = 0x00000000, + FAL_QOS_QUEUE_TYPE_ALL = 0, /** H/w Unicast Queue */ - FAL_QOS_QUEUE_TYPE_UNICAST = 0x00000001, + FAL_QOS_QUEUE_TYPE_UNICAST = 1, /** H/w Multicast (Broadcast, Unknown unicast, Multicast) Queue */ - FAL_QOS_QUEUE_TYPE_NON_UNICAST = 0x00000002, + FAL_QOS_QUEUE_TYPE_NON_UNICAST = 2, /** Max value */ - FAL_QOS_QUEUE_TYPE_MAX = FAL_QOS_QUEUE_TYPE_NON_UNICAST, + FAL_QOS_QUEUE_TYPE_MAX = FAL_QOS_QUEUE_TYPE_NON_UNICAST }; /** @@ -2365,7 +3448,7 @@ enum fal_qos_queue_attr_t { * @type fal_qos_queue_type_t * @flags MANDATORY_ON_CREATE | CREATE_ONLY | KEY */ - FAL_QOS_QUEUE_ATTR_TYPE = 0x00000000, + FAL_QOS_QUEUE_ATTR_TYPE = 0, /** * @brief Queue index @@ -2373,7 +3456,7 @@ enum fal_qos_queue_attr_t { * @type uint8_t * @flags MANDATORY_ON_CREATE | CREATE_ONLY | KEY */ - FAL_QOS_QUEUE_ATTR_INDEX = 0x00000001, + FAL_QOS_QUEUE_ATTR_INDEX = 1, /** * @brief Parent scheduler node @@ -2387,7 +3470,7 @@ enum fal_qos_queue_attr_t { * @objects FAL_QOS_OBJECT_TYPE_SCHEDULER_GROUP, * FAL_QOS_OBJECT_TYPE_PORT */ - FAL_QOS_QUEUE_ATTR_PARENT_ID = 0x00000002, + FAL_QOS_QUEUE_ATTR_PARENT_ID = 2, /** * @brief Attach WRED ID to queue @@ -2400,7 +3483,7 @@ enum fal_qos_queue_attr_t { * @allownull true * @default FAL_QOS_NULL_OBJECT_ID */ - FAL_QOS_QUEUE_ATTR_WRED_ID = 0x00000003, + FAL_QOS_QUEUE_ATTR_WRED_ID = 3, /** * @brief Attach buffer profile to queue @@ -2411,7 +3494,7 @@ enum fal_qos_queue_attr_t { * @allownull true * @default FAL_QOS_NULL_OBJECT_ID */ - 
FAL_QOS_QUEUE_ATTR_BUFFER_ID = 0x00000004, + FAL_QOS_QUEUE_ATTR_BUFFER_ID = 4, /** * @brief Attach scheduler to queue @@ -2422,7 +3505,7 @@ enum fal_qos_queue_attr_t { * @allownull true * @default FAL_QOS_NULL_OBJECT_ID */ - FAL_QOS_QUEUE_ATTR_SCHEDULER_ID = 0x00000005, + FAL_QOS_QUEUE_ATTR_SCHEDULER_ID = 5, /** * @brief Maximum queue length @@ -2432,7 +3515,7 @@ enum fal_qos_queue_attr_t { * @default 64 packets * @default 65536 bytes */ - FAL_QOS_QUEUE_ATTR_QUEUE_LIMIT = 0x00000006, + FAL_QOS_QUEUE_ATTR_QUEUE_LIMIT = 6, /** * @brief The TC that the queue is a member of @@ -2440,7 +3523,7 @@ enum fal_qos_queue_attr_t { * @type uint8_t * @flags CREATE_AND_SET */ - FAL_QOS_QUEUE_ATTR_TC = 0x00000007, + FAL_QOS_QUEUE_ATTR_TC = 7, /** * @brief Local control traffic priority queue @@ -2457,10 +3540,18 @@ enum fal_qos_queue_attr_t { * @type boolean * @flags MANDATORY_ON_CREATE | CREATE_AND_SET */ - FAL_QOS_QUEUE_ATTR_LOCAL_PRIORITY = 0x00000008, + FAL_QOS_QUEUE_ATTR_LOCAL_PRIORITY = 8, + + /** + * @brief Designator used to classify traffic to the queue + * + * @type uint8_t + * @flags MANDATORY_ON_CREATE | CREATE_AND_SET + */ + FAL_QOS_QUEUE_ATTR_DESIGNATOR = 9, /** Max value */ - FAL_QOS_QUEUE_ATTR_MAX = FAL_QOS_QUEUE_ATTR_LOCAL_PRIORITY, + FAL_QOS_QUEUE_ATTR_MAX = FAL_QOS_QUEUE_ATTR_DESIGNATOR }; /** @@ -2468,82 +3559,82 @@ enum fal_qos_queue_attr_t { */ enum fal_qos_queue_stat_t { /** Get/set tx packets count [uint64_t] */ - FAL_QOS_QUEUE_STAT_PACKETS = 0x00000000, + FAL_QOS_QUEUE_STAT_PACKETS = 0, /** Get/set tx bytes count [uint64_t] */ - FAL_QOS_QUEUE_STAT_BYTES = 0x00000001, + FAL_QOS_QUEUE_STAT_BYTES = 1, /** Get/set dropped packets count [uint64_t] */ - FAL_QOS_QUEUE_STAT_DROPPED_PACKETS = 0x00000002, + FAL_QOS_QUEUE_STAT_DROPPED_PACKETS = 2, /** Get/set dropped bytes count [uint64_t] */ - FAL_QOS_QUEUE_STAT_DROPPED_BYTES = 0x00000003, + FAL_QOS_QUEUE_STAT_DROPPED_BYTES = 3, - /** Get/set green color tx packets count [uint64_t] */ - 
FAL_QOS_QUEUE_STAT_GREEN_PACKETS = 0x00000004, + /** Get/set green colour tx packets count [uint64_t] */ + FAL_QOS_QUEUE_STAT_GREEN_PACKETS = 4, - /** Get/set green color tx bytes count [uint64_t] */ - FAL_QOS_QUEUE_STAT_GREEN_BYTES = 0x00000005, + /** Get/set green colour tx bytes count [uint64_t] */ + FAL_QOS_QUEUE_STAT_GREEN_BYTES = 5, - /** Get/set green color dropped packets count [uint64_t] */ - FAL_QOS_QUEUE_STAT_GREEN_DROPPED_PACKETS = 0x00000006, + /** Get/set green colour dropped packets count [uint64_t] */ + FAL_QOS_QUEUE_STAT_GREEN_DROPPED_PACKETS = 6, - /** Get/set green color dropped packets count [uint64_t] */ - FAL_QOS_QUEUE_STAT_GREEN_DROPPED_BYTES = 0x00000007, + /** Get/set green colour dropped bytes count [uint64_t] */ + FAL_QOS_QUEUE_STAT_GREEN_DROPPED_BYTES = 7, - /** Get/set yellow color tx packets count [uint64_t] */ - FAL_QOS_QUEUE_STAT_YELLOW_PACKETS = 0x00000008, + /** Get/set yellow colour tx packets count [uint64_t] */ + FAL_QOS_QUEUE_STAT_YELLOW_PACKETS = 8, - /** Get/set yellow color tx bytes count [uint64_t] */ - FAL_QOS_QUEUE_STAT_YELLOW_BYTES = 0x00000009, + /** Get/set yellow colour tx bytes count [uint64_t] */ + FAL_QOS_QUEUE_STAT_YELLOW_BYTES = 9, - /** Get/set yellow color dropped packets count [uint64_t] */ - FAL_QOS_QUEUE_STAT_YELLOW_DROPPED_PACKETS = 0x0000000a, + /** Get/set yellow colour dropped packets count [uint64_t] */ + FAL_QOS_QUEUE_STAT_YELLOW_DROPPED_PACKETS = 10, - /** Get/set yellow color dropped packets count [uint64_t] */ - FAL_QOS_QUEUE_STAT_YELLOW_DROPPED_BYTES = 0x0000000b, + /** Get/set yellow colour dropped bytes count [uint64_t] */ + FAL_QOS_QUEUE_STAT_YELLOW_DROPPED_BYTES = 11, - /** Get/set red color tx packets count [uint64_t] */ - FAL_QOS_QUEUE_STAT_RED_PACKETS = 0x0000000c, + /** Get/set red colour tx packets count [uint64_t] */ + FAL_QOS_QUEUE_STAT_RED_PACKETS = 12, - /** Get/set red color tx bytes count [uint64_t] */ - FAL_QOS_QUEUE_STAT_RED_BYTES = 0x0000000d, + /** Get/set red colour tx bytes 
count [uint64_t] */ + FAL_QOS_QUEUE_STAT_RED_BYTES = 13, - /** Get/set red color dropped packets count [uint64_t] */ - FAL_QOS_QUEUE_STAT_RED_DROPPED_PACKETS = 0x0000000e, + /** Get/set red colour dropped packets count [uint64_t] */ + FAL_QOS_QUEUE_STAT_RED_DROPPED_PACKETS = 14, - /** Get/set red color dropped packets count [uint64_t] */ - FAL_QOS_QUEUE_STAT_RED_DROPPED_BYTES = 0x0000000f, + /** Get/set red colour dropped bytes count [uint64_t] */ + FAL_QOS_QUEUE_STAT_RED_DROPPED_BYTES = 15, - /** Get/set WRED green color dropped packets count [uint64_t] */ - FAL_QOS_QUEUE_STAT_GREEN_WRED_DROPPED_PACKETS = 0x00000010, + /** Get/set WRED green colour dropped packets count [uint64_t] */ + FAL_QOS_QUEUE_STAT_GREEN_WRED_DROPPED_PACKETS = 16, - /** Get/set WRED green color dropped bytes count [uint64_t] */ - FAL_QOS_QUEUE_STAT_GREEN_WRED_DROPPED_BYTES = 0x00000011, + /** Get/set WRED green colour dropped bytes count [uint64_t] */ + FAL_QOS_QUEUE_STAT_GREEN_WRED_DROPPED_BYTES = 17, - /** Get/set WRED yellow color dropped packets count [uint64_t] */ - FAL_QOS_QUEUE_STAT_YELLOW_WRED_DROPPED_PACKETS = 0x00000012, + /** Get/set WRED yellow colour dropped packets count [uint64_t] */ + FAL_QOS_QUEUE_STAT_YELLOW_WRED_DROPPED_PACKETS = 18, - /** Get/set WRED yellow color dropped bytes count [uint64_t] */ - FAL_QOS_QUEUE_STAT_YELLOW_WRED_DROPPED_BYTES = 0x00000013, + /** Get/set WRED yellow colour dropped bytes count [uint64_t] */ + FAL_QOS_QUEUE_STAT_YELLOW_WRED_DROPPED_BYTES = 19, - /** Get/set WRED red color dropped packets count [uint64_t] */ - FAL_QOS_QUEUE_STAT_RED_WRED_DROPPED_PACKETS = 0x00000014, + /** Get/set WRED red colour dropped packets count [uint64_t] */ + FAL_QOS_QUEUE_STAT_RED_WRED_DROPPED_PACKETS = 20, - /** Get/set WRED red color dropped bytes count [uint64_t] */ - FAL_QOS_QUEUE_STAT_RED_WRED_DROPPED_BYTES = 0x00000015, + /** Get/set WRED red colour dropped bytes count [uint64_t] */ + FAL_QOS_QUEUE_STAT_RED_WRED_DROPPED_BYTES = 21, /** Get/set WRED dropped 
packets count [uint64_t] */ - FAL_QOS_QUEUE_STAT_WRED_DROPPED_PACKETS = 0x00000016, + FAL_QOS_QUEUE_STAT_WRED_DROPPED_PACKETS = 22, /** Get/set WRED dropped bytes count [uint64_t] */ - FAL_QOS_QUEUE_STAT_WRED_DROPPED_BYTES = 0x00000017, + FAL_QOS_QUEUE_STAT_WRED_DROPPED_BYTES = 23, /** Get current queue occupancy in bytes [uint64_t] */ - FAL_QOS_QUEUE_STAT_CURR_OCCUPANCY_BYTES = 0x00000018, + FAL_QOS_QUEUE_STAT_CURR_OCCUPANCY_BYTES = 24, /** Get watermark queue occupancy in bytes [uint64_t] */ - FAL_QOS_QUEUE_STAT_WATERMARK_BYTES = 0x00000019, + FAL_QOS_QUEUE_STAT_WATERMARK_BYTES = 25, /** Max value */ FAL_QOS_QUEUE_STAT_MAX = FAL_QOS_QUEUE_STAT_WATERMARK_BYTES @@ -2646,34 +3737,49 @@ int fal_plugin_qos_clear_queue_stats(fal_object_t queue_id, */ enum fal_qos_map_type_t { /** QOS Map to set DOT1P to Traffic class */ - FAL_QOS_MAP_TYPE_DOT1P_TO_TC = 0x00000000, + FAL_QOS_MAP_TYPE_DOT1P_TO_TC = 0, - /** QOS Map to set DOT1P to color */ - FAL_QOS_MAP_TYPE_DOT1P_TO_COLOR = 0x00000001, + /** QOS Map to set DOT1P to colour */ + FAL_QOS_MAP_TYPE_DOT1P_TO_COLOUR = 1, /** QOS Map to set DSCP to Traffic class */ - FAL_QOS_MAP_TYPE_DSCP_TO_TC = 0x00000002, + FAL_QOS_MAP_TYPE_DSCP_TO_TC = 2, - /** QOS Map to set DSCP to color */ - FAL_QOS_MAP_TYPE_DSCP_TO_COLOR = 0x00000003, + /** QOS Map to set DSCP to colour */ + FAL_QOS_MAP_TYPE_DSCP_TO_COLOUR = 3, /** QOS Map to set traffic class to queue */ - FAL_QOS_MAP_TYPE_TC_TO_QUEUE = 0x00000004, + FAL_QOS_MAP_TYPE_TC_TO_QUEUE = 4, - /** QOS Map to set traffic class and color to DSCP */ - FAL_QOS_MAP_TYPE_TC_AND_COLOR_TO_DSCP = 0x00000005, + /** QOS Map to set traffic class and colour to DSCP */ + FAL_QOS_MAP_TYPE_TC_AND_COLOUR_TO_DSCP = 5, - /** QOS Map to set traffic class and color to DOT1P */ - FAL_QOS_MAP_TYPE_TC_AND_COLOR_TO_DOT1P = 0x00000006, + /** QOS Map to set traffic class and colour to DOT1P */ + FAL_QOS_MAP_TYPE_TC_AND_COLOUR_TO_DOT1P = 6, /** QOS Map to set traffic class to priority group */ - 
FAL_QOS_MAP_TYPE_TC_TO_PRIORITY_GROUP = 0x00000007, + FAL_QOS_MAP_TYPE_TC_TO_PRIORITY_GROUP = 7, /** QOS Map to set DSCP to DOT1P */ - FAL_QOS_MAP_TYPE_DSCP_TO_DOT1P = 0x00000008, + FAL_QOS_MAP_TYPE_DSCP_TO_DOT1P = 8, + + /** QOS Map to set DSCP to designator */ + FAL_QOS_MAP_TYPE_DSCP_TO_DESIGNATOR = 9, + + /** QOS Map to set DOT1P to designator */ + FAL_QOS_MAP_TYPE_DOT1P_TO_DESIGNATOR = 10, + + /** QOS Map to set designator to DOT1P */ + FAL_QOS_MAP_TYPE_DESIGNATOR_TO_DOT1P = 11, + + /** QOS Map to set designator to DSCP */ + FAL_QOS_MAP_TYPE_DESIGNATOR_TO_DSCP = 12, + + /** QOS Map to set DSCP to DSCP */ + FAL_QOS_MAP_TYPE_DSCP_TO_DSCP = 13, /** Max value */ - FAL_QOS_MAP_TYPE_MAX = FAL_QOS_MAP_TYPE_DSCP_TO_DOT1P, + FAL_QOS_MAP_TYPE_MAX = FAL_QOS_MAP_TYPE_DSCP_TO_DSCP }; /** @@ -2686,20 +3792,20 @@ enum fal_qos_map_attr_t { * @type fal_qos_qos_map_type_t * @flags MANDATORY_ON_CREATE | CREATE_ONLY */ - FAL_QOS_MAP_ATTR_TYPE = 0x00000000, + FAL_QOS_MAP_ATTR_TYPE = 0, /** * @brief Dot1p/DSCP to TC Mapping * * Defaults: * - All Dot1p/DSCP maps to traffic class 0 - * - All Dot1p/DSCP maps to color #FAL_PACKET_COLOR_GREEN + * - All Dot1p/DSCP maps to colour #FAL_PACKET_COLOUR_GREEN * - All traffic class maps to queue 0 * * @type fal_qos_map_list_t * @flags MANDATORY_ON_CREATE | CREATE_AND_SET */ - FAL_QOS_MAP_ATTR_MAP_TO_VALUE_LIST = 0x00000001, + FAL_QOS_MAP_ATTR_MAP_TO_VALUE_LIST = 1, /** * @brief Local control traffic priority queue @@ -2718,10 +3824,25 @@ enum fal_qos_map_attr_t { * @type boolean * @flags MANDATORY_ON_CREATE | CREATE_AND_SET */ - FAL_QOS_MAP_ATTR_LOCAL_PRIORITY_QUEUE = 0x00000002, + FAL_QOS_MAP_ATTR_LOCAL_PRIORITY_QUEUE = 2, + + /** + * @brief System default setting + * + * A map may be applied as the system default for ingress + * classification. 
If a QoS policy is applied to any port or + * port/vlan then this map is applied to all ingress ports and + * port/vlans that do not have a specific ingress map applied + * + * Defaults: not required + * + * @type boolean + * @flags MANDATORY_ON_CREATE | CREATE_AND_SET + */ + FAL_QOS_MAP_ATTR_INGRESS_SYSTEM_DEFAULT = 3, /** Max value */ - FAL_QOS_MAP_ATTR_MAX = FAL_QOS_MAP_ATTR_LOCAL_PRIORITY_QUEUE, + FAL_QOS_MAP_ATTR_MAX = FAL_QOS_MAP_ATTR_INGRESS_SYSTEM_DEFAULT }; /** @@ -2764,7 +3885,7 @@ int fal_plugin_qos_upd_map(fal_object_t map_id, * @param[in] map object id * @param[in] json writer object */ -void fal_plugin_qos_dump_map(fal_object_t map, +void fal_plugin_qos_dump_map(fal_object_t map_id, json_writer_t *wr); /** @@ -2784,13 +3905,13 @@ int fal_plugin_qos_get_map_attrs(fal_object_t map_id, uint32_t attr_count, */ enum fal_qos_meter_type_t { /* Metering in bytes per second */ - FAL_QOS_METER_TYPE_BYTES = 0x00000000, + FAL_QOS_METER_TYPE_BYTES = 0, /* Metering in packets per second */ - FAL_QOS_METER_TYPE_PACKETS = 0x00000001, + FAL_QOS_METER_TYPE_PACKETS = 1, /* Max value */ - FAL_QOS_METER_TYPE_MAX = FAL_QOS_METER_TYPE_PACKETS, + FAL_QOS_METER_TYPE_MAX = FAL_QOS_METER_TYPE_PACKETS }; /** @@ -2798,16 +3919,16 @@ enum fal_qos_meter_type_t { */ enum fal_qos_scheduler_type_t { /** Strict Scheduling */ - FAL_QOS_SCHEDULING_TYPE_STRICT = 0x00000000, + FAL_QOS_SCHEDULING_TYPE_STRICT = 0, /** Weighted Round-Robin Scheduling */ - FAL_QOS_SCHEDULING_TYPE_WRR = 0x00000001, + FAL_QOS_SCHEDULING_TYPE_WRR = 1, /** Deficit Weighted Round-Robin Scheduling */ - FAL_QOS_SCHEDULING_TYPE_DWRR = 0x00000002, + FAL_QOS_SCHEDULING_TYPE_DWRR = 2, /* Max value */ - FAL_QOS_SCHEDULING_TYPE_MAX = FAL_QOS_SCHEDULING_TYPE_DWRR, + FAL_QOS_SCHEDULING_TYPE_MAX = FAL_QOS_SCHEDULING_TYPE_DWRR }; /** @@ -2821,7 +3942,7 @@ enum fal_qos_scheduler_attr_t { * @flags CREATE_AND_SET * @default FAL_QOS_SCHEDULING_TYPE_WRR */ - FAL_QOS_SCHEDULER_ATTR_SCHEDULING_TYPE = 0x00000000, + 
FAL_QOS_SCHEDULER_ATTR_SCHEDULING_TYPE = 0, /** * @brief Scheduling algorithm weight @@ -2834,7 +3955,7 @@ enum fal_qos_scheduler_attr_t { * @validonly FAL_QOS_SCHEDULER_ATTR_SCHEDULING_TYPE == * FAL_QOS_SCHEDULING_TYPE_DWRR */ - FAL_QOS_SCHEDULER_ATTR_SCHEDULING_WEIGHT = 0x00000001, + FAL_QOS_SCHEDULER_ATTR_SCHEDULING_WEIGHT = 1, /** * @brief Shaper @@ -2843,7 +3964,7 @@ enum fal_qos_scheduler_attr_t { * @flags CREATE_AND_SET * @default FAL_QOS_METER_TYPE_BYTES */ - FAL_QOS_SCHEDULER_ATTR_METER_TYPE = 0x00000002, + FAL_QOS_SCHEDULER_ATTR_METER_TYPE = 2, /** * @brief Maximum Bandwidth shape rate [bytes/sec or PPS] @@ -2854,7 +3975,7 @@ enum fal_qos_scheduler_attr_t { * @flags CREATE_AND_SET * @default 0 */ - FAL_QOS_SCHEDULER_ATTR_MAX_BANDWIDTH_RATE = 0x00000003, + FAL_QOS_SCHEDULER_ATTR_MAX_BANDWIDTH_RATE = 3, /** * @brief Maximum Burst for Bandwidth shape rate [bytes or Packets] @@ -2863,7 +3984,7 @@ enum fal_qos_scheduler_attr_t { * @flags CREATE_AND_SET * @default 0 */ - FAL_QOS_SCHEDULER_ATTR_MAX_BANDWIDTH_BURST_RATE = 0x00000004, + FAL_QOS_SCHEDULER_ATTR_MAX_BANDWIDTH_BURST_RATE = 4, /** * @brief Frame-overhead to be added/subtracted to a packet @@ -2872,11 +3993,11 @@ enum fal_qos_scheduler_attr_t { * @flags CREATE_AND_SET * @default FAL_QOS_FRAME_OVERHEAD */ - FAL_QOS_SCHEDULER_ATTR_FRAME_OVERHEAD = 0x00000005, + FAL_QOS_SCHEDULER_ATTR_FRAME_OVERHEAD = 5, /* Max value */ FAL_QOS_SCHEDULER_ATTR_MAX = - FAL_QOS_SCHEDULER_ATTR_FRAME_OVERHEAD, + FAL_QOS_SCHEDULER_ATTR_FRAME_OVERHEAD }; /** @@ -2940,7 +4061,7 @@ enum fal_qos_sched_group_level_t { FAL_QOS_SCHED_GROUP_LEVEL_TC = 4, FAL_QOS_SCHED_GROUP_LEVEL_QUEUE = 5, FAL_QOS_SCHED_GROUP_MAX_LEVEL = FAL_QOS_SCHED_GROUP_LEVEL_QUEUE, - FAL_QOS_SCHED_GROUP_TOTAL_IDS = FAL_QOS_SCHED_GROUP_MAX_LEVEL + 1, + FAL_QOS_SCHED_GROUP_TOTAL_IDS = FAL_QOS_SCHED_GROUP_MAX_LEVEL + 1 }; /** @@ -2953,7 +4074,7 @@ enum fal_qos_sched_group_attr_t { * @type uint32_t * @flags READ_ONLY */ - FAL_QOS_SCHED_GROUP_ATTR_CHILD_COUNT = 
0x00000000, + FAL_QOS_SCHED_GROUP_ATTR_CHILD_COUNT = 0, /** * @brief Scheduler Group child object id list @@ -2962,12 +4083,12 @@ enum fal_qos_sched_group_attr_t { * @flags READ_ONLY * @objects FAL_QOS_OBJECT_TYPE_SCHED_GROUP, FAL_QOS_OBJECT_TYPE_QUEUE */ - FAL_QOS_SCHED_GROUP_ATTR_CHILD_LIST = 0x00000001, + FAL_QOS_SCHED_GROUP_ATTR_CHILD_LIST = 1, /** * @brief Scheduler group index * - * For FAL_QOS_SCHED_GROUP_LEVEL_PORT this is the if_index of the + * For FAL_QOS_SCHED_GROUP_LEVEL_PORT this is the ifindex of the * port on which the scheduler group should be applied. For all * other levels, this is a 0-based unique identifier particular to * the level that may be used for debugging purposes or configuration @@ -2976,7 +4097,7 @@ enum fal_qos_sched_group_attr_t { * @type uint32_t * @flags MANDATORY_ON_CREATE | CREATE_ONLY */ - FAL_QOS_SCHED_GROUP_ATTR_SG_INDEX = 0x00000002, + FAL_QOS_SCHED_GROUP_ATTR_SG_INDEX = 2, /** * @brief Scheduler group level @@ -2984,7 +4105,7 @@ enum fal_qos_sched_group_attr_t { * @type fal_qos_sched_level_t * @flags MANDATORY_ON_CREATE | CREATE_ONLY */ - FAL_QOS_SCHED_GROUP_ATTR_LEVEL = 0x00000003, + FAL_QOS_SCHED_GROUP_ATTR_LEVEL = 3, /** * @brief Maximum number of children on group @@ -2992,7 +4113,7 @@ enum fal_qos_sched_group_attr_t { * @type uint8_t * @flags MANDATORY_ON_CREATE | CREATE_ONLY */ - FAL_QOS_SCHED_GROUP_ATTR_MAX_CHILDREN = 0x00000004, + FAL_QOS_SCHED_GROUP_ATTR_MAX_CHILDREN = 4, /** * @brief Scheduler id @@ -3003,7 +4124,7 @@ enum fal_qos_sched_group_attr_t { * @allownull true * @default FAL_QOS_NULL_OBJECT_ID */ - FAL_QOS_SCHED_GROUP_ATTR_SCHEDULER_ID = 0x00000005, + FAL_QOS_SCHED_GROUP_ATTR_SCHEDULER_ID = 5, /** * @brief Scheduler group parent node @@ -3016,7 +4137,7 @@ enum fal_qos_sched_group_attr_t { * @objects FAL_QOS_OBJECT_TYPE_SCHEDULER_GROUP, * FAL_QOS_OBJECT_TYPE_PORT */ - FAL_QOS_SCHED_GROUP_ATTR_PARENT_ID = 0x00000006, + FAL_QOS_SCHED_GROUP_ATTR_PARENT_ID = 6, /** * @brief Scheduler group ingress map node @@ 
-3028,7 +4149,7 @@ enum fal_qos_sched_group_attr_t { * @flags CREATE_AND_SET * @objects FAL_QOS_OBJECT_TYPE_MAP */ - FAL_QOS_SCHED_GROUP_ATTR_INGRESS_MAP_ID = 0x00000007, + FAL_QOS_SCHED_GROUP_ATTR_INGRESS_MAP_ID = 7, /** * @brief Scheduler group vlan id @@ -3040,7 +4161,7 @@ enum fal_qos_sched_group_attr_t { * @type uint16_t * @flags CREATE_AND_SET */ - FAL_QOS_SCHED_GROUP_ATTR_VLAN_ID = 0x00000008, + FAL_QOS_SCHED_GROUP_ATTR_VLAN_ID = 8, /** * @brief Scheduler group egress map node @@ -3052,10 +4173,24 @@ enum fal_qos_sched_group_attr_t { * @flags CREATE_AND_SET * @objects FAL_QOS_OBJECT_TYPE_MAP */ - FAL_QOS_SCHED_GROUP_ATTR_EGRESS_MAP_ID = 0x00000009, + FAL_QOS_SCHED_GROUP_ATTR_EGRESS_MAP_ID = 9, + + /** + * @brief Scheduler group local priority queue designator + * + * The designator to be applied to locally generated priority traffic + * to classify it to the local priority queue. + * This is only valid when the level == 3, i.e. the sched-group + * represents a vyatta pipe. + * + * @type uint8_t + * @flags MANDATORY_ON_CREATE| CREATE_AND_SET + */ + FAL_QOS_SCHED_GROUP_ATTR_LOCAL_PRIORITY_DESIGNATOR = 10, /* Max value */ - FAL_QOS_SCHED_GROUP_ATTR_MAX = FAL_QOS_SCHED_GROUP_ATTR_EGRESS_MAP_ID, + FAL_QOS_SCHED_GROUP_ATTR_MAX = + FAL_QOS_SCHED_GROUP_ATTR_LOCAL_PRIORITY_DESIGNATOR }; /** @@ -3079,7 +4214,7 @@ int fal_plugin_qos_new_sched_group(fal_object_t switch_id, uint32_t attr_count, * * @return 0 on success, failure status code on error */ -int fal_plugin_qos_del_sched_group(fal_object_t scheduler_group); +int fal_plugin_qos_del_sched_group(fal_object_t sched_group_id); /** * @brief Update a scheduler group attribute @@ -3111,9 +4246,11 @@ int fal_plugin_qos_get_sched_group_attrs(fal_object_t sched_group_id, * @param[in] sched group object id * @param[in] json writer object */ -void fal_plugin_qos_dump_sched_group(fal_object_t sg, +void fal_plugin_qos_dump_sched_group(fal_object_t sched_group_id, json_writer_t *wr); +void 
fal_plugin_dump_memory_buffer_errors(json_writer_t *wr); + /** * @brief Enum defining WRED profile attributes */ @@ -3125,7 +4262,7 @@ enum fal_qos_wred_attr_t { * @flags CREATE_AND_SET * @default false */ - FAL_QOS_WRED_ATTR_GREEN_ENABLE = 0x00000000, + FAL_QOS_WRED_ATTR_GREEN_ENABLE = 0, /** * @brief Green minimum threshold bytes @@ -3139,7 +4276,7 @@ enum fal_qos_wred_attr_t { * @default 0 * @validonly FAL_QOS_WRED_ATTR_GREEN_ENABLE == true */ - FAL_QOS_WRED_ATTR_GREEN_MIN_THRESHOLD = 0x00000001, + FAL_QOS_WRED_ATTR_GREEN_MIN_THRESHOLD = 1, /** * @brief Green maximum threshold @@ -3152,7 +4289,7 @@ enum fal_qos_wred_attr_t { * @default 0 * @validonly FAL_QOS_WRED_ATTR_GREEN_ENABLE == true */ - FAL_QOS_WRED_ATTR_GREEN_MAX_THRESHOLD = 0x00000002, + FAL_QOS_WRED_ATTR_GREEN_MAX_THRESHOLD = 2, /** * @brief Percentage 0 ~ 100 @@ -3161,7 +4298,7 @@ enum fal_qos_wred_attr_t { * @flags CREATE_AND_SET * @default 100 */ - FAL_QOS_WRED_ATTR_GREEN_DROP_PROBABILITY = 0x00000003, + FAL_QOS_WRED_ATTR_GREEN_DROP_PROBABILITY = 3, /** * @brief Weight 0 ~ 15 @@ -3170,7 +4307,7 @@ enum fal_qos_wred_attr_t { * @flags CREATE_AND_SET * @default 0 */ - FAL_QOS_WRED_ATTR_WEIGHT = 0x00000004, + FAL_QOS_WRED_ATTR_WEIGHT = 4, /** * @brief Yellow enable @@ -3179,7 +4316,7 @@ enum fal_qos_wred_attr_t { * @flags CREATE_AND_SET * @default false */ - FAL_QOS_WRED_ATTR_YELLOW_ENABLE = 0x00000005, + FAL_QOS_WRED_ATTR_YELLOW_ENABLE = 5, /** * @brief Yellow minimum threshold bytes @@ -3193,7 +4330,7 @@ enum fal_qos_wred_attr_t { * @default 0 * @validonly FAL_QOS_WRED_ATTR_YELLOW_ENABLE == true */ - FAL_QOS_WRED_ATTR_YELLOW_MIN_THRESHOLD = 0x00000006, + FAL_QOS_WRED_ATTR_YELLOW_MIN_THRESHOLD = 6, /** * @brief Yellow maximum threshold @@ -3206,7 +4343,7 @@ enum fal_qos_wred_attr_t { * @default 0 * @validonly FAL_QOS_WRED_ATTR_GREEN_ENABLE == true */ - FAL_QOS_WRED_ATTR_YELLOW_MAX_THRESHOLD = 0x00000007, + FAL_QOS_WRED_ATTR_YELLOW_MAX_THRESHOLD = 7, /** * @brief Yellow Percentage 0 ~ 100 @@ -3215,7 
+4352,7 @@ enum fal_qos_wred_attr_t { * @flags CREATE_AND_SET * @default 100 */ - FAL_QOS_WRED_ATTR_YELLOW_DROP_PROBABILITY = 0x00000008, + FAL_QOS_WRED_ATTR_YELLOW_DROP_PROBABILITY = 8, /** * @brief Red enable @@ -3224,7 +4361,7 @@ enum fal_qos_wred_attr_t { * @flags CREATE_AND_SET * @default false */ - FAL_QOS_WRED_ATTR_RED_ENABLE = 0x00000009, + FAL_QOS_WRED_ATTR_RED_ENABLE = 9, /** * @brief Red minimum threshold bytes @@ -3238,7 +4375,7 @@ enum fal_qos_wred_attr_t { * @default 0 * @validonly FAL_QOS_WRED_ATTR_RED_ENABLE == true */ - FAL_QOS_WRED_ATTR_RED_MIN_THRESHOLD = 0x0000000A, + FAL_QOS_WRED_ATTR_RED_MIN_THRESHOLD = 10, /** * @brief Red maximum threshold @@ -3251,7 +4388,7 @@ enum fal_qos_wred_attr_t { * @default 0 * @validonly FAL_QOS_WRED_ATTR_RED_ENABLE == true */ - FAL_QOS_WRED_ATTR_RED_MAX_THRESHOLD = 0x0000000B, + FAL_QOS_WRED_ATTR_RED_MAX_THRESHOLD = 11, /** * @brief Red percentage 0 ~ 100 @@ -3260,7 +4397,7 @@ enum fal_qos_wred_attr_t { * @flags CREATE_AND_SET * @default 100 */ - FAL_QOS_WRED_ATTR_RED_DROP_PROBABILITY = 0x0000000C, + FAL_QOS_WRED_ATTR_RED_DROP_PROBABILITY = 12, /* Max value */ FAL_QOS_WRED_ATTR_MAX = FAL_QOS_WRED_ATTR_RED_DROP_PROBABILITY @@ -3344,6 +4481,7 @@ enum fal_mirror_session_type_t { /** Enhanced Remote SPAN */ FAL_MIRROR_SESSION_TYPE_ENHANCED_REMOTE, }; + /** * @brief FAL attributes for portmonitor(mirror) session */ @@ -3460,6 +4598,43 @@ enum fal_vlan_feature_attr_t { * */ FAL_VLAN_FEATURE_ATTR_MULTICAST_STORM_CONTROL_POLICER_ID, + + /** + * @brief Enable ingress QoS classification on vlan on port + * + * Set map id = FAL_NULL_OBJECT_ID to remove map + * @type fal_object_t + * @flags CREATE_AND_SET + * @default FAL_NULL_OBJECT_ID + * + */ + FAL_VLAN_FEATURE_ATTR_QOS_INGRESS_MAP_ID, + + /** + * @brief Upper limit of number of MACs permitted + * in the MAC table for a given vlan on the port. 
+ * @type uint32_t + * @flags CREATE_AND_SET + */ + FAL_VLAN_FEATURE_ATTR_MAC_LIMIT, + + /** + * @brief Get the current MAC count for a given vlan on the port. + * @type uint32_t + * @flags READ_ONLY + */ + FAL_VLAN_FEATURE_ATTR_MAC_COUNT, + + /** + * @brief Enable egress QoS marking on vlan on port + * + * Set map id = FAL_NULL_OBJECT_ID to remove map + * @type fal_object_t + * @flags CREATE_AND_SET + * @default FAL_NULL_OBJECT_ID + * + */ + FAL_VLAN_FEATURE_ATTR_QOS_EGRESS_MAP_ID, }; /** @@ -3494,13 +4669,28 @@ int fal_plugin_vlan_feature_set_attr(fal_object_t obj, const struct fal_attribute_t *attr); /** - * @brief set backplane port - * @param[in] bp_ifindex backplane interface ifindex - * @param[in] if_index interface for which backplane binding + * @brief Get vlan_feature attribute + * + * @param[in] obj vlan_feature object id + * @param[in] attr_count Number of attributes + * @param[inout] attr_list Array of attributes + * + * @return 0 on success. If an attribute in attr_list is + * unsupported by the FAL plugin, it should return + * an error. 
+ */ +int fal_plugin_vlan_feature_get_attr(fal_object_t obj, + uint32_t attr_count, + struct fal_attribute_t *attr_list); + +/** + * @brief set backplane port + * @param[in] bp_ifindex backplane interface ifindex + * @param[in] ifindex interface for which backplane binding * is to be set * @return Returns 0 for success, error code on failure */ -int fal_plugin_backplane_bind(unsigned int bp_ifindex, unsigned int if_index); +int fal_plugin_backplane_bind(unsigned int bp_ifindex, unsigned int ifindex); /** * @brief dump backplane information for specified backplane port @@ -3620,6 +4810,22 @@ enum fal_cpp_limiter_attr_t { * @flags MANDATORY_ON_CREATE | CREATE_ONLY */ FAL_CPP_LIMITER_ATTR_DEFAULT = 13, + + /** + * @brief Rate limiter for PIM packets + * + * @type fal_object_t + * @flags CREATE_ONLY + */ + FAL_CPP_LIMITER_ATTR_PIM = 14, + + /** + * @brief Rate limiter for IP multicast packets + * + * @type fal_object_t + * @flags CREATE_ONLY + */ + FAL_CPP_LIMITER_ATTR_IP_MC = 15, }; /* @@ -3710,11 +4916,25 @@ enum fal_ptp_clock_attr_t { */ FAL_PTP_CLOCK_PROFILE, + /** + * @brief An antenna delay in nanoseconds that should + * be applied to the clock's GPS. Only useful + * with the G.8275.2 APTS profile. + * @type int32_t + * @flags CREATE_ONLY + */ + FAL_PTP_CLOCK_ANTENNA_DELAY, + FAL_PTP_CLOCK_MAX }; enum fal_ptp_clock_profile_t { FAL_PTP_CLOCK_DEFAULT_PROFILE = 1, /** IEEE 1588-2008 default profile */ + FAL_PTP_CLOCK_G82752_PROFILE = 2, /** G.8275.2 Telecom profile */ + FAL_PTP_CLOCK_G82752_APTS_PROFILE = 3, + /** G.8275.2 w/ APTS Telecom profile */ + FAL_PTP_CLOCK_G82751_FWD_PROFILE = 4, + FAL_PTP_CLOCK_G82751_NON_FWD_PROFILE = 5, }; /** @@ -3728,7 +4948,7 @@ enum fal_ptp_clock_profile_t { */ int fal_plugin_create_ptp_clock(uint32_t attr_count, struct fal_attribute_t *attr_list, - fal_object_t *clock); + fal_object_t *clock_id); /** * @brief Delete a PTP clock. 
@@ -3737,7 +4957,7 @@ int fal_plugin_create_ptp_clock(uint32_t attr_count, * * @return 0 on success, error code for failure */ -int fal_plugin_delete_ptp_clock(fal_object_t clock); +int fal_plugin_delete_ptp_clock(fal_object_t clock_id); /** * @brief Dump the status of a PTP clock. @@ -3745,7 +4965,7 @@ int fal_plugin_delete_ptp_clock(fal_object_t clock); * @param[in] obj PTP clock * @param[in] json JSON writer object */ -int fal_plugin_dump_ptp_clock(fal_object_t clock, json_writer_t *wr); +int fal_plugin_dump_ptp_clock(fal_object_t clock_id, json_writer_t *wr); enum fal_ptp_port_attr_t { /** @@ -3838,6 +5058,14 @@ enum fal_ptp_port_attr_t { */ FAL_PTP_PORT_DSCP, + /** + * @brief Additional incoming path for PTP packets that + * are destined for the clock port. + * @type fal_ptp_port_port_path_t + * @flags CREATE_ONLY + */ + FAL_PTP_PORT_ADDITIONAL_PATH, + FAL_PTP_PORT_MAX }; @@ -3852,7 +5080,7 @@ enum fal_ptp_port_attr_t { */ int fal_plugin_create_ptp_port(uint32_t attr_count, struct fal_attribute_t *attr_list, - fal_object_t *port); + fal_object_t *port_id); /** * @brief Delete a PTP port on a PTP clock. @@ -3861,12 +5089,12 @@ int fal_plugin_create_ptp_port(uint32_t attr_count, * * @return 0 on success, error code for failure */ -int fal_plugin_delete_ptp_port(fal_object_t port); +int fal_plugin_delete_ptp_port(fal_object_t port_id); enum fal_ptp_peer_type_t { FAL_PTP_PEER_MASTER, /**< PTP master */ FAL_PTP_PEER_SLAVE, /**< PTP slave */ - FAL_PTP_PEER_ALLOWED, /**< Whitelisted PTP peer */ + FAL_PTP_PEER_ALLOWED, /**< Allowed PTP peer */ }; enum fal_ptp_peer_attr_t { @@ -3913,7 +5141,7 @@ enum fal_ptp_peer_attr_t { */ int fal_plugin_create_ptp_peer(uint32_t attr_count, struct fal_attribute_t *attr_list, - fal_object_t *peer); + fal_object_t *peer_id); /** * @brief Delete a PTP peer on a PTP port. 
@@ -3922,7 +5150,7 @@ int fal_plugin_create_ptp_peer(uint32_t attr_count, * * @return 0 on success, error code for failure */ -int fal_plugin_delete_ptp_peer(fal_object_t peer); +int fal_plugin_delete_ptp_peer(fal_object_t peer_id); /* Stuff for L3 ACLs */ @@ -3957,6 +5185,9 @@ enum fal_acl_bind_point_type_t { enum fal_acl_action_type_t { FAL_ACL_ACTION_TYPE_PACKET_ACTION, FAL_ACL_ACTION_TYPE_COUNTER, + FAL_ACL_ACTION_TYPE_SET_DESIGNATION, + FAL_ACL_ACTION_TYPE_SET_COLOUR, + FAL_ACL_ACTION_TYPE_POLICER, }; /** @@ -4010,7 +5241,7 @@ struct fal_acl_field_data_t { * The value for an enabled ACL action */ union fal_acl_action_parameter_t { - int32_t s32; /* For enum values */ + int32_t s32; /* For enum or int values */ fal_object_t objid; }; @@ -4201,11 +5432,11 @@ enum fal_acl_entry_attr_t { FAL_ACL_ENTRY_ATTR_TABLE_ID, /** - * @brief Priority (Highest value has highest priority) + * @brief Rule number (Lowest value has highest priority) * @type uint16_t * @flags MANDATORY_ON_CREATE | CREATE_ONLY */ - FAL_ACL_ENTRY_ATTR_PRIORITY, + FAL_ACL_ENTRY_ATTR_RULE_NUMBER, /** * @brief Admin state (false is disabled) @@ -4243,6 +5474,34 @@ enum fal_acl_entry_attr_t { */ FAL_ACL_ENTRY_ATTR_ACTION_COUNTER, + /** + * @brief Action to set the designation (0-7) + * + * @type fal_acl_action_data_t int32_t + * @flags CREATE_AND_SET + * @default disabled + */ + FAL_ACL_ENTRY_ATTR_ACTION_SET_DESIGNATION, + + /** + * @brief Action to set the colour (green, yellow, or red) + * + * @type fal_acl_action_data_t fal_packet_colour + * @flags CREATE_AND_SET + * @default disabled + */ + FAL_ACL_ENTRY_ATTR_ACTION_SET_COLOUR, + + /** + * @brief Attach/detach a policer to the entry + * + * @type fal_acl_action_data_t fal_object_id_t + * @flags CREATE_AND_SET + * @objects FAL_OBJECT_TYPE_ACL_POLICER + * @default disabled + */ + FAL_ACL_ENTRY_ATTR_ACTION_POLICER, + /* * The following chunk has match fields */ @@ -4474,7 +5733,7 @@ int fal_plugin_acl_get_table_attr(fal_object_t table_id, */ int 
fal_plugin_acl_create_entry(uint32_t attr_count, const struct fal_attribute_t *attr, - fal_object_t *new_entry_id); + fal_object_t *entry_id); /** * @brief Delete an entry/rule @@ -4557,4 +5816,787 @@ int fal_plugin_acl_get_counter_attr(fal_object_t counter_id, struct fal_attribute_t *attr_list); /* End of ACL Stuff */ -#endif /* FAL_PLUGIN_H */ +/* + * Packet capture (snooping) attributes & functions + */ + +enum fal_capture_attr_t { + /** + * @brief Capture copy (crop) size - how much of the frame to + * capture + * @flags CREATE_ONLY + * @type uint32_t + * @default 0 (copy whole frame) + */ + FAL_CAPTURE_ATTR_COPY_LENGTH, + + /** + * @brief How much backplane bandwidth to be used by captured + * frames (Kbits/sec) + * @flags CREATE_ONLY + * @type uint32_t + * @default 0 (2000Kbps) + */ + FAL_CAPTURE_ATTR_BANDWIDTH, +}; + +int fal_plugin_capture_create(uint32_t attr_count, + const struct fal_attribute_t *attr_list, + fal_object_t *obj); +void fal_plugin_capture_delete(fal_object_t obj); + +/** + * @brief Stat counter IDs for use in fal_plugin_capture_get_stats() call. + */ +enum fal_capture_stat_type { + FAL_CAPTURE_STAT_DROPPED_PACKETS, + FAL_CAPTURE_STAT_MAX +}; + +/** + * @brief Get the counters for the capture object + * + * @param[in] obj Capture object + * @param[in] num_counters The size of the stats array being asked for + * @param[in] cntr_ids Array of stats to return + * @param[out] stats An array to write the requested stats values into.
+ * @return Returns 0 for success, error code on failure + */ +int fal_plugin_capture_get_stats( + fal_object_t obj, uint32_t num_counters, + const enum fal_capture_stat_type *cntr_ids, + uint64_t *stats); + + +/* BFD Definitions */ + +/** + * @brief FAL session type of BFD + */ +enum fal_bfd_session_type_t { + /** Demand Active Mode */ + FAL_BFD_SESSION_TYPE_DEMAND_ACTIVE = 0, + + /** Demand Passive Mode */ + FAL_BFD_SESSION_TYPE_DEMAND_PASSIVE, + + /** Asynchronous Active Mode */ + FAL_BFD_SESSION_TYPE_ASYNC_ACTIVE, + + /** Asynchronous Passive Mode */ + FAL_BFD_SESSION_TYPE_ASYNC_PASSIVE, +}; + +/** + * @brief FAL type of encapsulation tunnel for BFD + */ +enum fal_bfd_encapsulation_type_t { + /** + * @brief IP in IP Encapsulation | L2 Ethernet header | IP header | + * Inner IP header | Original BFD packet + */ + FAL_BFD_ENCAPSULATION_TYPE_IP_IN_IP, + + /** + * @brief L3 GRE Tunnel Encapsulation | L2 Ethernet header | IP header | + * GRE header | Original BFD packet + */ + FAL_BFD_ENCAPSULATION_TYPE_L3_GRE_TUNNEL, +}; + +/** + * @brief FAL BFD session state + */ +enum fal_bfd_session_state_t { + /** BFD Session is in Admin down */ + FAL_BFD_SESSION_STATE_ADMIN_DOWN, + + /** BFD Session is Down */ + FAL_BFD_SESSION_STATE_DOWN, + + /** BFD Session is in Initialization */ + FAL_BFD_SESSION_STATE_INIT, + + /** BFD Session is Up */ + FAL_BFD_SESSION_STATE_UP, +}; + +/** + * @brief FAL BFD session diagnostic + */ +enum fal_bfd_session_diag_t { + /** No Diagnostic */ + FAL_BFD_DIAG_NONE, + + /** Control Detection Time Expired */ + FAL_BFD_DIAG_DETECT_EXPIRE, + + /** Echo Function Failed */ + FAL_BFD_DIAG_ECHO_FAIL, + + /** Neighbor Signaled Session Down */ + FAL_BFD_DIAG_NEIGH_DOWN, + + /** Forwarding Plane Reset */ + FAL_BFD_DIAG_FWD_RESET, + + /** Path Down */ + FAL_BFD_DIAG_PATH_DOWN, + + /** Concatenated Path Down */ + FAL_BFD_DIAG_CONCAT_DOWN, + + /** Administratively Down */ + FAL_BFD_DIAG_ADMIN_DOWN, + + /** Reverse Concatenated Path Down */ + 
FAL_BFD_DIAG_REV_CONCAT_DOWN, +}; + +/** + * @brief BFD PDU flags + */ +union fal_bfd_pdu_flags_t { + struct { + /** BFD PDU flags byte value */ + uint8_t flags; + /** BFD Param changed, 0 - not changed, 1 - changed */ + uint8_t param_changed; + uint8_t reserved1; + uint8_t reserved2; + }; + uint32_t val; +}; + +/** + * @brief Defines the operational status of the BFD session + */ +struct fal_bfd_session_state_notification_t { + /** BFD Session id */ + fal_object_t bfd_session_id; + + /** BFD session state */ + enum fal_bfd_session_state_t session_state; + + /** BFD remote session state */ + enum fal_bfd_session_state_t remote_state; + + /** BFD local session diagnostic */ + enum fal_bfd_session_diag_t local_diag; + + /** BFD remote session diagnostic */ + enum fal_bfd_session_diag_t remote_diag; + + /** BFD remote PDU flag bits */ + union fal_bfd_pdu_flags_t remote_pdu_flags; + + /** BFD remote discriminator */ + uint32_t remote_session_id; + + /** BFD rx interval received from remote peer */ + uint32_t remote_rx_required; + + /** BFD negotiated Tx interval max(local Tx, remote Rx) */ + uint32_t tx_negotiated; + + /** BFD negotiated Rx interval max(local Rx, remote Tx) */ + uint32_t rx_negotiated; + + /** BFD remote detect multiplier */ + uint32_t remote_detect_mult; +}; + +/** + * @brief FAL attributes for BFD session + */ +enum fal_bfd_session_attr_t { + /** + * @brief Start of attributes + */ + FAL_BFD_SESSION_ATTR_START, + + /** + * @brief BFD Session type DEMAND/ASYNCHRONOUS + * + * @type u8 + * @flags MANDATORY_ON_CREATE | CREATE_ONLY + */ + FAL_BFD_SESSION_ATTR_TYPE = FAL_BFD_SESSION_ATTR_START, + + /** + * @brief Router interface object + * + * @type fal_object_t + * @flags CREATE_AND_SET + */ + FAL_BFD_SESSION_ATTR_ROUTER_INTERFACE, + + /** + * @brief Local discriminator + * + * @type u32 + * @flags MANDATORY_ON_CREATE | CREATE_ONLY + */ + FAL_BFD_SESSION_ATTR_LOCAL_DISCRIMINATOR, + + /** + * @brief Remote discriminator + * + * @type u32 + * @flags
MANDATORY_ON_CREATE | CREATE_ONLY + */ + FAL_BFD_SESSION_ATTR_REMOTE_DISCRIMINATOR, + + /** + * @brief UDP Source port + * + * @type u32 + * @flags MANDATORY_ON_CREATE | CREATE_ONLY + */ + FAL_BFD_SESSION_ATTR_UDP_SRC_PORT, + + /** + * @brief Encapsulation type + * + * @type u8 fal_bfd_encapsulation_type_t + * @flags CREATE_ONLY + */ + FAL_BFD_SESSION_ATTR_BFD_ENCAPSULATION_TYPE, + + /** + * @brief IP header version + * + * @type u8 + * @flags MANDATORY_ON_CREATE | CREATE_ONLY + */ + FAL_BFD_SESSION_ATTR_IPHDR_VERSION, + + /** + * @brief IP header TOS + * + * @type u8 + * @flags CREATE_AND_SET + * @default 0 + */ + FAL_BFD_SESSION_ATTR_TOS, + + /** + * @brief IP header TTL + * + * @type u8 + * @flags CREATE_AND_SET + * @default 255 + */ + FAL_BFD_SESSION_ATTR_TTL, + + /** + * @brief Source IP + * + * @type ipaddr + * @flags MANDATORY_ON_CREATE | CREATE_ONLY + */ + FAL_BFD_SESSION_ATTR_SRC_IP_ADDRESS, + + /** + * @brief Destination IP + * + * @type ipaddr + * @flags MANDATORY_ON_CREATE | CREATE_ONLY + */ + FAL_BFD_SESSION_ATTR_DST_IP_ADDRESS, + + /** + * @brief Tunnel IP header TOS + * + * @type u8 + * @flags CREATE_AND_SET + * @default 0 + * @validonly FAL_BFD_SESSION_ATTR_BFD_ENCAPSULATION_TYPE == + * FAL_BFD_ENCAPSULATION_TYPE_IP_IN_IP + */ + FAL_BFD_SESSION_ATTR_TUNNEL_TOS, + + /** + * @brief Tunnel IP header TTL + * + * @type u8 + * @flags CREATE_AND_SET + * @default 255 + * @validonly FAL_BFD_SESSION_ATTR_BFD_ENCAPSULATION_TYPE == + * FAL_BFD_ENCAPSULATION_TYPE_IP_IN_IP + */ + FAL_BFD_SESSION_ATTR_TUNNEL_TTL, + + /** + * @brief Tunnel source IP + * + * @type ipaddr + * @flags MANDATORY_ON_CREATE | CREATE_ONLY + * @condition FAL_BFD_SESSION_ATTR_BFD_ENCAPSULATION_TYPE == + * FAL_BFD_ENCAPSULATION_TYPE_IP_IN_IP + */ + FAL_BFD_SESSION_ATTR_TUNNEL_SRC_IP_ADDRESS, + + /** + * @brief Tunnel destination IP + * + * @type ipaddr + * @flags MANDATORY_ON_CREATE | CREATE_ONLY + * @condition FAL_BFD_SESSION_ATTR_BFD_ENCAPSULATION_TYPE == + * 
FAL_BFD_ENCAPSULATION_TYPE_IP_IN_IP + */ + FAL_BFD_SESSION_ATTR_TUNNEL_DST_IP_ADDRESS, + + /** + * @brief Multi hop BFD session + * + * @type bool + * @flags CREATE_ONLY + * @default false + */ + FAL_BFD_SESSION_ATTR_MULTIHOP, + + /** + * @brief Minimum Transmit interval in microseconds + * + * @type u32 + * @flags MANDATORY_ON_CREATE | CREATE_AND_SET + */ + FAL_BFD_SESSION_ATTR_MIN_TX, + + /** + * @brief Negotiated Transmit interval in microseconds + * + * @type u32 + * @flags MANDATORY_ON_CREATE | CREATE_AND_SET + */ + FAL_BFD_SESSION_ATTR_NEGOTIATED_TX, + + /** + * @brief Minimum Receive interval in microseconds + * + * @type u32 + * @flags MANDATORY_ON_CREATE | CREATE_AND_SET + */ + FAL_BFD_SESSION_ATTR_MIN_RX, + + /** + * @brief Negotiated Receive interval in microseconds + * + * @type u32 + * @flags MANDATORY_ON_CREATE | CREATE_AND_SET + */ + FAL_BFD_SESSION_ATTR_NEGOTIATED_RX, + + /** + * @brief Detection time Multiplier of local endpoint + * + * @type u8 + * @flags MANDATORY_ON_CREATE | CREATE_AND_SET + */ + FAL_BFD_SESSION_ATTR_DETECT_MULT, + + /** + * @brief Minimum Remote Transmit interval in microseconds + * + * @type u32 + * @flags READ_ONLY + */ + FAL_BFD_SESSION_ATTR_REMOTE_MIN_TX, + + /** + * @brief Minimum Remote Receive interval in microseconds + * + * @type u32 + * @flags READ_ONLY + */ + FAL_BFD_SESSION_ATTR_REMOTE_MIN_RX, + + /** + * @brief Detection time Multiplier of remote endpoint + * + * @type u8 + * @flags READ_ONLY + */ + FAL_BFD_SESSION_ATTR_REMOTE_DETECT_MULT, + + /** + * @brief BFD session detection time in microseconds + * + * @type u32 + * @flags CREATE_AND_SET + */ + FAL_BFD_SESSION_ATTR_DETECTION_TIME, + + /** + * @brief BFD Session state + * + * @type u8 fal_bfd_session_state_t + * @flags READ_ONLY + */ + FAL_BFD_SESSION_ATTR_STATE, + + /** + * @brief BFD Remote Session state + * + * @type u8 fal_bfd_session_state_t + * @flags READ_ONLY + */ + FAL_BFD_SESSION_ATTR_REMOTE_STATE, + + /** + * @brief BFD Local diagnostic + * + * 
@type u8 fal_bfd_session_diag_t + * @flags READ_ONLY + */ + FAL_BFD_SESSION_ATTR_LOCAL_DIAG, + + /** + * @brief BFD Remote diagnostic + * + * @type u8 fal_bfd_session_diag_t + * @flags READ_ONLY + */ + FAL_BFD_SESSION_ATTR_REMOTE_DIAG, + + /** + * @brief Next hop for a multi hop BFD session + * + * @type ipaddr + * @flags CREATE_AND_SET + * @validonly FAL_BFD_SESSION_ATTR_MULTIHOP == true + */ + FAL_BFD_SESSION_ATTR_NEXTHOP, + + /** + * @brief BFD packet drop precedence in egress traffic, + * BFD should use FAL_PACKET_COLOUR_GREEN + * + * @type enum fal_packet_colour + * @flags MANDATORY_ON_CREATE | CREATE_AND_SET + */ + FAL_BFD_SESSION_ATTR_PKT_COLOUR, + + /** + * @brief BFD packet priority queue in egress traffic, + * Value range: 0-7. BFD should be applied to highest priority + * + * @type u32 + * @flags MANDATORY_ON_CREATE | CREATE_AND_SET + */ + FAL_BFD_SESSION_ATTR_PKT_DESIGNATOR, + + /** + * @brief BFD packet local flags + * + * @type u32 + * @flags MANDATORY_ON_CREATE | CREATE_AND_SET + */ + FAL_BFD_SESSION_ATTR_POLL_BIT, + + /** + * @brief BFD packet local flags + * + * @type u32 + * @flags MANDATORY_ON_CREATE | CREATE_AND_SET + */ + FAL_BFD_SESSION_ATTR_FINAL_BIT, + + /** + * @brief Indicate the session is a Micro BFD session + * + * @type bool + * @flags CREATE_ONLY + * @default false + */ + FAL_BFD_SESSION_ATTR_MICRO_BFD, + + /** + * @brief LAG's member interface object + * + * @type fal_object_t + * @flags CREATE_ONLY + * @validonly FAL_BFD_SESSION_ATTR_MICRO_BFD == true + */ + FAL_BFD_SESSION_ATTR_LAG_MEMBER_IF, + + /** + * @brief End of attributes + */ + FAL_BFD_SESSION_ATTR_END +}; + +/** + * @brief BFD Session counter IDs in fal_get_bfd_session_stats() call + */ +enum fal_bfd_session_stat_t { + /** Ingress packet stat count */ + FAL_BFD_SESSION_STAT_IN_PACKETS, + + /** Egress packet stat count */ + FAL_BFD_SESSION_STAT_OUT_PACKETS, + + /** Packet Drop stat count */ + FAL_BFD_SESSION_STAT_DROP_PACKETS +}; + +/** + * @brief HW mode of supporting BFD
+ */ +enum fal_bfd_hw_mode { + /** Unknown running mode */ + FAL_BFD_HW_MODE_UNKNOWN, + + /* + * HW BFD does not maintain state machine in hardware resource. + * Session state transition, flags and parameter negotiation + * depend on Dataplane software. + */ + FAL_BFD_HW_MODE_CP_DEPENDENT, + FAL_BFD_HW_MODE_DP_SW_DEPENDENT = FAL_BFD_HW_MODE_CP_DEPENDENT, + + /* + * HW BFD is Independent of the Dataplane software state. + * Full BFD state machine is maintained in hardware layer. + * HW session initial state cannot be set flexibly, but fixed + * to be DOWN + */ + FAL_BFD_HW_MODE_CP_INDEPENDENT, + FAL_BFD_HW_MODE_DP_SW_INDEPENDENT = FAL_BFD_HW_MODE_CP_INDEPENDENT, +}; + +/** + * @brief Create BFD session. + * + * @param[out] bfd_session_id BFD session id + * @param[in] attr_count Number of attributes + * @param[in] attr_list Value of attributes + * + * @return 0 if operation is successful otherwise a different + * error code is returned. + */ +int fal_plugin_bfd_create_session(fal_object_t *bfd_session_id, + uint32_t attr_count, + const struct fal_attribute_t *attr_list); + +/** + * @brief Delete BFD session. + * + * @param[in] bfd_session_id BFD session id + * + * @return 0 if operation is successful otherwise a different + * error code is returned. + */ +int fal_plugin_bfd_delete_session(fal_object_t bfd_session_id); + +/** + * @brief Set BFD session attributes. + * + * @param[in] bfd_session_id BFD session id + * @param[in] attr_count Number of attributes + * @param[in] attr_list Value of attributes + * + * @return 0 if operation is successful otherwise a different + * error code is returned. + */ +int fal_plugin_bfd_set_session_attribute(fal_object_t bfd_session_id, + uint32_t attr_count, + const struct fal_attribute_t *attr_list); + +/** + * @brief Get BFD session attributes. 
+ * + * @param[in] bfd_session_id BFD session id + * @param[in] attr_count Number of attributes + * @param[inout] attr_list Value of attribute + * + * @return 0 if operation is successful otherwise a different + * error code is returned. + */ +int fal_plugin_bfd_get_session_attribute(fal_object_t bfd_session_id, + uint32_t attr_count, + struct fal_attribute_t *attr_list); + +/** + * @brief Get BFD session statistics counters. + * + * @param[in] bfd_session_id BFD session id + * @param[in] number_of_counters Number of counters in the array + * @param[in] counter_ids Specifies the array of counter ids + * @param[out] counters Array of resulting counter values. + * + * @return 0 on success, failure status code on error + */ +int fal_plugin_bfd_get_session_stats(fal_object_t bfd_session_id, + uint32_t number_of_counters, + const enum fal_bfd_session_stat_t *counter_ids, + uint64_t *counters); + +/** + * @brief BFD session state change notification + * + * Passed as a parameter to FAL_SWITCH_ATTR_BFD_SESSION_STATE_NOTIFY + * + * @count data[count] + * + * @param[in] count Number of notifications + * @param[in] data Array of BFD session state + */ +typedef void (*fal_bfd_session_state_change_notification_fn)( + uint32_t count, + struct fal_bfd_session_state_notification_t *data); + +/** + * @brief Dump BFD session + * + * @param[in] bfd_session_id BFD session id + * @param[inout] json JSON writer object + * + * @return 0 if operation is successful otherwise a different + * error code is returned. + */ +void fal_plugin_bfd_dump_session(fal_object_t bfd_session_id, + json_writer_t *wr); + +/* End of BFD Definitions */ + +enum fal_mpls_route_attr_t { + /** + * @brief Next hop group id + * + * This attribute only takes effect when ATTR_PACKET_ACTION is set to + * FORWARD. 
+ * + * @type fal_object_id_t + * @flags CREATE_AND_SET + * @default FAL_NULL_OBJECT_ID + * @validonly FAL_MPLS_ROUTE_ATTR_PACKET_ACTION == + * FAL_PACKET_ACTION_FORWARD + */ + FAL_MPLS_ROUTE_ATTR_NEXT_HOP_GROUP, /* .objid */ + /** + * @brief Packet action + * + * @type fal_packet_action_t + * @flags MANDATORY_ON_CREATE | CREATE_AND_SET + */ + FAL_MPLS_ROUTE_ATTR_PACKET_ACTION, /* .u32 - fal_packet_action_t */ +}; + +struct fal_mpls_route_t { + /** + * @brief MPLS label + */ + uint32_t label; +}; + +/** + * @brief Create an MPLS route + * + * @param[in] mpls_route MPLS route key + * @param[in] attr_count Number of attributes + * @param[in] attr_list The attributes and values for the route + * + * @return 0 on success or failure status. + */ +int fal_plugin_create_mpls_route(const struct fal_mpls_route_t *mpls_route, + uint32_t attr_count, + const struct fal_attribute_t *attr_list); + +/** + * @brief Delete an MPLS route + * + * @param[in] mpls_route MPLS route key + * + * @return 0 on success or failure status. + */ +int fal_plugin_delete_mpls_route(const struct fal_mpls_route_t *mpls_route); + +/** + * @brief Set an MPLS route attribute + * + * @param[in] mpls_route MPLS route key + * @param[in] attr The attribute to change and the new value + * + * @return 0 on success or failure status. + */ +int fal_plugin_set_mpls_route_attr(const struct fal_mpls_route_t *mpls_route, + const struct fal_attribute_t *attr); + +/** + * @brief Get an MPLS route attribute + * + * @param[in] mpls_route MPLS route key + * @param[in] attr_count Number of attributes + * @param[in/out] attr_list A list of the attributes and their + * associated values + * + * @return 0 on success or failure status. 
+ */ +int fal_plugin_get_mpls_route_attr(const struct fal_mpls_route_t *mpls_route, + uint32_t attr_count, + struct fal_attribute_t *attr_list); + +enum fal_vrf_attr_t { + /** + * @brief VRF ID + * + * Application-generated ID for the VRF that may be used by + * the FAL plugin for ease of debugging programming and packet + * forwarding issues. + * + * @type uint32_t + * @flags CREATE_ONLY | MANDATORY_ON_CREATE + */ + FAL_VRF_ATTR_ID, /* .u32 */ +}; + +/** + * @brief Create a VRF + * + * @param[in] attr_count Number of attributes + * @param[in] attr_list The attributes and values for the VRF + * @param[out] obj Object ID for the VRF returned + * + * @return 0 on success or failure status. + */ +int fal_plugin_create_vrf(uint32_t attr_count, + const struct fal_attribute_t *attr_list, + fal_object_t *obj); + +/** + * @brief Delete a VRF + * + * @param[in] obj Object ID for the VRF to be deleted + * + * @return 0 on success or failure status. + */ +int fal_plugin_delete_vrf(fal_object_t obj); + +/** + * @brief Set a VRF attribute + * + * @param[in] obj Object ID for the VRF to be updated + * @param[in] attr The attribute to change and the new value + * + * @return 0 on success or failure status. + */ +int fal_plugin_set_vrf_attr(fal_object_t obj, + const struct fal_attribute_t *attr); + +/** + * @brief Get a VRF attribute + * + * @param[in] obj Object ID for the VRF to be queried + * @param[in] attr_count Number of attributes + * @param[in/out] attr_list A list of the attributes and their + * associated values + * + * @return 0 on success or failure status. + */ +int fal_plugin_get_vrf_attr(fal_object_t obj, + uint32_t attr_count, + struct fal_attribute_t *attr_list); + +#endif /* VYATTA_DATAPLANE_FAL_PLUGIN_H */ diff --git a/include/feature_commands.h b/include/feature_commands.h new file mode 100644 index 00000000..e3b3200b --- /dev/null +++ b/include/feature_commands.h @@ -0,0 +1,162 @@ +/* + * Copyright (c) 2019-2020, AT&T Intellectual Property. 
All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ +#ifndef VYATTA_DATAPLANE_FEATURE_COMMANDS_H +#define VYATTA_DATAPLANE_FEATURE_COMMANDS_H + +#include + +/* + * A feature plugin can register command handlers for config commands + * and op mode commands. + */ + +/* + * A config command is a set of bytes of a given length. This will have been + * generated by the feature code in the config system. It is expected to be + * in protobuf version2 format. + */ +struct pb_msg { + /* + * Pointer to the data. This will have been constructed by the + * feature code in the config system, and will be passed down + * unmodified. + */ + void *msg; + /* + * The length of the config message + */ + size_t msg_len; + /* + * File that errors or other useful information can be written to. + */ + FILE *fp; + /* + * If this message has a response then it will be stored here. Config + * messages do not have responses, op mode messages may have responses. + * This will be passed back unmodified. + */ + void *ret_msg; + /* + * The length of the response. + */ + size_t ret_msg_len; +}; + +/* + * A callback function for processing a config command message. + * + * @param[in] msg The structure containing the configuration. + * + * @return 0 if the command was successfully processed. + * -ve value for any error. + */ +typedef int +(pb_cmd_proc)(struct pb_msg *msg); + +/* + * Register a command handler for a given command. + * + * @param[in] name The name of the config command. Commands sent from the + * config system need to use this name too. + * @param[in] handler The handler function to process commands. + * + * @return 0 if the handler was successfully registered + * -EEXIST If the name is already in use. + * -ENOMEM If there is not enough memory to add this handler + */ +int +dp_feature_register_pb_cfg_handler(const char *name, + pb_cmd_proc handler); + +/* + * Use dp_pb_cmd_err instead of fprintf(msg->fp, "").
msg->fp may be NULL + * if a command is deferred and then replayed, for example after an + * interface event. + */ +void dp_pb_cmd_err(struct pb_msg *msg, const char *fmt, ...); + +/* + * Register a command handler for a given op mode command. + * + * @param[in] name The name of the op mode command. Op mode commands from the + * system need to use this name too. + * @param[in] handler The handler function to process op mode commands. + * + * @return 0 if the handler was successfully registered + * -EEXIST If the name is already in use. + * -ENOMEM If there is not enough memory to add this handler + */ +int +dp_feature_register_pb_op_handler(const char *name, + pb_cmd_proc handler); + + +/* + * Callback function used when registering a string based op mode handler. + * + * @param[in, out] f File to write output to. + * @param[in] argc Count of the number of args in argv + * @param[in] argv Array of string arguments. + * + * @return 0 on success + * -ve on error. + * + * @deprecated. This is deprecated in favour of the existing protobuf + * handlers. + */ +typedef int (feature_string_op_fn)(FILE *f, int argc, char **argv); + +/* + * Register a new op mode command. + * + * @param[in] name The name of the command to register. Must not already be used + * @param[in] help A help string for the new command. + * @param[in] fn A function to call to process this command + * + * @return 0 on success + * -ve on error. + * + * @deprecated. This is deprecated in favour of the existing protobuf + * handlers. + */ +int dp_feature_register_string_op_handler(const char *name, + const char *help, + feature_string_op_fn *fn); + +/* + * Callback function used when registering a string based cfg handler. + * + * @param[in, out] f File to write output to. + * @param[in] argc Count of the number of args in argv + * @param[in] argv Array of string arguments. + * + * @return 0 on success + * -ve on error. + * + * @deprecated. This is deprecated in favour of the existing protobuf + * handlers.
+ */ +typedef int (string_cfg_fn)(FILE *f, int argc, char **argv); + +/* + * Register a new config handler. This must be done as part of the plugin init. + * If it is done later then the topic will not be registered correctly and + * messages will not be processed. + * + * @param[in] name The name of the cfg handler to register. Must not already + * be used + * @param[in] fn A function to call to process this config + * + * @return 0 on success + * -ve on error. + * + * @deprecated. This is deprecated in favour of the existing protobuf + * handlers. + */ +int dp_feature_register_string_cfg_handler(const char *name, + string_cfg_fn *fn); + +#endif /* VYATTA_DATAPLANE_FEATURE_COMMANDS_H */ diff --git a/include/feature_plugin.h b/include/feature_plugin.h new file mode 100644 index 00000000..5b68f680 --- /dev/null +++ b/include/feature_plugin.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2019-2020, AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ +#ifndef VYATTA_DATAPLANE_FEATURE_PLUGIN_H +#define VYATTA_DATAPLANE_FEATURE_PLUGIN_H + +/* + * Intro + * ===== + * + * The feature plugin layer is designed to allow features for the dataplane to + * be plugged in without having to modify the core dataplane code. + * + * Plugin libraries are installed in a known location and the dataplane will + * search for them when it is starting up. + * + * The plugin library functions that the dataplane will call are all in this + * file. A feature plugin need not implement all of them. + * + * There are also various helper APIs in other files in this directory that + * the plugins can use within their code. These allow access to some of the + * internals of the dataplane which is necessary for some types of feature. + * + * Packet Processing + * ================= + * A feature may need to do packet processing. If it does then this will all + * be done via the packet pipeline APIs.
The pipeline APIs all allow nodes to + * be inserted in the pipeline and are described fully in pipeline.h + * + * Command Processing + * ================== + * + * A feature is likely to require configuring, and this is done by registering + * a configuration handler. This handler will be called when messages are + * received over the configuration channel for the given feature. The format + * of the messages is expected to be protocol buffers, version 2. The handler + * will be given the protobuf message and it then needs to decode it and make + * the appropriate changes to the system, for example enabling a node in the + * packet pipeline. + * + * Show commands + * ============= + * + * A feature is likely to require show commands, and this is done by + * registering a show command handler. This handler will be called whenever + * a request for a feature show command is received over the show commands + * channel. The format of the show command request is expected to be a + * string with space delimited words. The return value is expected to be a + * JSON formatted message. + */ + +/* + * Initialise a new plugin. Each plugin must provide an implementation of + * this function. + * + * This function does any work required to set up the feature plugin. At the + * stage this is called the feature configuration will not yet have been + * received by the dataplane. + * + * @param[out] name The name of the feature. This will be used in the + * show command output. The feature should fill in this + * name. This name should also be used as the 'plugin_name' + * for any pipeline features that are registered. + * @return 0 on success + */ +int dp_feature_plugin_init(const char **name); + +/* + * Clean up the resources a plugin was using. Each plugin should clean up + * properly when this is called.
+ * + * @return 0 on success + */ +int dp_feature_plugin_cleanup(void); + +#endif /* VYATTA_DATAPLANE_FEATURE_PLUGIN_H */ + diff --git a/include/interface.h b/include/interface.h new file mode 100644 index 00000000..5efe2268 --- /dev/null +++ b/include/interface.h @@ -0,0 +1,247 @@ +/* + * Copyright (c) 2020, AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + * + */ + +#ifndef VYATTA_DATAPLANE_INTERFACE_H +#define VYATTA_DATAPLANE_INTERFACE_H + +#include +#include + +#include "vrf.h" +#include "fal_plugin.h" + +/* + * This declares functions exported by dataplane to access + * interface structure. Each Interface is represented via + * structure ifnet pointer and has an interface index associated + * with it. + */ +struct ifnet; + +/** + * @brief The duplex status of an interface + */ +enum dp_ifnet_link_duplex_type { + DP_IFNET_LINK_DUPLEX_HALF = 0, + DP_IFNET_LINK_DUPLEX_FULL = 1, + DP_IFNET_LINK_DUPLEX_UNKNOWN = 2, +}; + +/** + * @brief Value used for interface with unknown speed + */ +static const uint32_t DP_IFNET_LINK_SPEED_UNKNOWN = 0; + +/** + * @brief The status of an interface + * + * link_status true if interface is up. false otherwise. ifOperStatus in rfc2233 + * link_duplex duplex status of interface. + * link_speed speed in Mbps. 
ifHighSpeed in rfc2233 + */ +struct dp_ifnet_link_status { + bool link_status; + enum dp_ifnet_link_duplex_type link_duplex; + uint32_t link_speed; +}; + +/** + * @brief Get interface status + * + * @param ifp interface to get status of + * @param if_link the status of the interface + */ +void dp_ifnet_link_status(struct ifnet *ifp, + struct dp_ifnet_link_status *if_link); + +/** + * @brief Get interface admin status + * + * @param ifp interface to get admin status of + * @return True if the interface is admin up + * @return False if the interface is admin down + */ +bool dp_ifnet_admin_status(struct ifnet *ifp); + +/* + * Iterator function for walk of interfaces + * + * @param[in] ifp interface + * @param[in] arg opaque caller context + */ +typedef void dp_ifnet_iter_func_t(struct ifnet *ifp, void *arg); + +/* + * Walk all interfaces + * + * @param[in] func function to call in each interface + * @param[in] arg opaque caller context + */ +void dp_ifnet_walk(dp_ifnet_iter_func_t func, void *arg); + +/* + * Get interface index, Assumes valid ifnet pointer + * @param[in] ifp Pointer to ifnet structure + * @return interface index for a given interface pointer + */ +unsigned int dp_ifnet_ifindex(const struct ifnet *ifp); + +/* + * Get interface name, Assumes valid ifnet pointer + * @param[in] ifp Pointer to ifnet structure + * @return interface name for a given interface pointer + */ +const char *dp_ifnet_ifname(const struct ifnet *ifp); + +/* + * Get interface vrfid, Assumes valid ifnet pointer + * @param[in] ifp Pointer to ifnet structure + * @return interface vrfid for a given interface pointer + */ +vrfid_t dp_ifnet_vrfid(const struct ifnet *ifp); + +/* + * Get interface FAL L3 object. + * @param[in] ifp Pointer to ifnet structure + * @return interface fal_l3 for a given interface pointer + */ +fal_object_t dp_ifnet_fal_l3_if(const struct ifnet *ifp); + +/* + * Get interface FAL LAG member object. 
+ * @param[in] ifp Pointer to ifnet structure + * @return interface FAL object corresponding to LAG member + */ +fal_object_t dp_ifnet_fal_lag_member(const struct ifnet *ifp); + +/* + * Get ifnet pointer from interface index + * @param[in] ifindex Interface index + * @return interface structure pointer for the given index + */ +struct ifnet *dp_ifnet_byifindex(unsigned int ifindex); + +/* + * Get ifnet pointer from interface name + * @param[in] name Interface Name + * @return interface structure pointer for the given name + */ +struct ifnet *dp_ifnet_byifname(const char *name); + +/* + * Interface types as defined in the ianaiftype-mib register specified + * as part of rfc2233 + */ +enum dp_ifnet_iana_type { + DP_IFTYPE_IANA_OTHER = 1, + DP_IFTYPE_IANA_ETHERNETCSMACD = 6, + DP_IFTYPE_IANA_PPP = 23, + DP_IFTYPE_IANA_SOFTWARELOOPBACK = 24, + DP_IFTYPE_IANA_TUNNEL = 131, + DP_IFTYPE_IANA_IEEE8023ADLAG = 161, + DP_IFTYPE_IANA_L2VLAN = 135, + DP_IFTYPE_IANA_BRIDGE = 209, +}; + +/* + * Get the rfc2233 interface type, Assumes valid ifnet pointer + * @param[in] ifp Pointer to ifnet interface + * @return interface type as defined in the ianaiftype-mib + */ +enum dp_ifnet_iana_type dp_ifnet_iana_type(struct ifnet *ifp); + +/* + * Is an interface a member of a bridge + * + * @param[in] ifp Pointer to the interface + * + * @return True if the interface is a bridge member + * @return False if the interface is not a bridge member + */ +bool dp_ifnet_is_bridge_member(struct ifnet *ifp); + +/** + * @brief rfc2233 IfEntry counters + * + * See https://tools.ietf.org/html/rfc2233 + */ +struct dp_ifnet_mib_counters { + uint64_t dp_ifnet_mib_counter_inoctets; + uint64_t dp_ifnet_mib_counter_inucastpkts; + uint64_t dp_ifnet_mib_counter_inmulticastpkts; + uint64_t dp_ifnet_mib_counter_inbroadcastpkts; + uint64_t dp_ifnet_mib_counter_indiscards; + uint64_t dp_ifnet_mib_counter_inerrors; + uint64_t dp_ifnet_mib_counter_inunknownprotos; + uint64_t dp_ifnet_mib_counter_outoctets; + uint64_t
dp_ifnet_mib_counter_outucastpkts; + uint64_t dp_ifnet_mib_counter_outmulticastpkts; + uint64_t dp_ifnet_mib_counter_outbroadcastpkts; + uint64_t dp_ifnet_mib_counter_outdiscards; + uint64_t dp_ifnet_mib_counter_outerrors; +}; + +/** + * @brief Get MIB counters for an interface + * + * @note outmulticastpkts and outbroadcastpkts will always be 0 + * + * @param[in] ifp interface to get counters for + * @param[out] counters the counters to populate + * @return 0 if counters populated. Non zero if not. + */ +int dp_ifnet_mib_counters(struct ifnet *ifp, + struct dp_ifnet_mib_counters *counters); + +/** + * Iterator function for walk of interface addresses + * + * @param[in] addr address associated with the interface + * @param[in] prefixlen prefix length of the address + * @param[in] arg opaque caller context + * + * @return 0 on success. Non zero terminates walk + */ +typedef int dp_ifnet_addr_iter_func_t(struct sockaddr *addr, uint8_t prefixlen, + void *arg); + +/** + * Walk all addresses on an interface + * + * @param[in] ifp interface to walk addresses on + * @param[in] func function to call for each address + * @param[in] arg opaque caller context + * + * @return 0 on success. Non zero, walk was terminated. + */ +int dp_ifnet_addr_walk(struct ifnet *ifp, dp_ifnet_addr_iter_func_t func, + void *arg); + +/* + * Interface output function to transmit packet on a given output + * interface. This function assumes a fully formed L2 frame and + * will simply transmit the packet without attempting any resolution. + * + * @param[in] in_ifp Input interface pointer + * @param[in] m Pointer to mbuf + * @param[in] out_ifp Output interface pointer + * @param[in] proto Ethernet protocol + */ +void dp_ifnet_output(struct ifnet *in_ifp, struct rte_mbuf *m, + struct ifnet *out_ifp, uint16_t proto); + +/* + * @brief Get the mac address of an interface + * + * @param[in] ifp Interface pointer + * @param[out] eth_addr Ethernet address + * + * @return 0 on success.
+ */ +int dp_ifnet_get_mac_addr(struct ifnet *ifp, struct rte_ether_addr *eth_addr); + +#endif /* VYATTA_DATAPLANE_INTERFACE_H */ diff --git a/include/ip.h b/include/ip.h new file mode 100644 index 00000000..ffb02d8a --- /dev/null +++ b/include/ip.h @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2020-2021, AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ +#ifndef VYATTA_DATAPLANE_IP_H +#define VYATTA_DATAPLANE_IP_H + +#include +#include + +#include "compiler.h" + +/* + * IPv4 and IPv6 related defines and helpers. + */ + +/* + * The IPv6 version is the 4 most significant bits of the first byte in + * the header. + */ +#define IPV6_VERSION 0x60 +#define IPV6_VERSION_MASK 0xf0 + +/* The default hoplimit for locally generated IPv6 packets */ +#define IPV6_DEFAULT_HOPLIMIT 64 + +/* Hoplimit for IPv6 packets that should remain on-link */ +#define IPV6_ONLINK_HOPLIMIT 255 + +/* + * Generate a random id to use in the Identification field of the IPv4 header. + * + * @param[in] salt Additional data to help randomise the returned value. + * + * @return A random id in the range of 0..65535 + */ +uint16_t dp_ip_randomid(uint16_t salt); + +union addr_u { + struct in_addr ip_v4; + struct in6_addr ip_v6; +}; + +/* structure to be used by functions that can take either IPv4 or IPv6 addr */ +struct ip_addr { + /* + * AF_INET or AF_INET6 + */ + uint32_t type; + union addr_u address; +}; + +static inline bool addr_u_eq_v4(const union addr_u *addr1, + const union addr_u *addr2) +{ + return addr1->ip_v4.s_addr == addr2->ip_v4.s_addr; +} + +static inline bool addr_u_eq_v6(const union addr_u *addr1, + const union addr_u *addr2) +{ + return IN6_ARE_ADDR_EQUAL(&addr1->ip_v6, &addr2->ip_v6); +} + +/* + * Check if 2 addresses are equal. + * + * @param[in] addr1 The first address. + * @param[in] addr2 The second address. 
+ * + * @return true if they are equal and the type is set to AF_INET or AF_INET6 + * @return false if not equal or type is not set to a valid value. + * + */ +static inline bool dp_addr_eq(const struct ip_addr *addr1, + const struct ip_addr *addr2) +{ + if (addr1->type != addr2->type) + return false; + + if (addr1->type == AF_INET) + return addr_u_eq_v4(&addr1->address, &addr2->address); + + if (addr1->type == AF_INET6) + return addr_u_eq_v6(&addr1->address, &addr2->address); + + return false; +} + +/* + * Check if 2 IPv6 prefixes are equal. + * + * @param[in] a1 The first prefix. + * @param[in] a2 The second prefix. + * @param[in] prefix_len The length prefix to compare. + * + * @return true if the prefixes are equal up to prefix_len. + * @return false if prefixes are not equal up to prefix_len. + */ +static inline bool dp_in6_prefix_eq(const struct in6_addr *a1, + const struct in6_addr *a2, + unsigned int prefix_len) +{ + const uint32_t *p1 = a1->s6_addr32; + const uint32_t *p2 = a2->s6_addr32; + + while (prefix_len >= 32) { + if (*p1++ != *p2++) + return false; + prefix_len -= 32; + } + + if (likely(prefix_len == 0)) + return true; + + uint32_t m = htonl(~0ul << (32 - prefix_len)); + + /* find bits that differ, and mask in network byte order */ + return ((*p1 ^ *p2) & m) == 0; +} + +#endif /* VYATTA_DATAPLANE_IP_H */ diff --git a/include/ip_checksum.h b/include/ip_checksum.h new file mode 100644 index 00000000..f329c4bf --- /dev/null +++ b/include/ip_checksum.h @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2020, AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ +#ifndef VYATTA_DATAPLANE_IP_CHECKSUM_H +#define VYATTA_DATAPLANE_IP_CHECKSUM_H + +#include +#include + +/* + * Checksum an IPv4 header. + * + @return The complemented checksum to set in IP header + + */ +uint32_t dp_in_cksum_hdr(const struct iphdr *ip); + +/* + * Set checksum for IPv4 header. 
+ */ +void dp_set_cksum_hdr(struct iphdr *ip); + +/* + * Checksum a TCP, UDP or ICMP IPv4 packet. + * + * The IPv4 header should not contains options. The layer 4 checksum + * must be set to 0 in the packet by the caller. The l4 header must be + * in the first mbuf. + * + * @param m [in] Pointer to mbuf chain + * @param ip [in] Pointer to the contiguous IP header. Set to NULL for + * ICMP (the pseudo hdr is not checksummed) + * @param l4_hdr [in] Pointer to the beginning of the L4 header + * + * @return The complemented checksum to set in the L4 header + */ +uint16_t +dp_in4_cksum_mbuf(const struct rte_mbuf *m, const struct iphdr *ip, + const void *l4_hdr); + +/* + * Checksum a TCP, UDP or ICMP IPv6 packet. + * + * The layer 4 checksum must be set to 0 in the packet by the + * caller. The l4 header must be in the first mbuf. + * + * @param m [in] Pointer to mbuf chain + * @param ip [in] Pointer to the contiguous IPv6 header. + * @param l4_hdr [in] Pointer to the beginning of the L4 header + * + * @return The complemented checksum to set in the L4 header + */ +uint16_t +dp_in6_cksum_mbuf(const struct rte_mbuf *m, const struct ip6_hdr *ip, + const void *l4_hdr); + +#endif /* VYATTA_DATAPLANE_IP_CHECKSUM_H */ + diff --git a/include/ip_forward.h b/include/ip_forward.h new file mode 100644 index 00000000..4e6759e8 --- /dev/null +++ b/include/ip_forward.h @@ -0,0 +1,345 @@ +/* + * Copyright (c) 2020, AT&T Intellectual Property. All rights reserved. 
+ * + * SPDX-License-Identifier: LGPL-2.1-only + * + */ +#ifndef VYATTA_DATAPLANE_IP_FORWARD_H +#define VYATTA_DATAPLANE_IP_FORWARD_H + +#include +#include +#include + +#include "interface.h" +#include "ip.h" +#include "vrf.h" +#include "fal_plugin.h" + +/* + * This file declares IP forwarding, nexthop related APIs and route tracker + * APIs exported by dataplane for IPv4 and IPv6 address families + */ + +/* + * Holds parameters needed for calculating ecmp hash + */ +struct ecmp_hash_param { + const struct ip_addr src_ip; + const struct ip_addr dst_ip; + uint32_t src_port; + uint32_t dst_port; + uint8_t protocol; +}; + +/* + * Forward declaration of next_hop structure for IPv4 and IPv6 + */ +struct next_hop; + +typedef void (*tracker_change_notif)(void *cb_ctx); + +/* + * Tracker information for route resolution + */ +struct rt_tracker_info; + +/* + * Get tracking status from RT Tracker + * @param[in] rt_info Route tracker information + * + * @return true if being tracked, false otherwise. + */ +bool dp_get_rt_tracker_tracking(struct rt_tracker_info *rt_info); + +/* + * Get nexthop index from RT Tracker + * @param[in] rt_info Route tracker information + * + * @return Index of NH.
+ */ +uint32_t dp_get_rt_tracker_nh_index(struct rt_tracker_info *rt_info); + +/* + * Add a tracker to track the route resolution of a given destination + * + * @param[in] vrf VRF of the destination to be tracked + * @param[in] addr IP address to be tracked + * @param[in] cb_ctx Context for the callback + * @param[in] cb Registered callback, in case there are changes + * + * @return Tracker Information for route + */ +struct rt_tracker_info *dp_rt_tracker_add(struct vrf *vrf, + struct ip_addr *addr, void *cb_ctx, + tracker_change_notif cb); + +/* + * Delete a tracker to track the route resolution of a given destination + * + * @param[in] vrf VRF of the destination to be tracked + * @param[in] addr IP address to be tracked + * @param[in] cb_ctx Context for the callback + */ +void dp_rt_tracker_delete(const struct vrf *vrf, struct ip_addr *addr, + void *cb_ctx); + +/* + * IPv4 route lookup function for a given destination for the table id passed. + * Route table is identified by the tbl_id parameter. Table ids values + * as defined by Linux kernel(rtnetlink.h) are used. RT_TABLE_MAIN = 254 can + * be used for a lookup in the main table. Dataplane uses 1-128 table ids for + * PBR and table ids above 255 are used for VRF. 
The table ids for reference + * RT_TABLE_UNSPEC=0, + * 1-128 reserved for PBR + * RT_TABLE_COMPAT=252, + * RT_TABLE_DEFAULT=253, + * RT_TABLE_MAIN=254, + * RT_TABLE_LOCAL=255, + * + * @param[in] dst destination ipv4 address + * @param[in] tbl_id Table id for route lookup + * @param[in] m pointer to mbuf + * + * @return nexthop v4 pointer + */ +struct next_hop *dp_rt_lookup(in_addr_t dst, uint32_t tbl_id, + const struct rte_mbuf *m); + +/* + * Lookup NH information based on NH index, and use the hash in case + * the NH is a multi-path nexthop + * + * @param[in] nhindex NH index + * @param[in] hash Hash value used to obtain the path information in case + * of multi-path nexthop + * @param[out] nh IP address of the next hop + * @param[out] ifindex If index of the outgoing interface + * + * @return 0 for success, otherwise -1 + */ +int dp_nh_lookup_by_index(uint32_t nhindex, uint32_t hash, in_addr_t *nh, + uint32_t *ifindex); + +/* + * IPv6 route lookup function for a given destination for the table id passed. + * Route table is identified by the tbl_id parameter. Table ids values + * as defined by Linux kernel(rtnetlink.h) are used. RT_TABLE_MAIN = 254 can + * be used for a lookup in the main table. Dataplane uses 1-128 table ids for + * PBR and table ids above 255 are used for VRF. 
The table ids for reference + * RT_TABLE_UNSPEC=0, + * 1-128 reserved for PBR + * RT_TABLE_COMPAT=252, + * RT_TABLE_DEFAULT=253, + * RT_TABLE_MAIN=254, + * RT_TABLE_LOCAL=255, + * + * @param[in] dst destination IPv6 address + * @param[in] tbl_id Table id for route lookup + * @param[in] m pointer to mbuf + * + * @return nexthop pointer + */ +struct next_hop *dp_rt6_lookup(const struct in6_addr *dst, + uint32_t tbl_id, + const struct rte_mbuf *m); + +/* + * Lookup IPv6 NH information based on NH index, and use the hash in case + * the NH is a multi-path nexthop + * + * @param[in] nhindex IPv6 NH index + * @param[in] hash Hash value used to obtain the path information in case + * of multi-path nexthop + * @param[out] nh IPv6 address of the next hop + * @param[out] ifindex If index of the outgoing interface + * + * @return 0 for success, otherwise -1 + */ +int dp_nh6_lookup_by_index(uint32_t nhindex, uint32_t hash, + struct in6_addr *nh, uint32_t *ifindex); + +/* + * Get interface pointer for IPv4 next hop + * + * @param[in] next_hop IPv4 next_hop pointer + * @return interface pointer + * + * @deprecated + */ +struct ifnet * +dp_nh4_get_ifp(const struct next_hop *next_hop); + +/* + * Get address for IPv4 next hop + * + * @param[in] next_hop IPv4 next_hop pointer + * @return the ip_address + */ +const struct in_addr * +dp_nh4_get_addr(const struct next_hop *next_hop); + +/* + * Get interface pointer for IPv6 next hop + * + * @param[in] next_hop IPv6 nexthop pointer + * @return interface pointer + * + * @deprecated + */ +struct ifnet * +dp_nh6_get_ifp(const struct next_hop *next_hop); + +/* + * Get address for IPv6 next hop + * + * @param[in] next_hop IPv6 next_hop pointer + * @return pointer to the ip_address + */ +const struct in6_addr * +dp_nh6_get_addr(const struct next_hop *next_hop); + +/* + * Get interface pointer for the next hop. This should be used instead of the + * v4/v6 specific versions which are now deprecated. 
+ * + * @param[in] next_hop nexthop pointer + * @return interface pointer + */ +struct ifnet * +dp_nh_get_ifp(const struct next_hop *next_hop); + +/* + * IPv6 output function to transmit packet on a given output interface. + * This function will populate the l2 address based on the output + * interface passed. + * + * @param[in] in_ifp Input interface pointer + * @param[in] m pointer to mbuf + * @param[in] out_ifp Output interface pointer + * @param[in] proto protocol + * + * @return True if packet sent , False otherwise + */ +bool +dp_ip6_l2_intf_output(struct ifnet *in_ifp, + struct rte_mbuf *m, + struct ifnet *out_ifp, + uint16_t proto); + +/* + * Function to transmit an IPv6 packet based on the forwarding information + * in the provided next_hop. This function will populate the l2 address + * based on the next_hop passed in. + + * + * @param[in] in_ifp Input interface of the packet. + * @param[in, out] mbuf Pointer to mbuf + * @param[out] nh Next hop that provides information about the output + * interface and the L2 encap. + * @param[in] proto The Layer 2 protocol. + * + * @return True if packet sent , False otherwise + * + */ +bool dp_ip6_l2_nh_output(struct ifnet *in_ifp, struct rte_mbuf *m, + struct next_hop *nh, uint16_t proto); + +/* + * IPv4 output function to transmit packet on a given output interface. + * This function will populate the l2 address based on the output + * interface passed. + * + * @param[in] in_ifp Input interface pointer + * @param[in] m pointer to mbuf + * @param[in] out_ifp Output interface pointer + * @param[in] proto protocol + * @return True if packet sent, False otherwise + */ +bool +dp_ip_l2_intf_output(struct ifnet *in_ifp, + struct rte_mbuf *m, + struct ifnet *out_ifp, + uint16_t proto); + +/* + * Function to transmit an IPv4 packet based on the forwarding information + * in the provided next_hop. This function will populate the l2 address + * based on the next_hop passed in. 
+ * + * @param[in] in_ifp Input interface of the packet. + * @param[in, out] m Pointer to mbuf + * @param[out] nh Next hop that provides information about the output + * interface and the L2 encap. + * @param[in] proto The Layer 2 protocol. + * + * @return True if packet sent, False otherwise + * + */ +bool dp_ip_l2_nh_output(struct ifnet *in_ifp, struct rte_mbuf *m, + struct next_hop *nh, uint16_t proto); + +/** + * Calculate ecmp hash with parameters held in structure + * 'struct ecmp_hash_param'. + * + * @param[in] hash_param Const pointer to data structure holding parameters + * for ecmp hash calculation + * + * @return hash value + */ +uint32_t dp_ecmp_hash(const struct ecmp_hash_param *hash_param); + +enum dp_rt_path_unusable_key_type { + DP_RT_PATH_UNUSABLE_KEY_INTF, + DP_RT_PATH_UNUSABLE_KEY_INTF_NEXTHOP, +}; + +struct dp_rt_path_unusable_key { + enum dp_rt_path_unusable_key_type type; + uint32_t ifindex; + struct ip_addr nexthop; +}; + +enum dp_rt_path_state { + DP_RT_PATH_USABLE, + DP_RT_PATH_UNUSABLE, + DP_RT_PATH_UNKNOWN, +}; + +/* + * Callback function to tell if a plugin has usability info for a path. + * + * @return DP_RT_PATH_USABLE if the plugin has state for this + * path and knows it is USABLE + * @return DP_RT_PATH_UNUSABLE if the plugin has state for this path + * and knows it is UNUSABLE + * @return DP_RT_PATH_UNKNOWN if the plugin has no state for this path, + * or has state and doesn't yet know if it is usable. + */ +typedef enum dp_rt_path_state +(dp_rt_get_path_state_fn)(const struct dp_rt_path_unusable_key *key); + +/* + * Register a callback function that can be used to query the usability + * state of a given path. Every plugin that signals the usability of a + * path should provide a callback to allow querying of the usability of paths. + * + * @return -EINVAL if the parameters are not valid. + */ +int dp_rt_register_path_state(const char *source, + dp_rt_get_path_state_fn *get_state_fn); + +/* + * Mark a path as unusable.
This must be called from a thread that is + * registered with rcu, and rcu_online. + * + * @param[in] source The caller of the API + * @param[in] state The state of the path. Should be either USABLE or UNUSABLE. + * @param[in] key The key of the paths that have become unusable. + */ +void dp_rt_signal_path_state(const char *source, + enum dp_rt_path_state state, + const struct dp_rt_path_unusable_key *key); + +#endif /* VYATTA_DATAPLANE_IP_FORWARD_H */ diff --git a/include/json_writer.h b/include/json_writer.h index 65d98d54..9d59c5cc 100644 --- a/include/json_writer.h +++ b/include/json_writer.h @@ -4,15 +4,15 @@ * This takes care of the annoying bits of JSON syntax like the commas * after elements * - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2011-2016 by Brocade Communications Systems, Inc. * All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only */ -#ifndef JSON_WRITER_H -#define JSON_WRITER_H +#ifndef VYATTA_DATAPLANE_JSON_WRITER_H +#define VYATTA_DATAPLANE_JSON_WRITER_H #include #include @@ -43,7 +43,7 @@ void jsonw_null(json_writer_t *self); /* Useful Combinations of name and value */ void jsonw_string_field(json_writer_t *self, const char *prop, const char *val); -void jsonw_bool_field(json_writer_t *self, const char *prop, bool value); +void jsonw_bool_field(json_writer_t *self, const char *prop, bool val); void jsonw_float_field(json_writer_t *self, const char *prop, double num); void jsonw_uint_field(json_writer_t *self, const char *prop, uint64_t num); void jsonw_int_field(json_writer_t *self, const char *prop, int64_t num); @@ -59,4 +59,4 @@ void jsonw_end_array(json_writer_t *self); /* Override default exception handling */ typedef void (jsonw_err_handler_fn)(const char *); -#endif /* JSON_WRITER_H */ +#endif /* VYATTA_DATAPLANE_JSON_WRITER_H */ diff --git a/include/lcore_sched.h b/include/lcore_sched.h new file mode 
100644 index 00000000..ef8a9f94 --- /dev/null +++ b/include/lcore_sched.h @@ -0,0 +1,337 @@ +/* + * Copyright (c) 2020, AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ + +#ifndef VYATTA_DATAPLANE_LCORE_SCHED_H +#define VYATTA_DATAPLANE_LCORE_SCHED_H + +#include +#include +#include + +#include "compiler.h" + +/* + * Intro + * ===== + * + * The dataplane has 2 main types of threads. Those which run on dedicated + * lcores and are typically used for forwarding packets and are referred to + * as forwarders. The second type is those that are used for all other + * work. + * + * Forwarding threads + * ================== + * + * The forwarding threads run on dedicated logical cores (lcores) that have + * no other processing occurring on them other than some kernel threads that + * can not be moved. There is one forwarding thread per lcore. If the + * thread is not being used as there is no work assigned to it then the lcore + * it is associated with is made available for the rest of the system. + * + * The goal of the forwarding threads is to do the packet processing as + * quickly as possible so that we can process as many packets as possible with + * very low latency. + * + * A forwarding thread can have multiple pieces of work, and in that case + * it will round robin over each work source checking if there is work to + * do, and doing it. The sources of work are + * - interface rx queues. Check if packets have arrived and process them + * - interface tx queues. Check if there are packets to send, and send them + * - crypto. Check if there are packets that have been queued for crypto + * processing, and process them. + * + * The user can set the lcores that the interface processing should happen + * on on a system basis, or on a per interface basis. The set of lcores to + * use for crypto processing can also be specified. 
If these are set then + * they are respected, and these lcores can only be used for the specified + * work items types. If these are not set then each work item will be + * assigned an lcore as it is created. The system assigns weights to each work + * item and tries to give each lcore an equal workload. + * + * There are features other than packet processing that either require + * significant scale, or low latency that are ideal candidates for running + * on a dedicated forwarder. API are provided to allow this. When these APIs + * are used the lcore becomes dedicated to the feature that requests it and + * will do no further packet processing. An lcore that has configuration + * specifying either interface or crypto processing on it can not be allocated + * to a feature. An lcore doing crypto work due to arbitrary allocation can + * not be assigned to a feature. An lcore processing interface queues due to + * an arbitrary allocation can be assigned to a feature, and in that case the + * interface queue processing will be moved to a different lcore. + * + * Other threads + * ============= + * By default the other threads in the dataplane all run on the 'main' lcore + * which is lcore 0 by default. This means that they are many threads all + * sharing the same logical core, and probably sharing it with many other + * processes too. If there are features that have work to do in a non + * forwarding lcore, but it is too much for a single lcore to do then the + * feature can request a forwarding thread where it can do this work. + */ + +/* Like rte_lcore_id() + * but for all non-dataplane threads returns 0 instead of LCORE_ID_ANY + */ +RTE_DECLARE_PER_LCORE(unsigned int, _dp_lcore_id); +static ALWAYS_INLINE +unsigned int dp_lcore_id(void) +{ + return RTE_PER_LCORE(_dp_lcore_id); +} + +/* + * A callback function type for the foreach lcore iterator funcs. + * + * @param lcore The id of the lcore the callback is for. 
+ * @param arg Argument passed through to allow the caller to + * provide state to the callback. + * + * @return 0 for success, -ve for error. + */ +typedef int (dp_per_lcore_fn)(unsigned int lcore, void *arg); + +/* + * Iterator functions to run a callback for each of the lcores. + * Depending on the iterator it runs for either all lcores or just the + * forwarding lcores. (Forwarding lcores are all apart from the main lcore) + * Note that it will run for all the lcores even those that are not currently + * active. + * + * @param func Callback function to call per lcore + * @param arg State that is passed through to the callback function. + * + * @return 0 for success. If any of the callback functions return a non + * zero value then the walk will stop and that value will be returned. + */ +int dp_foreach_lcore(dp_per_lcore_fn *fn, void *arg); +int dp_foreach_forwarding_lcore(dp_per_lcore_fn *fn, void *arg); + + +/* + * Is the given lcore active. The main lcore is always active. A forwarding + * lcore may be active or inactive. + * + * @return True if the lcore is active + * @return False if the lcore is inactive or an invalid lcore id was given. + */ +bool dp_lcore_is_active(unsigned int lcore); + +/* + * Structure holding callbacks that can be used to create/delete feature + * state when a lcore becomes active/inactive. + */ +struct dp_lcore_events { + /* + * Function called when a new lcore becomes active. The arg is passed + * through from the registration call. + */ + int (*dp_lcore_events_init_fn)(unsigned int lcore_id, void *arg); + /* + * Function called when a lcore becomes inactive. The arg is passed + * through from the registration call. + */ + int (*dp_lcore_events_teardown_fn)(unsigned int lcore_id, void *arg); +}; + +/* + * Register callbacks to be called for each lcore that is active. + * As lcores become active/inactive the registered init/teardown funcs will + * be called. 
This allows features to have per lcore state where they need + * to keep stats/state on each lcore. + * + * The callback happens on the lcore that has been made active/inactive. + * The callbacks do not get called for already active lcores. Registration + * needs to be done before the forwarding lcores are made active if that is + * required. + * + * This function must be called on the main thread. + * + * @param[in] events Structure containing the per event callbacks. + * @param[in, out] arg Argument structure passed through to the callbacks. + * + * @return 0 on success + * -EINVAL for invalid arguments + * -ENOMEM if not enough memory to register the callbacks. + */ +int dp_lcore_events_register(const struct dp_lcore_events *events, + void *arg); + +/* + * Unregister a previously registered set of callbacks. + * + * This function must be called on the main thread. + * + * @param[in] events The set of pointers that were previously registered. + * + * @return 0 on success + * -EINVAL for invalid arguments + * -ENOENT if there was no existing entry + */ +int dp_lcore_events_unregister(const struct dp_lcore_events *events); + + +/* + * The possible uses of lcores in the dataplane. There is one main thread + * that always runs on the main lcore. All threads that this creates also + * run on the main lcore. All the other lcores can be used as forwarders + * or for features. + */ +enum dp_lcore_use { + /* + * The main thread. This will run on the lowest number lcore, + * typically 0, but this can be changed by config. + */ + DP_LCORE_MAIN, + /* + * A packet forwarder. This thread should be processing packets + * as fast as possible, with low latency. Should avoid syscalls + * and long delays. Processes interface and crypto queues. + */ + DP_LCORE_FORWARDER, + /* + * An lcore dedicated to a feature because it needs too much processing, + * or has certain latency requirements. 
+ */ + DP_LCORE_FEATURE, + /* + * An lcore in none of the above states, or perhaps one that doesn't + * exist. + */ + DP_LCORE_INVALID, +}; + +/* + * What is the given lcore being used for + * + * @param[in] lcore The lcore to return the state of. + * + * @return The state of the given lcore or DP_LCORE_INVALID if not in + * any of the other states. + */ +enum dp_lcore_use dp_lcore_get_current_use(unsigned int lcore); + +#define DP_LCORE_FEAT_MAX_NAME_SIZE 16 +struct dp_lcore_feat { + /* + * The name of the feature that is using this lcore. + */ + char name[DP_LCORE_FEAT_MAX_NAME_SIZE]; + /* + * Function to run the feature work on the given lcore. This function + * is expected to loop doing the feature work. It should return only + * when the feature is being unconfigured. + * + * @param[in] lcore_id The lcore this function is running on. This is + * useful when there is lcore specific state being stored. + * @param[in] arg Context argument passed through to function. + * + * @return 0 on success, -ve on error. + */ + int (*dp_lcore_feat_fn)(unsigned int lcore_id, void *arg); + /* + * The dataplane can be asked to report stats on a regular basis. + * If the feature wants to report those stats then it should + * populate one or both of these functions. + * + * @param[in] lcore_id The lcore to get the stats for + * @param[out] pkts The number of packets rx/tx'ed on this lcore. + * + * Note: not all features deal in packets/bytes. If the feature needs + * to report something different, for example 'sessions' + * then it should store this in the pkts field and ignore the + * bytes field. + */ + void (*dp_lcore_feat_get_rx)(unsigned int lcore_id, + uint64_t *pkts); + void (*dp_lcore_feat_get_tx)(unsigned int lcore_id, + uint64_t *pkts); +}; + + +/* + * Change the given lcore to being a feature lcore. There + * are some restrictions here: + * - Only lcores of type DP_LCORE_FORWARDER can be changed. 
+ * - There must be at least one lcore of type DP_LCORE_FORWARDER remaining + * after the change. + * - lcores doing crypto work can not be changed. + * + * @param[in] lcore The lcore to make a DP_LCORE_FEATURE lcore. + * @param[in] feat Structure holding function pointers for + * this feature. + * + * @return 0 for success + * -EBUSY if the lcore is a FORWARDER and has been configured + * for interface/crypto work + * -ve for failure + */ +int +dp_allocate_lcore_to_feature(unsigned int lcore, + struct dp_lcore_feat *feat); + +/* + * Change a lcore that has been allocated to features back to being + * a forwarding lcore. + * + * @param[in] lcore The lcore to return back to a forwarder. + * + * @return 0 for success + * -ve for failure + */ +int dp_unallocate_lcore_from_feature(unsigned int lcore); + +/* + * Set up a per lcore packet burst. A pkt_burst is used to store a batch of + * packets that are all being sent to the same place, for example out of + * the same interface. Each forwarding lcore sends packets to an lcore + * specific packet burst as an interim step on the way to sending the packet. + * If the packet burst gets full all the packets in it are immediately sent. + * If a packet is added to a packet burst and the output interface is + * different to the previously added packet then the packets in the burst + * are sent and the new packet is then added as the only packet in the burst. + * If the burst is not filled, or the interface does not change then the + * packets in the burst are sent within a reasonable timeframe. + * + * This is there as an optimisation so that the cost of enqueuing packets onto + * the output rings of the interfaces is amortised over multiple packets. All + * lcores that are packet forwarders have their own packet bursts. + * + * This API allows a user to create a packet burst on an lcore that is dedicated + * to a feature. It creates the burst on the lcore it is called on.
+ */ +void dp_pkt_burst_setup(void); + +/* + * Free the packet burst associated with this lcore. + */ +void dp_pkt_burst_free(void); + +/* + * APIs that send packets out of an interface typically put them on the + * intermediate pkt_burst for performance reasons. If a feature has + * latency requirements and is generating packets then they can force them + * to be sent to the interface immediately by calling this function. The + * burst that is flushed is the one that is on the lcore that this call + * is made on. + */ +void dp_pkt_burst_flush(void); + +/** + * Is this the main thread. + * + * @return true if main thread. + * false if not the main thread. + */ +bool is_main_thread(void); + +/* + * Assert that this is the main thread. Kill the process if not + */ +#define ASSERT_MAIN() \ +{ if (!is_main_thread()) rte_panic("not on main thread\n"); \ +} + +#endif /* VYATTA_DATAPLANE_LCORE_SCHED_H */ diff --git a/include/meson.build b/include/meson.build new file mode 100644 index 00000000..0a83fc74 --- /dev/null +++ b/include/meson.build @@ -0,0 +1,46 @@ +# SPDX-License-Identifier: LGPL-2.1-only +# Copyright (c) 2020, AT&T Intellectual Property. All rights reserved.
+ +install_headers( + 'bridge_flags.h', + 'bridge_vlan_set.h', + 'compiler.h', + 'config.h', + 'debug.h', + 'dpi.h', + 'dp_session.h', + 'event.h', + 'fal_bfd.h', + 'fal_plugin.h', + 'feature_commands.h', + 'feature_plugin.h', + 'interface.h', + 'ip.h', + 'ip_checksum.h', + 'ip_forward.h', + 'json_writer.h', + 'lcore_sched.h', + 'pktmbuf.h', + 'pipeline.h', + 'protobuf_util.h', + 'rcu.h', + 'urcu.h', + 'vrf.h', + subdir: meson.project_name() +) + +public_include = include_directories('.') + +install_headers( + 'dp_test/dp_test_cmd_check.h', + 'dp_test/dp_test_lib.h', + 'dp_test/dp_test_lib_intf.h', + 'dp_test/dp_test_macros.h', + 'dp_test/dp_test_netlink_state.h', + 'dp_test/dp_test_pktmbuf_lib.h', + 'dp_test/dp_test_firewall_lib.h', + 'dp_test/dp_test_session_lib.h', + subdir: meson.project_name() / 'dp_test' +) + +public_test_include = include_directories('dp_test') diff --git a/include/pipeline.h b/include/pipeline.h new file mode 100644 index 00000000..3bfc4bd5 --- /dev/null +++ b/include/pipeline.h @@ -0,0 +1,541 @@ +/* + * Copyright (c) 2019-2020, AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ +#ifndef VYATTA_DATAPLANE_PIPELINE_H +#define VYATTA_DATAPLANE_PIPELINE_H + +#include + +/* + * The pipeline is the part that of the feature plugin that interacts + * with packets. When a packet is received it enters the pipeline where + * it makes its way through the nodes in a graph. Each node does its + * specific processing before passing the packet on to the next node + * Once the packet reaches an output node it is finished. + * + * The graph is constructed at dataplane start, using all the builtin nodes + * plus any that are added by the feature plugins. + * + * NODES + * ===== + * + * Each node declares the set of possible next nodes in the graph, and when + * a packet it being processed by a node it must transition to one of possible + * next-nodes. 
The next nodes are typically of the form: drop, consume, + * accept. They specify what this node has decided to do with the packet. + * They do not typically specify the overall order of the graph. That is done + * by the features. + * + * A node has a type that defines the behaviour for that node. Most nodes + * that are added as part of plugins will be of type PL_PROC. + * + * A node has a domain and a name. The default domain for all builtin nodes + * is 'vyatta' and this must not be used for nodes added by feature plugins. + * + * An example built in node (using the default domain 'vyatta') is: + * + * /----------------------\ + * | | -> ACCEPT (ipv4-route-lookup) + * | vyatta:ipv4-validate | -> DROP (term-drop) + * | | -> CONSUME (term-finish) + * \----------------------/ + * + * An example feature-plugin could be of the form: + * + * /--------------\ + * | | -> ACCEPT (term-noop) + * | domain:feat1 | -> CONSUME (term-finish) + * | | -> DROP (term-drop) + * | | -> DROP (domain1:drop-and-count) + * \--------------/ + * + * Where the feature will either decide to either: + * - accept the packet, and move to the next step of processing + * - consume the packet (its processing is now finished) + * - drop the given packet using the existing drop node. + * - drop the packet with a custom drop node (which is registered as another + * node) + * + * A node is registered using: pipeline_register_node() + * + * + * FEATURES + * ======== + * + * A node can be declared to be a feature-point node. When this happens there + * are some extra handler functions added and these allow features to be run + * on packets at that feature-point node. + * + * A feature is a node that is run from a feature-point. + * + * When a node is declared as a feature-point it will run a sub-graph + * of all the features for that feature-point, and after processing of that + * sub graph it will call its next node (unless the subgraph has called an + * output node). 
+ * + * When the graph is built each feature point will sort the features + * associated with it based on the ordering constraints specified at + * feature registration. + * + * Feature nodes can be dynamically enabled/disabled, and during packet + * processing only the ones that are enabled will be called. + * + * There are some well known feature-points that are in the builtin graph + * and these are the ones that plugin features will be associated with. + * + * An example of this is the ipv4-validate node shown in the NODES section + * above. + * + * If we register our example feature node from above with it the graph + * for this part would become: + * + * + * /----------------------\ + * | | -> (feature_subgraph) -> ACCEPT (ipv4-route-lookup) + * | | -> CONSUME + * | | + * | vyatta:ipv4-validate | -> DROP + * | | -> CONSUME + * \----------------------/ + * + * And the feat1 node would be called if ipv4-validate does not drop or + * consume. It would then be run (along with any other features at this + * feature-point) and as long as they have a next node of ACCEPT (noop) + * then the subsequent features would be run. + * + * /--------------\ + * | | -> ACCEPT (term-noop) + * | domain:feat1 | -> CONSUME (term-finish) + * | | -> DROP (term-drop) + * | | -> DROP (domain1:drop-and-count) + * \--------------/ + * + * So in this case domain:feat1 would run if the ipv4_validate node + * successfully validated the packet. This feat1 node then does its + * processing and returns on of the 4 next node values. If the return + * is the ACCEPT then the ipv4-validate node will also return ACCEPT + * and move onto the ipv4-route-lookup. If it is any of the others the + * ipv4-validate node will return CONSUME (as the subgraph has consumed + * the packet and any drop counters etc have already been incremented + * by the subgraph. + * + * Once a feature is registered it is part of the graph, but it will only + * be visited if that feature has been enabled. 
+ * + * A feature is registered using: dp_pipeline_register_{list,case}_feature() + * A feature is enabled using: dp_pipeline_enable_global_feature() + * dp_pipeline_enable_feature_by_inst() + * + * A feature is disabled using: dp_pipeline_disable_global_feature() + * dp_pipeline_disable_feature_by_inst() + */ + +/* + * A pipeline node must have a type specified as part of the registration. + */ +enum pl_node_type { + /* + * The most common node type. This is the type that gives standard + * processing through the graph, where the node does some work + * then moves onto the next node. + */ + PL_PROC = 0, + /* + * A terminal node. When the graph walk reaches a node of this type + * the walk finishes. A node of this type has no next nodes registered. + */ + PL_OUTPUT, + /* + * A special type of node for making transitions. A node of this + * type has no next nodes registered. A node of this type is + * typically used as a next node for a (feature) that wants processing + * to continue. + */ + PL_CONTINUE, +}; + +/* + * The maximum number of storage entries on a pl_packet. + */ +#define PL_NODE_STORE_MAX 4 + +/* + * These are carried over from existing + * pipeline functionality but should be + * refactored out of existence if possible. + */ +enum validation_flags { + NEEDS_EMPTY = 0x0, + NEEDS_SLOWPATH = 0x1, +}; + +/* + * The structure that contains all the information about a packet. This is the + * structure that will be passed to each pipeline node. + */ +struct pl_packet { + /* + * A pointer to the mbuf that is being processed. + */ + struct rte_mbuf *mbuf; + /* + * A pointer to the Layer 3 header in the mbuf. This is set once + * the packet processing has got as far as the L3 processing. It will + * be NULL until then. + */ + void *l3_hdr; + /* + * the type of the packet, unicast, multicast, broadcast + */ + int l2_pkt_type; + /* + * These will be refactored out soon. + */ + enum validation_flags val_flags; + /* + * A pointer to a next hop.
If a node makes a forwarding decision then + * this can be stored here, and this is the next hop that the packet + * will use when forwarded. Note that later features may overwrite this + * decision. + */ + union { + struct next_hop *v4; + struct next_hop *v6; + } nxt; + /* + * Pointer to the input interface for this packet + */ + struct ifnet *in_ifp; + /* + * Pointer to the output interface to use for this packet. + */ + struct ifnet *out_ifp; + /* + * The table to use for forwarding the packet. This can be either + * a PBR table or the main table from a different VRF. + */ + uint32_t tblid; + /* + * NPF feature state. This should not be modified by feature_plugins. + */ + uint16_t npf_flags; + /* + * The L2 protocol, for example ETH_P_IP. This is not always set. + */ + uint16_t l2_proto; + /* + * A count of how many of the data storage nodes have been used + * for this packet. + */ + int max_data_used; + /* + * An array of pointers to store data. These can be used by nodes + * to store data that is (potentially) needed by a node later in + * the graph. + */ + void *data[PL_NODE_STORE_MAX]; +} __rte_cache_aligned; + +/* + * A callback function for packet processing in a node. + * + * When registering a node a packet processing function of this type + * is provided. This function is the one that does all the work of the + * node. + * + * @param[in,out] packet The structure that contains the packet being + * processed and all the related state. The function + * can modify the contents of the packet/state as + * required. + * + * @param[in, out] context Pointer to the context registered for this node + * instance by the call to + * dp_pipeline_register_inst_storage(). If no + * context was registered then this will be NULL. + * + * @return The index of the next node. This is based on the set of next + * nodes provided at registration time.
+ */ +typedef unsigned int +(pl_proc) (struct pl_packet *packet, void *context); + +/* + * Register a new pipeline node. If called during the startup sequence of + * the dataplane this node will be inserted into the graph. The graph will + * be calculated and verified once all plugins are loaded. + * + * @param[in] name The name for this node. This is comprised of a domain and + * a name, separated by a colon. For example 'my_domain:feat1' + * @param[in] num_next_nodes The number of next nodes that this node can have. + * @param[in] next_node_names An array of strings of size num_next_nodes. Each + * entry in the array is the name of a possible next node. This next + * node name can optionally include a domain. If it does this is of + * the format <domain>:<name>. If it does not then the default + * domain (vyatta) will be used. + * @param[in] node_type The type of this node. + * @param[in] handler The function that does the processing for this node. + * It does any required processing and returns the index of the + * next node to use. + * + * @return 0 on success + * -EBUSY if the dataplane has finished initialisation + * -EINVAL if invalid arguments are provided. + * + * The combination of domain and name must be unique. + * + * This function may return success now but there is a further phase of + * validation once all plugins are loaded. For example a node may have a next + * node from a not yet loaded plugin. In this case the behaviour is to return + * success now, and do a final graph validation at the stage where all the + * plugins have been loaded. If at the verification stage the node can not + * be installed properly due to missing next-nodes it will be removed from + * the graph and an error will be logged.
+ */ +int dp_pipeline_register_node(const char *name, + int num_next_nodes, + const char **next_node_names, + enum pl_node_type node_type, + pl_proc handler); +/* + * If storage for a node instance has been registered and the node + * instance goes away then this callback can be used to cleanup + * so that any memory for the instance is not leaked. + * + * @param[in] instance The instance that is being removed + * + * @param[in] context The context that was registered + */ +typedef void (dp_pipeline_inst_cleanup_cb)(const char *instance, + void *context); + +struct dp_pipeline_feat_registration { + /* + * The name of the plugin. This should be the same as + * the name returned in the dp_feature_plugin_init func. + */ + const char *plugin_name; + /* + * The name for this feature. This is comprised of a domain and + * a name, separated by a colon. For example 'my_domain:feat1' + */ + const char *name; + /* + * The name of the node being used for this feature. + * This is comprised of a domain and a name, separated by a colon. + * For example 'my_domain:feat1' + */ + const char *node_name; + /* + * feature_point The feature point this feature should use. This + * can optionally include a domain. If it does this is of + * the format <domain>:<name>. If it does not then the default + * domain (vyatta) will be used. + */ + const char *feature_point; + /* + * visit_before An optional argument that indicates that this feature + * should be invoked before the named feature. This can optionally + * include a domain. If it does this is of the format + * <domain>:<name>. If it does not then the default domain (vyatta) + * will be used. + */ + const char *visit_before; + /* + * visit_after An optional argument that indicates that this feature + * should be invoked after the named feature. This can optionally + * include a domain. If it does this is of the format + * <domain>:<name>. If it does not then the default domain (vyatta) + * will be used.
+ */ + const char *visit_after; + /* + * Only used for the case features. The case value to match + * on. Only features that have a matching value are executed, and + * there can only be one feature registering a given value. + */ + uint32_t value; + /* + * If a feature registers node instance storage then it can provide + * a callback via this field so that the storage can be cleaned up + * if the instance goes away. This field can be NULL. + */ + dp_pipeline_inst_cleanup_cb *cleanup_cb; +}; + +/* + * Register a new pipeline list feature. If called during the startup sequence + * of the dataplane this feature will be created. The graph will be calculated + * and verified once all plugins are loaded. + * + * @param[in] feat Structure containing all the information needed + * to register a feature. + * + * @return 0 on success + * -EBUSY if the dataplane has finished initialisation + * -EINVAL if invalid arguments are provided. + * + * The combination of domain and name that is being registered must be unique. + * All other names that are referred to must exist once all nodes/features + * are registered. + * + * This function may return success now but there is a further phase of + * validation once all plugins are loaded. For example a feature may be + * after another feature that is not yet loaded. In this case the + * behaviour is to return success now, and do a final graph validation at + * the stage where all the plugins have been loaded. If at the verification + * stage there are missing nodes/features from the graph and an error will + * be logged. + * @return 0 on success + * -EBUSY if the dataplane has finished initialisation + * -EINVAL if invalid arguments are provided. + */ +int +dp_pipeline_register_list_feature(struct dp_pipeline_feat_registration *feat); +int +dp_pipeline_register_case_feature(struct dp_pipeline_feat_registration *feat); + +/* + * If a feature wants per instance storage then it can allocate it + * with this API. 
This will be stored on a per instance basis, and + * will be passed to the processing function in the 'context' parameter. + * + * A cleanup_callback can be registered as part of the feature registration + * as the cleanup is per feature, not per feature per instance. + * + * Note that this must be called on the main thread. + * + * @param[in] name The name of the feature to allocate context for. + * This is comprised of a domain and a name, separated by + * a colon. For example 'my_domain:feat1' + * @param[in] instance The instance to add the context to, for example + * 'dp0s0p1' + * + * @param[in] context A pointer to the context for this node instance. This + * pointer will be passed to the registered handler + * function for the node in the 'context' parameter. + * This param is optional. + * + * @return 0 on success + * -EINVAL if invalid arguments are provided. + */ +int dp_pipeline_register_inst_storage(const char *name, + const char *instance, + void *context); + +/* + * Unregister per node instance storage. + * + * Note that this must be called on the main thread. + * + * @param[in] node_name The name of the feature to deallocate storage for. + * This is comprised of a domain and a name, separated by + * a colon. For example 'my_domain:feat1' + * @param[in] instance The instance to remove the context from, for example + * 'dp0s0p1' + * + */ +int dp_pipeline_unregister_inst_storage(const char *node_name, + const char *instance); + +/* + * Get the per node instance storage that was previously registered. + * + * @param[in] node_name The name of the feature to retrieve storage for. + * This is comprised of a domain and a name, separated by + * a colon. For example 'my_domain:feat1' + * @param[in] instance The instance to retrieve the context from, for example + * 'dp0s0p1' + * + * @return A pointer to the context that was registered. + * NULL if no context registered.
+ */ +void *dp_pipeline_get_inst_storage(const char *node_name, + const char *instance); + +/* + * Enable the given feature on the named instance. Instance names are currently + * interface names. + * + * @param[in] name The name of the feature to enable. This is comprised of + * a domain and a name, separated by a colon. For example + * 'my_domain:feat1' + * @param[in] instance The instance to enable the feature on, for example + * 'dp0s0p1' + * + * @return 0 on success + * -EINVAL if invalid arguments are provided. + */ +int dp_pipeline_enable_feature_by_inst(const char *name, + const char *instance); + +/* + * Disable the given feature on the named instance. Instance names are currently + * interface names. + * + * @param[in] name The name of the feature to disable. This is comprised of + * a domain and a name, separated by a colon. For example + * 'my_domain:feat1' + * @param[in] instance The instance to disable the feature on, for example + * 'dp0s0p1' + * + * @return 0 on success + * -EINVAL if invalid arguments are provided. + */ +int dp_pipeline_disable_feature_by_inst(const char *name, + const char *instance); + +/** + * @brief Determine if the given feature is enabled on the named instance. + * + * @param[in] name The name of the feature to check. This is comprised of + * a domain and a name, separated by a colon. For example + * 'my_domain:feat1' + * @param[in] instance The instance to check for the feature on, for example + * 'dp0s0p1' + * @return true The feature is enabled on the instance + * @return false The feature is disabled on the instance + */ +bool dp_pipeline_is_feature_enabled_by_inst(const char *name, + const char *instance); + +/* + * Enable the given feature globally. + * + * For list features it will enable the feature on all instances of the + * type the feature uses. It will also be enabled on future instances + * as they are created. 
+ * + * For case features it will enable the feature on all instances of the + * type the feature uses. + * + * @param[in] name The name of the feature to enable. This is comprised of + * a domain and a name, separated by a colon. For example + * 'my_domain:feat1' + * @return 0 on success + * -EINVAL if invalid arguments are provided. + */ +int dp_pipeline_enable_global_feature(const char *name); + +/* + * Disable the given feature globally. + * + * For list features it will disable the feature on all instances of the + * type the feature uses. If a feature was enabled per instance, then + * globally, then turned off globally it will remove the feature from all + * instances including the one that was initially enabled on a per instance + * basis. + * + * For case features it will disable the feature on all instances of the + * type the feature uses. + * + * @param[in] name The name of the feature to disable. This is comprised of + * a domain and a name, separated by a colon. For example + * 'my_domain:feat1' + * @return 0 on success + * -EINVAL if invalid arguments are provided. + */ +int dp_pipeline_disable_global_feature(const char *name); + +#endif /* VYATTA_DATAPLANE_PIPELINE_H */ diff --git a/include/pktmbuf.h b/include/pktmbuf.h new file mode 100644 index 00000000..bea2314e --- /dev/null +++ b/include/pktmbuf.h @@ -0,0 +1,170 @@ +/* + * Copyright (c) 2020-2021, AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ +#ifndef VYATTA_DATAPLANE_PKTMBUF_H +#define VYATTA_DATAPLANE_PKTMBUF_H + +#include +#include "vrf.h" + +/* + * Allocate an mbuf from the default pool and initialise it. + * + * @param[in] vrf_id The vrf_id to set in the meta data of the mbuf. + * + * @return + * - The pointer to the new mbuf on success. + * - NULL if allocation failed. + */ +struct rte_mbuf *dp_pktmbuf_alloc_from_default(vrfid_t vrf_id); + +/* + * Get the vrf associated with the packet. 
+ * + * @param[in] m The buffer to get the vrf from. + * + * @return the ID of the vrf the packet is associated with. + */ +vrfid_t +dp_pktmbuf_get_vrf(const struct rte_mbuf *m); + +/* + * Set the vrf associated with the packet. + * + * @param[in] m The buffer to set the vrf on. + * @param[in] vrf_id The vrf to set. + */ +void +dp_pktmbuf_set_vrf(struct rte_mbuf *m, vrfid_t vrf_id); + +/* + * Mark a packet as having been locally generated. Locally generated + * packets may get put in a higher priority qos queue if configured. + * and so this should be set for all locally generated packets. + * + * @param[out] m The buffer to set as locally generated. + */ +void dp_pktmbuf_mark_locally_generated(struct rte_mbuf *m); + +/* + * A macro that points to the start of the L3 data in the mbuf. + * + * The returned pointer is cast to type t. Before using this + * function, the user must ensure that m_headlen(m) is large enough to + * read its data, and must ensure that the L2 length is set in the mbuf. + * + * @param[in,out] m The packet mbuf. + * @param[in,out] t The type to cast the result into. + */ +#define dp_pktmbuf_mtol3(m, t) ((t)(rte_pktmbuf_mtod(m, char *) + \ + (m)->l2_len)) + +/* + * A macro that points to the start of the L4 data in the mbuf. + * + * The returned pointer is cast to type t. Before using this + * function, the user must ensure that m_headlen(m) is large enough to + * read its data , and must ensure that the L2 and L3 lengths are set + * in the mbuf. + * + * @param[in,out] m The packet mbuf. + * @param[in,out] t The type to cast the result into. + */ +#define dp_pktmbuf_mtol4(m, t) ((t)(rte_pktmbuf_mtod(m, char *) + \ + (m)->l2_len + (m)->l3_len)) + +/* + * A macro that returns the length of the L2 header in the mbuf. + * + * The value can be read or assigned. + * + * @param[in,out] m The packet mbuf. + */ +#define dp_pktmbuf_l2_len(m) ((m)->l2_len) + +/* + * A macro that returns the length of the L3 header in the mbuf. 
+ * + * The value can be read or assigned. + * + * @param[in,out] m The packet mbuf. + */ +#define dp_pktmbuf_l3_len(m) ((m)->l3_len) + +/* + * Pointers that the features can use in the invar meta data. Features must + * register for use of this, and they will get returned an index into the + * array if successful. Features should unregister when they no longer need it. + * + * The invar feature meta data is invariant for the lifetime of the packet, + * i.e. even if encapped or decapped, or reswitched through another + * interface. + */ +#define DP_PKTMBUF_MAX_INVAR_FEATURE_PTRS 1 + +/* + * Register for a feature pointer in the packet meta data. + * This must be called on the main thread + * + * @param[in] name The name of the feature registering. Used for debug and for + * unregistering. + * @return 0 or +ve if successful. The return value is the array index assigned. + * -ve for a failure. There was no space available. + */ +int dp_pktmbuf_mdata_invar_feature_register(const char *name); + +/* + * Unregister a previously registered feature pointer in the meta data. + * This must be called on the main thread + * + * @param[in] name The name that was used when registering. + * @param[in] slot The array slot that was given upon registration. + * + * @return 0 for success + * -ve for an error + */ +int dp_pktmbuf_mdata_invar_feature_unregister(const char *name, int slot); + +/* + * Mark the feature_ptr of the given ID as set within the packet meta data and + * set the value. + * + * @param[out] m The mbuf to set the flags in. + * @param[in] feature_id The offset into the array that the feature should use. + * @param[in] ptr Value to store in the meta data. + */ +void +dp_pktmbuf_mdata_invar_ptr_set(struct rte_mbuf *m, + uint32_t feature_id, + void *ptr); + +/* + * Check if the given feature pointer is set within the packet meta data. + * If it is then return the stored value.
+ * + * @param[out] m The mbuf to check + * @param[in] feature_id The offset into the array to be checked + * @param[out] ptr Place to return the ptr in. + + * @return True if the feature pointer is set. Return the value in *ptr. + * False if the feature pointer is not set. In this case ptr will + * not be changed. + */ +bool +dp_pktmbuf_mdata_invar_ptr_get(const struct rte_mbuf *m, + uint32_t feature_id, + void **ptr); + +/* + * Clear the given feature pointer flag within the packet meta data + * + * @param[out] m The mbuf to clear the flags in. + * @param[in] feature_id The offset into the array to clear. + */ +void +dp_pktmbuf_mdata_invar_ptr_clear(struct rte_mbuf *m, + uint32_t feature_id); + +#endif /* VYATTA_DATAPLANE_PKTMBUF_H */ diff --git a/include/protobuf.h b/include/protobuf.h deleted file mode 100644 index 6492ab7d..00000000 --- a/include/protobuf.h +++ /dev/null @@ -1,56 +0,0 @@ -/*- - * Copyright (c) 2018, AT&T Intellectual Property. - * All rights reserved. - * - * SPDX-License-Identifier: LGPL-2.1-only - */ -#ifndef PROTOBUF_H -#define PROTOBUF_H - -int pb_cmd(void *data, size_t size, FILE *f); - -/* command structure */ -struct pb_msg { - /* input */ - void *msg; - size_t msg_len; - - /* output */ - FILE *fp; -}; - -typedef int -(pb_cmd_proc)(struct pb_msg *cmd); - -struct pb_msg_handler { - uint32_t version; - const char *cmd; - pb_cmd_proc *handler; -}; - -int -pb_add_command(const struct pb_msg_handler *cmd); - -void -pb_register_cmd_err(const char *cmd); - - -#define PB_REGISTER_CMD(x, ...) \ - __VA_ARGS__ struct pb_msg_handler x; \ - static void __pb_add_command_##x(void) \ - __attribute__((__constructor__)); \ - static void __pb_add_command_##x(void) \ - { if (pb_add_command(&x) != 0) \ - pb_register_cmd_err(x.cmd); } \ - __VA_ARGS__ struct pb_msg_handler x - -/* - * Use pb_cmd_err instead of fprintf(msg->fp, ""). msg->fp may be NULL - * if a command is deferred and then replayed, for example after an - * interface event. 
- */ -void pb_cmd_err(struct pb_msg *msg, const char *fmt, ...); - -void list_all_protobuf_msg_versions(FILE *f); - -#endif diff --git a/include/protobuf_util.h b/include/protobuf_util.h new file mode 100644 index 00000000..edb1a10c --- /dev/null +++ b/include/protobuf_util.h @@ -0,0 +1,41 @@ +/*- + * Copyright (c) 2020, AT&T Intellectual Property. + * All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ + +#ifndef VYATTA_DATAPLANE_PROTOBUF_UTIL_H +#define VYATTA_DATAPLANE_PROTOBUF_UTIL_H + +#include +#include "ip.h" +#include "protobuf/IPAddress.pb-c.h" + +/* + * Supports conversion of protobuf IPAddress msg to ip_addr struct + * @param[in] addr_msg IPAddress message struct + * @param[out] addr converted to ip_addr struct + * + * @return Error code or success + */ +int dp_protobuf_get_ipaddr(IPAddress *addr_msg, struct ip_addr *addr); + +/* + * Supports initialization of protobuf IPAddress msg + * @param[in] addr_msg ptr to IPAddress message struct + * + * @return Error code or success + */ +int dp_protobuf_create_ipaddr(IPAddress **addr_msg); + +/* + * Supports setting of protobuf IPAddress msg + * @param[in] to IPAddress pointer or destination + * @param[in] from value to set (struct ip_addr ptr) + * + * @return Error code or success + */ +int dp_protobuf_set_ipaddr(IPAddress *to, struct ip_addr *from); + +#endif diff --git a/include/rcu.h b/include/rcu.h new file mode 100644 index 00000000..487538ae --- /dev/null +++ b/include/rcu.h @@ -0,0 +1,148 @@ +/* + * Copyright (c) 2017-2021, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2011-2017 by Brocade Communications Systems, Inc. + * All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + * + */ +#ifndef VYATTA_DATAPLANE_RCU_H +#define VYATTA_DATAPLANE_RCU_H + +#include + +#include "urcu.h" +#include "lcore_sched.h" + +/* + * The dataplane uses the QSBR flavour of userspace rcu + * and DPDK's RCU QSBR implementation.
+ */ + +/* dataplane global DPDK RCU QSBR variable */ +extern struct rte_rcu_qsbr *dp_qsbr_rcu_v; + +/* + * Setup RCU usage in the dataplane. + * + * This performs all prep work for all the used RCU + * implementations. + * + * Should be called only once by the main function/thread. + * + * DPDK QSBR RCU: + * Allocates global DPDK RCU QSBR variable. + * + * userspace RCU: + * No special setup required. + */ +int dp_rcu_setup(void); + +/* + * Register a thread for rcu. This is used when it is not known if a thread + * is already rcu registered. If the thread is already registered then this + * call will make the thread rcu_online. If it is not registered then it + * will register it, and part of registration is to make the thread + * rcu_online. + */ +void dp_rcu_register_thread(void); + +/* + * Unregister a thread from rcu and track that it is no longer registered + * so that further calls to dp_rcu_register_thread() will then re-register + * it. + */ +void dp_rcu_unregister_thread(void); + +/* + * Get the dataplane global DPDK RCU QSBR variable. + * + * Use this method to make use of DPDK's rte_rcu_qsbr + * aware APIs. + */ +struct rte_rcu_qsbr *dp_rcu_qsbr_get(void); + +/* + * Mark long periods of the thread/lcore_id as inactive. + * + * Reader threads should call this prior to calling blocking + * methods/APIs. + */ +static __rte_always_inline void +dp_rcu_thread_offline(void) +{ + rcu_thread_offline(); + rte_rcu_qsbr_thread_offline(dp_qsbr_rcu_v, dp_lcore_id()); +} + +/* + * Mark long periods of the thread/lcore_id as active again. + * This should be called as the counter operation to dp_rcu_thread_offline. + */ +static __rte_always_inline void +dp_rcu_thread_online(void) +{ + rcu_thread_online(); + rte_rcu_qsbr_thread_online(dp_qsbr_rcu_v, dp_lcore_id()); +} + +/* + * Update the quiescent state for the reader threads. + * All reader threads must call this periodically. + * + * @param[in] lcore_id The lcore id passed to rte_rcu_qsbr_quiescent().
+ */ +static __rte_always_inline void +dp_rcu_quiescent_state(unsigned int lcore_id) +{ + rcu_quiescent_state(); + rte_rcu_qsbr_quiescent(dp_qsbr_rcu_v, lcore_id); +} + +/* + * Wait for all outstanding RCU callbacks to complete. + * Only the userspace RCU rcu_barrier() is invoked here. + */ +static __rte_always_inline void +dp_rcu_barrier(void) +{ + rcu_barrier(); +} + +/* + * Block until all the reader threads enter the quiescent state or + * are offline. + * + * This is not a reader-writer lock. + */ +static __rte_always_inline void +dp_rcu_synchronize(void) +{ + synchronize_rcu(); + rte_rcu_qsbr_synchronize(dp_qsbr_rcu_v, dp_lcore_id()); +} + +/* + * Beginning of an RCU read-side critical section. + * + * For DPDK RCU QSBR implementation this is NOOP, unless built + * with RTE_LIBRTE_RCU_DEBUG. + */ +static __rte_always_inline void +dp_rcu_read_lock(void) +{ + rcu_read_lock(); + rte_rcu_qsbr_lock(dp_qsbr_rcu_v, dp_lcore_id()); +} + +/* + * End of an RCU read-side critical section. + * + * For DPDK RCU QSBR implementation this is NOOP, unless built + * with RTE_LIBRTE_RCU_DEBUG. + */ +static __rte_always_inline void +dp_rcu_read_unlock(void) +{ + rcu_read_unlock(); + rte_rcu_qsbr_unlock(dp_qsbr_rcu_v, dp_lcore_id()); +} + +#endif /* VYATTA_DATAPLANE_RCU_H */ diff --git a/include/readme.md b/include/readme.md index 8a4372c9..b3f15b1d 100644 --- a/include/readme.md +++ b/include/readme.md @@ -13,3 +13,71 @@ Binary compatibility (source built against an earlier version of the headers should work when run with a later version of the dataplane) isn't a requirement at this stage, but it's best to also try to avoid making changes that would break this. + +# Feature Plugin + +Feature plugins allow features to be added to the dataplane without having +to change the core code. A library is loaded at init time and this provides +a feature that plugs into the public APIs. + +A typical feature requires some configuration which it uses to set up its +data structures etc.
It will typically then do some packet processing using +those data structures and it will typically have some way of reporting state +back to the system. + +[See feature_plugin.h for more details about the plugins](feature_plugin.h) + +## Threading model + +The dataplane process contains many threads, and certain types of work must +be done on the correct thread. + +The dataplane uses the dpdk lcore infra. For each logical core in the system +a thread is created and that thread runs only on that core. The lowest core +number is used for control plane processing and is called the main thread. +All the other lcores are used for packet processing by default. + +See lcore_sched.h for more details about the [lcores](lcore_sched.h) + +To allow for efficient updates of control plane state without having to lock +the forwarding threads the dataplane uses RCU. This allows updates from a +single thread along with multiple concurrent readers. + +### main thread +All of the control plane state processing (routes, interface state, etc) is +done in the main thread (main lcore, typically lcore 0). The main thread +receives events from multiple sources and processes them in an RCU safe way. + +A feature plugin init function is always called on the main thread. For +features that register a command handler this handler will always be called +on the main thread. + +If a feature then has a need to process further updates on the main thread +as they arrive from a socket it can use the [event api](events.h) + +### console thread + +For features that register a show commands handler this handler will always +be called on the console thread. + +### forwarding threads + +For features that are involved in forwarding of packets the forwarding may +happen on multiple different forwarding lcores, or sometimes on the main +core. As there may be multiple cores there can be processing of multiple +packets at the same time. 
Multiple threads can read the RCU controlled +state with no performance penalty. However writing in parallel can cause +performance issues due to the need to take locks. Where a feature needs +to do writes per packet (for example counters) it is recommended to use per +lcore state so that there is only a single thread/core updating a given memory +location at a time. + +### Other threads + +Features can create other threads as required, but this should be done +from the main thread. These threads will then inherit the cpu affinity +from the main thread and so will run on the main lcore only. +New threads that access memory used by other threads should be registered +with [rcu](urcu.h) + +# Fal plugin diff --git a/src/urcu.h b/include/urcu.h similarity index 55% rename from src/urcu.h rename to include/urcu.h index 15331752..744cc661 100644 --- a/src/urcu.h +++ b/include/urcu.h @@ -1,13 +1,18 @@ /* - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only * * Dataplane uses Userspace RCU in accordance with Dynamic only linking * See userspace-rcu/LICENSE */ -#ifndef URCU_H -#define URCU_H +#ifndef VYATTA_DATAPLANE_URCU_H +#define VYATTA_DATAPLANE_URCU_H + +/* + * This file is used to make it easy to include the correct urcu headers. + * The dataplane uses the QSBR flavour of userspace rcu. + */ /* Allow URCU to inline small functions * performance vs shared library upgrade tradeoff @@ -18,4 +23,7 @@ #include #include #include -#endif /* URCU_H */ + +#include "rcu.h" + +#endif /* VYATTA_DATAPLANE_URCU_H */ diff --git a/include/vrf.h b/include/vrf.h new file mode 100644 index 00000000..39d06bc0 --- /dev/null +++ b/include/vrf.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2020, AT&T Intellectual Property. All rights reserved.
+ * + * SPDX-License-Identifier: LGPL-2.1-only + */ + +#ifndef VYATTA_DATAPLANE_VRF_H +#define VYATTA_DATAPLANE_VRF_H + +/* + * This declares vrf related APIs exported by dataplane + */ + +/* Opaque vrf structure */ +struct vrf; + +/* vrfid type */ +typedef uint32_t vrfid_t; + +#define VRF_INVALID_ID 0 +#define VRF_DEFAULT_ID 1 + +/* + * get vrf id for vrf structure pointer + * + * @param[in] vrf Pointer to vrf structure + * + * @return vrf id + */ +vrfid_t dp_vrf_get_vid(struct vrf *vrf); + +/* + * get external vrf id from internal vrf id + * + * @param[in] internal_id Internal vrf id + * @return External vrf id + */ +vrfid_t dp_vrf_get_external_id(vrfid_t internal_id); + +/* + * get vrf struct pointer from external vrf id + * + * @param[in] external_id external vrf id + * @return Pointer to vrf structure + * + */ +struct vrf *dp_vrf_get_rcu_from_external(vrfid_t external_id); + +#endif /* VYATTA_DATAPLANE_VRF_H */ diff --git a/iwyu.mapping b/iwyu.mapping index a38a00e3..d3bd120d 100644 --- a/iwyu.mapping +++ b/iwyu.mapping @@ -18,4 +18,10 @@ { include: ["\"czmq_library.h\"", "private", "", "public"] }, { include: ["\"pl_fused_gen.h\"", "private", "\"pl_fused.h\"", "public"] }, { include: ["", "private", "", "public"] }, + { include: ["", "private", "", "public"] }, + { include: ["", "private", "", "public"] }, + { include: ["", "private", "", "public"] }, + { include: ["", "private", "", "public"] }, + { include: ["", "private", "", "public"] }, + { include: ["", "private", "", "public"] }, ] diff --git a/lib/Vyatta/Vplane.pm b/lib/Vyatta/Vplane.pm new file mode 100644 index 00000000..31d4732f --- /dev/null +++ b/lib/Vyatta/Vplane.pm @@ -0,0 +1,420 @@ +# Module Vplane.pm + +# Copyright (c) 2017-2021, AT&T Intellectual Property. All rights reserved. +# Copyright (c) 2015-2016, Brocade Communications Systems, Inc. +# All rights reserved. +# +# SPDX-License-Identifier: LGPL-2.1-only +# +# This module provides routines to identify supported adapters.
+ +package Vyatta::Vplane; + +use strict; +use warnings; +require Exporter; + +our @ISA = qw(Exporter); +our @EXPORT = qw(is_supported_device is_supported_ib_device is_supported_pci_device); + +# List of supported PCI device Id's generated from rte_pci_dev_ids.h in DPDK +my @pci_devices = ( + + # Intel E1000 + { vendor => 0x8086, device => 0x100e }, + { vendor => 0x8086, device => 0x100f }, + { vendor => 0x8086, device => 0x1011 }, + { vendor => 0x8086, device => 0x1010 }, + { vendor => 0x8086, device => 0x1012 }, + { vendor => 0x8086, device => 0x101d }, + { vendor => 0x8086, device => 0x105e }, + { vendor => 0x8086, device => 0x105f }, + { vendor => 0x8086, device => 0x1060 }, + { vendor => 0x8086, device => 0x10d9 }, + { vendor => 0x8086, device => 0x10da }, + { vendor => 0x8086, device => 0x10a4 }, + { vendor => 0x8086, device => 0x10d5 }, + { vendor => 0x8086, device => 0x10a5 }, + { vendor => 0x8086, device => 0x10bc }, + { vendor => 0x8086, device => 0x107d }, + { vendor => 0x8086, device => 0x107e }, + { vendor => 0x8086, device => 0x107f }, + { vendor => 0x8086, device => 0x10b9 }, + { vendor => 0x8086, device => 0x109a }, + { vendor => 0x8086, device => 0x10d3 }, + { vendor => 0x8086, device => 0x10f6 }, + { vendor => 0x8086, device => 0x150c }, + { vendor => 0x8086, device => 0x153a }, + { vendor => 0x8086, device => 0x153b }, + { vendor => 0x8086, device => 0x155a }, + { vendor => 0x8086, device => 0x1559 }, + { vendor => 0x8086, device => 0x15a0 }, + { vendor => 0x8086, device => 0x15a1 }, + { vendor => 0x8086, device => 0x15a2 }, + { vendor => 0x8086, device => 0x15a3 }, + + # Intel IGB + { vendor => 0x8086, device => 0x10c9 }, + { vendor => 0x8086, device => 0x10e6 }, + { vendor => 0x8086, device => 0x10e7 }, + { vendor => 0x8086, device => 0x10e8 }, + { vendor => 0x8086, device => 0x1526 }, + { vendor => 0x8086, device => 0x150a }, + { vendor => 0x8086, device => 0x1518 }, + { vendor => 0x8086, device => 0x150d }, + { vendor => 0x8086, device => 
0x10a7 }, + { vendor => 0x8086, device => 0x10a9 }, + { vendor => 0x8086, device => 0x10d6 }, + { vendor => 0x8086, device => 0x150e }, + { vendor => 0x8086, device => 0x150f }, + { vendor => 0x8086, device => 0x1510 }, + { vendor => 0x8086, device => 0x1511 }, + { vendor => 0x8086, device => 0x1516 }, + { vendor => 0x8086, device => 0x1527 }, + { vendor => 0x8086, device => 0x1521 }, + { vendor => 0x8086, device => 0x1522 }, + { vendor => 0x8086, device => 0x1523 }, + { vendor => 0x8086, device => 0x1524 }, + { vendor => 0x8086, device => 0x1546 }, + { vendor => 0x8086, device => 0x1533 }, + { vendor => 0x8086, device => 0x1534 }, + { vendor => 0x8086, device => 0x1535 }, + { vendor => 0x8086, device => 0x1536 }, + { vendor => 0x8086, device => 0x1537 }, + { vendor => 0x8086, device => 0x1538 }, + { vendor => 0x8086, device => 0x1539 }, + { vendor => 0x8086, device => 0x1f40 }, + { vendor => 0x8086, device => 0x1f41 }, + { vendor => 0x8086, device => 0x1f45 }, + { vendor => 0x8086, device => 0x0438 }, + { vendor => 0x8086, device => 0x043a }, + { vendor => 0x8086, device => 0x043c }, + { vendor => 0x8086, device => 0x0440 }, + + # Intel IXGBE + { vendor => 0x8086, device => 0x10b6 }, + { vendor => 0x8086, device => 0x1508 }, + { vendor => 0x8086, device => 0x10c6 }, + { vendor => 0x8086, device => 0x10c7 }, + { vendor => 0x8086, device => 0x10c8 }, + { vendor => 0x8086, device => 0x150b }, + { vendor => 0x8086, device => 0x10db }, + { vendor => 0x8086, device => 0x10dd }, + { vendor => 0x8086, device => 0x10ec }, + { vendor => 0x8086, device => 0x10f1 }, + { vendor => 0x8086, device => 0x10e1 }, + { vendor => 0x8086, device => 0x10f4 }, + { vendor => 0x8086, device => 0x10f7 }, + { vendor => 0x8086, device => 0x1514 }, + { vendor => 0x8086, device => 0x1517 }, + { vendor => 0x8086, device => 0x10f8 }, + { vendor => 0x8086, device => 0x000c }, + { vendor => 0x8086, device => 0x10f9 }, + { vendor => 0x8086, device => 0x10fb }, + { vendor => 0x8086, device => 0x11a9 
}, + { vendor => 0x8086, device => 0x1f72 }, + { vendor => 0x8086, device => 0x17d0 }, + { vendor => 0x8086, device => 0x0470 }, + { vendor => 0x8086, device => 0x152a }, + { vendor => 0x8086, device => 0x1529 }, + { vendor => 0x8086, device => 0x1507 }, + { vendor => 0x8086, device => 0x154d }, + { vendor => 0x8086, device => 0x154a }, + { vendor => 0x8086, device => 0x1558 }, + { vendor => 0x8086, device => 0x1557 }, + { vendor => 0x8086, device => 0x10fc }, + { vendor => 0x8086, device => 0x151c }, + { vendor => 0x8086, device => 0x154f }, + { vendor => 0x8086, device => 0x1528 }, + { vendor => 0x8086, device => 0x1560 }, + { vendor => 0x8086, device => 0x15ac }, + { vendor => 0x8086, device => 0x15ad }, + { vendor => 0x8086, device => 0x15ae }, + { vendor => 0x8086, device => 0x1563 }, + { vendor => 0x8086, device => 0x15aa }, + { vendor => 0x8086, device => 0x15ab }, + { vendor => 0x8086, device => 0x15b4 }, + { vendor => 0x8086, device => 0x15c2 }, + { vendor => 0x8086, device => 0x15c3 }, + { vendor => 0x8086, device => 0x15c4 }, + { vendor => 0x8086, device => 0x15c5 }, + { vendor => 0x8086, device => 0x15c6 }, + { vendor => 0x8086, device => 0x15c7 }, + { vendor => 0x8086, device => 0x15c8 }, + { vendor => 0x8086, device => 0x15ca }, + { vendor => 0x8086, device => 0x15cc }, + { vendor => 0x8086, device => 0x15ce }, + { vendor => 0x8086, device => 0x15e4 }, + { vendor => 0x8086, device => 0x15e5 }, + + # Intel I40E (Fortville) + { vendor => 0x8086, device => 0x1572 }, + { vendor => 0x8086, device => 0x1574 }, + { vendor => 0x8086, device => 0x157f }, + { vendor => 0x8086, device => 0x1580 }, + { vendor => 0x8086, device => 0x1581 }, + { vendor => 0x8086, device => 0x1583 }, + { vendor => 0x8086, device => 0x1584 }, + { vendor => 0x8086, device => 0x1585 }, + { vendor => 0x8086, device => 0x1586 }, + { vendor => 0x8086, device => 0x1587 }, + { vendor => 0x8086, device => 0x1588 }, + { vendor => 0x8086, device => 0x1589 }, + { vendor => 0x8086, device => 
0x158a }, + { vendor => 0x8086, device => 0x158b }, + { vendor => 0x8086, device => 0x374c }, + { vendor => 0x8086, device => 0x37ce }, + { vendor => 0x8086, device => 0x37d0 }, + { vendor => 0x8086, device => 0x37d1 }, + { vendor => 0x8086, device => 0x37d2 }, + { vendor => 0x8086, device => 0x37d3 }, + + # Intel FM10K (Red Rock Canyon) + { vendor => 0x8086, device => 0x15a4 }, + { vendor => 0x8086, device => 0x15d0 }, + + # Intel IGB VF + { vendor => 0x8086, device => 0x10ca }, + { vendor => 0x8086, device => 0x152d }, + { vendor => 0x8086, device => 0x1520 }, + { vendor => 0x8086, device => 0x152f }, + + # Intel IXGBE VF + { vendor => 0x8086, device => 0x10ed }, + { vendor => 0x8086, device => 0x152e }, + { vendor => 0x8086, device => 0x1515 }, + { vendor => 0x8086, device => 0x1530 }, + { vendor => 0x8086, device => 0x1564 }, + { vendor => 0x8086, device => 0x1565 }, + { vendor => 0x8086, device => 0x15a8 }, + { vendor => 0x8086, device => 0x15a9 }, + + # Intel I40E VF + { vendor => 0x8086, device => 0x154c }, + { vendor => 0x8086, device => 0x1571 }, + { vendor => 0x8086, device => 0x37cd }, + { vendor => 0x8086, device => 0x37d9 }, + + # Intel FM10K VF + { vendor => 0x8086, device => 0x15a5 }, + + # Broadcom/Qlogic BNX2X + { vendor => 0x14e4, device => 0x168a }, + { vendor => 0x14e4, device => 0x16a9 }, + { vendor => 0x14e4, device => 0x164f }, + { vendor => 0x14e4, device => 0x168e }, + { vendor => 0x14e4, device => 0x16af }, + { vendor => 0x14e4, device => 0x163d }, + { vendor => 0x14e4, device => 0x163f }, + { vendor => 0x14e4, device => 0x168d }, + { vendor => 0x14e4, device => 0x16a1 }, + { vendor => 0x14e4, device => 0x16a2 }, + { vendor => 0x14e4, device => 0x16ad }, + + # Broadcom BNXT + { vendor => 0x14e4, device => 0x1614 }, + { vendor => 0x14e4, device => 0x16c1 }, + { vendor => 0x14e4, device => 0x16c8 }, + { vendor => 0x14e4, device => 0x16c9 }, + { vendor => 0x14e4, device => 0x16ca }, + { vendor => 0x14e4, device => 0x16cb }, + { vendor => 
0x14e4, device => 0x16cc }, + { vendor => 0x14e4, device => 0x16cd }, + { vendor => 0x14e4, device => 0x16ce }, + { vendor => 0x14e4, device => 0x16cf }, + { vendor => 0x14e4, device => 0x16d0 }, + { vendor => 0x14e4, device => 0x16d1 }, + { vendor => 0x14e4, device => 0x16d2 }, + { vendor => 0x14e4, device => 0x16d3 }, + { vendor => 0x14e4, device => 0x16d4 }, + { vendor => 0x14e4, device => 0x16d5 }, + { vendor => 0x14e4, device => 0x16d6 }, + { vendor => 0x14e4, device => 0x16d7 }, + { vendor => 0x14e4, device => 0x16d8 }, + { vendor => 0x14e4, device => 0x16d9 }, + { vendor => 0x14e4, device => 0x16dc }, + { vendor => 0x14e4, device => 0x16de }, + { vendor => 0x14e4, device => 0x16df }, + { vendor => 0x14e4, device => 0x16e0 }, + { vendor => 0x14e4, device => 0x16e1 }, + { vendor => 0x14e4, device => 0x16e2 }, + { vendor => 0x14e4, device => 0x16e3 }, + { vendor => 0x14e4, device => 0x16e4 }, + { vendor => 0x14e4, device => 0x16e7 }, + { vendor => 0x14e4, device => 0x16e8 }, + { vendor => 0x14e4, device => 0x16e9 }, + { vendor => 0x14e4, device => 0x16ea }, + { vendor => 0x14e4, device => 0x16ec }, + { vendor => 0x14e4, device => 0x16ee }, + + # Virtio + { vendor => 0x1af4, device => 0x1000 }, + { vendor => 0x1af4, device => 0x1041 }, + + # Windriver Accelerated Virtual Port + { vendor => 0x1af4, device => 0x1110 }, + + # VMXNET3 + { vendor => 0x15ad, device => 0x07b0 }, + + # Cavium ThunderNic + { vendor => 0x177d, device => 0xa034 }, + { vendor => 0x177d, device => 0x0011 }, + + # Chelsio T5 adapters + { vendor => 0x1425, device => 0x5000 }, + { vendor => 0x1425, device => 0x5001 }, + { vendor => 0x1425, device => 0x5002 }, + { vendor => 0x1425, device => 0x5003 }, + { vendor => 0x1425, device => 0x5004 }, + { vendor => 0x1425, device => 0x5005 }, + { vendor => 0x1425, device => 0x5006 }, + { vendor => 0x1425, device => 0x5007 }, + { vendor => 0x1425, device => 0x5008 }, + { vendor => 0x1425, device => 0x5009 }, + { vendor => 0x1425, device => 0x500a }, + { 
vendor => 0x1425, device => 0x500d }, + { vendor => 0x1425, device => 0x500e }, + { vendor => 0x1425, device => 0x5010 }, + { vendor => 0x1425, device => 0x5011 }, + { vendor => 0x1425, device => 0x5012 }, + { vendor => 0x1425, device => 0x5013 }, + { vendor => 0x1425, device => 0x5014 }, + { vendor => 0x1425, device => 0x5015 }, + { vendor => 0x1425, device => 0x5016 }, + { vendor => 0x1425, device => 0x5017 }, + { vendor => 0x1425, device => 0x5018 }, + { vendor => 0x1425, device => 0x5019 }, + { vendor => 0x1425, device => 0x501a }, + { vendor => 0x1425, device => 0x501b }, + { vendor => 0x1425, device => 0x5080 }, + { vendor => 0x1425, device => 0x5081 }, + { vendor => 0x1425, device => 0x5082 }, + { vendor => 0x1425, device => 0x5083 }, + { vendor => 0x1425, device => 0x5084 }, + { vendor => 0x1425, device => 0x5085 }, + { vendor => 0x1425, device => 0x5086 }, + { vendor => 0x1425, device => 0x5087 }, + { vendor => 0x1425, device => 0x5088 }, + { vendor => 0x1425, device => 0x5089 }, + { vendor => 0x1425, device => 0x5090 }, + { vendor => 0x1425, device => 0x5091 }, + { vendor => 0x1425, device => 0x5092 }, + { vendor => 0x1425, device => 0x5093 }, + { vendor => 0x1425, device => 0x5094 }, + { vendor => 0x1425, device => 0x5095 }, + { vendor => 0x1425, device => 0x5096 }, + { vendor => 0x1425, device => 0x5097 }, + { vendor => 0x1425, device => 0x5098 }, + { vendor => 0x1425, device => 0x5099 }, + { vendor => 0x1425, device => 0x509A }, + { vendor => 0x1425, device => 0x509B }, + { vendor => 0x1425, device => 0x509c }, + { vendor => 0x1425, device => 0x509d }, + { vendor => 0x1425, device => 0x509e }, + { vendor => 0x1425, device => 0x509f }, + { vendor => 0x1425, device => 0x50a0 }, + { vendor => 0x1425, device => 0x50a1 }, + { vendor => 0x1425, device => 0x50a2 }, + { vendor => 0x1425, device => 0x50a3 }, + { vendor => 0x1425, device => 0x50a4 }, + { vendor => 0x1425, device => 0x50a5 }, + { vendor => 0x1425, device => 0x50a6 }, + { vendor => 0x1425, device 
=> 0x50a7 }, + { vendor => 0x1425, device => 0x50a8 }, + { vendor => 0x1425, device => 0x50a9 }, + { vendor => 0x1425, device => 0x50aa }, + { vendor => 0x1425, device => 0x50ab }, + { vendor => 0x1425, device => 0x50ac }, + { vendor => 0x1425, device => 0x50ad }, + { vendor => 0x1425, device => 0x50ae }, + { vendor => 0x1425, device => 0x50af }, + { vendor => 0x1425, device => 0x50b0 }, + + # Chelsio T6 adapters + { vendor => 0x1425, device => 0x6001 }, + { vendor => 0x1425, device => 0x6002 }, + { vendor => 0x1425, device => 0x6003 }, + { vendor => 0x1425, device => 0x6004 }, + { vendor => 0x1425, device => 0x6005 }, + { vendor => 0x1425, device => 0x6006 }, + { vendor => 0x1425, device => 0x6007 }, + { vendor => 0x1425, device => 0x6008 }, + { vendor => 0x1425, device => 0x6009 }, + { vendor => 0x1425, device => 0x600d }, + { vendor => 0x1425, device => 0x6011 }, + { vendor => 0x1425, device => 0x6014 }, + { vendor => 0x1425, device => 0x6015 }, + { vendor => 0x1425, device => 0x6080 }, + { vendor => 0x1425, device => 0x6081 }, + { vendor => 0x1425, device => 0x6082 }, + { vendor => 0x1425, device => 0x6083 }, + { vendor => 0x1425, device => 0x6084 }, + { vendor => 0x1425, device => 0x6085 }, + { vendor => 0x1425, device => 0x6086 }, + { vendor => 0x1425, device => 0x6087 }, + { vendor => 0x1425, device => 0x6088 }, + { vendor => 0x1425, device => 0x6089 }, + { vendor => 0x1425, device => 0x608a }, + { vendor => 0x1425, device => 0x608b }, + + # Amazon ena adapters + { vendor => 0x1D0F, device => 0xEC20 }, + { vendor => 0x1D0F, device => 0xEC21 }, +); + +# List of Mellanox IB device. 
+my @ib_devices = (
+    # Mellanox ConnectX-4 adapters
+    { vendor => 0x15b3, device => 0x1003 },
+    { vendor => 0x15b3, device => 0x1004 },
+    { vendor => 0x15b3, device => 0x1007 },
+
+    # Mellanox ConnectX-5 adapters
+    { vendor => 0x15b3, device => 0x1013 },
+    { vendor => 0x15b3, device => 0x1014 },
+    { vendor => 0x15b3, device => 0x1015 },
+    { vendor => 0x15b3, device => 0x1016 },
+    { vendor => 0x15b3, device => 0x1017 },
+    { vendor => 0x15b3, device => 0x1018 },
+    { vendor => 0x15b3, device => 0x1019 },
+    { vendor => 0x15b3, device => 0x101a },
+    { vendor => 0x15b3, device => 0xa2d2 },
+    { vendor => 0x15b3, device => 0xa2d3 },
+
+    # Mellanox ConnectX-6 adapters
+    { vendor => 0x15b3, device => 0x101b },
+    { vendor => 0x15b3, device => 0x101c },
+    { vendor => 0x15b3, device => 0x101d },
+    { vendor => 0x15b3, device => 0x101e },
+);
+
+# Report whether a vendor/device ID pair is listed in @pci_devices.
+#
+# Arguments: $vendor, $device - IDs compared numerically (==) against the
+#            table entries.
+# Returns:   1 when the pair is listed; otherwise a bare return (undef in
+#            scalar context, empty list in list context).
+sub is_supported_pci_device {
+    my ( $vendor, $device ) = @_;
+
+    # Return explicitly on both paths instead of relying on the value left
+    # behind by the last evaluated expression when falling off the sub end.
+    return 1
+      if grep { $_->{vendor} == $vendor && $_->{device} == $device }
+      @pci_devices;
+    return;
+}
+
+# Report whether a vendor/device ID pair is listed in @ib_devices.
+#
+# Arguments: $vendor, $device - IDs compared numerically (==) against the
+#            table entries.
+# Returns:   1 when the pair is listed; otherwise a bare return (undef in
+#            scalar context, empty list in list context).
+sub is_supported_ib_device {
+    my ( $vendor, $device ) = @_;
+
+    return 1
+      if grep { $_->{vendor} == $vendor && $_->{device} == $device }
+      @ib_devices;
+    return;
+}
+
+# Report whether a vendor/device ID pair is in either device table.
+#
+# Arguments: $vendor, $device - IDs forwarded unchanged to the two
+#            table-specific checks above.
+# Returns:   1 when either check succeeds, 0 otherwise.
+sub is_supported_device {
+    my ( $vendor, $device ) = @_;
+
+    return 1 if is_supported_pci_device( $vendor, $device );
+    return 1 if is_supported_ib_device( $vendor, $device );
+
+    return 0;
+}
+
+# A file loaded with require/use must finish with a true value; state it
+# explicitly rather than depending on the last executed statement.
+1;
diff --git a/m4/ax_code_coverage.m4 b/m4/ax_code_coverage.m4 deleted file mode 100644 index 0934a44c..00000000 --- a/m4/ax_code_coverage.m4 +++ /dev/null @@ -1,264 +0,0 @@ -# =========================================================================== -# https://www.gnu.org/software/autoconf-archive/ax_code_coverage.html -# =========================================================================== -# -# SYNOPSIS -# -# AX_CODE_COVERAGE() -# -# DESCRIPTION -# -# Defines CODE_COVERAGE_CPPFLAGS, CODE_COVERAGE_CFLAGS, -# CODE_COVERAGE_CXXFLAGS and CODE_COVERAGE_LIBS which should be included -# in the CPPFLAGS,
CFLAGS CXXFLAGS and LIBS/LIBADD variables of every -# build target (program or library) which should be built with code -# coverage support. Also defines CODE_COVERAGE_RULES which should be -# substituted in your Makefile; and $enable_code_coverage which can be -# used in subsequent configure output. CODE_COVERAGE_ENABLED is defined -# and substituted, and corresponds to the value of the -# --enable-code-coverage option, which defaults to being disabled. -# -# Test also for gcov program and create GCOV variable that could be -# substituted. -# -# Note that all optimisation flags in CFLAGS must be disabled when code -# coverage is enabled. -# -# Usage example: -# -# configure.ac: -# -# AX_CODE_COVERAGE -# -# Makefile.am: -# -# @CODE_COVERAGE_RULES@ -# my_program_LIBS = ... $(CODE_COVERAGE_LIBS) ... -# my_program_CPPFLAGS = ... $(CODE_COVERAGE_CPPFLAGS) ... -# my_program_CFLAGS = ... $(CODE_COVERAGE_CFLAGS) ... -# my_program_CXXFLAGS = ... $(CODE_COVERAGE_CXXFLAGS) ... -# -# This results in a "check-code-coverage" rule being added to any -# Makefile.am which includes "@CODE_COVERAGE_RULES@" (assuming the module -# has been configured with --enable-code-coverage). Running `make -# check-code-coverage` in that directory will run the module's test suite -# (`make check`) and build a code coverage report detailing the code which -# was touched, then print the URI for the report. -# -# In earlier versions of this macro, CODE_COVERAGE_LDFLAGS was defined -# instead of CODE_COVERAGE_LIBS. They are both still defined, but use of -# CODE_COVERAGE_LIBS is preferred for clarity; CODE_COVERAGE_LDFLAGS is -# deprecated. They have the same value. -# -# This code was derived from Makefile.decl in GLib, originally licenced -# under LGPLv2.1+. 
-# -# LICENSE -# -# Copyright (c) 2012, 2016 Philip Withnall -# Copyright (c) 2012 Xan Lopez -# Copyright (c) 2012 Christian Persch -# Copyright (c) 2012 Paolo Borelli -# Copyright (c) 2012 Dan Winship -# Copyright (c) 2015 Bastien ROUCARIES -# -# This library is free software; you can redistribute it and/or modify it -# under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation; either version 2.1 of the License, or (at -# your option) any later version. -# -# This library is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser -# General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program. If not, see . - -#serial 21 - -AC_DEFUN([AX_CODE_COVERAGE],[ - dnl Check for --enable-code-coverage - AC_REQUIRE([AC_PROG_SED]) - - # allow to override gcov location - AC_ARG_WITH([gcov], - [AS_HELP_STRING([--with-gcov[=GCOV]], [use given GCOV for coverage (GCOV=gcov).])], - [_AX_CODE_COVERAGE_GCOV_PROG_WITH=$with_gcov], - [_AX_CODE_COVERAGE_GCOV_PROG_WITH=gcov]) - - AC_MSG_CHECKING([whether to build with code coverage support]) - AC_ARG_ENABLE([code-coverage], - AS_HELP_STRING([--enable-code-coverage], - [Whether to enable code coverage support]),, - enable_code_coverage=no) - - AM_CONDITIONAL([CODE_COVERAGE_ENABLED], [test x$enable_code_coverage = xyes]) - AC_SUBST([CODE_COVERAGE_ENABLED], [$enable_code_coverage]) - AC_MSG_RESULT($enable_code_coverage) - - AS_IF([ test "$enable_code_coverage" = "yes" ], [ - # check for gcov - AC_CHECK_TOOL([GCOV], - [$_AX_CODE_COVERAGE_GCOV_PROG_WITH], - [:]) - AS_IF([test "X$GCOV" = "X:"], - [AC_MSG_ERROR([gcov is needed to do coverage])]) - AC_SUBST([GCOV]) - - dnl Check if gcc is being used - AS_IF([ test "$GCC" = "no" ], [ - AC_MSG_ERROR([not compiling with gcc, which is 
required for gcov code coverage]) - ]) - - AC_CHECK_PROG([LCOV], [lcov], [lcov]) - AC_CHECK_PROG([GENHTML], [genhtml], [genhtml]) - - AS_IF([ test -z "$LCOV" ], [ - AC_MSG_ERROR([To enable code coverage reporting you must have lcov installed]) - ]) - - AS_IF([ test -z "$GENHTML" ], [ - AC_MSG_ERROR([Could not find genhtml from the lcov package]) - ]) - - dnl Build the code coverage flags - dnl Define CODE_COVERAGE_LDFLAGS for backwards compatibility - CODE_COVERAGE_CPPFLAGS="-DNDEBUG" - CODE_COVERAGE_CFLAGS="-O0 -g -fprofile-arcs -ftest-coverage" - CODE_COVERAGE_CXXFLAGS="-O0 -g -fprofile-arcs -ftest-coverage" - CODE_COVERAGE_LIBS="-lgcov" - CODE_COVERAGE_LDFLAGS="$CODE_COVERAGE_LIBS" - - AC_SUBST([CODE_COVERAGE_CPPFLAGS]) - AC_SUBST([CODE_COVERAGE_CFLAGS]) - AC_SUBST([CODE_COVERAGE_CXXFLAGS]) - AC_SUBST([CODE_COVERAGE_LIBS]) - AC_SUBST([CODE_COVERAGE_LDFLAGS]) - - [CODE_COVERAGE_RULES_CHECK=' - -$(A''M_V_at)$(MAKE) $(AM_MAKEFLAGS) -k check - $(A''M_V_at)$(MAKE) $(AM_MAKEFLAGS) code-coverage-capture -'] - [CODE_COVERAGE_RULES_CAPTURE=' - $(code_coverage_v_lcov_cap)$(LCOV) $(code_coverage_quiet) $(addprefix --directory ,$(CODE_COVERAGE_DIRECTORY)) --capture --output-file "$(CODE_COVERAGE_OUTPUT_FILE).tmp" --test-name "$(call code_coverage_sanitize,$(PACKAGE_NAME)-$(PACKAGE_VERSION))" --no-checksum --compat-libtool $(CODE_COVERAGE_LCOV_SHOPTS) $(CODE_COVERAGE_LCOV_OPTIONS) - $(code_coverage_v_lcov_ign)$(LCOV) $(code_coverage_quiet) $(addprefix --directory ,$(CODE_COVERAGE_DIRECTORY)) --remove "$(CODE_COVERAGE_OUTPUT_FILE).tmp" "/tmp/*" $(CODE_COVERAGE_IGNORE_PATTERN) --output-file "$(CODE_COVERAGE_OUTPUT_FILE)" $(CODE_COVERAGE_LCOV_SHOPTS) $(CODE_COVERAGE_LCOV_RMOPTS) - -@rm -f $(CODE_COVERAGE_OUTPUT_FILE).tmp - $(code_coverage_v_genhtml)LANG=C $(GENHTML) $(code_coverage_quiet) $(addprefix --prefix ,$(CODE_COVERAGE_DIRECTORY)) --output-directory "$(CODE_COVERAGE_OUTPUT_DIRECTORY)" --title "$(PACKAGE_NAME)-$(PACKAGE_VERSION) Code Coverage" --legend --show-details 
"$(CODE_COVERAGE_OUTPUT_FILE)" $(CODE_COVERAGE_GENHTML_OPTIONS) - @echo "file://$(abs_builddir)/$(CODE_COVERAGE_OUTPUT_DIRECTORY)/index.html" -'] - [CODE_COVERAGE_RULES_CLEAN=' -clean: code-coverage-clean -distclean: code-coverage-clean -code-coverage-clean: - -$(LCOV) --directory $(top_builddir) -z - -rm -rf $(CODE_COVERAGE_OUTPUT_FILE) $(CODE_COVERAGE_OUTPUT_FILE).tmp $(CODE_COVERAGE_OUTPUT_DIRECTORY) - -find . \( -name "*.gcda" -o -name "*.gcno" -o -name "*.gcov" \) -delete -'] - ], [ - [CODE_COVERAGE_RULES_CHECK=' - @echo "Need to reconfigure with --enable-code-coverage" -'] - CODE_COVERAGE_RULES_CAPTURE="$CODE_COVERAGE_RULES_CHECK" - CODE_COVERAGE_RULES_CLEAN='' - ]) - -[CODE_COVERAGE_RULES=' -# Code coverage -# -# Optional: -# - CODE_COVERAGE_DIRECTORY: Top-level directory for code coverage reporting. -# Multiple directories may be specified, separated by whitespace. -# (Default: $(top_builddir)) -# - CODE_COVERAGE_OUTPUT_FILE: Filename and path for the .info file generated -# by lcov for code coverage. (Default: -# $(PACKAGE_NAME)-$(PACKAGE_VERSION)-coverage.info) -# - CODE_COVERAGE_OUTPUT_DIRECTORY: Directory for generated code coverage -# reports to be created. (Default: -# $(PACKAGE_NAME)-$(PACKAGE_VERSION)-coverage) -# - CODE_COVERAGE_BRANCH_COVERAGE: Set to 1 to enforce branch coverage, -# set to 0 to disable it and leave empty to stay with the default. -# (Default: empty) -# - CODE_COVERAGE_LCOV_SHOPTS_DEFAULT: Extra options shared between both lcov -# instances. (Default: based on $CODE_COVERAGE_BRANCH_COVERAGE) -# - CODE_COVERAGE_LCOV_SHOPTS: Extra options to shared between both lcov -# instances. (Default: $CODE_COVERAGE_LCOV_SHOPTS_DEFAULT) -# - CODE_COVERAGE_LCOV_OPTIONS_GCOVPATH: --gcov-tool pathtogcov -# - CODE_COVERAGE_LCOV_OPTIONS_DEFAULT: Extra options to pass to the -# collecting lcov instance. (Default: $CODE_COVERAGE_LCOV_OPTIONS_GCOVPATH) -# - CODE_COVERAGE_LCOV_OPTIONS: Extra options to pass to the collecting lcov -# instance. 
(Default: $CODE_COVERAGE_LCOV_OPTIONS_DEFAULT) -# - CODE_COVERAGE_LCOV_RMOPTS_DEFAULT: Extra options to pass to the filtering -# lcov instance. (Default: empty) -# - CODE_COVERAGE_LCOV_RMOPTS: Extra options to pass to the filtering lcov -# instance. (Default: $CODE_COVERAGE_LCOV_RMOPTS_DEFAULT) -# - CODE_COVERAGE_GENHTML_OPTIONS_DEFAULT: Extra options to pass to the -# genhtml instance. (Default: based on $CODE_COVERAGE_BRANCH_COVERAGE) -# - CODE_COVERAGE_GENHTML_OPTIONS: Extra options to pass to the genhtml -# instance. (Default: $CODE_COVERAGE_GENHTML_OPTIONS_DEFAULT) -# - CODE_COVERAGE_IGNORE_PATTERN: Extra glob pattern of files to ignore -# -# The generated report will be titled using the $(PACKAGE_NAME) and -# $(PACKAGE_VERSION). In order to add the current git hash to the title, -# use the git-version-gen script, available online. - -# Optional variables -CODE_COVERAGE_DIRECTORY ?= $(top_builddir) -CODE_COVERAGE_OUTPUT_FILE ?= $(PACKAGE_NAME)-$(PACKAGE_VERSION)-coverage.info -CODE_COVERAGE_OUTPUT_DIRECTORY ?= $(PACKAGE_NAME)-$(PACKAGE_VERSION)-coverage -CODE_COVERAGE_BRANCH_COVERAGE ?= -CODE_COVERAGE_LCOV_SHOPTS_DEFAULT ?= $(if $(CODE_COVERAGE_BRANCH_COVERAGE),\ ---rc lcov_branch_coverage=$(CODE_COVERAGE_BRANCH_COVERAGE)) -CODE_COVERAGE_LCOV_SHOPTS ?= $(CODE_COVERAGE_LCOV_SHOPTS_DEFAULT) -CODE_COVERAGE_LCOV_OPTIONS_GCOVPATH ?= --gcov-tool "$(GCOV)" -CODE_COVERAGE_LCOV_OPTIONS_DEFAULT ?= $(CODE_COVERAGE_LCOV_OPTIONS_GCOVPATH) -CODE_COVERAGE_LCOV_OPTIONS ?= $(CODE_COVERAGE_LCOV_OPTIONS_DEFAULT) -CODE_COVERAGE_LCOV_RMOPTS_DEFAULT ?= -CODE_COVERAGE_LCOV_RMOPTS ?= $(CODE_COVERAGE_LCOV_RMOPTS_DEFAULT) -CODE_COVERAGE_GENHTML_OPTIONS_DEFAULT ?=\ -$(if $(CODE_COVERAGE_BRANCH_COVERAGE),\ ---rc genhtml_branch_coverage=$(CODE_COVERAGE_BRANCH_COVERAGE)) -CODE_COVERAGE_GENHTML_OPTIONS ?= $(CODE_COVERAGE_GENHTML_OPTIONS_DEFAULTS) -CODE_COVERAGE_IGNORE_PATTERN ?= - -code_coverage_v_lcov_cap = $(code_coverage_v_lcov_cap_$(V)) -code_coverage_v_lcov_cap_ = 
$(code_coverage_v_lcov_cap_$(AM_DEFAULT_VERBOSITY)) -code_coverage_v_lcov_cap_0 = @echo " LCOV --capture"\ - $(CODE_COVERAGE_OUTPUT_FILE); -code_coverage_v_lcov_ign = $(code_coverage_v_lcov_ign_$(V)) -code_coverage_v_lcov_ign_ = $(code_coverage_v_lcov_ign_$(AM_DEFAULT_VERBOSITY)) -code_coverage_v_lcov_ign_0 = @echo " LCOV --remove /tmp/*"\ - $(CODE_COVERAGE_IGNORE_PATTERN); -code_coverage_v_genhtml = $(code_coverage_v_genhtml_$(V)) -code_coverage_v_genhtml_ = $(code_coverage_v_genhtml_$(AM_DEFAULT_VERBOSITY)) -code_coverage_v_genhtml_0 = @echo " GEN " $(CODE_COVERAGE_OUTPUT_DIRECTORY); -code_coverage_quiet = $(code_coverage_quiet_$(V)) -code_coverage_quiet_ = $(code_coverage_quiet_$(AM_DEFAULT_VERBOSITY)) -code_coverage_quiet_0 = --quiet - -# sanitizes the test-name: replaces with underscores: dashes and dots -code_coverage_sanitize = $(subst -,_,$(subst .,_,$(1))) - -# Use recursive makes in order to ignore errors during check -check-code-coverage:'"$CODE_COVERAGE_RULES_CHECK"' - -# Capture code coverage data -code-coverage-capture: code-coverage-capture-hook'"$CODE_COVERAGE_RULES_CAPTURE"' - -# Hook rule executed before code-coverage-capture, overridable by the user -code-coverage-capture-hook: - -'"$CODE_COVERAGE_RULES_CLEAN"' - -GITIGNOREFILES ?= -GITIGNOREFILES += $(CODE_COVERAGE_OUTPUT_FILE) $(CODE_COVERAGE_OUTPUT_DIRECTORY) - -A''M_DISTCHECK_CONFIGURE_FLAGS ?= -A''M_DISTCHECK_CONFIGURE_FLAGS += --disable-code-coverage - -.PHONY: check-code-coverage code-coverage-capture code-coverage-capture-hook code-coverage-clean -'] - - AC_SUBST([CODE_COVERAGE_RULES]) - m4_ifdef([_AM_SUBST_NOTMAKE], [_AM_SUBST_NOTMAKE([CODE_COVERAGE_RULES])]) -]) diff --git a/m4/ax_cxx_compile_stdcxx.m4 b/m4/ax_cxx_compile_stdcxx.m4 deleted file mode 100644 index 079e17d2..00000000 --- a/m4/ax_cxx_compile_stdcxx.m4 +++ /dev/null @@ -1,558 +0,0 @@ -# =========================================================================== -# 
http://www.gnu.org/software/autoconf-archive/ax_cxx_compile_stdcxx.html -# =========================================================================== -# -# SYNOPSIS -# -# AX_CXX_COMPILE_STDCXX(VERSION, [ext|noext], [mandatory|optional]) -# -# DESCRIPTION -# -# Check for baseline language coverage in the compiler for the specified -# version of the C++ standard. If necessary, add switches to CXXFLAGS to -# enable support. VERSION may be '11' (for the C++11 standard) or '14' -# (for the C++14 standard). -# -# The second argument, if specified, indicates whether you insist on an -# extended mode (e.g. -std=gnu++11) or a strict conformance mode (e.g. -# -std=c++11). If neither is specified, you get whatever works, with -# preference for an extended mode. -# -# The third argument, if specified 'mandatory' or if left unspecified, -# indicates that baseline support for the specified C++ standard is -# required and that the macro should error out if no mode with that -# support is found. If specified 'optional', then configuration proceeds -# regardless, after defining HAVE_CXX${VERSION} if and only if a -# supporting mode is found. -# -# LICENSE -# -# Copyright (c) 2008 Benjamin Kosnik -# Copyright (c) 2012 Zack Weinberg -# Copyright (c) 2013 Roy Stogner -# Copyright (c) 2014, 2015 Google Inc.; contributed by Alexey Sokolov -# Copyright (c) 2015 Paul Norman -# Copyright (c) 2015 Moritz Klammler -# -# Copying and distribution of this file, with or without modification, are -# permitted in any medium without royalty provided the copyright notice -# and this notice are preserved. This file is offered as-is, without any -# warranty. - -#serial 1 - -dnl This macro is based on the code from the AX_CXX_COMPILE_STDCXX_11 macro -dnl (serial version number 13). 
- -AC_DEFUN([AX_CXX_COMPILE_STDCXX], [dnl - m4_if([$1], [11], [], - [$1], [14], [], - [$1], [17], [m4_fatal([support for C++17 not yet implemented in AX_CXX_COMPILE_STDCXX])], - [m4_fatal([invalid first argument `$1' to AX_CXX_COMPILE_STDCXX])])dnl - m4_if([$2], [], [], - [$2], [ext], [], - [$2], [noext], [], - [m4_fatal([invalid second argument `$2' to AX_CXX_COMPILE_STDCXX])])dnl - m4_if([$3], [], [ax_cxx_compile_cxx$1_required=true], - [$3], [mandatory], [ax_cxx_compile_cxx$1_required=true], - [$3], [optional], [ax_cxx_compile_cxx$1_required=false], - [m4_fatal([invalid third argument `$3' to AX_CXX_COMPILE_STDCXX])]) - AC_LANG_PUSH([C++])dnl - ac_success=no - AC_CACHE_CHECK(whether $CXX supports C++$1 features by default, - ax_cv_cxx_compile_cxx$1, - [AC_COMPILE_IFELSE([AC_LANG_SOURCE([_AX_CXX_COMPILE_STDCXX_testbody_$1])], - [ax_cv_cxx_compile_cxx$1=yes], - [ax_cv_cxx_compile_cxx$1=no])]) - if test x$ax_cv_cxx_compile_cxx$1 = xyes; then - ac_success=yes - fi - - m4_if([$2], [noext], [], [dnl - if test x$ac_success = xno; then - for switch in -std=gnu++$1 -std=gnu++0x; do - cachevar=AS_TR_SH([ax_cv_cxx_compile_cxx$1_$switch]) - AC_CACHE_CHECK(whether $CXX supports C++$1 features with $switch, - $cachevar, - [ac_save_CXXFLAGS="$CXXFLAGS" - CXXFLAGS="$CXXFLAGS $switch" - AC_COMPILE_IFELSE([AC_LANG_SOURCE([_AX_CXX_COMPILE_STDCXX_testbody_$1])], - [eval $cachevar=yes], - [eval $cachevar=no]) - CXXFLAGS="$ac_save_CXXFLAGS"]) - if eval test x\$$cachevar = xyes; then - CXXFLAGS="$CXXFLAGS $switch" - ac_success=yes - break - fi - done - fi]) - - m4_if([$2], [ext], [], [dnl - if test x$ac_success = xno; then - dnl HP's aCC needs +std=c++11 according to: - dnl http://h21007.www2.hp.com/portal/download/files/unprot/aCxx/PDF_Release_Notes/769149-001.pdf - dnl Cray's crayCC needs "-h std=c++11" - for switch in -std=c++$1 -std=c++0x +std=c++$1 "-h std=c++$1"; do - cachevar=AS_TR_SH([ax_cv_cxx_compile_cxx$1_$switch]) - AC_CACHE_CHECK(whether $CXX supports C++$1 features with 
$switch, - $cachevar, - [ac_save_CXXFLAGS="$CXXFLAGS" - CXXFLAGS="$CXXFLAGS $switch" - AC_COMPILE_IFELSE([AC_LANG_SOURCE([_AX_CXX_COMPILE_STDCXX_testbody_$1])], - [eval $cachevar=yes], - [eval $cachevar=no]) - CXXFLAGS="$ac_save_CXXFLAGS"]) - if eval test x\$$cachevar = xyes; then - CXXFLAGS="$CXXFLAGS $switch" - ac_success=yes - break - fi - done - fi]) - AC_LANG_POP([C++]) - if test x$ax_cxx_compile_cxx$1_required = xtrue; then - if test x$ac_success = xno; then - AC_MSG_ERROR([*** A compiler with support for C++$1 language features is required.]) - fi - else - if test x$ac_success = xno; then - HAVE_CXX$1=0 - AC_MSG_NOTICE([No compiler with C++$1 support was found]) - else - HAVE_CXX$1=1 - AC_DEFINE(HAVE_CXX$1,1, - [define if the compiler supports basic C++$1 syntax]) - fi - - AC_SUBST(HAVE_CXX$1) - fi -]) - - -dnl Test body for checking C++11 support - -m4_define([_AX_CXX_COMPILE_STDCXX_testbody_11], - _AX_CXX_COMPILE_STDCXX_testbody_new_in_11 -) - - -dnl Test body for checking C++14 support - -m4_define([_AX_CXX_COMPILE_STDCXX_testbody_14], - _AX_CXX_COMPILE_STDCXX_testbody_new_in_11 - _AX_CXX_COMPILE_STDCXX_testbody_new_in_14 -) - - -dnl Tests for new features in C++11 - -m4_define([_AX_CXX_COMPILE_STDCXX_testbody_new_in_11], [[ - -// If the compiler admits that it is not ready for C++11, why torture it? -// Hopefully, this will speed up the test. 
- -#ifndef __cplusplus - -#error "This is not a C++ compiler" - -#elif __cplusplus < 201103L - -#error "This is not a C++11 compiler" - -#else - -namespace cxx11 -{ - - namespace test_static_assert - { - - template - struct check - { - static_assert(sizeof(int) <= sizeof(T), "not big enough"); - }; - - } - - namespace test_final_override - { - - struct Base - { - virtual void f() {} - }; - - struct Derived : public Base - { - virtual void f() override {} - }; - - } - - namespace test_double_right_angle_brackets - { - - template < typename T > - struct check {}; - - typedef check single_type; - typedef check> double_type; - typedef check>> triple_type; - typedef check>>> quadruple_type; - - } - - namespace test_decltype - { - - int - f() - { - int a = 1; - decltype(a) b = 2; - return a + b; - } - - } - - namespace test_type_deduction - { - - template < typename T1, typename T2 > - struct is_same - { - static const bool value = false; - }; - - template < typename T > - struct is_same - { - static const bool value = true; - }; - - template < typename T1, typename T2 > - auto - add(T1 a1, T2 a2) -> decltype(a1 + a2) - { - return a1 + a2; - } - - int - test(const int c, volatile int v) - { - static_assert(is_same::value == true, ""); - static_assert(is_same::value == false, ""); - static_assert(is_same::value == false, ""); - auto ac = c; - auto av = v; - auto sumi = ac + av + 'x'; - auto sumf = ac + av + 1.0; - static_assert(is_same::value == true, ""); - static_assert(is_same::value == true, ""); - static_assert(is_same::value == true, ""); - static_assert(is_same::value == false, ""); - static_assert(is_same::value == true, ""); - return (sumf > 0.0) ? 
sumi : add(c, v); - } - - } - - namespace test_noexcept - { - - int f() { return 0; } - int g() noexcept { return 0; } - - static_assert(noexcept(f()) == false, ""); - static_assert(noexcept(g()) == true, ""); - - } - - namespace test_constexpr - { - - template < typename CharT > - unsigned long constexpr - strlen_c_r(const CharT *const s, const unsigned long acc) noexcept - { - return *s ? strlen_c_r(s + 1, acc + 1) : acc; - } - - template < typename CharT > - unsigned long constexpr - strlen_c(const CharT *const s) noexcept - { - return strlen_c_r(s, 0UL); - } - - static_assert(strlen_c("") == 0UL, ""); - static_assert(strlen_c("1") == 1UL, ""); - static_assert(strlen_c("example") == 7UL, ""); - static_assert(strlen_c("another\0example") == 7UL, ""); - - } - - namespace test_rvalue_references - { - - template < int N > - struct answer - { - static constexpr int value = N; - }; - - answer<1> f(int&) { return answer<1>(); } - answer<2> f(const int&) { return answer<2>(); } - answer<3> f(int&&) { return answer<3>(); } - - void - test() - { - int i = 0; - const int c = 0; - static_assert(decltype(f(i))::value == 1, ""); - static_assert(decltype(f(c))::value == 2, ""); - static_assert(decltype(f(0))::value == 3, ""); - } - - } - - namespace test_uniform_initialization - { - - struct test - { - static const int zero {}; - static const int one {1}; - }; - - static_assert(test::zero == 0, ""); - static_assert(test::one == 1, ""); - - } - - namespace test_lambdas - { - - void - test1() - { - auto lambda1 = [](){}; - auto lambda2 = lambda1; - lambda1(); - lambda2(); - } - - int - test2() - { - auto a = [](int i, int j){ return i + j; }(1, 2); - auto b = []() -> int { return '0'; }(); - auto c = [=](){ return a + b; }(); - auto d = [&](){ return c; }(); - auto e = [a, &b](int x) mutable { - const auto identity = [](int y){ return y; }; - for (auto i = 0; i < a; ++i) - a += b--; - return x + identity(a + b); - }(0); - return a + b + c + d + e; - } - - int - test3() - { - 
const auto nullary = [](){ return 0; }; - const auto unary = [](int x){ return x; }; - using nullary_t = decltype(nullary); - using unary_t = decltype(unary); - const auto higher1st = [](nullary_t f){ return f(); }; - const auto higher2nd = [unary](nullary_t f1){ - return [unary, f1](unary_t f2){ return f2(unary(f1())); }; - }; - return higher1st(nullary) + higher2nd(nullary)(unary); - } - - } - - namespace test_variadic_templates - { - - template - struct sum; - - template - struct sum - { - static constexpr auto value = N0 + sum::value; - }; - - template <> - struct sum<> - { - static constexpr auto value = 0; - }; - - static_assert(sum<>::value == 0, ""); - static_assert(sum<1>::value == 1, ""); - static_assert(sum<23>::value == 23, ""); - static_assert(sum<1, 2>::value == 3, ""); - static_assert(sum<5, 5, 11>::value == 21, ""); - static_assert(sum<2, 3, 5, 7, 11, 13>::value == 41, ""); - - } - - // http://stackoverflow.com/questions/13728184/template-aliases-and-sfinae - // Clang 3.1 fails with headers of libstd++ 4.8.3 when using std::function - // because of this. - namespace test_template_alias_sfinae - { - - struct foo {}; - - template - using member = typename T::member_type; - - template - void func(...) {} - - template - void func(member*) {} - - void test(); - - void test() { func(0); } - - } - -} // namespace cxx11 - -#endif // __cplusplus >= 201103L - -]]) - - -dnl Tests for new features in C++14 - -m4_define([_AX_CXX_COMPILE_STDCXX_testbody_new_in_14], [[ - -// If the compiler admits that it is not ready for C++14, why torture it? -// Hopefully, this will speed up the test. - -#ifndef __cplusplus - -#error "This is not a C++ compiler" - -#elif __cplusplus < 201402L - -#error "This is not a C++14 compiler" - -#else - -namespace cxx14 -{ - - namespace test_polymorphic_lambdas - { - - int - test() - { - const auto lambda = [](auto&&... args){ - const auto istiny = [](auto x){ - return (sizeof(x) == 1UL) ? 
1 : 0; - }; - const int aretiny[] = { istiny(args)... }; - return aretiny[0]; - }; - return lambda(1, 1L, 1.0f, '1'); - } - - } - - namespace test_binary_literals - { - - constexpr auto ivii = 0b0000000000101010; - static_assert(ivii == 42, "wrong value"); - - } - - namespace test_generalized_constexpr - { - - template < typename CharT > - constexpr unsigned long - strlen_c(const CharT *const s) noexcept - { - auto length = 0UL; - for (auto p = s; *p; ++p) - ++length; - return length; - } - - static_assert(strlen_c("") == 0UL, ""); - static_assert(strlen_c("x") == 1UL, ""); - static_assert(strlen_c("test") == 4UL, ""); - static_assert(strlen_c("another\0test") == 7UL, ""); - - } - - namespace test_lambda_init_capture - { - - int - test() - { - auto x = 0; - const auto lambda1 = [a = x](int b){ return a + b; }; - const auto lambda2 = [a = lambda1(x)](){ return a; }; - return lambda2(); - } - - } - - namespace test_digit_seperators - { - - constexpr auto ten_million = 100'000'000; - static_assert(ten_million == 100000000, ""); - - } - - namespace test_return_type_deduction - { - - auto f(int& x) { return x; } - decltype(auto) g(int& x) { return x; } - - template < typename T1, typename T2 > - struct is_same - { - static constexpr auto value = false; - }; - - template < typename T > - struct is_same - { - static constexpr auto value = true; - }; - - int - test() - { - auto x = 0; - static_assert(is_same::value, ""); - static_assert(is_same::value, ""); - return x; - } - - } - -} // namespace cxx14 - -#endif // __cplusplus >= 201402L - -]]) diff --git a/m4/ax_valgrind_check.m4 b/m4/ax_valgrind_check.m4 deleted file mode 100644 index 1c1c0cd4..00000000 --- a/m4/ax_valgrind_check.m4 +++ /dev/null @@ -1,235 +0,0 @@ -# =========================================================================== -# https://www.gnu.org/software/autoconf-archive/ax_valgrind_check.html -# =========================================================================== -# -# SYNOPSIS -# -# 
AX_VALGRIND_DFLT(memcheck|helgrind|drd|sgcheck, on|off) -# AX_VALGRIND_CHECK() -# -# DESCRIPTION -# -# AX_VALGRIND_CHECK checks whether Valgrind is present and, if so, allows -# running `make check` under a variety of Valgrind tools to check for -# memory and threading errors. -# -# Defines VALGRIND_CHECK_RULES which should be substituted in your -# Makefile; and $enable_valgrind which can be used in subsequent configure -# output. VALGRIND_ENABLED is defined and substituted, and corresponds to -# the value of the --enable-valgrind option, which defaults to being -# enabled if Valgrind is installed and disabled otherwise. Individual -# Valgrind tools can be disabled via --disable-valgrind-, the -# default is configurable via the AX_VALGRIND_DFLT command or is to use -# all commands not disabled via AX_VALGRIND_DFLT. All AX_VALGRIND_DFLT -# calls must be made before the call to AX_VALGRIND_CHECK. -# -# If unit tests are written using a shell script and automake's -# LOG_COMPILER system, the $(VALGRIND) variable can be used within the -# shell scripts to enable Valgrind, as described here: -# -# https://www.gnu.org/software/gnulib/manual/html_node/Running-self_002dtests-under-valgrind.html -# -# Usage example: -# -# configure.ac: -# -# AX_VALGRIND_DFLT([sgcheck], [off]) -# AX_VALGRIND_CHECK -# -# Makefile.am: -# -# @VALGRIND_CHECK_RULES@ -# VALGRIND_SUPPRESSIONS_FILES = my-project.supp -# EXTRA_DIST = my-project.supp -# -# This results in a "check-valgrind" rule being added to any Makefile.am -# which includes "@VALGRIND_CHECK_RULES@" (assuming the module has been -# configured with --enable-valgrind). Running `make check-valgrind` in -# that directory will run the module's test suite (`make check`) once for -# each of the available Valgrind tools (out of memcheck, helgrind and drd) -# while the sgcheck will be skipped unless enabled again on the -# commandline with --enable-valgrind-sgcheck. The results for each check -# will be output to test-suite-$toolname.log. 
The target will succeed if -# there are zero errors and fail otherwise. -# -# Alternatively, a "check-valgrind-$TOOL" rule will be added, for $TOOL in -# memcheck, helgrind, drd and sgcheck. These are useful because often only -# some of those tools can be ran cleanly on a codebase. -# -# The macro supports running with and without libtool. -# -# LICENSE -# -# Copyright (c) 2014, 2015, 2016 Philip Withnall -# -# Copying and distribution of this file, with or without modification, are -# permitted in any medium without royalty provided the copyright notice -# and this notice are preserved. This file is offered as-is, without any -# warranty. - -#serial 15 - -dnl Configured tools -m4_define([valgrind_tool_list], [[memcheck], [helgrind], [drd], [sgcheck]]) -m4_set_add_all([valgrind_exp_tool_set], [sgcheck]) -m4_foreach([vgtool], [valgrind_tool_list], - [m4_define([en_dflt_valgrind_]vgtool, [on])]) - -AC_DEFUN([AX_VALGRIND_DFLT],[ - m4_define([en_dflt_valgrind_$1], [$2]) -])dnl - -AC_DEFUN([AX_VALGRIND_CHECK],[ - dnl Check for --enable-valgrind - AC_ARG_ENABLE([valgrind], - [AS_HELP_STRING([--enable-valgrind], [Whether to enable Valgrind on the unit tests])], - [enable_valgrind=$enableval],[enable_valgrind=]) - - AS_IF([test "$enable_valgrind" != "no"],[ - # Check for Valgrind. - AC_CHECK_PROG([VALGRIND],[valgrind],[valgrind]) - AS_IF([test "$VALGRIND" = ""],[ - AS_IF([test "$enable_valgrind" = "yes"],[ - AC_MSG_ERROR([Could not find valgrind; either install it or reconfigure with --disable-valgrind]) - ],[ - enable_valgrind=no - ]) - ],[ - enable_valgrind=yes - ]) - ]) - - AM_CONDITIONAL([VALGRIND_ENABLED],[test "$enable_valgrind" = "yes"]) - AC_SUBST([VALGRIND_ENABLED],[$enable_valgrind]) - - # Check for Valgrind tools we care about. 
- [valgrind_enabled_tools=] - m4_foreach([vgtool],[valgrind_tool_list],[ - AC_ARG_ENABLE([valgrind-]vgtool, - m4_if(m4_defn([en_dflt_valgrind_]vgtool),[off],dnl -[AS_HELP_STRING([--enable-valgrind-]vgtool, [Whether to use ]vgtool[ during the Valgrind tests])],dnl -[AS_HELP_STRING([--disable-valgrind-]vgtool, [Whether to skip ]vgtool[ during the Valgrind tests])]), - [enable_valgrind_]vgtool[=$enableval], - [enable_valgrind_]vgtool[=]) - AS_IF([test "$enable_valgrind" = "no"],[ - enable_valgrind_]vgtool[=no], - [test "$enable_valgrind_]vgtool[" ]dnl -m4_if(m4_defn([en_dflt_valgrind_]vgtool), [off], [= "yes"], [!= "no"]),[ - AC_CACHE_CHECK([for Valgrind tool ]vgtool, - [ax_cv_valgrind_tool_]vgtool,[ - ax_cv_valgrind_tool_]vgtool[=no - m4_set_contains([valgrind_exp_tool_set],vgtool, - [m4_define([vgtoolx],[exp-]vgtool)], - [m4_define([vgtoolx],vgtool)]) - AS_IF([`$VALGRIND --tool=]vgtoolx[ --help >/dev/null 2>&1`],[ - ax_cv_valgrind_tool_]vgtool[=yes - ]) - ]) - AS_IF([test "$ax_cv_valgrind_tool_]vgtool[" = "no"],[ - AS_IF([test "$enable_valgrind_]vgtool[" = "yes"],[ - AC_MSG_ERROR([Valgrind does not support ]vgtool[; reconfigure with --disable-valgrind-]vgtool) - ],[ - enable_valgrind_]vgtool[=no - ]) - ],[ - enable_valgrind_]vgtool[=yes - ]) - ]) - AS_IF([test "$enable_valgrind_]vgtool[" = "yes"],[ - valgrind_enabled_tools="$valgrind_enabled_tools ]m4_bpatsubst(vgtool,[^exp-])[" - ]) - AC_SUBST([ENABLE_VALGRIND_]vgtool,[$enable_valgrind_]vgtool) - ]) - AC_SUBST([valgrind_tools],["]m4_join([ ], valgrind_tool_list)["]) - AC_SUBST([valgrind_enabled_tools],[$valgrind_enabled_tools]) - -[VALGRIND_CHECK_RULES=' -# Valgrind check -# -# Optional: -# - VALGRIND_SUPPRESSIONS_FILES: Space-separated list of Valgrind suppressions -# files to load. (Default: empty) -# - VALGRIND_FLAGS: General flags to pass to all Valgrind tools. -# (Default: --num-callers=30) -# - VALGRIND_$toolname_FLAGS: Flags to pass to Valgrind $toolname (one of: -# memcheck, helgrind, drd, sgcheck). 
(Default: various) - -# Optional variables -VALGRIND_SUPPRESSIONS ?= $(addprefix --suppressions=,$(VALGRIND_SUPPRESSIONS_FILES)) -VALGRIND_FLAGS ?= --num-callers=30 -VALGRIND_memcheck_FLAGS ?= --leak-check=full --show-reachable=no -VALGRIND_helgrind_FLAGS ?= --history-level=approx -VALGRIND_drd_FLAGS ?= -VALGRIND_sgcheck_FLAGS ?= - -# Internal use -valgrind_log_files = $(addprefix test-suite-,$(addsuffix .log,$(valgrind_tools))) - -valgrind_memcheck_flags = --tool=memcheck $(VALGRIND_memcheck_FLAGS) -valgrind_helgrind_flags = --tool=helgrind $(VALGRIND_helgrind_FLAGS) -valgrind_drd_flags = --tool=drd $(VALGRIND_drd_FLAGS) -valgrind_sgcheck_flags = --tool=exp-sgcheck $(VALGRIND_sgcheck_FLAGS) - -valgrind_quiet = $(valgrind_quiet_$(V)) -valgrind_quiet_ = $(valgrind_quiet_$(AM_DEFAULT_VERBOSITY)) -valgrind_quiet_0 = --quiet -valgrind_v_use = $(valgrind_v_use_$(V)) -valgrind_v_use_ = $(valgrind_v_use_$(AM_DEFAULT_VERBOSITY)) -valgrind_v_use_0 = @echo " USE " $(patsubst check-valgrind-%,%,$''@):; - -# Support running with and without libtool. 
-ifneq ($(LIBTOOL),) -valgrind_lt = $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=execute -else -valgrind_lt = -endif - -# Use recursive makes in order to ignore errors during check -check-valgrind: -ifeq ($(VALGRIND_ENABLED),yes) - $(A''M_V_at)$(MAKE) $(AM_MAKEFLAGS) -k \ - $(foreach tool, $(valgrind_enabled_tools), check-valgrind-$(tool)) -else - @echo "Need to reconfigure with --enable-valgrind" -endif - -# Valgrind running -VALGRIND_TESTS_ENVIRONMENT = \ - $(TESTS_ENVIRONMENT) \ - env VALGRIND=$(VALGRIND) \ - G_SLICE=always-malloc,debug-blocks \ - G_DEBUG=fatal-warnings,fatal-criticals,gc-friendly - -VALGRIND_LOG_COMPILER = \ - $(valgrind_lt) \ - $(VALGRIND) $(VALGRIND_SUPPRESSIONS) --error-exitcode=1 $(VALGRIND_FLAGS) - -define valgrind_tool_rule = -check-valgrind-$(1): -ifeq ($$(VALGRIND_ENABLED)-$$(ENABLE_VALGRIND_$(1)),yes-yes) - $$(valgrind_v_use)$$(MAKE) check-TESTS \ - TESTS_ENVIRONMENT="$$(VALGRIND_TESTS_ENVIRONMENT)" \ - LOG_COMPILER="$$(VALGRIND_LOG_COMPILER)" \ - LOG_FLAGS="$$(valgrind_$(1)_flags)" \ - TEST_SUITE_LOG=test-suite-$(1).log -else ifeq ($$(VALGRIND_ENABLED),yes) - @echo "Need to reconfigure with --enable-valgrind-$(1)" -else - @echo "Need to reconfigure with --enable-valgrind" -endif -endef - -$(foreach tool,$(valgrind_tools),$(eval $(call valgrind_tool_rule,$(tool)))) - -A''M_DISTCHECK_CONFIGURE_FLAGS ?= -A''M_DISTCHECK_CONFIGURE_FLAGS += --disable-valgrind - -MOSTLYCLEANFILES ?= -MOSTLYCLEANFILES += $(valgrind_log_files) - -.PHONY: check-valgrind $(add-prefix check-valgrind-,$(valgrind_tools)) -'] - - AC_SUBST([VALGRIND_CHECK_RULES]) - m4_ifdef([_AM_SUBST_NOTMAKE], [_AM_SUBST_NOTMAKE([VALGRIND_CHECK_RULES])]) -]) diff --git a/meson.build b/meson.build new file mode 100644 index 00000000..a562dd9e --- /dev/null +++ b/meson.build @@ -0,0 +1,123 @@ +# SPDX-License-Identifier: LGPL-2.1-only +# Copyright (c) 2020, AT&T Intellectual Property. All rights reserved. 
+
+project('vyatta-dataplane', ['c', 'cpp'],
+  version: run_command('dpkg-parsechangelog', '-S', 'Version', check : true).stdout().strip(),
+  default_options: [
+    'debug=true',
+    'optimization=3',
+    'werror=true',
+    'warning_level=2',
+    'c_std=gnu11',
+    'b_lto=true'
+  ]
+)
+
+configure_file(  # generates build_config.h, force-included into every C file below
+  output : 'build_config.h',
+  configuration : {
+    'PACKAGE_VERSION' : '"' + meson.project_version() + '"',
+    'HAVE_SYSTEMD' : get_option('use_systemd').enabled(),
+    'FUSED_MODE' : get_option('fused_mode').enabled(),
+    'VYATTA_SYSCONF_DIR' : '"' + get_option('prefix') / get_option('sysconfdir') / 'vyatta' + '"',
+    'VYATTA_DATA_DIR' : '"' + get_option('prefix') / get_option('datadir') / 'vyatta' + '"',
+    'PKGLIB_DIR' : '"' + get_option('prefix') / get_option('libdir') / meson.project_name() + '"'
+  }
+)
+
+cc = meson.get_compiler('c')
+
+add_project_arguments(
+  '-include', 'build_config.h',
+  '-D_GNU_SOURCE',
+  cc.get_supported_arguments([
+    '-Wno-deprecated-declarations',
+    '-Wno-stringop-overflow',
+    '-Wno-stringop-truncation',
+    '-Wno-format-truncation'
+  ]),
+  language: 'c'
+)
+
+add_project_link_arguments(
+  cc.get_supported_link_arguments([
+    '-Wno-stringop-overflow' # 3 warnings in npf code
+  ]),
+  language: 'c'
+)
+
+cap_dep = dependency('libcap')
+check_dep = dependency('check', required: get_option('with_tests'))
+crypto_dep = dependency('libcrypto')
+czmq_dep = dependency('libczmq', version: '>= 3.0.2')
+dl_dep = cc.find_library('dl', required : true)
+dpdk_dep = dependency('libdpdk', version: '>= 19.11')
+ini_dep = cc.find_library('inih', required: true)
+json_dep = dependency('json-c', required: get_option('with_tests'))
+m_dep = cc.find_library('m', required : true)
+mnl_dep = dependency('libmnl')
+ndpi_dep = dependency('libndpi', version: '>= 3.4')
+osip2_dep = dependency('libosip2')
+pcap_dep = cc.find_library('libpcap', required: true)
+protobuf_dep = dependency('protobuf')
+proto_c_dep = dependency('libprotobuf-c', version: '>= 1.0.0')
+rte_bus_vdev_dep = cc.find_library('rte_bus_vdev', required : true)
+rte_pmd_bond_dep = cc.find_library('rte_pmd_bond', required : true)
+rte_pmd_ring_dep = cc.find_library('rte_pmd_ring', required: get_option('with_tests'))
+rte_pmd_vhost_dep = cc.find_library('rte_pmd_vhost', required : true)
+swport_dep = dependency('vyatta-dpdk-swport', version: '>= 0.1.3')
+systemd_dep = dependency('libsystemd', required: get_option('use_systemd'))
+threads_dep = dependency('threads')
+urcu_cds_dep = dependency('liburcu-cds', version: '>= 0.8.0')
+urcu_dep = dependency('liburcu', version: '>= 0.8.0')
+urcu_qsbr_dep = dependency('liburcu-qsbr', version: '>= 0.8.0')
+zmq_dep = dependency('libzmq', version: '>= 4.0.4')
+
+subdir('include')
+subdir('protobuf')
+subdir('src/pipeline')
+subdir('src')
+
+if get_option('with_tests').enabled()  # test-only subdirectories
+  subdir('src/pipeline/nodes/sample')
+  subdir('tests/whole_dp')
+endif
+
+install_data('dataplane-drivers.conf',
+  install_dir: get_option('sysconfdir') / 'vyatta'
+)
+
+install_data('dataplane-drivers-default.conf',
+  install_dir: get_option('datadir') / 'vyatta'
+)
+
+install_data('mlx4_core.conf',
+  install_dir: '/etc/modprobe.d'
+)
+
+pkg = import('pkgconfig')
+pkg.generate(
+  filebase: 'vyatta-dataplane-dev',
+  name: 'Vyatta Dataplane',
+  description: 'Dataplane Plugin Development',
+  subdirs: 'vyatta-dataplane',
+  libraries: jsonw_library
+)
+pkg.generate(
+  filebase: 'libvyattafal',
+  name: 'Vyatta Forwarding Abstraction Layer',
+  description: 'Dataplane FAL Plugin Development',
+  subdirs: 'vyatta-dataplane',
+  libraries: []
+)
+
+run_clang_tidy = find_program('run-clang-tidy', required: false)
+if run_clang_tidy.found() and (cc.get_id() == 'clang')
+  run_target('clang-tidy',
+    command: [ 'scripts/run-clang-tidy_wrapper.sh' ],
+    depends: [pl_fused_gen_c, protobuf_generated_c, sample_generated_protobuf_c]
+  )
+else  # run_target() spawns no shell, so wrap the hint in sh -c to make the target really fail
+  run_target('clang-tidy',
+    command: ['sh', '-c', 'echo "install clang-tidy and configure CC=clang" && false'])
+endif
diff --git a/meson_options.txt
b/meson_options.txt
new file mode 100644
index 00000000..48cdaaa9
--- /dev/null
+++ b/meson_options.txt
@@ -0,0 +1,4 @@
+option('all_tests', type : 'boolean', value : false)  # not read by the top-level meson.build; presumably used by the tests subdir - TODO confirm
+option('with_tests', type : 'feature', value : 'enabled')  # gates the check/json-c/rte_pmd_ring deps and the test subdirs
+option('use_systemd', type : 'feature', value : 'enabled')  # gates the libsystemd dep and HAVE_SYSTEMD in build_config.h
+option('fused_mode', type : 'feature', value : 'enabled')  # sets FUSED_MODE in build_config.h
diff --git a/mlx4_core.conf b/mlx4_core.conf
new file mode 100644
index 00000000..df8a3ab5
--- /dev/null
+++ b/mlx4_core.conf
@@ -0,0 +1 @@
+options mlx4_core log_num_mgm_entry_size=-1
diff --git a/protobuf/BreakoutConfig.proto b/protobuf/BreakoutConfig.proto
new file mode 100644
index 00000000..1f683639
--- /dev/null
+++ b/protobuf/BreakoutConfig.proto
@@ -0,0 +1,37 @@
+// Copyright (c) 2020, AT&T Intellectual Property. All rights reserved.
+//
+// SPDX-License-Identifier: LGPL-2.1-only
+//
+// Interface breakout config definitions
+//
+
+syntax="proto2";
+
+option go_package = "github.com/danos/vyatta-dataplane/protobuf/go/BreakoutConfig";
+
+message BreakoutConfig {
+    enum Action {
+        SET = 0;
+        DELETE = 1;
+    }
+
+    message BreakoutIfConfig {
+        optional Action action = 1;
+
+        // Interface to break out into multiple sub-interfaces
+        // Required for both SET and DELETE
+        optional string ifname = 2;
+
+        // Number of sub-interfaces to break out into
+        // Required for SET
+        optional uint32 numsubports = 3;
+
+        // Name of interface that is reserved in order to
+        // break out this one
+        optional string reservedifname = 4;
+    }
+
+    oneof mtype {
+        BreakoutIfConfig breakoutif = 1;
+    }
+}
diff --git a/protobuf/CgnatLogging.proto b/protobuf/CgnatLogging.proto
new file mode 100644
index 00000000..04e87e07
--- /dev/null
+++ b/protobuf/CgnatLogging.proto
@@ -0,0 +1,159 @@
+// Copyright (c) 2019, AT&T Intellectual Property. All rights reserved.
+// +// SPDX-License-Identifier: LGPL-2.1-only +// +// CGNAT logging protobuf definitions +// + +syntax = "proto2"; + +option go_package = "github.com/danos/vyatta-dataplane/protobuf/go/CgnatLogging"; + +message CgnatLog { + optional string cgnInstance = 1; + oneof logType { + PortAllocationLog portAllocationLog = 2; + SessionLog sessionLog = 3; + SubscriberLog subscriberLog = 4; + ConstraintLog constraintLog = 5; + } +} + +// following is based on "google/protobuf/timestamp.proto" +message Timestamp { + optional int64 seconds = 1; + optional int32 nanos = 2; +} + +enum PortAllocationEventType { + PB_EVENT_UNKNOWN = 0; + PB_EVENT_ALLOCATED = 1; + PB_EVENT_RELEASED = 2; +} + +message PortAllocationLog { + optional PortAllocationEventType eventType = 1; + optional uint32 subscriberAddress = 2; + optional string policyName = 3; + optional uint32 natAllocatedAddress = 4; + optional string poolName = 5; + optional uint32 startPortNumber = 6; + optional uint32 endPortNumber = 7; + optional Timestamp startTimestamp = 8; + optional Timestamp endTimestamp = 9; +} + +enum SessionEventType { + SESSION_EVENT_UNKNOWN = 0; + SESSION_EVENT_CREATE = 1; + SESSION_EVENT_ACTIVE = 2; + SESSION_EVENT_END = 3; +} + +enum Direction { + DIRECTION_OTHER = 0; + DIRECTION_IN = 1; + DIRECTION_OUT = 2; +} + +enum SessionState { + SESSION_OTHER = 0; + SESSION_NONE = 1; + SESSION_OPENING = 2; + SESSION_ESTABLISHED = 3; + SESSION_TRANSITORY = 4; + SESSION_C_FIN_RCV = 5; + SESSION_S_FIN_RCV = 6; + SESSION_CS_FIN_RCV = 7; + SESSION_CLOSED = 8; +} + +message SessionLog { + optional SessionEventType eventType = 1; + optional uint64 sessionId = 2; + optional uint32 subSessionId = 3; + optional string ifName = 4; + optional uint32 protocol = 5; + optional Direction direction = 6; + optional uint32 subscriberAddress = 7; + optional uint32 subscriberPort = 8; + optional uint32 natAllocatedAddress = 9; + optional uint32 natAllocatedPort = 10; + optional uint32 destinationAddress = 11; + optional uint32 
destinationPort = 12; + optional Timestamp startTimestamp = 13; + optional Timestamp currentTimestamp = 14; + optional SessionState state = 15; + optional uint32 stateHistory = 16; + optional uint64 inBytes = 17; + optional uint64 outBytes = 18; + optional uint64 inPackets = 19; + optional uint64 outPackets = 20; + optional uint64 networkRoundTripTime = 21; + optional uint64 internetRoundTripTime = 22; +} + +enum SubscriberEventType { + SUBSCRIBER_EVENT_UNKNOWN = 0; + SUBSCRIBER_EVENT_START = 1; + SUBSCRIBER_EVENT_END = 2; +} + +message SubscriberLog { + optional SubscriberEventType eventType = 1; + optional uint32 subscriberAddress = 2; + optional uint64 sessionCount = 3; + optional uint64 inBytes = 4; + optional uint64 outBytes = 5; + optional uint64 inPackets = 6; + optional uint64 outPackets = 7; + optional Timestamp startTimestamp = 8; + optional Timestamp endTimestamp = 9; +} + +enum ConstraintEventType { + CONSTRAINT_EVENT_UNKNOWN = 0; + CONSTRAINT_EVENT_SUBSCRIBER_TABLE = 1; + CONSTRAINT_EVENT_SESSION_TABLE = 2; + CONSTRAINT_EVENT_SESSION_CLEAR = 3; + CONSTRAINT_EVENT_DEST_SESSIONS = 4; + CONSTRAINT_EVENT_NAT_POOL = 5; + CONSTRAINT_EVENT_MAPPING_TABLE = 6; + CONSTRAINT_EVENT_BLOCKS_PER_SUBSCRIBER = 7; + CONSTRAINT_EVENT_BLOCKS_FOR_NAT_ALLOC_ADDR = 8; +} + +enum ConstraintLimit { + CONSTRAINT_LIMIT_UNKNOWN = 0; + CONSTRAINT_LIMIT_FULL = 1; + CONSTRAINT_LIMIT_AVAILABLE = 2; + CONSTRAINT_LIMIT_THRESHOLD = 3; +} + +message ConstraintLog { + optional ConstraintEventType eventType = 1; + optional ConstraintLimit constraintLimit = 2; + optional Timestamp timestamp = 3; + optional uint32 count = 4; + optional uint32 maxCount = 5; + + // The following are used for eventType CONSTRAINT_EVENT_DEST_SESSIONS. + // subscriberAddress is also used for eventType + // CONSTRAINT_EVENT_BLOCKS_PER_SUBSCRIBER. + // natAllocatedAddress is also used for eventType + // CONSTRAINT_EVENT_BLOCKS_FOR_NAT_ALLOC_ADDR. 
+ optional string ifName = 6; + optional uint64 sessionId = 7; + optional uint32 protocol = 8; + optional uint32 subscriberAddress = 9; + optional uint32 subscriberPort = 10; + optional uint32 natAllocatedAddress = 11; + optional uint32 natAllocatedPort = 12; + + // used for eventType CONSTRAINT_EVENT_SESSION_CLEAR to indicate the + // clear command + optional string desc = 13; + + // used for eventType CONSTRAINT_EVENT_NAT_POOL + optional string poolName = 14; +} diff --git a/protobuf/CryptoPolicyConfig.proto b/protobuf/CryptoPolicyConfig.proto index 235093c9..56ebe0df 100644 --- a/protobuf/CryptoPolicyConfig.proto +++ b/protobuf/CryptoPolicyConfig.proto @@ -7,6 +7,8 @@ syntax="proto2"; +option go_package = "github.com/danos/vyatta-dataplane/protobuf/go/CryptoPolicyConfig"; + import "IPAddress.proto"; message CryptoPolicyConfig { diff --git a/protobuf/DataplaneEnvelope.proto b/protobuf/DataplaneEnvelope.proto index 2d40387e..8a377b85 100644 --- a/protobuf/DataplaneEnvelope.proto +++ b/protobuf/DataplaneEnvelope.proto @@ -1,9 +1,11 @@ -// Copyright (c) 2018, AT&T Intellectual Property. All rights reserved. +// Copyright (c) 2018-2019, AT&T Intellectual Property. All rights reserved. // // SPDX-License-Identifier: LGPL-2.1-only syntax="proto2"; +option go_package = "github.com/danos/vyatta-dataplane/protobuf/go/DataplaneEnvelope"; + message DataplaneEnvelope { required string type = 1; //unique message name required bytes msg = 2; //message diff --git a/protobuf/FeatureAffinityConfig.proto b/protobuf/FeatureAffinityConfig.proto new file mode 100644 index 00000000..206909fc --- /dev/null +++ b/protobuf/FeatureAffinityConfig.proto @@ -0,0 +1,23 @@ +// Copyright (c) 2019, AT&T Intellectual Property. All rights reserved. 
+// +// SPDX-License-Identifier: LGPL-2.1-only +// +// Affinity configuration for features +// + +syntax="proto2"; + +option go_package = "github.com/danos/vyatta-dataplane/protobuf/go/FeatureAffinityConfig"; + +message FeatureAffinityConfig { + enum Feature { + CRYPTO = 0; // cryptographic processing + CRYPTO_FWD = 1; // post-cryptographic forwarding + } + + // feature for which affinity is being specified + optional Feature feature = 1; + + // hex string containing CPU mask + optional bytes cpumask = 2; +} \ No newline at end of file diff --git a/protobuf/GArpConfig.proto b/protobuf/GArpConfig.proto new file mode 100644 index 00000000..3e55c2e7 --- /dev/null +++ b/protobuf/GArpConfig.proto @@ -0,0 +1,34 @@ +// Copyright (c) 2019-2020, AT&T Intellectual Property. All rights reserved. +// +// SPDX-License-Identifier: LGPL-2.1-only +// +// GArp Configuration protobuf definitions +// + +syntax="proto2"; + +option go_package = "github.com/danos/vyatta-dataplane/protobuf/go/GArpConfig"; + +message GArpConfig { + enum garp_pkt_action { + GARP_PKT_DROP = 0; + GARP_PKT_UPDATE = 1; + } + + enum arp_op { + ARPOP_REQUEST = 1; + ARPOP_REPLY = 2; + } + + //Interface name + optional string ifname = 1; + + //Set or delete + optional bool set = 2; + + //Arp option (request or reply) + optional arp_op op = 3; + + //garp action (drop or update) + optional garp_pkt_action action = 4; +} diff --git a/protobuf/GPCConfig.proto b/protobuf/GPCConfig.proto new file mode 100644 index 00000000..2220d98f --- /dev/null +++ b/protobuf/GPCConfig.proto @@ -0,0 +1,174 @@ +// Copyright (c) 2020, AT&T Intellectual Property. All rights reserved. 
+// +// SPDX-License-Identifier: LGPL-2.1-only +// +// Generic Packet Classifier +// + +syntax="proto2"; + +option go_package = "github.com/danos/vyatta-dataplane/protobuf/go/GPCConfig"; + +import "IPAddress.proto"; + +message RuleMatch { + enum FragValue { + FRAGMENT_UNKNOWN = 0; + FRAGMENT_ANY = 1; + FRAGMENT_INITIAL = 2; + FRAGMENT_SUBSEQUENT = 3; + } + + message ICMPTypeAndCode { + optional uint32 typenum = 1; + optional uint32 code = 2; + } + + enum ICMPV6Class { + CLASS_UNKNOWN = 0; + CLASS_INFO = 1; + CLASS_ERROR = 2; + } + + oneof match_value { + IPPrefix src_ip = 1; + IPPrefix dest_ip = 2; + uint32 src_port = 3; + uint32 dest_port = 4; + FragValue fragment = 5; + uint32 dscp = 6; + uint32 ttl = 7; + ICMPTypeAndCode icmpv4 = 8; + ICMPTypeAndCode icmpv6 = 9; + ICMPV6Class icmpv6_class = 10; + uint32 proto_base = 11; + uint32 proto_final = 12; + } +} + +enum PolicerAwareness { + AWARENESS_UNKNOWN = 0; + COLOUR_AWARE = 1; + COLOUR_UNAWARE = 2; +} + +message PolicerParams { + // bandwidths are in bytes/sec + // bursts are in bytes + optional uint64 bw = 1; + optional uint64 excess_bw = 2; + optional uint64 burst = 3; + optional uint64 excess_burst = 4; + optional PolicerAwareness awareness = 5; +} + +message RuleAction { + enum PacketDecision { + DECISION_UNKNOWN = 0; + PASS = 1; + DROP = 2; + } + + enum ColourValue { + GREEN = 0; + YELLOW = 1; + RED = 2; + } + + oneof action_value { + PacketDecision decision = 1; + uint32 designation = 2; + ColourValue colour = 3; + PolicerParams policer = 4; + } +} + +message RuleCounter { + enum CounterType { + COUNTER_UNKNOWN = 0; + DISABLED = 1; // Counting disabled for this rule + AUTO = 2; // auto per rule per interface + NAMED = 3; + } + optional CounterType counter_type = 1; + + optional string name = 2; // only if NAMED, matches GPCCounter name +} + +message Rule { + // rule number 1-9999 + optional uint32 number = 1; + repeated RuleMatch matches = 2; + repeated RuleAction actions = 3; + optional RuleCounter counter = 
4; + + // Values to tie back to config for debug purposes + optional uint32 table_index = 5; + optional uint32 orig_number = 6; + optional string result = 7; +} + +enum TrafficType { + TRAFFIC_UNKNOWN = 0; + IPV4 = 1; + IPV6 = 2; +} + +message Rules { + optional TrafficType traffic_type = 1; + repeated Rule rules = 2; +} + +// Key is interface/location/traffic +// +message GPCTable { + // Interface to apply on + optional string ifname = 1; + + enum FeatureLocation { + LOCATION_UNKNOWN = 0; + INGRESS = 1; + EGRESS = 2; + PUNT_PATH = 3; + } + optional FeatureLocation location = 2; + + optional TrafficType traffic_type = 3; + + optional Rules rules = 4; + + repeated string table_names = 5; // debug use only, QoS only uses 1 +} + +// Counter names will be constructed based on how they appear in config. +// +// [global|local]//[|result:|rule:]/[|all] +// +// eg. local/QOS_THINGY/result:ping/dp0xe3 +// +message GPCCounter { + optional string name = 1; + + enum CounterFormat { + FORMAT_UNKNOWN = 0; + PACKETS_ONLY = 1; + PACKETS_AND_L2_L3_BYTES = 2; // QoS default, all bytes except L1 + } + optional CounterFormat format = 2; +} + +// Entire config for the feature. +// Create/Modify/Delete should be inferred based on prior message. +// +message GPCConfig { + enum FeatureType { + FEATURE_UNKNOWN = 0; + QOS = 1; + ACL = 2; + // etc + } + optional FeatureType feature_type = 1; + + repeated GPCCounter counters = 2; + repeated GPCTable tables = 3; +} diff --git a/protobuf/IP6RedirectsConfig.proto b/protobuf/IP6RedirectsConfig.proto new file mode 100644 index 00000000..2a2209ac --- /dev/null +++ b/protobuf/IP6RedirectsConfig.proto @@ -0,0 +1,15 @@ +// Copyright (c) 2019, AT&T Intellectual Property. All rights reserved. 
+// +// SPDX-License-Identifier: LGPL-2.1-only +// +// IP6Redirects protobuf definitions +// + +syntax="proto2"; + +option go_package = "github.com/danos/vyatta-dataplane/protobuf/go/IP6RedirectsConfig"; + +message IP6RedirectsConfig { + //IP6Redirect Enable|Disable + optional bool enable_redirects = 1; +} diff --git a/protobuf/IPAddress.proto b/protobuf/IPAddress.proto index dcaa870b..cd273938 100644 --- a/protobuf/IPAddress.proto +++ b/protobuf/IPAddress.proto @@ -7,9 +7,16 @@ syntax="proto2"; +option go_package = "github.com/danos/vyatta-dataplane/protobuf/go/IPAddress"; + message IPAddress { oneof address_oneof { uint32 ipv4_addr = 1; bytes ipv6_addr = 2; } -} \ No newline at end of file +} + +message IPPrefix { + optional IPAddress address = 1; + optional uint32 length = 2; +} diff --git a/protobuf/IPAddressOrLabel.proto b/protobuf/IPAddressOrLabel.proto new file mode 100644 index 00000000..d7c6283c --- /dev/null +++ b/protobuf/IPAddressOrLabel.proto @@ -0,0 +1,19 @@ +// Copyright (c) 2020, AT&T Intellectual Property. All rights reserved. +// +// SPDX-License-Identifier: LGPL-2.1-only +// + +syntax="proto2"; + +option go_package = "github.com/danos/vyatta-dataplane/protobuf/go/IPAddressOrLabel"; + +message IPAddressOrLabel { + oneof address_oneof { + // IPv4 address in network byte order + fixed32 ipv4_addr = 1; + // IPv6 address - must be exactly 16 bytes in length + bytes ipv6_addr = 2; + // MPLS label value in host byte order + uint32 mpls_label = 3; + } +} diff --git a/protobuf/LAGConfig.proto b/protobuf/LAGConfig.proto new file mode 100644 index 00000000..91f5d31a --- /dev/null +++ b/protobuf/LAGConfig.proto @@ -0,0 +1,49 @@ +// Copyright (c) 2020, AT&T Intellectual Property. All rights reserved. 
+ +syntax="proto2"; + +option go_package = "github.com/danos/vyatta-dataplane/protobuf/go/LAG"; + +message LAGConfig { + enum LagMode { + LAG_MODE_LACP = 0; + LAG_MODE_ACTIVE_BACKUP = 1; + LAG_MODE_BALANCED = 2; + } + + enum LacpActivity { + LACP_ACTIVITY_ACTIVE = 0; + LACP_ACTIVITY_PASSIVE = 1; + } + + enum LacpPeriodicRate { + LACP_PERIODIC_RATE_FAST = 0; + LACP_PERIODIC_RATE_SLOW = 1; + } + + message LagCreate { + optional string ifname = 1; + optional uint32 minimum_links = 2; + optional LagMode mode = 3; + + message LacpOptions { + optional uint32 key = 1; + optional LacpActivity lacp_activity = 2; + optional LacpPeriodicRate periodic_rate = 3; + } + optional LacpOptions lacp_options = 5; + } + + message LagDelete { + optional string ifname = 1; + } + + oneof mtype { + LagCreate lag_create = 1; + LagDelete lag_delete = 2; + } +} diff --git a/protobuf/MacLimitConfig.proto b/protobuf/MacLimitConfig.proto new file mode 100644 index 00000000..7e18969d --- /dev/null +++ b/protobuf/MacLimitConfig.proto @@ -0,0 +1,50 @@ +// Copyright (c) 2020, AT&T Intellectual Property. All rights reserved. 
+// +// SPDX-License-Identifier: LGPL-2.1-only +// +// MAC limit config definitions +// + +syntax="proto2"; + +option go_package = "github.com/danos/vyatta-dataplane/protobuf/go/MacLimitConfig"; + +message MacLimitConfig { + enum Action { + SET = 0; + DELETE = 1; + } + + message MacLimitProfileConfig { + optional Action action = 1; + + // Profile name + // Required for both SET and DELETE + optional string profile = 2; + + // Limit of MAC addresses to learn + // Required for SET + optional uint32 limit = 3; + } + + message MacLimitIfVLANConfig { + optional Action action = 1; + + // Interface that MAC limit applies to + // Required for both SET and DELETE + optional string ifname = 2; + + // VLAN that MAC limit applies to + // Required for both SET and DELETE + optional uint32 vlan = 3; + + // MAC limit profile to apply to interface & VLAN + // Required for SET + optional string profile = 4; + } + + oneof mtype { + MacLimitProfileConfig profile = 1; + MacLimitIfVLANConfig ifvlan = 2; + } +} diff --git a/protobuf/NbrResConfig.proto b/protobuf/NbrResConfig.proto new file mode 100644 index 00000000..b6f0813f --- /dev/null +++ b/protobuf/NbrResConfig.proto @@ -0,0 +1,38 @@ +// Copyright (c) 2020, AT&T Intellectual Property. All rights reserved. 
+// +// SPDX-License-Identifier: LGPL-2.1-only +// +// Neighbor Resolution Configuration protobuf definitions +// + +syntax="proto2"; + +option go_package = "github.com/danos/vyatta-dataplane/protobuf/go/NbrResConfig"; + +message NbrResConfig { + enum Prot { + ARP = 0; + ND6 = 1; + } + + enum Action { + SET = 0; + DELETE = 1; + } + + enum Param { + MAX_ENTRY = 0; + RES_TOKEN = 1; + AGING_TIME = 2; + } + + optional Prot prot = 1; + + optional Action action = 2; + + optional string ifname = 3; + + optional Param param = 4; + + optional uint32 value = 5; +} diff --git a/protobuf/PPPOEConfig.proto b/protobuf/PPPOEConfig.proto index baffc481..f25a680e 100644 --- a/protobuf/PPPOEConfig.proto +++ b/protobuf/PPPOEConfig.proto @@ -1,4 +1,4 @@ -// Copyright (c) 2018, AT&T Intellectual Property. All rights reserved. +// Copyright (c) 2018-2019, AT&T Intellectual Property. All rights reserved. // // SPDX-License-Identifier: LGPL-2.1-only // @@ -7,6 +7,8 @@ syntax="proto2"; +option go_package = "github.com/danos/vyatta-dataplane/protobuf/go/PPPOEConfig"; + message PPPOEConfig { //PPPoE interface name optional string pppname = 1; diff --git a/protobuf/PauseConfig.proto b/protobuf/PauseConfig.proto new file mode 100644 index 00000000..53a6bc6e --- /dev/null +++ b/protobuf/PauseConfig.proto @@ -0,0 +1,32 @@ +// Copyright (c) 2020, AT&T Intellectual Property. All rights reserved. 
+// +// SPDX-License-Identifier: LGPL-2.1-only +// +// Interface pause config definitions +// + + +syntax="proto2"; + +option go_package = "github.com/danos/vyatta-dataplane/protobuf/go/PauseConfig"; + +message PauseConfig { + enum PauseValue { + NONE = 0; + RX = 1; + TX = 2; + BOTH = 3; + } + + message PauseIfConfig { + // Interface that pause config relates to + optional string ifname = 1; + + // Whether pause frames should be received or transmitted + optional PauseValue value = 2; + } + + oneof mtype { + PauseIfConfig pauseif = 1; + } +} diff --git a/protobuf/PipelineStatsConfig.proto b/protobuf/PipelineStatsConfig.proto index 82b1ea89..fc7e9d87 100644 --- a/protobuf/PipelineStatsConfig.proto +++ b/protobuf/PipelineStatsConfig.proto @@ -7,6 +7,8 @@ syntax="proto2"; +option go_package = "github.com/danos/vyatta-dataplane/protobuf/go/PipelineStatsConfig"; + message PipelineStatsConfig { optional bool enable_stats = 1; } \ No newline at end of file diff --git a/protobuf/RibUpdate.proto b/protobuf/RibUpdate.proto new file mode 100644 index 00000000..7e66c7ec --- /dev/null +++ b/protobuf/RibUpdate.proto @@ -0,0 +1,28 @@ +// Copyright (c) 2020, AT&T Intellectual Property. All rights reserved. +// +// SPDX-License-Identifier: LGPL-2.1-only +// +// Route update definitions +// +// Note: defaults are optimised for scale (BGP) convergence case +// + +syntax="proto2"; + +option go_package = "github.com/danos/vyatta-dataplane/protobuf/go/RibUpdate"; + +import "Route.proto"; + +message RibUpdate { + enum Action { + // Create or update + UPDATE = 0; + DELETE = 1; + } + + optional Action action = 1 [default = UPDATE]; + + // The route to create/update or delete. + // Paths on the route are optional for a delete. + optional Route route = 2; +} diff --git a/protobuf/Route.proto b/protobuf/Route.proto new file mode 100644 index 00000000..21c8c4fa --- /dev/null +++ b/protobuf/Route.proto @@ -0,0 +1,105 @@ +// Copyright (c) 2020, AT&T Intellectual Property. All rights reserved. 
+// +// SPDX-License-Identifier: LGPL-2.1-only +// +// Route definitions +// +// Note: defaults are optimised for scale (BGP) convergence case +// + +syntax="proto2"; + +option go_package = "github.com/danos/vyatta-dataplane/protobuf/go/Route"; + +import "IPAddress.proto"; +import "IPAddressOrLabel.proto"; + +message Path { + enum PathType { + // Packet should be dropped + BLACKHOLE = 0; + // Packet should generate ICMP unreachable + UNREACHABLE = 1; + // Packet should be subject to local delivery + LOCAL = 2; + // Unicast packet forwarding + UNICAST = 3; + } + + // The type of the path + optional PathType type = 1 [default = UNICAST]; + + // The nexthop of the path. If omitted, this indicates an + // attached (interface) route. + optional IPAddress nexthop = 2; + + // The interface index, as determined by the kernel and + // advertised in RTM_NEWLINK messages. + // Currently, this should always be present, since recursive + // route resolution isn't performed in the dataplane. + optional uint32 ifindex = 3; + + // MPLS label stack in host byte order, with labels listed + // from top-most (closest to L2 header) to bottom-most + // (closest to payload). Labels should not include + // implicit-null. + repeated uint32 mpls_labels = 4 [packed=true]; + + // Bottom-of-stack only traffic towards this destination + optional bool mpls_bos_only = 5; + + // Preferred source address for originating packets destined + // to this path + optional IPAddress preferred_source = 6; + + // This is a backup path. Without any other indication, this + // indicates a PIC edge backup path and will only be used if + // all the primary paths are unusable. + optional bool backup = 7; + + // Recursive via MPLS label. This indicates that the path can + // instead be resolved recursively via an MPLS label, instead + // of directly by nexthop, interface with a label stack. + optional uint32 via_mpls_label = 8; + + // The number of non-recursive labels, i.e. 
which can be + // replaced by via_mpls_label. Must be <= number of + // mpls_labels. + optional uint32 n_non_recursive_labels = 9; +} + +message Route { + enum PayloadType { + // For IP routes, n/a. For MPLS routes, either IPv4 or IPv6. + UNSPEC = 0; + IPV4 = 1; + IPV6 = 2; + } + + // Prefix address of the route. + // Currently expected to always be present. + optional IPAddressOrLabel prefix = 1; + + // The length of the prefix for IP routes + optional uint32 prefix_length = 2; + + // Table ID of the route. This ID encodes both VRF and PBR table IDs + // Default is RT_TABLE_MAIN + optional uint32 table_id = 3 [default = 254]; + + // The paths of the route. No paths is equivalent to a blackhole + repeated Path paths = 4; + + // Type of payload carried by this route. Currently applicable + // only for MPLS routes + optional PayloadType payload_type = 5 [default = UNSPEC]; + + // Scope of the route. Crude routing metric, but with higher + // number being more preferred and lower number being less + // preferred. Should be < 256. Default is RT_SCOPE_UNIVERSE. + optional uint32 scope = 6 [default = 0]; + + // Routing protocol, with well known values defined by + // linux/rtnetlink.h. Should be < 256. Default is RTPROT_ZEBRA. + optional uint32 routing_protocol = 7 [default = 11]; +} diff --git a/protobuf/SpeedConfig.proto b/protobuf/SpeedConfig.proto new file mode 100644 index 00000000..a27e9d24 --- /dev/null +++ b/protobuf/SpeedConfig.proto @@ -0,0 +1,27 @@ +// Copyright (c) 2019, AT&T Intellectual Property. All rights reserved. 
+// +// SPDX-License-Identifier: LGPL-2.1-only +// +// cmd_speed protobuf definitions +// + +syntax="proto2"; + +option go_package = "github.com/danos/vyatta-dataplane/protobuf/go/SpeedConfig"; + +message SpeedConfig { + enum Duplex { + HALF = 0; + FULL = 1; + AUTO = -1; + } + + optional string ifname = 1; + + oneof speed { + uint32 numspeed = 2; + string auto = 3; + } + + optional Duplex duplex_option = 4; +} diff --git a/protobuf/SynceConfig.proto b/protobuf/SynceConfig.proto new file mode 100644 index 00000000..1e12ae51 --- /dev/null +++ b/protobuf/SynceConfig.proto @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: LGPL-2.1-only +// +// Copyright (c) 2020, AT&T Intellectual Property. All rights reserved. +// +// SyncE Config Definitions +// + +syntax="proto2"; + +option go_package = "github.com/danos/vyatta-dataplane/protobuf/go/SynceConfig"; + +message SynceConfig { + enum Action { + SYNCE_ENABLE_INTF = 0; + SYNCE_DISABLE_INTF = 1; + SYNCE_SET_CLK_SRC = 2; + } + + optional Action action = 1; + + optional uint32 ifindex = 2; +} + diff --git a/protobuf/TCPMSSConfig.proto b/protobuf/TCPMSSConfig.proto index 2138e244..66dff1fc 100644 --- a/protobuf/TCPMSSConfig.proto +++ b/protobuf/TCPMSSConfig.proto @@ -1,4 +1,4 @@ -// Copyright (c) 2018, AT&T Intellectual Property. All rights reserved. +// Copyright (c) 2018-2019, AT&T Intellectual Property. All rights reserved. 
// // SPDX-License-Identifier: LGPL-2.1-only @@ -6,6 +6,8 @@ syntax="proto2"; +option go_package = "github.com/danos/vyatta-dataplane/protobuf/go/TCPMSSConfig"; + message TCPMSSConfig { enum AddressFamily { TCP_MSS_V4 = 0; diff --git a/protobuf/VFPSetConfig.proto b/protobuf/VFPSetConfig.proto index 4254c154..5eba4255 100644 --- a/protobuf/VFPSetConfig.proto +++ b/protobuf/VFPSetConfig.proto @@ -7,6 +7,8 @@ syntax="proto2"; +option go_package = "github.com/danos/vyatta-dataplane/protobuf/go/VFPSetConfig"; + message VFPSetConfig { optional uint32 if_index = 1; optional string if_name = 2; diff --git a/protobuf/XConnectConfig.proto b/protobuf/XConnectConfig.proto index 8114d220..2a2a8382 100644 --- a/protobuf/XConnectConfig.proto +++ b/protobuf/XConnectConfig.proto @@ -1,4 +1,4 @@ -// Copyright (c) 2018, AT&T Intellectual Property. All rights reserved. +// Copyright (c) 2018-2019, AT&T Intellectual Property. All rights reserved. // // SPDX-License-Identifier: LGPL-2.1-only // @@ -7,6 +7,8 @@ syntax="proto2"; +option go_package = "github.com/danos/vyatta-dataplane/protobuf/go/XConnectConfig"; + message XConnectConfig { enum CommandType { ADD = 0; diff --git a/protobuf/cpp_rl.proto b/protobuf/cpp_rl.proto index 2ed017cf..279945e0 100644 --- a/protobuf/cpp_rl.proto +++ b/protobuf/cpp_rl.proto @@ -1,4 +1,4 @@ -// Copyright (c) 2019, AT&T Intellectual Property. All rights reserved. +// Copyright (c) 2019-2020, AT&T Intellectual Property. All rights reserved. // // SPDX-License-Identifier: LGPL-2.1-only // @@ -30,6 +30,8 @@ syntax="proto2"; package cpp_rl; +option go_package = "github.com/danos/vyatta-dataplane/protobuf/go/cpp_rl"; + // The CPP rate limiter message. message CPP_limiter { @@ -50,6 +52,8 @@ message CPP_limiter { CPP_ATTR_UDP = 11; CPP_ATTR_TCP = 12; CPP_ATTR_DEFAULT = 13; + CPP_ATTR_PIM = 14; + CPP_ATTR_IP_MULTICAST = 15; } // The CPP rate limiter attribute parameter message. 
diff --git a/protobuf/meson.build b/protobuf/meson.build new file mode 100644 index 00000000..ac984367 --- /dev/null +++ b/protobuf/meson.build @@ -0,0 +1,106 @@ +# SPDX-License-Identifier: LGPL-2.1-only +# Copyright (c) 2020, AT&T Intellectual Property. All rights reserved. + +protobuf_sources= [ + 'DataplaneEnvelope.proto', + 'XConnectConfig.proto', + 'PPPOEConfig.proto', + 'TCPMSSConfig.proto', + 'PipelineStatsConfig.proto', + 'CryptoPolicyConfig.proto', + 'IPAddress.proto', + 'VFPSetConfig.proto', + 'cpp_rl.proto', + 'IP6RedirectsConfig.proto', + 'SpeedConfig.proto', + 'GArpConfig.proto', + 'CgnatLogging.proto', + 'BreakoutConfig.proto', + 'IPAddressOrLabel.proto', + 'Route.proto', + 'RibUpdate.proto', + 'MacLimitConfig.proto', + 'PauseConfig.proto', + 'SynceConfig.proto', + 'LAGConfig.proto', + 'GPCConfig.proto', + 'FeatureAffinityConfig.proto', + 'NbrResConfig.proto', +] + +install_data(protobuf_sources, + install_dir: get_option('datadir') / meson.project_name() / 'protobuf' +) + +protoc = find_program('protoc') +perl_protobuf_generator = files('../scripts/vyatta-generate-pb-perl.pl') + +protobuf_generated_c = [] +protobuf_generated_c_headers = [] +protobuf_generated_cxx = [] +protobuf_generated_py = [] +protobuf_generated_go = [] +protobuf_generated_perl = [] +foreach protobuf_definition : protobuf_sources + + generated_c = custom_target('c_' + protobuf_definition, + command: [protoc, '--proto_path=@CURRENT_SOURCE_DIR@', '--c_out=@OUTDIR@', '@INPUT@'], + input: protobuf_definition, + output: ['@BASENAME@.pb-c.c', '@BASENAME@.pb-c.h'], + install: true, + install_dir: [false, get_option('includedir') / meson.project_name() / 'protobuf'] + ) + protobuf_generated_c += generated_c + protobuf_generated_c_headers += generated_c[1] + + protobuf_generated_cxx += custom_target('cxx_' + protobuf_definition, + command: [protoc, '--proto_path=@CURRENT_SOURCE_DIR@', '--cpp_out=@OUTDIR@', '@INPUT@'], + input: protobuf_definition, + output: ['@BASENAME@.pb.cc', 
'@BASENAME@.pb.h'], + install: true, + install_dir: [false, get_option('includedir') / meson.project_name() / 'protobuf'] + ) + + protobuf_generated_py += custom_target('py_' + protobuf_definition, + command: [protoc, '--proto_path=@CURRENT_SOURCE_DIR@', '--python_out=@OUTDIR@', '@INPUT@'], + input: protobuf_definition, + output: '@BASENAME@_pb2.py', + install: true, + install_dir: 'lib/python3/dist-packages/vyatta/proto' + ) + + protobuf_generated_go += custom_target('go_' + protobuf_definition, + command: [protoc, '--proto_path=@CURRENT_SOURCE_DIR@', '--go_out=paths=source_relative:@OUTDIR@', '@INPUT@'], + input: protobuf_definition, + output: '@BASENAME@.pb.go', + install: true, + install_dir: 'share/gocode/src/github.com/danos/vyatta-dataplane/protobuf/go/@0@'.format(protobuf_definition.split('.')[0]) + ) + + protobuf_generated_perl += custom_target('pl_' + protobuf_definition, + command: [perl_protobuf_generator, '@INPUT@', '@OUTDIR@', '@CURRENT_SOURCE_DIR@'], + input: protobuf_definition, + output: '@BASENAME@.pm', + install: true, + install_dir: 'share/perl5/vyatta/proto' + ) + +endforeach + +protobuf_generated_c_includes = include_directories('..') + +protobuf_generated_c_dependency = declare_dependency( + include_directories: protobuf_generated_c_includes, + sources: protobuf_generated_c_headers +) + +protobuf_cxx_library = shared_library( + 'vyatta-dataplane-proto', + sources: [protobuf_generated_cxx], + dependencies: [protobuf_dep], + install: true, + version: '1.0.0' +) + +pkg = import('pkgconfig') +pkg.generate(protobuf_cxx_library, subdirs: 'vyatta-dataplane/protobuf') diff --git a/scripts/checkpatch_wrapper.sh b/scripts/checkpatch_wrapper.sh index 224fa069..9574508a 100755 --- a/scripts/checkpatch_wrapper.sh +++ b/scripts/checkpatch_wrapper.sh @@ -11,9 +11,6 @@ usage () { TARGET="$1" # most likely "origin/master" SOURCE="$2" # most likely "bugfix/foo" or "feature/bar" -IGNORE_TYPES="FILE_PATH_CHANGES,LINE_SPACING,GIT_COMMIT_ID,SPLIT_STRING,\ 
-PREFER_PRINTF,SPDX_LICENSE_TAG" - MAX_CHANGED_LINES_ALLOWED=400 GIT_SOURCE_HASH=$(git rev-list -n1 "$SOURCE") @@ -47,10 +44,7 @@ do fi done -checkpatch.pl \ - --no-tree --no-signoff --show-types --emacs \ - --ignore "$IGNORE_TYPES" \ - --git "$GIT_MERGE_BASE..$GIT_SOURCE_HASH" +checkpatch.pl --git "$GIT_MERGE_BASE..$GIT_SOURCE_HASH" CHECKPATCH_EXIT_STATUS=$? if [ "$COMMIT_TOO_LARGE" = "true" ] || [ $CHECKPATCH_EXIT_STATUS -ne 0 ]; then diff --git a/scripts/pl_gen_fused b/scripts/pl_gen_fused index c3abdce0..8f31c38b 100755 --- a/scripts/pl_gen_fused +++ b/scripts/pl_gen_fused @@ -3,7 +3,7 @@ # Module: pl_gen_fused # # **** License **** -# Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. +# Copyright (c) 2017-2021, AT&T Intellectual Property. All rights reserved. # # SPDX-License-Identifier: LGPL-2.1-only # **** End License **** @@ -33,12 +33,14 @@ import io nodes = {} feats_for_feat_point = {} + def remove_quotes(string): """Remove leading and trailing double quotes from a string""" if not (string.startswith('"') and string.endswith('"')): raise RuntimeError('expected double quotes around string "{}"'.format(string)) return string[1:-1] + class NodeDecl: """ Parsed pipeline node declaration @@ -56,6 +58,7 @@ class NodeDecl: self.__references_self = False self.num_next = None self.feat_iterate = None + self.feat_type_find = None def set_handler(self, handler): self.handler = handler @@ -87,10 +90,15 @@ class NodeDecl: def set_feat_iterate(self, feat_iterate): self.feat_iterate = feat_iterate + def set_feat_type_find(self, feat_type_find): + self.feat_type_find = feat_type_find + @property def fused_no_dyn_feats_handler(self): if self.feat_iterate is not None: return self.handler.replace('_process', '_fused_no_dyn_feats') + if self.feat_type_find is not None: + return self.handler.replace('_process', '_fused_no_dyn_feats') return self.fused_handler @property @@ -118,7 +126,7 @@ class NodeDecl: def get_next_node(self, disp): """Returns the 
next node name corresponding to the given disposition""" next_node_name = self.next_nodes[disp] - if not ':' in next_node_name: + if ':' not in next_node_name: return self.domain + ':' + next_node_name return next_node_name @@ -128,6 +136,7 @@ class NodeDecl: if self.handler is None: raise RuntimeError('handler not set for node {}'.format(self.name)) + class FeatureDecl: """ Parsed pipeline feature declaration @@ -140,6 +149,8 @@ class FeatureDecl: self.visit_before = None self.__visit_after = None self.next_feature = None + self.feat_type = None + self.always_on = None def set_name(self, name): self.name = remove_quotes(name) @@ -159,6 +170,12 @@ class FeatureDecl: def set_visit_after(self, visit_after): self.__visit_after = remove_quotes(visit_after) + def set_feat_type(self, feat_type): + self.feat_type = feat_type + + def set_always_on(self, always_on): + self.always_on = always_on + @property def domain(self): return self.name[:self.name.find(':')] @@ -166,21 +183,21 @@ class FeatureDecl: @property def node_name(self): node_name = self.__node_name - if not ':' in node_name: + if ':' not in node_name: return self.domain + ':' + node_name return node_name @property def feature_point(self): feature_point = self.__feature_point - if not ':' in feature_point: + if ':' not in feature_point: return self.domain + ':' + feature_point return feature_point @property def visit_after(self): visit_after = self.__visit_after - if visit_after is not None and not ':' in visit_after: + if visit_after is not None and ':' not in visit_after: return self.domain + ':' + visit_after return visit_after @@ -194,6 +211,7 @@ class FeatureDecl: raise RuntimeError( 'feature_point not set for feature {}'.format(self.name)) + def parse_source_file(filename): """ Parse a node/feature source file @@ -235,7 +253,8 @@ def parse_source_file(filename): 'handler': parsing_node_decl.set_handler, 'type': parsing_node_decl.set_type, 'num_next': parsing_node_decl.set_num_next_sym, - 'feat_iterate': 
parsing_node_decl.set_feat_iterate, + 'feat_iterate': parsing_node_decl.set_feat_iterate, + 'feat_type_find': parsing_node_decl.set_feat_type_find, } field_start = line.find('.') if field_start < 0: @@ -254,14 +273,15 @@ def parse_source_file(filename): elif parsing_feature_decl is not None: if '};' in line: if args.debug: - print("{}: Feature '{}': node '{}', point '{}', id '{}', after '{}'".format( + print("{}: Feature '{}': node '{}', point '{}', id '{}', after '{}', type '{}'".format( filename, parsing_feature_decl.name, parsing_feature_decl.node_name, parsing_feature_decl.feature_point, parsing_feature_decl.id, - parsing_feature_decl.visit_after)) + parsing_feature_decl.visit_after, + parsing_feature_decl.feat_type)) parsing_feature_decl.validate() - if not parsing_feature_decl.feature_point in feats_for_feat_point: + if parsing_feature_decl.feature_point not in feats_for_feat_point: feats_for_feat_point[parsing_feature_decl.feature_point] = {} feats_for_feat_point[parsing_feature_decl.feature_point][parsing_feature_decl.name] = parsing_feature_decl parsing_feature_decl = None @@ -271,8 +291,10 @@ def parse_source_file(filename): 'node_name': parsing_feature_decl.set_node_name, 'feature_point': parsing_feature_decl.set_feature_point, 'visit_after': parsing_feature_decl.set_visit_after, - 'visit_before': parsing_feature_decl.set_visit_before, + 'visit_before': parsing_feature_decl.set_visit_before, 'id': parsing_feature_decl.set_id, + 'feat_type': parsing_feature_decl.set_feat_type, + 'always_on': parsing_feature_decl.set_always_on, } field_start = line.find('.') if field_start < 0: @@ -292,6 +314,7 @@ def parse_source_file(filename): elif 'PL_REGISTER_FEATURE(' in line: parsing_feature_decl = FeatureDecl() + def write_indent(f, indent_lvl, string): """Write to a file, indenting the string to the given level""" # Don't indent on empty lines @@ -299,7 +322,8 @@ def write_indent(f, indent_lvl, string): indent_lvl = 0 f.write('{}{}\n'.format('\t' * indent_lvl, 
string)) -def gen_invoke_fused_node(f, indent_lvl, from_feature_point, dyn_feats, node): + +def gen_invoke_fused_node(f, indent_lvl, from_feature_point, dyn_feats, from_case_feature, node): """ Generate the action of invoking a node @@ -342,32 +366,37 @@ def gen_invoke_fused_node(f, indent_lvl, from_feature_point, dyn_feats, node): if node.next_nodes: raise RuntimeError( 'continue node {} cannot have next nodes'.format(node.name)) - if not from_feature_point: + if not from_feature_point or from_case_feature: write_indent(f, indent_lvl, 'return true;') else: write_indent(f, indent_lvl, 'continue;') - return + return False elif node.node_type == 'PL_OUTPUT': if node.next_nodes: raise RuntimeError( 'output node {} cannot have next nodes'.format(node.name)) if dyn_feats: - write_indent(f, indent_lvl, '{}{}(pl_pkt);'.format(resp_assign, node.fused_handler)) + write_indent(f, indent_lvl, '{}{}(pl_pkt, NULL);'.format(resp_assign, node.fused_handler)) else: - write_indent(f, indent_lvl, '{}{}(pl_pkt);'.format(resp_assign, node.fused_no_dyn_feats_handler)) + write_indent(f, indent_lvl, '{}{}(pl_pkt, NULL);'.format(resp_assign, node.fused_no_dyn_feats_handler)) if from_feature_point: write_indent(f, indent_lvl, 'return false;') else: write_indent(f, indent_lvl, 'goto cleanup;') - return + return False elif node.node_type == 'PL_PROC': + if from_feature_point and not from_case_feature: + storage_ctx = "storage_ctx" + else: + storage_ctx = "NULL" + if node.references_self: write_indent(f, indent_lvl, 'do {') indent_lvl = indent_lvl + 1 if dyn_feats: - write_indent(f, indent_lvl, '{}{}(pl_pkt);'.format(resp_assign, node.fused_handler)) + write_indent(f, indent_lvl, '{}{}(pl_pkt, {});'.format(resp_assign, node.fused_handler, storage_ctx)) else: - write_indent(f, indent_lvl, '{}{}(pl_pkt);'.format(resp_assign, node.fused_no_dyn_feats_handler)) + write_indent(f, indent_lvl, '{}{}(pl_pkt, {});'.format(resp_assign, node.fused_no_dyn_feats_handler, storage_ctx)) else: raise 
RuntimeError( 'invalid node type: {} for node {}'.format(node.node_type, node.name)) @@ -386,8 +415,8 @@ def gen_invoke_fused_node(f, indent_lvl, from_feature_point, dyn_feats, node): write_indent(f, indent_lvl, '{}if (unlikely(resp == {})) {{'.format(else_str, disp)) else_str = '} else ' - r = gen_invoke_fused_node(f, indent_lvl + 1, from_feature_point, dyn_feats, nodes[next_node]) - if r == True: + r = gen_invoke_fused_node(f, indent_lvl + 1, from_feature_point, dyn_feats, from_case_feature, nodes[next_node]) + if r is True: ret = True if len(node.next_nodes) > 1: write_indent(f, indent_lvl, '}') @@ -395,18 +424,19 @@ def gen_invoke_fused_node(f, indent_lvl, from_feature_point, dyn_feats, node): indent_lvl = indent_lvl - 1 write_indent(f, indent_lvl, '}} while (unlikely(resp == {}));'.format(self_ref_disp)) write_indent(f, indent_lvl, '') - r = gen_invoke_fused_node(f, indent_lvl, from_feature_point, dyn_feats, nodes[node.get_next_node(node.default_disp)]) - if r == True: + r = gen_invoke_fused_node(f, indent_lvl, from_feature_point, dyn_feats, from_case_feature, nodes[node.get_next_node(node.default_disp)]) + if r is True: ret = True return ret + def gen_fused_graph(f, entry, dyn_feats): """Generate fused mode graph starting from the given entry point""" - if not entry in nodes: + if entry not in nodes: raise RuntimeError('Unknown entry-point node: {}'.format(entry)) node = nodes[entry] - write_indent(f, 0, 'inline __attribute__((always_inline)) bool') + write_indent(f, 0, 'ALWAYS_INLINE bool') if dyn_feats: write_indent(f, 0, 'pipeline_fused_{}(struct pl_packet *pl_pkt)'.format(node.c_name)) else: @@ -415,8 +445,8 @@ def gen_fused_graph(f, entry, dyn_feats): f_temp = io.StringIO() - ret = gen_invoke_fused_node(f_temp, 1, False, dyn_feats, node) - if (ret == True): + ret = gen_invoke_fused_node(f_temp, 1, False, dyn_feats, False, node) + if ret is True: write_indent(f, 1, 'int resp;') write_indent(f, 0, '') @@ -428,6 +458,95 @@ def gen_fused_graph(f, entry, 
dyn_feats): write_indent(f, 1, 'return false;') write_indent(f, 0, '}') + +def gen_fused_feature_invoke_by_case_find(f, node, feat_point, dyn_feats): + """ + Generate fused feature find functions for the given node. + + Finds the node based on the value without having to do an external lookup + """ + if feat_point in feats_for_feat_point: + features = dict(feats_for_feat_point[feat_point]) + else: + features = {} + + head_features = dict(features) + + write_indent(f, 0, 'ALWAYS_INLINE unsigned int') + if dyn_feats: + write_indent(f, 0, '{}_fused(unsigned int feat)'.format(node.feat_type_find)) + else: + write_indent(f, 0, '{}_fused_no_dyn_features(unsigned int feat)'.format(node.feat_type_find)) + + write_indent(f, 0, '{') + write_indent(f, 1, '') + write_indent(f, 1, 'switch (feat) {') + for feature in head_features.values(): + while True: + if feature.always_on: + write_indent(f, 1, 'case {}:'.format(feature.feat_type)) + write_indent(f, 2, 'return {};'.format(feature.id)) + if not feature.next_feature: + break + feature = feature.next_feature + write_indent(f, 1, 'default:') + write_indent(f, 2, 'break;') + write_indent(f, 1, '}') + if dyn_feats: + write_indent(f, 1, 'return {}(feat);'.format(node.feat_type_find)) + else: + write_indent(f, 1, 'return 0;') + write_indent(f, 0, '}') + + +def gen_fused_feature_invoke_by_case(f, node, feat_point, dyn_feats): + """ + Generate fused feature invocation for the given feature point + based on the feature type + + Only the registered feature for the type is invoked + """ + + if args.debug: + print("::{} {} {}".format(node, feat_point, dyn_feats)) + if feat_point in feats_for_feat_point: + features = dict(feats_for_feat_point[feat_point]) + else: + features = {} + + head_features = dict(features) + + write_indent(f, 0, 'ALWAYS_INLINE bool') + if dyn_feats: + write_indent(f, 0, 'pipeline_fused_{}_features(struct pl_packet *pl_pkt, unsigned int feat)'.format(node.c_name)) + else: + write_indent(f, 0, 
'pipeline_fused_{}_no_dyn_features(struct pl_packet *pl_pkt, unsigned int feat)'.format(node.c_name)) + + write_indent(f, 0, '{') + write_indent(f, 1, 'int resp = true;') + + write_indent(f, 1, '') + write_indent(f, 1, 'switch (feat) {') + + write_indent(f, 1, 'case 0:') + write_indent(f, 2, 'return true;') + for feature in head_features.values(): + while True: + write_indent(f, 1, 'case {}:'.format(feature.id)) + gen_invoke_fused_node(f, 2, True, dyn_feats, True, nodes[feature.node_name]) + write_indent(f, 2, '') + if not feature.next_feature: + break + feature = feature.next_feature + write_indent(f, 1, 'default:') + write_indent(f, 2, 'if (!pl_node_invoke_feature({}_node_ptr, feat, pl_pkt, NULL))'.format(node.c_name)) + write_indent(f, 3, 'return false;') + write_indent(f, 2, 'break;') + write_indent(f, 1, '}') + write_indent(f, 1, 'return resp;') + write_indent(f, 0, '}') + + def gen_fused_features_invoke(f, feat_point, dyn_feats): """ Generate fused feature invocation for the given feature point @@ -435,16 +554,23 @@ def gen_fused_features_invoke(f, feat_point, dyn_feats): The features are listed in the order they get added to the feature point array """ - if not feat_point in nodes: + if feat_point not in nodes: raise RuntimeError('Unknown feature point node: {}'.format(feat_point)) node = nodes[feat_point] + + if node.feat_type_find: + gen_fused_feature_invoke_by_case(f, node, feat_point, dyn_feats) + gen_fused_feature_invoke_by_case_find(f, node, feat_point, dyn_feats) + return + if not node.feat_iterate: raise RuntimeError('Entry-point node not feature point: {}'.format(feat_point)) if feat_point in feats_for_feat_point: features = dict(feats_for_feat_point[feat_point]) else: features = {} - write_indent(f, 0, 'inline __attribute__((always_inline)) bool') + + write_indent(f, 0, 'ALWAYS_INLINE bool') if dyn_feats: write_indent(f, 0, 'pipeline_fused_{}_features(struct pl_packet *pl_pkt, struct pl_node *node)'.format(node.c_name)) else: @@ -452,18 +578,19 @@ 
def gen_fused_features_invoke(f, feat_point, dyn_feats): write_indent(f, 0, '{') write_indent(f, 1, 'unsigned int feature = ~0u;') write_indent(f, 1, 'void *context;') + write_indent(f, 1, 'void *storage_ctx = NULL;') write_indent(f, 1, 'bool more;') resp_written = False head_features = dict(features) for feature in features.values(): - if not feature.node_name in nodes: + if feature.node_name not in nodes: raise RuntimeError( "unknown node {} for feature {}".format(feature.node_name, feature.name)) if len(nodes[feature.node_name].next_nodes) > 1 and not resp_written: write_indent(f, 1, 'int resp;') resp_written = True if feature.visit_after: - if not feature.visit_after in features: + if feature.visit_after not in features: raise RuntimeError( "unknown visit after {} for feature {}".format(feature.visit_after, feature.name)) # We only support one feature referencing another feature @@ -474,23 +601,23 @@ def gen_fused_features_invoke(f, feat_point, dyn_feats): features[feature.visit_after].next_feature = feature del head_features[feature.name] write_indent(f, 1, '') - write_indent(f, 1, 'for (more = {}(node, true, &feature, &context);'.format(node.feat_iterate)) + write_indent(f, 1, 'for (more = {}(node, true, &feature, &context, &storage_ctx);'.format(node.feat_iterate)) write_indent(f, 1, ' more;') - write_indent(f, 1, ' more = {}(node, false, &feature, &context)) {{'.format(node.feat_iterate)) + write_indent(f, 1, ' more = {}(node, false, &feature, &context, &storage_ctx)) {{'.format(node.feat_iterate)) write_indent(f, 2, 'switch (feature) {') for feature in head_features.values(): while True: if feature.id is None: continue write_indent(f, 2, 'case {}:'.format(feature.id)) - gen_invoke_fused_node(f, 3, True, dyn_feats, nodes[feature.node_name]) + gen_invoke_fused_node(f, 3, True, dyn_feats, False, nodes[feature.node_name]) write_indent(f, 3, '') if not feature.next_feature: break feature = feature.next_feature if dyn_feats: write_indent(f, 2, 'default:') - 
write_indent(f, 3, 'if (!pl_node_invoke_feature({}_node_ptr, feature, pl_pkt))'.format(node.c_name)) + write_indent(f, 3, 'if (!pl_node_invoke_feature({}_node_ptr, feature, pl_pkt, storage_ctx))'.format(node.c_name)) write_indent(f, 4, 'return false;') write_indent(f, 3, 'continue;') write_indent(f, 2, '}') @@ -499,6 +626,7 @@ def gen_fused_features_invoke(f, feat_point, dyn_feats): write_indent(f, 1, 'return true;') write_indent(f, 0, '}') + def gen_preamble(f): """Write out preamble comment for generated source and header files""" f.write('/*\n') @@ -508,11 +636,13 @@ def gen_preamble(f): f.write(' * {}\n'.format(filename)) f.write(' */\n') + def gen_fused_impl(f, includes, entry_points, feat_points): """Generate fused implementation source file""" gen_preamble(f) f.write('#include \n') f.write('#include \n') + f.write('#include "compiler.h"\n') if includes: for include in includes: f.write('#include "{}"\n'.format(include)) @@ -535,9 +665,14 @@ def gen_fused_impl(f, includes, entry_points, feat_points): for node_name in sorted(nodes.keys()): node = nodes[node_name] write_indent(f, 1, 'if (strcmp(node->name, "{}") == 0) {{'.format(node.name)) - write_indent(f, 2, 'node->node_decl_id = PL_NODE_{}_ID; return; }}'.format(node.c_name.upper())) + write_indent(f, 2, 'node->node_decl_id = PL_NODE_{}_ID;'.format(node.c_name.upper())) + if node.feat_iterate or node.feat_type_find: + write_indent(f, 2, 'node->feature_point_id = PL_FEATURE_POINT_{}_ID; return; }}'.format(node.c_name.upper())) + else: + write_indent(f, 2, 'node->feature_point_id = PL_FEATURE_POINT_NONE_ID; return; }') f.write('}\n') + def gen_node_disps(f, node): """ Generate disposition enum for nodes @@ -553,6 +688,7 @@ def gen_node_disps(f, node): write_indent(f, 0, '};') write_indent(f, 0, '') + def gen_node_fused_func_decls(f): """ Generate node fused processing function declaration and feature @@ -563,34 +699,40 @@ def gen_node_fused_func_decls(f): for node_name in sorted(nodes.keys()): node = 
nodes[node_name] gen_node_disps(f, node) - write_indent(f, 0, 'extern unsigned int {}(struct pl_packet *);'.format(node.handler)); - if node.feat_iterate is not None: + write_indent(f, 0, 'extern unsigned int {}(struct pl_packet *, void *context);'.format(node.handler)) + if node.feat_iterate is not None or node.feat_type_find is not None: write_indent(f, 0, '') - write_indent(f, 0, 'extern unsigned int {}_common(struct pl_packet *, enum pl_mode);'.format(node.handler)); - write_indent(f, 0, 'inline static __attribute__((always_inline)) unsigned int') - write_indent(f, 0, '{}(struct pl_packet *pl_pkt)'.format(node.fused_handler)) + write_indent(f, 0, 'extern unsigned int {}_common(struct pl_packet *, void *context __unused, enum pl_mode);'.format(node.handler)) + write_indent(f, 0, 'static ALWAYS_INLINE unsigned int') + write_indent(f, 0, '{}(struct pl_packet *pl_pkt, void *context)'.format(node.fused_handler)) write_indent(f, 0, '{') write_indent(f, 1, 'pl_inc_node_stat(PL_NODE_{}_ID);'.format(node.c_name.upper())) - write_indent(f, 1, 'return {}_common(pl_pkt, PL_MODE_FUSED);'.format(node.handler)) + write_indent(f, 1, 'return {}_common(pl_pkt, context, PL_MODE_FUSED);'.format(node.handler)) write_indent(f, 0, '}') write_indent(f, 0, '') - write_indent(f, 0, 'inline static __attribute__((always_inline)) unsigned int') - write_indent(f, 0, '{}(struct pl_packet *pl_pkt)'.format(node.fused_no_dyn_feats_handler)) + write_indent(f, 0, 'static ALWAYS_INLINE unsigned int') + write_indent(f, 0, '{}(struct pl_packet *pl_pkt, void *context)'.format(node.fused_no_dyn_feats_handler)) write_indent(f, 0, '{') write_indent(f, 1, 'pl_inc_node_stat(PL_NODE_{}_ID);'.format(node.c_name.upper())) - write_indent(f, 1, 'return {}_common(pl_pkt, PL_MODE_FUSED_NO_DYN_FEATS);'.format(node.handler)) + write_indent(f, 1, 'return {}_common(pl_pkt, context, PL_MODE_FUSED_NO_DYN_FEATS);'.format(node.handler)) write_indent(f, 0, '}') write_indent(f, 0, '') - write_indent(f, 0, 'bool') - 
write_indent(f, 0, '{}(struct pl_node *node, bool first, unsigned int *feature_id, void **context);'.format(node.feat_iterate)) + if node.feat_iterate is not None: + write_indent(f, 0, 'bool') + write_indent(f, 0, '{}(struct pl_node *node, bool first, unsigned int *feature_id, void **context, void **storage_ctx);'.format(node.feat_iterate)) + elif node.feat_type_find is not None: + write_indent(f, 0, 'int') + write_indent(f, 0, '{}(uint32_t feat_type);'.format(node.feat_type_find)) + write_indent(f, 0, 'unsigned int {}_fused(unsigned int feat);'.format(node.feat_type_find)) + write_indent(f, 0, 'unsigned int {}_fused_no_dyn_features(unsigned int feat);'.format(node.feat_type_find)) else: - write_indent(f, 0, 'inline static __attribute__((always_inline)) unsigned int') - write_indent(f, 0, '{}(struct pl_packet *pl_pkt)'.format(node.fused_handler)) + write_indent(f, 0, 'static ALWAYS_INLINE unsigned int') + write_indent(f, 0, '{}(struct pl_packet *pl_pkt, void *context)'.format(node.fused_handler)) write_indent(f, 0, '{') write_indent(f, 1, 'pl_inc_node_stat(PL_NODE_{}_ID);'.format(node.c_name.upper())) - write_indent(f, 1, 'return {}(pl_pkt);'.format(node.handler)) + write_indent(f, 1, 'return {}(pl_pkt, context);'.format(node.handler)) write_indent(f, 0, '}') - write_indent(f, 0, '') + def gen_fused_header(f, c_file_name, entry_points, feat_points): """Generate fused header file""" @@ -599,6 +741,7 @@ def gen_fused_header(f, c_file_name, entry_points, feat_points): f.write('#ifndef __{}__\n'.format(c_file_name)) f.write('#define __{}__\n'.format(c_file_name)) f.write('\n') + f.write('#include "compiler.h"\n') f.write('#include "pl_common.h"\n') f.write('#include "pl_internal.h"\n') f.write('#include "util.h"\n') @@ -611,13 +754,22 @@ def gen_fused_header(f, c_file_name, entry_points, feat_points): for node_name in sorted(nodes.keys()): node = nodes[node_name] write_indent(f, 1, 'PL_NODE_{}_ID,'.format(node.c_name.upper())) - f.write('PL_NODE_NUM_IDS};\n'); + 
f.write('PL_NODE_NUM_IDS};\n') + + f.write('\n') + f.write('enum pl_feature_point_id {\n') + write_indent(f, 1, 'PL_FEATURE_POINT_NONE_ID,') + for node_name in sorted(nodes.keys()): + node = nodes[node_name] + if node.feat_iterate or node.feat_type_find: + write_indent(f, 1, 'PL_FEATURE_POINT_{}_ID,'.format(node.c_name.upper())) + f.write('PL_FEATURE_POINT_NUM_IDS};\n') gen_node_fused_func_decls(f) write_indent(f, 0, '/* Fused-mode graph entry points */') if entry_points is not None: for entry in entry_points: - if not entry in nodes: + if entry not in nodes: raise RuntimeError( 'Unknown entry-point node: {}'.format(entry)) node = nodes[entry] @@ -627,35 +779,43 @@ def gen_fused_header(f, c_file_name, entry_points, feat_points): write_indent(f, 0, '/* Fused-mode feature invocations */') if feat_points is not None: for feat_point in feat_points: - if not feat_point in nodes: + if feat_point not in nodes: raise RuntimeError( 'Unknown feature point node: {}'.format(feat_point)) node = nodes[feat_point] - if not node.feat_iterate: - raise RuntimeError( - 'Feature point {} not declared as feature point node {}'.format(feat_point)) - f.write('bool\n') - f.write('pipeline_fused_{}_features(struct pl_packet *pl_pkt, struct pl_node *node);\n'.format(node.c_name)) - f.write('bool\n') - f.write('pipeline_fused_{}_no_dyn_features(struct pl_packet *pl_pkt, struct pl_node *node);\n'.format(node.c_name)) + if not node.feat_type_find: + if not node.feat_iterate: + raise RuntimeError( + 'Feature point {} not declared as feature point node'.format(feat_point)) + f.write('bool\n') + f.write('pipeline_fused_{}_features(struct pl_packet *pl_pkt, struct pl_node *node);\n'.format(node.c_name)) + f.write('bool\n') + f.write('pipeline_fused_{}_no_dyn_features(struct pl_packet *pl_pkt, struct pl_node *node);\n'.format(node.c_name)) + else: + f.write('bool\n') + f.write('pipeline_fused_{}_features(struct pl_packet *pl_pkt, unsigned int feat);\n'.format(node.c_name)) + f.write('bool\n') + 
f.write('pipeline_fused_{}_no_dyn_features(struct pl_packet *pl_pkt, unsigned int feat);\n'.format(node.c_name)) + f.write('\n') f.write('#endif /* __{}__ */\n'.format(c_file_name)) -arg_parser = argparse.ArgumentParser(description = 'Generate pipeline fused mode files') + +arg_parser = argparse.ArgumentParser(description='Generate pipeline fused mode files') arg_parser.add_argument('--debug', action='store_true', - help = 'Enable printing of debugging information') + help='Enable printing of debugging information') arg_parser.add_argument('--entry', action='append', - help = 'Generate function as an entry point into a fused graph') -arg_parser.add_argument('--feature-point', action = 'append', - help = 'Generate function for invoking fused features on a node') + help='Generate function as an entry point into a fused graph') +arg_parser.add_argument('--feature-point', action='append', + help='Generate function for invoking fused features on a node') arg_parser.add_argument('source_files', nargs='+', metavar='source-file', - help='a source file containing node or feature declarations') + help='a source file containing node or feature declarations') arg_parser.add_argument('--impl-out', action='store', - help = 'File to output generated fused implementation to') + help='File to output generated fused implementation to') arg_parser.add_argument('--include', action='append', - help = 'Name of header to include in implementation') + help='Name of header to include in implementation') arg_parser.add_argument('--header-out', action='store', - help = 'File to output generated fused header to') + help='File to output generated fused header to') args = arg_parser.parse_args() for filename in args.source_files: diff --git a/scripts/run-clang-tidy_wrapper.sh b/scripts/run-clang-tidy_wrapper.sh new file mode 100755 index 00000000..bd0a9565 --- /dev/null +++ b/scripts/run-clang-tidy_wrapper.sh @@ -0,0 +1,9 @@ +#!/bin/sh + +# Workaround for: 
https://github.com/mesonbuild/meson/pull/8365 +sed -i 's/-Xclang -fcolor-diagnostics/-fcolor-diagnostics/g' \ + "${MESON_BUILD_ROOT}/compile_commands.json" + +run-clang-tidy -quiet -j "$(nproc)" -p "${MESON_BUILD_ROOT}" \ + -header-filter='^((?!\.pb).)*\.h$' \ + '.*(?d_addr, 0xff, ETHER_ADDR_LEN); - ether_addr_copy(&ifp->eth_addr, &eh->s_addr); - eh->ether_type = htons(ETHER_TYPE_ARP); + memset(&eh->d_addr, 0xff, RTE_ETHER_ADDR_LEN); + rte_ether_addr_copy(&ifp->eth_addr, &eh->s_addr); + eh->ether_type = htons(RTE_ETHER_TYPE_ARP); ah = (struct ether_arp *) (eh+1); ah->arp_hrd = htons(ARPHRD_ETHER); - ah->arp_pro = htons(ETHER_TYPE_IPv4); - ah->arp_hln = ETHER_ADDR_LEN; /* hardware address length */ + ah->arp_pro = htons(RTE_ETHER_TYPE_IPV4); + ah->arp_hln = RTE_ETHER_ADDR_LEN; /* hardware address length */ ah->arp_pln = sizeof(in_addr_t); /* protocol address length */ ah->arp_op = htons(ARPOP_REQUEST); - ether_addr_copy(&ifp->eth_addr, (struct ether_addr *) ah->arp_sha); + rte_ether_addr_copy(&ifp->eth_addr, + (struct rte_ether_addr *) ah->arp_sha); memcpy(ah->arp_spa, &sip, sizeof(sip)); - memset(ah->arp_tha, 0, ETHER_ADDR_LEN); + memset(ah->arp_tha, 0, RTE_ETHER_ADDR_LEN); memcpy(ah->arp_tpa, &tip, sizeof(tip)); ARPSTAT_INC(if_vrfid(ifp), txrequests); @@ -229,17 +241,18 @@ arprequest(struct ifnet *ifp, struct sockaddr *sa) * this request so error code doesn't really matter. 
*/ int arpresolve(struct ifnet *ifp, struct rte_mbuf *m, - in_addr_t addr, struct ether_addr *desten) + in_addr_t addr, struct rte_ether_addr *desten) { struct llentry *la; +lookup: la = in_lltable_find(ifp, addr); /* resolved now */ if (likely(la && (la->la_flags & LLE_VALID))) { resolved: rte_atomic16_clear(&la->ll_idle); - ether_addr_copy(&la->ll_addr, desten); + rte_ether_addr_copy(&la->ll_addr, desten); return 0; } @@ -247,10 +260,8 @@ int arpresolve(struct ifnet *ifp, struct rte_mbuf *m, if (la == NULL) { la = in_lltable_lookup(ifp, LLE_CREATE|LLE_LOCAL, addr); - /* out of memory */ + /* out of memory or cache limit hit */ if (unlikely(la == NULL)) { - RTE_LOG(NOTICE, ARP, - "lltable_lookup create failed\n"); rte_pktmbuf_free(m); return -ENOMEM; } @@ -263,6 +274,15 @@ int arpresolve(struct ifnet *ifp, struct rte_mbuf *m, /* Lock entry to hold off update and timer */ rte_spinlock_lock(&la->ll_lock); + /* + * Whilst waiting for the spin lock, has the main thread + * snuck in and deleted the entry? + */ + if (unlikely(la->la_flags & LLE_DELETED)) { + rte_spinlock_unlock(&la->ll_lock); + goto lookup; + } + /* create lost race with lladdr_update */ if (unlikely(la->la_flags & LLE_VALID)) { rte_spinlock_unlock(&la->ll_lock); @@ -297,7 +317,7 @@ int arpresolve(struct ifnet *ifp, struct rte_mbuf *m, m = arprequest(ifp, (struct sockaddr *) &taddr); if (m) - if_output(ifp, m, NULL, ETHER_TYPE_ARP); + if_output(ifp, m, NULL, RTE_ETHER_TYPE_ARP); } return -EWOULDBLOCK; @@ -306,7 +326,7 @@ int arpresolve(struct ifnet *ifp, struct rte_mbuf *m, /* Optimized inline version of arpresolve. 
*/ ALWAYS_INLINE int arpresolve_fast(struct ifnet *ifp, struct rte_mbuf *m, - in_addr_t addr, struct ether_addr *desten) + in_addr_t addr, struct rte_ether_addr *desten) { struct llentry *la = in_lltable_find(ifp, addr); @@ -318,13 +338,13 @@ arpresolve_fast(struct ifnet *ifp, struct rte_mbuf *m, bool arp_input_validate(const struct ifnet *ifp, struct rte_mbuf *m) { - struct ether_hdr *eh; + struct rte_ether_hdr *eh; struct ether_arp *ah; in_addr_t itaddr, isaddr; uint16_t op; char addrb[INET_ADDRSTRLEN]; - eh = rte_pktmbuf_mtod(m, struct ether_hdr *); + eh = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); if (rte_pktmbuf_data_len(m) < sizeof(*eh) + sizeof(*ah)) { ARP_DEBUG("runt packet len %u\n", rte_pktmbuf_data_len(m)); goto drop; @@ -332,7 +352,7 @@ bool arp_input_validate(const struct ifnet *ifp, struct rte_mbuf *m) ah = (struct ether_arp *) (eh + 1); if (ah->arp_hrd != htons(ARPHRD_ETHER) || - ah->arp_pro != htons(ETHER_TYPE_IPv4)) { + ah->arp_pro != htons(RTE_ETHER_TYPE_IPV4)) { ARP_DEBUG("ignore arp for hrd %#x protocol %#x\n", ntohs(ah->arp_hrd), ntohs(ah->arp_pro)); goto drop; @@ -344,12 +364,13 @@ bool arp_input_validate(const struct ifnet *ifp, struct rte_mbuf *m) goto drop; } - if (is_multicast_ether_addr((struct ether_addr *) ah->arp_sha)) { + if (rte_is_multicast_ether_addr( + (struct rte_ether_addr *) ah->arp_sha)) { ARP_DEBUG("source hardware addresss is multicast.\n"); goto drop; } - if (is_zero_ether_addr((struct ether_addr *) ah->arp_sha)) { + if (rte_is_zero_ether_addr((struct rte_ether_addr *) ah->arp_sha)) { ARP_DEBUG("source hardware address is invalid.\n"); goto drop; } @@ -373,12 +394,13 @@ bool arp_input_validate(const struct ifnet *ifp, struct rte_mbuf *m) goto drop; } - if (ether_addr_equal((struct ether_addr *) ah->arp_sha, &ifp->eth_addr)) { + if (rte_ether_addr_equal((struct rte_ether_addr *) ah->arp_sha, + &ifp->eth_addr)) { ARP_DEBUG("saw own arp?"); goto drop; /* it's from me, ignore it. 
*/ } - if (is_broadcast_ether_addr((struct ether_addr *)ah->arp_sha)) { + if (rte_is_broadcast_ether_addr((struct rte_ether_addr *)ah->arp_sha)) { ARP_DEBUG("link address is broadcast for IP address %s!\n", inet_ntop(AF_INET, ah->arp_spa, addrb, sizeof(addrb))); @@ -391,25 +413,8 @@ bool arp_input_validate(const struct ifnet *ifp, struct rte_mbuf *m) return false; } -bool arp_is_arp_reply(struct ifnet *ifp, struct rte_mbuf *m) -{ - struct ether_hdr *eh; - struct ether_arp *ah; - uint16_t op; - - if (!arp_input_validate(ifp, m)) - return false; - - eh = rte_pktmbuf_mtod(m, struct ether_hdr *); - ah = (struct ether_arp *) (eh + 1); - op = ntohs(ah->arp_op); - if (op == ARPOP_REPLY) - return true; - return false; -} - /* Walk the ARP table. - * Only called by console (master thread); + * Only called by console (main thread); * Can not be called safely from forwarding loop. */ void @@ -423,3 +428,113 @@ arp_walk(const struct ifnet *ifp, ll_walkhash_f_t *f, void *arg) (*f)(ifp, lle, arg); } } + +/* Must be called with lle->ll_lock held */ +void +arp_entry_destroy(struct lltable *llt, struct llentry *lle) +{ + unsigned int pkts_dropped; + + pkts_dropped = llentry_destroy(llt, lle); + ARPSTAT_ADD(if_vrfid(llt->llt_ifp), dropped, pkts_dropped); +} + +/* + * arp-cfg ARP {SET|DELETE} + */ +static int cmd_arp_cfg_handler(struct pb_msg *pbmsg) +{ + NbrResConfig *msg = nbr_res_config__unpack(NULL, pbmsg->msg_len, + pbmsg->msg); + uint32_t val; + char *ifname; + int ret = -1; + bool set; + + if (!msg) { + RTE_LOG(ERR, ARP, + "Cfg failed to read NbrResConfig protobuf cmd\n"); + return ret; + } + if (msg->prot != NBR_RES_CONFIG__PROT__ARP) { + RTE_LOG(ERR, ARP, + "Cfg incorrect protocol (%d)\n", msg->prot); + goto end; + } + ifname = msg->ifname; + if (ifname && (*ifname != '\0' && strncmp("all", ifname, 4) != 0)) { + RTE_LOG(ERR, ARP, + "Cfg per-interface config not yet supported\n"); + goto end; + } + set = msg->action == NBR_RES_CONFIG__ACTION__SET; + val = msg->value; + + 
switch (msg->param) { + case NBR_RES_CONFIG__PARAM__AGING_TIME: + /* + * While ARP does not have a reachable time (30s) before entries + * can go stale due to it not yet having a state machine, ensure + * that entries cannot be aged out in less than this time. + */ + if (set && val < ARP_REACHABLE_TIME) { + RTE_LOG(ERR, ARP, + "Cfg res token value %d out of range\n", val); + goto end; + } + arp_cfg.arp_aging_time = set ? val : ARPT_KEEP; + ARP_DEBUG("Cfg param arp_aging_time (arp timeout) set to: %d\n", + arp_cfg.arp_aging_time); + break; + case NBR_RES_CONFIG__PARAM__MAX_ENTRY: + /* + * Changes to cache size only impact subsequent resolutions. + * So if cache size is reduced to less than the number of + * entries for an interface, then the latter number decreases + * only as entries fail to re-resolve. + */ + if (set && (int)val <= 0) { + RTE_LOG(ERR, ARP, + "Cfg max entry value %d out of range\n", val); + goto end; + } + arp_cfg.arp_max_entry = set ? val : ARP_MAX_ENTRY; + ARP_DEBUG("Cfg param arp_max_entry (cache size) set to: %d\n", + arp_cfg.arp_max_entry); + break; + default: + RTE_LOG(ERR, ARP, + "Cfg parameter not supported (%d)\n", msg->param); + goto end; + } + + ret = 0; +end: + nbr_res_config__free_unpacked(msg, NULL); + return ret; +} + +PB_REGISTER_CMD(arp_cfg_cmd) = { + .cmd = "vyatta:arp", + .handler = cmd_arp_cfg_handler, +}; + +int cmd_arp_get_cfg(FILE *f) +{ + json_writer_t *wr = jsonw_new(f); + + if (!wr) { + RTE_LOG(NOTICE, DATAPLANE, + "arp: Error creating JSON object for cfg params\n"); + return -1; + } + + jsonw_pretty(wr, true); + + jsonw_uint_field(wr, "Aging time", arp_cfg.arp_aging_time); + jsonw_int_field(wr, "Max entries", arp_cfg.arp_max_entry); + + jsonw_destroy(&wr); + + return 0; +} diff --git a/src/arp.h b/src/arp.h index a0c426f2..94bb6169 100644 --- a/src/arp.h +++ b/src/arp.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. 
All rights reserved. * Copyright (c) 2011-2017 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -14,14 +14,25 @@ struct ifnet; struct rte_mbuf; -struct ether_addr; +struct rte_ether_addr; struct sockaddr; +struct lltable; +struct llentry; bool arp_input_validate(const struct ifnet *ifp, struct rte_mbuf *m); int arpresolve(struct ifnet *ifp, struct rte_mbuf *m, - in_addr_t addr, struct ether_addr *desten); + in_addr_t addr, struct rte_ether_addr *desten); int arpresolve_fast(struct ifnet *ifp, struct rte_mbuf *m, - in_addr_t addr, struct ether_addr *desten); + in_addr_t addr, struct rte_ether_addr *desten); + +struct arp_nbr_cfg { + uint32_t arp_aging_time; + int32_t arp_max_entry; +}; + +extern struct arp_nbr_cfg arp_cfg; + +#define ARP_CFG(param) (arp_cfg.param) struct arp_stats { uint64_t txrequests; /* # of ARP requests sent by this host. */ @@ -40,6 +51,8 @@ struct arp_stats { uint64_t garp_reqs_dropped; /* # of GARP requests dropped */ uint64_t garp_reps_dropped; /* # of GARP replies dropped */ uint64_t mpoolfail; /* Memory pool limit hit */ + uint64_t memfail; /* Out of memory hit */ + uint64_t tablimit; /* Cache limit hit */ }; #define ARPSTAT_ADD(vrf_id, name, val) \ @@ -56,6 +69,7 @@ typedef void ll_walkhash_f_t(const struct ifnet *, struct llentry *, void *); void arp_walk(const struct ifnet *, ll_walkhash_f_t *, void *); struct rte_mbuf *arprequest(struct ifnet *ifp, struct sockaddr *sa); -bool arp_is_arp_reply(struct ifnet *ifp, struct rte_mbuf *m); +void arp_entry_destroy(struct lltable *llt, struct llentry *lle); +int cmd_arp_get_cfg(FILE *f); #endif /* ARP_H */ diff --git a/src/arp_cfg.h b/src/arp_cfg.h index 43b7cb6c..37a8c7bd 100644 --- a/src/arp_cfg.h +++ b/src/arp_cfg.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2018-2019, AT&T Intellectual Property. All rights reserved. 
* * SPDX-License-Identifier: LGPL-2.1-only */ diff --git a/src/backplane.c b/src/backplane.c index 5d892c8d..be777056 100644 --- a/src/backplane.c +++ b/src/backplane.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, AT&T Intellectual Property. + * Copyright (c) 2019-2020, AT&T Intellectual Property. * All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only @@ -11,15 +11,13 @@ #include #include #include -#ifdef HAVE_RTE_BUS_PCI_H #include -#endif #include #include "backplane.h" -#include "bridge.h" #include "control.h" #include "dp_event.h" #include "fal.h" +#include "if/bridge/bridge.h" #include "if_var.h" #include "json_writer.h" #include "vplane_log.h" @@ -46,15 +44,11 @@ static int backplane_port_get_index_and_name(uint16_t dpdk_port, int *index, unsigned int i; rte_eth_dev_info_get(dpdk_port, &dev); -#ifdef HAVE_RTE_ETH_DEV_INFO_DEVICE const struct rte_bus *bus = rte_bus_find_by_device(dev.device); struct rte_pci_device *pci = NULL; if (bus && streq(bus->name, "pci")) pci = RTE_DEV_TO_PCI(dev.device); -#else - const struct rte_pci_device *pci = dev.pci_dev; -#endif if (!pci) return -ENOENT; @@ -107,7 +101,7 @@ static int backplane_cache_ifindex(struct ifnet *ifp) } static void -backplane_event_if_index_set(struct ifnet *ifp, uint32_t ifindex __unused) +backplane_event_if_index_set(struct ifnet *ifp) { struct cfg_if_list_entry *le, *tmp_le; @@ -119,15 +113,15 @@ backplane_event_if_index_set(struct ifnet *ifp, uint32_t ifindex __unused) return; cds_list_for_each_entry_safe(le, tmp_le, &bp_cfg_list->if_list, le_node) { - ifp = ifnet_byifname(le->le_ifname); + ifp = dp_ifnet_byifname(le->le_ifname); if (!ifp) continue; RTE_LOG(INFO, BACKPLANE, "Replaying backplane command %s for interface %s\n", le->le_buf, ifp->if_name); - cfg_if_list_del(bp_cfg_list, ifp->if_name); cmd_backplane_cfg(NULL, le->le_argc, le->le_argv); + cfg_if_list_del(bp_cfg_list, ifp->if_name); } backplane_replay_destroy(); return; @@ -143,8 +137,8 @@ backplane_event_if_index_set(struct ifnet 
*ifp, uint32_t ifindex __unused) "Replaying backplane command %s for interface %s\n", le->le_buf, ifp->if_name); - cfg_if_list_del(bp_cfg_list, ifp->if_name); cmd_backplane_cfg(NULL, le->le_argc, le->le_argv); + cfg_if_list_del(bp_cfg_list, ifp->if_name); backplane_replay_destroy(); } @@ -189,19 +183,23 @@ int cmd_backplane_cfg(FILE *f, int argc, char **argv) struct ifnet *ifp, *bp_ifp; int rv; - if (argc != 4) { + if (argc != 4 && f) { fprintf(f, "\nInvalid command : "); for (int i = 0; i < argc; i++) fprintf(f, "%s ", argv[i]); goto error; } - if (strcmp(argv[1], "SET")) + if (strcmp(argv[1], "SET") != 0) goto error; - ifp = ifnet_byifname(argv[2]); - bp_ifp = ifnet_byifname(argv[3]); + ifp = dp_ifnet_byifname(argv[2]); + bp_ifp = dp_ifnet_byifname(argv[3]); if (!ifp || !bp_ifp) { + /* + * Need out of order checks as the controller does not cater + * for 2 different interfaces when guaranteeing ordering. + */ if (!bp_cfg_list && backplane_replay_init()) { RTE_LOG(ERR, BACKPLANE, "Could not set up command replay cache\n"); @@ -240,14 +238,18 @@ int cmd_backplane_cfg(FILE *f, int argc, char **argv) return 0; error: - fprintf(f, "Usage: backplane SET \n"); + if (f) + fprintf(f, "Usage: backplane SET \n"); + else + RTE_LOG(ERR, BACKPLANE, + "Usage: backplane SET \n"); return -EINVAL; } static void backplane_show(json_writer_t *wr, unsigned int i) { struct ifnet *ifp; - struct if_link_status link; + struct dp_ifnet_link_status link; jsonw_start_object(wr); jsonw_name(wr, "pci_address"); @@ -258,9 +260,9 @@ static void backplane_show(json_writer_t *wr, unsigned int i) jsonw_uint_field(wr, "function", bp_intfs[i].pci_addr.function); jsonw_end_object(wr); jsonw_uint_field(wr, "ifindex", bp_intfs[i].ifindex); - ifp = ifnet_byifindex(bp_intfs[i].ifindex); + ifp = dp_ifnet_byifindex(bp_intfs[i].ifindex); if (ifp) { - if_get_link_status(ifp, &link); + dp_ifnet_link_status(ifp, &link); jsonw_string_field(wr, "name", ifp->if_name); jsonw_string_field(wr, "link_state", @@ 
-282,13 +284,13 @@ int cmd_backplane_op(FILE *f, int argc, char **argv) goto usage; } - if (strcmp(argv[1], "show")) { + if (strcmp(argv[1], "show") != 0) { rv = -EINVAL; goto usage; } if (argc == 3) { - ifp = ifnet_byifname(argv[2]); + ifp = dp_ifnet_byifname(argv[2]); if (!ifp) { fprintf(f, "Could not find backplane interface %s\n", argv[2]); diff --git a/src/backplane.h b/src/backplane.h index 881b507f..b82975f1 100644 --- a/src/backplane.h +++ b/src/backplane.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2019-2020, AT&T Intellectual Property. All rights reserved. * All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only @@ -7,7 +7,7 @@ #ifndef BACKPLANE_H #define BACKPLANE_H -#include "config.h" +#include "config_internal.h" int backplane_init(struct pci_list *bp_list); int cmd_backplane_cfg(FILE *f, int argc, char **argv); diff --git a/src/bitmask.h b/src/bitmask.h index 4fcfad60..4058bc9f 100644 --- a/src/bitmask.h +++ b/src/bitmask.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2018, AT&T Intellectual Property. + * Copyright (c) 2017-2020, AT&T Intellectual Property. * All rights reserved. * Copyright (c) 2011-2016 by Brocade Communications Systems, Inc. * All rights reserved. 
@@ -22,6 +22,10 @@ #include "urcu.h" +#ifndef BITS_PER_BYTE +#define BITS_PER_BYTE 8 +#endif + #ifndef howmany #define howmany(x, y) (((x) + ((y) - 1)) / (y)) #endif @@ -31,6 +35,7 @@ #endif #define BITMASK_SZ howmany(BITMASK_BITS, UINT64_BIT) #define BITMASK_STRSZ ((BITMASK_SZ * 16) + 1) +#define BITMASK_BYTESZ (BITMASK_BITS / BITS_PER_BYTE) struct bitmask { uint64_t _bits[BITMASK_SZ]; @@ -106,7 +111,18 @@ static inline void bitmask_copy(bitmask_t *a, CMM_LOAD_SHARED(b->_bits[pos])); } +static inline bool bitmask_equal(const bitmask_t *a, + const bitmask_t *b) +{ + for (unsigned int pos = 0; pos < BITMASK_SZ; pos++) + if (CMM_LOAD_SHARED(a->_bits[pos]) != + CMM_LOAD_SHARED(b->_bits[pos])) + return false; + return true; +} + int bitmask_parse(bitmask_t *msk, const char *str); void bitmask_sprint(const bitmask_t *msk, char *buf, size_t sz); +int bitmask_parse_bytes(bitmask_t *mask, const uint8_t *bytes, uint8_t len); #endif /* BITMASK_H */ diff --git a/src/bpf_filter.c b/src/bpf_filter.c index 8bb35ad4..d4f304b7 100644 --- a/src/bpf_filter.c +++ b/src/bpf_filter.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. 
*/ /*- * SPDX-License-Identifier: (LGPL-2.1-only AND BSD-3-Clause) @@ -38,12 +38,9 @@ * * @(#)bpf_filter.c 8.1 (Berkeley) 6/10/93 */ - +#include #include -#if !defined(_KERNEL) -#include -#endif #if !defined(_KERNEL) || defined(sun) #include #endif @@ -57,14 +54,14 @@ #define EXTRACT_LONG(p) (ntohl(*(u_int32_t *)p)) #else #define EXTRACT_SHORT(p)\ - ((u_int16_t)\ - ((u_int16_t)*((u_char *)p+0)<<8|\ - (u_int16_t)*((u_char *)p+1)<<0)) + ((u_int16_t) \ + ((u_int16_t)*((u_char *)(p)+0)<<8| \ + (u_int16_t)*((u_char *)(p)+1)<<0)) #define EXTRACT_LONG(p)\ - ((u_int32_t)*((u_char *)p+0)<<24|\ - (u_int32_t)*((u_char *)p+1)<<16|\ - (u_int32_t)*((u_char *)p+2)<<8|\ - (u_int32_t)*((u_char *)p+3)<<0) + ((u_int32_t)*((u_char *)(p)+0)<<24| \ + (u_int32_t)*((u_char *)(p)+1)<<16| \ + (u_int32_t)*((u_char *)(p)+2)<<8| \ + (u_int32_t)*((u_char *)(p)+3)<<0) #endif #ifdef _KERNEL @@ -180,9 +177,7 @@ bpf_filter(const struct bpf_insn *pc, const u_char *p, u_int wirelen, u_int bufl { u_int32_t A = 0, X = 0; bpf_u_int32 k; - u_int32_t mem[BPF_MEMWORDS]; - - bzero(mem, sizeof(mem)); + u_int32_t mem[BPF_MEMWORDS] = { 0 }; if (pc == NULL) /* diff --git a/src/bridge_vlan_set.c b/src/bridge_vlan_set.c index 2a8fba92..179553c8 100644 --- a/src/bridge_vlan_set.c +++ b/src/bridge_vlan_set.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, AT&T Intellectual Property. + * Copyright (c) 2017,2019-2020, AT&T Intellectual Property. * All rights reserved. * Copyright (c) 2017 by Brocade Communications Systems, Inc. * All rights reserved. 
@@ -12,7 +12,7 @@ #include #include "compiler.h" -#include "pktmbuf.h" +#include "pktmbuf_internal.h" #include "util.h" /* @@ -25,7 +25,8 @@ struct bridge_vlan_set { struct rte_bitmap *map; }; -struct bridge_vlan_set __externally_visible * +__FOR_EXPORT +struct bridge_vlan_set * bridge_vlan_set_create(void) { struct bridge_vlan_set *set = zmalloc_aligned(sizeof(*set)); @@ -41,26 +42,30 @@ bridge_vlan_set_create(void) return set; } -void __externally_visible +__FOR_EXPORT +void bridge_vlan_set_free(struct bridge_vlan_set *set) { free(set->store); free(set); } -void __externally_visible +__FOR_EXPORT +void bridge_vlan_set_add(struct bridge_vlan_set *set, uint16_t vlan) { rte_bitmap_set(set->map, vlan); } -void __externally_visible +__FOR_EXPORT +void bridge_vlan_set_remove(struct bridge_vlan_set *set, uint16_t vlan) { rte_bitmap_clear(set->map, vlan); } -bool __externally_visible +__FOR_EXPORT +bool bridge_vlan_set_is_member(struct bridge_vlan_set *set, uint16_t vlan) { if (vlan > VLAN_N_VID) @@ -68,13 +73,28 @@ bridge_vlan_set_is_member(struct bridge_vlan_set *set, uint16_t vlan) return rte_bitmap_get(set->map, vlan); } -void __externally_visible +__FOR_EXPORT +void bridge_vlan_set_clear(struct bridge_vlan_set *set) { rte_bitmap_reset(set->map); } -void __externally_visible +__FOR_EXPORT +bool +bridge_vlan_set_is_empty(struct bridge_vlan_set *set) +{ + uint32_t pos = 0; + uint64_t slab = 0; + + if (rte_bitmap_scan(set->map, &pos, &slab)) + return false; + + return true; +} + +__FOR_EXPORT +void bridge_vlan_set_synchronize(struct bridge_vlan_set *old, struct bridge_vlan_set *new, bridge_vlan_synchronize_cb add_cb, diff --git a/src/capture.c b/src/capture.c index d64bf2bd..daa0f89e 100644 --- a/src/capture.c +++ b/src/capture.c @@ -1,7 +1,7 @@ /* * Simple data capture output. * - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2021, AT&T Intellectual Property. All rights reserved. 
* Copyright (c) 2011-2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include @@ -34,12 +33,15 @@ #include #include "capture.h" -#include "config.h" +#include "config_internal.h" +#include "event.h" +#include "fal.h" #include "if_var.h" #include "ip_addr.h" +#include "lcore_sched.h" #include "main.h" #include "pipeline/nodes/pl_nodes_common.h" -#include "pktmbuf.h" +#include "pktmbuf_internal.h" #include "pl_node.h" #include "urcu.h" #include "util.h" @@ -60,6 +62,14 @@ static struct timeval capture_tod; static uint64_t capture_base; static uint64_t capture_hz; +static zsock_t *capture_sock_main; +static zsock_t *capture_sock_console; +static pthread_mutex_t capture_sock_lock = PTHREAD_MUTEX_INITIALIZER; + +typedef int (*fal_func_t)(void *arg); + +static int capture_main_send(fal_func_t func, void *arg); + static void capture_time_resync(struct timeval *tod, uint64_t *base, uint64_t *hz) { @@ -177,6 +187,33 @@ static void pcapin_response(zsock_t *sock, char *type, zmsg_t *recv_msg, zmsg_destroy(&recv_msg); } +/* + * Send response to "DROP STATS" request. + * Get the capture stats and increment the total drop count (software) + * with the received drop count in the hardware stats. + */ +static void drop_stats_response(struct capture_info *cap_info, zsock_t *sock) +{ + enum fal_capture_stat_type cntr_ids[] = { + FAL_CAPTURE_STAT_DROPPED_PACKETS, + }; + uint64_t cntrs[ARRAY_SIZE(cntr_ids)]; + int ret; + + uint64_t drops = cap_info->pkt_drops; + + if (cap_info->falobj != FAL_NULL_OBJECT_ID) { + + ret = fal_capture_get_stats(cap_info->falobj, + ARRAY_SIZE(cntr_ids), + &cntr_ids[0], &cntrs[0]); + if (ret >= 0) + drops += cntrs[0]; + } + + zsock_send(sock, "s8", "OK DROP STATS", drops); +} + /* * Handler for incoming requests from libpcap (filters, heartbeats and stop * command). 
@@ -206,6 +243,10 @@ static int pcapin_handler(zsock_t *sock, struct ifnet *ifp) * Frame 1 - "STOP" * Frame 2 - * + * Drop statistics: + * Frame 1 - "DROP STATS" + * Frame 2 - + * * Heartbeat: * Frame 1 - "BEAT" */ @@ -240,7 +281,14 @@ static int pcapin_handler(zsock_t *sock, struct ifnet *ifp) int stop = capture_stop(ifp, slotmask); pcapin_response(sock, type, msg, "OK"); return stop; - } else if (!strcmp(type, "FILTER")) { + } + if (!strcmp(type, "DROP STATS")) { + free(type); + zmsg_destroy(&msg); + drop_stats_response(cap_info, sock); + return 0; + } + if (!strcmp(type, "FILTER")) { frame = zmsg_pop(msg); if (frame == NULL) { @@ -304,14 +352,15 @@ static void capture_get_timestamp(struct rte_mbuf *m, struct timeval *tv) rte_spinlock_unlock(&capture_time_lock); /* Check if we should resync the time base */ - if (us >= CAPTURE_TIME_RESYNC_USECS) { + if (us >= CAPTURE_TIME_RESYNC_USECS || + us + CAPTURE_TIME_RESYNC_USECS <= 0) { capture_time_resync(tv, &base, &hz); us = capture_usec_from_tod_base(ts, base, hz); } tv->tv_sec += us / USEC_PER_SEC; tv->tv_usec += us % USEC_PER_SEC; - if (tv->tv_usec > USEC_PER_SEC) { + if (tv->tv_usec >= USEC_PER_SEC) { ++tv->tv_sec; tv->tv_usec -= USEC_PER_SEC; } else if (tv->tv_usec < 0) { @@ -366,21 +415,39 @@ static int capture_enqueue(struct capture_info *cap_info, (void **)pkts, n, NULL); if (likely(ret > 0)) capture_wakeup(cap_info); + else + cap_info->pkt_drops += n; return ret; } +/* + * Add a hardware snooped packet, received directly from the platform + * backplane, to the capture ring. + */ +void capture_hardware(const struct ifnet *ifp, struct rte_mbuf *mbuf) +{ + mbuf->udata64 = rte_get_timer_cycles(); + + if (unlikely(!ifp->hw_capturing) || + (unlikely(capture_enqueue(ifp->cap_info, &mbuf, 1) == 0))) + rte_pktmbuf_free(mbuf); +} + /* Put mbuf(s) in capture ring. 
*/ void capture_burst(const struct ifnet *ifp, struct rte_mbuf *pkts[], unsigned int n) { + struct capture_info *cap_info = ifp->cap_info; struct rte_mbuf *snap[n]; /* may be called with no packets on transmit with bonding interfaces */ - if (n == 0 || capture_mbuf_copy(pkts, snap, n) < 0) + if (n == 0 || capture_mbuf_copy(pkts, snap, n) < 0) { + cap_info->pkt_drops += n; return; + } - if (unlikely(capture_enqueue(ifp->cap_info, snap, n) < 0)) + if (unlikely(capture_enqueue(cap_info, snap, n) == 0)) pktmbuf_free_bulk(snap, n); } @@ -434,10 +501,10 @@ static int capture_write(struct rte_mbuf *m, struct ifnet *ifp) /* ... then PCAP header */ if (m->ol_flags & (PKT_TX_VLAN_PKT|PKT_RX_VLAN)) { - pcap.caplen += sizeof(struct vlan_hdr); + pcap.caplen += sizeof(struct rte_vlan_hdr); if (pcap.caplen > cap_info->snaplen) pcap.caplen = cap_info->snaplen; - pcap.len += sizeof(struct vlan_hdr); + pcap.len += sizeof(struct rte_vlan_hdr); } zmsg_addmem(msg, &pcap, sizeof(pcap)); @@ -448,14 +515,14 @@ static int capture_write(struct rte_mbuf *m, struct ifnet *ifp) * in a temporary buffer. 
*/ if (m->ol_flags & (PKT_TX_VLAN_PKT|PKT_RX_VLAN)) { - const struct ether_hdr *eh - = rte_pktmbuf_mtod(m, struct ether_hdr *); + const struct rte_ether_hdr *eh + = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); struct { - struct ether_hdr eh; - struct vlan_hdr vh; + struct rte_ether_hdr eh; + struct rte_vlan_hdr vh; } vhdr; - memcpy(&vhdr.eh, eh, 2 * ETHER_ADDR_LEN); + memcpy(&vhdr.eh, eh, 2 * RTE_ETHER_ADDR_LEN); vhdr.eh.ether_type = htons(if_tpid(ifp)); vhdr.vh.vlan_tci = htons(m->vlan_tci); vhdr.vh.eth_proto = eh->ether_type; @@ -466,9 +533,11 @@ static int capture_write(struct rte_mbuf *m, struct ifnet *ifp) /* hide original ethernet header */ space = addmsg_if_space(msg, - rte_pktmbuf_mtod(m, char *) + ETHER_HDR_LEN, - (unsigned int)rte_pktmbuf_data_len(m) - ETHER_HDR_LEN, - space); + rte_pktmbuf_mtod(m, char *) + + RTE_ETHER_HDR_LEN, + (unsigned int)rte_pktmbuf_data_len(m) - + RTE_ETHER_HDR_LEN, + space); if (!space) goto msg_send; @@ -491,10 +560,63 @@ static int capture_write(struct rte_mbuf *m, struct ifnet *ifp) static void capture_flush(const struct capture_info *cap_info) { - struct rte_mbuf *m; + struct rte_mbuf *m = NULL; while (rte_ring_sc_dequeue(cap_info->cap_ring, (void **)&m) == 0) - rte_pktmbuf_free(m); + if (m) + rte_pktmbuf_free(m); +} + +/* + * Bind or unbind the FAL capture object to/from the interface. This + * action function runs in the context of the main thread. 
+ */ + +struct capture_hw_bind_args { + uint32_t ifindex; + fal_object_t obj; +}; + +static int capture_hw_bind(void *arg) +{ + struct capture_hw_bind_args *bind_args = arg; + struct fal_attribute_t portattr; + + portattr.id = FAL_PORT_ATTR_CAPTURE_BIND; + portattr.value.objid = bind_args->obj; + return fal_l2_upd_port(bind_args->ifindex, &portattr); +} + +static void capture_hw_stop(const struct ifnet *ifp, + struct capture_info *cap_info) +{ + struct capture_hw_bind_args args; + + if (cap_info->falobj == 0) + return; + + args.ifindex = ifp->if_index; + args.obj = FAL_NULL_OBJECT_ID; + + /* + * Termination is triggered from normal capture cancellation + * and deletion of the interface (capture_cancel()). The + * former is in the context of the capture thread (loop + * termination), the latter is in the context of the main + * thread. + * + * In the interface deletion case, trying to schedule the + * unbind action on the main thread is never going to work + * (deadlock as we're already running on the main + * thread). Update the FAL directly. 
+ */ + if (is_main_thread()) + capture_hw_bind(&args); + else + capture_main_send(capture_hw_bind, &args); + + fal_capture_delete(cap_info->falobj); + cap_info->falobj = 0; } static void capture_cleanup(void *arg) @@ -518,13 +640,8 @@ static void capture_cleanup(void *arg) rte_free(cap_filter); } - if (ifp->if_type == IFT_ETHER) { - if (cap_info->is_promisc) - ifpromisc(ifp, 0); - if (cap_info->offload_mask >= 0) - rte_eth_dev_set_vlan_offload(ifp->if_port, - cap_info->offload_mask); - } + if (cap_info->is_promisc) + ifpromisc(ifp, 0); RTE_LOG(INFO, DATAPLANE, "Capture stopped on %s\n", ifp->if_name); @@ -569,8 +686,10 @@ static void capture_loop(struct ifnet *ifp) rte_pktmbuf_free(m); - if (ret < 0) + if (ret < 0) { + cap_info->pkt_drops++; return; + } if (loops++ >= CAPTURE_MAX_LOOPS) { capture_wakeup(cap_info); @@ -618,33 +737,34 @@ static void *capture_thread(void *arg) pthread_cleanup_push(capture_cleanup, arg); - if (ifp->if_type == IFT_ETHER) { - int offload_mask; - /* Turn off vlan filtering */ - offload_mask = - rte_eth_dev_get_vlan_offload(ifp->if_port); - if (offload_mask > 0) - rte_eth_dev_set_vlan_offload(ifp->if_port, - offload_mask & - ~ETH_VLAN_FILTER_OFFLOAD); - cap_info->offload_mask = offload_mask; - if (cap_info->is_promisc) - ifpromisc(ifp, 1); - } - - ifp->capturing = 1; - if (capture_if_use_common_cap_points(ifp)) + if (cap_info->is_promisc) + ifpromisc(ifp, 1); + + if (cap_info->falobj != 0) + ifp->hw_capturing = 1; + else + ifp->capturing = 1; + + if (ifp->capturing && capture_if_use_common_cap_points(ifp)) { pl_node_add_feature_by_inst(&capture_ether_in_feat, ifp); - RTE_LOG(INFO, DATAPLANE, "Capture started on %s\n", - ifp->if_name); + pl_node_add_feature_by_inst(&capture_l2_output_feat, ifp); + } + + RTE_LOG(INFO, DATAPLANE, "%sCapture started on %s\n", + ifp->hw_capturing ? 
"Hardware " : "", ifp->if_name); capture_loop(ifp); - if (capture_if_use_common_cap_points(ifp)) + if (ifp->capturing && capture_if_use_common_cap_points(ifp)) { pl_node_remove_feature_by_inst(&capture_ether_in_feat, ifp); + pl_node_remove_feature_by_inst(&capture_l2_output_feat, ifp); + } + + capture_hw_stop(ifp, cap_info); + ifp->hw_capturing = 0; ifp->capturing = 0; - synchronize_rcu(); /* all threads stop capturing */ + dp_rcu_synchronize(); /* all threads stop capturing */ pthread_cleanup_pop(1); ifp->cap_info = NULL; @@ -666,7 +786,9 @@ void capture_cancel(struct ifnet *ifp) if (!ifp || !ifp->cap_info) return; cap_info = ifp->cap_info; + ifp->hw_capturing = 0; ifp->capturing = 0; + capture_hw_stop(ifp, cap_info); pthread_cancel(cap_info->cap_thread); pthread_join(cap_info->cap_thread, &join_res); @@ -678,9 +800,57 @@ void capture_cancel(struct ifnet *ifp) rte_free(cap_info); } +static bool capture_hw_start(FILE *f, const struct ifnet *ifp, + struct capture_info *cap_info) +{ + struct fal_attribute_t capattr[] = { + { .id = FAL_CAPTURE_ATTR_COPY_LENGTH, + .value.u32 = cap_info->snaplen }, + { .id = FAL_CAPTURE_ATTR_BANDWIDTH, + .value.u32 = cap_info->bandwidth } + }; + struct capture_hw_bind_args args; + fal_object_t obj; + int rc; + + if (cap_info->is_swonly || !if_is_hwport((struct ifnet *)ifp)) + return true; + + rc = fal_capture_create(ARRAY_SIZE(capattr), capattr, &obj); + if (rc == -EOPNOTSUPP) + return true; + + if (rc < 0) { + fprintf(f, "capture_start: hardware setup failed: %s\n", + strerror(-rc)); + return false; + } + + /* + * Bind the object to the interface (turn on packet capture) + */ + args.ifindex = ifp->if_index; + args.obj = obj; + rc = capture_main_send(capture_hw_bind, &args); + if (rc < 0) { + fal_capture_delete(obj); + + if (rc == -EOPNOTSUPP) + return true; + + fprintf(f, "capture_start: hardware enable failed: %s\n", + strerror(-rc)); + return false; + } + + cap_info->falobj = obj; + return true; +} + static struct capture_info 
*capture_new(FILE *f, const char *addrstr, struct ifnet *ifp, - bool is_promisc, unsigned int snaplen) + bool is_promisc, unsigned int snaplen, + bool swonly, unsigned int bandwidth) { struct capture_info *cap_info; int cap_pub_port, cap_pcapin_port; @@ -695,6 +865,8 @@ static struct capture_info *capture_new(FILE *f, const char *addrstr, cap_info->is_promisc = is_promisc; cap_info->snaplen = snaplen; + cap_info->is_swonly = swonly; + cap_info->bandwidth = bandwidth; snprintf(rname, RTE_RING_NAMESIZE, "capture_%s", ifp->if_name); cap_info->cap_ring = rte_ring_create(rname, CAPTURE_RING_SZ, @@ -748,6 +920,10 @@ static struct capture_info *capture_new(FILE *f, const char *addrstr, fprintf(f, "capture_start: wakeup fd create failed"); goto cleanup_pcapin_fail; } + + if (!capture_hw_start(f, ifp, cap_info)) + goto cleanup_pcapin_fail; + return cap_info; cleanup_pcapin_fail: @@ -766,7 +942,8 @@ static struct capture_info *capture_new(FILE *f, const char *addrstr, * are currently in use then do the necessary setup. 
*/ static int capture_start(FILE *f, struct ifnet *ifp, - bool is_promisc, unsigned int snaplen) + bool is_promisc, unsigned int snaplen, + bool swonly, unsigned int bandwidth) { struct capture_info *cap_info = ifp->cap_info; char addrstr[INET6_ADDRSTRLEN]; @@ -781,7 +958,8 @@ static int capture_start(FILE *f, struct ifnet *ifp, if (cap_info == NULL) { cap_info = capture_new(f, addrstr, - ifp, is_promisc, snaplen); + ifp, is_promisc, snaplen, + swonly, bandwidth); if (cap_info == NULL) return -1; @@ -792,6 +970,7 @@ static int capture_start(FILE *f, struct ifnet *ifp, fprintf(f, "capture_start: pthread create failed"); zsock_destroy(&cap_info->cap_pcapin); zsock_destroy(&cap_info->cap_pub); + capture_hw_stop(ifp, cap_info); ifp->cap_info = NULL; rte_ring_free(cap_info->cap_ring); rte_free(cap_info); @@ -823,10 +1002,41 @@ static int capture_start(FILE *f, struct ifnet *ifp, return 0; } +static int +capture_show(FILE *f, const struct ifnet *ifp) +{ + const struct capture_info *cap_info = ifp->cap_info; + json_writer_t *wr = jsonw_new(f); + struct fal_attribute_t portattr = { + .id = FAL_PORT_ATTR_HW_CAPTURE, + }; + + if (wr == NULL) + return -1; + + jsonw_name(wr, "capture"); + jsonw_start_object(wr); + jsonw_string_field(wr, "interface", ifp->if_name); + jsonw_bool_field(wr, "active", cap_info != 0); + jsonw_bool_field(wr, "hardware-support", + fal_l2_get_attrs(ifp->if_index, 1, &portattr) == 0); + if (cap_info != NULL) { + jsonw_uint_field(wr, "snaplen", cap_info->snaplen); + jsonw_bool_field(wr, "promiscuous", cap_info->is_promisc); + jsonw_bool_field(wr, "hw-capture", ifp->hw_capturing); + jsonw_bool_field(wr, "software-only", cap_info->is_swonly); + jsonw_uint_field(wr, "bandwidth", cap_info->bandwidth); + } + jsonw_end_object(wr); + jsonw_destroy(&wr); + return 0; +} + /* * Handler for capture command. 
* - * capture start + * capture start + * capture show */ int cmd_capture(FILE *f, int argc, char **argv) { @@ -834,14 +1044,16 @@ int cmd_capture(FILE *f, int argc, char **argv) struct ifnet *ifp; bool is_promisc; unsigned int snaplen; + bool swonly = false; + unsigned int bandwidth = 0; - if (argc < 5) { - fprintf(f, "capture: invalid arguments"); + if (argc < 3) { + fprintf(f, "capture: invalid arguments (%d)", argc); return -1; } intf = argv[2]; - ifp = ifnet_byifname(intf); + ifp = dp_ifnet_byifname(intf); if (ifp == NULL) { fprintf(f, "capture: interface %s not found", intf); return -1; @@ -852,15 +1064,116 @@ int cmd_capture(FILE *f, int argc, char **argv) return -1; } - if (strcmp(argv[1], "start")) { + if (streq(argv[1], "show")) + return capture_show(f, ifp); + + if (argc < 5) { + fprintf(f, "capture: invalid arguments (%d)", argc); + return -1; + } + + if (strcmp(argv[1], "start") != 0) { fprintf(f, "capture: unknown command\n"); return -1; } is_promisc = (*argv[3] == '1'); snaplen = strtoul(argv[4], NULL, 10); + if (argc > 5) { + unsigned int value; - return capture_start(f, ifp, is_promisc, snaplen); + if (get_unsigned(argv[5], &value) == 0) + swonly = value > 0; + + /* + * Usable backplane bandwidth is in Kbit/sec, with a + * maximum value of 1 Gbit/sec. + */ + if ((get_unsigned(argv[6], &value) < 0) || + (value > (1*1000*1000))) { + fprintf(f, "capture: invalid bandwidth %s\n", + argv[6]); + return -1; + } + + bandwidth = value; + } + + return capture_start(f, ifp, is_promisc, snaplen, swonly, bandwidth); +} + +/* + * In order to maintain serialisation with other FAL updates, the FAL + * updates to packet capture must be run within the context of the + * main thread (as opposed to the console or capture threads). + * + * Use a simple synchronous RPC-like mechanism to schedule FAL action + * routines on the main thread. 
+ */ +static int capture_main_receive(void *arg) +{ + zsock_t *sock = (zsock_t *)arg; + fal_func_t func; + void *func_arg; + int func_rc; + + if (zsock_recv(sock, "pp", &func, &func_arg) < 0) { + RTE_LOG(ERR, DATAPLANE, + "%s() failed to get action from console\n", __func__); + return -EIO; + } + + func_rc = (*func)(func_arg); + + if (zsock_send(sock, "i", func_rc) < 0) { + RTE_LOG(ERR, DATAPLANE, + "%s() failed to send response to console\n", __func__); + return -EIO; + } + + return 0; +} + +static int capture_main_send_locked(fal_func_t func, void *arg) +{ + int func_rc; + + if (zsock_send(capture_sock_console, "pp", func, arg) < 0) { + RTE_LOG(ERR, DATAPLANE, + "%s() failed to send action to main\n", + __func__); + return -EIO; + } + + if (zsock_recv(capture_sock_console, "i", &func_rc) < 0) { + RTE_LOG(ERR, DATAPLANE, + "%s() failed to get response from main\n", + __func__); + return -EIO; + } + + return func_rc; +} + +static int capture_main_send(fal_func_t func, void *arg) +{ + int rc; + + pthread_mutex_lock(&capture_sock_lock); + rc = capture_main_send_locked(func, arg); + pthread_mutex_unlock(&capture_sock_lock); + return rc; +} + +void capture_destroy(void) +{ + RTE_LOG(INFO, DATAPLANE, "Capture shutting down\n"); + dp_unregister_event_socket(zsock_resolve(capture_sock_main)); + zsock_destroy(&capture_sock_main); + + pthread_mutex_lock(&capture_sock_lock); + zsock_destroy(&capture_sock_console); + pthread_mutex_unlock(&capture_sock_lock); } /* @@ -881,4 +1194,16 @@ void capture_init(uint16_t mbuf_sz) rte_spinlock_init(&capture_time_lock); capture_time_resync(NULL, NULL, NULL); + + capture_sock_main = zsock_new_pair("@inproc://capture_main_event"); + if (capture_sock_main == NULL) + rte_panic("capture main socket failed"); + + capture_sock_console = zsock_new_pair(">inproc://capture_main_event"); + if (capture_sock_console == NULL) + rte_panic("capture console socket failed"); + + dp_register_event_socket(zsock_resolve(capture_sock_main), + 
capture_main_receive, + capture_sock_main); } diff --git a/src/capture.h b/src/capture.h index d4b8ac2c..17bf3f05 100644 --- a/src/capture.h +++ b/src/capture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2021, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015-2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -19,6 +19,7 @@ #include #include +#include "compiler.h" #include "if_var.h" struct rte_mbuf; @@ -42,12 +43,15 @@ struct capture_info { int cap_pub_port; zsock_t *cap_pcapin; int cap_pcapin_port; - int offload_mask; uint8_t capture_mask; /* bitmask of current captures */ struct capture_filter_list filters; struct timespec last_beat; + uint64_t pkt_drops; bool is_promisc; + bool is_swonly; unsigned int snaplen; + unsigned int bandwidth; + fal_object_t falobj; }; /* This should be expanded to all vplane interface types */ @@ -75,9 +79,12 @@ static inline bool capture_if_use_common_cap_points(const struct ifnet *ifp) } /* Capture interface */ +void capture_destroy(void); void capture_init(uint16_t); void capture_cancel(struct ifnet *ifp); +void capture_hardware(const struct ifnet *ifp, struct rte_mbuf *mbuf) + __cold_func; void capture_burst(const struct ifnet *ifp, struct rte_mbuf *pkts[], unsigned int n) - __attribute__((cold)); + __cold_func; int cmd_capture(FILE *f, int argc, char **argv); #endif /* CAPTURE_H */ diff --git a/src/commands.c b/src/commands.c index e6580bed..b72795a6 100644 --- a/src/commands.c +++ b/src/commands.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2019, AT&T Intellectual Property. + * Copyright (c) 2017-2021, AT&T Intellectual Property. * All rights reserved. * Copyright (c) 2011-2016 by Brocade Communications Systems, Inc. * All rights reserved. 
@@ -32,7 +32,6 @@ #include #include #include -#include #include #include #include @@ -42,61 +41,68 @@ #include "arp.h" #include "bitmask.h" -#include "bridge.h" -#include "bridge_port.h" #include "capture.h" #include "commands.h" #include "compat.h" #include "compiler.h" -#include "config.h" +#include "config_internal.h" #include "control.h" #include "crypto/crypto.h" #include "dp_event.h" -#include "event.h" -#include "gre.h" +#include "event_internal.h" +#include "feature_plugin_internal.h" +#include "if/bridge/bridge.h" +#include "if/bridge/bridge_port.h" +#include "if/bridge/switch.h" +#include "if/dpdk-eth/vhost.h" +#include "if/gre.h" +#include "if/vxlan.h" #include "if_var.h" #include "json_writer.h" #include "l2_rx_fltr.h" #include "l2tp/l2tpeth.h" #include "lag.h" #include "main.h" -#include "master.h" +#include "controller.h" #include "mstp.h" #include "netinet6/nd6_nbr.h" #include "netinet6/route_v6.h" #include "netinet6/ip6_funcs.h" #include "pipeline/nodes/pl_nodes_common.h" -#include "pktmbuf.h" +#include "pktmbuf_internal.h" #include "pd_show.h" #include "pl_commands.h" #include "pl_common.h" #include "pl_node.h" #include "power.h" +#include "protobuf.h" +#include "protobuf/SpeedConfig.pb-c.h" +#include "protobuf/SynceConfig.pb-c.h" +#include "protobuf/BreakoutConfig.pb-c.h" +#include "rcu.h" #include "rt_tracker.h" #include "session/session_cmds.h" #include "shadow.h" #include "snmp_mib.h" -#include "urcu.h" #include "util.h" -#include "vhost.h" #include "vplane_debug.h" #include "vplane_log.h" -#include "vrf.h" +#include "vrf_internal.h" #include "vrf_if.h" -#include "vxlan.h" #include "fal.h" #include "npf/dpi/app_cmds.h" #include "storm_ctl.h" -#include "switch.h" +#include "mac_limit.h" #include "ip_icmp.h" #include "backplane.h" #include "vlan_modify.h" #include "ptp.h" +#include "protobuf/PauseConfig.pb-c.h" #define MAX_CMDLINE 512 #define MAX_ARGS 128 -enum console_cmd_master_flags { +enum console_cmd_main_flags { CONSOLE_CMD_ASYNC = 1<<0, }; 
@@ -116,88 +122,17 @@ const char *console_endpoint = "ipc:///var/run/vplane.socket"; /* * Socket pair to send commands from the console thread to the - * master thread for execution and get response back. Note that + * main thread for execution and get response back. Note that * only a pass/fail response is returned. If command output is * required then the command must run only on the console thread. * * The only current user of this is the "reset" command. */ -const char *cmd_server_endpoint = "@inproc://master_cmd_event"; -const char *cmd_client_endpoint = ">inproc://master_cmd_event"; -static zsock_t *master_cmd_server; /* only to be used on master thread */ +const char *cmd_server_endpoint = "@inproc://main_cmd_event"; +const char *cmd_client_endpoint = ">inproc://main_cmd_event"; +static zsock_t *main_cmd_server; /* only to be used on main thread */ static zsock_t *console_cmd_client; /* only to be used on console thread */ -/* Control over debug settings */ -/* Keep this in sync with vplane_debug.h */ -static const char *debug_bits[] = { - "init", "link", "arp", "bridge", - "nl_interface", "nl_route", "nl_address", "nl_neighbor", - "nl_netconf", "subscribe", "resync", "nd6", - "route", "macvlan", "vxlan", "qos", - "npf", "nat", "l2tp", "lag", - "dealer", "nsh", - "vti", "crypto", "crypto_data", "vhost", - "vrf", "multicast", "mpls_control", - "mpls_pkterr", "", "dpi", "qos_dp", - "qos_hw", "storm_ctl", "cpp_rl", "ptp", -}; - -/* find debug bit based on name, allow abbreviation */ -static int find_debug_bit(const char *str) -{ - unsigned int i; - - for (i = 0; i < ARRAY_SIZE(debug_bits); i++) - if (strncmp(debug_bits[i], str, strlen(str)) == 0) - return i; - return -1; -} - -static void show_debug(FILE *f) -{ - unsigned int i; - - fprintf(f, "Debug %#lx", dp_debug); - for (i = 0; i < ARRAY_SIZE(debug_bits); i++) - if (dp_debug & (1ul<cur)); - - snprintf(label, sizeof(label), "%s_avg", name); - jsonw_name(wr, label); - jsonw_start_array(wr); - for (i = 0; i < 
3; i++) - jsonw_uint(wr, if_scaled(stats->avg[i])); - jsonw_end_array(wr); -} - -/* Interface performance counters - * Only maintained on physical and vif ports now. - */ -static void show_perf_stats(json_writer_t *wr, struct ifnet *ifp) -{ - show_perf_info(wr, "tx_pps", &ifp->if_txpps); - show_perf_info(wr, "tx_bps", &ifp->if_txbps); - show_perf_info(wr, "rx_pps", &ifp->if_rxpps); - show_perf_info(wr, "rx_bps", &ifp->if_rxbps); -} - -static void show_stats(json_writer_t *wr, struct ifnet *ifp) -{ - struct if_data stats; - - jsonw_name(wr, "statistics"); - jsonw_start_object(wr); - - if_stats(ifp, &stats); - jsonw_uint_field(wr, "rx_packets", stats.ifi_ipackets); - jsonw_uint_field(wr, "rx_errors", stats.ifi_ierrors); - jsonw_uint_field(wr, "tx_packets", stats.ifi_opackets); - jsonw_uint_field(wr, "tx_errors", stats.ifi_oerrors); - jsonw_uint_field(wr, "rx_bytes", stats.ifi_ibytes); - jsonw_uint_field(wr, "tx_bytes", stats.ifi_obytes); - - if_dump_state(ifp, wr, IF_DS_STATS); - - show_perf_stats(wr, ifp); - - jsonw_uint_field(wr, "rx_dropped", stats.ifi_idropped); - jsonw_uint_field(wr, "tx_dropped", ifi_odropped(&stats)); - jsonw_uint_field(wr, "tx_dropped_txring", stats.ifi_odropped_txring); - jsonw_uint_field(wr, "tx_dropped_hwq", stats.ifi_odropped_hwq); - jsonw_uint_field(wr, "tx_dropped_proto", stats.ifi_odropped_proto); - jsonw_uint_field(wr, "rx_bridge", stats.ifi_ibridged); - jsonw_uint_field(wr, "rx_multicast", stats.ifi_imulticast); - jsonw_uint_field(wr, "rx_vlan", stats.ifi_ivlan); - jsonw_uint_field(wr, "rx_bad_vid", stats.ifi_no_vlan); - jsonw_uint_field(wr, "rx_bad_address", stats.ifi_no_address); - jsonw_uint_field(wr, "rx_non_ip", stats.ifi_unknown); - - jsonw_end_object(wr); -} - -static void -show_xstats(json_writer_t *wr, struct ifnet *ifp) -{ - jsonw_name(wr, "xstatistics"); - jsonw_start_object(wr); - - if_dump_state(ifp, wr, IF_DS_XSTATS); - - jsonw_end_object(wr); -} - -static void show_if_l2_filter(json_writer_t *wr, struct ifnet *ifp) -{ 
- struct cds_lfht_iter iter; - struct l2_mcfltr_node *l2mf; - struct cds_lfht *tmp_hash; - - jsonw_name(wr, "l2_mcast_filters"); - - jsonw_start_object(wr); - - jsonw_uint_field(wr, "if_allmcast_ref", ifp->if_allmcast_ref); - jsonw_string_field(wr, "sw_filter", !ifp->if_allmcast_ref - ? (!ifp->if_mac_filtr_active - ? "promiscuous" : "active") : "disabled"); - jsonw_string_field(wr, "hw_filter", ifp->if_mac_filtr_supported ? - (!ifp->if_mac_filtr_active ? - "promiscuous" : "active") : "unsupported"); - jsonw_name(wr, "addresses"); - jsonw_start_array(wr); - tmp_hash = rcu_dereference(ifp->if_mcfltr_hash); - if (tmp_hash) { - cds_lfht_for_each_entry(tmp_hash, &iter, l2mf, l2mf_node) { - char ebuf[32]; - - jsonw_string(wr, ether_ntoa_r(&l2mf->l2mf_addr, ebuf)); - } - } - jsonw_end_array(wr); - jsonw_end_object(wr); -} - -static bool print_pl_feats(struct pl_feature_registration *feat_reg, - void *context) -{ - json_writer_t *wr = context; - - jsonw_string(wr, feat_reg->name); - - return true; -} - -static void show_af_ifconfig(json_writer_t *wr, struct ifnet *ifp) -{ - jsonw_name(wr, "ipv4"); - - jsonw_start_object(wr); - jsonw_uint_field(wr, "forwarding", - !pl_node_is_feature_enabled( - &ipv4_in_no_forwarding_feat, ifp)); - jsonw_uint_field(wr, "proxy_arp", ifp->ip_proxy_arp); - jsonw_string_field(wr, "garp_req_op", - (ifp->ip_garp_op.garp_req_action == GARP_PKT_DROP) ? - "Drop" : "Update"); - jsonw_string_field(wr, "garp_rep_op", - (ifp->ip_garp_op.garp_rep_action == GARP_PKT_DROP) ? 
- "Drop" : "Update"); - jsonw_uint_field(wr, "mc_forwarding", ifp->ip_mc_forwarding); - jsonw_uint_field(wr, "redirects", ip_redirects_get()); - if (pl_node_is_feature_enabled(&ipv4_rpf_feat, ifp)) { - if (ifp->ip_rpf_strict) - jsonw_uint_field(wr, "rp_filter", 1); - else - jsonw_uint_field(wr, "rp_filter", 2); - } else { - jsonw_uint_field(wr, "rp_filter", 0); - } - jsonw_name(wr, "validate_features"); - jsonw_start_array(wr); - pl_node_iter_features(ipv4_validate_node_ptr, ifp, print_pl_feats, wr); - jsonw_end_array(wr); - jsonw_name(wr, "out_features"); - jsonw_start_array(wr); - pl_node_iter_features(ipv4_out_node_ptr, ifp, print_pl_feats, wr); - jsonw_end_array(wr); - jsonw_end_object(wr); - - jsonw_name(wr, "ipv6"); - - jsonw_start_object(wr); - jsonw_uint_field(wr, "forwarding", - !pl_node_is_feature_enabled( - &ipv6_in_no_forwarding_feat, ifp)); - jsonw_uint_field(wr, "mc_forwarding", ifp->ip6_mc_forwarding); - jsonw_uint_field(wr, "redirects", ip6_redirects_get()); - jsonw_name(wr, "validate_features"); - jsonw_start_array(wr); - pl_node_iter_features(ipv6_validate_node_ptr, ifp, print_pl_feats, wr); - jsonw_end_array(wr); - jsonw_name(wr, "out_features"); - jsonw_start_array(wr); - pl_node_iter_features(ipv6_out_node_ptr, ifp, print_pl_feats, wr); - jsonw_end_array(wr); - jsonw_end_object(wr); -} - -struct ifconfig_ctx { - bool verbose; - json_writer_t *wr; -}; - -/* Show information generic interface in JSON */ -static void ifconfig(struct ifnet *ifp, void *arg) -{ - struct ifconfig_ctx *ctx = arg; - struct bridge_port *brport; - json_writer_t *wr = ctx->wr; - struct ifnet *parent; - char ebuf[32]; - - jsonw_start_object(wr); - - jsonw_string_field(wr, "name", ifp->if_name); - jsonw_uint_field(wr, "vrf_id", - vrf_get_external_id(ifp->if_vrfid)); - jsonw_uint_field(wr, "ifindex", ifp->if_index); - jsonw_uint_field(wr, "cont_src", ifp->if_cont_src); - parent = rcu_dereference(ifp->if_parent); - if (parent) - jsonw_string_field(wr, "parent", 
parent->if_name); - brport = rcu_dereference(ifp->if_brport); - if (brport) - jsonw_string_field(wr, "bridge", - bridge_port_get_bridge(brport)->if_name); - jsonw_uint_field(wr, "role", if_role(ifp)); - jsonw_uint_field(wr, "mtu", ifp->if_mtu); - jsonw_uint_field(wr, "flags", ifp->if_flags); - jsonw_uint_field(wr, "hw_forwarding", ifp->hw_forwarding); - jsonw_uint_field(wr, "tpid_offloaded", ifp->tpid_offloaded); - - /* - * These are deprecated in favour of the ipv4/ipv6 sub-objects - * but are retained for compatibility. - */ - jsonw_uint_field(wr, "ip_forwarding", - !pl_node_is_feature_enabled( - &ipv4_in_no_forwarding_feat, ifp)); - jsonw_uint_field(wr, "ip_proxy_arp", ifp->ip_proxy_arp); - jsonw_uint_field(wr, "ip_mc_forwarding", ifp->ip_mc_forwarding); - if (pl_node_is_feature_enabled(&ipv4_rpf_feat, ifp)) { - if (ifp->ip_rpf_strict) - jsonw_uint_field(wr, "ip_rp_filter", 1); - else - jsonw_uint_field(wr, "ip_rp_filter", 2); - } else { - jsonw_uint_field(wr, "ip_rp_filter", 0); - } - jsonw_uint_field(wr, "ip6_forwarding", - !pl_node_is_feature_enabled( - &ipv6_in_no_forwarding_feat, ifp)); - jsonw_uint_field(wr, "ip6_mc_forwarding", ifp->ip6_mc_forwarding); - - jsonw_uint_field(wr, "dp_id", 0); - jsonw_string_field(wr, "ether", - ether_ntoa_r(&ifp->eth_addr, ebuf)); - if (!is_zero_ether_addr(&ifp->perm_addr)) - jsonw_string_field(wr, "perm_addr", - ether_ntoa_r(&ifp->perm_addr, ebuf)); - - jsonw_name(wr, "ether_lookup_features"); - jsonw_start_array(wr); - pl_node_iter_features(ether_lookup_node_ptr, ifp, print_pl_feats, wr); - jsonw_end_array(wr); - - jsonw_string_field(wr, "type", iftype_name(ifp->if_type)); - - if_dump_state(ifp, wr, IF_DS_STATE); - if (ctx->verbose) - if_dump_state(ifp, wr, IF_DS_STATE_VERBOSE); - if_dump_state(ifp, wr, IF_DS_DEV_INFO); - - show_link_state(wr, ifp); - show_address(wr, ifp); - show_stats(wr, ifp); - show_xstats(wr, ifp); - show_if_l2_filter(wr, ifp); - show_af_ifconfig(wr, ifp); - - jsonw_end_object(wr); -} - -static void 
ifconfig_up(struct ifnet *ifp, void *arg) +static int cmd_led(FILE *f, int argc, char **argv) { - if (ifp->if_flags & IFF_UP) - ifconfig(ifp, arg); -} -static int cmd_ifconfig(FILE *f, int argc, char **argv) -{ - struct ifconfig_ctx ctx; json_writer_t *wr = jsonw_new(f); - if (!wr) - return -1; - - jsonw_pretty(wr, true); - jsonw_name(wr, "interfaces"); - jsonw_start_array(wr); - ctx.wr = wr; - ctx.verbose = false; - if (argc == 1) - ifnet_walk(ifconfig_up, &ctx); - else if (strcmp(argv[1], "-a") == 0) - ifnet_walk(ifconfig, &ctx); - else { - if (strcmp(argv[1], "-v") == 0) { - ctx.verbose = true; - argc--, argv++; - } - while (--argc > 0) { - struct ifnet *ifp = ifnet_byifname(*++argv); - if (ifp) - ifconfig(ifp, &ctx); - } - } - jsonw_end_array(wr); - jsonw_destroy(&wr); - - return 0; -} - -static int cmd_led(FILE *f, int argc, char **argv) -{ + jsonw_name(wr, "response"); + jsonw_start_object(wr); if (argc < 3) { - fprintf(f, "usage: led ifname on|off\n"); - return -1; + jsonw_string_field(wr, "msg", "Error"); + goto out; } - struct ifnet *ifp = ifnet_byifname(argv[1]); + struct ifnet *ifp = dp_ifnet_byifname(argv[1]); if (!ifp) { - fprintf(f, "unknown interface %s\n", argv[1]); - return -1; - } - - if (!ifp->if_local_port) { - fprintf(f, "wrong dataplane for interface\n"); - return -1; - } - - if (ifp->if_type != IFT_ETHER) { - fprintf(f, "blink only works on dataplane port\n"); - return -1; + jsonw_string_field(wr, "msg", "Error"); + goto out; } if (strcmp(argv[2], "on") == 0) { if (if_blink(ifp, true) < 0) { - fprintf(f, "device does not have led support\n"); - return -1; + jsonw_string_field(wr, "msg", + "Error"); + goto out; } - } else if (strcmp(argv[2], "off") == 0) + } else if (strcmp(argv[2], "off") == 0) { if_blink(ifp, false); - else { - fprintf(f, "expected on or off\n"); - return -1; + } else { + jsonw_string_field(wr, "msg", "Error"); + goto out; } + + jsonw_string_field(wr, "msg", "Ok"); +out: + jsonw_end_object(wr); + jsonw_destroy(&wr); return 
0; } @@ -713,7 +335,7 @@ static const char *arpstat_names[] = { "duplicate_ip", "dropped", "timeout", "proxy", "garp_reqs_dropped", "garp_reps_dropped", - "mpool_fail" + "mpool_fail", "mem_fail", "cache_limit" }; static void show_arpstat(json_writer_t *wr, struct vrf *vrf) @@ -831,7 +453,7 @@ static int cmd_netstat(FILE *f, int argc, char **argv) } } - vrf = vrf_get_rcu_from_external(vrf_id); + vrf = dp_vrf_get_rcu_from_external(vrf_id); if (vrf == NULL) { fprintf(f, "Unknown VRF ID\n"); jsonw_destroy(&wr); @@ -862,22 +484,13 @@ static int cmd_shadow(FILE *f, int argc, char **argv) static int cmd_ipsec_engine(FILE *f, int argc, char **argv) { - int rc; - argc -= 2; argv += 2; + (void) argc; if (strcmp(argv[0], "probe") == 0) return crypto_engine_probe(f); - if (strcmp(argv[0], "set") == 0) { - if (argc > 1) - rc = crypto_engine_set(f, argv[1]); - else - rc = crypto_engine_set(f, NULL); - return rc; - } - fprintf(f, "Invalid IPsec command\n"); return -1; } @@ -910,6 +523,8 @@ static int cmd_ipsec(FILE *f, int argc, char **argv) unsigned int run_cmds = 0; vrfid_t vrfid = VRF_DEFAULT_ID; + bool brief = false; + int brief_arg = 0; if (argc < 2 || strcmp(argv[1], "sad") == 0) { run_cmds |= CMD_IPSEC_SA; @@ -922,6 +537,13 @@ static int cmd_ipsec(FILE *f, int argc, char **argv) vrfid = cmd_ipsec_getvrf(f, argc, argv); if (vrfid < VRF_DEFAULT_ID) return -1; + + if (argc == 3) + brief_arg = 2; + else if (argc == 5) + brief_arg = 4; + if (strcmp(argv[brief_arg], "brief") == 0) + brief = true; } if (argc < 2 || strcmp(argv[1], "bind") == 0) { run_cmds |= CMD_IPSEC_BIND; @@ -946,7 +568,7 @@ static int cmd_ipsec(FILE *f, int argc, char **argv) if (argc > 2 && strcmp(argv[1], "engine") == 0) run_cmds = CMD_IPSEC_ENGINE; - if (argc > 4 || + if (argc > 5 || (argc > 2 && run_cmds & ~(CMD_IPSEC_CACHE | CMD_IPSEC_LISTENER | CMD_IPSEC_ENGINE | CMD_IPSEC_SA | CMD_IPSEC_POLICY | @@ -959,7 +581,7 @@ static int cmd_ipsec(FILE *f, int argc, char **argv) if (run_cmds & CMD_IPSEC_SA) 
crypto_sadb_show_summary(f, vrfid); if (run_cmds & CMD_IPSEC_POLICY) - crypto_policy_show_summary(f, vrfid); + crypto_policy_show_summary(f, vrfid, brief); if (run_cmds & CMD_IPSEC_BIND) crypto_policy_bind_show_summary(f, vrfid); if (run_cmds & CMD_IPSEC_COUNTERS) @@ -1134,7 +756,7 @@ int cmd_affinity_cfg(FILE *f, int argc, char **argv) return -1; } - struct ifnet *ifp = ifnet_byifindex(ifindex); + struct ifnet *ifp = dp_ifnet_byifindex(ifindex); if (ifp == NULL) { fprintf(f, "unknown ifindex %u\n", ifindex); return -1; @@ -1213,7 +835,7 @@ static int cmd_local(FILE *f, int argc, char **argv) if (strcmp(argv[1], "vrf_id") == 0) vrf_id = atoi(argv[2]); } - vrf = vrf_get_rcu_from_external(vrf_id); + vrf = dp_vrf_get_rcu_from_external(vrf_id); if (!vrf) return -1; @@ -1242,7 +864,7 @@ static int cmd_vrf(FILE *f, int argc __unused, char **argv __unused) if (vrf) { jsonw_start_object(wr); jsonw_uint_field(wr, "vrf_id", - vrf_get_external_id(vrf->v_id)); + dp_vrf_get_external_id(vrf->v_id)); jsonw_uint_field(wr, "internal_vrf_id", vrf->v_id); jsonw_uint_field(wr, "ref_count", vrf->v_ref_count); jsonw_end_object(wr); @@ -1253,103 +875,6 @@ static int cmd_vrf(FILE *f, int argc __unused, char **argv __unused) return 0; } -static int cmd_log_level(FILE *f, int argc, char **argv) -{ - if (argc > 1) - rte_log_set_global_level(atoi(argv[1])); - else { - json_writer_t *wr = jsonw_new(f); - - jsonw_uint_field(wr, "level", rte_log_get_global_level()); - jsonw_destroy(&wr); - } - - return 0; -} - -/* Log types (see rte_log.h) */ -static const char *log_type_bits[] = { - [0] = "EAL", [1] = "MALLOC", [2] = "RING", [3] = "MEMPOOL", - [4] = "TIMER", [5] = "PMD", [6] = "HASH", [7] = "LPM", - [8] = "KNI", [9] = "ACL", [10] = "POWER", [11] = "METER", - [12] = "SCHED", [13] = "PORT", [14] = "TABLE", [15] = "PIPELINE", - [16] = "MBUF", [17] = "CRYPTODEV", [18] = "EFD", [19] = "EVENTDEV", - - [24] = "USER1", [25] = "USER2", [26] = "USER3", [27] = "USER4", - [28] = "USER5", [29] = "USER6", 
[30] = "USER7", [31] = "USER8", -}; - -static int cmd_log_type(FILE *f, int argc, char **argv) -{ - unsigned int i; - unsigned int log_type_size = ARRAY_SIZE(log_type_bits); - const char *name; - int level; - - if (argc == 1) { - json_writer_t *wr = jsonw_new(f); - - for (i = 0; i < log_type_size; i++) { - name = log_type_bits[i]; - if (!name) - continue; - level = rte_log_get_level(i); - if (level < 0) - continue; - jsonw_int_field(wr, name, level); - } - jsonw_destroy(&wr); - return 0; - } - - while (--argc) { - const char *arg = *++argv; - int enable = 1; - - if (*arg == '-') { - enable = 0; - ++arg; - } - - for (i = 0; i < log_type_size; i++) { - name = log_type_bits[i]; - if (!name) - continue; - if (strcasecmp(name, arg) == 0) { - rte_log_set_level(i, - enable ? RTE_LOG_DEBUG - : rte_log_get_global_level()); - break; - } - } - if (i == log_type_size) { - fprintf(f, "%s unknown log type\n", arg); - return -1; - } - } - return 0; -} - -static int cmd_log(FILE *f, int argc, char **argv) -{ - - if (argc == 1) { - fprintf(f, "missing log command\n"); - return -1; - } - --argc, ++argv; - - if (strcmp(argv[0], "level") == 0) - return cmd_log_level(f, argc, argv); - else if (strcmp(argv[0], "type") == 0) - return cmd_log_type(f, argc, argv); - else { - fprintf(f, "unknown log command: %s\n", argv[0]); - return -1; - } - -} - /* Display help */ static int cmd_help(FILE *f, int argc __unused, char **argv __unused) { @@ -1388,7 +913,7 @@ static int cmd_snmp(FILE *f, int argc, char **argv) } argv += 2; } - vrf = vrf_get_rcu_from_external(vrf_id); + vrf = dp_vrf_get_rcu_from_external(vrf_id); if (vrf == NULL) { fprintf(f, "Unknown VRF ID\n"); return -1; @@ -1477,17 +1002,17 @@ static void l2tp_show_session(void *s, void *arg) jsonw_string_field(wr, "ifname", session->ifp ? session->ifp->if_name : ""); jsonw_uint_field(wr, "ip_forwarding", - session->ifp ? !pl_node_is_feature_enabled( + session->ifp ? 
!pl_node_is_feature_enabled_by_inst( &ipv4_in_no_forwarding_feat, session->ifp) : 0); jsonw_uint_field(wr, "ipv6_forwarding", - session->ifp ? !pl_node_is_feature_enabled( + session->ifp ? !pl_node_is_feature_enabled_by_inst( &ipv6_in_no_forwarding_feat, session->ifp) : 0); struct ifnet *ifp_xconnect = NULL; if (session->xconnect_ifidx) - ifp_xconnect = ifnet_byifindex(session->xconnect_ifidx); + ifp_xconnect = dp_ifnet_byifindex(session->xconnect_ifidx); jsonw_string_field(wr, "xconnect_ifname", ifp_xconnect ? ifp_xconnect->if_name : ""); jsonw_uint_field(wr, "xconnect_ttl", session->ttl); @@ -1553,7 +1078,9 @@ static int cmd_l2tp(FILE *f, int argc, char **argv) if (argc < 6) return -1; return l2tp_set_xconnect(argv[2], argv[3], argv[4], argv[5]); - } else if (strcmp(argv[1], "-s") == 0) { + } + + if (strcmp(argv[1], "-s") == 0) { json_writer_t *wr = jsonw_new(f); if (!wr) @@ -1566,7 +1093,7 @@ static int cmd_l2tp(FILE *f, int argc, char **argv) if (argc == 2) l2tp_session_walk(l2tp_show_session, wr); else { - struct ifnet *ifp = ifnet_byifname(argv[2]); + struct ifnet *ifp = dp_ifnet_byifname(argv[2]); if (ifp && ifp->if_softc) { struct l2tp_softc *sc = ifp->if_softc; l2tp_show_session(sc->sclp_session, wr); @@ -1576,7 +1103,9 @@ static int cmd_l2tp(FILE *f, int argc, char **argv) jsonw_destroy(&wr); return 0; - } else if (strcmp(argv[1], "-t") == 0) { + } + + if (strcmp(argv[1], "-t") == 0) { json_writer_t *wr = jsonw_new(f); if (!wr) @@ -1592,7 +1121,9 @@ static int cmd_l2tp(FILE *f, int argc, char **argv) jsonw_end_array(wr); jsonw_destroy(&wr); return 0; - } else if (strcmp(argv[1], "clear") == 0) { + } + + if (strcmp(argv[1], "clear") == 0) { if (argc == 2) l2tp_init_stats(NULL); else @@ -1610,138 +1141,135 @@ static int cmd_lag(FILE *f, int argc __unused, char **argv __unused) return 0; } -static struct cfg_if_list *speed_cfg_list; - -static void -speed_event_if_index_set(struct ifnet *ifp, uint32_t ifindex __unused); -static void 
-speed_event_if_index_unset(struct ifnet *ifp, uint32_t ifindex __unused); - -static const struct dp_event_ops speed_event_ops = { - .if_index_set = speed_event_if_index_set, - .if_index_unset = speed_event_if_index_unset, -}; - -static void -speed_event_if_index_set(struct ifnet *ifp, uint32_t ifindex __unused) +/* + * Set the speed and duplex of an interface + * + * speed set [auto|full|half] + */ +static int +cmd_speed_handler(struct pb_msg *msg) { - struct cfg_if_list_entry *le; + void *payload = (void *)((char *)msg->msg); + int len = msg->msg_len; + int ret = 0; - if (!speed_cfg_list) - return; - - le = cfg_if_list_lookup(speed_cfg_list, ifp->if_name); - if (!le) - return; + SpeedConfig *smsg = speed_config__unpack(NULL, len, payload); - cmd_speed(NULL, le->le_argc, le->le_argv); - cfg_if_list_del(speed_cfg_list, ifp->if_name); - if (!speed_cfg_list->if_list_count) { - dp_event_unregister(&speed_event_ops); - cfg_if_list_destroy(&speed_cfg_list); + if (!smsg) { + RTE_LOG(ERR, DATAPLANE, + "failed to read SpeedConfig protobuf command\n"); + return -1; } -} - -static void -speed_event_if_index_unset(struct ifnet *ifp, uint32_t ifindex __unused) -{ - if (!speed_cfg_list) - return; - cfg_if_list_del(speed_cfg_list, ifp->if_name); - if (!speed_cfg_list->if_list_count) { - dp_event_unregister(&speed_event_ops); - cfg_if_list_destroy(&speed_cfg_list); + struct ifnet *ifp; + uint32_t speed; + int duplex = -1; + bool autoneg = false; + + duplex = (smsg->duplex_option); + switch (smsg->speed_case) { + case SPEED_CONFIG__SPEED_NUMSPEED: + speed = (smsg->numspeed); + if (speed == 0) { + ret = -1; + goto OUT; + } + break; + default: + autoneg = true; + speed = 0; + break; } -} - -static int speed_replay_init(void) -{ - if (!speed_cfg_list) { - speed_cfg_list = cfg_if_list_create(); - if (!speed_cfg_list) - return -ENOMEM; - dp_event_register(&speed_event_ops); + ifp = dp_ifnet_byifname(smsg->ifname); + if (!ifp) { + RTE_LOG(ERR, DATAPLANE, + "speed applied, but interface 
missing %s\n", + smsg->ifname); + goto OUT; } - return 0; + if_set_speed(ifp, autoneg, speed, duplex); + + OUT: + speed_config__free_unpacked(smsg, NULL); + return ret; } -/* - * Set the speed and duplex of an interface - * - * speed set [auto|full|half] - */ -int cmd_speed(FILE *f, int argc, char **argv) +PB_REGISTER_CMD(speed_cmd) = { + .cmd = "vyatta:speed", + .handler = cmd_speed_handler, +}; + +static int cmd_synce_handler(struct pb_msg *msg) { - if (argc < 2) - goto usage; + void *payload = (void *)((char *)msg->msg); + struct fal_attribute_t synce_attr; + int len = msg->msg_len; + int ret = -1; + int action = -1; + int ifindex = -1; - if ((strcmp(argv[1], "set") == 0) && argc > 3 && argc < 6) { - struct ifnet *ifp; - uint32_t speed, link_speeds; - int duplex = -1; /* assume auto duplex */ - - if (argc == 5) { - if (strcmp(argv[4], "full") == 0) - duplex = 1; - else if (strcmp(argv[4], "half") == 0) - duplex = 0; - else if (strcmp(argv[4], "auto") == 0) - duplex = -1; - else - goto usage; - } + SynceConfig *smsg = synce_config__unpack(NULL, len, payload); - if (strcmp(argv[3], "auto") == 0) { - link_speeds = ETH_LINK_SPEED_AUTONEG; - } else { - speed = atoi(argv[3]); - if (speed == 0) - goto usage; - if (duplex == -1) { - /* - * Most speeds don't have a separate half- - * and full-duplex so or'ing their bitflags - * together is harmless. 
- */ - link_speeds = rte_eth_speed_bitflag(speed, 0) | - rte_eth_speed_bitflag(speed, 1); - } else { - link_speeds = - rte_eth_speed_bitflag(speed, duplex); - } - link_speeds |= ETH_LINK_SPEED_FIXED; + if (!smsg) { + RTE_LOG(ERR, DATAPLANE, + "Failed to read SynceConfig protobuf command\n"); + return ret; + } + + action = smsg->action; + ifindex = smsg->ifindex; + + switch (action) { + case SYNCE_CONFIG__ACTION__SYNCE_ENABLE_INTF: + synce_attr.id = FAL_PORT_ATTR_SYNCE_ADMIN_STATUS; + synce_attr.value.u8 = FAL_PORT_SYNCE_ENABLE; + ret = fal_l2_upd_port(ifindex, &synce_attr); + if (ret < 0) { + RTE_LOG(ERR, DATAPLANE, "ENABLE_INTF failed for " + "ifindex:%d, %d (%s)\n", ifindex, ret, + strerror(ret)); + } + break; + case SYNCE_CONFIG__ACTION__SYNCE_DISABLE_INTF: + synce_attr.id = FAL_PORT_ATTR_SYNCE_ADMIN_STATUS; + synce_attr.value.u8 = FAL_PORT_SYNCE_DISABLE; + ret = fal_l2_upd_port(ifindex, &synce_attr); + if (ret < 0) { + RTE_LOG(ERR, DATAPLANE, "DISABLE_INTF failed for " + "ifindex:%d, %d (%s)\n", ifindex, ret, + strerror(ret)); } - ifp = ifnet_byifname(argv[2]); - if (!ifp) { - bool failed = true; + break; + case SYNCE_CONFIG__ACTION__SYNCE_SET_CLK_SRC: + synce_attr.id = FAL_SWITCH_ATTR_SYNCE_CLOCK_SOURCE_PORT; + synce_attr.value.u32 = ifindex; - if (speed_replay_init() == 0) { - cfg_if_list_add(speed_cfg_list, - argv[2], argc, argv); - failed = false; - } - RTE_LOG(ERR, DATAPLANE, - "%s: failed to find %s (caching%s)\n", - __func__, argv[2], failed ? 
" failed" : ""); - goto out; + ret = fal_set_switch_attr(&synce_attr); + + if (ret < 0) { + RTE_LOG(ERR, DATAPLANE, "CMD_SET_CLK_SRC failed for " + "ifindex:%d, %d (%s)\n", ifindex, ret, + strerror(ret)); } - set_speed(ifp, link_speeds); - } else - goto usage; + break; + default: + RTE_LOG(ERR, DATAPLANE, "%s %d", __func__, __LINE__); + break; + } -out: - return 0; + synce_config__free_unpacked(smsg, NULL); + smsg = NULL; -usage: - if (f) - fprintf(f, "%s: speed set " - "[auto|full|half]", __func__); - return -1; + return ret; } +PB_REGISTER_CMD(synce_cmd) = { + .cmd = "vyatta:synce", + .handler = cmd_synce_handler, +}; + static const char *poe_class_to_string(fal_port_poe_class_t class) { switch (class) { @@ -1799,63 +1327,6 @@ static void poe_status(struct ifnet *ifp, void *arg) } } -static struct cfg_if_list *poe_cfg_list; - -static void -poe_event_if_index_set(struct ifnet *ifp, uint32_t ifindex __unused); -static void -poe_event_if_index_unset(struct ifnet *ifp, uint32_t ifindex __unused); - -static const struct dp_event_ops poe_event_ops = { - .if_index_set = poe_event_if_index_set, - .if_index_unset = poe_event_if_index_unset, -}; - -static void -poe_event_if_index_set(struct ifnet *ifp, uint32_t ifindex __unused) -{ - struct cfg_if_list_entry *le; - - if (!poe_cfg_list) - return; - - le = cfg_if_list_lookup(poe_cfg_list, ifp->if_name); - if (!le) - return; - - cmd_poe(NULL, le->le_argc, le->le_argv); - cfg_if_list_del(poe_cfg_list, ifp->if_name); - if (!poe_cfg_list->if_list_count) { - dp_event_unregister(&poe_event_ops); - cfg_if_list_destroy(&poe_cfg_list); - } -} - -static void -poe_event_if_index_unset(struct ifnet *ifp, uint32_t ifindex __unused) -{ - if (!poe_cfg_list) - return; - - cfg_if_list_del(poe_cfg_list, ifp->if_name); - if (!poe_cfg_list->if_list_count) { - dp_event_unregister(&poe_event_ops); - cfg_if_list_destroy(&poe_cfg_list); - } -} - -static int poe_replay_init(void) -{ - if (!poe_cfg_list) { - poe_cfg_list = cfg_if_list_create(); - if 
(!poe_cfg_list) - return -ENOMEM; - - dp_event_register(&poe_event_ops); - } - return 0; -} - /* * Set the PoE mode of an interface * @@ -1867,7 +1338,7 @@ int cmd_poe(FILE *f, int argc, char **argv) { struct ifnet *ifp; fal_port_poe_priority_t priority = FAL_PORT_POE_PRIORITY_LOW; - struct fal_attribute_t poe_attr = { FAL_PORT_ATTR_POE_PRIORITY, }; + struct fal_attribute_t poe_attr = { .id = FAL_PORT_ATTR_POE_PRIORITY, }; int rc = 0; if (argc < 2) @@ -1890,19 +1361,13 @@ int cmd_poe(FILE *f, int argc, char **argv) goto usage; } - ifp = ifnet_byifname(argv[2]); + ifp = dp_ifnet_byifname(argv[2]); if (!ifp) { - if (poe_replay_init() == 0) { - cfg_if_list_add(poe_cfg_list, - argv[2], argc, argv); - rc = 0; - } - RTE_LOG(ERR, DATAPLANE, - "%s: failed to find %s (%scaching)\n", - __func__, argv[2], rc ? "not " : ""); - fprintf(f, "%s: failed to find %s (%scaching)\n", - __func__, argv[2], rc ? "not " : ""); + "poe applied, but interface missing %s\n", + argv[2]); + fprintf(f, "%s: failed to find %s\n", + __func__, argv[2]); goto err_out; } @@ -1935,7 +1400,7 @@ static int cmd_poe_op(FILE *f, int argc, char **argv) if ((strcmp(argv[1], "status") == 0)) { if (argc == 3) { - ifp = ifnet_byifname(argv[2]); + ifp = dp_ifnet_byifname(argv[2]); if (!ifp) { RTE_LOG(ERR, DATAPLANE, "%s: failed to find %s\n", @@ -1953,7 +1418,7 @@ static int cmd_poe_op(FILE *f, int argc, char **argv) if (ifp) poe_status(ifp, wr); else - ifnet_walk(poe_status, wr); + dp_ifnet_walk(poe_status, wr); jsonw_end_array(wr); jsonw_destroy(&wr); @@ -1987,12 +1452,96 @@ static int cmd_ring(FILE *f, int argc, char **argv) return 0; } +static int +cmd_pause_handler(struct pb_msg *msg) +{ + struct fal_attribute_t attr; + void *payload = msg->msg; + int len = msg->msg_len; + struct ifnet *ifp; + int rv = 0; + enum fal_port_flow_control_mode_t pause_mode; + + PauseConfig *bmsg = pause_config__unpack(NULL, len, payload); + + if (!bmsg) { + RTE_LOG(ERR, DATAPLANE, + "failed to read PauseConfig protobuf 
command\n"); + return -1; + } + + switch (bmsg->mtype_case) { + case PAUSE_CONFIG__MTYPE_PAUSEIF: + if (!bmsg->pauseif->ifname) + goto free_msg; + + ifp = dp_ifnet_byifname(bmsg->pauseif->ifname); + if (!ifp) { + RTE_LOG(ERR, DATAPLANE, + "Invalid interface in PauseConfig protobuf command\n"); + rv = -1; + goto free_msg; + } + + RTE_LOG(DEBUG, DATAPLANE, + "Rcvd Pause Mode: %d of interface %s\n", + bmsg->pauseif->value, ifp->if_name); + + switch (bmsg->pauseif->value) { + case PAUSE_CONFIG__PAUSE_VALUE__NONE: + pause_mode = FAL_PORT_FLOW_CONTROL_MODE_DISABLE; + break; + case PAUSE_CONFIG__PAUSE_VALUE__RX: + pause_mode = FAL_PORT_FLOW_CONTROL_MODE_RX_ONLY; + break; + case PAUSE_CONFIG__PAUSE_VALUE__TX: + pause_mode = FAL_PORT_FLOW_CONTROL_MODE_TX_ONLY; + break; + case PAUSE_CONFIG__PAUSE_VALUE__BOTH: + pause_mode = FAL_PORT_FLOW_CONTROL_MODE_BOTH_ENABLE; + break; + default: + RTE_LOG(ERR, DATAPLANE, + "Unknown PAUSE_CONFIG__PAUSE_VALUE\n"); + rv = -1; + goto free_msg; + } + break; + default: + RTE_LOG(ERR, DATAPLANE, + "Unknown mtype pauseif message :"); + rv = -1; + goto free_msg; + } + + attr.value.u8 = pause_mode; + ifp->if_pause = pause_mode; + attr.id = FAL_PORT_ATTR_GLOBAL_FLOW_CONTROL_MODE; + + rv = fal_l2_upd_port(ifp->if_index, &attr); + if (rv < 0) { + RTE_LOG(ERR, DATAPLANE, + "Fal l2 update for port: %d Failed\n", ifp->if_index); + } + +free_msg: + pause_config__free_unpacked(bmsg, NULL); + return rv; +} + +PB_REGISTER_CMD(pause_cmd) = { + .cmd = "vyatta:pause", + .handler = cmd_pause_handler, +}; + static struct cfg_if_list *breakout_cfg_list; static void -breakout_event_if_index_set(struct ifnet *ifp, uint32_t ifindex __unused); +breakout_event_if_index_set(struct ifnet *ifp); static void breakout_event_if_index_unset(struct ifnet *ifp, uint32_t ifindex __unused); +static int +cmd_breakout_handler(struct pb_msg *msg); static const struct dp_event_ops breakout_event_ops = { .if_index_set = breakout_event_if_index_set, @@ -2000,9 +1549,10 @@ static const 
struct dp_event_ops breakout_event_ops = { }; static void -breakout_event_if_index_set(struct ifnet *ifp, uint32_t ifindex __unused) +breakout_event_if_index_set(struct ifnet *ifp) { struct cfg_if_list_entry *le; + struct pb_msg msg; if (!breakout_cfg_list) return; @@ -2011,7 +1561,11 @@ breakout_event_if_index_set(struct ifnet *ifp, uint32_t ifindex __unused) if (!le) return; - cmd_breakout(NULL, le->le_argc, le->le_argv); + msg.msg_len = le->le_argc; + msg.msg = le->le_buf; + msg.fp = NULL; + cmd_breakout_handler(&msg); + cfg_if_list_del(breakout_cfg_list, ifp->if_name); if (!breakout_cfg_list->if_list_count) { dp_event_unregister(&breakout_event_ops); @@ -2044,40 +1598,133 @@ static int breakout_replay_init(void) return 0; } -int cmd_breakout(FILE *f, int argc, char **argv) +static int +cmd_breakout_handler(struct pb_msg *msg) { + struct ifnet *reserved_ifp = NULL; struct fal_attribute_t attr; + void *payload = msg->msg; + int len = msg->msg_len; + struct ifnet *ifp; + BreakoutConfig *bmsg = breakout_config__unpack(NULL, len, payload); - if (argc < 4) { - fprintf(f, "Usage: breakout SET|DELETE "); + if (!bmsg) { + RTE_LOG(ERR, DATAPLANE, + "failed to read BreakoutConfig protobuf command\n"); return -1; } - struct ifnet *ifp = ifnet_byifname(argv[2]); - if (!ifp) { - if (breakout_replay_init() == 0) - cfg_if_list_add(breakout_cfg_list, - argv[2], argc, argv); - return 0; - } + switch (bmsg->mtype_case) { + case BREAKOUT_CONFIG__MTYPE_BREAKOUTIF: + ifp = dp_ifnet_byifname(bmsg->breakoutif->ifname); + if (!ifp) { + if (breakout_replay_init() == 0) + cfg_if_list_bin_add(breakout_cfg_list, + bmsg->breakoutif->ifname, + payload, len); + goto free_msg; + } + /* reserved interface is optional */ + if (bmsg->breakoutif->reservedifname) { + reserved_ifp = dp_ifnet_byifname( + bmsg->breakoutif->reservedifname); + if (!reserved_ifp) { + if (breakout_replay_init()) + goto free_msg; + cfg_if_list_bin_add( + breakout_cfg_list, + bmsg->breakoutif->reservedifname, + payload, 
len); + goto free_msg; + } + } + + attr.id = FAL_PORT_ATTR_BREAKOUT; + switch (bmsg->breakoutif->action) { + case BREAKOUT_CONFIG__ACTION__DELETE: + RTE_LOG(INFO, DATAPLANE, + "Removing breakout of interface %s\n", + ifp->if_name); + attr.value.u8 = 0; + ifp->if_broken_out = 0; + if (reserved_ifp) + reserved_ifp->if_broken_out = 0; + break; + case BREAKOUT_CONFIG__ACTION__SET: + if (bmsg->breakoutif->numsubports > UINT8_MAX) { + RTE_LOG(INFO, DATAPLANE, + "number of sub-ports too large in BreakoutConfig message: %d, max %d\n", + bmsg->breakoutif->numsubports, + UINT8_MAX); + goto free_msg; + } + if (reserved_ifp) { + /* + * For the purposes of the dataplane, + * this interface is broken out. No + * need to notify FAL plugin beyond + * the feature cleanup. + */ + reserved_ifp->if_broken_out = 1; + if_notify_emb_feat_change(reserved_ifp); + RTE_LOG(INFO, DATAPLANE, + "Reserved breakout interface %s\n", + reserved_ifp->if_name); + } + attr.value.u8 = bmsg->breakoutif->numsubports; + RTE_LOG(INFO, DATAPLANE, + "Breaking out interface %s into %d ports\n", + ifp->if_name, attr.value.u8); + ifp->if_broken_out = 1; + if_notify_emb_feat_change(ifp); + break; + default: + goto free_msg; + } - attr.id = FAL_PORT_ATTR_BREAKOUT; - if (!strcmp(argv[1], "DELETE")) - attr.value.u8 = 0; - else if (!strcmp(argv[1], "SET")) - attr.value.u8 = atoi(argv[3]); + fal_l2_upd_port(ifp->if_index, &attr); - fal_l2_upd_port(ifp->if_index, &attr); + /* + * only do this after updating FAL to avoid incorrect + * transient state + */ + if (!ifp->if_broken_out) + if_notify_emb_feat_change(ifp); + if (reserved_ifp && !reserved_ifp->if_broken_out) + if_notify_emb_feat_change(reserved_ifp); + break; + default: + RTE_LOG(INFO, DATAPLANE, + "unhandled BreakoutConfig message type %d\n", + bmsg->mtype_case); + goto free_msg; + } + +free_msg: + breakout_config__free_unpacked(bmsg, NULL); return 0; } +PB_REGISTER_CMD(breakout_cmd) = { + .cmd = "vyatta:breakout", + .handler = cmd_breakout_handler, +}; + 
static int cmd_vlan_mod(FILE *f, int argc __unused, char **argv __unused) { vlan_mod_cmd(f, argc, argv); return 0; } +struct dynamic_op_command_entry { + cmd_t cmd; + struct cds_list_head list_entry; +}; + +static struct cds_list_head dynamic_op_command_list_head = + CDS_LIST_HEAD_INIT(dynamic_op_command_list_head); + /* * Table of possible commands. * Add new commands in alpha order to keep help output sorted. @@ -2086,6 +1733,7 @@ static const cmd_t cmd_table[] = { { 0, "affinity", cmd_affinity, "Show/set CPU affinity" }, { 0, "app-op", cmd_app_op, "Application commands" }, { 0, "arp", cmd_arp, "Show/reset ARP table" }, + { 1, "arp", cmd_arp, "Show/reset ARP table" }, { 0, "backplane", cmd_backplane_op, "Backplane op mode cmds" }, { 0, "bridge", cmd_bridge, "Show/clear bridge MAC table" }, { 0, "capture", cmd_capture, "Enable/disable packet capture" }, @@ -2096,12 +1744,14 @@ static const cmd_t cmd_table[] = { { 0, "debug", cmd_debug, "Debug logging level" }, { 0, "ecmp", cmd_ecmp, "Show/set ecmp options" }, { 0, "fal", cmd_fal, "FAL debugging commands" }, + { 1, "gpc", cmd_gpc_op, "GPC OP mode information" }, { 0, "gre", cmd_gre, "Show gre information" }, { 0, "help", cmd_help, "This help" }, { 0, "hotplug", cmd_hotplug, "Hotplug event" }, { 0, "ifconfig", cmd_ifconfig, "Show interface settings" }, { 1, "ifconfig", cmd_ifconfig, "Show interface settings" }, { 2, "ifconfig", cmd_ifconfig, "Show interface settings" }, + { 3, "ifconfig", cmd_ifconfig, "Show interface settings" }, { 0, "incomplete", cmd_incomplete, "Show incomplete stats" }, { 0, "ipsec", cmd_ipsec, "Show IPsec information" }, { 0, "l2tpeth", cmd_l2tp, "Show l2tp sessions" }, @@ -2109,35 +1759,35 @@ static const cmd_t cmd_table[] = { { 0, "led", cmd_led, "Toggle interface LED" }, { 0, "local", cmd_local, "Show local IP addresses" }, { 0, "log", cmd_log, "Show log messages" }, - { 0, "master", cmd_master, "state machine information" }, + { 0, "main", cmd_main, "state machine information" }, { 0, 
"memory", cmd_memory, "Memory pool statistics" }, { 0, "mode", cmd_power_show, "Power management mode" }, { 0, "mpls", cmd_mpls, "Show mpls information" }, + { 1, "mpls", cmd_mpls, "Show mpls information" }, { 0, "mstp-op", cmd_mstp_op, "MSTP operational commands" }, { 0, "mstp-ut", cmd_mstp_ut, "MSTP unit-test" }, { 0, "multicast", cmd_multicast, "Multicast information" }, { 0, "nat-op", cmd_nat_op, "NAT OP mode" }, { 0, "nat-ut", cmd_nat_ut, "NAT UT mode" }, - { 1, "nd6", cmd_nd6, "IPv6 Neighbour discovery" }, + { 2, "nd6", cmd_nd6, "IPv6 Neighbour discovery" }, { 0, "netstat", cmd_netstat, "Network statistics" }, - { 0, "npf-op", cmd_npf_op, "NPF (FW/NAT/PBR) OP mode" }, + { 1, "npf-op", cmd_npf_op, "NPF (FW/NAT/PBR) OP mode" }, { 1, "npf-ut", cmd_npf_ut, "NPF (FW/NAT/PBR) UT mode" }, { 1, "pathmonitor", cmd_pathmonitor, "pathmonitor command" }, { 1, "pd", cmd_pd, "Platform dependent data" }, { 0, "pipeline", op_pipeline, "Pipeline op dispatcher" }, + { 0, "feat-plugin", cmd_feat_plugin,"Feature plugin commands" }, { 0, "poe", cmd_poe_op, "poe commands" }, { 0, "poe-ut", cmd_poe_ut, "poe commands" }, { 0, "portmonitor", cmd_portmonitor, "portmonitor command" }, + { 1, "portmonitor", cmd_portmonitor, "portmonitor command" }, { 0, "ptp", cmd_ptp_op, "PTP commands" }, { 0, "ptp-ut", cmd_ptp_ut, "PTP (unit tests)" }, - { 1, "qos", cmd_qos_op, "Show Qos information" }, - { 2, "qos", cmd_qos_op, "Show Qos information" }, - { 3, "qos", cmd_qos_op, "Show Qos information" }, - { 4, "qos", cmd_qos_op, "Show Qos information" }, - { 5, "qos", cmd_qos_op, "Show Qos information" }, + { 9, "qos", cmd_qos_op, "Show Qos information" }, { 0, "reset", cmd_reset, "Reset dataplane" }, { 0, "ring", cmd_ring, "Display ring information" }, { 0, "route", cmd_route, "Display routing information" }, + { 1, "route", cmd_route, "Display routing information" }, { 0, "route6", cmd_route6, "Display ipv6 routing information" }, { 0, "rt-tracker", cmd_rt_tracker_op, "Route Tracker commands" }, { 
0, "session-op", cmd_session_op, "Display session table info" }, @@ -2154,6 +1804,7 @@ static const cmd_t cmd_table[] = { { 0, "vlan_mod", cmd_vlan_mod, "show vlan_mod info" }, { 0, "vrf", cmd_vrf, "Show VRF information" }, { 0, "vxlan", cmd_vxlan, "VXLAN commands" }, + { 0, "mac-limit", cmd_mac_limit_op, "MAC limiting commands" }, { 0, NULL, NULL, NULL } }; @@ -2166,15 +1817,83 @@ void list_all_cmd_versions(FILE *f) static const struct cmd *find_cmd(const struct cmd *tbl, const char *name) { const cmd_t *cmd = NULL; + struct dynamic_op_command_entry *dynamic_op_cmd = NULL; if (name[0] != '#' && name[0] != 0) { for (cmd = tbl; cmd->name; ++cmd) if (strcmp(cmd->name, name) == 0) return cmd; } + + /* And check the dynamically registered commands too */ + cds_list_for_each_entry_rcu(dynamic_op_cmd, + &dynamic_op_command_list_head, + list_entry) { + if (strcmp(dynamic_op_cmd->cmd.name, name) == 0) + return &dynamic_op_cmd->cmd; + } + return NULL; } +int dp_feature_register_string_op_handler(const char *name, + const char *help, + feature_string_op_fn *fn) +{ + const cmd_t *cmd = NULL; + struct dynamic_op_command_entry *dynamic_op_cmd; + + if (!name || !fn || !help) + return -EINVAL; + + cmd = find_cmd(cmd_table, name); + if (cmd) { + RTE_LOG(ERR, DATAPLANE, + "Can not register op cmd. Cmd %s already exists\n", + cmd->name); + return -EINVAL; + } + + dynamic_op_cmd = calloc(1, sizeof(*dynamic_op_cmd)); + if (!dynamic_op_cmd) { + RTE_LOG(ERR, DATAPLANE, + "Can not register op cmd. No memory\n"); + return -EINVAL; + } + + dynamic_op_cmd->cmd.version = 0; + dynamic_op_cmd->cmd.name = strdup(name); + if (!dynamic_op_cmd->cmd.name) { + RTE_LOG(ERR, DATAPLANE, + "Can not register op cmd. 
No memory\n"); + free(dynamic_op_cmd); + return -EINVAL; + } + dynamic_op_cmd->cmd.func = fn; + dynamic_op_cmd->cmd.help = help; + + cds_list_add_rcu(&dynamic_op_cmd->list_entry, + &dynamic_op_command_list_head); + return 0; +} + +void feature_unregister_all_string_op_handlers(void) +{ + struct dynamic_op_command_entry *cmd = NULL; + struct cds_list_head *this_entry, *next; + + cds_list_for_each_safe(this_entry, next, + &dynamic_op_command_list_head) { + cmd = cds_list_entry(this_entry, + struct dynamic_op_command_entry, + list_entry); + + cds_list_del_rcu(&cmd->list_entry); + free((char *)cmd->cmd.name); + free(cmd); + } +} + /* Split command string into argument vector. * NB: Silently truncates if too many arguments given. */ @@ -2195,6 +1914,26 @@ static int split(char *line, char **argv, size_t maxargs) return i; } +/* + * Join a string. + * returns NULL if the size isn't right. + */ +static const char *unsplit(char *line, int size, int argc, char **argv) +{ + int i; + int total = 0; + + for (i = 0; i < argc; ++i) { + int n; + n = snprintf(line + total, size - total, "%s ", argv[i]); + if (n < 0 || n >= size - total) + return NULL; + total += n; + } + line[--total] = '\0'; /* Remove trailing space */ + return line; +} + /* Free the memory that was allocated by open_memstream() to hold output of command. Called by zmq when send completes. */ static void out_free(void *data, void *hint __unused) @@ -2202,25 +1941,33 @@ static void out_free(void *data, void *hint __unused) free(data); } -/* Send console command to be handled on the master thread. +/* Send console command to be handled on the main thread. * async == true: don't wait for a response.
*/ -static int send_console_cmd(cmd_func_t fn, char *line, bool async) +static int send_console_cmd(cmd_func_t fn, int argc, char **argv, bool async) { - int rv, cmd_response; - enum console_cmd_master_flags flags = 0; + int rv = -1; + int cmd_response; + enum console_cmd_main_flags flags = 0; + char line[MAX_CMDLINE]; if (async) flags |= CONSOLE_CMD_ASYNC; - rcu_read_unlock(); - rcu_thread_offline(); + dp_rcu_read_unlock(); + dp_rcu_thread_offline(); + + if (unsplit(line, MAX_CMDLINE, argc, argv) == NULL) { + RTE_LOG(ERR, DATAPLANE, + "send console cmd: too many args\n"); + goto out; + } rv = zsock_send(console_cmd_client, "psi", fn, line, flags); if (rv < 0) { RTE_LOG(ERR, DATAPLANE, - "failed to send console cmd to master\n"); - return rv; + "failed to send console cmd to main\n"); + goto out; } if (async) @@ -2230,18 +1977,18 @@ static int send_console_cmd(cmd_func_t fn, char *line, bool async) if (rv < 0) RTE_LOG(ERR, DATAPLANE, - "failed to get console cmd response from master\n"); + "failed to get console cmd response from main\n"); else rv = cmd_response; out: - rcu_thread_online(); - rcu_read_lock(); + dp_rcu_thread_online(); + dp_rcu_read_lock(); return rv; } int console_cmd(char *line, char **outbuf, size_t *outsize, cmd_func_t fn, - bool on_master) + bool on_main) { char *argv[MAX_ARGS] = { NULL }; int argc = split(line, argv, MAX_ARGS); @@ -2264,12 +2011,12 @@ int console_cmd(char *line, char **outbuf, size_t *outsize, cmd_func_t fn, if (fn) { /* - * The reset command can only run on the master thread. - * If this is reset and we are not on the master + * The reset command can only run on the main thread. + * If this is reset and we are not on the main * thread, send it there. */ - if (!on_master && fn == cmd_reset) - rc = send_console_cmd(fn, line, true); + if (!on_main && (fn == cmd_reset)) + rc = send_console_cmd(fn, argc, argv, true); else /* Stash output from command in buffer. 
*/ rc = (*fn)(f, argc, argv); @@ -2298,16 +2045,36 @@ console_request(zloop_t *loop __rte_unused, zsock_t *sock, "console msg receive failed: %s\n", strerror(errno)); return -1; } - rcu_thread_online(); + dp_rcu_thread_online(); char *outbuf = NULL; size_t outsize = 0; + dp_rcu_read_lock(); + + int rc = 0; + + /* dispatch protobuf op commands here */ + if (strncmp(line, "protobuf", 8) == 0) { + zmsg_t *msg = zmsg_recv(sock); + zframe_t *frame = zmsg_first(msg); + unsigned char *data = zframe_data(frame); + int len = zframe_size(frame); + + rc = pb_op_cmd(sock, data, len, NULL); + + zmsg_destroy(&msg); + dp_rcu_read_unlock(); + dp_rcu_thread_offline(); + zstr_free(&line); + + return rc; + } - rcu_read_lock(); - int rc = console_cmd(line, &outbuf, &outsize, NULL, false); - rcu_read_unlock(); + rc = console_cmd(line, &outbuf, &outsize, NULL, false); - rcu_thread_offline(); + dp_rcu_read_unlock(); + + dp_rcu_thread_offline(); zstr_free(&line); @@ -2319,6 +2086,7 @@ console_request(zloop_t *loop __rte_unused, zsock_t *sock, zmq_msg_init_data(&m, outbuf, outsize, out_free, NULL); rc = zmq_msg_send(&m, zsock_resolve(sock), 0); zmq_msg_close(&m); + return (rc < 0) ? 
rc : 0; } @@ -2398,7 +2166,7 @@ console_pair_request(zloop_t *loop __rte_unused, zsock_t *sock, void *arg) rc = -1; } else { RTE_LOG(ERR, DATAPLANE, "Unknown message %s" - " received by %s", argv[0], __func__); + " received by %s\n", argv[0], __func__); rc = -1; } } @@ -2421,7 +2189,7 @@ console_handler(zsock_t *pipe, void *arg __rte_unused) rte_panic("can't bind console endpoint: %s : %s\n", console_endpoint, strerror(errno)); - /* Socket to send commands to the master thread */ + /* Socket to send commands to the main thread */ console_cmd_client = zsock_new_pair(cmd_client_endpoint); if (!console_cmd_client) rte_panic("failed to create cmd socket: %s\n", strerror(errno)); @@ -2445,13 +2213,13 @@ console_handler(zsock_t *pipe, void *arg __rte_unused) zsock_signal(pipe, 0); zstr_send(pipe, NULL); - rcu_register_thread(); - rcu_thread_offline(); + dp_rcu_register_thread(); + dp_rcu_thread_offline(); while (!zsys_interrupted) { if (zloop_start(loop) != 0) break; /* error detected */ } - rcu_unregister_thread(); + dp_rcu_unregister_thread(); zloop_destroy(&loop); zsock_destroy(&console_sock); zsock_destroy(&console_cmd_client); @@ -2459,15 +2227,15 @@ console_handler(zsock_t *pipe, void *arg __rte_unused) /* * Receive commands from the console thread that require execution - * on the master thread and optionally send a response back. + * on the main thread and optionally send a response back. 
*/ -static int console_cmd_master_handler(void *arg) +static int console_cmd_main_handler(void *arg) { zsock_t *sock = (zsock_t *)arg; int rv, cmd_response; cmd_func_t fn; char *line; - enum console_cmd_master_flags flags; + enum console_cmd_main_flags flags; char *outbuf = NULL; size_t outsize = 0; @@ -2490,7 +2258,7 @@ static zactor_t *console_actor; /* * Setup the console thread and communication between it - * and the master thread + * and the main thread */ void console_setup(void) { @@ -2499,12 +2267,12 @@ void console_setup(void) rte_panic("zactor_new failed for console handler\n"); free(zstr_recv(console_actor)); - master_cmd_server = zsock_new_pair(cmd_server_endpoint); - if (!master_cmd_server) - rte_panic("master cmd server socket failed"); + main_cmd_server = zsock_new_pair(cmd_server_endpoint); + if (!main_cmd_server) + rte_panic("main cmd server socket failed"); - register_event_socket(zsock_resolve(master_cmd_server), - console_cmd_master_handler, master_cmd_server); + dp_register_event_socket(zsock_resolve(main_cmd_server), + console_cmd_main_handler, main_cmd_server); } void @@ -2513,7 +2281,7 @@ console_destroy(void) free(config.console_url_bound); free(config.console_url_bound_uplink); zactor_destroy(&console_actor); - zsock_destroy(&master_cmd_server); + zsock_destroy(&main_cmd_server); } int @@ -2576,7 +2344,7 @@ console_unbind(enum cont_src_en cont_src) response = zstr_recv(console_actor); if (response) { - if (strcmp(response, "OK")) + if (strcmp(response, "OK") != 0) RTE_LOG(ERR, DATAPLANE, "Console unbind" " failed for ep %s\n", console_url_bound); } @@ -2589,12 +2357,6 @@ console_unbind(enum cont_src_en cont_src) config.console_url_bound_uplink = NULL; } -const char * -console_endpoint_get(void) -{ - return console_endpoint; -} - void console_endpoint_set(const char *endpoint) { diff --git a/src/commands.h b/src/commands.h index cc095a1a..8502de09 100644 --- a/src/commands.h +++ b/src/commands.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 
2017-2019, AT&T Intellectual Property. + * Copyright (c) 2017-2021, AT&T Intellectual Property. * All rights reserved. * Copyright (c) 2011-2016 by Brocade Communications Systems, Inc. * All rights reserved. @@ -17,10 +17,7 @@ struct ifnet; -typedef int (*cmd_func_t)(FILE *f, int argc, char **argv); - int cmd_arp(FILE *f, int argc, char **argv); -int cmd_arp_cfg(FILE *f, int argc, char **argv); int cmd_route(FILE *f, int argc, char **argv); int cmd_multicast(FILE *f, int argc, char **argv); int cmd_npf_cfg(FILE *f, int argc, char **argv); @@ -45,20 +42,17 @@ int cmd_gre(FILE *f, int argc, char **argv); int cmd_mpls(FILE *f, int argc, char **argv); int cmd_affinity_cfg(FILE *f, int argc, char **argv); int cmd_xconnect_cfg(FILE *f, int argc, char **argv); -int cmd_speed(FILE *f, int argc, char **argv); int cmd_poe(FILE *f, int argc, char **argv); int cmd_ip(FILE *f, int argc, char **argv); -int cmd_ip6(FILE *f, int argc, char **argv); -int cmd_breakout(FILE *f, int argc, char **argv); int cmd_cpp_rl_op(FILE *f, int argc, char **argv); +int cmd_gpc_op(FILE *f, int argc, char **argv); void list_all_cmd_versions(FILE *f); int console_cmd(char *line, char **outbuf, size_t *outsize, cmd_func_t fn, - bool on_master); + bool on_main); int console_bind(enum cont_src_en cont_src); void console_unbind(enum cont_src_en cont_src); -const char *console_endpoint_get(void); void console_endpoint_set(const char *endpoint); void show_address(json_writer_t *wr, const struct ifnet *ifp); diff --git a/src/compat.h b/src/compat.h index bde98da4..ac4b8eba 100644 --- a/src/compat.h +++ b/src/compat.h @@ -58,11 +58,7 @@ #define MAX_MP_SELECT_LABELS 4 #endif -#ifdef HAVE_RTE_PORT_ID_16_BITS typedef uint16_t portid_t; -#else -typedef uint8_t portid_t; -#endif #ifndef PKT_RX_VLAN #define PKT_RX_VLAN PKT_RX_VLAN_PKT diff --git a/src/config.c b/src/config.c index dfcaf0e2..f17493c9 100644 --- a/src/config.c +++ b/src/config.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019, AT&T Intellectual 
Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only * @@ -29,16 +29,19 @@ #include #include -#include "config.h" +#include "config_internal.h" +#include "fal_plugin.h" #include "main.h" #include "util.h" #include "vplane_debug.h" #include "vplane_log.h" +static const char *config_file = VYATTA_SYSCONF_DIR"/dataplane.conf"; +const char *platform_file = PLATFORM_FILE; + #define DEFAULT_CONTROLLER_REQ_PORT 4415 #define DEFAULT_CONTROLLER_REQ_IPC "ipc:///var/run/vyatta/vplaned.req" - struct config_param config; struct platform_param platform_cfg; @@ -103,11 +106,11 @@ static int get_eth_pci_addr(const char *ifname, char *addr_str, size_t len) /* Take list of ethernet device names: "eth2,eth3" - * and produce PCI black list. + * and exclude each PCI address. * * Uses: strtok therefore overwrites argument */ -static void parse_blacklist(char *list) +static void parse_exclude(char *list) { char *ifname; const char sep[] = " ,\t\r\n"; @@ -119,15 +122,11 @@ static void parse_blacklist(char *list) if (get_eth_pci_addr(ifname, addr_str, sizeof(addr_str))) continue; -#ifdef HAVE_RTE_DEVARGS_ADD if (rte_devargs_add(RTE_DEVTYPE_BLACKLISTED_PCI, -#else - if (rte_eal_devargs_add(RTE_DEVTYPE_BLACKLISTED_PCI, -#endif addr_str) < 0) /* can't use rte_log yet, EAL not started */ fprintf(stderr, - "Error: cannot blacklist %s %s", + "Error: cannot exclude %s %s", ifname, addr_str); } } @@ -168,52 +167,59 @@ static int parse_entry(void *user, const char *section, } else if (strcasecmp(section, "controller") == 0) { if (strcmp(name, "publish") == 0) return copy_str(&cfg->publish_url, value); - else if (strcmp(name, "request") == 0) + if (strcmp(name, "request") == 0) return copy_str(&cfg->request_url, value); - else if (strcmp(name, "publish_uplink") == 0) + if (strcmp(name, "publish_uplink") == 0) return copy_str(&cfg->publish_url_uplink, value); - else if (strcmp(name, "request_uplink") == 
0) + if (strcmp(name, "request_uplink") == 0) return copy_str(&cfg->request_url_uplink, value); - else if (strcmp(name, "ip") == 0) + if (strcmp(name, "ip") == 0) return parse_ipaddr(&cfg->remote_ip, value); - else if (strcmp(name, "certificate") == 0) + if (strcmp(name, "certificate") == 0) return copy_str(&cfg->remote_cert, value); } else if (strcasecmp(section, "dataplane") == 0) { if (strcmp(name, "ip") == 0) return parse_ipaddr(&cfg->local_ip, value); - else if (strcmp(name, "certificate") == 0) + if (strcmp(name, "certificate") == 0) return copy_str(&cfg->certificate, value); - else if (strcmp(name, "control") == 0) { + if (strcmp(name, "control") == 0) { cfg->console_url_set = true; return copy_str(&cfg->console_url, value); - } else if (strcmp(name, "control-uplink") == 0) + } + if (strcmp(name, "control-uplink") == 0) return copy_str(&cfg->console_url_uplink, value); - else if (strcmp(name, "control-interface") == 0) + if (strcmp(name, "control-interface") == 0) return copy_str(&cfg->ctrl_intf_name, value); - else if (strcmp(name, "blacklist") == 0) - parse_blacklist(strdupa(value)); + if (strcmp(name, "exclude-interfaces") == 0 || + strcmp(name, "blacklist") == 0) + parse_exclude(strdupa(value)); else if (strcmp(name, "backplane") == 0) cfg->backplane = strdup(value); else if (strcmp(name, "update") == 0) cfg->port_update = atoi(value); else if (strcmp(name, "uuid") == 0) return copy_str(&cfg->uuid, value); - else if (strcmp(name, "dataplane-id") == 0) + else if (strcmp(name, "dataplane-id") == 0) cfg->dp_index = atoi(value); else if (strcmp(name, "uplink-mac") == 0) return ether_aton_r(value, &cfg->uplink_addr) != NULL; } else if (strcasecmp(section, "rib") == 0) { if (strcmp(name, "ip") == 0) return parse_ipaddr(&cfg->rib_ip, value); - else if (strcmp(name, "control") == 0) + if (strcmp(name, "control") == 0) return copy_str(&cfg->rib_ctrl_url, value); + } else if (strcasecmp(section, "xfrm_client") == 0) { + if (strcmp(name, "pull") == 0) + return 
copy_str(&cfg->xfrm_pull_url, value); + if (strcmp(name, "push") == 0) + return copy_str(&cfg->xfrm_push_url, value); } return 1; /* good */ } /* convert from generic IP address to ZMQ bind URL */ -static char *addr_to_tcp(const struct ip_addr *addr, uint16_t port) +char *addr_to_tcp(const struct ip_addr *addr, uint16_t port) { char abuf[INET6_ADDRSTRLEN]; char pbuf[32]; @@ -240,9 +246,9 @@ static char *default_endpoint_controller(void) { if (config.local_controller) return strdup(DEFAULT_CONTROLLER_REQ_IPC); - else - return addr_to_tcp(&config.remote_ip, - DEFAULT_CONTROLLER_REQ_PORT); + + return addr_to_tcp(&config.remote_ip, + DEFAULT_CONTROLLER_REQ_PORT); } @@ -255,8 +261,8 @@ char *default_endpoint_dataplane(void) { if (config.local_controller) return strdup("ipc://*"); - else - return addr_to_tcp(&config.local_ip, 0); + + return addr_to_tcp(&config.local_ip, 0); } static char *default_endpoint_dataplane_uplink(void) @@ -264,13 +270,28 @@ static char *default_endpoint_dataplane_uplink(void) return strdup("ipc://*"); } +void set_config_file(const char *filename) +{ + config_file = filename; +} + +void set_platform_cfg_file(const char *filename) +{ + platform_file = filename; +} + +const char *get_platform_cfg_file(void) +{ + return platform_file; +} + /* Load config file and do sanity checks */ -void parse_config(const char *cfgfile) +void parse_config(void) { - FILE *f = fopen(cfgfile, "r"); + FILE *f = fopen(config_file, "r"); if (f == NULL) { - perror(cfgfile); + perror(config_file); exit(EXIT_FAILURE); } @@ -281,7 +302,7 @@ void parse_config(const char *cfgfile) if (rc) { fprintf(stderr, "Config file format error %s line %d\n", - cfgfile, rc); + config_file, rc); exit(EXIT_FAILURE); } @@ -321,7 +342,7 @@ void parse_config(const char *cfgfile) fprintf(stderr, "Dataplane IP deprecated\n"); exit(EXIT_FAILURE); } - if (is_zero_ether_addr(&config.uplink_addr)) { + if (rte_is_zero_ether_addr(&config.uplink_addr)) { fprintf(stderr, "Uplink Mac address not 
configured\n"); exit(EXIT_FAILURE); } @@ -362,6 +383,83 @@ void parse_config(const char *cfgfile) } } +struct str_val { + const char *str; + uint64_t value; +}; + +struct str_val rx_offload_strs[] = { + { "keep_crc", DEV_RX_OFFLOAD_KEEP_CRC }, +}; + +#define MAX_RX_OFFLOAD_STRS (sizeof(rx_offload_strs) / \ + sizeof(rx_offload_strs[0])) + +struct str_val tx_offload_strs[] = { + { "dev_tx_offload_multi_segs", DEV_TX_OFFLOAD_MULTI_SEGS }, + { "dev_tx_offload_vlan_insert", DEV_TX_OFFLOAD_VLAN_INSERT }, +}; + +#define MAX_TX_OFFLOAD_STRS (sizeof(tx_offload_strs) / \ + sizeof(tx_offload_strs[0])) + +struct str_val dev_flags_strs[] = { + { "rte_eth_dev_intr_lsc", RTE_ETH_DEV_INTR_LSC }, +}; + +#define MAX_DEV_FLAGS_STRS (sizeof(dev_flags_strs) / \ + sizeof(dev_flags_strs[0])) + +static void parse_option_strs(char *value, + struct str_val *option_strs, + uint8_t max_option_strs, + uint64_t *option_flags, + uint64_t *neg_option_flags) +{ + const char sep[] = " ,\t\r\n"; + const char *option_str; + + for (option_str = strtok(value, sep); option_str != NULL; + option_str = strtok(NULL, sep)) { + bool is_negation = false; + + if (option_str[0] == '!') { + is_negation = true; + option_str++; + } + + for (uint8_t i = 0; i < max_option_strs; i++) { + if (strcmp(option_str, option_strs[i].str) == 0) { + if (is_negation) + *neg_option_flags |= + option_strs[i].value; + else + *option_flags |= + option_strs[i].value; + } + } + } +} + +struct str_val rx_mq_mode_strs[] = { + { "eth_mq_rx_none", ETH_MQ_RX_NONE }, + { "eth_mq_rx_rss", ETH_MQ_RX_RSS }, +}; + +#define MAX_RX_MQ_MODE_STRS (sizeof(rx_mq_mode_strs) / \ + sizeof(rx_mq_mode_strs[0])) + +static void parse_enum_str(char *value, + struct str_val *enum_strs, + uint8_t max_enum_strs, + uint64_t *enum_flag) +{ + for (uint8_t i = 0; i < max_enum_strs; i++) { + if (strcmp(value, enum_strs[i].str) == 0) + *enum_flag = enum_strs[i].value; + } +} + /* * Callback from inih library for each name value * return 0 = error, 1 = ok @@ -512,6 
+610,52 @@ static int parse_driver_entry(void *user, const char *section, param->drv_flags |= DRV_PARAM_USE_ALL_TXQ; } } + if (strcmp(name, "rx_offloads") == 0) { + parse_option_strs(strdupa(value), rx_offload_strs, + MAX_RX_OFFLOAD_STRS, + &param->rx_offloads, + &param->neg_rx_offloads); + DP_DEBUG(INIT, INFO, DATAPLANE, + "Set rx offloads for %s, 0x%lx, !0x%lx\n", + section, param->rx_offloads, param->neg_rx_offloads); + } + if (strcmp(name, "tx_offloads") == 0) { + parse_option_strs(strdupa(value), tx_offload_strs, + MAX_TX_OFFLOAD_STRS, + &param->tx_offloads, + &param->neg_tx_offloads); + DP_DEBUG(INIT, INFO, DATAPLANE, + "Set tx offloads for %s, 0x%lx, !0x%lx\n", + section, param->tx_offloads, param->neg_tx_offloads); + } + if (strcmp(name, "rx_mq_mode") == 0) { + param->rx_mq_mode_set = true; + parse_enum_str(strdupa(value), rx_mq_mode_strs, + MAX_RX_MQ_MODE_STRS, + &param->rx_mq_mode); + DP_DEBUG(INIT, INFO, DATAPLANE, + "Set rx mq_mode for %s, 0x%lx\n", + section, param->rx_mq_mode); + } + if (strcmp(name, "tx_desc_vm_multiplier") == 0) { + val = strtoul(value, &end, 10); + /* make sure val is sane */ + if (val <= MAX_TX_DESC_VM_MULTIPLIER) { + DP_DEBUG(INIT, INFO, DATAPLANE, + "Setting TX bufs vm multiplier for %s, %lu\n", + section, val); + param->tx_desc_vm_multiplier = val; + } + } + if (strcmp(name, "dev_flags") == 0) { + parse_option_strs(strdupa(value), dev_flags_strs, + MAX_DEV_FLAGS_STRS, + &param->dev_flags, + &param->neg_dev_flags); + DP_DEBUG(INIT, INFO, DATAPLANE, + "Set dev flags for %s, 0x%lx, !0x%lx\n", + section, param->dev_flags, param->neg_dev_flags); + } return 1; /* good */ } @@ -550,6 +694,28 @@ static void backplane_list_destroy(void) } } +static bool parse_pci_addr(const char *value, struct rte_pci_addr *pci_addr) +{ + int rc; + + /* Check long PCI format */ + rc = sscanf(value, "%x:%hhx:%hhx.%hhu", &pci_addr->domain, + &pci_addr->bus, &pci_addr->devid, + &pci_addr->function); + if (rc == 4) + return true; + + pci_addr->domain = 0; + + /* Check short PCI format
*/ + rc = sscanf(value, "%hhx:%hhx.%hhu", &pci_addr->bus, + &pci_addr->devid, &pci_addr->function); + if (rc == 3) + return true; + + return false; +} + /* * Callback from inih library for each name value * return 0 = error, 1 = ok @@ -562,80 +728,83 @@ static int parse_platform_entry(void *user, const char *section, if (strcasecmp(section, "dataplane") == 0) { if (strncmp(name, "backplane_port", strlen("backplane_port")) == 0) { - int rc; struct bkplane_pci *bp; - bool name = false; - char bp_name[IFNAMSIZ]; + char *pci_addr_str; + char *bp_name; bp = calloc(1, sizeof(*bp)); - if (!bp) { - fprintf(stderr, - "Malloc failed for platform bkplane config\n"); - return 0; - } - /* First check long PCI format with name */ - rc = sscanf(value, "%x:%hhx:%hhx.%hhu,%2s", - &bp->pci_addr.domain, - &bp->pci_addr.bus, - &bp->pci_addr.devid, - &bp->pci_addr.function, - bp_name); - if (rc == 5) { - name = true; - goto backplane_port_parsed; + if (!bp) + goto malloc_failed; + pci_addr_str = strdup(value); + if (!pci_addr_str) { + free(bp); + goto malloc_failed; } - /* check long PCI format without name */ - rc = sscanf(value, "%x:%hhx:%hhx.%hhu", - &bp->pci_addr.domain, - &bp->pci_addr.bus, - &bp->pci_addr.devid, - &bp->pci_addr.function); - if (rc == 4) - goto backplane_port_parsed; - - /* Check short PCI format with name*/ - rc = sscanf(value, "%hhx:%hhx.%hhu,%2s", - &bp->pci_addr.bus, - &bp->pci_addr.devid, - &bp->pci_addr.function, - bp_name); - if (rc == 4) { - name = true; - bp->pci_addr.domain = 0; - goto backplane_port_parsed; + bp_name = strchr(pci_addr_str, ','); + if (bp_name) { + /* + * nul-terminate PCI address and skip + * over comma separator + */ + *bp_name = '\0'; + bp_name++; } - /* Check short PCI format without name*/ - rc = sscanf(value, "%hhx:%hhx.%hhu", - &bp->pci_addr.bus, - &bp->pci_addr.devid, - &bp->pci_addr.function); - if (rc != 3) { + if (!parse_pci_addr(pci_addr_str, &bp->pci_addr)) { DP_DEBUG(INIT, ERR, DATAPLANE, "backplane port format error\n"); + 
free(pci_addr_str); free(bp); return 0; } - bp->pci_addr.domain = 0; -backplane_port_parsed: + /* Add to backplane port list */ fprintf(stderr, "Backplane %s pci(%x:%hhx:%hhx.%hhu) added\n", - name ? bp_name : "()", + bp_name ? bp_name : "()", bp->pci_addr.domain, bp->pci_addr.bus, bp->pci_addr.devid, bp->pci_addr.function); - if (name) + if (bp_name) bp->name = strdup(bp_name); LIST_INSERT_HEAD(&cfg->bp_list, bp, link); + free(pci_addr_str); } else if (strcmp(name, "fal_plugin") == 0) { if (value) cfg->fal_plugin = strdup(value); + } else if (strncmp(name, "mgmt_port", + strlen("mgmt_port")) == 0) { + struct config_pci_entry *pci_entry; + + pci_entry = calloc(1, sizeof(*pci_entry)); + if (!pci_entry) + goto malloc_failed; + + if (!parse_pci_addr(value, &pci_entry->pci_addr)) { + DP_DEBUG(INIT, ERR, DATAPLANE, + "management port format error: %s\n", + value); + free(pci_entry); + return 0; + } + LIST_INSERT_HEAD(&cfg->mgmt_list, pci_entry, link); + } + } else if (strcasecmp(section, "hardware-features") == 0) { + if (strcmp(name, "bonding.hardware-members-only") == 0) { + if (value) + cfg->hardware_lag = atoi(value); } } return 1; + +malloc_failed: + fprintf(stderr, + "Out of memory during processing of %s:%s config\n", + section, name); + return 0; + } /* @@ -661,6 +830,7 @@ void parse_platform_config(const char *cfgfile) fprintf(stderr, "Parsing platform config file %s\n", cfgfile); LIST_INIT(&platform_cfg.bp_list); + LIST_INIT(&platform_cfg.mgmt_list); rc = ini_parse_file(f, parse_platform_entry, &platform_cfg); if (rc) { @@ -679,3 +849,30 @@ void platform_config_cleanup(void) free(platform_cfg.fal_plugin); } + +int dp_parse_config_files(dp_parse_config_fn *parser_fn, + void *arg) +{ + FILE *f; + int rc; + + /* The main config file must exist */ + f = fopen(config_file, "r"); + if (f == NULL) + return -ENOENT; + + rc = ini_parse_file(f, parser_fn, arg); + fclose(f); + if (rc) + return rc; + + /* The platform config file may exist */ + f = fopen(platform_file, 
"r"); + if (!f) + return 0; + + rc = ini_parse_file(f, parser_fn, arg); + fclose(f); + + return rc; +} diff --git a/src/config.h b/src/config_internal.h similarity index 79% rename from src/config.h rename to src/config_internal.h index a28594b6..3538ad73 100644 --- a/src/config.h +++ b/src/config_internal.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2011-2017 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -9,8 +9,8 @@ * Parameters read from /etc/vyatta/dataplane.conf */ -#ifndef DP_CONFIG_H -#define DP_CONFIG_H +#ifndef CONFIG_INTERNAL_H +#define CONFIG_INTERNAL_H #include #include @@ -18,8 +18,8 @@ #include #include +#include "config.h" #include "ip_addr.h" -#include "main.h" struct rxtx_param; @@ -62,9 +62,11 @@ struct config_param { disconnect */ char *certificate; /* Our 0MQ authentication certificate */ char *remote_cert; /* Remote controller 0MQ certificate */ - struct ether_addr uplink_addr; /* uplink intf perm mac addr */ + struct rte_ether_addr uplink_addr; /* uplink intf perm mac addr */ struct ip_addr rib_ip; /* rib ctrl ip */ char *rib_ctrl_url; /* rib control url */ + char *xfrm_push_url; /* xfrm push from the DP url */ + char *xfrm_pull_url; /* xfrm pull to the DP url */ }; struct bkplane_pci { @@ -73,18 +75,30 @@ struct bkplane_pci { char *name; }; +struct config_pci_entry { + LIST_ENTRY(config_pci_entry) link; + struct rte_pci_addr pci_addr; +}; + /* Platform parameter structure */ struct platform_param { LIST_HEAD(pci_list, bkplane_pci) bp_list; /* backplane pci list */ char *fal_plugin; /* fal_plugin to load (if any) */ + /* whether to use hardware LAG, or otherwise DPDK LAG */ + bool hardware_lag; + /* management port pci list */ + LIST_HEAD(config_mgmt_pci_list, config_pci_entry) mgmt_list; }; extern struct config_param config; extern struct platform_param platform_cfg; -void 
parse_config(const char *filename); +void set_config_file(const char *filename); +void set_platform_cfg_file(const char *filename); +const char *get_platform_cfg_file(void); +void parse_config(void); void parse_driver_config(struct rxtx_param **driver_param, - const char *filename); + const char *cfgfile); /* * Are we running as VR or using uplink to a remote controller ? @@ -105,6 +119,7 @@ uint32_t config_ctrl_ip_af(void); /* Convert IP address string, result is the same as inet_pton() */ int parse_ipaddress(struct ip_addr *addr, const char *str); +char *addr_to_tcp(const struct ip_addr *addr, uint16_t port); /* default ZMQ url creation */ char *default_endpoint_dataplane(void); @@ -113,4 +128,5 @@ char *default_endpoint_dataplane(void); void parse_platform_config(const char *cfgfile); /* Cleanup platform configuration */ void platform_config_cleanup(void); -#endif + +#endif /* CONFIG_INTERNAL_H */ diff --git a/src/control.c b/src/control.c index 6bc57e79..37fcf73b 100644 --- a/src/control.c +++ b/src/control.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2019, AT&T Intellectual Property. + * Copyright (c) 2017-2020, AT&T Intellectual Property. * All rights reserved. * Copyright (c) 2011-2016 by Brocade Communications Systems, Inc. * All rights reserved. 
@@ -26,31 +26,36 @@ #include "commands.h" #include "compiler.h" -#include "config.h" +#include "config_internal.h" #include "control.h" +#include "crypto/crypto.h" #include "crypto/crypto_policy.h" #include "dpmsg.h" -#include "event.h" +#include "event_internal.h" +#include "feature_commands.h" +#include "feature_plugin_internal.h" +#include "if/dpdk-eth/vhost.h" #include "if_var.h" #include "ip_addr.h" -#include "master.h" +#include "controller.h" #include "mstp.h" #include "netlink.h" #include "npf/config/npf_config.h" #include "pl_commands.h" #include "power.h" #include "protobuf.h" +#include "protobuf/FeatureAffinityConfig.pb-c.h" #include "rt_tracker.h" #include "session/session_cmds.h" #include "urcu.h" #include "util.h" -#include "vhost.h" #include "vplane_debug.h" #include "vplane_log.h" -#include "vrf.h" +#include "vrf_internal.h" #include "storm_ctl.h" #include "backplane.h" #include "ptp.h" +#include "crypto/xfrm_client.h" #define ZMQ_IPC_HWM (0) @@ -276,7 +281,7 @@ static int report_config_error(const char *cmd, int code) if (result < 0) goto err; - return send_dp_event(msg); + return dp_send_event_to_vplaned(msg); err: zmsg_destroy(&msg); @@ -340,8 +345,8 @@ static int process_config_cmd(enum cont_src_en cont_src, result = report_config_error(cmd_log, rc); if (result < 0) RTE_LOG(ERR, DATAPLANE, - "Failed to send cmd report for cmd " - "\"%s\": %s\n", cmd_log, strerror(result)); + "Failed to send cmd report for cmd \"%s\": %s\n", + cmd_log, strerror(-result)); RTE_LOG(NOTICE, DATAPLANE, "(%s) cmd [ %s ] : %s\n", cont_src_name(cont_src), @@ -358,6 +363,10 @@ static int process_xfrm_policy_cmd(enum cont_src_en cont_src, void *data, size_t size, const struct msg_handler *h __unused) { + struct xfrm_client_aux_data aux; + + xfrm_direct = false; + if (cont_src != CONT_SRC_MAIN) { RTE_LOG(ERR, DATAPLANE, "(%s) xfrm POLICY invalid controller\n", @@ -366,8 +375,11 @@ static int process_xfrm_policy_cmd(enum cont_src_en cont_src, } vrfid_t vrf_id = 
VRF_DEFAULT_ID; + + aux.vrf = &vrf_id; + int rc = mnl_cb_run(data, size, 0, 0, rtnl_process_xfrm, - &vrf_id); + &aux); if (rc != MNL_CB_OK) { RTE_LOG(ERR, DATAPLANE, "netlink POLICY message parse error\n"); return -1; @@ -380,6 +392,10 @@ static int process_xfrm_sa_cmd(enum cont_src_en cont_src, void *data, size_t size, const struct msg_handler *h __unused) { + struct xfrm_client_aux_data aux; + + xfrm_direct = false; + if (cont_src != CONT_SRC_MAIN) { RTE_LOG(ERR, DATAPLANE, "(%s) xfrm SA invalid controller\n", @@ -388,8 +404,10 @@ static int process_xfrm_sa_cmd(enum cont_src_en cont_src, } vrfid_t vrf_id = VRF_DEFAULT_ID; + aux.vrf = &vrf_id; + int rc = mnl_cb_run(data, size, 0, 0, rtnl_process_xfrm_sa, - &vrf_id); + &aux); /* SA errors are recoverable */ if (rc != MNL_CB_OK) { @@ -461,24 +479,23 @@ static int process_netlink_data(enum cont_src_en cont_src, * Topic must not be a substring of another topic, as a match * of a topic msg to a topic does not need to be on a word boundary for * performance reasons. + * + * Please do not add any further entries to this table. All new commands + * should be in protobuf format. 
*/ static const struct msg_handler message_handlers_main[] = { { 0, "address", process_netlink_data, NULL }, { 0, "affinity", process_config_cmd, cmd_affinity_cfg }, { 1, "affinity", process_config_cmd, cmd_affinity_cfg }, - { 0, "arp", process_config_cmd, cmd_arp_cfg }, { 0, "backplane", process_config_cmd, cmd_backplane_cfg }, - { 0, "breakout", process_config_cmd, cmd_breakout }, { 0, "bridge_link", process_netlink_data, NULL }, { 0, "cgn-cfg", process_config_cmd, cmd_cgn }, { 0, "ecmp", process_config_cmd, NULL }, { 0, "ip4", process_config_cmd, cmd_ip }, { 0, "ipsec", process_config_cmd, NULL }, - { 0, "ip6", process_config_cmd, cmd_ip6 }, { 0, "l2tpeth", process_config_cmd, NULL }, { 0, "l2tp_", process_l2tp_cmd, NULL }, { 0, "link", process_netlink_data, NULL }, - { 0, "speed", process_config_cmd, cmd_speed }, { 0, "mode", process_config_cmd, cmd_power_cfg }, { 0, "mpls", process_config_cmd, NULL }, { 0, "mstp", process_config_cmd, cmd_mstp }, @@ -489,9 +506,12 @@ static const struct msg_handler message_handlers_main[] = { { 0, "pathmonitor", process_config_cmd, NULL }, { 0, "poe", process_config_cmd, cmd_poe }, { 0, "portmonitor", process_config_cmd, NULL }, + { 1, "portmonitor", process_config_cmd, NULL }, { 0, "protobuf", process_pb_cmd, NULL }, { 0, "ptp", process_config_cmd, cmd_ptp_cfg }, - { 4, "qos", process_config_cmd, cmd_qos_cfg }, + { 14, "qos", process_config_cmd, cmd_qos_cfg }, + { 15, "qos", process_config_cmd, cmd_qos_cfg }, + { 16, "qos", process_config_cmd, cmd_qos_cfg }, { 0, "route", process_netlink_data, NULL }, { 3, "storm-ctl", process_config_cmd, cmd_storm_ctl_cfg }, { 0, "tablemap", process_config_cmd, cmd_tablemap_cfg }, @@ -514,7 +534,7 @@ static const struct msg_handler message_handlers_main[] = { { 0, "tc_qdisc", process_netlink_data, NULL }, { 0, "tc_chain", process_netlink_data, NULL }, { 0, "tc_filter", process_netlink_data, NULL }, - { 0, NULL, NULL } + { 0, NULL, NULL, NULL } }; void list_all_main_msg_versions(FILE *f) @@ 
-536,7 +556,7 @@ static const struct msg_handler message_handlers_uplink[] = { { 0, "neigh", process_netlink_data, NULL }, { 0, "netconf", process_netlink_data, NULL }, { 0, "route", process_netlink_data, NULL }, - { 0, NULL, NULL } + { 0, NULL, NULL, NULL } }; static const struct msg_handler *message_handlers[CONT_SRC_COUNT] = { @@ -548,9 +568,46 @@ static const struct msg_handler *message_handlers[CONT_SRC_COUNT] = { * Topics accepted in ready state */ static const struct msg_handler ready_handlers[] = { - { 0, NULL, NULL } + { 0, NULL, NULL, NULL } }; +/* + * Dynamically registered handlers + */ +struct dynamic_cfg_command_entry { + struct msg_handler handler; + struct cds_list_head list_entry; +}; + +static struct cds_list_head dynamic_cfg_command_list_head = + CDS_LIST_HEAD_INIT(dynamic_cfg_command_list_head); + +static const struct msg_handler * +find_msg_handler(const struct msg_handler *handlers, + const char *name, size_t len) +{ + const struct msg_handler *h; + struct dynamic_cfg_command_entry *dynamic_cmd; + + for (h = handlers; h->topic; ++h) { + if (memcmp(name, h->topic, MIN(len, strlen(h->topic))) != 0) + continue; + + return h; + } + + /* And check the dynamically registered commands too */ + cds_list_for_each_entry_rcu(dynamic_cmd, &dynamic_cfg_command_list_head, + list_entry) { + if (memcmp(name, dynamic_cmd->handler.topic, + MIN(len, strlen(dynamic_cmd->handler.topic))) != 0) + continue; + return &dynamic_cmd->handler; + } + + return NULL; +} + static int process_topic_msg(enum cont_src_en cont_src, const struct msg_handler *handlers, dpmsg_t *dpmsg) @@ -558,16 +615,14 @@ process_topic_msg(enum cont_src_en cont_src, const struct msg_handler *h; int ret; - for (h = handlers; h->topic; ++h) { - if (memcmp(zmq_msg_data(&dpmsg->topic_msg), h->topic, - MIN(zmq_msg_size(&dpmsg->topic_msg), - strlen(h->topic)))) - continue; - - rcu_read_lock(); + h = find_msg_handler(handlers, + zmq_msg_data(&dpmsg->topic_msg), + zmq_msg_size(&dpmsg->topic_msg)); + if 
(h) { + dp_rcu_read_lock(); ret = (*h->handler)(cont_src, zmq_msg_data(&dpmsg->data_msg), - zmq_msg_size(&dpmsg->data_msg), h); - rcu_read_unlock(); + zmq_msg_size(&dpmsg->data_msg), h); + dp_rcu_read_unlock(); return ret; } @@ -581,6 +636,63 @@ process_topic_msg(enum cont_src_en cont_src, return -1; } +int dp_feature_register_string_cfg_handler(const char *name, + feature_string_op_fn *fn) +{ + struct dynamic_cfg_command_entry *dynamic_cfg_cmd; + const struct msg_handler *cmd; + + if (!name || !fn) + return -EINVAL; + + cmd = find_msg_handler(message_handlers_main, name, strlen(name)); + if (cmd) { + RTE_LOG(ERR, DATAPLANE, + "Can not register op cmd. Cmd %s already exists\n", + cmd->topic); + return -EINVAL; + } + + dynamic_cfg_cmd = calloc(1, sizeof(*dynamic_cfg_cmd)); + if (!dynamic_cfg_cmd) { + RTE_LOG(ERR, DATAPLANE, + "Can not register op cmd. No memory\n"); + return -EINVAL; + } + + dynamic_cfg_cmd->handler.version = 0; + dynamic_cfg_cmd->handler.topic = strdup(name); + if (!dynamic_cfg_cmd->handler.topic) { + RTE_LOG(ERR, DATAPLANE, + "Can not register op cmd. No memory\n"); + free(dynamic_cfg_cmd); + return -EINVAL; + } + dynamic_cfg_cmd->handler.handler = process_config_cmd; + dynamic_cfg_cmd->handler.cmd_handler = fn; + + cds_list_add_rcu(&dynamic_cfg_cmd->list_entry, + &dynamic_cfg_command_list_head); + return 0; +} + +void feature_unregister_all_string_cfg_handlers(void) +{ + struct dynamic_cfg_command_entry *cmd = NULL; + struct cds_list_head *this_entry, *next; + + cds_list_for_each_safe(this_entry, next, + &dynamic_cfg_command_list_head) { + cmd = cds_list_entry(this_entry, + struct dynamic_cfg_command_entry, + list_entry); + + cds_list_del_rcu(&cmd->list_entry); + free((char *)cmd->handler.topic); + free(cmd); + } +} + /* Process message either from pub-sub socket * or received during resynchronization. 
* Returns: 0 - OK @@ -612,7 +724,7 @@ static void process_snapshot_end(void) int controller_snapshot(enum cont_src_en cont_src) { DP_DEBUG(RESYNC, INFO, DATAPLANE, - "master(%s) controller resync started\n", + "main(%s) controller resync started\n", cont_src_name(cont_src)); return zstr_send(cont_socket_get(cont_src), "WHATSUP?"); @@ -631,14 +743,14 @@ int process_snapshot_one(enum cont_src_en cont_src, dpmsg_t *dpmsg, int *eof) if (!memcmp(zmq_msg_data(&dpmsg->topic_msg), done, MIN(strlen(done), zmq_msg_size(&dpmsg->topic_msg)))) { DP_DEBUG(RESYNC, INFO, DATAPLANE, - "master(%s) resync [%"PRIu64"] completed\n", + "main(%s) resync [%"PRIu64"] completed\n", cont_src_name(cont_src), cont_src_info[cont_src].sub_last_seqno); process_snapshot_end(); *eof = 1; } else { DP_DEBUG(RESYNC, INFO, DATAPLANE, - "master(%s) resync [%"PRIu64"] %.*s\n", + "main(%s) resync [%"PRIu64"] %.*s\n", cont_src_name(cont_src), get_seqno(dpmsg), (int)zmq_msg_size(&dpmsg->topic_msg), @@ -647,7 +759,7 @@ int process_snapshot_one(enum cont_src_en cont_src, dpmsg_t *dpmsg, int *eof) rc = process_dpmsg(cont_src, dpmsg); if (rc) DP_DEBUG(RESYNC, NOTICE, DATAPLANE, - "master(%s) %.*s: failed\n", + "main(%s) %.*s: failed\n", cont_src_name(cont_src), (int)zmq_msg_size(&dpmsg->topic_msg), (char *)zmq_msg_data(&dpmsg->topic_msg)); @@ -677,7 +789,7 @@ static int subscriber_recv(void *cont_src_info_arg) cont_src_info->sub_last_seqno = get_seqno(&dpmsg); DP_DEBUG(SUBSCRIBER, DEBUG, DATAPLANE, - "master(%s) sub [%"PRIu64"] %.*s\n", + "main(%s) sub [%"PRIu64"] %.*s\n", cont_src_name(cont_src_info->cont_src), get_seqno(&dpmsg), (int)zmq_msg_size(&dpmsg.topic_msg), @@ -690,7 +802,7 @@ static int subscriber_recv(void *cont_src_info_arg) (char *)zmq_msg_data(&dpmsg.topic_msg)); } else { DP_DEBUG(SUBSCRIBER, DEBUG, DATAPLANE, - "master(%s) sub ignore [%"PRIu64" < %"PRIu64"] %.*s\n", + "main(%s) sub ignore [%"PRIu64" < %"PRIu64"] %.*s\n", cont_src_name(cont_src_info->cont_src), get_seqno(&dpmsg), 
cont_src_info->sub_last_seqno, @@ -709,12 +821,12 @@ void controller_unsubscribe(enum cont_src_en cont_src) zsock_t *subscriber = cont_src_info[cont_src].subscriber; if (csocket) { - unregister_event_socket(zsock_resolve(csocket)); + dp_unregister_event_socket(zsock_resolve(csocket)); zsock_destroy(&cont_src_info[cont_src].csocket); } if (subscriber) { - unregister_event_socket(zsock_resolve(subscriber)); + dp_unregister_event_socket(zsock_resolve(subscriber)); zsock_destroy(&cont_src_info[cont_src].subscriber); } } @@ -725,6 +837,7 @@ void controller_init(enum cont_src_en cont_src) const struct msg_handler *h; char *publish_url = NULL; zsock_t *subscriber; + struct dynamic_cfg_command_entry *dynamic_cmd; switch (cont_src) { case CONT_SRC_MAIN: @@ -758,6 +871,11 @@ void controller_init(enum cont_src_en cont_src) for (h = message_handlers[cont_src]; h->topic; ++h) zsock_set_subscribe(subscriber, h->topic); + /* And subscribe for the dynamically handled events */ + cds_list_for_each_entry_rcu(dynamic_cmd, &dynamic_cfg_command_list_head, + list_entry) + zsock_set_subscribe(subscriber, dynamic_cmd->handler.topic); + cont_src_info[cont_src].subscriber = subscriber; } @@ -871,6 +989,45 @@ void controller_init_event_handler(enum cont_src_en cont_src) &cont_src_info[cont_src], cont_src); } +/* + * Traverse a set of cached commands on a list, removing those + * for a given interface, and if supplied, calling the handler for + * those commands. If the list is empty after traversal, it's destroyed. + * + * Suitable to be called on an if_index set or unset event. 
+ */ +int cfg_if_list_replay(struct cfg_if_list **cfg_list, const char *ifname, + cmd_func_t handler) +{ + struct cfg_if_list *if_list = *cfg_list; + struct cfg_if_list_entry *entry, *temp_entry; + int rv; + + if (!if_list) + return 0; + + cds_list_for_each_entry_safe(entry, temp_entry, &if_list->if_list, + le_node) { + if (strcmp(ifname, entry->le_ifname) != 0) + continue; + + if (handler) { + rv = handler(NULL, entry->le_argc, entry->le_argv); + if (rv) + return rv; + } + + rv = cfg_if_list_del(if_list, ifname); + if (rv) + return rv; + } + + if (!if_list->if_list_count) + return cfg_if_list_destroy(cfg_list); + + return 0; +} + struct cfg_if_list_entry * cfg_if_list_lookup(struct cfg_if_list *if_list, const char *ifname) { @@ -899,16 +1056,18 @@ struct cfg_if_list *cfg_if_list_create(void) return if_list; } -int cfg_if_list_add(struct cfg_if_list *if_list, const char *ifname, - int argc, char *argv[]) +static int +cfg_if_list_add_internal(struct cfg_if_list *if_list, const char *ifname, + int argc, char *argv[], bool multiple_per_if) { - struct cfg_if_list_entry *le; + struct cfg_if_list_entry *le = NULL; int i, size; if (strlen(ifname) + 1 > IFNAMSIZ) return -EINVAL; - le = cfg_if_list_lookup(if_list, ifname); + if (!multiple_per_if) + le = cfg_if_list_lookup(if_list, ifname); if (!le) { le = zmalloc_aligned(sizeof(*le)); if (!le) @@ -951,6 +1110,26 @@ int cfg_if_list_add(struct cfg_if_list *if_list, const char *ifname, return 0; } +/* + * Only 1 entry is allowed per interface and a subsequent add will + * overwrite the entry. + */ +int cfg_if_list_add(struct cfg_if_list *if_list, const char *ifname, + int argc, char *argv[]) +{ + return cfg_if_list_add_internal(if_list, ifname, argc, argv, false); +} + +/* + * Multiple entries are allowed per interface and a subsequent add will + * be added at the tail of the list. 
+ */ +int cfg_if_list_add_multi(struct cfg_if_list *if_list, const char *ifname, + int argc, char *argv[]) +{ + return cfg_if_list_add_internal(if_list, ifname, argc, argv, true); +} + int cfg_if_list_bin_add(struct cfg_if_list *if_list, const char *ifname, char *msg, int len) { @@ -1023,3 +1202,61 @@ int cfg_if_list_destroy(struct cfg_if_list **if_list) *if_list = NULL; return 0; } + +int cfg_if_list_cache_command(struct cfg_if_list **if_list, const char *ifname, + int argc, char **argv) +{ + if (!*if_list) { + *if_list = cfg_if_list_create(); + if (!*if_list) + return -ENOMEM; + } + + return cfg_if_list_add_multi(*if_list, ifname, argc, argv); +} + +static int feature_affinity_cmd_handler(struct pb_msg *msg) +{ + void *payload = (void *)((char *)msg->msg); + int len = msg->msg_len, ret = 0; + + if (!payload || !len) { + RTE_LOG(ERR, DATAPLANE, + "Invalid FeatureAffinity message, payload = 0x%lx, len = %d", + (uintptr_t)payload, len); + return -EINVAL; + } + + FeatureAffinityConfig *fmsg = + feature_affinity_config__unpack(NULL, len, payload); + + if (!fmsg) { + RTE_LOG(ERR, DATAPLANE, + "failed to unpack FeatureAffinity protobuf command\n"); + return -1; + } + + switch (fmsg->feature) { + case FEATURE_AFFINITY_CONFIG__FEATURE__CRYPTO: + ret = crypto_engine_set(fmsg->cpumask.data, + fmsg->cpumask.len); + break; + + case FEATURE_AFFINITY_CONFIG__FEATURE__CRYPTO_FWD: + ret = crypto_set_fwd_cores(fmsg->cpumask.data, + fmsg->cpumask.len); + break; + + default: + ret = -EINVAL; + break; + } + + feature_affinity_config__free_unpacked(fmsg, NULL); + return ret; +} + +PB_REGISTER_CMD(feature_affinity_cmd) = { + .cmd = "vyatta:feature-affinity", + .handler = feature_affinity_cmd_handler, +}; diff --git a/src/control.h b/src/control.h index e391be48..f79a1f05 100644 --- a/src/control.h +++ b/src/control.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2018-2020, AT&T Intellectual Property. All rights reserved. 
* Copyright (c) 2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -33,6 +33,9 @@ zsock_t *cont_socket_get(enum cont_src_en cont_src); unsigned int cont_src_ifindex(enum cont_src_en cont_src, int ifindex); +/* Generic command handler for console or config messages */ +typedef int (*cmd_func_t)(FILE *f, int argc, char **argv); + /* Helper functions to handle interface config replay */ /* @@ -52,15 +55,23 @@ struct cfg_if_list { }; struct cfg_if_list *cfg_if_list_create(void); +int cfg_if_list_replay(struct cfg_if_list **cfg_list, const char *ifname, + cmd_func_t handler); struct cfg_if_list_entry * cfg_if_list_lookup(struct cfg_if_list *if_list, const char *ifname); +/* 1 entry per interface, will overwrite if already present */ int cfg_if_list_add(struct cfg_if_list *if_list, const char *ifname, int argc, char *argv[]); +/* multiple entries per interface */ +int cfg_if_list_add_multi(struct cfg_if_list *if_list, const char *ifname, + int argc, char *argv[]); int cfg_if_list_bin_add(struct cfg_if_list *if_list, const char *ifname, char *msg, int len); int cfg_if_list_del(struct cfg_if_list *if_list, const char *ifname); int cfg_if_list_destroy(struct cfg_if_list **if_list); +int cfg_if_list_cache_command(struct cfg_if_list **if_list, const char *ifname, + int argc, char **argv); zsock_t *cont_src_get_broker_ctrl(enum cont_src_en cont_src); zsock_t *cont_src_get_broker_data(enum cont_src_en cont_src); diff --git a/src/master.c b/src/controller.c similarity index 51% rename from src/master.c rename to src/controller.c index a2bdbbd6..50e63ed1 100644 --- a/src/master.c +++ b/src/controller.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2021, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2011-2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -7,7 +7,7 @@ */ /* - * Master thread. + * Main thread. 
*/ #include @@ -40,18 +40,20 @@ #include "commands.h" #include "compiler.h" -#include "config.h" +#include "config_internal.h" #include "control.h" #include "dealer.h" #include "dp_event.h" #include "dpmsg.h" -#include "event.h" +#include "event_internal.h" +#include "if/dpdk-eth/dpdk_eth_if.h" +#include "if/dpdk-eth/hotplug.h" #include "if_ether.h" #include "if_var.h" #include "ip_addr.h" #include "json_writer.h" #include "main.h" -#include "master.h" +#include "controller.h" #include "npf/npf_event.h" #include "route.h" #include "route_broker.h" @@ -63,16 +65,34 @@ #include "util.h" #include "vplane_debug.h" #include "vplane_log.h" -#include "vrf.h" +#include "vrf_internal.h" +#include "crypto/xfrm_client.h" #include "zmq_dp.h" -#include "hotplug.h" -/* Frequency of updates to soft_ticks */ +/* Frequency of updates to soft_ticks (i.e. every 10ms) */ #define SOFT_CLOCK_HZ 100 + +/* Millisecs since dataplane started */ volatile uint64_t soft_ticks; + +/* Microsecs since dataplane started */ +uint64_t soft_ticks_us; + +/* Unix epoch in microsecs */ +uint64_t unix_epoch_us; + static uint64_t soft_clock_override; -/* How long to wait in master loop (poll). +/* Unix epoch when dataplane started */ +static struct timespec start_ts; + +/* Unix epoch in microsecs when dataplane started */ +static uint64_t start_us; + +/* Unix epoch, refreshed every 10ms */ +static struct timespec unix_epoch_ts; + +/* How long to wait in main loop (poll). Determines the minimum resolution of timers used ARP, Heartbeat, etc */ #define TIMER_INTERVAL_MS (1000/SOFT_CLOCK_HZ) @@ -88,10 +108,13 @@ static uint64_t soft_clock_override; /* Limit for response for next part of snapshot. 
*/ #define RESYNC_TIMEOUT 300 /* seconds */ +/* Time to wait for response to adding a port */ +#define ADDPORT_TIMEOUT 60 /* seconds */ + static struct rte_timer load_average_timer; static struct rte_timer soft_clock_timer; -struct master_time_s { +struct main_time_s { struct rte_timer reset_timer; struct rte_timer connect_timer; struct rte_timer snapshot_timer; @@ -99,74 +122,98 @@ struct master_time_s { uint64_t connect_timeout; /* in rte ticks */ uint64_t resync_timeout; /* in rte ticks */ }; -static struct master_time_s master_time[CONT_SRC_COUNT]; +static struct main_time_s main_time[CONT_SRC_COUNT]; + +enum request_state { + REQUEST_STATE_UNKNOWN = 0, + REQUEST_STATE_SENT_DEL, + REQUEST_STATE_SENT_INI, + REQUEST_STATE_SENT_ADD +}; + +static uint64_t port_request_last_seqno; -/* expected asynchronous responses from controller */ -struct response { +/* + * State describing the request message (INIPORT, ADDPORT & DELPORT) + * sent to the controller + */ +struct port_request { + enum request_state state; unsigned int portid; uint64_t seqno; - bool is_teardown; struct rte_timer timer; - enum cont_src_en rsp_cont_src; + enum cont_src_en cont_src; }; -static int new_port_response(enum cont_src_en cont_src, portid_t port, - zmsg_t *msg, uint32_t *ifindex, char **ifname); -static zlist_t *response_list[CONT_SRC_COUNT]; -static zlist_t *resync_list[CONT_SRC_COUNT]; -static void master_cleanup(enum cont_src_en cont_src); + +/* + * During DPDK port initialisation port_request_list tracks INIPORT + * messages sent to the controller; port_request_list_alt tracks + * ADDPORT and DELPORT messages sent to the controller. + * + * Only when port_request_list is empty (all INIPORT responses + * received) can the main thread kick off the snapshot request (no + * need to wait for the ADDPORTs to complete before processing netlink + * messages). 
+ */ +static zlist_t *port_request_list[CONT_SRC_COUNT]; +static zlist_t *port_request_list_alt[CONT_SRC_COUNT]; +static void main_cleanup(enum cont_src_en cont_src); /* Uplink: Do we have an L3 source address we can use to connect to a remote * controller ? */ static bool control_addr; -enum master_state_en { - MASTER_IDLE, - MASTER_SETUP, - MASTER_RESYNC_NEEDED, - MASTER_CONNECT, - MASTER_CONNECT_WAIT, - MASTER_SOCKET_CREATE, +enum main_state_en { + MAIN_IDLE, + MAIN_SETUP, + MAIN_SETUP_WAIT, + MAIN_RESYNC_NEEDED, + MAIN_CONNECT, + MAIN_CONNECT_WAIT, + MAIN_SOCKET_CREATE, /* The following states can handle event callbacks. They must be the - * highest values, and MASTER_RESYNC must be first - * see master_state_is_event_ready + * highest values, and MAIN_RESYNC must be first + * see main_state_is_event_ready */ - MASTER_RESYNC, - MASTER_READY, - MASTER_RESET, + MAIN_RESYNC, + MAIN_READY, + MAIN_RESET, }; -#define MASTER_COUNT (MASTER_RESET + 1) +#define MAIN_COUNT (MAIN_RESET + 1) -static const char *master_state_name(enum master_state_en state) +static const char *main_state_name(enum main_state_en state) { switch (state) { - case MASTER_IDLE: + case MAIN_IDLE: return "idle"; - case MASTER_SETUP: + case MAIN_SETUP: return "setup"; - case MASTER_RESYNC_NEEDED: + case MAIN_SETUP_WAIT: + return "setup-wait"; + case MAIN_RESYNC_NEEDED: return "resync-needed"; - case MASTER_RESYNC: + case MAIN_RESYNC: return "resync"; - case MASTER_READY: + case MAIN_READY: return "ready"; - case MASTER_RESET: + case MAIN_RESET: return "reset"; - case MASTER_CONNECT: + case MAIN_CONNECT: return "connect"; - case MASTER_CONNECT_WAIT: + case MAIN_CONNECT_WAIT: return "connect-wait"; - case MASTER_SOCKET_CREATE: + case MAIN_SOCKET_CREATE: return "socket-create"; default: return "unknown"; } } -struct master_state_stats_s { - uint32_t state_in[MASTER_COUNT]; /* Times we have entered this state */ +struct main_state_stats_s { + uint32_t state_in[MAIN_COUNT]; /* Times we have entered this 
state */ }; -static struct master_state_stats_s master_state_stats[CONT_SRC_COUNT]; +static struct main_state_stats_s main_state_stats[CONT_SRC_COUNT]; /* * Perform a dummy route lookup to the controller address to make @@ -202,7 +249,7 @@ static bool check_uplink_route(enum cont_src_en cont_src) } /* Send an event to be published by vplaned */ -int send_dp_event(zmsg_t *msg) +int dp_send_event_to_vplaned(zmsg_t *msg) { zsock_t *csocket = cont_socket_get(CONT_SRC_MAIN); int result; @@ -221,60 +268,60 @@ int send_dp_event(zmsg_t *msg) return result; } -static enum master_state_en -master_state_info(enum cont_src_en cont_src, - enum master_state_en new_state, bool set) +static enum main_state_en +main_state_info(enum cont_src_en cont_src, + enum main_state_en new_state, bool set) { - static enum master_state_en master_state[CONT_SRC_COUNT] - = { MASTER_IDLE, MASTER_IDLE }; + static enum main_state_en main_state[CONT_SRC_COUNT] + = { MAIN_IDLE, MAIN_IDLE }; if (set) { DP_DEBUG(INIT, INFO, DATAPLANE, - "master(%s) state change %s -> %s\n", + "main(%s) state change %s -> %s\n", cont_src_name(cont_src), - master_state_name(master_state[cont_src]), - master_state_name(new_state)); - master_state[cont_src] = new_state; + main_state_name(main_state[cont_src]), + main_state_name(new_state)); + main_state[cont_src] = new_state; } - if ((master_state[cont_src] < MASTER_IDLE) || - (master_state[cont_src] >= MASTER_COUNT)) - rte_panic("Invalid master(%s) state %i\n", - cont_src_name(cont_src), master_state[cont_src]); + if ((main_state[cont_src] < MAIN_IDLE) || + (main_state[cont_src] >= MAIN_COUNT)) + rte_panic("Invalid main(%s) state %i\n", + cont_src_name(cont_src), main_state[cont_src]); - return master_state[cont_src]; + return main_state[cont_src]; } -static enum master_state_en -master_state_get(enum cont_src_en cont_src) +static enum main_state_en +main_state_get(enum cont_src_en cont_src) { - return master_state_info(cont_src, 0, false); + return 
main_state_info(cont_src, 0, false); } static void -master_state_set(enum cont_src_en cont_src, enum master_state_en new_state) +main_state_set(enum cont_src_en cont_src, enum main_state_en new_state) { - if (master_state_get(cont_src) == new_state) + if (main_state_get(cont_src) == new_state) return; if ((cont_src == CONT_SRC_UPLINK) && - (master_state_get(cont_src) == MASTER_READY)) + (main_state_get(cont_src) == MAIN_READY)) /* local vplaned leaving ready state, idle main */ - master_state_set(CONT_SRC_MAIN, MASTER_IDLE); + main_state_set(CONT_SRC_MAIN, MAIN_IDLE); - master_state_info(cont_src, new_state, true); + main_state_info(cont_src, new_state, true); - master_state_stats[cont_src].state_in[new_state]++; + main_state_stats[cont_src].state_in[new_state]++; if (!is_local_controller() && (cont_src == CONT_SRC_MAIN)) { - switch (master_state_get(cont_src)) { - case MASTER_IDLE: + switch (main_state_get(cont_src)) { + case MAIN_IDLE: /* remote vplaned going idle, clean up */ - master_cleanup(cont_src); + main_cleanup(cont_src); break; - case MASTER_READY: + case MAIN_READY: /* Reached ready state, reset the retry_delay */ - master_time[cont_src].retry_delay = RETRY_MIN_TICKS; + main_time[cont_src].retry_delay = RETRY_MIN_TICKS; break; default: break; @@ -286,51 +333,85 @@ master_state_set(enum cont_src_en cont_src, enum master_state_en new_state) * fd / sockets */ static bool -master_state_is_event_ready(enum cont_src_en cont_src) +main_state_is_event_ready(enum cont_src_en cont_src) { - return master_state_get(cont_src) >= MASTER_RESYNC; + return main_state_get(cont_src) >= MAIN_RESYNC; } static bool -master_state_all_event_ready(void) +main_state_all_event_ready(void) { - return master_state_is_event_ready(CONT_SRC_MAIN) && - master_state_is_event_ready(CONT_SRC_UPLINK); + return main_state_is_event_ready(CONT_SRC_MAIN) && + main_state_is_event_ready(CONT_SRC_UPLINK); } -static struct response *find_response(enum cont_src_en cont_src, uint64_t seqno) +static 
struct port_request *__get_request(zlist_t *list[], + enum cont_src_en cont_src, + uint64_t seqno) { - struct response *rsp; + struct port_request *req; - for (rsp = zlist_first(response_list[cont_src]); - rsp; - rsp = zlist_next(response_list[cont_src])) - if (rsp->seqno == seqno) + for (req = zlist_first(list[cont_src]); + req; + req = zlist_next(list[cont_src])) + if (req->seqno == seqno) { + zlist_remove(list[cont_src], req); break; + } - return rsp; + return req; } -static void cleanup_responses(enum cont_src_en cont_src) +static void __cleanup_requests(zlist_t *list[], enum cont_src_en cont_src) { - struct response *rsp; + struct port_request *req; - while ((rsp = zlist_pop(response_list[cont_src])) != NULL) { - if (rte_timer_pending(&rsp->timer)) - rte_timer_stop_sync(&rsp->timer); - free(rsp); + while ((req = zlist_pop(list[cont_src])) != NULL) { + if (rte_timer_pending(&req->timer)) + rte_timer_stop_sync(&req->timer); + free(req); } } -static void cleanup_resync(enum cont_src_en cont_src) +static struct port_request *get_request(enum cont_src_en cont_src, + uint64_t seqno) { - zmsg_t *msg; + return __get_request(port_request_list, cont_src, seqno); +} - while ((msg = zlist_pop(resync_list[cont_src])) != NULL) - zmsg_destroy(&msg); +static struct port_request *get_request_alt(enum cont_src_en cont_src, + uint64_t seqno) +{ + return __get_request(port_request_list_alt, cont_src, seqno); } -static void master_cleanup(enum cont_src_en cont_src) +static void cleanup_requests(enum cont_src_en cont_src) +{ + __cleanup_requests(port_request_list, cont_src); + __cleanup_requests(port_request_list_alt, cont_src); +} + +static void destroy_requests(enum cont_src_en cont_src) +{ + if (port_request_list[cont_src]) + zlist_destroy(&port_request_list[cont_src]); + if (port_request_list_alt[cont_src]) + zlist_destroy(&port_request_list_alt[cont_src]); +} + +static void init_requests(enum cont_src_en cont_src) +{ + port_request_list[cont_src] = zlist_new(); + if 
(!port_request_list[cont_src]) + rte_panic("%s Unable to allocate request list\n", + cont_src_name(cont_src)); + port_request_list_alt[cont_src] = zlist_new(); + if (!port_request_list_alt[cont_src]) + rte_panic("%s Unable to allocate alternate request list\n", + cont_src_name(cont_src)); +} + +static void main_cleanup(enum cont_src_en cont_src) { if (is_local_controller()) return; @@ -338,8 +419,34 @@ static void master_cleanup(enum cont_src_en cont_src) console_unbind(cont_src); controller_unsubscribe(cont_src); route_broker_unsubscribe(cont_src); - cleanup_responses(cont_src); - cleanup_resync(cont_src); + xfrm_client_unsubscribe(); + cleanup_requests(cont_src); +} + +static uint64_t ts_to_usecs(struct timespec *ts) +{ + return (ts->tv_sec * USEC_PER_SEC) + (ts->tv_nsec / NSEC_PER_USEC); +} + +static void timestamp_init(void) +{ + /* Get unix epoch start time. Precision of 1ns. */ + clock_gettime(CLOCK_REALTIME, &start_ts); + unix_epoch_ts = start_ts; + + start_us = ts_to_usecs(&start_ts); + unix_epoch_us = start_us; +} + +/* Update unix_epoch_ts and calculate microsecs since start */ +static inline void update_soft_ticks(void) +{ + /* Get unix epoch time. Precision of 1ms. */ + clock_gettime(CLOCK_REALTIME_COARSE, &unix_epoch_ts); + + unix_epoch_us = ts_to_usecs(&unix_epoch_ts); + soft_ticks_us = unix_epoch_us - start_us; + soft_ticks = soft_ticks_us / USEC_PER_MSEC; } /* Call back from timer every second. */ @@ -369,7 +476,7 @@ static void soft_clock_event(struct rte_timer *tim __rte_unused, if (soft_clock_override) return; - soft_ticks += 1000 / SOFT_CLOCK_HZ; + update_soft_ticks(); } /* Call back from timer after reset sleep has completed. */ @@ -393,11 +500,11 @@ static void reset_timer_event(struct rte_timer *tim __rte_unused, /* Only the remote vplaned connection is being reset. 
*/ /* If the restart delay has got too long, restart process */ - if (master_time[cont_src].retry_delay > RETRY_MAX_DELAY_TICKS) { + if (main_time[cont_src].retry_delay > RETRY_MAX_DELAY_TICKS) { RTE_LOG(NOTICE, DATAPLANE, - "master(%s) Shutting down, retry %lus > retry max %ds\n", + "main(%s) Shutting down, retry %lus > retry max %ds\n", cont_src_name(cont_src), - master_time[cont_src].retry_delay / rte_get_timer_hz(), + main_time[cont_src].retry_delay / rte_get_timer_hz(), RETRY_MAX_DELAY_SEC); running = false; return; @@ -406,15 +513,15 @@ static void reset_timer_event(struct rte_timer *tim __rte_unused, /* Only increase retry_delay after timer expires, we may have multiple * reset signals whilst timer is running. */ - master_time[cont_src].retry_delay += RETRY_MIN_TICKS; + main_time[cont_src].retry_delay += RETRY_MIN_TICKS; /* Until we hear otherwise we still have an uplink with the * local-vplane provided ip address. Go back to idle state, to * attempt to reconnect. */ - RTE_LOG(NOTICE, DATAPLANE, "master(%s) Starting resynch\n", + RTE_LOG(NOTICE, DATAPLANE, "main(%s) Starting resynch\n", cont_src_name(cont_src)); - master_state_set(cont_src, MASTER_IDLE); + main_state_set(cont_src, MAIN_IDLE); } /* Force stop of all traffic. @@ -422,13 +529,13 @@ static void reset_timer_event(struct rte_timer *tim __rte_unused, void reset_dataplane(enum cont_src_en cont_src, bool delay) { RTE_LOG(NOTICE, DATAPLANE, - "master(%s) RESET, reconnecting in %lus (max %ds)\n", + "main(%s) RESET, reconnecting in %lus (max %ds)\n", cont_src_name(cont_src), - delay ? master_time[cont_src].retry_delay / rte_get_timer_hz() + delay ? 
main_time[cont_src].retry_delay / rte_get_timer_hz() : 0, RETRY_MAX_DELAY_SEC); - master_state_set(cont_src, MASTER_RESET); + main_state_set(cont_src, MAIN_RESET); /* Flush old state */ dp_event(DP_EVT_RESET_CONFIG, cont_src, NULL, 0, 0, NULL); @@ -439,270 +546,194 @@ void reset_dataplane(enum cont_src_en cont_src, bool delay) if (delay) { /* Lastly set timer to delay reconnection attempt */ - rte_timer_reset(&master_time[cont_src].reset_timer, - master_time[cont_src].retry_delay, + rte_timer_reset(&main_time[cont_src].reset_timer, + main_time[cont_src].retry_delay, SINGLE, rte_get_master_lcore(), reset_timer_event, (void *)cont_src); } else { /* Operator reset, return to the min retry delay. */ - master_time[cont_src].retry_delay = RETRY_MIN_TICKS; + main_time[cont_src].retry_delay = RETRY_MIN_TICKS; reset_timer_event(NULL, (void *)cont_src); } } -static void handle_port_response(enum cont_src_en cont_src, - struct response *rsp, uint32_t ifindex, - char *ifname) -{ - if (ifindex) { - struct ifnet *ifp = ifport_table[rsp->portid]; - - if (ifp) { - /* Set the if dp id to the local vplane id */ - if_set_cont_src(ifp, cont_src); - if_rename(ifp, ifname); - if_set_ifindex(ifp, ifindex); - if_finish_create(ifp, - is_team(ifp) ? 
"team" : "ether", - NULL, &ifp->eth_addr); - - DP_DEBUG(INIT, DEBUG, DATAPLANE, - "master(%s) port %u ifindex %u ifname %s\n", - cont_src_name(cont_src), ifp->if_port, - ifindex, ifname); - - if (is_team(ifp)) - return; - - int rc = shadow_init_port(ifp->if_port, ifname, - &ifp->eth_addr); - - if (rc < 0) { - char port_name[RTE_ETH_NAME_MAX_LEN]; - RTE_LOG(ERR, DATAPLANE, - "master(%s) cannot init shadow for port %u\n", - cont_src_name(cont_src), ifp->if_port); - if (rte_eth_dev_get_name_by_port(ifp->if_port, - port_name) < 0) - RTE_LOG(ERR, DATAPLANE, - "port(%u) to name failed\n", - ifp->if_port); - else if (detach_device(port_name)) - RTE_LOG(ERR, DATAPLANE, - "detach device %s failed\n", - port_name); - } - } - } -} - -static int process_ready(enum cont_src_en cont_src, zmsg_t *msg) +/* + * Build and send multi-part message: + * [0] DELPORT + * [1] 64bit + * [2] 32bit + * [3] 32bit (ignored) + * [4] ipv4/ipv6 address + */ +static void del_port_request(enum cont_src_en cont_src, zsock_t *zsock, + uint64_t seqno, portid_t portid) { - dpmsg_t dpmsg; - int rc; + uint32_t ignored = 0; + uint32_t port; + zmsg_t *msg = zmsg_new(); + if (!msg) + return; - rc = dpmsg_convert_zmsg(msg, &dpmsg); - if (rc >= 0) - rc = process_ready_msg(cont_src, &dpmsg); + zmsg_addstr(msg, "DELPORT"); + zmsg_addmem(msg, &seqno, sizeof(seqno)); + /* controller expects 32 bit value for port */ + port = portid; + zmsg_addmem(msg, &port, sizeof(port)); + zmsg_addmem(msg, &ignored, sizeof(ignored)); + zmsg_addmem(msg, &config.local_ip, sizeof(struct ip_addr)); - zmsg_destroy(&msg); + RTE_LOG(DEBUG, DATAPLANE, + "main(%s) DELPORT request port %u\n", + cont_src_name(cont_src), port); - return rc; + zmsg_send_and_destroy(&msg, zsock); } -/* Asynchronous response from server. 
- * This detects when controller has restarted: - * LINKUP 1 127.0.0.1 --> - * <-- PORT FAIL +/* + * Build and send multi-part message: + * [0] ADDPORT + * [1] 64bit + * [2] 32bit - As returned by INI response + * [3] string - Interface name as returned by INI response + * + * Response + * [1] 64bit + * [2] 32bit - Interface ifindex + * [3] string - Interface name */ -static int async_response(void *cont_src_ptr) +static int add_port_request(enum cont_src_en cont_src, zsock_t *zsock, + uint64_t seqno, uint32_t cookie, + const char *ifname) { - enum cont_src_en cont_src = (uintptr_t)cont_src_ptr; - - zmsg_t *msg = zmsg_recv(cont_socket_get(cont_src)); - - if (!msg) { - RTE_LOG(ERR, DATAPLANE, - "master(%s) no message in response from controller\n", - cont_src_name(cont_src)); - return -1; - } - - if (zmsg_size(msg) < 2) { - char *str = zmsg_popstr(msg); - RTE_LOG(ERR, DATAPLANE, - "master(%s) short message from controller: %s\n", - cont_src_name(cont_src), str); - free(str); - goto msgerr; - } - - /* peek at the sequence number */ - zmsg_first(msg); - - zframe_t *frame = zmsg_next(msg); - uint64_t seqno; - - if (zframe_size(frame) != sizeof(uint64_t)) { - RTE_LOG(ERR, DATAPLANE, - "master(%s) expect uint64_t message got size %zd\n", - cont_src_name(cont_src), zframe_size(frame)); - goto msgerr; - } - memcpy(&seqno, zframe_data(frame), sizeof(uint64_t)); - - struct response *rsp = find_response(cont_src, seqno); - - if (rsp) { - uint32_t ifindex = 0; - char *ifname = NULL; - if (!rsp->is_teardown) { - int rc = new_port_response(cont_src, rsp->portid, msg, - &ifindex, &ifname); - - if (rc == 0) - handle_port_response(cont_src, rsp, ifindex, - ifname); - rte_timer_stop(&rsp->timer); - } - zlist_remove(response_list[cont_src], rsp); - free(rsp); - zmsg_destroy(&msg); - free(ifname); - } else if (master_state_get(cont_src) == MASTER_RESYNC) { - /* stash away this message for later */ - zlist_append(resync_list[cont_src], msg); - } else { - /* - * Unsol message received 
in MASTER_READY - */ - if (process_ready(cont_src, msg) < 0) { - RTE_LOG(ERR, DATAPLANE, - "master(%s) unexpected message in ready", - cont_src_name(cont_src)); - reset_dataplane(cont_src, true); - } - return 0; - } - - /* if we have no more expected MYPORT? responses. process whatever - * snapshot messages are queued. - */ - zmsg_t *msg2; - - while ((zlist_size(response_list[cont_src]) == 0) && - (msg2 = zlist_pop(resync_list[cont_src])) != NULL) { - int rc; - int eof = 0; - dpmsg_t dpmsg; - - rc = dpmsg_convert_zmsg(msg2, &dpmsg); - if (rc < 0) { - zmsg_destroy(&msg2); - reset_dataplane(cont_src, true); - break; - } - rc = process_snapshot_one(cont_src, &dpmsg, &eof); - zmsg_destroy(&msg2); - - if (rc < 0) { - reset_dataplane(cont_src, true); - break; - } else if (eof) { - master_state_set(cont_src, MASTER_READY); - controller_init_event_handler(cont_src); - route_broker_init_event_handler(cont_src); - } + zmsg_t *msg = zmsg_new(); + if (!msg) + return -ENOMEM; - } - return 0; + RTE_LOG(DEBUG, DATAPLANE, + "main(%s) ADDPORT request '%u %s'\n", cont_src_name(cont_src), + cookie, ifname); -msgerr: - zmsg_destroy(&msg); - reset_dataplane(cont_src, true); + zmsg_addstr(msg, "ADDPORT"); + zmsg_addmem(msg, &seqno, sizeof(seqno)); + zmsg_addmem(msg, &cookie, sizeof(cookie)); + zmsg_addstr(msg, ifname); + zmsg_send_and_destroy(&msg, zsock); return 0; } /* * Build and send multi-part message: - * [0] NEWPORT + * [0] INIPORT * [1] 64bit - * [2] ipv4/ipv6 address - * [3] string - JSON encoded slot related info + * [2] string - JSON encoded slot related info + * + * Response + * [1] 64bit + * [2] 32bit - context to be included in ADDPORT + * [3] string - generated interface name */ -static int new_port_request(enum cont_src_en cont_src, zsock_t *zsock, - uint64_t seqno, const struct ifnet *ifp) +static int ini_port_request(enum cont_src_en cont_src, zsock_t *zsock, + uint64_t seqno, portid_t portid) { - zmsg_t *msg = zmsg_new(); if (!msg) return -ENOMEM; - char *devinfo = 
if_port_info(ifp); + char *devinfo = dpdk_eth_vplaned_devinfo(portid); if (!devinfo) { zmsg_destroy(&msg); return -ENOMEM; } RTE_LOG(DEBUG, DATAPLANE, - "master(%s) new port request '%s'\n", cont_src_name(cont_src), + "main(%s) INIPORT request '%s'\n", cont_src_name(cont_src), devinfo); - zmsg_addstr(msg, "NEWPORT"); + zmsg_addstr(msg, "INIPORT"); zmsg_addmem(msg, &seqno, sizeof(seqno)); - zmsg_addmem(msg, &config.local_ip, sizeof(struct ip_addr)); zmsg_addstr(msg, devinfo); free(devinfo); - zmsg_send_and_destroy(&msg, zsock); return 0; } /* - * Build and send multi-part message: - * [0] DELPORT - * [1] 64bit - * [2] 32bit - * [3] 32bit - * [4] ipv4/ipv6 address + * The controller took to long to answer. Clean up and reset */ -static void del_port_request(enum cont_src_en cont_src, zsock_t *zsock, - uint64_t seqno, const struct ifnet *ifp) +static void expire_request(struct rte_timer *t __unused, void *arg) { - uint32_t port; - zmsg_t *msg = zmsg_new(); - if (!msg) - return; + struct port_request *req = arg; - zmsg_addstr(msg, "DELPORT"); - zmsg_addmem(msg, &seqno, sizeof(seqno)); - /* controller expects 32 bit value for port */ - port = ifp->if_port; - zmsg_addmem(msg, &port, sizeof(port)); - zmsg_addmem(msg, &ifp->if_index, sizeof(ifp->if_index)); - zmsg_addmem(msg, &config.local_ip, sizeof(struct ip_addr)); + RTE_LOG(ERR, DATAPLANE, + "main(%s) controller request for port %u timeout [seqno %"PRIu64"]\n", + cont_src_name(req->cont_src), req->portid, req->seqno); + reset_dataplane(req->cont_src, true); +} - RTE_LOG(DEBUG, DATAPLANE, - "master(%s) del port request port %u if_index %u\n", - cont_src_name(cont_src), port, ifp->if_index); +static int ini_port_process_response(enum cont_src_en cont_src, + struct port_request *req, uint32_t cookie, + char *ifname) +{ + int rc; - zmsg_send_and_destroy(&msg, zsock); + /* + * Kick off part 2 of the port initialisation sequence - the + * ADDPORT. 
+ */ + port_request_last_seqno++; + rc = add_port_request(cont_src, cont_socket_get(cont_src), + port_request_last_seqno, cookie, ifname); + if (rc < 0) { + RTE_LOG(ERR, DATAPLANE, + "main(%s) ADDPORT request: %s\n", + cont_src_name(cont_src), strerror(-rc)); + return rc; + } + + /* + * Add the port to the name to port map. Once the controller + * has fully registered the interface, the associated NEWLINK + * from the kernel creates the interface. + */ + rc = dpdk_name_to_eth_port_map_add(ifname, req->portid); + if (rc < 0) { + RTE_LOG(ERR, DATAPLANE, + "main(%s) name map add %s failed: %s\n", + cont_src_name(cont_src), ifname, strerror(-rc)); + return rc; + } + + req->seqno = port_request_last_seqno; + req->state = REQUEST_STATE_SENT_ADD; + rte_timer_reset(&req->timer, ADDPORT_TIMEOUT * rte_get_timer_hz(), + SINGLE, rte_get_master_lcore(), expire_request, + req); + /* + * Add the response to the alternate list rather than the main + * list. This allows any received netlink messages to be + * processed immediately rather than being stored for + * processing after the ADDPORT response is received (see + * async_response()). 
+ */ + zlist_append(port_request_list_alt[cont_src], req); + return 0; } /* - * Parse response from controller: + * Parse ADDPORT response from controller: * Expect: * [0] OK * [1] seqno - * [2] ifindex - 32bit host byte order - * [3] ifname - interface name + * [2] ifindex - ifindex + * [3] ifname - interface name * * Returns: * 0 - not found or protocol error * <0 - error */ -static int new_port_response(enum cont_src_en cont_src, portid_t port, - zmsg_t *msg, uint32_t *ifindex, char **ifname) +static int add_port_parse_response(enum cont_src_en cont_src, zmsg_t *msg, + uint32_t portno, uint32_t *ifindex, + char **ifname) { char *answer; uint64_t seqno; @@ -711,42 +742,41 @@ static int new_port_response(enum cont_src_en cont_src, portid_t port, answer = zmsg_popstr(msg); if (!answer) { RTE_LOG(ERR, DATAPLANE, - "master(%s) missing status in initial response\n", + "main(%s) ADDPORT missing status\n", cont_src_name(cont_src)); goto fail; } if (!streq(answer, "OK")) { RTE_LOG(ERR, DATAPLANE, - "master(%s) got '%s' from controller\n", answer, - cont_src_name(cont_src)); + "main(%s) ADDPORT got '%s' from controller\n", + cont_src_name(cont_src), answer); goto fail; } if (zmsg_popu64(msg, &seqno) < 0) { RTE_LOG(ERR, DATAPLANE, - "master(%s) missing seqno in response\n", + "main(%s) ADDPORT missing seqno\n", cont_src_name(cont_src)); goto fail; } if (zmsg_popu32(msg, ifindex) < 0) { RTE_LOG(ERR, DATAPLANE, - "master(%s) missing ifindex in response\n", + "main(%s) ADDPORT missing ifindex\n", cont_src_name(cont_src)); goto fail; } *ifname = zmsg_popstr(msg); if (!*ifname) { RTE_LOG(ERR, DATAPLANE, - "master(%s) missing ifname in response\n", + "main(%s) ADDPORT missing ifname\n", cont_src_name(cont_src)); goto fail; } RTE_LOG(DEBUG, DATAPLANE, - "master(%s) new port %u response %s(%u->%u)\n", - cont_src_name(cont_src), port, *ifname, *ifindex, + "main(%s) ADDPORT %u response %s(%u->%u)\n", + cont_src_name(cont_src), portno, *ifname, *ifindex, cont_src_ifindex(cont_src, 
*ifindex)); *ifindex = cont_src_ifindex(cont_src, *ifindex); - retval = 0; fail: @@ -758,13 +788,242 @@ static int new_port_response(enum cont_src_en cont_src, portid_t port, return retval; } +/* + * Parse INIPORT response from controller: + * Expect: + * [0] OK + * [1] seqno + * [2] cookie - 32bit host byte order + * [3] ifname - interface name + * + * Returns: + * 0 - not found or protocol error + * <0 - error + */ +static int ini_port_parse_response(enum cont_src_en cont_src, zmsg_t *msg, + uint32_t *cookie, char **ifname) +{ + char *answer; + uint64_t seqno; + int retval = -EINVAL; + + answer = zmsg_popstr(msg); + if (!answer) { + RTE_LOG(ERR, DATAPLANE, + "main(%s) INIPORT missing status\n", + cont_src_name(cont_src)); + goto fail; + } + if (!streq(answer, "OK")) { + RTE_LOG(ERR, DATAPLANE, + "main(%s) INIPORT got '%s' from controller\n", + cont_src_name(cont_src), answer); + goto fail; + } + if (zmsg_popu64(msg, &seqno) < 0) { + RTE_LOG(ERR, DATAPLANE, + "main(%s) INIPORT missing seqno\n", + cont_src_name(cont_src)); + goto fail; + } + if (zmsg_popu32(msg, cookie) < 0) { + RTE_LOG(ERR, DATAPLANE, + "main(%s) INIPORT missing cookie\n", + cont_src_name(cont_src)); + goto fail; + } + *ifname = zmsg_popstr(msg); + if (!*ifname) { + RTE_LOG(ERR, DATAPLANE, + "main(%s) INIPORT missing ifname\n", + cont_src_name(cont_src)); + goto fail; + } + + RTE_LOG(DEBUG, DATAPLANE, + "main(%s) INIPORT response '%u %s'\n", + cont_src_name(cont_src), *cookie, *ifname); + retval = 0; +fail: + if (retval < 0) { + *ifname = NULL; + *cookie = 0; + } + free(answer); + return retval; +} + +static bool process_port_response(enum cont_src_en cont_src, + zmsg_t *msg, uint64_t seqno) +{ + struct port_request *req; + uint32_t ifindex = 0; + uint32_t cookie = 0; + char *ifname = NULL; + int rc; + + req = get_request(cont_src, seqno); + if (req == NULL) + req = get_request_alt(cont_src, seqno); + + if (req == NULL) + return false; + + rte_timer_stop(&req->timer); + switch (req->state) { + 
case REQUEST_STATE_SENT_DEL: + break; + case REQUEST_STATE_SENT_INI: + rc = ini_port_parse_response(cont_src, msg, &cookie, &ifname); + if (rc == 0) + rc = ini_port_process_response(cont_src, req, + cookie, ifname); + if (rc == 0) + req = NULL; + + break; + case REQUEST_STATE_SENT_ADD: + /* + * Having established the ifindex the port can be + * inserted into the main IFP database. Note that + * depending on ordering, the IFP may have already + * been updated when the associated NEWLINK message + * arrived. + */ + rc = add_port_parse_response(cont_src, msg, req->portid, + &ifindex, &ifname); + if (rc < 0) + RTE_LOG(ERR, DATAPLANE, + "main(%s) unexpected add port parse response: %s\n", + cont_src_name(cont_src), strerror(-rc)); + break; + default: + RTE_LOG(ERR, DATAPLANE, + "main(%s) unexpected port response state: %d\n", + cont_src_name(cont_src), req->state); + break; + } + + free(ifname); + free(req); + return true; +} + +static int process_ready(enum cont_src_en cont_src, zmsg_t *msg) +{ + dpmsg_t dpmsg; + int rc; + + rc = dpmsg_convert_zmsg(msg, &dpmsg); + if (rc >= 0) + rc = process_ready_msg(cont_src, &dpmsg); + + return rc; +} + +static bool process_async_response(enum cont_src_en cont_src, zmsg_t *msg) +{ + zframe_t *frame; + uint64_t seqno; + + if (zmsg_size(msg) < 2) { + char *str = zmsg_popstr(msg); + RTE_LOG(ERR, DATAPLANE, + "main(%s) short message from controller: %s\n", + cont_src_name(cont_src), str); + free(str); + return false; + } + + /* peek at the sequence number */ + zmsg_first(msg); + frame = zmsg_next(msg); + + if (zframe_size(frame) != sizeof(uint64_t)) { + RTE_LOG(ERR, DATAPLANE, + "main(%s) expect uint64_t message got size %zd\n", + cont_src_name(cont_src), zframe_size(frame)); + return false; + } + + memcpy(&seqno, zframe_data(frame), sizeof(uint64_t)); + + if (process_port_response(cont_src, msg, seqno)) + return true; + + if (main_state_get(cont_src) == MAIN_RESYNC) { + int rc; + int eof = 0; + dpmsg_t dpmsg; + + rc = 
dpmsg_convert_zmsg(msg, &dpmsg); + if (rc < 0) + return false; + + rc = process_snapshot_one(cont_src, &dpmsg, &eof); + if (rc < 0) + return false; + + if (eof) { + main_state_set(cont_src, MAIN_READY); + controller_init_event_handler(cont_src); + route_broker_init_event_handler(cont_src); + rc = xfrm_client_init(); + if (rc < 0) + reset_dataplane(cont_src, true); + } + + return true; + } + + /* + * Unsol message received in MAIN_READY + */ + if (process_ready(cont_src, msg) < 0) { + RTE_LOG(ERR, DATAPLANE, + "main(%s) unexpected message in state %s\n", + cont_src_name(cont_src), + main_state_name(main_state_get(cont_src))); + return false; + } + + return true; +} + +/* Asynchronous response from server. + * This detects when controller has restarted: + * LINKUP 1 127.0.0.1 --> + * <-- PORT FAIL + */ +static int async_response(void *cont_src_ptr) +{ + enum cont_src_en cont_src = (uintptr_t)cont_src_ptr; + + zmsg_t *msg = zmsg_recv(cont_socket_get(cont_src)); + + if (!msg) { + RTE_LOG(ERR, DATAPLANE, + "main(%s) no message in response from controller\n", + cont_src_name(cont_src)); + return -1; + } + + bool ok = process_async_response(cont_src, msg); + + zmsg_destroy(&msg); + if (!ok) + reset_dataplane(cont_src, true); + + return 0; +} + static void connect_timeout(struct rte_timer *t __unused, void *cont_src_ptr) { enum cont_src_en cont_src = (uintptr_t)cont_src_ptr; - if (master_state_get(cont_src) == MASTER_CONNECT_WAIT) { + if (main_state_get(cont_src) == MAIN_CONNECT_WAIT) { RTE_LOG(ERR, DATAPLANE, - "master(%s) controller connect timeout\n", + "main(%s) controller connect timeout\n", cont_src_name(cont_src)); reset_dataplane(cont_src, true); } @@ -774,27 +1033,23 @@ static void snapshot_timeout(struct rte_timer *t __unused, void *cont_src_ptr) { enum cont_src_en cont_src = (uintptr_t)cont_src_ptr; - if (master_state_get(cont_src) == MASTER_RESYNC) { + if (main_state_get(cont_src) == MAIN_RESYNC) { RTE_LOG(ERR, DATAPLANE, - "master(%s) controller snapshot 
timeout\n", + "main(%s) controller snapshot timeout\n", cont_src_name(cont_src)); reset_dataplane(cont_src, true); } } /* - * The controller took to long to answer. Clean up and reset + * Port setup complete? That is, all the initial INIPORT messages & + * associated responses (ifname) have been processed. The ADDPORT + * messages will have been issued, but we don't need to wait for the + * responses before asking for the snapshot. */ -static void expire_response(struct rte_timer *t __unused, void *arg) +static bool setup_interfaces_done(enum cont_src_en cont_src) { - struct response *rsp = arg; - - RTE_LOG(ERR, DATAPLANE, - "master(%s) controller response timeout [%"PRIu64"]\n", - cont_src_name(rsp->rsp_cont_src), rsp->seqno); - reset_dataplane(rsp->rsp_cont_src, true); - - + return zlist_size(port_request_list[cont_src]) == 0; } /* @@ -815,9 +1070,7 @@ static int setup_interfaces(uint8_t startid, uint8_t num_ports, } for (portid = startid; portid < startid + num_ports; portid++) { - struct ifnet *ifp = ifport_table[portid]; - - if (!ifp) + if (!bitmask_isset(&enabled_port_mask, portid)) continue; if (!is_local_controller()) { @@ -830,46 +1083,60 @@ static int setup_interfaces(uint8_t startid, uint8_t num_ports, continue; } - RTE_LOG(NOTICE, DATAPLANE, "master(%s) port %d (%s)\n", - cont_src_name(cont_src), portid, ifp->if_name); + struct port_request *request = malloc(sizeof(*request)); - struct response *expected = malloc(sizeof(*expected)); - - if (!expected) { + if (!request) { RTE_LOG(NOTICE, DATAPLANE, - "master(%s) unable to allocate response entry\n", + "main(%s) unable to allocate request entry\n", cont_src_name(cont_src)); continue; } + enum request_state expect_state; + zlist_t *list = port_request_list[cont_src]; + + ++seqno; if (is_teardown) { - del_port_request(cont_src, ctrl_socket, ++seqno, ifp); + dpdk_eth_port_map_del_port(portid); + + del_port_request(cont_src, ctrl_socket, seqno, portid); + expect_state = REQUEST_STATE_SENT_DEL; + /* + * Don't 
need to wait for the reply from the + * controller before processing any netlink + * messages (see async_response()). + */ + list = port_request_list_alt[cont_src]; } else { - int rc = new_port_request(cont_src, ctrl_socket, - ++seqno, ifp); + int rc; + rc = ini_port_request(cont_src, ctrl_socket, seqno, + portid); if (rc != 0) { RTE_LOG(ERR, DATAPLANE, - "master(%s) new_port request: %s\n", + "main(%s) INIPORT request: %s\n", cont_src_name(cont_src), strerror(-rc)); - free(expected); + free(request); return -1; } + expect_state = REQUEST_STATE_SENT_INI; } - expected->portid = portid; - expected->seqno = seqno; - expected->is_teardown = is_teardown; - expected->rsp_cont_src = cont_src; - rte_timer_init(&expected->timer); + request->state = expect_state; + request->portid = portid; + request->seqno = seqno; + request->cont_src = cont_src; + rte_timer_init(&request->timer); if (!is_teardown) - rte_timer_reset(&expected->timer, - master_time[cont_src].retry_delay, - SINGLE, rte_get_master_lcore(), - expire_response, expected); - zlist_append(response_list[cont_src], expected); + rte_timer_reset( + &request->timer, + ADDPORT_TIMEOUT * rte_get_timer_hz(), + SINGLE, rte_get_master_lcore(), + expire_request, request); + zlist_append(list, request); } + port_request_last_seqno = seqno; return 0; } @@ -914,6 +1181,14 @@ void send_port_status(uint32_t port_id, const struct rte_eth_link *link) if (!csocket) return; + /* + * Unlike regular ports, the link state of bonding interfaces + * isn't owned by the dataplane but is determined by higher + * levels of the system, so don't try to override it. 
+ */ + if (is_team(ifp)) + return; + zmsg_t *msg = zmsg_new(); if (!msg) { RTE_LOG(ERR, DATAPLANE, "out of memory for port status msg\n"); @@ -1016,7 +1291,7 @@ void send_sg_cnt(struct sioc_sg_req *rq, vrfid_t vrf_id, uint32_t flags) zmsg_send_and_destroy(&msg, csocket); } -void send_sg6_cnt(struct sioc_sg_req6 *sr, vrfid_t vrf_id, uint32_t flags) +void send_sg6_cnt(struct sioc_sg_req6 *rq, vrfid_t vrf_id, uint32_t flags) { zmsg_t *msg; zsock_t *csocket = cont_socket_get(CONT_SRC_MAIN); @@ -1030,47 +1305,38 @@ void send_sg6_cnt(struct sioc_sg_req6 *sr, vrfid_t vrf_id, uint32_t flags) return; zmsg_addstr(msg, "MRT6STAT"); - zmsg_addmem(msg, sr, sizeof(*sr)); + zmsg_addmem(msg, rq, sizeof(*rq)); zmsg_addmem(msg, &vrf_id, sizeof(vrf_id)); zmsg_addmem(msg, &flags, sizeof(flags)); zmsg_send_and_destroy(&msg, csocket); } static void -master_init_src(enum cont_src_en cont_src) +main_init_src(enum cont_src_en cont_src) { - rte_timer_init(&master_time[cont_src].reset_timer); - rte_timer_init(&master_time[cont_src].connect_timer); - rte_timer_init(&master_time[cont_src].snapshot_timer); - master_time[cont_src].retry_delay = RETRY_MIN_TICKS; - master_time[cont_src].connect_timeout = + rte_timer_init(&main_time[cont_src].reset_timer); + rte_timer_init(&main_time[cont_src].connect_timer); + rte_timer_init(&main_time[cont_src].snapshot_timer); + main_time[cont_src].retry_delay = RETRY_MIN_TICKS; + main_time[cont_src].connect_timeout = CONNECT_TIMEOUT * rte_get_timer_hz(); - master_time[cont_src].resync_timeout = + main_time[cont_src].resync_timeout = RESYNC_TIMEOUT * rte_get_timer_hz(); - - response_list[cont_src] = zlist_new(); - if (!response_list[cont_src]) - rte_panic("%s Unable to allocate response list\n", - cont_src_name(cont_src)); - resync_list[cont_src] = zlist_new(); - if (!resync_list[cont_src]) - rte_panic("%s Unable to allocate resync list\n", - cont_src_name(cont_src)); + init_requests(cont_src); } static void -master_destroy_src(enum cont_src_en cont_src) 
+main_destroy_src(enum cont_src_en cont_src) { - if (response_list[cont_src]) - zlist_destroy(&response_list[cont_src]); - if (resync_list[cont_src]) - zlist_destroy(&resync_list[cont_src]); + cleanup_requests(cont_src); + destroy_requests(cont_src); controller_unsubscribe(cont_src); route_broker_unsubscribe(cont_src); + xfrm_client_unsubscribe(); } -static void master_control_intf(struct ifnet *ifp, uint8_t family, - const void *addr, bool add) +static void main_control_intf(struct ifnet *ifp, uint8_t family, + const void *addr, bool add) { char addr_str[INET6_ADDRSTRLEN]; struct ip_addr ctrladdr = { @@ -1099,7 +1365,7 @@ static void master_control_intf(struct ifnet *ifp, uint8_t family, if (add) { if (control_addr) { - if (!addr_eq(&config.local_ip, &ctrladdr)) + if (!dp_addr_eq(&config.local_ip, &ctrladdr)) RTE_LOG(ERR, DATAPLANE, "control inf was set. Ignoring %s\n", addr_str); @@ -1131,57 +1397,57 @@ static void master_control_intf(struct ifnet *ifp, uint8_t family, control_addr = false; /* We have no control address, idle main state machine */ - master_state_set(CONT_SRC_MAIN, MASTER_IDLE); + main_state_set(CONT_SRC_MAIN, MAIN_IDLE); } } /* Handle a change of interface address */ -static void master_addr_sig(struct ifnet *ifp, uint32_t ifindex, uint8_t family, - const void *addr, bool add) +static void main_addr_sig(struct ifnet *ifp, uint32_t ifindex, uint8_t family, + const void *addr, bool add) { if (!ifp) { RTE_LOG(DEBUG, DATAPLANE, - "master addr %s on unknown intf index %u\n", + "main addr %s on unknown intf index %u\n", add ? 
"add" : "del", ifindex); return; } - master_control_intf(ifp, family, addr, add); + main_control_intf(ifp, family, addr, add); } -static void master_addr_sig_add(enum cont_src_en cont_src, struct ifnet *ifp, +static void main_addr_sig_add(enum cont_src_en cont_src, struct ifnet *ifp, uint32_t ifindex, int family, const void *addr) { if (cont_src != CONT_SRC_UPLINK) return; - master_addr_sig(ifp, ifindex, family, addr, true); + main_addr_sig(ifp, ifindex, family, addr, true); } -static void master_addr_sig_del(enum cont_src_en cont_src, struct ifnet *ifp, +static void main_addr_sig_del(enum cont_src_en cont_src, struct ifnet *ifp, uint32_t ifindex, int family, const void *addr) { if (cont_src != CONT_SRC_UPLINK) return; - master_addr_sig(ifp, ifindex, family, addr, false); + main_addr_sig(ifp, ifindex, family, addr, false); } -static const struct dp_event_ops master_event_ops = { - .if_addr_add = master_addr_sig_add, - .if_addr_delete = master_addr_sig_del, +static const struct dp_event_ops main_event_ops = { + .if_addr_add = main_addr_sig_add, + .if_addr_delete = main_addr_sig_del, }; -static void __attribute__ ((constructor)) master_event_init(void) +static void __attribute__ ((constructor)) main_event_init(void) { - dp_event_register(&master_event_ops); + dp_event_register(&main_event_ops); } /* - * Master lcore used for console, bridge ageing timer + * Main lcore used for console, bridge ageing timer * and checking link status */ -void master_loop(void) +void main_loop(void) { enum cont_src_en cont_src = CONT_SRC_MAIN; @@ -1197,9 +1463,12 @@ void master_loop(void) rte_get_timer_hz() / SOFT_CLOCK_HZ, PERIODICAL, rte_get_master_lcore(), soft_clock_event, NULL); - master_init_src(CONT_SRC_MAIN); + /* Init timestamps */ + timestamp_init(); + + main_init_src(CONT_SRC_MAIN); if (!is_local_controller()) - master_init_src(CONT_SRC_UPLINK); + main_init_src(CONT_SRC_UPLINK); while (running) { int rc; @@ -1213,28 +1482,28 @@ void master_loop(void) } rte_timer_manage(); - 
rcu_quiescent_state(); - switch (master_state_get(cont_src)) { - case MASTER_IDLE: + dp_rcu_quiescent_state(rte_get_master_lcore()); + switch (main_state_get(cont_src)) { + case MAIN_IDLE: if (is_local_controller() || (cont_src == CONT_SRC_UPLINK)) - master_state_set(cont_src, - MASTER_SOCKET_CREATE); + main_state_set(cont_src, + MAIN_SOCKET_CREATE); /* Can we start main state machine ? */ if ((cont_src == CONT_SRC_MAIN) - && (master_state_get(CONT_SRC_UPLINK) - == MASTER_READY) + && (main_state_get(CONT_SRC_UPLINK) + == MAIN_READY) && control_addr) - master_state_set(CONT_SRC_MAIN, - MASTER_SOCKET_CREATE); + main_state_set(CONT_SRC_MAIN, + MAIN_SOCKET_CREATE); break; - case MASTER_SOCKET_CREATE: + case MAIN_SOCKET_CREATE: if (console_bind(cont_src) == 0) - master_state_set(cont_src, MASTER_CONNECT); + main_state_set(cont_src, MAIN_CONNECT); break; - case MASTER_CONNECT: + case MAIN_CONNECT: if (!check_uplink_route(cont_src)) break; @@ -1246,10 +1515,10 @@ void master_loop(void) struct rte_timer *timer; uint64_t timeout; - timer = &master_time[cont_src].connect_timer; - timeout = master_time[cont_src].connect_timeout; + timer = &main_time[cont_src].connect_timer; + timeout = main_time[cont_src].connect_timeout; - master_state_set(cont_src, MASTER_CONNECT_WAIT); + main_state_set(cont_src, MAIN_CONNECT_WAIT); rte_timer_reset(timer, timeout, SINGLE, rte_get_master_lcore(), connect_timeout, @@ -1263,17 +1532,17 @@ void master_loop(void) reset_dataplane(cont_src, true); break; - case MASTER_CONNECT_WAIT: + case MAIN_CONNECT_WAIT: rc = try_controller_response(cont_socket_get(cont_src), cont_src); if (rc < 0) { if (rc != -EAGAIN) reset_dataplane(cont_src, true); } else - master_state_set(cont_src, MASTER_SETUP); + main_state_set(cont_src, MAIN_SETUP); break; - case MASTER_SETUP: + case MAIN_SETUP: /* Get conf parameters */ conf_query(cont_src); @@ -1282,47 +1551,54 @@ void master_loop(void) /* Connect shadow interfaces to controller */ rc = setup_interfaces(0, -#ifdef 
HAVE_RTE_ETH_DEV_COUNT_AVAIL - rte_eth_dev_count_avail(), -#else - rte_eth_dev_count(), -#endif + nb_ports_total, cont_src, false); if (rc < 0) reset_dataplane(cont_src, true); else - master_state_set(cont_src, - MASTER_RESYNC_NEEDED); + main_state_set(cont_src, + MAIN_SETUP_WAIT); break; - case MASTER_RESYNC_NEEDED: - unregister_event_socket( - zsock_resolve( - cont_socket_get(cont_src))); + case MAIN_SETUP_WAIT: + dp_unregister_event_socket( + zsock_resolve( + cont_socket_get(cont_src))); register_event_socket_src( zsock_resolve( cont_socket_get(cont_src)), async_response, (void *)cont_src, cont_src); + + if (get_next_event(cont_src, TIMER_INTERVAL_MS, + true) < 0) + return; + + if (setup_interfaces_done(cont_src)) + main_state_set(cont_src, + MAIN_RESYNC_NEEDED); + break; + + case MAIN_RESYNC_NEEDED: /* Get netlink state from controller */ rc = controller_snapshot(cont_src); if (rc < 0) { reset_dataplane(cont_src, true); break; } - master_state_set(cont_src, MASTER_RESYNC); - rte_timer_reset(&master_time[cont_src].snapshot_timer, - master_time[cont_src].resync_timeout, + main_state_set(cont_src, MAIN_RESYNC); + rte_timer_reset(&main_time[cont_src].snapshot_timer, + main_time[cont_src].resync_timeout, SINGLE, rte_get_master_lcore(), snapshot_timeout, (void *)cont_src); break; - case MASTER_RESYNC: - case MASTER_RESET: - case MASTER_READY: + case MAIN_RESYNC: + case MAIN_RESET: + case MAIN_READY: if (get_next_event(cont_src, TIMER_INTERVAL_MS, - master_state_all_event_ready()) < 0) + main_state_all_event_ready()) < 0) return; break; } @@ -1331,9 +1607,9 @@ void master_loop(void) running = false; } - master_destroy_src(CONT_SRC_MAIN); + main_destroy_src(CONT_SRC_MAIN); if (!is_local_controller()) - master_destroy_src(CONT_SRC_UPLINK); + main_destroy_src(CONT_SRC_UPLINK); RTE_LOG(NOTICE, DATAPLANE, "Shutdown started\n"); @@ -1343,7 +1619,7 @@ void master_loop(void) } static int -master_state_show(FILE *f) +main_state_show(FILE *f) { enum cont_src_en cont_src; 
json_writer_t *wr = jsonw_new(f); @@ -1351,15 +1627,15 @@ master_state_show(FILE *f) if (wr == NULL) return -1; - jsonw_name(wr, "master_state"); + jsonw_name(wr, "main_state"); jsonw_start_object(wr); for (cont_src = 0; cont_src < CONT_SRC_COUNT; cont_src++) { - enum master_state_en state = master_state_get(cont_src); + enum main_state_en state = main_state_get(cont_src); jsonw_name(wr, cont_src_name(cont_src)); jsonw_start_object(wr); - jsonw_int_field(wr, master_state_name(state), - master_state_stats[cont_src].state_in[state]); + jsonw_int_field(wr, main_state_name(state), + main_state_stats[cont_src].state_in[state]); jsonw_end_object(wr); } jsonw_end_object(wr); @@ -1368,27 +1644,27 @@ master_state_show(FILE *f) return 0; } -/* cmd "master state" */ +/* cmd "main state" */ int -cmd_master(FILE *f, int argc, char **argv) +cmd_main(FILE *f, int argc, char **argv) { if (argc != 2) { fprintf(f, "Wrong number of state command arguments\n"); return -1; } if (strcmp(argv[1], "state") == 0) - return master_state_show(f); + return main_state_show(f); - fprintf(f, "Unknown master command\n"); + fprintf(f, "Unknown main command\n"); return -1; } /* Just for whole_dp UT */ bool -dp_test_master_ready(enum cont_src_en cont_src) +dp_test_main_ready(enum cont_src_en cont_src) { if (is_local_controller()) - return master_state_get(CONT_SRC_MAIN) == MASTER_READY; + return main_state_get(CONT_SRC_MAIN) == MAIN_READY; - return master_state_get(cont_src) == MASTER_READY; + return main_state_get(cont_src) == MAIN_READY; } diff --git a/src/master.h b/src/controller.h similarity index 63% rename from src/master.h rename to src/controller.h index e269c50a..718d6d8c 100644 --- a/src/master.h +++ b/src/controller.h @@ -1,14 +1,14 @@ /*- - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2016 by Brocade Communications Systems, Inc. * All rights reserved. 
* * SPDX-License-Identifier: LGPL-2.1-only * - * master loop api + * main loop api */ -#ifndef MASTER_H -#define MASTER_H +#ifndef CONTROLLER_H +#define CONTROLLER_H #include #include @@ -17,19 +17,17 @@ #include "control.h" #include "compat.h" -void master_loop(void); +void main_loop(void); void reset_dataplane(enum cont_src_en cont_src, bool delay); int setup_interface_portid(portid_t portid); int teardown_interface_portid(portid_t portid); -int cmd_master(FILE *f, int argc, char **argv); +int cmd_main(FILE *f, int argc, char **argv); -bool dp_test_master_ready(enum cont_src_en cont_src); - -int send_dp_event(zmsg_t *msg); +bool dp_test_main_ready(enum cont_src_en cont_src); /* For whole dp tests */ void enable_soft_clock_override(void); void disable_soft_clock_override(void); -#endif /* MASTER_H */ +#endif /* CONTROLLER_H */ diff --git a/src/cpp_rate_limiter.c b/src/cpp_rate_limiter.c index 4226cfd9..9664d215 100644 --- a/src/cpp_rate_limiter.c +++ b/src/cpp_rate_limiter.c @@ -1,7 +1,7 @@ /* * cpp_rate_limiter.c * - * Copyright (c) 2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2019-2020, AT&T Intellectual Property. All rights reserved. * All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only @@ -20,6 +20,7 @@ #define CPP_RL_ERR(fmt, args...) 
\ rte_log(RTE_LOG_ERR, RTE_LOGTYPE_CPP_RL, "CPP_RL: " fmt, ## args) +#define CPP_RL_DEF_BURST_MS 100 /* default burst rate in milliseconds */ /* === cfg-mode === */ @@ -72,6 +73,10 @@ static const uint32_t cpp_rl_pb_attr_map[] = { FAL_CPP_LIMITER_ATTR_TCP, [CPP_RL__CPP_LIMITER__CPP_ATTRIBUTE__CPP_ATTR_EN__CPP_ATTR_DEFAULT] = FAL_CPP_LIMITER_ATTR_DEFAULT, +[CPP_RL__CPP_LIMITER__CPP_ATTRIBUTE__CPP_ATTR_EN__CPP_ATTR_PIM] = + FAL_CPP_LIMITER_ATTR_PIM, +[CPP_RL__CPP_LIMITER__CPP_ATTRIBUTE__CPP_ATTR_EN__CPP_ATTR_IP_MULTICAST] = + FAL_CPP_LIMITER_ATTR_IP_MC, }; static uint32_t cpp_rl_pb_attr_map_size = sizeof(cpp_rl_pb_attr_map) / @@ -121,6 +126,8 @@ static const char * const cpp_rl_prot_name[] = { [FAL_CPP_LIMITER_ATTR_UDP] = "udp", [FAL_CPP_LIMITER_ATTR_TCP] = "tcp", [FAL_CPP_LIMITER_ATTR_DEFAULT] = "default", + [FAL_CPP_LIMITER_ATTR_PIM] = "pim", + [FAL_CPP_LIMITER_ATTR_IP_MC] = "ip-multicast", }; static uint32_t cpp_rl_prot_name_size = sizeof(cpp_rl_prot_name) / @@ -373,8 +380,8 @@ cpp_rl_limiter_cfg_changed(CppRl__CPPLimiter *cpp_msg) if (limiter_obj_id == FAL_NULL_OBJECT_ID) { if (cpp_msg->n_attributes) return true; /* creating limiters */ - else - return false; /* not creating limiters - no change */ + + return false; /* not creating limiters - no change */ } /* Iterate over attributes, which have entries per-protocol */ @@ -617,30 +624,40 @@ cpp_rl_cfg(struct pb_msg *msg) /* FAL_POLICER_ATTR_METER_TYPE attribute */ policer_attr_list[0].value.u32 = FAL_POLICER_METER_TYPE_PACKETS; - /* - * FAL_POLICER_ATTR_CBS attribute - - * need a minimum of 1 pps - */ - policer_attr_list[3].value.u64 = 1; + /* FAL_POLICER_ATTR_CIR attribute */ policer_attr_list[4].value.u64 = parameter->rate_pps; + + /* + * FAL_POLICER_ATTR_CBS attribute is + * the CIR rate * ms burst size, giving + * the packets-per-second burst. 
+ */ + policer_attr_list[3].value.u64 = + policer_attr_list[4].value.u64 * + CPP_RL_DEF_BURST_MS / 1000; } if (parameter->has_rate_kbps) { /* FAL_POLICER_ATTR_METER_TYPE attribute */ policer_attr_list[0].value.u32 = FAL_POLICER_METER_TYPE_BYTES; - /* - * FAL_POLICER_ATTR_CBS attribute - - * need a minimum of 1 kbps - */ - policer_attr_list[3].value.u64 = 1 * (1024 / 8); + /* FAL_POLICER_ATTR_CIR attribute */ /* convert from kilobits into bytes */ policer_attr_list[4].value.u64 = ((uint64_t)parameter->rate_kbps) * (1024 / 8); + + /* + * FAL_POLICER_ATTR_CBS attribute is + * the CIR rate * ms burst size, giving + * the bytes-per-second burst. + */ + policer_attr_list[3].value.u64 = + policer_attr_list[4].value.u64 * + CPP_RL_DEF_BURST_MS / 1000; } } diff --git a/src/crypto/crypto.c b/src/crypto/crypto.c index 28852102..c3fcdc22 100644 --- a/src/crypto/crypto.c +++ b/src/crypto/crypto.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015-2016 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -35,54 +36,69 @@ #include "capture.h" #include "compiler.h" #include "crypto.h" -#include "crypto/crypto_policy_cache.h" #include "crypto_internal.h" #include "crypto_main.h" #include "crypto_policy.h" +#include "crypto_rte_pmd.h" #include "crypto_sadb.h" #include "dp_event.h" #include "esp.h" #include "ether.h" -#include "event.h" +#include "event_internal.h" #include "if_var.h" #include "ip6_funcs.h" #include "ip_funcs.h" #include "json_writer.h" +#include "lcore_sched.h" #include "main.h" #include "npf/fragment/ipv6_rsmbl.h" #include "npf/npf_cache.h" -#include "pktmbuf.h" +#include "pktmbuf_internal.h" #include "pl_common.h" #include "pl_fused.h" +#include "rldb.h" #include "shadow.h" #include "udp_handler.h" #include "urcu.h" #include "util.h" #include "vplane_debug.h" #include "vplane_log.h" -#include "vrf.h" +#include "vrf_internal.h" #include "vti.h" +#include "crypto_rte_pmd.h" +#include "xfrm_client.h" struct cds_list_head; +struct crypto_pkt_buffer *cpbdb[RTE_MAX_LCORE]; + +/* + * crypto garbage collection timer + * Setting up a PMD is an expensive operation. Ideally, PMD setup/teardown + * should be triggered by configuration. However, since we create/destroy PMDs + * based on SA presence, it is efficient to dampen the deletion to ensure + * that a frequent transition of the SA count between zero and 1 does not result + * in a lot of unnecessary activity. + */ +#define CRYPTO_GC_TIMER_INTERVAL 10 + /* * The return ring size needs to be a multiple of the the PMD ring, as * an RX thread could have many packets queued to many PMD rings. 
*/ #define PKT_RET_RING_SIZE PMD_RING_SIZE -#define CRYPTO_PREFETCH_OFFSET 3 - static struct crypto_dp g_crypto_dp; struct crypto_dp *crypto_dp_sp = &g_crypto_dp; -static struct rte_timer pr_cache_timer; +static struct rte_timer flow_cache_timer; +static struct rte_timer crypto_gc_timer; -/* between crypto and master thread */ -static zsock_t *crypto_master_pull; -static const char crypto_inproc[] = "inproc://crypto_to_master"; -static int handle_crypto_event(void *); +/* between crypto and main thread */ +static zsock_t *crypto_main_pull; +static const char crypto_inproc[] = "inproc://crypto_to_main"; +static int handle_crypto_event(void *arg); -/* from the master thread to the rekey listener */ +/* from the main thread to the rekey listener */ static zsock_t *rekey_listener; enum crypto_action { @@ -96,6 +112,10 @@ enum crypto_action { RTE_DEFINE_PER_LCORE(struct crypto_pkt_buffer *, crypto_pkt_buffer); +RTE_DEFINE_PER_LCORE(struct crypto_fwd_info *, crypto_fwd); + +struct crypto_fwd_info crypto_fwd[RTE_MAX_LCORE]; + static const char * const ipsec_counter_names[] = { [ENQUEUED_INPUT_IPV4] = "v4_in", [ENQUEUED_INPUT_IPV6] = "v6_in", @@ -133,12 +153,32 @@ static const char * const ipsec_counter_names[] = { [DROPPED_NO_IFP] = "dropped no ifp", [DROPPED_INVALID_PMD_DEV_ID] = "dropped invalid pmd dev id", [DROPPED_NO_SPI_TO_SA] = "dropped no SA from SPI", - [PR_CACHE_ADD] = "Entry added to PR cache", - [PR_CACHE_ADD_FAIL] = "Failed to add entry to PR cache", - [PR_CACHE_HIT] = "hit PR cache", - [PR_CACHE_MISS] = "missed PR cache", + [FLOW_CACHE_ADD] = "Entry added to flow cache", + [FLOW_CACHE_ADD_FAIL] = "Failed to add entry to flow cache", + [FLOW_CACHE_HIT] = "hit flow cache", + [FLOW_CACHE_MISS] = "missed flow cache", [DROPPED_NO_BIND] = "dropped feature attachment point missing", - [DROPPED_ON_FP_NO_PR] = "dropped on fp but no policy" + [DROPPED_ON_FP_NO_PR] = "dropped on fp but no policy", + [DROPPED_COP_ALLOC_FAILED] = "dropped on crypto op allocation 
failure", + [CRYPTO_OP_FAILED] = "encrypt/decrypt op failed", + [CRYPTO_OP_ASSOC_FAILED] = "failed to associate session with crypto op", + [CRYPTO_OP_PREPARE_FAILED] = "failed to prepare crypto op", + [DROPPED_ESP_IP_FRAG] = "ESP IP fragment dropped", + [ESP_NOT_IN_FIRST_SEG] = "ESP not in first segment", + [INVALID_CIPHERTEXT_LEN] = "Invalid ciphertext length", + [ESP_TAIL_TRIM_FAILED] = "ESP tail trim failed", + [ESP_INVALID_NXT_HDR] = "Invalid next header protocol", + [INVALID_IPSEC_MODE] = "Invalid IPsec mode", + [ESP_ETH_HDR_FIXUP_FAILED] = "Ethernet hdr fixup failed", + [ESP_OUT_HDR_PARSE6_FAILED] = "IPv6 header parse failed", + [ESP_HDR_PREPEND_FAILED] = "Failed to prepend ESP hdr", + [ESP_TAIL_APPEND_FAILED] = "Failed to append ESP tail", + [CRYPTO_CHAIN_INIT_FAILED] = "Failed to set up cipher op chain", + [CRYPTO_AUTH_OP_FAILED] = "Failed auth op", + [CRYPTO_CIPHER_OP_FAILED] = "Failed cipher op", + [CRYPTO_DIGEST_OP_FAILED] = "Failed digest op", + [CRYPTO_DIGEST_CB_FAILED] = "Failed digest cb", + [CRYPTO_PP_ENQ_FAILED] = "Postprocessing enqueue failed", }; unsigned long ipsec_counters[RTE_MAX_LCORE][IPSEC_CNT_MAX] __rte_cache_aligned; @@ -153,41 +193,6 @@ struct crypto_iphdr_ctx { uint8_t nxt_proto; }; -/* - * Per packet crypto context. This carries information - * from the policy lookup in the forwarding thread that - * is needed for the SA lookup in the crypto thread. - */ -struct crypto_pkt_ctx { - /* - * These fields are set up by the forwarding - * thread and used to select the actions the - * crypto thread will perform on the packet. - */ - struct rte_mbuf *mbuf; - uint32_t reqid; - uint32_t spi; - void *l3hdr; - struct ifnet *in_ifp; - struct ifnet *nxt_ifp; - /* - * These fields are are bi-directional. They may be - * set by the forwarding thread and modified by the - * crypto thread. - * - * TODO: Replace direction with an input action - * of either ENCRYPT or DECRYPT. 
- */ - uint8_t action; - uint8_t in_ifp_port; - uint16_t SPARE1; - uint16_t direction; - uint8_t orig_family; - uint8_t family; - xfrm_address_t dst; /* Only used for outbound traffic */ - vrfid_t vrfid; -}; - static int crypto_vrf_insert(struct crypto_vrf_ctx *vrf_ctx) { struct vrf *vrf; @@ -214,12 +219,37 @@ struct crypto_vrf_ctx *crypto_vrf_find_external(vrfid_t vrfid) { struct vrf *vrf; - vrf = vrf_get_rcu_from_external(vrfid); + vrf = dp_vrf_get_rcu_from_external(vrfid); if (!vrf) return NULL; return rcu_dereference(vrf->crypto); } +static int crypto_rldb_create(struct crypto_vrf_ctx *vrf_ctx, const char *name, + int flags, struct rldb_db_handle **db) +{ + int rc; + char buf[RLDB_NAME_MAX]; + + snprintf(buf, sizeof(buf), "%s-vrf%u", name, vrf_ctx->vrfid); + rc = rldb_create(buf, flags, db); + if (rc < 0) { + DP_DEBUG(CRYPTO, ERR, POLICY, + "Failed to create policy rule database %s\n", buf); + return rc; + } + + rc = rldb_start_transaction(*db); + if (rc < 0) { + DP_DEBUG(CRYPTO, ERR, POLICY, + "Failed to prepare transaction for rule database %s\n", + buf); + return rc; + } + + return 0; +} + /* * Lookup/create crypto VRF context block */ @@ -273,6 +303,30 @@ struct crypto_vrf_ctx *crypto_vrf_get(vrfid_t vrfid) if (!vrf_ctx->s2s_bind_hash_table) goto vrf_ctx_get_fail; + /* + * Allocate ACL rule database + */ + + if (crypto_rldb_create(vrf_ctx, "crypto-in4", NPFRL_FLAG_V4_PFX, + &vrf_ctx->input_policy_v4_rldb) < 0) { + goto vrf_ctx_get_fail; + } + + if (crypto_rldb_create(vrf_ctx, "crypto-out4", NPFRL_FLAG_V4_PFX, + &vrf_ctx->output_policy_v4_rldb) < 0) { + goto vrf_ctx_get_fail; + } + + if (crypto_rldb_create(vrf_ctx, "crypto-in6", NPFRL_FLAG_V6_PFX, + &vrf_ctx->input_policy_v6_rldb) < 0) { + goto vrf_ctx_get_fail; + } + + if (crypto_rldb_create(vrf_ctx, "crypto-out6", NPFRL_FLAG_V6_PFX, + &vrf_ctx->output_policy_v6_rldb) < 0) { + goto vrf_ctx_get_fail; + } + /* * Hang crypto block off VRF */ @@ -293,6 +347,15 @@ struct crypto_vrf_ctx *crypto_vrf_get(vrfid_t 
vrfid) cds_lfht_destroy(vrf_ctx->output_policy_rule_sel_ht, NULL); if (vrf_ctx->s2s_bind_hash_table) cds_lfht_destroy(vrf_ctx->s2s_bind_hash_table, NULL); + if (vrf_ctx->input_policy_v4_rldb) + rldb_destroy(vrf_ctx->input_policy_v4_rldb); + if (vrf_ctx->output_policy_v4_rldb) + rldb_destroy(vrf_ctx->output_policy_v4_rldb); + if (vrf_ctx->input_policy_v6_rldb) + rldb_destroy(vrf_ctx->input_policy_v6_rldb); + if (vrf_ctx->output_policy_v6_rldb) + rldb_destroy(vrf_ctx->output_policy_v6_rldb); + free(vrf_ctx); return NULL; } @@ -309,6 +372,11 @@ static inline void crypto_vrf_free(struct rcu_head *head) dp_ht_destroy_deferred(vrf_ctx->spi_out_hash_table); dp_ht_destroy_deferred(vrf_ctx->s2s_bind_hash_table); + rldb_destroy(vrf_ctx->input_policy_v4_rldb); + rldb_destroy(vrf_ctx->input_policy_v6_rldb); + rldb_destroy(vrf_ctx->output_policy_v4_rldb); + rldb_destroy(vrf_ctx->output_policy_v6_rldb); + free(vrf_ctx); } @@ -320,8 +388,8 @@ crypto_vrf_check_remove(struct crypto_vrf_ctx *vrf_ctx) * deleted all SAs and SPs in the VRF */ if (vrf_ctx && - !vrf_ctx->crypto_live_ipv4_policies && - !vrf_ctx->crypto_live_ipv6_policies && + !vrf_ctx->crypto_total_ipv4_policies && + !vrf_ctx->crypto_total_ipv6_policies && !vrf_ctx->count_of_sas && !vrf_ctx->count_of_peers && !vrf_ctx->s2s_bindings) { @@ -346,11 +414,7 @@ static struct crypto_pkt_ctx *allocate_crypto_packet_ctx(void) cache = rte_mempool_default_cache(crypto_dp_sp->pool, rte_lcore_id()); if (unlikely(rte_mempool_generic_get(crypto_dp_sp->pool, (void *)&ctx, -#ifdef HAVE_RTE_MEMPOOL_GENERIC_FLAGS - 1, cache, 1) != 0)) { -#else 1, cache) != 0)) { -#endif return NULL; } IPSEC_CNT_INC(CTX_ALLOCATED); @@ -363,11 +427,7 @@ static void release_crypto_packet_ctx(struct crypto_pkt_ctx *ctx) cache = rte_mempool_default_cache(crypto_dp_sp->pool, rte_lcore_id()); IPSEC_CNT_INC(CTX_FREED); -#ifdef HAVE_RTE_MEMPOOL_GENERIC_FLAGS - rte_mempool_generic_put(crypto_dp_sp->pool, (void *)&ctx, 1, cache, 1); -#else 
rte_mempool_generic_put(crypto_dp_sp->pool, (void *)&ctx, 1, cache); -#endif } static inline const @@ -424,7 +484,7 @@ static struct ifnet *crypto_ctx_to_in_ifp(struct crypto_pkt_ctx *ctx, struct ifnet *ifp; if (pktmbuf_mdata_exists(m, PKT_MDATA_IFINDEX)) { - ifp = ifnet_byifindex(pktmbuf_mdata(m)->md_ifindex.ifindex); + ifp = dp_ifnet_byifindex(pktmbuf_mdata(m)->md_ifindex.ifindex); pktmbuf_mdata_clear(m, PKT_MDATA_IFINDEX); } else { assert(ctx->in_ifp_port < DATAPLANE_MAX_PORTS); @@ -434,148 +494,203 @@ static struct ifnet *crypto_ctx_to_in_ifp(struct crypto_pkt_ctx *ctx, return ifp; } -/* - * crypto_process_decrypt_packet() - * - * Decrypt the packet described by the supplied context. - */ -static void crypto_process_decrypt_packet(struct crypto_pkt_ctx *cctx, - struct rte_mbuf *m, - struct sadb_sa *sa, - uint32_t *bytes) +static inline void +crypto_post_decrypt_handle_vti(struct crypto_pkt_ctx *cctx, + struct rte_mbuf *m, + struct ifnet *vti_ifp) { - int rc; - struct ifnet *vti_ifp = NULL; + if (!(vti_ifp->if_flags & IFF_UP)) { + cctx->action = CRYPTO_ACT_DROP; + return; + } + cctx->in_ifp = vti_ifp; + pktmbuf_clear_rx_vlan(m); + pktmbuf_set_vrf(m, vti_ifp->if_vrfid); + set_spath_rx_meta_data(m, vti_ifp, + ntohs(ethhdr(m)->ether_type), + TUN_META_FLAGS_DEFAULT); + if (unlikely(vti_ifp->capturing)) + capture_burst(vti_ifp, &m, 1); + cctx->action = CRYPTO_ACT_VTI_INPUT; + if_incr_in(vti_ifp, m); +} - /* - * If this packet has come from a VTI, replace the - * physical input interface with the VTI. Doing so - * enables both accounting and input features. 
- */ - unsigned int mark = crypto_sadb_get_mark_val(sa); - - if ((mark != 0) && - (vti_handle_inbound( - crypto_get_src(pktmbuf_mtol3(m, void *), - cctx->family), - cctx->family, mark, m, &vti_ifp) < 0)) { - CRYPTO_DATA_ERR("No VTI interface found\n"); - IPSEC_CNT_INC(NO_VTI); +static inline void +crypto_post_decrypt_handle_vfp(struct crypto_pkt_ctx *cctx, + struct rte_mbuf *m, + struct ifnet *vfp_ifp) +{ + if (!(vfp_ifp->if_flags & IFF_UP)) { cctx->action = CRYPTO_ACT_DROP; - IF_INCR_ERROR(crypto_ctx_to_in_ifp(cctx, m)); return; } + cctx->in_ifp = vfp_ifp; - if (cctx->family == AF_INET) - rc = esp_input(m, sa, bytes, &cctx->family); - else - rc = esp_input6(m, sa, bytes, &cctx->family); + if (unlikely(vfp_ifp->capturing)) + capture_burst(vfp_ifp, &m, 1); + cctx->action = CRYPTO_ACT_INPUT_WITH_FEATURES; + if_incr_in(vfp_ifp, m); +} + +static inline void +crypto_post_decrypt_set_overlay_vrf(struct sadb_sa *sa, struct rte_mbuf *m, + struct ifnet *vfp_ifp) +{ + /* + * Set the overlay vrf if different from input + * VRF. If this goes to the kernel then it + * will need the correct vrf set, so set it in + * meta too just in case. + */ + if (pktmbuf_get_vrf(m) == sa->overlay_vrf_id) + return; + + pktmbuf_set_vrf(m, sa->overlay_vrf_id); + set_spath_rx_meta_data(m, + vfp_ifp ? vfp_ifp : + dp_ifnet_byifindex( + dp_vrf_get_external_id( + sa->overlay_vrf_id)), + ntohs(ethhdr(m)->ether_type), + TUN_META_FLAGS_DEFAULT); +} + +static inline void +crypto_post_decrypt_handle_packet(struct crypto_pkt_ctx *cctx, + struct sadb_sa *sa, + struct rte_mbuf *m, + int rc, struct ifnet *vti_ifp) +{ if (rc < 0) { - IF_INCR_ERROR(vti_ifp ? 
vti_ifp : - crypto_ctx_to_in_ifp(cctx, m)); - CRYPTO_DATA_ERR("ESP Input failed %d\n", rc); + if (vti_ifp) + if_incr_error(vti_ifp); IPSEC_CNT_INC(DROPPED_ESP_INPUT_FAIL); cctx->action = CRYPTO_ACT_DROP; - } else { - if (vti_ifp) { - if (!(vti_ifp->if_flags & IFF_UP)) { + return; + } + + if (vti_ifp) + crypto_post_decrypt_handle_vti(cctx, m, vti_ifp); + else { + struct ifnet *feat_attach_ifp = + rcu_dereference(sa->feat_attach_ifp); + + /* + * If the SA has a virtual feature point bound to + * it, then switch the input interface to the feature + * point so that input features can be run. + */ + if (feat_attach_ifp) { + crypto_post_decrypt_handle_vfp(cctx, m, + feat_attach_ifp); + } else { + cctx->in_ifp = crypto_ctx_to_in_ifp(cctx, m); + if (unlikely(!cctx->in_ifp)) { + IPSEC_CNT_INC(DROPPED_NO_IFP); cctx->action = CRYPTO_ACT_DROP; return; } - cctx->in_ifp = vti_ifp; - pktmbuf_clear_rx_vlan(m); - pktmbuf_set_vrf(m, vti_ifp->if_vrfid); - set_spath_rx_meta_data(m, vti_ifp, - ntohs(ethhdr(m)->ether_type), - TUN_META_FLAGS_DEFAULT); - if (unlikely(vti_ifp->capturing)) - capture_burst(vti_ifp, &m, 1); - cctx->action = CRYPTO_ACT_VTI_INPUT; - } else { - struct ifnet *feat_attach_ifp = - rcu_dereference(sa->feat_attach_ifp); - - /* - * If the SA has a virtual feature point bound to - * it, then switch the input interface to the feature - * point so that input features can be run. - */ - if (feat_attach_ifp) { - if (!(feat_attach_ifp->if_flags & IFF_UP)) { - cctx->action = CRYPTO_ACT_DROP; - return; - } - cctx->in_ifp = feat_attach_ifp; - - if (unlikely(feat_attach_ifp->capturing)) - capture_burst(feat_attach_ifp, &m, 1); - - cctx->action = CRYPTO_ACT_INPUT_WITH_FEATURES; - } else { - cctx->in_ifp = crypto_ctx_to_in_ifp(cctx, m); - if (unlikely(!cctx->in_ifp)) { - CRYPTO_DATA_ERR("No_ifp\n"); - IPSEC_CNT_INC(DROPPED_NO_IFP); - cctx->action = CRYPTO_ACT_DROP; - return; - } - - cctx->action = CRYPTO_ACT_INPUT; - } - /* - * Set the overlay vrf if different from input - * VRF. 
If this goes to the kernel then it - * will need the correct vrf set, so set it in - * meta too just in case. - */ - if (pktmbuf_get_vrf(m) != sa->overlay_vrf_id) { - pktmbuf_set_vrf(m, sa->overlay_vrf_id); - set_spath_rx_meta_data( - m, - feat_attach_ifp ? feat_attach_ifp : - ifnet_byifindex(vrf_get_external_id( - sa->overlay_vrf_id)), - ntohs(ethhdr(m)->ether_type), - TUN_META_FLAGS_DEFAULT); - } + cctx->action = CRYPTO_ACT_INPUT; } - IF_INCR_IN(cctx->in_ifp, m); + crypto_post_decrypt_set_overlay_vrf(sa, m, feat_attach_ifp); } } -static void crypto_process_encrypt_packet(struct crypto_pkt_ctx *cctx, - struct rte_mbuf *m, - struct sadb_sa *sa, - uint32_t *bytes) +static inline void +crypto_process_decrypt_packets(uint16_t count, + struct crypto_pkt_ctx *cctx[], + uint32_t *bytes) { - int rc; + struct rte_mbuf *m; + uint16_t i; - if (cctx->family == AF_INET) - rc = esp_output(m, cctx->orig_family, cctx->l3hdr, sa, bytes); - else - rc = esp_output6(m, cctx->orig_family, cctx->l3hdr, sa, bytes); + for (i = 0; i < count; i++) { + if (unlikely(cctx[i]->action == CRYPTO_ACT_DROP)) + continue; + + crypto_prefetch_ctx(cctx, count, i); - if (rc < 0) { - IF_INCR_OERROR(cctx->nxt_ifp); - CRYPTO_DATA_ERR("ESP Output failed %d\n", rc); - cctx->action = CRYPTO_ACT_DROP; - IPSEC_CNT_INC(DROPPED_ESP_OUTPUT_FAIL); - } else { - cctx->in_ifp = crypto_ctx_to_in_ifp(cctx, m); - if (unlikely(!cctx->in_ifp)) { - CRYPTO_DATA_ERR("No_ifp\n"); - IPSEC_CNT_INC(DROPPED_NO_IFP); - cctx->action = CRYPTO_ACT_DROP; - return; - } - cctx->action = CRYPTO_ACT_OUTPUT; /* - * And put it into the correct vrf now that we - * have added new headers. At the moment we only - * support default for the transport/underlay. + * If this packet has come from a VTI, replace the + * physical input interface with the VTI. Doing so + * enables both accounting and input features. 
*/ - pktmbuf_set_vrf(m, VRF_DEFAULT_ID); + unsigned int mark = crypto_sadb_get_mark_val(cctx[i]->sa); + + m = cctx[i]->mbuf; + if ((mark != 0) && + (vti_handle_inbound( + crypto_get_src(dp_pktmbuf_mtol3(m, void *), + cctx[i]->family), + cctx[i]->family, mark, m, + &cctx[i]->vti_ifp) < 0)) { + IPSEC_CNT_INC(NO_VTI); + cctx[i]->action = CRYPTO_ACT_DROP; + continue; + } + + crypto_prefetch_ctx_data(cctx, count, i); + } + + esp_input(cctx, count); + + for (i = 0; i < count; i++) { + if (unlikely(cctx[i]->action == CRYPTO_ACT_DROP)) + continue; + + crypto_prefetch_ctx(cctx, count, i); + + crypto_post_decrypt_handle_packet(cctx[i], + cctx[i]->sa, + cctx[i]->mbuf, + cctx[i]->status, + cctx[i]->vti_ifp); + *bytes += cctx[i]->bytes; + + crypto_prefetch_ctx_data(cctx, count, i); + } +} + +static void crypto_process_encrypt_packets(uint16_t count, + struct crypto_pkt_ctx *cctx[], + uint32_t *bytes) +{ + uint16_t i; + struct crypto_pkt_ctx *tmp_cctx; + + esp_output(cctx, count); + + for (i = 0; i < count; i++) { + + crypto_prefetch_ctx(cctx, count, i); + + tmp_cctx = cctx[i]; + if (tmp_cctx->status < 0) { + if (tmp_cctx->nxt_ifp) + if_incr_oerror(tmp_cctx->nxt_ifp); + tmp_cctx->action = CRYPTO_ACT_DROP; + IPSEC_CNT_INC(DROPPED_ESP_OUTPUT_FAIL); + } else { + tmp_cctx->in_ifp = crypto_ctx_to_in_ifp(tmp_cctx, + tmp_cctx->mbuf); + if (unlikely(!tmp_cctx->in_ifp)) { + IPSEC_CNT_INC(DROPPED_NO_IFP); + tmp_cctx->action = CRYPTO_ACT_DROP; + continue; + } + tmp_cctx->action = CRYPTO_ACT_OUTPUT; + /* + * And put it into the correct vrf now that we + * have added new headers. At the moment we only + * support default for the transport/underlay. 
+ */ + pktmbuf_set_vrf(tmp_cctx->mbuf, VRF_DEFAULT_ID); + + *bytes += tmp_cctx->bytes; + } + + crypto_prefetch_ctx_data(cctx, count, i); } } @@ -717,6 +832,8 @@ static int crypto_enqueue_internal(enum crypto_xfrm xfrm, if (unlikely(pmd_dev_id == CRYPTO_PMD_INVALID_ID)) { IPSEC_CNT_INC(DROPPED_INVALID_PMD_DEV_ID); + if (nxt_ifp && is_vti(nxt_ifp)) + if_incr_full_proto(nxt_ifp, 1); goto free_mbuf_on_error; } @@ -743,12 +860,16 @@ static int crypto_enqueue_internal(enum crypto_xfrm xfrm, CRYPTO_DATA_ERR("Crypto burst_ring %u full\n", (uint32_t)xfrm); IPSEC_CNT_INC(BURST_RING_FULL); + if (nxt_ifp && is_vti(nxt_ifp)) + if_incr_full_txring(nxt_ifp, 1); goto free_mbuf_on_error; } ctx = allocate_crypto_packet_ctx(); if (unlikely(!ctx)) { IPSEC_CNT_INC(FAILED_TO_ALLOCATE_CTX); + if (nxt_ifp && is_vti(nxt_ifp)) + if_incr_full_proto(nxt_ifp, 1); goto free_mbuf_on_error; } @@ -772,8 +893,14 @@ static int crypto_enqueue_internal(enum crypto_xfrm xfrm, release_crypto_packet_ctx(ctx); goto free_mbuf_on_error; } + if (family == AF_INET) + ctx->out_ethertype = ETH_P_IP; + else + ctx->out_ethertype = ETH_P_IPV6; } ctx->in_ifp = NULL; + ctx->vti_ifp = NULL; + crypto_ctx_save_ifp(ctx, m, in_ifp); ctx->nxt_ifp = nxt_ifp; ctx->spi = spi; @@ -808,11 +935,12 @@ static inline bool crypto_check_hdr_single_seg(struct rte_mbuf *m, { unsigned int len; - len = rte_pktmbuf_data_len(m) - pktmbuf_l2_len(m); + len = rte_pktmbuf_data_len(m) - dp_pktmbuf_l2_len(m); if (len < h->iphlen + sizeof(struct ip_esp_hdr) + ((h->nxt_proto == IPPROTO_UDP) ? 
sizeof(struct udphdr) : 0)) { CRYPTO_DATA_ERR("Bad segment length\n"); - IF_INCR_ERROR(in_ifp); + if (in_ifp) + if_incr_full_proto(in_ifp, 1); return false; } return true; @@ -846,7 +974,7 @@ int crypto_enqueue_inbound_v4(struct rte_mbuf *m, ip = iphdr(m); spi = crypto_retrieve_spi((unsigned char *)ip + - pktmbuf_l3_len(m)); + dp_pktmbuf_l3_len(m)); } pmd_dev_id = crypto_spi_to_pmd_dev_id(spi); @@ -854,12 +982,10 @@ int crypto_enqueue_inbound_v4(struct rte_mbuf *m, if (!crypto_check_hdr_single_seg(m, &h, input_if)) return -1; - if (crypto_enqueue_internal(CRYPTO_DECRYPT, m, AF_INET, AF_INET, + if (!crypto_enqueue_internal(CRYPTO_DECRYPT, m, AF_INET, AF_INET, NULL, input_if, NULL, 0, pmd_dev_id, spi, - iphdr(m)) < 0) - IF_INCR_ERROR(input_if); - else + iphdr(m))) IPSEC_CNT_INC(ENQUEUED_INPUT_IPV4); return 0; @@ -891,7 +1017,7 @@ int crypto_enqueue_inbound_v6(struct rte_mbuf *m, ip6 = ip6hdr(m); spi = crypto_retrieve_spi((unsigned char *)ip6 + - pktmbuf_l3_len(m)); + dp_pktmbuf_l3_len(m)); } pmd_dev_id = crypto_spi_to_pmd_dev_id(spi); @@ -899,12 +1025,10 @@ int crypto_enqueue_inbound_v6(struct rte_mbuf *m, if (!crypto_check_hdr_single_seg(m, &h, input_if)) return -1; - if (crypto_enqueue_internal(CRYPTO_DECRYPT, m, AF_INET6, AF_INET6, + if (!crypto_enqueue_internal(CRYPTO_DECRYPT, m, AF_INET6, AF_INET6, NULL, input_if, NULL, 0, pmd_dev_id, spi, - ip6hdr(m)) < 0) - IF_INCR_ERROR(input_if); - else + ip6hdr(m))) IPSEC_CNT_INC(ENQUEUED_INPUT_IPV6); return 0; @@ -930,36 +1054,125 @@ void crypto_enqueue_outbound(struct rte_mbuf *m, uint16_t orig_family, return; } - crypto_enqueue_internal(CRYPTO_ENCRYPT, m, orig_family, family, dst, - in_ifp, nxt_ifp, reqid, - pmd_dev_id, spi, iphdr(m)); + if (!crypto_enqueue_internal(CRYPTO_ENCRYPT, m, + orig_family, family, dst, + in_ifp, nxt_ifp, reqid, + pmd_dev_id, spi, iphdr(m))) { + if (family == AF_INET) + IPSEC_CNT_INC(ENQUEUED_OUTPUT_IPV4); + else + IPSEC_CNT_INC(ENQUEUED_OUTPUT_IPV6); + } +} - if (family == AF_INET) - 
IPSEC_CNT_INC(ENQUEUED_OUTPUT_IPV4); - else - IPSEC_CNT_INC(ENQUEUED_OUTPUT_IPV6); +static inline void +crypto_redirect_packet_batch(uint8_t core, + struct crypto_pkt_ctx **contexts, + unsigned int batch_cnt) +{ + if (!rte_ring_mp_enqueue_bulk(crypto_fwd[core].fwd_q, + (void **)contexts, batch_cnt, + NULL)) { + /* + * highly unlikely scenario. Free all contexts that could + * not be enqueued to post-processing thread + */ + for (unsigned int j = 0; j < batch_cnt; j++) { + IPSEC_CNT_INC(CRYPTO_PP_ENQ_FAILED); + rte_pktmbuf_free(contexts[j]->mbuf); + release_crypto_packet_ctx(contexts[j]); + } + } +} + +static void crypto_redirect_processed_packets(struct crypto_pkt_ctx **contexts, + unsigned int count) +{ + uint16_t i, batch_cnt = 0; + uint8_t fwd_lcore, prev_fwd_lcore = 0; + struct crypto_pkt_ctx *ctx; + struct crypto_pkt_ctx *tmp_contexts[count]; + + for (i = 0; i < count; i++) { + ctx = contexts[i]; + + /* No SA found in the SADB. */ + if (!ctx->sa) { + crypto_pkt_ctx_forward_and_free(ctx); + continue; + } + + fwd_lcore = ctx->sa->fwd_core; + + /* + * no post-crypto forwarding core has been allocated + * continue forwarding on the same core + */ + if (!fwd_lcore) { + crypto_pkt_ctx_forward_and_free(ctx); + continue; + } + + /* starting first batch */ + if (!prev_fwd_lcore) + prev_fwd_lcore = fwd_lcore; + + /* continuing existing batch */ + if (prev_fwd_lcore == fwd_lcore) { + tmp_contexts[batch_cnt++] = contexts[i]; + continue; + } + + /* flush batch */ + crypto_redirect_packet_batch(prev_fwd_lcore, tmp_contexts, + batch_cnt); + + /* start new batch after flush */ + batch_cnt = 0; + prev_fwd_lcore = fwd_lcore; + tmp_contexts[batch_cnt++] = contexts[i]; + } + + /* flush final batch */ + if (batch_cnt) + crypto_redirect_packet_batch(prev_fwd_lcore, tmp_contexts, + batch_cnt); } -static void crypto_fwd_processed_packets(struct crypto_pkt_ctx **contexts, - unsigned int count) +void crypto_fwd_processed_packets(void) { - uint32_t i; + struct crypto_pkt_ctx 
*contexts[MAX_CRYPTO_PKT_BURST]; + unsigned int i, count, lcore = dp_lcore_id(); - for (i = 0; i < count; i++) + if (rte_ring_empty(crypto_fwd[lcore].fwd_q)) + return; + + count = rte_ring_mc_dequeue_burst(crypto_fwd[lcore].fwd_q, + (void **)&contexts, + MAX_CRYPTO_PKT_BURST, NULL); + crypto_fwd[lcore].fwd_cnt += count; + + for (i = 0; i < count; i++) { + if (unlikely(contexts[i]->status < 0)) + contexts[i]->action = CRYPTO_ACT_DROP; + + crypto_prefetch_ctx(contexts, count, i); crypto_pkt_ctx_forward_and_free(contexts[i]); + crypto_prefetch_ctx_data(contexts, count, i+1); + } } struct crypto_processing_cb { - void (*process)(struct crypto_pkt_ctx *, struct rte_mbuf *, - struct sadb_sa *, uint32_t *bytes); + void (*process)(uint16_t count, struct crypto_pkt_ctx *ctx_arr[], + uint32_t *bytes); void (*post_process)(struct crypto_pkt_ctx **, uint32_t); }; static const struct crypto_processing_cb crypto_cb[MAX_CRYPTO_XFRM] = { - {crypto_process_encrypt_packet, - crypto_fwd_processed_packets}, - {crypto_process_decrypt_packet, - crypto_fwd_processed_packets} }; + {crypto_process_encrypt_packets, + crypto_redirect_processed_packets}, + {crypto_process_decrypt_packets, + crypto_redirect_processed_packets} }; void crypto_purge_queue(struct rte_ring *pmd_queue) { @@ -974,6 +1187,7 @@ void crypto_purge_queue(struct rte_ring *pmd_queue) for (i = 0; i < count; i++) { struct crypto_pkt_ctx *ctx = contexts[i]; + rte_pktmbuf_free(ctx->mbuf); release_crypto_packet_ctx(ctx); } @@ -1002,10 +1216,12 @@ sadb_lookup_sa(struct rte_mbuf *m __unused, enum crypto_xfrm xfrm, struct ifnet *err_ifp; ctx->action = CRYPTO_ACT_DROP; - IPSEC_CNT_INC(NO_OUT_SA); + if (xfrm == CRYPTO_ENCRYPT) + IPSEC_CNT_INC(NO_OUT_SA); err_ifp = ((xfrm == CRYPTO_ENCRYPT) ? 
ctx->nxt_ifp : crypto_ctx_to_in_ifp(ctx, ctx->mbuf)); - IF_INCR_OERROR(err_ifp); + if (err_ifp && is_vti(err_ifp)) + if_incr_oerror(err_ifp); return NULL; } rte_prefetch0(sa->session); @@ -1013,28 +1229,48 @@ sadb_lookup_sa(struct rte_mbuf *m __unused, enum crypto_xfrm xfrm, } static inline unsigned int -crypto_pmd_process_packet(struct crypto_pkt_ctx *contexts, - enum crypto_xfrm xfrm) +crypto_pmd_process_packets(struct crypto_pkt_ctx *contexts[], + uint16_t count, enum crypto_xfrm xfrm) { struct rte_mbuf *m; - unsigned int packet_size = 0; - struct sadb_sa *sa; - - m = contexts->mbuf; - if (unlikely(!m)) { - CRYPTO_DATA_ERR("Null mbuf\n"); - contexts->action = CRYPTO_ACT_DROP; - IPSEC_CNT_INC(DROPPED_NO_MBUF); - return 0; + unsigned int total_bytes = 0; + uint16_t i, bad_idx[count], bad_count = 0; + + /* + * Prefetch entire burst of contexts into L2 cache + */ + for (i = 0; i < count; i++) + rte_prefetch1(contexts[i]); + + for (i = 0; i < count; i++) { + crypto_prefetch_ctx(contexts, count, i); + + m = contexts[i]->mbuf; + if (unlikely(!m)) { + contexts[i]->action = CRYPTO_ACT_DROP; + IPSEC_CNT_INC(DROPPED_NO_MBUF); + continue; + } + assert(contexts[i]->direction == xfrm); + + contexts[i]->bytes = 0; + contexts[i]->sa = sadb_lookup_sa(m, xfrm, contexts[i]); + if (unlikely(!contexts[i]->sa)) { + contexts[i]->status = -1; + contexts[i]->action = CRYPTO_ACT_DROP; + bad_idx[bad_count++] = i; + } else + contexts[i]->status = 0; + + crypto_prefetch_ctx_data(contexts, count, i); } - assert(contexts->direction == xfrm); - sa = sadb_lookup_sa(m, xfrm, contexts); - if (unlikely(!sa)) - return 0; + move_bad_mbufs(contexts, count, bad_idx, bad_count); + count -= bad_count; + + crypto_cb[xfrm].process(count, contexts, &total_bytes); - crypto_cb[xfrm].process(contexts, m, sa, &packet_size); - return packet_size; + return total_bytes; } /* @@ -1049,7 +1285,7 @@ static bool crypto_pmd_walk_cb(int pmd_dev_id __unused, enum crypto_xfrm xfrm, uint32_t *packets) { struct 
crypto_pkt_ctx *contexts[MAX_CRYPTO_PKT_BURST]; - unsigned int i, count, total_bytes = 0; + unsigned int count, total_bytes = 0; if (!rte_ring_empty(pmd_queue)) { count = rte_ring_sc_dequeue_burst(pmd_queue, @@ -1057,25 +1293,7 @@ static bool crypto_pmd_walk_cb(int pmd_dev_id __unused, enum crypto_xfrm xfrm, MAX_CRYPTO_PKT_BURST, NULL); - for (i = 0; i < CRYPTO_PREFETCH_OFFSET && i < count; i++) - rte_prefetch0(contexts[i]); - - /* Process the packets in the burst. */ - for (i = 0; i + CRYPTO_PREFETCH_OFFSET < count; i++) { - rte_prefetch0(contexts[i + CRYPTO_PREFETCH_OFFSET]); - rte_prefetch0( - contexts[i + CRYPTO_PREFETCH_OFFSET - 1]->mbuf); - rte_prefetch0( - contexts[i + CRYPTO_PREFETCH_OFFSET - 1]->l3hdr); - total_bytes += crypto_pmd_process_packet(contexts[i], - xfrm); - } - - /* Process the remaining contexts */ - for (; i < count; i++) { - total_bytes += crypto_pmd_process_packet(contexts[i], - xfrm); - } + total_bytes = crypto_pmd_process_packets(contexts, count, xfrm); crypto_cb[xfrm].post_process(contexts, count); *packets = count; @@ -1127,18 +1345,48 @@ const char *crypto_xfrm_name(enum crypto_xfrm xfrm) return xfrm_names[xfrm]; } +void crypto_create_fwd_queue(unsigned int lcore_id) +{ + if (!RTE_PER_LCORE(crypto_fwd)) { + struct crypto_fwd_info *fwd_info = &crypto_fwd[lcore_id]; + unsigned int cpu_socket = rte_lcore_to_socket_id(lcore_id); + + fwd_info->fwd_q = crypto_create_ring("fwd-q", PMD_RING_SIZE, + cpu_socket, lcore_id, 0); + /* crypto_create_ring is always expected to succeed */ + + RTE_PER_LCORE(crypto_fwd) = fwd_info; + } +} + +void crypto_destroy_fwd_queue(void) +{ + if (RTE_PER_LCORE(crypto_fwd)) { + crypto_delete_queue(RTE_PER_LCORE(crypto_fwd)->fwd_q); + RTE_PER_LCORE(crypto_fwd)->fwd_q = NULL; + RTE_PER_LCORE(crypto_fwd) = NULL; + } +} + /* - * dp_crypto_per_lcore_init() + * dp_crypto_lcore_init() * * Allocate an initialise the crypto packet buffer, which is used to * manage the interaction between a forwarding thread and the crypto * 
thread. */ -void dp_crypto_per_lcore_init(unsigned int lcore_id) +static int dp_crypto_lcore_init(unsigned int lcore_id, + void *arg __unused) { struct crypto_pkt_buffer *cpb; unsigned int cpu_socket; uint32_t q; + int err, i; + + err = crypto_flow_cache_init_lcore(lcore_id); + if (err) + rte_panic("Failed to create crypto flow cache for cpu %d\n", + lcore_id); if (!RTE_PER_LCORE(crypto_pkt_buffer)) { cpu_socket = rte_lcore_to_socket_id(lcore_id); @@ -1153,11 +1401,32 @@ void dp_crypto_per_lcore_init(unsigned int lcore_id) for (q = MIN_CRYPTO_XFRM; q < MAX_CRYPTO_XFRM; q++) cpb->pmd_dev_id[q] = CRYPTO_PMD_INVALID_ID; - cpb->pr_cache_tbl = pr_cache_init(); + err = crypto_rte_op_alloc(cpb->cops, MAX_CRYPTO_PKT_BURST); + if (err) + rte_panic("no memory for crypto ops on lcore %u", + lcore_id); + cpbdb[lcore_id] = cpb; + for (i = 0; i < MAX_CRYPTO_PKT_BURST; i++) { + err = RAND_bytes(cpb->iv_cache[i], + CRYPTO_MAX_IV_LENGTH); + if (err != 1) + rte_panic("Could not generate random bytes for crypto lcore %u. 
System might be low on entropy", + lcore_id); + } RTE_PER_LCORE(crypto_pkt_buffer) = cpb; } + return 0; +} + +static int dp_crypto_lcore_teardown(unsigned int lcore_id, + void *arg __unused) +{ + struct crypto_pkt_buffer *cpb = cpbdb[lcore_id]; + + crypto_rte_op_free(cpb->cops, MAX_CRYPTO_PKT_BURST); + return crypto_flow_cache_teardown_lcore(lcore_id); } static void init_context(struct rte_mempool *pool __unused, @@ -1176,7 +1445,7 @@ static void init_context(struct rte_mempool *pool __unused, /* Callback from event manager when ifp set into vrf */ static void crypto_if_vrf_set(struct ifnet *ifp) { - if (ifp->if_type == IFT_VRFMASTER) { + if (ifp->if_type == IFT_VRF) { crypto_incmpl_policy_make_complete(); crypto_incmpl_sa_make_complete(); } @@ -1193,6 +1462,82 @@ static void crypto_incomplete_init(void) crypto_incmpl_sa_init(); } +static struct dp_lcore_events crypto_lcore_events = { + .dp_lcore_events_init_fn = dp_crypto_lcore_init, + .dp_lcore_events_teardown_fn = dp_crypto_lcore_teardown, +}; + +static bitmask_t crypto_fwd_cores; +static uint16_t num_sas[RTE_MAX_LCORE]; + +int crypto_set_fwd_cores(const uint8_t *bytes, uint8_t len) +{ + int rc; + char tmp[BITMASK_STRSZ]; + + rc = bitmask_parse_bytes(&crypto_fwd_cores, bytes, len); + if (rc) { + RTE_LOG(ERR, DATAPLANE, + "Failed to parse cpumask for post-crypto forwarding\n"); + return rc; + } + + bitmask_sprint(&crypto_fwd_cores, tmp, sizeof(tmp)); + DP_DEBUG(INIT, INFO, DATAPLANE, + "Post-crypto forwarding cores set: %s\n", tmp); + + return rc; +} + +/* + * return the next least loaded forwarding core to allocate as + * the post-processing core for a specific SA + */ +uint8_t crypto_sa_alloc_fwd_core(void) +{ + uint16_t tmp_num_sas = UINT16_MAX; + uint8_t fwd_core = 0, i; + + RTE_LCORE_FOREACH(i) { + if (bitmask_isset(&crypto_fwd_cores, i)) { + if (num_sas[i] < tmp_num_sas) { + tmp_num_sas = num_sas[i]; + fwd_core = i; + } + } + } + + if (fwd_core) { + num_sas[fwd_core]++; + if (num_sas[fwd_core] == 1) + 
enable_crypto_fwd(fwd_core); + } + return fwd_core; +} + +/* + * deallocate post processing core + */ +void crypto_sa_free_fwd_core(uint8_t fwd_core) +{ + struct crypto_pkt_ctx *ctx; + struct crypto_fwd_info *fwd_info = &crypto_fwd[fwd_core]; + + if (fwd_core) { + num_sas[fwd_core]--; + if (!num_sas[fwd_core]) { + disable_crypto_fwd(fwd_core); + + /* drain queue & free */ + while (!rte_ring_mc_dequeue(fwd_info->fwd_q, + (void **)&ctx)) { + rte_pktmbuf_free(ctx->mbuf); + release_crypto_packet_ctx(ctx); + } + } + } +} + static unsigned int crypto_ctx_pool; /* * General initialisation for crypto services @@ -1201,6 +1546,8 @@ void dp_crypto_init(void) { unsigned int cores, cache; + bitmask_zero(&crypto_fwd_cores); + CRYPTO_INFO("Crypto thread initialise begin\n"); cores = rte_lcore_count(); @@ -1228,15 +1575,25 @@ void dp_crypto_init(void) if (!crypto_dp_sp->pool) rte_panic("Could not allocate crypto context pool\n"); - crypto_engine_load(); + if (crypto_rte_setup()) + rte_panic("Could not set up crypto infrastructure pools\n"); - crypto_master_pull = zsock_new_pull(crypto_inproc); + if (crypto_engine_load()) + rte_panic("Could not set up crypto engine\n"); - if (!crypto_master_pull) - rte_panic("cannot bind to crypto master pull socket\n"); + if (crypto_flow_cache_init()) + rte_panic("Could not allocate crypto flow cache"); - register_event_socket(zsock_resolve(crypto_master_pull), - handle_crypto_event, crypto_master_pull); + if (dp_lcore_events_register(&crypto_lcore_events, NULL)) + rte_panic("can not initialise crypto per thread\n"); + + crypto_main_pull = zsock_new_pull(crypto_inproc); + + if (!crypto_main_pull) + rte_panic("cannot bind to crypto main pull socket\n"); + + dp_register_event_socket(zsock_resolve(crypto_main_pull), + handle_crypto_event, crypto_main_pull); if (crypto_sadb_init() < 0) rte_panic("Failed to initialise crypto SADB\n"); @@ -1253,9 +1610,17 @@ void dp_crypto_init(void) crypto_incomplete_init(); crypto_engine_init(); - 
rte_timer_init(&pr_cache_timer); - rte_timer_reset(&pr_cache_timer, rte_get_timer_hz(), PERIODICAL, - rte_get_master_lcore(), pr_cache_timer_handler, NULL); + rte_timer_init(&flow_cache_timer); + rte_timer_reset(&flow_cache_timer, rte_get_timer_hz(), PERIODICAL, + rte_get_master_lcore(), crypto_flow_cache_timer_handler, + NULL); + + rte_timer_init(&crypto_gc_timer); + rte_timer_reset(&crypto_gc_timer, + rte_get_timer_hz() * CRYPTO_GC_TIMER_INTERVAL, + PERIODICAL, rte_get_master_lcore(), + crypto_gc_timer_handler, + NULL); CRYPTO_INFO("Crypto initialised\n"); } @@ -1263,12 +1628,13 @@ void dp_crypto_init(void) void dp_crypto_shutdown(void) { CRYPTO_INFO("crypto shutting down\n"); - unregister_event_socket(zsock_resolve(crypto_master_pull)); - zsock_destroy(&crypto_master_pull); + dp_unregister_event_socket(zsock_resolve(crypto_main_pull)); + zsock_destroy(&crypto_main_pull); zsock_destroy(&rekey_listener); udp_handler_unregister(AF_INET, htons(ESP_PORT)); udp_handler_unregister(AF_INET6, htons(ESP_PORT)); crypto_engine_shutdown(); + crypto_rte_shutdown(); } void crypto_show_summary(FILE *f) @@ -1296,13 +1662,16 @@ void crypto_show_summary(FILE *f) rte_mempool_in_use_count(crypto_dp_sp->pool)); for (i = 0; i < IPSEC_CNT_MAX; i++) - jsonw_uint_field(wr, ipsec_counter_names[i], agg_counters[i]); + if (agg_counters[i]) + jsonw_uint_field(wr, ipsec_counter_names[i], + agg_counters[i]); jsonw_end_object(wr); jsonw_destroy(&wr); } /* runs in the context of a crypto thread */ void crypto_expire_request(uint32_t spi, uint32_t reqid, + xfrm_address_t dst, uint16_t family, uint8_t proto, uint8_t hard) { int rv; @@ -1314,60 +1683,36 @@ void crypto_expire_request(uint32_t spi, uint32_t reqid, return; } - rv = zsock_bsend(sock, "4411", spi, reqid, proto, hard); + rv = zsock_bsend(sock, "444444211", spi, reqid, dst.a6[0], dst.a6[1], + dst.a6[2], dst.a6[3], family, proto, hard); if (rv < 0) - CRYPTO_ERR("Failed to send expire event to master (%d)\n", rv); + CRYPTO_ERR("Failed to 
send expire event to main (%d)\n", rv); zsock_destroy(&sock); } -/* running in the master thread, handle crypto events */ +/* running in the main thread, handle crypto events */ static int handle_crypto_event(void *arg) { - zsock_t *sock = (zsock_t *)arg; - int rc; + xfrm_address_t dst; + uint16_t family; uint8_t proto, hard; uint32_t spi, reqid; + zsock_t *sock = (zsock_t *)arg; + int rc; - rc = zsock_brecv(sock, "4411", &spi, &reqid, &proto, &hard); + rc = zsock_brecv(sock, "444444211", &spi, &reqid, + &dst.a6[0], &dst.a6[1], + &dst.a6[2], &dst.a6[3], + &family, &proto, &hard); if (rc < 0) { - CRYPTO_ERR("Failed to receive event for master\n"); + CRYPTO_ERR("Failed to receive event for main\n"); return 0; } - if (!rekey_listener) - return 0; - - char *outbuf = NULL; - size_t outsize = 0; - FILE *f = open_memstream(&outbuf, &outsize); - - if (!f) { - CRYPTO_ERR("Failed to open stream for rekey\n"); - return 0; - } - - json_writer_t *wr = jsonw_new(f); - - if (!wr) { - CRYPTO_ERR("Failed to open json writer for rekey\n"); - fclose(f); - free(outbuf); - return 0; - } - - jsonw_name(wr, "REKEY"); - jsonw_start_object(wr); - jsonw_uint_field(wr, "SPI", spi); - jsonw_uint_field(wr, "proto", proto); - jsonw_uint_field(wr, "reqid", reqid); - jsonw_uint_field(wr, "hard", hard); - jsonw_end_object(wr); - jsonw_destroy(&wr); - - /* the buffer isn't flushed until fclose */ - fclose(f); - zstr_send(rekey_listener, outbuf); + rc = xfrm_client_send_expire(&dst, family, spi, reqid, proto, hard); + if (rc < 0) + CRYPTO_ERR("Failed to send SA expire\n"); return 0; } @@ -1388,6 +1733,34 @@ unsigned long hash_xfrm_address(const xfrm_address_t *addr, { if (family == AF_INET) return addr->a4; - else - return (addr->a6[0] + addr->a6[1] + addr->a6[2] + addr->a6[3]); + return (addr->a6[0] + addr->a6[1] + addr->a6[2] + addr->a6[3]); } + +/* The vrf has been deleted so flush all the crypto state in it. 
*/ +static void crypto_vrf_flush(struct vrf *vrf) +{ + struct crypto_vrf_ctx *vrf_ctx; + + vrf_ctx = crypto_vrf_find(vrf->v_id); + if (!vrf_ctx) + return; + + crypto_policy_flush_vrf(vrf_ctx); + crypto_sadb_flush_vrf(vrf_ctx); + policy_feat_flush_vrf(vrf_ctx); +} + +void crypto_flush_all(void) +{ + vrfid_t vrf_id; + struct vrf *vrf; + + VRF_FOREACH(vrf, vrf_id) + crypto_vrf_flush(get_vrf(vrf_id)); +} + +static const struct dp_event_ops crypto_events = { + .vrf_delete = crypto_vrf_flush, +}; + +DP_STARTUP_EVENT_REGISTER(crypto_events); diff --git a/src/crypto/crypto.h b/src/crypto/crypto.h index 5fc4ab15..7e0d6f6e 100644 --- a/src/crypto/crypto.h +++ b/src/crypto/crypto.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015-2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -19,7 +19,7 @@ #include #include "util.h" -#include "vrf.h" +#include "vrf_internal.h" struct iphdr; struct udphdr; @@ -78,16 +78,22 @@ int udp_esp_dp6(struct rte_mbuf *m, void *ip, struct udphdr *udp, struct ifnet *ifp); void crypto_sadb_show_summary(FILE *f, vrfid_t vrfid); -void crypto_policy_show_summary(FILE *f, vrfid_t vrfid); +void crypto_policy_show_summary(FILE *f, vrfid_t vrfid, bool brief); void crypto_policy_bind_show_summary(FILE *f, vrfid_t vrfid); void crypto_show_summary(FILE *f); void crypto_add_listener(const char *url); void crypto_show_pmd(FILE *f); void crypto_sadb_show_spi_mapping(FILE *f, vrfid_t vrfid); -int crypto_engine_set(FILE *f, const char *str); +int crypto_engine_set(uint8_t *bytes, uint8_t len); int crypto_engine_probe(FILE *f); void crypto_show_cache(FILE *f, const char *str); -struct cds_lfht *pr_cache_init(void); +int crypto_flow_cache_init_lcore(unsigned int lcore_id); +int crypto_flow_cache_teardown_lcore(unsigned int lcore_id); +int crypto_flow_cache_init(void); unsigned long 
hash_xfrm_address(const xfrm_address_t *addr, const uint16_t family); +uint8_t crypto_sa_alloc_fwd_core(void); +void crypto_sa_free_fwd_core(uint8_t fwd_core); +int crypto_set_fwd_cores(const uint8_t *bytes, uint8_t len); +void crypto_flush_all(void); #endif /* CRYPTO_H */ diff --git a/src/crypto/crypto_defs.h b/src/crypto/crypto_defs.h new file mode 100644 index 00000000..dbbbb5e0 --- /dev/null +++ b/src/crypto/crypto_defs.h @@ -0,0 +1,36 @@ +/*- + * Copyright (c) 2020, AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ +#ifndef CRYPTO_DEFS_H + +#define CRYPTO_DEFS_H + +/* maximum length (in bytes) of initialization vector in any algorithm */ +#define CRYPTO_MAX_IV_LENGTH 16 + +/* maximum length (in bytes) of key in any algorithm */ +#define CRYPTO_MAX_CIPHER_KEY_LENGTH 32 + +#define CRYPTO_MAX_AUTH_KEY_LENGTH 64 + +/* + * constants for various encryption/hash algorithms + */ + +#define AES_GCM_AAD_LENGTH 8 /* no ESN support yet */ +#define AES_GCM_IV_LENGTH 8 +#define AES_GCM_NONCE_LENGTH 4 + +/* iv sizes for different algorithms */ +enum { + IPSEC_AES_CBC_IV_SIZE = 16, + IPSEC_AES_GCM_IV_SIZE = 12, + /* TripleDES supports IV size of 32bits or 64bits but he library + * only supports 64bits. + */ + IPSEC_3DES_IV_SIZE = sizeof(uint64_t), +}; + +#endif diff --git a/src/crypto/crypto_engine.c b/src/crypto/crypto_engine.c index b3f5173b..00b0a538 100644 --- a/src/crypto/crypto_engine.c +++ b/src/crypto/crypto_engine.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015-2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -37,6 +38,7 @@ #include "util.h" #include "vplane_debug.h" #include "vplane_log.h" +#include "crypto_rte_pmd.h" #define ENGINE_DEBUG(args...) 
\ DP_DEBUG(CRYPTO, DEBUG, ENGINE, args) @@ -57,16 +59,6 @@ struct md_algo_table { evp_md_fn_t fn; }; -static const struct md_algo_table md_algorithms[] = { - { "hmac(sha1)", (evp_md_fn_t)EVP_sha1}, - { "hmac(sha256)", (evp_md_fn_t)EVP_sha256}, - { "hmac(sha384)", (evp_md_fn_t)EVP_sha384}, - { "hmac(sha512)", (evp_md_fn_t)EVP_sha512}, - { "hmac(md5)", (evp_md_fn_t)EVP_md5}, - { "rfc4106(gcm(aes))", (evp_md_fn_t)EVP_md_null}, - { "aNULL", (evp_md_fn_t)EVP_md_null}, -}; - const char *eng_cmd_str[] = {"ENG_CIPHER_INIT |", "ENG_DIGEST_INIT |", "ENG_CIPHER_BLOCK |", "ENG_DIGEST_BLOCK |", "ENG_CIPHER_FINALISE |", "ENG_DIGEST_FINALISE |", @@ -162,7 +154,7 @@ static void ENGINE_ERR_print_errors(void) static int hmac_update(struct crypto_session *sa, unsigned char *text, uint32_t len) { - if (!HMAC_Update(sa->hmac_ctx, text, len)) { + if (!HMAC_Update(sa->o_info->hmac_ctx, text, len)) { ENGINE_ERR("HMAC update failed\n"); return -1; } @@ -186,7 +178,8 @@ static int openssl_cipher_set_iv(struct crypto_visitor_ctx *ctx, memcpy(alg_iv, s->nonce, s->nonce_len); memcpy(alg_iv + s->nonce_len, iv, s->iv_len); - if (EVP_CipherInit_ex(s->ctx, NULL, NULL, NULL, alg_iv, -1) != 1) { + if (EVP_CipherInit_ex(s->o_info->ctx, NULL, NULL, NULL, + alg_iv, -1) != 1) { ENGINE_ERR_print_errors(); return -1; } @@ -200,7 +193,7 @@ static int openssl_hmac_set_icv(struct crypto_visitor_ctx *ctx, { struct crypto_session *session = ctx_session(ctx); - if (!HMAC_Init_ex(session->hmac_ctx, NULL, 0, NULL, NULL)) { + if (!HMAC_Init_ex(session->o_info->hmac_ctx, NULL, 0, NULL, NULL)) { ENGINE_ERR("HMAC init failed\n"); return -1; } @@ -215,7 +208,7 @@ static int openssl_encrypt_hmac_payload_block( struct crypto_session *session = ctx_session(ctx); int len; - if (EVP_EncryptUpdate(session->ctx, element->o_data, &len, + if (EVP_EncryptUpdate(session->o_info->ctx, element->o_data, &len, element->i_data, element->data_len) != 1) { ENGINE_ERR_print_errors(); return -1; @@ -250,7 +243,7 @@ static int 
openssl_hmac_decrypt_payload_block( return -1; } - if (EVP_DecryptUpdate(s->ctx, element->o_data, &len, + if (EVP_DecryptUpdate(s->o_info->ctx, element->o_data, &len, element->i_data, element->data_len) != 1) { ENGINE_ERR_print_errors(); return -1; @@ -265,7 +258,7 @@ static int openssl_cipher_payload_finalise(struct crypto_visitor_ctx *ctx, struct crypto_session *s = ctx_session(ctx); int len; - if (EVP_CipherFinal_ex(s->ctx, element->o_data, &len) != 1) { + if (EVP_CipherFinal_ex(s->o_info->ctx, element->o_data, &len) != 1) { ENGINE_ERR_print_errors(); return -1; } @@ -292,7 +285,7 @@ static int openssl_hmac_finalise(struct crypto_visitor_ctx *ctx, struct crypto_session *session = ctx_session(ctx); uint32_t md_len = 0; - if (!HMAC_Final(session->hmac_ctx, element->o_data, &md_len)) { + if (!HMAC_Final(session->o_info->hmac_ctx, element->o_data, &md_len)) { ENGINE_ERR("Digest finalise failed\n"); return -1; } @@ -314,9 +307,7 @@ static int null_hmac_set_icv(struct crypto_visitor_ctx *ctx __rte_unused, } static int -openssl_null_hmac_set_auth_key(struct crypto_session *ctx __rte_unused, - unsigned int length __rte_unused, - const char key[] __rte_unused) +openssl_null_hmac_set_auth_key(struct crypto_session *ctx __rte_unused) { return 0; } @@ -369,120 +360,6 @@ null_hmac_encrypt_openssl_vops = { .icv_finalise = null_hmac_update, }; -static int openssl_aead_set_icv(struct crypto_visitor_ctx *ctx, - unsigned int length, - unsigned char icv[]) -{ - struct crypto_session *session = ctx_session(ctx); - - if (!EVP_CIPHER_CTX_ctrl(session->ctx, EVP_CTRL_GCM_SET_TAG, - length, icv)) { - ENGINE_PKT_ERR("Setting GCM tag failed\n"); - ENGINE_ERR_print_errors(); - return -1; - } - - return 0; -} - -static int openssl_aead_aad_update(struct crypto_visitor_ctx *ctx, - struct crypto_chain_elem *element) -{ - struct crypto_session *session = ctx_session(ctx); - int len; - - if (!EVP_CipherUpdate(session->ctx, NULL, &len, element->i_data, - element->data_len)) { - 
ENGINE_PKT_ERR("AAD update failed\n"); - ENGINE_ERR_print_errors(); - return -1; - } - - return 0; -} - -static int openssl_aead_get_tag(struct crypto_visitor_ctx *ctx, - struct crypto_chain_elem *element) -{ - struct crypto_session *session = ctx_session(ctx); - - if (!EVP_CIPHER_CTX_ctrl(session->ctx, EVP_CTRL_GCM_GET_TAG, - element->data_len, element->o_data)) { - ENGINE_PKT_ERR("Getting GCM tag failed\n"); - ENGINE_ERR_print_errors(); - return -1; - } - - return 0; -} - -static int openssl_aead_encrypt_payload_block( - struct crypto_visitor_ctx *ctx, - struct crypto_chain_elem *element) -{ - struct crypto_session *s = ctx_session(ctx); - int len; - - if (EVP_EncryptUpdate(s->ctx, element->o_data, &len, - element->i_data, element->data_len) != 1) { - ENGINE_ERR_print_errors(); - return -1; - } - - return 0; -} - -static int openssl_aead_decrypt_payload_block( - struct crypto_visitor_ctx *ctx, - struct crypto_chain_elem *element) -{ - struct crypto_session *s = ctx_session(ctx); - int len; - - if (EVP_DecryptUpdate(s->ctx, element->o_data, &len, - element->i_data, element->data_len) != 1) { - ENGINE_ERR_print_errors(); - return -1; - } - - return 0; -} - -static int nop_set(struct crypto_visitor_ctx *ctx __unused, - unsigned int length __unused, - unsigned char icv[] __unused) -{ - return 0; -} - -static int nop_vops(struct crypto_visitor_ctx *ctx __unused, - struct crypto_chain_elem *element __unused) -{ - return 0; -} - -const struct crypto_visitor_operations decrypt_openssl_aead_vops = { - .set_iv = openssl_cipher_set_iv, - .set_icv = openssl_aead_set_icv, - - .payload_iv = nop_vops, - .payload_block = openssl_aead_decrypt_payload_block, - .payload_finalise = openssl_cipher_payload_finalise, - .header_block = openssl_aead_aad_update, - .icv_finalise = nop_vops, -}; - -const struct crypto_visitor_operations encrypt_openssl_aead_vops = { - .set_iv = openssl_cipher_set_iv, - .set_icv = nop_set, - - .payload_iv = nop_vops, - .payload_block = 
openssl_aead_encrypt_payload_block, - .payload_finalise = openssl_cipher_payload_finalise, - .header_block = openssl_aead_aad_update, - .icv_finalise = openssl_aead_get_tag, -}; - /* * Based on RFC4303, Section 2, Table 1 + 2. */ @@ -593,8 +470,8 @@ const struct crypto_visitor_operations * crypto_session_get_vops(struct crypto_session *session) { return session->direction == XFRM_POLICY_OUT ? - session->s_ops->encrypt_vops : - session->s_ops->decrypt_vops; + session->o_info->s_ops->encrypt_vops : + session->o_info->s_ops->decrypt_vops; } int crypto_chain_init(struct crypto_chain *chain, @@ -617,159 +494,171 @@ int crypto_chain_init(struct crypto_chain *chain, return 0; } -int crypto_session_set_enc_key(struct crypto_session *session, - unsigned int length, const char key[]) +int crypto_session_set_enc_key(struct crypto_session *session) { - if (!session->s_ops->set_enc_key) { + if (!session->o_info->s_ops->set_enc_key) { ENGINE_DEBUG("Function not supported: set_enc_key()\n"); return -ENOTSUP; } - return session->s_ops->set_enc_key(session, length, key); + return session->o_info->s_ops->set_enc_key(session); } -int crypto_session_set_auth_key(struct crypto_session *session, - unsigned int length, const char key[]) +int crypto_session_set_auth_key(struct crypto_session *session) { - if (!session->s_ops->set_auth_key) { + if (!session->o_info->s_ops->set_auth_key) { ENGINE_DEBUG("Function not supported: set_auth_key()\n"); return -ENOTSUP; } - return session->s_ops->set_auth_key(session, length, key); + return session->o_info->s_ops->set_auth_key(session); } -int crypto_session_generate_iv(struct crypto_session *session, - char iv[]) +void crypto_save_iv(uint16_t idx, const char iv[], uint16_t length) { - if (!session->s_ops->generate_iv) { - ENGINE_DEBUG("Function not supported: generate_iv()\n"); - return -ENOTSUP; + struct crypto_pkt_buffer *cpb = cpbdb[dp_lcore_id()]; + + /* should never happen */ + if (idx >= MAX_CRYPTO_PKT_BURST || length > 
CRYPTO_MAX_IV_LENGTH) { + ENGINE_ERR("Unexpected packet index (%d) or IV length (%d)", + idx, length); + return; } - return session->s_ops->generate_iv(session, iv); + memcpy(cpb->iv_cache[idx], iv, length); } -int crypto_session_set_iv(struct crypto_session *session, unsigned int length, - const char iv[]) +void crypto_get_iv(uint16_t idx, char iv[], uint16_t length) { - if (session->s_ops->set_iv) - return session->s_ops->set_iv(session, length, iv); + struct crypto_pkt_buffer *cpb = cpbdb[dp_lcore_id()]; - return 0; + /* should never happen */ + if (idx >= MAX_CRYPTO_PKT_BURST || length > CRYPTO_MAX_IV_LENGTH) { + ENGINE_ERR("Unexpected packet index (%d) or IV length (%d)", + idx, length); + return; + } + + memcpy(iv, cpb->iv_cache[idx], length); } -static int setup_cipher_type(struct crypto_session *ctx, - const char *algo_name, - const uint32_t key_len) + +static int setup_cipher_type(struct crypto_session *ctx) { - if (strcmp("cbc(aes)", algo_name) == 0) { + struct crypto_openssl_info *o_ctx = ctx->o_info; + uint32_t key_len = ctx->key_len * BITS_PER_BYTE; + + if (ctx->cipher_algo == RTE_CRYPTO_CIPHER_LIST_END) { + RTE_LOG(ERR, DATAPLANE, "Invalid cipher algorithm\n"); + return -EINVAL; + } + + switch (ctx->cipher_algo) { + case RTE_CRYPTO_CIPHER_AES_CBC: switch (key_len) { case 128: - ctx->cipher = EVP_aes_128_cbc(); - ctx->cipher_name = "CBS(AES) 128"; - return 0; + o_ctx->cipher = EVP_aes_128_cbc(); + break; case 192: - ctx->cipher = EVP_aes_192_cbc(); - ctx->cipher_name = "CBS(AES) 192"; - return 0; + o_ctx->cipher = EVP_aes_192_cbc(); + break; case 256: - ctx->cipher = EVP_aes_256_cbc(); - ctx->cipher_name = "CBS(AES) 256"; - return 0; + o_ctx->cipher = EVP_aes_256_cbc(); + break; default: ENGINE_ERR("Unsupported cbc(aes) key size %d\n", key_len); - ctx->cipher_name = "CBC(AES) Unknown"; - return -1; + return -EINVAL; } - } - if (strcmp("cbc(des3_ede)", algo_name) == 0) { - ctx->cipher = EVP_des_ede3_cbc(); - ctx->cipher_name = "3DES"; - return 0; - } + 
break; - if (strcmp("rfc4106(gcm(aes))", algo_name) == 0) { - switch (key_len) { - case 160: - ctx->cipher = EVP_aes_128_gcm(); - ctx->cipher_name = "gcm(aes) 128"; - return 0; - case 288: - ctx->cipher = EVP_aes_256_gcm(); - ctx->cipher_name = "gcm(aes) 256"; - return 0; - default: - ENGINE_ERR("Unsupported gcm(aes) key size: %d\n", - key_len); - ctx->cipher_name = "gcm(aes) unknown"; - return -1; - } + case RTE_CRYPTO_CIPHER_3DES_CBC: + o_ctx->cipher = EVP_des_ede3_cbc(); + break; + + case RTE_CRYPTO_CIPHER_NULL: + o_ctx->cipher = EVP_enc_null(); + break; + + default: + ENGINE_ERR("Unsupported crypto algo %s\n", + rte_crypto_cipher_algorithm_strings[ + ctx->cipher_algo]); } - if (strcmp("eNULL", algo_name) == 0 || - strcmp("ecb(cipher_null)", algo_name) == 0) { - ctx->cipher = EVP_enc_null(); - ctx->cipher_name = "eNULL"; - return 0; + if (!o_ctx->cipher) { + RTE_LOG(ERR, DATAPLANE, "Could not allocate cipher context\n"); + return -ENOMEM; } - ENGINE_ERR("Unsupported crypto algo %s\n", algo_name); - ctx->cipher_name = "Unsupported"; - return -1; + return 0; } -static int setup_md_type(struct crypto_session *ctx, - const char *algo_name, - const uint32_t alg_trunc_len __unused) +static int setup_md_type(struct crypto_session *ctx) { - unsigned int i; + static evp_md_fn_t evp_fns[RTE_CRYPTO_AUTH_LIST_END] = { + [RTE_CRYPTO_AUTH_NULL] = (evp_md_fn_t)EVP_md_null, + [RTE_CRYPTO_AUTH_SHA1_HMAC] = (evp_md_fn_t)EVP_sha1, + [RTE_CRYPTO_AUTH_SHA256_HMAC] = (evp_md_fn_t)EVP_sha256, + [RTE_CRYPTO_AUTH_SHA384_HMAC] = (evp_md_fn_t)EVP_sha384, + [RTE_CRYPTO_AUTH_SHA512_HMAC] = (evp_md_fn_t)EVP_sha512, + [RTE_CRYPTO_AUTH_MD5_HMAC] = (evp_md_fn_t)EVP_md5, + }; + + if (ctx->auth_algo == RTE_CRYPTO_AUTH_LIST_END) { + RTE_LOG(ERR, DATAPLANE, "Invalid digest algorithm\n"); + return -EINVAL; + } - for (i = 0; i < ARRAY_SIZE(md_algorithms); i++) - if (!strcmp(md_algorithms[i].name, algo_name)) { - ctx->md_name = md_algorithms[i].name; - ctx->md = md_algorithms[i].fn(); - return 0; - } 
+ if (!evp_fns[ctx->auth_algo]) { + RTE_LOG(ERR, DATAPLANE, "Unsupported digest algo %s\n", + rte_crypto_auth_algorithm_strings[ctx->auth_algo]); + return -EOPNOTSUPP; + } - ENGINE_ERR("Unsupported digest algo %s\n", algo_name); - ctx->md_name = "Unsupported"; - return -1; + ctx->o_info->md = evp_fns[ctx->auth_algo](); + if (!ctx->o_info->md) { + RTE_LOG(ERR, DATAPLANE, + "Could not set up openssl context for %s\n", + rte_crypto_auth_algorithm_strings[ctx->auth_algo]); + return -ENOMEM; + } + return 0; } int openssl_session_cipher_init(struct crypto_session *s) { int encrypt = s->direction == XFRM_POLICY_OUT; + struct crypto_openssl_info *o_s = s->o_info; if (likely(s->cipher_init || !s->block_size)) return 0; - s->ctx = EVP_CIPHER_CTX_new(); - if (!s->ctx) { + o_s->ctx = EVP_CIPHER_CTX_new(); + if (!o_s->ctx) { ENGINE_ERR_print_errors(); return -1; } - if (EVP_CipherInit_ex(s->ctx, s->cipher, NULL, + if (EVP_CipherInit_ex(o_s->ctx, o_s->cipher, NULL, s->key, NULL, encrypt) != 1) { ENGINE_ERR_print_errors(); - EVP_CIPHER_CTX_free(s->ctx); - s->ctx = NULL; + EVP_CIPHER_CTX_free(o_s->ctx); + o_s->ctx = NULL; return -1; } - if (EVP_CIPHER_mode(s->cipher) == EVP_CIPH_GCM_MODE) { - if (!EVP_CIPHER_CTX_ctrl(s->ctx, EVP_CTRL_GCM_SET_IVLEN, + if (EVP_CIPHER_mode(o_s->cipher) == EVP_CIPH_GCM_MODE) { + if (!EVP_CIPHER_CTX_ctrl(o_s->ctx, EVP_CTRL_GCM_SET_IVLEN, s->nonce_len + s->iv_len, NULL)) { - EVP_CIPHER_CTX_free(s->ctx); - s->ctx = NULL; + EVP_CIPHER_CTX_free(o_s->ctx); + o_s->ctx = NULL; ENGINE_ERR_print_errors(); return -1; } } - if (EVP_CIPHER_CTX_set_padding(s->ctx, 0) != 1) { - EVP_CIPHER_CTX_free(s->ctx); - s->ctx = NULL; + if (EVP_CIPHER_CTX_set_padding(o_s->ctx, 0) != 1) { + EVP_CIPHER_CTX_free(o_s->ctx); + o_s->ctx = NULL; ENGINE_ERR_print_errors(); return -1; } @@ -778,18 +667,8 @@ int openssl_session_cipher_init(struct crypto_session *s) return 0; } -static int openssl_session_set_enc_key(struct crypto_session *ctx, - unsigned int length, - const char key[]) 
+static int openssl_session_set_enc_key(struct crypto_session *ctx) { - if (length > ARRAY_SIZE(ctx->key)) { - ENGINE_ERR("Unexpect encyption key len: %d\n", length); - return -1; - } - - ctx->key_len = length; - memcpy(ctx->key, key, length); - if ((ctx->direction != -1) && openssl_session_cipher_init(ctx)) return -1; @@ -797,29 +676,20 @@ static int openssl_session_set_enc_key(struct crypto_session *ctx, return 0; } -static int openssl_session_set_auth_key(struct crypto_session *ctx, - unsigned int length, - const char key[]) +static int openssl_session_set_auth_key(struct crypto_session *ctx) { - if (length > ARRAY_SIZE(ctx->auth_alg_key)) { - ENGINE_ERR("Unexpect integrity key len: %d\n", length); - return -1; - } - - ctx->hmac_ctx = HMAC_CTX_new(); - if (!ctx->hmac_ctx) { + ctx->o_info->hmac_ctx = HMAC_CTX_new(); + if (!ctx->o_info->hmac_ctx) { ENGINE_ERR_print_errors(); return -1; } - ctx->auth_alg_key_len = length; - memcpy(ctx->auth_alg_key, key, length); - if (!HMAC_Init_ex(ctx->hmac_ctx, + if (!HMAC_Init_ex(ctx->o_info->hmac_ctx, ctx->auth_alg_key, ctx->auth_alg_key_len, - ctx->md, NULL)) { - HMAC_CTX_free(ctx->hmac_ctx); - ctx->hmac_ctx = NULL; + ctx->o_info->md, NULL)) { + HMAC_CTX_free(ctx->o_info->hmac_ctx); + ctx->o_info->hmac_ctx = NULL; ENGINE_ERR_print_errors(); return -1; } @@ -865,54 +735,84 @@ const struct crypto_session_operations null_hmac_openssl_sops = { .set_iv = openssl_session_set_iv, }; -static int rfc4106_session_set_enc_key(struct crypto_session *ctx, - unsigned int length, - const char key[]) +int crypto_openssl_session_setup(struct crypto_session *sess) { - /* setup AES-GCM according to RFC4106 */ - if (length < 4) { - ENGINE_ERR("key_len too small: %d\n", length); - return -1; - } + struct crypto_openssl_info *o_ctx; + int err; - ctx->key_len = length - 4; + if (sess->o_info) + return -EEXIST; - if (ctx->key_len > ARRAY_SIZE(ctx->key)) { - ENGINE_ERR("Unexpect encyption key len: %d\n", length); - return -1; + sess->o_info = 
calloc(1, sizeof(*sess->o_info)); + if (!sess->o_info) + return -ENOMEM; + + o_ctx = sess->o_info; + + if (sess->auth_algo != RTE_CRYPTO_AUTH_LIST_END) + o_ctx->s_ops = &default_openssl_sops; + else + o_ctx->s_ops = &null_hmac_openssl_sops; + + if (sess->cipher_algo == RTE_CRYPTO_CIPHER_LIST_END || + sess->auth_algo == RTE_CRYPTO_AUTH_LIST_END) { + RTE_LOG(ERR, DATAPLANE, + "Invalid cipher/auth algo: cipher (%d), auth (%d)\n", + sess->cipher_algo, sess->auth_algo); + return -EINVAL; } - memcpy(ctx->key, key, ctx->key_len); - ctx->nonce_len = 4; - memcpy(ctx->nonce, key + ctx->key_len, ctx->nonce_len); - ctx->iv_len = 8; + ENGINE_DEBUG("Setup cipher %s, key size(%d)\n", + rte_crypto_cipher_algorithm_strings[sess->cipher_algo], + sess->key_len * BITS_PER_BYTE); - if ((ctx->direction != -1) && - openssl_session_cipher_init(ctx)) - return -1; + if (setup_cipher_type(sess) != 0) + goto error; + + sess->block_size = EVP_CIPHER_block_size(o_ctx->cipher); + + ENGINE_DEBUG("Setup digest %s\n", + rte_crypto_auth_algorithm_strings[sess->auth_algo]); + + if (setup_md_type(sess) != 0) + goto error; + + err = crypto_session_set_enc_key(sess); + if (err) { + ENGINE_ERR("Failed to set session encryption key\n"); + goto error; + } + + err = crypto_session_set_auth_key(sess); + if (err) { + ENGINE_ERR("Failed to set session integrity key\n"); + goto error; + } return 0; + +error: + return -1; } -static int rfc4106_session_set_auth_key(struct crypto_session *ctx __unused, - unsigned int length __unused, - const char key[] __unused) +void crypto_openssl_session_teardown(struct crypto_session *sess) { - return 0; -} + if (!sess->o_info) + return; -const struct crypto_session_operations rfc4106_openssl_sops = { - .decrypt_vops = &decrypt_openssl_aead_vops, - .encrypt_vops = &encrypt_openssl_aead_vops, - .set_enc_key = rfc4106_session_set_enc_key, - .set_auth_key = rfc4106_session_set_auth_key, - .generate_iv = openssl_session_generate_iv, - .set_iv = openssl_session_set_iv, -}; + if 
(sess->o_info->hmac_ctx) + HMAC_CTX_free(sess->o_info->hmac_ctx); + if (sess->o_info->ctx) + EVP_CIPHER_CTX_free(sess->o_info->ctx); + + free(sess->o_info); + sess->o_info = NULL; +} struct crypto_session * crypto_session_create(const struct xfrm_algo *algo_crypt, - const struct xfrm_algo_auth *algo_auth, + const struct xfrm_algo_auth *algo_trunc_auth, + const struct xfrm_algo *algo_auth, int direction) { struct crypto_session *ctx; @@ -921,34 +821,24 @@ crypto_session_create(const struct xfrm_algo *algo_crypt, if (!ctx) return NULL; - if (algo_auth) - ctx->s_ops = &default_openssl_sops; - else - ctx->s_ops = &null_hmac_openssl_sops; - - ctx->direction = direction; - - if (algo_crypt) { - ENGINE_DEBUG("Setup Cipher %s%d\n", algo_crypt->alg_name, - algo_crypt->alg_key_len); - if (setup_cipher_type(ctx, algo_crypt->alg_name, - algo_crypt->alg_key_len) != 0) - goto err; - ctx->block_size = EVP_CIPHER_block_size(ctx->cipher); - ctx->iv_len = EVP_CIPHER_iv_length(ctx->cipher); - RAND_bytes((unsigned char *)ctx->iv, ctx->iv_len); - - if (strcmp("rfc4106(gcm(aes))", algo_crypt->alg_name) == 0) - ctx->s_ops = &rfc4106_openssl_sops; + /* set up DPDK versions of data structures */ + if (crypto_rte_set_session_parameters(ctx, algo_crypt, + algo_trunc_auth, algo_auth)) { + RTE_LOG(ERR, DATAPLANE, + "Failed to set session parameters for %s %s%s\n", + algo_crypt->alg_name, + algo_auth ? algo_auth->alg_name : "", + algo_trunc_auth ? algo_trunc_auth->alg_name : ""); + goto err; } - if (algo_auth) { - ENGINE_DEBUG("Setup Digest %s\n", algo_auth->alg_name); - if (setup_md_type(ctx, algo_auth->alg_name, - algo_auth->alg_trunc_len) != 0) - goto err; - memcpy(ctx->auth_alg_name, algo_auth->alg_name, 64); - ctx->digest_len = algo_auth->alg_trunc_len >> 3; + ctx->direction = direction; + if (RAND_bytes((unsigned char *)ctx->iv, + ctx->iv_len + ctx->nonce_len) != 1) { + RTE_LOG(ERR, DATAPLANE, + "Could not generate random bytes for crypto IV." 
+ " System might be low on entropy\n"); + goto err; } return ctx; @@ -958,24 +848,26 @@ crypto_session_create(const struct xfrm_algo *algo_crypt, return NULL; } -void crypto_session_destroy(struct crypto_session *ctx) +static void +crypto_session_destroy(struct crypto_session *ctx, uint8_t rte_cdev_id) { if (!ctx) return; - if (ctx->hmac_ctx) - HMAC_CTX_free(ctx->hmac_ctx); - if (ctx->ctx) - EVP_CIPHER_CTX_free(ctx->ctx); + crypto_rte_destroy_session(ctx, rte_cdev_id); + + crypto_openssl_session_teardown(ctx); free(ctx); } -static int check_algorithmic_requirements(const struct xfrm_algo *crypt, - const struct xfrm_algo_auth *auth) +static int +check_algorithmic_requirements(const struct xfrm_algo *crypt, + const struct xfrm_algo_auth *trunc_auth, + const struct xfrm_algo *auth) { /* check RFC4301 */ - if (!crypt && !auth) { + if (!crypt && !auth && !trunc_auth) { ENGINE_ERR("Invalid algorithmic combination: both NULL\n"); return -1; } @@ -990,7 +882,8 @@ static int check_algorithmic_requirements(const struct xfrm_algo *crypt, } /* check RFC3686 */ - if ((strcmp("ctr(aes)", crypt->alg_name) == 0) && !auth) { + if ((strcmp("ctr(aes)", crypt->alg_name) == 0) && !auth && + !trunc_auth) { ENGINE_ERR("Invalid AES-CTR authentication method: NULL\n"); return -1; } @@ -999,45 +892,25 @@ static int check_algorithmic_requirements(const struct xfrm_algo *crypt, } int cipher_setup_ctx(const struct xfrm_algo *algo_crypt, - const struct xfrm_algo_auth *algo_auth, + const struct xfrm_algo_auth *algo_trunc_auth, + const struct xfrm_algo *algo_auth, const struct xfrm_usersa_info *sa_info, const struct xfrm_encap_tmpl *tmpl, struct sadb_sa *sa, uint32_t extra_flags) { - int ret; - - if (check_algorithmic_requirements(algo_crypt, algo_auth)) + if (check_algorithmic_requirements(algo_crypt, algo_trunc_auth, + algo_auth)) return -1; - sa->session = crypto_session_create(algo_crypt, algo_auth, -1); + sa->session = crypto_session_create(algo_crypt, algo_trunc_auth, + algo_auth, -1); if 
(!sa->session) return -1; - if (algo_crypt) { - ret = crypto_session_set_enc_key( - sa->session, - algo_crypt->alg_key_len >> 3, - algo_crypt->alg_key); - if (ret) { - ENGINE_ERR("Failed to set session encryption key\n"); - return ret; - } - } - if (algo_auth) { - ret = crypto_session_set_auth_key( - sa->session, - algo_auth->alg_key_len >> 3, - algo_auth->alg_key); - if (ret) { - ENGINE_ERR("Failed to set session integrity key\n"); - return ret; - } - } - sa->udp_encap = 0; if (tmpl) { if (tmpl->encap_type == UDP_ENCAP_ESPINUDP) { - sa->udp_encap = 1; + sa->udp_encap = sizeof(struct udphdr); sa->udp_sport = tmpl->encap_sport; sa->udp_dport = tmpl->encap_dport; } else { @@ -1071,7 +944,7 @@ int cipher_setup_ctx(const struct xfrm_algo *algo_crypt, .version = IPVERSION, .protocol = sa->udp_encap ? IPPROTO_UDP : IPPROTO_ESP, }; - sa->iphdr.check = in_cksum_hdr(&sa->iphdr); + sa->iphdr.check = dp_in_cksum_hdr(&sa->iphdr); } else { struct ip6_hdr *ip6_hdr = &sa->ip6_hdr; @@ -1088,7 +961,7 @@ int cipher_setup_ctx(const struct xfrm_algo *algo_crypt, void cipher_teardown_ctx(struct sadb_sa *sa) { - crypto_session_destroy(sa->session); + crypto_session_destroy(sa->session, sa->rte_cdev_id); sa->session = NULL; } @@ -1134,26 +1007,49 @@ uint32_t cipher_get_encryption_overhead(struct sadb_sa *sa, return overhead; } -void crypto_engine_load(void) +int crypto_engine_load(void) { ENGINE_DEBUG("Cryptolib init\n"); ERR_load_crypto_strings(); OpenSSL_add_all_algorithms(); - OPENSSL_init_crypto(OPENSSL_INIT_LOAD_CONFIG | - OPENSSL_INIT_ENGINE_ALL_BUILTIN, NULL); + + if (OPENSSL_init_crypto(OPENSSL_INIT_LOAD_CONFIG | + OPENSSL_INIT_ENGINE_ALL_BUILTIN, NULL) != 1) + return -1; + OpenSSL_add_all_digests(); + + return 0; } void crypto_engine_summary(json_writer_t *wr, const struct sadb_sa *sa) { + struct crypto_session *sess; + if (!sa->session) return; - jsonw_string_field(wr, "cipher", sa->session->cipher_name); - - jsonw_string_field(wr, "digest", sa->session->md_name ? 
- sa->session->md_name : "null"); - jsonw_uint_field(wr, "replay_window", sa->replay_window); + sess = sa->session; + + if (sess->aead_algo != RTE_CRYPTO_AEAD_LIST_END) { + jsonw_string_field( + wr, "cipher", + rte_crypto_aead_algorithm_strings[sess->aead_algo]); + jsonw_uint_field( + wr, "cipher_key_len", + (sess->key_len - sess->nonce_len) * BITS_PER_BYTE); + } else if (sess->cipher_algo != RTE_CRYPTO_CIPHER_LIST_END) { + jsonw_string_field( + wr, "cipher", + rte_crypto_cipher_algorithm_strings[ + sess->cipher_algo]); + jsonw_uint_field(wr, "cipher_key_len", + sess->key_len * BITS_PER_BYTE); + } else + jsonw_string_field(wr, "cipher", "Unknown"); + + jsonw_string_field(wr, "digest", + rte_crypto_auth_algorithm_strings[sess->auth_algo]); } static int crypto_chain_dump_set_iv(struct crypto_visitor_ctx *ctx, @@ -1224,12 +1120,6 @@ const struct crypto_visitor_operations crypto_chain_dump_vops = { .icv_finalise = crypto_chain_dump_elem, }; -const struct crypto_visitor_operations * -crypto_chain_dump_get_vops(void) -{ - return &crypto_chain_dump_vops; -} - /* * libcrypto locking mechanism callbacks for multi threading */ @@ -1267,7 +1157,7 @@ void crypto_engine_init(void) int i; lockarray = OPENSSL_malloc(CRYPTO_num_locks() * - sizeof(*lockarray)); + sizeof(lockarray[0])); for (i = 0; i < CRYPTO_num_locks(); i++) (void)pthread_mutex_init(&(lockarray[i]), NULL); diff --git a/src/crypto/crypto_forward.h b/src/crypto/crypto_forward.h index 9216ff9e..e86a4e65 100644 --- a/src/crypto/crypto_forward.h +++ b/src/crypto/crypto_forward.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015-2016 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -11,18 +11,7 @@ #include #include -#include "nh.h" - -/* - * crypto_policy_outbound_match() - * - * Determine if a packet matches the IPsec output policies/ - * - * Return value: true or false - * - */ -bool crypto_policy_outbound_match(struct ifnet *in_ifp, struct rte_mbuf **mbuf, - uint16_t ether); +#include "nh_common.h" /* * crypto_policy_check_outbound() @@ -44,7 +33,7 @@ bool crypto_policy_check_outbound(struct ifnet *in_ifp, struct rte_mbuf **mbuf, uint32_t tbl_id, uint16_t eth_type, - union next_hop_v4_or_v6_ptr *nh); + struct next_hop **nh); /* * Call crypto_policy_check_inbound() for locally terminating diff --git a/src/crypto/crypto_internal.h b/src/crypto/crypto_internal.h index 9f0b90e9..7dd57584 100644 --- a/src/crypto/crypto_internal.h +++ b/src/crypto/crypto_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2021, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015-2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -17,15 +17,19 @@ #include #include #include +#include #include #include #include #include +#include "crypto_defs.h" #include "crypto_main.h" #include "json_writer.h" +#include "rldb.h" #include "vplane_log.h" -#include "vrf.h" +#include "vrf_internal.h" +#include "crypto_rte_pmd.h" #define CRYPTO_DATA_ERR(args...) 
\ DP_DEBUG(CRYPTO_DATA, ERR, CRYPTO, args) @@ -57,49 +61,6 @@ struct crypto_dp { struct rte_ring *crypto_q[MAX_CRYPTO_XFRM]; }; -#if !HAVE_DECL_HMAC_CTX_NEW -static inline HMAC_CTX *HMAC_CTX_new(void) -{ - return (HMAC_CTX *)calloc(1, sizeof(HMAC_CTX)); -} - -static inline void HMAC_CTX_free(HMAC_CTX *ctx) -{ - HMAC_CTX_cleanup(ctx); - free(ctx); -} - -struct ossl_init_settings_st { - char *appname; -}; - -# define OPENSSL_INIT_LOAD_CRYPTO_STRINGS 0x00000002L -# define OPENSSL_INIT_NO_ADD_ALL_DIGESTS 0x00000020L -# define OPENSSL_INIT_LOAD_CONFIG 0x00000040L -# define OPENSSL_INIT_NO_LOAD_CONFIG 0x00000080L -# define OPENSSL_INIT_ASYNC 0x00000100L -# define OPENSSL_INIT_ENGINE_RDRAND 0x00000200L -# define OPENSSL_INIT_ENGINE_DYNAMIC 0x00000400L -# define OPENSSL_INIT_ENGINE_OPENSSL 0x00000800L -# define OPENSSL_INIT_ENGINE_CRYPTODEV 0x00001000L -# define OPENSSL_INIT_ENGINE_CAPI 0x00002000L -# define OPENSSL_INIT_ENGINE_PADLOCK 0x00004000L -# define OPENSSL_INIT_ENGINE_AFALG 0x00008000L - -# define OPENSSL_INIT_ENGINE_ALL_BUILTIN \ - (OPENSSL_INIT_ENGINE_RDRAND | OPENSSL_INIT_ENGINE_DYNAMIC \ - | OPENSSL_INIT_ENGINE_CRYPTODEV | OPENSSL_INIT_ENGINE_CAPI | \ - OPENSSL_INIT_ENGINE_PADLOCK) - -static inline void -OPENSSL_init_crypto(uint32_t opts __attribute__ ((__unused__)), - const struct ossl_init_settings_st *settings - __attribute__ ((__unused__))) -{ - OPENSSL_config(NULL); -} -#endif - #define CRYPTO_PMD_INVALID_ID -1 struct crypto_session_operations; @@ -110,35 +71,47 @@ enum crypto_dir { CRYPTO_DIR_OUT }; +struct crypto_openssl_info { + const struct crypto_session_operations *s_ops; + EVP_CIPHER_CTX *ctx; + HMAC_CTX *hmac_ctx; + const EVP_CIPHER *cipher; + const EVP_MD *md; +}; + struct crypto_session { /* All perpacket in first cacheline */ - const struct crypto_session_operations *s_ops; + + struct rte_cryptodev_sym_session *rte_session; int8_t direction; /* -1 | XFRM_POLICY_IN | _OUT*/ uint8_t cipher_init; - uint16_t digest_len; /* in bytes */ - uint16_t 
block_size; /* in bytes */ - uint16_t iv_len; /* in bytes */ - EVP_CIPHER_CTX *ctx; - HMAC_CTX *hmac_ctx; - uint16_t nonce_len; /* in bytes */ - char iv[EVP_MAX_IV_LENGTH]; + uint8_t digest_len; /* in bytes */ + uint8_t block_size; /* in bytes */ + uint8_t iv_len; /* in bytes */ + uint8_t nonce_len; /* in bytes */ + uint8_t key_len; /* in bytes */ + uint8_t auth_alg_key_len; /* in bytes */ + char iv[CRYPTO_MAX_IV_LENGTH]; + unsigned char nonce[CRYPTO_MAX_IV_LENGTH]; + uint8_t key[CRYPTO_MAX_CIPHER_KEY_LENGTH]; + + /* --- cacheline 1 boundary (64 bytes) was 16 bytes ago --- */ + /* - * Max nonce slips into 2rd cacheline, however normal use case - * aes128g/256gcm is 4 bytes and so it is within first cache - * line + * For AES-128-GCM, all the data required should be within the + * first cacheline. For all other ciphers, it will take 2 cachelines + * to load all the required data */ - unsigned char nonce[EVP_MAX_IV_LENGTH]; - /* Cacheline1 */ - uint16_t key_len; /* in bytes */ - uint16_t auth_alg_key_len; /* in bytes */ - uint8_t key[EVP_MAX_KEY_LENGTH]; - char auth_alg_name[64]; - char auth_alg_key[EVP_MAX_KEY_LENGTH]; - - const EVP_CIPHER *cipher; - const EVP_MD *md; - const char *md_name; - const char *cipher_name; + char auth_alg_key[CRYPTO_MAX_AUTH_KEY_LENGTH]; + + struct crypto_openssl_info *o_info; + + enum rte_crypto_aead_algorithm aead_algo; + enum rte_crypto_cipher_algorithm cipher_algo; + + /* --- cacheline 2 boundary (128 bytes) --- */ + + enum rte_crypto_auth_algorithm auth_algo; }; /* @@ -157,7 +130,7 @@ struct sadb_sa { uint32_t spi; /* Network byte order */ uint32_t mark_val; bool blocked; - char SPARE1; + uint8_t rte_cdev_id; uint16_t family; enum crypto_dir dir; struct iphdr iphdr; @@ -168,7 +141,7 @@ struct sadb_sa { uint8_t udp_encap; uint16_t id; struct crypto_session *session; - /* Cacheline 1 boundary */ + /* --- cacheline 1 boundary (64 bytes) --- */ uint16_t udp_sport; uint16_t udp_dport; uint32_t seq; @@ -178,8 +151,8 @@ struct sadb_sa { 
uint64_t packet_limit; uint64_t byte_count; uint64_t byte_limit; - /* Cacheline 2 boundary */ xfrm_address_t dst; + /* --- cacheline 2 boundary (128 bytes) --- */ struct cds_list_head peer_links; uint32_t reqid; int pmd_dev_id; @@ -187,24 +160,31 @@ struct sadb_sa { xfrm_address_t src; uint32_t seq_drop; int del_pmd_dev_id; - /* Cacheline 3 boundary */ + /* --- cacheline 3 boundary (192 bytes) --- */ uint8_t replay_window; uint8_t pending_del; + uint8_t fwd_core; uint64_t replay_bitmap; struct ip6_hdr ip6_hdr; struct ifnet *feat_attach_ifp; vrfid_t overlay_vrf_id; + uint64_t epoch; }; +static_assert(offsetof(struct sadb_sa, udp_sport) == 64, + "first cache line exceeded"); +static_assert(offsetof(struct sadb_sa, peer_links) == 128, + "second cache line exceeded"); +static_assert(offsetof(struct sadb_sa, replay_window) == 192, + "third cache line exceeded"); + struct crypto_chain_elem; struct crypto_session_operations { const struct crypto_visitor_operations *decrypt_vops; const struct crypto_visitor_operations *encrypt_vops; - int (*set_enc_key)(struct crypto_session *session, - unsigned int length, const char key[]); - int (*set_auth_key)(struct crypto_session *session, - unsigned int length, const char key[]); + int (*set_enc_key)(struct crypto_session *session); + int (*set_auth_key)(struct crypto_session *session); int (*generate_iv)(struct crypto_session *session, char iv[]); int (*set_iv)(struct crypto_session *session, unsigned int length, const char iv[]); @@ -248,9 +228,6 @@ struct crypto_visitor_operations { const struct crypto_visitor_operations * crypto_session_get_vops(struct crypto_session *session); -const struct crypto_visitor_operations * -crypto_chain_dump_get_vops(void); - static inline uint32_t crypto_session_block_size(const struct crypto_session *ctx) { @@ -269,35 +246,15 @@ crypto_session_digest_len(const struct crypto_session *ctx) return ctx->digest_len; } -int crypto_session_set_enc_key(struct crypto_session *session, - unsigned int 
length, const char key[]); -int crypto_session_set_auth_key(struct crypto_session *session, - unsigned int length, - const char key[]); -int crypto_session_generate_iv(struct crypto_session *session, - char iv[]); -int crypto_session_set_iv(struct crypto_session *session, unsigned int length, - const char iv[]); +int crypto_session_set_enc_key(struct crypto_session *session); +int crypto_session_set_auth_key(struct crypto_session *session); struct crypto_session * crypto_session_create(const struct xfrm_algo *algo_crypt, - const struct xfrm_algo_auth *algo_auth, + const struct xfrm_algo_auth *algo_trunc_auth, + const struct xfrm_algo *algo_auth, int direction); -void crypto_session_destroy(struct crypto_session *ctx); - -/* - * DEPRECATED: This function is a temporary helper to set the crypto_session - * direction. It will be removed as soon as the policy direction is can get - * resolved on crypto_session creation. - */ -static inline void -crypto_session_set_direction(struct crypto_session *ctx, int direction) -{ - if (unlikely(ctx->direction == -1)) - ctx->direction = direction; -} - /* * Returns TRUE if two IPv4 (or IPv6) addresses are equal. 
*/ @@ -382,34 +339,22 @@ int crypto_chain_walk(struct crypto_chain *chain); int crypto_chain_init(struct crypto_chain *chain, struct crypto_session *session); -void crypto_engine_load(void); +int crypto_engine_load(void); int cipher_setup_ctx(const struct xfrm_algo *, const struct xfrm_algo_auth *, + const struct xfrm_algo *, const struct xfrm_usersa_info *, const struct xfrm_encap_tmpl *t, struct sadb_sa *, uint32_t extra_flags); void cipher_teardown_ctx(struct sadb_sa *sa); -void crypto_engine_summary(json_writer_t *wr, const struct sadb_sa *sa); - -#define IF_INCR_Mx(_ifp, _m, _x) \ -do { \ - if (_ifp) \ - if_incr ## _x(_ifp, _m); \ -} while (0) +int crypto_openssl_session_setup(struct crypto_session *sess); -#define IF_INCR_x(_ifp, _x) \ -do { \ - if (_ifp) \ - if_incr ## _x(_ifp); \ -} while (0) +void crypto_openssl_session_teardown(struct crypto_session *sess); -#define IF_INCR_OERROR(_ifp) IF_INCR_x(_ifp, _oerror) -#define IF_INCR_ERROR(_ifp) IF_INCR_x(_ifp, _error) -#define IF_INCR_IN(_ifp, _m) IF_INCR_Mx(_ifp, _m, _in) -#define IF_INCR_OUT(_ifp, _m) IF_INCR_Mx(_ifp, _m, _out) +void crypto_engine_summary(json_writer_t *wr, const struct sadb_sa *sa); extern uint32_t crypto_rekey_requests; @@ -469,12 +414,32 @@ enum ipsec_cnt_types { DROPPED_NO_IFP, DROPPED_INVALID_PMD_DEV_ID, DROPPED_NO_SPI_TO_SA, - PR_CACHE_ADD, - PR_CACHE_ADD_FAIL, - PR_CACHE_HIT, - PR_CACHE_MISS, + FLOW_CACHE_ADD, + FLOW_CACHE_ADD_FAIL, + FLOW_CACHE_HIT, + FLOW_CACHE_MISS, DROPPED_NO_BIND, DROPPED_ON_FP_NO_PR, + DROPPED_COP_ALLOC_FAILED, + CRYPTO_OP_FAILED, + CRYPTO_OP_ASSOC_FAILED, + CRYPTO_OP_PREPARE_FAILED, + DROPPED_ESP_IP_FRAG, + ESP_NOT_IN_FIRST_SEG, + INVALID_CIPHERTEXT_LEN, + ESP_TAIL_TRIM_FAILED, + ESP_INVALID_NXT_HDR, + INVALID_IPSEC_MODE, + ESP_ETH_HDR_FIXUP_FAILED, + ESP_OUT_HDR_PARSE6_FAILED, + ESP_HDR_PREPEND_FAILED, + ESP_TAIL_APPEND_FAILED, + CRYPTO_CHAIN_INIT_FAILED, + CRYPTO_AUTH_OP_FAILED, + CRYPTO_CIPHER_OP_FAILED, + CRYPTO_DIGEST_OP_FAILED, + CRYPTO_DIGEST_CB_FAILED, 
+ CRYPTO_PP_ENQ_FAILED, IPSEC_CNT_MAX /* this must be last */ }; @@ -487,7 +452,22 @@ struct crypto_vrf_ctx { struct cds_lfht *spi_out_hash_table; struct cds_lfht *sadb_hash_table; struct cds_lfht *s2s_bind_hash_table; + struct rldb_db_handle *input_policy_v4_rldb; + struct rldb_db_handle *output_policy_v4_rldb; + struct rldb_db_handle *input_policy_v6_rldb; + struct rldb_db_handle *output_policy_v6_rldb; vrfid_t vrfid; + /* + * total policy counts indicate the number + * of policies added to NPF prior to any commit + * occurring + */ + uint32_t crypto_total_ipv4_policies; + uint32_t crypto_total_ipv6_policies; + /* + * live policy counts indicate the number + * of policies active after the NPF commit is done + */ uint32_t crypto_live_ipv6_policies; uint32_t crypto_live_ipv4_policies; unsigned int count_of_sas; @@ -511,18 +491,11 @@ ipsec_counters[RTE_MAX_LCORE][IPSEC_CNT_MAX] __rte_cache_aligned; uint32_t cipher_get_encryption_overhead(struct sadb_sa *sa, uint16_t family); -void crypto_sadb_peer_overhead_subscribe(const xfrm_address_t *peer_address, - uint16_t family, uint32_t reqid, +void crypto_sadb_tunl_overhead_subscribe(uint32_t reqid, struct crypto_overhead *overhead, vrfid_t vrfid); -void crypto_sadb_peer_overhead_unsubscribe(const xfrm_address_t *peer_address, - uint16_t family, - struct crypto_overhead *overhead, - vrfid_t vrfid); - -int crypto_sadb_peer_overhead_change_reqid(const xfrm_address_t *peer_address, - uint16_t family, uint32_t reqid, +void crypto_sadb_tunl_overhead_unsubscribe(uint32_t reqid, struct crypto_overhead *overhead, vrfid_t vrfid); @@ -536,6 +509,7 @@ void vti_reqid_set(const xfrm_address_t *dst, uint8_t family, void vti_reqid_clear(const xfrm_address_t *dst, uint8_t family, uint32_t mark); void crypto_expire_request(uint32_t spi, uint32_t reqid, + xfrm_address_t dst, uint16_t family, uint8_t proto, uint8_t hard); void crypto_engine_init(void); void crypto_engine_shutdown(void); @@ -556,7 +530,10 @@ void crypto_delete_queue(struct 
rte_ring *pmd_queue); */ void crypto_remove_sa_from_pmd(int crypto_dev_id, enum crypto_xfrm xfrm, bool pending); -int crypto_allocate_pmd(enum crypto_xfrm xfrm); +int crypto_allocate_pmd(enum crypto_xfrm xfrm, + enum rte_crypto_cipher_algorithm cipher_algo, + enum rte_crypto_aead_algorithm aead_algo, + bool *setup_openssl); struct rte_ring *crypto_pmd_get_q(int dev_id, enum crypto_xfrm xfrm); typedef bool (*crypto_pmd_walker_cb)(int pmd_dev_id, enum crypto_xfrm, struct rte_ring *, @@ -564,11 +541,199 @@ typedef bool (*crypto_pmd_walker_cb)(int pmd_dev_id, enum crypto_xfrm, uint32_t *packets); unsigned int crypto_pmd_walk_per_xfrm(struct cds_list_head *pmd_head, crypto_pmd_walker_cb cb); -void crypto_pmd_inc_pending_del(int pmd_dev_id, enum crypto_xfrm xfrm); +void crypto_pmd_mod_pending_del(int pmd_dev_id, enum crypto_xfrm xfrm, + bool inc); void crypto_pmd_dec_pending_del(int pmd_dev_id, enum crypto_xfrm xfrm); struct crypto_vrf_ctx *crypto_vrf_find(vrfid_t vrfid); struct crypto_vrf_ctx *crypto_vrf_find_external(vrfid_t vrfid); struct crypto_vrf_ctx *crypto_vrf_get(vrfid_t vrfid); void crypto_vrf_check_remove(struct crypto_vrf_ctx *vrf_ctx); -struct ifnet *crypto_policy_feat_attach_by_reqid(uint32_t reqid); +struct ifnet *crypto_policy_feat_attach_by_reqid(struct crypto_vrf_ctx *vrf_ctx, + uint32_t reqid); + +/* + * Per packet crypto context. This carries information + * from the policy lookup in the forwarding thread that + * is needed for the SA lookup in the crypto thread. 
+ */ +struct crypto_pkt_ctx { + /* + * The fields are ordered to minimize holes and + * place as much critical data as possible in the + * first cache line + */ + struct rte_mbuf *mbuf; + uint32_t reqid; + uint32_t spi; + void *l3hdr; + struct ifnet *in_ifp; + struct ifnet *nxt_ifp; + uint16_t out_ethertype; + int8_t status; + uint8_t udp_len; + uint8_t esp_len; + uint8_t icv_len; + uint8_t orig_family; + uint8_t family; + struct sadb_sa *sa; + struct ifnet *vti_ifp; + + /* --- cacheline 1 boundary (64 bytes) --- */ + + uint16_t iphlen; + uint16_t base_len; + uint16_t ciphertext_len; + uint16_t plaintext_size; + uint16_t plaintext_size_orig; + uint16_t prev_off; + uint16_t head_trim; + uint16_t out_hdr_len; + uint8_t action; + uint8_t in_ifp_port; + uint16_t direction; + /* bytes encrypted/decrypted */ + uint32_t bytes; + unsigned char *esp; + unsigned char *iv; + unsigned char *icv; + char *hdr; + char *tail; + unsigned int counter_modify; + xfrm_address_t dst; /* Only used for outbound traffic */ + vrfid_t vrfid; +}; + +/* + * Move bad (unprocessed) mbufs beyond the good (processed) ones. + * bad_idx[] contains the indexes of bad context pointers. 
+ */ +static inline void +move_bad_mbufs(struct crypto_pkt_ctx *ctx_arr[], uint16_t count, + const uint16_t bad_idx[], uint16_t bad_count) +{ + uint16_t i, j, k; + + if (likely(!bad_count)) + return; + + struct crypto_pkt_ctx *tmp_ctx_arr[bad_count]; + + j = 0; + k = 0; + + /* copy bad ones into a temp place */ + for (i = 0; i < count; i++) { + if (j != bad_count && i == bad_idx[j]) + tmp_ctx_arr[j++] = ctx_arr[i]; + else + ctx_arr[k++] = ctx_arr[i]; + } + + /* copy bad ones after the good ones */ + for (i = 0; i != bad_count; i++) + ctx_arr[k + i] = tmp_ctx_arr[i]; +} + +#define CRYPTO_PREFETCH_LOOKAHEAD 10 + +static inline +void crypto_prefetch_ctx(struct crypto_pkt_ctx *ctx_arr[], uint16_t count, + uint16_t cur) +{ + uint16_t i, j; + + if (likely(cur % CRYPTO_PREFETCH_LOOKAHEAD)) + return; + + i = cur + CRYPTO_PREFETCH_LOOKAHEAD; + j = cur; + for (; j < count && j < i; j++) + rte_prefetch0(ctx_arr[j]); +} + +static inline +void crypto_prefetch_ctx_data(struct crypto_pkt_ctx *ctx_arr[], uint16_t count, + uint16_t cur) +{ + uint16_t i, j; + + if (likely(cur % CRYPTO_PREFETCH_LOOKAHEAD)) + return; + + i = cur + CRYPTO_PREFETCH_LOOKAHEAD; + j = cur; + for (; j < count && j < i; j++) { + rte_prefetch0(ctx_arr[j]->mbuf); + rte_prefetch0(ctx_arr[j]->sa); + } +} + +static inline +void crypto_prefetch_mbuf_data(struct crypto_pkt_ctx *ctx_arr[], uint16_t count, + uint16_t cur) +{ + uint16_t i, j; + + if (likely(cur % CRYPTO_PREFETCH_LOOKAHEAD)) + return; + + i = cur + CRYPTO_PREFETCH_LOOKAHEAD; + j = cur + 1; + for (; j < count && j < i; j++) + rte_prefetch0(ctx_arr[j]->mbuf->cacheline1); +} + +/* + * Fetch data for entire burst into L2 cache + * This results in a significant increase in throughput + * with multiple cores due to a reduction in memory + * contention + */ +static inline +void crypto_prefetch_mbuf_payload(struct rte_mbuf *m) +{ + uint16_t offset = 0; + + for (offset = 0; offset < rte_pktmbuf_data_len(m); + offset += RTE_CACHE_LINE_SIZE) + 
rte_prefetch1(rte_pktmbuf_mtod_offset(m, void *, + offset)); +} + +static inline +void crypto_prefetch_ivs(void) +{ + struct crypto_pkt_buffer *cpb = cpbdb[dp_lcore_id()]; + uint16_t i; + + if (unlikely(!cpb)) + return; + + for (i = 0; i < MAX_CRYPTO_PKT_BURST; ) { + rte_prefetch0(cpb->iv_cache[i]); + i += RTE_CACHE_LINE_SIZE / CRYPTO_MAX_IV_LENGTH; + } +} + +void crypto_save_iv(uint16_t idx, const char iv[], uint16_t length); +void crypto_get_iv(uint16_t idx, char iv[], uint16_t length); + +static inline +void crypto_prefetch_ops(uint16_t cur, uint16_t count) +{ + struct crypto_pkt_buffer *cpb = cpbdb[dp_lcore_id()]; + uint16_t i, j; + + if (unlikely(!cpb)) + return; + + if (likely(cur % CRYPTO_PREFETCH_LOOKAHEAD)) + return; + + i = cur + CRYPTO_PREFETCH_LOOKAHEAD; + j = cur; + for (; j < count && j < i; j++) + rte_prefetch0(cpb->cops[j]); +} + #endif /* CRYPTO_INTERNAL_H */ diff --git a/src/crypto/crypto_main.h b/src/crypto/crypto_main.h index f026c0e7..b190f99a 100644 --- a/src/crypto/crypto_main.h +++ b/src/crypto/crypto_main.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015-2016 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -13,7 +13,8 @@ #include #include -#include "crypto_policy_cache.h" +#include "crypto_defs.h" +#include "crypto_rte_pmd.h" #include "urcu.h" /* @@ -48,15 +49,37 @@ enum crypto_xfrm { struct crypto_pkt_buffer { int pmd_dev_id[MAX_CRYPTO_XFRM]; uint32_t local_q_count[MAX_CRYPTO_XFRM]; - rte_atomic16_t pr_cache_count; char SPARE[6]; - struct cds_lfht *pr_cache_tbl; struct crypto_pkt_ctx *local_crypto_q[MAX_CRYPTO_XFRM] [MAX_CRYPTO_PKT_BURST]; + struct rte_crypto_op *cops[MAX_CRYPTO_PKT_BURST]; + unsigned char iv_cache[MAX_CRYPTO_PKT_BURST][CRYPTO_MAX_IV_LENGTH]; }; +/* + * crypto per-core post-processing queue + * The processing that needs to occur after encryption/decryption is standard + * IP forwarding that can occur in parallel on each forwarding core. The only + * constraint is that all packets associated with a particular SA need to + * be processed on the same forwarding core. + */ +struct crypto_fwd_info { + struct rte_ring *fwd_q; + uint64_t fwd_cnt; +}; + +RTE_DECLARE_PER_LCORE(struct crypto_fwd_info *, crypto_fwd); + +extern struct crypto_fwd_info crypto_fwd[RTE_MAX_LCORE]; + RTE_DECLARE_PER_LCORE(struct crypto_pkt_buffer *, crypto_pkt_buffer); +/* + * Crypto Pkt Buffer (CPB) DB, containing pointers to all the + * per CORE CPB. 
+ */ +extern struct crypto_pkt_buffer *cpbdb[RTE_MAX_LCORE]; + int crypto_send_burst(struct crypto_pkt_buffer *cpb, enum crypto_xfrm xfrm, bool drop); @@ -70,13 +93,26 @@ static inline void crypto_send(struct crypto_pkt_buffer *cpb) false); } -void dp_crypto_per_lcore_init(unsigned int lcore_id); void dp_crypto_init(void); -unsigned int dp_crypto_poll(struct cds_list_head *pmd_list); +unsigned int dp_crypto_poll(struct cds_list_head *pmd_head); void dp_crypto_shutdown(void); -int crypto_attach_pmd(struct cds_list_head *pmd_list, +int crypto_attach_pmd(struct cds_list_head *pmd_head, int crypto_dev_id, int lcore); -void dp_crypto_periodic(struct cds_list_head *pmd_list); +void dp_crypto_periodic(struct cds_list_head *pmd_head); void crypto_pmd_remove_all(void); +void crypto_flow_cache_timer_handler(struct rte_timer *tmr, void *arg); +int crypto_pmd_get_info(int pmd_dev_id, uint8_t *rte_dev_id, + enum cryptodev_type *dev_type); + +void crypto_create_fwd_queue(unsigned int lcore_id); +void crypto_destroy_fwd_queue(void); +void crypto_fwd_processed_packets(void); + +/* crypto garbage collection */ +void crypto_gc_timer_handler(struct rte_timer *tmr, + void *arg); + +/* Invoked from rcu callback to signal unbind of SA from PMD */ +void crypto_sa_unbind_rcu(int dev_id); #endif /* _CRYPTO_MAIN_H_ */ diff --git a/src/crypto/crypto_pmd.c b/src/crypto/crypto_pmd.c index a54390d1..b99493df 100644 --- a/src/crypto/crypto_pmd.c +++ b/src/crypto/crypto_pmd.c @@ -1,11 +1,13 @@ /*- - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2017 by Brocade Communications Systems, Inc. * All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only */ +#include #include +#include #include #include #include @@ -26,8 +28,13 @@ #include "urcu.h" #include "vplane_debug.h" #include "vplane_log.h" +#include "crypto_rte_pmd.h" + +#define PMD_DEBUG(args...) 
\ + DP_DEBUG(CRYPTO, DEBUG, PMD, args) + +#define MAX_CRYPTO_PMD 128 -#define MAX_CRYPTO_PMD 32 /* * Dynamic number of pmds supported. Based upon the number of crypto * engines available which is reported from either a probe or a set. @@ -48,14 +55,17 @@ struct pmd_counters { uint64_t bytes; }; +#define DEV_NAME_LEN 64 + struct crypto_pmd { struct cds_list_head next; struct crypto_pmd_q_pair q_pair; unsigned int lcore; int dev_id; - unsigned int sa_cnt; - char SPARE[4]; + enum cryptodev_type dev_type; + int rte_cdev_id; struct rcu_head pmd_rcu; + /* --- cacheline 1 boundary (64 bytes) --- */ /* * The counters are forced into a new cache line to stop * dcache sharing issues as they are updated by the engine and @@ -64,10 +74,17 @@ struct crypto_pmd { char *padding[0] __rte_cache_aligned; struct pmd_counters cnt[MAX_CRYPTO_XFRM]; struct rate_stats rates[MAX_CRYPTO_XFRM]; + rte_atomic32_t sa_cnt; unsigned int sa_cnt_per_type[MAX_CRYPTO_XFRM]; unsigned int pending_remove[MAX_CRYPTO_XFRM]; + char dev_name[DEV_NAME_LEN]; }; +static_assert(offsetof(struct crypto_pmd, padding) == 64, + "first cache line exceeded"); +static_assert(offsetof(struct crypto_pmd, cnt) == 64, + "first cache line exceeded"); + static struct crypto_pmd *crypto_pmd_devs[MAX_CRYPTO_PMD]; /* @@ -146,19 +163,20 @@ pmd_lb_tiebreak(struct crypto_pmd *best_pmd, struct crypto_pmd *pmd, static int pmd_weighted_sa_cnt(struct crypto_pmd *pmd) { - return pmd->sa_cnt - + return rte_atomic32_read(&pmd->sa_cnt) - crypto_pmd_pend_rm_cnt(pmd, MAX_CRYPTO_XFRM); } static struct crypto_pmd * -crypto_pmd_alloc_loadshare(enum crypto_xfrm xfrm) +crypto_pmd_alloc_loadshare(enum crypto_xfrm xfrm, + enum cryptodev_type dev_type) { struct crypto_pmd *pmd, *best_pmd = NULL; unsigned int i, best_count = 0xffff, weight; for (i = 0; i < MAX_CRYPTO_PMD; i++) { pmd = crypto_pmd_devs[i]; - if (!pmd) + if (!pmd || pmd->dev_type != dev_type) continue; weight = pmd_weighted_sa_cnt(pmd); if (weight < best_count) { @@ -169,22 +187,57 @@ 
crypto_pmd_alloc_loadshare(enum crypto_xfrm xfrm) pmd_lb_tiebreak(best_pmd, pmd, xfrm); } } + + PMD_DEBUG("Reusing pmd %s\n", best_pmd->dev_name); + return best_pmd; } +/* + * array of pmd dev ids per core per pmd type + * Used to determine if we already have a specific type of PMD + * running on the desired core + */ +static int8_t lcore_dev_ids[RTE_MAX_LCORE][CRYPTODEV_MAX]; + static struct crypto_pmd * -crypto_pmd_find_or_create(enum crypto_xfrm xfrm) +crypto_pmd_find_or_create(enum crypto_xfrm xfrm, + enum cryptodev_type dev_type) { - unsigned int cpu_socket, dev_id; + unsigned int cpu_socket; + uint8_t dev_id; struct crypto_pmd *pmd; enum crypto_xfrm q; + int err; + int lcore; + + if (pmd_alloc == 0) + memset(lcore_dev_ids, -1, sizeof(lcore_dev_ids)); if (xfrm == MAX_CRYPTO_XFRM) return NULL; if (pmd_alloc >= max_pmds) - return crypto_pmd_alloc_loadshare(xfrm); + return crypto_pmd_alloc_loadshare(xfrm, dev_type); + /* + * check if we have an existing PMD of the desired type + * on the next available crypto core + */ + lcore = next_available_crypto_lcore(); + if (lcore < 0) + return NULL; + + if (lcore_dev_ids[lcore][dev_type] != CRYPTO_PMD_INVALID_ID) { + dev_id = lcore_dev_ids[lcore][dev_type]; + PMD_DEBUG("Found device %s\n", + crypto_pmd_devs[dev_id]->dev_name); + return crypto_pmd_devs[dev_id]; + } + + /* + * allocate id for device + */ for (dev_id = 0; dev_id < MAX_CRYPTO_PMD; dev_id++) if (!crypto_pmd_devs[dev_id]) break; @@ -196,6 +249,7 @@ crypto_pmd_find_or_create(enum crypto_xfrm xfrm) } cpu_socket = rte_lcore_to_socket_id(rte_get_master_lcore()); + pmd = rte_zmalloc_socket("crypto pmd", sizeof(*pmd), RTE_CACHE_LINE_SIZE, @@ -206,6 +260,19 @@ crypto_pmd_find_or_create(enum crypto_xfrm xfrm) return NULL; } + pmd->dev_id = dev_id; + + err = crypto_rte_create_pmd(cpu_socket, dev_id, + dev_type, pmd->dev_name, DEV_NAME_LEN, + &pmd->rte_cdev_id); + if (err != 0) { + CRYPTO_ERR("Could not create DPDK PMD\n"); + rte_free(pmd); + return NULL; + } + + 
pmd->dev_type = dev_type; + CDS_INIT_LIST_HEAD(&pmd->next); pmd->q_pair.q[CRYPTO_ENCRYPT] = @@ -216,13 +283,13 @@ crypto_pmd_find_or_create(enum crypto_xfrm xfrm) crypto_create_ring("pmd-de-q", PMD_RING_SIZE, cpu_socket, dev_id, RING_F_SC_DEQ); - pmd->dev_id = dev_id; + /* Need to add the pmd to the table as the callback * from crypto_assign_engine needs to locate pmd */ rcu_assign_pointer(crypto_pmd_devs[dev_id], pmd); - if (crypto_assign_engine(pmd->dev_id) < 0) { + if (crypto_assign_engine(pmd->dev_id, lcore) < 0) { pmd_engine_assign_fail++; rcu_assign_pointer(crypto_pmd_devs[dev_id], NULL); for (q = MIN_CRYPTO_XFRM; q < MAX_CRYPTO_XFRM; q++) @@ -231,18 +298,23 @@ crypto_pmd_find_or_create(enum crypto_xfrm xfrm) return NULL; } + lcore_dev_ids[lcore][dev_type] = pmd->dev_id; pmd_alloc++; pmd_total_created++; return pmd; } -void crypto_pmd_inc_pending_del(int pmd_dev_id, enum crypto_xfrm xfrm) +void crypto_pmd_mod_pending_del(int pmd_dev_id, enum crypto_xfrm xfrm, bool inc) { if (pmd_dev_id == CRYPTO_PMD_INVALID_ID) return; - if (crypto_pmd_devs[pmd_dev_id]) - crypto_pmd_devs[pmd_dev_id]->pending_remove[xfrm]++; + if (crypto_pmd_devs[pmd_dev_id]) { + if (inc) + crypto_pmd_devs[pmd_dev_id]->pending_remove[xfrm]++; + else + crypto_pmd_devs[pmd_dev_id]->pending_remove[xfrm]--; + } } void crypto_pmd_dec_pending_del(int pmd_dev_id, enum crypto_xfrm xfrm) @@ -284,37 +356,42 @@ int crypto_engine_probe(FILE *f) bool sticky; num = probe_crypto_engines(&sticky); - set_max_pmd(num); + + /* each core can have one PMD of each type */ + set_max_pmd(num * CRYPTODEV_MAX); return f ? 
crypto_cpu_describe(f, num, sticky) : (int) num; } -int crypto_engine_set(FILE *f, const char *str) +int crypto_engine_set(uint8_t *bytes, uint8_t len) { bool tmp_sticky; - int num = set_crypto_engines(str, &tmp_sticky); - - if (!f) - return -1; + int num = set_crypto_engines(bytes, len, &tmp_sticky); if (num < 0) { - fprintf(f, "error invalid mask\n"); - return -1; + RTE_LOG(ERR, DATAPLANE, + "Invalid cpu mask specified for crypto\n"); + return -EINVAL; } set_max_pmd(num); - return crypto_cpu_describe(f, num, tmp_sticky); + return 0; } /* * Return a PMD to be used by the caller, either reusing an * existing PMD or create a new one. If a new one is created * then link it to a crypto_engine */ -int crypto_allocate_pmd(enum crypto_xfrm xfrm) +int crypto_allocate_pmd(enum crypto_xfrm xfrm, + enum rte_crypto_cipher_algorithm cipher_algo, + enum rte_crypto_aead_algorithm aead_algo, + bool *setup_openssl) { struct crypto_pmd *pmd; + enum cryptodev_type dev_type; + int err; /* If this is the first SA then lets go probe the number * of crypto engines we have. @@ -322,14 +399,25 @@ int crypto_allocate_pmd(enum crypto_xfrm xfrm) if (!pmd_alloc) (void)crypto_engine_probe(NULL); - pmd = crypto_pmd_find_or_create(xfrm); + err = crypto_rte_select_pmd_type(cipher_algo, aead_algo, &dev_type, + setup_openssl); + if (err) { + CRYPTO_ERR("Failed to select pmd type for %s\n", + (cipher_algo == RTE_CRYPTO_CIPHER_LIST_END ? 
+ rte_crypto_aead_algorithm_strings[aead_algo] : + rte_crypto_cipher_algorithm_strings[cipher_algo])); + return CRYPTO_PMD_INVALID_ID; + } + pmd = crypto_pmd_find_or_create(xfrm, dev_type); if (!pmd) { + CRYPTO_ERR("Failed to find or create pmd for type %d\n", + dev_type); pmd_alloc_fail++; return CRYPTO_PMD_INVALID_ID; } - pmd->sa_cnt++; + rte_atomic32_inc(&pmd->sa_cnt); pmd->sa_cnt_per_type[xfrm]++; pmd_sa_active++; @@ -349,9 +437,16 @@ static void pmd_purge_and_release_queues(struct crypto_pmd *pmd) static void pmd_rcu_free(struct rcu_head *head) { struct crypto_pmd *pmd; + int err; pmd = caa_container_of(head, struct crypto_pmd, pmd_rcu); pmd_purge_and_release_queues(pmd); + + err = crypto_rte_destroy_pmd(pmd->dev_type, pmd->dev_name, + pmd->dev_id); + if (err != 0) + CRYPTO_ERR("Could not destroy pmd %s\n", pmd->dev_name); + rte_free(pmd); } @@ -364,6 +459,8 @@ static void crypto_pmd_remove(int dev_id) if (!pmd) return; + lcore_dev_ids[pmd->lcore][pmd->dev_type] = CRYPTO_PMD_INVALID_ID; + rcu_assign_pointer(crypto_pmd_devs[dev_id], NULL); pmd_alloc--; @@ -383,6 +480,41 @@ void crypto_pmd_remove_all(void) crypto_pmd_remove(i); } +/* + * Invoked from SA cleanup RCU callback to signal completion + * of SA deletion. 
This is to ensure that each PMD gets deleted + * only after all SAs associated with it have been freed + */ +void crypto_sa_unbind_rcu(int dev_id) +{ + bool err; + struct crypto_pmd *pmd = crypto_dev_id_to_pmd(dev_id, + &err); + + if (!pmd) { + CRYPTO_ERR("No PMD for ID %d\n", dev_id); + return; + } + + rte_atomic32_dec(&pmd->sa_cnt); +} + +void crypto_gc_timer_handler(struct rte_timer *tmr __rte_unused, + void *arg __rte_unused) +{ + struct crypto_pmd *pmd; + int i; + + for (i = 0; i < MAX_CRYPTO_PMD; i++) { + pmd = crypto_pmd_devs[i]; + if (!pmd) + continue; + + if (!rte_atomic32_read(&pmd->sa_cnt)) + crypto_pmd_remove(i); + } +} + void crypto_remove_sa_from_pmd(int dev_id, enum crypto_xfrm xfrm, bool pending) { @@ -397,20 +529,16 @@ void crypto_remove_sa_from_pmd(int dev_id, enum crypto_xfrm xfrm, } pmd->sa_cnt_per_type[xfrm]--; - pmd->sa_cnt--; pmd_sa_active--; if (pending) crypto_pmd_dec_pending_del(dev_id, xfrm); - - if (!pmd->sa_cnt) - crypto_pmd_remove(dev_id); } /* * Insert a PMD into the list of PMDs being procssed by an engine, * i.e. 
an lcore or a pthread */ -int crypto_attach_pmd(struct cds_list_head *pmd_list, int dev_id, int lcore) +int crypto_attach_pmd(struct cds_list_head *pmd_head, int dev_id, int lcore) { bool err; struct crypto_pmd *new_pmd = crypto_dev_id_to_pmd(dev_id, @@ -423,7 +551,7 @@ int crypto_attach_pmd(struct cds_list_head *pmd_list, int dev_id, int lcore) } new_pmd->lcore = lcore; - cds_list_add_rcu(&new_pmd->next, pmd_list); + cds_list_add_rcu(&new_pmd->next, pmd_head); return 0; } @@ -486,13 +614,31 @@ static void crypto_show_pmd_counters(json_writer_t *wr, struct crypto_pmd *pmd) { enum crypto_xfrm q; + struct rte_cryptodev_stats stats; + int err; if (!pmd) return; jsonw_start_object(wr); jsonw_uint_field(wr, "pmd_dev_id", pmd->dev_id); - jsonw_uint_field(wr, "active_sa", pmd->sa_cnt); + jsonw_uint_field(wr, "rte_dev_id", pmd->rte_cdev_id); + + err = rte_cryptodev_stats_get(pmd->rte_cdev_id, &stats); + if (!err) { + jsonw_name(wr, "rte_stats"); + jsonw_start_object(wr); + jsonw_uint_field(wr, "enqueued_cnt", stats.enqueued_count); + jsonw_uint_field(wr, "dequeued_cnt", stats.dequeued_count); + jsonw_uint_field(wr, "enqueued_err_cnt", + stats.enqueue_err_count); + jsonw_uint_field(wr, "dequeued_err_cnt", + stats.dequeue_err_count); + jsonw_end_object(wr); + } + + jsonw_string_field(wr, "dev_name", pmd->dev_name); + jsonw_uint_field(wr, "active_sa", rte_atomic32_read(&pmd->sa_cnt)); jsonw_uint_field(wr, "lcore", pmd->lcore); jsonw_start_array(wr); jsonw_name(wr, "per_pmd_counters"); @@ -555,3 +701,20 @@ void crypto_show_pmd(FILE *f) jsonw_end_object(wr); jsonw_destroy(&wr); } + +int crypto_pmd_get_info(int pmd_dev_id, uint8_t *rte_dev_id, + enum cryptodev_type *dev_type) +{ + struct crypto_pmd *pmd; + bool err; + + pmd = crypto_dev_id_to_pmd(pmd_dev_id, &err); + if (!pmd) { + pmd_not_found++; + return -ENOENT; + } + + *rte_dev_id = pmd->rte_cdev_id; + *dev_type = pmd->dev_type; + return 0; +} diff --git a/src/crypto/crypto_policy.c b/src/crypto/crypto_policy.c index 
4581aa9c..6a8c2e46 100644 --- a/src/crypto/crypto_policy.c +++ b/src/crypto/crypto_policy.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2021, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015-2017 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -7,6 +7,7 @@ */ #include #include +#include #include #include #include @@ -44,23 +45,18 @@ #include "crypto/crypto_internal.h" #include "crypto/crypto_main.h" #include "crypto/crypto_policy.h" -#include "crypto/crypto_policy_cache.h" #include "crypto/crypto_sadb.h" #include "crypto/esp.h" #include "if_var.h" #include "ip_funcs.h" #include "ip_icmp.h" #include "json_writer.h" -#include "nh.h" -#include "npf/npf.h" -#include "npf/config/npf_attach_point.h" -#include "npf/config/npf_config.h" -#include "npf/config/npf_rule_group.h" -#include "npf/config/npf_ruleset_type.h" -#include "npf_shim.h" +#include "lcore_sched.h" +#include "nh_common.h" #include "pipeline/nodes/pl_nodes_common.h" -#include "pktmbuf.h" +#include "pktmbuf_internal.h" #include "pl_node.h" +#include "rldb.h" #include "route.h" #include "route_flags.h" #include "route_v6.h" @@ -69,7 +65,9 @@ #include "urcu.h" #include "vplane_debug.h" #include "vplane_log.h" -#include "vrf.h" +#include "vrf_internal.h" +#include "flow_cache.h" +#include "xfrm_client.h" #include "protobuf.h" #include "protobuf_util.h" @@ -100,30 +98,26 @@ struct rte_timer; * path. */ struct pr_feat_attach { - union { - struct next_hop nh; - struct next_hop_v6 nh6; - } next; + struct next_hop nh; struct rcu_head pr_feat_rcu; }; +#define POLICY_F_PENDING_ADD 0x0001 +#define POLICY_F_PENDING_DEL 0x0002 +#define POLICY_F_PENDING_UPDATE 0x0004 /* * struct policy_rule * * This is the type for entries in the policy rule - * database. Each entry tracks a single NPF rule. - * A given NPF rule can be used by both an input and + * database. Each entry tracks a single rldb rule handle. 
+ * A given rldb rule can be used by both an input and * output policy since their selectors can overlap. * - * The policy database is indexd by the NPF rule tag + * The policy database is indexd by the rule_index * value and also by a subset of the fields in the - * selector. This subset is the same subset used to - * build the text of the NPF rule corresponding to0 - * the selector. + * selector. */ struct policy_rule { - struct cds_lfht_node tag_ht_node; - uint32_t tag; int action; struct cds_lfht_node sel_ht_node; struct xfrm_selector sel; @@ -138,8 +132,9 @@ struct policy_rule { uint32_t policy_priority; uint32_t rule_index; bool vti_tunnel_policy; - bool pending_delete; + uint8_t flags; struct pr_feat_attach *feat_attach; + struct rldb_rule_handle *rh; }; struct policy_rule_key { @@ -147,59 +142,27 @@ struct policy_rule_key { const struct xfrm_mark *mark; }; -#define PR_CACHE_HASH_MIN 8 -#define PR_CACHE_HASH_MAX 2048 - -#define PR_CACHE_MAX_COUNT 4096 -#define PR_CACHE_MAX_MARKER (PR_CACHE_MAX_COUNT + 1) - -bool policy_cache_disabled; - -/* - * Lock free hash tables for policy rule database. - */ -struct cds_lfht *input_policy_rule_tag_ht; -struct cds_lfht *output_policy_rule_tag_ht; +bool flow_cache_disabled; uint32_t crypto_rekey_requests; #define POLICY_RULE_BUFSIZE (1024 * sizeof(char)) #define ATTACH_GROUP_BUFSIZE 32 -/* - * The NPF rules corresponding to policies have tag values - * appended to the rule text. These are used to associate - * a rule matched by an NPF query with the corresponding - * struct policy_rule (see below). - * - * The tag map is a segmented bitmap that is used to - * allocate a unique tag value to the NPF rule that is - * created for each policy. When a packet is matched - * in NPF, the tag value returned is used to find the - * corresponding struct policy_rule. 
- */ -#define PR_TAG_SIZE 13 -#define TM_SECTION_SIZE 512 -#define TM_SECTION_BITS (TM_SECTION_SIZE << 3) -#define TM_WORD_BITS LONGBITS -#define TM_SECTION_WORDS (TM_SECTION_BITS / TM_WORD_BITS) -#define TM_SECTION_COUNT ((1 << PR_TAG_SIZE) / TM_SECTION_BITS) -#define TM_SECTION_OF_BIT(b) ((b) / TM_SECTION_BITS) -#define TM_SECTION_BIT(b) ((b) % TM_SECTION_BITS) -#define TM_WORD_OF_BIT(b) ((b) / TM_WORD_BITS) -#define TM_BIT_WITHIN_WORD(b) (1 << ((b) - 1)) - -struct tagmap_section { - unsigned long bitmap_words[TM_SECTION_WORDS]; - unsigned long inuse_count; -}; +#define CRYPTO_FLOW_CACHE_MAX_COUNT 8192 -struct tagmap { - struct tagmap_section *sections[TM_SECTION_COUNT]; - int next_section; -}; +static struct flow_cache *flow_cache; -static struct tagmap policy_tagmap; +union crypto_ctx { + uint16_t context; + struct { + uint8_t in_rule_checked:1, + in_rule_drop:1, + no_rule_fwd:1, + PR_UNUSED:5; + char SPARE[7]; + }; +}; /* * A binding between a s2s policy and a feature attachment point. @@ -212,204 +175,49 @@ struct s2s_binding { uint ifindex; }; -static bool policy_rule_peer_is_set(const struct policy_rule *pr) -{ - uint16_t af = pr->output_peer_af; - return af == AF_INET || af == AF_INET6; -} - -static bool tagmap_expand(struct tagmap *tm) +static struct flow_cache_entry * +crypto_flow_cache_lookup(struct rte_mbuf *m, bool v4) { - if (tm->next_section >= TM_SECTION_COUNT) - return false; - - tm->sections[tm->next_section] = - calloc(1, sizeof(struct tagmap_section)); - if (!tm->sections[tm->next_section]) - return false; - - tm->next_section++; - - return true; -} + struct flow_cache_entry *entry; + int err; -static bool tagmap_init(struct tagmap *tm) -{ - if (tm) { - int i; + /* Any host generated packet don't make use of the flow cache table*/ + if (flow_cache_disabled) + return NULL; - for (i = 0; i < TM_SECTION_COUNT; i++) - tm->sections[i] = NULL; - tm->next_section = 0; + err = flow_cache_lookup(flow_cache, m, + v4 ? 
FLOW_CACHE_IPV4 : FLOW_CACHE_IPV6, + &entry); + if (err) + return NULL; - return tagmap_expand(tm); - } - return false; + return entry; } -static unsigned int tagmap_section_alloc(struct tagmap_section *tms) +static void crypto_flow_cache_add(struct flow_cache *flow_cache, + struct policy_rule *pr, struct rte_mbuf *m, + bool v4, bool seen_by_crypto, + int dir) { - unsigned int i; - int bit; + union crypto_ctx ctx = { .context = 0 }; /* SA Fix */ + enum flow_cache_ftype af = v4 ? FLOW_CACHE_IPV4 : FLOW_CACHE_IPV6; + struct flow_cache_entry *cache_entry; - if (!tms || (tms->inuse_count == TM_SECTION_BITS)) - return 0; + if (!flow_cache || flow_cache_disabled) + return; - for (i = 0; i < TM_SECTION_WORDS; i++) { - bit = __builtin_ffsl(~tms->bitmap_words[i]); - if (bit) { - tms->bitmap_words[i] |= 1L << (bit - 1); - tms->inuse_count++; - return bit + i * TM_WORD_BITS; + if (pr) { + if (!seen_by_crypto) { + ctx.in_rule_checked = 1; + ctx.in_rule_drop = (pr->action == XFRM_POLICY_BLOCK); + } + } else { + if (dir == XFRM_POLICY_OUT) { + ctx.in_rule_checked = 0; + ctx.in_rule_drop = 0; + ctx.no_rule_fwd = 1; } } - return 0; -} - -static bool tagmap_section_free(struct tagmap_section *tms, int bit) -{ - unsigned long mask; - unsigned int idx; - - if (!tms || (tms->inuse_count == 0)) - return false; - - idx = TM_WORD_OF_BIT(bit); - mask = 1L << (bit % TM_WORD_BITS); - - if (tms->bitmap_words[idx] & mask) { - tms->bitmap_words[idx] &= ~mask; - tms->inuse_count--; - return true; - } - - return false; -} - -/* PR cache management */ -static inline void -pr_cache_entry_free(struct rcu_head *head) -{ - free(caa_container_of(head, struct policy_cache_rule, - policy_cache_rcu)); -} - -static inline void -pr_cache_entry_destroy(struct policy_cache_rule *pr_cache) -{ - call_rcu(&pr_cache->policy_cache_rcu, pr_cache_entry_free); -} - -static inline int -pr_cache_match(struct cds_lfht_node *node, const void *key) -{ - const struct pr_cache_hash_key *pr_cache_key = key; - const struct 
policy_cache_rule *pr_cache = caa_container_of( - node, const struct policy_cache_rule, pr_node); - - if ((pr_cache->key.src != pr_cache_key->src) || - (pr_cache->key.dst != pr_cache_key->dst) || - (pr_cache->key.proto != pr_cache_key->proto) || - (pr_cache->key.vrfid != pr_cache_key->vrfid)) - return 0; - - return 1; -} - -_Static_assert(sizeof(struct pr_cache_hash_key) % 4 == 0, - "struct pr_cache_hash_key must be a multiple of 4 bytes"); - -static inline uint32_t -pr_cache_hash(const struct pr_cache_hash_key *h_key) -{ - return rte_jhash(h_key, sizeof(*h_key) / 4, POLICY_CACHE_HASH_SEED); -} - -static inline void -pr_cache_entry_remove(struct crypto_pkt_buffer *cpb, - struct cds_lfht *table, - struct policy_cache_rule *pr_cache) -{ - /* - * To avoid a race where an entry has been added but the count - * hasn't been bumped - */ - if (rte_atomic16_read(&cpb->pr_cache_count) == 0) - return; - - cds_lfht_del(table, &pr_cache->pr_node); - pr_cache_entry_destroy(pr_cache); - rte_atomic16_dec(&cpb->pr_cache_count); -} - -static int -pr_cache_insert(struct cds_lfht *tbl, struct policy_cache_rule *pr_cache, - const struct pr_cache_hash_key *h_key) -{ - struct cds_lfht_node *ret_node; - - cds_lfht_node_init(&pr_cache->pr_node); - uint32_t hash = pr_cache_hash(h_key); - - ret_node = cds_lfht_add_unique(tbl, hash, pr_cache_match, h_key, - &pr_cache->pr_node); - - return (ret_node != &pr_cache->pr_node) ? 
-1 : 0; -} - -static inline void -pr_cache_parse_hdr4(struct rte_mbuf *m, struct pr_cache_hash_key *h) -{ - const struct iphdr *ip = iphdr(m); - - h->dst = ip->daddr; - h->src = ip->saddr; - h->proto = ip->protocol; - h->vrfid = pktmbuf_get_vrf(m); -} - -static struct policy_cache_rule * -pr_cache_lookup(struct rte_mbuf *m, bool v4) -{ - struct cds_lfht_iter iter; - struct cds_lfht_node *node; - struct crypto_pkt_buffer *cpb = RTE_PER_LCORE(crypto_pkt_buffer); - struct pr_cache_hash_key h_key; - struct cds_lfht *table; - - /* Any host generated or v6 don't make use of the PR cache table*/ - if (policy_cache_disabled || !cpb || !v4) - return NULL; - - table = rcu_dereference(cpb->pr_cache_tbl); - if (!table) - return NULL; - pr_cache_parse_hdr4(m, &h_key); - cds_lfht_lookup(table, pr_cache_hash(&h_key), - pr_cache_match, &h_key, - &iter); - - node = cds_lfht_iter_get_node(&iter); - if (node) - return caa_container_of(node, struct policy_cache_rule, - pr_node); - else - return NULL; -} - -static int -pr_cache_add(struct crypto_pkt_buffer *cpb, struct policy_rule *pr, - struct rte_mbuf *m, bool v4, bool seen_by_crypto, - int dir) -{ - struct policy_cache_rule *pr_cache; - int error; - struct pr_cache_hash_key h_key; - struct cds_lfht *table = rcu_dereference(cpb->pr_cache_tbl); - - /* Any v6 don't make use of the PR cache table */ - if (!v4 || !table) - return -1; /* * In case this is an input policy match, check to see if the @@ -418,175 +226,51 @@ pr_cache_add(struct crypto_pkt_buffer *cpb, struct policy_rule *pr, * for unencrypted packets */ if (dir == XFRM_POLICY_IN) { - pr_cache = pr_cache_lookup(m, v4); - if (pr_cache) { - pr_cache->in_rule_checked = 1; - pr_cache->in_rule_drop = - (pr->action == XFRM_POLICY_BLOCK); - return 0; + cache_entry = crypto_flow_cache_lookup(m, v4); + if (cache_entry) { + flow_cache_entry_set_info(cache_entry, pr, + ctx.context); + return; } } - pr_cache_parse_hdr4(m, &h_key); - pr_cache = malloc_aligned(sizeof(struct 
policy_cache_rule)); - if (unlikely(pr_cache == NULL)) - return -1; - - pr_cache->key = h_key; - pr_cache->pr = pr; - - error = pr_cache_insert(table, pr_cache, &h_key); - - if (unlikely(error != 0)) { - free(pr_cache); - return -1; - } - if (!seen_by_crypto) { - pr_cache->in_rule_checked = 1; - pr_cache->in_rule_drop = (pr->action == XFRM_POLICY_BLOCK); - } - rte_atomic16_inc(&cpb->pr_cache_count); - return 0; -} - -struct cds_lfht * -pr_cache_init(void) -{ - struct cds_lfht *pr_cache_tbl; - - pr_cache_tbl = cds_lfht_new(PR_CACHE_HASH_MIN, - PR_CACHE_HASH_MIN, - PR_CACHE_HASH_MAX, - CDS_LFHT_AUTO_RESIZE, - NULL); - if (pr_cache_tbl == NULL) - POLICY_ERR("Failed to allocate PR cache table\n"); - - return pr_cache_tbl; -} - -static inline void -pr_cache_empty_table(struct crypto_pkt_buffer *cpb) -{ - struct policy_cache_rule *pr_cache; - struct cds_lfht_iter iter; - struct cds_lfht *table; - table = rcu_dereference(cpb->pr_cache_tbl); - if (!table) - return; - - cds_lfht_for_each_entry(table, &iter, pr_cache, pr_node) - pr_cache_entry_remove(cpb, table, pr_cache); + IPSEC_CNT_INC(FLOW_CACHE_MISS); + if (flow_cache_add(flow_cache, pr, ctx.context, m, af) != 0) + IPSEC_CNT_INC(FLOW_CACHE_ADD_FAIL); + else + IPSEC_CNT_INC(FLOW_CACHE_ADD); } -/* - * This may be called in an rcu_callback or in the master thread. In the - * rcu_callback it must be in clear_only mode. 
- */ -static void -pr_cache_invalidate(bool disable, bool clear_only) +int crypto_flow_cache_init_lcore(unsigned int lcore_id) { - unsigned int lcore_id; - - RTE_LCORE_FOREACH(lcore_id) { - struct crypto_pkt_buffer *cpb = - rcu_dereference(cpbdb[lcore_id]); + int err; - if (unlikely(!cpb)) - continue; - - if (cpb->pr_cache_tbl) { - pr_cache_empty_table(cpb); - if (disable && !clear_only) { - if (cds_lfht_destroy(cpb->pr_cache_tbl, - NULL)) - POLICY_ERR( - "Cache tbl destroy failed\n"); - - rcu_assign_pointer( - cpb->pr_cache_tbl, NULL); - } - } else { - if (!disable && !clear_only) - cpb->pr_cache_tbl = pr_cache_init(); - } - } - - POLICY_INFO("Crypto policy cache %s\n", - disable && !clear_only ? "disabled" : "invalidated"); + err = flow_cache_init_lcore(flow_cache, lcore_id); + return err; } -void -pr_cache_timer_handler(struct rte_timer *timer __rte_unused, - void *arg __rte_unused) +int crypto_flow_cache_teardown_lcore(unsigned int lcore_id) { - unsigned int lcore_id; + int err; - RTE_LCORE_FOREACH(lcore_id) { - struct crypto_pkt_buffer *cpb = - rcu_dereference(cpbdb[lcore_id]); - - if (unlikely(!cpb)) - continue; - - if (cpb->pr_cache_tbl && - (rte_atomic16_read(&cpb->pr_cache_count) > - PR_CACHE_MAX_COUNT)) { - POLICY_INFO("Clearing the cache on core %d: Aged out\n", - lcore_id); - pr_cache_empty_table(cpb); - } - } + err = flow_cache_teardown_lcore(flow_cache, lcore_id); + return err; } -static unsigned int allocate_tag(struct tagmap *tm) +int crypto_flow_cache_init(void) { - unsigned int bit; - int i; - - if (!tm) - return 0; - - /* - * Attempt to allocate a tag in an - * existing section of the tagmap. - */ - for (i = 0; i < tm->next_section; i++) { - bit = tagmap_section_alloc(tm->sections[i]); - if (bit) - return bit + i * TM_SECTION_BITS; - } - - /* - * All the existing sections of the - * tagmap are full so add a new one. 
- */ - if (!tagmap_expand(tm)) - return 0; - - bit = tagmap_section_alloc(tm->sections[i]); - if (bit) - return bit + i * TM_SECTION_BITS; + flow_cache = flow_cache_init(CRYPTO_FLOW_CACHE_MAX_COUNT); + if (!flow_cache) + return -ENOMEM; return 0; } -static bool free_tag(struct tagmap *tm, unsigned int tag) +void +crypto_flow_cache_timer_handler(struct rte_timer *tmr __rte_unused, + void *arg __rte_unused) { - unsigned int bit = tag - 1; - int section_idx; - - if (!tm || tag == 0) - return false; - - section_idx = TM_SECTION_OF_BIT(bit); - if ((section_idx >= TM_SECTION_COUNT) || - (section_idx >= tm->next_section) || - (!tm->sections[section_idx])) - return false; - - return tagmap_section_free(tm->sections[section_idx], - TM_SECTION_BIT(bit)); + flow_cache_age(flow_cache); } static unsigned long policy_rule_sel_hash(const struct policy_rule_key *key) @@ -634,7 +318,7 @@ static int policy_rule_sel_match(struct cds_lfht_node *node, const void *key) /* * Match if and only if the all the fields used to build the - * NPF rule are the same (see build_policy_npf_rule()). + * rldb rule are the same (see policy_prepare_rldb_rule()). 
* */ return (policy_rule_sel_eq(&pr->sel, search_key->sel) && @@ -642,7 +326,7 @@ static int policy_rule_sel_match(struct cds_lfht_node *node, const void *key) (!search_key->mark && (pr->mark.v == 0)))); } -static bool policy_rule_add_to_selector_ht(struct policy_rule *pr) +static int policy_rule_add_to_selector_ht(struct policy_rule *pr) { struct cds_lfht_node *ret_node; struct cds_lfht *hash_table; @@ -651,7 +335,7 @@ static bool policy_rule_add_to_selector_ht(struct policy_rule *pr) vrf_ctx = crypto_vrf_get(pr->vrfid); if (!vrf_ctx) - return false; + return -EINVAL; switch (pr->dir) { case XFRM_POLICY_IN: @@ -663,7 +347,7 @@ static bool policy_rule_add_to_selector_ht(struct policy_rule *pr) default: POLICY_ERR( "Failed to add policy rule to hash table: Bad direction\n"); - return false; + return -EINVAL; } key.sel = &pr->sel; @@ -675,20 +359,20 @@ static bool policy_rule_add_to_selector_ht(struct policy_rule *pr) &key, &pr->sel_ht_node); if (ret_node != &pr->sel_ht_node) { POLICY_ERR("Failed to add rule to selector hash table\n"); - return false; + return -EEXIST; } - return true; + return 0; } -static void policy_rule_remove_from_selector_ht(struct policy_rule *pr) +static int policy_rule_remove_from_selector_ht(struct policy_rule *pr) { struct cds_lfht *hash_table; struct crypto_vrf_ctx *vrf_ctx; vrf_ctx = crypto_vrf_find(pr->vrfid); if (!vrf_ctx) - return; + return -EINVAL; switch (pr->dir) { case XFRM_POLICY_IN: @@ -698,19 +382,19 @@ static void policy_rule_remove_from_selector_ht(struct policy_rule *pr) hash_table = vrf_ctx->output_policy_rule_sel_ht; break; default: - POLICY_ERR( - "Failed to remove policy rule from hash table: Bad direction\n"); - return; + return -EINVAL; } cds_lfht_del(hash_table, &pr->sel_ht_node); + return 0; } -static struct policy_rule* +static int policy_rule_find_by_selector(vrfid_t vrfid, const struct xfrm_selector *sel, const struct xfrm_mark *mark, - int policy_direction) + int policy_direction, + struct policy_rule **pr) { 
struct cds_lfht *hash_table; struct policy_rule_key key; @@ -720,7 +404,7 @@ policy_rule_find_by_selector(vrfid_t vrfid, vrf_ctx = crypto_vrf_find(vrfid); if (!vrf_ctx) - return NULL; + return -EINVAL; switch (policy_direction) { case XFRM_POLICY_IN: @@ -732,7 +416,7 @@ policy_rule_find_by_selector(vrfid_t vrfid, default: POLICY_ERR( "Failed to find policy rule in hash table: Bad direction\n"); - return NULL; + return -EINVAL; } key.sel = sel; @@ -743,97 +427,161 @@ policy_rule_find_by_selector(vrfid_t vrfid, policy_rule_sel_match, &key, &iter); node = cds_lfht_iter_get_node(&iter); + if (node) { + *pr = caa_container_of(node, struct policy_rule, sel_ht_node); + return 0; + } + + *pr = NULL; - return node ? caa_container_of(node, struct policy_rule, sel_ht_node) - : NULL; + return -ESRCH; } -static int policy_rule_tag_match(struct cds_lfht_node *node, const void *tag_p) +static int policy_prepare_rldb_rule(struct policy_rule *pr, + struct rldb_rule_spec *rule) { - uint32_t search_tag = *(const uint32_t *)tag_p; - const struct policy_rule *pr; + rule->rldb_user_data = (uintptr_t)pr; + rule->rldb_priority = pr->policy_priority; - pr = caa_container_of(node, const struct policy_rule, tag_ht_node); + if (pr->sel.proto) { + rule->rldb_flags |= NPFRL_FLAG_PROTO; + rule->rldb_proto.npfrl_proto = pr->sel.proto; + } - return (pr->tag == search_tag); -} + if (pr->sel.family == AF_INET) { + struct rldb_v4_prefix *pfx; + rule->rldb_flags |= NPFRL_FLAG_V4_PFX; + + /* src */ + if (pr->sel.prefixlen_s) { + rule->rldb_flags |= NPFRL_FLAG_SRC_PFX; + pfx = &rule->rldb_src_addr.v4_pfx; + memcpy(pfx->npfrl_bytes, &pr->sel.saddr.a4, + sizeof(pfx->npfrl_bytes)); + pfx->npfrl_plen = pr->sel.prefixlen_s; + } -static bool policy_rule_add_to_tag_ht(struct policy_rule *pr) -{ - struct cds_lfht_node *ret_node; - struct cds_lfht *hash_table; + /* dst */ + if (pr->sel.prefixlen_d) { + rule->rldb_flags |= NPFRL_FLAG_DST_PFX; + pfx = &rule->rldb_dst_addr.v4_pfx; + memcpy(pfx->npfrl_bytes, 
&pr->sel.daddr.a4, + sizeof(pfx->npfrl_bytes)); + pfx->npfrl_plen = pr->sel.prefixlen_d; + } + } else if (pr->sel.family == AF_INET6) { + struct rldb_v6_prefix *pfx; + rule->rldb_flags |= NPFRL_FLAG_V6_PFX; + + /* src */ + if (pr->sel.prefixlen_s) { + rule->rldb_flags |= NPFRL_FLAG_SRC_PFX; + pfx = &rule->rldb_src_addr.v6_pfx; + memcpy(pfx->npfrl_bytes, &pr->sel.saddr.a6, + sizeof(pfx->npfrl_bytes)); + pfx->npfrl_plen = pr->sel.prefixlen_s; + } - switch (pr->dir) { - case XFRM_POLICY_IN: - hash_table = input_policy_rule_tag_ht; - break; - case XFRM_POLICY_OUT: - hash_table = output_policy_rule_tag_ht; - break; - default: + /* dst */ + if (pr->sel.prefixlen_d) { + rule->rldb_flags |= NPFRL_FLAG_DST_PFX; + pfx = &rule->rldb_dst_addr.v6_pfx; + memcpy(pfx->npfrl_bytes, &pr->sel.daddr.a6, + sizeof(pfx->npfrl_bytes)); + pfx->npfrl_plen = pr->sel.prefixlen_d; + } + } else { POLICY_ERR( - "Failed to add policy rule to hash table: Bad direction\n"); - return false; + "Failed to add policy rule: AF not supported\n"); + return -EAFNOSUPPORT; } - ret_node = cds_lfht_add_unique(hash_table, pr->tag, - policy_rule_tag_match, - &pr->tag, - &pr->tag_ht_node); + /* + * Port ranges are not yet supported. + * This mimics the pre-RLDB behavior. 
+ */ + if (pr->sel.sport) { + rule->rldb_flags |= NPFRL_FLAG_SRC_PORT_RANGE; + rule->rldb_src_port_range.npfrl_loport = pr->sel.sport; + rule->rldb_src_port_range.npfrl_hiport = pr->sel.sport; + } - if (ret_node != &pr->tag_ht_node) { - POLICY_ERR("Failed to add rule to tag hash table\n"); - return false; + if (pr->sel.dport) { + rule->rldb_flags |= NPFRL_FLAG_DST_PORT_RANGE; + rule->rldb_dst_port_range.npfrl_loport = pr->sel.dport; + rule->rldb_dst_port_range.npfrl_hiport = pr->sel.dport; } - return true; + + return 0; } -static void policy_rule_remove_from_tag_ht(struct policy_rule *pr) +static +struct rldb_db_handle *policy_rule_get_rldb(struct crypto_vrf_ctx *vrf_ctx, + struct policy_rule *pr) { - struct cds_lfht *hash_table; + struct rldb_db_handle *db = NULL; switch (pr->dir) { case XFRM_POLICY_IN: - hash_table = input_policy_rule_tag_ht; + if (pr->sel.family == AF_INET) + db = vrf_ctx->input_policy_v4_rldb; + else + db = vrf_ctx->input_policy_v6_rldb; break; case XFRM_POLICY_OUT: - hash_table = output_policy_rule_tag_ht; + if (pr->sel.family == AF_INET) + db = vrf_ctx->output_policy_v4_rldb; + else + db = vrf_ctx->output_policy_v6_rldb; break; default: POLICY_ERR( - "Failed to remove policy rule from hash table: Bad direction\n"); - return; + "Failed to find rule database: Bad direction\n"); + break; } - cds_lfht_del(hash_table, &pr->tag_ht_node); + return db; } -static struct policy_rule *policy_rule_find_by_tag(uint32_t tag, - int policy_direction) +static int policy_rule_add_to_rldb(struct crypto_vrf_ctx *vrf_ctx, + struct policy_rule *pr) { - struct cds_lfht *hash_table; - struct cds_lfht_node *node; - struct cds_lfht_iter iter; + int rc; + struct rldb_db_handle *db = NULL; + struct rldb_rule_spec rule = { 0 }; - switch (policy_direction) { - case XFRM_POLICY_IN: - hash_table = input_policy_rule_tag_ht; - break; - case XFRM_POLICY_OUT: - hash_table = output_policy_rule_tag_ht; - break; - default: - POLICY_ERR( - "Failed to find policy rule to hash table: 
Bad direction\n"); - return false; - } + /* + * Packets are routed into VTI tunnels, so we + * don't need to create RLDB rules for them. + */ + if (pr->vti_tunnel_policy) + return 0; - cds_lfht_lookup(hash_table, tag, policy_rule_tag_match, &tag, &iter); + rc = policy_prepare_rldb_rule(pr, &rule); + if (rc < 0) + return rc; - node = cds_lfht_iter_get_node(&iter); + db = policy_rule_get_rldb(vrf_ctx, pr); + if (!db) + return -ENOENT; + + rc = rldb_add_rule(db, pr->rule_index, &rule, &pr->rh); + if (rc < 0) { + POLICY_ERR("Failed to add policy rule to rule database\n"); + return rc; + } + + if (pr->sel.family == AF_INET) { + ++vrf_ctx->crypto_total_ipv4_policies; + POLICY_DEBUG("Active IPv4 policies: %d\n", + vrf_ctx->crypto_total_ipv4_policies); + } else { + ++vrf_ctx->crypto_total_ipv6_policies; + POLICY_DEBUG("Active IPv6 policies: %d\n", + vrf_ctx->crypto_total_ipv6_policies); + } - return node ? caa_container_of(node, struct policy_rule, tag_ht_node) - : NULL; + return 0; } static void @@ -843,7 +591,8 @@ policy_rule_set_peer_info(struct policy_rule *pr, { struct ifnet *ifp; - ifp = pr->feat_attach ? nh4_get_ifp(&pr->feat_attach->next.nh) : NULL; + ifp = pr->feat_attach ? + dp_nh_get_ifp(&pr->feat_attach->nh) : NULL; pr->reqid = tmpl->reqid; pr->output_peer_af = tmpl->family; memcpy(&pr->output_peer, dst, sizeof(pr->output_peer)); @@ -853,9 +602,7 @@ policy_rule_set_peer_info(struct policy_rule *pr, vti_reqid_set(&pr->output_peer, pr->output_peer_af, pr->mark.v, pr->reqid); else - crypto_sadb_peer_overhead_subscribe(&pr->output_peer, - pr->output_peer_af, - pr->reqid, &pr->overhead, + crypto_sadb_tunl_overhead_subscribe(pr->reqid, &pr->overhead, pr->vrfid); } @@ -865,7 +612,7 @@ policy_rule_set_mark(struct policy_rule *pr, const struct xfrm_mark *mark) if (mark) { /* * This policy is for a VTI tunnel so we inhibit - * the creation of an NPF rule as these are only + * the creation of an rldb rule as these are only * required for site-to site tunnels. 
*/ pr->vti_tunnel_policy = true; @@ -878,41 +625,19 @@ policy_rule_set_mark(struct policy_rule *pr, const struct xfrm_mark *mark) } } -static struct policy_rule * +static int policy_rule_create(const struct xfrm_userpolicy_info *usr_policy, const struct xfrm_user_tmpl *tmpl, const struct xfrm_mark *mark, const xfrm_address_t *dst, - vrfid_t vrfid) + vrfid_t vrfid, struct policy_rule **pr_ptr) { struct policy_rule *pr; - /* - * The policy priority is used as the top 16 bits of the NPF - * rule index. Since NPF uses a signed int for the index, we - * want to avoid setting the top bit. The algorithm currently - * used by strongSwan to calculate policy priority always gives - * a result that is less than 2^14, but make sure we catch any - * future changes. - */ - if (usr_policy->priority > (uint32_t) - ((1 << (32 - PR_TAG_SIZE)) - 1)) { - POLICY_ERR( - "Failed to create policy rule: priority too high\n"); - return NULL; - } - pr = zmalloc_aligned(sizeof(*pr)); if (!pr) { POLICY_ERR("Policy rule allocation failed\n"); - return NULL; - } - - pr->tag = allocate_tag(&policy_tagmap); - if (!pr->tag) { - POLICY_ERR("Policy rule tag allocation failed\n"); - free(pr); - return NULL; + return -ENOMEM; } /* @@ -924,8 +649,7 @@ policy_rule_create(const struct xfrm_userpolicy_info *usr_policy, XFRM_POLICY_BLOCK : usr_policy->action); pr->dir = usr_policy->dir; pr->policy_priority = usr_policy->priority; - /* Policy priority is not unique, so rule index must include tag. 
*/ - pr->rule_index = (usr_policy->priority << PR_TAG_SIZE) + pr->tag; + pr->rule_index = usr_policy->index; memcpy(&pr->sel, &usr_policy->sel, sizeof(pr->sel)); policy_rule_set_mark(pr, mark); @@ -940,18 +664,18 @@ policy_rule_create(const struct xfrm_userpolicy_info *usr_policy, POLICY_ERR( "Failed to create policy rule: " "Mismatch of tmpl and dst\n"); - free_tag(&policy_tagmap, pr->tag); + free(pr); - return NULL; + return -EINVAL; } if (tmpl && dst) policy_rule_set_peer_info(pr, tmpl, dst); } - cds_lfht_node_init(&pr->tag_ht_node); cds_lfht_node_init(&pr->sel_ht_node); - pr->pending_delete = false; - return pr; + *pr_ptr = pr; + + return 0; } static void policy_feat_attach_free(struct rcu_head *head) @@ -985,12 +709,6 @@ static void policy_rule_rcu_free(struct rcu_head *head) static void policy_rule_rcu_invalidate(struct rcu_head *head) { - /* - * The callback to invalidate a policy rule. At this stage - * we need to make sure that the policy rule is no longer in - * any of the pr caches. 
- */ - pr_cache_invalidate(true, true); /* * Now the PR is gone from the cache, but other threads * may still hold references to it, so wait for another @@ -1007,298 +725,97 @@ static void policy_rule_destroy(struct policy_rule *pr) vti_reqid_clear(&pr->output_peer, pr->output_peer_af, pr->mark.v); else - crypto_sadb_peer_overhead_unsubscribe( - &pr->output_peer, - pr->output_peer_af, + crypto_sadb_tunl_overhead_unsubscribe( + pr->reqid, &pr->overhead, pr->vrfid); } - if (!free_tag(&policy_tagmap, pr->tag)) - POLICY_ERR("Failed to free policy tag %d\n", pr->tag); - policy_feat_attach_destroy(pr); - pr->pending_delete = true; - call_rcu(&pr->policy_rule_rcu, policy_rule_rcu_invalidate); -} - -static bool policy_rule_add_to_hash_tables(struct policy_rule *pr) -{ - if (!policy_rule_add_to_selector_ht(pr)) - return false; - if (policy_rule_add_to_tag_ht(pr)) - return true; - policy_rule_remove_from_selector_ht(pr); - return false; -} - -static void policy_rule_remove_from_hash_tables(struct policy_rule *pr) -{ - policy_rule_remove_from_selector_ht(pr); - policy_rule_remove_from_tag_ht(pr); -} - -static bool policy_rule_build_npf_str(const struct policy_rule *pr, - char *buf, size_t len) -{ - const struct xfrm_selector *sel = &pr->sel; - char saddr_str[INET6_ADDRSTRLEN+1]; - char daddr_str[INET6_ADDRSTRLEN+1]; - struct in_addr ia_src, ia_dst; - struct in6_addr i6a; - char proto_str[32]; - char sport_str[32]; - char dport_str[32]; - char tag_str[23]; - - int res; - - if (sel->family == AF_INET6) { - memcpy(&i6a.s6_addr32, &sel->saddr.a6, sizeof(i6a.s6_addr32)); - if (!inet_ntop(AF_INET6, &i6a, saddr_str, INET6_ADDRSTRLEN)) { - POLICY_ERR("Crypto policy src get fail-%d\n", errno); - return false; - } - memcpy(&i6a.s6_addr32, &sel->daddr.a6, sizeof(i6a.s6_addr32)); - if (!inet_ntop(AF_INET6, &i6a, daddr_str, INET6_ADDRSTRLEN)) { - POLICY_ERR("Crypto policy dst get fail-%d\n", errno); - return false; - } - } else { - ia_src.s_addr = sel->saddr.a4; - if (!inet_ntop(AF_INET, 
&ia_src, saddr_str, INET_ADDRSTRLEN)) { - POLICY_ERR("Crypto policy src get fail-%d\n", errno); - return false; - } - ia_dst.s_addr = sel->daddr.a4; - if (!inet_ntop(AF_INET, &ia_dst, daddr_str, INET_ADDRSTRLEN)) { - POLICY_ERR("Crypto policy dst get fail- %d\n", errno); - return false; - } - } - - if (sel->proto > 0) - snprintf(proto_str, sizeof(proto_str) - 1, "proto=%d ", - sel->proto); - else - proto_str[0] = '\0'; + pr->flags |= POLICY_F_PENDING_DEL; - if (sel->sport > 0) - snprintf(sport_str, sizeof(sport_str) - 1, "src-port=%d ", - ntohs(sel->sport)); - else - sport_str[0] = '\0'; - - if (sel->dport > 0) - snprintf(dport_str, sizeof(dport_str) - 1, "dst-port=%d ", - ntohs(sel->dport)); - else - dport_str[0] = '\0'; - - char const *npf_action; - - /* NB: While non-passthrough IN policies arrive as ALLOW, what they - * mean in DP terms is allow encrypted traffic and drop any - * packets that arrive in the clear matching the policies. - * DP doesn't check IN policies for encrypted traffic and - * therefore these are simply marked as DROP and only checked - * for packets arriving in the clear. + /* + * At this stage we need to make sure that the policy rule is no longer + * in the flow cache. 
*/ - if ((pr->action == XFRM_POLICY_ALLOW) && - ((pr->dir == XFRM_POLICY_OUT) || - !policy_rule_peer_is_set(pr))) { - npf_action = "action=accept"; - snprintf(tag_str, sizeof(tag_str) - 1, "handle=tag(%u)", - pr->tag); - } else { - npf_action = "action=drop"; - snprintf(tag_str, sizeof(tag_str) - 1, "handle=tag(%u)", - pr->tag); - } - - res = snprintf(buf, len-1, - "%s %s src-addr=%s/%d %s dst-addr=%s/%d %s %s", - npf_action, proto_str, - saddr_str, sel->prefixlen_s, sport_str, - daddr_str, sel->prefixlen_d, dport_str, tag_str); + flow_cache_invalidate(flow_cache, flow_cache_disabled, true); - if ((res < 0) || (res > (int)(len-2))) { - POLICY_ERR("Failed to format NPF rule from XFRM selector\n"); - return false; - } - return true; + call_rcu(&pr->policy_rule_rcu, policy_rule_rcu_invalidate); } -static void group_name_by_vrf(char *buf, int buflen, int dir, vrfid_t vrf) +static int policy_rule_add_to_hash_tables(struct policy_rule *pr) { - if (dir == XFRM_POLICY_IN) - snprintf(buf, buflen, "in-%d", vrf); - else - snprintf(buf, buflen, "out-%d", vrf); + return policy_rule_add_to_selector_ht(pr); } -static bool policy_rule_update_npf(struct policy_rule *pr) +static int policy_rule_remove_from_hash_tables(struct policy_rule *pr) { - char buffer[POLICY_RULE_BUFSIZE]; - char group_name[ATTACH_GROUP_BUFSIZE]; + int rc; - /* - * Packets are routed into VTI tunnels, - * so we don't have an NPF rule to update. - */ - if (pr->vti_tunnel_policy) - return true; - - group_name_by_vrf(group_name, sizeof(group_name), pr->dir, - vrf_get_external_id(pr->vrfid)); - - if (!policy_rule_build_npf_str(pr, buffer, POLICY_RULE_BUFSIZE)) - return false; - - /* NPF returns 0 on success - this replaces any existing rule */ - int rule_ret = npf_cfg_rule_add(NPF_RULE_CLASS_IPSEC, group_name, - pr->rule_index, buffer); - - if (rule_ret != 0) { - POLICY_ERR("Failed to update rule for %s crypto NPF rule tag " - "%d: %s - errno %d\n", - pr->dir == XFRM_POLICY_IN ? 
"input" : "output", - pr->tag, buffer, -rule_ret); - return false; - } - - POLICY_DEBUG("Updated %s crypto NPF rule index %d: %s\n", - pr->dir == XFRM_POLICY_IN ? "input" : "output", - pr->rule_index, buffer); - - return true; + rc = policy_rule_remove_from_selector_ht(pr); + if (rc < 0) + POLICY_ERR( + "Failed to remove policy rule from hash table:" + "rc %d\n", rc); + return rc; } -#define POL_VRF_STRLEN 16 -static bool policy_rule_add_to_npf(struct policy_rule *pr) +static int policy_rule_update_rldb(struct policy_rule *pr) { - char buffer[POLICY_RULE_BUFSIZE]; - char vrf_buf[POL_VRF_STRLEN]; - char attach_buf[ATTACH_GROUP_BUFSIZE]; - char group_name[ATTACH_GROUP_BUFSIZE]; + int rc; + struct rldb_db_handle *db; + struct rldb_rule_spec rule = { 0 }; struct crypto_vrf_ctx *vrf_ctx; - vrf_ctx = crypto_vrf_get(pr->vrfid); - if (!vrf_ctx) - return false; /* - * Packets are routed into VTI tunnels, so we - * don't need to create NPF rules for them. + * Packets are routed into VTI tunnels, + * so we don't have an rldb rule to update. */ if (pr->vti_tunnel_policy) - return true; + return 0; - if (!policy_rule_build_npf_str(pr, buffer, POLICY_RULE_BUFSIZE)) - return false; + vrf_ctx = crypto_vrf_get(pr->vrfid); + if (!vrf_ctx) + return -EINVAL; - snprintf(vrf_buf, sizeof(vrf_buf), "%d", - vrf_get_external_id(pr->vrfid)); - - bool attach_group = - (vrf_ctx->crypto_live_ipv4_policies + - vrf_ctx->crypto_live_ipv6_policies == 0); - - if (attach_group) { - group_name_by_vrf(attach_buf, sizeof(attach_buf), - XFRM_POLICY_IN, - vrf_get_external_id(pr->vrfid)); - - int attach_ret = - npf_cfg_attach_dir_group( - NPF_ATTACH_TYPE_VRF, vrf_buf, - NPF_RS_IPSEC, NPF_RULE_CLASS_IPSEC, attach_buf, - NPF_RS_FLAG_DIR_IN); - if (attach_ret != 0) { - POLICY_ERR("Failed to attach input group for %s " - "crypto NPF rule tag %d: %s - errno %d\n", - pr->dir == XFRM_POLICY_IN ? 
- "input" : "output", - pr->tag, buffer, -attach_ret); - return false; - } + db = policy_rule_get_rldb(vrf_ctx, pr); + if (!db) + return -ENOENT; - group_name_by_vrf(attach_buf, sizeof(attach_buf), - XFRM_POLICY_OUT, - vrf_get_external_id(pr->vrfid)); - attach_ret = - npf_cfg_attach_dir_group( - NPF_ATTACH_TYPE_VRF, vrf_buf, - NPF_RS_IPSEC, NPF_RULE_CLASS_IPSEC, attach_buf, - NPF_RS_FLAG_DIR_OUT); - if (attach_ret != 0) { - POLICY_ERR("Failed to attach output group for %s " - "crypto NPF rule tag %d: %s - errno %d\n", - pr->dir == XFRM_POLICY_IN ? - "input" : "output", - pr->tag, buffer, -attach_ret); - goto failed_attach_group; - } - POLICY_INFO("Attached NPF groups in VRF %s\n", vrf_buf); + rc = rldb_del_rule(db, pr->rh); + if (rc < 0) { + POLICY_ERR("Failed to update rule %u: %d\n", + pr->rule_index, -rc); + return rc; } - group_name_by_vrf(group_name, sizeof(group_name), pr->dir, - vrf_get_external_id(pr->vrfid)); - - int rule_ret = npf_cfg_rule_add(NPF_RULE_CLASS_IPSEC, group_name, - pr->rule_index, buffer); - - if (rule_ret != 0) { - POLICY_ERR("Failed to add rule for %s crypto NPF rule tag %d: " - "%s - errno %d\n", - pr->dir == XFRM_POLICY_IN ? "input" : "output", - pr->tag, buffer, -rule_ret); - if (attach_group) - goto failed_add_rule; + rc = policy_prepare_rldb_rule(pr, &rule); + if (rc < 0) return false; - } - POLICY_DEBUG("Added %s crypto NPF rule index %d: %s\n", - pr->dir == XFRM_POLICY_IN ? 
"input" : "output", - pr->rule_index, buffer); - - if (pr->sel.family == AF_INET) { - if (++vrf_ctx->crypto_live_ipv4_policies == 1) - pl_node_add_feature_by_inst(&ipv4_ipsec_out_feat, - get_vrf(pr->vrfid)); - POLICY_DEBUG("Active IPv4 policies: %d\n", - vrf_ctx->crypto_live_ipv4_policies); - } else { - if (++vrf_ctx->crypto_live_ipv6_policies == 1) - pl_node_add_feature_by_inst(&ipv6_ipsec_out_feat, - get_vrf(pr->vrfid)); - POLICY_DEBUG("Active IPv6 policies: %d\n", - vrf_ctx->crypto_live_ipv6_policies); + rc = rldb_add_rule(db, pr->rule_index, &rule, &pr->rh); + if (rc < 0) { + POLICY_ERR("Failed to update rule %u: %d\n", + pr->rule_index, -rc); + return rc; } - return true; - -failed_add_rule: - group_name_by_vrf(attach_buf, sizeof(attach_buf), XFRM_POLICY_OUT, - vrf_get_external_id(pr->vrfid)); - npf_cfg_detach_group(NPF_ATTACH_TYPE_VRF, vrf_buf, - NPF_RS_IPSEC, NPF_RULE_CLASS_IPSEC, attach_buf); -failed_attach_group: - group_name_by_vrf(attach_buf, sizeof(attach_buf), XFRM_POLICY_IN, - vrf_get_external_id(pr->vrfid)); - npf_cfg_detach_group(NPF_ATTACH_TYPE_VRF, vrf_buf, - NPF_RS_IPSEC, NPF_RULE_CLASS_IPSEC, attach_buf); - return false; + return 0; } -static bool all_other_policies_can_be_cached(const struct policy_rule *pr) +static bool all_other_policies_can_be_cached(struct crypto_vrf_ctx *vrf_ctx, + const struct policy_rule *pr) { + bool result = true; + const struct policy_rule *check_pr; struct cds_lfht_iter iter; - bool result = true; + struct cds_lfht *ht; + + ht = vrf_ctx->output_policy_rule_sel_ht; - cds_lfht_for_each_entry(output_policy_rule_tag_ht, - &iter, check_pr, tag_ht_node) { + cds_lfht_for_each_entry(ht, &iter, check_pr, sel_ht_node) { if (check_pr == pr) continue; if ((check_pr->sel.sport > 0) || (check_pr->sel.dport > 0)) { @@ -1310,75 +827,41 @@ static bool all_other_policies_can_be_cached(const struct policy_rule *pr) return result; } -static void policy_rule_remove_from_npf(struct policy_rule *pr, +static int 
policy_rule_remove_from_rldb(struct policy_rule *pr, + struct crypto_vrf_ctx *vrf_ctx, bool vti_tunnel_policy, - uint32_t rule_index) + struct rldb_rule_handle *rh) { - char vrf_buf[POL_VRF_STRLEN]; - struct crypto_vrf_ctx *vrf_ctx; - char attach_buf[ATTACH_GROUP_BUFSIZE]; - char group_name[ATTACH_GROUP_BUFSIZE]; + int rc; + struct rldb_db_handle *db; - /* We don't create NPF rules for VTI tunnel policies */ + /* We don't create rldb rules for VTI tunnel policies */ if (vti_tunnel_policy) - return; + return 0; - vrf_ctx = crypto_vrf_find(pr->vrfid); - if (!vrf_ctx) - return; + db = policy_rule_get_rldb(vrf_ctx, pr); + if (!db) { + POLICY_ERR("Failed to delete policy\n"); + return -ENOENT; + } - snprintf(vrf_buf, sizeof(vrf_buf), "%d", - vrf_get_external_id(pr->vrfid)); - - group_name_by_vrf(group_name, sizeof(group_name), pr->dir, - vrf_get_external_id(pr->vrfid)); - - bool detach_group = - (vrf_ctx->crypto_live_ipv4_policies + - vrf_ctx->crypto_live_ipv6_policies == 1); - - int rule_ret = npf_cfg_rule_delete(NPF_RULE_CLASS_IPSEC, group_name, - rule_index, NULL); - - if (rule_ret != 0) - POLICY_ERR("Failed to delete rule for %s crypto NPF rule tag " - "%d - errno %d\n", - pr->dir == XFRM_POLICY_IN ? "input" : "output", - pr->tag, -rule_ret); - else { - POLICY_DEBUG("Removed %s crypto NPF rule tag %d index %d\n", - pr->dir == XFRM_POLICY_IN ? 
"input" : "output", - pr->tag, rule_index); - if (detach_group) { - group_name_by_vrf(attach_buf, sizeof(attach_buf), - XFRM_POLICY_OUT, - vrf_get_external_id(pr->vrfid)); - npf_cfg_detach_group(NPF_ATTACH_TYPE_VRF, vrf_buf, - NPF_RS_IPSEC, NPF_RULE_CLASS_IPSEC, - attach_buf); - - snprintf(attach_buf, sizeof(attach_buf), "in-%d", - vrf_get_external_id(pr->vrfid)); - npf_cfg_detach_group(NPF_ATTACH_TYPE_VRF, vrf_buf, - NPF_RS_IPSEC, NPF_RULE_CLASS_IPSEC, - attach_buf); - POLICY_INFO("Detached NPF groups in VRF %s\n", vrf_buf); - } + rc = rldb_del_rule(db, rh); + if (rc < 0) { + POLICY_ERR("Failed to delete policy from rule database\n"); + return rc; } if (pr->sel.family == AF_INET) { - if (!--vrf_ctx->crypto_live_ipv4_policies) - pl_node_remove_feature_by_inst(&ipv4_ipsec_out_feat, - get_vrf(pr->vrfid)); + --vrf_ctx->crypto_total_ipv4_policies; POLICY_DEBUG("Remaining IPv4 policies: %d\n", - vrf_ctx->crypto_live_ipv4_policies); + vrf_ctx->crypto_total_ipv4_policies); } else { - if (!--vrf_ctx->crypto_live_ipv6_policies) - pl_node_remove_feature_by_inst(&ipv6_ipsec_out_feat, - get_vrf(pr->vrfid)); + --vrf_ctx->crypto_total_ipv6_policies; POLICY_DEBUG("Remaining IPv6 policies: %d\n", - vrf_ctx->crypto_live_ipv6_policies); + vrf_ctx->crypto_total_ipv6_policies); } + + return 0; } static unsigned long policy_bind_sel_hash(const struct xfrm_selector *sel) @@ -1402,37 +885,43 @@ static int policy_bind_sel_match(struct cds_lfht_node *node, const void *key) return policy_rule_sel_eq(sel, &bind->sel); } -static void bind_table_vrf_inc(vrfid_t vrfid) +static int bind_table_vrf_inc(vrfid_t vrfid) { struct crypto_vrf_ctx *vrf_ctx; vrf_ctx = crypto_vrf_get(vrfid); if (!vrf_ctx) - return; + return -EINVAL; vrf_ctx->s2s_bindings++; + return 0; } -static void bind_table_vrf_dec(vrfid_t vrfid) +static int bind_table_vrf_dec(vrfid_t vrfid) { struct crypto_vrf_ctx *vrf_ctx; vrf_ctx = crypto_vrf_get(vrfid); if (!vrf_ctx) - return; + return -EINVAL; vrf_ctx->s2s_bindings--; + return 0; 
} -static struct cds_lfht *bind_table_vrf_get(vrfid_t vrfid) +static int bind_table_vrf_get(vrfid_t vrfid, struct cds_lfht **table) { struct crypto_vrf_ctx *vrf_ctx; vrf_ctx = crypto_vrf_get(vrfid); if (!vrf_ctx) - return NULL; + return -EINVAL; - return vrf_ctx->s2s_bind_hash_table; + if (vrf_ctx->s2s_bind_hash_table) { + *table = vrf_ctx->s2s_bind_hash_table; + return 0; + } + return -ENOENT; } static void policy_bind_free(struct rcu_head *rcu_head) @@ -1443,20 +932,24 @@ static void policy_bind_free(struct rcu_head *rcu_head) free(bind); } -static void policy_bind_del(struct s2s_binding *bind) +static int policy_bind_del(struct s2s_binding *bind) { struct cds_lfht *bind_table; + int rc; - bind_table = bind_table_vrf_get(bind->vrfid); - - if (!bind_table) { + rc = bind_table_vrf_get(bind->vrfid, &bind_table); + if (rc < 0) { POLICY_ERR("Failed to get binding table for del\n"); - return; + return rc; } cds_lfht_del(bind_table, &bind->bind_ht_node); - bind_table_vrf_dec(bind->vrfid); + rc = bind_table_vrf_dec(bind->vrfid); + if (rc < 0) + POLICY_ERR("Failed to dec vrf binding table\n"); call_rcu(&bind->bind_rcu_head, policy_bind_free); + + return rc; } static struct s2s_binding *policy_bind_lookup(vrfid_t vrfid, @@ -1465,10 +958,10 @@ static struct s2s_binding *policy_bind_lookup(vrfid_t vrfid, struct cds_lfht *bind_table; struct cds_lfht_node *node; struct cds_lfht_iter iter; + int rc; - bind_table = bind_table_vrf_get(vrfid); - - if (!bind_table) { + rc = bind_table_vrf_get(vrfid, &bind_table); + if (rc < 0) { POLICY_ERR("Failed to get binding table for lookup\n"); return NULL; } @@ -1493,11 +986,13 @@ static void policy_bind_feat_attach(vrfid_t vrfid, struct xfrm_mark mark; struct ifnet *ifp; struct pr_feat_attach *attach; + int rc; mark.v = mark.m = 0; - pr = policy_rule_find_by_selector(vrfid, sel, &mark, XFRM_POLICY_OUT); + rc = policy_rule_find_by_selector(vrfid, sel, &mark, XFRM_POLICY_OUT, + &pr); - if (!pr) { + if (rc != 0) { POLICY_DEBUG("Failed bind 
lookup for policy\n"); return; } @@ -1511,7 +1006,7 @@ static void policy_bind_feat_attach(vrfid_t vrfid, rcu_assign_pointer(pr->feat_attach, attach); } - ifp = ifnet_byifindex(ifindex); + ifp = dp_ifnet_byifindex(ifindex); if (!ifp) { POLICY_DEBUG("Failed bind lookup for ifi %u\n", ifindex); return; @@ -1522,10 +1017,7 @@ static void policy_bind_feat_attach(vrfid_t vrfid, return; } - if (pr->sel.family == AF_INET) - nh4_set_ifp(&pr->feat_attach->next.nh, ifp); - else - nh6_set_ifp(&pr->feat_attach->next.nh6, ifp); + nh_set_ifp(&pr->feat_attach->nh, ifp); /* * If there are any SAs already present for this policy, we @@ -1554,53 +1046,188 @@ static void policy_update_pending_vfp_bind(vrfid_t vrfid, static uint32_t crypto_npf_cfg_commit_count; static struct rte_timer crypto_npf_cfg_commit_all_timer; +#define CRYPTO_NPF_CFG_COMMIT_FORCE_COUNT 2000 + +static uint32_t batch_seq[CRYPTO_NPF_CFG_COMMIT_FORCE_COUNT]; +static struct policy_rule *batch_pr[CRYPTO_NPF_CFG_COMMIT_FORCE_COUNT]; + +static int crypto_policy_rldb_commit(struct crypto_vrf_ctx *vrf_ctx) +{ + int rc_ret, rc = 0; + + rc_ret = rldb_commit_transaction(vrf_ctx->input_policy_v4_rldb); + if (rc_ret < 0) { + POLICY_ERR("Policy commit for IPv4 inbound failed\n"); + rc = rc_ret; + } + rldb_start_transaction(vrf_ctx->input_policy_v4_rldb); + + rc_ret = rldb_commit_transaction(vrf_ctx->output_policy_v4_rldb); + if (rc_ret < 0) { + POLICY_ERR("Policy commit for IPv4 outbound failed\n"); + rc = rc_ret; + } + rldb_start_transaction(vrf_ctx->output_policy_v4_rldb); + + rc_ret = rldb_commit_transaction(vrf_ctx->input_policy_v6_rldb); + if (rc_ret < 0) { + POLICY_ERR("Policy commit for IPv6 inbound failed\n"); + rc = rc_ret; + } + rldb_start_transaction(vrf_ctx->input_policy_v6_rldb); + + rc_ret = rldb_commit_transaction(vrf_ctx->output_policy_v6_rldb); + if (rc_ret < 0) { + POLICY_ERR("Policy commit for IPv6 outbound failed\n"); + rc = rc_ret; + } + rldb_start_transaction(vrf_ctx->output_policy_v6_rldb); + + return 
rc; +} + +void crypto_npf_cfg_commit_flush(void) +{ + vrfid_t vrf_id; + struct vrf *vrf; + struct crypto_vrf_ctx *vrf_ctx; + uint32_t i; + int rc = 0; + + VRF_FOREACH(vrf, vrf_id) { + vrf_ctx = crypto_vrf_find(vrf_id); + if (!vrf_ctx) + continue; + + rc = (crypto_policy_rldb_commit(vrf_ctx)); + + /* Enable IPsec in IPv4 feature pipeline and + * update live count. + */ + + if (vrf_ctx->crypto_live_ipv4_policies == 0 && + vrf_ctx->crypto_total_ipv4_policies > 0) { + + pl_node_add_feature_by_inst(&ipv4_ipsec_out_feat, vrf); + + } else if (vrf_ctx->crypto_live_ipv4_policies > 0 && + vrf_ctx->crypto_total_ipv4_policies == 0) { + + pl_node_remove_feature_by_inst(&ipv4_ipsec_out_feat, + vrf); + } + + vrf_ctx->crypto_live_ipv4_policies = + vrf_ctx->crypto_total_ipv4_policies; + + + /* Enable IPsec in IPv6 feature pipeline and + * update live count. + */ + + if (vrf_ctx->crypto_live_ipv6_policies == 0 && + vrf_ctx->crypto_total_ipv6_policies > 0) { + + pl_node_add_feature_by_inst(&ipv6_ipsec_out_feat, vrf); + + } else if (vrf_ctx->crypto_live_ipv6_policies > 0 && + vrf_ctx->crypto_total_ipv6_policies == 0) { + + pl_node_remove_feature_by_inst(&ipv6_ipsec_out_feat, + vrf); + } + + vrf_ctx->crypto_live_ipv6_policies = + vrf_ctx->crypto_total_ipv6_policies; + } + + /* + * There is an assumption that npf_cfg_commit_all completed + * successfully as there is no return value. Any issues should + * have been caught when the individual policies were added + * at which point an error should have been returned to the + * xfrm source. 
+ */ + for (i = 0; i < crypto_npf_cfg_commit_count ; i++) { + if (batch_pr[i]) + batch_pr[i]->flags &= + ~(POLICY_F_PENDING_ADD | + POLICY_F_PENDING_UPDATE); + if (xfrm_direct) + xfrm_client_send_ack(batch_seq[i], rc); + } + + crypto_npf_cfg_commit_count = 0; + flow_cache_invalidate(flow_cache, flow_cache_disabled, + false); + +} + static void crypto_npf_cfg_commit_all_timer_handler( struct rte_timer *timer __rte_unused, void *arg __rte_unused) { - ASSERT_MASTER(); + ASSERT_MAIN(); if (crypto_npf_cfg_commit_count) - npf_cfg_commit_all(); - - crypto_npf_cfg_commit_count = 0; + crypto_npf_cfg_commit_flush(); } -#define CRYPTO_NPF_CFG_COMMIT_FORCE_COUNT 100 /* * As the npf commit is slow and does a rebuild of the entire state * batch up the calls to it. This can possibly delay the application of * a rule, but overall will be much faster. */ -static void crypto_npf_cfg_commit_all(struct policy_rule *pr __unused) +static void crypto_npf_cfg_commit_all(struct policy_rule *pr, + uint32_t seq) { - ASSERT_MASTER(); + ASSERT_MAIN(); - if (crypto_npf_cfg_commit_count == 0) { + /* + * If the xfrm_direct path is not programming the classifier + * then no batch completed will be signal and so the existing + * timer based mechanism is required to commit the policies. 
+ */ + if (!xfrm_direct && crypto_npf_cfg_commit_count == 0) { rte_timer_reset(&crypto_npf_cfg_commit_all_timer, rte_get_timer_hz(), SINGLE, rte_get_master_lcore(), crypto_npf_cfg_commit_all_timer_handler, NULL); } + + if (xfrm_direct) + batch_seq[crypto_npf_cfg_commit_count] = seq; + else + batch_seq[crypto_npf_cfg_commit_count] = 0; + + if (!(pr->flags & POLICY_F_PENDING_DEL)) + batch_pr[crypto_npf_cfg_commit_count] = pr; + else + batch_pr[crypto_npf_cfg_commit_count] = NULL; crypto_npf_cfg_commit_count++; /* Force the commit if we have batched up too many */ - if (crypto_npf_cfg_commit_count == CRYPTO_NPF_CFG_COMMIT_FORCE_COUNT) { - npf_cfg_commit_all(); - crypto_npf_cfg_commit_count = 0; - } + if (crypto_npf_cfg_commit_count == CRYPTO_NPF_CFG_COMMIT_FORCE_COUNT) + crypto_npf_cfg_commit_flush(); } -static bool +static int policy_rule_update(struct policy_rule *pr, + struct crypto_vrf_ctx *vrf_ctx, const struct xfrm_userpolicy_info *usr_policy, const xfrm_address_t *dst, const struct xfrm_user_tmpl *tmpl, - const struct xfrm_mark *mark) + const struct xfrm_mark *mark, + uint32_t seq, + bool *send_ack) { bool was_vti_policy = pr->vti_tunnel_policy; bool changed = false; + int rc; + + *send_ack = true; if (usr_policy->dir == XFRM_POLICY_OUT) { + if ((usr_policy->action == XFRM_POLICY_ALLOW) && (pr->action == XFRM_POLICY_BLOCK)) { /* @@ -1611,7 +1238,7 @@ policy_rule_update(struct policy_rule *pr, if (!tmpl || !dst) { POLICY_ERR( "Policy update to allow ignored: missing TMPL or destination\n"); - return false; + return -EINVAL; } policy_rule_set_mark(pr, mark); @@ -1629,9 +1256,8 @@ policy_rule_update(struct policy_rule *pr, pr->output_peer_af, pr->mark.v); else - crypto_sadb_peer_overhead_unsubscribe( - &pr->output_peer, - pr->output_peer_af, + crypto_sadb_tunl_overhead_unsubscribe( + pr->reqid, &pr->overhead, pr->vrfid); @@ -1650,41 +1276,53 @@ policy_rule_update(struct policy_rule *pr, } if (pr->policy_priority != usr_policy->priority) { - uint32_t old_rule_index 
= pr->rule_index; - + struct rldb_rule_handle *old_rh = pr->rh; /* * Since the priority of the policy has changed, we must - * insert a new NPF rule with an index that is based on + * insert a new rldb rule with an index that is based on * the new priority and remove the old rule. */ pr->policy_priority = usr_policy->priority; - /* - * Policy priority is not unique, so the - * rule index must include the tag. - */ - pr->rule_index = (pr->policy_priority << PR_TAG_SIZE) + pr->tag; + pr->rule_index = usr_policy->index; - if (!policy_rule_add_to_npf(pr)) { + rc = policy_rule_remove_from_rldb(pr, vrf_ctx, was_vti_policy, + old_rh); + if (rc < 0) { POLICY_ERR( - "Failed to add updated policy rule to NPF\n"); - return false; + "Failed to del policy rule from rldb\n"); + return rc; } - policy_rule_remove_from_npf(pr, was_vti_policy, - old_rule_index); - crypto_npf_cfg_commit_all(pr); + rc = policy_rule_add_to_rldb(vrf_ctx, pr); + if (rc < 0) { + POLICY_ERR( + "Failed to add updated policy rule to rldb\n"); + return rc; + } + + *send_ack = false; + pr->flags |= POLICY_F_PENDING_UPDATE; + crypto_npf_cfg_commit_all(pr, seq); if ((pr->dir == XFRM_POLICY_OUT) && (!was_vti_policy || !pr->vti_tunnel_policy)) - pr_cache_invalidate(policy_cache_disabled, false); + flow_cache_invalidate(flow_cache, flow_cache_disabled, + false); } else if (changed) { - policy_rule_update_npf(pr); - crypto_npf_cfg_commit_all(pr); + rc = policy_rule_update_rldb(pr); + if (rc < 0) { + POLICY_ERR("Failed to update rldb rule %u\n", + pr->rule_index); + return rc; + } + pr->flags |= POLICY_F_PENDING_UPDATE; + crypto_npf_cfg_commit_all(pr, seq); + *send_ack = false; } /* Check if this update means we need to rebind */ policy_update_pending_vfp_bind(pr->vrfid, pr); - return true; + return 0; } /* @@ -1698,50 +1336,70 @@ int crypto_policy_add(const struct xfrm_userpolicy_info *usr_policy, const xfrm_address_t *dst, const struct xfrm_user_tmpl *tmpl, const struct xfrm_mark *mark, - vrfid_t vrfid) + vrfid_t 
vrfid, + uint32_t seq, + bool *send_ack) { struct policy_rule *pr; + struct crypto_vrf_ctx *vrf_ctx; + int rc; + + vrf_ctx = crypto_vrf_get(vrfid); + if (!vrf_ctx) + return -EINVAL; - pr = policy_rule_find_by_selector(vrfid, &usr_policy->sel, mark, - usr_policy->dir); - if (pr) { - if (!policy_rule_update(pr, usr_policy, dst, tmpl, mark)) { + *send_ack = true; + + rc = policy_rule_find_by_selector(vrfid, &usr_policy->sel, mark, + usr_policy->dir, &pr); + if (rc == 0 && pr) { + rc = policy_rule_update(pr, vrf_ctx, usr_policy, dst, tmpl, + mark, seq, send_ack); + if (rc < 0) { POLICY_ERR( "Policy add failed to update existing policy\n"); - return -1; + return rc; } - - return 1; + return 0; } + if (rc != -ESRCH) + return rc; - pr = policy_rule_create(usr_policy, tmpl, mark, dst, vrfid); - if (!pr) { + rc = policy_rule_create(usr_policy, tmpl, mark, dst, vrfid, &pr); + if (rc < 0) { POLICY_ERR("Failed to create policy rule\n"); - return -1; + return rc; } - if (!policy_rule_add_to_hash_tables(pr)) { + pr->flags = POLICY_F_PENDING_ADD; + + rc = policy_rule_add_to_hash_tables(pr); + if (rc != 0) { POLICY_ERR("Failed to add policy rule to hash tables\n"); policy_rule_destroy(pr); - return -1; + return rc; } - if (!policy_rule_add_to_npf(pr)) { - POLICY_ERR("Failed to add policy rule NPF filter\n"); - policy_rule_remove_from_hash_tables(pr); + rc = policy_rule_add_to_rldb(vrf_ctx, pr); + if (rc < 0) { + POLICY_ERR("Failed to add policy rule to rldb\n"); + (void)policy_rule_remove_from_hash_tables(pr); policy_rule_destroy(pr); - return -1; + return rc; } - crypto_npf_cfg_commit_all(pr); + + *send_ack = false; + crypto_npf_cfg_commit_all(pr, seq); /* * Any policy rule added, where the port is specified as part of the * selection criteria, the cache is disabled. 
*/ if (pr->dir == XFRM_POLICY_OUT) { - if (!policy_cache_disabled) { - policy_cache_disabled = ((pr->sel.sport > 0) || - (pr->sel.dport > 0)); - pr_cache_invalidate(policy_cache_disabled, false); + if (!flow_cache_disabled) { + flow_cache_disabled = ((pr->sel.sport > 0) || + (pr->sel.dport > 0)); + flow_cache_invalidate(flow_cache, flow_cache_disabled, + false); } /* @@ -1751,7 +1409,7 @@ int crypto_policy_add(const struct xfrm_userpolicy_info *usr_policy, policy_update_pending_vfp_bind(vrfid, pr); } - return 1; + return 0; } /* @@ -1765,13 +1423,21 @@ int crypto_policy_update(const struct xfrm_userpolicy_info *usr_policy, const xfrm_address_t *dst, const struct xfrm_user_tmpl *tmpl, const struct xfrm_mark *mark, - vrfid_t vrfid) + vrfid_t vrfid, + uint32_t seq, + bool *send_ack) { struct policy_rule *pr; + struct crypto_vrf_ctx *vrf_ctx; + int rc; - pr = policy_rule_find_by_selector(vrfid, &usr_policy->sel, mark, - usr_policy->dir); - if (!pr) { + vrf_ctx = crypto_vrf_get(vrfid); + if (!vrf_ctx) + return -EINVAL; + + rc = policy_rule_find_by_selector(vrfid, &usr_policy->sel, mark, + usr_policy->dir, &pr); + if (rc == -ESRCH) { POLICY_INFO("Could not update policy: Not found\n"); /* @@ -1779,15 +1445,18 @@ int crypto_policy_update(const struct xfrm_userpolicy_info *usr_policy, * restart and the controller collapsed the add, so * treat it like an add now. 
*/ - return crypto_policy_add(usr_policy, dst, tmpl, mark, vrfid); + return crypto_policy_add(usr_policy, dst, tmpl, mark, vrfid, + seq, send_ack); } + if (rc != 0) + return rc; - if (!policy_rule_update(pr, usr_policy, dst, tmpl, mark)) { + rc = policy_rule_update(pr, vrf_ctx, usr_policy, dst, tmpl, mark, + seq, send_ack); + if (rc < 0) POLICY_ERR("Failed to update existing policy\n"); - return -1; - } - return 1; + return rc; } /* @@ -1797,21 +1466,27 @@ int crypto_policy_update(const struct xfrm_userpolicy_info *usr_policy, * * MUST be called from the main thread */ -void crypto_policy_delete(const struct xfrm_userpolicy_id *id, - const struct xfrm_mark *mark, - vrfid_t vrfid) +static int crypto_policy_delete_internal(struct policy_rule *pr, + struct crypto_vrf_ctx *vrf_ctx, + uint32_t seq, bool ack) { - struct policy_rule *pr; + int rc; - pr = policy_rule_find_by_selector(vrfid, &id->sel, mark, id->dir); - if (!pr) { - /* This is a legitimate outcome on DP restart */ - POLICY_INFO("Cannot delete policy: not found\n"); - return; + rc = policy_rule_remove_from_rldb(pr, vrf_ctx, pr->vti_tunnel_policy, + pr->rh); + if (rc < 0) + return rc; + /* + * Is the policy is being purged as the result of a flush + * style event?. If so no ack needs to be generated, as the + * event is not called due to the reception of a policy delete + * from strongswan. + */ + if (ack) { + pr->flags |= POLICY_F_PENDING_DEL; + crypto_npf_cfg_commit_all(pr, seq); } - policy_rule_remove_from_npf(pr, pr->vti_tunnel_policy, pr->rule_index); - crypto_npf_cfg_commit_all(pr); if (pr->dir == XFRM_POLICY_OUT) { /* * The cache is disabled any time there is a policy that @@ -1819,19 +1494,70 @@ void crypto_policy_delete(const struct xfrm_userpolicy_id *id, * we're deleting is one such, we may be able to enable * it if it was the only one. 
*/ - if (policy_cache_disabled && + if (flow_cache_disabled && ((pr->sel.sport > 0) || (pr->sel.dport > 0)) && - all_other_policies_can_be_cached(pr)) - policy_cache_disabled = false; + all_other_policies_can_be_cached(vrf_ctx, pr)) + flow_cache_disabled = false; - if (!policy_cache_disabled) - pr_cache_invalidate(policy_cache_disabled, false); + if (!flow_cache_disabled) + flow_cache_invalidate(flow_cache, flow_cache_disabled, + false); } - policy_rule_remove_from_hash_tables(pr); + rc = policy_rule_remove_from_hash_tables(pr); policy_rule_destroy(pr); - crypto_vrf_check_remove(crypto_vrf_find(vrfid)); + crypto_vrf_check_remove(vrf_ctx); + + return rc; +} + +int crypto_policy_delete(const struct xfrm_userpolicy_id *id, + const struct xfrm_mark *mark, + vrfid_t vrfid, + uint32_t seq, bool *send_ack) +{ + struct policy_rule *pr; + struct crypto_vrf_ctx *vrf_ctx; + int rc; + + vrf_ctx = crypto_vrf_find(vrfid); + if (!vrf_ctx) + return -EINVAL; + + rc = policy_rule_find_by_selector(vrfid, &id->sel, mark, id->dir, + &pr); + if (rc == -ESRCH) { + /* + * Might have been removed by a flush, + * or never received if there was a dp restart + */ + *send_ack = true; + return 0; + } + + if (rc == 0) + rc = crypto_policy_delete_internal(pr, vrf_ctx, seq, true); + + return rc; +} + +void crypto_policy_flush_vrf(struct crypto_vrf_ctx *vrf_ctx) +{ + struct cds_lfht_iter iter; + struct policy_rule *pr; + + POLICY_DEBUG("Flush all policies for VRF %d\n", vrf_ctx->vrfid); + + cds_lfht_for_each_entry(vrf_ctx->input_policy_rule_sel_ht, + &iter, pr, sel_ht_node) { + (void)crypto_policy_delete_internal(pr, vrf_ctx, 0, false); + } + + cds_lfht_for_each_entry(vrf_ctx->output_policy_rule_sel_ht, + &iter, pr, sel_ht_node) { + (void)crypto_policy_delete_internal(pr, vrf_ctx, 0, false); + } } int crypto_policy_get_vti_reqid(vrfid_t vrfid, @@ -1841,6 +1567,7 @@ int crypto_policy_get_vti_reqid(vrfid_t vrfid, struct xfrm_selector sel; struct policy_rule *pr; struct xfrm_mark mark; + int rc; if 
(!peer || !reqid) { POLICY_ERR("Bad parameters on VTI reqid lookup\n"); @@ -1852,8 +1579,9 @@ int crypto_policy_get_vti_reqid(vrfid_t vrfid, mark.v = mark_value; mark.m = 0; - pr = policy_rule_find_by_selector(vrfid, &sel, &mark, XFRM_POLICY_OUT); - if (!pr) { + rc = policy_rule_find_by_selector(vrfid, &sel, &mark, XFRM_POLICY_OUT, + &pr); + if (rc != 0) { POLICY_DEBUG("Policy not found for VTI reqid lookup\n"); return -1; } @@ -1902,16 +1630,15 @@ crypto_policy_handle_packet_outbound_checks(struct rte_mbuf *mbuf, { struct vrf *vrf = vrf_get_rcu_fast(VRF_DEFAULT_ID); struct next_hop *nxt = NULL; - struct next_hop_v6 *nxt6 = NULL; /* Currently only support underlay in default vrf */ if (pr->output_peer_af == AF_INET) { nxt = rt_lookup_fast(vrf, (in_addr_t)(pr->output_peer.a4), tbl_id, mbuf); } else { - nxt6 = rt6_lookup_fast(vrf, - (struct in6_addr *)(&pr->output_peer.a6), - tbl_id, mbuf); + nxt = rt6_lookup_fast(vrf, + (struct in6_addr *)(&pr->output_peer.a6), + tbl_id, mbuf); } /* @@ -1919,40 +1646,22 @@ crypto_policy_handle_packet_outbound_checks(struct rte_mbuf *mbuf, * broadcast route, the encrypted packet would be dropped in * ip_lookup_and_originate so drop it early here. */ - if (pr->output_peer_af == AF_INET) { - if (!nxt) { - *no_next_hop = true; - return; - } - if (nxt->flags & (RTF_BLACKHOLE | RTF_BROADCAST)) { - *bh_or_bc = true; - return; - } - } else { - if (!nxt6) { - *no_next_hop = true; - return; - } - if (nxt6->flags & (RTF_BLACKHOLE | RTF_BROADCAST)) { - *bh_or_bc = true; - return; - } + if (!nxt) { + *no_next_hop = true; + return; + } + if (nxt->flags & (RTF_BLACKHOLE | RTF_BROADCAST)) { + *bh_or_bc = true; + return; } /* * Filter reject routes out now. If we hit this post encryption * we won't be able to send the ICMP error back to the source. 
*/ - if (pr->output_peer_af == AF_INET) { - if (unlikely(nxt->flags & RTF_REJECT)) { - *reject = true; - return; - } - } else { - if (unlikely(nxt6->flags & RTF_REJECT)) { - *reject = true; - return; - } + if (unlikely(nxt->flags & RTF_REJECT)) { + *reject = true; + return; } /* @@ -1961,31 +1670,20 @@ crypto_policy_handle_packet_outbound_checks(struct rte_mbuf *mbuf, * packet before encryption. For other destinations * we allow the packet to be fragmented post encryption. */ - if (pr->output_peer_af == AF_INET) - *nxt_ifp = nh4_get_ifp(nxt); - else - *nxt_ifp = nh6_get_ifp(nxt6); - + *nxt_ifp = dp_nh_get_ifp(nxt); if (!*nxt_ifp) return; - if (pr->output_peer_af == AF_INET) { - if (!(nxt->flags & RTF_SLOWPATH)) { - *not_slowpath = true; - return; - } - } else { - if (!(nxt6->flags & RTF_SLOWPATH)) { - *not_slowpath = true; - return; - } + if (!(nxt->flags & RTF_SLOWPATH)) { + *not_slowpath = true; + return; } } /* * crypto_policy_handle_packet_outbound() * - * Handle a packet that has matched the NPF rule for an IPsec output policy. + * Handle a packet that has matched the rldb rule for an IPsec output policy. * * This function always consumes the packet, either dropping it on an error * or queuing it to the crypto thread for encryption. @@ -2098,13 +1796,12 @@ crypto_policy_handle_packet_outbound(struct ifnet *vfp_ifp, drop: rte_pktmbuf_free(mbuf); - return; } /* * crypto_policy_handle_packet6_outbound() * - * Handle a packet that has matched the NPF rule for an IPsec output policy. + * Handle a packet that has matched the rldb rule for an IPsec output policy. * * This function always consumes the packet, either dropping it on an error * or queuing it to the crypto thread for encryption. 
@@ -2317,15 +2014,8 @@ static void policy_rule_to_json(json_writer_t *wr, jsonw_uint_field(wr, "mark_m", pr->mark.m); jsonw_uint_field(wr, "index", pr->rule_index); - if (pr->sel.family == AF_INET && pr->feat_attach) { - ifp = nh4_get_ifp(&pr->feat_attach->next.nh); - if (ifp) - jsonw_string_field(wr, "virtual-feature-point", - ifp->if_name); - } - - if (pr->sel.family == AF_INET6 && pr->feat_attach) { - ifp = nh6_get_ifp(&pr->feat_attach->next.nh6); + if (pr->feat_attach) { + ifp = dp_nh_get_ifp(&pr->feat_attach->nh); if (ifp) jsonw_string_field(wr, "virtual-feature-point", ifp->if_name); @@ -2361,7 +2051,7 @@ void crypto_policy_bind_show_summary(FILE *f, vrfid_t vrfid) jsonw_uint_field(wr, "virtual-feature-point_ifi", bind->ifindex); - ifp = ifnet_byifindex(bind->ifindex); + ifp = dp_ifnet_byifindex(bind->ifindex); if (ifp) jsonw_string_field(wr, "virtual-feature-point_name", ifp->if_name); @@ -2370,12 +2060,13 @@ void crypto_policy_bind_show_summary(FILE *f, vrfid_t vrfid) jsonw_destroy(&wr); } -void crypto_policy_show_summary(FILE *f, vrfid_t vrfid) +void crypto_policy_show_summary(FILE *f, vrfid_t vrfid, bool brief) { json_writer_t *wr; + struct crypto_vrf_ctx *vrf_ctx; const struct policy_rule *pr; + struct cds_lfht *ht; struct cds_lfht_iter iter; - struct crypto_vrf_ctx *vrf_ctx; vrf_ctx = crypto_vrf_find_external(vrfid); @@ -2392,38 +2083,80 @@ void crypto_policy_show_summary(FILE *f, vrfid_t vrfid) jsonw_start_object(wr); jsonw_uint_field(wr, "rekey_requests", crypto_rekey_requests); jsonw_end_object(wr); - jsonw_name(wr, "policy_count"); + jsonw_name(wr, "total_policy_count"); + jsonw_start_object(wr); + jsonw_uint_field(wr, "ipv4", vrf_ctx ? + vrf_ctx->crypto_total_ipv4_policies : 0); + jsonw_uint_field(wr, "ipv6", vrf_ctx ? + vrf_ctx->crypto_total_ipv6_policies : 0); + jsonw_end_object(wr); + jsonw_name(wr, "live_policy_count"); jsonw_start_object(wr); jsonw_uint_field(wr, "ipv4", vrf_ctx ? 
vrf_ctx->crypto_live_ipv4_policies : 0); jsonw_uint_field(wr, "ipv6", vrf_ctx ? vrf_ctx->crypto_live_ipv6_policies : 0); jsonw_end_object(wr); - jsonw_name(wr, "policies"); - jsonw_start_array(wr); - cds_lfht_for_each_entry(output_policy_rule_tag_ht, &iter, pr, - tag_ht_node) { - if (vrf_get_external_id(pr->vrfid) == vrfid) - policy_rule_to_json(wr, pr); - } + if (!brief) { + jsonw_name(wr, "policies"); + jsonw_start_array(wr); + + ht = vrf_ctx ? vrf_ctx->output_policy_rule_sel_ht : NULL; + if (ht) { + cds_lfht_for_each_entry(ht, &iter, pr, sel_ht_node) { + if (pr->flags & POLICY_F_PENDING_ADD) + continue; + if (dp_vrf_get_external_id(pr->vrfid) == vrfid) + policy_rule_to_json(wr, pr); + } + } - cds_lfht_for_each_entry(input_policy_rule_tag_ht, &iter, pr, - tag_ht_node) { - if (vrf_get_external_id(pr->vrfid) == vrfid) - policy_rule_to_json(wr, pr); + ht = vrf_ctx ? vrf_ctx->input_policy_rule_sel_ht : NULL; + if (ht) { + cds_lfht_for_each_entry(ht, &iter, pr, sel_ht_node) { + if (pr->flags & POLICY_F_PENDING_ADD) + continue; + if (dp_vrf_get_external_id(pr->vrfid) == vrfid) + policy_rule_to_json(wr, pr); + } + } + jsonw_end_array(wr); } - jsonw_end_array(wr); jsonw_end_object(wr); jsonw_destroy(&wr); } +static void +crypto_flow_cache_dump_entry(struct flow_cache_entry *entry, + bool detail, json_writer_t *wr) +{ + struct policy_rule *pr = NULL; + union crypto_ctx ctx; + + if (!detail) + return; + + flow_cache_entry_get_info(entry, + (void **)&pr, + &ctx.context); + if (pr) { + jsonw_uint_field(wr, "PR_index", + pr->rule_index); + jsonw_uint_field(wr, "PR_Tag", 0); + } + jsonw_uint_field(wr, "IN_rule_checked", + ctx.in_rule_checked); + jsonw_uint_field(wr, "IN_rule_drop", + ctx.in_rule_drop); + jsonw_uint_field(wr, "NO_rule_fwd", + ctx.no_rule_fwd); +} + void crypto_show_cache(FILE *f, const char *str) { - int i; json_writer_t *wr = jsonw_new(f); - char addrbuf[INET6_ADDRSTRLEN]; bool detail = (str ? 
strcmp(str, "detail") == 0 : 0); if (!wr) @@ -2431,67 +2164,7 @@ void crypto_show_cache(FILE *f, const char *str) jsonw_pretty(wr, true); jsonw_name(wr, "IPsec-Cache"); - jsonw_start_object(wr); - jsonw_start_array(wr); - - RTE_LCORE_FOREACH(i) { - struct policy_cache_rule *pr_cache; - struct cds_lfht_iter iter; - struct cds_lfht *table; - struct crypto_pkt_buffer *cpb = rcu_dereference(cpbdb[i]); - bool disabled = false; - - jsonw_uint_field(wr, "core_id", i); - - if (!cpb) { - disabled = true; - } else { - table = rcu_dereference(cpb->pr_cache_tbl); - if (!table) - disabled = true; - } - if (disabled) { - jsonw_string_field(wr, "pr_cache", "disabled"); - continue; - } - jsonw_string_field(wr, "pr_cache", "enabled"); - jsonw_start_object(wr); - jsonw_uint_field(wr, "PR_Cache_count", - rte_atomic16_read(&cpb->pr_cache_count)); - jsonw_end_object(wr); - if (!detail) - continue; - - jsonw_start_array(wr); - cds_lfht_for_each_entry(table, &iter, - pr_cache, pr_node) { - jsonw_start_object(wr); - jsonw_string_field(wr, "dst", - inet_ntop(AF_INET, - &pr_cache->key.dst, - addrbuf, - sizeof(addrbuf))); - jsonw_string_field(wr, "src", - inet_ntop(AF_INET, - &pr_cache->key.src, - addrbuf, - sizeof(addrbuf))); - jsonw_uint_field(wr, "proto", pr_cache->key.proto); - jsonw_uint_field(wr, "PR_index", - pr_cache->pr->rule_index); - jsonw_uint_field(wr, "PR_Tag", - pr_cache->pr->tag); - jsonw_uint_field(wr, "IN_rule_checked", - pr_cache->in_rule_checked); - jsonw_uint_field(wr, "IN_rule_drop", - pr_cache->in_rule_drop); - jsonw_end_object(wr); - } - jsonw_end_array(wr); - } - - jsonw_end_array(wr); - jsonw_end_object(wr); + flow_cache_dump(flow_cache, wr, detail, crypto_flow_cache_dump_entry); jsonw_destroy(&wr); } @@ -2502,90 +2175,11 @@ void crypto_show_cache(FILE *f, const char *str) */ int crypto_policy_init(void) { - if (!tagmap_init(&policy_tagmap)) { - POLICY_ERR("Failed to initialise policy rule bitmap\n"); - return -1; - } - - /* - * Create hash tables for input policy 
rule structures - */ - input_policy_rule_tag_ht = cds_lfht_new(POLICY_RULE_HT_MIN_BUCKETS, - POLICY_RULE_HT_MIN_BUCKETS, - POLICY_RULE_HT_MAX_BUCKETS, - CDS_LFHT_AUTO_RESIZE, - NULL); - if (!input_policy_rule_tag_ht) { - POLICY_ERR("Failed to allocate policy rule tag hash table\n"); - return -1; - } - - /* - * Create hash tables for output policy rule structures - */ - output_policy_rule_tag_ht = cds_lfht_new(POLICY_RULE_HT_MIN_BUCKETS, - POLICY_RULE_HT_MIN_BUCKETS, - POLICY_RULE_HT_MAX_BUCKETS, - CDS_LFHT_AUTO_RESIZE, - NULL); - if (!output_policy_rule_tag_ht) { - POLICY_ERR("Failed to allocate policy rule tag hash table\n"); - return -1; - } - rte_timer_init(&crypto_npf_cfg_commit_all_timer); return 0; } -bool crypto_policy_outbound_match(struct ifnet *in_ifp, struct rte_mbuf **mbuf, - uint16_t ether) -{ - struct npf_config *npf_conf = vrf_get_npf_conf_rcu(in_ifp->if_vrfid); - - if (!npf_active(npf_conf, NPF_IPSEC)) - return false; - - const npf_ruleset_t *ruleset - = npf_get_ruleset(npf_conf, NPF_RS_IPSEC); - npf_result_t result - = npf_hook_notrack(ruleset, mbuf, in_ifp, PFIL_OUT, 0, ether); - - return (result.decision != NPF_DECISION_UNMATCHED); -} - -bool crypto_policy_outbound_active(struct ifnet *in_ifp, struct rte_mbuf **mbuf, - uint32_t *af, void **addr, uint16_t eth_type) -{ - struct npf_config *npf_conf = vrf_get_npf_conf_rcu(in_ifp->if_vrfid); - struct policy_rule *pr; - npf_result_t result; - - if (npf_active(npf_conf, NPF_IPSEC)) { - result = npf_hook_notrack(npf_get_ruleset(npf_conf, - NPF_RS_IPSEC), mbuf, in_ifp, - PFIL_OUT, 0, eth_type); - if (likely(result.decision == NPF_DECISION_UNMATCHED)) - return false; - - /* Only in the case of an ALLOW policy do we set a tag */ - if (unlikely(!result.tag_set)) { - *af = 0; - return true; - } - - pr = policy_rule_find_by_tag(result.tag, XFRM_POLICY_OUT); - if (unlikely(!pr)) - return false; - - *af = pr->output_peer_af; - *addr = &pr->output_peer; - - return true; - } - return false; -} - /* * Encrypt and 
output a packet on a s2s virtual feature point interface. */ @@ -2642,40 +2236,62 @@ crypto_policy_post_features_outbound(struct ifnet *vfp_ifp, */ bool crypto_policy_check_outbound(struct ifnet *in_ifp, struct rte_mbuf **mbuf, uint32_t tbl_id, uint16_t eth_type, - union next_hop_v4_or_v6_ptr *nh) + struct next_hop **nh) { struct policy_rule *pr = NULL; - struct policy_cache_rule *pr_cache; - vrfid_t vrfid = pktmbuf_get_vrf(*mbuf); - bool v4 = (eth_type == htons(ETHER_TYPE_IPv4)); + struct flow_cache_entry *cache_entry; + vrfid_t vrfid; + bool v4 = (eth_type == htons(RTE_ETHER_TYPE_IPV4)); bool freed = false; - struct npf_config *npf_conf = vrf_get_npf_conf_rcu(vrfid); bool seen_by_crypto; - - if (likely(!npf_active(npf_conf, NPF_IPSEC))) - return false; + int err; + union crypto_ctx ctx; seen_by_crypto = ((*mbuf)->ol_flags & PKT_RX_SEEN_BY_CRYPTO); + /* * Do we have a cached lookup result for this policy? */ - pr_cache = pr_cache_lookup(*mbuf, v4); + cache_entry = crypto_flow_cache_lookup(*mbuf, v4); + if (cache_entry) + flow_cache_entry_get_info(cache_entry, (void **)&pr, + &ctx.context); /* - * Use the PR cache under following conditions: + * Use the flow cache under following conditions: * - received an encrypted packet * - received an UNencrypted packet and we have cached the input * policy check result. */ - if (pr_cache && pr_cache->pr && - (seen_by_crypto || pr_cache->in_rule_checked)) { - IPSEC_CNT_INC(PR_CACHE_HIT); - pr = pr_cache->pr; + if (cache_entry) { + IPSEC_CNT_INC(FLOW_CACHE_HIT); + if (!pr) { + /* + * cleartext packet found in cache. 
Forward as-is + */ + if (ctx.no_rule_fwd) + return false; + } } else { - struct crypto_pkt_buffer *cpb = - RTE_PER_LCORE(crypto_pkt_buffer); - const npf_ruleset_t *rlset = - npf_get_ruleset(npf_conf, NPF_RS_IPSEC); + struct rldb_result result; + struct crypto_vrf_ctx *vrf_ctx; + struct rldb_db_handle *db_in, *db_out; + int dir = XFRM_POLICY_OUT; + + vrfid = pktmbuf_get_vrf(*mbuf); + vrf_ctx = crypto_vrf_find(vrfid); + + /* no crypto fo this VRF */ + if (!vrf_ctx) + return false; + + if (v4) { + db_in = vrf_ctx->input_policy_v4_rldb; + db_out = vrf_ctx->output_policy_v4_rldb; + } else { + db_in = vrf_ctx->input_policy_v6_rldb; + db_out = vrf_ctx->output_policy_v6_rldb; + } /* * If this packet was received encrypted, then we don't need to @@ -2683,7 +2299,18 @@ bool crypto_policy_check_outbound(struct ifnet *in_ifp, struct rte_mbuf **mbuf, * it should have been received encrypted, and so now needs to * be dropped. */ - int dir = PFIL_OUT | (seen_by_crypto ? 0 : PFIL_IN); + + if (likely(!seen_by_crypto)) { + err = rldb_match(db_in, mbuf, 1, &result); + if (likely(err == -ENOENT)) + ; + else if (err == 0) { + + dir = XFRM_POLICY_IN; + pr = (struct policy_rule *) + result.rldb_user_data; + } + } /* * Packets matching an input policy must be dropped if @@ -2691,31 +2318,33 @@ bool crypto_policy_check_outbound(struct ifnet *in_ifp, struct rte_mbuf **mbuf, * and this routine is only called for such unencrypted * packets. * - * If no policy matches we find NPF_DECISION_UNMATCHED. - * Otherwise one of NPF_DECISION_PASS (for an ALLOW policy) - * or NPF_DECISION_BLOCK (for a BLOCK policy). + * If no policy matches we find -ENOENT. + * Otherwise one a ALLOW policy or a BLOCK polic. * * Only block rules are currently used in the input policy. 
*/ - npf_result_t result = - npf_hook_notrack(rlset, mbuf, in_ifp, dir, 0, eth_type); + + if (likely(!pr)) { + + err = rldb_match(db_out, mbuf, 1, &result); + if (likely(err == -ENOENT)) + ; + else if (err == 0) { + + dir = XFRM_POLICY_OUT; + pr = (struct policy_rule *) + result.rldb_user_data; + } + } /* * No input and no output policy matched, allow normal * processing */ - if (likely(result.decision == NPF_DECISION_UNMATCHED)) + if (likely(!pr && err == -ENOENT)) { + crypto_flow_cache_add(flow_cache, NULL, *mbuf, v4, + seen_by_crypto, XFRM_POLICY_OUT); return false; - - if (likely(result.tag_set)) { - dir = XFRM_POLICY_OUT; - pr = policy_rule_find_by_tag(result.tag, dir); - if (!pr) { - pr = policy_rule_find_by_tag(result.tag, - XFRM_POLICY_IN); - if (pr) - dir = XFRM_POLICY_IN; - } } /* @@ -2725,30 +2354,21 @@ bool crypto_policy_check_outbound(struct ifnet *in_ifp, struct rte_mbuf **mbuf, if (pr && pr->sel.ifindex && nh) { struct ifnet *ifp = NULL; - if (v4 && nh->v4) - ifp = nh4_get_ifp(nh->v4); - else if (nh->v6) - ifp = nh6_get_ifp(nh->v6); + if (v4 && *nh) + ifp = dp_nh_get_ifp(*nh); + else if (*nh) + ifp = dp_nh_get_ifp(*nh); if (!ifp || pr->sel.ifindex != (int)ifp->if_index) /* We don't have a match */ return false; } - if (cpb && !policy_cache_disabled && pr && - (rte_atomic16_read(&cpb->pr_cache_count) < - PR_CACHE_MAX_MARKER)) { - IPSEC_CNT_INC(PR_CACHE_MISS); - if (pr_cache_add(cpb, pr, *mbuf, v4, - seen_by_crypto, - dir) != 0) - IPSEC_CNT_INC(PR_CACHE_ADD_FAIL); - else - IPSEC_CNT_INC(PR_CACHE_ADD); - } + crypto_flow_cache_add(flow_cache, pr, *mbuf, v4, + seen_by_crypto, dir); } - if (pr && !pr->pending_delete) { + if (pr && !(pr->flags & POLICY_F_PENDING_DEL)) { if (pr->action != XFRM_POLICY_BLOCK) { struct pr_feat_attach *attach; struct pktmbuf_mdata *mdata; @@ -2766,56 +2386,37 @@ bool crypto_policy_check_outbound(struct ifnet *in_ifp, struct rte_mbuf **mbuf, attach = rcu_dereference(pr->feat_attach); struct ifnet *vfp_ifp = NULL; - if (v4) { - if 
(attach) { - vfp_ifp = nh4_get_ifp(&attach->next.nh); + if (attach) { + vfp_ifp = dp_nh_get_ifp(&attach->nh); - if (!vfp_ifp) { - IPSEC_CNT_INC(DROPPED_NO_BIND); - goto drop; - } + if (!vfp_ifp) { + IPSEC_CNT_INC(DROPPED_NO_BIND); + goto drop; + } - if (nh) { - nh->v4 = &attach->next.nh; - mdata = pktmbuf_mdata(*mbuf); - mdata->pr = pr; - pktmbuf_mdata_set(*mbuf, + if (nh) { + *nh = &attach->nh; + mdata = pktmbuf_mdata(*mbuf); + mdata->pr = pr; + pktmbuf_mdata_set(*mbuf, PKT_MDATA_CRYPTO_PR); - return false; - } - if_incr_out(vfp_ifp, *mbuf); + return false; } + if_incr_out(vfp_ifp, *mbuf); + } + + if (v4) crypto_policy_handle_packet_outbound(vfp_ifp, in_ifp, *mbuf, tbl_id, pr); - } else { - if (attach) { - vfp_ifp = nh6_get_ifp( - &attach->next.nh6); - - if (!vfp_ifp) { - IPSEC_CNT_INC(DROPPED_NO_BIND); - goto drop; - } - - if (nh) { - nh->v6 = &attach->next.nh6; - mdata = pktmbuf_mdata(*mbuf); - mdata->pr = pr; - pktmbuf_mdata_set(*mbuf, - PKT_MDATA_CRYPTO_PR); - return false; - } - if_incr_out(vfp_ifp, *mbuf); - } + else crypto_policy_handle_packet6_outbound(vfp_ifp, in_ifp, *mbuf, tbl_id, pr); - } return true; } } else { @@ -2856,34 +2457,40 @@ crypto_policy_check_inbound(struct ifnet *in_ifp, struct rte_mbuf **mbuf, uint16_t eth_type) { struct policy_rule *pr = NULL; - struct policy_cache_rule *pr_cache; - bool v4 = (eth_type == htons(ETHER_TYPE_IPv4)); + struct flow_cache_entry *cache_entry; + bool v4 = (eth_type == htons(RTE_ETHER_TYPE_IPV4)); bool freed = false; - vrfid_t vrfid = pktmbuf_get_vrf(*mbuf); - struct npf_config *npf_conf = vrf_get_npf_conf_rcu(vrfid); - - if (likely(!npf_active(npf_conf, NPF_IPSEC))) - return false; + vrfid_t vrfid; + union crypto_ctx ctx; + struct crypto_vrf_ctx *vrf_ctx; if ((*mbuf)->ol_flags & PKT_RX_SEEN_BY_CRYPTO) return false; /* - * Use the PR cache only if we have already cached the input check. + * Use the flow cache only if we have already cached the input check. 
*/ - pr_cache = pr_cache_lookup(*mbuf, v4); - if (pr_cache && pr_cache->pr && pr_cache->in_rule_checked) { - IPSEC_CNT_INC(PR_CACHE_HIT); - if (pr_cache->pr->action == XFRM_POLICY_BLOCK) + cache_entry = crypto_flow_cache_lookup(*mbuf, v4); + if (cache_entry) + flow_cache_entry_get_info(cache_entry, (void **)&pr, + &ctx.context); + + if (cache_entry && pr && ctx.in_rule_checked) { + IPSEC_CNT_INC(FLOW_CACHE_HIT); + if (pr->action == XFRM_POLICY_BLOCK) goto drop; } else { - struct crypto_pkt_buffer *cpb = - RTE_PER_LCORE(crypto_pkt_buffer); - const npf_ruleset_t *rlset = - npf_get_ruleset(npf_conf, NPF_RS_IPSEC); + struct rldb_result result; + struct rldb_db_handle *db; + int err; + + vrfid = pktmbuf_get_vrf(*mbuf); + vrf_ctx = crypto_vrf_find(vrfid); - int dir = PFIL_IN; + /* no crypto fo this VRF */ + if (!vrf_ctx) + return false; /* * Packets matching an input policy must be dropped if @@ -2891,56 +2498,44 @@ crypto_policy_check_inbound(struct ifnet *in_ifp, struct rte_mbuf **mbuf, * and this routine is only called for such unencrypted * packets. * - * If no policy matches we find NPF_DECISION_UNMATCHED. - * Otherwise one of NPF_DECISION_PASS (for an ALLOW policy) - * or NPF_DECISION_BLOCK (for a BLOCK policy). + * If no policy matches we find -ENOENT. + * Otherwise one a ALLOW policy or a BLOCK policy. * * Only block rules are currently used in the input policy. */ - npf_result_t result = - npf_hook_notrack(rlset, mbuf, in_ifp, dir, 0, eth_type); - /* No input policy matched */ - if (likely(result.decision == NPF_DECISION_UNMATCHED)) + db = v4 ? vrf_ctx->input_policy_v4_rldb + : vrf_ctx->input_policy_v6_rldb; + + err = rldb_match(db, mbuf, 1, &result); + if (err) return false; - if (likely(result.tag_set)) { - pr = policy_rule_find_by_tag(result.tag, - XFRM_POLICY_IN); - if (pr) { - /* - * We found an input policy. If it has a - * selector with an ifindex set, then - * check we match. 
- */ - if (pr->sel.ifindex) { - if (pr->sel.ifindex != - (int)in_ifp->if_index) { - /* We don't have a match */ - return false; - } - } + pr = (struct policy_rule *)result.rldb_user_data; - /* - * We found an input policy, add it to the - * PR cache and drop the packet. - */ - if (cpb && !policy_cache_disabled && pr && - (rte_atomic16_read(&cpb->pr_cache_count) < - PR_CACHE_MAX_MARKER)) { - IPSEC_CNT_INC(PR_CACHE_MISS); - if (pr_cache_add(cpb, pr, *mbuf, v4, - false, - XFRM_POLICY_IN) != 0) - IPSEC_CNT_INC( - PR_CACHE_ADD_FAIL); - else - IPSEC_CNT_INC(PR_CACHE_ADD); + if (pr) { + /* + * We found an input policy. If it has a + * selector with an ifindex set, then + * check we match. + */ + if (pr->sel.ifindex) { + if (pr->sel.ifindex != + (int)in_ifp->if_index) { + /* We don't have a match */ + return false; } - if (pr->action == XFRM_POLICY_BLOCK) - goto drop; } + /* + * We found an input policy, add it to the + * flow cache and drop the packet. + */ + crypto_flow_cache_add(flow_cache, pr, *mbuf, v4, + false, XFRM_POLICY_IN); + + if (pr->action == XFRM_POLICY_BLOCK) + goto drop; } } return false; @@ -2971,7 +2566,7 @@ bool crypto_policy_check_inbound_terminating(struct ifnet *in_ifp, { uint8_t proto; - if (eth_type == htons(ETHER_TYPE_IPv4)) { + if (eth_type == htons(RTE_ETHER_TYPE_IPV4)) { struct iphdr *ip = iphdr(*mbuf); proto = ip->protocol; @@ -2982,7 +2577,7 @@ bool crypto_policy_check_inbound_terminating(struct ifnet *in_ifp, } if (proto == IPPROTO_UDP) { - struct udphdr *udp = pktmbuf_mtol4(*mbuf, struct udphdr *); + struct udphdr *udp = dp_pktmbuf_mtol4(*mbuf, struct udphdr *); if (udp->uh_dport == htons(IKE_PORT)) return false; @@ -2995,29 +2590,27 @@ bool crypto_policy_check_inbound_terminating(struct ifnet *in_ifp, * For a given reqid, find the matching output policy and retrieve the * virtual feature point interface, if any. 
*/ -struct ifnet *crypto_policy_feat_attach_by_reqid(uint32_t reqid) +struct ifnet *crypto_policy_feat_attach_by_reqid(struct crypto_vrf_ctx *vrf_ctx, + uint32_t reqid) { struct cds_lfht_iter iter; struct cds_lfht_node *node; + struct cds_lfht *ht; - cds_lfht_first(output_policy_rule_tag_ht, &iter); + ht = vrf_ctx->output_policy_rule_sel_ht; + + cds_lfht_first(ht, &iter); while ((node = cds_lfht_iter_get_node(&iter)) != NULL) { struct policy_rule *pr; - pr = caa_container_of(node, struct policy_rule, tag_ht_node); + pr = caa_container_of(node, struct policy_rule, sel_ht_node); - if (pr->reqid == reqid) { - if (pr->sel.family == AF_INET) - return pr->feat_attach ? - nh4_get_ifp(&pr->feat_attach->next.nh) : - NULL; - else - return pr->feat_attach ? - nh6_get_ifp(&pr->feat_attach->next.nh6) : - NULL; - } - cds_lfht_next(output_policy_rule_tag_ht, &iter); + if (pr->reqid == reqid) + return pr->feat_attach ? + dp_nh_get_ifp(&pr->feat_attach->nh) : NULL; + cds_lfht_next(ht, &iter); } + return NULL; } @@ -3028,10 +2621,10 @@ void crypto_policy_update_pending_if(struct ifnet *ifp) struct cds_lfht *bind_table; struct cds_lfht_iter iter; struct cds_lfht_node *node; + int rc; - bind_table = bind_table_vrf_get(vrfid); - - if (!bind_table) { + rc = bind_table_vrf_get(vrfid, &bind_table); + if (rc < 0) { POLICY_ERR("Failed to get binding table for if walk\n"); return; } @@ -3051,27 +2644,52 @@ void crypto_policy_update_pending_if(struct ifnet *ifp) } } +static int policy_feat_detach_internal(vrfid_t vrfid, + const struct xfrm_selector *sel, + struct s2s_binding *bind) +{ + struct policy_rule *pr; + struct xfrm_mark mark; + int rc; + if (bind) { + rc = policy_bind_del(bind); + if (rc < 0) + return rc; + } + mark.v = mark.m = 0; + rc = policy_rule_find_by_selector(vrfid, sel, &mark, XFRM_POLICY_OUT, + &pr); + + if (rc == 0 && pr) { + crypto_sadb_feat_attach_in(pr->reqid, NULL); + policy_feat_attach_destroy(pr); + } + return 0; +} + /* Unbind a policy and virtual feature point. 
*/ static int policy_feat_detach(vrfid_t vrfid, const struct xfrm_selector *sel, uint ifindex __unused) { - struct policy_rule *pr; - struct xfrm_mark mark; struct s2s_binding *bind; bind = policy_bind_lookup(vrfid, sel); - if (bind) - policy_bind_del(bind); + return policy_feat_detach_internal(vrfid, sel, bind); +} + +void policy_feat_flush_vrf(struct crypto_vrf_ctx *vrf_ctx) +{ + struct s2s_binding *bind; + struct cds_lfht_iter iter; - mark.v = mark.m = 0; - pr = policy_rule_find_by_selector(vrfid, sel, &mark, XFRM_POLICY_OUT); + POLICY_DEBUG("Flush all feature bindings for VRF %d\n", + vrf_ctx->vrfid); - if (pr) { - crypto_sadb_feat_attach_in(pr->reqid, NULL); - policy_feat_attach_destroy(pr); + cds_lfht_for_each_entry(vrf_ctx->s2s_bind_hash_table, + &iter, bind, bind_ht_node) { + policy_feat_detach_internal(vrf_ctx->vrfid, &bind->sel, bind); } - return 0; } /* Bind a policy and virtual feature point. */ @@ -3081,6 +2699,7 @@ static int policy_feat_attach(vrfid_t vrfid, const struct xfrm_selector *sel, struct s2s_binding *bind; struct cds_lfht_node *node; struct cds_lfht *bind_table; + int rc; bind = malloc(sizeof(*bind)); @@ -3091,12 +2710,11 @@ static int policy_feat_attach(vrfid_t vrfid, const struct xfrm_selector *sel, cds_lfht_node_init(&bind->bind_ht_node); - bind_table = bind_table_vrf_get(vrfid); - - if (!bind_table) { + rc = bind_table_vrf_get(vrfid, &bind_table); + if (rc < 0) { POLICY_ERR("Failed to get binding table for add\n"); free(bind); - return -ENOENT; + return rc; } node = cds_lfht_add_unique(bind_table, @@ -3107,9 +2725,11 @@ static int policy_feat_attach(vrfid_t vrfid, const struct xfrm_selector *sel, /* existing binding, use it instead of the created one */ free(bind); bind = caa_container_of(node, struct s2s_binding, bind_ht_node); - } else - bind_table_vrf_inc(vrfid); - + } else { + rc = bind_table_vrf_inc(vrfid); + if (rc < 0) + POLICY_ERR("Failed inc vtf binding\n"); + } bind->sel = *sel; bind->ifindex = ifindex; bind->vrfid = vrfid; @@ 
-3129,7 +2749,7 @@ static int policy_feat_attach(vrfid_t vrfid, const struct xfrm_selector *sel, * [sel if] * * The [sel if] is the ifindex in the selector, if set. If not set then this - * value will be 0. If it is the ifindex of a vrfmaster, then we will use 0 + * value will be 0. If it is the ifindex of a vrf, then we will use 0 * instead (like we do when creating policies). If this arg does not exist then * we will assume it is 0. */ @@ -3155,18 +2775,18 @@ static int crypto_policy_cmd_handler(struct pb_msg *msg) } vrf_id = cp_msg->vrf; - vrf = vrf_get_rcu_from_external(vrf_id); + vrf = dp_vrf_get_rcu_from_external(vrf_id); if (vrf) vrf_id = vrf->v_id; memset(&sel, 0, sizeof(sel)); struct ip_addr daddr, saddr; - if (protobuf_get_ipaddr(cp_msg->sel_daddr, &daddr)) { + if (dp_protobuf_get_ipaddr(cp_msg->sel_daddr, &daddr)) { rc = -1; goto done; } - if (protobuf_get_ipaddr(cp_msg->sel_saddr, &saddr)) { + if (dp_protobuf_get_ipaddr(cp_msg->sel_saddr, &saddr)) { rc = -1; goto done; } @@ -3223,8 +2843,8 @@ static int crypto_policy_cmd_handler(struct pb_msg *msg) if (cp_msg->has_sel_ifindex) { sel.ifindex = cp_msg->sel_ifindex; - ifp = ifnet_byifindex(sel.ifindex); - if (ifp && ifp->if_type == IFT_VRFMASTER) + ifp = dp_ifnet_byifindex(sel.ifindex); + if (ifp && ifp->if_type == IFT_VRF) sel.ifindex = 0; } else sel.ifindex = 0; @@ -3308,7 +2928,7 @@ crypto_incmpl_xfrm_pol_free(struct rcu_head *head) } /* - * Add an incomplete policy (waiting on the vrf master). If we already have + * Add an incomplete policy (waiting on the vrf). If we already have * an entry for the key (selector + mark) then update the message. 
*/ void crypto_incmpl_xfrm_policy_add(uint32_t ifindex __unused, @@ -3403,14 +3023,18 @@ void crypto_incmpl_policy_make_complete(void) { struct cds_lfht_iter iter; struct crypto_incmpl_xfrm_policy *pol; + struct xfrm_client_aux_data aux; + vrfid_t vrf_id = VRF_DEFAULT_ID; + aux.vrf = &vrf_id; crypto_incmpl_xfrm_pol_stats.if_complete++; cds_lfht_for_each_entry(crypto_incmpl_policy, &iter, pol, hash_node) { - rtnl_process_xfrm(pol->nlh, &vrf_id); + rtnl_process_xfrm(pol->nlh, &aux); } + crypto_npf_cfg_commit_flush(); } PB_REGISTER_CMD(crypto_policy_cmd) = { diff --git a/src/crypto/crypto_policy.h b/src/crypto/crypto_policy.h index 6c20852c..d65d45cb 100644 --- a/src/crypto/crypto_policy.h +++ b/src/crypto/crypto_policy.h @@ -29,24 +29,22 @@ struct rte_mbuf; int crypto_policy_add(const struct xfrm_userpolicy_info *usr_policy, const xfrm_address_t *dst, const struct xfrm_user_tmpl *tmpl, - const struct xfrm_mark *mark, vrfid_t vrfid); + const struct xfrm_mark *mark, vrfid_t vrfid, + uint32_t seq, bool *send_ack); int crypto_policy_update(const struct xfrm_userpolicy_info *usr_policy, const xfrm_address_t *dst, const struct xfrm_user_tmpl *tmpl, - const struct xfrm_mark *mark, vrfid_t vrfid); -void crypto_policy_delete(const struct xfrm_userpolicy_id *id, - const struct xfrm_mark *mark, vrfid_t vrfid); + const struct xfrm_mark *mark, vrfid_t vrfid, + uint32_t seq, bool *send_ack); +int crypto_policy_delete(const struct xfrm_userpolicy_id *id, + const struct xfrm_mark *mark, vrfid_t vrfid, + uint32_t seq, bool *send_ack); +struct crypto_vrf_ctx; +void crypto_policy_flush_vrf(struct crypto_vrf_ctx *vrf_ctx); void crypto_policy_update_pending_if(struct ifnet *ifp); int crypto_policy_init(void); -/* - * Check if outbound policy is active and return af/address if true - */ -bool crypto_policy_outbound_active(struct ifnet *in_ifp, struct rte_mbuf **mbuf, - uint32_t *af, void **addr, - uint16_t eth_type); - void crypto_incmpl_policy_init(void); void 
crypto_incmpl_xfrm_policy_add(uint32_t ifindex, const struct nlmsghdr *nlh, const struct xfrm_selector *sel, @@ -55,4 +53,8 @@ void crypto_incmpl_xfrm_policy_del(uint32_t ifindex, const struct nlmsghdr *nlh, const struct xfrm_selector *sel, const struct xfrm_mark *mark); void crypto_incmpl_policy_make_complete(void); + +void policy_feat_flush_vrf(struct crypto_vrf_ctx *vrf_ctx); + +void crypto_npf_cfg_commit_flush(void); #endif /* CRYPTO_POLICY_H */ diff --git a/src/crypto/crypto_policy_cache.h b/src/crypto/crypto_policy_cache.h deleted file mode 100644 index 74eacb38..00000000 --- a/src/crypto/crypto_policy_cache.h +++ /dev/null @@ -1,50 +0,0 @@ -/*- - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. - * Copyright (c) 2017 by Brocade Communications Systems, Inc. - * All rights reserved. - * - * SPDX-License-Identifier: LGPL-2.1-only - */ -#ifndef CRYPTO_POLICY_CACHE_H -#define CRYPTO_POLICY_CACHE_H - -#include -#include - -#include "urcu.h" -#include "util.h" - -struct policy_rule; - -#define POLICY_CACHE_SIZE 4096 - -#define POLICY_CACHE_HASH_SEED 0xDEAFCAFE - -/* - * Crypto Pkt Buffer (CPB) DB, containing pointers to all the - * per CORE CPB. - */ -struct crypto_pkt_buffer *cpbdb[RTE_MAX_LCORE]; - -struct pr_cache_hash_key { - uint32_t src; - uint32_t dst; - uint32_t proto; - vrfid_t vrfid; -}; - -struct policy_cache_rule { - struct cds_lfht_node pr_node; - struct rcu_head policy_cache_rcu; - struct policy_rule *pr; - struct pr_cache_hash_key key; - uint8_t in_rule_checked:1, - in_rule_drop:1, - PR_UNUSED:6; - char SPARE[7]; - char *padding[0] __rte_cache_aligned; -}; - -void pr_cache_timer_handler(struct rte_timer *, void *arg); - -#endif /* CRYPTO_POLICY_CACHE_H */ diff --git a/src/crypto/crypto_rte_pmd.c b/src/crypto/crypto_rte_pmd.c new file mode 100644 index 00000000..131a6933 --- /dev/null +++ b/src/crypto/crypto_rte_pmd.c @@ -0,0 +1,1055 @@ +/*- + * Copyright (c) 2020-2021, AT&T Intellectual Property. All rights reserved. 
+ * + * SPDX-License-Identifier: LGPL-2.1-only + */ + +#include +#include +#include +#include + +#include "compiler.h" +#include "crypto_defs.h" +#include "vplane_log.h" +#include "compiler.h" +#include "crypto.h" +#include "crypto_internal.h" +#include "crypto_rte_pmd.h" +#include "esp.h" + +/* + * Support for 16K sessions ( = 8K tunnels ) + */ +#define CRYPTO_MAX_SESSIONS (1 << 14) + +#define CRYPTO_OP_CTX_OFFSET (sizeof(struct rte_crypto_op) + \ + sizeof(struct rte_crypto_sym_op)) + +#define CRYPTO_OP_IV_OFFSET (CRYPTO_OP_CTX_OFFSET + \ + sizeof(struct crypto_pkt_ctx **)) + +/* per session (SA) data structure used to set up operations with PMDs */ +static struct rte_mempool *crypto_session_pool; + +/* per session data structure for private driver data */ +static struct rte_mempool *crypto_priv_sess_pools[CRYPTODEV_MAX]; + +static uint8_t dev_cnts[CRYPTODEV_MAX]; + +/* per packet crypto op pool. This may eventually subsume crypto_pkt_ctx */ +static struct rte_mempool *crypto_op_pool; + +int crypto_rte_setup(void) +{ + int err = 0; + int socket = rte_lcore_to_socket_id(rte_get_master_lcore()); + + /* + * allocate generic session context pool + */ + crypto_session_pool = rte_cryptodev_sym_session_pool_create( + "crypto_session_pool", CRYPTO_MAX_SESSIONS, 0, 0, 0, socket); + if (!crypto_session_pool) { + RTE_LOG(ERR, DATAPLANE, + "Could not allocate crypto session pool\n"); + return -ENOMEM; + } + + uint16_t crypto_op_data_size = + sizeof(struct rte_crypto_sym_op) + + sizeof(struct crypto_pkt_ctx **) + CRYPTO_MAX_IV_LENGTH; + + /* + * dp_lcore_events_init gets invoked from the main thread as well + * and leads to a UT failure if the pool is not sized to take that + * into account + */ + uint16_t crypto_op_pool_size = + MAX_CRYPTO_PKT_BURST * (rte_lcore_count() + 1); + + crypto_op_pool = rte_crypto_op_pool_create("crypto_op_pool", + RTE_CRYPTO_OP_TYPE_SYMMETRIC, + crypto_op_pool_size, 0, + crypto_op_data_size, + socket); + if (!crypto_op_pool) { + RTE_LOG(ERR, 
DATAPLANE, "Could not set up crypto op pool\n"); + err = -ENOMEM; + goto fail; + } + + return 0; + +fail: + rte_mempool_free(crypto_session_pool); + return err; +} + +void crypto_rte_shutdown(void) +{ + rte_mempool_free(crypto_session_pool); + rte_mempool_free(crypto_op_pool); +} + +struct cipher_algo_table { + const char *name; + enum rte_crypto_cipher_algorithm cipher_algo; + uint8_t iv_len; + uint8_t block_size; +}; + +#define AES_BLOCK_SIZE 16 +#define DES3_BLOCK_SIZE 8 + +/* AES-GCM does not have padding requirements */ +#define AES_GCM_BLOCK_SIZE 1 + +static const struct cipher_algo_table cipher_algorithms[] = { + + { "aes", RTE_CRYPTO_CIPHER_AES_CBC, + IPSEC_AES_CBC_IV_SIZE, AES_BLOCK_SIZE}, + { "cbc(aes)", RTE_CRYPTO_CIPHER_AES_CBC, + IPSEC_AES_CBC_IV_SIZE, AES_BLOCK_SIZE}, + { "des3_ede", RTE_CRYPTO_CIPHER_3DES_CBC, + IPSEC_3DES_IV_SIZE, DES3_BLOCK_SIZE}, + { "cbc(des3_ede)", RTE_CRYPTO_CIPHER_3DES_CBC, + IPSEC_3DES_IV_SIZE, DES3_BLOCK_SIZE}, + { "eNULL", RTE_CRYPTO_CIPHER_NULL, + 0, 1}, + { "ecb(cipher_null)", RTE_CRYPTO_CIPHER_NULL, + 0, 1} +}; + +struct md_algo_table { + const char *name; + enum rte_crypto_auth_algorithm auth_algo; + uint32_t override_trunc_len; /* override truncation length, in bits. 
*/ +}; + +static const struct md_algo_table md_algorithms[] = { + { "sha1", RTE_CRYPTO_AUTH_SHA1_HMAC, 96}, + { "hmac(sha1)", RTE_CRYPTO_AUTH_SHA1_HMAC, 96}, + { "hmac(sha256)", RTE_CRYPTO_AUTH_SHA256_HMAC, 0}, + { "hmac(sha384)", RTE_CRYPTO_AUTH_SHA384_HMAC, 192}, + { "hmac(sha512)", RTE_CRYPTO_AUTH_SHA512_HMAC, 256}, + { "md5", RTE_CRYPTO_AUTH_MD5_HMAC, 96}, + { "hmac(md5)", RTE_CRYPTO_AUTH_MD5_HMAC, 96}, + { "rfc4106(gcm(aes))", RTE_CRYPTO_AUTH_NULL, 0}, + { "aNULL", RTE_CRYPTO_AUTH_NULL, 0} +}; + +static const char *cryptodev_names[CRYPTODEV_MAX] = { + [CRYPTODEV_AESNI_MB] = "crypto_aesni_mb", + [CRYPTODEV_AESNI_GCM] = "crypto_aesni_gcm", + [CRYPTODEV_NULL] = "crypto_null", + [CRYPTODEV_OPENSSL] = "crypto_openssl", +}; + +static int crypto_rte_setup_aes_gcm_cipher(struct crypto_session *ctx, + const struct xfrm_algo *algo_crypt) +{ + uint16_t key_len = algo_crypt->alg_key_len / BITS_PER_BYTE; + + key_len -= AES_GCM_NONCE_LENGTH; + ctx->aead_algo = RTE_CRYPTO_AEAD_AES_GCM; + ctx->nonce_len = AES_GCM_NONCE_LENGTH; + ctx->key_len = key_len; + ctx->iv_len = AES_GCM_IV_LENGTH; + ctx->block_size = AES_GCM_BLOCK_SIZE; + + /* setup AES-GCM according to RFC4106 */ + if (key_len < 4) { + RTE_LOG(ERR, DATAPLANE, + "key_len too small: %d\n", key_len); + return -EINVAL; + } + + if (key_len != 16 && key_len != 32) { + RTE_LOG(ERR, DATAPLANE, + "Unsupported gcm(aes) key size: %d\n", + key_len); + return -EINVAL; + } + + if (key_len > ARRAY_SIZE(ctx->key)) { + RTE_LOG(ERR, DATAPLANE, + "Unexpected encryption key len: %d\n", key_len); + return -EINVAL; + } + memcpy(ctx->key, algo_crypt->alg_key, ctx->key_len); + memcpy(ctx->nonce, algo_crypt->alg_key + ctx->key_len, + ctx->nonce_len); + return 0; +} + +static int crypto_rte_set_cipher(struct crypto_session *ctx, + const struct xfrm_algo *algo_crypt) +{ + const char *algo_name = algo_crypt->alg_name; + uint16_t key_len = algo_crypt->alg_key_len / BITS_PER_BYTE; + int err; + + ctx->cipher_algo = RTE_CRYPTO_CIPHER_LIST_END; + 
ctx->aead_algo = RTE_CRYPTO_AEAD_LIST_END; + if (strcmp("rfc4106(gcm(aes))", algo_name) == 0) { + err = crypto_rte_setup_aes_gcm_cipher(ctx, algo_crypt); + if (err) + return err; + } else { + for (uint8_t i = 0; i < ARRAY_SIZE(cipher_algorithms); i++) + if (!strcmp(cipher_algorithms[i].name, algo_name)) { + ctx->cipher_algo = + cipher_algorithms[i].cipher_algo; + ctx->iv_len = cipher_algorithms[i].iv_len; + ctx->block_size = + cipher_algorithms[i].block_size; + break; + } + + if (ctx->cipher_algo == RTE_CRYPTO_CIPHER_LIST_END) { + RTE_LOG(ERR, DATAPLANE, "Unsupported digest algo %s\n", + algo_name); + return -EINVAL; + } + + if ((!key_len && ctx->cipher_algo != RTE_CRYPTO_CIPHER_NULL) || + key_len > CRYPTO_MAX_CIPHER_KEY_LENGTH) { + RTE_LOG(ERR, DATAPLANE, + "Invalid key length %d specified with crypto algorithm %s\n", + key_len, algo_name); + return -EINVAL; + } + + ctx->key_len = key_len; + memcpy(ctx->key, algo_crypt->alg_key, key_len); + } + + return 0; +} + +static int crypto_rte_set_auth(struct crypto_session *ctx, + const struct xfrm_algo_auth *algo_trunc_auth, + const struct xfrm_algo *algo_auth) +{ + uint16_t key_len; + const char *algo_name; + const char *alg_key; + unsigned int digest_len; + + /* + * Depending upon the source of the xfrm, either from + * strongswan direct or via the kernel, the authentication + * details are provided through different conventions. + * + * When the source is via the kernel, the kernel transposes + * the information it received from strongswan in the new + * convention into the old convention before replay to the rest + * of the system. 
+ */ + if (algo_trunc_auth) { + key_len = algo_trunc_auth->alg_key_len / BITS_PER_BYTE; + digest_len = algo_trunc_auth->alg_trunc_len / BITS_PER_BYTE; + algo_name = algo_trunc_auth->alg_name; + alg_key = algo_trunc_auth->alg_key; + } else { + key_len = algo_auth->alg_key_len / BITS_PER_BYTE; + digest_len = key_len; + algo_name = algo_auth->alg_name; + alg_key = algo_auth->alg_key; + } + + + ctx->auth_algo = RTE_CRYPTO_AUTH_LIST_END; + for (uint8_t i = 0; i < ARRAY_SIZE(md_algorithms); i++) + if (!strcmp(md_algorithms[i].name, algo_name)) { + ctx->auth_algo = md_algorithms[i].auth_algo; + + /* Override legacy digest_len for "sha1", "md5" */ + if (md_algorithms[i].override_trunc_len) + digest_len = md_algorithms[i].override_trunc_len + / BITS_PER_BYTE; + break; + } + + if (ctx->auth_algo == RTE_CRYPTO_AUTH_LIST_END) { + RTE_LOG(ERR, DATAPLANE, "Unsupported digest algo %s\n", + algo_name); + return -EINVAL; + } + + if ((!key_len && ctx->auth_algo != RTE_CRYPTO_AUTH_NULL) || + key_len > CRYPTO_MAX_AUTH_KEY_LENGTH) { + RTE_LOG(ERR, DATAPLANE, + "Invalid key size %d specified with auth algo %s\n", + key_len, algo_name); + return -EINVAL; + } + + ctx->auth_alg_key_len = key_len; + memcpy(ctx->auth_alg_key, alg_key, key_len); + ctx->digest_len = digest_len; + + return 0; +} + +int +crypto_rte_set_session_parameters(struct crypto_session *ctx, + const struct xfrm_algo *algo_crypt, + const struct xfrm_algo_auth *algo_trunc_auth, + const struct xfrm_algo *algo_auth) +{ + int err = 0; + + err = crypto_rte_set_cipher(ctx, algo_crypt); + if (err) + return err; + + err = crypto_rte_set_auth(ctx, algo_trunc_auth, algo_auth); + return err; +} + +/* + * select PMD to create based on algorithm requirements + * Ideally, DPDK should provide an API to query capability based on driver type + * However, the DPDK API for querying capabilities requires a device to + * be created first which presents unnecessary overhead. + * Use a static method of selection for now. 
+ * + */ +int +crypto_rte_select_pmd_type(enum rte_crypto_cipher_algorithm cipher_algo, + enum rte_crypto_aead_algorithm aead_algo, + enum cryptodev_type *dev_type, bool *setup_openssl) +{ + if (aead_algo == RTE_CRYPTO_AEAD_AES_GCM) { + *dev_type = CRYPTODEV_AESNI_GCM; + *setup_openssl = false; + return 0; + } + + switch (cipher_algo) { + case RTE_CRYPTO_CIPHER_3DES_CBC: + case RTE_CRYPTO_CIPHER_AES_CBC: + *dev_type = CRYPTODEV_AESNI_MB; + *setup_openssl = true; + break; + + case RTE_CRYPTO_CIPHER_NULL: + *dev_type = CRYPTODEV_NULL; + *setup_openssl = true; + break; + + default: + RTE_LOG(ERR, CRYPTO, "Invalid cipher %d requested\n", + cipher_algo); + return -EINVAL; + } + + return 0; +} + +/* + * array of dev ids per device type + * Used as the suffix in the device name + */ +static int8_t pmd_inst_ids[CRYPTODEV_MAX][MAX_CRYPTO_PMD]; + +static int crypto_rte_find_inst_id(enum cryptodev_type dev_type, + int *inst_id) +{ + static int first_time = 1; + int i; + + if (first_time) { + memset(pmd_inst_ids, -1, sizeof(pmd_inst_ids)); + first_time = 0; + } + + for (i = 0; i < MAX_CRYPTO_PMD; i++) { + if (pmd_inst_ids[dev_type][i] == -1) + break; + } + + if (i == MAX_CRYPTO_PMD) + return -ENOSPC; + + *inst_id = i; + return 0; +} + +static int crypto_rte_setup_priv_pool(enum cryptodev_type dev_type, + unsigned int session_size) +{ +#define POOL_NAME_LEN 50 + char pool_name[POOL_NAME_LEN]; + unsigned int socket = rte_lcore_to_socket_id(rte_get_master_lcore()); + + snprintf(pool_name, POOL_NAME_LEN, "crypto_sess_priv_pool_%d", + dev_type); + crypto_priv_sess_pools[dev_type] = + rte_mempool_create(pool_name, CRYPTO_MAX_SESSIONS, session_size, + 0, 0, NULL, NULL, NULL, NULL, socket, 0); + if (!crypto_priv_sess_pools[dev_type]) { + RTE_LOG(ERR, DATAPLANE, + "Could not allocate crypto session private pool for socket %d, dev %s\n", + socket, cryptodev_names[dev_type]); + return -ENOMEM; + } + return 0; +} + +static void crypto_rte_destroy_priv_pool(enum cryptodev_type dev_type) +{ 
+ if (crypto_priv_sess_pools[dev_type]) { + rte_mempool_free(crypto_priv_sess_pools[dev_type]); + crypto_priv_sess_pools[dev_type] = NULL; + } +} + +int crypto_rte_create_pmd(int cpu_socket, uint8_t dev_id, + enum cryptodev_type dev_type, char dev_name[], + uint8_t max_name_len, int *rte_dev_id) +{ +#define ARGS_LEN 128 + int err; + char args[ARGS_LEN]; + int inst_id = 0; + unsigned int session_size; + struct rte_cryptodev_config conf = { + .nb_queue_pairs = MAX_CRYPTO_XFRM, + .socket_id = cpu_socket + }; + + /* look for next available id for this pmd type */ + err = crypto_rte_find_inst_id(dev_type, &inst_id); + if (err) { + RTE_LOG(ERR, DATAPLANE, + "Could not find instance id for dev type %d\n", + dev_type); + return err; + } + + /* create new device */ + snprintf(dev_name, max_name_len, "%s%d", cryptodev_names[dev_type], + inst_id); + snprintf(args, ARGS_LEN, "socket_id=%d", cpu_socket); + + err = rte_vdev_init(dev_name, args); + if (err != 0) { + RTE_LOG(ERR, DATAPLANE, "Could not create PMD %s\n", + dev_name); + return err; + } + + *rte_dev_id = rte_cryptodev_get_dev_id(dev_name); + + session_size = + rte_cryptodev_sym_get_private_session_size(*rte_dev_id); + + if (!crypto_priv_sess_pools[dev_type]) { + err = crypto_rte_setup_priv_pool(dev_type, session_size); + if (err) + goto fail; + } + + err = rte_cryptodev_configure(*rte_dev_id, &conf); + if (err != 0) { + RTE_LOG(ERR, DATAPLANE, + "Failed to configure crypto device %s : %s\n", + dev_name, strerror(-err)); + goto fail; + } + + struct rte_cryptodev_qp_conf qp_conf = { + .nb_descriptors = 2048, + .mp_session = crypto_session_pool, + .mp_session_private = crypto_priv_sess_pools[dev_type] + }; + + for (int i = MIN_CRYPTO_XFRM; i < MAX_CRYPTO_XFRM; i++) { + err = rte_cryptodev_queue_pair_setup(*rte_dev_id, i, + &qp_conf, + cpu_socket); + if (err != 0) { + RTE_LOG(ERR, DATAPLANE, + "Failed to set up queue pair %d for crypto device %s : %s\n", + i, dev_name, strerror(err)); + goto fail; + } + } + + err = 
rte_cryptodev_start(*rte_dev_id); + if (err != 0) { + RTE_LOG(ERR, DATAPLANE, + "Failed to start crypto device %s\n", dev_name); + goto fail; + } + + pmd_inst_ids[dev_type][inst_id] = dev_id; + dev_cnts[dev_type]++; + + return err; + +fail: + if (!dev_cnts[dev_type]) + crypto_rte_destroy_priv_pool(dev_type); + rte_vdev_uninit(dev_name); + return err; +} + +/* + * destroy specified PMD + */ +int crypto_rte_destroy_pmd(enum cryptodev_type dev_type, char dev_name[], + int dev_id) +{ + int err = 0, i, rte_dev_id; + + for (i = 0; i < MAX_CRYPTO_PMD; i++) { + if (pmd_inst_ids[dev_type][i] == dev_id) { + pmd_inst_ids[dev_type][i] = -1; + break; + } + } + + if (i == MAX_CRYPTO_PMD) { + RTE_LOG(ERR, DATAPLANE, + "Could not find instance id for pmd %s, dev_id %d\n", + dev_name, dev_id); + return -EINVAL; + } + + rte_dev_id = rte_cryptodev_get_dev_id(dev_name); + if (rte_dev_id < 0) { + RTE_LOG(ERR, DATAPLANE, "Could not find id for device %s\n", + dev_name); + return -ENOENT; + } + + rte_cryptodev_stop(rte_dev_id); + + err = rte_vdev_uninit(dev_name); + if (err) { + RTE_LOG(ERR, DATAPLANE, "Could not uninit device %s\n", + dev_name); + return err; + } + + dev_cnts[dev_type]--; + if (!dev_cnts[dev_type]) + crypto_rte_destroy_priv_pool(dev_type); + + return err; +} + +static void +crypto_rte_setup_xform_chain(struct crypto_session *session, + struct rte_crypto_sym_xform *cipher_xform, + struct rte_crypto_sym_xform *auth_xform, + struct rte_crypto_sym_xform **xform_chain) +{ + int direction = session->direction; + static enum rte_crypto_cipher_operation cipher_ops[2] = { + [XFRM_POLICY_OUT] = RTE_CRYPTO_CIPHER_OP_ENCRYPT, + [XFRM_POLICY_IN] = RTE_CRYPTO_CIPHER_OP_DECRYPT + }; + static enum rte_crypto_auth_operation auth_ops[2] = { + [XFRM_POLICY_OUT] = RTE_CRYPTO_AUTH_OP_GENERATE, + [XFRM_POLICY_IN] = RTE_CRYPTO_AUTH_OP_VERIFY + }; + static enum rte_crypto_aead_operation aead_ops[2] = { + [XFRM_POLICY_OUT] = RTE_CRYPTO_AEAD_OP_ENCRYPT, + [XFRM_POLICY_IN] = 
RTE_CRYPTO_AEAD_OP_DECRYPT + }; + + if (session->aead_algo == RTE_CRYPTO_AEAD_AES_GCM) { + cipher_xform->type = RTE_CRYPTO_SYM_XFORM_AEAD; + cipher_xform->aead.op = aead_ops[direction]; + cipher_xform->aead.algo = session->aead_algo; + cipher_xform->aead.aad_length = 8; /* no ESN support yet */ + cipher_xform->aead.iv.offset = CRYPTO_OP_IV_OFFSET; + cipher_xform->aead.iv.length = + session->iv_len + session->nonce_len; + cipher_xform->aead.key.data = session->key; + cipher_xform->aead.key.length = session->key_len; + cipher_xform->aead.digest_length = session->digest_len; + cipher_xform->next = NULL; + *xform_chain = cipher_xform; + } else { + /* set up data for cipher */ + cipher_xform->type = RTE_CRYPTO_SYM_XFORM_CIPHER; + cipher_xform->cipher.op = cipher_ops[direction]; + cipher_xform->cipher.algo = session->cipher_algo; + cipher_xform->cipher.key.data = session->key; + cipher_xform->cipher.key.length = session->key_len; + cipher_xform->cipher.iv.length = + session->iv_len + session->nonce_len; + cipher_xform->cipher.iv.offset = CRYPTO_OP_IV_OFFSET; + + /* set up data for authentication */ + auth_xform->type = RTE_CRYPTO_SYM_XFORM_AUTH; + auth_xform->auth.op = auth_ops[direction]; + auth_xform->auth.algo = session->auth_algo; + auth_xform->auth.key.data = + (const uint8_t *)session->auth_alg_key; + auth_xform->auth.key.length = session->auth_alg_key_len; + auth_xform->auth.digest_length = session->digest_len; + + /* set up transform chain */ + if (direction == XFRM_POLICY_IN) { + auth_xform->next = cipher_xform; + cipher_xform->next = NULL; + *xform_chain = auth_xform; + } else { + cipher_xform->next = auth_xform; + auth_xform->next = NULL; + *xform_chain = cipher_xform; + } + } +} + +int crypto_rte_setup_session(struct crypto_session *session, + enum cryptodev_type dev_type, uint8_t rte_cdev_id) +{ + struct rte_crypto_sym_xform cipher_xform, auth_xform, *xform_chain; + int err = 0; + + crypto_rte_setup_xform_chain(session, &cipher_xform, &auth_xform, + 
&xform_chain); + + session->rte_session = + rte_cryptodev_sym_session_create(crypto_session_pool); + if (!session->rte_session) { + RTE_LOG(ERR, DATAPLANE, "Could not create cryptodev session\n"); + return -ENOMEM; + } + + err = rte_cryptodev_sym_session_init( + rte_cdev_id, session->rte_session, xform_chain, + crypto_priv_sess_pools[dev_type]); + if (err) { + RTE_LOG(ERR, DATAPLANE, + "Could not initialize cryptodev session\n"); + rte_cryptodev_sym_session_free(session->rte_session); + session->rte_session = NULL; + } + + return err; +} + +int crypto_rte_destroy_session(struct crypto_session *session, + uint8_t rte_cdev_id) +{ + int err; + + if (!session->rte_session) + return 0; + + rte_cryptodev_sym_session_clear(rte_cdev_id, session->rte_session); + err = rte_cryptodev_sym_session_free(session->rte_session); + if (err) { + RTE_LOG(ERR, DATAPLANE, + "Failed to free cryptodev session : %s\n", + strerror(-err)); + return err; + } + + session->rte_session = NULL; + return err; +} + +int crypto_rte_op_alloc(struct rte_crypto_op *cops[], uint16_t count) +{ + uint16_t i; + + if (rte_crypto_op_bulk_alloc(crypto_op_pool, + RTE_CRYPTO_OP_TYPE_SYMMETRIC, + cops, count) != count) + return -ENOMEM; + + for (i = 0; i < count; i++) + cops[i]->sess_type = RTE_CRYPTO_OP_WITH_SESSION; + + return 0; +} + +void crypto_rte_op_free(struct rte_crypto_op *cops[], uint16_t count) +{ + for (uint16_t i = 0; i < count; i++) + rte_crypto_op_free(cops[i]); +} + +static inline int +crypto_rte_op_assoc_session(struct rte_crypto_op *cop, + struct crypto_session *session) +{ + int err; + + cop->status = RTE_CRYPTO_OP_STATUS_NOT_PROCESSED; + err = rte_crypto_op_attach_sym_session(cop, + session->rte_session); + return err; +} + +struct crypto_rte_pkt_batch { + uint8_t cdev_id; + uint16_t batch_size; + enum crypto_xfrm qid; + struct rte_crypto_op *cop_arr[MAX_CRYPTO_PKT_BURST]; +}; + +static inline +void crypto_rte_process_op_batch(struct crypto_rte_pkt_batch *batch) +{ + uint8_t enqueued = 0, 
dequeued = 0, tmp_cnt; + struct crypto_pkt_ctx *ctx; + struct rte_crypto_op *cop; + + while (dequeued < batch->batch_size) { + tmp_cnt = rte_cryptodev_enqueue_burst( + batch->cdev_id, batch->qid, + &batch->cop_arr[enqueued], + batch->batch_size - enqueued); + enqueued += tmp_cnt; + + tmp_cnt = rte_cryptodev_dequeue_burst( + batch->cdev_id, batch->qid, + &batch->cop_arr[dequeued], + batch->batch_size - dequeued); + dequeued += tmp_cnt; + + if (!tmp_cnt) + break; + } + + if (unlikely(dequeued < batch->batch_size)) + IPSEC_CNT_INC_BY(CRYPTO_OP_FAILED, + (batch->batch_size - dequeued)); + + for (tmp_cnt = 0; tmp_cnt < dequeued; tmp_cnt++) { + cop = batch->cop_arr[tmp_cnt]; + if (likely(cop->status == + RTE_CRYPTO_OP_STATUS_SUCCESS)) { + + ctx = *(rte_crypto_op_ctod_offset( + cop, + struct crypto_pkt_ctx **, + CRYPTO_OP_CTX_OFFSET)); + ctx->status = 0; + } else + IPSEC_CNT_INC(CRYPTO_OP_FAILED); + } + batch->batch_size = 0; +} + +static inline void +crypto_rte_iv_fill(uint8_t *iv, struct crypto_session *s, + char *cur_iv) +{ + memcpy(iv, s->nonce, s->nonce_len); + memcpy(iv + s->nonce_len, cur_iv, s->iv_len); +} + +static inline void +crypto_rte_sop_ciph_auth_prepare(struct rte_crypto_sym_op *sop, + uint32_t l3_hdr_len, uint8_t udp_len, + uint32_t esp_len, uint32_t payload_len, + uint16_t icv_ofs) +{ + struct rte_mbuf *m = sop->m_src; + uint16_t esp_start = dp_pktmbuf_l2_len(m) + l3_hdr_len + udp_len; + + sop->cipher.data.offset = esp_start + esp_len; + sop->cipher.data.length = payload_len; + + sop->auth.data.offset = esp_start; + sop->auth.data.length = esp_len + payload_len; + + sop->auth.digest.data = rte_pktmbuf_mtod_offset(m, void*, icv_ofs); + sop->auth.digest.phys_addr = rte_pktmbuf_iova_offset(m, icv_ofs); +} + +/* + * adjust last segment if necessary to hold the entire ICV + */ +static inline void +crypto_rte_fixup_icv(struct rte_mbuf *m, uint16_t icv_len) +{ + struct rte_mbuf *p_mbuf, *l_mbuf; + uint8_t icv[icv_len], *data; + uint16_t icv1_len, icv2_len, 
icv_ofs; + + p_mbuf = NULL; + l_mbuf = m; + while (l_mbuf->next != NULL) { + p_mbuf = l_mbuf; + l_mbuf = l_mbuf->next; + } + + if (l_mbuf->data_len >= icv_len) + return; + + icv2_len = icv_len - l_mbuf->data_len; + icv1_len = icv_len - icv2_len; + icv_ofs = p_mbuf->data_len - icv1_len; + data = rte_pktmbuf_mtod_offset(p_mbuf, uint8_t *, icv_ofs); + memcpy(icv, data, icv1_len); + data = rte_pktmbuf_mtod(l_mbuf, uint8_t *); + memcpy(&icv[icv1_len], data, icv2_len); + memcpy(data, icv, icv_len); + l_mbuf->data_len += icv1_len; + l_mbuf->pkt_len += icv1_len; + p_mbuf->data_len -= icv1_len; + p_mbuf->pkt_len -= icv1_len; +} + + +/* + * helper function to fill crypto_sym op for aead algorithms + */ +static inline void +crypto_rte_sop_aead_prepare(struct rte_crypto_sym_op *sop, + uint32_t l3_hdr_len, uint8_t udp_len, + uint32_t esp_len, uint32_t payload_len, + uint16_t icv_len, bool encrypt) +{ + struct rte_mbuf *m = sop->m_src, *last_seg = m; + uint16_t esp_start = dp_pktmbuf_l2_len(m) + l3_hdr_len + udp_len; + uint16_t icv_ofs; + + sop->aead.data.offset = esp_start + esp_len; + sop->aead.data.length = payload_len; + + sop->aead.aad.data = rte_pktmbuf_mtod_offset(m, void *, esp_start); + sop->aead.aad.phys_addr = rte_pktmbuf_iova_offset(m, esp_start); + + if (unlikely(m->nb_segs > 1)) { + if (!encrypt) + crypto_rte_fixup_icv(m, icv_len); + + last_seg = rte_pktmbuf_lastseg(m); + } + icv_ofs = last_seg->data_len - icv_len; + sop->aead.digest.data = rte_pktmbuf_mtod_offset(last_seg, void *, + icv_ofs); + sop->aead.digest.phys_addr = + rte_pktmbuf_iova_offset(last_seg, icv_ofs); +} + +/* + * setup crypto op and crypto sym op for ESP inbound packet. 
+ */ +static inline int +crypto_rte_inbound_cop_prepare(struct rte_crypto_op *cop, + struct crypto_session *session, + struct rte_mbuf *m, uint32_t l3_hdr_len, + uint8_t udp_len, uint32_t esp_len, + char *iv, uint32_t payload_len) +{ + int err = 0; + struct rte_crypto_sym_op *sop; + uint8_t *ivc; + uint16_t icv_ofs, icv_len; + + memcpy(session->iv, iv, session->iv_len); + icv_len = crypto_session_digest_len(session); + icv_ofs = rte_pktmbuf_pkt_len(m) - icv_len; + + /* fill sym op fields */ + sop = cop->sym; + + if (session->aead_algo == RTE_CRYPTO_AEAD_AES_GCM) { + crypto_rte_sop_aead_prepare(sop, l3_hdr_len, + udp_len, esp_len, + payload_len, icv_len, false); + + /* fill AAD IV (located inside crypto op) */ + ivc = rte_crypto_op_ctod_offset(cop, uint8_t *, + CRYPTO_OP_IV_OFFSET); + crypto_rte_iv_fill(ivc, session, iv); + return err; + } + + switch (session->cipher_algo) { + case RTE_CRYPTO_CIPHER_AES_CBC: + case RTE_CRYPTO_CIPHER_3DES_CBC: + crypto_rte_sop_ciph_auth_prepare(sop, l3_hdr_len, + udp_len, esp_len, + payload_len, icv_ofs); + + /* copy iv from the input packet to the cop */ + ivc = rte_crypto_op_ctod_offset( + cop, uint8_t *, CRYPTO_OP_IV_OFFSET); + crypto_rte_iv_fill(ivc, session, iv); + break; + case RTE_CRYPTO_CIPHER_NULL: + break; + + default: + err = -EINVAL; + } + + return err; +} + +/* + * setup crypto op and crypto sym op for ESP outbound packet. 
+ */ +static inline int +crypto_rte_outbound_cop_prepare(struct rte_crypto_op *cop, + struct crypto_session *session, + struct rte_mbuf *m, uint32_t l3_hdr_len, + uint8_t udp_len, uint32_t esp_len, + char *iv, uint32_t payload_len) +{ + int err = 0; + struct rte_crypto_sym_op *sop; + uint8_t *ivc; + uint16_t icv_ofs, icv_len; + + icv_ofs = dp_pktmbuf_l2_len(m) + l3_hdr_len + udp_len + esp_len + + payload_len; /* ICV offset: dp_pktmbuf_l2_len(m) plus l3_hdr_len, udp_len, esp_len and payload_len */ + icv_len = crypto_session_digest_len(session); + + /* fill sym op fields */ + sop = cop->sym; + + if (session->aead_algo == RTE_CRYPTO_AEAD_AES_GCM) { + crypto_rte_sop_aead_prepare(sop, l3_hdr_len, udp_len, + esp_len, payload_len, + icv_len, true); + + /* fill AAD IV (located inside crypto op) */ + ivc = rte_crypto_op_ctod_offset(cop, uint8_t *, + CRYPTO_OP_IV_OFFSET); + crypto_rte_iv_fill(ivc, session, iv); + return err; /* AES-GCM is complete here; the cipher switch below is skipped */ + } + + switch (session->cipher_algo) { + case RTE_CRYPTO_CIPHER_AES_CBC: + case RTE_CRYPTO_CIPHER_3DES_CBC: + crypto_rte_sop_ciph_auth_prepare(sop, l3_hdr_len, + udp_len, esp_len, + payload_len, + icv_ofs); + + /* copy iv from the input packet to the cop */ + ivc = rte_crypto_op_ctod_offset( + cop, uint8_t *, CRYPTO_OP_IV_OFFSET); + crypto_rte_iv_fill(ivc, session, iv); + break; + + case RTE_CRYPTO_CIPHER_NULL: + break; + + default: + err = -EINVAL; /* every other cipher algorithm is rejected */ + } + + return err; +} + +ALWAYS_INLINE uint16_t +crypto_rte_xform_packets(struct crypto_pkt_ctx *cctx_arr[], uint16_t count) +{ + int err; + struct crypto_session *session; + enum crypto_xfrm qid; + uint16_t i, text_len, hdr_len; + struct crypto_rte_pkt_batch pkt_batch; + struct crypto_pkt_ctx *cctx, **ctx_ptr; + bool encrypt; + struct rte_crypto_op *cop; + struct crypto_pkt_buffer *cpb = cpbdb[dp_lcore_id()]; + uint16_t bad_idx[count], bad_cnt = 0; /* NOTE(review): variable-length array sized by count; confirm callers never pass count == 0 */ + + pkt_batch.cdev_id = 0; + pkt_batch.qid = 0; + pkt_batch.batch_size = 0; + + assert(count <= MAX_CRYPTO_PKT_BURST); + + for (i = 0; i < count; i++) { + crypto_prefetch_ctx(cctx_arr, count, i); + + crypto_prefetch_ops(i, count); + + cctx =
cctx_arr[i]; + session = cctx->sa->session; + encrypt = (cctx->sa->dir == CRYPTO_DIR_OUT); + + if (unlikely(cctx->mbuf->next && session->cipher_init)) { /* mbuf has a further segment: submit the pending batch, then handle this packet via esp_generate_chain() */ + crypto_rte_process_op_batch(&pkt_batch); + hdr_len = encrypt ? cctx->out_hdr_len : cctx->iphlen; + text_len = encrypt ? cctx->plaintext_size : + cctx->ciphertext_len; + err = esp_generate_chain(cctx->sa, cctx->mbuf, + hdr_len, cctx->esp, cctx->iv, + text_len + cctx->esp_len, + encrypt); + if (err) + cctx_arr[i]->status = -1; + continue; + } + + cop = cpb->cops[i]; /* crypto op slot i of the buffer selected by dp_lcore_id() */ + + err = crypto_rte_op_assoc_session(cop, session); + if (unlikely(err)) { + cctx->status = -1; + IPSEC_CNT_INC(CRYPTO_OP_ASSOC_FAILED); + continue; + } + cop->sym->m_src = cctx->mbuf; + if (encrypt) { + err = crypto_rte_outbound_cop_prepare( + cop, session, cctx->mbuf, + cctx->out_hdr_len, + cctx->sa->udp_encap, cctx->esp_len, + (char *)cctx->iv, cctx->plaintext_size); + qid = CRYPTO_ENCRYPT; + } else { + err = crypto_rte_inbound_cop_prepare( + cop, session, cctx->mbuf, cctx->iphlen, + cctx->sa->udp_encap, cctx->esp_len, + (char *)cctx->iv, cctx->ciphertext_len); + qid = CRYPTO_DECRYPT; + } + if (unlikely(err)) { + cctx->status = -1; + IPSEC_CNT_INC(CRYPTO_OP_PREPARE_FAILED); + continue; + } + + /* + * Explicitly set status to failure for each packet + * being handed to the PMD. The status will be set to 0 + * again after successful processing.
This allows us to handle + * any cases of mismatch between enqueue and dequeue + */ + cctx->status = -1; + ctx_ptr = rte_crypto_op_ctod_offset(cop, + struct crypto_pkt_ctx **, + CRYPTO_OP_CTX_OFFSET); + *ctx_ptr = cctx; /* store the packet context inside the crypto op at CRYPTO_OP_CTX_OFFSET */ + + crypto_prefetch_ctx_data(cctx_arr, count, i); + + if (pkt_batch.cdev_id != cctx->sa->rte_cdev_id || + pkt_batch.qid != qid) { /* device or queue differs from the current batch: submit what is batched so far */ + crypto_rte_process_op_batch(&pkt_batch); + pkt_batch.cdev_id = cctx->sa->rte_cdev_id; + pkt_batch.qid = qid; + } + pkt_batch.cop_arr[pkt_batch.batch_size] = cop; + pkt_batch.batch_size++; + } + crypto_rte_process_op_batch(&pkt_batch); + for (i = 0; i < count; i++) + if (cctx_arr[i]->status < 0) + bad_idx[bad_cnt++] = i; /* collect indices of packets left with a negative status */ + move_bad_mbufs(cctx_arr, count, bad_idx, bad_cnt); + return count - bad_cnt; /* number of packets whose status is not negative */ +} diff --git a/src/crypto/crypto_rte_pmd.h b/src/crypto/crypto_rte_pmd.h new file mode 100644 index 00000000..3b70050d --- /dev/null +++ b/src/crypto/crypto_rte_pmd.h @@ -0,0 +1,73 @@ +/*- + * Copyright (c) 2020, AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ +#ifndef CRYPTO_RTE_PMD_H + +#define CRYPTO_RTE_PMD_H + +#include +#include +#include + +#define BITS_PER_BYTE 8 + +struct crypto_session; +struct sadb_sa; +struct crypto_pkt_ctx; + +int crypto_rte_setup(void); + +void crypto_rte_shutdown(void); + +int +crypto_rte_set_session_parameters(struct crypto_session *ctx, + const struct xfrm_algo *algo_crypt, + const struct xfrm_algo_auth *algo_trunc_auth, + const struct xfrm_algo *algo_auth); + +/* + * Crypto devices to instantiate in descending order of priority. + * Whenever there is a need to instantiate a crypto device, the + * available devices/drivers are checked starting with the first in this + * list.
+ */ +enum cryptodev_type { + CRYPTODEV_MIN, + CRYPTODEV_AESNI_GCM = CRYPTODEV_MIN, /* first entry; same value as CRYPTODEV_MIN */ + CRYPTODEV_AESNI_MB, + CRYPTODEV_NULL, + CRYPTODEV_OPENSSL, + CRYPTODEV_MAX /* end marker: one more than CRYPTODEV_OPENSSL */ +}; + +#define MAX_CRYPTO_PMD 128 + +int crypto_rte_select_pmd_type(enum rte_crypto_cipher_algorithm cipher_algo, + enum rte_crypto_aead_algorithm aead_algo, + enum cryptodev_type *dev_type, + bool *setup_openssl); + +int crypto_rte_create_pmd(int cpu_socket, uint8_t pmd_dev_id, + enum cryptodev_type dev_type, char dev_name[], + uint8_t max_name_len, int *rte_dev_id); + +int crypto_rte_destroy_pmd(enum cryptodev_type dev_type, char dev_name[], + int pmd_dev_id); + +int crypto_rte_setup_session(struct crypto_session *session, + enum cryptodev_type dev_type, + uint8_t rte_cdev_id); + +int crypto_rte_destroy_session(struct crypto_session *session, + uint8_t rte_cdev_id); + +int crypto_rte_op_alloc(struct rte_crypto_op *cops[], uint16_t count); + +void crypto_rte_op_free(struct rte_crypto_op *cops[], uint16_t count); + +uint16_t crypto_rte_xform_packets(struct crypto_pkt_ctx *ctx_arr[], + uint16_t count); + +#endif /* CRYPTO_RTE_PMD_H */ diff --git a/src/crypto/crypto_sadb.c b/src/crypto/crypto_sadb.c index dfec8aaa..16053af9 100644 --- a/src/crypto/crypto_sadb.c +++ b/src/crypto/crypto_sadb.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015-2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -28,13 +28,15 @@ #include "esp.h" #include "if_var.h" #include "json_writer.h" +#include "lcore_sched.h" #include "route.h" #include "route_v6.h" #include "urcu.h" #include "util.h" #include "vplane_debug.h" #include "vplane_log.h" -#include "vrf.h" +#include "vrf_internal.h" +#include "xfrm_client.h" #define SADB_DEBUG(args...)
\ DP_DEBUG(CRYPTO, DEBUG, SADB, args) @@ -56,14 +58,20 @@ struct sadb_peer { struct cds_lfht_node ht_node; struct cds_list_head sa_list; - xfrm_address_t dst; - uint16_t family; - char SPARE[6]; - struct rcu_head peer_rcu; - /* Cacheline1 -8 bytes */ + uint32_t req_id; /* hash key: sadb_tunl_match() compares peers on this field only */ struct crypto_overhead_list observers; + char SPARE[12]; + /* --- cacheline 1 boundary (64 bytes) --- */ + struct rcu_head peer_rcu; + }; +/* peer_rcu and observers are both control plane fields. + * Ensure that the other fields do not reach into the 2nd cache line. + */ +static_assert(offsetof(struct sadb_peer, SPARE) < 64, + "first cache line exceeded"); + /* * Key for hash table entries for the F(spi,dest) to * output SA hash tree lookup @@ -74,6 +82,8 @@ struct sadb_spi_out_key { uint16_t family; }; +static uint64_t sa_epoch; /* pre-incremented and stored in sa->epoch by crypto_sadb_new_sa() */ + /* * Hash seed used when hashing the spi and dest address * for an output SA lookup. @@ -84,9 +94,8 @@ static unsigned int sadb_spi_out_seed; * This is a utility structure used for looking up * a peer in hash table. */ -struct sadb_peer_key { - const xfrm_address_t *dst; - uint16_t family; +struct sadb_tunl_key { + uint32_t req_id; /* the whole lookup key is the reqid */ }; /* @@ -100,7 +109,7 @@ static struct cds_lfht *spi_in_hash_table; * the SPI) in to the hash table, not (possibly non-unique) * encrytion SAs. */ -static unsigned int sadb_spi_in_hash(uint32_t *spi_p) +static unsigned int sadb_spi_in_hash(const uint32_t *spi_p) { return *spi_p; } @@ -223,11 +232,12 @@ static int sadb_spi_out_match(struct cds_lfht_node *node, const void *key) } /* - * Used by the fast path to lookup an output (encrypt) SA by SPI and dest addr. + * Find a specific SA.
*/ -struct sadb_sa *sadb_lookup_sa_outbound(vrfid_t vrfid, - const xfrm_address_t *dst, - uint16_t family, uint32_t spi) +static struct sadb_sa * +sadb_lookup_sa_outbound_noblock(vrfid_t vrfid, + const xfrm_address_t *dst, + uint16_t family, uint32_t spi) { struct cds_lfht_node *node; struct cds_lfht_iter iter; @@ -252,19 +262,27 @@ struct sadb_sa *sadb_lookup_sa_outbound(vrfid_t vrfid, sa = node ? caa_container_of(node, struct sadb_sa, spi_ht_node) : NULL; + return sa; /* the sa->blocked test and the drop counter now live in sadb_lookup_sa_outbound() */ +} - if (!sa) { - IPSEC_CNT_INC(DROPPED_NO_SPI_TO_SA); - return NULL; - } +/* + * Used by the fast path to lookup an output (encrypt) SA by SPI and dest addr. + */ +struct sadb_sa *sadb_lookup_sa_outbound(vrfid_t vrfid, + const xfrm_address_t *dst, + uint16_t family, uint32_t spi) +{ + struct sadb_sa *sa; - if (sa->blocked) + sa = sadb_lookup_sa_outbound_noblock(vrfid, dst, family, spi); + if (!sa) + IPSEC_CNT_INC(DROPPED_NO_SPI_TO_SA); /* miss: count it; the NULL sa is returned at the end */ + else if (sa->blocked) /* a blocked SA is reported as not found */ return NULL; return sa; } - static bool sadb_add_sa_to_spi_out_hash(struct sadb_sa *sa, struct crypto_vrf_ctx *vrf_ctx) { @@ -312,41 +330,32 @@ static void sadb_remove_sa_from_spi_out_hash(struct sadb_sa *sa, } /* - * sadb_peer_hash() + * sadb_tunl_hash() * * Address hash function used to select a bucket in * the SADB hash table. */ -static unsigned long sadb_peer_hash(struct sadb_peer_key *key) +static unsigned long sadb_tunl_hash(struct sadb_tunl_key *key) { - const xfrm_address_t *dst = key->dst; - unsigned long h; - - if (key->family == AF_INET) - h = dst->a4; - else - h = dst->a6[0] + dst->a6[1] + dst->a6[2] + dst->a6[3]; - - return h; + return key->req_id; /* the reqid itself is used as the hash value */ } /* - * sadb_peer_match() + * sadb_tunl_match() * * Comparison function used when searching the peer hash table. * Returns TRUE if the peer containing node matches the search key.
*/ -static int sadb_peer_match(struct cds_lfht_node *node, const void *key) +static int sadb_tunl_match(struct cds_lfht_node *node, const void *key) { - const struct sadb_peer_key *search_key; + const struct sadb_tunl_key *search_key; const struct sadb_peer *peer; - search_key = (const struct sadb_peer_key *)key; + search_key = (const struct sadb_tunl_key *)key; peer = caa_container_of(node, const struct sadb_peer, ht_node); - return ((peer->family == search_key->family) && - xfrm_addr_eq(&peer->dst, search_key->dst, peer->family)); + return (peer->req_id == search_key->req_id); /* match on reqid only; address and family are no longer part of the key */ } /* @@ -358,76 +367,63 @@ static int sadb_peer_match(struct cds_lfht_node *node, const void *key) * This can be called from any thread that is registered as * an RCU read and is in a RCU read critical section */ -static struct sadb_peer *sadb_lookup_peer(const xfrm_address_t *dst, - uint16_t family, vrfid_t vrfid) +static int sadb_lookup_peer(vrfid_t vrfid, + uint32_t req_id, struct sadb_peer **peer_ret) { - struct sadb_peer_key search_key; + struct sadb_tunl_key search_key; struct crypto_vrf_ctx *vrf_ctx; struct cds_lfht_node *node; struct cds_lfht_iter iter; + *peer_ret = NULL; /* callers always get a defined pointer, even on error */ + vrf_ctx = crypto_vrf_find(vrfid); if (!vrf_ctx) - return NULL; + return -EINVAL; /* crypto_vrf_find() found no context for vrfid */ - search_key.family = family; - search_key.dst = dst; + search_key.req_id = req_id; - cds_lfht_lookup(vrf_ctx->sadb_hash_table, sadb_peer_hash(&search_key), - sadb_peer_match, &search_key, &iter); + cds_lfht_lookup(vrf_ctx->sadb_hash_table, sadb_tunl_hash(&search_key), + sadb_tunl_match, &search_key, &iter); node = cds_lfht_iter_get_node(&iter); - return node ? caa_container_of(node, struct sadb_peer, ht_node) : NULL; + *peer_ret = node ? + caa_container_of(node, struct sadb_peer, ht_node) : NULL; + return 0; /* 0 is also returned on a miss; *peer_ret is NULL in that case */ } -/* - * sadb_lookup_or_create_peer() - * - * Lookup and IPsec peer in the hash table using its address. - * If there is no hash table entry for the peer, create one - * and insert into the table.
- * - * NOTE: This may only be called from the main thread. - */ -static struct sadb_peer *sadb_lookup_or_create_peer(const xfrm_address_t *dst, - uint16_t family, - vrfid_t vrfid) +static int sadb_create_peer(vrfid_t vrfid, + uint32_t req_id, struct sadb_peer **peer_ret) { + struct crypto_vrf_ctx *vrf_ctx; struct cds_lfht_node *ret_node; - struct sadb_peer_key key; + struct sadb_tunl_key key; struct sadb_peer *peer; - struct crypto_vrf_ctx *vrf_ctx; - peer = sadb_lookup_peer(dst, family, vrfid); - if (peer) - return peer; + *peer_ret = NULL; /* stays NULL on every error return below */ + /* + * Lookup/create VRF context + */ + vrf_ctx = crypto_vrf_get(vrfid); + if (!vrf_ctx) + return -EINVAL; /* checked before the allocation, so nothing needs freeing here */ peer = zmalloc_aligned(sizeof(*peer)); if (!peer) { SADB_ERR("Failed to allocate IPsec peer\n"); - return NULL; + return -ENOMEM; } - memcpy(&peer->dst, dst, sizeof(peer->dst)); - peer->family = family; + peer->req_id = req_id; CDS_INIT_LIST_HEAD(&peer->sa_list); cds_lfht_node_init(&peer->ht_node); TAILQ_INIT(&peer->observers); - /* - * Lookup/create VRF context - */ - vrf_ctx = crypto_vrf_get(vrfid); - if (!vrf_ctx) - return NULL; - - key.dst = &peer->dst; - key.family = peer->family; - + key.req_id = peer->req_id; ret_node = cds_lfht_add_unique(vrf_ctx->sadb_hash_table, - sadb_peer_hash(&key), - sadb_peer_match, &key, &peer->ht_node); + sadb_tunl_hash(&key), + sadb_tunl_match, &key, &peer->ht_node); /* * We've just done a lookup that didn't find the peer. If we're * now told that the key we're trying to insert is not unique, @@ -436,20 +432,52 @@ static struct sadb_peer *sadb_lookup_or_create_peer(const xfrm_address_t *dst, if (ret_node != &peer->ht_node) { SADB_ERR("Failed to insert IPsec peer in hash table\n"); free(peer); - return NULL; + return -ENOTUNIQ; /* cds_lfht_add_unique() returned a different node than the one offered */ } vrf_ctx->count_of_peers++; - return peer; + *peer_ret = peer; + return 0; +} +/* + * sadb_lookup_or_create_peer() + * + * Look up an IPsec peer in the hash table using its reqid.
+ * If there is no hash table entry for the peer, create one + * and insert into the table. + * + * NOTE: This may only be called from the main thread. + */ +static int sadb_lookup_or_create_peer(vrfid_t vrfid, + uint32_t req_id, + struct sadb_peer **peer_ret) +{ + struct sadb_peer *peer; + int rc; + + *peer_ret = NULL; + + rc = sadb_lookup_peer(vrfid, req_id, &peer); + if (rc < 0) + return rc; /* NOTE(review): sadb_lookup_peer() fails when crypto_vrf_find() finds no context, so sadb_create_peer() and its crypto_vrf_get() are never reached in that case - confirm this is intended */ + if (peer) { + *peer_ret = peer; + return 0; + } + rc = sadb_create_peer(vrfid, req_id, &peer); + + *peer_ret = peer; /* sadb_create_peer() leaves peer NULL when it fails */ + + return rc; } /* - * sadb_peer_rcu_free() + * sadb_tunl_rcu_free() * * RCU callback to free a peer that has been removed * from the hash table. */ -static void sadb_peer_rcu_free(struct rcu_head *head) +static void sadb_tunl_rcu_free(struct rcu_head *head) { free(caa_container_of(head, struct sadb_peer, peer_rcu)); } @@ -468,7 +496,7 @@ static void sadb_remove_peer(struct sadb_peer *peer, vrfid_t vrfid) return; cds_lfht_del(vrf_ctx->sadb_hash_table, &peer->ht_node); - call_rcu(&peer->peer_rcu, sadb_peer_rcu_free); + call_rcu(&peer->peer_rcu, sadb_tunl_rcu_free); vrf_ctx->count_of_peers--; } @@ -514,25 +542,66 @@ static void sadb_refresh_osbervers_of_sa(struct sadb_sa *sa, } } +/* + * Look for another SA with the same reqid but a different SPI. Return the one with the highest epoch. + */ +static struct sadb_sa * +sadb_find_old_sa(struct sadb_sa *sa, vrfid_t vrfid, struct sadb_peer **ret_peer, + uint32_t req_id) +{ + struct sadb_peer *peer; + struct cds_list_head *this_entry; + struct sadb_sa *tmp_sa, *match_sa = NULL; + int rc; + + *ret_peer = NULL; + + rc = sadb_lookup_peer(vrfid, req_id, &peer); + if (rc < 0 || !peer) + return NULL; /* *ret_peer stays NULL when there is no peer */ + + cds_list_for_each(this_entry, &peer->sa_list) { + tmp_sa = cds_list_entry(this_entry, struct sadb_sa, + peer_links); + if (tmp_sa->reqid == sa->reqid && + tmp_sa->spi != sa->spi) { + if (!match_sa) + match_sa = tmp_sa; + else if (match_sa->epoch < tmp_sa->epoch) /* keep the candidate with the larger epoch */ + match_sa = tmp_sa; + } + } + + *ret_peer = peer; /* the peer is reported even when match_sa is NULL */ + return match_sa; +} /* * Look up for a duplicate SA.
*/ static struct sadb_sa * -sadb_find_matching_sa(struct sadb_sa *sa, bool ign_pending_del, vrfid_t vrfid) +sadb_find_matching_sa(struct sadb_sa *sa, bool ign_pending_del, vrfid_t vrfid, + struct sadb_peer **matching_peer, uint32_t req_id) { struct sadb_peer *peer; struct cds_list_head *this_entry; struct sadb_sa *tmp_sa; + int rc; - peer = sadb_lookup_or_create_peer(&sa->dst, sa->family, vrfid); - if (!peer) + rc = sadb_lookup_peer(vrfid, req_id, &peer); /* lookup only: this function no longer creates a peer */ + if (rc < 0 || !peer) { + *matching_peer = NULL; return NULL; + } + + *matching_peer = peer; /* the peer is reported even if no SA on its list matches */ cds_list_for_each(this_entry, &peer->sa_list) { tmp_sa = cds_list_entry(this_entry, struct sadb_sa, peer_links); if (tmp_sa->reqid == sa->reqid && tmp_sa->spi != sa->spi && + tmp_sa->family == sa->family && + xfrm_addr_eq(&tmp_sa->dst, &sa->dst, sa->family) && /* peers are keyed by reqid alone, so family and destination are compared per SA */ ((!ign_pending_del && !tmp_sa->pending_del) || ign_pending_del)) return tmp_sa; @@ -546,30 +615,35 @@ sadb_find_matching_sa(struct sadb_sa *sa, bool ign_pending_del, vrfid_t vrfid) * * This function should only be called from the main thread.
*/ -static int sadb_insert_sa(struct sadb_sa *sa, struct crypto_vrf_ctx *vrf_ctx) +static int +sadb_insert_sa(struct sadb_sa *sa, struct crypto_vrf_ctx *vrf_ctx, + struct sadb_peer *peer, uint32_t req_id) { - struct sadb_peer *peer; + int rc; if (!sa) - return -1; + return -EINVAL; if (!sadb_add_sa_to_spi_in_hash(sa)) { SADB_ERR("Failed to add SA to SPI in hash table"); - return -1; + return -EINVAL; } if (!sadb_add_sa_to_spi_out_hash(sa, vrf_ctx)) { SADB_ERR("Failed to add SA to SPI out hash table"); - return -1; + return -EINVAL; /* NOTE(review): the SPI-in hash insertion above is not undone on this path - confirm that is safe */ } - peer = sadb_lookup_or_create_peer(&sa->dst, sa->family, - vrf_ctx->vrfid); - if (!peer) { + if (!peer) + rc = sadb_create_peer(vrf_ctx->vrfid, req_id, &peer); /* caller supplied no peer: create one for this reqid */ + else + rc = 0; + + if (rc < 0) { sadb_remove_sa_from_spi_in_hash(sa); sadb_remove_sa_from_spi_out_hash(sa, vrf_ctx->vrfid); SADB_ERR("Could not insert SA, failed to find IPsec peer\n"); - return -2; + return rc; /* propagate the errno-style code from sadb_create_peer() */ } cds_list_add_rcu(&sa->peer_links, &peer->sa_list); @@ -580,7 +654,7 @@ static int sadb_insert_sa(struct sadb_sa *sa, struct crypto_vrf_ctx *vrf_ctx) */ sadb_refresh_osbervers_of_sa(sa, peer, false); - return 1; + return 0; /* success is now 0; crypto_sadb_new_sa() tests for rc < 0 */ } /* @@ -598,17 +672,19 @@ static struct sadb_sa *sadb_remove_sa(const xfrm_address_t *dst, const xfrm_address_t *src, uint32_t spi, uint16_t family, - vrfid_t vrfid) + vrfid_t vrfid, + uint32_t req_id) { struct cds_list_head *this_entry, *next_entry; struct sadb_peer *peer; struct sadb_sa *sa; + int rc; if (!dst || !src || ((family != AF_INET) && (family != AF_INET6))) return NULL; - peer = sadb_lookup_peer(dst, family, vrfid); - if (!peer) + rc = sadb_lookup_peer(vrfid, req_id, &peer); + if (rc < 0 || !peer) return NULL; /* @@ -617,7 +693,8 @@ static struct sadb_sa *sadb_remove_sa(const xfrm_address_t *dst, */ cds_list_for_each_prev_safe(this_entry, next_entry, &peer->sa_list) { sa = cds_list_entry(this_entry, struct sadb_sa, peer_links); - if (sa->spi == spi) { + if ((sa->spi == spi) && (sa->family == family) && + xfrm_addr_eq(&sa->dst, dst,
family)) { cds_list_del_rcu(&sa->peer_links); goto done; } @@ -649,6 +726,15 @@ static struct sadb_sa *sadb_remove_sa(const xfrm_address_t *dst, return sa; } +static struct sadb_sa * +sadb_lookup_inbound_noblock(uint32_t spi) +{ + struct sadb_sa *sa; + + sa = sadb_lookup_sa_by_spi_in(spi); + return sa; /* returned without looking at sa->blocked */ +} + /* * sadb_lookup_inbound() * @@ -662,22 +748,20 @@ struct sadb_sa *sadb_lookup_inbound(uint32_t spi) { struct sadb_sa *sa; - sa = sadb_lookup_sa_by_spi_in(spi); - if (!sa) { + sa = sadb_lookup_inbound_noblock(spi); + + if (!sa) IPSEC_CNT_INC(DROPPED_NO_SPI_TO_SA); + else if (sa->blocked) /* a blocked SA is reported as not found */ return NULL; - } - - if (!sa->blocked) - return sa; - else - return NULL; + return sa; } static void sadb_sa_destroy(struct sadb_sa *sa) { cipher_teardown_ctx(sa); + crypto_sa_unbind_rcu(sa->del_pmd_dev_id); /* uses del_pmd_dev_id, so that field must be set before an SA is destroyed */ free(sa); } @@ -686,6 +770,49 @@ static enum crypto_xfrm crypto_sa_to_xfrm(struct sadb_sa *sa) return sa->dir == CRYPTO_DIR_IN ? CRYPTO_DECRYPT : CRYPTO_ENCRYPT; } + +/* + * This function is invoked at the time of SA creation to + * set the direction and set up the session in the driver + */ +static inline int +crypto_session_set_direction(struct sadb_sa *sa, int direction, + bool setup_openssl) +{ + struct crypto_session *ctx = sa->session; + enum cryptodev_type dev_type = CRYPTODEV_MIN; + int err = 0; + + if (unlikely(ctx->direction == -1)) { /* only a session whose direction is still -1 is set up; otherwise 0 is returned */ + ctx->direction = direction; + err = crypto_pmd_get_info(sa->pmd_dev_id, + &sa->rte_cdev_id, + &dev_type); + if (err) { + SADB_ERR("Failed to get PMD info for SA\n"); + return err; + } + + if (setup_openssl) { + err = crypto_openssl_session_setup(ctx); + if (err) { + SADB_ERR("Failed to set up openssl session\n"); + return err; + } + } + + err = crypto_rte_setup_session(ctx, dev_type, + sa->rte_cdev_id); + if (err) { + SADB_ERR("Failed to set up rte session for SA\n"); + crypto_openssl_session_teardown(ctx); /* NOTE(review): runs even when setup_openssl is false - confirm teardown is safe on a session openssl never set up */ + return err; + } + } + + return err; +} + /* * crypto_sadb_new_sa() * @@ -693,9 +820,10 @@ static enum crypto_xfrm
crypto_sa_to_xfrm(struct sadb_sa *sa) * * This function is called from the main thread only. */ -void crypto_sadb_new_sa(const struct xfrm_usersa_info *sa_info, +int crypto_sadb_new_sa(const struct xfrm_usersa_info *sa_info, const struct xfrm_algo *crypto_algo, - const struct xfrm_algo_auth *auth_algo, + const struct xfrm_algo_auth *auth_trunc_algo, + const struct xfrm_algo *auth_algo, const struct xfrm_encap_tmpl *tmpl, uint32_t mark_val, uint32_t extra_flags, vrfid_t vrf_id) @@ -704,15 +832,19 @@ void crypto_sadb_new_sa(const struct xfrm_usersa_info *sa_info, struct sadb_sa *sa, *retiring_sa; struct crypto_vrf_ctx *vrf_ctx; struct ifnet *ifp; + int pmd_dev_id; + int err, rc; + bool setup_openssl = false; /* may be set by crypto_allocate_pmd() through the pointer passed to it */ + struct sadb_peer *peer; if (!sa_info || !crypto_algo) { SADB_ERR("Bad parameters on attempt to add SA\n"); - return; + return -EINVAL; } vrf_ctx = crypto_vrf_get(vrf_id); if (!vrf_ctx) - return; + return -EINVAL; SADB_DEBUG("NEWSA SPI = %x Mark = %x VRF %d\n", ntohl(sa_info->id.spi), mark_val, vrf_id); @@ -720,7 +852,7 @@ void crypto_sadb_new_sa(const struct xfrm_usersa_info *sa_info, sa = zmalloc_aligned(sizeof(*sa)); if (!sa) { SADB_ERR("Failed to allocate SA\n"); - return; + return -ENOMEM; } sa->family = sa_info->family; @@ -732,6 +864,7 @@ void crypto_sadb_new_sa(const struct xfrm_usersa_info *sa_info, sa->byte_limit = lft->hard_byte_limit; sa->packet_limit = lft->hard_packet_limit; sa->overlay_vrf_id = vrf_id; + sa->epoch = ++sa_epoch; /* creation-order stamp; sadb_find_old_sa() compares these */ if (sa_info->family == AF_INET) { if (is_local_ipv4(VRF_DEFAULT_ID, sa_info->id.daddr.a4)) @@ -750,24 +883,52 @@ void crypto_sadb_new_sa(const struct xfrm_usersa_info *sa_info, CDS_INIT_LIST_HEAD(&sa->peer_links); - if (cipher_setup_ctx(crypto_algo, auth_algo, sa_info, tmpl, - sa, extra_flags)) + if (cipher_setup_ctx(crypto_algo, auth_trunc_algo, auth_algo, + sa_info, tmpl, sa, extra_flags)) sa->blocked = true; /* * Need to allocate the crypto_pmd before inserting the sa as * the insertion triggers an update for any
registered * observers, i.e policies. */ - retiring_sa = sadb_find_matching_sa(sa, false, vrf_id); + retiring_sa = sadb_find_matching_sa(sa, false, vrf_id, &peer, + sa_info->reqid); if (retiring_sa) { retiring_sa->pending_del = true; - crypto_pmd_inc_pending_del(retiring_sa->pmd_dev_id, - crypto_sa_to_xfrm(retiring_sa)); + crypto_pmd_mod_pending_del(retiring_sa->pmd_dev_id, + crypto_sa_to_xfrm(retiring_sa), + true); } - sa->del_pmd_dev_id = sa->pmd_dev_id = - crypto_allocate_pmd(crypto_sa_to_xfrm(sa)); - if (sadb_insert_sa(sa, vrf_ctx) < 0) { + if (sa->session) + pmd_dev_id = crypto_allocate_pmd(crypto_sa_to_xfrm(sa), + sa->session->cipher_algo, + sa->session->aead_algo, + &setup_openssl); + else + pmd_dev_id = CRYPTO_PMD_INVALID_ID; /* no session: nothing to bind to a PMD */ + + sa->del_pmd_dev_id = sa->pmd_dev_id = pmd_dev_id; /* set before any sadb_sa_destroy(), which passes sa->del_pmd_dev_id to crypto_sa_unbind_rcu() */ + if (sa->session && pmd_dev_id == CRYPTO_PMD_INVALID_ID) { + SADB_ERR("Failed to allocate PMD for SA\n"); + sadb_sa_destroy(sa); + return -ENOMEM; + } + + if (pmd_dev_id != CRYPTO_PMD_INVALID_ID) { + err = crypto_session_set_direction(sa, + sa->dir == CRYPTO_DIR_IN ? + XFRM_POLICY_IN : + XFRM_POLICY_OUT, + setup_openssl); + if (err) { + SADB_ERR("Failed to set direction for SA\n"); + sadb_sa_destroy(sa); + return -EINVAL; + } + } + rc = sadb_insert_sa(sa, vrf_ctx, peer, sa->reqid); /* peer is NULL when no peer exists yet; sadb_insert_sa() then creates one */ + if (rc < 0) { /* * Even though the SA insert failed, we know * there is a pending del on the retiring_sa, @@ -776,7 +937,7 @@ void crypto_sadb_new_sa(const struct xfrm_usersa_info *sa_info, */ SADB_ERR("Failed to insert SA into SADB\n"); sadb_sa_destroy(sa); - return; + return rc; } /* @@ -784,10 +945,14 @@ void crypto_sadb_new_sa(const struct xfrm_usersa_info *sa_info, * point bound to it. */ ifp = (sa->dir == CRYPTO_DIR_IN) ?
- crypto_policy_feat_attach_by_reqid(sa->reqid) : NULL; + crypto_policy_feat_attach_by_reqid(vrf_ctx, sa->reqid) : NULL; rcu_assign_pointer(sa->feat_attach_ifp, ifp); + /* allocate a core for post crypto processing */ + sa->fwd_core = crypto_sa_alloc_fwd_core(); vrf_ctx->count_of_sas++; + + return 0; } /* @@ -804,28 +969,41 @@ static void sadb_sa_rcu_free(struct rcu_head *head) sadb_sa_destroy(sa); } -/* - * crypto_sadb_del_sa() - * - * Delete an SA from the SADB and free the memory - * - * This function is called from the main thread only. - */ -void crypto_sadb_del_sa(const struct xfrm_usersa_info *sa_info, vrfid_t vrfid) +static void crypto_sadb_resurrect_sa(struct sadb_sa *sa, vrfid_t vrfid, + uint32_t req_id) { - static struct sadb_sa *sa; - struct crypto_vrf_ctx *vrf_ctx; + struct sadb_peer *peer = NULL; + struct sadb_sa *old_sa = + sadb_find_old_sa(sa, vrfid, &peer, req_id); - if (!sa_info) { - SADB_ERR("Bad parameters on attempt to update SA\n"); + if (!old_sa || !peer) return; - } - SADB_DEBUG("DELSA SPI = %x VRF %d\n", ntohl(sa_info->id.spi), vrfid); + SADB_DEBUG("Resurrect old SA %x\n", ntohl(old_sa->spi)); - vrf_ctx = crypto_vrf_find(vrfid); - if (!vrf_ctx) - return; + old_sa->pending_del = false; /* NOTE(review): cleared and passed to crypto_pmd_mod_pending_del() without checking it was set - confirm old_sa is always pending delete here */ + crypto_pmd_mod_pending_del(old_sa->pmd_dev_id, + crypto_sa_to_xfrm(old_sa), false); + /* + * Update the crypto overhead of any observers that + * are registered for this peer and reqid. + */ + sadb_refresh_osbervers_of_sa(old_sa, peer, false); +} + +static int crypto_sadb_del_sa_internal(const xfrm_address_t *dst, + const xfrm_address_t *src, + uint32_t spi, + uint16_t family, + struct crypto_vrf_ctx *vrf_ctx, + bool resurrect_old_sa, + uint32_t req_id) +{ + struct sadb_sa *sa; /* plain local: assigned by sadb_remove_sa() before any use */ + + ASSERT_MAIN(); + + SADB_DEBUG("DELSA SPI = %x VRF %d\n", ntohl(spi), vrf_ctx->vrfid); /* * Trigger the deletion of the SA, and set its pmd_dev_id to @@ -835,30 +1013,93 @@ void crypto_sadb_del_sa(const struct xfrm_usersa_info *sa_info, vrfid_t vrfid) * flow.
The PMD detatch is handled in the rcu callback for the * sa delete. */ - sa = sadb_remove_sa(&sa_info->id.daddr, - &sa_info->saddr, - sa_info->id.spi, - sa_info->family, - vrfid); + sa = sadb_remove_sa(dst, src, spi, family, vrf_ctx->vrfid, req_id); if (!sa) { - char dstip_str[INET6_ADDRSTRLEN]; + char dstip_str[INET6_ADDRSTRLEN] = ""; /* stays an empty string if inet_ntop() fails, e.g. for an unsupported family */ - inet_ntop(sa_info->family, &sa_info->id.daddr, + inet_ntop(family, dst, /* dst already points at the address; &dst would format the pointer's own bytes */ dstip_str, sizeof(dstip_str)); - SADB_ERR("SA delete for %s SPI %x failed: not found\n", - dstip_str, ntohl(sa_info->id.spi)); - return; + SADB_DEBUG("SA delete for %s SPI %x failed: not found\n", + dstip_str, ntohl(spi)); + return -ESRCH; } + /* If this is an active SA, then we need to restore an old SA + * if one exists + */ + if (resurrect_old_sa && !sa->pending_del) + crypto_sadb_resurrect_sa(sa, vrf_ctx->vrfid, sa->reqid); + crypto_remove_sa_from_pmd(sa->del_pmd_dev_id, crypto_sa_to_xfrm(sa), sa->pending_del); + crypto_sa_free_fwd_core(sa->fwd_core); call_rcu(&sa->sa_rcu, sadb_sa_rcu_free); vrf_ctx->count_of_sas--; crypto_vrf_check_remove(vrf_ctx); + + return 0; +} + +/* + * crypto_sadb_del_sa() + * + * Delete an SA from the SADB and free the memory + * + * This function is called from the main thread only.
+ */ +int crypto_sadb_del_sa(const struct xfrm_usersa_info *sa_info, vrfid_t vrfid) +{ + struct crypto_vrf_ctx *vrf_ctx; + + if (!sa_info) + return -EINVAL; + + vrf_ctx = crypto_vrf_find(vrfid); + if (!vrf_ctx) + return -EINVAL; + + return crypto_sadb_del_sa_internal(&sa_info->id.daddr, + &sa_info->saddr, + sa_info->id.spi, + sa_info->family, + vrf_ctx, + true, /* resurrect_old_sa: restore an older SA if the deleted one was active */ + sa_info->reqid); +} + +void crypto_sadb_flush_vrf(struct crypto_vrf_ctx *vrf_ctx) +{ + struct cds_lfht_iter iter; + struct sadb_sa *sa; + + SADB_DEBUG("Flush all SAs for VRF %d\n", vrf_ctx->vrfid); + + cds_lfht_for_each_entry(vrf_ctx->spi_out_hash_table, + &iter, sa, spi_ht_node) { /* NOTE(review): each delete ends in crypto_vrf_check_remove(vrf_ctx) - confirm vrf_ctx cannot be released while this loop is still running */ + (void)crypto_sadb_del_sa_internal(&sa->dst, + &sa->src, + sa->spi, + sa->family, + vrf_ctx, + false, /* resurrect_old_sa: everything is being flushed */ + sa->reqid); + } + + cds_lfht_for_each_entry(spi_in_hash_table, + &iter, sa, spi_ht_node) { + if (sa->overlay_vrf_id == vrf_ctx->vrfid) /* spi_in_hash_table is not per-VRF, so filter on the SA's overlay VRF */ + (void)crypto_sadb_del_sa_internal(&sa->dst, + &sa->src, + sa->spi, + sa->family, + vrf_ctx, + false, + sa->reqid); + } } /* @@ -926,8 +1167,7 @@ uint32_t crypto_sadb_get_mark_val(struct sadb_sa *sa) { if (sa) return sa->mark_val; - else - return 0; + return 0; } static const char *xfrm_addr_to_str(uint16_t family, @@ -952,6 +1192,27 @@ static const char *xfrm_addr_to_str(uint16_t family, #define SPI_LEN_IN_HEXCHARS (8+1) /* 32 bit SPI */ +bool +crypto_sadb_get_stats(vrfid_t vrf_id, xfrm_address_t daddr, + uint16_t family, uint32_t spi, + struct crypto_sadb_stats *sa_stats) +{ + struct sadb_sa *sa; + + sa = sadb_lookup_sa_outbound_noblock(vrf_id, &daddr, + family, spi); + if (!sa) { + sa = sadb_lookup_inbound_noblock(spi); /* no outbound match: fall back to the inbound SPI lookup */ + if (!sa) + return false; /* sa_stats is left untouched */ + } + + sa_stats->bytes = sa->byte_count; + sa_stats->packets = sa->packet_count; + + return true; +} + void crypto_sadb_show_summary(FILE *f, vrfid_t vrfid) { json_writer_t *wr; @@ -960,7 +1221,7 @@ void crypto_sadb_show_summary(FILE *f, vrfid_t vrfid) struct crypto_vrf_ctx *vrf_ctx; struct ifnet *ifp; - if (!vrf_get_rcu_from_external(vrfid)) +
if (!dp_vrf_get_rcu_from_external(vrfid)) return; wr = jsonw_new(f); @@ -999,9 +1260,15 @@ void crypto_sadb_show_summary(FILE *f, vrfid_t vrfid) spi_to_hexstr(spi_as_hexstring, sa->spi); jsonw_string_field(wr, "spi", spi_as_hexstring); jsonw_uint_field(wr, "pmd_dev_id", sa->pmd_dev_id); + jsonw_uint_field(wr, "fwd_core", sa->fwd_core); jsonw_string_field(wr, "pending_delete", sa->pending_del ? "Yes" : "No"); crypto_engine_summary(wr, sa); + jsonw_uint_field(wr, "replay_window", + sa->replay_window); + jsonw_uint_field(wr, "replay_bitmap", + sa->replay_bitmap); + jsonw_uint_field(wr, "seq", sa->seq); jsonw_uint_field(wr, "af", sa->family); jsonw_string_field(wr, "dst", xfrm_addr_to_str(sa->family, @@ -1106,6 +1373,7 @@ void crypto_sadb_increment_counters(struct sadb_sa *sa, uint32_t bytes, (sa->byte_count > sa->byte_limit)) { crypto_sadb_mark_as_blocked(sa); crypto_expire_request(sa->spi, sa->reqid, + sa->dst, sa->family, IPPROTO_ESP, 0 /* hard */); } } @@ -1135,15 +1403,13 @@ static void cypto_sadb_overhead_refresh(struct sadb_peer *peer, } } -void crypto_sadb_peer_overhead_subscribe(const xfrm_address_t *peer_address, - uint16_t family, uint32_t reqid, +void crypto_sadb_tunl_overhead_subscribe(uint32_t reqid, struct crypto_overhead *overhead, vrfid_t vrfid) { struct sadb_peer *peer; - peer = sadb_lookup_or_create_peer(peer_address, family, vrfid); - if (!peer) { + if (sadb_lookup_or_create_peer(vrfid, reqid, &peer) < 0) { SADB_ERR("Could not subscribe to peer overhead\n"); return; } @@ -1157,15 +1423,15 @@ void crypto_sadb_peer_overhead_subscribe(const xfrm_address_t *peer_address, cypto_sadb_overhead_refresh(peer, overhead); } -void crypto_sadb_peer_overhead_unsubscribe(const xfrm_address_t *peer_address, - uint16_t family, +void crypto_sadb_tunl_overhead_unsubscribe(uint32_t reqid, struct crypto_overhead *overhead, vrfid_t vrfid) { struct sadb_peer *peer; + int rc; - peer = sadb_lookup_peer(peer_address, family, vrfid); - if (!peer) { + rc = 
sadb_lookup_peer(vrfid, reqid, &peer); + if (rc < 0 || !peer) { /* a lookup error and a plain miss are both reported as peer not found */ SADB_ERR("Overhead unsubscribe failed: peer not found.\n"); return; } @@ -1182,30 +1448,19 @@ void crypto_sadb_peer_overhead_unsubscribe(const xfrm_address_t *peer_address, sadb_remove_peer(peer, vrfid); } -int crypto_sadb_peer_overhead_change_reqid(const xfrm_address_t *peer_address, - uint16_t family, uint32_t reqid, - struct crypto_overhead *overhead, - vrfid_t vrfid) +uint32_t crypto_sadb_get_reqid(struct sadb_sa *sa) { - struct sadb_peer *peer; - - peer = sadb_lookup_peer(peer_address, family, vrfid); - if (!peer) { - SADB_ERR("Overhead reqid change failed: peer not found.\n"); - return -1; - } - - overhead->bytes = 0; - overhead->reqid = reqid; - overhead->block_size = ESP_PAYLOAD_MIN_ALIGN; - cypto_sadb_overhead_refresh(peer, overhead); + return sa->reqid; /* sa is dereferenced without a NULL check */ +} - return 1; +uint32_t crypto_sadb_get_family(struct sadb_sa *sa) +{ + return sa->family; /* sa is dereferenced without a NULL check */ } -uint32_t crypto_sadb_get_reqid(struct sadb_sa *sa) +xfrm_address_t crypto_sadb_get_dst(struct sadb_sa *sa) { - return sa->reqid; + return sa->dst; /* the address is returned by value */ } void crypto_sadb_mark_as_blocked(struct sadb_sa *sa) @@ -1327,7 +1582,7 @@ static unsigned long crypto_sa_hash(const struct crypto_sa_key *key) } /* - * Add an incomplete sa (waiting on the vrf master). If we already have + * Add an incomplete sa (waiting on the vrf). If we already have * an entry for the key (spi + addr) then update the message. * * The values come from different places depending on the msg type.
@@ -1415,13 +1670,16 @@ void crypto_incmpl_sa_make_complete(void) { struct cds_lfht_iter iter; struct crypto_incmpl_xfrm_sa *sa; + struct xfrm_client_aux_data aux; + vrfid_t vrf_id = VRF_DEFAULT_ID; + aux.vrf = &vrf_id; crypto_incmpl_xfrm_sa_stats.if_complete++; cds_lfht_for_each_entry(crypto_incmpl_sa, &iter, sa, hash_node) { - rtnl_process_xfrm_sa(sa->nlh, &vrf_id); + rtnl_process_xfrm_sa(sa->nlh, &aux); } } diff --git a/src/crypto/crypto_sadb.h b/src/crypto/crypto_sadb.h index d7429cb1..a6fca16c 100644 --- a/src/crypto/crypto_sadb.h +++ b/src/crypto/crypto_sadb.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015-2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -15,6 +15,11 @@ #include "crypto_internal.h" #include "util.h" +struct crypto_sadb_stats { + uint64_t bytes; + uint64_t packets; +}; + struct crypto_vrf_ctx; struct ifnet; struct sadb_sa; @@ -23,14 +28,16 @@ int crypto_sadb_init(void); int crypto_sadb_vrf_init(struct crypto_vrf_ctx *vrf_ctx); void crypto_sadb_vrf_clean(struct crypto_vrf_ctx *vrf_ctx); -void crypto_sadb_new_sa(const struct xfrm_usersa_info *sa_info, +int crypto_sadb_new_sa(const struct xfrm_usersa_info *sa_info, const struct xfrm_algo *crypto_algo, - const struct xfrm_algo_auth *auth_algo, + const struct xfrm_algo_auth *auth_trunc_algo, + const struct xfrm_algo *auth_algo, const struct xfrm_encap_tmpl *tmpl, uint32_t mark_val, uint32_t extra_flags, vrfid_t vrf_id); -void crypto_sadb_del_sa(const struct xfrm_usersa_info *sa_info, vrfid_t vrfid); +int crypto_sadb_del_sa(const struct xfrm_usersa_info *sa_info, vrfid_t vrfid); +void crypto_sadb_flush_vrf(struct crypto_vrf_ctx *vrf_ctx); struct sadb_sa *sadb_lookup_inbound(uint32_t spi); @@ -41,6 +48,8 @@ void crypto_sadb_increment_counters(struct sadb_sa *sa, uint32_t packets); uint32_t crypto_sadb_get_reqid(struct sadb_sa 
*sa); +uint32_t crypto_sadb_get_family(struct sadb_sa *sa); +xfrm_address_t crypto_sadb_get_dst(struct sadb_sa *sa); void crypto_sadb_mark_as_blocked(struct sadb_sa *sa); @@ -58,4 +67,7 @@ void crypto_incmpl_xfrm_sa_del(uint32_t ifindex, const struct nlmsghdr *nlh, const struct xfrm_usersa_info *sa_info); void crypto_incmpl_sa_make_complete(void); +bool crypto_sadb_get_stats(vrfid_t vrf_id, xfrm_address_t addr, + uint16_t family, uint32_t spi, + struct crypto_sadb_stats *sa); #endif /* CRYPTO_SADB_H */ diff --git a/src/crypto/esp.c b/src/crypto/esp.c index cf03dab4..c8a7dd94 100644 --- a/src/crypto/esp.c +++ b/src/crypto/esp.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015-2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -26,18 +26,20 @@ #include "crypto/crypto_sadb.h" #include "in6.h" #include "ip_funcs.h" +#include "util.h" #include "vplane_log.h" -#include "vrf.h" +#include "vrf_internal.h" #include #include #include "../in_cksum.h" #include "../iptun_common.h" -#include "../pktmbuf.h" +#include "../pktmbuf_internal.h" #include "crypto.h" #include "crypto_internal.h" #include "esp.h" +#include "crypto_rte_pmd.h" struct ifnet; @@ -66,11 +68,10 @@ struct esp_hdr_ctx { void *l3hdr, void *new_l3hdr, unsigned int pre_len, unsigned int udp_size, struct sadb_sa *sa); - unsigned int out_hdr_len; + uint16_t out_hdr_len; unsigned int pre_len; unsigned int tot_len; unsigned int out_align_val; - uint16_t out_ethertype; uint8_t proto_ip; uint8_t out_proto_nxt; }; @@ -121,14 +122,16 @@ static struct rte_mbuf *buf_tail_free(struct rte_mbuf *m) */ static int buf_tail_read_char(struct rte_mbuf *m, char *ptr, int err) { - struct rte_mbuf *m_last; + struct rte_mbuf *m_last = m; if (err) return err; __rte_mbuf_sanity_check(m, 1); - m_last = rte_pktmbuf_lastseg(m); + if (unlikely(m->nb_segs > 1)) + m_last = 
rte_pktmbuf_lastseg(m); + *ptr = *((char *)m_last->buf_addr + m_last->data_off + m_last->data_len - 1); m_last->data_len = (uint16_t)(m_last->data_len - 1); @@ -142,14 +145,15 @@ static int buf_tail_read_char(struct rte_mbuf *m, char *ptr, int err) static int buf_tail_trim(struct rte_mbuf *m, uint16_t len, int err) { - struct rte_mbuf *m_last; + struct rte_mbuf *m_last = m; if (err) return err; - m->pkt_len -= len; + if (unlikely(m->nb_segs > 1)) + m_last = rte_pktmbuf_lastseg(m); - m_last = rte_pktmbuf_lastseg(m); + m->pkt_len -= len; while (len != 0) { if (m_last->data_len <= len) { len -= m_last->data_len; @@ -211,28 +215,41 @@ uint16_t esp_payload_padded_len(const struct crypto_overhead *overhead, * highest_received >= S > (highest_received - replay_window_size) * */ -int esp_replay_check(const uint8_t *esp, - const struct sadb_sa *sa) +int esp_replay_check(const uint8_t *esp, const struct sadb_sa *sa) { const uint32_t replay_window = sa->replay_window; const uint32_t pkt_seq = ntohl(*(const uint32_t *)(esp+4)); uint32_t delta; + int ret = 0; - if (unlikely(!pkt_seq)) - return -1; /* Invalid seq in packet. Auditable event? */ + if (unlikely(!pkt_seq)) { + ret = -1; /* Invalid seq in packet. Auditable event? */ + goto err; + } if (likely(pkt_seq > sa->seq)) return 0; delta = sa->seq - pkt_seq; - if (delta >= replay_window) - return -2; /* Wrap or replay. Auditable event? */ + if (delta >= replay_window) { + ret = -2; /* Wrap or replay. Auditable event? */ + goto err; + } - if (sa->replay_bitmap & (1U << delta)) - return -3; /* Replay. Auditable event? */ + if (sa->replay_bitmap & (1U << delta)) { + ret = -3; /* Replay. Auditable event? */ + goto err; + } return 0; + +err: + if (net_ratelimit()) + ESP_INFO("Replay check failed for SPI %#x." 
+ " (Packet seq: %#x / SA seq: %#x / Replay Bitmap: %#lx)\n", + sa->spi, pkt_seq, sa->seq, sa->replay_bitmap); + return ret; } /* @@ -243,8 +260,7 @@ int esp_replay_check(const uint8_t *esp, * bitmask is cleared, and a single bit set to indicate that we've * started afresh. */ -void esp_replay_advance(const uint8_t *esp, - struct sadb_sa *sa) +void esp_replay_advance(const uint8_t *esp, struct sadb_sa *sa) { const uint32_t replay_window = sa->replay_window; uint32_t delta; @@ -535,12 +551,12 @@ static int null_icv_cb(struct crypto_chain *chain __rte_unused, * * Generate and process a chain of actions for the crypto engine. */ -static int esp_generate_chain(struct sadb_sa *sa, - struct rte_mbuf *mbuf, - unsigned int l3_hdr_len, - unsigned char *esp, - unsigned char *iv, - uint32_t text_total_len, int8_t encrypt) +int esp_generate_chain(struct sadb_sa *sa, + struct rte_mbuf *mbuf, + unsigned int l3_hdr_len, + unsigned char *esp, + unsigned char *iv, + uint32_t text_total_len, int8_t encrypt) { struct crypto_chain chain; unsigned int esp_len = esp_hdr_len(sa); @@ -549,11 +565,12 @@ static int esp_generate_chain(struct sadb_sa *sa, struct crypto_visitor_ctx ctx = { .session = sa->session, }; + int err; - crypto_session_set_direction(sa->session, encrypt); - - if (crypto_chain_init(&chain, sa->session)) + if (crypto_chain_init(&chain, sa->session)) { + IPSEC_CNT_INC(CRYPTO_CHAIN_INIT_FAILED); return -1; + } chain.v_ctx = &ctx; @@ -562,11 +579,10 @@ static int esp_generate_chain(struct sadb_sa *sa, chain.v_ops->set_iv(chain.v_ctx, iv_len, iv); /* set ICV and callback */ - chain.icv_offset = pktmbuf_l2_len(mbuf) + l3_hdr_len + + chain.icv_offset = dp_pktmbuf_l2_len(mbuf) + l3_hdr_len + text_total_len; - if (sa->udp_encap) - chain.icv_offset += sizeof(struct udphdr); + chain.icv_offset += sa->udp_encap; if (!encrypt) { chain.icv_callback = icv_len ? 
check_icv_cb : null_icv_cb; @@ -580,8 +596,10 @@ static int esp_generate_chain(struct sadb_sa *sa, } /* process plaintext ESP header (w/o IV) */ - if (esp_process_authdata(&chain, esp) < 0) + if (esp_process_authdata(&chain, esp) < 0) { + IPSEC_CNT_INC(CRYPTO_AUTH_OP_FAILED); return -1; + } /* process plaintext ESP payload IV ptr & len*/ if (iv_len) { @@ -590,16 +608,26 @@ static int esp_generate_chain(struct sadb_sa *sa, } text_total_len -= esp_len; - if (esp_process_text(&chain, mbuf, text_total_len, esp + esp_len) < 0) + if (esp_process_text(&chain, mbuf, text_total_len, esp + esp_len) < 0) { + IPSEC_CNT_INC(CRYPTO_CIPHER_OP_FAILED); return -1; + } - if (esp_process_digest(&chain) < 0) + if (esp_process_digest(&chain) < 0) { + IPSEC_CNT_INC(CRYPTO_DIGEST_OP_FAILED); return -1; + } - return chain.icv_callback(&chain, mbuf); + err = chain.icv_callback(&chain, mbuf); + if (err) { + IPSEC_CNT_INC(CRYPTO_DIGEST_CB_FAILED); + return -1; + } + + return 0; } -static unsigned int +static inline unsigned int esp_input_tunl_fixup4(struct sadb_sa *sa, void *l3, void *new_l3) { @@ -609,7 +637,7 @@ esp_input_tunl_fixup4(struct sadb_sa *sa, if (sa->flags & XFRM_STATE_DECAP_DSCP) { ip_dscp_set(ip->tos, new_ip); new_ip->check = 0; - new_ip->check = in_cksum_hdr(new_ip); + new_ip->check = dp_in_cksum_hdr(new_ip); } if (!(sa->flags & XFRM_STATE_NOECN)) { if (ip_tos_ecn_decap(ip->tos, (char *)new_ip, @@ -620,7 +648,7 @@ esp_input_tunl_fixup4(struct sadb_sa *sa, return ntohs(new_ip->tot_len); } -static unsigned int +static inline unsigned int esp_input_tunl_fixup6(struct sadb_sa *sa, void *l3, void *new_l3) { struct ip6_hdr *new_ip6 = new_l3; @@ -640,19 +668,21 @@ esp_input_tunl_fixup6(struct sadb_sa *sa, void *l3, void *new_l3) return ntohs(new_ip6->ip6_plen) + sizeof(struct ip6_hdr); } -static void esp_input_tran_fixup4(void *new_l3, unsigned int new_total, - char next_hdr, unsigned int prev_off __unused) +static inline +void esp_input_tran_fixup4(void *new_l3, unsigned int 
new_total, + char next_hdr, unsigned int prev_off __unused) { struct iphdr *ip = new_l3; ip->protocol = next_hdr; ip->tot_len = htons(new_total); ip->check = 0; - ip->check = in_cksum_hdr(ip); + ip->check = dp_in_cksum_hdr(ip); } -static void esp_input_tran_fixup6(void *new_l3, unsigned int new_total, - char next_hdr, unsigned int prev_off) +static inline +void esp_input_tran_fixup6(void *new_l3, unsigned int new_total, + char next_hdr, unsigned int prev_off) { struct ip6_hdr *ip6 = new_l3; unsigned char *p_proto; @@ -662,9 +692,10 @@ static void esp_input_tran_fixup6(void *new_l3, unsigned int new_total, ip6->ip6_plen = htons(new_total - sizeof(struct ip6_hdr)); } -static void esp_input_nat_l4cksum_fixup(struct rte_mbuf *m) +static void esp_input_nat_l4cksum_fixup(int family, struct rte_mbuf *m) { - struct iphdr *ip; + void *l3_hdr; + uint16_t protocol; struct udphdr *udp; struct tcphdr *tcp; @@ -674,19 +705,26 @@ static void esp_input_nat_l4cksum_fixup(struct rte_mbuf *m) * UDP fixup = option #3 * TCP fixup = option #2 */ - ip = pktmbuf_mtol3(m, struct iphdr *); - switch (ip->protocol) { + l3_hdr = dp_pktmbuf_mtol3(m, void *); + if (family == AF_INET) + protocol = ((struct iphdr *)l3_hdr)->protocol; + else + protocol = ((struct ip6_hdr *)l3_hdr)->ip6_nxt; + switch (protocol) { case IPPROTO_UDP: if (pktmbuf_udp_header_is_usable(m)) { - udp = pktmbuf_mtol4(m, struct udphdr *); + udp = dp_pktmbuf_mtol4(m, struct udphdr *); udp->check = 0; } break; case IPPROTO_TCP: if (pktmbuf_tcp_header_is_usable(m)) { - tcp = pktmbuf_mtol4(m, struct tcphdr *); + tcp = dp_pktmbuf_mtol4(m, struct tcphdr *); tcp->check = 0; - tcp->check = in4_cksum_mbuf(m, ip, tcp); + if (family == AF_INET) + tcp->check = dp_in4_cksum_mbuf(m, l3_hdr, tcp); + else + tcp->check = dp_in6_cksum_mbuf(m, l3_hdr, tcp); } break; default: @@ -694,183 +732,261 @@ static void esp_input_nat_l4cksum_fixup(struct rte_mbuf *m) } } -static int esp_input_inner(int family, struct rte_mbuf *m, void *l3_hdr, - struct 
sadb_sa *sa, uint32_t *bytes, - uint8_t *new_family) +static inline uint16_t +esp_input_pre_decrypt(struct crypto_pkt_ctx *ctx_arr[], uint16_t count) { - int rc = 0, head_trim = 0, tail_trim = 0; - unsigned int esp_len, ciphertext_len, udp_len = 0; - unsigned int iphlen, icv_len, counter_modify = 0; + int head_trim = 0; + unsigned int esp_len, ciphertext_len; + unsigned int iphlen, icv_len; unsigned int base_len; - char next_hdr = 0, padding_size = 0; unsigned char *iv = NULL, *esp = NULL; unsigned int seg_data_remaining; - unsigned int new_total; - uint16_t ethertype, prev_off = 0; - char *new_l3_hdr; - uint8_t post_decrypt_family; - void (*tran_fixup)(void *, unsigned int, char, unsigned int); - unsigned int (*tunl_fixup)(struct sadb_sa *, void *, void *); + uint16_t prev_off = 0; + struct crypto_pkt_ctx *ctx; + uint16_t i; + int family; + struct rte_mbuf *m; + struct sadb_sa *sa; + uint16_t bad_idx[count], bad_cnt = 0; + + for (i = 0; i < count; i++) { + crypto_prefetch_ctx(ctx_arr, count, i); + ctx = ctx_arr[i]; + family = ctx->family; + m = ctx->mbuf; + sa = ctx->sa; + + crypto_prefetch_mbuf_payload(m); + + if (family == AF_INET) { + struct iphdr *ip = iphdr(m); + + if (unlikely(ip_is_fragment(ip))) { + IPSEC_CNT_INC(DROPPED_ESP_IP_FRAG); + ctx->status = -1; + bad_idx[bad_cnt++] = i; + continue; + } - if (!sa) { - ESP_ERR("No SA for the inbound packet\n"); - return -1; - } + ctx->l3hdr = ip; + base_len = ntohs(ip->tot_len); + iphlen = ip->ihl << 2; + } else { + struct ip6_hdr *ip6 = ip6hdr(m); + + ctx->l3hdr = ip6; + base_len = ntohs(ip6->ip6_plen) + + sizeof(struct ip6_hdr); + iphlen = dp_pktmbuf_l3_len(m); + if (sa->mode == XFRM_MODE_TRANSPORT) + prev_off = ip6_findprevoff(m); + } - if (family == AF_INET) { - struct iphdr *ip = l3_hdr; + crypto_prefetch_ctx_data(ctx_arr, count, i); - if (ip_is_fragment(ip)) { - ESP_ERR("IP Frag\n"); - return -1; + esp = dp_pktmbuf_mtol4(m, unsigned char *); + esp += sa->udp_encap; + + if (unlikely(sa->replay_window && + 
esp_replay_check(esp, sa) < 0)) { + crypto_sadb_seq_drop_inc(sa); + ctx->status = -1; + bad_idx[bad_cnt++] = i; + continue; } - base_len = ntohs(ip->tot_len); - iphlen = ip->ihl << 2; - } else { - struct ip6_hdr *ip6 = l3_hdr; + esp_len = esp_hdr_len(sa); - base_len = ntohs(ip6->ip6_plen) + sizeof(struct ip6_hdr); - iphlen = pktmbuf_l3_len(m); - if (sa->mode == XFRM_MODE_TRANSPORT) - prev_off = ip6_findprevoff(m); - } + /* + * Now much data is there left in the segment after the ip/udp + * hdr. Assumption here is that esp hdr is in the first + * segment. + */ + seg_data_remaining = rte_pktmbuf_data_len(m) - + (esp - rte_pktmbuf_mtod(m, unsigned char *)); + + if (seg_data_remaining < esp_len) { + IPSEC_CNT_INC(ESP_NOT_IN_FIRST_SEG); + ctx->status = -1; + bad_idx[bad_cnt++] = i; + continue; + } - esp = pktmbuf_mtol4(m, unsigned char *); - if (sa->udp_encap) { - esp += sizeof(struct udphdr); - udp_len = sizeof(struct udphdr); - } + /* iv is after the SPI(4) and the SEQ(4) */ + iv = esp + 8; + + /* ESP length = SPI(4) + SEQ(4) + IV_LEN */ + head_trim = esp_len + sa->udp_encap; + icv_len = esp_icv_len(sa); + ciphertext_len = base_len - iphlen - esp_len - + sa->udp_encap - icv_len; + + if (ciphertext_len % crypto_session_block_size(sa->session)) { + ESP_ERR("Invalid ctext len %d block_size %d", + ciphertext_len, + crypto_session_block_size(sa->session)); + IPSEC_CNT_INC(INVALID_CIPHERTEXT_LEN); + ctx->status = -1; + bad_idx[bad_cnt++] = i; + continue; + } - if (unlikely(sa->replay_window && - esp_replay_check(esp, sa) < 0)) { - crypto_sadb_seq_drop_inc(sa); - ESP_INFO("Replay check failed for SPI 0x%x\n", sa->spi); - return -1; + ctx->iphlen = iphlen; + ctx->base_len = base_len; + ctx->esp_len = esp_len; + ctx->ciphertext_len = ciphertext_len; + ctx->icv_len = icv_len; + ctx->prev_off = prev_off; + ctx->head_trim = head_trim; + ctx->esp = esp; + ctx->iv = iv; } - esp_len = esp_hdr_len(sa); + move_bad_mbufs(ctx_arr, count, bad_idx, bad_cnt); - /* - * Now much data is 
there left in the segment after the ip/udp - * hdr. Assumption here is that esp hdr is in the first - * segment. - */ - seg_data_remaining = rte_pktmbuf_data_len(m) - - (esp - rte_pktmbuf_mtod(m, unsigned char *)); + return count - bad_cnt; +} - if (seg_data_remaining < esp_len) { - ESP_ERR("ESP not in first buffer\n"); - return -1; - } +static inline uint16_t +esp_input_post_decrypt(struct crypto_pkt_ctx *ctx_arr[], uint16_t count) +{ + unsigned int counter_modify = 0; + int rc = 0, tail_trim = 0; + char next_hdr = 0, padding_size = 0; + void (*tran_fixup)(void *new_l3_hdr, unsigned int new_total, + char next_hdr, unsigned int); + unsigned int (*tunl_fixup)(struct sadb_sa *sa, void *l3hdr, + void *new_l3_hdr); + uint16_t ethertype; + unsigned int new_total; + char *new_l3_hdr; + uint8_t post_decrypt_family; + uint16_t i; + struct crypto_pkt_ctx *ctx; + struct rte_mbuf *m; + struct sadb_sa *sa; + uint16_t bad_idx[count], bad_cnt = 0; + + for (i = 0; i < count; i++) { + crypto_prefetch_ctx(ctx_arr, count, i); + ctx = ctx_arr[i]; + m = ctx->mbuf; + sa = ctx->sa; + + esp_replay_advance(ctx->esp, sa); + + rc = buf_tail_trim(m, ctx->icv_len, rc); + rc = buf_tail_read_char(m, &next_hdr, rc); + rc = buf_tail_read_char(m, &padding_size, rc); + if (rc != 0) { + IPSEC_CNT_INC(ESP_TAIL_TRIM_FAILED); + ctx->status = -1; + bad_idx[bad_cnt++] = i; + continue; + } - /* iv is after the SPI(4) and the SEQ(4) */ - iv = esp + 8; + if (padding_size != 0) + buf_tail_trim(m, padding_size, rc); - /* ESP length = SPI(4) + SEQ(4) + IV_LEN */ - head_trim = esp_len + udp_len; - icv_len = esp_icv_len(sa); - ciphertext_len = base_len - iphlen - esp_len - udp_len - icv_len; + /* Trim the tail of next_hdr(1), padding_size(1), + * icv and padding + */ + tail_trim = 2 + padding_size + ctx->icv_len; - if (ciphertext_len % crypto_session_block_size(sa->session)) { - ESP_ERR("Invalid ctext len %d block_size %d", - ciphertext_len, - crypto_session_block_size(sa->session)); - return -1; - } + /* + * We 
know what the next hdr type is now, so set up based + * on that. In case of transport mode, the next hdr doesn't + * matter the 'family' itself tells us the address family of + * the payload. + */ + if (((sa->mode == XFRM_MODE_TRANSPORT) && + (ctx->family == AF_INET)) || + (next_hdr == IPPROTO_IPIP)) { + ethertype = ETH_P_IP; + tran_fixup = esp_input_tran_fixup4; + tunl_fixup = esp_input_tunl_fixup4; + post_decrypt_family = AF_INET; + } else { + ethertype = ETH_P_IPV6; + tran_fixup = esp_input_tran_fixup6; + tunl_fixup = esp_input_tunl_fixup6; + post_decrypt_family = AF_INET6; + } - if (unlikely(esp_generate_chain(sa, m, iphlen, esp, iv, - ciphertext_len + esp_len, - 0) != 0)) - return -1; + if (sa->mode == XFRM_MODE_TRANSPORT) { + new_l3_hdr = (char *)((char *)ctx->l3hdr + + ctx->esp_len + + sa->udp_encap); + memmove(new_l3_hdr, ctx->l3hdr, ctx->iphlen); + new_total = ctx->base_len - ctx->esp_len - + sa->udp_encap - tail_trim; + (*tran_fixup)(new_l3_hdr, new_total, next_hdr, + ctx->prev_off); + + counter_modify = ctx->iphlen; + } else if (sa->mode == XFRM_MODE_TUNNEL) { + if (next_hdr != IPPROTO_IPV6 && + next_hdr != IPPROTO_IPIP) { + IPSEC_CNT_INC(ESP_INVALID_NXT_HDR); + ctx->status = -1; + bad_idx[bad_cnt++] = i; + continue; + } - esp_replay_advance(esp, sa); + ctx->head_trim += ctx->iphlen; + new_l3_hdr = (char *)(ctx->esp + ctx->esp_len); + new_total = (*tunl_fixup)(sa, ctx->l3hdr, new_l3_hdr); + } else { + IPSEC_CNT_INC(INVALID_IPSEC_MODE); + ctx->status = -1; + bad_idx[bad_cnt++] = i; + continue; + } - rc = buf_tail_trim(m, icv_len, rc); - rc = buf_tail_read_char(m, &next_hdr, rc); - rc = buf_tail_read_char(m, &padding_size, rc); - if (rc != 0) { - ESP_ERR("ESP tail trim failed\n"); - return -1; - } + crypto_prefetch_ctx_data(ctx_arr, count, i); - if (padding_size != 0) - buf_tail_trim(m, padding_size, rc); - /* Trim the tail of next_hdr(1), padding_size(1), - * icv and padding - */ - tail_trim = 2 + padding_size + icv_len; + rc = iptun_eth_hdr_fixup(m, 
ethertype, ctx->head_trim); + if (rc < 0) { + IPSEC_CNT_INC(ESP_ETH_HDR_FIXUP_FAILED); + ctx->status = -1; + bad_idx[bad_cnt++] = i; + continue; + } - /* - * We know what the next hdr type is now, so set up based on that. - * In case of transport mode, the next hdr doesn't matter the 'family' - * itself tells us the address family of the payload. - */ - if (((sa->mode == XFRM_MODE_TRANSPORT) && (family == AF_INET)) || - (next_hdr == IPPROTO_IPIP)) { - ethertype = ETH_P_IP; - tran_fixup = esp_input_tran_fixup4; - tunl_fixup = esp_input_tunl_fixup4; - post_decrypt_family = AF_INET; - } else { - ethertype = ETH_P_IPV6; - tran_fixup = esp_input_tran_fixup6; - tunl_fixup = esp_input_tunl_fixup6; - post_decrypt_family = AF_INET6; - } + /* + * RFC 3948: Section 3.1.2 + */ + if (unlikely(sa->udp_encap && sa->mode == XFRM_MODE_TRANSPORT)) + esp_input_nat_l4cksum_fixup(ctx->family, m); - if (sa->mode == XFRM_MODE_TRANSPORT) { - new_l3_hdr = (char *)((char *)l3_hdr + esp_len + udp_len); - memmove(new_l3_hdr, l3_hdr, iphlen); - new_total = base_len - esp_len - udp_len - tail_trim; - (*tran_fixup)(new_l3_hdr, new_total, next_hdr, prev_off); - - counter_modify = iphlen; - } else if (sa->mode == XFRM_MODE_TUNNEL) { - if (next_hdr != IPPROTO_IPV6 && - next_hdr != IPPROTO_IPIP) { - ESP_PKT_ERR("IPSEC: Invalid next_hdr proto %d\n", - next_hdr); - return -1; - } + /* Count the decapped payload against the receiving SA */ + crypto_sadb_increment_counters(sa, new_total - counter_modify, + 1); + ctx->bytes = new_total - counter_modify; - head_trim += iphlen; - new_l3_hdr = (char *)(esp + esp_len); - new_total = (*tunl_fixup)(sa, l3_hdr, new_l3_hdr); - } else { - ESP_ERR("IPSEC: Unsupported mode"); - return -1; + ctx->family = post_decrypt_family; } - rc = iptun_eth_hdr_fixup(m, ethertype, head_trim); - if (rc < 0) { - ESP_ERR("Ethernet header fixup failed\n"); - return -1; - } + move_bad_mbufs(ctx_arr, count, bad_idx, bad_cnt); - /* - * RFC 3948: Section 3.1.2 - */ - if (next_hdr == 
IPPROTO_IPIP) { - if (unlikely(sa->udp_encap == 1 && - sa->mode == XFRM_MODE_TRANSPORT)) { - esp_input_nat_l4cksum_fixup(m); - } - } + return count - bad_cnt; +} - /* Count the decapped payload against the receiving SA */ - crypto_sadb_increment_counters(sa, new_total - counter_modify, 1); - *bytes = new_total - counter_modify; +void esp_input(struct crypto_pkt_ctx *ctx_arr[], uint16_t count) +{ + count = esp_input_pre_decrypt(ctx_arr, count); - *new_family = post_decrypt_family; - return 0; + count = crypto_rte_xform_packets(ctx_arr, count); + + count = esp_input_post_decrypt(ctx_arr, count); + + (void) count; } -static unsigned int esp_out_new_hdr6(bool transport, uint8_t orig_family, - void *l3hdr, void *new_l3hdr, - unsigned int pre_len, - unsigned int udp_size, - struct sadb_sa *sa) +static inline unsigned int +esp_out_new_hdr6(bool transport, uint8_t orig_family, void *l3hdr, + void *new_l3hdr, unsigned int pre_len, unsigned int udp_size, + struct sadb_sa *sa) { struct ip6_hdr *new_ip6 = (struct ip6_hdr *)new_l3hdr; unsigned int counter_modify = 0; @@ -916,11 +1032,10 @@ static unsigned int esp_out_new_hdr6(bool transport, uint8_t orig_family, return counter_modify; } -static unsigned int esp_out_new_hdr4(bool transport, uint8_t orig_family, - void *l3hdr, - void *new_l3hdr, unsigned int pre_len, - unsigned int udp_size, - struct sadb_sa *sa) +static inline unsigned int +esp_out_new_hdr4(bool transport, uint8_t orig_family, void *l3hdr, + void *new_l3hdr, unsigned int pre_len, + unsigned int udp_size, struct sadb_sa *sa) { struct iphdr *new_ip = (struct iphdr *)new_l3hdr; struct iphdr *ip = (struct iphdr *)l3hdr; @@ -990,15 +1105,16 @@ static unsigned int esp_out_new_hdr4(bool transport, uint8_t orig_family, * Dest(1) is a Dest immediately followed by Routing * Dest(2) is a Dest without a following Routing */ -static int esp_out_proc_exthdr6(struct rte_mbuf *m, struct ip6_hdr *ip6, - uint8_t *proto, unsigned int *offset) +static inline int 
+esp_out_proc_exthdr6(struct rte_mbuf *m, struct ip6_hdr *ip6, + uint8_t *proto, unsigned int *offset) { struct ip6_ext *ip6e; struct ip6_frag *ip6f; uint16_t off, base; *proto = ip6->ip6_nxt; - base = pktmbuf_l2_len(m); + base = dp_pktmbuf_l2_len(m); off = base + sizeof(struct ip6_hdr); for (;;) { @@ -1044,23 +1160,20 @@ static int esp_out_proc_exthdr6(struct rte_mbuf *m, struct ip6_hdr *ip6, return 0; } -static int esp_out_hdr_parse6(struct rte_mbuf *m, void *l3hdr, - struct esp_hdr_ctx *h, - uint8_t new_family, - bool transport) +static inline int +esp_out_hdr_parse6(struct rte_mbuf *m, void *l3hdr, struct esp_hdr_ctx *h, + uint8_t new_family, bool transport) { struct ip6_hdr *ip6 = l3hdr; if (new_family == AF_INET) { h->out_new_hdr = esp_out_new_hdr4; h->out_hdr_len = sizeof(struct iphdr); - h->out_ethertype = ETH_P_IP; h->out_align_val = 8; h->out_proto_nxt = IPPROTO_IP; /* for transport mode */ } else { h->out_new_hdr = esp_out_new_hdr6; h->out_hdr_len = sizeof(struct ip6_hdr); - h->out_ethertype = ETH_P_IPV6; h->out_align_val = 8; h->out_proto_nxt = IPPROTO_IPV6; /* for transport mode */ } @@ -1079,21 +1192,20 @@ static int esp_out_hdr_parse6(struct rte_mbuf *m, void *l3hdr, return 0; } -static void esp_out_hdr_parse4(void *l3hdr, struct esp_hdr_ctx *h, - uint8_t new_family) +static inline void +esp_out_hdr_parse4(void *l3hdr, struct esp_hdr_ctx *h, + uint8_t new_family) { struct iphdr *ip = l3hdr; if (new_family == AF_INET) { h->out_new_hdr = esp_out_new_hdr4; h->out_hdr_len = sizeof(struct iphdr); - h->out_ethertype = ETH_P_IP; h->out_align_val = 4; h->out_proto_nxt = ip->protocol; /* for transport mode */ } else { h->out_new_hdr = esp_out_new_hdr6; h->out_hdr_len = sizeof(struct ip6_hdr); - h->out_ethertype = ETH_P_IPV6; h->out_align_val = 8; h->out_proto_nxt = IPPROTO_IPV6; /* for transport mode */ } @@ -1103,175 +1215,219 @@ static void esp_out_hdr_parse4(void *l3hdr, struct esp_hdr_ctx *h, h->tot_len = ntohs(ip->tot_len); } -static int 
esp_output_inner(int new_family, struct sadb_sa *sa, - struct rte_mbuf *m, uint8_t orig_family, - void *l3hdr, uint32_t *bytes) +static inline uint16_t +esp_output_pre_encrypt(struct crypto_pkt_ctx *ctx_arr[], + struct esp_hdr_ctx h_arr[], uint16_t count) { int block_size; unsigned int icv_size, tail_len, padding, enc_inc, udp_size = 0; - unsigned int i, counter_modify = 0; + unsigned int i, counter_modify = 0, j; unsigned int esp_size, plaintext_size, plaintext_size_orig; bool transport; unsigned char *plaintext = NULL, *esp_base, *esp_ptr = NULL; unsigned char *udp_base; char *hdr, *tail = NULL; struct udphdr *udp = NULL; - struct ether_hdr *eth_hdr; unsigned char *new_l3hdr; - struct esp_hdr_ctx h; + struct crypto_pkt_ctx *ctx; + uint16_t bad_idx[count], bad_cnt = 0; + struct sadb_sa *sa; + struct rte_mbuf *m; + struct esp_hdr_ctx *h; - if (!sa) { - ESP_ERR("No SA for the outbound pkt\n"); - return -1; - } + crypto_prefetch_ivs(); - transport = (sa->mode == XFRM_MODE_TRANSPORT) ? 1 : 0; + for (j = 0; j < count; j++) { + crypto_prefetch_ctx(ctx_arr, count, j); + ctx = ctx_arr[j]; + m = ctx->mbuf; + h = &h_arr[j]; + sa = ctx->sa; - if (orig_family == AF_INET) { - esp_out_hdr_parse4(l3hdr, &h, new_family); - } else { - if (esp_out_hdr_parse6(m, l3hdr, &h, new_family, transport) < 0) - return -1; - if (!transport) - m->l3_len = sizeof(struct ip6_hdr); - } + crypto_prefetch_mbuf_payload(m); - icv_size = esp_icv_len(sa); - esp_size = esp_hdr_len(sa); + transport = (sa->mode == XFRM_MODE_TRANSPORT) ? 
1 : 0; - plaintext = l3hdr; - plaintext_size_orig = plaintext_size = h.tot_len; + if (ctx->orig_family == AF_INET) { + esp_out_hdr_parse4(ctx->l3hdr, h, ctx->family); + } else { + if (unlikely(esp_out_hdr_parse6(m, ctx->l3hdr, h, + ctx->family, + transport) < 0)) { + IPSEC_CNT_INC(ESP_OUT_HDR_PARSE6_FAILED); + ctx->status = -1; + bad_idx[bad_cnt++] = j; + continue; + } + if (!transport) + m->l3_len = sizeof(struct ip6_hdr); + } - if (transport) { - /* - * ESP follows header options - */ - plaintext += h.pre_len; - plaintext_size -= h.pre_len; - enc_inc = 0; - } else { - /* - * Taking whole packet from start of l3 and encrypting. - */ - h.pre_len = h.out_hdr_len; - enc_inc = h.out_hdr_len; - } + icv_size = esp_icv_len(sa); + esp_size = esp_hdr_len(sa); - udp_base = esp_base = esp_ptr = plaintext - esp_size; - enc_inc += esp_size; + plaintext = ctx->l3hdr; + plaintext_size_orig = plaintext_size = h->tot_len; - if (sa->udp_encap) { - udp_size = sizeof(struct udphdr); - enc_inc += udp_size; - udp_base -= udp_size; - udp = (struct udphdr *) udp_base; - } + crypto_prefetch_ctx_data(ctx_arr, count, j); - hdr = rte_pktmbuf_prepend(m, enc_inc); - if (!hdr) { - ESP_ERR("Head room inc failed (requested %d bytes)\n", enc_inc); - return -1; - } + if (transport) { + /* + * ESP follows header options + */ + plaintext += h->pre_len; + plaintext_size -= h->pre_len; + enc_inc = 0; + } else { + /* + * Taking whole packet from start of l3 and encrypting. 
+ */ + h->pre_len = h->out_hdr_len; + enc_inc = h->out_hdr_len; + } - /* The ESP payload block needs to be aligned dependent on AF */ - block_size = RTE_ALIGN(crypto_session_block_size(sa->session), - h.out_align_val); - /* - * Workout the padding and tail bytes required, based upon the - * plain text and the minimum two tail bytes, padding len and next_hdr - */ - padding = RTE_ALIGN(plaintext_size + 2, block_size) - - (plaintext_size + 2); - - tail_len = padding + 2 + icv_size; - tail = pktmbuf_append_alloc(m, tail_len); - if (!tail) { - ESP_PKT_ERR("Tail room inc failed (requested %d bytes)\n", - tail_len); - return -1; - } + udp_base = esp_base = esp_ptr = plaintext - esp_size; + enc_inc += esp_size; - /* Set the padding using RFC specified pattern */ - for (i = 1; i <= padding; i++) - *tail++ = i; - *tail++ = padding; - *tail++ = transport ? h.out_proto_nxt : h.proto_ip; - plaintext_size += padding + 2; + udp_size = sa->udp_encap; + if (udp_size) { + enc_inc += udp_size; + udp_base -= udp_size; + udp = (struct udphdr *) udp_base; + } - new_l3hdr = udp_base - h.out_hdr_len; - udp_size += esp_size + plaintext_size + icv_size; + hdr = rte_pktmbuf_prepend(m, enc_inc); + if (unlikely(!hdr)) { + IPSEC_CNT_INC(ESP_HDR_PREPEND_FAILED); + ctx->status = -1; + bad_idx[bad_cnt++] = j; + continue; + } - counter_modify = (*h.out_new_hdr)(transport, orig_family, l3hdr, - new_l3hdr, h.pre_len, udp_size, sa); + /* The ESP payload block needs to be aligned dependent on AF */ + block_size = RTE_ALIGN(crypto_session_block_size(sa->session), + h->out_align_val); + /* + * Workout the padding and tail bytes required, based upon the + * plain text and the minimum two tail bytes, padding len and + * next_hdr + */ + padding = RTE_ALIGN(plaintext_size + 2, block_size) - + (plaintext_size + 2); + + tail_len = padding + 2 + icv_size; + tail = pktmbuf_append_alloc(m, tail_len); + if (unlikely(!tail)) { + IPSEC_CNT_INC(ESP_TAIL_APPEND_FAILED); + ctx->status = -1; + bad_idx[bad_cnt++] = j; + 
continue; + } - if (udp) { - udp->dest = sa->udp_dport; - udp->source = sa->udp_sport; - udp->check = 0; - udp->len = htons(udp_size); - } - /* Add Spi, sequence and IV */ - *(uint32_t *)esp_ptr = (sa->spi); - esp_ptr += 4; - *(uint32_t *)esp_ptr = htonl(++(sa->seq)); - esp_ptr += 4; - - crypto_session_generate_iv(sa->session, (char *)esp_ptr); - - if (unlikely(sa->seq == ESP_SEQ_SA_REKEY_THRESHOLD)) { - crypto_rekey_requests++; - crypto_expire_request(sa->spi, - crypto_sadb_get_reqid(sa), - IPPROTO_ESP, 0 /* hard */); - } - if (unlikely(sa->seq > (ESP_SEQ_SA_BLOCK_LIMIT - 1))) - crypto_sadb_mark_as_blocked(sa); + /* Set the padding using RFC specified pattern */ + for (i = 1; i <= padding; i++) + *tail++ = i; + *tail++ = padding; + *tail++ = transport ? h->out_proto_nxt : h->proto_ip; + plaintext_size += padding + 2; - if (unlikely(esp_generate_chain(sa, m, h.out_hdr_len, esp_base, esp_ptr, - plaintext_size + esp_size, 1) != 0)) - return -1; + new_l3hdr = udp_base - h->out_hdr_len; + udp_size += esp_size + plaintext_size + icv_size; - crypto_session_set_iv(sa->session, - crypto_session_iv_len(sa->session), - tail - crypto_session_iv_len(sa->session)); + counter_modify = (*h->out_new_hdr)(transport, ctx->orig_family, + ctx->l3hdr, new_l3hdr, + h->pre_len, udp_size, sa); - eth_hdr = (struct ether_hdr *)hdr; - eth_hdr->ether_type = htons(h.out_ethertype); + if (udp) { + udp->dest = sa->udp_dport; + udp->source = sa->udp_sport; + udp->check = 0; + udp->len = htons(udp_size); + } + /* Add Spi, sequence and IV */ + *(uint32_t *)esp_ptr = (sa->spi); + esp_ptr += 4; + *(uint32_t *)esp_ptr = htonl(++(sa->seq)); + esp_ptr += 4; - crypto_sadb_increment_counters(sa, plaintext_size_orig - - counter_modify, 1); - *bytes = plaintext_size_orig - counter_modify; - return 0; -} + /* + * For the first packet on an SA, use the original + * IV. 
This is primarily to get the UTs to pass + */ + if (unlikely(!sa->packet_count)) + memcpy(&cpbdb[dp_lcore_id()]->iv_cache[j][0], + sa->session->iv, + sa->session->nonce_len + + sa->session->iv_len); + + crypto_get_iv(j, (char *)esp_ptr, + crypto_session_iv_len(sa->session)); + + if (unlikely(sa->seq == ESP_SEQ_SA_REKEY_THRESHOLD)) { + crypto_rekey_requests++; + crypto_expire_request(sa->spi, + crypto_sadb_get_reqid(sa), + crypto_sadb_get_dst(sa), + crypto_sadb_get_family(sa), + IPPROTO_ESP, 0 /* hard */); + } + if (unlikely(sa->seq > (ESP_SEQ_SA_BLOCK_LIMIT - 1))) + crypto_sadb_mark_as_blocked(sa); + + /* set up output parameters */ + ctx->esp = esp_base; + ctx->iv = esp_ptr; + ctx->plaintext_size = plaintext_size; + ctx->plaintext_size_orig = plaintext_size_orig; + ctx->esp_len = esp_size; + ctx->counter_modify = counter_modify; + ctx->hdr = hdr; + ctx->tail = tail; + ctx->out_hdr_len = h->out_hdr_len; + } -int esp_output(struct rte_mbuf *m, uint8_t orig_family, void *ip, - struct sadb_sa *sa, uint32_t *bytes) -{ - return esp_output_inner(AF_INET, sa, m, orig_family, ip, bytes); -} + move_bad_mbufs(ctx_arr, count, bad_idx, bad_cnt); -int esp_output6(struct rte_mbuf *m, uint8_t orig_family, void *ip6, - struct sadb_sa *sa, uint32_t *bytes) -{ - return esp_output_inner(AF_INET6, sa, m, orig_family, ip6, bytes); + return count - bad_cnt; } -int esp_input(struct rte_mbuf *m, struct sadb_sa *sa, - uint32_t *bytes, uint8_t *new_family) +static inline void +esp_output_post_encrypt(struct crypto_pkt_ctx *ctx_arr[], uint16_t count) { - struct iphdr *ip = iphdr(m); + struct rte_ether_hdr *eth_hdr; + uint16_t i, iv_len; + struct crypto_pkt_ctx *ctx; - return esp_input_inner(AF_INET, m, ip, sa, - bytes, new_family); + crypto_prefetch_ivs(); + + for (i = 0; i < count; i++) { + crypto_prefetch_ctx(ctx_arr, count, i); + ctx = ctx_arr[i]; + iv_len = crypto_session_iv_len(ctx->sa->session); + + crypto_save_iv(i, ctx->tail - iv_len, iv_len); + + eth_hdr = (struct rte_ether_hdr 
*)ctx->hdr; + eth_hdr->ether_type = htons(ctx->out_ethertype); + + crypto_prefetch_ctx_data(ctx_arr, count, i); + + crypto_sadb_increment_counters(ctx->sa, + ctx->plaintext_size_orig - + ctx->counter_modify, 1); + ctx->bytes = ctx->plaintext_size_orig - ctx->counter_modify; + } } -int esp_input6(struct rte_mbuf *m, struct sadb_sa *sa, - uint32_t *bytes, uint8_t *new_family) +void esp_output(struct crypto_pkt_ctx *ctx_arr[], uint16_t count) { - struct ip6_hdr *ip6 = ip6hdr(m); + struct esp_hdr_ctx h[count]; + + count = esp_output_pre_encrypt(ctx_arr, h, count); + + count = crypto_rte_xform_packets(ctx_arr, count); - return esp_input_inner(AF_INET6, m, ip6, sa, - bytes, new_family); + esp_output_post_encrypt(ctx_arr, count); } bool udp_esp_dp_interesting(const struct udphdr *udp, diff --git a/src/crypto/esp.h b/src/crypto/esp.h index bc9820c0..11b79969 100644 --- a/src/crypto/esp.h +++ b/src/crypto/esp.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015-2016 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -22,16 +22,9 @@ struct rte_mbuf; struct sadb_sa; struct udphdr; -int esp_input(struct rte_mbuf *m, struct sadb_sa *sa, uint32_t *bytes, - uint8_t *new_family); -int esp_input6(struct rte_mbuf *m, struct sadb_sa *sa, uint32_t *bytes, - uint8_t *new_family); +void esp_input(struct crypto_pkt_ctx *ctx_arr[], uint16_t count); - -int esp_output(struct rte_mbuf *m, uint8_t family, void *l3hdr, - struct sadb_sa *sa, uint32_t *bytes); -int esp_output6(struct rte_mbuf *m, uint8_t family, void *l3hdr, - struct sadb_sa *sa, uint32_t *bytes); +void esp_output(struct crypto_pkt_ctx *ctx_arr[], uint16_t count); /* * RFC 4303 requires the pad length and next header fields to be right aligned @@ -54,4 +47,12 @@ void esp_replay_advance(const uint8_t *esp, struct sadb_sa *sa); */ bool udp_esp_dp_interesting(const struct udphdr *udp, uint32_t *spi); +/* + * API to invoke openssl implementation of encryption + */ +int esp_generate_chain(struct sadb_sa *sa, struct rte_mbuf *mbuf, + unsigned int l3_hdr_len, unsigned char *esp, + unsigned char *iv, uint32_t text_total_len, + int8_t encrypt); + #endif /* ESP_H */ diff --git a/src/crypto/vti.c b/src/crypto/vti.c index e7428853..424d275c 100644 --- a/src/crypto/vti.c +++ b/src/crypto/vti.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2021, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015-2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -43,17 +43,17 @@ #include "ip6_funcs.h" #include "ip_funcs.h" #include "ip_icmp.h" -#include "pktmbuf.h" +#include "pktmbuf_internal.h" #include "shadow.h" #include "snmp_mib.h" #include "urcu.h" #include "util.h" #include "vplane_debug.h" #include "vplane_log.h" -#include "vrf.h" +#include "vrf_internal.h" #include "vti.h" -struct ether_addr; +struct rte_ether_addr; struct nlattr; #define VTI_DEBUG(args...) 
\ @@ -67,7 +67,7 @@ struct nlattr; /* Dummy logging function to force checking of args */ static inline void __attribute__((format(printf, 1, 2))) -no_printf(const char *fmt __attribute__((unused)), ...) { } +no_printf(const char *fmt __unused, ...) { } #define VTI_DEBUG_PKT(args...) no_printf(args) @@ -104,13 +104,12 @@ static unsigned int vti_ctxt_hash(const struct vti_tunnel_key *key, key->mark, seed); - else - return rte_jhash_2words(key->dst.a6[0] + - key->dst.a6[1] + - key->dst.a6[2] + - key->dst.a6[3], - key->mark, - seed); + return rte_jhash_2words(key->dst.a6[0] + + key->dst.a6[1] + + key->dst.a6[2] + + key->dst.a6[3], + key->mark, + seed); } static int vti_ctxt_match(struct cds_lfht_node *node, const void *_key) @@ -270,7 +269,7 @@ static int vti_tunnel_key_from_nlattr(struct vti_tunnel_key *cfg, struct ifnet * vti_tunnel_create(int ifindex, const char *ifname, - const struct ether_addr *addr, const unsigned int mtu, + const struct rte_ether_addr *addr, const unsigned int mtu, struct nlattr *data) { struct ifnet *ifp; @@ -299,7 +298,7 @@ vti_tunnel_create(int ifindex, const char *ifname, * take care of doing fragmentation. 
*/ ifp = if_alloc(ifname, IFT_TUNNEL_VTI, UINT16_MAX, addr, - SOCKET_ID_ANY); + SOCKET_ID_ANY, NULL); if (!ifp) { VTI_ERR("%s: can't allocate ifnet\n", ifname); goto free_ctxt; @@ -349,9 +348,7 @@ vti_tunnel_create(int ifindex, const char *ifname, } else { VTI_DEBUG("Policy reqid set to %x on create for %s\n", ctxt->reqid, ifname); - crypto_sadb_peer_overhead_subscribe(&ctxt->key.dst, - ctxt->key.family, - ctxt->reqid, + crypto_sadb_tunl_overhead_subscribe(ctxt->reqid, &ctxt->ipsec_overhead, t_vrfid); ctxt->overhead_subscribed = true; @@ -415,8 +412,7 @@ static void vti_tunnel_delete(struct ifnet *ifp) ifp->if_softc = NULL; ctxt->ifp = NULL; if (ctxt->overhead_subscribed) { - crypto_sadb_peer_overhead_unsubscribe(&ctxt->key.dst, - ctxt->key.family, + crypto_sadb_tunl_overhead_unsubscribe(ctxt->reqid, &ctxt->ipsec_overhead, t_vrfid); ctxt->overhead_subscribed = false; @@ -465,21 +461,22 @@ void vti_reqid_set(const xfrm_address_t *dst, uint8_t family, /* * If we've not previously subscribed to IPsec encryption - * overhead information from the peer, do so now. Otherwise, + * overhead information from the tunl, do so now. Otherwise, * we need to tell the SADB that we're now interested in a * (possibly) different reqid. 
*/ if (!ctxt->overhead_subscribed) { - crypto_sadb_peer_overhead_subscribe(&ctxt->key.dst, - ctxt->key.family, reqid, + crypto_sadb_tunl_overhead_subscribe(reqid, &ctxt->ipsec_overhead, t_vrfid); ctxt->overhead_subscribed = true; } else { - crypto_sadb_peer_overhead_change_reqid(&ctxt->key.dst, - ctxt->key.family, reqid, - &ctxt->ipsec_overhead, - t_vrfid); + crypto_sadb_tunl_overhead_unsubscribe(ctxt->reqid, + &ctxt->ipsec_overhead, + t_vrfid); + crypto_sadb_tunl_overhead_subscribe(reqid, + &ctxt->ipsec_overhead, + t_vrfid); } ctxt->reqid = reqid; @@ -722,21 +719,16 @@ int vti_set_output_vrf(const struct ifnet *ifp, struct rte_mbuf *m) return -1; } -int vti_get_peer_addr(const struct ifnet *ifp, uint32_t *af, void **addr) +static enum dp_ifnet_iana_type +vti_iana_type(struct ifnet *ifp __unused) { - struct vti_tunnel_ctxt *ctxt = rcu_dereference(ifp->if_softc); - - if (ctxt) { - *af = ctxt->key.family; - *addr = &ctxt->key.dst; - return 0; - } - return -1; + return DP_IFTYPE_IANA_TUNNEL; } static const struct ift_ops vti_if_ops = { .ifop_set_mtu = vti_tunnel_set_mtu, .ifop_uninit = vti_tunnel_delete, + .ifop_iana_type = vti_iana_type, }; static void vti_type_init(void) diff --git a/src/crypto/vti.h b/src/crypto/vti.h index 0eb574c4..42070fc4 100644 --- a/src/crypto/vti.h +++ b/src/crypto/vti.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015-2016 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -10,7 +10,7 @@ #include "netlink.h" -struct ether_addr; +struct rte_ether_addr; struct ifnet; struct nlattr; struct rte_mbuf; @@ -22,10 +22,10 @@ struct vti_ctxt_table; struct ifnet * vti_tunnel_create(int ifindex, const char *ifname, - const struct ether_addr *addr, const unsigned int mtu, + const struct rte_ether_addr *addr, const unsigned int mtu, struct nlattr *data); void vti_tunnel_modify(struct ifnet *ifp, struct nlattr *data); -void vti_tunnel_out(struct ifnet *input_ifp, struct ifnet *nxt_ifp, +void vti_tunnel_out(struct ifnet *in_ifp, struct ifnet *nxt_ifp, struct rte_mbuf *m, uint16_t proto); int vti_handle_inbound(const xfrm_address_t *dst, const uint8_t family, const uint32_t mark, struct rte_mbuf *m, @@ -33,6 +33,5 @@ int vti_handle_inbound(const xfrm_address_t *dst, const uint8_t family, int vti_table_init(struct vrf *vrf); void vti_table_uninit(struct vrf *vrf); int vti_set_output_vrf(const struct ifnet *ifp, struct rte_mbuf *m); -int vti_get_peer_addr(const struct ifnet *ifp, uint32_t *af, void **addr); #endif /* VTI_H */ diff --git a/src/crypto/xfrm_client.c b/src/crypto/xfrm_client.c new file mode 100644 index 00000000..2247511b --- /dev/null +++ b/src/crypto/xfrm_client.c @@ -0,0 +1,376 @@ +/* + * Copyright (c) 2020 AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "config_internal.h" +#include "control.h" +#include "event_internal.h" +#include "controller.h" +#include "netlink.h" +#include "xfrm_client.h" +#include "vplane_debug.h" +#include "vplane_log.h" +#include "zmq_dp.h" +#include "crypto/crypto_policy.h" +#include "crypto/crypto.h" + +zsock_t *xfrm_pull_socket; +zsock_t *xfrm_push_socket; + +/* + * xfrm_direct indcates that xfrm messages are coming direct from + * strongswan, rather than via vplaned. 
Updates from vplaned are not + * batched and do not need to be acked, and the ack channel will not + * be initialised. + */ +bool xfrm_direct; + +uint32_t last_seq_sent; + +/* + * Build a message back to strongswan to indicate whether the + * xfrm message, with sequence id 'seq', was successfully + * processed or not. + * + * Strongswan expects a netlink error message, and the result of the xfrm + * processing is passed in the error field. + */ +int xfrm_client_send_ack(uint32_t seq, int err) +{ + char buf[MNL_SOCKET_BUFFER_SIZE]; + struct nlmsghdr *nlh; + struct nlmsgerr *err_msg; + zframe_t *frame; + int rc; + + if (!xfrm_direct) + return 0; + if (last_seq_sent == seq) + rte_panic("XFRM Duplicate sequence %d", seq); + + last_seq_sent = seq; + + nlh = mnl_nlmsg_put_header(buf); + nlh->nlmsg_seq = seq; + nlh->nlmsg_type = NLMSG_ERROR; + err_msg = mnl_nlmsg_put_extra_header(nlh, sizeof(*err_msg)); + if (!err_msg) { + DP_DEBUG(CRYPTO, ERR, DATAPLANE, + "Failed to alloc xfrm ack error frame\n"); + return -ENOMSG; + } + err_msg->error = -err; + + frame = zframe_new(nlh, nlh->nlmsg_len); + if (!frame) + return -ENOMSG; + + rc = zframe_send(&frame, xfrm_push_socket, 0); + if (rc < 0) + zframe_destroy(&frame); + + return rc; +} + +int xfrm_client_send_expire(xfrm_address_t *dst, uint16_t family, uint32_t spi, + uint32_t reqid, uint8_t proto, uint8_t hard) +{ + struct xfrm_user_expire *expire; + struct nlmsghdr *nlh; + zframe_t *frame; + char buf[MNL_SOCKET_BUFFER_SIZE]; + int rc; + + memset(buf, 0, MNL_SOCKET_BUFFER_SIZE); + + nlh = mnl_nlmsg_put_header(buf); + nlh->nlmsg_type = XFRM_MSG_EXPIRE; + nlh->nlmsg_flags = 0; + nlh->nlmsg_seq = 0; + + expire = mnl_nlmsg_put_extra_header(nlh, sizeof(*expire)); + if (!expire) { + DP_DEBUG(CRYPTO, ERR, DATAPLANE, + "XFRM expire failed SPI:%u\n", spi); + return -ENOMSG; + } + + expire->state.family = family; + expire->state.id.daddr = *dst; + expire->state.id.proto = proto; + expire->state.id.spi = spi; + expire->state.reqid = reqid; +
expire->hard = hard; + + frame = zframe_new(nlh, nlh->nlmsg_len); + if (!frame) { + DP_DEBUG(CRYPTO, ERR, DATAPLANE, + "XFRM expire can't create frame SPI:%u\n", + spi); + return -ENOMSG; + } + + rc = zframe_send(&frame, xfrm_push_socket, 0); + if (rc < 0) { + DP_DEBUG(CRYPTO, ERR, DATAPLANE, + "XFRM expire failed to send SPU:%u\n", + spi); + zframe_destroy(&frame); + return rc; + } + return rc; +} +/* + * Build an SA message back to the server with the stats that were requested. + */ +int xfrm_client_send_sa_stats(uint32_t seq, uint32_t spi, + struct crypto_sadb_stats *stats) +{ + struct nlmsghdr *nlh; + struct xfrm_usersa_info *sa; + zframe_t *frame; + char buf[MNL_SOCKET_BUFFER_SIZE]; + int rc; + + memset(buf, 0, MNL_SOCKET_BUFFER_SIZE); + + /* the stats are returned in a NEWSA xfrm which is not intuitive */ + nlh = mnl_nlmsg_put_header(buf); + nlh->nlmsg_type = XFRM_MSG_NEWSA; + nlh->nlmsg_flags = NLM_F_ACK; + nlh->nlmsg_seq = seq; + + sa = mnl_nlmsg_put_extra_header(nlh, sizeof(*sa)); + if (!sa) { + DP_DEBUG(CRYPTO, ERR, DATAPLANE, + "XFRM sa stats failed SPI:%u\n", spi); + return -1; + } + sa->curlft.bytes = stats->bytes; + sa->curlft.packets = stats->packets; + + frame = zframe_new(nlh, nlh->nlmsg_len); + if (!frame) { + DP_DEBUG(CRYPTO, ERR, DATAPLANE, + "XFRM sa stats framing failed SPI:%u\n", spi); + return -1; + } + + rc = zframe_send(&frame, xfrm_push_socket, 0); + if (rc < 0) { + DP_DEBUG(CRYPTO, ERR, DATAPLANE, + "XFRM sa stats framing failed SPI:%u\n", spi); + zframe_destroy(&frame); + } + + return rc; +} + +static int +dp_xfrm_msg_recv(zsock_t *sock, zmq_msg_t *hdr, zmq_msg_t *msg) +{ + zmq_msg_init(hdr); + zmq_msg_init(msg); + + if (zmq_msg_recv(hdr, zsock_resolve(sock), 0) <= 0) + goto error; + + int more = zmq_msg_get(hdr, ZMQ_MORE); + if (!more) + return 0; + + if (zmq_msg_recv(msg, zsock_resolve(sock), 0) <= 0) + goto error; + + more = zmq_msg_get(msg, ZMQ_MORE); + while (more) { + zmq_msg_t sink; + zmq_msg_init(&sink); + zmq_msg_recv(&sink, 
zsock_resolve(sock), 0); + more = zmq_msg_get(&sink, ZMQ_MORE); + zmq_msg_close(&sink); + } + + return 0; +error: + zmq_msg_close(msg); + zmq_msg_close(hdr); + return -1; +} + +static int xfrm_netlink_recv(void *arg) +{ + zmq_msg_t xfrm_msg, xfrm_hdr; + zsock_t *sock = arg; + const struct nlmsghdr *nlh; + const char *hdr; + uint32_t len; + int rc; + struct xfrm_client_aux_data xfrm_aux; + errno = 0; + + rc = dp_xfrm_msg_recv(sock, &xfrm_hdr, &xfrm_msg); + + if (rc != 0) { + if (errno == 0) + return 0; + return -1; + } + + xfrm_direct = true; + + /* + * Get the hdr type: one of START, DATA or END, which are used to + * delimit a batch. All hdrs have netlink msgs to follow, + * however only END is of special significance as it triggers + * an npf commit and rebuild. + * + * The message types of FLUSH and COMMIT are control messages + * and are used without any accompanying xfrm. + */ + hdr = zmq_msg_data(&xfrm_hdr); + if (strncmp("FLUSH", hdr, strlen("FLUSH")) == 0) { + last_seq_sent = 0; + crypto_flush_all(); + goto end; + } else if (strncmp("COMMIT", hdr, strlen("COMMIT")) == 0) { + crypto_npf_cfg_commit_flush(); + goto end; + } + + nlh = zmq_msg_data(&xfrm_msg); + len = zmq_msg_size(&xfrm_msg); + + if (!nlh || len < sizeof(*nlh)) { + DP_DEBUG(CRYPTO, ERR, DATAPLANE, + "XFRM msg invalid\n"); + goto end; + } + + vrfid_t vrf_id = VRF_DEFAULT_ID; + xfrm_aux.vrf = &vrf_id; + xfrm_aux.seq = nlh->nlmsg_seq; + + switch (nlh->nlmsg_type) { + case XFRM_MSG_NEWPOLICY: /* Fall through */ + case XFRM_MSG_UPDPOLICY: + case XFRM_MSG_POLEXPIRE: + case XFRM_MSG_DELPOLICY: + /* + * Policy update ACKs are normally generated upon the + * programming of the policy into the classifier which + * occurs at the end of a batch. However there are + * scenarios when the policy will not be programmed + * into the classifier but an ack is still required + * to be returned to the xfrm source. These scenarios + * include duplicate updates, errors, and incomplete + * policies.
In order to achieve this, a return code + * (rc) and an indication of whether an ack should be sent + * (xfrm_aux.ack_msg) are required. + * + * Acks are always sent in error scenarios. However + * unless one of the scenarios outlined above is hit, + * acks are not sent until the policy has been added + * to the classifier. + */ + rc = mnl_cb_run(nlh, len, 0, 0, rtnl_process_xfrm, &xfrm_aux); + /* Policy acks are batched in most cases */ + if (rc < 0 || xfrm_aux.ack_msg) + xfrm_client_send_ack(nlh->nlmsg_seq, rc); + if (strncmp("END", hdr, strlen("END")) == 0) + crypto_npf_cfg_commit_flush(); + break; + + case XFRM_MSG_NEWSA: /* fall through */ + case XFRM_MSG_UPDSA: + case XFRM_MSG_DELSA: + case XFRM_MSG_EXPIRE: + case XFRM_MSG_GETSA: + rc = mnl_cb_run(nlh, len, 0, 0, rtnl_process_xfrm_sa, + &xfrm_aux); + /* + * For all SA messages apart from a successful GETSA, + * the ack response is always sent from here, + * i.e. all other msg processing returns ack_msg = + * true. Successful GETSA processing generates a + * message back to the server and so does not require + * an ACK, i.e. it sets ack_msg = false, however if it + * is unsuccessful an error is sent from here as rc != + * 0.
+ */ + if (rc != 0 || xfrm_aux.ack_msg) + xfrm_client_send_ack(nlh->nlmsg_seq, rc); + break; + default: + rc = -EINVAL; + xfrm_client_send_ack(nlh->nlmsg_seq, rc); + } + + if (rc != 0) { + DP_DEBUG(CRYPTO, ERR, DATAPLANE, + "XFRM netlink msg not handled\n"); + } +end: + zmq_msg_close(&xfrm_hdr); + zmq_msg_close(&xfrm_msg); + + return 0; +} + +void xfrm_client_unsubscribe(void) +{ + if (xfrm_push_socket) { + zsock_destroy(&xfrm_push_socket); + xfrm_push_socket = NULL; + } + if (xfrm_pull_socket) { + zsock_destroy(&xfrm_pull_socket); + xfrm_pull_socket = NULL; + } +} + +int xfrm_client_init(void) +{ + /* Ensure we are not restarting without cleanup */ + if (xfrm_pull_socket || xfrm_push_socket) + rte_panic("Open xfrm socket"); + + if (!config.xfrm_pull_url || !config.xfrm_push_url) { + RTE_LOG(ERR, DATAPLANE, "No xfrm url"); + /* Once the cut over to the xfrm direct path + * is complete need to return -1 + */ + return 0; + } + + xfrm_pull_socket = zsock_new(ZMQ_PULL); + if (!xfrm_pull_socket) + rte_panic("failed to open xfrm socket"); + if (zsock_connect(xfrm_pull_socket, "%s", config.xfrm_pull_url) < 0) + rte_panic("failed to open xfrm pull socket"); + + xfrm_push_socket = zsock_new(ZMQ_PUSH); + if (!xfrm_push_socket) + rte_panic("failed to open xfrm socket"); + if (zsock_connect(xfrm_push_socket, "%s", config.xfrm_push_url) < 0) + rte_panic("failed to open xfrm push socket"); + + dp_register_event_socket( + zsock_resolve(xfrm_pull_socket), + xfrm_netlink_recv, + xfrm_pull_socket); + + return 0; +} diff --git a/src/crypto/xfrm_client.h b/src/crypto/xfrm_client.h new file mode 100644 index 00000000..9dee1a10 --- /dev/null +++ b/src/crypto/xfrm_client.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2020, AT&T Intellectual Property. All rights reserved. 
+ * + * SPDX-License-Identifier: LGPL-2.1-only + */ +#ifndef XFRM_CLIENT_H +#define XFRM_CLIENT_H + +#include +#include +#include "control.h" +#include "crypto/crypto_sadb.h" + +extern bool xfrm_direct; + +struct xfrm_client_aux_data { + vrfid_t *vrf; + bool ack_msg; + uint32_t seq; +}; + +/* + * Close all the client sockets for this source. + */ +void xfrm_client_unsubscribe(void); + +int xfrm_client_init(void); + +int xfrm_client_send_ack(uint32_t seq, int err); +int xfrm_client_send_sa_stats(uint32_t seq, uint32_t spi, + struct crypto_sadb_stats *stats); +int xfrm_client_send_expire(xfrm_address_t *dst, uint16_t family, uint32_t spi, + uint32_t reqid, uint8_t proto, uint8_t hard); +#endif /* XFRM_CLIENT_H */ diff --git a/src/dealer.c b/src/dealer.c index bf432c05..fde04e35 100644 --- a/src/dealer.c +++ b/src/dealer.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015-2016 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -15,7 +15,7 @@ #include #include -#include "config.h" +#include "config_internal.h" #include "dealer.h" #include "vplane_debug.h" #include "vplane_log.h" @@ -39,16 +39,16 @@ process_dealer_reject(zmsg_t *reject, enum cont_src_en cont_src) */ if (zmsg_size(reject) != (NUM_FRAMES_REJECT_MSG - 1)) { RTE_LOG(ERR, DATAPLANE, - "master(%s) Rx'd REJECT message with wrong number of frames\n", + "main(%s) Rx'd REJECT message with wrong number of frames\n", cont_src_name(cont_src)); rc = -1; goto err; } uuid = zmsg_popstr(reject); - if (strcmp(uuid, config.uuid)) { + if (strcmp(uuid, config.uuid) != 0) { RTE_LOG(ERR, DATAPLANE, - "master(%s) REJECT message mis-match on UUID\n", + "main(%s) REJECT message mis-match on UUID\n", cont_src_name(cont_src)); rc = -2; goto err; @@ -71,7 +71,7 @@ process_dealer_accept(zmsg_t *accept, enum cont_src_en cont_src) */ if (zmsg_size(accept) != (NUM_FRAMES_ACCEPT_MSG - 1)) { RTE_LOG(ERR, DATAPLANE, - "master (%s) Rx'd ACCEPT msg with wrong number of frames\n", + "main(%s) Rx'd ACCEPT msg with wrong number of frames\n", cont_src_name(cont_src)); rc = -2; goto err; @@ -82,9 +82,9 @@ process_dealer_accept(zmsg_t *accept, enum cont_src_en cont_src) */ uuid = zmsg_popstr(accept); if (cont_src == CONT_SRC_MAIN) { - if (strcmp(uuid, config.uuid)) { + if (strcmp(uuid, config.uuid) != 0) { RTE_LOG(ERR, DATAPLANE, - "master(%s) ACCEPT message mis-match on UUID\n", + "main(%s) ACCEPT message mis-match on UUID\n", cont_src_name(cont_src)); rc = -3; goto err; @@ -96,7 +96,7 @@ process_dealer_accept(zmsg_t *accept, enum cont_src_en cont_src) if (zmsg_popu16(accept, &dp_idx) < 0) { RTE_LOG(ERR, DATAPLANE, - "master(%s) ACCEPT message fail on vPlane index\n", + "main(%s) ACCEPT message fail on vPlane index\n", cont_src_name(cont_src)); rc = -4; goto err; @@ -104,7 +104,7 @@ process_dealer_accept(zmsg_t *accept, enum cont_src_en cont_src) if (config.dp_index != dp_idx) RTE_LOG(ERR, DATAPLANE, - "master(%s) ACCEPT message dp id mismatch, local %u != rx 
%u\n", + "main(%s) ACCEPT message dp id mismatch, local %u != rx %u\n", cont_src_name(cont_src), config.dp_index, dp_idx); return 0; @@ -141,7 +141,7 @@ static int process_dealer_msg(zmsg_t *rep, enum cont_src_en cont_src) type = zmsg_popstr(rep); for (h = dealer_msg_handlers; h->type && type; ++h) { - if (strcmp(h->type, type)) + if (strcmp(h->type, type) != 0) continue; rc = (*h->handler)(rep, cont_src); @@ -151,7 +151,7 @@ static int process_dealer_msg(zmsg_t *rep, enum cont_src_en cont_src) } RTE_LOG(NOTICE, DATAPLANE, - "master(%s) Couldn't process message with type '%s'\n", + "main(%s) Couldn't process message with type '%s'\n", cont_src_name(cont_src), type); free(type); @@ -186,7 +186,7 @@ static int dealer_recv(zsock_t *socket, enum cont_src_en cont_src) if (!dealer_msg) { RTE_LOG(ERR, DATAPLANE, - "master(%s) Missing ZMQ message from DEALER socket\n", + "main(%s) Missing ZMQ message from DEALER socket\n", cont_src_name(cont_src)); return -1; } @@ -194,7 +194,7 @@ static int dealer_recv(zsock_t *socket, enum cont_src_en cont_src) rc = process_dealer_msg(dealer_msg, cont_src); if (rc < 0) RTE_LOG(ERR, DATAPLANE, - "master(%s) Error processing ZMQ message from DEALER socket\n", + "main(%s) Error processing ZMQ message from DEALER socket\n", cont_src_name(cont_src)); zmsg_destroy(&dealer_msg); @@ -209,7 +209,7 @@ int init_controller_connection(zsock_t *socket, enum cont_src_en cont_src) rc = send_controller_connect(socket, cont_src); if (rc < 0) RTE_LOG(ERR, DATAPLANE, - "master(%s) ZMQ failed to connect to controller\n", + "main(%s) ZMQ failed to connect to controller\n", cont_src_name(cont_src)); return rc; } @@ -281,20 +281,3 @@ void conf_query(enum cont_src_en cont_src) free(type); zmsg_destroy(&zmsg); } - -#ifdef DEALER_TEST -int __test_process_dealer_msg(zmsg_t *msg, enum cont_src_en cont_src) -{ - return process_dealer_msg(msg, cont_src); -} - -int __test_process_dealer_reject(zmsg_t *reject, enum cont_src_en cont_src) -{ - return 
process_dealer_reject(reject, cont_src); -} - -int __test_process_dealer_accept(zmsg_t *accept, enum cont_src_en cont_src) -{ - return process_dealer_accept(accept, cont_src); -} -#endif diff --git a/src/dealer.h b/src/dealer.h index 4929c2ed..ada71442 100644 --- a/src/dealer.h +++ b/src/dealer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015-2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -26,15 +26,4 @@ int try_controller_response(zsock_t *socket, enum cont_src_en cont_src); */ void conf_query(enum cont_src_en cont_src); -#ifdef DEALER_TEST -/* - * The following function are use for the purposes of unit-testing only. - * - * They should not be used in production code - */ -int __test_process_dealer_msg(zmsg_t *msg, enum cont_src_en cont_src); -int __test_process_dealer_reject(zmsg_t *reject, enum cont_src_en cont_src); -int __test_process_dealer_accept(zmsg_t *accept, enum cont_src_en cont_src); -#endif /* DEALER_TEST */ - #endif /* DEALER_H */ diff --git a/src/debug.c b/src/debug.c new file mode 100644 index 00000000..c401a16d --- /dev/null +++ b/src/debug.c @@ -0,0 +1,321 @@ +/* + * Copyright (c) 2020, AT&T Intellectual Property. All rights reserved. 
+ * + * SPDX-License-Identifier: LGPL-2.1-only + */ + +#include +#include + +#include "debug.h" +#include "json_writer.h" +#include "urcu.h" +#include "util.h" +#include "vplane_debug.h" + +uint64_t dp_debug = DP_DBG_DEFAULT; +uint64_t dp_debug_init = DP_DBG_DEFAULT; +static uint64_t dp_debug_allocated_flags; +static uint32_t dp_log_level_init; + +struct dp_debug_event_type { + const char *event_type; + uint64_t id; + int bit; + struct cds_list_head list_entry; +}; + +static struct cds_list_head dp_debug_event_list_head; + +static int cmd_log_level(FILE *f, int argc, char **argv) +{ + if (argc > 1) + rte_log_set_global_level(atoi(argv[1])); + else { + json_writer_t *wr = jsonw_new(f); + + jsonw_uint_field(wr, "level", rte_log_get_global_level()); + jsonw_destroy(&wr); + } + + return 0; +} + +/* Log types (see rte_log.h) */ +static const char *log_type_bits[] = { + [0] = "EAL", [1] = "MALLOC", [2] = "RING", [3] = "MEMPOOL", + [4] = "TIMER", [5] = "PMD", [6] = "HASH", [7] = "LPM", + [8] = "KNI", [9] = "ACL", [10] = "POWER", [11] = "METER", + [12] = "SCHED", [13] = "PORT", [14] = "TABLE", [15] = "PIPELINE", + [16] = "MBUF", [17] = "CRYPTODEV", [18] = "EFD", [19] = "EVENTDEV", + + [24] = "USER1", [25] = "USER2", [26] = "USER3", [27] = "USER4", + [28] = "USER5", [29] = "USER6", [30] = "USER7", [31] = "USER8", +}; + +static int cmd_log_type(FILE *f, int argc, char **argv) +{ + unsigned int i; + unsigned int log_type_size = ARRAY_SIZE(log_type_bits); + const char *name; + int level; + + if (argc == 1) { + json_writer_t *wr = jsonw_new(f); + + for (i = 0; i < log_type_size; i++) { + name = log_type_bits[i]; + if (!name) + continue; + level = rte_log_get_level(i); + if (level < 0) + continue; + jsonw_int_field(wr, name, level); + } + jsonw_destroy(&wr); + return 0; + } + + while (--argc) { + const char *arg = *++argv; + int enable = 1; + + if (*arg == '-') { + enable = 0; + ++arg; + } + + for (i = 0; i < log_type_size; i++) { + name = log_type_bits[i]; + if (!name) + 
continue; + if (strcasecmp(name, arg) == 0) { + rte_log_set_level(i, + enable ? RTE_LOG_DEBUG + : rte_log_get_global_level()); + break; + } + } + if (i == log_type_size) { + fprintf(f, "%s unknown log type\n", arg); + return -1; + } + } + return 0; +} + +int cmd_log(FILE *f, int argc, char **argv) +{ + + if (argc == 1) { + fprintf(f, "missing log command\n"); + return -1; + } + --argc, ++argv; + + if (strcmp(argv[0], "level") == 0) + return cmd_log_level(f, argc, argv); + if (strcmp(argv[0], "type") == 0) + return cmd_log_type(f, argc, argv); + + fprintf(f, "unknown log command: %s\n", argv[0]); + return -1; +} + +/* Control over debug settings */ +/* Keep this in sync with vplane_debug.h */ +static const char *debug_bits[] = { + "init", "link", "arp", "bridge", + "nl_interface", "nl_route", "nl_address", "nl_neighbor", + "nl_netconf", "subscribe", "resync", "nd6", + "route", "macvlan", "vxlan", "qos", + "npf", "nat", "l2tp", "lag", + "dealer", "nsh", + "vti", "crypto", "crypto_data", "vhost", + "vrf", "multicast", "mpls_control", + "mpls_pkterr", "dpi", "qos_dp", "qos_hw", + "storm_ctl", "cpp_rl", "ptp", "cgnat", + "flow-cache", "mac-limit", "gpc", +}; + +/* find debug bit based on name, allow abbreviation */ +static int find_debug_bit(const char *str) +{ + unsigned int i; + struct dp_debug_event_type *event; + + /* Check the hardcoded ones first */ + for (i = 0; i < ARRAY_SIZE(debug_bits); i++) + if (strncmp(debug_bits[i], str, strlen(str)) == 0) + return i; + + /* And then the dynamically registered ones */ + cds_list_for_each_entry_rcu(event, &dp_debug_event_list_head, + list_entry) { + if (strcmp(event->event_type, str) == 0) + return event->bit; + } + + return -1; +} + +static int dp_debug_enable_disable(const char *event_type, bool enable) +{ + int i; + + i = find_debug_bit(event_type); + if (i < 0) + return i; + + if (enable) + dp_debug |= (1ul << i); + else + dp_debug &= ~(1ul << i); + + return 0; +} + +int dp_debug_enable(const char *event_type) +{ + 
return dp_debug_enable_disable(event_type, true); +} + +int dp_debug_disable(const char *event_type) +{ + return dp_debug_enable_disable(event_type, false); +} + +bool dp_debug_is_enabled(uint64_t event_id) +{ + return event_id & dp_debug; +} + +static void show_debug(FILE *f) +{ + unsigned int i; + struct dp_debug_event_type *event; + + fprintf(f, "Debug %#lx", dp_debug); + for (i = 0; i < ARRAY_SIZE(debug_bits); i++) + if (dp_debug & (1ul<id) + fprintf(f, " %s", event->event_type); + } + + fprintf(f, "\n"); +} + +int cmd_debug(FILE *f, int argc, char **argv) +{ + int i; + + if (argc == 1) { + show_debug(f); + return 0; + } + + while (--argc) { + const char *arg = *++argv; + + if (strcmp(arg, "all") == 0) { + dp_debug = ~0ul; + rte_log_set_global_level(RTE_LOG_DEBUG); + } else if (strcmp(arg, "-all") == 0) { + /* Revert back to the startup debugs */ + dp_debug = dp_debug_init; + rte_log_set_global_level(dp_log_level_init); + } else if (*arg == '-') { + + i = dp_debug_disable(arg+1); + if (i < 0) { + fprintf(f, "Unknown debug flag %s\n", arg+1); + return -1; + } + if (dp_debug == dp_debug_init) + rte_log_set_global_level(dp_log_level_init); + } else { + i = dp_debug_enable(arg); + if (i < 0) { + fprintf(f, "Unknown debug flag %s\n", arg); + return -1; + } + rte_log_set_global_level(RTE_LOG_DEBUG); + } + } + return 0; +} + +uint64_t dp_debug_register(const char *event_type) +{ + struct dp_debug_event_type *event; + int i; + + if (!event_type) + return 0; + + if (find_debug_bit(event_type) > 0) + return 0; + + if (dp_debug_allocated_flags == UINT64_MAX) { + RTE_LOG(ERR, DATAPLANE, + "no space left for new debug event\n"); + return 0; + } + + event = malloc(sizeof(*event)); + if (!event) { + RTE_LOG(ERR, DATAPLANE, + "no memory for new debug event\n"); + return 0; + } + + for (i = 0; i < 64; i++) { + if (!((1ul << i) & dp_debug_allocated_flags)) { + event->event_type = strdup(event_type); + if (!event->event_type) { + free(event); + RTE_LOG(ERR, DATAPLANE, + "no 
memory for new debug event\n"); + return 0; + + } + dp_debug_allocated_flags |= 1ul << i; + event->id = 1ul << i; + event->bit = i; + + cds_list_add_rcu(&event->list_entry, + &dp_debug_event_list_head); + return event->id; + } + } + free(event); + RTE_LOG(ERR, DATAPLANE, + "Could not register new debug event\n"); + return 0; +} + +void debug_init(void) +{ + unsigned int i; + + CDS_INIT_LIST_HEAD(&dp_debug_event_list_head); + + /* Take a note of the hardcoded flags that are allocated */ + for (i = 0; i < ARRAY_SIZE(debug_bits); i++) + dp_debug_allocated_flags |= (1ul << i); + + dp_log_level_init = rte_log_get_global_level(); + + /* + * Set user types to the debug log level, since we are in + * control of debugs and these should be controlled by + * facility-specific debug flags in combination with the + * global log level. + */ + for (i = RTE_LOGTYPE_USER1; i <= RTE_LOGTYPE_USER5; i++) + rte_log_set_level(i, RTE_LOG_DEBUG); +} diff --git a/src/dp_event.c b/src/dp_event.c index 483cd090..b660bfda 100644 --- a/src/dp_event.c +++ b/src/dp_event.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. 
* * SPDX-License-Identifier: LGPL-2.1-only */ @@ -24,26 +24,23 @@ static void dp_evt_notify(enum dp_evt evt, uint32_t cont_src, const struct dp_event_ops *ops, void *obj, uint32_t val, uint32_t val2, const void *data) { + const struct dp_events_ops *pub_ops; switch (evt) { case DP_EVT_IF_CREATE: if (ops->if_create) ops->if_create(obj); break; - case DP_EVT_IF_CREATE_FINISHED: - if (ops->if_create_finished) - ops->if_create_finished(obj); - break; case DP_EVT_IF_DELETE: if (ops->if_delete) ops->if_delete(obj); break; case DP_EVT_IF_INDEX_SET: if (ops->if_index_set) - ops->if_index_set(obj, val); + ops->if_index_set(obj); break; - case DP_EVT_IF_INDEX_PRE_UNSET: - if (ops->if_index_pre_unset) - ops->if_index_pre_unset(obj); + case DP_EVT_IF_FEAT_MODE_CHANGE: + if (ops->if_feat_mode_change) + ops->if_feat_mode_change(obj, val); break; case DP_EVT_IF_INDEX_UNSET: if (ops->if_index_unset) @@ -61,11 +58,21 @@ static void dp_evt_notify(enum dp_evt evt, uint32_t cont_src, /* args: cont_src, ifindex, family, addr */ if (ops->if_addr_add) ops->if_addr_add(cont_src, obj, val, val2, data); + else { + pub_ops = rcu_dereference(ops->public_ops); + if (pub_ops && pub_ops->if_addr_add) + pub_ops->if_addr_add(obj, val, val2, data); + } break; case DP_EVT_IF_ADDR_DEL: /* args: cont_src, ifindex, family, addr */ if (ops->if_addr_delete) ops->if_addr_delete(cont_src, obj, val, val2, data); + else { + pub_ops = rcu_dereference(ops->public_ops); + if (pub_ops && pub_ops->if_addr_delete) + pub_ops->if_addr_delete(obj, val, val2, data); + } break; case DP_EVT_RESET_CONFIG: if (ops->reset_config) @@ -86,6 +93,11 @@ static void dp_evt_notify(enum dp_evt evt, uint32_t cont_src, case DP_EVT_IF_LINK_CHANGE: if (ops->if_link_change) ops->if_link_change(obj, val, val2); + else { + pub_ops = rcu_dereference(ops->public_ops); + if (pub_ops && pub_ops->if_link_change) + pub_ops->if_link_change(obj, val, val2); + } break; case DP_EVT_IF_VLAN_ADD: if (ops->if_vlan_add) @@ -95,11 +107,24 @@ static 
void dp_evt_notify(enum dp_evt evt, uint32_t cont_src, if (ops->if_vlan_del) ops->if_vlan_del(obj, val); break; + case DP_EVT_IF_MTU_CHANGE: + if (ops->if_mtu_change) + ops->if_mtu_change(obj, val); + break; - case DP_EVT_IF_HW_SWITCHING_CHANGE: - if (ops->if_hw_switching_change) - ops->if_hw_switching_change(obj, val); + case DP_EVT_IF_LAG_ADD_MEMBER: + if (ops->if_lag_add_member) + ops->if_lag_add_member(obj, (void *) data); + break; + case DP_EVT_IF_LAG_DELETE_MEMBER: + if (ops->if_lag_delete_member) + ops->if_lag_delete_member(obj, (void *) data); + break; + case DP_EVT_IF_LAG_CHANGE: + if (ops->if_lag_change) + ops->if_lag_change(obj, val); break; + case DP_EVT_INIT: if (ops->init) ops->init(); @@ -108,6 +133,10 @@ static void dp_evt_notify(enum dp_evt evt, uint32_t cont_src, if (ops->uninit) ops->uninit(); break; + case DP_EVT_IF_ADMIN_STATUS_CHANGE: + if (ops->if_admin_status_change) + ops->if_admin_status_change(obj, val); + break; default: rte_panic("dp_event: unknown event: %u\n", evt); } @@ -152,3 +181,70 @@ void dp_event_unregister(const struct dp_event_ops *op) return; } } + +/* + * Public version of the API. + */ +int dp_events_register(const struct dp_events_ops *ops) +{ + struct dp_event_ops *internal_ops; + + if (!ops) + return -EINVAL; + + internal_ops = calloc(1, sizeof(*internal_ops)); + if (!internal_ops) + return -ENOMEM; + + internal_ops->vrf_create = ops->vrf_create; + internal_ops->vrf_delete = ops->vrf_delete; + internal_ops->if_create = ops->if_create; + internal_ops->if_delete = ops->if_delete; + internal_ops->if_rename = ops->if_rename; + internal_ops->if_vrf_set = ops->if_vrf_set; + internal_ops->if_lag_change = ops->if_lag_change; + internal_ops->if_lag_add_member = ops->if_lag_add_member; + internal_ops->if_lag_delete_member = ops->if_lag_delete_member; + + /* if addr_add and delete have different signature + * and used directly from the public_ops. 
+ */ + + internal_ops->public_ops = ops; + + dp_event_register(internal_ops); + return 0; +} + +static void dp_event_unregister_free(struct rcu_head *head) +{ + struct dp_event_ops *ops = caa_container_of(head, struct dp_event_ops, + rcu); + free(ops); +} + +/* + * Public version of the API. + */ +int dp_events_unregister(const struct dp_events_ops *ops) +{ + struct dp_event_ops *internal_ops; + uint32_t i; + + if (!ops) + return -EINVAL; + + for (i = 0; i < ARRAY_SIZE(dp_ops); i++) { + internal_ops = rcu_dereference(dp_ops[i]); + + if (!internal_ops || internal_ops->public_ops != ops) + continue; + + if (rcu_cmpxchg_pointer(&dp_ops[i], + internal_ops, NULL) == internal_ops) { + call_rcu(&internal_ops->rcu, dp_event_unregister_free); + return 0; + } + } + return -ENOENT; +} diff --git a/src/dp_event.h b/src/dp_event.h index 89680434..84ff58e4 100644 --- a/src/dp_event.h +++ b/src/dp_event.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only */ @@ -11,43 +11,74 @@ #include "if_var.h" #include "control.h" +#include "control.h" +#include "event.h" /* * Maximum size of the event operations structs array. 
*/ -#define DP_EVENT_MAX_OPS 32 +#define DP_EVENT_MAX_OPS 64 /* Specific dataplane events */ enum dp_evt { - DP_EVT_IF_CREATE = 1, - DP_EVT_IF_CREATE_FINISHED, - DP_EVT_IF_DELETE, - DP_EVT_IF_INDEX_SET, - DP_EVT_IF_INDEX_PRE_UNSET, - DP_EVT_IF_INDEX_UNSET, + DP_EVT_VRF_CREATE = 1, + DP_EVT_VRF_DELETE, DP_EVT_IF_RENAME, DP_EVT_IF_VRF_SET, DP_EVT_IF_ADDR_ADD, DP_EVT_IF_ADDR_DEL, + DP_EVT_IF_CREATE, + DP_EVT_IF_DELETE, + DP_EVT_IF_INDEX_SET, + DP_EVT_IF_INDEX_UNSET, DP_EVT_IF_MAC_ADDR_CHANGE, DP_EVT_IF_LINK_CHANGE, DP_EVT_IF_VLAN_ADD, DP_EVT_IF_VLAN_DEL, - DP_EVT_IF_HW_SWITCHING_CHANGE, + DP_EVT_IF_FEAT_MODE_CHANGE, + DP_EVT_IF_MTU_CHANGE, DP_EVT_RESET_CONFIG, - DP_EVT_VRF_CREATE, - DP_EVT_VRF_DELETE, DP_EVT_INIT, DP_EVT_UNINIT, + DP_EVT_IF_LAG_ADD_MEMBER, + DP_EVT_IF_LAG_DELETE_MEMBER, + DP_EVT_IF_LAG_CHANGE, + DP_EVT_IF_ADMIN_STATUS_CHANGE, +}; + +_Static_assert((int)DP_EVT_VRF_CREATE == (int)DP_EVENT_VRF_CREATE, + "public and internal vrf event create events differ"); +_Static_assert((int)DP_EVT_VRF_DELETE == (int)DP_EVENT_VRF_DELETE, + "public and internal vrf event delete events differ"); +_Static_assert((int)DP_EVT_IF_RENAME == (int)DP_EVENT_IF_RENAME, + "public and internal if event rename events differ"); +_Static_assert((int)DP_EVT_IF_VRF_SET == (int)DP_EVENT_IF_VRF_SET, + "public and internal if vrf event set events differ"); +_Static_assert((int)DP_EVT_IF_ADDR_ADD == (int)DP_EVENT_IF_ADDR_ADD, + "public and internal if addr event add events differ"); +_Static_assert((int)DP_EVT_IF_ADDR_DEL == (int)DP_EVENT_IF_ADDR_DEL, + "public and internal if addr event delete events differ"); + +enum if_feat_mode_event { + IF_FEAT_MODE_EVENT_L3_FAL_ENABLED, + IF_FEAT_MODE_EVENT_L3_FAL_DISABLED, + IF_FEAT_MODE_EVENT_L3_ENABLED, + IF_FEAT_MODE_EVENT_L3_DISABLED, + /* interface-embellishing feature set changed */ + IF_FEAT_MODE_EVENT_EMB_FEAT_CHANGED, + IF_FEAT_MODE_EVENT_L2_FAL_ENABLED, + IF_FEAT_MODE_EVENT_L2_FAL_DISABLED, + IF_FEAT_MODE_EVENT_L2_CREATED, + 
IF_FEAT_MODE_EVENT_L2_DELETED, }; /* Event operations - 1:1 correspondence with above events */ struct dp_event_ops { void (*if_create)(struct ifnet *ifp); - void (*if_create_finished)(struct ifnet *ifp); void (*if_delete)(struct ifnet *ifp); - void (*if_index_set)(struct ifnet *ifp, uint32_t idx); - void (*if_index_pre_unset)(struct ifnet *ifp); + void (*if_index_set)(struct ifnet *ifp); + void (*if_feat_mode_change)(struct ifnet *ifp, + enum if_feat_mode_event event); void (*if_index_unset)(struct ifnet *ifp, uint32_t idx); void (*if_rename)(struct ifnet *ifp, const char *old_name); void (*if_vrf_set)(struct ifnet *ifp); @@ -59,12 +90,19 @@ struct dp_event_ops { void (*if_link_change)(struct ifnet *ifp, bool up, uint32_t speed); void (*if_vlan_add)(struct ifnet *ifp, uint16_t vlan); void (*if_vlan_del)(struct ifnet *ifp, uint16_t vlan); - void (*if_hw_switching_change)(struct ifnet *ifp, bool enable); + void (*if_mtu_change)(struct ifnet *ifp, uint32_t mtu); + void (*if_lag_change)(struct ifnet *ifp, enum dp_if_lag_event event); + void (*if_lag_add_member)(struct ifnet *team, struct ifnet *ifp); + void (*if_lag_delete_member)(struct ifnet *team, struct ifnet *ifp); void (*reset_config)(enum cont_src_en cont_src); void (*vrf_create)(struct vrf *vrf); void (*vrf_delete)(struct vrf *vrf); void (*init)(void); void (*uninit)(void); + void (*if_admin_status_change)(struct ifnet *ifp, bool up); + + const struct dp_events_ops *public_ops; + struct rcu_head rcu; }; #define DP_STARTUP_EVENT_REGISTER(x) \ diff --git a/src/dpdk_eth_if.c b/src/dpdk_eth_if.c deleted file mode 100644 index 0665fe4a..00000000 --- a/src/dpdk_eth_if.c +++ /dev/null @@ -1,805 +0,0 @@ -/* - * Copyright (c) 2018-2019, AT&T Intellectual Property. All rights reserved. 
- * - * SPDX-License-Identifier: LGPL-2.1-only - * - * DPDK port-backed interface implementation - */ - -#include -#include -#include -#include -#include -#include -#ifdef HAVE_RTE_BUS_PCI_H -#include -#endif - -#include "dpdk_eth_if.h" -#include "dp_event.h" -#include "ether.h" -#include "if_var.h" -#include "lag.h" -#include "vhost.h" -#include "vplane_debug.h" -#include "vplane_log.h" -#include "transceiver.h" - -#define MODULE_SFF_8436_AX_LEN 640 - -static inline bool -is_jumbo_size(uint32_t size) -{ - return size > ETHER_MTU; -} - -static int dpdk_eth_if_set_mtu(struct ifnet *ifp, uint32_t mtu) -{ - int err = 0; - int adjusted_mtu = mtu; - - if (ifp->aggregator) { - /* - * This interface is already under control of the - * bonding interface. dev_start() in the bonding - * driver does a rte_eth_dev_configure() for - * each of the slaves and will update the slave - * adapters at that point. But we need to keep - * ifp->if_mtu up to date. - */ - goto out; - } - - if (ifp->qinq_vif_cnt) - adjusted_mtu = adjusted_mtu + 4; - - /* - * If the interface has qos on it then we need to stop it - * and restart it to get QoS to recalculate its token bucket - * size based upon the new MTU. - * - * If it does not have QoS on it then we may have to stop it - * anyway as some drivers always need the port to be stopped (i40) - * for the mtu to be changed. Some drivers need the port to be - * stopped to transition into/outof jumbo range (ixgbe). - * Unfortunately we can't tell this ahead of time, so try to - * set the mtu, and if we get an error then stop the ports and - * try again. - * - * If we are transitioning into/outof jumbo range then we have to - * reconfigure the port to get the correct jumbo settings. - */ - bool mtu_jumbo_change = - (is_jumbo_size(ifp->if_mtu_adjusted) && - !is_jumbo_size(mtu)) || - (is_jumbo_size(mtu) && - !is_jumbo_size(ifp->if_mtu_adjusted)); - - /* Try and set it. 
If we get an error try again with port stopped */ - if (!mtu_jumbo_change && !ifp->if_qos) - err = rte_eth_dev_set_mtu(ifp->if_port, adjusted_mtu); - - /* - * We must update the interface's adjusted MTU before - * starting the port so that QoS can recalculate its - * token bucket size based upon the new MTU. - * - * Also used in the reconfigure_port callback. - */ - ifp->if_mtu_adjusted = adjusted_mtu; - - /* Try again, but this time after changing the port config */ - if (mtu_jumbo_change || err || ifp->if_qos) { - RTE_LOG(INFO, DATAPLANE, - "reconfiguring %s due to %s\n", - ifp->if_name, - mtu_jumbo_change ? - "jumbo length packet change" : - "online MTU setting not supported for this interface"); - err = reconfigure_pkt_len(ifp, adjusted_mtu); - } -out: - if (!err) - ifp->if_mtu = mtu; - - return err; -} - -static int dpdk_eth_if_set_l2_address(struct ifnet *ifp, uint32_t l2_addr_len, - void *l2_addr) -{ - struct ether_addr *macaddr = l2_addr; - char b1[32], b2[32]; - - if (l2_addr_len != ETHER_ADDR_LEN) { - RTE_LOG(NOTICE, DATAPLANE, - "link address is not ethernet (len=%u)!\n", - l2_addr_len); - return -EINVAL; - } - - if (ether_addr_equal(&ifp->eth_addr, macaddr)) - return 1; - - RTE_LOG(INFO, DATAPLANE, "%s change MAC from %s to %s\n", - ifp->if_name, - ether_ntoa_r(&ifp->eth_addr, b1), - ether_ntoa_r(macaddr, b2)); - - int rc; - - if (ifp->if_team) - rc = rte_eth_bond_mac_address_set( - ifp->if_port, macaddr); - else - rc = rte_eth_dev_default_mac_addr_set( - ifp->if_port, macaddr); - if (rc != 0) - return rc; - - ifp->eth_addr = *macaddr; - - return 0; -} - -static int dpdk_eth_if_start(struct ifnet *ifp) -{ - start_port(ifp->if_port, ifp->if_flags); - if (if_port_is_bkplane(ifp->if_port)) - ifpromisc(ifp, true); - - return 0; -} - -static int dpdk_eth_if_stop(struct ifnet *ifp) -{ - /* - * If this is a bonding member then it's managed by the - * bonding PMD until the team genetlink removes it from the - * bond. 
- */ - if (ifp->aggregator && lag_port_is_slave(ifp->aggregator, ifp)) - return 0; - - stop_port(ifp->if_port); - if (if_port_is_bkplane(ifp->if_port)) - ifpromisc(ifp, false); - - return 0; -} - -static int -dpdk_eth_if_add_l2_addr(struct ifnet *ifp, void *l2_addr) -{ - /* - * The bonding PMD doesn't support normal MAC address - * operations, and neither does it return ENOTSUP from the - * functions, so return it explicitly here. - */ - if (ifp->if_team) - return -ENOTSUP; - - return rte_eth_dev_mac_addr_add(ifp->if_port, l2_addr, 0); -} - -static int -dpdk_eth_if_del_l2_addr(struct ifnet *ifp, void *l2_addr) -{ - /* - * The bonding PMD doesn't support normal MAC address - * operations, and neither does it return ENOTSUP from the - * functions, so return it explicitly here. - */ - if (ifp->if_team) - return -ENOTSUP; - - return rte_eth_dev_mac_addr_remove(ifp->if_port, l2_addr); -} - -static int dpdk_eth_if_init(struct ifnet *ifp) -{ - struct dpdk_eth_if_softc *sc; - - sc = rte_zmalloc_socket("dpdk softc", sizeof(*sc), 0, ifp->if_socket); - if (!sc) - return -ENOMEM; - - rte_timer_init(&sc->scd_link_timer); - rte_timer_init(&sc->scd_blink_timer); - rte_timer_init(&sc->scd_reset_timer); - - ifp->if_softc = sc; - - ether_addr_copy(&ifp->eth_addr, &ifp->perm_addr); - - return 0; -} - -static void dpdk_eth_if_softc_free_rcu(struct rcu_head *head) -{ - struct dpdk_eth_if_softc *sc = - caa_container_of(head, struct dpdk_eth_if_softc, scd_rcu); - - if (sc->scd_vhost_info) - vhost_info_free(sc->scd_vhost_info); - - rte_free(sc); -} - -static void dpdk_eth_if_uninit(struct ifnet *ifp) -{ - struct dpdk_eth_if_softc *sc = ifp->if_softc; - - /* to cope with freeing after errors during initialisation of ifp */ - if (!sc) - return; - - rte_timer_stop(&sc->scd_link_timer); - rte_timer_stop(&sc->scd_blink_timer); - rte_timer_stop(&sc->scd_reset_timer); - - rcu_assign_pointer(ifp->if_softc, NULL); - - call_rcu(&sc->scd_rcu, dpdk_eth_if_softc_free_rcu); -} - -static int 
-dpdk_eth_if_set_vlan_filter(struct ifnet *ifp, uint16_t vlan, bool enable) -{ - struct rte_eth_dev_info dev_info; - int ret = -ENOTSUP; - - rte_eth_dev_info_get(ifp->if_port, &dev_info); - if ((dev_info.rx_offload_capa & DEV_RX_OFFLOAD_VLAN_FILTER) != 0) - ret = rte_eth_dev_vlan_filter(ifp->if_port, vlan, enable); - - return ret; -} - -static int -dpdk_eth_if_set_vlan_proto(struct ifnet *ifp, - enum if_vlan_header_type type, - uint16_t proto) -{ - enum rte_vlan_type rte_type = ETH_VLAN_TYPE_UNKNOWN; - int ret; - - if (!ifp->if_local_port) - return -ENOTSUP; - - /* - * The Mellanox ConnectX-5 driver uses a very inefficient - * transmit function if VLAN insertion is offloaded. - * Temporarily handle this in the dataplane. - * This should be removed when we up-rev DPDK to 1908 - */ - if (is_device_mlx5(ifp->if_port)) - return -ENOTSUP; - - switch (type) { - case IF_VLAN_HEADER_OUTER: - rte_type = ETH_VLAN_TYPE_OUTER; - break; - case IF_VLAN_HEADER_INNER: - rte_type = ETH_VLAN_TYPE_INNER; - break; - } - - /* - * The vlan protocol is set in the PMD even if setting - * back to 802.1q and offload wasn't supported to - * avoid making assumptions about what the drivers may - * or may not supported. I.e. the driver may support - * certain protocols, rather than being an - * all-or-nothing deal. - */ - ret = rte_eth_dev_set_vlan_ether_type(ifp->if_port, rte_type, - proto); - - if (ret == -ENOTSUP && proto == ETH_P_8021Q) { - /* - * Offload for the 802.1q protocol - * type is guaranteed by DPDK to - * always be supported in a PMD, but - * rte_eth_dev_set_vlan_ether_type - * returns -ENOTSUP if the PMD doesn't - * fill in the function pointer. - */ - return 0; - } - - return ret; -} - -static int -dpdk_eth_if_set_broadcast(struct ifnet *ifp, bool enable) -{ - /* - * This interface is under the control of bonding PMD, so - * don't make any changes to it. 
- */ - if (ifp->aggregator) - return 0; - - return ether_if_set_broadcast(ifp, enable); -} - -static int -dpdk_eth_if_set_promisc(struct ifnet *ifp, bool enable) -{ - /* - * This interface is under the control of bonding PMD - * so don't make any changes to it. - */ - if (ifp->aggregator) - return 0; - - if (enable) - rte_eth_promiscuous_enable(ifp->if_port); - else - rte_eth_promiscuous_disable(ifp->if_port); - - return 0; -} - -static void -dpdk_eth_if_show_dev_capabilities(json_writer_t *wr, - const struct rte_eth_dev_info *info) -{ - struct speed_capas { - uint32_t speed_capa; - uint32_t mbps; /* megabits */ - } speed_capas[] = { - { ETH_LINK_SPEED_10M, 10 }, - { ETH_LINK_SPEED_100M, 100 }, - { ETH_LINK_SPEED_1G, 1000 }, - { ETH_LINK_SPEED_2_5G, 2500 }, - { ETH_LINK_SPEED_5G, 5000 }, - { ETH_LINK_SPEED_10G, 10000 }, - { ETH_LINK_SPEED_20G, 20000 }, - { ETH_LINK_SPEED_25G, 25000 }, - { ETH_LINK_SPEED_40G, 40000 }, - { ETH_LINK_SPEED_50G, 50000 }, - { ETH_LINK_SPEED_56G, 56000 }, - { ETH_LINK_SPEED_100G, 100000 }, - }; - struct speed_capas hd_speed_capas[] = { - { ETH_LINK_SPEED_10M_HD, 10 }, - { ETH_LINK_SPEED_100M_HD, 100 }, - }; - unsigned int i; - - jsonw_name(wr, "capabilities"); - jsonw_start_object(wr); - - /* If speed_capa is 0, it's likely it hasn't been set up and we - * have no idea what the hardware/driver actually supports. We - * could add some overrides to dataplane-drivers-default.conf - * to massage what we return here. 
- */ - jsonw_name(wr, "full-duplex"); - jsonw_start_array(wr); - for (i = 0; i < ARRAY_SIZE(speed_capas); i++) { - if (info->speed_capa & speed_capas[i].speed_capa) - jsonw_uint(wr, speed_capas[i].mbps); - } - jsonw_end_array(wr); - - jsonw_name(wr, "half-duplex"); - jsonw_start_array(wr); - for (i = 0; i < ARRAY_SIZE(hd_speed_capas); i++) { - if (info->speed_capa & hd_speed_capas[i].speed_capa) - jsonw_uint(wr, hd_speed_capas[i].mbps); - } - jsonw_end_array(wr); - - jsonw_end_object(wr); -} - -static void -dpdk_eth_if_show_dev_info(struct ifnet *ifp, json_writer_t *wr) -{ - struct rte_eth_dev_info info; - portid_t port = ifp->if_port; - int hw_switch; - - rte_eth_dev_info_get(port, &info); - - jsonw_name(wr, "dev"); - jsonw_start_object(wr); - if (info.driver_name) - jsonw_string_field(wr, "driver", info.driver_name); - jsonw_uint_field(wr, "node", rte_eth_dev_socket_id(port)); - - if (port < RTE_MAX_ETHPORTS) { /* possibly NO_OWNER */ - struct rte_eth_dev *dev = &rte_eth_devices[port]; - bool settable; - - if (ifp->if_team) - settable = true; - else - settable = dev && dev->dev_ops && - dev->dev_ops->mac_addr_set ? 
true : false; - - jsonw_bool_field(wr, "mac_addr_settable", settable); - jsonw_string_field(wr, "eth_dev_data_name", dev->data->name); - /* - * workaround to determine switch id until we have - * a mechanism for retrieving opaque data - */ - if (get_switch_dev_info(info.driver_name, dev->data->name, - &hw_switch, NULL)) - jsonw_uint_field(wr, "hw_switch_id", hw_switch); - } - -#ifdef HAVE_RTE_ETH_DEV_INFO_DEVICE - const struct rte_bus *bus = rte_bus_find_by_device(info.device); - struct rte_pci_device *pci = NULL; - if (bus && streq(bus->name, "pci")) - pci = RTE_DEV_TO_PCI(info.device); -#else - const struct rte_pci_device *pci = info.pci_dev; -#endif - if (pci) { - jsonw_name(wr, "pci"); - jsonw_start_object(wr); - - jsonw_name(wr, "address"); - jsonw_start_object(wr); - jsonw_uint_field(wr, "domain", pci->addr.domain); - jsonw_uint_field(wr, "bus", pci->addr.bus); - jsonw_uint_field(wr, "devid", pci->addr.devid); - jsonw_uint_field(wr, "function", pci->addr.function); - jsonw_end_object(wr); - - jsonw_name(wr, "id"); - jsonw_start_object(wr); - jsonw_uint_field(wr, "vendor", pci->id.vendor_id); - jsonw_uint_field(wr, "device", pci->id.device_id); - jsonw_uint_field(wr, "subsystem_vendor", - pci->id.subsystem_vendor_id); - jsonw_uint_field(wr, "subsystem_device", - pci->id.subsystem_device_id); - jsonw_end_object(wr); - - jsonw_end_object(wr); - } - - dpdk_eth_if_show_dev_capabilities(wr, &info); - - jsonw_uint_field(wr, "min_rx_bufsize", info.min_rx_bufsize); - jsonw_uint_field(wr, "max_rx_pktlen", info.max_rx_pktlen); - jsonw_uint_field(wr, "max_rx_queues", info.max_rx_queues); - jsonw_uint_field(wr, "max_tx_queues", info.max_tx_queues); - jsonw_uint_field(wr, "max_mac_addrs", info.max_mac_addrs); - jsonw_uint_field(wr, "vmdq_queue_base", info.vmdq_queue_base); - jsonw_uint_field(wr, "vmdq_queue_num", info.vmdq_queue_num); - - if (info.driver_name && strcasestr(info.driver_name, "net_vhost")) - vhost_devinfo(wr, ifp); - - jsonw_end_object(wr); -} - -/* Device 
with statistics in hardware */ -static void -dpdk_eth_if_show_stats(struct ifnet *ifp, json_writer_t *wr) -{ - struct rte_eth_stats hwstats; - unsigned int i; - int ret; - - ret = rte_eth_stats_get(ifp->if_port, &hwstats); - if (ret) - return; - - jsonw_uint_field(wr, "rx_missed", hwstats.imissed); - jsonw_uint_field(wr, "rx_nobuffer", hwstats.rx_nombuf); - - jsonw_name(wr, "qstats"); - jsonw_start_array(wr); - for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS; i++) { - jsonw_start_object(wr); - jsonw_uint_field(wr, "ipackets", hwstats.q_ipackets[i]); - jsonw_uint_field(wr, "ibytes", hwstats.q_ibytes[i]); - jsonw_uint_field(wr, "opackets", hwstats.q_opackets[i]); - jsonw_uint_field(wr, "obytes", hwstats.q_obytes[i]); - jsonw_uint_field(wr, "errors", hwstats.q_errors[i]); - jsonw_end_object(wr); - } - jsonw_end_array(wr); -} - -/* Device with extended statistics in hardware (physical port) */ -static void -dpdk_eth_if_show_xstats(struct ifnet *ifp, json_writer_t *wr) -{ - int i, len, ret; - - len = rte_eth_xstats_get_names(ifp->if_port, NULL, 0); - if (len < 1) - return; - - struct rte_eth_xstat_name xstat_names[len]; - struct rte_eth_xstat xstats[len]; - memset(xstat_names, 0, sizeof(xstat_names)); - memset(xstats, 0, sizeof(xstats)); - - ret = rte_eth_xstats_get_names(ifp->if_port, xstat_names, len); - if (ret < 0 || ret > len) - return; - ret = rte_eth_xstats_get(ifp->if_port, xstats, len); - if (ret < 0 || ret > len) - return; - - for (i = 0; i < len; i++) - jsonw_uint_field(wr, xstat_names[xstats[i].id].name, - xstats[i].value); -} - -static void -dpdk_eth_if_show_state(struct ifnet *ifp, json_writer_t *wr) -{ - if (ifp->if_local_port) - jsonw_uint_field(wr, "port", ifp->if_port); -} - -static void dpdk_eth_if_show_xcvr_info(struct ifnet *ifp, json_writer_t *wr) -{ - struct rte_eth_dev_module_info module_info; - struct rte_dev_eeprom_info eeprom_info; - char *buf; - int rv; - - memset(&module_info, 0, sizeof(module_info)); - - rv = 
rte_eth_dev_get_module_info(ifp->if_port, &module_info); - if (rv) - return; - - eeprom_info.length = - module_info.eeprom_len < MODULE_SFF_8436_AX_LEN ? - module_info.eeprom_len : MODULE_SFF_8436_AX_LEN; - - buf = malloc(eeprom_info.length); - if (!buf) { - DP_DEBUG(LINK, ERR, DATAPLANE, - "Failed to allocate xcvr eeprom info buffer\n"); - return; - } - eeprom_info.data = buf; - eeprom_info.offset = 0; - - rv = rte_eth_dev_get_module_eeprom(ifp->if_port, &eeprom_info); - if (rv) { - free(buf); - return; - } - - if (!module_info.eeprom_len) { - free(buf); - return; - } - - jsonw_name(wr, "xcvr_info"); - jsonw_start_object(wr); - sfp_status(&module_info, &eeprom_info, wr); - jsonw_end_object(wr); - free(buf); -} - -static int -dpdk_eth_if_dump(struct ifnet *ifp, json_writer_t *wr, - enum if_dump_state_type type) -{ - if (!ifp->if_local_port) - return 0; - - switch (type) { - case IF_DS_STATS: - dpdk_eth_if_show_stats(ifp, wr); - break; - case IF_DS_XSTATS: - dpdk_eth_if_show_xstats(ifp, wr); - break; - case IF_DS_DEV_INFO: - dpdk_eth_if_show_dev_info(ifp, wr); - break; - case IF_DS_STATE: - dpdk_eth_if_show_state(ifp, wr); - break; - case IF_DS_STATE_VERBOSE: - dpdk_eth_if_show_xcvr_info(ifp, wr); - break; - default: - break; - } - - return 0; -} - -static void -dpdk_eth_if_get_xstats(struct ifnet *ifp, - struct if_data *stats) -{ -#define NUM_XSTATS 2 - int i, rv, nstats; - const char *xstat_names[NUM_XSTATS] = { - "rx_multicast_packets", - "rx_broadcast_packets" - }; - uint64_t xstat_ids[NUM_XSTATS] = { -1 }; - uint64_t rx_mcast_pkts = 0; - - /* retrieve all xstats */ - nstats = rte_eth_xstats_get(ifp->if_port, NULL, 0); - if (nstats < 0) - return; - - struct rte_eth_xstat xstat_values[nstats]; - - rv = rte_eth_xstats_get(ifp->if_port, xstat_values, nstats); - if (rv < 0) - return; - - /* get stat ids for the ones we are interested in */ - for (i = 0; i < NUM_XSTATS; i++) { - rv = rte_eth_xstats_get_id_by_name(ifp->if_port, - xstat_names[i], - &xstat_ids[i]); - if 
(rv) - continue; - } - - for (nstats = 0, i = 0; i < NUM_XSTATS; i++) { - if (xstat_ids[i] == (uint64_t) -1) - continue; - - nstats++; - rx_mcast_pkts += xstat_values[xstat_ids[i]].value; - } - - if (nstats) - stats->ifi_imulticast = rx_mcast_pkts; -} - -static int -dpdk_eth_if_get_stats(struct ifnet *ifp, struct if_data *stats) -{ - struct rte_eth_stats hwstats; - int ret; - - ret = rte_eth_stats_get(ifp->if_port, &hwstats); - if (ret) - return ret; - - stats->ifi_ipackets = hwstats.ipackets; - stats->ifi_opackets = hwstats.opackets; - stats->ifi_ibytes = hwstats.ibytes; - stats->ifi_obytes = hwstats.obytes; - stats->ifi_ierrors += hwstats.ierrors; - stats->ifi_oerrors += hwstats.oerrors; - - dpdk_eth_if_get_xstats(ifp, stats); - return 0; -} - -/* Timer called (from master) to toggle state of LED. */ -static void dpdk_eth_if_blink_timer(struct rte_timer *tim, void *arg) -{ - struct ifnet *ifp = arg; - struct dpdk_eth_if_softc *sc = ifp->if_softc; - int rc; - - if (sc->scd_blink_on) - rc = rte_eth_led_on(ifp->if_port); - else - rc = rte_eth_led_off(ifp->if_port); - - if (rc < 0) { - DP_DEBUG(LINK, NOTICE, DATAPLANE, - "%s: led %s failed: %s\n", - ifp->if_name, sc->scd_blink_on ? 
"on" : "off", - strerror(-rc)); - rte_timer_stop(tim); - } else - sc->scd_blink_on = !sc->scd_blink_on; -} - -/* Start/stop LED blink timer */ -static int dpdk_eth_if_blink(struct ifnet *ifp, bool on) -{ - struct dpdk_eth_if_softc *sc = ifp->if_softc; - int rc = 0; - - if (on) { - rc = rte_eth_led_on(ifp->if_port); - if (rc < 0) - return rc; - - sc->scd_blink_on = 0; - rte_timer_reset(&sc->scd_blink_timer, - rte_get_timer_hz() / 2, - PERIODICAL, rte_get_master_lcore(), - dpdk_eth_if_blink_timer, ifp); - } else { - rte_timer_stop_sync(&sc->scd_blink_timer); - - /* restore proper link state of LED */ - if (if_port_isup(ifp->if_port)) - rte_eth_led_on(ifp->if_port); - else - rte_eth_led_off(ifp->if_port); - } - - return rc; -} - -static int dpdk_eth_if_set_backplane(struct ifnet *ifp, - unsigned int bp_ifindex) -{ - struct dpdk_eth_if_softc *sc = ifp->if_softc; - - sc->bp_ifindex = bp_ifindex; - - return 0; -} - -static int dpdk_eth_if_get_backplane(struct ifnet *ifp, - unsigned int *bp_ifindex) -{ - struct dpdk_eth_if_softc *sc = ifp->if_softc; - - *bp_ifindex = sc->bp_ifindex; - - return 0; -} - -static const struct ift_ops dpdk_eth_if_ops = { - .ifop_set_mtu = dpdk_eth_if_set_mtu, - .ifop_set_l2_address = dpdk_eth_if_set_l2_address, - .ifop_start = dpdk_eth_if_start, - .ifop_stop = dpdk_eth_if_stop, - .ifop_add_l2_addr = dpdk_eth_if_add_l2_addr, - .ifop_del_l2_addr = dpdk_eth_if_del_l2_addr, - .ifop_init = dpdk_eth_if_init, - .ifop_uninit = dpdk_eth_if_uninit, - .ifop_set_vlan_filter = dpdk_eth_if_set_vlan_filter, - .ifop_set_vlan_proto = dpdk_eth_if_set_vlan_proto, - .ifop_set_broadcast = dpdk_eth_if_set_broadcast, - .ifop_set_promisc = dpdk_eth_if_set_promisc, - .ifop_dump = dpdk_eth_if_dump, - .ifop_get_stats = dpdk_eth_if_get_stats, - .ifop_blink = dpdk_eth_if_blink, - .ifop_set_backplane = dpdk_eth_if_set_backplane, - .ifop_get_backplane = dpdk_eth_if_get_backplane, -}; - -static void dpdk_eth_init(void) -{ - int ret = if_register_type(IFT_ETHER, 
&dpdk_eth_if_ops); - if (ret < 0) - rte_panic("Failed to register DPDK ethernet interface type: %s", - strerror(-ret)); -} - -static const struct dp_event_ops dpdk_eth_if_events = { - .init = dpdk_eth_init, -}; - -DP_STARTUP_EVENT_REGISTER(dpdk_eth_if_events); - -bool is_device_mlx5(portid_t portid) -{ - struct rte_eth_dev_info dev_info; - - if (!rte_eth_dev_is_valid_port(portid)) - return false; - - rte_eth_dev_info_get(portid, &dev_info); - if (strstr(dev_info.driver_name, "net_mlx5") == dev_info.driver_name) - return true; - - return false; -} diff --git a/src/dpdk_eth_if.h b/src/dpdk_eth_if.h deleted file mode 100644 index ca639a89..00000000 --- a/src/dpdk_eth_if.h +++ /dev/null @@ -1,40 +0,0 @@ -/*- - * Copyright (c) 2018-2019, AT&T Intellectual Property. - * All rights reserved. - * - * SPDX-License-Identifier: LGPL-2.1-only - */ -/* - * DPDK Ethernet interfaces - */ - -#ifndef DPDK_ETH_IF_H -#define DPDK_ETH_IF_H - -#include -#include - -#include "urcu.h" -#include "compat.h" - -struct vhost_info; - -struct dpdk_eth_if_softc { - struct rcu_head scd_rcu; - struct rte_timer scd_link_timer; /* update controller */ - struct rte_timer scd_blink_timer; /* blink LED */ - struct rte_timer scd_reset_timer; /* reset interface */ - struct vhost_info *scd_vhost_info; - bool scd_need_reset; /* VF down when PF is down */ - uint8_t scd_blink_on; - unsigned int bp_ifindex; /* backplane interface */ -}; - -/* - * determine if device is Mellanox ConnectX-5 - * This will be used for some short-term customization of dataplane - * behaviour until we are able to up-rev DPDK to 1908 - */ -bool is_device_mlx5(portid_t portid); - -#endif /* DPDK_ETH_IF_H */ diff --git a/src/dpmsg.h b/src/dpmsg.h index e4421b51..85257e2b 100644 --- a/src/dpmsg.h +++ b/src/dpmsg.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. 
* Copyright (c) 2015-2016 by Brocade Communications Systems, Inc. * All rights reserved. * diff --git a/src/ecmp.c b/src/ecmp.c index b8d47a50..090e6075 100644 --- a/src/ecmp.c +++ b/src/ecmp.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2011-2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -19,10 +19,6 @@ #include #include #include -#include -#include -#include -#include #include #include #include @@ -33,31 +29,19 @@ #include "commands.h" #include "ecmp.h" #include "if_var.h" +#include "ip_forward.h" #include "ip_funcs.h" #include "json_writer.h" #include "mpls/mpls.h" #include "mpls/mpls_forward.h" #include "netinet6/in6.h" -#include "netinet6/route_v6.h" -#include "pktmbuf.h" -#include "route.h" -#include "route_flags.h" +#include "pktmbuf_internal.h" #include "util.h" #include "vplane_log.h" -#define IN6_SET_ADDR_V4MAPPED(a6, a4) { \ - (a6)->s6_addr32[0] = 0; \ - (a6)->s6_addr32[1] = 0; \ - (a6)->s6_addr32[2] = htonl(0xffff); \ - (a6)->s6_addr32[3] = (a4); \ - } - /* Global ECMP mode */ static uint8_t ecmp_mode = ECMP_HRW; -/* Global ECMP max path param */ -uint16_t ecmp_max_path = UINT16_MAX; - /* ECMP modes */ static const char *ecmp_modes[ECMP_MAX] = { [ECMP_DISABLED] = "disable", @@ -66,550 +50,6 @@ static const char *ecmp_modes[ECMP_MAX] = { [ECMP_MODULO_N] = "modulo-n", }; -/* Callback to store route attributes */ -static int route_attr(const struct nlattr *attr, void *data) -{ - const struct nlattr **tb = data; - unsigned int type = mnl_attr_get_type(attr); - - if (type <= RTA_MAX) - tb[type] = attr; - - return MNL_CB_OK; -} - -/* Fill nexthop struct */ -static bool nexthop_fill(struct nlattr *ntb_gateway, struct nlattr *ntb_encap, - struct rtnexthop *nhp, struct next_hop *next) -{ - label_t labels[NH_MAX_OUT_LABELS]; - uint16_t num_labels = 0; - void *labels_ptr; - uint32_t len; - int 
err; - struct ifnet *ifp; - - nh_outlabels_set(&next->outlabels, 0, NULL); - - nh4_set_ifp(next, ifnet_byifindex(nhp->rtnh_ifindex)); - if (!nh4_get_ifp(next) && !is_ignored_interface(nhp->rtnh_ifindex)) - return true; - if (ntb_gateway) { - next->gateway = mnl_attr_get_u32(ntb_gateway); - next->flags = RTF_GATEWAY; - } else { - next->gateway = INADDR_ANY; - next->flags = 0; - } - - if (ntb_encap) { - len = mnl_attr_get_payload_len(ntb_encap); - labels_ptr = mnl_attr_get_payload(ntb_encap); - err = rta_encap_get_labels(labels_ptr, len, - ARRAY_SIZE(labels), - labels, &num_labels); - if (err) { - RTE_LOG(NOTICE, MPLS, - "malformed label stack in netlink message\n"); - return false; - } - nh_outlabels_set(&next->outlabels, num_labels, labels); - } - - ifp = nh4_get_ifp(next); - if ((!ifp || ifp->if_type == IFT_LOOP) && - num_labels == 0) - /* no dp interface or via loopback */ - next->flags |= RTF_SLOWPATH; - - if (num_labels > 0 && !is_lo(ifp)) - /* Output label rather than local label */ - next->flags |= RTF_OUTLABEL; - - return false; -} - -static int mpls_payload_attr(const struct nlattr *attr, void *data) -{ - const struct nlattr **tb = data; - int type = mnl_attr_get_type(attr); - - if (mnl_attr_type_valid(attr, RTMPA_NH_FLAGS) < 0) - return MNL_CB_OK; - - switch (type) { - case RTMPA_TYPE: - case RTMPA_NH_FLAGS: - if (mnl_attr_validate(attr, MNL_TYPE_U32) < 0) { - RTE_LOG(NOTICE, MPLS, - "invalid mpls payload attribute %d\n", type); - return MNL_CB_ERROR; - } - break; - } - - tb[type] = attr; - return MNL_CB_OK; -} - -static bool nexthop_fill_mpls_common(const struct nlattr *ntb_newdst, - union next_hop_outlabels *outlabels, - bool bos_only) -{ - label_t labels[NH_MAX_OUT_LABELS]; - uint16_t num_labels = 0; - void *labels_ptr; - uint32_t len; - int ret; - - if (ntb_newdst) { - len = mnl_attr_get_payload_len(ntb_newdst); - labels_ptr = mnl_attr_get_payload(ntb_newdst); - ret = rta_encap_get_labels(labels_ptr, len, - ARRAY_SIZE(labels), - labels, &num_labels); - 
if (ret) { - RTE_LOG(NOTICE, MPLS, - "malformed label stack in netlink message\n"); - return false; - } - nh_outlabels_set(outlabels, num_labels, labels); - } - - /* - * If there are no labels and BOS_ONLY not - * set, then this implies the implicit-null - * label. This won't go out on the wire and is - * for signaling only. - */ - if (num_labels == 0 && !bos_only) { - label_t lbl[1] = { MPLS_LABEL_IMPLNULL }; - - nh_outlabels_set(outlabels, 1, lbl); - } - - return false; -} - -/* - * Fill nh struct from an mpls route add netlink - which uses different - * attributes - via, newdest instead of gateway, encap. - */ -static bool nexthop_fill_mpls(struct nlattr *ntb_via, struct nlattr *ntb_newdst, - struct nlattr *ntb_payload, - struct rtnexthop *nhp, struct next_hop *next) -{ - const struct nlattr *pl_tb[RTMPA_NH_FLAGS+1]; - bool bos_only = false; - int ret; - - if (ntb_payload) { - ret = mnl_attr_parse_nested(ntb_payload, mpls_payload_attr, - &pl_tb); - if (ret == MNL_CB_OK && pl_tb[RTMPA_NH_FLAGS]) - bos_only = (mnl_attr_get_u32(pl_tb[RTMPA_NH_FLAGS]) & - RTMPNF_BOS_ONLY) != 0; - } - - /* initialize out labels to NULL */ - nh_outlabels_set(&next->outlabels, 0, NULL); - - nh4_set_ifp(next, ifnet_byifindex(nhp->rtnh_ifindex)); - if (!nh4_get_ifp(next) && !is_ignored_interface(nhp->rtnh_ifindex)) - return true; - if (ntb_via) { - const struct rtvia *via; - in_addr_t nh = INADDR_NONE; - - via = mnl_attr_get_payload(ntb_via); - if (via->rtvia_family == AF_INET) { - memcpy(&nh, &via->rtvia_addr, sizeof(nh)); - next->flags = RTF_GATEWAY; - } else { - RTE_LOG(NOTICE, MPLS, - "unsupported via AF %d in netlink message\n", - via->rtvia_family); - } - - next->gateway = nh; - } else { - next->gateway = INADDR_ANY; - next->flags = 0; - } - - ret = nexthop_fill_mpls_common(ntb_newdst, &next->outlabels, bos_only); - if (!nh4_get_ifp(next)) - next->flags |= RTF_SLOWPATH; - - return ret; -} - -/* - * Fill nh6 struct from an mpls route add netlink. 
- */ -static bool nexthop6_fill_mpls(const struct nlattr *ntb_via, - const struct nlattr *ntb_newdst, - const struct nlattr *ntb_payload, - const struct rtnexthop *nhp, - struct next_hop_v6 *next) -{ - const struct nlattr *pl_tb[RTMPA_NH_FLAGS+1]; - struct in6_addr nh6 = IN6ADDR_ANY_INIT; - bool bos_only = false; - int ret; - - if (ntb_payload) { - ret = mnl_attr_parse_nested(ntb_payload, mpls_payload_attr, - &pl_tb); - if (ret == MNL_CB_OK && pl_tb[RTMPA_NH_FLAGS]) - bos_only = (mnl_attr_get_u32(pl_tb[RTMPA_NH_FLAGS]) & - RTMPNF_BOS_ONLY) != 0; - } - - /* initialise out labels to NULL */ - nh_outlabels_set(&next->outlabels, 0, NULL); - - nh6_set_ifp(next, ifnet_byifindex(nhp->rtnh_ifindex)); - if (!nh6_get_ifp(next) && !is_ignored_interface(nhp->rtnh_ifindex)) - return true; - if (ntb_via) { - const struct rtvia *via; - in_addr_t nh = INADDR_NONE; - - via = mnl_attr_get_payload(ntb_via); - if (via->rtvia_family == AF_INET) { - memcpy(&nh, &via->rtvia_addr, sizeof(nh)); - IN6_SET_ADDR_V4MAPPED(&nh6, nh); - } else if (via->rtvia_family == AF_INET6) { - memcpy(&nh6, &via->rtvia_addr, sizeof(nh6)); - } else { - RTE_LOG(NOTICE, MPLS, - "unsupported via AF %d in netlink message\n", - via->rtvia_family); - } - - next->gateway = nh6; - next->flags = RTF_GATEWAY; - if (IN6_IS_ADDR_V4MAPPED(&nh6)) - next->flags |= RTF_MAPPED_IPV6; - } else { - next->gateway = nh6; - next->flags = 0; - } - - ret = nexthop_fill_mpls_common(ntb_newdst, &next->outlabels, bos_only); - if (!nh6_get_ifp(next)) - next->flags |= RTF_SLOWPATH; - - return ret; -} - -static int mpls_attr(const struct nlattr *attr, void *data) -{ - const struct nlattr **tb = data; - int type = mnl_attr_get_type(attr); - - if (mnl_attr_type_valid(attr, MPLS_IPTUNNEL_MAX) < 0) - return MNL_CB_OK; - - tb[type] = attr; - return MNL_CB_OK; -} - -/* Create nexthop struct */ -struct next_hop *ecmp_create(struct nlattr *mpath, uint32_t *count, - bool *missing_ifp) -{ - size_t size = 0, i; - struct next_hop *next, *n; - void 
*vnhp; - - /* - * Need to loop over the paths to find out how many there are - * as the size is not fixed because the gateway is optional. - */ - mnl_attr_for_each_nested(vnhp, mpath) { - size++; - } - - if (!size) - return NULL; - - n = next = calloc(sizeof(struct next_hop), size); - if (!next) - return NULL; - - mnl_attr_for_each_nested(vnhp, mpath) { - struct rtnexthop *nhp = vnhp; - - if (nhp->rtnh_len == sizeof(*nhp)) { - /* There is a NH with no extra attrs */ - if (nexthop_fill(NULL, NULL, nhp, n)) - goto missing; - n++; - - } else if (nhp->rtnh_len > sizeof(*nhp)) { - struct nlattr *ntb[RTA_MAX+1] = { NULL }; - struct nlattr *mpls_ntb[MPLS_IPTUNNEL_MAX+1] = { NULL }; - - int res = mnl_attr_parse_payload(RTNH_DATA(vnhp), - nhp->rtnh_len - sizeof(*nhp), - route_attr, ntb); - - if (res != MNL_CB_OK) - goto failed; - - if (ntb[RTA_ENCAP] && ntb[RTA_ENCAP_TYPE] && - (mnl_attr_get_u16(ntb[RTA_ENCAP_TYPE]) == - LWTUNNEL_ENCAP_MPLS)) { - res = mnl_attr_parse_nested(ntb[RTA_ENCAP], - mpls_attr, - mpls_ntb); - if (res != MNL_CB_OK) { - RTE_LOG(NOTICE, DATAPLANE, - "unparseable mpls attributes\n"); - goto failed; - } - } - - res = mnl_attr_parse_payload( - RTNH_DATA(vnhp), nhp->rtnh_len - sizeof(*nhp), - route_attr, ntb); - - if (res != MNL_CB_OK) - goto failed; - - if (ntb[RTA_VIA]) { - if (nexthop_fill_mpls(ntb[RTA_VIA], - ntb[RTA_NEWDST], - ntb[RTA_MPLS_PAYLOAD], - nhp, n)) { - goto missing; - } - } else { - if (nexthop_fill(ntb[RTA_GATEWAY], - mpls_ntb[MPLS_IPTUNNEL_DST], - nhp, n)) { - goto missing; - } - } - n++; - } - } - - *count = n - next; - - return next; - -missing: - *missing_ifp = true; -failed: - size = n - next; - for (i = 0; i < size; i++) - nh_outlabels_destroy(&next[i].outlabels); - free(next); - return NULL; -} - -static const struct in6_addr anyaddr; - -/* Fill nexthop struct */ -static bool nexthop6_fill(struct nlattr *ntb_gateway, - struct nlattr *ntb_encap, - struct rtnexthop *nhp, struct next_hop_v6 *next) -{ - label_t 
labels[NH_MAX_OUT_LABELS]; - uint16_t num_labels = 0; - void *labels_ptr; - uint32_t len; - int err; - struct ifnet *ifp; - - nh_outlabels_set(&next->outlabels, 0, NULL); - - nh6_set_ifp(next, ifnet_byifindex(nhp->rtnh_ifindex)); - if (!nh6_get_ifp(next) && !is_ignored_interface(nhp->rtnh_ifindex)) - return true; - - if (ntb_gateway) { - next->gateway = *(struct in6_addr *)mnl_attr_get_payload( - ntb_gateway); - next->flags = RTF_GATEWAY; - } else { - next->gateway = anyaddr; - next->flags = 0; - } - - if (ntb_encap) { - len = mnl_attr_get_payload_len(ntb_encap); - labels_ptr = mnl_attr_get_payload(ntb_encap); - err = rta_encap_get_labels(labels_ptr, len, - ARRAY_SIZE(labels), - labels, &num_labels); - if (err) { - RTE_LOG(NOTICE, MPLS, - "malformed label stack in netlink message\n"); - return false; - } - nh_outlabels_set(&next->outlabels, num_labels, labels); - } - - ifp = nh6_get_ifp(next); - if ((!ifp || ifp->if_type == IFT_LOOP) && - num_labels == 0) - /* no dp interface or via loopback */ - next->flags |= RTF_SLOWPATH; - - if (num_labels > 0 && !is_lo(ifp)) - /* Output label rather than local label */ - next->flags |= RTF_OUTLABEL; - - return false; -} - -/* Create nexthop struct */ -struct next_hop_v6 *ecmp6_create(struct nlattr *mpath, uint32_t *count, - bool *missing_ifp) -{ - size_t size = 0, i; - struct next_hop_v6 *next, *n; - void *vnhp; - - /* - * Need to loop over the paths to find out how many there are - * as the size is not fixed because the gateway is optional. 
- */ - mnl_attr_for_each_nested(vnhp, mpath) { - size++; - } - - if (size == 0) - return NULL; - - n = next = calloc(sizeof(struct next_hop_v6), size); - if (!next) - return NULL; - - mnl_attr_for_each_nested(vnhp, mpath) { - struct rtnexthop *nhp = vnhp; - - if (nhp->rtnh_len == sizeof(*nhp)) { - /* There is a NH with no extra attrs */ - if (nexthop6_fill(NULL, NULL, nhp, n)) - goto missing; - n++; - - } else if (nhp->rtnh_len > sizeof(*nhp)) { - struct nlattr *ntb[RTA_MAX+1] = { NULL }; - struct nlattr *mpls_ntb[MPLS_IPTUNNEL_MAX+1] = { NULL }; - - int res = mnl_attr_parse_payload(RTNH_DATA(vnhp), - nhp->rtnh_len - sizeof(*nhp), - route_attr, ntb); - - if (res != MNL_CB_OK) - goto failed; - - if (ntb[RTA_ENCAP] && ntb[RTA_ENCAP_TYPE] && - (mnl_attr_get_u16(ntb[RTA_ENCAP_TYPE]) == - LWTUNNEL_ENCAP_MPLS)) { - res = mnl_attr_parse_nested(ntb[RTA_ENCAP], - mpls_attr, - mpls_ntb); - if (res != MNL_CB_OK) { - RTE_LOG(NOTICE, DATAPLANE, - "unparseable mpls attributes\n"); - goto failed; - } - } - - res = mnl_attr_parse_payload( - RTNH_DATA(vnhp), nhp->rtnh_len - sizeof(*nhp), - route_attr, ntb); - - if (res != MNL_CB_OK) - goto failed; - - if (ntb[RTA_VIA]) { - if (nexthop6_fill_mpls(ntb[RTA_VIA], - ntb[RTA_NEWDST], - ntb[RTA_MPLS_PAYLOAD], - nhp, n)) { - goto missing; - } - } else { - if (nexthop6_fill(ntb[RTA_GATEWAY], - mpls_ntb[MPLS_IPTUNNEL_DST], - nhp, n)) { - goto missing; - } - } - n++; - } - } - - *count = n - next; - - return next; - -missing: - *missing_ifp = true; -failed: - size = n - next; - for (i = 0; i < size; i++) - nh_outlabels_destroy(&next[i].outlabels); - free(next); - return NULL; -} - -/* Create nexthop struct */ -union next_hop_v4_or_v6_ptr ecmp_mpls_create(struct nlattr *mpath, - uint32_t *count, - enum nh_type *nh_type, - bool *missing_ifp) -{ - union next_hop_v4_or_v6_ptr nh = { NULL }; - size_t size = 0; - void *vnhp; - struct nlattr *attr; - - /* - * Need to loop over the paths to find out how many there are - * and what type of nexthop we 
need. - */ - *nh_type = NH_TYPE_V4GW; - mnl_attr_for_each_nested(vnhp, mpath) { - struct rtnexthop *nhp = vnhp; - - mnl_attr_for_each_payload((void *)RTNH_DATA(nhp), - nhp->rtnh_len - sizeof(*nhp)) { - /* - * If at least one of the vias is an IPv6 - * address, then all nexthops are represented - * as IPv6. - */ - if (attr->nla_type == RTA_VIA) { - const struct rtvia *via = RTA_DATA(attr); - - if (via->rtvia_family == AF_INET6) - *nh_type = NH_TYPE_V6GW; - break; - } - } - size++; - } - - switch (*nh_type) { - case NH_TYPE_V4GW: - nh.v4 = ecmp_create(mpath, count, missing_ifp); - break; - case NH_TYPE_V6GW: - nh.v6 = ecmp6_create(mpath, count, missing_ifp); - break; - } - return nh; -} - /* * All of the common L4 transport protocols (TCP/UDP/SCTP/UDP-Lite/DCCP) * have their port numbers at the same offset. Also ESP has a 32 bit @@ -718,10 +158,9 @@ ecmp_mbuf_hash(const struct rte_mbuf *m, uint16_t ether_type) if (ether_type == ETH_P_MPLS_UC) return mpls_ecmp_hash(m); - else if (ether_type == ETHER_TYPE_IPv6) - return ecmp_ipv6_hash(m, pktmbuf_l2_len(m)); - else - return ecmp_ipv4_hash(m, pktmbuf_l2_len(m)); + if (ether_type == RTE_ETHER_TYPE_IPV6) + return ecmp_ipv6_hash(m, dp_pktmbuf_l2_len(m)); + return ecmp_ipv4_hash(m, dp_pktmbuf_l2_len(m)); } static unsigned int @@ -753,7 +192,7 @@ unsigned int ecmp_lookup(uint32_t size, uint32_t key) static void ecmp_show(json_writer_t *json) { jsonw_string_field(json, "mode", ecmp_modes[ecmp_mode]); - jsonw_uint_field(json, "max-path", ecmp_max_path); + jsonw_uint_field(json, "max-path", UINT16_MAX); } static int ecmp_set_mode(const char *mode) @@ -772,26 +211,17 @@ static int ecmp_set_mode(const char *mode) return -1; } -static int ecmp_set_max_path(int val) -{ - ecmp_max_path = val; - - return 0; -} - #define ECMP_MODES \ "hash-threshold|hrw|modulo-n|disable" #define CMD_ECMP_USAGE \ "Usage: ecmp show\n" \ -" ecmp max-path <2-65535>\n" \ " ecmp mode <"ECMP_MODES">\n" /* * Commands: * ecmp show - show ecmp options * ecmp 
mode - set ecmp mode - * ecmp max-path - set ecmp max-path option */ int cmd_ecmp(FILE *f, int argc, char **argv) { @@ -800,12 +230,6 @@ int cmd_ecmp(FILE *f, int argc, char **argv) if (argc == 3 && !strcmp(argv[1], "mode")) { if (strstr(ECMP_MODES, argv[2])) return ecmp_set_mode(argv[2]); - } else if (argc == 3 && !strcmp(argv[1], "max-path")) { - unsigned int val = strtoul(argv[2], NULL, 0); - - if (val == 0 || (val >= 2 && val <= 65535)) - return ecmp_set_max_path(val); - } else if (argc == 2 && !strcmp(argv[1], "show")) { json = jsonw_new(f); jsonw_name(json, "ecmp_show"); @@ -819,3 +243,27 @@ int cmd_ecmp(FILE *f, int argc, char **argv) fprintf(f, CMD_ECMP_USAGE); return -1; } + +uint32_t dp_ecmp_hash(const struct ecmp_hash_param *hash_param) +{ + struct iphdr iph; + struct ip6_hdr ip6h; + uint32_t hash = 0; + uint32_t l4key = htonl((hash_param->src_port << 16) | + hash_param->dst_port); + + if (hash_param->src_ip.type == hash_param->dst_ip.type) { + if (hash_param->src_ip.type == AF_INET) { + iph.saddr = hash_param->src_ip.address.ip_v4.s_addr; + iph.daddr = hash_param->dst_ip.address.ip_v4.s_addr; + iph.protocol = hash_param->protocol; + hash = ecmp_iphdr_hash(&iph, l4key); + } else if (hash_param->src_ip.type == AF_INET6) { + ip6h.ip6_src = hash_param->src_ip.address.ip_v6; + ip6h.ip6_dst = hash_param->dst_ip.address.ip_v6; + ip6h.ip6_nxt = hash_param->protocol; + hash = ecmp_ip6hdr_hash(&ip6h, l4key); + } + } + return hash; +} diff --git a/src/ecmp.h b/src/ecmp.h index 03120fb0..f2913c01 100644 --- a/src/ecmp.h +++ b/src/ecmp.h @@ -14,13 +14,10 @@ #include #include -#include "nh.h" +#include "nh_common.h" struct nlattr; -/* Global ECMP max path param */ -extern uint16_t ecmp_max_path; - /* ECMP modes */ enum ecmp_modes { ECMP_DISABLED, @@ -38,12 +35,8 @@ uint32_t ecmp_mbuf_hash(const struct rte_mbuf *m, uint16_t ether_type); unsigned int ecmp_lookup(uint32_t size, uint32_t key); -struct next_hop *ecmp_create(struct nlattr *mpath, uint32_t *count, - bool 
*missing_ifp); -struct next_hop_v6 *ecmp6_create(struct nlattr *mpath, uint32_t *count, - bool *missing_ifp); -union next_hop_v4_or_v6_ptr ecmp_mpls_create( - struct nlattr *mpath, uint32_t *count, enum nh_type *nh_type, - bool *missing_ifp); +struct next_hop *ecmp_mpls_create(struct nlattr *mpath, uint32_t *count, + enum nh_type *nh_type, + bool *missing_ifp); #endif diff --git a/src/ether.c b/src/ether.c index 1a961af3..3e571b45 100644 --- a/src/ether.c +++ b/src/ether.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2021, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2011-2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -9,6 +9,7 @@ #include "ether.h" +#include "compiler.h" #include "dp_event.h" #include "l2_rx_fltr.h" #include "pl_common.h" @@ -23,7 +24,7 @@ struct rte_mbuf; * * Always consumes the mbuf */ -__attribute__((noinline)) void +__noinline void ether_input(struct ifnet *ifp, struct rte_mbuf *m) { struct pl_packet pkt; @@ -41,7 +42,7 @@ ether_input(struct ifnet *ifp, struct rte_mbuf *m) * * Always consumes the mbuf */ -__attribute__((noinline)) void +__noinline void ether_input_no_dyn_feats(struct ifnet *ifp, struct rte_mbuf *m) { struct pl_packet pkt; @@ -57,17 +58,17 @@ ether_input_no_dyn_feats(struct ifnet *ifp, struct rte_mbuf *m) int ether_if_set_l2_address(struct ifnet *ifp, uint32_t l2_addr_len, void *l2_addr) { - struct ether_addr *macaddr = l2_addr; + struct rte_ether_addr *macaddr = l2_addr; char b1[32], b2[32]; - if (l2_addr_len != ETHER_ADDR_LEN) { + if (l2_addr_len != RTE_ETHER_ADDR_LEN) { RTE_LOG(NOTICE, DATAPLANE, "link address is not ethernet (len=%u)!\n", l2_addr_len); return -EINVAL; } - if (ether_addr_equal(&ifp->eth_addr, macaddr)) + if (rte_ether_addr_equal(&ifp->eth_addr, macaddr)) return 1; RTE_LOG(INFO, DATAPLANE, "%s change MAC from %s to %s\n", @@ -82,7 +83,7 @@ int ether_if_set_l2_address(struct ifnet *ifp, 
uint32_t l2_addr_len, int ether_if_set_broadcast(struct ifnet *ifp, bool enable) { - static const struct ether_addr ea_broadcast = { + static const struct rte_ether_addr ea_broadcast = { .addr_bytes = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, }; diff --git a/src/ether.h b/src/ether.h index d63db7dc..0585fc05 100644 --- a/src/ether.h +++ b/src/ether.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2011-2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -21,14 +21,14 @@ #include "compiler.h" #include "if_var.h" #include "main.h" -#include "pktmbuf.h" +#include "pktmbuf_internal.h" #include "util.h" struct ifnet; struct ether_vlan_hdr { - struct ether_hdr eh; - struct vlan_hdr vh; + struct rte_ether_hdr eh; + struct rte_vlan_hdr vh; }; #define VLAN_HDR_LEN sizeof(struct ether_vlan_hdr) @@ -41,22 +41,23 @@ void ether_input(struct ifnet *ifp, struct rte_mbuf *m) void ether_input_no_dyn_feats(struct ifnet *ifp, struct rte_mbuf *m) __hot_func __rte_cache_aligned; -static inline struct ether_hdr *ethhdr(struct rte_mbuf *m) +static inline struct rte_ether_hdr *ethhdr(struct rte_mbuf *m) { - return rte_pktmbuf_mtod(m, struct ether_hdr *); + return rte_pktmbuf_mtod(m, struct rte_ether_hdr *); } /* ethtype in host byte order, return ptr to pkmbuf new data_start */ static inline char *ethhdr_prepend(struct rte_mbuf *m, uint16_t ethtype) { - char *data_start = rte_pktmbuf_prepend(m, ETHER_HDR_LEN); - struct ether_hdr *eh; + char *data_start = rte_pktmbuf_prepend(m, RTE_ETHER_HDR_LEN); + struct rte_ether_hdr *eh; if (!data_start) return NULL; - m->l2_len = ETHER_HDR_LEN; + m->l2_len = RTE_ETHER_HDR_LEN; eh = ethhdr(m); - eh->d_addr.addr_bytes[0] &= ~ETHER_GROUP_ADDR; /* Clear multicast bit */ + /* Clear multicast bit */ + eh->d_addr.addr_bytes[0] &= ~RTE_ETHER_GROUP_ADDR; eh->ether_type = htons(ethtype); return 
data_start; } @@ -76,12 +77,12 @@ static inline uint16_t ethtype(const struct rte_mbuf *m, static inline uint16_t vid_from_pkt(struct rte_mbuf *m, uint16_t etype) { - struct ether_hdr *eth = rte_pktmbuf_mtod(m, struct ether_hdr *); + struct rte_ether_hdr *eth = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); if (eth->ether_type != htons(etype)) return 0; - struct vlan_hdr *vh = (struct vlan_hdr *) (eth + 1); + struct rte_vlan_hdr *vh = (struct rte_vlan_hdr *) (eth + 1); return ntohs(vh->vlan_tci) & VLAN_VID_MASK; } @@ -109,10 +110,10 @@ static inline uint16_t vid_decap(struct rte_mbuf *m, uint16_t etype) return 0; vid = ntohs(eth->vh.vlan_tci); - memmove((char *) eth + sizeof(struct vlan_hdr), - eth, 2 * ETHER_ADDR_LEN); + memmove((char *) eth + sizeof(struct rte_vlan_hdr), + eth, 2 * RTE_ETHER_ADDR_LEN); - rte_pktmbuf_adj(m, sizeof(struct vlan_hdr)); + rte_pktmbuf_adj(m, sizeof(struct rte_vlan_hdr)); return vid; } @@ -121,24 +122,25 @@ static inline struct rte_mbuf *vid_encap(uint16_t if_vlan, struct rte_mbuf **m, uint16_t etype) { if (unlikely(pktmbuf_prepare_for_header_change(m, - sizeof(struct ether_hdr)) != 0)) + sizeof(struct rte_ether_hdr)) != 0)) return NULL; - struct ether_hdr *eth = rte_pktmbuf_mtod(*m, struct ether_hdr *); + struct rte_ether_hdr *eth = + rte_pktmbuf_mtod(*m, struct rte_ether_hdr *); struct ether_vlan_hdr *vhdr; vhdr = (struct ether_vlan_hdr *) rte_pktmbuf_prepend(*m, - sizeof(struct vlan_hdr)); + sizeof(struct rte_vlan_hdr)); if (unlikely(vhdr == NULL)) return NULL; - memmove(&vhdr->eh, eth, 2 * ETHER_ADDR_LEN); + memmove(&vhdr->eh, eth, 2 * RTE_ETHER_ADDR_LEN); vhdr->vh.eth_proto = eth->ether_type; vhdr->eh.ether_type = htons(etype); vhdr->vh.vlan_tci = htons(if_vlan); /* NB VLAN_HDR_LEN includes the ethernet header as well */ - pktmbuf_l2_len(*m) = VLAN_HDR_LEN; + dp_pktmbuf_l2_len(*m) = VLAN_HDR_LEN; return *m; } @@ -163,8 +165,9 @@ static inline struct rte_mbuf *vid_encap(uint16_t if_vlan, #endif #endif -static inline int 
ether_addr_equal(const struct ether_addr *e1, - const struct ether_addr *e2) +IGNORE_SANITIZER +static inline int rte_ether_addr_equal(const struct rte_ether_addr *e1, + const struct rte_ether_addr *e2) { uint64_t e1_addr = shift16(*(const uint64_t *) e1); uint64_t e2_addr = shift16(*(const uint64_t *) e2); @@ -173,17 +176,19 @@ static inline int ether_addr_equal(const struct ether_addr *e1, } /* - * A safe version of ether_addr_equal() that can be used safely - * with ether_addr_copy(). The compiler might choose to re-order - * parts of ether_addr_equal() before a copy. + * A safe version of rte_ether_addr_equal() that can be used safely + * with rte_ether_addr_copy(). The compiler might choose to re-order + * parts of rte_ether_addr_equal() before a copy. */ -static inline int ether_addr_equal_safe(const struct ether_addr *ea_from, - const struct ether_addr *ea_to) +static inline int rte_ether_addr_equal_safe( + const struct rte_ether_addr *ea_from, + const struct rte_ether_addr *ea_to) { return memcmp(ea_from, ea_to, sizeof(*ea_from)) == 0; } -static inline uint32_t eth_addr_hash(const struct ether_addr *ea, +IGNORE_SANITIZER +static inline uint32_t eth_addr_hash(const struct rte_ether_addr *ea, unsigned int bits) { uint64_t val = shift16(*(const uint64_t *) ea); @@ -191,18 +196,18 @@ static inline uint32_t eth_addr_hash(const struct ether_addr *ea, return hash64(val, bits); } -static inline bool ether_is_empty(const struct ether_addr *mac) +static inline bool ether_is_empty(const struct rte_ether_addr *mac) { - const struct ether_addr empty_mac = { { 0 } }; + const struct rte_ether_addr empty_mac = { { 0 } }; - return ether_addr_equal_safe(mac, &empty_mac); + return rte_ether_addr_equal_safe(mac, &empty_mac); } /* * is_link_local_ether_addr - Determine if given Ethernet address is * link-local. Includes Spanning Tree multicast address. 
*/ -static inline bool is_link_local_ether_addr(const struct ether_addr *ea) +static inline bool is_link_local_ether_addr(const struct rte_ether_addr *ea) { uint64_t ea_addr = clear_lsn(shift16(*(const uint64_t *) ea)); diff --git a/src/event.c b/src/event.c index d50ee715..391d87f8 100644 --- a/src/event.c +++ b/src/event.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2011-2017 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -16,7 +16,7 @@ #include #include -#include "event.h" +#include "event_internal.h" #include "urcu.h" #include "vplane_log.h" @@ -108,13 +108,13 @@ static void rebuild_poll_list(void) * Register a function to be called by get_next_event * when file descriptor has data available. */ -static void register_event(int fd, void *socket, ev_callback_t rdfunc, - void *arg, enum cont_src_en cont_src) +static int register_event(int fd, void *socket, ev_callback_t rdfunc, + void *arg, enum cont_src_en cont_src) { struct event *ev = malloc(sizeof(*ev)); if (ev == NULL) - rte_panic("%s(): out of memory\n", __func__); + return -ENOMEM; ev->arg = arg; ev->cont_src = cont_src; @@ -127,6 +127,8 @@ static void register_event(int fd, void *socket, ev_callback_t rdfunc, LIST_INSERT_HEAD(&todo.list, ev, next); todo.dirty = 1; rte_spinlock_unlock(&event_list_lock); + + return 0; } void register_event_fd(int fd, ev_callback_t rdfunc, void *arg) @@ -134,9 +136,9 @@ void register_event_fd(int fd, ev_callback_t rdfunc, void *arg) register_event(fd, NULL, rdfunc, arg, CONT_SRC_MAIN); } -void register_event_socket(void *socket, ev_callback_t rdfunc, void *arg) +int dp_register_event_socket(void *socket, ev_callback_t callback, void *arg) { - register_event(-1, socket, rdfunc, arg, CONT_SRC_MAIN); + return register_event(-1, socket, callback, arg, CONT_SRC_MAIN); } void register_event_socket_src(void *socket, 
ev_callback_t rdfunc, void *arg, @@ -159,17 +161,24 @@ static void delete_event(struct event *ev) rte_spinlock_unlock(&event_list_lock); } -void unregister_event_socket(void *socket) +int dp_unregister_event_socket(void *socket) { struct event *ev, *ev2; + bool found = false; rte_spinlock_lock(&event_list_lock); LIST_FOREACH_SAFE(ev, &todo.list, next, ev2) { if (ev->socket != socket) continue; __delete_event(ev); + found = true; } rte_spinlock_unlock(&event_list_lock); + + if (found) + return 0; + + return -ENOENT; } /* @@ -189,9 +198,9 @@ int get_next_event(enum cont_src_en cont_src, long ms, bool cont_src_all) rebuild_poll_list(); - rcu_thread_offline(); + dp_rcu_thread_offline(); n = zmq_poll(todo.items, todo.list_size, ms * ZMQ_POLL_MSEC); - rcu_thread_online(); + dp_rcu_thread_online(); if (n < 0) { if (errno == EINTR || errno == EAGAIN) diff --git a/src/event.h b/src/event_internal.h similarity index 67% rename from src/event.h rename to src/event_internal.h index e8952087..54a8d8fa 100644 --- a/src/event.h +++ b/src/event_internal.h @@ -1,26 +1,25 @@ /*- - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2011-2016 by Brocade Communications Systems, Inc. * All rights reserved. 
* * SPDX-License-Identifier: LGPL-2.1-only */ -#ifndef EVENT_H -#define EVENT_H +#ifndef EVENT_INTERNAL_H +#define EVENT_INTERNAL_H #include #include "control.h" +#include "event.h" typedef int (*ev_callback_t)(void *arg); void register_event_fd(int fd, ev_callback_t rdfunc, void *arg); void unregister_event_fd(int fd); -void register_event_socket(void *socket, ev_callback_t rdfunc, void *arg); void register_event_socket_src(void *socket, ev_callback_t rdfunc, void *arg, enum cont_src_en cont_src); -void unregister_event_socket(void *socket); int get_next_event(enum cont_src_en cont_src, long ms, bool cont_src_all); -#endif /* EVENT_H */ +#endif /* EVENT_INTERNAL_H */ diff --git a/src/fal.c b/src/fal.c index 55dc2c35..f9d7f74a 100644 --- a/src/fal.c +++ b/src/fal.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2019, AT&T Intellectual Property. + * Copyright (c) 2017-2020, AT&T Intellectual Property. * All rights reserved. * Copyright (c) 2016-2017 by Brocade Communications Systems, Inc. * All rights reserved. 
@@ -19,23 +19,31 @@ #include "compiler.h" #include "fal.h" #include "fal_plugin.h" +#include "fal_bfd.h" #include "if_var.h" +#include "ip6_funcs.h" #include "mpls/mpls.h" -#include "nh.h" +#include "nh_common.h" #include "route.h" #include "route_flags.h" #include "route_v6.h" #include "vplane_debug.h" #include "vplane_log.h" #include "bridge_vlan_set.h" -#include "hotplug.h" +#include "if/dpdk-eth/hotplug.h" -struct ether_addr; +struct rte_ether_addr; -int __externally_visible +struct fal_mem { + struct rcu_head rcu; + uint8_t data[0]; +}; + +__FOR_EXPORT +int fal_port_byifindex(int ifindex, uint16_t *portid) { - struct ifnet *ifp = ifnet_byifindex(ifindex); + struct ifnet *ifp = dp_ifnet_byifindex(ifindex); if (ifp == NULL || ifp->if_type != IFT_ETHER || !ifp->if_local_port) @@ -44,15 +52,76 @@ fal_port_byifindex(int ifindex, uint16_t *portid) return 0; } +__FOR_EXPORT +void * +fal_malloc(size_t size) +{ + struct fal_mem *fal_mem; + + if (size >= SIZE_MAX - sizeof(*fal_mem)) + return NULL; + + fal_mem = malloc(sizeof(*fal_mem) + size); + if (!fal_mem) + return NULL; + + memset(&fal_mem->rcu, 0, sizeof(fal_mem->rcu)); + + return &fal_mem->data; +} + +__FOR_EXPORT +void * +fal_calloc(int nmemb, size_t size) +{ + struct fal_mem *fal_mem; + size_t total_size; + + total_size = nmemb * size; + if (total_size < size) + return NULL; + if (total_size >= SIZE_MAX - sizeof(*fal_mem)) + return NULL; + + fal_mem = calloc(1, sizeof(*fal_mem) + total_size); + if (!fal_mem) + return NULL; + + return &fal_mem->data; +} + +static void +fal_free_worker(struct rcu_head *head) +{ + struct fal_mem *fal_mem = + caa_container_of(head, struct fal_mem, rcu); + + free(fal_mem); +} + +__FOR_EXPORT +void +fal_free_deferred(void *ptr) +{ + struct fal_mem *fal_mem; + + if (!ptr) + return; + + fal_mem = caa_container_of(ptr, struct fal_mem, data); + + call_rcu(&fal_mem->rcu, fal_free_worker); +} + static struct message_handler *fal_handler; void fal_init(void) { } -static struct l2_ops 
*new_dyn_l2_ops(void *lib) +static struct fal_l2_ops *new_dyn_l2_ops(void *lib) { - struct l2_ops *l2_ops = calloc(1, sizeof(struct l2_ops)); + struct fal_l2_ops *l2_ops = calloc(1, sizeof(struct fal_l2_ops)); if (!l2_ops) { RTE_LOG(ERR, DATAPLANE, "Could not allocate l2 ops\n"); @@ -62,6 +131,7 @@ static struct l2_ops *new_dyn_l2_ops(void *lib) l2_ops->new_port = dlsym(lib, "fal_plugin_l2_new_port"); l2_ops->upd_port = dlsym(lib, "fal_plugin_l2_upd_port"); l2_ops->del_port = dlsym(lib, "fal_plugin_l2_del_port"); + l2_ops->dump_port = dlsym(lib, "fal_plugin_l2_dump_port"); l2_ops->get_attrs = dlsym(lib, "fal_plugin_l2_get_attrs"); l2_ops->new_addr = dlsym(lib, "fal_plugin_l2_new_addr"); l2_ops->upd_addr = dlsym(lib, "fal_plugin_l2_upd_addr"); @@ -69,9 +139,9 @@ static struct l2_ops *new_dyn_l2_ops(void *lib) return l2_ops; } -static struct rif_ops *new_dyn_rif_ops(void *lib) +static struct fal_rif_ops *new_dyn_rif_ops(void *lib) { - struct rif_ops *rif_ops = calloc(1, sizeof(struct rif_ops)); + struct fal_rif_ops *rif_ops = calloc(1, sizeof(struct fal_rif_ops)); if (!rif_ops) { RTE_LOG(ERR, DATAPLANE, "Could not allocate rif ops\n"); @@ -81,12 +151,16 @@ static struct rif_ops *new_dyn_rif_ops(void *lib) rif_ops->create_intf = dlsym(lib, "fal_plugin_create_router_interface"); rif_ops->delete_intf = dlsym(lib, "fal_plugin_delete_router_interface"); rif_ops->set_attr = dlsym(lib, "fal_plugin_set_router_interface_attr"); + rif_ops->get_stats = dlsym(lib, + "fal_plugin_get_router_interface_stats"); + rif_ops->dump = dlsym(lib, "fal_plugin_dump_router_interface"); + rif_ops->get_attr = dlsym(lib, "fal_plugin_get_router_interface_attr"); return rif_ops; } -static struct tun_ops *new_dyn_tun_ops(void *lib) +static struct fal_tun_ops *new_dyn_tun_ops(void *lib) { - struct tun_ops *tun_ops = calloc(1, sizeof(struct tun_ops)); + struct fal_tun_ops *tun_ops = calloc(1, sizeof(struct fal_tun_ops)); if (!tun_ops) { RTE_LOG(ERR, DATAPLANE, "Could not allocate tun ops\n"); @@ 
-98,9 +172,31 @@ static struct tun_ops *new_dyn_tun_ops(void *lib) return tun_ops; } -static struct bridge_ops *new_dyn_bridge_ops(void *lib) +static struct fal_lag_ops *new_dyn_lag_ops(void *lib) { - struct bridge_ops *bridge_ops = calloc(1, sizeof(struct bridge_ops)); + struct fal_lag_ops *lag_ops = calloc(1, sizeof(*lag_ops)); + + if (!lag_ops) { + RTE_LOG(ERR, DATAPLANE, "Could not allocate LAG ops\n"); + return NULL; + } + lag_ops->create_lag = dlsym(lib, "fal_plugin_create_lag"); + lag_ops->delete_lag = dlsym(lib, "fal_plugin_delete_lag"); + lag_ops->set_lag_attr = dlsym(lib, "fal_plugin_set_lag_attr"); + lag_ops->get_lag_attr = dlsym(lib, "fal_plugin_get_lag_attr"); + lag_ops->dump = dlsym(lib, "fal_plugin_dump_lag"); + lag_ops->create_lag_member = dlsym(lib, "fal_plugin_create_lag_member"); + lag_ops->delete_lag_member = dlsym(lib, "fal_plugin_delete_lag_member"); + lag_ops->set_lag_member_attr = + dlsym(lib, "fal_plugin_set_lag_member_attr"); + lag_ops->get_lag_member_attr = + dlsym(lib, "fal_plugin_get_member_lag_attr"); + return lag_ops; +} + +static struct fal_bridge_ops *new_dyn_bridge_ops(void *lib) +{ + struct fal_bridge_ops *bridge_ops = calloc(1, sizeof(*bridge_ops)); if (!bridge_ops) { RTE_LOG(ERR, DATAPLANE, "Could not allocate bridge ops\n"); @@ -118,9 +214,9 @@ static struct bridge_ops *new_dyn_bridge_ops(void *lib) return bridge_ops; } -static struct vlan_ops *new_dyn_vlan_ops(void *lib) +static struct fal_vlan_ops *new_dyn_vlan_ops(void *lib) { - struct vlan_ops *vlan_ops = calloc(1, sizeof(struct vlan_ops)); + struct fal_vlan_ops *vlan_ops = calloc(1, sizeof(struct fal_vlan_ops)); if (!vlan_ops) { RTE_LOG(ERR, DATAPLANE, "Could not allocate vlan_ops\n"); @@ -131,9 +227,9 @@ static struct vlan_ops *new_dyn_vlan_ops(void *lib) return vlan_ops; } -static struct stp_ops *new_dyn_stp_ops(void *lib) +static struct fal_stp_ops *new_dyn_stp_ops(void *lib) { - struct stp_ops *stp_ops = calloc(1, sizeof(struct stp_ops)); + struct fal_stp_ops *stp_ops 
= calloc(1, sizeof(struct fal_stp_ops)); if (!stp_ops) { RTE_LOG(ERR, DATAPLANE, "Could not allocate stp_ops ops\n"); @@ -151,9 +247,9 @@ static struct stp_ops *new_dyn_stp_ops(void *lib) return stp_ops; } -static struct ip_ops *new_dyn_ip_ops(void *lib) +static struct fal_ip_ops *new_dyn_ip_ops(void *lib) { - struct ip_ops *ip_ops = calloc(1, sizeof(struct ip_ops)); + struct fal_ip_ops *ip_ops = calloc(1, sizeof(struct fal_ip_ops)); if (!ip_ops) { RTE_LOG(ERR, DATAPLANE, "Could not allocate ip ops\n"); @@ -163,28 +259,51 @@ static struct ip_ops *new_dyn_ip_ops(void *lib) ip_ops->new_addr = dlsym(lib, "fal_plugin_ip_new_addr"); ip_ops->upd_addr = dlsym(lib, "fal_plugin_ip_upd_addr"); ip_ops->del_addr = dlsym(lib, "fal_plugin_ip_del_addr"); - ip_ops->new_neigh = dlsym(lib, "fal_plugin_ip_new_neigh"); - ip_ops->upd_neigh = dlsym(lib, "fal_plugin_ip_upd_neigh"); - ip_ops->get_neigh_attrs = dlsym(lib, "fal_plugin_ip_get_neigh_attrs"); - ip_ops->del_neigh = dlsym(lib, "fal_plugin_ip_del_neigh"); - ip_ops->new_route = dlsym(lib, "fal_plugin_ip_new_route"); - ip_ops->upd_route = dlsym(lib, "fal_plugin_ip_upd_route"); - ip_ops->del_route = dlsym(lib, "fal_plugin_ip_del_route"); + ip_ops->new_neigh = dlsym(lib, "fal_plugin_create_ip_neigh"); + ip_ops->upd_neigh = dlsym(lib, "fal_plugin_set_ip_neigh_attr"); + ip_ops->get_neigh_attrs = dlsym(lib, "fal_plugin_get_ip_neigh_attrs"); + ip_ops->dump_neigh = dlsym(lib, "fal_plugin_dump_ip_neigh"); + ip_ops->del_neigh = dlsym(lib, "fal_plugin_delete_ip_neigh"); + ip_ops->new_neigh_depr = dlsym(lib, "fal_plugin_ip_new_neigh"); + ip_ops->upd_neigh_depr = dlsym(lib, "fal_plugin_ip_upd_neigh"); + ip_ops->get_neigh_attrs_depr = + dlsym(lib, "fal_plugin_ip_get_neigh_attrs"); + ip_ops->dump_neigh_depr = dlsym(lib, "fal_plugin_ip_dump_neigh"); + ip_ops->del_neigh_depr = dlsym(lib, "fal_plugin_ip_del_neigh"); + ip_ops->new_route = dlsym(lib, "fal_plugin_create_route_entry"); + ip_ops->upd_route = dlsym(lib, 
"fal_plugin_set_route_entry_attr"); + ip_ops->del_route = dlsym(lib, "fal_plugin_delete_route_entry"); + ip_ops->get_route_attrs = + dlsym(lib, "fal_plugin_get_route_entry_attrs"); + ip_ops->new_route_depr = dlsym(lib, "fal_plugin_ip_new_route"); + ip_ops->upd_route_depr = dlsym(lib, "fal_plugin_ip_upd_route"); + ip_ops->del_route_depr = dlsym(lib, "fal_plugin_ip_del_route"); + ip_ops->get_route_attrs_depr = + dlsym(lib, "fal_plugin_ip_get_route_attrs"); + ip_ops->walk_routes = dlsym(lib, "fal_plugin_ip_walk_routes"); ip_ops->new_next_hop_group = dlsym( lib, "fal_plugin_ip_new_next_hop_group"); ip_ops->upd_next_hop_group = dlsym( lib, "fal_plugin_ip_upd_next_hop_group"); ip_ops->del_next_hop_group = dlsym( lib, "fal_plugin_ip_del_next_hop_group"); + ip_ops->get_next_hop_group_attrs = dlsym( + lib, "fal_plugin_ip_get_next_hop_group_attrs"); + ip_ops->dump_next_hop_group = dlsym( + lib, "fal_plugin_ip_dump_next_hop_group"); ip_ops->new_next_hops = dlsym(lib, "fal_plugin_ip_new_next_hops"); ip_ops->upd_next_hop = dlsym(lib, "fal_plugin_ip_upd_next_hop"); ip_ops->del_next_hops = dlsym(lib, "fal_plugin_ip_del_next_hops"); + ip_ops->get_next_hop_attrs = dlsym( + lib, "fal_plugin_ip_get_next_hop_attrs"); + ip_ops->dump_next_hop = dlsym( + lib, "fal_plugin_ip_dump_next_hop"); return ip_ops; } -static struct acl_ops *new_dyn_acl_ops(void *lib) +static struct fal_acl_ops *new_dyn_acl_ops(void *lib) { - struct acl_ops *acl_ops = calloc(1, sizeof(struct acl_ops)); + struct fal_acl_ops *acl_ops = calloc(1, sizeof(struct fal_acl_ops)); if (!acl_ops) { RTE_LOG(ERR, DATAPLANE, "Could not allocate acl ops\n"); @@ -211,9 +330,9 @@ static struct acl_ops *new_dyn_acl_ops(void *lib) return acl_ops; } -static struct ipmc_ops *new_dyn_ipmc_ops(void *lib) +static struct fal_ipmc_ops *new_dyn_ipmc_ops(void *lib) { - struct ipmc_ops *ipmc_ops = calloc(1, sizeof(struct ipmc_ops)); + struct fal_ipmc_ops *ipmc_ops = calloc(1, sizeof(struct fal_ipmc_ops)); ipmc_ops->create_entry = dlsym(lib, 
"fal_plugin_create_ip_mcast_entry"); ipmc_ops->delete_entry = dlsym(lib, "fal_plugin_delete_ip_mcast_entry"); @@ -256,9 +375,9 @@ static struct ipmc_ops *new_dyn_ipmc_ops(void *lib) return ipmc_ops; } -static struct qos_ops *new_dyn_qos_ops(void *lib) +static struct fal_qos_ops *new_dyn_qos_ops(void *lib) { - struct qos_ops *qos_ops = calloc(1, sizeof(struct qos_ops)); + struct fal_qos_ops *qos_ops = calloc(1, sizeof(struct fal_qos_ops)); if (!qos_ops) { RTE_LOG(ERR, DATAPLANE, "Could not allocate qos ops\n"); @@ -301,12 +420,16 @@ static struct qos_ops *new_dyn_qos_ops(void *lib) qos_ops->del_wred = dlsym(lib, "fal_plugin_qos_del_wred"); qos_ops->upd_wred = dlsym(lib, "fal_plugin_qos_upd_wred"); qos_ops->get_wred_attrs = dlsym(lib, "fal_plugin_qos_get_wred_attrs"); + qos_ops->get_counters = dlsym(lib, "fal_plugin_qos_get_counters"); + qos_ops->dump_buf_errors = + dlsym(lib, "fal_plugin_dump_memory_buffer_errors"); + return qos_ops; } -static struct sw_ops *new_dyn_switch_ops(void *lib) +static struct fal_sw_ops *new_dyn_switch_ops(void *lib) { - struct sw_ops *sw_ops = calloc(1, sizeof(*sw_ops)); + struct fal_sw_ops *sw_ops = calloc(1, sizeof(*sw_ops)); if (!sw_ops) { RTE_LOG(ERR, DATAPLANE, "Could not allocate sw ops\n"); @@ -318,9 +441,9 @@ static struct sw_ops *new_dyn_switch_ops(void *lib) return sw_ops; } -static struct sys_ops *new_dyn_sys_ops(void *lib) +static struct fal_sys_ops *new_dyn_sys_ops(void *lib) { - struct sys_ops *sops = calloc(1, sizeof(struct sys_ops)); + struct fal_sys_ops *sops = calloc(1, sizeof(struct fal_sys_ops)); if (!sops) { RTE_LOG(ERR, DATAPLANE, "Could not allocate sys ops\n"); @@ -333,9 +456,9 @@ static struct sys_ops *new_dyn_sys_ops(void *lib) return sops; } -static struct policer_ops *new_dyn_policer_ops(void *lib) +static struct fal_policer_ops *new_dyn_policer_ops(void *lib) { - struct policer_ops *policer_ops = calloc(1, sizeof(struct policer_ops)); + struct fal_policer_ops *policer_ops = calloc(1, sizeof(*policer_ops)); if 
(!policer_ops) { RTE_LOG(ERR, DATAPLANE, "Could not allocate policer ops\n"); @@ -355,9 +478,9 @@ static struct policer_ops *new_dyn_policer_ops(void *lib) return policer_ops; } -static struct mirror_ops *new_dyn_mirror_ops(void *lib) +static struct fal_mirror_ops *new_dyn_mirror_ops(void *lib) { - struct mirror_ops *mr_ops = calloc(1, sizeof(struct mirror_ops)); + struct fal_mirror_ops *mr_ops = calloc(1, sizeof(*mr_ops)); if (!mr_ops) { RTE_LOG(ERR, DATAPLANE, "Could not allocate mirror ops\n"); @@ -374,10 +497,10 @@ static struct mirror_ops *new_dyn_mirror_ops(void *lib) return mr_ops; } -static struct vlan_feat_ops *new_dyn_vlan_feat_ops(void *lib) +static struct fal_vlan_feat_ops *new_dyn_vlan_feat_ops(void *lib) { - struct vlan_feat_ops *vlan_feat_ops = - calloc(1, sizeof(struct vlan_feat_ops)); + struct fal_vlan_feat_ops *vlan_feat_ops = + calloc(1, sizeof(struct fal_vlan_feat_ops)); if (!vlan_feat_ops) { RTE_LOG(ERR, DATAPLANE, "Could not allocate vlan_feat ops\n"); @@ -389,13 +512,15 @@ static struct vlan_feat_ops *new_dyn_vlan_feat_ops(void *lib) lib, "fal_plugin_vlan_feature_delete"); vlan_feat_ops->vlan_feature_set_attr = dlsym( lib, "fal_plugin_vlan_feature_set_attr"); + vlan_feat_ops->vlan_feature_get_attr = dlsym( + lib, "fal_plugin_vlan_feature_get_attr"); return vlan_feat_ops; } -static struct backplane_ops *new_dyn_backplane_ops(void *lib) +static struct fal_backplane_ops *new_dyn_backplane_ops(void *lib) { - struct backplane_ops *backplane_ops = - calloc(1, sizeof(struct backplane_ops)); + struct fal_backplane_ops *backplane_ops = + calloc(1, sizeof(struct fal_backplane_ops)); if (!backplane_ops) { RTE_LOG(ERR, DATAPLANE, "Could not allocate backplane ops\n"); @@ -408,9 +533,9 @@ static struct backplane_ops *new_dyn_backplane_ops(void *lib) return backplane_ops; } -static struct cpp_rl_ops *new_dyn_cpp_rl_ops(void *lib) +static struct fal_cpp_rl_ops *new_dyn_cpp_rl_ops(void *lib) { - struct cpp_rl_ops *cpp_rl_ops = calloc(1, sizeof(*cpp_rl_ops)); 
+ struct fal_cpp_rl_ops *cpp_rl_ops = calloc(1, sizeof(*cpp_rl_ops)); if (!cpp_rl_ops) { RTE_LOG(ERR, DATAPLANE, "Could not allocate cpp_rl ops\n"); @@ -425,11 +550,11 @@ static struct cpp_rl_ops *new_dyn_cpp_rl_ops(void *lib) return cpp_rl_ops; } -static struct ptp_ops *new_dyn_ptp_ops(void *lib) +static struct fal_ptp_ops *new_dyn_ptp_ops(void *lib) { - struct ptp_ops *ptp_ops; + struct fal_ptp_ops *ptp_ops; - ptp_ops = calloc(1, sizeof(struct ptp_ops)); + ptp_ops = calloc(1, sizeof(struct fal_ptp_ops)); if (!ptp_ops) { RTE_LOG(ERR, DATAPLANE, "Could not allocate ptp ops\n"); return NULL; @@ -449,6 +574,79 @@ static struct ptp_ops *new_dyn_ptp_ops(void *lib) return ptp_ops; } +static struct fal_bfd_ops *new_dyn_bfd_ops(void *lib) +{ + struct fal_bfd_ops *bfd_ops; + + bfd_ops = calloc(1, sizeof(*bfd_ops)); + if (!bfd_ops) { + RTE_LOG(ERR, DATAPLANE, "Could not allocate bfd ops\n"); + return NULL; + } + + bfd_ops->create_session = dlsym(lib, "fal_plugin_bfd_create_session"); + bfd_ops->delete_session = dlsym(lib, "fal_plugin_bfd_delete_session"); + bfd_ops->set_session_attr = dlsym(lib, + "fal_plugin_bfd_set_session_attribute"); + bfd_ops->get_session_attr = dlsym(lib, + "fal_plugin_bfd_get_session_attribute"); + bfd_ops->get_session_stats = dlsym(lib, + "fal_plugin_bfd_get_session_stats"); + bfd_ops->dump_session = dlsym(lib, + "fal_plugin_bfd_dump_session"); + return bfd_ops; +} + +static struct fal_capture_ops *new_dyn_capture_ops(void *lib) +{ + struct fal_capture_ops *ops; + + ops = calloc(1, sizeof(struct fal_capture_ops)); + if (ops == NULL) { + RTE_LOG(ERR, DATAPLANE, "Could not allocate capture ops\n"); + return NULL; + } + + ops->create = dlsym(lib, "fal_plugin_capture_create"); + ops->delete = dlsym(lib, "fal_plugin_capture_delete"); + ops->get_stats = dlsym(lib, "fal_plugin_capture_get_stats"); + return ops; +} + +static struct fal_mpls_ops *new_dyn_mpls_ops(void *lib) +{ + struct fal_mpls_ops *mpls_ops = calloc(1, sizeof(*mpls_ops)); + + if (!mpls_ops) 
{ + RTE_LOG(ERR, DATAPLANE, "Could not allocate mpls ops\n"); + return NULL; + } + + mpls_ops->create_route = dlsym(lib, "fal_plugin_create_mpls_route"); + mpls_ops->delete_route = dlsym(lib, "fal_plugin_delete_mpls_route"); + mpls_ops->set_route_attr = dlsym(lib, "fal_plugin_set_mpls_route_attr"); + mpls_ops->get_route_attr = dlsym(lib, "fal_plugin_get_mpls_route_attr"); + + return mpls_ops; +} + +static struct fal_vrf_ops *new_dyn_vrf_ops(void *lib) +{ + struct fal_vrf_ops *vrf_ops = calloc(1, sizeof(*vrf_ops)); + + if (!vrf_ops) { + RTE_LOG(ERR, DATAPLANE, "Could not allocate vrf ops\n"); + return NULL; + } + + vrf_ops->create = dlsym(lib, "fal_plugin_create_vrf"); + vrf_ops->delete = dlsym(lib, "fal_plugin_delete_vrf"); + vrf_ops->set_attr = dlsym(lib, "fal_plugin_set_vrf_attr"); + vrf_ops->get_attr = dlsym(lib, "fal_plugin_get_vrf_attr"); + + return vrf_ops; +} + static void register_dyn_msg_handlers(void *lib) { struct message_handler *handler = @@ -461,6 +659,7 @@ static void register_dyn_msg_handlers(void *lib) handler->l2 = new_dyn_l2_ops(lib); handler->rif = new_dyn_rif_ops(lib); + handler->lag = new_dyn_lag_ops(lib); handler->tun = new_dyn_tun_ops(lib); handler->bridge = new_dyn_bridge_ops(lib); handler->vlan = new_dyn_vlan_ops(lib); @@ -477,6 +676,10 @@ static void register_dyn_msg_handlers(void *lib) handler->backplane = new_dyn_backplane_ops(lib); handler->cpp_rl = new_dyn_cpp_rl_ops(lib); handler->ptp = new_dyn_ptp_ops(lib); + handler->capture = new_dyn_capture_ops(lib); + handler->bfd = new_dyn_bfd_ops(lib); + handler->mpls = new_dyn_mpls_ops(lib); + handler->vrf = new_dyn_vrf_ops(lib); fal_register_message_handler(handler); } @@ -561,6 +764,8 @@ static void free_message_handler(struct message_handler *handler) free(handler->vlan_feat); free(handler->backplane); free(handler->cpp_rl); + free(handler->capture); + free(handler->bfd); free(handler); } @@ -578,7 +783,7 @@ bool fal_plugins_present(void) #define call_handler(op_type, fn, args...) 
\ { \ - struct op_type ## _ops *interface = NULL; \ + struct fal_ ## op_type ## _ops *interface = NULL; \ if (fal_handler) { \ interface = fal_handler->op_type; \ if (interface && interface->fn) \ @@ -588,7 +793,7 @@ bool fal_plugins_present(void) #define call_handler_def_ret(op_type, def_ret, fn, args...) \ ({ \ - struct op_type ## _ops *interface = NULL; \ + struct fal_ ## op_type ## _ops *interface = NULL; \ int ret = def_ret; \ if (fal_handler) { \ interface = fal_handler->op_type; \ @@ -623,7 +828,8 @@ int cmd_fal(FILE *f, int argc, char **argv) */ call_handler(sys, command, f, argc, argv); return 0; - } else if ((streq(argv[0], "plugin_ret"))) { + } + if ((streq(argv[0], "plugin_ret"))) { argc--; argv++; /*TODO Implement get_name handlers @@ -648,7 +854,7 @@ int fal_l2_get_attrs(unsigned int if_index, uint32_t attr_count, struct fal_attribute_t *attr_list) { - struct l2_ops *interface; + struct fal_l2_ops *interface; int rc = -1; if (fal_handler) { @@ -662,10 +868,11 @@ int fal_l2_get_attrs(unsigned int if_index, return rc; } -void fal_l2_upd_port(unsigned int if_index, - struct fal_attribute_t *attr) +int fal_l2_upd_port(unsigned int if_index, + struct fal_attribute_t *attr) { - call_handler(l2, upd_port, if_index, attr); + return call_handler_def_ret(l2, -EOPNOTSUPP, upd_port, + if_index, attr); } void fal_l2_del_port(unsigned int if_index) @@ -673,8 +880,13 @@ void fal_l2_del_port(unsigned int if_index) call_handler(l2, del_port, if_index); } +void fal_l2_dump_port(unsigned int if_index, json_writer_t *wr) +{ + call_handler(l2, dump_port, if_index, wr); +} + void fal_l2_new_addr(unsigned int if_index, - const struct ether_addr *addr, + const struct rte_ether_addr *addr, uint32_t attr_count, const struct fal_attribute_t *attr_list) { @@ -682,13 +894,13 @@ void fal_l2_new_addr(unsigned int if_index, } void fal_l2_upd_addr(unsigned int if_index, - const struct ether_addr *addr, + const struct rte_ether_addr *addr, struct fal_attribute_t *attr) { 
call_handler(l2, upd_addr, if_index, addr, attr); } -void fal_l2_del_addr(unsigned int if_index, const struct ether_addr *addr) +void fal_l2_del_addr(unsigned int if_index, const struct rte_ether_addr *addr) { call_handler(l2, del_addr, if_index, addr); } @@ -718,6 +930,30 @@ int fal_set_router_interface_attr(fal_object_t obj, obj, attr); } +int +fal_get_router_interface_stats(fal_object_t obj, + uint32_t cntr_count, + const enum fal_router_interface_stat_t *cntr_ids, + uint64_t *cntrs) +{ + return call_handler_def_ret(rif, -EOPNOTSUPP, get_stats, + obj, cntr_count, cntr_ids, cntrs); +} + +int fal_get_router_interface_attr(fal_object_t obj, + uint32_t attr_count, + struct fal_attribute_t *attr_list) +{ + return call_handler_def_ret(rif, -EOPNOTSUPP, get_attr, + obj, attr_count, attr_list); +} + +void +fal_dump_router_interface(fal_object_t obj, json_writer_t *wr) +{ + call_handler(rif, dump, obj, wr); +} + /* Tunnel operations */ int @@ -744,6 +980,76 @@ int fal_set_tunnel_attr(fal_object_t obj, obj, attr_count, attr_list); } +/* LAG operations */ +int fal_create_lag(uint32_t attr_count, + struct fal_attribute_t *attr_list, + fal_object_t *obj) +{ + return call_handler_def_ret(lag, -EOPNOTSUPP, create_lag, + attr_count, attr_list, obj); +} + +int fal_delete_lag(fal_object_t obj) +{ + return call_handler_def_ret(lag, -EOPNOTSUPP, delete_lag, + obj); +} + +int fal_set_lag_attr(fal_object_t obj, + uint32_t attr_count, + const struct fal_attribute_t *attr_list) +{ + return call_handler_def_ret(lag, -EOPNOTSUPP, set_lag_attr, + obj, attr_count, attr_list); + +} + +int fal_get_lag_attr(fal_object_t obj, + uint32_t attr_count, + struct fal_attribute_t *attr_list) +{ + return call_handler_def_ret(lag, -EOPNOTSUPP, get_lag_attr, + obj, attr_count, attr_list); + +} + +void +fal_dump_lag(fal_object_t obj, json_writer_t *wr) +{ + call_handler(lag, dump, obj, wr); +} + +int fal_create_lag_member(uint32_t attr_count, + struct fal_attribute_t *attr_list, + fal_object_t *obj) +{ + 
return call_handler_def_ret(lag, -EOPNOTSUPP, create_lag_member, + attr_count, attr_list, obj); +} + +int fal_delete_lag_member(fal_object_t obj) +{ + return call_handler_def_ret(lag, -EOPNOTSUPP, delete_lag_member, + obj); +} + +int fal_set_lag_member_attr(fal_object_t obj, + const struct fal_attribute_t *attr) +{ + return call_handler_def_ret(lag, -EOPNOTSUPP, set_lag_member_attr, + obj, attr); + +} + +int fal_get_lag_member_attr(fal_object_t obj, + uint32_t attr_count, + struct fal_attribute_t *attr_list) +{ + return call_handler_def_ret(lag, -EOPNOTSUPP, get_lag_member_attr, + obj, attr_count, attr_list); + +} + /* Bridge operations */ void fal_br_new_port(unsigned int bridge_ifindex, @@ -769,7 +1075,7 @@ void fal_br_del_port(unsigned int bridge_ifindex, unsigned int child_ifindex) void fal_br_new_neigh(unsigned int child_ifindex, uint16_t vlanid, - const struct ether_addr *dst, + const struct rte_ether_addr *dst, uint32_t attr_count, const struct fal_attribute_t *attr_list) { @@ -779,14 +1085,14 @@ void fal_br_new_neigh(unsigned int child_ifindex, void fal_br_upd_neigh(unsigned int child_ifindex, uint16_t vlanid, - const struct ether_addr *dst, + const struct rte_ether_addr *dst, struct fal_attribute_t *attr) { call_handler(bridge, upd_neigh, child_ifindex, vlanid, dst, attr); } void fal_br_del_neigh(unsigned int child_ifindex, uint16_t vlanid, - const struct ether_addr *dst) + const struct rte_ether_addr *dst) { call_handler(bridge, del_neigh, child_ifindex, vlanid, dst); } @@ -800,7 +1106,8 @@ void fal_br_flush_neigh(unsigned int bridge_ifindex, } int fal_br_walk_neigh(unsigned int bridge_ifindex, uint16_t vlanid, - const struct ether_addr *dst, unsigned int child_ifindex, + const struct rte_ether_addr *dst, + unsigned int child_ifindex, fal_br_walk_neigh_fn cb, void *arg) { return call_handler_ret(bridge, walk_neigh, bridge_ifindex, vlanid, dst, @@ -809,7 +1116,7 @@ int fal_br_walk_neigh(unsigned int bridge_ifindex, uint16_t vlanid, void 
fal_fdb_flush_mac(unsigned int bridge_ifindex, unsigned int child_ifindex, - const struct ether_addr *mac) + const struct rte_ether_addr *mac) { struct fal_attribute_t attrs[2]; uint32_t acount = 0; @@ -1118,35 +1425,99 @@ void fal_ip6_del_addr(unsigned int if_index, const struct if_addr *ifa) fal_ip_del_addr(if_index, &faddr, ifa->ifa_prefixlen); } -static int fal_ip_new_neigh(unsigned int if_index, - struct fal_ip_address_t *ipaddr, - uint32_t attr_count, - const struct fal_attribute_t *attr_list) +static int _fal_ip_new_neigh(unsigned int if_index, + fal_object_t rtr_intf_obj, + struct fal_ip_address_t *ipaddr, + uint32_t attr_count, + const struct fal_attribute_t *attr_list) { - return call_handler_def_ret( - ip, -EOPNOTSUPP, new_neigh, if_index, ipaddr, attr_count, + struct fal_neighbor_entry_t neigh_entry = { + .router_intf_obj = rtr_intf_obj, + .ip_addr = *ipaddr, + }; + int ret; + + ret = call_handler_def_ret( + ip, -EOPNOTSUPP, new_neigh, &neigh_entry, attr_count, attr_list); + if (ret == -EOPNOTSUPP) + ret = call_handler_def_ret( + ip, -EOPNOTSUPP, new_neigh_depr, if_index, + ipaddr, attr_count, attr_list); + + return ret; } static int _fal_ip_upd_neigh(unsigned int if_index, + fal_object_t rtr_intf_obj, struct fal_ip_address_t *ipaddr, const struct fal_attribute_t *attr) { - return call_handler_def_ret( - ip, -EOPNOTSUPP, upd_neigh, if_index, ipaddr, - (struct fal_attribute_t *)attr); + struct fal_neighbor_entry_t neigh_entry = { + .router_intf_obj = rtr_intf_obj, + .ip_addr = *ipaddr, + }; + int ret; + + ret = call_handler_def_ret( + ip, -EOPNOTSUPP, upd_neigh, &neigh_entry, attr); + if (ret == -EOPNOTSUPP) + ret = call_handler_def_ret( + ip, -EOPNOTSUPP, upd_neigh_depr, if_index, ipaddr, + (struct fal_attribute_t *)attr); + + return ret; } int fal_ip_get_neigh_attrs(unsigned int if_index, + fal_object_t rtr_intf_obj, const struct sockaddr *sa, uint32_t attr_count, struct fal_attribute_t *attr_list) { - struct fal_ip_address_t ipaddr = { 0 }; + struct 
fal_neighbor_entry_t neigh_entry = { + .router_intf_obj = rtr_intf_obj, + }; + int ret; if (!fal_plugins_present()) return -EOPNOTSUPP; + switch (sa->sa_family) { + case AF_INET: + neigh_entry.ip_addr.addr_family = FAL_IP_ADDR_FAMILY_IPV4; + neigh_entry.ip_addr.addr.ip4 = + ((const struct sockaddr_in *)sa)->sin_addr.s_addr; + break; + case AF_INET6: + neigh_entry.ip_addr.addr_family = FAL_IP_ADDR_FAMILY_IPV6; + neigh_entry.ip_addr.addr.addr6 = + ((const struct sockaddr_in6 *)sa)->sin6_addr; + break; + default: + return -EOPNOTSUPP; + } + + ret = call_handler_def_ret( + ip, -EOPNOTSUPP, get_neigh_attrs, &neigh_entry, + attr_count, attr_list); + if (ret == -EOPNOTSUPP) + ret = call_handler_def_ret( + ip, -EOPNOTSUPP, get_neigh_attrs_depr, if_index, + &neigh_entry.ip_addr, + attr_count, attr_list); + + return ret; +} + +int fal_ip_new_neigh(unsigned int if_index, + fal_object_t rtr_intf_obj, + const struct sockaddr *sa, + uint32_t attr_count, + const struct fal_attribute_t *attr_list) +{ + struct fal_ip_address_t ipaddr = { 0 }; + switch (sa->sa_family) { case AF_INET: ipaddr.addr_family = FAL_IP_ADDR_FAMILY_IPV4; @@ -1162,12 +1533,13 @@ int fal_ip_get_neigh_attrs(unsigned int if_index, return -EOPNOTSUPP; } - return call_handler_def_ret( - ip, -EOPNOTSUPP, get_neigh_attrs, if_index, &ipaddr, - attr_count, attr_list); + return _fal_ip_new_neigh(if_index, rtr_intf_obj, &ipaddr, attr_count, + attr_list); } + int fal_ip_upd_neigh(unsigned int if_index, + fal_object_t rtr_intf_obj, const struct sockaddr *sa, const struct fal_attribute_t *attr) { @@ -1191,17 +1563,44 @@ int fal_ip_upd_neigh(unsigned int if_index, return -EOPNOTSUPP; } - return _fal_ip_upd_neigh(if_index, &ipaddr, attr); + return _fal_ip_upd_neigh(if_index, rtr_intf_obj, &ipaddr, attr); } static int fal_ip_del_neigh(unsigned int if_index, + fal_object_t rtr_intf_obj, struct fal_ip_address_t *ipaddr) { - return call_handler_def_ret( - ip, -EOPNOTSUPP, del_neigh, if_index, ipaddr); + struct 
fal_neighbor_entry_t neigh_entry = { + .router_intf_obj = rtr_intf_obj, + .ip_addr = *ipaddr, + }; + int ret; + + ret = call_handler_def_ret( + ip, -EOPNOTSUPP, del_neigh, &neigh_entry); + if (ret == -EOPNOTSUPP) + ret = call_handler_def_ret( + ip, -EOPNOTSUPP, del_neigh_depr, if_index, ipaddr); + + return ret; +} + +static void fal_ip_dump_neigh(unsigned int if_index, + fal_object_t rtr_intf_obj, + struct fal_ip_address_t *ipaddr, + json_writer_t *wr) +{ + struct fal_neighbor_entry_t neigh_entry = { + .router_intf_obj = rtr_intf_obj, + .ip_addr = *ipaddr, + }; + + call_handler(ip, dump_neigh, &neigh_entry, wr); + call_handler(ip, dump_neigh_depr, if_index, ipaddr, wr); } int fal_ip4_new_neigh(unsigned int if_index, + fal_object_t rtr_intf_obj, const struct sockaddr_in *sin, uint32_t attr_count, const struct fal_attribute_t *attr_list) @@ -1214,10 +1613,12 @@ int fal_ip4_new_neigh(unsigned int if_index, if (!fal_plugins_present()) return 0; - return fal_ip_new_neigh(if_index, &faddr, attr_count, attr_list); + return _fal_ip_new_neigh(if_index, rtr_intf_obj, &faddr, + attr_count, attr_list); } int fal_ip6_new_neigh(unsigned int if_index, + fal_object_t rtr_intf_obj, const struct sockaddr_in6 *sin6, uint32_t attr_count, const struct fal_attribute_t *attr_list) @@ -1230,10 +1631,12 @@ int fal_ip6_new_neigh(unsigned int if_index, if (!fal_plugins_present()) return 0; - return fal_ip_new_neigh(if_index, &faddr, attr_count, attr_list); + return _fal_ip_new_neigh(if_index, rtr_intf_obj, &faddr, + attr_count, attr_list); } int fal_ip4_upd_neigh(unsigned int if_index, + fal_object_t rtr_intf_obj, const struct sockaddr_in *sin, struct fal_attribute_t *attr) { @@ -1245,10 +1648,11 @@ int fal_ip4_upd_neigh(unsigned int if_index, if (!fal_plugins_present()) return 0; - return _fal_ip_upd_neigh(if_index, &faddr, attr); + return _fal_ip_upd_neigh(if_index, rtr_intf_obj, &faddr, attr); } int fal_ip6_upd_neigh(unsigned int if_index, + fal_object_t rtr_intf_obj, const struct 
sockaddr_in6 *sin6, struct fal_attribute_t *attr) { @@ -1260,10 +1664,11 @@ int fal_ip6_upd_neigh(unsigned int if_index, if (!fal_plugins_present()) return 0; - return _fal_ip_upd_neigh(if_index, &faddr, attr); + return _fal_ip_upd_neigh(if_index, rtr_intf_obj, &faddr, attr); } int fal_ip4_del_neigh(unsigned int if_index, + fal_object_t rtr_intf_obj, const struct sockaddr_in *sin) { struct fal_ip_address_t faddr = { @@ -1274,10 +1679,11 @@ int fal_ip4_del_neigh(unsigned int if_index, if (!fal_plugins_present()) return 0; - return fal_ip_del_neigh(if_index, &faddr); + return fal_ip_del_neigh(if_index, rtr_intf_obj, &faddr); } int fal_ip6_del_neigh(unsigned int if_index, + fal_object_t rtr_intf_obj, const struct sockaddr_in6 *sin6) { struct fal_ip_address_t faddr = { @@ -1288,38 +1694,72 @@ int fal_ip6_del_neigh(unsigned int if_index, if (!fal_plugins_present()) return 0; - return fal_ip_del_neigh(if_index, &faddr); + return fal_ip_del_neigh(if_index, rtr_intf_obj, &faddr); } -static enum fal_packet_action_t -next_hop_to_packet_action(const struct next_hop *nh) +void fal_ip4_dump_neigh(unsigned int if_index, + fal_object_t rtr_intf_obj, + const struct sockaddr_in *sin, + json_writer_t *wr) { - if (nh->flags & RTF_BLACKHOLE || - nh_outlabels_present(&nh->outlabels)) - return FAL_PACKET_ACTION_DROP; + struct fal_ip_address_t faddr = { + .addr_family = FAL_IP_ADDR_FAMILY_IPV4, + .addr.ip4 = sin->sin_addr.s_addr + }; - if (nh->flags & (RTF_LOCAL|RTF_BROADCAST|RTF_SLOWPATH|RTF_REJECT)) - return FAL_PACKET_ACTION_TRAP; + fal_ip_dump_neigh(if_index, rtr_intf_obj, &faddr, wr); +} - return FAL_PACKET_ACTION_FORWARD; +void fal_ip6_dump_neigh(unsigned int if_index, + fal_object_t rtr_intf_obj, + const struct sockaddr_in6 *sin6, + json_writer_t *wr) +{ + struct fal_ip_address_t faddr = { + .addr_family = FAL_IP_ADDR_FAMILY_IPV6, + .addr.addr6 = sin6->sin6_addr + }; + + fal_ip_dump_neigh(if_index, rtr_intf_obj, &faddr, wr); +} + +static inline bool +is_deagg_nh(struct ifnet *ifp, 
enum fal_next_hop_group_use use, + unsigned int label_count, + const union next_hop_outlabels *lbls) +{ + return ifp && is_lo(ifp) && use == FAL_NHG_USE_MPLS_LABEL_SWITCH && + (label_count == 0 || + (label_count == 1 && + nh_outlabels_get_value(lbls, 0) == MPLS_IMPLICITNULL)); } static enum fal_packet_action_t -next_hop6_to_packet_action(const struct next_hop_v6 *nh) +next_hop_to_packet_action(const struct next_hop *nh) { - if (nh->flags & RTF_BLACKHOLE || - nh_outlabels_present(&nh->outlabels)) + struct ifnet *ifp; + + if (nh->flags & RTF_BLACKHOLE) return FAL_PACKET_ACTION_DROP; if (nh->flags & (RTF_LOCAL|RTF_BROADCAST|RTF_SLOWPATH|RTF_REJECT)) return FAL_PACKET_ACTION_TRAP; + ifp = dp_nh_get_ifp(nh); + if (!ifp || + (ifp->fal_l3 == FAL_NULL_OBJECT_ID && + !is_deagg_nh(ifp, FAL_NHG_USE_MPLS_LABEL_SWITCH, + nh_outlabels_get_cnt(&nh->outlabels), + &nh->outlabels))) + return FAL_PACKET_ACTION_TRAP; + return FAL_PACKET_ACTION_FORWARD; } static const struct fal_attribute_t **next_hop_to_attr_list( fal_object_t nhg_object, size_t nhops, - const struct next_hop hops[], uint32_t **attr_count) + const struct next_hop hops[], + enum fal_next_hop_group_use use, uint32_t **attr_count) { const struct fal_attribute_t **nh_attr_list; size_t i; @@ -1337,10 +1777,17 @@ static const struct fal_attribute_t **next_hop_to_attr_list( const struct next_hop *nh = &hops[i]; struct fal_attribute_t *nh_attr; struct ifnet *ifp; - struct fal_ip_address_t *addr; + unsigned int max_attrs = 7; + unsigned int nh_attr_count = 0; + struct fal_u32_list_t *label_list; + unsigned int label_count = + nh_outlabels_get_cnt(&nh->outlabels); + unsigned int label_idx; nh_attr_list[i] = nh_attr = calloc( - 1, sizeof(*nh_attr) * 3); + 1, sizeof(*nh_attr) * max_attrs + + offsetof(typeof(*label_list), + list[label_count])); if (!nh_attr) { while (i--) free((struct fal_attribute_t *) @@ -1349,175 +1796,129 @@ static const struct fal_attribute_t **next_hop_to_attr_list( free(nh_attr_list); return NULL; } - addr 
= &nh_attr[2].value.ipaddr; - nh_attr[0].id = FAL_NEXT_HOP_ATTR_NEXT_HOP_GROUP; - nh_attr[0].value.objid = nhg_object; - nh_attr[1].id = FAL_NEXT_HOP_ATTR_INTF; - ifp = nh4_get_ifp(nh); - nh_attr[1].value.u32 = ifp ? ifp->if_index : 0; - if (nh->flags & (RTF_GATEWAY | RTF_NEIGH_CREATED)) { - nh_attr[2].id = FAL_NEXT_HOP_ATTR_IP; - nh_attr[2].value.ptr = addr; - addr->addr_family = FAL_IP_ADDR_FAMILY_IPV4; - addr->addr.ip4 = nh->gateway; - (*attr_count)[i] = 3; + label_list = (struct fal_u32_list_t *)&nh_attr[max_attrs]; + + nh_attr[nh_attr_count].id = FAL_NEXT_HOP_ATTR_NEXT_HOP_GROUP; + nh_attr[nh_attr_count].value.objid = nhg_object; + nh_attr_count++; + ifp = dp_nh_get_ifp(nh); + if (is_deagg_nh(ifp, use, label_count, &nh->outlabels)) { + nh_attr[nh_attr_count].id = + FAL_NEXT_HOP_ATTR_VRF_LOOKUP; + nh_attr[nh_attr_count].value.objid = + get_vrf(ifp->if_vrfid)->v_fal_obj; + nh_attr_count++; } else { - (*attr_count)[i] = 2; + nh_attr[nh_attr_count].id = FAL_NEXT_HOP_ATTR_INTF; + nh_attr[nh_attr_count].value.u32 = + ifp ? ifp->if_index : 0; + nh_attr_count++; + nh_attr[nh_attr_count].id = + FAL_NEXT_HOP_ATTR_ROUTER_INTF; + nh_attr[nh_attr_count].value.objid = ifp ? 
ifp->fal_l3 : + FAL_NULL_OBJECT_ID; + nh_attr_count++; } - } - - return nh_attr_list; -} - -static const struct fal_attribute_t **next_hop6_to_attr_list( - fal_object_t nhg_object, size_t nhops, - const struct next_hop_v6 hops[], uint32_t **attr_count) -{ - const struct fal_attribute_t **nh_attr_list; - size_t i; - - nh_attr_list = calloc(nhops, sizeof(*nh_attr_list)); - if (!nh_attr_list) - return NULL; - *attr_count = calloc(nhops, sizeof(**attr_count)); - if (!*attr_count) { - free(nh_attr_list); - return NULL; - } - - for (i = 0; i < nhops; i++) { - const struct next_hop_v6 *nh = &hops[i]; - struct fal_attribute_t *nh_attr; - struct ifnet *ifp; - struct fal_ip_address_t *addr; - - nh_attr_list[i] = nh_attr = calloc( - 1, sizeof(*nh_attr) * 3); - if (!nh_attr) { - while (i--) - free((struct fal_attribute_t *) - nh_attr_list[i]); - free(*attr_count); - free(nh_attr_list); - return NULL; - } - addr = &nh_attr[2].value.ipaddr; - nh_attr[0].id = FAL_NEXT_HOP_ATTR_NEXT_HOP_GROUP; - nh_attr[0].value.objid = nhg_object; - nh_attr[1].id = FAL_NEXT_HOP_ATTR_INTF; - ifp = nh6_get_ifp(nh); - nh_attr[1].value.u32 = ifp ? 
ifp->if_index : 0; if (nh->flags & (RTF_GATEWAY | RTF_NEIGH_CREATED)) { - nh_attr[2].id = FAL_NEXT_HOP_ATTR_IP; - nh_attr[2].value.ptr = addr; - addr->addr_family = FAL_IP_ADDR_FAMILY_IPV6; - addr->addr.addr6 = nh->gateway; - (*attr_count)[i] = 3; - } else { - (*attr_count)[i] = 2; + nh_attr[nh_attr_count].id = FAL_NEXT_HOP_ATTR_IP; + fal_attr_set_ip_addr(&nh_attr[nh_attr_count], + &nh->gateway); + nh_attr_count++; } + if (nh->flags & RTF_BACKUP) { + nh_attr[nh_attr_count].id = + FAL_NEXT_HOP_ATTR_CONFIGURED_ROLE; + nh_attr[nh_attr_count].value.u32 = + FAL_NEXT_HOP_CONFIGURED_ROLE_STANDBY; + nh_attr_count++; + } + if (nh->flags & RTF_UNUSABLE) { + nh_attr[nh_attr_count].id = FAL_NEXT_HOP_ATTR_USABILITY; + nh_attr[nh_attr_count].value.u32 = + FAL_NEXT_HOP_UNUSABLE; + nh_attr_count++; + } + if (label_count) { + nh_attr[nh_attr_count].id = + FAL_NEXT_HOP_ATTR_MPLS_LABELSTACK; + nh_attr[nh_attr_count].value.u32list = + label_list; + label_list->count = label_count; + for (label_idx = 0; label_idx < label_count; + label_idx++) + label_list->list[label_idx] = + nh_outlabels_get_value( + &nh->outlabels, label_idx); + nh_attr_count++; + } + (*attr_count)[i] = nh_attr_count; } return nh_attr_list; } -int fal_ip4_new_next_hops(size_t nhops, const struct next_hop hops[], - fal_object_t *nhg_object, - fal_object_t *obj_list) +enum fal_packet_action_t +fal_next_hop_group_packet_action(uint32_t nhops, const struct next_hop hops[]) { - const struct fal_attribute_t **nh_attr_list; - uint32_t *nh_attr_count; + enum fal_packet_action_t action; uint32_t i; - int ret; - - /* we must have at least one nexthop */ - if (!nhops) - return -EINVAL; - - if (!fal_plugins_present()) - return 0; - - for (i = 0; i < nhops; i++) { - /* - * Don't create next_hop_group if there is at least - * one nexthop that needs to do something special, since - * we can't represent this in the next_hop - * attributes. This will be represent instead using - * route attributes. 
- */ - if (next_hop_to_packet_action(&hops[i]) != - FAL_PACKET_ACTION_FORWARD) - return 0; - } - - ret = call_handler_def_ret(ip, -EOPNOTSUPP, - new_next_hop_group, 0, NULL, - nhg_object); - if (ret < 0) - return ret; - - nh_attr_list = next_hop_to_attr_list(*nhg_object, nhops, hops, - &nh_attr_count); - if (!nh_attr_list) { - ret = -ENOMEM; - goto error; - } - - ret = call_handler_def_ret(ip, -EOPNOTSUPP, new_next_hops, - nhops, nh_attr_count, nh_attr_list, - obj_list); - - for (i = 0; i < nhops; i++) - free((struct fal_attribute_t *)nh_attr_list[i]); - free(nh_attr_list); - free(nh_attr_count); - if (ret < 0) - goto error; - return ret; + for (i = 0; i < nhops; i++) { + action = next_hop_to_packet_action(&hops[i]); + if (action != FAL_PACKET_ACTION_FORWARD) + return action; + } -error: - call_handler_ret(ip, del_next_hop_group, *nhg_object); - return ret; + return FAL_PACKET_ACTION_FORWARD; } -int fal_ip6_new_next_hops(size_t nhops, const struct next_hop_v6 hops[], - fal_object_t *nhg_object, - fal_object_t *obj_list) + +int fal_ip_new_next_hops(enum fal_next_hop_group_use use, + size_t nhops, const struct next_hop hops[], + fal_object_t *nhg_object, + fal_object_t *obj_list) { const struct fal_attribute_t **nh_attr_list; + struct fal_attribute_t nhg_attrs[1]; + uint32_t nhg_attr_count = 0; uint32_t *nh_attr_count; uint32_t i; int ret; + enum fal_packet_action_t action; /* we must have at least one nexthop */ if (!nhops) return -EINVAL; if (!fal_plugins_present()) - return 0; + return -EOPNOTSUPP; - for (i = 0; i < nhops; i++) { - /* - * Don't create next_hop_group if there is at least - * one nexthop that needs to do something special, since - * we can't represent this in the next_hop - * attributes. This will be represent instead using - * route attributes. 
- */ - if (next_hop6_to_packet_action(&hops[i]) != - FAL_PACKET_ACTION_FORWARD) - return 0; + action = fal_next_hop_group_packet_action(nhops, hops); + /* + * Don't create next_hop_group if there is at least + * one nexthop that needs to do something special, since + * we can't represent this in the next_hop + * attributes. This will be represented instead using + * route attributes. + */ + if (action != FAL_PACKET_ACTION_FORWARD) + return FAL_RC_NOT_REQ; + + if (use != FAL_NHG_USE_IP) { + nhg_attrs[nhg_attr_count].id = + FAL_NEXT_HOP_GROUP_ATTR_USE; + nhg_attrs[nhg_attr_count].value.u32 = use; + nhg_attr_count++; } ret = call_handler_def_ret(ip, -EOPNOTSUPP, - new_next_hop_group, 0, NULL, - nhg_object); + new_next_hop_group, nhg_attr_count, + nhg_attrs, nhg_object); if (ret < 0) return ret; - nh_attr_list = next_hop6_to_attr_list(*nhg_object, nhops, hops, - &nh_attr_count); + nh_attr_list = next_hop_to_attr_list(*nhg_object, nhops, hops, + use, &nh_attr_count); if (!nh_attr_list) { ret = -ENOMEM; goto error; @@ -1541,28 +1942,13 @@ int fal_ip6_new_next_hops(size_t nhops, const struct next_hop_v6 hops[], return ret; } -int fal_ip4_del_next_hops(fal_object_t nhg_object, size_t nhops, - const struct next_hop *hops, - const fal_object_t *obj_list) +int fal_ip_del_next_hops(fal_object_t nhg_object, size_t nhops, + const fal_object_t *obj_list) { - uint32_t i; int ret; if (!fal_plugins_present()) - return 0; - - for (i = 0; i < nhops; i++) { - /* - * Don't create next_hop_group if there is at least - * one nexthop that needs to do something special, since - * we can't represent this in the next_hop - * attributes. This will be represent instead using - * route attributes. 
- */ - if (next_hop_to_packet_action(&hops[i]) != - FAL_PACKET_ACTION_FORWARD) - return 0; - } + return -EOPNOTSUPP; ret = call_handler_def_ret(ip, -EOPNOTSUPP, del_next_hops, nhops, obj_list); @@ -1574,103 +1960,195 @@ int fal_ip4_del_next_hops(fal_object_t nhg_object, size_t nhops, return ret; } -int fal_ip6_del_next_hops(fal_object_t nhg_object, size_t nhops, - const struct next_hop_v6 *hops, - const fal_object_t *obj_list) +/* + * The nexthop at 'index' has changed so inform the platforms. + */ +int fal_ip_upd_next_hop_state(const fal_object_t *nh_list, + int index, bool usable) { - uint32_t i; - int ret; + const fal_object_t *nh_obj = &nh_list[index]; + struct fal_attribute_t nh_attr; - if (!fal_plugins_present()) - return 0; + nh_attr.id = FAL_NEXT_HOP_ATTR_USABILITY; + if (usable) + nh_attr.value.u32 = FAL_NEXT_HOP_USABLE; + else + nh_attr.value.u32 = FAL_NEXT_HOP_UNUSABLE; - for (i = 0; i < nhops; i++) { - /* - * Don't create next_hop_group if there is at least - * one nexthop that needs to do something special, since - * we can't represent this in the next_hop - * attributes. This will be represent instead using - * route attributes. 
- */ - if (next_hop6_to_packet_action(&hops[i]) != - FAL_PACKET_ACTION_FORWARD) - return 0; - } + return call_handler_def_ret(ip, -EOPNOTSUPP, upd_next_hop, + *nh_obj, &nh_attr); +} - ret = call_handler_def_ret(ip, -EOPNOTSUPP, del_next_hops, - nhops, obj_list); - if (ret >= 0) - ret = call_handler_def_ret(ip, -EOPNOTSUPP, - del_next_hop_group, - nhg_object); +int fal_ip_get_next_hop_group_attrs(fal_object_t nhg_object, + uint32_t attr_count, + struct fal_attribute_t *attr_list) +{ + return call_handler_def_ret( + ip, -EOPNOTSUPP, get_next_hop_group_attrs, nhg_object, + attr_count, attr_list); +} - return ret; +void fal_ip_dump_next_hop_group(fal_object_t nhg_object, json_writer_t *wr) +{ + call_handler(ip, dump_next_hop_group, nhg_object, wr); } -static int fal_ip_new_route(unsigned int vrf_id, - struct fal_ip_address_t *ipaddr, - uint8_t prefixlen, - uint32_t tableid, - uint32_t attr_count, - const struct fal_attribute_t *attr_list) +int fal_ip_get_next_hop_attrs(fal_object_t nh_object, + uint32_t attr_count, + struct fal_attribute_t *attr_list) { return call_handler_def_ret( - ip, -EOPNOTSUPP, new_route, vrf_id, ipaddr, prefixlen, - tableid, attr_count, attr_list); + ip, -EOPNOTSUPP, get_next_hop_attrs, nh_object, + attr_count, attr_list); +} + +void fal_ip_dump_next_hop(fal_object_t nh_object, json_writer_t *wr) +{ + call_handler(ip, dump_next_hop, nh_object, wr); +} + +static int fal_ip_new_route(unsigned int vrf_id, + fal_object_t vrf_obj, + struct fal_ip_address_t *ipaddr, + uint8_t prefixlen, + uint32_t tableid, + uint32_t attr_count, + const struct fal_attribute_t *attr_list) +{ + struct fal_route_entry_t route = { + .vrf_obj = vrf_obj, + .ip_addr = *ipaddr, + .prefix_len = prefixlen, + }; + int ret; + + /* Multiple tables not supported in FAL plugins outside of VRFs */ + if (tableid != RT_TABLE_MAIN) + return -EOPNOTSUPP; + + /* + * If using route handler that takes a VRF object but no VRF + * object created then return an error. 
+ */ + if (vrf_obj == FAL_NULL_OBJECT_ID && + fal_handler && fal_handler->ip && fal_handler->ip->new_route) + return -EINVAL; + + ret = call_handler_def_ret( + ip, -EOPNOTSUPP, new_route, &route, attr_count, attr_list); + if (ret == -EOPNOTSUPP) + ret = call_handler_def_ret( + ip, -EOPNOTSUPP, new_route_depr, vrf_id, ipaddr, + prefixlen, tableid, attr_count, attr_list); + + return ret; } static int fal_ip_upd_route(unsigned int vrf_id, + fal_object_t vrf_obj, struct fal_ip_address_t *ipaddr, uint8_t prefixlen, uint32_t tableid, struct fal_attribute_t *attr) { - return call_handler_def_ret( - ip, -EOPNOTSUPP, upd_route, vrf_id, ipaddr, prefixlen, - tableid, attr); + struct fal_route_entry_t route = { + .vrf_obj = vrf_obj, + .ip_addr = *ipaddr, + .prefix_len = prefixlen, + }; + int ret; + + /* Multiple tables not supported in FAL plugins outside of VRFs */ + if (tableid != RT_TABLE_MAIN) + return -EOPNOTSUPP; + + ret = call_handler_def_ret( + ip, -EOPNOTSUPP, upd_route, &route, attr); + if (ret == -EOPNOTSUPP) + ret = call_handler_def_ret( + ip, -EOPNOTSUPP, upd_route_depr, vrf_id, + ipaddr, prefixlen, tableid, attr); + + return ret; } static int fal_ip_del_route(unsigned int vrf_id, + fal_object_t vrf_obj, struct fal_ip_address_t *ipaddr, uint8_t prefixlen, uint32_t tableid) { - return call_handler_def_ret( - ip, -EOPNOTSUPP, del_route, vrf_id, ipaddr, prefixlen, - tableid); -} + struct fal_route_entry_t route = { + .vrf_obj = vrf_obj, + .ip_addr = *ipaddr, + .prefix_len = prefixlen, + }; + int ret; -static enum fal_packet_action_t -next_hop_group_packet_action(uint32_t nhops, struct next_hop hops[]) -{ - enum fal_packet_action_t action; - uint32_t i; + /* Multiple tables not supported in FAL plugins outside of VRFs */ + if (tableid != RT_TABLE_MAIN) + return -EOPNOTSUPP; - for (i = 0; i < nhops; i++) { - action = next_hop_to_packet_action(&hops[i]); - if (action != FAL_PACKET_ACTION_FORWARD) - return action; - } + ret = call_handler_def_ret( + ip, -EOPNOTSUPP, 
del_route, &route); + if (ret == -EOPNOTSUPP) + ret = call_handler_def_ret( + ip, -EOPNOTSUPP, del_route_depr, vrf_id, ipaddr, + prefixlen, tableid); - return FAL_PACKET_ACTION_FORWARD; + return ret; } -static enum fal_packet_action_t -next_hop6_group_packet_action(uint32_t nhops, struct next_hop_v6 hops[]) +static int fal_ip_get_route_attrs(unsigned int vrf_id, + fal_object_t vrf_obj, + struct fal_ip_address_t *ipaddr, + uint8_t prefixlen, + uint32_t tableid, + uint32_t attr_count, + const struct fal_attribute_t *attr_list) { - enum fal_packet_action_t action; - uint32_t i; + struct fal_route_entry_t route = { + .vrf_obj = vrf_obj, + .ip_addr = *ipaddr, + .prefix_len = prefixlen, + }; + int ret; - for (i = 0; i < nhops; i++) { - action = next_hop6_to_packet_action(&hops[i]); - if (action != FAL_PACKET_ACTION_FORWARD) - return action; - } + /* Multiple tables not supported in FAL plugins outside of VRFs */ + if (tableid != RT_TABLE_MAIN) + return -EOPNOTSUPP; - return FAL_PACKET_ACTION_FORWARD; + /* + * If using route handler that takes a VRF object but no VRF + * object created then return an error. 
+ */ + if (vrf_obj == FAL_NULL_OBJECT_ID && + fal_handler && fal_handler->ip && fal_handler->ip->get_route_attrs) + return -EINVAL; + + ret = call_handler_def_ret( + ip, -EOPNOTSUPP, get_route_attrs, &route, attr_count, + attr_list); + if (ret == -EOPNOTSUPP) + ret = call_handler_def_ret( + ip, -EOPNOTSUPP, get_route_attrs_depr, vrf_id, + ipaddr, prefixlen, tableid, attr_count, + attr_list); + + return ret; +} + +int fal_ip_walk_routes(fal_plugin_route_walk_fn cb, + uint32_t attr_cnt, + struct fal_attribute_t *attr_list, + void *arg) +{ + return call_handler_def_ret(ip, -EOPNOTSUPP, walk_routes, cb, + attr_cnt, attr_list, arg); } -int fal_ip4_new_route(vrfid_t vrf_id, in_addr_t addr, uint8_t prefixlen, +int fal_ip4_new_route(vrfid_t vrf_id, fal_object_t vrf_obj, + in_addr_t addr, uint8_t prefixlen, uint32_t tableid, struct next_hop hops[], size_t nhops, fal_object_t nhg_object) { @@ -1680,7 +2158,7 @@ int fal_ip4_new_route(vrfid_t vrf_id, in_addr_t addr, uint8_t prefixlen, .addr.ip4 = addr }; enum fal_packet_action_t action = - next_hop_group_packet_action(nhops, hops); + fal_next_hop_group_packet_action(nhops, hops); struct fal_attribute_t attr_list[] = { { FAL_ROUTE_ENTRY_ATTR_PACKET_ACTION, .value.u32 = action }, { FAL_ROUTE_ENTRY_ATTR_NEXT_HOP_GROUP, @@ -1690,13 +2168,14 @@ int fal_ip4_new_route(vrfid_t vrf_id, in_addr_t addr, uint8_t prefixlen, if (!fal_plugins_present()) return 0; - return fal_ip_new_route(__vrf_id, &faddr, prefixlen, tableid, + return fal_ip_new_route(__vrf_id, vrf_obj, &faddr, prefixlen, tableid, RTE_DIM(attr_list), attr_list); } -int fal_ip6_new_route(vrfid_t vrf_id, const struct in6_addr *addr, +int fal_ip6_new_route(vrfid_t vrf_id, fal_object_t vrf_obj, + const struct in6_addr *addr, uint8_t prefixlen, uint32_t tableid, - struct next_hop_v6 hops[], size_t nhops, + struct next_hop hops[], size_t nhops, fal_object_t nhg_object) { uint32_t __vrf_id = vrf_id; @@ -1705,7 +2184,7 @@ int fal_ip6_new_route(vrfid_t vrf_id, const struct in6_addr 
*addr, .addr.addr6 = *addr }; enum fal_packet_action_t action = - next_hop6_group_packet_action(nhops, hops); + fal_next_hop_group_packet_action(nhops, hops); struct fal_attribute_t attr_list[] = { { FAL_ROUTE_ENTRY_ATTR_NEXT_HOP_GROUP, .value.objid = nhg_object }, @@ -1715,11 +2194,12 @@ int fal_ip6_new_route(vrfid_t vrf_id, const struct in6_addr *addr, if (!fal_plugins_present()) return 0; - return fal_ip_new_route(__vrf_id, &faddr, prefixlen, tableid, + return fal_ip_new_route(__vrf_id, vrf_obj, &faddr, prefixlen, tableid, RTE_DIM(attr_list), attr_list); } -int fal_ip4_upd_route(vrfid_t vrf_id, in_addr_t addr, uint8_t prefixlen, +int fal_ip4_upd_route(vrfid_t vrf_id, fal_object_t vrf_obj, + in_addr_t addr, uint8_t prefixlen, uint32_t tableid, struct next_hop hops[], size_t nhops, fal_object_t nhg_object) { @@ -1729,17 +2209,30 @@ int fal_ip4_upd_route(vrfid_t vrf_id, in_addr_t addr, uint8_t prefixlen, .addr.ip4 = addr }; enum fal_packet_action_t action = - next_hop_group_packet_action(nhops, hops); + fal_next_hop_group_packet_action(nhops, hops); struct fal_attribute_t pa_attr = { FAL_ROUTE_ENTRY_ATTR_PACKET_ACTION, .value.u32 = action }; + int ret = 0; if (!fal_plugins_present()) return 0; - ret = fal_ip_upd_route(__vrf_id, &faddr, prefixlen, + /* + * If this happens then it indicates a bug in the conditions + * evaluated by fal_next_hop_group_packet_action, or not + * having created the next-hop-group object following a change + * in state. 
+ */ + if (action == FAL_PACKET_ACTION_FORWARD && + nhg_object == FAL_NULL_OBJECT_ID) { + RTE_LOG(ERR, ROUTE, "Missing next-hop-group object for route with action of forward\n"); + return -EINVAL; + } + + ret = fal_ip_upd_route(__vrf_id, vrf_obj, &faddr, prefixlen, tableid, &pa_attr); if (!ret && action == FAL_PACKET_ACTION_FORWARD) { @@ -1747,16 +2240,17 @@ int fal_ip4_upd_route(vrfid_t vrf_id, in_addr_t addr, uint8_t prefixlen, FAL_ROUTE_ENTRY_ATTR_NEXT_HOP_GROUP, .value.objid = nhg_object }; - ret = fal_ip_upd_route(__vrf_id, &faddr, prefixlen, + ret = fal_ip_upd_route(__vrf_id, vrf_obj, &faddr, prefixlen, tableid, &fnhg_attr); } return ret; } -int fal_ip6_upd_route(vrfid_t vrf_id, const struct in6_addr *addr, +int fal_ip6_upd_route(vrfid_t vrf_id, fal_object_t vrf_obj, + const struct in6_addr *addr, uint8_t prefixlen, uint32_t tableid, - struct next_hop_v6 hops[], size_t nhops, + struct next_hop hops[], size_t nhops, fal_object_t nhg_object) { uint32_t __vrf_id = vrf_id; @@ -1766,7 +2260,7 @@ int fal_ip6_upd_route(vrfid_t vrf_id, const struct in6_addr *addr, }; int ret = 0; enum fal_packet_action_t action = - next_hop6_group_packet_action(nhops, hops); + fal_next_hop_group_packet_action(nhops, hops); struct fal_attribute_t pa_attr = { FAL_ROUTE_ENTRY_ATTR_PACKET_ACTION, .value.u32 = action @@ -1775,7 +2269,19 @@ int fal_ip6_upd_route(vrfid_t vrf_id, const struct in6_addr *addr, if (!fal_plugins_present()) return 0; - ret = fal_ip_upd_route(__vrf_id, &faddr, prefixlen, + /* + * If this happens then it indicates a bug in the conditions + * evaluated by fal_next_hop_group_packet_action, or not + * having created the next-hop-group object following a change + * in state. 
+ */ + if (action == FAL_PACKET_ACTION_FORWARD && + nhg_object == FAL_NULL_OBJECT_ID) { + RTE_LOG(ERR, ROUTE, "Missing next-hop-group object for route with action of forward\n"); + return -EINVAL; + } + + ret = fal_ip_upd_route(__vrf_id, vrf_obj, &faddr, prefixlen, tableid, &pa_attr); if (!ret && action == FAL_PACKET_ACTION_FORWARD) { @@ -1783,14 +2289,15 @@ int fal_ip6_upd_route(vrfid_t vrf_id, const struct in6_addr *addr, FAL_ROUTE_ENTRY_ATTR_NEXT_HOP_GROUP, .value.objid = nhg_object }; - ret = fal_ip_upd_route(__vrf_id, &faddr, prefixlen, + ret = fal_ip_upd_route(__vrf_id, vrf_obj, &faddr, prefixlen, tableid, &fnhg_attr); } return ret; } -int fal_ip4_del_route(vrfid_t vrf_id, in_addr_t addr, uint8_t prefixlen, +int fal_ip4_del_route(vrfid_t vrf_id, fal_object_t vrf_obj, + in_addr_t addr, uint8_t prefixlen, uint32_t tableid) { uint32_t __vrf_id = vrf_id; @@ -1802,10 +2309,11 @@ int fal_ip4_del_route(vrfid_t vrf_id, in_addr_t addr, uint8_t prefixlen, if (!fal_plugins_present()) return 0; - return fal_ip_del_route(__vrf_id, &faddr, prefixlen, tableid); + return fal_ip_del_route(__vrf_id, vrf_obj, &faddr, prefixlen, tableid); } -int fal_ip6_del_route(vrfid_t vrf_id, const struct in6_addr *addr, +int fal_ip6_del_route(vrfid_t vrf_id, fal_object_t vrf_obj, + const struct in6_addr *addr, uint8_t prefixlen, uint32_t tableid) { uint32_t __vrf_id = vrf_id; @@ -1817,7 +2325,38 @@ int fal_ip6_del_route(vrfid_t vrf_id, const struct in6_addr *addr, if (!fal_plugins_present()) return 0; - return fal_ip_del_route(__vrf_id, &faddr, prefixlen, tableid); + return fal_ip_del_route(__vrf_id, vrf_obj, &faddr, prefixlen, tableid); +} + +int fal_ip4_get_route_attrs(vrfid_t vrf_id, fal_object_t vrf_obj, + in_addr_t addr, uint8_t prefixlen, + uint32_t tableid, uint32_t attr_count, + const struct fal_attribute_t *attr_list) +{ + uint32_t __vrf_id = vrf_id; + struct fal_ip_address_t faddr = { + .addr_family = FAL_IP_ADDR_FAMILY_IPV4, + .addr.ip4 = addr + }; + + return 
fal_ip_get_route_attrs(__vrf_id, vrf_obj, &faddr, prefixlen, + tableid, attr_count, attr_list); +} + +int fal_ip6_get_route_attrs(vrfid_t vrf_id, fal_object_t vrf_obj, + const struct in6_addr *addr, + uint8_t prefixlen, uint32_t tableid, + uint32_t attr_count, + const struct fal_attribute_t *attr_list) +{ + uint32_t __vrf_id = vrf_id; + struct fal_ip_address_t faddr = { + .addr_family = FAL_IP_ADDR_FAMILY_IPV6, + .addr.addr6 = *addr + }; + + return fal_ip_get_route_attrs(__vrf_id, vrf_obj, &faddr, prefixlen, + tableid, attr_count, attr_list); } /* IP Multicast operations */ @@ -2093,7 +2632,7 @@ int fal_create_ipmc_rpf_group(uint32_t *ifindex_list, uint32_t num_int, rpf_attr[0].value.objid = *rpf_group_id; for (i = 0; i < num_int; i++) { rpf_attr[1].id = FAL_RPF_GROUP_MEMBER_ATTR_RPF_INTERFACE_ID; - ifp = ifnet_byifindex(ifindex_list[i]); + ifp = dp_ifnet_byifindex(ifindex_list[i]); if (!ifp || !ifp->fal_l3) { DP_DEBUG(MULTICAST, ERR, MCAST, "FAL failed to create RPF member bad ifp %s.\n", @@ -2151,7 +2690,7 @@ static int fal_ip4_iterate_ipmc_olist(unsigned char count, struct cds_lfht_iter iter; cds_lfht_for_each_entry(iftable, &iter, vifp, node) { - if (IF_ISSET(vifp->v_if_index, mfcc_ifset)) { + if (IF_ISSET(vifp->v_vif_index, mfcc_ifset)) { if (i >= count) { DP_DEBUG(MULTICAST, ERR, MCAST, "FAL Too many IPMC members %d(%d).\n", @@ -2200,7 +2739,7 @@ static int fal_ip6_iterate_ipmc_olist(unsigned char count, struct cds_lfht_iter iter; cds_lfht_for_each_entry(iftable, &iter, mifp, node) { - if (IF_ISSET(mifp->m6_if_index, mfc_ifset)) { + if (IF_ISSET(mifp->m6_mif_index, mfc_ifset)) { if (i >= count) { DP_DEBUG(MULTICAST, ERR, MCAST, "FAL Too many IPMC members %d(%d).\n", @@ -2417,7 +2956,7 @@ int fal_ip4_upd_mroute(fal_object_t obj, struct mfc *rt, struct vmfcctl *mfc, /* check what changed */ if (memcmp(&rt->mfc_ifset, &mfc->mfcc_ifset, - sizeof(mfc->mfcc_ifset))) { + sizeof(mfc->mfcc_ifset)) != 0) { /* Output list change - do this first before RPF change */ ret 
= fal_create_ipmc_group(mfc->if_count, &mfc->mfcc_ifset, @@ -2638,7 +3177,7 @@ int fal_ip6_upd_mroute(fal_object_t obj, struct mf6c *rt, struct vmf6cctl *mfc, /* check what changed */ if (memcmp(&rt->mf6c_ifset, &mfc->mf6cc_ifset, - sizeof(mfc->mf6cc_ifset))) { + sizeof(mfc->mf6cc_ifset)) != 0) { /* Output list change - do this first before RPF change */ ret = fal_create_ipmc_group(mfc->if_count, &mfc->mf6cc_ifset, @@ -2860,7 +3399,7 @@ int str_to_fal_ip_address_t(char *str, struct fal_ip_address_t *ipaddr) * * Returns 1 on success and 0 on failure. */ -const char *fal_ip_address_t_to_str(struct fal_ip_address_t *ipaddr, +const char *fal_ip_address_t_to_str(const struct fal_ip_address_t *ipaddr, char *dst, socklen_t size) { if (ipaddr->addr_family == FAL_IP_ADDR_FAMILY_IPV4) @@ -2872,6 +3411,27 @@ const char *fal_ip_address_t_to_str(struct fal_ip_address_t *ipaddr, return NULL; } +bool fal_is_ipaddr_empty(const struct fal_ip_address_t *ipaddr) +{ + struct fal_ip_address_t empty_ipaddr = { 0 }; + + return memcmp(ipaddr, &empty_ipaddr, sizeof(empty_ipaddr)) == 0; +} + +enum fal_ip_addr_family_t addr_family_to_fal_ip_addr_family(int family) +{ + switch (family) { + case AF_INET: + return FAL_IP_ADDR_FAMILY_IPV4; + case AF_INET6: + return FAL_IP_ADDR_FAMILY_IPV6; + default: + RTE_LOG(ERR, DATAPLANE, "Invalid address family %d\n", + family); + return -1; + } +} + /* QoS functions */ int fal_qos_new_queue(fal_object_t switch_id, uint32_t attr_count, const struct fal_attribute_t *attr_list, @@ -3038,13 +3598,28 @@ void fal_qos_dump_sched_group(fal_object_t sg, json_writer_t *wr) call_handler(qos, dump_sched_group, sg, wr); } -int __externally_visible +int fal_qos_get_counters(const uint32_t *cntr_ids, + uint32_t num_cntrs, + uint64_t *cntrs) +{ + return call_handler_def_ret(qos, -EOPNOTSUPP, get_counters, + cntr_ids, num_cntrs, cntrs); +} + +void fal_qos_dump_buf_errors(json_writer_t *wr) +{ + call_handler(qos, dump_buf_errors, wr); +} + +__FOR_EXPORT +int 
fal_attach_device(const char *devargs) { return attach_device(devargs); } -int __externally_visible +__FOR_EXPORT +int fal_detach_device(const char *device) { return detach_device(device); @@ -3052,31 +3627,32 @@ fal_detach_device(const char *device) int fal_mirror_session_create(uint32_t attr_count, const struct fal_attribute_t *attr_list, - fal_object_t *obj) + fal_object_t *mr_obj_id) { return call_handler_def_ret(mirror, -EOPNOTSUPP, session_create, - attr_count, attr_list, obj); + attr_count, attr_list, mr_obj_id); } -int fal_mirror_session_delete(fal_object_t obj) +int fal_mirror_session_delete(fal_object_t mr_obj_id) { - return call_handler_def_ret(mirror, -EOPNOTSUPP, session_delete, obj); + return call_handler_def_ret(mirror, -EOPNOTSUPP, session_delete, + mr_obj_id); } -int fal_mirror_session_set_attr(fal_object_t obj, +int fal_mirror_session_set_attr(fal_object_t mr_obj_id, const struct fal_attribute_t *attr) { - return call_handler_def_ret(mirror, -EOPNOTSUPP, session_set_attr, obj, - attr); + return call_handler_def_ret(mirror, -EOPNOTSUPP, session_set_attr, + mr_obj_id, attr); } -int fal_mirror_session_get_attr(fal_object_t obj, uint32_t attr_count, +int fal_mirror_session_get_attr(fal_object_t mr_obj_id, uint32_t attr_count, struct fal_attribute_t *attr_list) { - return call_handler_def_ret(mirror, -EOPNOTSUPP, session_get_attr, obj, - attr_count, attr_list); + return call_handler_def_ret(mirror, -EOPNOTSUPP, session_get_attr, + mr_obj_id, attr_count, attr_list); } int fal_vlan_feature_create(uint32_t attr_count, @@ -3100,6 +3676,15 @@ int fal_vlan_feature_set_attr(fal_object_t obj, vlan_feature_set_attr, obj, attr); } +int fal_vlan_feature_get_attr(fal_object_t obj, + uint32_t attr_count, + struct fal_attribute_t *attr_list) +{ + return call_handler_def_ret(vlan_feat, -EOPNOTSUPP, + vlan_feature_get_attr, obj, attr_count, + attr_list); +} + int fal_backplane_bind(unsigned int bp_ifindex, unsigned int ifindex) { return call_handler_def_ret(backplane, 
-EOPNOTSUPP, backplane_bind, @@ -3131,7 +3716,8 @@ int fal_get_cpp_limiter_attribute(fal_object_t limiter_id, uint32_t attr_count, attr_list); } -void fal_attr_set_ip_addr(struct fal_attribute_t *attr, struct ip_addr *ip) +void fal_attr_set_ip_addr(struct fal_attribute_t *attr, + const struct ip_addr *ip) { switch (ip->type) { case AF_INET: @@ -3140,8 +3726,16 @@ void fal_attr_set_ip_addr(struct fal_attribute_t *attr, struct ip_addr *ip) break; case AF_INET6: - attr->value.ipaddr.addr_family = FAL_IP_ADDR_FAMILY_IPV6; - attr->value.ipaddr.addr.addr6 = ip->address.ip_v6; + if (IN6_IS_ADDR_V4MAPPED(&ip->address.ip_v6)) { + attr->value.ipaddr.addr_family = + FAL_IP_ADDR_FAMILY_IPV4; + attr->value.ipaddr.addr.addr4.s_addr = + V4MAPPED_IPV6_TO_IPV4(ip->address.ip_v6); + } else { + attr->value.ipaddr.addr_family = + FAL_IP_ADDR_FAMILY_IPV6; + attr->value.ipaddr.addr.addr6 = ip->address.ip_v6; + } break; } } @@ -3305,3 +3899,144 @@ int fal_acl_get_counter_attr(fal_object_t counter_id, } /* End of ACL functions */ + +int fal_capture_create(uint32_t attr_count, + const struct fal_attribute_t *attr_list, + fal_object_t *obj) +{ + return call_handler_def_ret(capture, -EOPNOTSUPP, + create, attr_count, + attr_list, obj); +} + +void fal_capture_delete(fal_object_t obj) +{ + call_handler(capture, delete, obj); +} + +int fal_capture_get_stats(fal_object_t obj, uint32_t num_counters, + const enum fal_capture_stat_type *cntr_ids, + uint64_t *stats) +{ + return call_handler_def_ret(capture, -EOPNOTSUPP, get_stats, + obj, num_counters, cntr_ids, stats); +} + +/* Start of BFD functions */ + +int dp_fal_bfd_create_session(fal_object_t *bfd_session_id, + uint32_t attr_count, const struct fal_attribute_t *attr_list) +{ + return call_handler_def_ret(bfd, -EOPNOTSUPP, create_session, + bfd_session_id, attr_count, attr_list); +} + +int dp_fal_bfd_delete_session(fal_object_t bfd_session_id) +{ + return call_handler_def_ret(bfd, -EOPNOTSUPP, delete_session, + bfd_session_id); +} + +int 
dp_fal_bfd_set_session_attribute(fal_object_t bfd_session_id, + uint32_t attr_count, const struct fal_attribute_t *attr_list) +{ + return call_handler_def_ret(bfd, -EOPNOTSUPP, set_session_attr, + bfd_session_id, attr_count, attr_list); +} + +int dp_fal_bfd_get_session_attribute(fal_object_t bfd_session_id, + uint32_t attr_count, struct fal_attribute_t *attr_list) +{ + return call_handler_def_ret(bfd, -EOPNOTSUPP, get_session_attr, + bfd_session_id, attr_count, attr_list); +} + +int dp_fal_bfd_get_session_stats(fal_object_t bfd_session_id, + uint32_t num_of_counters, + const enum fal_bfd_session_stat_t *counter_ids, + uint64_t *counters) +{ + return call_handler_def_ret(bfd, -EOPNOTSUPP, get_session_stats, + bfd_session_id, num_of_counters, + counter_ids, counters); +} + +int dp_fal_bfd_get_switch_attrs(uint32_t attr_count, + struct fal_attribute_t *attr_list) +{ + return fal_get_switch_attrs(attr_count, attr_list); +} + +int dp_fal_bfd_set_switch_attr(const struct fal_attribute_t *attr) +{ + return fal_set_switch_attr(attr); +} + +int dp_fal_bfd_dump_session(fal_object_t bfd_session_id, + json_writer_t *wr) +{ + return call_handler_def_ret(bfd, -EOPNOTSUPP, dump_session, + bfd_session_id, wr); +} + +/* End of BFD functions */ + +int fal_create_mpls_route(const struct fal_mpls_route_t *mpls_route, + uint32_t attr_count, + const struct fal_attribute_t *attr_list) +{ + return call_handler_def_ret( + mpls, -EOPNOTSUPP, create_route, mpls_route, + attr_count, attr_list); +} + +int fal_delete_mpls_route(const struct fal_mpls_route_t *mpls_route) +{ + return call_handler_def_ret( + mpls, -EOPNOTSUPP, delete_route, mpls_route); +} + +int fal_set_mpls_route_attr(const struct fal_mpls_route_t *mpls_route, + const struct fal_attribute_t *attr) +{ + return call_handler_def_ret( + mpls, -EOPNOTSUPP, set_route_attr, mpls_route, + attr); +} + +int fal_get_mpls_route_attr(const struct fal_mpls_route_t *mpls_route, + uint32_t attr_count, + struct fal_attribute_t *attr_list) +{ + 
return call_handler_def_ret( + mpls, -EOPNOTSUPP, get_route_attr, mpls_route, + attr_count, attr_list); +} + +int fal_vrf_create(uint32_t attr_count, + const struct fal_attribute_t *attr_list, + fal_object_t *obj) +{ + return call_handler_def_ret(vrf, -EOPNOTSUPP, create, + attr_count, attr_list, obj); +} + +int fal_vrf_delete(fal_object_t obj) +{ + return call_handler_def_ret(vrf, -EOPNOTSUPP, delete, obj); +} + +int fal_set_vrf_attr(fal_object_t obj, + const struct fal_attribute_t *attr) +{ + return call_handler_def_ret(vrf, -EOPNOTSUPP, set_attr, obj, + attr); +} + +int fal_get_vrf_attr(fal_object_t obj, + uint32_t attr_count, + struct fal_attribute_t *attr_list) +{ + return call_handler_def_ret(vrf, -EOPNOTSUPP, get_attr, obj, + attr_count, attr_list); +} diff --git a/src/fal.h b/src/fal.h index f38e5a0e..c291f839 100644 --- a/src/fal.h +++ b/src/fal.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2019, AT&T Intellectual Property. + * Copyright (c) 2017-2020, AT&T Intellectual Property. * All rights reserved. * Copyright (c) 2016-2017 by Brocade Communications Systems, Inc. * All rights reserved. @@ -24,14 +24,13 @@ #include "route.h" #include "netinet/ip_mroute.h" #include "util.h" -#include "vrf.h" +#include "vrf_internal.h" -struct ether_addr; +struct rte_ether_addr; struct fal_attribute_t; struct fal_ip_address_t; struct if_addr; struct next_hop; -struct next_hop_v6; struct fal_ipmc_entry_t; /* @@ -45,25 +44,30 @@ struct fal_ipmc_entry_t; * or set to NULL. 
*/ struct message_handler { - struct l2_ops *l2; - struct rif_ops *rif; - struct tun_ops *tun; - struct bridge_ops *bridge; - struct vlan_ops *vlan; - struct stp_ops *stp; - struct ip_ops *ip; - struct ipmc_ops *ipmc; - struct acl_ops *acl; - struct qos_ops *qos; - struct lacp_ops *lacp; - struct sys_ops *sys; - struct policer_ops *policer; - struct sw_ops *sw; - struct mirror_ops *mirror; - struct vlan_feat_ops *vlan_feat; - struct backplane_ops *backplane; - struct cpp_rl_ops *cpp_rl; - struct ptp_ops *ptp; + struct fal_l2_ops *l2; + struct fal_rif_ops *rif; + struct fal_tun_ops *tun; + struct fal_lag_ops *lag; + struct fal_bridge_ops *bridge; + struct fal_vlan_ops *vlan; + struct fal_stp_ops *stp; + struct fal_ip_ops *ip; + struct fal_ipmc_ops *ipmc; + struct fal_acl_ops *acl; + struct fal_qos_ops *qos; + struct fal_lacp_ops *lacp; + struct fal_sys_ops *sys; + struct fal_policer_ops *policer; + struct fal_sw_ops *sw; + struct fal_mirror_ops *mirror; + struct fal_vlan_feat_ops *vlan_feat; + struct fal_backplane_ops *backplane; + struct fal_cpp_rl_ops *cpp_rl; + struct fal_ptp_ops *ptp; + struct fal_capture_ops *capture; + struct fal_bfd_ops *bfd; + struct fal_mpls_ops *mpls; + struct fal_vrf_ops *vrf; LIST_ENTRY(message_handler) link; }; @@ -72,16 +76,17 @@ struct message_handler { * l2_ops provide an interface for a receiver 'recv' to work with the data * parsed from AF_UNSPEC netlink messages. 
*/ -struct l2_ops { +struct fal_l2_ops { void (*new_port)(unsigned int if_index, uint32_t attr_count, const struct fal_attribute_t *attr_list); int (*get_attrs)(unsigned int if_index, uint32_t attr_count, struct fal_attribute_t *attr_list); - void (*upd_port)(unsigned int if_index, - struct fal_attribute_t *attr); + int (*upd_port)(unsigned int if_index, + struct fal_attribute_t *attr); void (*del_port)(unsigned int if_index); + void (*dump_port)(unsigned int if_index, json_writer_t *wr); void (*new_addr)(unsigned int if_index, const void *addr, uint32_t attr_count, @@ -96,20 +101,26 @@ struct l2_ops { /* * rif_ops provides an interface for controlling (l3) router intf */ -struct rif_ops { +struct fal_rif_ops { int (*create_intf)(uint32_t attr_count, const struct fal_attribute_t *attr, fal_object_t *obj); int (*delete_intf)(fal_object_t obj); int (*set_attr)(fal_object_t obj, const struct fal_attribute_t *attr); + int (*get_stats)(fal_object_t obj, uint32_t cntr_count, + const enum fal_router_interface_stat_t *cntr_ids, + uint64_t *cntrs); + int (*get_attr)(fal_object_t obj, uint32_t attr_count, + struct fal_attribute_t *attr_list); + void (*dump)(fal_object_t obj, json_writer_t *wr); }; /* * tun_ops provides an interface for controlling tunnel initiator * and terminator */ -struct tun_ops { +struct fal_tun_ops { int (*create_tun)(uint32_t attr_count, const struct fal_attribute_t *attr, fal_object_t *obj); @@ -118,7 +129,32 @@ struct tun_ops { const struct fal_attribute_t *attr); }; -struct stp_ops { +/* + * lag_ops provides an interface for controlling LAG + */ +struct fal_lag_ops { + int (*create_lag)(uint32_t attr_count, + const struct fal_attribute_t *attr, + fal_object_t *obj); + int (*delete_lag)(fal_object_t obj); + int (*set_lag_attr)(fal_object_t obj, uint32_t attr_count, + const struct fal_attribute_t *attr); + int (*get_lag_attr)(fal_object_t obj, + uint32_t attr_count, + struct fal_attribute_t *attr_list); + void (*dump)(fal_object_t obj, json_writer_t 
*wr); + int (*create_lag_member)(uint32_t attr_count, + const struct fal_attribute_t *attr, + fal_object_t *obj); + int (*delete_lag_member)(fal_object_t obj); + int (*set_lag_member_attr)(fal_object_t obj, + const struct fal_attribute_t *attr); + int (*get_lag_member_attr)(fal_object_t obj, + uint32_t attr_count, + struct fal_attribute_t *attr_list); +}; + +struct fal_stp_ops { int (*create)(unsigned int bridge_ifindex, uint32_t attr_count, const struct fal_attribute_t *attr_list, fal_object_t *obj); @@ -140,7 +176,7 @@ struct stp_ops { * bridge_ops provide the ability for a receiver 'hdlr' to work with data * parsed from AF_BRIDGE netlink messages. */ -struct bridge_ops { +struct fal_bridge_ops { void (*new_port)(unsigned int bridge_ifindex, unsigned int child_ifindex, uint32_t attr_count, @@ -151,26 +187,26 @@ struct bridge_ops { unsigned int child_ifindex); void (*new_neigh)(unsigned int child_ifindex, uint16_t vlanid, - const struct ether_addr *dst, + const struct rte_ether_addr *dst, uint32_t attr_count, const struct fal_attribute_t *attr_list); void (*upd_neigh)(unsigned int child_ifindex, uint16_t vlanid, - const struct ether_addr *dst, + const struct rte_ether_addr *dst, struct fal_attribute_t *attr); void (*del_neigh)(unsigned int child_ifindex, uint16_t vlanid, - const struct ether_addr *dst); + const struct rte_ether_addr *dst); void (*flush_neigh)(unsigned int bridge_ifindex, uint32_t attr_count, const struct fal_attribute_t *attr_list); int (*walk_neigh)(unsigned int bridge_ifindex, uint16_t vlanid, - const struct ether_addr *dst, + const struct rte_ether_addr *dst, unsigned int child_ifindex, fal_br_walk_neigh_fn cb, void *arg); }; -struct vlan_ops { +struct fal_vlan_ops { int (*get_stats)(uint16_t vlan, uint32_t num_cntrs, const enum fal_vlan_stat_type *cntr_ids, uint64_t *cntrs); @@ -182,7 +218,7 @@ struct vlan_ops { * ip_ops provide the ability for a receiver 'hdlr' to work with data * parsed from AF_INET netlink messages. 
*/ -struct ip_ops { +struct fal_ip_ops { void (*new_addr)(unsigned int if_index, struct fal_ip_address_t *ipaddr, uint8_t prefixlen, @@ -195,40 +231,78 @@ struct ip_ops { void (*del_addr)(unsigned int if_index, struct fal_ip_address_t *ipaddr, uint8_t prefixlen); - int (*new_neigh)(unsigned int if_index, - struct fal_ip_address_t *ipaddr, + int (*new_neigh)(const struct fal_neighbor_entry_t *neigh_entry, uint32_t attr_count, const struct fal_attribute_t *attr_list); - int (*upd_neigh)(unsigned int if_index, - struct fal_ip_address_t *ipaddr, - struct fal_attribute_t *attr); - int (*get_neigh_attrs)(unsigned int if_index, - struct fal_ip_address_t *ipaddr, + int (*upd_neigh)(const struct fal_neighbor_entry_t *neigh_entry, + const struct fal_attribute_t *attr); + int (*get_neigh_attrs)(const struct fal_neighbor_entry_t *neigh_entry, uint32_t attr_count, const struct fal_attribute_t *attr_list); - int (*del_neigh)(unsigned int if_index, - struct fal_ip_address_t *ipaddr); - int (*new_route)(uint32_t vrf_id, - struct fal_ip_address_t *ipaddr, - uint8_t prefixlen, - uint32_t tableid, + int (*del_neigh)(const struct fal_neighbor_entry_t *neigh_entry); + void (*dump_neigh)(const struct fal_neighbor_entry_t *neigh_entry, + json_writer_t *wr); + int (*new_neigh_depr)(unsigned int if_index, + struct fal_ip_address_t *ipaddr, + uint32_t attr_count, + const struct fal_attribute_t *attr_list); + int (*upd_neigh_depr)(unsigned int if_index, + struct fal_ip_address_t *ipaddr, + struct fal_attribute_t *attr); + int (*get_neigh_attrs_depr)(unsigned int if_index, + struct fal_ip_address_t *ipaddr, + uint32_t attr_count, + const struct fal_attribute_t *attr_list); + int (*del_neigh_depr)(unsigned int if_index, + struct fal_ip_address_t *ipaddr); + void (*dump_neigh_depr)(unsigned int if_index, + struct fal_ip_address_t *ipaddr, + json_writer_t *wr); + int (*new_route)(const struct fal_route_entry_t *route, uint32_t attr_count, const struct fal_attribute_t *attr_list); - int 
(*upd_route)(uint32_t vrf_id, - struct fal_ip_address_t *ipaddr, - uint8_t prefixlen, - uint32_t tableid, + int (*upd_route)(const struct fal_route_entry_t *route, struct fal_attribute_t *attr); - int (*del_route)(uint32_t vrf_id, - struct fal_ip_address_t *ipaddr, - uint8_t prefixlen, - uint32_t tableid); + int (*del_route)(const struct fal_route_entry_t *route); + int (*get_route_attrs)(const struct fal_route_entry_t *route, + uint32_t attr_count, + const struct fal_attribute_t *attr_list); + int (*new_route_depr)(uint32_t vrf_id, + struct fal_ip_address_t *ipaddr, + uint8_t prefixlen, + uint32_t tableid, + uint32_t attr_count, + const struct fal_attribute_t *attr_list); + int (*upd_route_depr)(uint32_t vrf_id, + struct fal_ip_address_t *ipaddr, + uint8_t prefixlen, + uint32_t tableid, + struct fal_attribute_t *attr); + int (*del_route_depr)(uint32_t vrf_id, + struct fal_ip_address_t *ipaddr, + uint8_t prefixlen, + uint32_t tableid); + int (*get_route_attrs_depr)(uint32_t vrf_id, + struct fal_ip_address_t *ipaddr, + uint8_t prefixlen, + uint32_t tableid, + uint32_t attr_count, + const struct fal_attribute_t *attr_list); + int (*walk_routes)(fal_plugin_route_walk_fn cb, + uint32_t attr_count, + const struct fal_attribute_t *attr_list, + void *arg); int (*new_next_hop_group)(uint32_t attr_count, const struct fal_attribute_t *attr_list, fal_object_t *obj); int (*upd_next_hop_group)(fal_object_t obj, const struct fal_attribute_t *attr); int (*del_next_hop_group)(fal_object_t obj); + int (*get_next_hop_group_attrs)( + fal_object_t obj, + uint32_t attr_count, + const struct fal_attribute_t *attr_list); + void (*dump_next_hop_group)(fal_object_t obj, json_writer_t *wr); int (*new_next_hops)(uint32_t nh_count, const uint32_t *attr_count, const struct fal_attribute_t **attr_list, @@ -237,9 +311,13 @@ struct ip_ops { const struct fal_attribute_t *attr); int (*del_next_hops)(uint32_t nh_count, const fal_object_t *obj_list); + int (*get_next_hop_attrs)(fal_object_t obj, + 
uint32_t attr_count, + const struct fal_attribute_t *attr_list); + void (*dump_next_hop)(fal_object_t obj, json_writer_t *wr); }; -struct acl_ops { +struct fal_acl_ops { /* A "table" corresponds to a named "group" */ int (*create_table)(uint32_t attr_count, const struct fal_attribute_t *attr, @@ -282,7 +360,7 @@ struct acl_ops { * ipmc_ops provide the ability for a receiver 'hdlr' to work with data * parsed from AF_INET multicast netlink messages. */ -struct ipmc_ops { +struct fal_ipmc_ops { int (*create_entry)(const struct fal_ipmc_entry_t *ipmc_entry, uint32_t attr_count, const struct fal_attribute_t *attr_list, @@ -341,7 +419,7 @@ struct ipmc_ops { }; /* qos_ops provide ability handle vyatta-dataplane QoS configuration commands */ -struct qos_ops { +struct fal_qos_ops { /* QoS queue object functions */ int (*new_queue)(fal_object_t switch_id, uint32_t attr_count, @@ -408,16 +486,20 @@ struct qos_ops { const struct fal_attribute_t *attr); int (*get_wred_attrs)(fal_object_t wred_id, uint32_t attr_count, struct fal_attribute_t *attr_list); + int (*get_counters)(const uint32_t *cntr_ids, + uint32_t num_cntrs, + uint64_t *cntrs); + void (*dump_buf_errors)(json_writer_t *wr); }; -struct sw_ops { +struct fal_sw_ops { int (*set_attribute)(const struct fal_attribute_t *attr); int (*get_attribute)(uint32_t attr_count, struct fal_attribute_t *attr_list); }; /* sys_ops provide ability to handle system level events */ -struct sys_ops { +struct fal_sys_ops { void (*cleanup)(void); void (*command)(FILE *f, int argc, char **argv); int (*command_ret)(FILE *f, int argc, char **argv); @@ -427,7 +509,7 @@ struct sys_ops { * policer ops are used for setting up storm control and * other traffic policing operations. 
*/ -struct policer_ops { +struct fal_policer_ops { /* The policer APIs follow SAI approach */ int (*create)(uint32_t attr_count, const struct fal_attribute_t *attr_list, @@ -453,7 +535,7 @@ struct policer_ops { * Portmirror/portmonitor operations used for setting,updating and * deleting portmonitor session */ -struct mirror_ops { +struct fal_mirror_ops { int (*session_create)(uint32_t attr_count, const struct fal_attribute_t *attr_list, fal_object_t *mr_obj_id); @@ -469,16 +551,19 @@ struct mirror_ops { * Vlan_feature operations user for setting, updating and creating a vlan * feature. */ -struct vlan_feat_ops { +struct fal_vlan_feat_ops { int (*vlan_feature_create)(uint32_t attr_count, const struct fal_attribute_t *attr_list, fal_object_t *fal_obj_id); int (*vlan_feature_delete)(fal_object_t fal_obj_id); int (*vlan_feature_set_attr)(fal_object_t fal_obj_id, const struct fal_attribute_t *attr); + int (*vlan_feature_get_attr)(fal_object_t fal_obj_id, + uint32_t attr_count, + struct fal_attribute_t *attr_list); }; -struct backplane_ops { +struct fal_backplane_ops { int (*backplane_bind)(unsigned int bp_ifindex, unsigned int ifindex); void (*backplane_dump)(unsigned int bp_ifindex, json_writer_t *wr); }; @@ -487,7 +572,7 @@ struct backplane_ops { * cpp_rl_ops are used for setting up control plane policing rate limiter * operations */ -struct cpp_rl_ops { +struct fal_cpp_rl_ops { /* CPP rate limiter object functions */ int (*create)(uint32_t attr_count, const struct fal_attribute_t *attr_list, @@ -497,7 +582,7 @@ struct cpp_rl_ops { struct fal_attribute_t *attr_list); }; -struct ptp_ops { +struct fal_ptp_ops { int (*create_ptp_clock)(uint32_t attr_count, const struct fal_attribute_t *attr_list, fal_object_t *clock_obj); @@ -514,20 +599,84 @@ struct ptp_ops { int (*delete_ptp_peer)(fal_object_t peer_obj); }; +struct fal_capture_ops { + int (*create)(uint32_t attr_count, + const struct fal_attribute_t *attr_list, + fal_object_t *obj); + void (*delete)(fal_object_t obj); 
+ int (*get_stats)(fal_object_t obj, + uint32_t num_counters, + const enum fal_capture_stat_type *cntr_ids, + uint64_t *stats); +}; + +struct fal_mpls_ops { + int (*create_route)(const struct fal_mpls_route_t *mpls_route, + uint32_t attr_count, + const struct fal_attribute_t *attr_list); + int (*delete_route)(const struct fal_mpls_route_t *mpls_route); + int (*set_route_attr)(const struct fal_mpls_route_t *mpls_route, + const struct fal_attribute_t *attr); + int (*get_route_attr)(const struct fal_mpls_route_t *mpls_route, + uint32_t attr_count, + struct fal_attribute_t *attr_list); +}; + +struct fal_vrf_ops { + int (*create)(uint32_t attr_count, + const struct fal_attribute_t *attr_list, + fal_object_t *obj); + int (*delete)(fal_object_t obj); + int (*set_attr)(fal_object_t obj, + const struct fal_attribute_t *attr); + int (*get_attr)(fal_object_t obj, + uint32_t attr_count, + struct fal_attribute_t *attr_list); +}; + +enum fal_rc { + /* All good */ + FAL_RC_SUCCESS = 0, + /* Object not required in FAL plugin */ + FAL_RC_NOT_REQ = 1, +}; + +struct fal_bfd_ops { + int (*create_session)(fal_object_t *bfd_session_id, + uint32_t attr_count, + const struct fal_attribute_t *attr_list); + int (*delete_session)(fal_object_t bfd_session_id); + int (*set_session_attr)(fal_object_t bfd_session_id, + uint32_t attr_count, + const struct fal_attribute_t *attr_list); + int (*get_session_attr)(fal_object_t bfd_session_id, + uint32_t attr_count, + const struct fal_attribute_t *attr_list); + int (*get_session_stats)(fal_object_t bfd_session_id, + uint32_t num_counters, + const enum fal_bfd_session_stat_t *counter_ids, + uint64_t *counters); + int (*dump_session)(fal_object_t bfd_session_id, + json_writer_t *wr); +}; + void fal_init(void); void fal_init_plugins(void); void fal_cleanup(void); int cmd_fal(FILE *f, int argc, char **argv); bool fal_plugins_present(void); int str_to_fal_ip_address_t(char *str, struct fal_ip_address_t *ipaddr); -const char *fal_ip_address_t_to_str(struct 
fal_ip_address_t *ipaddr, +const char *fal_ip_address_t_to_str(const struct fal_ip_address_t *ipaddr, char *dst, socklen_t size); +bool fal_is_ipaddr_empty(const struct fal_ip_address_t *ipaddr); +enum fal_ip_addr_family_t addr_family_to_fal_ip_addr_family(int family); void fal_register_message_handler(struct message_handler *handler); void fal_delete_message_handler(struct message_handler *handler); /* Set the ip addr into the given attr */ -void fal_attr_set_ip_addr(struct fal_attribute_t *attr, struct ip_addr *ip); +void fal_attr_set_ip_addr(struct fal_attribute_t *attr, + const struct ip_addr *ip); void fal_l2_new_port(unsigned int if_index, uint32_t attr_count, @@ -535,18 +684,19 @@ void fal_l2_new_port(unsigned int if_index, int fal_l2_get_attrs(unsigned int if_index, uint32_t attr_count, struct fal_attribute_t *attr_list); -void fal_l2_upd_port(unsigned int if_index, - struct fal_attribute_t *attr); +int fal_l2_upd_port(unsigned int if_index, + struct fal_attribute_t *attr); void fal_l2_del_port(unsigned int if_index); +void fal_l2_dump_port(unsigned int if_index, json_writer_t *wr); void fal_l2_new_addr(unsigned int if_index, - const struct ether_addr *addr, + const struct rte_ether_addr *addr, uint32_t attr_count, const struct fal_attribute_t *attr_list); void fal_l2_upd_addr(unsigned int if_index, - const struct ether_addr *addr, + const struct rte_ether_addr *addr, struct fal_attribute_t *attr); void fal_l2_del_addr(unsigned int if_index, - const struct ether_addr *addr); + const struct rte_ether_addr *addr); /* Router Interface related APIs */ int fal_create_router_interface(uint32_t attr_count, @@ -555,6 +705,15 @@ int fal_create_router_interface(uint32_t attr_count, int fal_delete_router_interface(fal_object_t obj); int fal_set_router_interface_attr(fal_object_t obj, const struct fal_attribute_t *attr); +int +fal_get_router_interface_stats(fal_object_t obj, + uint32_t cntr_count, + const enum fal_router_interface_stat_t *cntr_ids, + uint64_t *cntrs); 
+int fal_get_router_interface_attr(fal_object_t obj, uint32_t attr_count, + struct fal_attribute_t *attr); +void +fal_dump_router_interface(fal_object_t obj, json_writer_t *wr); /* Tunnel APIs */ int fal_create_tunnel(uint32_t attr_count, @@ -565,6 +724,28 @@ int fal_set_tunnel_attr(fal_object_t obj, uint32_t attr_count, const struct fal_attribute_t *attr_list); +/* LAG APIs*/ +int fal_create_lag(uint32_t attr_count, + struct fal_attribute_t *attr_list, + fal_object_t *obj); +int fal_delete_lag(fal_object_t obj); +int fal_set_lag_attr(fal_object_t obj, + uint32_t attr_count, + const struct fal_attribute_t *attr_list); +int fal_get_lag_attr(fal_object_t obj, + uint32_t attr_count, + struct fal_attribute_t *attr_list); +void fal_dump_lag(fal_object_t obj, json_writer_t *wr); +int fal_create_lag_member(uint32_t attr_count, + struct fal_attribute_t *attr_list, + fal_object_t *obj); +int fal_delete_lag_member(fal_object_t obj); +int fal_set_lag_member_attr(fal_object_t obj, + const struct fal_attribute_t *attr); +int fal_get_lag_member_attr(fal_object_t obj, + uint32_t attr_count, + struct fal_attribute_t *attr_list); + void fal_br_new_port(unsigned int bridge_ifindex, unsigned int child_ifindex, uint32_t attr_count, @@ -575,25 +756,26 @@ void fal_br_del_port(unsigned int bridge_ifindex, unsigned int child_ifindex); void fal_br_new_neigh(unsigned int child_ifindex, uint16_t vlanid, - const struct ether_addr *dst, + const struct rte_ether_addr *dst, uint32_t attr_count, const struct fal_attribute_t *attr_list); void fal_br_upd_neigh(unsigned int child_ifindex, uint16_t vlanid, - const struct ether_addr *dst, + const struct rte_ether_addr *dst, struct fal_attribute_t *attr); void fal_br_del_neigh(unsigned int child_ifindex, uint16_t vlanid, - const struct ether_addr *dst); + const struct rte_ether_addr *dst); void fal_br_flush_neigh(unsigned int bridge_ifindex, uint32_t attr_count, const struct fal_attribute_t *attr); void fal_fdb_flush_mac(unsigned int bridge_ifindex, 
unsigned int child_ifindex, - const struct ether_addr *mac); + const struct rte_ether_addr *mac); void fal_fdb_flush(unsigned int bridge_ifindex, unsigned int child_ifindex, uint16_t vlanid, bool only_dynamic); int fal_br_walk_neigh(unsigned int bridge_ifindex, uint16_t vlanid, - const struct ether_addr *dst, unsigned int child_ifindex, + const struct rte_ether_addr *dst, + unsigned int child_ifindex, fal_br_walk_neigh_fn cb, void *arg); int fal_vlan_get_stats(uint16_t vlan, uint32_t num_cntrs, @@ -625,42 +807,78 @@ int fal_get_switch_attrs(uint32_t attr_count, int fal_set_switch_attr(const struct fal_attribute_t *attr); +int fal_ip_new_neigh(unsigned int if_index, + fal_object_t rtr_intf_obj, + const struct sockaddr *sa, + uint32_t attr_count, + const struct fal_attribute_t *attr_list); int fal_ip_upd_neigh(unsigned int if_index, + fal_object_t rtr_intf_obj, const struct sockaddr *sa, const struct fal_attribute_t *attr); int fal_ip_get_neigh_attrs(unsigned int if_index, + fal_object_t rtr_intf_obj, const struct sockaddr *sa, uint32_t attr_count, struct fal_attribute_t *attr_list); int fal_ip4_new_neigh(unsigned int if_index, + fal_object_t rtr_intf_obj, const struct sockaddr_in *sin, uint32_t attr_count, const struct fal_attribute_t *attr_list); int fal_ip4_upd_neigh(unsigned int if_index, + fal_object_t rtr_intf_obj, const struct sockaddr_in *sin, struct fal_attribute_t *attr); int fal_ip4_del_neigh(unsigned int if_index, + fal_object_t rtr_intf_obj, const struct sockaddr_in *sin); +void fal_ip4_dump_neigh(unsigned int if_index, + fal_object_t rtr_intf_obj, + const struct sockaddr_in *sin, + json_writer_t *wr); void fal_ip4_new_addr(unsigned int if_index, const struct if_addr *ifa); void fal_ip4_upd_addr(unsigned int if_index, const struct if_addr *ifa); void fal_ip4_del_addr(unsigned int if_index, const struct if_addr *ifa); -int fal_ip4_new_next_hops(size_t nhops, const struct next_hop hops[], - fal_object_t *nhg_object, fal_object_t *obj); -int 
fal_ip4_del_next_hops(fal_object_t nhg_object, size_t nhops, - const struct next_hop *hops, - const fal_object_t *obj); -int fal_ip4_new_route(vrfid_t vrf_id, in_addr_t addr, uint8_t prefixlen, +int fal_ip_new_next_hops(enum fal_next_hop_group_use use, + size_t nhops, const struct next_hop hops[], + fal_object_t *nhg_object, fal_object_t *obj); +int fal_ip_del_next_hops(fal_object_t nhg_object, size_t nhops, + const fal_object_t *obj); +int fal_ip_upd_next_hop_state(const fal_object_t *nh_list, int index, + bool usable); +enum fal_packet_action_t +fal_next_hop_group_packet_action(uint32_t nhops, const struct next_hop hops[]); + +int fal_ip4_new_route(vrfid_t vrf_id, fal_object_t vrf_obj, + in_addr_t addr, uint8_t prefixlen, uint32_t tableid, struct next_hop hops[], - size_t size, fal_object_t nhg_object); -int fal_ip4_upd_route(vrfid_t vrf_id, in_addr_t addr, uint8_t prefixlen, + size_t nhops, fal_object_t nhg_object); +int fal_ip4_upd_route(vrfid_t vrf_id, fal_object_t vrf_obj, + in_addr_t addr, uint8_t prefixlen, uint32_t tableid, struct next_hop hops[], - size_t size, fal_object_t nhg_object); -int fal_ip4_del_route(vrfid_t vrf_id, in_addr_t addr, uint8_t prefixlen, + size_t nhops, fal_object_t nhg_object); +int fal_ip4_del_route(vrfid_t vrf_id, fal_object_t vrf_obj, + in_addr_t addr, uint8_t prefixlen, uint32_t tableid); +int fal_ip4_get_route_attrs(vrfid_t vrf_id, fal_object_t vrf_obj, + in_addr_t addr, uint8_t prefixlen, + uint32_t tableid, uint32_t attr_count, + const struct fal_attribute_t *attr_list); +int fal_ip6_get_route_attrs(vrfid_t vrf_id, fal_object_t vrf_obj, + const struct in6_addr *addr, + uint8_t prefixlen, uint32_t tableid, + uint32_t attr_count, + const struct fal_attribute_t *attr_list); +int fal_ip_walk_routes(fal_plugin_route_walk_fn cb, + uint32_t attr_cnt, + struct fal_attribute_t *attr_list, + void *arg); + int fal_create_ipmc_rpf_group(uint32_t *ifindex_list, uint32_t num_int, fal_object_t *rpf_group_id, struct fal_object_list_t 
**rpf_member_list); @@ -679,34 +897,47 @@ int fal_ip6_upd_mroute(fal_object_t obj, struct mf6c *rt, struct vmf6cctl *mfc, struct cds_lfht *iftable); int fal_ip6_new_neigh(unsigned int if_index, + fal_object_t rtr_intf_obj, const struct sockaddr_in6 *sin6, uint32_t attr_count, const struct fal_attribute_t *attr_list); int fal_ip6_upd_neigh(unsigned int if_index, + fal_object_t rtr_intf_obj, const struct sockaddr_in6 *sin6, struct fal_attribute_t *attr); int fal_ip6_del_neigh(unsigned int if_index, + fal_object_t rtr_intf_obj, const struct sockaddr_in6 *sin6); +void fal_ip6_dump_neigh(unsigned int if_index, + fal_object_t rtr_intf_obj, + const struct sockaddr_in6 *sin6, + json_writer_t *wr); void fal_ip6_new_addr(unsigned int if_index, const struct if_addr *ifa); void fal_ip6_upd_addr(unsigned int if_index, const struct if_addr *ifa); void fal_ip6_del_addr(unsigned int if_index, const struct if_addr *ifa); -int fal_ip6_new_next_hops(size_t nhops, const struct next_hop_v6 hops[], - fal_object_t *group_obj, fal_object_t *obj); -int fal_ip6_del_next_hops(fal_object_t group_obj, size_t nhops, - const struct next_hop_v6 *hops, - const fal_object_t *obj); -int fal_ip6_new_route(vrfid_t vrf_id, const struct in6_addr *addr, +int fal_ip_get_next_hop_group_attrs(fal_object_t nhg_object, + uint32_t attr_count, + struct fal_attribute_t *attr_list); +void fal_ip_dump_next_hop_group(fal_object_t nhg_object, json_writer_t *wr); +int fal_ip_get_next_hop_attrs(fal_object_t nh_object, + uint32_t attr_count, + struct fal_attribute_t *attr_list); +void fal_ip_dump_next_hop(fal_object_t nh_object, json_writer_t *wr); +int fal_ip6_new_route(vrfid_t vrf_id, fal_object_t vrf_obj, + const struct in6_addr *addr, uint8_t prefixlen, uint32_t tableid, - struct next_hop_v6 hops[], size_t size, - fal_object_t group_obj); -int fal_ip6_upd_route(vrfid_t vrf_id, const struct in6_addr *addr, + struct next_hop hops[], size_t nhops, + fal_object_t nhg_object); +int fal_ip6_upd_route(vrfid_t vrf_id, 
fal_object_t vrf_obj, + const struct in6_addr *addr, uint8_t prefixlen, uint32_t tableid, - struct next_hop_v6 hops[], size_t size, - fal_object_t group_obj); -int fal_ip6_del_route(vrfid_t vrf_id, const struct in6_addr *addr, + struct next_hop hops[], size_t nhops, + fal_object_t nhg_object); +int fal_ip6_del_route(vrfid_t vrf_id, fal_object_t vrf_obj, + const struct in6_addr *addr, uint8_t prefixlen, uint32_t tableid); int fal_ip_mcast_get_stats(fal_object_t obj, uint32_t num_counters, @@ -809,11 +1040,11 @@ int fal_qos_get_map_attrs(fal_object_t map_id, uint32_t attr_count, struct fal_attribute_t *attr_list); int fal_qos_new_scheduler(fal_object_t switch_id, uint32_t attr_count, const struct fal_attribute_t *attr_list, - fal_object_t *new_scheduler_id); -int fal_qos_del_scheduler(fal_object_t scheduler_id); -int fal_qos_upd_scheduler(fal_object_t scheduler_id, + fal_object_t *new_sched_id); +int fal_qos_del_scheduler(fal_object_t sched_id); +int fal_qos_upd_scheduler(fal_object_t sched_id, const struct fal_attribute_t *attr); -int fal_qos_get_scheduler_attrs(fal_object_t scheduler_id, uint32_t attr_count, +int fal_qos_get_scheduler_attrs(fal_object_t sched_id, uint32_t attr_count, struct fal_attribute_t *attr_list); int fal_qos_new_sched_group(fal_object_t switch_id, uint32_t attr_count, const struct fal_attribute_t *attr_list, @@ -831,8 +1062,11 @@ int fal_qos_del_wred(fal_object_t wred_id); int fal_qos_upd_wred(fal_object_t wred_id, const struct fal_attribute_t *attr); int fal_qos_get_wred_attrs(fal_object_t wred_id, uint32_t attr_count, struct fal_attribute_t *attr_list); -void fal_qos_dump_map(fal_object_t obj, json_writer_t *wr); -void fal_qos_dump_sched_group(fal_object_t obj, json_writer_t *wr); +void fal_qos_dump_map(fal_object_t map, json_writer_t *wr); +void fal_qos_dump_sched_group(fal_object_t sg, json_writer_t *wr); +void fal_qos_dump_buf_errors(json_writer_t *wr); +int fal_qos_get_counters(const uint32_t *cntr_ids, uint32_t num_cntrs, + uint64_t 
*cntrs); int fal_mirror_session_create(uint32_t attr_count, const struct fal_attribute_t *attr_list, @@ -848,10 +1082,13 @@ uint8_t fal_feat_storageid(void); int fal_vlan_feature_create(uint32_t attr_count, const struct fal_attribute_t *attr_list, - fal_object_t *fal_obj_id); -int fal_vlan_feature_delete(fal_object_t fal_obj_id); -int fal_vlan_feature_set_attr(fal_object_t fal_obj_id, + fal_object_t *obj); +int fal_vlan_feature_delete(fal_object_t obj); +int fal_vlan_feature_set_attr(fal_object_t obj, const struct fal_attribute_t *attr); +int fal_vlan_feature_get_attr(fal_object_t obj, + uint32_t attr_count, + struct fal_attribute_t *attr_list); int fal_backplane_bind(unsigned int bp_ifindex, unsigned int ifindex); void fal_backplane_dump(unsigned int bp_ifindex, json_writer_t *wr); @@ -907,4 +1144,32 @@ int fal_acl_get_counter_attr(fal_object_t counter_id, struct fal_attribute_t *attr_list); /* End of ACL related functions */ +int fal_capture_create(uint32_t attr_count, + const struct fal_attribute_t *attr_list, + fal_object_t *obj); +void fal_capture_delete(fal_object_t obj); +int fal_capture_get_stats(fal_object_t obj, uint32_t num_counters, + const enum fal_capture_stat_type *cntr_ids, + uint64_t *stats); + +int fal_create_mpls_route(const struct fal_mpls_route_t *mpls_route, + uint32_t attr_count, + const struct fal_attribute_t *attr_list); +int fal_delete_mpls_route(const struct fal_mpls_route_t *mpls_route); +int fal_set_mpls_route_attr(const struct fal_mpls_route_t *mpls_route, + const struct fal_attribute_t *attr); +int fal_get_mpls_route_attr(const struct fal_mpls_route_t *mpls_route, + uint32_t attr_count, + struct fal_attribute_t *attr_list); + +int fal_vrf_create(uint32_t attr_count, + const struct fal_attribute_t *attr_list, + fal_object_t *obj); +int fal_vrf_delete(fal_object_t obj); +int fal_set_vrf_attr(fal_object_t obj, + const struct fal_attribute_t *attr); +int fal_get_vrf_attr(fal_object_t obj, + uint32_t attr_count, + struct fal_attribute_t 
*attr_list); + #endif /* FAL_H */ diff --git a/src/feature_plugin.c b/src/feature_plugin.c new file mode 100644 index 00000000..a3beba6a --- /dev/null +++ b/src/feature_plugin.c @@ -0,0 +1,239 @@ +/* + * feature_plugin.c + * + * Copyright (c) 2019-2020, AT&T Intellectual Property. All rights reserved. + * + * Copyright (c) 2016, 2017 by Brocade Communications Systems, Inc. + * All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ + +#include +#include +#include +#include +#include +#include + +#include "feature_plugin_internal.h" +#include "json_writer.h" +#include "pl_internal.h" +#include "urcu.h" +#include "vplane_log.h" + +#define PL_DLL_LOC PKGLIB_DIR"/pipeline/plugins" +static const char *feat_plugin_dir; + +struct plugin_handle { + char *lib_name; + char *feat_name; + void *handle; + struct cds_list_head list_entry; + struct rcu_head rcu_head; +}; + +static struct cds_list_head feature_plugin_list_head = + CDS_LIST_HEAD_INIT(feature_plugin_list_head); + +static void feature_load_plugin(const char *buf) +{ + int (*feature_plugin_init)(const char **name); + int rv; + void *handle; + struct plugin_handle *pl_handle; + + handle = dlopen(buf, RTLD_NOW); + if (handle == NULL) { + RTE_LOG(ERR, DATAPLANE, + "failed to load feature plug-in: %s\n", + dlerror()); + return; + } + + /* Check it has an init func */ + feature_plugin_init = dlsym(handle, "dp_feature_plugin_init"); + if (!feature_plugin_init) { + /* Not a feature plugin library */ + dlclose(handle); + return; + } + + pl_handle = malloc(sizeof(*pl_handle)); + if (!pl_handle) { + RTE_LOG(INFO, DATAPLANE, + "Failed to load feature plug-in: %s out of memory\n", + buf); + dlclose(handle); + return; + } + pl_handle->handle = handle; + pl_handle->lib_name = strdup(buf); + if (!pl_handle->lib_name) { + RTE_LOG(INFO, DATAPLANE, + "Failed to load feature plug-in: %s out of memory\n", + buf); + free(pl_handle); + dlclose(handle); + return; + } + + RTE_LOG(INFO, DATAPLANE, + "loaded feature plug-in: 
%s\n", buf); + rv = feature_plugin_init((const char **)&pl_handle->feat_name); + if (rv) { + RTE_LOG(INFO, DATAPLANE, + "Failed to initialised feature plug-in: %s\n", buf); + free(pl_handle->lib_name); + free(pl_handle); + dlclose(handle); + return; + } + + RTE_LOG(INFO, DATAPLANE, + "initialised feature plug-in: %s\n", buf); + cds_list_add_rcu(&pl_handle->list_entry, &feature_plugin_list_head); +} + +static void feature_load_plugins_internal(const char *dir) +{ + /* + * Iterate through directory loading pipeline plugins + */ + DIR *dp; + struct dirent *ep; + + dp = opendir(dir); + RTE_LOG(INFO, DATAPLANE, "Checking for feature plugins in %s\n", + dir); + + if (dp != NULL) { + while ((ep = readdir(dp))) { + /* restrict to .so files only */ + char *tmp = strrchr(ep->d_name, '.'); + if (!tmp) + continue; + if (strcmp(tmp, ".so") != 0) + continue; + + char buf[1024]; + snprintf(buf, 1024, "%s/%s", + dir, ep->d_name); + feature_load_plugin(buf); + } + } else { + /* + * The directory not existing is normal so don't log + * an error in that case. 
+ */ + if (errno != ENOENT) + RTE_LOG(ERR, DATAPLANE, + "error opening feature plug-in directory \"%s\": %s\n", + dir, strerror(errno)); + return; + } + closedir(dp); +} + +void feature_load_plugins(void) +{ + feature_load_plugins_internal(PL_DLL_LOC); + if (feat_plugin_dir) + feature_load_plugins_internal(feat_plugin_dir); +} + +static void feature_plugin_free(struct rcu_head *head) +{ + struct plugin_handle *handle; + + handle = caa_container_of(head, struct plugin_handle, rcu_head); + free(handle->lib_name); + free(handle); +} + +void feature_unload_plugins(void) +{ + struct plugin_handle *pl_handle; + struct cds_list_head *this_entry, *next; + int (*feature_plugin_cleanup)(void); + int rv; + + cds_list_for_each_safe(this_entry, next, &feature_plugin_list_head) { + pl_handle = cds_list_entry(this_entry, + struct plugin_handle, + list_entry); + + cds_list_del_rcu(&pl_handle->list_entry); + + feature_plugin_cleanup = dlsym(pl_handle->handle, + "dp_feature_plugin_cleanup"); + if (feature_plugin_cleanup) { + rv = feature_plugin_cleanup(); + if (rv) + RTE_LOG(INFO, DATAPLANE, + "Failed to clean up feature plug-in: %s\n", + pl_handle->lib_name); + else + RTE_LOG(INFO, DATAPLANE, + "Cleaned up feature plug-in: %s\n", + pl_handle->lib_name); + } + dp_rcu_barrier(); + dlclose(pl_handle->handle); + call_rcu(&pl_handle->rcu_head, feature_plugin_free); + } + + feature_unregister_all_string_op_handlers(); + feature_unregister_all_string_cfg_handlers(); +} + +static void cmd_feat_plugin_show(FILE *f) +{ + json_writer_t *json; + struct plugin_handle *pl_handle; + struct cds_list_head *this_entry, *next; + + json = jsonw_new(f); + jsonw_pretty(json, true); + + jsonw_name(json, "feature_plugin"); + jsonw_start_array(json); + + cds_list_for_each_safe(this_entry, next, &feature_plugin_list_head) { + pl_handle = cds_list_entry(this_entry, + struct plugin_handle, + list_entry); + jsonw_start_object(json); + jsonw_string_field(json, "lib", pl_handle->lib_name); + 
jsonw_string_field(json, "feature_name", pl_handle->feat_name); + pl_show_plugin_state(json, pl_handle->feat_name); + jsonw_end_object(json); + } + + jsonw_end_array(json); + jsonw_destroy(&json); + +} + +/* + * feat_plugin show + */ +int cmd_feat_plugin(FILE *f, int argc, char **argv) +{ + if (argc != 2) + goto error; + + if (strcmp(argv[1], "show") == 0) { + cmd_feat_plugin_show(f); + return 0; + } + +error: + fprintf(f, "Usage: feat_plugin show"); + return -1; +} + +void set_feat_plugin_dir(const char *filename) +{ + feat_plugin_dir = filename; +} diff --git a/src/feature_plugin_internal.h b/src/feature_plugin_internal.h new file mode 100644 index 00000000..90e2313c --- /dev/null +++ b/src/feature_plugin_internal.h @@ -0,0 +1,20 @@ +/* + * Copyright (c) 2019-2020, AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ +#ifndef FEATURE_PLUGIN_INTERNAL_H +#define FEATURE_PLUGIN_INTERNAL_H + +void feature_load_plugins(void); +void feature_unload_plugins(void); + +int cmd_feat_plugin(FILE *f, int argc, char **argv); + +void feature_unregister_all_string_op_handlers(void); +void feature_unregister_all_string_cfg_handlers(void); + +void set_feat_plugin_dir(const char *filename); + +#endif /* FEATURE_PLUGIN_INTERNAL_H */ + diff --git a/src/flow_cache.c b/src/flow_cache.c new file mode 100644 index 00000000..2cb2f293 --- /dev/null +++ b/src/flow_cache.c @@ -0,0 +1,599 @@ +/*- + * Copyright (c) 2020, AT&T Intellectual Property. + * All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ +#include +#include +#include +#include +#include +#include "vplane_log.h" +#include "vplane_debug.h" +#include "json_writer.h" +#include "flow_cache.h" +#include "ip.h" +#include "vrf_internal.h" +#include "ip_funcs.h" +#include "../netinet6/ip6_funcs.h" + +#define FLOW_CACHE_DEBUG(args...) \ + DP_DEBUG(FLOW_CACHE, DEBUG, POLICY, args) + +#define FLOW_CACHE_ERR(args...) 
\ + DP_DEBUG(FLOW_CACHE, ERR, POLICY, args) + +#define FLOW_CACHE_NOTICE(args...) \ + DP_DEBUG(FLOW_CACHE, NOTICE, POLICY, args) + +#define FLOW_CACHE_INFO(args...) \ + DP_DEBUG(FLOW_CACHE, INFO, POLICY, args) + +#define FLOW_CACHE_SIZE 4096 + +#define FLOW_CACHE_HASH_SEED 0xDEAFCAFE + +struct flow_cache_hash_key { + enum flow_cache_ftype af; + union addr_u src; + union addr_u dst; + uint32_t proto; + vrfid_t vrfid; +}; + +struct flow_cache_entry { + struct cds_lfht_node fl_node; + struct flow_cache_hash_key key; + uint32_t hit_count; + void *rule; + uint16_t context; + uint32_t last_hit_count; + struct rcu_head flow_cache_rcu; +}; + +#define FLOW_CACHE_HASH_MIN 8 +#define FLOW_CACHE_HASH_MAX 2048 + +#define FLOW_CACHE_MAX_COUNT 4096 +#define FLOW_CACHE_MAX_MARKER (FLOW_CACHE_MAX_COUNT + 1) + +struct flow_cache_af { + struct cds_lfht *cache_tbl; + rte_atomic32_t cache_cnt; +}; + +struct flow_cache_lcore { + struct flow_cache_af cache_af[FLOW_CACHE_MAX]; +}; + +struct flow_cache { + uint32_t max_lcore_entries; + + /* array of hash tables indexed by dp_lcore_id */ + struct flow_cache_lcore *cache_lcore; +}; + +/* PR cache management */ +static inline void +flow_cache_entry_free(struct rcu_head *head) +{ + free(caa_container_of(head, struct flow_cache_entry, + flow_cache_rcu)); +} + +static inline void +flow_cache_entry_destroy(struct flow_cache_entry *cache_entry) +{ + call_rcu(&cache_entry->flow_cache_rcu, flow_cache_entry_free); +} + +static inline bool +flow_cache_match_addr_v4(const struct flow_cache_entry *cache_entry, + const struct flow_cache_hash_key *flow_cache_key) +{ + if ((!addr_u_eq_v4(&cache_entry->key.src, &flow_cache_key->src) || + (!addr_u_eq_v4(&cache_entry->key.dst, &flow_cache_key->dst)))) + return false; + + return true; +} + +static inline bool +flow_cache_match_addr_v6(const struct flow_cache_entry *cache_entry, + const struct flow_cache_hash_key *flow_cache_key) +{ + if ((!addr_u_eq_v6(&cache_entry->key.src, &flow_cache_key->src) || + 
(!addr_u_eq_v6(&cache_entry->key.dst, &flow_cache_key->dst)))) + return false; + + return true; +} + +static inline int +flow_cache_match(struct cds_lfht_node *node, const void *key) +{ + const struct flow_cache_hash_key *flow_cache_key = key; + const struct flow_cache_entry *cache_entry = caa_container_of( + node, const struct flow_cache_entry, fl_node); + int ret; + + if (cache_entry->key.af == FLOW_CACHE_IPV4) + ret = flow_cache_match_addr_v4(cache_entry, flow_cache_key); + else + ret = flow_cache_match_addr_v6(cache_entry, flow_cache_key); + + if (!ret) + return 0; + + if ((cache_entry->key.proto != flow_cache_key->proto) || + (cache_entry->key.vrfid != flow_cache_key->vrfid)) + return 0; + + return 1; +} + +_Static_assert(sizeof(struct flow_cache_hash_key) % 4 == 0, + "struct flow_cache_hash_key must be a multiple of 4 bytes"); + +static inline uint32_t +flow_cache_hash(const struct flow_cache_hash_key *h_key) +{ + return rte_jhash(h_key, sizeof(*h_key), FLOW_CACHE_HASH_SEED); +} + +static inline void +flow_cache_entry_remove(struct flow_cache_lcore *cache_lcore, + struct flow_cache_entry *cache_entry) +{ + enum flow_cache_ftype af = cache_entry->key.af; + + /* + * To avoid a race where an entry has been added but the count + * hasn't been bumped + */ + if (rte_atomic32_read(&cache_lcore->cache_af[af].cache_cnt) == 0) + return; + + cds_lfht_del(cache_lcore->cache_af[af].cache_tbl, + &cache_entry->fl_node); + flow_cache_entry_destroy(cache_entry); + rte_atomic32_dec(&cache_lcore->cache_af[af].cache_cnt); +} + +static int +flow_cache_insert(struct cds_lfht *tbl, struct flow_cache_entry *cache_entry, + uint32_t rss_hash, const struct flow_cache_hash_key *h_key) +{ + struct cds_lfht_node *ret_node; + uint32_t hash; + + cds_lfht_node_init(&cache_entry->fl_node); + + if (rss_hash) + hash = rss_hash; + else + hash = flow_cache_hash(h_key); + + ret_node = cds_lfht_add_unique(tbl, hash, flow_cache_match, h_key, + &cache_entry->fl_node); + + return (ret_node != 
&cache_entry->fl_node) ? -1 : 0; +} + +static inline void +flow_cache_parse_hdr(struct rte_mbuf *m, enum flow_cache_ftype af, + struct flow_cache_hash_key *h) +{ + const struct iphdr *ip; + const struct ip6_hdr *ip6; + + h->af = af; + if (af == FLOW_CACHE_IPV4) { + ip = iphdr(m); + h->dst.ip_v4.s_addr = ip->daddr; + h->src.ip_v4.s_addr = ip->saddr; + h->proto = ip->protocol; + } else if (af == FLOW_CACHE_IPV6) { + ip6 = ip6hdr(m); + memcpy(&h->dst.ip_v6, &ip6->ip6_dst, sizeof(ip6->ip6_dst)); + memcpy(&h->src.ip_v6, &ip6->ip6_src, sizeof(ip6->ip6_src)); + h->proto = ip6->ip6_nxt; + } + h->vrfid = pktmbuf_get_vrf(m); +} + +int flow_cache_lookup(struct flow_cache *cache, struct rte_mbuf *m, + enum flow_cache_ftype ftype, + struct flow_cache_entry **entry) +{ + struct cds_lfht_iter iter; + struct cds_lfht_node *node; + struct flow_cache_hash_key h_key; + struct cds_lfht *table; + unsigned int lcore = dp_lcore_id(); + uint32_t hash; + + memset(&h_key, 0, sizeof(h_key)); + + if (unlikely(!cache || !m || !entry)) + return -EINVAL; + + table = rcu_dereference( + cache->cache_lcore[lcore].cache_af[ftype].cache_tbl); + if (!table) + return -ENOENT; + + flow_cache_parse_hdr(m, ftype, &h_key); + + hash = m->hash.rss; + if (!hash) + hash = flow_cache_hash(&h_key); + cds_lfht_lookup(table, hash, flow_cache_match, &h_key, + &iter); + + node = cds_lfht_iter_get_node(&iter); + if (!node) + return -ENOENT; + + *entry = caa_container_of(node, struct flow_cache_entry, + fl_node); + + (*entry)->hit_count++; + return 0; +} + +int flow_cache_entry_get_info(struct flow_cache_entry *entry, + void **rule, uint16_t *context) +{ + if (unlikely(!entry || !rule || !context)) + return -EINVAL; + + *rule = entry->rule; + *context = entry->context; + return 0; +} + +int flow_cache_entry_set_info(struct flow_cache_entry *entry, + void *rule, uint16_t context) +{ + if (unlikely(!entry)) + return -EINVAL; + + entry->rule = rule; + entry->context = context; + return 0; +} + +int +flow_cache_add(struct 
flow_cache *flow_cache, void *rule, uint16_t ctx, + struct rte_mbuf *m, enum flow_cache_ftype ftype) +{ + struct flow_cache_entry *cache_entry; + int error; + struct flow_cache_hash_key h_key; + struct flow_cache_af *cache_af = + &flow_cache->cache_lcore[dp_lcore_id()].cache_af[ftype]; + struct cds_lfht *table = rcu_dereference(cache_af->cache_tbl); + + memset(&h_key, 0, sizeof(h_key)); + + if (!table) + return -1; + + flow_cache_parse_hdr(m, ftype, &h_key); + cache_entry = malloc_aligned(sizeof(struct flow_cache_entry)); + if (unlikely(cache_entry == NULL)) + return -1; + + cache_entry->key = h_key; + cache_entry->rule = rule; + cache_entry->hit_count = cache_entry->last_hit_count = 0; + + error = flow_cache_insert(table, cache_entry, m->hash.rss, &h_key); + + if (unlikely(error != 0)) { + free(cache_entry); + return -1; + } + flow_cache_entry_set_info(cache_entry, rule, ctx); + rte_atomic32_inc(&cache_af->cache_cnt); + return 0; +} + +static void +flow_cache_empty_table(struct flow_cache *flow_cache, unsigned int lcore, + enum flow_cache_ftype af) +{ + struct flow_cache_lcore *cache_lcore = &flow_cache->cache_lcore[lcore]; + struct flow_cache_entry *cache_entry; + struct cds_lfht_iter iter; + struct cds_lfht *table; + + table = rcu_dereference(cache_lcore->cache_af[af].cache_tbl); + if (!table) + return; + + cds_lfht_for_each_entry(table, &iter, cache_entry, fl_node) + flow_cache_entry_remove(cache_lcore, cache_entry); +} + +int +flow_cache_init_lcore(struct flow_cache *flow_cache, unsigned int lcore) +{ + enum flow_cache_ftype af, tmp_af; + struct flow_cache_lcore *cache_lcore; + + if (!flow_cache || !flow_cache->cache_lcore || + (lcore > get_lcore_max())) + return -EINVAL; + + cache_lcore = &flow_cache->cache_lcore[lcore]; + for (af = FLOW_CACHE_IPV4; af < FLOW_CACHE_MAX; af++) { + if (cache_lcore->cache_af[af].cache_tbl) + continue; + cache_lcore->cache_af[af].cache_tbl = + cds_lfht_new(FLOW_CACHE_HASH_MIN, + FLOW_CACHE_HASH_MIN, + 
flow_cache->max_lcore_entries, + CDS_LFHT_AUTO_RESIZE, + NULL); + if (cache_lcore->cache_af[af].cache_tbl == NULL) + goto err; + } + return 0; + +err: + FLOW_CACHE_ERR("Failed to create flow cache table for cpu %d af %d\n", + lcore, af); + for (tmp_af = FLOW_CACHE_IPV4; tmp_af < af; tmp_af++) + if (cache_lcore->cache_af[tmp_af].cache_tbl) { + cds_lfht_destroy( + cache_lcore->cache_af[tmp_af].cache_tbl, NULL); + cache_lcore->cache_af[tmp_af].cache_tbl = NULL; + } + return -ENOMEM; +} + +int +flow_cache_teardown_lcore(struct flow_cache *flow_cache, unsigned int lcore) +{ + enum flow_cache_ftype af; + struct flow_cache_lcore *cache_lcore; + + if (!flow_cache || !flow_cache->cache_lcore || + (lcore > get_lcore_max())) + return -EINVAL; + + cache_lcore = &flow_cache->cache_lcore[lcore]; + for (af = FLOW_CACHE_IPV4; af < FLOW_CACHE_MAX; af++) { + if (cache_lcore->cache_af[af].cache_tbl) { + flow_cache_empty_table(flow_cache, lcore, af); + cds_lfht_destroy( + cache_lcore->cache_af[af].cache_tbl, NULL); + cache_lcore->cache_af[af].cache_tbl = NULL; + } + } + return 0; +} + +struct flow_cache *flow_cache_init(uint32_t max_entries) +{ + struct flow_cache *cache; + unsigned int max_lcores = get_lcore_max() + 1; + + cache = malloc(sizeof(*cache)); + if (!cache) { + RTE_LOG(ERR, DATAPLANE, "Could not allocate flow cache\n"); + return NULL; + } + + cache->max_lcore_entries = max_entries; + cache->cache_lcore = calloc(1, (sizeof(struct flow_cache_lcore) * + max_lcores)); + if (!cache->cache_lcore) { + RTE_LOG(ERR, DATAPLANE, + "Could not allocate per-core flow cache table\n"); + free(cache); + return NULL; + } + + return cache; +} + +void flow_cache_age(struct flow_cache *flow_cache) +{ + unsigned int lcore_id, max_lcores = get_lcore_max() + 1; + struct flow_cache_entry *cache_entry; + struct flow_cache_lcore *cache_lcore; + struct flow_cache_af *cache_af; + enum flow_cache_ftype af; + struct cds_lfht_iter iter; + struct cds_lfht *table; + + for (lcore_id = 0; lcore_id < 
max_lcores; lcore_id++) { + cache_lcore = &flow_cache->cache_lcore[lcore_id]; + for (af = FLOW_CACHE_IPV4; af < FLOW_CACHE_MAX; af++) { + cache_af = &cache_lcore->cache_af[af]; + table = rcu_dereference(cache_af->cache_tbl); + if (!table) + continue; + + cds_lfht_for_each_entry(table, &iter, cache_entry, + fl_node) { + /* + * if hit count wasn't cached, cache it and + * wait for the next iteration. If not, remove + * the entry if there have been no more hits + */ + if (!cache_entry->last_hit_count && + cache_entry->hit_count) + cache_entry->last_hit_count = + cache_entry->hit_count; + else if (cache_entry->last_hit_count == + cache_entry->hit_count) + flow_cache_entry_remove(cache_lcore, + cache_entry); + } + } + } +} + +static void +flow_cache_destroy_table(struct flow_cache *flow_cache, unsigned int lcore, + enum flow_cache_ftype af) +{ + struct flow_cache_lcore *cache_lcore = &flow_cache->cache_lcore[lcore]; + struct cds_lfht *table; + + table = rcu_dereference(cache_lcore->cache_af[af].cache_tbl); + if (!table) + return; + + rcu_assign_pointer(cache_lcore->cache_af[af].cache_tbl, NULL); + + if (cds_lfht_destroy(table, NULL)) + FLOW_CACHE_ERR("Cache tbl destroy failed for lcore %d af %d\n", + lcore, af); +} + +/* + * This may be called in an rcu_callback or in the main thread. In the + * rcu_callback it must be in clear_only mode. + */ +void +flow_cache_invalidate(struct flow_cache *flow_cache, bool disable, + bool clear_only) +{ + unsigned int lcore_id, max_lcores = get_lcore_max() + 1; + enum flow_cache_ftype af; + + for (lcore_id = 0; lcore_id < max_lcores; lcore_id++) { + for (af = FLOW_CACHE_IPV4; af < FLOW_CACHE_MAX; af++) { + flow_cache_empty_table(flow_cache, lcore_id, af); + if (disable && !clear_only) + flow_cache_destroy_table(flow_cache, lcore_id, + af); + } + } + + FLOW_CACHE_INFO("Flow cache %s\n", + disable && !clear_only ? 
"disabled" : "invalidated"); +} + +static const char *af_names[FLOW_CACHE_MAX] = { + [FLOW_CACHE_IPV4] = "ipv4", + [FLOW_CACHE_IPV6] = "ipv6" +}; + +static void +flow_cache_dump_table(struct cds_lfht *table, + json_writer_t *wr, bool detail, + flow_cache_dump_cb dump_helper) +{ + struct flow_cache_entry *cache_entry; + struct cds_lfht_iter iter; + char addrbuf[INET6_ADDRSTRLEN]; + + jsonw_start_array(wr); + cds_lfht_for_each_entry(table, &iter, + cache_entry, fl_node) { + int af; + struct flow_cache_hash_key *cache_key; + + cache_key = &cache_entry->key; + af = cache_key->af == FLOW_CACHE_IPV4 ? + AF_INET : AF_INET6; + jsonw_start_object(wr); + jsonw_string_field(wr, "dst", + inet_ntop(af, + &cache_key->dst, + addrbuf, + sizeof(addrbuf))); + jsonw_string_field(wr, "src", + inet_ntop(af, + &cache_key->src, + addrbuf, + sizeof(addrbuf))); + jsonw_uint_field(wr, "proto", cache_key->proto); + jsonw_uint_field(wr, "hit_count", + cache_entry->hit_count); + jsonw_uint_field(wr, "last_hit_count", + cache_entry->last_hit_count); + dump_helper(cache_entry, detail, wr); + jsonw_end_object(wr); + } + jsonw_end_array(wr); +} + +static void +flow_cache_dump_lcore(struct flow_cache_lcore *cache_lcore, + json_writer_t *wr, bool detail, + flow_cache_dump_cb dump_helper) +{ + struct flow_cache_af *cache_af; + struct cds_lfht *table; + bool disabled = false; + + jsonw_start_object(wr); + jsonw_start_array(wr); + for (enum flow_cache_ftype af = FLOW_CACHE_IPV4; + af < FLOW_CACHE_MAX; af++) { + jsonw_name(wr, af_names[af]); + jsonw_start_object(wr); + + cache_af = &cache_lcore->cache_af[af]; + table = rcu_dereference(cache_af->cache_tbl); + if (!table) + disabled = true; + + if (disabled) { + jsonw_string_field(wr, "flow_cache", + "disabled"); + goto end_af_obj; + } + jsonw_string_field(wr, "flow_cache", "enabled"); + jsonw_start_object(wr); + jsonw_uint_field(wr, "cache_cnt", + rte_atomic32_read( + &cache_af->cache_cnt)); + jsonw_end_object(wr); + if (!detail) + goto end_af_obj; + + 
flow_cache_dump_table(table, wr, detail, dump_helper); + +end_af_obj: + jsonw_end_object(wr); + } + jsonw_end_array(wr); + jsonw_end_object(wr); +} + +void flow_cache_dump(struct flow_cache *flow_cache, json_writer_t *wr, + bool detail, flow_cache_dump_cb dump_helper) +{ + unsigned int i; + unsigned int max_lcores = get_lcore_max() + 1; + + if (!wr) + return; + + jsonw_start_object(wr); + jsonw_name(wr, "cores"); + jsonw_start_array(wr); + + for (i = 0; i < max_lcores; i++) { + struct flow_cache_lcore *cache_lcore; + + jsonw_uint_field(wr, "core_id", i); + + cache_lcore = &flow_cache->cache_lcore[i]; + + flow_cache_dump_lcore(cache_lcore, wr, detail, dump_helper); + } + + jsonw_end_array(wr); + jsonw_end_object(wr); +} + diff --git a/src/flow_cache.h b/src/flow_cache.h new file mode 100644 index 00000000..1863d2c2 --- /dev/null +++ b/src/flow_cache.h @@ -0,0 +1,213 @@ +/*- + * Copyright (c) 2020, AT&T Intellectual Property. + * All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ +#ifndef FLOW_CACHE_H + +#define FLOW_CACHE_H + +struct flow_cache; +struct flow_cache_entry; + +/* + * set of flow types supported by flow cache + * allows af to be used as index and enables common code + */ +enum flow_cache_ftype { + FLOW_CACHE_IPV4, + FLOW_CACHE_IPV6, + FLOW_CACHE_MAX +}; + +/** + * Set up flow cache. The flow cache consists of an array of lock-free + * hash tables indexed by dp_lcore_id. Each hash table contains entries + * keyed by RSS hash of the packet or hash value computed by the library. 
+ *
+ * @param max_entries
+ *   Maximum number of entries in cache
+ *   NOTE(review): flow_cache.c hands this value to cds_lfht_new() as the
+ *   third (maximum bucket count) argument of each per-lcore table; no
+ *   separate per-entry limit is visible there - confirm the intent.
+ *
+ * @return
+ *   The pointer to the flow cache on success
+ *   NULL if allocation fails
+ */
+struct flow_cache *flow_cache_init(uint32_t max_entries);
+
+/**
+ * Initialize table specific to the lcore
+ * Invoked when lcore is brought up
+ *
+ * flow_cache_teardown_lcore() is the counterpart: it removes all entries
+ * of the lcore and destroys its tables. It returns 0 on success and
+ * -EINVAL on invalid arguments.
+ *
+ * @param cache
+ *   Address of flow cache to be operated on
+ *
+ * @param lcore_id
+ *   core id returned by dp_lcore_id()
+ *
+ * @return
+ *   0 on success
+ *   -EINVAL if cache is NULL or lcore_id is out of range
+ *   -ENOMEM if a hash table could not be created
+ */
+int flow_cache_init_lcore(struct flow_cache *cache, unsigned int lcore_id);
+int flow_cache_teardown_lcore(struct flow_cache *cache, unsigned int lcore_id);
+
+/**
+ *
+ * Add an entry to the flow cache corresponding to the lcore from
+ * which the function is invoked.
+ *
+ * @param cache
+ *   Address of the flow cache to which entry is to be added
+ *
+ * @param rule
+ *   Pointer to the application-specific rule for the entry.
+ *   A NULL value indicates a flow entry that does not match
+ *   any rule in the application's ruleset
+ *
+ * @param ctx
+ *   Application specific context corresponding to rule
+ *
+ * @param m
+ *   Packet belonging to flow. The address family is expected
+ *   to match the address family used to create the cache
+ *
+ * @param ftype
+ *   The type of flow to add to the cache.
+ *
+ * @return
+ *   0 on success
+ *   -1 on failure: the lcore has no table for this flow type, memory
+ *   could not be allocated, or an equivalent entry is already cached.
+ *   (The implementation does not return distinct errno values.)
+ */
+int flow_cache_add(struct flow_cache *cache, void *rule, uint16_t ctx,
+		   struct rte_mbuf *m, enum flow_cache_ftype ftype);
+
+/**
+ *
+ * Look up cache entry corresponding to packet in lcore-specific cache
+ *
+ * @param cache
+ *   Address of the flow cache in which the lookup is to be performed
+ *
+ * @param m
+ *   Packet for which lookup is to be performed
+ *
+ * @param ftype
+ *   Type of flow. Determines the table and match function used
+ *
+ * @param entry
+ *   Output parameter. Cache entry corresponding to packet.
+ *   Written only when 0 is returned; left untouched when no entry is
+ *   found, so it must not be read after a failed lookup.
+ *
+ * @return
+ *   0 on success (the hit count of the entry is incremented)
+ *   -EINVAL if cache, m or entry is NULL
+ *   -ENOENT if the lcore has no table for ftype or there is no entry
+ */
+int flow_cache_lookup(struct flow_cache *cache, struct rte_mbuf *m,
+		      enum flow_cache_ftype ftype,
+		      struct flow_cache_entry **entry);
+
+/**
+ *
+ * Accessor to retrieve information from cache entry
+ *
+ * @param entry
+ *   Cache entry to retrieve information from
+ *
+ * @param rule
+ *   Output parameter. Rule provided by application when the cache
+ *   entry was created. The value returned can be NULL. The NULL value is
+ *   used in cases where an application needs to cache flows that do not
+ *   match any rules in their rulesets.
+ *
+ * @param context
+ *   Output parameter. Context provided by application when the cache entry
+ *   is created.
+ *
+ * @return
+ *   0 on success
+ *   -EINVAL if entry, rule or context is a NULL pointer
+ */
+int flow_cache_entry_get_info(struct flow_cache_entry *entry, void **rule,
+			      uint16_t *context);
+
+/**
+ *
+ * Accessor to set information in cache entry
+ *
+ * @param entry
+ *   Cache entry to set information in
+ *
+ * @param rule
+ *   Input parameter. Rule provided by application when the cache
+ *   entry is created/updated. Can be NULL. The NULL value is used in cases
+ *   where an application needs to cache flows that do not match any rules
+ *   in their rulesets.
+ *
+ * @param context
+ *   Input parameter. Context provided by application when the cache entry
+ *   is created/updated.
+ *
+ * @return
+ *   0 on success
+ *   -EINVAL if entry is NULL
+ */
+int flow_cache_entry_set_info(struct flow_cache_entry *entry, void *rule,
+			      uint16_t context);
+
+/**
+ *
+ * Invalidate the flow cache. All entries in the cache are deleted.
+ *
+ * @param cache
+ *   Address of the flow cache to be invalidated.
+ *
+ * @param disable
+ *   If true, and clear_only is false, the hash tables are destroyed
+ *   after being flushed, which disables the cache.
+ *
+ * @param clear_only
+ *   If true, only the entries present are flushed.
+ *   If false and disable is set to true, the entire table is destroyed
+ *
+ */
+void flow_cache_invalidate(struct flow_cache *cache, bool disable,
+			   bool clear_only);
+
+/**
+ * Walk the entire flow cache and age out entries for which
+ * hit count has not changed. The aging interval and timer
+ * are the responsibility of the calling application.
+ *
+ * @param cache
+ *   Address of the flow cache
+ */
+void flow_cache_age(struct flow_cache *cache);
+
+
+typedef void (*flow_cache_dump_cb)(struct flow_cache_entry *entry,
+				   bool detail, json_writer_t *wr);
+/**
+ *
+ * Dump entries in the flow cache
+ *
+ * @param cache
+ *   Address of the flow cache
+ *
+ * @param wr
+ *   json writer object to dump entries
+ *
+ * @param detail
+ *   Controls the level of detail in the output. If false, only the
+ *   enabled/disabled state and the entry count of each table are dumped.
+ *   If true, every flow is dumped as well (addresses, protocol and hit
+ *   counters).
+ *
+ * @param helper
+ *   Callback invoked for each dumped entry to emit application-specific
+ *   info. It is only reached when detail is true.
+ */
+void flow_cache_dump(struct flow_cache *cache, json_writer_t *wr,
+		     bool detail, flow_cache_dump_cb helper);
+
+/**
+ *
+ * Destroy flow cache. Free up all entries
+ *
+ * NOTE(review): no definition of this function is visible in the part of
+ * flow_cache.c added by this change - confirm that one exists.
+ *
+ * @param cache
+ *   Address of the flow cache to be cleaned up
+ */
+void flow_cache_destroy(struct flow_cache *cache);
+
+#endif
diff --git a/src/gpc/gpc_config.c b/src/gpc/gpc_config.c
new file mode 100644
index 00000000..74b9fd34
--- /dev/null
+++ b/src/gpc/gpc_config.c
@@ -0,0 +1,58 @@
+/*-
+ * Copyright (c) 2020, AT&T Intellectual Property.
+ * All rights reserved.
+ * + * SPDX-License-Identifier: LGPL-2.1-only + * + * Generalised Packet Classification (GPF) configuration handling + */ + +#include +#include +#include +#include +#include "protobuf.h" +#include "protobuf/GPCConfig.pb-c.h" +#include "urcu.h" + +/* + * Local storage + */ +static struct cds_list_head *gpc_feature_list; + +static int +gpc_feature_parse(struct _GPCConfig *msg __unused) +{ + return 0; +} + +static int +gpc_config(struct pb_msg *msg) +{ + GPCConfig *config_msg = gpcconfig__unpack(NULL, msg->msg_len, + msg->msg); + int rv; + + /* + * Carry out any one-time initialisation + */ + if (!gpc_feature_list) { + gpc_feature_list = calloc(1, sizeof(*gpc_feature_list)); + if (!gpc_feature_list) { + RTE_LOG(ERR, GPC, "Failed to initialise GPC\n"); + return -ENOMEM; + } + + CDS_INIT_LIST_HEAD(gpc_feature_list); + } + + rv = gpc_feature_parse(config_msg); + + gpcconfig__free_unpacked(config_msg, NULL); + return rv; +} + +PB_REGISTER_CMD(gpc_config_cmd) = { + .cmd = "vyatta:gpc-config", + .handler = gpc_config, +}; diff --git a/src/gpc/gpc_op_mode.c b/src/gpc/gpc_op_mode.c new file mode 100644 index 00000000..fa40cf39 --- /dev/null +++ b/src/gpc/gpc_op_mode.c @@ -0,0 +1,534 @@ +/*- + * Copyright (c) 2020-2021, AT&T Intellectual Property. + * All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + * + * Generalised Packet Classification (GPF) op-mode command handling + */ + +#include +#include +#include +#include +#include + +#include "commands.h" +#include "include/fal_plugin.h" +#include "fal.h" +#include "gpc_pb.h" +#include "gpc_util.h" +#include "json_writer.h" +#include "npf/config/gpc_db_query.h" +#include "npf/config/gpc_hw.h" +#include "urcu.h" +#include "util.h" + +/* + * Some maximum string lengths + */ +#define PREFIX_STRLEN (INET6_ADDRSTRLEN + sizeof("/128")) +#define TABLE_ID_STRLEN (IFNAMSIZ + sizeof("/ingress/ipv4")) + +/* + * For the policer we are only interested in red/dropped packets. 
+ */ +static enum fal_policer_stat_type policer_cntr_id[] = { + FAL_POLICER_STAT_RED_PACKETS +}; + +/* + * Structure definitions + */ +struct gpc_show_context { + json_writer_t *wr; +}; + +static int +gpc_ip_prefix_str(struct ip_prefix *ip_prefix, char *outstr, size_t outstr_len) +{ + char buf[INET6_ADDRSTRLEN]; + + if (inet_ntop(ip_prefix->addr.type, &ip_prefix->addr.address, buf, + sizeof(buf)) == NULL) { + RTE_LOG(ERR, GPC, "inet_ntop error: %d, af: %u\n", + errno, ip_prefix->addr.type); + } else { + snprintf(outstr, outstr_len, "%s/%d", &buf[0], + ip_prefix->prefix_length); + return 0; + } + return 1; +} + +static gpc_pb_rule_match_walker_cb gpc_op_show_match; +static bool +gpc_op_show_match(struct gpc_pb_match *match, struct gpc_walk_context *walk_ctx) +{ + struct gpc_show_context *show_ctx = walk_ctx->data; + json_writer_t *wr = show_ctx->wr; + uint16_t value; + char prefix_str[PREFIX_STRLEN]; + + jsonw_start_object(wr); + switch (match->match_type) { + case GPC_RULE_MATCH_VALUE_NOT_SET: + break; + case GPC_RULE_MATCH_VALUE_SRC_IP: + if (!gpc_ip_prefix_str(&match->match_value.src_ip, + prefix_str, sizeof(prefix_str))) { + jsonw_string_field(wr, "match", "src-ip"); + jsonw_string_field(wr, "value", prefix_str); + } + break; + case GPC_RULE_MATCH_VALUE_DEST_IP: + if (!gpc_ip_prefix_str(&match->match_value.dest_ip, + prefix_str, sizeof(prefix_str))) { + jsonw_string_field(wr, "match", "dest-ip"); + jsonw_string_field(wr, "value", prefix_str); + } + break; + case GPC_RULE_MATCH_VALUE_SRC_PORT: + jsonw_string_field(wr, "match", "src-port"); + jsonw_uint_field(wr, "value", match->match_value.src_port); + break; + case GPC_RULE_MATCH_VALUE_DEST_PORT: + jsonw_string_field(wr, "match", "dest-port"); + jsonw_uint_field(wr, "value", match->match_value.dest_port); + break; + case GPC_RULE_MATCH_VALUE_FRAGMENT: + jsonw_string_field(wr, "match", "fragment"); + jsonw_uint_field(wr, "value", match->match_value.fragment); + break; + case GPC_RULE_MATCH_VALUE_DSCP: + 
jsonw_string_field(wr, "match", "dscp"); + jsonw_uint_field(wr, "value", match->match_value.dscp); + break; + case GPC_RULE_MATCH_VALUE_TTL: + jsonw_string_field(wr, "match", "ttl"); + jsonw_uint_field(wr, "value", match->match_value.ttl); + break; + case GPC_RULE_MATCH_VALUE_ICMPV4: + jsonw_string_field(wr, "match", "icmpv4"); + value = match->match_value.icmpv4.typenum << 8; + value |= match->match_value.icmpv4.code; + jsonw_uint_field(wr, "value", value); + break; + case GPC_RULE_MATCH_VALUE_ICMPV6: + jsonw_string_field(wr, "match", "icmpv6"); + value = match->match_value.icmpv6.typenum << 8; + value |= match->match_value.icmpv6.code; + jsonw_uint_field(wr, "value", value); + break; + case GPC_RULE_MATCH_VALUE_ICMPV6_CLASS: + jsonw_string_field(wr, "match", "icmpv6-class"); + jsonw_uint_field(wr, "value", + match->match_value.icmpv6_class); + break; + case GPC_RULE_MATCH_VALUE_PROTO_BASE: + jsonw_string_field(wr, "match", "base-protocol"); + jsonw_uint_field(wr, "value", + match->match_value.proto_base); + break; + case GPC_RULE_MATCH_VALUE_PROTO_FINAL: + jsonw_string_field(wr, "match", "final-protocol"); + jsonw_uint_field(wr, "value", + match->match_value.proto_final); + break; + default: + RTE_LOG(ERR, GPC, "Unknown GPC Match match-type %u\n", + match->match_type); + break; + } + jsonw_end_object(wr); + return true; +} + +static gpc_pb_rule_action_walker_cb gpc_op_show_action; +static bool +gpc_op_show_action(struct gpc_pb_action *action, + struct gpc_walk_context *walk_ctx) +{ + struct gpc_show_context *show_ctx = walk_ctx->data; + json_writer_t *wr = show_ctx->wr; + struct gpc_pb_policer *policer; + + switch (action->action_type) { + case GPC_RULE_ACTION_VALUE_NOT_SET: + break; + case GPC_RULE_ACTION_VALUE_DECISION: + jsonw_string_field(wr, "decision", + gpc_get_pkt_decision_str(action->action_value.decision)); + break; + case GPC_RULE_ACTION_VALUE_DESIGNATION: + jsonw_uint_field(wr, "designation", + action->action_value.designation); + break; + case 
GPC_RULE_ACTION_VALUE_COLOUR: + jsonw_string_field(wr, "colour", + gpc_get_pkt_colour_str(action->action_value.colour)); + break; + case GPC_RULE_ACTION_VALUE_POLICER: + policer = &action->action_value.policer; + jsonw_name(wr, "police"); + jsonw_start_object(wr); + if (policer->flags & POLICER_HAS_BW) + jsonw_uint_field(wr, "bandwidth", policer->bw); + if (policer->flags & POLICER_HAS_BURST) + jsonw_uint_field(wr, "burst", policer->burst); + if (policer->flags & POLICER_HAS_EXCESS_BW) + jsonw_uint_field(wr, "excess-bandwidth", + policer->excess_bw); + if (policer->flags & POLICER_HAS_EXCESS_BURST) + jsonw_uint_field(wr, "excess-burst", + policer->excess_burst); + if (policer->flags & POLICER_HAS_AWARENESS) { + uint32_t val = policer->awareness; + const char *aware = gpc_get_policer_awareness_str(val); + jsonw_string_field(wr, "awareness", aware); + } + if (policer->objid != FAL_NULL_OBJECT_ID) { + uint64_t drops; + int rv; + + rv = fal_policer_get_stats_ext(policer->objid, 1, + policer_cntr_id, + FAL_STATS_MODE_READ, + &drops); + if (rv != 0) { + RTE_LOG(ERR, DATAPLANE, + "Failed to get GPC policer stats: %s\n", + strerror(-rv)); + drops = 0; + } else { + drops = drops - policer->reset_drops; + } + jsonw_uint_field(wr, "drops", drops); + } + jsonw_end_object(wr); + break; + default: + RTE_LOG(ERR, GPC, "Unknown GPC Action action-type %u\n", + action->action_type); + break; + } + return true; +} + +static gpc_pb_table_rule_walker_cb gpc_op_show_rule; +static bool +gpc_op_show_rule(struct gpc_pb_rule *rule, struct gpc_walk_context *walk_ctx) +{ + struct gpc_show_context *show_ctx = walk_ctx->data; + json_writer_t *wr = show_ctx->wr; + uint64_t bytes = 0; + uint64_t packets = 0; + + /* + * Rules with a number of zero are not being used + */ + if (rule->number == 0) + return true; + + jsonw_start_object(wr); + jsonw_uint_field(wr, "rule-number", rule->number); + + jsonw_name(wr, "matches"); + jsonw_start_array(wr); + gpc_pb_rule_match_walk(rule, gpc_op_show_match, 
walk_ctx); + jsonw_end_array(wr); + + gpc_pb_rule_action_walk(rule, gpc_op_show_action, walk_ctx); + + if (rule->counter.counter_type != GPC_COUNTER_TYPE_UNKNOWN || + rule->counter.name) { + jsonw_name(wr, "counter"); + jsonw_start_object(wr); + if (rule->counter.counter_type != GPC_COUNTER_TYPE_UNKNOWN) + jsonw_uint_field(wr, "counter-type", + rule->counter.counter_type); + if (rule->counter.name) + jsonw_string_field(wr, "counter-name", + rule->counter.name); + + struct gpc_cntr *cntr = gpc_rule_get_cntr(rule->gpc_rule); + + if (cntr && gpc_hw_counter_read(cntr, &packets, &bytes)) { + packets = packets - rule->counter.reset_packets; + bytes = bytes - rule->counter.reset_bytes; + } + jsonw_uint_field(wr, "packets", packets); + jsonw_uint_field(wr, "bytes", bytes); + jsonw_end_object(wr); + } + jsonw_uint_field(wr, "table-index", rule->table_index); + jsonw_uint_field(wr, "orig-number", rule->orig_number); + if (rule->result) + jsonw_string_field(wr, "result", rule->result); + jsonw_end_object(wr); + return true; +} + +static gpc_pb_feature_table_walker_cb gpc_op_show_table; +static bool +gpc_op_show_table(struct gpc_pb_table *table, + struct gpc_walk_context *walk_ctx) +{ + struct gpc_show_context *show_ctx = walk_ctx->data; + json_writer_t *wr = show_ctx->wr; + char table_id_str[TABLE_ID_STRLEN]; + uint32_t i; + + jsonw_start_object(wr); + snprintf(table_id_str, TABLE_ID_STRLEN, "%s/%s/%s", table->ifname, + gpc_get_table_location_str(table->location), + gpc_get_traffic_type_str(table->traffic_type)); + jsonw_string_field(wr, "table-id", table_id_str); + + jsonw_name(wr, "rules"); + jsonw_start_array(wr); + gpc_pb_table_rule_walk(table, gpc_op_show_rule, walk_ctx); + jsonw_end_array(wr); + + jsonw_name(wr, "table-names"); + jsonw_start_array(wr); + for (i = 0; i < table->n_table_names; i++) { + jsonw_start_object(wr); + /* + * i + 1 because table-index starts at one, but we place the + * first table-name in table_names[0] + */ + jsonw_uint_field(wr, "table-index", 
i + 1); + jsonw_string_field(wr, "name", table->table_names[i]); + jsonw_end_object(wr); + } + jsonw_end_array(wr); + jsonw_end_object(wr); + return true; // keep walking +} + +static gpc_pb_feature_counter_walker_cb gpc_op_show_counter; +static bool +gpc_op_show_counter(struct gpc_pb_counter *counter, + struct gpc_walk_context *walk_ctx) +{ + struct gpc_show_context *show_ctx = walk_ctx->data; + json_writer_t *wr = show_ctx->wr; + + jsonw_start_object(wr); + jsonw_string_field(wr, "name", counter->name); + jsonw_string_field(wr, "format", + gpc_get_cntr_format_str(counter->format)); + jsonw_end_object(wr); + return true; // keep walking +} + +static gpc_pb_feature_walker_cb gpc_op_show_feature; +static bool +gpc_op_show_feature(struct gpc_pb_feature *feature, + struct gpc_walk_context *walk_ctx) +{ + struct gpc_show_context *show_ctx = walk_ctx->data; + json_writer_t *wr = show_ctx->wr; + + jsonw_start_object(wr); + jsonw_string_field(wr, "type", gpc_get_feature_type_str(feature->type)); + jsonw_name(wr, "tables"); + jsonw_start_array(wr); + gpc_pb_feature_table_walk(feature, gpc_op_show_table, walk_ctx); + jsonw_end_array(wr); + jsonw_name(wr, "counters"); + jsonw_start_array(wr); + gpc_pb_feature_counter_walk(feature, gpc_op_show_counter, walk_ctx); + jsonw_end_array(wr); + jsonw_end_object(wr); + return true; // keep walking +} + +/* + * Handle: "gpc show [ [ [ []]]]" + * Output in Yang compatible JSON. 
+ */
+static int
+gpc_show(FILE *f, int argc, char **argv)
+{
+	struct gpc_show_context show_ctx;
+	struct gpc_walk_context walk_ctx;
+
+	/* skip "show" */
+	argc--;
+	argv++;
+
+	show_ctx.wr = jsonw_new(f);
+	if (show_ctx.wr == NULL)
+		return -ENOMEM;
+
+	/* start with "no filter", then narrow down per argument given */
+	walk_ctx.data = &show_ctx;
+	walk_ctx.feature_type = 0;
+	walk_ctx.ifname = NULL;
+	walk_ctx.location = 0;
+	walk_ctx.traffic_type = 0;
+
+	/*
+	 * The filters are positional, so each one can only be present if
+	 * all of its predecessors are.
+	 */
+	if (argc > 0) {
+		walk_ctx.feature_type = gpc_feature_str_to_type(argv[0]);
+		if (argc > 1) {
+			walk_ctx.ifname = argv[1];
+			if (argc > 2) {
+				walk_ctx.location =
+					gpc_table_location_str_to_value(
+						argv[2]);
+				if (argc > 3)
+					walk_ctx.traffic_type =
+						gpc_traffic_type_str_to_value(
+							argv[3]);
+			}
+		}
+	}
+
+	jsonw_pretty(show_ctx.wr, true);
+
+	/* "gpc": { "features": [ ... ] } */
+	jsonw_name(show_ctx.wr, "gpc");
+	jsonw_start_object(show_ctx.wr);
+	jsonw_name(show_ctx.wr, "features");
+	jsonw_start_array(show_ctx.wr);
+	gpc_pb_feature_walk(gpc_op_show_feature, &walk_ctx);
+	jsonw_end_array(show_ctx.wr);
+	jsonw_end_object(show_ctx.wr);
+
+	jsonw_destroy(&show_ctx.wr);
+	return 0;
+}
+
+/*
+ * The clear functions - to reset the visible counters to zero.
+ */
+
+/*
+ * Action walker for "clear": remember the current hardware drop count of
+ * a policer so that later reads can be reported relative to it.
+ */
+static gpc_pb_rule_action_walker_cb gpc_op_clear_action;
+static bool
+gpc_op_clear_action(struct gpc_pb_action *action,
+		    struct gpc_walk_context *walk_ctx __unused)
+{
+	struct gpc_pb_policer *pol;
+	uint64_t cur_drops;
+	int err;
+
+	/* The only action we need to clear is the policer */
+	if (action->action_type != GPC_RULE_ACTION_VALUE_POLICER)
+		return true;
+
+	pol = &action->action_value.policer;
+	if (pol->objid == FAL_NULL_OBJECT_ID)
+		return true;
+
+	err = fal_policer_get_stats_ext(pol->objid, 1, policer_cntr_id,
+					FAL_STATS_MODE_READ, &cur_drops);
+	if (err == 0) {
+		pol->reset_drops = cur_drops;
+		return true;
+	}
+
+	RTE_LOG(ERR, DATAPLANE,
+		"Could not retrieve GPC policer stats: %s\n",
+		strerror(-err));
+	return true;
+}
+
+/*
+ * Rule walker for "clear": reset the policers of the rule and record the
+ * current counter values as the new baseline.
+ */
+static gpc_pb_table_rule_walker_cb gpc_op_clear_rule;
+static bool
+gpc_op_clear_rule(struct gpc_pb_rule *rule, struct gpc_walk_context *walk_ctx)
+{
+	struct gpc_cntr *hw_cntr;
+	uint64_t cur_packets;
+	uint64_t cur_bytes;
+
+	/* Rules with a number of zero are not being used */
+	if (rule->number == 0)
+		return true;
+
+	gpc_pb_rule_action_walk(rule, gpc_op_clear_action, walk_ctx);
+
+	/* nothing more to do for a rule without any counter */
+	if (rule->counter.counter_type == GPC_COUNTER_TYPE_UNKNOWN &&
+	    !rule->counter.name)
+		return true;
+
+	hw_cntr = gpc_rule_get_cntr(rule->gpc_rule);
+	if (!hw_cntr)
+		return true;
+
+	if (gpc_hw_counter_read(hw_cntr, &cur_packets, &cur_bytes)) {
+		rule->counter.reset_packets = cur_packets;
+		rule->counter.reset_bytes = cur_bytes;
+	}
+	return true;
+}
+
+/* Table walker for "clear": visit every rule of the table. */
+static gpc_pb_feature_table_walker_cb gpc_op_clear_table;
+static bool
+gpc_op_clear_table(struct gpc_pb_table *table,
+		   struct gpc_walk_context *walk_ctx)
+{
+	gpc_pb_table_rule_walk(table, gpc_op_clear_rule, walk_ctx);
+
+	/* keep walking */
+	return true;
+}
+
+/* Counter walker for "clear": named counters are not supported yet. */
+static gpc_pb_feature_counter_walker_cb gpc_op_clear_counter;
+static bool
+gpc_op_clear_counter(struct gpc_pb_counter *counter __unused,
+		     struct gpc_walk_context *walk_ctx __unused)
+{
+	/* keep walking */
+	return true;
+}
+
+static gpc_pb_feature_walker_cb gpc_op_clear_feature;
+static bool +gpc_op_clear_feature(struct gpc_pb_feature *feature, + struct gpc_walk_context *walk_ctx) +{ + gpc_pb_feature_table_walk(feature, gpc_op_clear_table, walk_ctx); + gpc_pb_feature_counter_walk(feature, gpc_op_clear_counter, walk_ctx); + return true; // keep walking +} + +/* + * Handle: "gpc clear [ [ [ []]]]" + * Output in Yang compatible JSON. + */ +static int +gpc_clear(FILE * f __unused, int argc, char **argv) +{ + struct gpc_walk_context walk_ctx; + + --argc, ++argv; /* skip "clear" */ + + walk_ctx.data = NULL; + walk_ctx.feature_type = 0; + walk_ctx.ifname = NULL; + walk_ctx.location = 0; + walk_ctx.traffic_type = 0; + + if (argc > 0) + walk_ctx.feature_type = gpc_feature_str_to_type(argv[0]); + + if (argc > 1) + walk_ctx.ifname = argv[1]; + + if (argc > 2) + walk_ctx.location = gpc_table_location_str_to_value(argv[2]); + + if (argc > 3) + walk_ctx.traffic_type = gpc_traffic_type_str_to_value(argv[3]); + + gpc_pb_feature_walk(gpc_op_clear_feature, &walk_ctx); + return 0; +} + +int +cmd_gpc_op(FILE *f, int argc, char **argv) +{ + --argc, ++argv; /* skip "gpc" */ + if (argc < 1) { + fprintf(f, "usage: missing qos command\n"); + return -1; + } + + /* Check for op-mode commands first */ + if (strcmp(argv[0], "show") == 0) + return gpc_show(f, argc, argv); + if (strcmp(argv[0], "clear") == 0) + return gpc_clear(f, argc, argv); + + return 0; +} diff --git a/src/gpc/gpc_pb.h b/src/gpc/gpc_pb.h new file mode 100644 index 00000000..325a836b --- /dev/null +++ b/src/gpc/gpc_pb.h @@ -0,0 +1,358 @@ +/*- + * Copyright (c) 2020-2021, AT&T Intellectual Property. + * All rights reserved. 
+ *
+ * SPDX-License-Identifier: LGPL-2.1-only
+ *
+ * Generalised Packet Classification (GPC) configuration handling
+ */
+
+#ifndef GPC_PB_H
+#define GPC_PB_H
+
+/* NOTE(review): the header name of the bare #include below is missing */
+#include
+#include "fal_plugin.h"
+#include "ip.h"
+#include "npf/config/pmf_rule.h"
+#include "urcu.h"
+#include "util.h"
+
+/*
+ * Enum definitions
+ */
+
+/* Bit flags for gpc_pb_policer.flags: which policer parameters are set */
+enum policer_flags_type {
+	POLICER_HAS_BW			= (1 << 0),
+	POLICER_HAS_BURST		= (1 << 1),
+	POLICER_HAS_EXCESS_BW		= (1 << 2),
+	POLICER_HAS_EXCESS_BURST	= (1 << 3),
+	POLICER_HAS_AWARENESS		= (1 << 4),
+};
+
+/* Selects the valid member of the gpc_pb_match.match_value union */
+enum gpc_rule_match_value_type {
+	GPC_RULE_MATCH_VALUE_NOT_SET,
+	GPC_RULE_MATCH_VALUE_SRC_IP,
+	GPC_RULE_MATCH_VALUE_DEST_IP,
+	GPC_RULE_MATCH_VALUE_SRC_PORT,
+	GPC_RULE_MATCH_VALUE_DEST_PORT,
+	GPC_RULE_MATCH_VALUE_FRAGMENT,
+	GPC_RULE_MATCH_VALUE_DSCP,
+	GPC_RULE_MATCH_VALUE_TTL,
+	GPC_RULE_MATCH_VALUE_ICMPV4,
+	GPC_RULE_MATCH_VALUE_ICMPV6,
+	GPC_RULE_MATCH_VALUE_ICMPV6_CLASS,
+	GPC_RULE_MATCH_VALUE_PROTO_BASE,
+	GPC_RULE_MATCH_VALUE_PROTO_FINAL,
+};
+
+/* Selects the valid member of the gpc_pb_action.action_value union */
+enum gpc_rule_action_value_type {
+	GPC_RULE_ACTION_VALUE_NOT_SET,
+	GPC_RULE_ACTION_VALUE_DECISION,
+	GPC_RULE_ACTION_VALUE_DESIGNATION,
+	GPC_RULE_ACTION_VALUE_COLOUR,
+	GPC_RULE_ACTION_VALUE_POLICER,
+};
+
+/* Values stored in gpc_pb_rule_counter.counter_type */
+enum gpc_counter_type {
+	GPC_COUNTER_TYPE_UNKNOWN,
+	GPC_COUNTER_TYPE_DISABLED,
+	GPC_COUNTER_TYPE_AUTO,
+	GPC_COUNTER_TYPE_NAMED,
+};
+
+enum gpc_config_action {
+	CREATE,
+	MODIFY,
+	DELETE
+};
+
+/*
+ * Constants
+ */
+
+/* Largest designation value accepted when parsing a rule action */
+#define GPC_MAX_DESIGNATION 7
+
+/*
+ * Structure definitions
+ */
+
+/* An IP address together with its prefix length */
+struct ip_prefix {
+	uint32_t prefix_length;
+	struct ip_addr addr;
+};
+
+/*
+ * Policer parameters of a rule action.  The flags field holds
+ * policer_flags_type bits saying which of the parameters were supplied.
+ */
+struct gpc_pb_policer {
+	uint64_t bw;
+	uint64_t burst;
+	uint64_t excess_bw;
+	uint64_t excess_burst;
+	/* policer statistic saved by the "gpc clear" command */
+	uint64_t reset_drops;
+	/* FAL policer object, FAL_NULL_OBJECT_ID when there is none */
+	fal_object_t objid;
+	uint32_t flags;
+	uint8_t awareness;
+};
+
+/*
+ * Each rule can have a single action.
+ */
+struct gpc_pb_action {
+	struct cds_list_head action_list;
+	struct rcu_head action_rcu;
+	/* selects which member of action_value is valid */
+	enum gpc_rule_action_value_type action_type;
+	union gpc_pb_action_value_t {
+		uint8_t decision;
+		uint8_t designation;
+		uint8_t colour;
+		struct gpc_pb_policer policer;
+	} action_value;
+};
+
+/*
+ * ICMP type and code; has_typenum/has_code say whether the corresponding
+ * value is present.
+ */
+struct icmp_type_code {
+	uint32_t typenum;
+	uint32_t code;
+	bool has_typenum;
+	bool has_code;
+};
+
+/*
+ * Each rule can have multiple matches, but only one of each type, so a
+ * maximum of 12.
+ */
+struct gpc_pb_match {
+	struct cds_list_head match_list;
+	struct rcu_head match_rcu;
+	/* selects which member of match_value is valid */
+	enum gpc_rule_match_value_type match_type;
+	union gpc_pb_match_value_t {
+		struct ip_prefix src_ip;
+		struct ip_prefix dest_ip;
+		uint32_t src_port;
+		uint32_t dest_port;
+		uint8_t fragment;
+		uint32_t dscp;
+		uint32_t ttl;
+		struct icmp_type_code icmpv4;
+		struct icmp_type_code icmpv6;
+		uint8_t icmpv6_class;
+		uint32_t proto_base;
+		uint32_t proto_final;
+	} match_value;
+};
+
+/* A counter declared at feature level (entry on the feature's counter_list) */
+struct gpc_pb_counter {
+	struct cds_list_head counter_list;
+	struct rcu_head counter_rcu;
+	/* format - packet-only/packets-and-bytes */
+	uint32_t format;
+	char *name;
+};
+
+/* Per-rule counter configuration plus the values saved by "gpc clear" */
+struct gpc_pb_rule_counter {
+	uint64_t reset_packets;
+	uint64_t reset_bytes;
+	/* an enum gpc_counter_type value */
+	uint32_t counter_type;
+	char *name;
+};
+
+/*
+ * Each user-visible table can have up to 9999 rules. However the VCI code can
+ * collapse multiple tables from the same feature together, renumbering the
+ * rules so that they are guaranteed to arrive in numerical order.
+ */
+struct gpc_pb_rule {
+	/* The following field uniquely identifies the rule */
+	uint32_t number;
+	/* Other operational fields */
+	struct cds_list_head match_list;
+	struct cds_list_head action_list;
+	struct gpc_pb_rule_counter counter;
+	struct gpc_rule *gpc_rule;
+	struct pmf_rule *pmf_rule;
+	/*
+	 * The VCI code can collapse multiple tables into a single table.
+	 * The following fields tell us what table this rule originally
+	 * came from, what its original rule-number was, and the name of the
+	 * result associated with it.
+	 */
+	/* Debug fields */
+	uint32_t table_index;
+	uint32_t orig_number;
+	char *result;
+};
+
+/*
+ * Each feature can have up to three tables per interface.
+ * An ingress table, an egress table and a punt-path table.
+ * Some features, e.g. QoS, will only have a single table per interface.
+ */
+struct gpc_pb_table {
+	struct cds_list_head table_list;
+	struct rcu_head table_rcu;
+	/* The following two fields uniquely identify this table */
+	char *ifname;
+	/* location - ingress/egress/punt-path */
+	uint32_t location;
+	/* Other operational fields */
+	uint32_t n_rules;
+	/*
+	 * The protobuf tells us how many rules are in the rules_table so
+	 * we can allocate memory for all the rules in a single chunk.
+	 */
+	struct gpc_pb_rule *rules_table;
+	/* traffic-type - ipv4/ipv6 */
+	uint32_t traffic_type;
+	/*
+	 * The VCI code can collapse multiple tables into a single table.
+	 * The following fields allow us to identify the user-visible table's
+	 * name using the table_index field from the gpc_pb_rule struct.
+	 */
+	uint32_t n_table_names;
+	/* Internal operational fields */
+	struct gpc_rlset *gpc_rlset;
+	struct gpc_group *gpc_group;
+	/* The following array is variable length */
+	char *table_names[0];
+};
+
+/*
+ * Currently only two features are supported, QoS and ACL. In the future
+ * CPP may also be added.
+ */ +struct gpc_pb_feature { + struct cds_list_head feature_list; + struct rcu_head feature_rcu; + /* Feature type uniquely identifies each feature */ + uint32_t type; + /* Other operational fields */ + struct cds_list_head table_list; + struct cds_list_head counter_list; +}; + +struct gpc_walk_context { + uint32_t feature_type; + char *ifname; + uint32_t location; + uint32_t traffic_type; + void *data; +}; + +/** + * Type for function passed in as a parameter in calls to + * gpc_pb_rule_match_walk() + */ +typedef bool (gpc_pb_rule_match_walker_cb)(struct gpc_pb_match *match, + struct gpc_walk_context *context); + +/** + * Walk over the matches of a GPC rule, calling a function for each match. + * + * @param rule A pointer to the GPC protobuf rule to walk. + * @param walker_cb This function is called back. + * The function should return "true" to continue to the next entry, + * or "false" to end the walk of entries. + * @param context This is passed into the walker_cb() function. + */ +void +gpc_pb_rule_match_walk(struct gpc_pb_rule *rule, + gpc_pb_rule_match_walker_cb walker_cb, + struct gpc_walk_context *context); + +/** + * Type for function passed in as a parameter in calls to + * gpc_pb_rule_action_walk() + */ +typedef bool (gpc_pb_rule_action_walker_cb)(struct gpc_pb_action *action, + struct gpc_walk_context *context); + +/** + * Walk over the actions of a GPC rule, calling a function for each action. + * + * @param rule A pointer to the GPC protobuf rule to walk. + * @param walker_cb This function is called back. + * The function should return "true" to continue to the next entry, + * or "false" to end the walk of entries. + * @param context This is passed into the walker_cb() function. 
+ */
+void
+gpc_pb_rule_action_walk(struct gpc_pb_rule *rule,
+			gpc_pb_rule_action_walker_cb walker_cb,
+			struct gpc_walk_context *context);
+
+/**
+ * Type for function passed in as a parameter in calls to
+ * gpc_pb_table_rule_walk()
+ */
+typedef bool (gpc_pb_table_rule_walker_cb)(struct gpc_pb_rule *rule,
+		struct gpc_walk_context *context);
+
+/**
+ * Walk over the rules of a GPC table, calling a function for each rule.
+ *
+ * @param table A pointer to the GPC table to walk.
+ * @param walker_cb This function is called back.
+ *        The function should return "true" to continue to the next entry,
+ *        or "false" to end the walk of entries.
+ * @param context This is passed into the walker_cb() function.
+ */
+void
+gpc_pb_table_rule_walk(struct gpc_pb_table *table,
+		       gpc_pb_table_rule_walker_cb walker_cb,
+		       struct gpc_walk_context *context);
+
+/**
+ * Type for function passed in as a parameter in calls to
+ * gpc_pb_feature_table_walk()
+ */
+typedef bool (gpc_pb_feature_table_walker_cb)(struct gpc_pb_table *table,
+		struct gpc_walk_context *context);
+
+/**
+ * Walk over the tables of a GPC feature, calling a function for each table.
+ *
+ * @param feature A pointer to the GPC feature to walk.
+ * @param walker_cb This function is called back.
+ *        The function should return "true" to continue to the next entry,
+ *        or "false" to end the walk of entries.
+ * @param context This is passed into the walker_cb() function.
+ */
+void gpc_pb_feature_table_walk(struct gpc_pb_feature *feature,
+			       gpc_pb_feature_table_walker_cb walker_cb,
+			       struct gpc_walk_context *context);
+
+/**
+ * Type for function passed in as a parameter in calls to
+ * gpc_pb_feature_counter_walk()
+ */
+typedef bool (gpc_pb_feature_counter_walker_cb)(struct gpc_pb_counter *counter,
+		struct gpc_walk_context *context);
+
+/**
+ * Walk over the counters of a GPC feature, calling a function for each
+ * counter.
+ *
+ * @param feature A pointer to the GPC feature to walk.
+ * @param walker_cb This function is called back. + * The function should return "true" to continue to the next entry, + * or "false" to end the walk of entries. + * @param context This is passed into the walker_cb() function. + */ +void gpc_pb_feature_counter_walk(struct gpc_pb_feature *feature, + gpc_pb_feature_counter_walker_cb walker_cb, + struct gpc_walk_context *context); + + +/** + * Type for function passed in as a parameter in calls to + * gpc_pb_feature_walk() + */ +typedef bool (gpc_pb_feature_walker_cb)(struct gpc_pb_feature *feature, + struct gpc_walk_context *context); + +/** + * Walk over the features of the GPC config, calling a function for each + * feature. + * + * @param walker_cb This function is called back. + * The function should return "true" to continue to the next entry, + * or "false" to end the walk of entries. + * @param context This is passed into the walker_cb() function. + */ +void gpc_pb_feature_walk(gpc_pb_feature_walker_cb walker_cb, + struct gpc_walk_context *context); + +#endif /* GPC_PB_H */ diff --git a/src/gpc/gpc_pb_config.c b/src/gpc/gpc_pb_config.c new file mode 100644 index 00000000..0b700139 --- /dev/null +++ b/src/gpc/gpc_pb_config.c @@ -0,0 +1,1899 @@ +/*- + * Copyright (c) 2020-2021, AT&T Intellectual Property. + * All rights reserved. 
+ *
+ * SPDX-License-Identifier: LGPL-2.1-only
+ *
+ * Generalised Packet Classification (GPC) configuration handling
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "dp_event.h"
+#include "fal.h"
+#include "fal_plugin.h"
+#include "gpc_pb.h"
+#include "gpc_util.h"
+#include "interface.h"
+#include "ip.h"
+#include "npf/config/gpc_cntr_control.h"
+#include "npf/config/gpc_cntr_query.h"
+#include "npf/config/gpc_db_control.h"
+#include "npf/config/gpc_db_query.h"
+#include "npf/config/gpc_hw.h"
+#include "npf/config/pmf_rule.h"
+#include "npf/config/gpc_hw.h"
+#include "protobuf.h"
+#include "protobuf/GPCConfig.pb-c.h"
+#include "protobuf/IPAddress.pb-c.h"
+#include "urcu.h"
+#include "util.h"
+
+/*
+ * Local storage
+ */
+static struct cds_list_head *gpc_feature_list;
+
+/*
+ * Local structure definitions
+ */
+struct gpc_event_context {
+	enum if_feat_mode_event event;
+	bool commit_required;
+};
+
+/*
+ * Protobuf parsing functions
+ */
+
+/*
+ * Copy an IPPrefix protobuf message into a config ip_prefix.
+ *
+ * The IPv4 address is converted with htonl(); the IPv6 address is copied
+ * byte for byte.  The IPv6 byte string comes from the sender of the
+ * message, so its length is checked against the destination before the
+ * copy; any other length is rejected rather than overrunning (or only
+ * partly filling) the address.
+ *
+ * @param msg     Decoded IPPrefix message.
+ * @param cfg_pfx Destination prefix.
+ * @return 0 on success, -EPERM if the length or address field is missing,
+ *         -EINVAL for an unknown address type or a bad IPv6 length.
+ */
+static int
+gpc_pb_ip_prefix_parse(IPPrefix *msg, struct ip_prefix *cfg_pfx)
+{
+	int rv = 0;
+
+	/*
+	 * Mandatory field checking.
+	 */
+	if (!msg->has_length || !msg->address) {
+		RTE_LOG(ERR, GPC,
+			"IPPrefix protobuf missing mandatory field\n");
+		return -EPERM;
+	}
+	cfg_pfx->prefix_length = msg->length;
+
+	switch (msg->address->address_oneof_case) {
+	case IPADDRESS__ADDRESS_ONEOF_IPV4_ADDR:
+		cfg_pfx->addr.type = AF_INET;
+		cfg_pfx->addr.address.ip_v4.s_addr =
+			htonl(msg->address->ipv4_addr);
+		break;
+	case IPADDRESS__ADDRESS_ONEOF_IPV6_ADDR:
+		/* Never trust the sender's length for the copy size */
+		if (msg->address->ipv6_addr.len !=
+		    sizeof(cfg_pfx->addr.address.ip_v6)) {
+			RTE_LOG(ERR, GPC,
+				"IPAddress protobuf has bad IPv6 length %zu\n",
+				msg->address->ipv6_addr.len);
+			return -EINVAL;
+		}
+		cfg_pfx->addr.type = AF_INET6;
+		memcpy(&cfg_pfx->addr.address.ip_v6,
+		       &msg->address->ipv6_addr.data[0],
+		       sizeof(cfg_pfx->addr.address.ip_v6));
+		break;
+	case IPADDRESS__ADDRESS_ONEOF__NOT_SET:
+		/* fallthrough */
+	default:
+		RTE_LOG(ERR, GPC, "Unknown IPAddress case value %u\n",
+			msg->address->address_oneof_case);
+		rv = -EINVAL;
+		break;
+	}
+	return rv;
+}
+
+/*
+ * Copy an ICMP type/code protobuf message into a config icmp_type_code.
+ * At least one of the type and the code must be present.
+ *
+ * @return 0 on success, -EPERM if neither field is present (the same
+ *         error code the other parsers here use for a missing field).
+ */
+static int
+gpc_pb_icmp_parse(struct _RuleMatch__ICMPTypeAndCode *msg,
+		  struct icmp_type_code *icmp)
+{
+	/*
+	 * Mandatory field checking.
+	 */
+	if (!msg->has_typenum && !msg->has_code) {
+		RTE_LOG(ERR, GPC,
+			"ICMPTypeCode protobuf missing mandatory field\n");
+		return -EPERM;
+	}
+
+	icmp->has_typenum = msg->has_typenum;
+	if (msg->has_typenum)
+		icmp->typenum = msg->typenum;
+
+	icmp->has_code = msg->has_code;
+	if (msg->has_code)
+		icmp->code = msg->code;
+
+	return 0;
+}
+
+/*
+ * A policer has three mandatory attributes and up to five optional attributes.
+ */
+#define MAX_POLICER_ATTR_SIZE 8
+
+/*
+ * Create a FAL policer from the parameters held in the config policer.
+ *
+ * The attribute list starts with the three fixed attributes; one more is
+ * appended for each parameter whose POLICER_HAS_* flag is set.  On success
+ * the FAL object id is stored in policer->objid.
+ *
+ * @return 0 on success, otherwise the fal_policer_create() error.
+ */
+static int
+gpc_pb_policer_create(struct gpc_pb_policer *policer)
+{
+	uint32_t attr_count;
+	int rv;
+
+	/* Create the policer. */
+	struct fal_attribute_t attr_list[MAX_POLICER_ATTR_SIZE] = {
+		{ .id = FAL_POLICER_ATTR_METER_TYPE,
+		  .value.u32 = FAL_POLICER_METER_TYPE_BYTES },
+		{ .id = FAL_POLICER_ATTR_MODE,
+		  .value.u32 = FAL_POLICER_MODE_INGRESS },
+		{ .id = FAL_POLICER_ATTR_RED_PACKET_ACTION,
+		  .value.u32 = FAL_PACKET_ACTION_DROP},
+	};
+
+	attr_count = 3;
+
+	/*
+	 * The protobuf bandwidths are in bytes/sec, which agrees with the
+	 * FAL's bandwidth units.
+	 */
+	if (policer->flags & POLICER_HAS_BW) {
+		attr_list[attr_count].id = FAL_POLICER_ATTR_CIR;
+		attr_list[attr_count].value.u64 = policer->bw;
+		attr_count++;
+	}
+	if (policer->flags & POLICER_HAS_EXCESS_BW) {
+		attr_list[attr_count].id = FAL_POLICER_ATTR_EIR;
+		attr_list[attr_count].value.u64 = policer->excess_bw;
+		attr_count++;
+	}
+	if (policer->flags & POLICER_HAS_BURST) {
+		attr_list[attr_count].id = FAL_POLICER_ATTR_CBS;
+		attr_list[attr_count].value.u64 = policer->burst;
+		attr_count++;
+	}
+	if (policer->flags & POLICER_HAS_EXCESS_BURST) {
+		attr_list[attr_count].id = FAL_POLICER_ATTR_EBS;
+		attr_list[attr_count].value.u64 = policer->excess_burst;
+		attr_count++;
+	}
+	if (policer->flags & POLICER_HAS_AWARENESS &&
+	    policer->awareness != POLICER_AWARENESS__AWARENESS_UNKNOWN) {
+		attr_list[attr_count].id = FAL_POLICER_ATTR_COLOUR_SOURCE;
+		/*
+		 * The protobuf definitions of colour-aware and colour-unaware
+		 * don't match the FAL definitions.
+		 */
+		if (policer->awareness == POLICER_AWARENESS__COLOUR_UNAWARE)
+			attr_list[attr_count].value.u64 =
+				FAL_POLICER_COLOUR_SOURCE_UNAWARE;
+		else
+			attr_list[attr_count].value.u64 =
+				FAL_POLICER_COLOUR_SOURCE_AWARE;
+		attr_count++;
+	}
+
+	/* Create policer from attribute list. */
+	rv = fal_policer_create(attr_count, attr_list, &policer->objid);
+	if (rv) {
+		RTE_LOG(ERR, GPC,
+			"Failed to create FAL policer %d\n", rv);
+		return rv;
+	}
+	DP_DEBUG(GPC, DEBUG, GPC, "Created FAL policer 0x%" PRIXPTR "\n",
+		 policer->objid);
+	return rv;
+}
+
+/*
+ * Delete the FAL policer, if there is one, and reset objid to
+ * FAL_NULL_OBJECT_ID.  A FAL failure is logged; objid is reset regardless.
+ */
+static void
+gpc_pb_policer_delete(struct gpc_pb_policer *policer)
+{
+	int rv;
+
+	if (policer->objid != FAL_NULL_OBJECT_ID) {
+		rv = fal_policer_delete(policer->objid);
+		if (rv) {
+			RTE_LOG(ERR, GPC, "Failed to delete FAL policer 0x%"
+				PRIXPTR " rv %d\n", policer->objid, rv);
+		}
+		policer->objid = FAL_NULL_OBJECT_ID;
+	}
+}
+
+/*
+ * Copy a PolicerParams protobuf message into the action's policer, noting
+ * in flags which optional parameters were present, then create the FAL
+ * policer.
+ *
+ * @return 0 on success, -EPERM if the bandwidth is missing, otherwise the
+ *         gpc_pb_policer_create() error.
+ */
+static int
+gpc_pb_policer_parse(struct _PolicerParams *msg, struct gpc_pb_action *action)
+{
+	struct gpc_pb_policer *policer = &action->action_value.policer;
+
+	/*
+	 * Mandatory field checking.
+	 */
+	if (!msg->has_bw) {
+		RTE_LOG(ERR, GPC,
+			"PolicerParams protobuf missing mandatory field\n");
+		return -EPERM;
+	}
+
+	policer->objid = FAL_NULL_OBJECT_ID;
+	policer->bw = msg->bw;
+	policer->flags = POLICER_HAS_BW;
+	if (msg->has_burst) {
+		policer->flags |= POLICER_HAS_BURST;
+		policer->burst = msg->burst;
+	}
+	if (msg->has_excess_bw) {
+		policer->flags |= POLICER_HAS_EXCESS_BW;
+		policer->excess_bw = msg->excess_bw;
+	}
+	if (msg->has_excess_burst) {
+		policer->flags |= POLICER_HAS_EXCESS_BURST;
+		policer->excess_burst = msg->excess_burst;
+	}
+	if (msg->has_awareness) {
+		policer->flags |= POLICER_HAS_AWARENESS;
+		policer->awareness = msg->awareness;
+	}
+	return gpc_pb_policer_create(policer);
+}
+
+/*
+ * GPC match functions
+ */
+
+/* RCU callback: free the match that embeds this rcu_head */
+static void
+gpc_pb_match_free(struct rcu_head *head)
+{
+	struct gpc_pb_match *match;
+
+	match = caa_container_of(head, struct gpc_pb_match, match_rcu);
+	free(match);
+}
+
+/* Unlink the match from its list and free it through call_rcu() */
+static void
+gpc_pb_match_delete(struct gpc_pb_match *match)
+{
+	assert(match);
+
+	cds_list_del(&match->match_list);
+	DP_DEBUG(GPC, DEBUG, GPC, "Freeing GPC match %p\n", match);
+	call_rcu(&match->match_rcu, gpc_pb_match_free);
+}
+
+/*
+ * The following tables
were copied from pmf_parse.c + * We probably need a new pmf_parse function to return the summary bit based + * upon level and field index. We don't use the l2_summary table in GPC. + */ + +/* Summary bits for the rule */ +static uint32_t l3_summary[PMF_L3F__LEN] = { + [PMF_L3F_SRC] = PMF_RMS_L3_SRC, + [PMF_L3F_DST] = PMF_RMS_L3_DST, + [PMF_L3F_PROTOF] = PMF_RMS_L3_PROTO_FINAL, + [PMF_L3F_PROTOB] = PMF_RMS_L3_PROTO_BASE, + [PMF_L3F_DSCP] = PMF_RMS_L3_DSCP, + [PMF_L3F_TTL] = PMF_RMS_L3_TTL, + [PMF_L3F_FRAG] = PMF_RMS_L3_FRAG, + [PMF_L3F_RH] = PMF_RMS_L3_RH, +}; +static uint32_t l4_summary[PMF_L4F__LEN] = { + [PMF_L4F_SRC] = PMF_RMS_L4_SRC, + [PMF_L4F_DST] = PMF_RMS_L4_DST, + [PMF_L4F_TCP_FLAGS] = PMF_RMS_L4_TCPFL, + [PMF_L4F_ICMP_VALS] = PMF_RMS_L4_ICMP_TYPE, +}; + +static int +gpc_match_ip(uint32_t pt_field, struct pmf_rule *pmf_rule, + IPPrefix *proto_pfx, struct ip_prefix *cfg_pfx) +{ + uint32_t summary_bit; + void *pmf_pfx; + int rv; + + /* + * Parse the ip-prefix in the protobuf and save it as config. + */ + rv = gpc_pb_ip_prefix_parse(proto_pfx, cfg_pfx); + if (rv) + return rv; + + /* Avoid duplicate match fields */ + summary_bit = l3_summary[pt_field]; + if (pmf_rule->pp_summary & summary_bit) { + DP_DEBUG(GPC, DEBUG, GPC, "Duplicate match key: %s\n", + (pt_field == PMF_L3F_SRC) ? "src-ip" : "dest-ip"); + return -EEXIST; + } + + if (cfg_pfx->addr.type == AF_INET) + pmf_pfx = pmf_v4_prefix_create(false, + cfg_pfx->prefix_length, + &cfg_pfx->addr.address.ip_v4); + else + pmf_pfx = pmf_v6_prefix_create(false, + cfg_pfx->prefix_length, + &cfg_pfx->addr.address.ip_v6); + + if (!pmf_pfx) { + RTE_LOG(ERR, GPC, "No memory for %s prefix\n", + (pt_field == PMF_L3F_SRC) ? 
"src-ip" : "dest-ip"); + return -ENOMEM; + } + + if (cfg_pfx->addr.type == AF_INET) + pmf_rule->pp_match.l3[pt_field].pm_l3v4 = pmf_pfx; + else + pmf_rule->pp_match.l3[pt_field].pm_l3v6 = pmf_pfx; + + pmf_rule->pp_summary |= summary_bit; + + return 0; +} + +static int +gpc_match_port(uint32_t pt_field, struct pmf_rule *pmf_rule, + uint32_t proto_port, uint32_t *cfg_port) +{ + uint32_t summary_bit; + + /* Save the protobuf port in the config */ + *cfg_port = proto_port; + + /* Avoid duplicate match fields */ + summary_bit = l4_summary[pt_field]; + if (pmf_rule->pp_summary & summary_bit) { + DP_DEBUG(GPC, DEBUG, GPC, "Duplicate match key: %s\n", + (pt_field == PMF_L4F_SRC) ? "src-port" : "dest-port"); + return -EEXIST; + } + + struct pmf_attr_l4port_range l4ports = { + .pm_tag = PMAT_L4_PORT_RANGE, + .pm_loport = *cfg_port, + .pm_hiport = *cfg_port, + }; + + struct pmf_attr_l4port_range *vp = pmf_leaf_attr_copy(&l4ports); + if (!vp) { + RTE_LOG(ERR, GPC, "No memory for parsed %s\n", + (pt_field == PMF_L4F_SRC) ? "src-port" : "dest-port"); + return -ENOMEM; + } + + pmf_rule->pp_match.l4[pt_field].pm_l4port_range = vp; + pmf_rule->pp_summary |= summary_bit; + + return 0; +} + +static int +gpc_match_fragment(struct pmf_rule *pmf_rule, + RuleMatch__FragValue proto_fragment, + uint8_t *cfg_fragment) +{ + uint32_t summary_bit; + + /* Save the protobuf fragment in the config */ + *cfg_fragment = proto_fragment; + + /* + * Avoid duplicate match fields + */ + summary_bit = l3_summary[PMF_L3F_FRAG]; + if (pmf_rule->pp_summary & summary_bit) { + DP_DEBUG(GPC, DEBUG, GPC, "Duplicate match key: fragment\n"); + return -EEXIST; + } + + /* + * Currently pmf_parse.c:pkp_fragment only handles 'fragment=y'. + * We will evential need support for: 'fragment=any', + * 'fragment=initial' and 'fragment=subsequent', but currently it is + * on or off! 
+ */ + struct pmf_attr_frag ip_frag = { + .pm_tag = PMAT_IP_FRAG + }; + + struct pmf_attr_frag *vp = pmf_leaf_attr_copy(&ip_frag); + if (!vp) { + RTE_LOG(ERR, GPC, "No memory for parsed fragment\n"); + return -ENOMEM; + } + + pmf_rule->pp_match.l3[PMF_L3F_FRAG].pm_l3frag = vp; + pmf_rule->pp_summary |= summary_bit; + + return 0; +} + +static int +gpc_match_dscp(struct pmf_rule *pmf_rule, uint32_t proto_dscp, + uint32_t *cfg_dscp) +{ + uint32_t summary_bit; + + /* Save the protobuf dscp in the config */ + *cfg_dscp = proto_dscp; + + /* Avoid duplicate match fields */ + summary_bit = l3_summary[PMF_L3F_DSCP]; + if (pmf_rule->pp_summary & summary_bit) { + DP_DEBUG(GPC, DEBUG, GPC, + "Duplicate match key: dscp\n"); + return -EEXIST; + } + + struct pmf_attr_dscp ip_dscp = { + .pm_tag = PMAT_IP_DSCP, + .pm_dscp = *cfg_dscp + }; + + struct pmf_attr_dscp *vp = pmf_leaf_attr_copy(&ip_dscp); + if (!vp) { + RTE_LOG(ERR, GPC, "No memory for parsed dscp\n"); + return -ENOMEM; + } + + pmf_rule->pp_match.l3[PMF_L3F_DSCP].pm_l3dscp = vp; + pmf_rule->pp_summary |= summary_bit; + + return 0; +} + +static int +gpc_match_ttl(struct pmf_rule *pmf_rule, uint32_t proto_ttl, uint32_t *cfg_ttl) +{ + uint32_t summary_bit; + + /* Save the protobuf ttl in the config */ + *cfg_ttl = proto_ttl; + + /* Avoid duplicate match fields */ + summary_bit = l3_summary[PMF_L3F_TTL]; + if (pmf_rule->pp_summary & summary_bit) { + DP_DEBUG(GPC, DEBUG, GPC, "Duplicate match key: ttl\n"); + return -EEXIST; + } + + struct pmf_attr_ttl ip_ttl = { + .pm_tag = PMAT_IP_TTL, + .pm_ttl = *cfg_ttl + }; + + struct pmf_attr_ttl *vp = pmf_leaf_attr_copy(&ip_ttl); + if (!vp) { + RTE_LOG(ERR, GPC, "No memory for parsed ttl\n"); + return -ENOMEM; + } + + pmf_rule->pp_match.l3[PMF_L3F_TTL].pm_l3ttl = vp; + pmf_rule->pp_summary |= summary_bit; + + return 0; +} + +static int +gpc_match_icmp(struct pmf_rule *pmf_rule, + RuleMatch__ICMPTypeAndCode *proto_icmp, + struct icmp_type_code *cfg_icmp, bool is_v4) +{ + uint32_t 
summary_bit; + int rv; + + /* Parse the protobuf icmp and save it in the config. */ + rv = gpc_pb_icmp_parse(proto_icmp, cfg_icmp); + if (rv) + return rv; + + /* Avoid duplicate match fields */ + summary_bit = l4_summary[PMF_L4F_ICMP_VALS]; + if (pmf_rule->pp_summary & summary_bit) { + DP_DEBUG(GPC, DEBUG, GPC, "Duplicate match key: icmp%s\n", + (is_v4) ? "v4" : "v6"); + return -EEXIST; + } + + struct pmf_attr_l4icmp_vals l4icmp = { + .pm_named = false, + }; + + l4icmp.pm_tag = (is_v4) ? PMAT_L4_ICMP_V4_VALS : PMAT_L4_ICMP_V6_VALS; + + if (cfg_icmp->has_code) { + l4icmp.pm_code = cfg_icmp->code; + l4icmp.pm_any_code = false; + } else { + l4icmp.pm_any_code = true; + } + if (cfg_icmp->has_typenum) + l4icmp.pm_type = cfg_icmp->typenum; + + struct pmf_attr_l4icmp_vals *vp = pmf_leaf_attr_copy(&l4icmp); + + if (!vp) { + RTE_LOG(ERR, GPC, "No memory for parsed icmp%s\n", + (is_v4) ? "v4" : "v6"); + return -ENOMEM; + } + + pmf_rule->pp_match.l4[PMF_L4F_ICMP_VALS].pm_l4icmp_vals = vp; + if (!vp->pm_any_code) + pmf_rule->pp_summary |= PMF_RMS_L4_ICMP_CODE; + else + pmf_rule->pp_summary |= PMF_RMS_L4_ICMP_TYPE; + + return 0; +} + +static int +gpc_match_icmpv6_class(struct pmf_rule *pmf_rule, + RuleMatch__ICMPV6Class proto_v6class, + uint8_t *cfg_v6class) +{ + uint32_t summary_bit; + + /* Save the protobuf v6class in the config */ + *cfg_v6class = proto_v6class; + + /* Avoid duplicate match fields */ + summary_bit = l4_summary[PMF_L4F_ICMP_VALS]; + if (pmf_rule->pp_summary & summary_bit) { + DP_DEBUG(GPC, DEBUG, GPC, + "Duplicate match key: icmpv6-class\n"); + return -EEXIST; + } + + struct pmf_attr_l4icmp_vals l4icmp = { + .pm_tag = PMAT_L4_ICMP_V6_VALS, + .pm_named = false, + .pm_any_code = true, + .pm_class = true, + }; + + if (*cfg_v6class == RULE_MATCH__ICMPV6_CLASS__CLASS_INFO) + l4icmp.pm_type = ICMP6_INFOMSG_MASK; + + struct pmf_attr_l4icmp_vals *vp = pmf_leaf_attr_copy(&l4icmp); + if (!vp) { + RTE_LOG(ERR, GPC, "No memory for parsed icmpv6-class\n"); + return 
-ENOMEM; + } + pmf_rule->pp_match.l4[PMF_L4F_ICMP_VALS].pm_l4icmp_vals = vp; + pmf_rule->pp_summary |= summary_bit; + + return 0; +} + +static int +gpc_match_proto(uint32_t pt_field, struct pmf_rule *pmf_rule, + uint32_t proto_proto, uint32_t *cfg_proto) +{ + uint32_t summary_bit; + + /* Save the protobuf proto value in the config */ + *cfg_proto = proto_proto; + + /* Avoid duplicate match fields */ + summary_bit = l3_summary[pt_field]; + if (pmf_rule->pp_summary & summary_bit) { + DP_DEBUG(GPC, DEBUG, GPC, "Duplicate match key: proto-%s\n", + (pt_field == PMF_L3F_PROTOB) ? "base" : "final"); + return -EEXIST; + } + + struct pmf_attr_proto ip_proto = { + .pm_tag = PMAT_IP_PROTO, + }; + + ip_proto.pm_base = (pt_field == PMF_L3F_PROTOB); + ip_proto.pm_final = (pt_field == PMF_L3F_PROTOF); + + if (*cfg_proto < 256) { + ip_proto.pm_proto = *cfg_proto; + } else { + if (ip_proto.pm_final) + ip_proto.pm_unknown = true; + else { + RTE_LOG(ERR, GPC, + "Bad value in rule: proto-base=%u\n", + *cfg_proto); + return -EINVAL; + } + } + + struct pmf_attr_proto *vp = pmf_leaf_attr_copy(&ip_proto); + if (!vp) { + RTE_LOG(ERR, GPC, "No memory for parsed proto-%s\n", + (pt_field == PMF_L3F_PROTOB) ? 
"base" : "final"); + return -ENOMEM; + } + + pmf_rule->pp_match.l3[pt_field].pm_l3proto = vp; + pmf_rule->pp_summary |= summary_bit; + + return 0; +} + +static int +gpc_pb_match_parse(struct gpc_pb_rule *rule, RuleMatch *msg) +{ + struct gpc_pb_match *match; + struct pmf_rule *pmf_rule = rule->pmf_rule; + int rv = 0; + + match = calloc(1, sizeof(*match)); + if (!match) { + RTE_LOG(ERR, GPC, + "Failed to allocate GPC match\n"); + return -ENOMEM; + } + + switch (msg->match_value_case) { + case RULE_MATCH__MATCH_VALUE__NOT_SET: + match->match_type = GPC_RULE_MATCH_VALUE_NOT_SET; + rv = -EINVAL; + break; + case RULE_MATCH__MATCH_VALUE_SRC_IP: + match->match_type = GPC_RULE_MATCH_VALUE_SRC_IP; + rv = gpc_match_ip(PMF_L3F_SRC, pmf_rule, msg->src_ip, + &match->match_value.src_ip); + break; + case RULE_MATCH__MATCH_VALUE_DEST_IP: + match->match_type = GPC_RULE_MATCH_VALUE_DEST_IP; + rv = gpc_match_ip(PMF_L3F_DST, pmf_rule, msg->dest_ip, + &match->match_value.dest_ip); + break; + case RULE_MATCH__MATCH_VALUE_SRC_PORT: + match->match_type = GPC_RULE_MATCH_VALUE_SRC_PORT; + rv = gpc_match_port(PMF_L4F_SRC, pmf_rule, msg->src_port, + &match->match_value.src_port); + break; + case RULE_MATCH__MATCH_VALUE_DEST_PORT: + match->match_type = GPC_RULE_MATCH_VALUE_DEST_PORT; + rv = gpc_match_port(PMF_L4F_DST, pmf_rule, msg->dest_port, + &match->match_value.dest_port); + break; + case RULE_MATCH__MATCH_VALUE_FRAGMENT: + match->match_type = GPC_RULE_MATCH_VALUE_FRAGMENT; + rv = gpc_match_fragment(pmf_rule, msg->fragment, + &match->match_value.fragment); + break; + case RULE_MATCH__MATCH_VALUE_DSCP: + match->match_type = GPC_RULE_MATCH_VALUE_DSCP; + rv = gpc_match_dscp(pmf_rule, msg->dscp, + &match->match_value.dscp); + break; + case RULE_MATCH__MATCH_VALUE_TTL: + match->match_type = GPC_RULE_MATCH_VALUE_TTL; + rv = gpc_match_ttl(pmf_rule, msg->ttl, &match->match_value.ttl); + break; + case RULE_MATCH__MATCH_VALUE_ICMPV4: + match->match_type = GPC_RULE_MATCH_VALUE_ICMPV4; + rv = 
gpc_match_icmp(pmf_rule, msg->icmpv4, + &match->match_value.icmpv4, true); + break; + case RULE_MATCH__MATCH_VALUE_ICMPV6: + match->match_type = GPC_RULE_MATCH_VALUE_ICMPV6; + rv = gpc_match_icmp(pmf_rule, msg->icmpv6, + &match->match_value.icmpv6, false); + break; + case RULE_MATCH__MATCH_VALUE_ICMPV6_CLASS: + match->match_type = GPC_RULE_MATCH_VALUE_ICMPV6_CLASS; + rv = gpc_match_icmpv6_class(pmf_rule, msg->icmpv6_class, + &match->match_value.icmpv6_class); + break; + case RULE_MATCH__MATCH_VALUE_PROTO_BASE: + match->match_type = GPC_RULE_MATCH_VALUE_PROTO_BASE; + rv = gpc_match_proto(PMF_L3F_PROTOB, pmf_rule, msg->proto_base, + &match->match_value.proto_base); + break; + case RULE_MATCH__MATCH_VALUE_PROTO_FINAL: + match->match_type = GPC_RULE_MATCH_VALUE_PROTO_FINAL; + rv = gpc_match_proto(PMF_L3F_PROTOF, pmf_rule, msg->proto_final, + &match->match_value.proto_final); + break; + default: + RTE_LOG(ERR, GPC, "Unknown RuleMatch value case value %u\n", + msg->match_value_case); + rv = -EINVAL; + break; + } + if (rv) { + free(match); + } else { + cds_list_add_tail(&match->match_list, &rule->match_list); + DP_DEBUG(GPC, DEBUG, GPC, + "Added GPC match %p to GPC rule %p\n", + match, rule); + } + return rv; +} + +/* + * GPC action functions + */ +static void +gpc_pb_action_free(struct rcu_head *head) +{ + struct gpc_pb_action *action; + + action = caa_container_of(head, struct gpc_pb_action, action_rcu); + free(action); +} + +static void +gpc_pb_action_delete(struct gpc_pb_action *action) +{ + assert(action); + + cds_list_del(&action->action_list); + DP_DEBUG(GPC, DEBUG, GPC, "Freeing GPC action %p\n", action); + if (action->action_type == GPC_RULE_ACTION_VALUE_POLICER) + gpc_pb_policer_delete(&action->action_value.policer); + + call_rcu(&action->action_rcu, gpc_pb_action_free); +} + +static struct pmf_qos_mark * +gpc_qos_mark_attach(struct pmf_rule *rule) +{ + struct pmf_qos_mark *qos_mark = rule->pp_action.qos_mark; + + if (qos_mark) + return qos_mark; + + /* This 
memory will be freed by pmf_rule_dealloc */ + qos_mark = pmf_qos_mark_create(); + if (!qos_mark) { + RTE_LOG(ERR, GPC, + "Error: No memory for parsed qos mark type\n"); + return NULL; + } + + rule->pp_action.qos_mark = qos_mark; + + return qos_mark; +} + +static int +gpc_pb_action_designation(struct gpc_pb_action *action, + struct pmf_rule *pmf_rule, uint8_t designation) +{ + struct pmf_qos_mark *qos_mark = gpc_qos_mark_attach(pmf_rule); + + if (!qos_mark) + return -ENOMEM; + + action->action_type = GPC_RULE_ACTION_VALUE_DESIGNATION; + action->action_value.designation = designation; + + pmf_rule->pp_summary |= PMF_RAS_QOS_HW_DESIG; + + qos_mark->paqm_desig = designation; + qos_mark->paqm_has_desig = PMV_TRUE; + return 0; +} + +/* + * Map protobuf's idea of packet colour into the PMF's idea of packet colour. + */ +static enum pmf_mark_colour gpc_map_pb_colour_to_pmf_colour[] = { + [RULE_ACTION__COLOUR_VALUE__GREEN] = PMMC_GREEN, + [RULE_ACTION__COLOUR_VALUE__YELLOW] = PMMC_YELLOW, + [RULE_ACTION__COLOUR_VALUE__RED] = PMMC_RED +}; + +static int +gpc_pb_action_colour(struct gpc_pb_action *action, + struct pmf_rule *pmf_rule, uint8_t pb_colour) +{ + struct pmf_qos_mark *qos_mark = gpc_qos_mark_attach(pmf_rule); + + if (!qos_mark) + return -ENOMEM; + + action->action_type = GPC_RULE_ACTION_VALUE_COLOUR; + action->action_value.colour = pb_colour; + + pmf_rule->pp_summary |= PMF_RAS_QOS_COLOUR; + + qos_mark->paqm_colour = gpc_map_pb_colour_to_pmf_colour[pb_colour]; + return 0; +} + +static int +gpc_pb_action_parse(struct gpc_pb_rule *rule, RuleAction *msg) +{ + struct gpc_pb_action *action; + struct pmf_rule *pmf_rule = rule->pmf_rule; + int rv = 0; + + action = calloc(1, sizeof(*action)); + if (!action) { + RTE_LOG(ERR, GPC, + "Failed to allocate GPC action\n"); + return -ENOMEM; + } + + switch (msg->action_value_case) { + case RULE_ACTION__ACTION_VALUE__NOT_SET: + rv = -EINVAL; + break; + case RULE_ACTION__ACTION_VALUE_DECISION: + action->action_type = 
GPC_RULE_ACTION_VALUE_DECISION; + action->action_value.decision = msg->decision; + switch (action->action_value.decision) { + case RULE_ACTION__PACKET_DECISION__PASS: + pmf_rule->pp_action.fate = PMV_TRUE; + pmf_rule->pp_summary |= PMF_RAS_PASS; + break; + case RULE_ACTION__PACKET_DECISION__DROP: + pmf_rule->pp_action.fate = PMV_FALSE; + pmf_rule->pp_summary |= PMF_RAS_DROP; + break; + default: + RTE_LOG(ERR, GPC, + "Unexpected value in rule: decision=%u\n", + action->action_value.decision); + rv = -EINVAL; + break; + } + break; + case RULE_ACTION__ACTION_VALUE_DESIGNATION: + if (msg->designation > GPC_MAX_DESIGNATION) { + RTE_LOG(ERR, GPC, + "Unexpected designation value: %u\n", + msg->designation); + rv = -EINVAL; + } else { + rv = gpc_pb_action_designation(action, pmf_rule, + (uint8_t)msg->designation); + } + break; + case RULE_ACTION__ACTION_VALUE_COLOUR: + if (msg->colour > RULE_ACTION__COLOUR_VALUE__RED) { + RTE_LOG(ERR, GPC, + "Unexpected packet-colour value: %u\n", + msg->colour); + rv = -EINVAL; + } else { + rv = gpc_pb_action_colour(action, pmf_rule, + (uint8_t)msg->colour); + } + break; + case RULE_ACTION__ACTION_VALUE_POLICER: + rv = gpc_pb_policer_parse(msg->policer, action); + if (!rv) { + action->action_type = GPC_RULE_ACTION_VALUE_POLICER; + + pmf_rule->pp_action.qos_policer = + action->action_value.policer.objid; + pmf_rule->pp_summary |= PMF_RAS_QOS_POLICE; + } + break; + default: + RTE_LOG(ERR, GPC, "Unknown RuleAction value case value %u\n", + msg->action_value_case); + rv = -EINVAL; + break; + } + if (rv) { + free(action); + } else { + cds_list_add_tail(&action->action_list, &rule->action_list); + DP_DEBUG(GPC, DEBUG, GPC, + "Added GPC action %p to GPC rule %p\n", + action, rule); + } + return rv; +} + +/* + * GPC counter functions + */ +static void +gpc_pb_counter_free(struct rcu_head *head) +{ + struct gpc_pb_counter *counter; + + counter = caa_container_of(head, struct gpc_pb_counter, counter_rcu); + free(counter->name); + free(counter); +} 
+ +static void +gpc_pb_counter_delete(struct gpc_pb_counter *counter) +{ + assert(counter); + + cds_list_del(&counter->counter_list); + DP_DEBUG(GPC, DEBUG, GPC, "Freeing GPC counter %p\n", counter); + + call_rcu(&counter->counter_rcu, gpc_pb_counter_free); +} + +static int +gpc_pb_counter_parse(struct gpc_pb_feature *feature, GPCCounter *msg) +{ + struct gpc_pb_counter *counter; + + /* + * Mandatory field checking. + */ + if (!msg->has_format) { + RTE_LOG(ERR, GPC, + "GPCCounter protobuf missing mandatory field\n"); + return -EPERM; + } + + counter = calloc(1, sizeof(*counter)); + if (!counter) { + RTE_LOG(ERR, GPC, + "Failed to allocate GPC counter\n"); + return -ENOMEM; + } + + counter->format = msg->format; + if (msg->name) { + counter->name = strdup(msg->name); + if (!counter->name) { + RTE_LOG(ERR, GPC, + "Failed to allocate name for counter\n"); + goto error_path; + } + } + + cds_list_add_tail(&counter->counter_list, &feature->counter_list); + DP_DEBUG(GPC, DEBUG, GPC, + "Added GPC counter %p to GPC feature %p\n", + counter, feature); + + return 0; + + error_path: + free(counter); + return -ENOMEM; +} + +static int +gpc_pb_rule_counter_parse(struct gpc_pb_rule *rule, RuleCounter *msg) +{ + struct gpc_pb_rule_counter *counter = &rule->counter; + int rv = 0; + + /* + * Mandatory field checking. 
+ */ + if (!msg->has_counter_type) { + RTE_LOG(ERR, GPC, + "RuleCounter protobuf missing mandatory field\n"); + return -EPERM; + } + + switch (msg->counter_type) { + case RULE_COUNTER__COUNTER_TYPE__COUNTER_UNKNOWN: + counter->counter_type = GPC_COUNTER_TYPE_UNKNOWN; + break; + case RULE_COUNTER__COUNTER_TYPE__DISABLED: + counter->counter_type = GPC_COUNTER_TYPE_DISABLED; + break; + case RULE_COUNTER__COUNTER_TYPE__AUTO: + counter->counter_type = GPC_COUNTER_TYPE_AUTO; + break; + case RULE_COUNTER__COUNTER_TYPE__NAMED: + counter->counter_type = GPC_COUNTER_TYPE_NAMED; + if (msg->name) { + counter->name = strdup(msg->name); + if (!counter->name) { + RTE_LOG(ERR, GPC, + "Failed to allocate counter name\n"); + return -ENOMEM; + } + } + break; + default: + RTE_LOG(ERR, GPC, "Unknown rule counter type %u\n", + msg->counter_type); + rv = -EINVAL; + break; + } + + return rv; +} + +/* + * GPC rule functions + */ +static void +gpc_pb_rule_delete(struct gpc_pb_rule *rule) +{ + struct gpc_pb_match *match, *tmp_match; + struct gpc_pb_action *action, *tmp_action; + struct gpc_cntr *cntr; + + assert(rule); + + /* + * Rules with a number of zero are not being used + */ + if (rule->number == 0) + return; + + DP_DEBUG(GPC, DEBUG, GPC, "Freeing GPC rule: %u at %p\n", + rule->number, rule); + + /* + * Mark the rule as unused. 
+ */ + rule->number = 0; + + /* + * Delete any counter and gpc_rule that might be associated with this + * rule + */ + if (rule->gpc_rule) { + cntr = gpc_rule_get_cntr(rule->gpc_rule); + if (cntr) { + DP_DEBUG(GPC, DEBUG, GPC, + "Releasing GPC counter %p from GPC rule %p\n", + cntr, rule); + gpc_cntr_release(cntr); + } + + gpc_rule_delete(rule->gpc_rule); + rule->gpc_rule = NULL; + } + + /* + * Delete the pmf_rule if we have one attached + */ + pmf_rule_free(rule->pmf_rule); + rule->pmf_rule = NULL; + + /* + * Delete any matches and actions attached to this rule + */ + cds_list_for_each_entry_safe(match, tmp_match, &rule->match_list, + match_list) + gpc_pb_match_delete(match); + + cds_list_for_each_entry_safe(action, tmp_action, &rule->action_list, + action_list) + gpc_pb_action_delete(action); + +} + +static int +gpc_pb_rule_counter_create(struct gpc_pb_table *table, struct gpc_pb_rule *rule) +{ + uint32_t counter_type = rule->counter.counter_type; + struct gpc_cntr *cntr = NULL; + + if (counter_type <= GPC_COUNTER_TYPE_DISABLED) + return 0; + + struct gpc_cntg *cntg = gpc_group_get_cntg(table->gpc_group); + + if (!cntg) { + enum gpc_cntr_type type; + + if (counter_type == GPC_COUNTER_TYPE_AUTO) + type = GPC_CNTT_NUMBERED; + else + type = GPC_CNTT_NAMED; + + cntg = gpc_cntg_create(table->gpc_group, type, + (GPC_CNTW_PACKET | GPC_CNTW_L3BYTE), + GPC_CNTS_INTERFACE); + if (!cntg) { + RTE_LOG(ERR, GPC, + "Failed to allocate GPC counter group\n"); + return -ENOMEM; + } + gpc_group_set_cntg(table->gpc_group, cntg); + } + + if (counter_type == GPC_COUNTER_TYPE_AUTO) { + cntr = gpc_cntr_create_numbered(cntg, rule->number); + } else if (counter_type == GPC_COUNTER_TYPE_NAMED) { + cntr = gpc_cntr_find_and_retain(cntg, rule->counter.name); + if (!cntr) + cntr = gpc_cntr_create_named(cntg, rule->counter.name); + } + if (!cntr) { + RTE_LOG(ERR, GPC, + "Failed to allocate GPC counter\n"); + return -ENOMEM; + } + + DP_DEBUG(GPC, DEBUG, GPC, "Added GPC counter %p to GPC rule 
%p\n", + cntr, rule); + gpc_rule_set_cntr(rule->gpc_rule, cntr); + return 0; +} + +static int +gpc_pb_rule_parse(struct gpc_pb_table *table, Rule *msg) +{ + struct gpc_pb_rule *rule; + uint32_t i; + int rv = 0; + + if (!msg) { + RTE_LOG(ERR, GPC, + "Failed to read Rule protobuf\n"); + return -EPERM; + } + /* + * Mandatory field checking. + */ + if (!msg->has_number) { + RTE_LOG(ERR, GPC, + "Rule protobuf missing mandatory field\n"); + return -EPERM; + } + + /* + * We never have a rule 0, we always start with rule 1, hence the -1 + */ + rule = &table->rules_table[msg->number - 1]; + + /* + * Initialise the rule's list heads before we mark it as used + */ + CDS_INIT_LIST_HEAD(&rule->match_list); + CDS_INIT_LIST_HEAD(&rule->action_list); + + /* + * Mark the rule as used, that is non-zero + */ + rule->number = msg->number; + + rule->gpc_rule = gpc_rule_create(table->gpc_group, rule->number, &rule); + if (!rule->gpc_rule) { + RTE_LOG(ERR, GPC, "Failed to allocate GPC rule\n"); + goto error_path; + } + + rule->pmf_rule = pmf_rule_alloc(); + if (!rule->pmf_rule) { + RTE_LOG(ERR, GPC, "Failed to allocate PMF rule\n"); + goto error_path; + } + + for (i = 0; i < msg->n_matches; i++) { + rv = gpc_pb_match_parse(rule, msg->matches[i]); + if (rv) + goto error_path; + } + + for (i = 0; i < msg->n_actions; i++) { + rv = gpc_pb_action_parse(rule, msg->actions[i]); + if (rv) + goto error_path; + } + + if (msg->counter) { + rv = gpc_pb_rule_counter_parse(rule, msg->counter); + if (rv) + goto error_path; + + rv = gpc_pb_rule_counter_create(table, rule); + if (rv) + goto error_path; + + /* + * As we have added a counter to this rule, update the + * pmf_rule's summary. The group's summary will be + * recalculated by gpc_group_hw_ntfy_create when we finally + * push the group down to the FAL. 
+ */ + rule->pmf_rule->pp_summary |= PMF_RAS_COUNT_REF; + } + + if (msg->has_table_index) + rule->table_index = msg->table_index; + + if (msg->has_orig_number) + rule->orig_number = msg->orig_number; + + if (msg->result) { + rule->result = strdup(msg->result); + if (!rule->result) { + RTE_LOG(ERR, GPC, "Failed to allocate result name\n"); + rv = -ENOMEM; + goto error_path; + } + } + + /* + * By now the pmf_rule will have been fully updated and we can + * add the pmf_rule to the gpc_rule. + */ + gpc_rule_change_rule(rule->gpc_rule, rule->pmf_rule); + return rv; + + error_path: + RTE_LOG(ERR, GPC, "Problems parsing Rule protobuf, %d\n", rv); + gpc_pb_rule_delete(rule); + return rv; +} + +void +gpc_pb_rule_match_walk(struct gpc_pb_rule *rule, + gpc_pb_rule_match_walker_cb walker_cb, + struct gpc_walk_context *context) +{ + struct gpc_pb_match *match; + + cds_list_for_each_entry(match, &rule->match_list, match_list) + if (!walker_cb(match, context)) + return; +} + +void +gpc_pb_rule_action_walk(struct gpc_pb_rule *rule, + gpc_pb_rule_action_walker_cb walker_cb, + struct gpc_walk_context *context) +{ + struct gpc_pb_action *action; + + cds_list_for_each_entry(action, &rule->action_list, action_list) + if (!walker_cb(action, context)) + return; +} + +/* + * GPC rules functions + */ +static int +gpc_pb_rules_parse(struct gpc_pb_table *table, Rules *msg) +{ + uint32_t i; + int rv; + + if (!msg) { + RTE_LOG(ERR, GPC, + "Failed to read Rules protobuf\n"); + return -EPERM; + } + /* + * Mandatory field checking. 
+ */ + if (!msg->traffic_type) { + RTE_LOG(ERR, GPC, + "Rules protobuf missing mandatory field\n"); + return -EPERM; + } + + table->traffic_type = msg->traffic_type; + + table->rules_table = calloc(msg->n_rules, sizeof(struct gpc_pb_rule)); + if (!table->rules_table) { + RTE_LOG(ERR, GPC, + "Failed to allocate rules-table for %lu rules\n", + msg->n_rules); + rv = -ENOMEM; + goto error_path; + } + + table->n_rules = msg->n_rules; + for (i = 0; i < table->n_rules; i++) { + rv = gpc_pb_rule_parse(table, msg->rules[i]); + if (rv) + goto error_path; + } + return rv; + + error_path: + RTE_LOG(ERR, GPC, "Problems parsing Rules protobuf: %d\n", rv); + if (table->rules_table) { + /* + * We may enter this error path without all n_rules having been + * initialised, gpc_pb_rule_delete will skip over any + * uninitialised rules + */ + for (i = 0; i < table->n_rules; i++) + gpc_pb_rule_delete(&table->rules_table[i]); + } + free(table->rules_table); + table->rules_table = NULL; + return rv; +} + +/* + * GPC table functions + */ +static void +gpc_pb_table_free(struct rcu_head *head) +{ + struct gpc_pb_table *table; + uint32_t i; + + table = caa_container_of(head, struct gpc_pb_table, table_rcu); + + for (i = 0; i < table->n_table_names; i++) + free(table->table_names[i]); + + free(table->rules_table); + free(table->ifname); + free(table); +} + +static void +gpc_pb_table_delete(struct gpc_pb_table *table) +{ + uint32_t i; + + assert(table); + + cds_list_del(&table->table_list); + DP_DEBUG(GPC, DEBUG, GPC, "Freeing GPC table %p\n", table); + + for (i = 0; i < table->n_rules; i++) + gpc_pb_rule_delete(&table->rules_table[i]); + + if (table->gpc_group) { + /* + * Getting and releasing the gpc_cntg may be unnecessary as the + * gpc_cntg is ref-counted by the number of rule counters + * hanging of it, so by deleting all the rule counters in + * gpc_pb_rule_delete, the gpc_cntg should have been freed. 
+ */ + struct gpc_cntg *cntg = gpc_group_get_cntg(table->gpc_group); + + if (cntg) + gpc_cntg_release(cntg); + + /* + * Tell the hardware that we are deleting a bunch of stuff + */ + gpc_group_hw_ntfy_detach(table->gpc_group); + gpc_group_hw_ntfy_rules_delete(table->gpc_group); + gpc_group_hw_ntfy_delete(table->gpc_group); + } + + if (table->gpc_group) { + gpc_group_delete(table->gpc_group); + table->gpc_group = NULL; + } + if (table->gpc_rlset) { + gpc_rlset_delete(table->gpc_rlset); + table->gpc_rlset = NULL; + } + + call_rcu(&table->table_rcu, gpc_pb_table_free); +} + +static struct gpc_pb_table * +gpc_pb_table_find(struct gpc_pb_feature *feature, const char *ifname, + uint32_t location, uint32_t traffic_type) +{ + struct gpc_pb_table *table; + + cds_list_for_each_entry(table, &feature->table_list, table_list) { + if (!strcmp(table->ifname, ifname) && + table->location == location && + table->traffic_type == traffic_type) + return table; + } + return NULL; +} + +static int +gpc_pb_table_add(struct gpc_pb_feature *feature, GPCTable *msg) +{ + struct gpc_pb_table *table; + uint32_t i; + int rv = 0; + + table = calloc(1, sizeof(*table) + + (sizeof(char *) * msg->n_table_names)); + if (!table) { + RTE_LOG(ERR, GPC, + "Failed to allocate GPC table"); + return -ENOMEM; + } + + table->ifname = strdup(msg->ifname); + if (!table->ifname) { + RTE_LOG(ERR, GPC, + "Failed to allocate memory for interface-name\n"); + rv = -ENOMEM; + goto error_path; + } + table->location = msg->location; + table->traffic_type = msg->traffic_type; + + cds_list_add_tail(&table->table_list, &feature->table_list); + DP_DEBUG(GPC, DEBUG, GPC, + "Allocated GPC table %p for %s/%s\n", + table, table->ifname, + gpc_get_table_location_str(table->location)); + + for (i = 0; i < msg->n_table_names; i++) { + table->table_names[i] = strdup(msg->table_names[i]); + if (!table->table_names[i]) { + RTE_LOG(ERR, GPC, + "Failed to allocate memory for table-name\n"); + rv = -ENOMEM; + goto error_path; + } + 
table->n_table_names = i + 1; + } + + table->gpc_rlset = gpc_rlset_create(true, table->ifname, &table); + if (!table->gpc_rlset) { + RTE_LOG(ERR, GPC, "Failed to create ruleset for table\n"); + rv = -ENOMEM; + goto error_path; + } + + if (table->n_table_names == 0) + table->gpc_group = gpc_group_create(table->gpc_rlset, + GPC_FEAT_QOS, + "what-no-table-name?", + &table); + else + /* Just use the first table-name for the timebeing */ + table->gpc_group = gpc_group_create(table->gpc_rlset, + GPC_FEAT_QOS, + table->table_names[0], + &table); + if (!table->gpc_group) { + RTE_LOG(ERR, GPC, "Failed to create group for table\n"); + rv = -ENOMEM; + goto error_path; + } + + if (table->traffic_type == TRAFFIC_TYPE__IPV4) + gpc_group_set_v4(table->gpc_group); + else + gpc_group_set_v6(table->gpc_group); + + /* Parse the rest of config message */ + rv = gpc_pb_rules_parse(table, msg->rules); + if (rv) + goto error_path; + + /* + * Everything should be in place - tell the FAL about all this stuff + */ + gpc_group_hw_ntfy_create(table->gpc_group, NULL); + + /* + * Now the gpc_group has been created down in the GPC hw layer + * we can now create any counters associated with the group's rules. 
+ */ + for (i = 0; i < table->n_rules; i++) { + struct gpc_pb_rule *rule = &table->rules_table[i]; + + if (rule->number && rule->gpc_rule) { + struct gpc_cntg *cntg = + gpc_group_get_cntg(table->gpc_group); + struct gpc_cntr *cntr = + gpc_rule_get_cntr(rule->gpc_rule); + + if (cntg && cntr) + gpc_cntr_hw_ntfy_create(cntg, cntr); + } + } + gpc_group_hw_ntfy_rules_create(table->gpc_group); + gpc_group_hw_ntfy_attach(table->gpc_group); + + return rv; + + error_path: + RTE_LOG(ERR, GPC, "Failed to allocate memory for table\n"); + gpc_pb_table_delete(table); + return rv; + +} + +static int +gpc_pb_table_parse(struct gpc_pb_feature *feature, GPCTable *msg) +{ + struct gpc_pb_table *table; + enum gpc_config_action action; + int rv; + + if (!msg) { + RTE_LOG(ERR, GPC, + "Failed to read GPCTable protobuf\n"); + return -EPERM; + } + /* + * Mandatory field checking. + */ + if (!msg->ifname || !msg->has_location || !msg->has_traffic_type) { + RTE_LOG(ERR, GPC, + "GPCTable protobuf missing mandatory field\n"); + return -EPERM; + } + + action = CREATE; + table = gpc_pb_table_find(feature, msg->ifname, msg->location, + msg->traffic_type); + if (table) { + /* + * If the table already exists, we delete it if the new + * msg has no rules. This is because if a GPC feature, e.g. + * ACL has more than one table of the same type (IPv4/IPv6), + * at the same location (ingress/egress/punt-path) and on the + * same interface, the VCI code will have collapsed them into + * a single table. The multiple tables allow different ACL + * use-cases to have separate tables in the CLI. + */ + DP_DEBUG(GPC, DEBUG, GPC, + "Found existing table %s/%s, msg->rules: %p\n", + msg->ifname, + gpc_get_table_location_str(msg->location), + msg->rules); + if (!msg->rules) + action = DELETE; + else + action = MODIFY; + } + + switch (action) { + case CREATE: + rv = gpc_pb_table_add(feature, msg); + break; + + case MODIFY: + /* + * Modifies are "nuke-and-rebuild" to start with. 
+ * We will do in-place modifications at a later date. + */ + gpc_pb_table_delete(table); + rv = gpc_pb_table_add(feature, msg); + break; + + case DELETE: + gpc_pb_table_delete(table); + rv = 0; + break; + + default: + break; + } + return rv; +} + +void +gpc_pb_table_rule_walk(struct gpc_pb_table *table, + gpc_pb_table_rule_walker_cb walker_cb, + struct gpc_walk_context *context) +{ + uint32_t i; + + for (i = 0; i < table->n_rules; i++) + if (!walker_cb(&table->rules_table[i], context)) + return; +} + +/* + * GPC feature functions + */ +static void +gpc_pb_feature_free(struct rcu_head *head) +{ + struct gpc_pb_feature *feature; + + feature = caa_container_of(head, struct gpc_pb_feature, feature_rcu); + free(feature); +} + +static void +gpc_pb_feature_delete(struct gpc_pb_feature *feature) +{ + struct gpc_pb_table *table, *tmp_table; + struct gpc_pb_counter *counter, *tmp_counter; + + assert(feature); + + cds_list_del(&feature->feature_list); + DP_DEBUG(GPC, DEBUG, GPC, "Freeing GPC feature %p\n", feature); + + cds_list_for_each_entry_safe(table, tmp_table, &feature->table_list, + table_list) + gpc_pb_table_delete(table); + + cds_list_for_each_entry_safe(counter, tmp_counter, + &feature->counter_list, + counter_list) + gpc_pb_counter_delete(counter); + + call_rcu(&feature->feature_rcu, gpc_pb_feature_free); +} + +static struct gpc_pb_feature * +gpc_pb_feature_find(uint32_t type) +{ + struct gpc_pb_feature *feature; + + if (!gpc_feature_list) + return NULL; + + cds_list_for_each_entry(feature, gpc_feature_list, feature_list) { + if (feature->type == type) + return feature; + } + return NULL; +} + +static int +gpc_pb_feature_add(GPCConfig *msg) +{ + struct gpc_pb_feature *feature; + uint32_t i; + int32_t rv; + + feature = calloc(1, sizeof(*feature)); + if (!feature) { + RTE_LOG(ERR, GPC, + "Failed to allocate GPC feature"); + return -ENOMEM; + } + + feature->type = msg->feature_type; + + DP_DEBUG(GPC, DEBUG, GPC, + "Allocated GPC feature %p for %s\n", feature, + 
gpc_get_feature_type_str(feature->type)); + + CDS_INIT_LIST_HEAD(&feature->table_list); + CDS_INIT_LIST_HEAD(&feature->counter_list); + cds_list_add_tail(&feature->feature_list, gpc_feature_list); + + /* Parse the rest of config message */ + for (i = 0; i < msg->n_tables; i++) { + rv = gpc_pb_table_parse(feature, msg->tables[i]); + if (rv) + goto error_path; + } + + for (i = 0; i < msg->n_counters; i++) { + rv = gpc_pb_counter_parse(feature, msg->counters[i]); + if (rv) + goto error_path; + } + return 0; + + error_path: + RTE_LOG(ERR, GPC, "Failed to add GPC feature, type: %s: %d\n", + gpc_get_feature_type_str(feature->type), rv); + gpc_pb_feature_delete(feature); + return rv; +} + +static int +gpc_pb_feature_parse(GPCConfig *msg) +{ + struct gpc_pb_feature *feature; + enum gpc_config_action action; + int rv; + + if (!msg) { + RTE_LOG(ERR, GPC, + "Failed to read GPCConfig protobuf\n"); + return -EPERM; + } + + if (!msg->has_feature_type) { + RTE_LOG(ERR, GPC, + "GPCConfig protobuf missing mandatory field\n"); + return -EPERM; + } + + action = CREATE; + feature = gpc_pb_feature_find(msg->feature_type); + if (feature) { + /* + * If the feature already exists we delete it if the new + * config-msg has no tables. + */ + if (!msg->n_tables) + action = DELETE; + else + action = MODIFY; + } + + switch (action) { + case CREATE: + rv = gpc_pb_feature_add(msg); + break; + + case MODIFY: + /* + * Modifies are "nuke-and-rebuild" to start with. + * We will do in-place modifications at a later date. 
+ */ + gpc_pb_feature_delete(feature); + rv = gpc_pb_feature_add(msg); + break; + + case DELETE: + gpc_pb_feature_delete(feature); + rv = 0; + break; + + default: + rv = -EINVAL; + break; + } + return rv; +} + +void +gpc_pb_feature_table_walk(struct gpc_pb_feature *feature, + gpc_pb_feature_table_walker_cb walker_cb, + struct gpc_walk_context *context) +{ + struct gpc_pb_table *table; + + cds_list_for_each_entry(table, &feature->table_list, table_list) { + if ((!context->ifname || + !strcmp(table->ifname, context->ifname)) && + (!context->location || + table->location == context->location) && + (!context->traffic_type || + table->traffic_type == context->traffic_type)) + if (!walker_cb(table, context)) + return; + } +} + +void +gpc_pb_feature_counter_walk(struct gpc_pb_feature *feature, + gpc_pb_feature_counter_walker_cb walker_cb, + struct gpc_walk_context *context) +{ + struct gpc_pb_counter *counter; + + cds_list_for_each_entry(counter, &feature->counter_list, counter_list) + if (!walker_cb(counter, context)) + return; +} + +void +gpc_pb_feature_walk(gpc_pb_feature_walker_cb walker_cb, + struct gpc_walk_context *context) +{ + struct gpc_pb_feature *feature; + + if (gpc_feature_list) + cds_list_for_each_entry(feature, gpc_feature_list, feature_list) + if (!context->feature_type || + feature->type == context->feature_type) + if (!walker_cb(feature, context)) + return; +} + +static gpc_pb_feature_table_walker_cb gpc_pb_table_mode_change_cb; +static bool +gpc_pb_table_mode_change_cb(struct gpc_pb_table *table, + struct gpc_walk_context *walk_ctx) +{ + struct gpc_event_context *event_ctx = walk_ctx->data; + bool enabled = (event_ctx->event == IF_FEAT_MODE_EVENT_L3_FAL_ENABLED); + struct ifnet *ifp = dp_ifnet_byifname(table->ifname); + + if (!ifp) + return true; // keep walking + + /* + * This table is attached to an interface that has just changed its + * FAL_L3 status. We need to inform the FAL. 
+ */ + DP_DEBUG(GPC, DEBUG, GPC, + "%s if feature mode change: fal-l3 %sabled\n", + table->ifname, (enabled ? "en" : "dis")); + + if (enabled) { + /* L3 Fal enabled */ + gpc_rlset_set_ifp(table->gpc_rlset); + gpc_group_hw_ntfy_attach(table->gpc_group); + } else { + gpc_group_hw_ntfy_detach(table->gpc_group); + gpc_rlset_clear_ifp(table->gpc_rlset); + } + + /* + * Signal that there has been a change that needs to be committed. + */ + event_ctx->commit_required = true; + return true; // keep walking +} + +static gpc_pb_feature_walker_cb gpc_pb_feature_mode_change_cb; +static bool +gpc_pb_feature_mode_change_cb(struct gpc_pb_feature *feature, + struct gpc_walk_context *walk_ctx) +{ + gpc_pb_feature_table_walk(feature, gpc_pb_table_mode_change_cb, + walk_ctx); + return true; // keep walking +} + +/* + * This function is called whenever an interface feature-mode change event + * happens. The event its really interested in is when an interface becomes + * L3 FAL enabled. On SIAD and J2 there can be some delay before this + * happens after boot, and so GPC initial attempts to program the FAL plugin + * may have failed. + */ +static void +gpc_pb_if_feat_mode_change(struct ifnet *ifp, enum if_feat_mode_event event) +{ + struct gpc_walk_context walk_ctx; + struct gpc_event_context event_ctx; + + if (event != IF_FEAT_MODE_EVENT_L3_FAL_ENABLED && + event != IF_FEAT_MODE_EVENT_L3_FAL_DISABLED) + return; + + event_ctx.event = event; + event_ctx.commit_required = false; + + /* + * Walk all features, locations and traffic-types looking for + * a matching interface name. 
+ */ + walk_ctx.feature_type = 0; + walk_ctx.ifname = ifp->if_name; + walk_ctx.location = 0; + walk_ctx.traffic_type = 0; + walk_ctx.data = &event_ctx; + + gpc_pb_feature_walk(gpc_pb_feature_mode_change_cb, &walk_ctx); + + if (event_ctx.commit_required) + gpc_hw_commit(); +} + +static const struct dp_event_ops gpc_pb_config_events = { + .if_feat_mode_change = gpc_pb_if_feat_mode_change, +}; + + +static int +gpc_config(struct pb_msg *msg) +{ + GPCConfig *config_msg = gpcconfig__unpack(NULL, msg->msg_len, + msg->msg); + int rv; + + /* + * Carry out any one-time initialisation + */ + if (!gpc_feature_list) { + gpc_feature_list = calloc(1, sizeof(*gpc_feature_list)); + if (!gpc_feature_list) { + RTE_LOG(ERR, GPC, "Failed to initialise GPC\n"); + return -ENOMEM; + } + + CDS_INIT_LIST_HEAD(gpc_feature_list); + + dp_event_register(&gpc_pb_config_events); + } + + rv = gpc_pb_feature_parse(config_msg); + + gpcconfig__free_unpacked(config_msg, NULL); + + if (!rv) + gpc_hw_commit(); + + return rv; +} + +PB_REGISTER_CMD(gpc_config_cmd) = { + .cmd = "vyatta:gpc-config", + .handler = gpc_config, +}; diff --git a/src/gpc/gpc_util.c b/src/gpc/gpc_util.c new file mode 100644 index 00000000..3e7f0c5f --- /dev/null +++ b/src/gpc/gpc_util.c @@ -0,0 +1,196 @@ +/*- + * Copyright (c) 2020-2021, AT&T Intellectual Property. + * All rights reserved. 
+ * + * SPDX-License-Identifier: LGPL-2.1-only + * + * Generalised Packet Classification (GPF) configuration handling + */ + +#include +#include "gpc_util.h" +#include "protobuf/GPCConfig.pb-c.h" +#include "util.h" +#include "vplane_log.h" + +/* + * GPC enum to string mapping definitions and functions + */ + +const char *gpc_action_type_str[] = { + [RULE_ACTION__ACTION_VALUE__NOT_SET] = "not-set", + [RULE_ACTION__ACTION_VALUE_DECISION] = "decision", + [RULE_ACTION__ACTION_VALUE_DESIGNATION] = "set-designation", + [RULE_ACTION__ACTION_VALUE_COLOUR] = "set-colour", + [RULE_ACTION__ACTION_VALUE_POLICER] = "policer", +}; + +const char *gpc_policer_awareness_str[] = { + [POLICER_AWARENESS__AWARENESS_UNKNOWN] = "unknown", + [POLICER_AWARENESS__COLOUR_AWARE] = "aware", + [POLICER_AWARENESS__COLOUR_UNAWARE] = "unaware", +}; + +const char *gpc_cntr_format_str[] = { + [GPCCOUNTER__COUNTER_FORMAT__FORMAT_UNKNOWN] = "format-unknown", + [GPCCOUNTER__COUNTER_FORMAT__PACKETS_ONLY] = "packets", + [GPCCOUNTER__COUNTER_FORMAT__PACKETS_AND_L2_L3_BYTES] = + "packets-and-l2-and-l3-bytes", +}; + +const char *gpc_feature_type_str[] = { + [GPCCONFIG__FEATURE_TYPE__FEATURE_UNKNOWN] = "unknown", + [GPCCONFIG__FEATURE_TYPE__QOS] = "qos", + [GPCCONFIG__FEATURE_TYPE__ACL] = "acl", +}; + +const char *gpc_match_type_str[] = { + [RULE_MATCH__MATCH_VALUE__NOT_SET] = "not-set", + [RULE_MATCH__MATCH_VALUE_SRC_IP] = "src-ip", + [RULE_MATCH__MATCH_VALUE_DEST_IP] = "dst-ip", + [RULE_MATCH__MATCH_VALUE_SRC_PORT] = "src-port", + [RULE_MATCH__MATCH_VALUE_DEST_PORT] = "dst-port", + [RULE_MATCH__MATCH_VALUE_FRAGMENT] = "fragment", + [RULE_MATCH__MATCH_VALUE_DSCP] = "dscp", + [RULE_MATCH__MATCH_VALUE_TTL] = "ttl", + [RULE_MATCH__MATCH_VALUE_ICMPV4] = "icmpv4", + [RULE_MATCH__MATCH_VALUE_ICMPV6] = "icmpv6", + [RULE_MATCH__MATCH_VALUE_ICMPV6_CLASS] = "icmpv6-class", + [RULE_MATCH__MATCH_VALUE_PROTO_BASE] = "proto-base", + [RULE_MATCH__MATCH_VALUE_PROTO_FINAL] = "proto-final", +}; + +const char 
*gpc_pkt_colour_str[] = { + [RULE_ACTION__COLOUR_VALUE__GREEN] = "green", + [RULE_ACTION__COLOUR_VALUE__YELLOW] = "yellow", + [RULE_ACTION__COLOUR_VALUE__RED] = "red", +}; + +const char *gpc_pkt_decision_str[] = { + [RULE_ACTION__PACKET_DECISION__DECISION_UNKNOWN] = "unknown", + [RULE_ACTION__PACKET_DECISION__PASS] = "pass", + [RULE_ACTION__PACKET_DECISION__DROP] = "drop", +}; + +const char *gpc_table_location_str[] = { + [GPCTABLE__FEATURE_LOCATION__LOCATION_UNKNOWN] = "unknown", + [GPCTABLE__FEATURE_LOCATION__INGRESS] = "ingress", + [GPCTABLE__FEATURE_LOCATION__EGRESS] = "egress", + [GPCTABLE__FEATURE_LOCATION__PUNT_PATH] = "punt-path", +}; + +const char *gpc_traffic_type_str[] = { + [TRAFFIC_TYPE__TRAFFIC_UNKNOWN] = "unknown", + [TRAFFIC_TYPE__IPV4] = "ipv4", + [TRAFFIC_TYPE__IPV6] = "ipv6", +}; + + +static const char * +gpc_get_str(uint32_t index, uint32_t size, const char *str_array[]) +{ + if (index >= size) { + RTE_LOG(WARNING, GPC, + "Unexpected string index %u for str-array %p\n", + index, str_array); + index = 0; + } + + return str_array[index]; +} + +const char * +gpc_get_action_type_str(uint32_t action) +{ + return gpc_get_str(action, ARRAY_SIZE(gpc_action_type_str), + gpc_action_type_str); +} + +const char * +gpc_get_cntr_format_str(uint32_t format) +{ + return gpc_get_str(format, ARRAY_SIZE(gpc_cntr_format_str), + gpc_cntr_format_str); +} + +const char * +gpc_get_feature_type_str(uint32_t type) +{ + return gpc_get_str(type, ARRAY_SIZE(gpc_feature_type_str), + gpc_feature_type_str); +} + +const char * +gpc_get_match_type_str(uint32_t match) +{ + return gpc_get_str(match, ARRAY_SIZE(gpc_match_type_str), + gpc_match_type_str); +} + +const char * +gpc_get_pkt_colour_str(uint32_t colour) +{ + return gpc_get_str(colour, ARRAY_SIZE(gpc_pkt_colour_str), + gpc_pkt_colour_str); +} + +const char * +gpc_get_pkt_decision_str(uint32_t decision) +{ + return gpc_get_str(decision, ARRAY_SIZE(gpc_pkt_decision_str), + gpc_pkt_decision_str); +} + +const char * 
/*
 * Reverse lookup: return the index of str within a GPC name table, or 0
 * when str is NULL or not found.
 *
 * The tables are built with designated initialisers, so any enum value
 * without a name leaves a NULL slot. Such slots are skipped rather than
 * handed to strcmp().
 */
static uint32_t
gpc_get_value(const char *str, uint32_t size, const char *str_array[])
{
	uint32_t i;

	if (!str)
		return 0;

	for (i = 0; i < size; i++) {
		if (str_array[i] && !strcmp(str_array[i], str))
			return i;
	}
	return 0;
}
+ * + * @param type The numeric gpc counter-format type + * @return Returns a string pointer + */ +const char *gpc_get_cntr_format_str(uint32_t format); + +/** + * Return the gpc feature type string. + * + * @param type The numeric gpc feature type + * @return Returns a string pointer + */ +const char *gpc_get_feature_type_str(uint32_t type); + +/** + * Return the gpc match-type string. + * + * @param type The numeric gpc match type + * @return Returns a string pointer + */ +const char *gpc_get_match_type_str(uint32_t match); + +/** + * Return the gpc packet-colour string. + * + * @param type The numeric gpc counter-format type + * @return Returns a string pointer + */ +const char *gpc_get_pkt_colour_str(uint32_t colour); + +/** + * Return the gpc packet-decision string. + * + * @param type The numeric gpc packet decision value + * @return Returns a string pointer + */ +const char *gpc_get_pkt_decision_str(uint32_t decision); + +/** + * Return the gpc policer-awareness string. + * + * @param type The numeric gpc policer awareness value + * @return Returns a string pointer + */ +const char *gpc_get_policer_awareness_str(uint32_t awareness); + +/** + * Return the gpc table location string. + * + * @param type The numeric gpc location type + * @return Returns a string pointer + */ +const char *gpc_get_table_location_str(uint32_t location); + +/** + * Return the gpc traffic-type string. 
+ * + * @param type The numeric gpc traffic type + * @return Returns a string pointer + */ +const char *gpc_get_traffic_type_str(uint32_t traffic_type); + +/** + * Return the gpc feature type based upon a feature string + * + * @param str The string to convert into a feature type + * @return Returns a gpc feature, or 0 for unknown feature + */ +uint32_t gpc_feature_str_to_type(const char *str); + +/** + * Return the gpc table location value based upon a location string + * + * @param str The string to convert into a location value + * @return Returns a gpc location, or 0 for unknown location + */ +uint32_t gpc_table_location_str_to_value(const char *str); + +/** + * Return the gpc traffic-type value based upon a traffic-type string + * + * @param str The string to convert into a traffic-type value + * @return Returns a gpc traffic-type, or 0 for unknown traffic-type + */ +uint32_t gpc_traffic_type_str_to_value(const char *str); + +#endif /* GPC_UTIL_H */ diff --git a/src/if.c b/src/if.c index 66dc19ef..2e678229 100644 --- a/src/if.c +++ b/src/if.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2019, AT&T Intellectual Property. + * Copyright (c) 2017-2021, AT&T Intellectual Property. * All rights reserved. * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. 
@@ -62,41 +62,42 @@ #include #include #include -#include #include #include -#ifdef HAVE_RTE_BUS_PCI_H -#include -#endif -#include "bridge.h" #include "capture.h" #include "commands.h" #include "compiler.h" -#include "config.h" +#include "config_internal.h" #include "control.h" #include "pipeline/nodes/cross_connect/cross_connect.h" #include "crypto/crypto_policy.h" #include "crypto/vti.h" #include "dp_event.h" -#include "dpdk_eth_if.h" #include "ether.h" #include "fal.h" -#include "gre.h" +#include "if/bridge/bridge.h" +#include "if/dpdk-eth/dpdk_eth_if.h" +#include "if/gre.h" +#include "if/macvlan.h" +#include "if/vxlan.h" #include "if_llatbl.h" #include "if_var.h" #include "ip_addr.h" +#include "ip_icmp.h" +#include "ip_rt_protobuf.h" #include "json_writer.h" #include "l2_rx_fltr.h" #include "l2tp/l2tpeth.h" #include "lag.h" -#include "macvlan.h" #include "main.h" -#include "master.h" +#include "controller.h" #include "netinet6/in6.h" +#include "netinet6/ip6_funcs.h" #include "netlink.h" #include "pipeline/nodes/pl_nodes_common.h" -#include "pktmbuf.h" +#include "pktmbuf_internal.h" +#include "pl_fused_gen.h" #include "pl_node.h" #include "portmonitor/portmonitor.h" #include "pipeline/nodes/pppoe/pppoe.h" @@ -105,9 +106,8 @@ #include "util.h" #include "vplane_debug.h" #include "vplane_log.h" -#include "vrf.h" +#include "vrf_internal.h" #include "vrf_if.h" -#include "vxlan.h" #include "backplane.h" #include "protobuf.h" @@ -115,6 +115,10 @@ struct pl_feature_registration; +static void **if_node_instance_cleanup_cbs; + +static const struct dp_event_ops if_allmcast_event_ops; + /* A child interface should only inherit the parents if_port value if it is * valid. 
*/ @@ -257,16 +261,21 @@ void interface_init(void) NULL); if (!ifname_hash) rte_panic("Can't allocate if_name hash for interfaces\n"); + + dp_event_register(&if_allmcast_event_ops); } void interface_cleanup(void) { struct cds_lfht_iter iter; - struct ifnet *ifp; + struct ifnet *ifp, *tmp; struct if_type_reg *if_reg; - cds_lfht_for_each_entry(ifnet_hash, &iter, - ifp, ifindex_hash) { + /* + * Walk in newest first order, thus guaranteeing that children + * are deleted before parents, as parents are created first. + */ + cds_list_for_each_entry_safe(ifp, tmp, &ifnet_list, if_list) { if_free(ifp); } cds_lfht_destroy(ifnet_hash, NULL); @@ -309,9 +318,9 @@ static inline int interface_ifname_match_fn(struct cds_lfht_node *node, /* * Lookup ifnet information by the kernel ifindex. - * Only called from master thread (no locking) + * Only called from main thread (no locking) */ -struct ifnet *ifnet_byifindex(unsigned int ifindex) +struct ifnet *dp_ifnet_byifindex(unsigned int ifindex) { struct ifnet *ifp = NULL; struct cds_lfht_iter iter; @@ -349,7 +358,7 @@ struct ifnet *ifnet_byifname_cont_src(enum cont_src_en cont_src, return ifp; } -struct ifnet *ifnet_byifname(const char *ifname) +struct ifnet *dp_ifnet_byifname(const char *ifname) { struct ifnet *ifp; @@ -360,35 +369,31 @@ struct ifnet *ifnet_byifname(const char *ifname) return ifp; } -/** - * Find local DPDK interface by the eth_dev->data->name - */ -struct ifnet *ifnet_byethname(const char *ethname) +void dp_ifnet_walk(dp_ifnet_iter_func_t func, void *arg) { struct ifnet *ifp; struct cds_lfht_iter iter; cds_lfht_for_each_entry(ifname_hash, &iter, ifp, ifname_hash) { - if (ifp->if_local_port) { - struct rte_eth_dev *eth_dev = - &rte_eth_devices[ifp->if_port]; - - if (strcmp(eth_dev->data->name, ethname) == 0) - return ifp; - } + (func)(ifp, arg); } - - return NULL; } -void ifnet_walk(ifnet_iter_func_t func, void *arg) +int dp_ifnet_addr_walk(struct ifnet *ifp, dp_ifnet_addr_iter_func_t func, + void *arg) { - struct 
ifnet *ifp; - struct cds_lfht_iter iter; + struct if_addr *ifa; + struct sockaddr *sa; + int status; - cds_lfht_for_each_entry(ifname_hash, &iter, ifp, ifname_hash) { - (func)(ifp, arg); + cds_list_for_each_entry(ifa, &ifp->if_addrhead, ifa_link) { + sa = (struct sockaddr *) &ifa->ifa_addr; + status = (func)(sa, ifa->ifa_prefixlen, arg); + if (status) + return status; } + + return 0; } static void @@ -422,13 +427,8 @@ void if_unset_ifindex(struct ifnet *ifp) if (ifp->if_index == 0) return; - /* - * notify features of impending interface deletion - * This allows features to notify plugins to perform the - * necessary cleanup before the interface is deleted - */ - dp_event(DP_EVT_IF_INDEX_PRE_UNSET, 0, ifp, 0, 0, NULL); - fal_l2_del_port(ifp->if_index); + if_change_features_mode(ifp, IF_FEAT_MODE_FLAG_L2_DISABLE); + cds_lfht_del(ifnet_hash, &ifp->ifindex_hash); ifp->if_index = 0; cds_list_del(&ifp->if_list); @@ -477,7 +477,224 @@ void if_set_ifindex(struct ifnet *ifp, unsigned int ifindex) ifname_hash_insert(ifp); cds_list_add_rcu(&ifp->if_list, &ifnet_list); out: - dp_event(DP_EVT_IF_INDEX_SET, 0, ifp, ifindex, 0, NULL); + dp_event(DP_EVT_IF_INDEX_SET, 0, ifp, 0, 0, NULL); +} + +static int if_is_hw_switching_enabled(struct ifnet *ifp) +{ + const struct ift_ops *ops; + + ops = if_get_ops(ifp); + if (!ops) + return true; + + /* + * ask the interface since the ifp->hw_forwarding field is + * overloaded and we need this to be true for interface types + * that support L3 in the FAL that don't have support for + * hardware-switching disable state (e.g. switch vifs). 
+ */ + if (ops->ifop_is_hw_switching_enabled) + return ops->ifop_is_hw_switching_enabled(ifp); + + /* default to true so that we attempt to create L3 FAL objects */ + return true; +} + +static enum if_embellish_feat +if_get_emb_feats(struct ifnet *ifp) +{ + enum if_embellish_feat feat_present = IF_EMB_FEAT_NONE; + + if (ifp->if_brport) + feat_present |= IF_EMB_FEAT_BRIDGE_MEMBER; + if (ifp->aggregator) + feat_present |= IF_EMB_FEAT_LAG_MEMBER; + if (ifp->unplugged) + feat_present |= IF_EMB_FEAT_UNPLUGGED; + if (!if_is_hw_switching_enabled(ifp)) + feat_present |= IF_EMB_FEAT_HW_SWITCHING_DISABLED; + if (ifp->unplugged) + feat_present |= IF_EMB_FEAT_UNPLUGGED; + if (ifp->if_broken_out) + feat_present |= IF_EMB_FEAT_BREAK_OUT; + if (ifp->aggregator) + feat_present |= IF_EMB_FEAT_LAG_MEMBER; + + return feat_present; +} + +bool if_check_any_emb_feat(struct ifnet *ifp, enum if_embellish_feat feat_any) +{ + return if_get_emb_feats(ifp) & feat_any; +} + +bool if_check_any_except_emb_feat(struct ifnet *ifp, + enum if_embellish_feat feat_except) +{ + return if_get_emb_feats(ifp) & ~feat_except; +} + +void if_notify_emb_feat_change(struct ifnet *ifp) +{ + if_change_features_mode(ifp, IF_FEAT_MODE_FLAG_EMB_FEAT_CHANGED); +} + +static int if_l3_enable(struct ifnet *ifp) +{ + const struct ift_ops *ops; + int ret = 0; + + ops = if_get_ops(ifp); + if (!ops) + return 0; + + if (ops->ifop_l3_enable) + ret = ops->ifop_l3_enable(ifp); + + return ret; +} + +static int if_l3_disable(struct ifnet *ifp) +{ + const struct ift_ops *ops; + int ret = 0; + + ops = if_get_ops(ifp); + if (!ops) + return 0; + + if (ops->ifop_l3_disable) + ret = ops->ifop_l3_disable(ifp); + + return ret; +} + +static bool +_if_is_features_mode_active(struct ifnet *ifp, + enum if_feat_mode_event event, + enum if_feat_mode_flags flags) +{ + bool l3_enabled; + bool l3_hw_enabled; + bool l2_enabled; + bool emb_feat_changed; + bool l2_hw_enabled; + + l2_enabled = !(flags & IF_FEAT_MODE_FLAG_L2_DISABLE); + l2_hw_enabled 
= l2_enabled && + !if_check_any_emb_feat(ifp, IF_EMB_FEAT_HW_SWITCHING_DISABLED); + emb_feat_changed = flags & IF_FEAT_MODE_FLAG_EMB_FEAT_CHANGED; + + l3_enabled = l2_enabled && + !if_check_any_except_emb_feat( + ifp, IF_EMB_FEATS_ALLOW_L3) && + !(flags & IF_FEAT_MODE_FLAG_L3_DISABLE); + l3_hw_enabled = l3_enabled && l2_hw_enabled; + + switch (event) { + case IF_FEAT_MODE_EVENT_L3_FAL_ENABLED: + return l3_hw_enabled; + case IF_FEAT_MODE_EVENT_L3_ENABLED: + return l3_enabled; + case IF_FEAT_MODE_EVENT_EMB_FEAT_CHANGED: + return emb_feat_changed; + case IF_FEAT_MODE_EVENT_L2_FAL_ENABLED: + return l2_hw_enabled; + case IF_FEAT_MODE_EVENT_L2_CREATED: + return l2_enabled; + default: + return false; + } +} + +bool if_is_features_mode_active(struct ifnet *ifp, + enum if_feat_mode_event event) +{ + return _if_is_features_mode_active(ifp, event, + IF_FEAT_MODE_FLAG_NONE); +} + +void if_change_features_mode(struct ifnet *ifp, enum if_feat_mode_flags flags) +{ + bool l3_enabled; + bool l3_hw_enabled; + bool l2_enabled; + bool emb_feat_changed; + int ret; + + l2_enabled = _if_is_features_mode_active( + ifp, IF_FEAT_MODE_EVENT_L2_CREATED, flags); + emb_feat_changed = _if_is_features_mode_active( + ifp, IF_FEAT_MODE_EVENT_EMB_FEAT_CHANGED, flags); + l3_enabled = _if_is_features_mode_active( + ifp, IF_FEAT_MODE_EVENT_L3_ENABLED, flags); + l3_hw_enabled = _if_is_features_mode_active( + ifp, IF_FEAT_MODE_EVENT_L3_FAL_ENABLED, flags); + + if (l2_enabled && flags & IF_FEAT_MODE_FLAG_L2_ENABLED) + dp_event(DP_EVT_IF_FEAT_MODE_CHANGE, 0, ifp, + IF_FEAT_MODE_EVENT_L2_CREATED, 0, NULL); + + if (flags & IF_FEAT_MODE_FLAG_L2_FAL_ENABLE) { + struct fal_attribute_t attr = { + .id = FAL_PORT_ATTR_HW_SWITCH_MODE, + .value.u8 = FAL_PORT_HW_SWITCHING_ENABLE, + }; + + fal_l2_upd_port(ifp->if_index, &attr); + + dp_event(DP_EVT_IF_FEAT_MODE_CHANGE, 0, ifp, + IF_FEAT_MODE_EVENT_L2_FAL_ENABLED, 0, NULL); + } + + if (l3_enabled && !ifp->if_l3_enabled) { + ifp->if_l3_enabled = true; + 
dp_event(DP_EVT_IF_FEAT_MODE_CHANGE, 0, ifp, + IF_FEAT_MODE_EVENT_L3_ENABLED, 0, NULL); + } + + if (l3_hw_enabled && !ifp->fal_l3) { + ret = if_l3_enable(ifp); + if (ret == 0) + dp_event(DP_EVT_IF_FEAT_MODE_CHANGE, 0, ifp, + IF_FEAT_MODE_EVENT_L3_FAL_ENABLED, 0, NULL); + } else if (!l3_hw_enabled && ifp->fal_l3) { + dp_event(DP_EVT_IF_FEAT_MODE_CHANGE, 0, ifp, + IF_FEAT_MODE_EVENT_L3_FAL_DISABLED, 0, NULL); + if_l3_disable(ifp); + } + + if (!l3_enabled && ifp->if_l3_enabled) { + ifp->if_l3_enabled = false; + dp_event(DP_EVT_IF_FEAT_MODE_CHANGE, 0, ifp, + IF_FEAT_MODE_EVENT_L3_DISABLED, 0, NULL); + } + + if (emb_feat_changed) + dp_event(DP_EVT_IF_FEAT_MODE_CHANGE, 0, ifp, + IF_FEAT_MODE_EVENT_EMB_FEAT_CHANGED, 0, NULL); + + if (flags & IF_FEAT_MODE_FLAG_L2_FAL_DISABLE) { + struct fal_attribute_t attr = { + .id = FAL_PORT_ATTR_HW_SWITCH_MODE, + .value.u8 = FAL_PORT_HW_SWITCHING_DISABLE, + }; + + /* + * notify features to give them a chance to clean up + * first, if required + */ + dp_event(DP_EVT_IF_FEAT_MODE_CHANGE, 0, ifp, + IF_FEAT_MODE_EVENT_L2_FAL_DISABLED, 0, NULL); + fal_l2_upd_port(ifp->if_index, &attr); + } + + if (!l2_enabled) { + dp_event(DP_EVT_IF_FEAT_MODE_CHANGE, 0, ifp, + IF_FEAT_MODE_EVENT_L2_DELETED, 0, NULL); + fal_l2_del_port(ifp->if_index); + } } /* @@ -512,15 +729,11 @@ if_set_vrf(struct ifnet *ifp, vrfid_t vrf_id) vrf = vrf_find_or_create(vrf_id); if (vrf) { /* - * Ignore initial setting of VRF master interface into + * Ignore initial setting of VRF interface into * default VRF. 
*/ - if (ifp->if_type == IFT_VRFMASTER && + if (ifp->if_type == IFT_VRF && vrf_id != VRF_DEFAULT_ID) { - uint32_t vrf_tableid = vrfmaster_get_tableid(ifp); - - route_link_vrf_to_table(vrf, vrf_tableid); - route6_link_vrf_to_table(vrf, vrf_tableid); vrf_set_external_id(vrf, ifp->if_index); dp_event(DP_EVT_VRF_CREATE, 0, vrf, 0, 0, NULL); @@ -543,6 +756,327 @@ if_set_vrf(struct ifnet *ifp, vrfid_t vrf_id) dp_event(DP_EVT_IF_VRF_SET, 0, ifp, 0, 0, NULL); } +/* + * Allocate space on the interface to be able to store context for + * a given feature on the if. The callbacks are per feature type, not + * per interface, so they hang off a global array of cbs. + */ +int if_allocate_feature_space(struct ifnet *ifp, + enum pl_feature_point_id fp) +{ + bool alloced_fp = false; + uint32_t size; + void **fp_ctxts = NULL; + void **feat_ctxts = NULL; + + size = pl_feat_point_node_get_max_features(fp); + if (size == 0) + return -EINVAL; + + fp_ctxts = rcu_dereference(ifp->node_instance_contexts); + if (!fp_ctxts) { + /* + * We have to allocate the top level feature point array. We + * will also need to allocate the feature array for the given + * feature point below. + */ + fp_ctxts = zmalloc_aligned(PL_FEATURE_POINT_NUM_IDS * + (sizeof(void *))); + if (!fp_ctxts) + return -ENOMEM; + + alloced_fp = true; + } + + if (alloced_fp || !fp_ctxts[fp]) { + /* The feature point doesn't have a feature array yet */ + feat_ctxts = zmalloc_aligned(size * sizeof(void *)); + if (!feat_ctxts) + goto cleanup_on_failure; + + /* + * Now that all the allocs have succeeded set the pointers to + * make things reachable. 
+ */ + rcu_assign_pointer(fp_ctxts[fp], feat_ctxts); + + if (alloced_fp) + rcu_assign_pointer(ifp->node_instance_contexts, + fp_ctxts); + + } + + return 0; + +cleanup_on_failure: + free(fp_ctxts); + free(feat_ctxts); + return -ENOMEM; +} + +static void if_free_feature_space(struct ifnet *ifp) +{ + int i, j; + dp_pipeline_inst_cleanup_cb *cb_func; + int size; + void **fp_cbs = NULL; + void **fp_ctxts = NULL; + + if (!ifp->node_instance_contexts) + return; + + /* + * This is only called in an call_rcu callback when the interface is + * going away so don't need to worry about users accessing it. + */ + for (i = 0; i < PL_FEATURE_POINT_NUM_IDS; i++) { + fp_ctxts = ifp->node_instance_contexts[i]; + if (if_node_instance_cleanup_cbs) + fp_cbs = if_node_instance_cleanup_cbs[i]; + + if (fp_cbs && fp_ctxts) { + size = pl_feat_point_node_get_max_features(i); + for (j = 0; j < size; j++) { + if (fp_cbs[j] && fp_ctxts[j]) { + cb_func = fp_cbs[j]; + cb_func(ifp->if_name, fp_ctxts[j]); + } + } + } + free(fp_ctxts); + } + + if (ifp->node_instance_contexts) + free(ifp->node_instance_contexts); +} + +int +if_node_instance_register_storage(struct pl_node *node, + struct pl_feature_registration *feat, + void *context) +{ + int rv; + struct ifnet *ifp = (struct ifnet *)node; + void **feat_ctxts; + enum pl_feature_point_id fp_id; + + fp_id = feat->feature_point_node->feature_point_id; + rv = if_allocate_feature_space(ifp, fp_id); + if (rv) + return rv; + + feat_ctxts = rcu_dereference(ifp->node_instance_contexts[fp_id]); + rcu_assign_pointer(feat_ctxts[feat->id], context); + + return 0; +} + +int +if_node_instance_unregister_storage(struct pl_node *node, + struct pl_feature_registration *feat) +{ + struct ifnet *ifp = (struct ifnet *)node; + void **feat_ctxts; + void **fp_ctxts; + enum pl_feature_point_id fp_id; + + fp_id = feat->feature_point_node->feature_point_id; + fp_ctxts = rcu_dereference(ifp->node_instance_contexts); + if (!fp_ctxts) + return -ENOENT; + + feat_ctxts = 
rcu_dereference(fp_ctxts[fp_id]); + if (feat_ctxts[feat->id]) { + rcu_assign_pointer(feat_ctxts[feat->id], NULL); + /* Memory freed on shutdown */ + return 0; + } + + return -ENOENT; +} + +ALWAYS_INLINE void * +if_node_instance_get_storage_internal(struct ifnet *ifp, + enum pl_feature_point_id feat_point, + int feat) +{ + void **feat_ctxts; + void **fp_ctxts; + + fp_ctxts = rcu_dereference(ifp->node_instance_contexts); + if (!fp_ctxts) + return NULL; + + feat_ctxts = rcu_dereference(fp_ctxts[feat_point]); + if (feat_ctxts) + return feat_ctxts[feat]; + + return NULL; +} + +ALWAYS_INLINE void * +if_node_instance_get_storage(struct pl_node *node, + struct pl_feature_registration *feat) +{ + struct ifnet *ifp = (struct ifnet *)node; + + return if_node_instance_get_storage_internal( + ifp, + feat->feature_point_node->feature_point_id, + feat->id); +} + +/* + * Set the cleanup callback for interface node instances. All memory + * allocated will only be released on shutdown. + */ +int +if_node_instance_set_cleanup_cb(struct pl_feature_registration *feat) +{ + enum pl_feature_point_id fp_id; + uint32_t size; + void **fp_cbs = NULL; + void **feat_cbs = NULL; + + if (!feat->node->feat_setup_cleanup_cb) + return 0; + + fp_id = feat->feature_point_node->feature_point_id; + + if (!if_node_instance_cleanup_cbs) { + fp_cbs = calloc(PL_FEATURE_POINT_NUM_IDS, (sizeof(void *))); + if (!fp_cbs) + return -ENOMEM; + rcu_assign_pointer(if_node_instance_cleanup_cbs, fp_cbs); + } else { + fp_cbs = if_node_instance_cleanup_cbs; + } + + if (!fp_cbs[fp_id]) { + size = pl_feat_point_node_get_max_features(fp_id); + if (size == 0) + return 0; + + feat_cbs = calloc(size, sizeof(void *)); + if (!feat_cbs) + return -ENOMEM; + rcu_assign_pointer(fp_cbs[fp_id], feat_cbs); + } else { + feat_cbs = fp_cbs[fp_id]; + } + + feat_cbs[feat->id] = feat->node->feat_setup_cleanup_cb; + return 0; +} + +static struct cds_list_head if_node_instance_all_list_head = + 
CDS_LIST_HEAD_INIT(if_node_instance_all_list_head); + + +struct if_node_instance_all_ctx { + struct pl_feature_registration *pl_feat; + pl_node_feat_change *feat_change; + enum pl_node_feat_action action; + int rv; + struct cds_list_head list_entry; +}; + +static void if_node_instance_feat_change_all_cb(struct ifnet *ifp, void *arg) +{ + struct if_node_instance_all_ctx *context = arg; + + context->rv |= context->feat_change((struct pl_node *)ifp, + context->pl_feat, + context->action); +} + +static void if_node_instance_all_int_create_cb(struct ifnet *ifp) +{ + struct cds_list_head *this_entry, *next; + struct if_node_instance_all_ctx *context; + + /* Walk the list of feature and enable them for this interface */ + cds_list_for_each_safe(this_entry, next, + &if_node_instance_all_list_head) { + + context = cds_list_entry(this_entry, + struct if_node_instance_all_ctx, + list_entry); + + if_node_instance_feat_change_all_cb(ifp, context); + } +} + + +struct dp_event_ops if_node_instance_all_int_event_ops = { + .if_create = if_node_instance_all_int_create_cb, +}; + +int if_node_instance_feat_change_all(struct pl_feature_registration *pl_feat, + enum pl_node_feat_action action, + pl_node_feat_change *feat_change) +{ + struct if_node_instance_all_ctx *context; + struct cds_list_head *this_entry, *next; + bool found = false; + bool rv; + + if (!pl_feat) + return -EINVAL; + + cds_list_for_each_safe(this_entry, next, + &if_node_instance_all_list_head) { + context = cds_list_entry(this_entry, + struct if_node_instance_all_ctx, + list_entry); + if (context->pl_feat == pl_feat) { + found = true; + break; + } + } + + if (action == PL_NODE_FEAT_ADD) { + if (found) + return -EINVAL; + + context = malloc(sizeof(*context)); + if (!context) + return -ENOMEM; + + context->pl_feat = pl_feat; + context->feat_change = feat_change; + context->action = action; + context->rv = 0; + + if (cds_list_empty(&if_node_instance_all_list_head)) + /* Register for event notification for new interfaces 
*/ + dp_event_register(&if_node_instance_all_int_event_ops); + + /* Track the features to enable on new interfaces */ + cds_list_add_rcu(&context->list_entry, + &if_node_instance_all_list_head); + } else { + if (!found) + return -ENOENT; + context->action = action; + context->rv = 0; + } + + dp_ifnet_walk(if_node_instance_feat_change_all_cb, context); + rv = context->rv; + if (action == PL_NODE_FEAT_REM) { + cds_list_del_rcu(&context->list_entry); + free(context); + + if (cds_list_empty(&if_node_instance_all_list_head)) + dp_event_unregister( + &if_node_instance_all_int_event_ops); + + } + + return rv; +} + /* Callback from RCU to free interface */ static void if_free_rcu(struct rcu_head *head) { @@ -559,6 +1093,7 @@ static void if_free_rcu(struct rcu_head *head) if (ifp->vlan_feat_table) dp_ht_destroy_deferred(ifp->vlan_feat_table); + if_free_feature_space(ifp); rte_free(ifp->if_vlantbl); rte_free(ifp); } @@ -580,8 +1115,8 @@ static void if_unset_netconf(struct ifnet *ifp) * Note: it is floating (not in any table) */ struct ifnet *if_alloc(const char *ifname, enum if_type type, - unsigned int mtu, const struct ether_addr *eth_addr, - int socket) + unsigned int mtu, const struct rte_ether_addr *eth_addr, + int socket, void *ctx) { const struct ift_ops *ops; struct ifnet *ifp; @@ -601,7 +1136,7 @@ struct ifnet *if_alloc(const char *ifname, enum if_type type, return NULL; if (eth_addr) - ether_addr_copy(eth_addr, &ifp->eth_addr); + rte_ether_addr_copy(eth_addr, &ifp->eth_addr); if (strlen(ifname) >= IFNAMSIZ) RTE_LOG(NOTICE, DATAPLANE, @@ -635,7 +1170,7 @@ struct ifnet *if_alloc(const char *ifname, enum if_type type, ifp->if_name); if (ops->ifop_init) { - ret = ops->ifop_init(ifp); + ret = ops->ifop_init(ifp, ctx); if (ret < 0) { if_free(ifp); return NULL; @@ -656,7 +1191,7 @@ void if_add_vlan(struct ifnet *ifp, struct rte_mbuf **m) if (ifp->qinq_inner) { if_incr_out(ifp, *m); - vid_encap(ifp->if_vlan, m, ETHER_TYPE_VLAN); + vid_encap(ifp->if_vlan, m, 
RTE_ETHER_TYPE_VLAN); ifp = ifp->if_parent; } @@ -670,10 +1205,10 @@ void if_add_vlan(struct ifnet *ifp, struct rte_mbuf **m) if_incr_out(ifp, *m); } -int if_add_l2_addr(struct ifnet *ifp, struct ether_addr *addr) +int if_add_l2_addr(struct ifnet *ifp, struct rte_ether_addr *addr) { const struct ift_ops *ops; - int ret = 0; + int ret; char buf[32]; /* @@ -689,26 +1224,27 @@ int if_add_l2_addr(struct ifnet *ifp, struct ether_addr *addr) if (ops->ifop_add_l2_addr) ret = ops->ifop_add_l2_addr(ifp, addr); + else + ret = -ENOTSUP; - if (ret < -ENOTSUP) { + if (ret < 0) { + /* we use promisc mode as a fallback */ + ifpromisc(ifp, 1); DP_DEBUG(INIT, ERR, DATAPLANE, - "%s can't add MAC address %s: %s\n", + "%s, failed to add MAC address %s: %s setting interface to promisc\n", ifp->if_name, ether_ntoa_r(addr, buf), strerror(-ret)); - } else { - /* we use promisc mode as a fallback */ - ifpromisc(ifp, 1); ret = 0; } return ret; } -int if_del_l2_addr(struct ifnet *ifp, struct ether_addr *addr) +int if_del_l2_addr(struct ifnet *ifp, struct rte_ether_addr *addr) { const struct ift_ops *ops; - int ret = 0; + int ret; char buf[32]; /* @@ -724,16 +1260,17 @@ int if_del_l2_addr(struct ifnet *ifp, struct ether_addr *addr) if (ops->ifop_del_l2_addr) ret = ops->ifop_del_l2_addr(ifp, addr); + else + ret = -ENOTSUP; - if (ret < -ENOTSUP) { + if (ret < 0) { + /* we use promisc mode as a fallback */ + ifpromisc(ifp, 0); DP_DEBUG(INIT, ERR, DATAPLANE, - "%s can't remove MAC address %s: %s\n", + "%s, failed to remove MAC address %s: %s setting interface to non-promisc\n", ifp->if_name, ether_ntoa_r(addr, buf), strerror(-ret)); - } else { - /* we use promisc mode as a fallback */ - ifpromisc(ifp, 0); ret = 0; } @@ -756,14 +1293,8 @@ if_qinq_created(struct ifnet *phy_ifp) { phy_ifp->qinq_vif_cnt++; - if (phy_ifp->qinq_vif_cnt == 1) { - if (phy_ifp->if_team) - lag_walk_bond_slaves(phy_ifp, - lag_set_phy_qinq_mtu_slave, - NULL); - + if (phy_ifp->qinq_vif_cnt == 1) if_set_mtu(phy_ifp, 
phy_ifp->if_mtu, true); - } } void @@ -771,14 +1302,8 @@ if_qinq_deleted(struct ifnet *phy_ifp) { phy_ifp->qinq_vif_cnt--; - if (phy_ifp->qinq_vif_cnt == 0) { - if (phy_ifp->if_team) - lag_walk_bond_slaves(phy_ifp, - lag_set_phy_qinq_mtu_slave, - NULL); - + if (phy_ifp->qinq_vif_cnt == 0) if_set_mtu(phy_ifp, phy_ifp->if_mtu, true); - } } int if_vlan_proto_set(struct ifnet *ifp, uint16_t proto) @@ -797,7 +1322,7 @@ int if_vlan_proto_set(struct ifnet *ifp, uint16_t proto) * and physical VLAN interfaces. */ if (ifp->qinq_inner) { - if (proto != ETHER_TYPE_VLAN) + if (proto != RTE_ETHER_TYPE_VLAN) RTE_LOG(ERR, DATAPLANE, "%s: can't change QinQ inner tpid - 0x%x\n", ifp->if_name, proto); @@ -843,17 +1368,22 @@ int if_vlan_proto_set(struct ifnet *ifp, uint16_t proto) } /* - * Do not delete the interface associated with a hardware port, unless - * it is due to a hotplug remove. Reinitialise the port with the - * controller so it has a new ifindex and is ready to be configured. + * Do not permanently delete the interface associated with a hardware + * port, unless it is due to a hotplug remove. This is because when + * deleting a dataplane interface from the configuration it will + * generate a netlink delete of it, but this most likely isn't what + * the user wants, since creating the interface again in the + * configuration doesn't result in a netlink create of it. + * + * Therefore, unregister and re-register the port with the controller + * so it gets recreated in the system which will then generate a + * create of the interface again. 
*/ void netlink_if_free(struct ifnet *ifp) { if (if_is_hwport(ifp) && !ifp->unplugged) { teardown_interface_portid(ifp->if_port); - if_unset_ifindex(ifp); setup_interface_portid(ifp->if_port); - return; } if_free(ifp); } @@ -898,18 +1428,13 @@ if_free(struct ifnet *ifp) "No ops registered during free for interface type %d", ifp->if_type); - if (ops && ops->ifop_pre_uninit) - ops->ifop_pre_uninit(ifp); + /* Layer 3 & 2 feature cleanup */ - /* First make ifp unreachable by ifindex and ifname */ - if_unset_ifindex(ifp); - cds_lfht_del(ifname_hash, &ifp->ifname_hash); - - /* Send event prior to freeing features */ dp_event(DP_EVT_IF_DELETE, 0, ifp, 0, 0, NULL); - + if_change_features_mode(ifp, IF_FEAT_MODE_FLAG_L3_DISABLE); if_clean(ifp); + /* remove any features left over that didn't clean themselves up */ pl_node_iter_features(ipv4_validate_node_ptr, ifp, if_remove_pl_feat, ifp); pl_node_iter_features(ipv4_out_node_ptr, ifp, @@ -919,6 +1444,13 @@ if_free(struct ifnet *ifp) pl_node_iter_features(ipv6_out_node_ptr, ifp, if_remove_pl_feat, ifp); + /* Layer 2 cleanup */ + + if_unset_ifindex(ifp); + cds_lfht_del(ifname_hash, &ifp->ifname_hash); + + /* Layer 1 cleanup */ + /* * Turn off promiscuous mode if left on so we don't leak * promiscuous mode refcounts in parent interfaces (if @@ -953,6 +1485,8 @@ if_setup_vlan_storage(struct ifnet *ifp) * of onswitch. The calls are reference counted so that only the first * "on" request actually has an effect, as does the final "off" request. * Results are undefined if the "off" and "on" requests are not matched. + * + * This function defines promiscuity to include both MAC and VLAN. */ void ifpromisc(struct ifnet *ifp, int onswitch) { @@ -1003,6 +1537,47 @@ void ifpromisc(struct ifnet *ifp, int onswitch) l2_rx_fltr_state_change(ifp); } +/* + * Whenever a member is added to or removed from a LAG (bond + * interface), need to ensure that the multicast promiscuous mode + * setting is updated. 
+ */ +static void +if_allmulti_lag_member_add(struct ifnet *lag, struct ifnet *member) +{ + if (lag->if_allmcast_ref != 0) { + DP_DEBUG(MULTICAST, INFO, MCAST, + "Enable multicast promiscuous mode for LAG member %s\n", + member->if_name); + member->if_allmcast_ref = lag->if_allmcast_ref; + l2_rx_fltr_state_change(member); + } +} + +static void +if_allmulti_lag_member_delete(struct ifnet *lag, struct ifnet *member) +{ + if (lag->if_allmcast_ref != 0) { + DP_DEBUG(MULTICAST, INFO, MCAST, + "Disable multicast promiscuous mode for LAG member %s\n", + member->if_name); + member->if_allmcast_ref = 0; + l2_rx_fltr_state_change(member); + } +} + +static const struct dp_event_ops if_allmcast_event_ops = { + .if_lag_add_member = if_allmulti_lag_member_add, + .if_lag_delete_member = if_allmulti_lag_member_delete, +}; + +static void if_allmulti_lag_member(struct ifnet *member, void *arg) +{ + int onoff = *((int *)arg); + + if_allmulti(member, onoff); +} + /* Enable promiscuous reception of IP multicasts from the interface */ void if_allmulti(struct ifnet *ifp, int onoff) { @@ -1021,98 +1596,22 @@ void if_allmulti(struct ifnet *ifp, int onoff) assert(ifp->if_allmcast_ref >= 0); } - l2_rx_fltr_state_change(ifp); -} - -static void if_team_init(struct ifnet *ifp) -{ - struct rte_eth_dev_info dev_info; - - if (ifp->if_type != IFT_ETHER) - return; - - rte_eth_dev_info_get(ifp->if_port, &dev_info); - - DP_DEBUG(INIT, DEBUG, DATAPLANE, - "%d:%s dev_info.driver_name %s\n", - ifp->if_index, ifp->if_name, dev_info.driver_name); - - if (strstr(dev_info.driver_name, "rte_bond_pmd") != NULL) - ifp->if_team = 1; -} - -static struct ifnet * -if_hwport_init(const char *if_name, unsigned int portid, - const struct ether_addr *eth, int socketid) -{ - struct ifnet *ifp; - - /* device driver couldn't find MAC address */ - if (is_zero_ether_addr(eth)) { - RTE_LOG(NOTICE, DATAPLANE, - "%s port %u: address not set!\n", if_name, portid); - return NULL; - } - - ifp = if_alloc(if_name, IFT_ETHER, 
ETHER_MTU, eth, socketid); - if (!ifp) - return NULL; - - ifp->if_port = portid; - - /* - * Temporarily turn off VLAN insertion offload for Mellanox - * ConnectX5 devices. This should be removed when DPDK is - * up-reved to 1908 - */ - if (is_device_mlx5(portid)) - ifp->tpid_offloaded = 0; + if (is_team(ifp)) { + int err; - if (!if_setup_vlan_storage(ifp)) { - if_free(ifp); - return NULL; + /* + * Propagate the multicast promiscuous mode settings + * down to each member of the LAG. + */ + err = lag_walk_team_members(ifp, if_allmulti_lag_member, + &onoff); + if (err < 0) + DP_DEBUG(MULTICAST, INFO, MCAST, + "Failed to update multicast promiscuous mode for " + "LAG members %s.\n", ifp->if_name); } - ifname_hash_insert(ifp); - - return ifp; -} - -/* - * Initialize a hardwired port. - */ -struct ifnet *if_hwport_alloc(unsigned int portid, - const struct ether_addr *eth, int socketid) -{ - struct ifnet *ifp; - char if_name[IFNAMSIZ]; - - /* Temporary name during boot up. - * Should never be visible, but set a value to avoid any potential - * issues from messages during startup. - */ - snprintf(if_name, IFNAMSIZ, "port%u", portid); - - - ifp = if_hwport_init(if_name, portid, eth, socketid); - if (!ifp) - return NULL; - - /* Can't set ifp->if_dp_id, we have not been told our dp_id yet */ - - /* port is on this dataplane, so if_port is valid */ - ifp->if_local_port = 1; - - /* - * Set mac-address driver filtering as initially - * supported. This will be reset later if any subsequent - * attempt to program filtering in the driver should fail. - */ - ifp->if_mac_filtr_supported = 1; - ifp->if_mac_filtr_reprogram = 0; - - if_team_init(ifp); - return ifp; + l2_rx_fltr_state_change(ifp); } /* Cleanup all pseudo-interfaces. */ @@ -1125,19 +1624,8 @@ void if_cleanup(enum cont_src_en cont_src) * are deleted before parents, as parents are created first. 
*/ cds_list_for_each_entry_safe(ifp, tmp, &ifnet_list, if_list) { - if (ifp->if_cont_src == cont_src) { - fal_l2_del_port(ifp->if_index); - - /* eth ports are only registered on boot, remove - * signaled state, but dont free ifnet. - */ - if (ifp->if_type == IFT_ETHER && ifp->if_local_port) { - if_unset_ifindex(ifp); - if_clean(ifp); - if_set_vrf(ifp, VRF_DEFAULT_ID); - } else - if_free(ifp); - } + if (ifp->if_cont_src == cont_src) + if_free(ifp); } } @@ -1151,6 +1639,9 @@ bool if_stats(struct ifnet *ifp, struct if_data *stats) memset(sum, 0, sizeof(struct if_data)); + if (ifp->unplugged) + return false; + FOREACH_DP_LCORE(lcore) { const uint64_t *pcpu = (const uint64_t *) &ifp->if_data[lcore]; @@ -1231,9 +1722,9 @@ void if_mpls_stats(const struct ifnet *ifp, struct if_mpls_data *stats) * when sampling at 5 second intervals. */ static const uint32_t cexp[3] = { - 0.9200444146293232 * FSCALE, /* exp(-1/12) */ - 0.9834714538216174 * FSCALE, /* exp(-1/60) */ - 0.9944598480048967 * FSCALE, /* exp(-1/180) */ + (uint32_t) (0.9200444146293232 * FSCALE), /* exp(-1/12) */ + (uint32_t) (0.9834714538216174 * FSCALE), /* exp(-1/60) */ + (uint32_t) (0.9944598480048967 * FSCALE), /* exp(-1/180) */ }; /* Convert from scaled value to displayable counter @@ -1286,7 +1777,18 @@ static void if_perf_timer(struct rte_timer *tim __rte_unused, void *arg) changed |= if_perf_update(&ifp->if_txpps, swstats.ifi_opackets); changed |= if_perf_update(&ifp->if_txbps, swstats.ifi_obytes); - if (changed && ifp->if_type != IFT_ETHER) + /* + * also send update if any counters where we may not have + * successfully RX'd or TX'd a packet are non-zero, since they + * won't have triggered a change above. 
+ */ + changed |= swstats.ifi_ierrors != 0 || + swstats.ifi_oerrors != 0 || + swstats.ifi_idropped != 0 || + ifi_odropped(&swstats) != 0; + + if (changed && (ifp->if_type != IFT_ETHER || + is_team(ifp))) send_if_stats(ifp, &swstats); } @@ -1311,6 +1813,33 @@ static void if_stats_disable(struct ifnet *ifp) if_perf_clear(&ifp->if_txbps); } +int dp_ifnet_mib_counters(struct ifnet *ifp, + struct dp_ifnet_mib_counters *counters) +{ + struct if_data stats; + + if_stats(ifp, &stats); + + if (!counters) + return -1; + + counters->dp_ifnet_mib_counter_inoctets = stats.ifi_ibytes; + counters->dp_ifnet_mib_counter_inucastpkts = stats.ifi_ipackets; + counters->dp_ifnet_mib_counter_inmulticastpkts = stats.ifi_imulticast; + counters->dp_ifnet_mib_counter_inbroadcastpkts = stats.ifi_imulticast; + counters->dp_ifnet_mib_counter_indiscards = stats.ifi_idropped; + counters->dp_ifnet_mib_counter_inerrors = stats.ifi_ierrors; + counters->dp_ifnet_mib_counter_inunknownprotos = stats.ifi_unknown; + counters->dp_ifnet_mib_counter_outoctets = stats.ifi_obytes; + counters->dp_ifnet_mib_counter_outucastpkts = stats.ifi_opackets; + counters->dp_ifnet_mib_counter_outmulticastpkts = 0; + counters->dp_ifnet_mib_counter_outbroadcastpkts = 0; + counters->dp_ifnet_mib_counter_outdiscards = ifi_odropped(&stats); + counters->dp_ifnet_mib_counter_outerrors = stats.ifi_oerrors; + + return 0; +} + int if_blink(struct ifnet *ifp, bool on) { const struct ift_ops *ops; @@ -1320,7 +1849,7 @@ int if_blink(struct ifnet *ifp, bool on) if (!ops) return -EINVAL; - if (ops->ifop_set_mtu) + if (ops->ifop_blink) rc = ops->ifop_blink(ifp, on); return rc; @@ -1354,11 +1883,6 @@ struct incomplete_if_stats { uint64_t route_del; uint64_t route_del_missing; uint64_t route_update; - uint64_t missed_replayed; - uint64_t missed_add; - uint64_t missed_del; - uint64_t missed_del_missing; - uint64_t missed_update; uint64_t mem_fails; }; @@ -1375,62 +1899,27 @@ struct incomplete_route { /* keys */ struct ip_addr dest; uint32_t 
label; - vrfid_t vrf_id; uint32_t table; uint8_t depth; uint8_t scope; uint8_t proto; - /* netlink message */ - struct nlmsghdr *nlh; -}; - -enum missed_nl_type { - MISSED_UNSPEC_LINK, - MISSED_UNSPEC_ADDR, - MISSED_INET_ADDR, - MISSED_INET6_ADDR, - MISSED_INET_NETCONF, - MISSED_INET6_NETCONF, - MISSED_CHILD_LINK, -}; - -struct missed_netlink { - struct cds_lfht_node hash_node; - struct rcu_head rcu; - - /* keys -- be sure to zero unused keys! */ - enum missed_nl_type type; - uint32_t ifindex; - union { - struct ether_addr addr; - struct ip_addr ip; - unsigned int ifindex; - } keys; - - /* netlink message */ - struct nlmsghdr *nlh; + /* route update message */ + void *data; + size_t size; + bool protobuf; }; static struct incomplete_if_stats incomplete_stats; static struct cds_lfht *incomplete_routes; static struct cds_lfht *ignored_interfaces; -static struct cds_lfht *missed_netlinks; #define INCOMPLETE_HASH_MIN 2 #define INCOMPLETE_HASH_MAX 64 void incomplete_interface_init(void) { - missed_netlinks = cds_lfht_new(INCOMPLETE_HASH_MIN, - INCOMPLETE_HASH_MAX, - INCOMPLETE_HASH_MAX, - CDS_LFHT_AUTO_RESIZE | - CDS_LFHT_ACCOUNTING, - NULL); - if (!missed_netlinks) - rte_panic("Can't allocate hash for incomplete links\n"); incomplete_routes = cds_lfht_new(INCOMPLETE_HASH_MIN, INCOMPLETE_HASH_MAX, INCOMPLETE_HASH_MAX, @@ -1461,26 +1950,15 @@ incomplete_route_free(struct rcu_head *head) struct incomplete_route *route; route = caa_container_of(head, struct incomplete_route, rcu); - free(route->nlh); + free(route->data); free(route); } -static void -missed_netlink_free(struct rcu_head *head) -{ - struct missed_netlink *missed; - - missed = caa_container_of(head, struct missed_netlink, rcu); - free(missed->nlh); - free(missed); -} - void incomplete_interface_cleanup(void) { struct cds_lfht_iter iter; struct incomplete_route *route; struct ignored_interface *ignored; - struct missed_netlink *missed; cds_lfht_for_each_entry(incomplete_routes, &iter, route, hash_node) { @@ 
-1495,13 +1973,6 @@ void incomplete_interface_cleanup(void) call_rcu(&ignored->if_rcu, ignored_if_free); } cds_lfht_destroy(ignored_interfaces, NULL); - - cds_lfht_for_each_entry(missed_netlinks, &iter, - missed, hash_node) { - cds_lfht_del(missed_netlinks, &missed->hash_node); - call_rcu(&missed->rcu, missed_netlink_free); - } - cds_lfht_destroy(missed_netlinks, NULL); } static inline int ignored_interface_match_fn(struct cds_lfht_node *node, @@ -1603,7 +2074,11 @@ void incomplete_routes_make_complete(void) cds_lfht_for_each_entry(incomplete_routes, &iter, route, hash_node) { /* CONT_SRC_UPLINK does not use the rib broker */ - notify_route(route->nlh, CONT_SRC_MAIN); + if (route->protobuf) + ip_route_pb_handler(route->data, route->size, + CONT_SRC_MAIN); + else + notify_route(route->data, CONT_SRC_MAIN); } } @@ -1611,7 +2086,7 @@ static uint32_t incomplete_route_hash(struct incomplete_route *route) { int num_words; - num_words = (offsetof(struct incomplete_route, nlh) - + num_words = (offsetof(struct incomplete_route, data) - offsetof(struct incomplete_route, dest) + 3) / 4; return rte_jhash_32b((uint32_t *)&route->dest, num_words, 0); } @@ -1632,24 +2107,23 @@ static inline int incomplete_route_match_fn(struct cds_lfht_node *node, return 0; if (route->table != route_key->table) return 0; - if (route->vrf_id != route_key->vrf_id) - return 0; if (route->label != route_key->label) return 0; - if (memcmp(&route->dest, &route_key->dest, sizeof(struct ip_addr))) + if (memcmp(&route->dest, &route_key->dest, sizeof(struct ip_addr)) != 0) return 0; return 1; } /* - * Add an incomplete route. If we already have an entry for that key then - * update the message to new one. + * Add an incomplete route. If we already have an entry for that key + * then update the message to new one. 
*/ -void incomplete_route_add(vrfid_t vrf_id, const void *dst, - uint8_t family, uint8_t depth, uint32_t table, - uint8_t scope, uint8_t proto, - const struct nlmsghdr *nlh) +static bool +incomplete_route_add(const void *dst, + uint8_t family, uint8_t depth, uint32_t table, + uint8_t scope, uint8_t proto, void *data, size_t size, + bool protobuf) { struct incomplete_route *route; struct cds_lfht_node *ret_node; @@ -1657,7 +2131,7 @@ void incomplete_route_add(vrfid_t vrf_id, const void *dst, route = calloc(1, sizeof(*route)); if (!route) { incomplete_stats.mem_fails++; - return; + return false; } switch (family) { @@ -1676,14 +2150,9 @@ void incomplete_route_add(vrfid_t vrf_id, const void *dst, route->table = table; route->scope = scope; route->proto = proto; - route->vrf_id = vrf_id; - route->nlh = malloc(nlh->nlmsg_len); - if (!route->nlh) { - free(route); - incomplete_stats.mem_fails++; - return; - } - memcpy(route->nlh, nlh, nlh->nlmsg_len); + route->data = data; + route->size = size; + route->protobuf = protobuf; ret_node = cds_lfht_add_replace(incomplete_routes, incomplete_route_hash(route), @@ -1700,9 +2169,54 @@ void incomplete_route_add(vrfid_t vrf_id, const void *dst, hash_node); call_rcu(&route->rcu, incomplete_route_free); } + + return true; +} + +/* + * Add an incomplete route in netlink format. If we already have an + * entry for that key then update the message to new one. 
+ */ +void incomplete_route_add_nl(const void *dst, + uint8_t family, uint8_t depth, uint32_t table, + uint8_t scope, uint8_t proto, + const struct nlmsghdr *nlh) +{ + void *data = malloc(nlh->nlmsg_len); + bool protobuf = false; + + if (!data) { + incomplete_stats.mem_fails++; + return; + } + memcpy(data, nlh, nlh->nlmsg_len); + + if (!incomplete_route_add(dst, family, depth, table, scope, proto, + data, nlh->nlmsg_len, protobuf)) + free(data); +} + +/* Add incomplete protobuf route */ +void incomplete_route_add_pb(const void *dst, + uint8_t family, uint8_t depth, uint32_t table, + uint8_t scope, uint8_t proto, + void *data, size_t size) +{ + void *data_cpy = malloc(size); + bool protobuf = true; + + if (!data_cpy) { + incomplete_stats.mem_fails++; + return; + } + memcpy(data_cpy, data, size); + + if (!incomplete_route_add(dst, family, depth, table, scope, proto, + data_cpy, size, protobuf)) + free(data_cpy); } -void incomplete_route_del(vrfid_t vrf_id, const void *dst, +void incomplete_route_del(const void *dst, uint8_t family, uint8_t depth, uint32_t table, uint8_t scope, uint8_t proto) @@ -1730,7 +2244,6 @@ void incomplete_route_del(vrfid_t vrf_id, const void *dst, route.table = table; route.scope = scope; route.proto = proto; - route.vrf_id = vrf_id; cds_lfht_lookup(incomplete_routes, incomplete_route_hash(&route), @@ -1750,269 +2263,6 @@ void incomplete_route_del(vrfid_t vrf_id, const void *dst, incomplete_stats.route_del++; } -static void missed_netlink_replay_type(unsigned int ifindex, - enum missed_nl_type type) -{ - struct cds_lfht_iter iter; - struct missed_netlink *missed; - - cds_lfht_for_each_entry(missed_netlinks, &iter, missed, hash_node) { - if (missed->ifindex == ifindex && - missed->type == type) { - incomplete_stats.missed_replayed++; - rtnl_process(missed->nlh, (void *)CONT_SRC_MAIN); - cds_lfht_del(missed_netlinks, &missed->hash_node); - call_rcu(&missed->rcu, missed_netlink_free); - } - } -} - -/* - * Call this each time a new ifindex arrives 
to see if there are any - * netlink messages that need to be replayed. - */ -void missed_netlink_replay(unsigned int ifindex) -{ - missed_netlink_replay_type(ifindex, MISSED_UNSPEC_LINK); - missed_netlink_replay_type(ifindex, MISSED_UNSPEC_ADDR); - missed_netlink_replay_type(ifindex, MISSED_INET_ADDR); - missed_netlink_replay_type(ifindex, MISSED_INET6_ADDR); - missed_netlink_replay_type(ifindex, MISSED_INET_NETCONF); - missed_netlink_replay_type(ifindex, MISSED_INET6_NETCONF); - missed_netlink_replay_type(ifindex, MISSED_CHILD_LINK); -} - -static uint32_t missed_netlink_hash(struct missed_netlink *missed) -{ - int num_words; - - num_words = (offsetof(struct missed_netlink, nlh) - - offsetof(struct missed_netlink, ifindex) + 3) / 4; - return rte_jhash_32b((uint32_t *)&missed->ifindex, num_words, 0); -} - -static inline int missed_netlink_match_fn(struct cds_lfht_node *node, - const void *key) -{ - const struct missed_netlink *missed; - const struct missed_netlink *missed_key = key; - - missed = caa_container_of(node, struct missed_netlink, hash_node); - - if (missed->type != missed_key->type) - return 0; - if (missed->ifindex != missed_key->ifindex) - return 0; - if (missed->type == MISSED_UNSPEC_ADDR) { - if (memcmp(&missed->keys.addr, - &missed_key->keys.addr, - sizeof(struct ether_addr)) != 0) - return 0; - } - if (missed->type == MISSED_INET_ADDR) { - if (memcmp(&missed->keys.ip.address.ip_v4, - &missed_key->keys.ip.address.ip_v4, - 4) != 0) - return 0; - } - if (missed->type == MISSED_INET6_ADDR) { - if (memcmp(&missed->keys.ip.address.ip_v6, - &missed_key->keys.ip.address.ip_v6, - 16) != 0) - return 0; - } - if (missed->type == MISSED_CHILD_LINK) { - if (memcmp(&missed->keys.ifindex, - &missed_key->keys.ifindex, - sizeof(unsigned int)) != 0) - return 0; - } - - return 1; -} - -/* - * Add a missed netlink message. - * If we already have an entry for that key then update the message to new one. 
- */ -static void missed_netlink_add(enum missed_nl_type type, - unsigned int ifindex, - const void *addr, - const struct nlmsghdr *nlh) -{ - struct missed_netlink *missed; - struct cds_lfht_node *ret_node; - - missed = calloc(1, sizeof(*missed)); - if (!missed) { - incomplete_stats.mem_fails++; - return; - } - - missed->type = type; - if (type == MISSED_UNSPEC_ADDR) - memcpy(&missed->keys.addr, addr, sizeof(struct ether_addr)); - if (type == MISSED_INET_ADDR) - memcpy(&missed->keys.ip.address.ip_v4, - addr, sizeof(struct in_addr)); - if (type == MISSED_INET6_ADDR) - memcpy(&missed->keys.ip.address.ip_v6, - addr, sizeof(struct in6_addr)); - if (type == MISSED_CHILD_LINK) - memcpy(&missed->keys.ifindex, - addr, sizeof(unsigned int)); - missed->ifindex = ifindex; - missed->nlh = malloc(nlh->nlmsg_len); - if (!missed->nlh) { - free(missed); - incomplete_stats.mem_fails++; - return; - } - memcpy(missed->nlh, nlh, nlh->nlmsg_len); - - ret_node = cds_lfht_add_replace(missed_netlinks, - missed_netlink_hash(missed), - missed_netlink_match_fn, - missed, - &missed->hash_node); - if (ret_node == NULL) { - /* added, but was no old entry */ - incomplete_stats.missed_add++; - } else if (ret_node != &missed->hash_node) { - /* replaced, so free old one */ - incomplete_stats.missed_update++; - missed = caa_container_of(ret_node, struct missed_netlink, - hash_node); - call_rcu(&missed->rcu, missed_netlink_free); - } -} - -static void missed_netlink_del(enum missed_nl_type type, - unsigned int ifindex, - const void *addr) -{ - struct missed_netlink missed, *found; - struct cds_lfht_node *node; - struct cds_lfht_iter iter; - - memset(&missed, 0, sizeof(missed)); - missed.type = type; - if (type == MISSED_UNSPEC_ADDR) - memcpy(&missed.keys.addr, addr, sizeof(struct ether_addr)); - if (type == MISSED_INET_ADDR) - memcpy(&missed.keys.ip.address.ip_v4, - addr, sizeof(struct in_addr)); - if (type == MISSED_INET6_ADDR) - memcpy(&missed.keys.ip.address.ip_v6, - addr, sizeof(struct 
in6_addr)); - if (type == MISSED_CHILD_LINK) - memcpy(&missed.keys.ifindex, - addr, sizeof(unsigned int)); - missed.ifindex = ifindex; - - cds_lfht_lookup(missed_netlinks, - missed_netlink_hash(&missed), - missed_netlink_match_fn, - &missed, - &iter); - - node = cds_lfht_iter_get_node(&iter); - if (!node) { - incomplete_stats.missed_del_missing++; - return; - } - - cds_lfht_del(missed_netlinks, node); - found = caa_container_of(node, struct missed_netlink, hash_node); - call_rcu(&found->rcu, missed_netlink_free); - incomplete_stats.missed_del++; -} - -void missed_nl_unspec_link_add(unsigned int ifindex, - const struct nlmsghdr *nlh) -{ - missed_netlink_add(MISSED_UNSPEC_LINK, ifindex, NULL, nlh); -} - -void missed_nl_unspec_link_del(unsigned int ifindex) -{ - missed_netlink_del(MISSED_UNSPEC_LINK, ifindex, NULL); -} - -void missed_nl_child_link_add(unsigned int ifindex, - unsigned int child_ifindex, - const struct nlmsghdr *nlh) -{ - missed_netlink_add(MISSED_CHILD_LINK, ifindex, &child_ifindex, nlh); -} - -void missed_nl_child_link_del(unsigned int ifindex, - unsigned int child_ifindex) -{ - missed_netlink_del(MISSED_CHILD_LINK, ifindex, &child_ifindex); -} - -void missed_nl_unspec_addr_add(unsigned int ifindex, - const struct ether_addr *addr, - const struct nlmsghdr *nlh) -{ - missed_netlink_add(MISSED_UNSPEC_ADDR, ifindex, addr, nlh); -} - -void missed_nl_unspec_addr_del(unsigned int ifindex, - const struct ether_addr *addr) -{ - missed_netlink_del(MISSED_UNSPEC_ADDR, ifindex, addr); -} - -void missed_nl_inet_addr_add(unsigned int ifindex, - unsigned char family, - const void *addr, - const struct nlmsghdr *nlh) -{ - if (family == AF_INET) - missed_netlink_add(MISSED_INET_ADDR, ifindex, addr, nlh); - else if (family == AF_INET6) - missed_netlink_add(MISSED_INET6_ADDR, ifindex, addr, nlh); - else - RTE_LOG(ERR, DATAPLANE, "%s: unsupported family\n", __func__); -} - -void missed_nl_inet_addr_del(unsigned int ifindex, - unsigned char family, - const void *addr) -{ 
- if (family == AF_INET) - missed_netlink_del(MISSED_INET_ADDR, ifindex, addr); - else if (family == AF_INET6) - missed_netlink_del(MISSED_INET6_ADDR, ifindex, addr); - else - RTE_LOG(ERR, DATAPLANE, "%s: unsupported family\n", __func__); -} - -void missed_nl_inet_netconf_add(unsigned int ifindex, - unsigned char family, - const struct nlmsghdr *nlh) -{ - if (family == AF_INET) - missed_netlink_add(MISSED_INET_NETCONF, ifindex, NULL, nlh); - else if (family == AF_INET6) - missed_netlink_add(MISSED_INET6_NETCONF, ifindex, NULL, nlh); - else - RTE_LOG(ERR, DATAPLANE, "%s: unsupported family\n", __func__); -} - -void missed_nl_inet_netconf_del(unsigned int ifindex, - unsigned char family) -{ - if (family == AF_INET) - missed_netlink_del(MISSED_INET_NETCONF, ifindex, NULL); - else if (family == AF_INET6) - missed_netlink_del(MISSED_INET6_NETCONF, ifindex, NULL); - else - RTE_LOG(ERR, DATAPLANE, "%s: unsupported family\n", __func__); -} - int cmd_incomplete(FILE *f, int argc __unused, char **argv __unused) { json_writer_t *wr = jsonw_new(f); @@ -2040,37 +2290,13 @@ int cmd_incomplete(FILE *f, int argc __unused, char **argv __unused) jsonw_uint_field(wr, "route_del_miss", incomplete_stats.route_del_missing); jsonw_uint_field(wr, "route_update", incomplete_stats.route_update); - jsonw_uint_field(wr, "missed_replayed", - incomplete_stats.missed_replayed); - jsonw_uint_field(wr, "missed_add", incomplete_stats.missed_add); - jsonw_uint_field(wr, "missed_update", incomplete_stats.missed_update); - jsonw_uint_field(wr, "missed_del", incomplete_stats.missed_del); - jsonw_uint_field(wr, "missed_del_miss", - incomplete_stats.missed_del_missing); jsonw_uint_field(wr, "mem_fail", incomplete_stats.mem_fails); - jsonw_name(wr, "outstanding_missed"); - jsonw_start_array(wr); + jsonw_end_object(wr); + jsonw_destroy(&wr); - struct cds_lfht_iter iter; - struct missed_netlink *missed; - - cds_lfht_for_each_entry(missed_netlinks, &iter, missed, hash_node) { - jsonw_start_object(wr); - - 
jsonw_uint_field(wr, "ifindex", missed->ifindex); - jsonw_uint_field(wr, "type", missed->type); - - jsonw_end_object(wr); - } - - jsonw_end_array(wr); - - jsonw_end_object(wr); - jsonw_destroy(&wr); - - return 0; -} + return 0; +} void if_set_cont_src(struct ifnet *ifp, enum cont_src_en cont_src) { @@ -2087,10 +2313,10 @@ bool if_port_is_uplink(portid_t portid) if (is_local_controller() || (portid == IF_PORT_ID_INVALID)) return false; - struct ether_addr mac_addr; + struct rte_ether_addr mac_addr; rte_eth_macaddr_get(portid, &mac_addr); - return is_same_ether_addr(&mac_addr, &config.uplink_addr); + return rte_is_same_ether_addr(&mac_addr, &config.uplink_addr); } /* Backplane ports connect switch to CPU @@ -2121,16 +2347,6 @@ bool if_is_control_channel(struct ifnet *ifp) return (!strcmp(ifp->if_name, config.ctrl_intf_name)); } -/* Is this port owned by the cont_src passed in ? - * Ports are owned by the cont_src that created them - */ -bool if_port_is_owned_by_src(enum cont_src_en cont_src, portid_t portid) -{ - const struct ifnet *ifp = ifnet_byport(portid); - - return (ifp && ifp->if_cont_src == cont_src); -} - /* * Used in json output so consider backwards compatibility * before changing existing values @@ -2149,11 +2365,23 @@ const char *iftype_name(uint8_t type) case IFT_VXLAN: return "vxlan"; case IFT_L2TPETH: return "l2tpeth"; case IFT_MACVLAN: return "macvlan"; - case IFT_VRFMASTER: return "vrf"; + case IFT_VRF: return "vrf"; default: return "UNKNOWN"; } } +enum dp_ifnet_iana_type dp_ifnet_iana_type(struct ifnet *ifp) +{ + const struct ift_ops *ops = if_get_ops(ifp); + if (!ops) + return DP_IFTYPE_IANA_OTHER; + + if (!ops->ifop_iana_type) + return DP_IFTYPE_IANA_OTHER; + + return ops->ifop_iana_type(ifp); +} + bool if_ignore_df(const struct ifnet *ifp) { return is_gre(ifp) && gre_tunnel_ignore_df(ifp); @@ -2168,10 +2396,10 @@ is_lo(const struct ifnet *ifp) return false; /* - * VRF master devices have the semantics of loopbacks in a + * VRF devices have the 
semantics of loopbacks in a * particular VRF. */ - if (ifp->if_type == IFT_VRFMASTER) + if (ifp->if_type == IFT_VRF) return true; if (ifp->if_type != IFT_LOOP) @@ -2279,13 +2507,11 @@ static int vfp_set_cfg(struct pb_msg *msg) void *payload = msg->msg; int len = msg->msg_len; - VFPSetConfig *vfp_msg = - vfpset_config__unpack(NULL, len, - payload); + VFPSetConfig *vfp_msg = vfpset_config__unpack(NULL, len, payload); if (!vfp_msg) { RTE_LOG(ERR, DATAPLANE, "failed to read vfp-set protobuf command\n"); - goto done; + return -1; } struct ifnet *vfp; @@ -2305,7 +2531,7 @@ static int vfp_set_cfg(struct pb_msg *msg) if (!vfp_msg->has_action) goto done; - vfp = ifnet_byifindex(vfp_msg->if_index); + vfp = dp_ifnet_byifindex(vfp_msg->if_index); if (!vfp) { /* Interface delete netlink may already have deleted it. */ if (vfp_msg->action != VFPSET_CONFIG__ACTION__VFP_ACTION_GET) @@ -2386,7 +2612,7 @@ int cmd_set_vfp(FILE *f __unused, int argc, char **argv) else return -1; - vfp = ifnet_byifindex(ifindex); + vfp = dp_ifnet_byifindex(ifindex); if (!vfp) { /* Interface delete netlink may already have deleted it. 
*/ if (!is_get) @@ -2416,8 +2642,7 @@ int cmd_set_vfp(FILE *f __unused, int argc, char **argv) if (is_get) return if_get_vfp(vfp, vfp_type); - else - return if_put_vfp(vfp, vfp_type); + return if_put_vfp(vfp, vfp_type); } /* update MTU of tunnels bound to specified device */ @@ -2428,7 +2653,7 @@ static void update_tunnel_mtu(struct ifnet *ifp) int if_set_mtu(struct ifnet *ifp, uint32_t mtu, bool force_update) { - struct fal_attribute_t mtu_attr = { FAL_PORT_ATTR_MTU, }; + struct fal_attribute_t mtu_attr = { .id = FAL_PORT_ATTR_MTU, }; struct fal_attribute_t l3_mtu_attr = { .id = FAL_ROUTER_INTERFACE_ATTR_MTU, }; @@ -2469,6 +2694,7 @@ int if_set_mtu(struct ifnet *ifp, uint32_t mtu, bool force_update) fal_l2_upd_port(ifp->if_index, &mtu_attr); update_tunnel_mtu(ifp); + dp_event(DP_EVT_IF_MTU_CHANGE, 0, ifp, mtu, 0, NULL); } else { RTE_LOG(ERR, DATAPLANE, "%s changing MTU failed: %d (%s)\n", @@ -2481,7 +2707,7 @@ int if_set_mtu(struct ifnet *ifp, uint32_t mtu, bool force_update) int if_set_l2_address(struct ifnet *ifp, uint32_t l2_addr_len, void *l2_addr) { - struct fal_attribute_t mac_attr = { FAL_PORT_ATTR_MAC_ADDRESS, }; + struct fal_attribute_t mac_attr = { .id = FAL_PORT_ATTR_MAC_ADDRESS, }; struct fal_attribute_t l3_mac_attr = { .id = FAL_ROUTER_INTERFACE_ATTR_SRC_MAC_ADDRESS, }; @@ -2503,7 +2729,7 @@ int if_set_l2_address(struct ifnet *ifp, uint32_t l2_addr_len, void *l2_addr) * Note: this assumes the L2 address is an Ethernet MAC. This * will have to be changed if this assumption ever changes. 
*/ - struct ether_addr old_mac_addr = ifp->eth_addr; + struct rte_ether_addr old_mac_addr = ifp->eth_addr; if (ops->ifop_set_l2_address) ret = ops->ifop_set_l2_address(ifp, l2_addr_len, l2_addr); @@ -2543,7 +2769,8 @@ int if_set_l2_address(struct ifnet *ifp, uint32_t l2_addr_len, void *l2_addr) int if_set_poe(struct ifnet *ifp, bool enable) { - struct fal_attribute_t poe_attr = { FAL_PORT_ATTR_POE_ADMIN_STATUS, }; + struct fal_attribute_t poe_attr = { + .id = FAL_PORT_ATTR_POE_ADMIN_STATUS, }; /* * Don't make any changes if the device has been hot @@ -2589,7 +2816,7 @@ int if_get_poe(struct ifnet *ifp, bool *admin_status, bool *oper_status) void if_finish_create(struct ifnet *ifp, const char *ifi_type, const char *kind, - const struct ether_addr *mac_addr) + const struct rte_ether_addr *mac_addr) { struct fal_attribute_t attrs[10]; unsigned int nattrs = 5; @@ -2634,12 +2861,10 @@ void if_finish_create(struct ifnet *ifp, const char *ifi_type, } fal_l2_new_port(ifp->if_index, nattrs, attrs); - incomplete_routes_make_complete(); - missed_netlink_replay(ifp->if_index); - ifp->if_created = true; - if_create_finished(ifp, mac_addr); - dp_event(DP_EVT_IF_CREATE_FINISHED, 0, ifp, 0, 0, NULL); + if_change_features_mode(ifp, IF_FEAT_MODE_FLAG_L2_ENABLED); + + incomplete_routes_make_complete(); } int if_start(struct ifnet *ifp) @@ -2674,6 +2899,8 @@ int if_start(struct ifnet *ifp) /* Enable forwarding in the FAL */ fal_if_update_forwarding_all(ifp); + dp_event(DP_EVT_IF_ADMIN_STATUS_CHANGE, 0, ifp, true, 0, NULL); + RTE_LOG(NOTICE, DATAPLANE, "%s changed state to admin up\n", ifp->if_name); @@ -2710,7 +2937,7 @@ int if_stop(struct ifnet *ifp) if_stats_disable(ifp); - mrt_purge(ifp); + dp_event(DP_EVT_IF_ADMIN_STATUS_CHANGE, 0, ifp, false, 0, NULL); RTE_LOG(WARNING, DATAPLANE, "%s changed state to admin down\n", ifp->if_name); @@ -2787,38 +3014,29 @@ int if_set_broadcast(struct ifnet *ifp, bool enable) return ret; } -void if_create_finished(struct ifnet *ifp, const struct 
ether_addr *mac_addr) +void dp_ifnet_link_status(struct ifnet *ifp, + struct dp_ifnet_link_status *if_link) { const struct ift_ops *ops; + int ret = -EOPNOTSUPP; ops = if_get_ops(ifp); - if (!ops) - return; + if (ops && ops->ifop_get_link_status) + ret = ops->ifop_get_link_status(ifp, if_link); - if (ops->ifop_create_finished) - ops->ifop_create_finished(ifp, mac_addr); + if (ret < 0) { + if_link->link_status = ifp->if_flags & IFF_RUNNING; + if_link->link_speed = DP_IFNET_LINK_SPEED_UNKNOWN; + if_link->link_duplex = DP_IFNET_LINK_DUPLEX_UNKNOWN; + } } -void if_get_link_status(struct ifnet *ifp, - struct if_link_status *if_link) +bool dp_ifnet_admin_status(struct ifnet *ifp) { - struct rte_eth_link link; - - if (ifp->if_type == IFT_ETHER && ifp->if_local_port && - !ifp->unplugged) { - memset(&link, 0, sizeof(link)); - rte_eth_link_get_nowait(ifp->if_port, &link); + if (ifp->if_flags & IFF_UP) + return true; - if_link->link_status = link.link_status; - if_link->link_duplex = - link.link_duplex ? 
IF_LINK_DUPLEX_FULL : - IF_LINK_DUPLEX_HALF; - if_link->link_speed = link.link_speed; - } else { - if_link->link_status = ifp->if_flags & IFF_RUNNING; - if_link->link_speed = IF_LINK_SPEED_UNKNOWN; - if_link->link_duplex = IF_LINK_DUPLEX_UNKNOWN; - } + return false; } int if_dump_state(struct ifnet *ifp, json_writer_t *wr, @@ -2935,31 +3153,37 @@ fal_if_update_forwarding(struct ifnet *ifp, uint8_t family, bool multicast) bool fwd_enable = (ifp->if_flags & IFF_UP); struct fal_attribute_t state; int ret; + const char *family_str; switch (family) { case AF_INET: if (multicast) { + family_str = "IPv4 Multicast"; state.id = FAL_ROUTER_INTERFACE_ATTR_V4_MCAST_ENABLE; fwd_enable = ifp->ip_mc_forwarding; } else { + family_str = "IPv4"; state.id = FAL_ROUTER_INTERFACE_ATTR_ADMIN_V4_STATE; - if (pl_node_is_feature_enabled( + if (pl_node_is_feature_enabled_by_inst( &ipv4_in_no_forwarding_feat, ifp)) fwd_enable = false; } break; case AF_INET6: if (multicast) { + family_str = "IPv6 Multicast"; state.id = FAL_ROUTER_INTERFACE_ATTR_V6_MCAST_ENABLE; fwd_enable = ifp->ip6_mc_forwarding; } else { + family_str = "IPv6"; state.id = FAL_ROUTER_INTERFACE_ATTR_ADMIN_V6_STATE; - if (pl_node_is_feature_enabled( + if (pl_node_is_feature_enabled_by_inst( &ipv6_in_no_forwarding_feat, ifp)) fwd_enable = false; } break; case AF_MPLS: + family_str = "MPLS"; state.id = FAL_ROUTER_INTERFACE_ATTR_ADMIN_MPLS_STATE; if (!rcu_dereference(ifp->mpls_label_table)) fwd_enable = false; @@ -2981,8 +3205,7 @@ fal_if_update_forwarding(struct ifnet *ifp, uint8_t family, bool multicast) } else RTE_LOG(NOTICE, DATAPLANE, "%s Forwarding %s for %s\n", - ((family == AF_INET) ? "IPv4" : - ((family == AF_INET6) ? "IPv6" : "MPLS")), + family_str, fwd_enable ? 
"enabled" : "disabled", ifp->if_name); } @@ -2997,17 +3220,20 @@ fal_if_update_forwarding_all(struct ifnet *ifp) fal_if_update_forwarding(ifp, AF_INET6, true); } -void -if_create_l3_intf(struct ifnet *ifp, const struct ether_addr *mac_addr) +int +if_fal_create_l3_intf(struct ifnet *ifp) { - struct fal_attribute_t l3_attrs[10]; - unsigned int l3_nattrs = 2; + struct fal_attribute_t l3_attrs[12]; + unsigned int l3_nattrs = 3; + fal_object_t fal_l3; int ret = 0; l3_attrs[0].id = FAL_ROUTER_INTERFACE_ATTR_IFINDEX; l3_attrs[0].value.u32 = ifp->if_index; l3_attrs[1].id = FAL_ROUTER_INTERFACE_ATTR_VRF_ID; l3_attrs[1].value.u32 = ifp->if_vrfid; + l3_attrs[2].id = FAL_ROUTER_INTERFACE_ATTR_VRF_OBJ; + l3_attrs[2].value.objid = get_vrf(ifp->if_vrfid)->v_fal_obj; if (ifp->if_vlan) { if (ifp->if_vlan) { @@ -3029,50 +3255,86 @@ if_create_l3_intf(struct ifnet *ifp, const struct ether_addr *mac_addr) l3_attrs[l3_nattrs].value.u16 = ifp->if_mtu; l3_nattrs++; } - if (mac_addr) { + if (!rte_is_zero_ether_addr(&ifp->eth_addr)) { l3_attrs[l3_nattrs].id = FAL_ROUTER_INTERFACE_ATTR_SRC_MAC_ADDRESS; - memcpy(&l3_attrs[l3_nattrs].value.mac, mac_addr, + memcpy(&l3_attrs[l3_nattrs].value.mac, &ifp->eth_addr, sizeof(l3_attrs[l3_nattrs].value.mac)); l3_nattrs++; } + l3_attrs[l3_nattrs].id = FAL_ROUTER_INTERFACE_ATTR_V4_MCAST_ENABLE; + l3_attrs[l3_nattrs].value.booldata = ifp->ip_mc_forwarding; + l3_nattrs++; + + l3_attrs[l3_nattrs].id = FAL_ROUTER_INTERFACE_ATTR_ADMIN_V4_STATE; + l3_attrs[l3_nattrs].value.booldata = + !pl_node_is_feature_enabled_by_inst( + &ipv4_in_no_forwarding_feat, ifp); + l3_nattrs++; + + l3_attrs[l3_nattrs].id = FAL_ROUTER_INTERFACE_ATTR_V6_MCAST_ENABLE; + l3_attrs[l3_nattrs].value.booldata = ifp->ip6_mc_forwarding; + l3_nattrs++; + + l3_attrs[l3_nattrs].id = FAL_ROUTER_INTERFACE_ATTR_ADMIN_V6_STATE; + l3_attrs[l3_nattrs].value.booldata = + !pl_node_is_feature_enabled_by_inst( + &ipv6_in_no_forwarding_feat, ifp); + l3_nattrs++; + + l3_attrs[l3_nattrs].id = 
FAL_ROUTER_INTERFACE_ATTR_ADMIN_MPLS_STATE; + l3_attrs[l3_nattrs].value.booldata = + !!rcu_dereference(ifp->mpls_label_table); + l3_nattrs++; + ret = fal_create_router_interface(l3_nattrs, l3_attrs, - &ifp->fal_l3); - if ((ret == 0) && !ifp->fal_l3) { + &fal_l3); + if ((ret == 0) && !fal_l3) { RTE_LOG(ERR, DATAPLANE, "Invalid L3 object ID returned for %s\n", ifp->if_name); - return; + return -EINVAL; } - if ((ret < 0) && (ret != -EOPNOTSUPP)) + if (ret == 0) + CMM_STORE_SHARED(ifp->fal_l3, fal_l3); + if (ret == -EOPNOTSUPP) + ret = 0; + if (ret < 0) RTE_LOG(ERR, DATAPLANE, "Failed to create L3 FAL object for %s, %d (%s)\n", - ifp->if_name, ret, strerror(ret)); + ifp->if_name, ret, strerror(-ret)); + + return ret; } -void -if_delete_l3_intf(struct ifnet *ifp) +int +if_fal_delete_l3_intf(struct ifnet *ifp) { int ret = 0; if (!ifp->fal_l3) - return; + return 0; ret = fal_delete_router_interface(ifp->fal_l3); if (ret == 0) - ifp->fal_l3 = 0; + CMM_STORE_SHARED(ifp->fal_l3, FAL_NULL_OBJECT_ID); - if ((ret < 0) && (ret != -EOPNOTSUPP)) + if (ret == -EOPNOTSUPP) + ret = 0; + if (ret < 0) RTE_LOG(ERR, DATAPLANE, "Failed to delete L3 FAL object for %s, %d (%s)\n", - ifp->if_name, ret, strerror(ret)); + ifp->if_name, ret, strerror(-ret)); + return ret; } int if_set_l3_intf_attr(struct ifnet *ifp, struct fal_attribute_t *attr) { + struct fal_attribute_t l3_attr; struct fal_attribute_t l2_attr; + int ret; /* for backwards compatibility */ switch (attr->id) { @@ -3081,10 +3343,20 @@ if_set_l3_intf_attr(struct ifnet *ifp, struct fal_attribute_t *attr) l2_attr.value.u16 = attr->value.u16; fal_l2_upd_port(ifp->if_index, &l2_attr); break; - case FAL_ROUTER_INTERFACE_ATTR_VRF_ID: + case FAL_ROUTER_INTERFACE_ATTR_VRF_OBJ: l2_attr.id = FAL_PORT_ATTR_VRF_ID; - l2_attr.value.u32 = attr->value.u32; + l3_attr.value.u32 = ifp->if_vrfid; fal_l2_upd_port(ifp->if_index, &l2_attr); + + if (!ifp->fal_l3) + return -EOPNOTSUPP; + + l3_attr.id = FAL_ROUTER_INTERFACE_ATTR_VRF_ID; + 
l3_attr.value.u32 = ifp->if_vrfid; + ret = fal_set_router_interface_attr(ifp->fal_l3, &l3_attr); + if (ret < 0 && ret != -EOPNOTSUPP) + return ret; + break; } @@ -3094,6 +3366,81 @@ if_set_l3_intf_attr(struct ifnet *ifp, struct fal_attribute_t *attr) return fal_set_router_interface_attr(ifp->fal_l3, attr); } +int +if_get_l3_intf_attr(struct ifnet *ifp, uint32_t attr_count, + struct fal_attribute_t *attr_list) +{ + uint32_t i; + + for (i = 0; i < attr_count; i++) { + switch (attr_list[i].id) { + case FAL_ROUTER_INTERFACE_ATTR_EGRESS_QOS_MAP: + if (!ifp->fal_l3) + return -EOPNOTSUPP; + break; + + default: + return -EOPNOTSUPP; + } + } + return fal_get_router_interface_attr(ifp->fal_l3, attr_count, + attr_list); +} + + +/* + * Retrieve FAL router interface object stats for hardware-switch traffic. + * + * This function assumes: + * 1. For-us traffic increments counters on both the hardware and + * software objects (and thus the latter shouldn't be taken into + * account). + * 2. Software transmitted (both for punted and locally generated + * traffic) packets cause counters to be incremented only on the + * software objects. 
+ */ +int +if_fal_l3_get_stats(struct ifnet *ifp, struct if_data *stats) +{ + int i; + int ret; + uint64_t cntrs[FAL_ROUTER_INTERFACE_STAT_MAX]; + enum fal_router_interface_stat_t + cntr_ids[FAL_ROUTER_INTERFACE_STAT_MAX]; + + if (!ifp->fal_l3) + return 0; + + for (i = FAL_ROUTER_INTERFACE_STAT_MIN; + i < FAL_ROUTER_INTERFACE_STAT_MAX; i++) + cntr_ids[i] = i; + + memset(cntrs, 0, sizeof(cntrs)); + ret = fal_get_router_interface_stats(ifp->fal_l3, + FAL_ROUTER_INTERFACE_STAT_MAX, + cntr_ids, cntrs); + if (ret < 0 && ret != -EOPNOTSUPP) + return ret; + + if (ret != -EOPNOTSUPP) { + /* + * If HW stats aren't supported then not overwriting + * these values here will ensure that at least the + * software stats are still maintained based on for-us + * traffic + */ + stats->ifi_ibytes = cntrs[FAL_ROUTER_INTERFACE_STAT_IN_OCTETS]; + stats->ifi_ipackets = + cntrs[FAL_ROUTER_INTERFACE_STAT_IN_PACKETS]; + } + /* + * Hw doesn't count from-us packets so sum the hw and sw stats here. + */ + stats->ifi_obytes += cntrs[FAL_ROUTER_INTERFACE_STAT_OUT_OCTETS]; + stats->ifi_opackets += cntrs[FAL_ROUTER_INTERFACE_STAT_OUT_PACKETS]; + return 0; +} + int if_set_backplane(struct ifnet *ifp, unsigned int ifindex) { const struct ift_ops *ops; @@ -3108,16 +3455,630 @@ int if_set_backplane(struct ifnet *ifp, unsigned int ifindex) return ops->ifop_set_backplane(ifp, ifindex); } -int if_get_backplane(struct ifnet *ifp, unsigned int *ifindex) +int if_set_speed(struct ifnet *ifp, bool autoneg, + uint32_t forced_speed, int duplex) { const struct ift_ops *ops; + int ret = -EOPNOTSUPP; + + /* + * Don't make any changes if the device has been hot + * unplugged. Only bad things can happen. 
+ */ + if (ifp->unplugged) + return 0; + + if (autoneg) + RTE_LOG(INFO, DATAPLANE, + "%s setting to auto-negotiate", ifp->if_name); + else + RTE_LOG(INFO, DATAPLANE, + "%s setting to forced speed %uMbps\n", + ifp->if_name, forced_speed); ops = if_get_ops(ifp); if (!ops) return -EINVAL; - if (!ops->ifop_get_backplane) - return -EOPNOTSUPP; + if (ops->ifop_set_speed) + ret = ops->ifop_set_speed(ifp, autoneg, forced_speed, + duplex); + + if (ret < 0) { + RTE_LOG(ERR, DATAPLANE, + "%s setting speed %sautoneg speed %ubps failed: %s\n", + ifp->if_name, autoneg ? "" : "not ", + forced_speed, strerror(-ret)); + } + + return ret; +} + +int if_set_usability(struct ifnet *ifp, bool usability) +{ + const struct ift_ops *ops; + + ops = if_get_ops(ifp); + if (!ops) + return -EINVAL; + + if (ops->ifop_set_usability) + return ops->ifop_set_usability(ifp, usability); + + return 0; +} + +/* Show link state (only applies to physical ports) */ +static void show_link_state(json_writer_t *wr, struct ifnet *ifp) +{ + struct dp_ifnet_link_status link; + + dp_ifnet_link_status(ifp, &link); + + jsonw_name(wr, "link"); + jsonw_start_object(wr); + jsonw_bool_field(wr, "up", link.link_status); + if (link.link_duplex != DP_IFNET_LINK_DUPLEX_UNKNOWN) + jsonw_string_field(wr, "duplex", + link.link_duplex == + DP_IFNET_LINK_DUPLEX_FULL ? 
+ "full" : "half"); + if (link.link_speed != DP_IFNET_LINK_SPEED_UNKNOWN) + jsonw_uint_field(wr, "speed", link.link_speed); + jsonw_end_object(wr); +} + +/* Device performance statistics + * TODO add a instance counter to avoid race with timer + */ +static void show_perf_info(json_writer_t *wr, const char *name, + const struct if_perf *stats) +{ + char label[32]; + int i; + + jsonw_uint_field(wr, name, if_scaled(stats->cur)); + + snprintf(label, sizeof(label), "%s_avg", name); + jsonw_name(wr, label); + jsonw_start_array(wr); + for (i = 0; i < 3; i++) + jsonw_uint(wr, if_scaled(stats->avg[i])); + jsonw_end_array(wr); +} + +/* Interface performance counters + * Only maintained on physical and vif ports now. + */ +static void show_perf_stats(json_writer_t *wr, struct ifnet *ifp) +{ + show_perf_info(wr, "tx_pps", &ifp->if_txpps); + show_perf_info(wr, "tx_bps", &ifp->if_txbps); + show_perf_info(wr, "rx_pps", &ifp->if_rxpps); + show_perf_info(wr, "rx_bps", &ifp->if_rxbps); +} + +static void show_stats(json_writer_t *wr, struct ifnet *ifp) +{ + struct if_data stats; + + jsonw_name(wr, "statistics"); + jsonw_start_object(wr); + + if_stats(ifp, &stats); + jsonw_uint_field(wr, "rx_packets", stats.ifi_ipackets); + jsonw_uint_field(wr, "rx_errors", stats.ifi_ierrors); + jsonw_uint_field(wr, "tx_packets", stats.ifi_opackets); + jsonw_uint_field(wr, "tx_errors", stats.ifi_oerrors); + jsonw_uint_field(wr, "rx_bytes", stats.ifi_ibytes); + jsonw_uint_field(wr, "tx_bytes", stats.ifi_obytes); + + if_dump_state(ifp, wr, IF_DS_STATS); + + show_perf_stats(wr, ifp); + + jsonw_uint_field(wr, "rx_dropped", stats.ifi_idropped); + jsonw_uint_field(wr, "tx_dropped", ifi_odropped(&stats)); + jsonw_uint_field(wr, "tx_dropped_txring", stats.ifi_odropped_txring); + jsonw_uint_field(wr, "tx_dropped_hwq", stats.ifi_odropped_hwq); + jsonw_uint_field(wr, "tx_dropped_proto", stats.ifi_odropped_proto); + jsonw_uint_field(wr, "rx_bridge", stats.ifi_ibridged); + jsonw_uint_field(wr, "rx_multicast", 
stats.ifi_imulticast); + jsonw_uint_field(wr, "rx_vlan", stats.ifi_ivlan); + jsonw_uint_field(wr, "rx_bad_vid", stats.ifi_no_vlan); + jsonw_uint_field(wr, "rx_bad_address", stats.ifi_no_address); + jsonw_uint_field(wr, "rx_non_ip", stats.ifi_unknown); + + jsonw_end_object(wr); +} + +static void +show_xstats(json_writer_t *wr, struct ifnet *ifp) +{ + jsonw_name(wr, "xstatistics"); + jsonw_start_object(wr); + + if_dump_state(ifp, wr, IF_DS_XSTATS); + + jsonw_end_object(wr); +} + +static void show_if_l2_filter(json_writer_t *wr, struct ifnet *ifp) +{ + struct cds_lfht_iter iter; + struct l2_mcfltr_node *l2mf; + struct cds_lfht *tmp_hash; + + jsonw_name(wr, "l2_mcast_filters"); + + jsonw_start_object(wr); + + jsonw_uint_field(wr, "if_allmcast_ref", ifp->if_allmcast_ref); + jsonw_string_field(wr, "sw_filter", !ifp->if_allmcast_ref + ? (!ifp->if_mac_filtr_active + ? "promiscuous" : "active") : "disabled"); + jsonw_string_field(wr, "hw_filter", ifp->if_mac_filtr_supported ? + (!ifp->if_mac_filtr_active ? + "promiscuous" : "active") : "unsupported"); + jsonw_name(wr, "addresses"); + jsonw_start_array(wr); + tmp_hash = rcu_dereference(ifp->if_mcfltr_hash); + if (tmp_hash) { + cds_lfht_for_each_entry(tmp_hash, &iter, l2mf, l2mf_node) { + char ebuf[32]; + + jsonw_string(wr, ether_ntoa_r(&l2mf->l2mf_addr, ebuf)); + } + } + jsonw_end_array(wr); + jsonw_end_object(wr); +} + +static void show_af_ifconfig(json_writer_t *wr, struct ifnet *ifp) +{ + jsonw_name(wr, "ipv4"); + + jsonw_start_object(wr); + jsonw_uint_field(wr, "forwarding", + !pl_node_is_feature_enabled_by_inst( + &ipv4_in_no_forwarding_feat, ifp)); + jsonw_uint_field(wr, "proxy_arp", ifp->ip_proxy_arp); + jsonw_string_field(wr, "garp_req_op", + (ifp->ip_garp_op.garp_req_action == GARP_PKT_DROP) ? + "Drop" : "Update"); + jsonw_string_field(wr, "garp_rep_op", + (ifp->ip_garp_op.garp_rep_action == GARP_PKT_DROP) ? 
+ "Drop" : "Update"); + jsonw_uint_field(wr, "mc_forwarding", ifp->ip_mc_forwarding); + jsonw_uint_field(wr, "redirects", ip_redirects_get()); + if (pl_node_is_feature_enabled_by_inst(&ipv4_rpf_feat, ifp)) { + if (ifp->ip_rpf_strict) + jsonw_uint_field(wr, "rp_filter", 1); + else + jsonw_uint_field(wr, "rp_filter", 2); + } else { + jsonw_uint_field(wr, "rp_filter", 0); + } + jsonw_name(wr, "validate_features"); + jsonw_start_array(wr); + pl_node_iter_features(ipv4_validate_node_ptr, ifp, pl_print_feats, wr); + jsonw_end_array(wr); + jsonw_name(wr, "out_features"); + jsonw_start_array(wr); + pl_node_iter_features(ipv4_out_node_ptr, ifp, pl_print_feats, wr); + jsonw_end_array(wr); + jsonw_end_object(wr); + + jsonw_name(wr, "ipv6"); + + jsonw_start_object(wr); + jsonw_uint_field(wr, "forwarding", + !pl_node_is_feature_enabled_by_inst( + &ipv6_in_no_forwarding_feat, ifp)); + jsonw_uint_field(wr, "mc_forwarding", ifp->ip6_mc_forwarding); + jsonw_uint_field(wr, "redirects", ip6_redirects_get()); + jsonw_name(wr, "validate_features"); + jsonw_start_array(wr); + pl_node_iter_features(ipv6_validate_node_ptr, ifp, pl_print_feats, wr); + jsonw_end_array(wr); + jsonw_name(wr, "out_features"); + jsonw_start_array(wr); + pl_node_iter_features(ipv6_out_node_ptr, ifp, pl_print_feats, wr); + jsonw_end_array(wr); + jsonw_end_object(wr); +} + +struct ifconfig_ctx { + bool verbose; + json_writer_t *wr; +}; + +static const char *pause_enum_to_string(int pause_mode) +{ + const char *p_mode = NULL; + + switch (pause_mode) { + case FAL_PORT_FLOW_CONTROL_MODE_BOTH_ENABLE: + p_mode = "both"; + break; + case FAL_PORT_FLOW_CONTROL_MODE_RX_ONLY: + p_mode = "rx"; + break; + case FAL_PORT_FLOW_CONTROL_MODE_TX_ONLY: + p_mode = "tx"; + break; + case FAL_PORT_FLOW_CONTROL_MODE_DISABLE: + p_mode = "none"; + break; + default: + p_mode = "unknown"; + break; + } + return p_mode; +} + +static void show_eth_info_pause(json_writer_t *wr, struct ifnet *ifp) +{ + struct fal_attribute_t pause_attr; + int rv; 
+ + pause_attr.id = FAL_PORT_ATTR_REMOTE_ADVERTISED_FLOW_CONTROL_MODE; + rv = fal_l2_get_attrs(ifp->if_index, 1, &pause_attr); + + if (rv != 0) + jsonw_string_field(wr, "pause-mode", "none"); + else + jsonw_string_field(wr, "pause-mode", + pause_enum_to_string(pause_attr.value.u8)); +} + +static void show_eth_info(json_writer_t *wr, struct ifnet *ifp) +{ + jsonw_name(wr, "eth-info"); + jsonw_start_object(wr); + + show_eth_info_pause(wr, ifp); + + jsonw_name(wr, "l2_intf_platform_state"); + jsonw_start_object(wr); + fal_l2_dump_port(ifp->if_index, wr); + jsonw_end_object(wr); + + jsonw_end_object(wr); +} + +/* Show information generic interface in JSON */ +static void ifconfig(struct ifnet *ifp, void *arg) +{ + struct ifconfig_ctx *ctx = arg; + struct bridge_port *brport; + json_writer_t *wr = ctx->wr; + struct ifnet *parent; + fal_object_t fal_l3; + char ebuf[32]; + + jsonw_start_object(wr); + + jsonw_string_field(wr, "name", ifp->if_name); + jsonw_uint_field(wr, "vrf_id", + dp_vrf_get_external_id(ifp->if_vrfid)); + jsonw_uint_field(wr, "ifindex", ifp->if_index); + jsonw_uint_field(wr, "cont_src", ifp->if_cont_src); + parent = rcu_dereference(ifp->if_parent); + if (parent) + jsonw_string_field(wr, "parent", parent->if_name); + brport = rcu_dereference(ifp->if_brport); + if (brport) + jsonw_string_field(wr, "bridge", + bridge_port_get_bridge(brport)->if_name); + jsonw_uint_field(wr, "role", if_role(ifp)); + jsonw_uint_field(wr, "mtu", ifp->if_mtu); + jsonw_uint_field(wr, "flags", ifp->if_flags); + jsonw_uint_field(wr, "hw_forwarding", ifp->hw_forwarding); + jsonw_uint_field(wr, "hw_l3", ifp->fal_l3 ? 1 : 0); + jsonw_uint_field(wr, "tpid_offloaded", ifp->tpid_offloaded); + + /* + * These are deprecated in favour of the ipv4/ipv6 sub-objects + * but are retained for compatibility. 
+ */ + jsonw_uint_field(wr, "ip_forwarding", + !pl_node_is_feature_enabled_by_inst( + &ipv4_in_no_forwarding_feat, ifp)); + jsonw_uint_field(wr, "ip_proxy_arp", ifp->ip_proxy_arp); + jsonw_uint_field(wr, "ip_mc_forwarding", ifp->ip_mc_forwarding); + if (pl_node_is_feature_enabled_by_inst(&ipv4_rpf_feat, ifp)) { + if (ifp->ip_rpf_strict) + jsonw_uint_field(wr, "ip_rp_filter", 1); + else + jsonw_uint_field(wr, "ip_rp_filter", 2); + } else { + jsonw_uint_field(wr, "ip_rp_filter", 0); + } + jsonw_uint_field(wr, "ip6_forwarding", + !pl_node_is_feature_enabled_by_inst( + &ipv6_in_no_forwarding_feat, ifp)); + jsonw_uint_field(wr, "ip6_mc_forwarding", ifp->ip6_mc_forwarding); + + jsonw_uint_field(wr, "dp_id", 0); + jsonw_string_field(wr, "ether", + ether_ntoa_r(&ifp->eth_addr, ebuf)); + if (!rte_is_zero_ether_addr(&ifp->perm_addr)) + jsonw_string_field(wr, "perm_addr", + ether_ntoa_r(&ifp->perm_addr, ebuf)); + + jsonw_name(wr, "ether_lookup_features"); + jsonw_start_array(wr); + pl_node_iter_features(ether_lookup_node_ptr, ifp, pl_print_feats, wr); + jsonw_end_array(wr); + + jsonw_string_field(wr, "type", iftype_name(ifp->if_type)); + + if_dump_state(ifp, wr, IF_DS_STATE); + if (ctx->verbose) + if_dump_state(ifp, wr, IF_DS_STATE_VERBOSE); + if_dump_state(ifp, wr, IF_DS_DEV_INFO); + + show_eth_info(wr, ifp); + show_link_state(wr, ifp); + show_address(wr, ifp); + show_stats(wr, ifp); + show_xstats(wr, ifp); + show_if_l2_filter(wr, ifp); + show_af_ifconfig(wr, ifp); + + fal_l3 = CMM_LOAD_SHARED(ifp->fal_l3); + if (fal_l3) { + jsonw_name(wr, "router_intf_platform_state"); + jsonw_start_object(wr); + fal_dump_router_interface(fal_l3, wr); + jsonw_end_object(wr); + } + + jsonw_end_object(wr); +} + +static void ifconfig_up(struct ifnet *ifp, void *arg) +{ + if (ifp->if_flags & IFF_UP) + ifconfig(ifp, arg); +} + +int cmd_ifconfig(FILE *f, int argc, char **argv) +{ + struct ifconfig_ctx ctx; + json_writer_t *wr = jsonw_new(f); + if (!wr) + return -1; + + jsonw_pretty(wr, true); + 
jsonw_name(wr, "interfaces"); + jsonw_start_array(wr); + ctx.wr = wr; + ctx.verbose = false; + if (argc == 1) + dp_ifnet_walk(ifconfig_up, &ctx); + else if (strcmp(argv[1], "-a") == 0) + dp_ifnet_walk(ifconfig, &ctx); + else { + if (strcmp(argv[1], "-v") == 0) { + ctx.verbose = true; + argc--, argv++; + } + while (--argc > 0) { + struct ifnet *ifp = dp_ifnet_byifname(*++argv); + if (ifp) + ifconfig(ifp, &ctx); + } + } + jsonw_end_array(wr); + jsonw_destroy(&wr); + + return 0; +} + +struct pl_show_intf_ctx { + json_writer_t *json; + char *ifname; + struct pl_node_registration *node_ptr; +}; + +static void if_pl_print_feat(struct ifnet *ifp, void *arg) +{ + struct pl_show_intf_ctx *ctx = arg; + json_writer_t *wr = ctx->json; + + if (ctx->ifname && + (strcmp(ctx->ifname, ifp->if_name) != 0) && + (strcmp(ctx->ifname, "all") != 0)) + return; + + jsonw_start_object(wr); + jsonw_name(wr, ifp->if_name); + + jsonw_start_array(wr); + pl_node_iter_features(ctx->node_ptr, ifp, pl_print_feats, wr); + jsonw_end_array(wr); + + jsonw_end_object(wr); +} + +/* + * show features [interface ] + */ +int if_node_instance_feat_print(struct pl_command *cmd, + struct pl_node_registration *node_ptr) +{ + int argc = cmd->argc; + char **argv = cmd->argv; + char *opt, *ifname = NULL; + json_writer_t *wr; + + while (argc > 0) { + opt = next_arg(&argc, &argv); + + if (!strcmp(opt, "interface")) { + ifname = next_arg(&argc, &argv); + if (!ifname) + return 0; + } + } + + wr = jsonw_new(cmd->fp); + if (!wr) + return 0; + + struct pl_show_intf_ctx ctx = { + .json = wr, + .ifname = ifname, + .node_ptr = node_ptr, + }; + + jsonw_name(wr, "features"); + jsonw_start_object(wr); + + jsonw_name(wr, "interface"); + jsonw_start_array(wr); + dp_ifnet_walk(if_pl_print_feat, &ctx); + jsonw_end_array(wr); + + jsonw_end_object(wr); + jsonw_destroy(&wr); + return 0; +} + +/* + * Transmit one packet + * + * The expectation is that for !IFF_NOARP interfaces then the packet + * will be properly L2 encapsulated at 
this point such that it can be + * sent to the L2 neighbour. + * + * For IFF_NOARP interfaces then the packet will be L2 encapsulated + * during send. + * + * The reason for this asymmetry is to keep the address resolution + * above this layer for multipoint interfaces, yet to keep things + * simple and fast for point-to-point interfaces to avoid needing to + * perform an extra encap step before calling this function. + * + * The proto passed in is the link-layer protocol used for + * point-to-point interfaces. + */ +ALWAYS_INLINE __rte_cache_aligned +void if_output_internal(struct pl_packet *pkt) +{ + struct ifnet *ifp = pkt->out_ifp; + uint16_t proto = pkt->l2_proto; + + if (ifp->if_type == IFT_L2VLAN) { + if_add_vlan(ifp, &pkt->mbuf); - return ops->ifop_get_backplane(ifp, ifindex); + if (!pipeline_fused_l2_output(pkt)) + goto out; + + ifp = ifp->if_parent; + pkt->out_ifp = ifp; + + /* for the case where original ifp was for QinQ */ + if (ifp->if_type == IFT_L2VLAN) { + if (!pipeline_fused_l2_output(pkt)) + goto out; + ifp = ifp->if_parent; + pkt->out_ifp = ifp; + } + } + + if (!pipeline_fused_l2_output(pkt)) + goto out; + + if (likely(ifp->if_type == IFT_ETHER)) + pkt_ring_output(ifp, pkt->mbuf); + else if (ifp->if_type == IFT_BRIDGE) + bridge_output(ifp, pkt->mbuf, pkt->in_ifp); + else if (ifp->if_type == IFT_VXLAN) + vxlan_output(ifp, pkt->mbuf, proto); + else if (ifp->if_type == IFT_L2TPETH) + l2tp_output(ifp, pkt->mbuf); + else if (ifp->if_type == IFT_TUNNEL_GRE) + gre_tunnel_send(pkt->in_ifp, ifp, pkt->mbuf, proto); + else if (ifp->if_type == IFT_TUNNEL_VTI) + vti_tunnel_out(pkt->in_ifp, ifp, pkt->mbuf, proto); + else if (ifp->if_type == IFT_PPP) + ppp_tunnel_output(ifp, pkt->mbuf, pkt->in_ifp, proto); + else if (ifp->if_type == IFT_TUNNEL_OTHER) + unsup_tunnel_output(ifp, pkt->mbuf, pkt->in_ifp, proto); + else if (ifp->if_type == IFT_LOOP) + vfp_output(ifp, pkt->mbuf, pkt->in_ifp, proto); + else if (ifp->if_type == IFT_MACVLAN) + macvlan_output(ifp, 
pkt->mbuf, pkt->in_ifp, proto); + else { + /* + * Packets for other interface types shouldn't reach + * this point. + */ +out: + assert(0); + rte_pktmbuf_free(pkt->mbuf); + if_incr_dropped(ifp); + } +} + +__hot_func __rte_cache_aligned +void if_output(struct ifnet *ifp, struct rte_mbuf *m, + struct ifnet *input_ifp, uint16_t proto) +{ + struct pl_packet pkt = { + .mbuf = m, + .l2_pkt_type = pkt_mbuf_get_l2_traffic_type(m), + .in_ifp = input_ifp, + .out_ifp = ifp, + .l2_proto = proto, + }; + + if_output_internal(&pkt); +} + +/* Return ifindex for the ifp */ +unsigned int dp_ifnet_ifindex(const struct ifnet *ifp) +{ + return ifp->if_index; +} + +const char *dp_ifnet_ifname(const struct ifnet *ifp) +{ + return ifp->if_name; +} + +vrfid_t dp_ifnet_vrfid(const struct ifnet *ifp) +{ + return if_vrfid(ifp); +} + +fal_object_t dp_ifnet_fal_l3_if(const struct ifnet *ifp) +{ + return ifp->fal_l3; +} + +bool dp_ifnet_is_bridge_member(struct ifnet *ifp) +{ + if (ifp->if_brport) + return true; + + return false; +} + +void dp_ifnet_output(struct ifnet *in_ifp, struct rte_mbuf *m, + struct ifnet *out_ifp, uint16_t proto) +{ + if_output(out_ifp, m, in_ifp, proto); +} + +int dp_ifnet_get_mac_addr(struct ifnet *ifp, struct rte_ether_addr *eth_addr) +{ + if (!ifp || !eth_addr) + return -1; + + rte_ether_addr_copy(&ifp->eth_addr, eth_addr); + return 0; } diff --git a/src/bridge.c b/src/if/bridge/bridge.c similarity index 83% rename from src/bridge.c rename to src/if/bridge/bridge.c index 492ce87d..cdb58763 100644 --- a/src/bridge.c +++ b/src/if/bridge/bridge.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2011-2017 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -43,13 +43,14 @@ #include "bridge_vlan_set.h" #include "capture.h" #include "compat.h" -#include "config.h" +#include "config_internal.h" #include "control.h" #include "dp_event.h" #include "ether.h" #include "fal.h" #include "fal_plugin.h" -#include "gre.h" +#include "if/gre.h" +#include "if/vxlan.h" #include "if_var.h" #include "json_writer.h" #include "main.h" @@ -60,17 +61,17 @@ #include "npf/config/npf_config.h" #include "npf/config/npf_ruleset_type.h" #include "npf/npf_if.h" +#include "npf/npf_rc.h" #include "npf_shim.h" #include "pipeline/nodes/pl_nodes_common.h" #include "pl_common.h" #include "pl_fused.h" -#include "pktmbuf.h" +#include "pktmbuf_internal.h" #include "pl_node.h" #include "urcu.h" #include "util.h" #include "vplane_debug.h" #include "vplane_log.h" -#include "vxlan.h" #include "l2_rx_fltr.h" struct bridge_port; @@ -101,7 +102,7 @@ static const char *bridge_ifstate_names[STP_IFSTATE_SIZE] = { /* * Cisco-specific PVST (per-vlan BPDU) multicast address */ -static const struct ether_addr pvst_mcast_address = { +static const struct rte_ether_addr pvst_mcast_address = { .addr_bytes = {0x01, 0x00, 0x0c, 0xcc, 0xcc, 0xcd}, }; @@ -119,9 +120,8 @@ static bool bridge_pvst_flood_local; #define IFBAF_LOCAL 0x04 /* address of local interface */ #define IFBAF_ALL IFBAF_TYPEMASK -static void bridge_newneigh(int ifindex, const struct ether_addr *dst, +static void bridge_newneigh(int ifindex, const struct rte_ether_addr *dst, uint16_t state, uint16_t vlan); -static void bridge_timer(struct rte_timer *, void *); static bool bridge_intf_is_virt(struct ifnet *ifp) { @@ -133,7 +133,7 @@ static bool bridge_intf_is_virt(struct ifnet *ifp) static bool bridge_pkt_exceeds_mtu(struct rte_mbuf *m, struct ifnet *out_ifp) { - if (rte_pktmbuf_pkt_len(m) - ETHER_HDR_LEN > out_ifp->if_mtu) { + if (rte_pktmbuf_pkt_len(m) - RTE_ETHER_HDR_LEN > out_ifp->if_mtu) { /* * Transparent bridge shouldn't be doing any form of pkt * manipulation, fragmentation or otherwise, but 
because the @@ -142,8 +142,7 @@ static bool bridge_pkt_exceeds_mtu(struct rte_mbuf *m, */ if (bridge_intf_is_virt(out_ifp) && bridge_frag_enable) return false; - else - return true; + return true; } return false; @@ -253,6 +252,7 @@ static void if_vlan_in_stats_incr(struct ifnet *ifp, { unsigned int lcore; struct bridge_vlan_stat_block *stats; + const struct rte_ether_hdr *eh; /* HW ports will count this in HW */ if (ifp->hw_forwarding) @@ -269,6 +269,12 @@ static void if_vlan_in_stats_incr(struct ifnet *ifp, lcore = dp_lcore_id(); stats->stats[lcore].rx_octets += rte_pktmbuf_pkt_len(m); stats->stats[lcore].rx_pkts++; + + eh = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); + if (rte_is_unicast_ether_addr(&eh->d_addr)) + stats->stats[lcore].rx_ucast_pkts++; + else + stats->stats[lcore].rx_nucast_pkts++; } static void if_vlan_out_stats_incr(struct bridge_softc *sc, @@ -277,6 +283,7 @@ static void if_vlan_out_stats_incr(struct bridge_softc *sc, { unsigned int lcore; struct bridge_vlan_stat_block *stats; + const struct rte_ether_hdr *eh; /* HW ports will not count this in HW */ stats = rcu_dereference(sc->vlan_stats[vlan]); @@ -286,6 +293,12 @@ static void if_vlan_out_stats_incr(struct bridge_softc *sc, lcore = dp_lcore_id(); stats->stats[lcore].tx_octets += rte_pktmbuf_pkt_len(m); stats->stats[lcore].tx_pkts++; + + eh = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); + if (rte_is_unicast_ether_addr(&eh->d_addr)) + stats->stats[lcore].tx_ucast_pkts++; + else + stats->stats[lcore].tx_nucast_pkts++; } static void if_vlan_out_drop_stats_incr(struct bridge_softc *sc, @@ -352,8 +365,8 @@ bridge_get_ifstate_string(uint8_t brstate) { if (bridge_is_ifstate_valid(brstate)) return bridge_ifstate_names[brstate]; - else - return "UNKNOWN"; + + return "UNKNOWN"; } static inline bool @@ -380,7 +393,8 @@ bridge_mac_is_local(const struct bridge_rtnode *brt) static inline int bridge_key_equal(const struct bridge_key *k1, const struct bridge_key *k2) { - return ether_addr_equal(&k1->addr, 
&k2->addr) && k1->vlan == k2->vlan; + return rte_ether_addr_equal(&k1->addr, &k2->addr) && + k1->vlan == k2->vlan; } static inline unsigned long bridge_key_hash(const struct bridge_key *key) @@ -405,7 +419,7 @@ static int bridge_rtnode_match(struct cds_lfht_node *node, const void *key) */ static struct bridge_rtnode * bridge_rtnode_lookup(struct bridge_softc *sc, - const struct ether_addr *addr, uint16_t vid) + const struct rte_ether_addr *addr, uint16_t vid) { struct cds_lfht_iter iter; struct cds_lfht_node *node; @@ -422,8 +436,7 @@ bridge_rtnode_lookup(struct bridge_softc *sc, node = cds_lfht_iter_get_node(&iter); if (node) return caa_container_of(node, struct bridge_rtnode, brt_node); - else - return NULL; + return NULL; } /* @@ -454,7 +467,7 @@ bridge_rtnode_insert(struct bridge_softc *sc, struct bridge_rtnode *brt) */ static void bridge_rtupdate(struct ifnet *ifp, - const struct ether_addr *dst, + const struct rte_ether_addr *dst, uint16_t vlan) { struct bridge_softc *sc = @@ -545,7 +558,8 @@ bridge_rtable_init(struct bridge_softc *sc) } int -bridge_newneigh_tunnel(struct bridge_port *brport, const struct ether_addr *dst, +bridge_newneigh_tunnel(struct bridge_port *brport, + const struct rte_ether_addr *dst, in_addr_t dst_ip, uint16_t vlan) { struct ifnet *ifp = bridge_port_get_interface(brport); @@ -596,7 +610,7 @@ fal_object_t bridge_fal_stp_object(const struct ifnet *ifp) return sc->stp; } -void bridge_upd_hw_forwarding(const struct ifnet *ifp) +static void bridge_upd_hw_forwarding(const struct ifnet *ifp) { /* * If this interface is part of a switch/bridge, let the FAL @@ -617,12 +631,11 @@ void bridge_upd_hw_forwarding(const struct ifnet *ifp) /* Create bridge in response to netlink */ struct ifnet *bridge_create(int ifindex, const char *ifname, unsigned int mtu, - const struct ether_addr *addr) + const struct rte_ether_addr *addr) { struct ifnet *ifp; - struct bridge_softc *sc; - ifp = ifnet_byifname(ifname); + ifp = dp_ifnet_byifname(ifname); /* 
existing interface, reuse it */ if (ifp != NULL) { DP_DEBUG(BRIDGE, DEBUG, BRIDGE, @@ -637,32 +650,19 @@ struct ifnet *bridge_create(int ifindex, const char *ifname, return NULL; } - ifp = if_alloc(ifname, IFT_BRIDGE, mtu, addr, SOCKET_ID_ANY); + ifp = if_alloc(ifname, IFT_BRIDGE, mtu, addr, SOCKET_ID_ANY, NULL); if (!ifp) { RTE_LOG(NOTICE, BRIDGE, "out of memory to create %s\n", ifname); return NULL; } - sc = ifp->if_softc; - if_set_ifindex(ifp, ifindex); if (!if_setup_vlan_storage(ifp)) { if_free(ifp); return NULL; } - const struct fal_attribute_t attr_list[2] = { - {FAL_STP_ATTR_INSTANCE, .value.u8 = STP_INST_IST}, - {FAL_STP_ATTR_MSTI, .value.u16 = MSTP_MSTI_IST} - }; - - int rc = fal_stp_create(ifindex, 2, &attr_list[0], &sc->stp); - if (rc < 0) - DP_DEBUG(BRIDGE, ERR, BRIDGE, - "FAL(%u): failed to create STP: '%s'\n", - ifindex, strerror(-rc)); - return ifp; } @@ -685,7 +685,7 @@ void bridge_update(const char *ifname, struct nl_bridge_info *br_info) struct bridge_softc *sc; uint32_t cur_ageing_time; - ifp = ifnet_byifname(ifname); + ifp = dp_ifnet_byifname(ifname); if (ifp == NULL || ifp->if_softc == NULL) return; @@ -728,8 +728,46 @@ void bridge_update(const char *ifname, struct nl_bridge_info *br_info) sc->scbr_vlan_default_pvid = br_info->br_vlan_default_pvid; } +/* Should route entry be expired? + * For dynamic entries only, check whether it has gone unused + * for more than ageing_ticks consecutive checks.
+ */ +static int +bridge_rtexpired(struct bridge_rtnode *brt, uint32_t ageing_ticks) +{ + if ((brt->brt_flags & IFBAF_TYPEMASK) != IFBAF_DYNAMIC) + return 0; -static int bridge_if_init(struct ifnet *ifp) + if (rte_atomic32_test_and_set(&brt->brt_unused)) { + /* Transition from used to unused */ + brt->brt_expire = 0; + return 0; + } + + /* If ageing_ticks is 0 then dynamic entries are never timed out */ + if (++brt->brt_expire > ageing_ticks && ageing_ticks > 0) + return 1; /* expired */ + + return 0; +} + +/* Walk the bridge forwarding database and time out stale dynamic entries */ +static void bridge_timer(struct rte_timer *timer __rte_unused, + void *arg) /* arg: struct bridge_softc * */ +{ + struct bridge_softc *sc = arg; + struct cds_lfht_iter iter; + struct bridge_rtnode *brt; + + dp_rcu_read_lock(); + cds_lfht_for_each_entry(sc->scbr_rthash, &iter, brt, brt_node) { + if (bridge_rtexpired(brt, sc->scbr_ageing_ticks)) + bridge_rtnode_destroy(sc->scbr_rthash, brt); + } + dp_rcu_read_unlock(); +} + +static int bridge_if_init(struct ifnet *ifp, void *ctx __unused) { struct bridge_softc *sc; @@ -755,24 +793,12 @@ static int bridge_if_init(struct ifnet *ifp) static void bridge_if_uninit(struct ifnet *ifp) { struct bridge_softc *sc = ifp->if_softc; - struct cds_list_head *entry; - struct bridge_port *brport; struct bridge_vlan_stat_block *stats; int i; if (!sc) return; - /* Delete the member pointers to the bridge */ - bridge_for_each_brport(brport, entry, sc) { - struct ifnet *dif = bridge_port_get_interface(brport); - - rcu_assign_pointer(dif->if_brport, NULL); - bridge_port_destroy(brport); - } - - fal_stp_delete(bridge_fal_stp_object(ifp)); - rte_timer_stop(&sc->scbr_timer); cds_lfht_destroy(sc->scbr_rthash, NULL); @@ -789,28 +815,41 @@ static void bridge_if_uninit(struct ifnet *ifp) call_rcu(&sc->scbr_rcu, bridge_free); } +static bool +bridge_can_create_in_fal(struct ifnet *ifp) +{ + /* + * Ignore our own feature, and still create in the FAL with + * hardware switching disabled since the FAL
needs this + * programming to aid the operation. + */ + return !if_check_any_except_emb_feat( + ifp, IF_EMB_FEAT_BRIDGE_MEMBER | + IF_EMB_FEAT_HW_SWITCHING_DISABLED); +} + /* Add port in response to netlink */ static void bridge_newport(int ifindex, const char *name, - int ifmaster, uint8_t state, - struct ether_addr *lladdr) + int ifbridge, uint8_t state, + struct rte_ether_addr *lladdr) { struct ifnet *ifm, *ifp; struct fal_attribute_t attr_list[1] = { { FAL_BRIDGE_PORT_ATTR_STATE, .value.u8 = state }, }; - ifm = ifnet_byifindex(ifmaster); + ifm = dp_ifnet_byifindex(ifbridge); if (!ifm) { DP_DEBUG(BRIDGE, ERR, BRIDGE, - "bridge_newport: can't find master for ifindex %d\n", - ifmaster); + "%s: can't find bridge for ifindex %d\n", + __func__, ifbridge); return; } if (ifm->if_type != IFT_BRIDGE) - rte_panic("bridge_newport: ifmaster %d is type %#x\n", - ifmaster, ifm->if_type); + rte_panic("%s: ifbridge %d is type %#x\n", + __func__, ifbridge, ifm->if_type); - ifp = ifnet_byifindex(ifindex); + ifp = dp_ifnet_byifindex(ifindex); if (!ifp) { DP_DEBUG(BRIDGE, ERR, BRIDGE, "bridge_newport: can't find interface for ifindex %d\n", @@ -826,7 +865,9 @@ static void bridge_newport(int ifindex, const char *name, "%s changed state to %s\n", name, bridge_get_ifstate_string(state)); bridge_port_set_state(ifp->if_brport, state); - fal_br_upd_port(ifindex, &attr_list[0]); + if (bridge_port_is_fal_created(ifp->if_brport)) + fal_br_upd_port(ifindex, + &attr_list[0]); } } else { DP_DEBUG(BRIDGE, ERR, BRIDGE, @@ -853,9 +894,19 @@ static void bridge_newport(int ifindex, const char *name, rcu_assign_pointer(ifp->if_brport, port); bridge_port_add_to_list(port, &sc->scbr_porthead); + if (!bridge_can_create_in_fal(ifp)) + DP_DEBUG(BRIDGE, DEBUG, BRIDGE, + "%s deferring signalling of newport in FAL\n", + ifp->if_name); + + /* + * This will also create the FAL bridge-port object, + * if possible. 
+ */ + if_notify_emb_feat_change(ifp); + ifpromisc(ifp, 1); - fal_br_new_port(ifmaster, ifindex, 1, attr_list); bridge_port_set_state(ifp->if_brport, state); } @@ -869,7 +920,7 @@ static void bridge_newport(int ifindex, const char *name, */ static void bridge_fdb_flush(struct ifnet *bridge, struct ifnet *ifp, - uint8_t fdb_type, uint16_t vlanid) + uint8_t fdb_type, uint16_t vlanid, bool flush_fal) { struct bridge_softc *sc = bridge->if_softc; struct cds_lfht_iter iter; @@ -882,49 +933,96 @@ bridge_fdb_flush(struct ifnet *bridge, struct ifnet *ifp, bridge_rtnode_destroy(sc->scbr_rthash, brt); } - fal_fdb_flush(bridge->if_index, - (ifp == NULL) ? 0 : ifp->if_index, - vlanid, - (fdb_type & IFBAF_TYPEMASK) == IFBAF_DYNAMIC); + if (flush_fal) + fal_fdb_flush(bridge->if_index, + (ifp == NULL) ? 0 : ifp->if_index, + vlanid, + (fdb_type & IFBAF_TYPEMASK) == IFBAF_DYNAMIC); } void bridge_fdb_dynamic_flush_vlan(struct ifnet *bridge, struct ifnet *port, uint16_t vlanid) { - bridge_fdb_flush(bridge, port, IFBAF_DYNAMIC, vlanid); + bridge_fdb_flush(bridge, port, IFBAF_DYNAMIC, vlanid, true); } -static void bridge_delport(int ifindex, int ifmaster) +static void bridge_fal_delport(struct ifnet *ifp) { - struct ifnet *ifp, *ifm; struct bridge_port *brport; + struct ifnet *ifm; + bool fal_created; - ifm = ifnet_byifindex(ifmaster); - if (!ifm) { - DP_DEBUG(BRIDGE, ERR, BRIDGE, - "bridge_delport: can't find master for ifindex %d\n", - ifmaster); + brport = rcu_dereference(ifp->if_brport); + if (!brport) return; - } - if (ifm->if_type != IFT_BRIDGE) - rte_panic("bridge_delport: ifmaster %d is type %#x\n", - ifmaster, ifm->if_type); - ifp = ifnet_byifindex(ifindex); - if (!ifp) { - DP_DEBUG(BRIDGE, ERR, BRIDGE, - "bridge_delport: can't find bridge port for ifindex %d\n", - ifindex); - return; + ifm = bridge_port_get_bridge(brport); + fal_created = bridge_port_is_fal_created(brport); + + if (fal_created) { + fal_fdb_flush(ifm->if_index, ifp->if_index, 0, false); + 
fal_br_del_port(ifm->if_index, ifp->if_index); + bridge_port_set_fal_created(brport, false); } +} + +static void bridge_fal_newport(struct ifnet *ifp) +{ + struct bridge_port *brport; + struct ifnet *ifm; + bool fal_created; brport = rcu_dereference(ifp->if_brport); - if (!brport || bridge_port_get_bridge(brport) != ifm) { - DP_DEBUG(BRIDGE, ERR, BRIDGE, - "%s: is not a member of bridge %s\n", - ifp->if_name, ifm->if_name); + if (!brport) return; + + ifm = bridge_port_get_bridge(brport); + fal_created = bridge_port_is_fal_created(brport); + + if (!fal_created) { + struct bridge_vlan_set *vlans = bridge_vlan_set_create(); + struct bridge_vlan_set *untagged = bridge_vlan_set_create(); + struct fal_attribute_t attr_list[] = { + { .id = FAL_BRIDGE_PORT_ATTR_STATE, + .value.u8 = bridge_port_get_state(brport) }, + { .id = FAL_BRIDGE_PORT_ATTR_TAGGED_VLANS, + .value.ptr = vlans }, + { .id = FAL_BRIDGE_PORT_ATTR_UNTAGGED_VLANS, + .value.ptr = untagged }, + { .id = FAL_BRIDGE_PORT_ATTR_PORT_VLAN_ID, + .value.u16 = 0 }, + }; + if (vlans && untagged) { + /* + * Only populate if admin up since we want to + * not program the port as being part of the + * VLAN to save resources if it's admin down + */ + if (ifp->if_flags & IFF_UP) { + bridge_port_get_vlans(brport, vlans); + bridge_port_get_untag_vlans(brport, untagged); + attr_list[3].value.u16 = + bridge_port_get_pvid(brport); + } + fal_br_new_port(ifm->if_index, ifp->if_index, + ARRAY_SIZE(attr_list), attr_list); + bridge_port_set_fal_created(ifp->if_brport, true); + } else + RTE_LOG(ERR, BRIDGE, + "out of memory allocating vlan sets during FAL newport signalling\n"); + if (vlans) /* either set may exist on the failure path */ + bridge_vlan_set_free(vlans); + if (untagged) + bridge_vlan_set_free(untagged); } +} + +static void bridge_delport(struct ifnet *ifp, struct bridge_port *brport) +{ + struct ifnet *ifm; + bool fal_created; + + ifm = bridge_port_get_bridge(brport); DP_DEBUG(BRIDGE, INFO, BRIDGE, "remove %s from %s\n", ifp->if_name, ifm->if_name); @@ -932,11 +1030,122 @@ static void
bridge_delport(int ifindex, int ifmaster) pl_node_remove_feature_by_inst(&bridge_in_feat, ifp); rcu_assign_pointer(ifp->if_brport, NULL); + fal_created = bridge_port_is_fal_created(brport); bridge_port_destroy(brport); ifpromisc(ifp, 0); - bridge_fdb_flush(ifm, ifp, IFBAF_ALL, 0); - fal_br_del_port(ifmaster, ifindex); + bridge_fdb_flush(ifm, ifp, IFBAF_ALL, 0, fal_created); + if (fal_created) + fal_br_del_port(ifm->if_index, ifp->if_index); + + if_notify_emb_feat_change(ifp); +} + +static void +bridge_if_l2_deleted(struct ifnet *ifp) +{ + struct cds_list_head *entry; + struct bridge_port *brport; + struct bridge_softc *sc; + + if (ifp->if_type == IFT_BRIDGE) { + sc = ifp->if_softc; + if (!sc) + return; + + /* Delete the member pointers to the bridge */ + bridge_for_each_brport(brport, entry, sc) { + struct ifnet *ifp_member = + bridge_port_get_interface(brport); + + bridge_delport(ifp_member, brport); + } + } else { + brport = rcu_dereference(ifp->if_brport); + if (brport) + bridge_delport(ifp, brport); + } +} + +static void bridge_if_feat_mode_change( + struct ifnet *ifp, enum if_feat_mode_event event) +{ + switch (event) { + case IF_FEAT_MODE_EVENT_L2_FAL_ENABLED: + case IF_FEAT_MODE_EVENT_L2_FAL_DISABLED: + bridge_upd_hw_forwarding(ifp); + break; + case IF_FEAT_MODE_EVENT_EMB_FEAT_CHANGED: + if (bridge_can_create_in_fal(ifp)) + bridge_fal_newport(ifp); + else + bridge_fal_delport(ifp); + break; + case IF_FEAT_MODE_EVENT_L2_DELETED: + bridge_if_l2_deleted(ifp); + break; + default: + break; + } +} + + +/* + * React to interface admin status changes since we want to not program + * the port as being part of the VLAN to save resources if it's admin + * down + */ +static void +bridge_if_admin_status_change(struct ifnet *ifp, bool up) +{ + struct fal_attribute_t vlan_update; + struct bridge_vlan_set *untagged; + struct bridge_vlan_set *vlans; + struct bridge_port *brport; + uint16_t pvid; + + brport = rcu_dereference(ifp->if_brport); + if (!brport) + /* nothing to do
if not a bridge port */ + return; + + /* nothing to do if not created in the FAL */ + if (!bridge_port_is_fal_created(brport)) + return; + + vlans = bridge_vlan_set_create(); + if (!vlans) { + RTE_LOG(ERR, BRIDGE, + "out of memory allocating vlan sets during FAL admin status event\n"); + return; + } + if (up) + bridge_port_get_vlans(brport, vlans); + vlan_update.id = FAL_BRIDGE_PORT_ATTR_TAGGED_VLANS; + vlan_update.value.ptr = vlans; + fal_br_upd_port(ifp->if_index, &vlan_update); + bridge_vlan_set_free(vlans); + + untagged = bridge_vlan_set_create(); + if (!untagged) { + RTE_LOG(ERR, BRIDGE, + "out of memory allocating vlan sets during FAL admin status event\n"); + return; + } + if (up) + bridge_port_get_untag_vlans(brport, untagged); + vlan_update.id = FAL_BRIDGE_PORT_ATTR_UNTAGGED_VLANS; + vlan_update.value.ptr = untagged; + fal_br_upd_port(ifp->if_index, &vlan_update); + bridge_vlan_set_free(untagged); + + if (up) + pvid = bridge_port_get_pvid(brport); + else + pvid = 0; + vlan_update.id = FAL_BRIDGE_PORT_ATTR_PORT_VLAN_ID; + vlan_update.value.u16 = pvid; + fal_br_upd_port(ifp->if_index, &vlan_update); } static void @@ -959,7 +1168,6 @@ bridge_forward_via_tunnel(struct ifnet *br_ifp, drop: if_incr_dropped(br_ifp); rte_pktmbuf_free(m); - return; } /* @@ -974,7 +1182,8 @@ static int bridge_forward(struct bridge_softc *sc, struct ifnet *ifp, struct rte_mbuf *m, struct ifnet *brif) { - const struct ether_hdr *eh = rte_pktmbuf_mtod(m, struct ether_hdr *); + const struct rte_ether_hdr *eh = + rte_pktmbuf_mtod(m, struct rte_ether_hdr *); struct bridge_rtnode *brt; struct ifnet *dif; struct bridge_port *port = NULL; @@ -1032,8 +1241,14 @@ bridge_forward(struct bridge_softc *sc, struct ifnet *ifp, drop: if_incr_full_proto(brif, 1); if_vlan_out_drop_stats_incr(sc, vlan); - - rte_pktmbuf_free(m); + { + struct pl_packet pkt = { + .mbuf = m, + .l2_pkt_type = pkt_mbuf_get_l2_traffic_type(m), + .in_ifp = ifp + }; + pipeline_fused_term_drop(&pkt); + } return BRIDGE_CONSUMED; } 
@@ -1162,7 +1377,8 @@ static void bridge_flood(struct bridge_softc *sc, struct ifnet *in_ifp, void bridge_output(struct ifnet *ifp, struct rte_mbuf *m, struct ifnet *in_ifp) { - const struct ether_hdr *eh = rte_pktmbuf_mtod(m, struct ether_hdr *); + const struct rte_ether_hdr *eh = + rte_pktmbuf_mtod(m, struct rte_ether_hdr *); struct bridge_rtnode *brt; struct ifnet *dif; struct bridge_port *port = NULL; @@ -1172,17 +1388,23 @@ void bridge_output(struct ifnet *ifp, struct rte_mbuf *m, const struct npf_config *npf_config = npf_if_conf(nif); if (npf_active(npf_config, NPF_BRIDGE) && - eh->ether_type != htons(ETHER_TYPE_ARP)) { + eh->ether_type != htons(RTE_ETHER_TYPE_ARP)) { npf_result_t result; + int rc = NPF_RC_UNMATCHED; result = npf_hook_notrack(npf_get_ruleset(npf_config, NPF_RS_BRIDGE), &m, ifp, PFIL_IN, 0, - ethtype(m, ETHER_TYPE_VLAN)); + ethtype(m, RTE_ETHER_TYPE_VLAN), + &rc); + + /* Increment return code counter */ + npf_rc_inc(ifp, NPF_RCT_L2, NPF_RC_OUT, rc, result.decision); + if (result.decision != NPF_DECISION_PASS) goto drop; /* Set eh again in case buffer in m changed. */ - eh = rte_pktmbuf_mtod(m, struct ether_hdr *); + eh = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); } brt = bridge_rtnode_lookup(sc, &eh->d_addr, vlan); @@ -1208,7 +1430,7 @@ void bridge_output(struct ifnet *ifp, struct rte_mbuf *m, * This can happen when bridging between interfaces with different * mtu's. */ - if (rte_pktmbuf_pkt_len(m) - ETHER_HDR_LEN > dif->if_mtu) + if (rte_pktmbuf_pkt_len(m) - RTE_ETHER_HDR_LEN > dif->if_mtu) goto drop; /* XXX add stat for this */ /* Count L3 forwarded and local packets as outbound on bridge */ @@ -1277,7 +1499,7 @@ bridge_input_local(struct rte_mbuf *m, struct ifnet *input_if, /* * Have we exposed an inner vlan. 
*/ - if (ethhdr(m)->ether_type == htons(ETHER_TYPE_VLAN)) { + if (ethhdr(m)->ether_type == htons(RTE_ETHER_TYPE_VLAN)) { struct pktmbuf_mdata *mdata; mdata = pktmbuf_mdata(m); @@ -1286,7 +1508,7 @@ bridge_input_local(struct rte_mbuf *m, struct ifnet *input_if, * data */ m->ol_flags |= PKT_RX_VLAN; - m->vlan_tci = vid_decap(m, ETHER_TYPE_VLAN); + m->vlan_tci = vid_decap(m, RTE_ETHER_TYPE_VLAN); bridge_input_local(m, input_if, base_bridge); return; } @@ -1309,7 +1531,8 @@ void bridge_input(struct bridge_port *port, struct rte_mbuf *m) struct ifnet *ifp = bridge_port_get_interface(port); struct ifnet *brif = bridge_port_get_bridge(ifp->if_brport); struct bridge_softc *sc = brif->if_softc; - const struct ether_hdr *eh = rte_pktmbuf_mtod(m, struct ether_hdr *); + const struct rte_ether_hdr *eh = + rte_pktmbuf_mtod(m, struct rte_ether_hdr *); struct if_data *ifstat = &ifp->if_data[dp_lcore_id()]; struct pktmbuf_mdata *mdata; @@ -1334,7 +1557,7 @@ void bridge_input(struct bridge_port *port, struct rte_mbuf *m) capture_burst(brif, &m, 1); /* bogon filter */ - if (!is_valid_assigned_ether_addr(&eh->s_addr)) + if (!rte_is_valid_assigned_ether_addr(&eh->s_addr)) goto errorpath; /* bridge must be up */ @@ -1356,7 +1579,7 @@ void bridge_input(struct bridge_port *port, struct rte_mbuf *m) * on whether or not the Cisco multicast address has been * registered. 
*/ - bool is_pvst = ether_addr_equal(&eh->d_addr, &pvst_mcast_address); + bool is_pvst = rte_ether_addr_equal(&eh->d_addr, &pvst_mcast_address); if (is_link_local_ether_addr(&eh->d_addr) || (is_pvst && @@ -1383,19 +1606,25 @@ void bridge_input(struct bridge_port *port, struct rte_mbuf *m) const struct npf_config *npf_config = npf_if_conf(nif); if (npf_active(npf_config, NPF_BRIDGE) && - eh->ether_type != htons(ETHER_TYPE_ARP)) { + eh->ether_type != htons(RTE_ETHER_TYPE_ARP)) { npf_result_t result; + int rc = NPF_RC_UNMATCHED; result = npf_hook_notrack(npf_get_ruleset(npf_config, NPF_RS_BRIDGE), &m, brif, PFIL_IN, 0, - ethtype(m, ETHER_TYPE_VLAN)); + ethtype(m, RTE_ETHER_TYPE_VLAN), + &rc); + + /* Increment return code counter */ + npf_rc_inc(ifp, NPF_RCT_L2, NPF_RC_IN, rc, result.decision); + if (result.decision != NPF_DECISION_PASS) goto ignore; /* Set eh again in case buffer in m changed. */ - eh = rte_pktmbuf_mtod(m, struct ether_hdr *); + eh = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); } - if (unlikely(ether_addr_equal(&eh->d_addr, &brif->eth_addr))) { + if (unlikely(rte_ether_addr_equal(&eh->d_addr, &brif->eth_addr))) { /* "to us" unicast pkts should always be consumed */ bridge_input_local(m, brif, brif); return; @@ -1404,14 +1633,14 @@ void bridge_input(struct bridge_port *port, struct rte_mbuf *m) bool mcast = false; /* Check for multicast and broadcast pkts *after* firewall. */ - if (unlikely(is_multicast_ether_addr(&eh->d_addr))) { + if (unlikely(rte_is_multicast_ether_addr(&eh->d_addr))) { struct rte_mbuf *m_local = pktmbuf_copy(m, m->pool); if (!m_local) goto errorpath; mcast = true; ifstat->ifi_imulticast++; - if (is_broadcast_ether_addr(&eh->d_addr)) + if (rte_is_broadcast_ether_addr(&eh->d_addr)) pkt_mbuf_set_l2_traffic_type(m_local, L2_PKT_BROADCAST); else @@ -1436,45 +1665,6 @@ void bridge_input(struct bridge_port *port, struct rte_mbuf *m) rte_pktmbuf_free(m); } -/* Should route entry be expired? 
- * For dynamic entries only, check if it has been used. - * for more than BRIDGE_RTABLE_EXPIRE intervals. - */ -static int -bridge_rtexpired(struct bridge_rtnode *brt, uint32_t ageing_ticks) -{ - if ((brt->brt_flags & IFBAF_TYPEMASK) != IFBAF_DYNAMIC) - return 0; - - if (rte_atomic32_test_and_set(&brt->brt_unused)) { - /* Transition from used to unused */ - brt->brt_expire = 0; - return 0; - } - - /* If ageing_ticks is 0 then dynamic entries are never timed out */ - if (++brt->brt_expire > ageing_ticks && ageing_ticks > 0) - return 1; /* expired */ - - return 0; -} - -/* walk bridge forwarding database and timeout old entries */ -static void bridge_timer(struct rte_timer *timer __rte_unused, - void *arg __rte_unused) -{ - struct bridge_softc *sc = arg; - struct cds_lfht_iter iter; - struct bridge_rtnode *brt; - - rcu_read_lock(); - cds_lfht_for_each_entry(sc->scbr_rthash, &iter, brt, brt_node) { - if (bridge_rtexpired(brt, sc->scbr_ageing_ticks)) - bridge_rtnode_destroy(sc->scbr_rthash, brt); - } - rcu_read_unlock(); -} - /* * Code for handling netlink message about bridging */ @@ -1506,14 +1696,14 @@ static int bridge_port_attr(const struct nlattr *attr, void *data) static int notify_newport(int ifindex, const char *ifname, struct nlattr *tb[]) { - int master; + int bridge; uint8_t state; struct nlattr *pinfo[IFLA_BRPORT_MAX+1] = { NULL }; - struct ether_addr *lladdr = NULL; + struct rte_ether_addr *lladdr = NULL; if (!tb[IFLA_MASTER]) { DP_DEBUG(BRIDGE, ERR, BRIDGE, - "missing master in newlink msg\n"); + "missing bridge in newlink msg\n"); return MNL_CB_ERROR; } @@ -1526,10 +1716,10 @@ static int notify_newport(int ifindex, const char *ifname, } } - master = mnl_attr_get_u32(tb[IFLA_MASTER]); + bridge = mnl_attr_get_u32(tb[IFLA_MASTER]); if (tb[IFLA_ADDRESS] && (mnl_attr_get_payload_len(tb[IFLA_ADDRESS]) == - ETHER_ADDR_LEN)) + RTE_ETHER_ADDR_LEN)) lladdr = mnl_attr_get_payload(tb[IFLA_ADDRESS]); if (pinfo[IFLA_BRPORT_STATE]) { @@ -1542,7 +1732,7 @@ static int 
notify_newport(int ifindex, const char *ifname, } - bridge_newport(ifindex, ifname, master, state, lladdr); + bridge_newport(ifindex, ifname, bridge, state, lladdr); } else { if (lladdr) bridge_newneigh(ifindex, lladdr, NUD_PERMANENT, 0); @@ -1554,17 +1744,46 @@ static int notify_newport(int ifindex, const char *ifname, /* remove port from bridge */ static int notify_delport(int ifindex, struct nlattr *tb[]) { - int master; + struct bridge_port *brport; + struct ifnet *ifp, *ifm; + int bridge; if (tb[IFLA_MASTER]) - master = mnl_attr_get_u32(tb[IFLA_MASTER]); + bridge = mnl_attr_get_u32(tb[IFLA_MASTER]); else { DP_DEBUG(BRIDGE, ERR, BRIDGE, - "missing master in newlink msg\n"); + "missing bridge in newlink msg\n"); return MNL_CB_ERROR; } - bridge_delport(ifindex, master); + ifm = dp_ifnet_byifindex(bridge); + if (!ifm) { + DP_DEBUG(BRIDGE, ERR, BRIDGE, + "%s: can't find bridge for ifindex %d\n", + __func__, bridge); + return MNL_CB_OK; + } + if (ifm->if_type != IFT_BRIDGE) + rte_panic("%s: ifbridge %d is type %#x\n", + __func__, bridge, ifm->if_type); + + ifp = dp_ifnet_byifindex(ifindex); + if (!ifp) { + DP_DEBUG(BRIDGE, ERR, BRIDGE, + "%s: can't find bridge port for ifindex %d\n", + __func__, ifindex); + return MNL_CB_OK; + } + + brport = rcu_dereference(ifp->if_brport); + if (!brport || bridge_port_get_bridge(brport) != ifm) { + DP_DEBUG(BRIDGE, ERR, BRIDGE, + "%s: is not a member of bridge %s\n", + ifp->if_name, ifm->if_name); + return MNL_CB_OK; + } + + bridge_delport(ifp, brport); return MNL_CB_OK; } @@ -1573,13 +1792,12 @@ static uint8_t ndmstate_to_flags(uint16_t state) { if (state & NUD_PERMANENT) return IFBAF_LOCAL; - else if (state & NUD_NOARP) + if (state & NUD_NOARP) return IFBAF_STATIC; - else - return IFBAF_DYNAMIC; + return IFBAF_DYNAMIC; } -static void bridge_newneigh(int ifindex, const struct ether_addr *dst, +static void bridge_newneigh(int ifindex, const struct rte_ether_addr *dst, uint16_t state, uint16_t vlan) { struct ifnet *ifp, *ifm; @@ 
-1590,7 +1808,7 @@ static void bridge_newneigh(int ifindex, const struct ether_addr *dst, { FAL_BRIDGE_NEIGH_ATTR_STATE, .value.u16 = state }, }; - ifp = ifnet_byifindex(ifindex); + ifp = dp_ifnet_byifindex(ifindex); if (!ifp) return; /* not a DPDK interface */ @@ -1639,13 +1857,13 @@ static void bridge_newneigh(int ifindex, const struct ether_addr *dst, } static void bridge_delneigh(int ifindex, - const struct ether_addr *dst, uint16_t vid) + const struct rte_ether_addr *dst, uint16_t vid) { struct ifnet *ifp, *ifm; struct bridge_softc *sc; struct bridge_rtnode *brt; - ifp = ifnet_byifindex(ifindex); + ifp = dp_ifnet_byifindex(ifindex); if (!ifp) return; /* not a DPDK interface */ @@ -1680,7 +1898,7 @@ static int bridge_neigh_change(const struct nlmsghdr *nlh, struct nlattr *tb[], enum cont_src_en cont_src) { - const struct ether_addr *lladdr; + const struct rte_ether_addr *lladdr; int skip = MNL_CB_OK; struct ifnet *ifp; uint16_t vid; @@ -1700,7 +1918,7 @@ static int bridge_neigh_change(const struct nlmsghdr *nlh, vid = 0; ifindex = cont_src_ifindex(cont_src, ndm->ndm_ifindex); - ifp = ifnet_byifindex(ifindex); + ifp = dp_ifnet_byifindex(ifindex); if (ifp && ifp->if_type == IFT_VXLAN && vxlan_get_vni(ifp)) skip = vxlan_neigh_change(nlh, ndm, tb); @@ -1958,7 +2176,7 @@ bridge_netlink_update_port(int ifindex, struct nlattr *tb[], int msg_type) int rv = MNL_CB_OK; struct fal_attribute_t vlan_update; - struct ifnet *port = ifnet_byifindex(ifindex); + struct ifnet *port = dp_ifnet_byifindex(ifindex); if (!port) return rv; @@ -2012,17 +2230,27 @@ bridge_netlink_update_port(int ifindex, struct nlattr *tb[], int msg_type) } rv = MNL_CB_OK; + /* + * Only update in FAL if created in the FAL and if admin up + * since we want to not program the port as being part of the + * VLAN to save resources if it's admin down + */ + /* * compare new vlan config with the old * and synchronize them */ - if (bridge_port_synchronize_vlans(brport, new_vlans)) { + if 
(bridge_port_synchronize_vlans(brport, new_vlans) && + bridge_port_is_fal_created(brport) && + port->if_flags & IFF_UP) { vlan_update.id = FAL_BRIDGE_PORT_ATTR_TAGGED_VLANS; vlan_update.value.ptr = new_vlans; fal_br_upd_port(ifindex, &vlan_update); } - if (bridge_port_synchronize_untag_vlans(brport, new_untagged)) { + if (bridge_port_synchronize_untag_vlans(brport, new_untagged) && + bridge_port_is_fal_created(brport) && + port->if_flags & IFF_UP) { vlan_update.id = FAL_BRIDGE_PORT_ATTR_UNTAGGED_VLANS; vlan_update.value.ptr = new_untagged; fal_br_upd_port(ifindex, &vlan_update); @@ -2034,7 +2262,15 @@ bridge_netlink_update_port(int ifindex, struct nlattr *tb[], int msg_type) .value.u16 = pvid }; bridge_port_set_pvid(brport, pvid); - fal_br_upd_port(ifindex, &pvid_update); + /* + * only update in FAL if created in the FAL and if + * admin up since we want to not program the port as + * being part of the VLAN to save resources if it's + * admin down + */ + if (bridge_port_is_fal_created(brport) && + port->if_flags & IFF_UP) + fal_br_upd_port(ifindex, &pvid_update); } bridge_free_vlan_stats(brport); @@ -2089,7 +2325,7 @@ bridge_cmd_get_port(FILE *f, struct ifnet *bridge, const char *port_name) { struct ifnet *port; - port = ifnet_byifname(port_name); + port = dp_ifnet_byifname(port_name); if (!port) { fprintf(f, "%s not found\n", port_name); return NULL; @@ -2107,13 +2343,14 @@ bridge_cmd_get_port(FILE *f, struct ifnet *bridge, const char *port_name) return port; } -static struct ether_addr * -bridge_cmd_get_mac(const char *mac_string, struct ether_addr *eap) +static struct rte_ether_addr * +bridge_cmd_get_mac(const char *mac_string, struct rte_ether_addr *eap) { return ether_aton_r(mac_string, eap); } -static int bridge_macs_show_entry(uint16_t vlanid, const struct ether_addr *dst, +static int bridge_macs_show_entry(uint16_t vlanid, + const struct rte_ether_addr *dst, unsigned int child_ifindex, uint32_t attr_count, const struct fal_attribute_t *attr_list, @@ -2131,7 
+2368,7 @@ static int bridge_macs_show_entry(uint16_t vlanid, const struct ether_addr *dst, RTE_LOG(ERR, BRIDGE, "Show macs: no walk argument provided\n"); return -1; } - ifp = ifnet_byifindex(child_ifindex); + ifp = dp_ifnet_byifindex(child_ifindex); if (!ifp) RTE_LOG(ERR, BRIDGE, "Show macs: no interface for ifindex %d (mac: %s)\n", @@ -2192,7 +2429,7 @@ bridge_macs_jsonw_one(json_writer_t *wr, const struct bridge_rtnode *brt) static void bridge_macs_jsonw_all(json_writer_t *wr, struct bridge_softc *sc, - struct ifnet *port, struct ether_addr *macp, + struct ifnet *port, struct rte_ether_addr *macp, uint16_t vlan) { struct cds_lfht_iter iter; @@ -2206,7 +2443,7 @@ bridge_macs_jsonw_all(json_writer_t *wr, struct bridge_softc *sc, if ((!port || port == brt->brt_difp) && (!vlan || vlan == brt->brt_key.vlan) && - (!macp || ether_addr_equal(macp, &brt->brt_key.addr))) + (!macp || rte_ether_addr_equal(macp, &brt->brt_key.addr))) bridge_macs_jsonw_one(wr, brt); cds_lfht_next(sc->scbr_rthash, &iter); @@ -2222,7 +2459,7 @@ bridge_macs_show(FILE *f, int argc, char **argv, struct ifnet *bridge) { struct bridge_softc *sc = bridge->if_softc; struct ifnet *port = NULL; - struct ether_addr mac, *macp = NULL; + struct rte_ether_addr mac, *macp = NULL; fal_br_walk_neigh_fn cb; uint16_t vlanid = 0; bool hw = false; @@ -2316,7 +2553,7 @@ bridge_macs_clear(FILE *f, int argc, char **argv, struct ifnet *bridge) { struct bridge_softc *sc = bridge->if_softc; struct ifnet *port = NULL; - struct ether_addr mac, *macp = NULL; + struct rte_ether_addr mac, *macp = NULL; if (argc < 1) { fprintf(f, "%s: missing argument: %d", __func__, argc); @@ -2346,7 +2583,7 @@ bridge_macs_clear(FILE *f, int argc, char **argv, struct ifnet *bridge) (port == NULL) ? 
0 : port->if_index, macp); } else { - bridge_fdb_flush(bridge, port, IFBAF_DYNAMIC, 0); + bridge_fdb_flush(bridge, port, IFBAF_DYNAMIC, 0, true); } return 0; } @@ -2367,7 +2604,7 @@ bridge_macs(FILE *f, int argc, char **argv, struct ifnet *bridge) if (strcmp(argv[0], "show") == 0) return bridge_macs_show(f, argc, argv, bridge); - else if (strcmp(argv[0], "clear") == 0) + if (strcmp(argv[0], "clear") == 0) return bridge_macs_clear(f, argc, argv, bridge); fprintf(f, "Unknown bridge macs command\n"); @@ -2392,10 +2629,12 @@ bridge_frag(FILE *f, int argc, char **argv) if (strcmp(argv[0], "enable") == 0) { bridge_frag_enable = true; return 0; - } else if (strcmp(argv[0], "disable") == 0) { + } + if (strcmp(argv[0], "disable") == 0) { bridge_frag_enable = false; return 0; - } else if (strcmp(argv[0], "show") == 0) + } + if (strcmp(argv[0], "show") == 0) return bridge_frag_status(f, argc, argv); fprintf(f, "Unknown bridge frag command\n"); @@ -2421,7 +2660,7 @@ cmd_bridge(FILE *f, int argc, char **argv) if (strcmp(argv[0], "frag") == 0) return bridge_frag(f, argc, argv); - bridge = ifnet_byifname(argv[0]); + bridge = dp_ifnet_byifname(argv[0]); if (!bridge || !bridge->if_softc || bridge->if_type != IFT_BRIDGE) { @@ -2483,7 +2722,7 @@ static void show_bridge(json_writer_t *wr, const struct ifnet *ifp) } jsonw_end_array(wr); - jsonw_name(wr, "bridge_master"); + jsonw_name(wr, "bridge_interface"); jsonw_start_object(wr); jsonw_uint_field(wr, "default_pvid", sc->scbr_vlan_default_pvid); jsonw_bool_field(wr, "vlan_filtering", sc->scbr_vlan_filter); @@ -2505,11 +2744,18 @@ bridge_if_dump(struct ifnet *ifp, json_writer_t *wr, return 0; } +static enum dp_ifnet_iana_type +bridge_iana_type(struct ifnet *ifp __unused) +{ + return DP_IFTYPE_IANA_BRIDGE; +} + static const struct ift_ops bridge_if_ops = { .ifop_set_l2_address = ether_if_set_l2_address, .ifop_init = bridge_if_init, .ifop_uninit = bridge_if_uninit, .ifop_dump = bridge_if_dump, + .ifop_iana_type = bridge_iana_type, }; 
static const struct netlink_handler bridge_netlink = { @@ -2527,7 +2773,7 @@ static void bridge_init(void) strerror(-ret)); struct fal_attribute_t punt_pvst = { - FAL_SWITCH_ATTR_PUNT_PVST}; + .id = FAL_SWITCH_ATTR_PUNT_PVST}; if (fal_get_switch_attrs(1, &punt_pvst) == 0) bridge_pvst_flood_local = punt_pvst.value.booldata; @@ -2537,6 +2783,8 @@ static void bridge_init(void) static const struct dp_event_ops bridge_events = { .init = bridge_init, + .if_feat_mode_change = bridge_if_feat_mode_change, + .if_admin_status_change = bridge_if_admin_status_change, }; DP_STARTUP_EVENT_REGISTER(bridge_events); diff --git a/src/bridge.h b/src/if/bridge/bridge.h similarity index 89% rename from src/bridge.h rename to src/if/bridge/bridge.h index 2bac2909..4e470cc3 100644 --- a/src/bridge.h +++ b/src/if/bridge/bridge.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2019-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2011-2016 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -23,7 +23,7 @@ #include "fal_plugin.h" #include "bridge_port.h" -#include "pktmbuf.h" +#include "pktmbuf_internal.h" #include "urcu.h" #include "util.h" @@ -47,13 +47,16 @@ struct bridge_vlan_stats { uint64_t tx_pkts; uint64_t tx_ucast_pkts; uint64_t tx_nucast_pkts; - /* end of first cache line */ + /* --- cacheline 1 boundary (64 bytes) --- */ uint64_t rx_drops; uint64_t rx_errors; uint64_t tx_drops; uint64_t tx_errors; } __rte_cache_aligned; +static_assert(offsetof(struct bridge_vlan_stats, rx_drops) == 64, + "first cache line exceeded"); + struct bridge_vlan_stat_block { struct rcu_head vlan_stats_rcu; struct bridge_vlan_stats stats[]; @@ -63,7 +66,7 @@ struct bridge_vlan_stat_block { * Bridge keys consist of an ethernet address and the VLAN */ struct bridge_key { - struct ether_addr addr; + struct rte_ether_addr addr; uint16_t vlan; }; @@ -136,23 +139,23 @@ const char *bridge_get_ifstate_string(uint8_t brstate); void bridge_input(struct bridge_port *port, struct rte_mbuf *m); int -bridge_newneigh_tunnel(struct bridge_port *brport, const struct ether_addr *dst, +bridge_newneigh_tunnel(struct bridge_port *brport, + const struct rte_ether_addr *dst, in_addr_t dst_ip, uint16_t vlan); void bridge_output(struct ifnet *ifp, struct rte_mbuf *m, struct ifnet *in_ifp); fal_object_t bridge_fal_stp_object(const struct ifnet *ifp); -void bridge_upd_hw_forwarding(const struct ifnet *port); struct ifnet *bridge_create(int ifindex, const char *ifname, unsigned int mtu, - const struct ether_addr *eth_addr); + const struct rte_ether_addr *eth_addr); void bridge_update(const char *ifname, struct nl_bridge_info *br_info); void bridge_nl_modify(struct ifnet *ifp, struct nlattr *kdata); struct ifnet *bridge_nl_create(int ifindex, const char *ifname, unsigned int mtu, - const struct ether_addr *eth_addr, + const struct rte_ether_addr *eth_addr, struct nlattr *kdata); void bridge_fdb_dynamic_flush_vlan(struct ifnet *bridge, struct ifnet *port, diff --git a/src/bridge_netlink.c 
b/src/if/bridge/bridge_netlink.c similarity index 97% rename from src/bridge_netlink.c rename to src/if/bridge/bridge_netlink.c index 059b8af1..f405cf31 100644 --- a/src/bridge_netlink.c +++ b/src/if/bridge/bridge_netlink.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2018-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2019, AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only */ @@ -123,7 +123,7 @@ void bridge_nl_modify(struct ifnet *ifp, struct nlattr *kdata) */ struct ifnet *bridge_nl_create(int ifindex, const char *ifname, unsigned int mtu, - const struct ether_addr *eth_addr, + const struct rte_ether_addr *eth_addr, struct nlattr *kdata) { struct ifnet *ifp = bridge_create(ifindex, ifname, diff --git a/src/bridge_port.c b/src/if/bridge/bridge_port.c similarity index 89% rename from src/bridge_port.c rename to src/if/bridge/bridge_port.c index 975680be..24b0bf70 100644 --- a/src/bridge_port.c +++ b/src/if/bridge/bridge_port.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2019-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2016 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -69,6 +69,7 @@ struct bridge_port { struct bridge_vlan_set *untag_vlans; struct cds_list_head brlink; /* list of ports in bridge */ uint16_t pvid; + bool fal_created; uint8_t state[MSTP_MSTI_COUNT]; /* Administrative */ @@ -174,12 +175,6 @@ bridge_port_get_state(struct bridge_port *port) return CMM_LOAD_SHARED(port->state[MSTP_MSTI_IST]); } -void -bridge_port_flush_vlans(struct bridge_port *port) -{ - bridge_vlan_set_clear(port->vlans); -} - bool bridge_port_lookup_vlan(struct bridge_port *port, uint16_t vlan) { @@ -193,6 +188,13 @@ bridge_port_synchronize_vlans(struct bridge_port *port, return bridge_port_set_synchronize(port->vlans, new_vlans); } +bool +bridge_port_get_vlans( + struct bridge_port *port, struct bridge_vlan_set *to_vlans) +{ + return bridge_port_set_synchronize(to_vlans, port->vlans); +} + void bridge_port_set_pvid(struct bridge_port *port, uint16_t vlan) { @@ -205,24 +207,6 @@ bridge_port_get_pvid(struct bridge_port *port) return CMM_LOAD_SHARED(port->pvid); } -void -bridge_port_add_untag_vlan(struct bridge_port *port, uint16_t vlan) -{ - bridge_vlan_set_add(port->untag_vlans, vlan); -} - -void -bridge_port_remove_untag_vlan(struct bridge_port *port, uint16_t vlan) -{ - bridge_vlan_set_remove(port->untag_vlans, vlan); -} - -void -bridge_port_flush_untag_vlans(struct bridge_port *port) -{ - bridge_vlan_set_clear(port->untag_vlans); -} - bool bridge_port_lookup_untag_vlan(struct bridge_port *port, uint16_t vlan) { @@ -236,12 +220,11 @@ bridge_port_synchronize_untag_vlans(struct bridge_port *port, return bridge_port_set_synchronize(port->untag_vlans, new_untagged); } -void -bridge_port_reset(struct bridge_port *port) +bool +bridge_port_get_untag_vlans( + struct bridge_port *port, struct bridge_vlan_set *to_vlans) { - bridge_port_flush_vlans(port); - bridge_port_flush_untag_vlans(port); - bridge_port_set_pvid(port, 0); + return bridge_port_set_synchronize(to_vlans, port->untag_vlans); } struct ifnet *bridge_port_get_interface(struct bridge_port 
*port) @@ -276,3 +259,12 @@ bool bridge_port_is_vlan_member(struct bridge_port *port, return false; } +void bridge_port_set_fal_created(struct bridge_port *port, bool created) +{ + port->fal_created = created; +} + +bool bridge_port_is_fal_created(struct bridge_port *port) +{ + return port->fal_created; +} diff --git a/src/bridge_port.h b/src/if/bridge/bridge_port.h similarity index 83% rename from src/bridge_port.h rename to src/if/bridge/bridge_port.h index c007f974..e5330215 100644 --- a/src/bridge_port.h +++ b/src/if/bridge/bridge_port.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2019-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -29,8 +29,8 @@ struct ifnet; /* * Creates a new bridge port */ -struct bridge_port *bridge_port_create(struct ifnet *ifp_port, - struct ifnet *ifp_bridge); +struct bridge_port *bridge_port_create(struct ifnet *port_ifp, + struct ifnet *bridge_ifp); /* * Free the memory associated with a bridge port @@ -72,11 +72,6 @@ uint8_t bridge_port_get_state(struct bridge_port *port); * and belongs to a VLAN not part of this list it will be dropped on egress. */ -/* - * Remove all VLANs from the allowed list for this bridge port - */ -void bridge_port_flush_vlans(struct bridge_port *port); - /* * Check if a VLAN is in the allowed list for this bridge port */ @@ -89,6 +84,13 @@ bool bridge_port_lookup_vlan(struct bridge_port *port, uint16_t vlan); bool bridge_port_synchronize_vlans(struct bridge_port *port, struct bridge_vlan_set *new_vlans); +/* + * Get the VLANs in the allowed list. + * Returns true if to_vlans changed, false otherwise. + */ +bool bridge_port_get_vlans( + struct bridge_port *port, struct bridge_vlan_set *to_vlans); + /* * The following functions are related to a port's PVID. 
* The PVID is the VLAN id that all untagged packets get tagged @@ -110,20 +112,6 @@ uint16_t bridge_port_get_pvid(struct bridge_port *port); * If a VLAN belongs to this list it will be removed from the frame on egress * from the bridge code. */ -/* - * Add a VLAN to the untag list for this bridge port - */ -void bridge_port_add_untag_vlan(struct bridge_port *port, uint16_t vlan); - -/* - * Remove a VLAN from the untag list for this bridge port - */ -void bridge_port_remove_untag_vlan(struct bridge_port *port, uint16_t vlan); - -/* - * Remove all VLANs from the untag list for this bridge port - */ -void bridge_port_flush_untag_vlans(struct bridge_port *port); /* * Check if a VLAN is in the untag list for this bridge port @@ -138,9 +126,11 @@ bool bridge_port_synchronize_untag_vlans(struct bridge_port *port, struct bridge_vlan_set *new_untagged); /* - * Clear all data related to a bridge port. + * Get the VLANs in the untagged list. + * Returns true if to_vlans changed, false otherwise. */ -void bridge_port_reset(struct bridge_port *port); +bool bridge_port_get_untag_vlans( + struct bridge_port *port, struct bridge_vlan_set *to_vlans); /* * Get the interface the bridge port is associated with. @@ -171,4 +161,14 @@ void bridge_port_add_to_list(struct bridge_port *port, bool bridge_port_is_vlan_member(struct bridge_port *port, uint16_t vlan); +/* + * Set state to note that bridge-port has been created in the FAL + */ +void bridge_port_set_fal_created(struct bridge_port *port, bool created); + +/* + * Retrieve state for whether bridge-port has been created in the FAL + */ +bool bridge_port_is_fal_created(struct bridge_port *port); + #endif /* BRIDGE_PORT_H */ diff --git a/src/switch.c b/src/if/bridge/switch.c similarity index 97% rename from src/switch.c rename to src/if/bridge/switch.c index 57cce54e..e05f45aa 100644 --- a/src/switch.c +++ b/src/if/bridge/switch.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2018-2019, AT&T Intellectual Property. 
+ * Copyright (c) 2019-2020, AT&T Intellectual Property. * All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only @@ -196,15 +196,15 @@ static int cmd_switch_vlan(struct ifnet *ifp, FILE *f, int argc, char **argv) { uint16_t vlan = 0; - if (strcmp(argv[4], "stats")) + if (strcmp(argv[4], "stats") != 0) goto error; if (argc == 6) vlan = atoi(argv[5]); - if (!strcmp(argv[3], "show")) + if (strcmp(argv[3], "show") == 0) switch_vlan_show_stats(ifp, vlan, f); - else if (!strcmp(argv[3], "clear")) + else if (strcmp(argv[3], "clear") == 0) switch_vlan_clear_stats(ifp, vlan, f); else goto error; @@ -222,7 +222,7 @@ int cmd_switch_op(FILE *f, int argc, char **argv) if (argc < 5) goto error; - ifp = ifnet_byifname(argv[1]); + ifp = dp_ifnet_byifname(argv[1]); if (!ifp) { fprintf(f, "Could not find interface %s", argv[1]); return -1; diff --git a/src/switch.h b/src/if/bridge/switch.h similarity index 79% rename from src/switch.h rename to src/if/bridge/switch.h index dfe096ed..ab82cbdb 100644 --- a/src/switch.h +++ b/src/if/bridge/switch.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2018, AT&T Intellectual Property. + * Copyright (c) 2019, AT&T Intellectual Property. * All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only diff --git a/src/devinfo.c b/src/if/dpdk-eth/devinfo.c similarity index 93% rename from src/devinfo.c rename to src/if/dpdk-eth/devinfo.c index 7394dcf9..97b52c20 100644 --- a/src/devinfo.c +++ b/src/if/dpdk-eth/devinfo.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2016 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -14,9 +14,7 @@ #include #include #include -#ifdef HAVE_RTE_BUS_PCI_H #include -#endif #include #include #include @@ -24,6 +22,7 @@ #include #include "backplane.h" +#include "dpdk_eth_if.h" #include "if_var.h" #include "json_writer.h" #include "lag.h" @@ -201,6 +200,23 @@ static unsigned int get_dev_port(const struct rte_pci_addr *loc) return dev_port; } +/* The cxgbe PMD encodes the port in the name + * of the instance. + */ + +static unsigned int get_dev_port_cxgbe(int portid) +{ + const struct rte_eth_dev *dev = &rte_eth_devices[portid]; + char *p; + int dev_port = 0; + + p = strchr(dev->data->name, '_'); + if (p) + dev_port = atoi(++p); + + return dev_port; +} + #define PCI_BASE_CLASS_NETWORK 0x02 static bool is_ethernet_device(const char *path) @@ -294,14 +310,10 @@ static void json_bus_info(json_writer_t *wr, portid_t portid, return; } -#ifdef HAVE_RTE_ETH_DEV_INFO_DEVICE const struct rte_bus *bus = rte_bus_find_by_device(dev_info.device); struct rte_pci_device *pci = NULL; if (bus && streq(bus->name, "pci")) pci = RTE_DEV_TO_PCI(dev_info.device); -#else - const struct rte_pci_device *pci = dev_info.pci_dev; -#endif if (pci) { const struct rte_pci_addr *loc = &pci->addr; char buf[PATH_MAX]; @@ -326,6 +338,11 @@ static void json_bus_info(json_writer_t *wr, portid_t portid, jsonw_uint_field(wr, "slot", (unsigned int)slot); int dev_port = get_dev_port(loc); + + if (dev_info.driver_name && + strcasestr(dev_info.driver_name, "net_cxgbe") != NULL) + dev_port = get_dev_port_cxgbe(portid); + if (dev_port > 0) jsonw_uint_field(wr, "dev-port", (unsigned int)dev_port); @@ -361,8 +378,10 @@ void check_broken_firmware(void) if (is_ethernet_device(dev_path)) { index = __get_firmware_index(dev_path); - if (index == -1) + if (index == -1) { + free(dev_path); continue; + } for (i = 0; i < ndevs; i++) { if (index == dev_index[i]) { @@ -394,15 +413,15 @@ void check_broken_firmware(void) "Some devices have duplicate BIOS indexes!\n"); free(dev_index); - closedir(devs); + if 
(devs) + closedir(devs); } /* Provide JSON string describing all info about a DPDK port. */ -char *if_port_info(const struct ifnet *ifp) +char *dpdk_eth_vplaned_devinfo(portid_t port_id) { struct rte_eth_dev_info dev_info; char name[IFNAMSIZ]; - portid_t port_id = ifp->if_port; char *outbuf = NULL; size_t outsize = 0; struct rte_eth_dev *eth_dev; @@ -430,8 +449,11 @@ char *if_port_info(const struct ifnet *ifp) jsonw_uint_field(wr, "port", port_id); + struct rte_ether_addr mac_addr; + rte_eth_macaddr_get(port_id, &mac_addr); + char ebuf[32]; - jsonw_string_field(wr, "mac", ether_ntoa_r(&ifp->perm_addr, ebuf)); + jsonw_string_field(wr, "mac", ether_ntoa_r(&mac_addr, ebuf)); /* Shouldn't be looking inside DPDK but there is no documented * way to get DPDK name which is used by bond driver. @@ -481,8 +503,8 @@ char *if_port_info(const struct ifnet *ifp) */ if (if_port_is_bkplane(port_id)) { if_flags |= IFF_UP; - /* max_rx_pktlen is the frame size */ - mtu = dev_info.max_rx_pktlen - ETHER_HDR_LEN - ETHER_CRC_LEN; + /* Use max mtu */ + mtu = dev_info.max_mtu; } json_bus_info(wr, port_id, backplane_name); diff --git a/src/if/dpdk-eth/dpdk_eth_if.c b/src/if/dpdk-eth/dpdk_eth_if.c new file mode 100644 index 00000000..28f64800 --- /dev/null +++ b/src/if/dpdk-eth/dpdk_eth_if.c @@ -0,0 +1,1551 @@ +/* + * Copyright (c) 2019-2020, AT&T Intellectual Property. All rights reserved. 
+ * + * SPDX-License-Identifier: LGPL-2.1-only + * + * DPDK port-backed interface implementation + */ + +#include +#include +#include +#include +#include +#include + +#include "dpdk_eth_if.h" +#include "dpdk_eth_linkwatch.h" +#include "dp_event.h" +#include "ether.h" +#include "hotplug.h" +#include "if_var.h" +#include "l2_rx_fltr.h" +#include "lag.h" +#include "qos.h" +#include "vhost.h" +#include "vplane_debug.h" +#include "vplane_log.h" +#include "transceiver.h" + +#define MODULE_SFF_8436_AX_LEN 640 + +typedef int (*reconfigure_port_cb_fn)(struct ifnet *ifp, + struct rte_eth_conf *dev_conf); + +static int reconfigure_port(struct ifnet *ifp, + struct rte_eth_conf *dev_conf, + reconfigure_port_cb_fn reconfigure_port_cb); + +static bitmask_t started_port_mask; /* port has been started */ + +static zhash_t *dpdk_name_to_eth_port_map; + +static void dpdk_name_to_eth_port_map_cleanup(void) +{ + zhash_destroy(&dpdk_name_to_eth_port_map); +} + +static void dpdk_name_to_eth_port_map_init(void) +{ + dpdk_name_to_eth_port_map = zhash_new(); + if (!dpdk_name_to_eth_port_map) + rte_panic( + "Cannot allocate zhash for name to port map for eth interfaces\n" + ); +} + +int dpdk_name_to_eth_port_map_add(const char *ifname, portid_t port) +{ + uint16_t *portid_obj = malloc(sizeof(*portid_obj)); + + if (!portid_obj) + return -ENOMEM; + *portid_obj = port; + if (zhash_insert(dpdk_name_to_eth_port_map, ifname, + portid_obj) < 0) { + free(portid_obj); + return -ENOMEM; + } + zhash_freefn(dpdk_name_to_eth_port_map, ifname, free); + + return 0; +} + +void dpdk_eth_port_map_del_port(portid_t port) +{ + uint16_t *portid_obj; + const char *ifname; + + for (portid_obj = zhash_first(dpdk_name_to_eth_port_map); + portid_obj; + portid_obj = zhash_next(dpdk_name_to_eth_port_map)) { + if (*portid_obj == port) { + ifname = zhash_cursor(dpdk_name_to_eth_port_map); + + zhash_delete(dpdk_name_to_eth_port_map, ifname); + return; + } + } +} + +static portid_t +dpdk_name_to_eth_port_map_get(const char 
*ifname) +{ + uint16_t *portid_obj = zhash_lookup(dpdk_name_to_eth_port_map, ifname); + + if (!portid_obj) + return IF_PORT_ID_INVALID; + + return *portid_obj; +} + +/* + * determine if device is Mellanox ConnectX-5 + * This will be used for some short-term customization of dataplane + * behaviour until we are able to up-rev DPDK to 1908 + */ +static bool is_device_mlx5(portid_t portid) +{ + struct rte_eth_dev_info dev_info; + + if (!rte_eth_dev_is_valid_port(portid)) + return false; + + rte_eth_dev_info_get(portid, &dev_info); + if (strstr(dev_info.driver_name, "net_mlx5") == dev_info.driver_name) + return true; + + return false; +} + +bool dpdk_eth_if_port_started(portid_t port) +{ + return bitmask_isset(&started_port_mask, port); +} + +static void soft_stop_port(struct ifnet *ifp) +{ + struct dpdk_eth_if_softc *sc = ifp->if_softc; + portid_t port = ifp->if_port; + + if (!bitmask_isset(&started_port_mask, port)) + return; /* already inactive */ + + bitmask_clear(&started_port_mask, port); + rte_eth_led_off(port); + + linkwatch_update_port_status(port, LINKWATCH_FLAG_FORCE_LINK_DOWN); + + /* Stop monitoring port */ + rte_timer_stop(&sc->scd_link_timer); + + qos_sched_stop(ifp); + + /* make sure cores have drained */ + dp_rcu_synchronize(); + + /* free any leftovers */ + pkt_ring_empty(port); +} + +static void soft_start_port(struct ifnet *ifp) +{ + struct dpdk_eth_if_softc *sc = ifp->if_softc; + portid_t port = ifp->if_port; + + if (bitmask_isset(&started_port_mask, port)) + return; /* already active */ + + rte_eth_led_on(port); + + bitmask_set(&started_port_mask, port); + linkwatch_update_port_status(port, LINKWATCH_FLAG_FORCE_NOTIFY); + + /* Start timer to send keepalive messages */ + if (rte_timer_reset(&sc->scd_link_timer, + config.port_update * rte_get_timer_hz(), + PERIODICAL, rte_get_master_lcore(), + linkwatch_timer, ifp) < 0) + RTE_LOG(ERR, DATAPLANE, + "rte_timer_reset failed for linkwatch timer port:%u\n", + port); +} + +/* Start device (admin up) */ 
+void dpdk_eth_if_start_port(struct ifnet *ifp) +{ + struct dpdk_eth_if_softc *sc = ifp->if_softc; + portid_t port = ifp->if_port; + int ret; + + if (!lag_can_start(ifp)) { + /* A bonding interface might not have any members yet. Don't + * try to start it since this will result in an error from + * rte_eth_dev_start(). Instead, lag_member_add() will start + * the interface (if necessary) when the first member is added. + */ + RTE_LOG(DEBUG, DATAPLANE, + "no members on bonding device %s\n", ifp->if_name); + return; + } + + if (bitmask_isset(&started_port_mask, port)) + return; /* already active */ + + /* bonding driver will start member device when ready */ + if (lag_can_startstop_member(ifp)) { + if (assign_queues(port)) + return; /* failure */ + + if (sc->scd_need_reset) + dpdk_eth_if_reset_port(NULL, ifp); + + ret = rte_eth_dev_start(port); + if (ret < 0 && !sc->scd_need_reset) { + RTE_LOG(ERR, DATAPLANE, + "rte_eth_dev_start: port=%u err=%d\n", + port, ret); + unassign_queues(port); + return; + } + + sc->scd_need_reset = false; + } + + soft_start_port(ifp); + if (lag_can_startstop_member(ifp)) + rte_eth_dev_set_link_up(port); +} + +/* Stop device (admin down) */ +void dpdk_eth_if_stop_port(struct ifnet *ifp) +{ + struct dpdk_eth_if_softc *sc = ifp->if_softc; + portid_t port = ifp->if_port; + + if (!bitmask_isset(&started_port_mask, port) && + !rte_timer_pending(&sc->scd_reset_timer)) + return; /* already inactive */ + + /* if the PF is down when the port is stopped, then it will not work + * once it restarts unless rte_eth_dev_reset is called. 
+ * But if the timer is simply left running then port will be set to UP + * when the PF goes back online, even if it should still be stopped + */ + if (rte_timer_pending(&sc->scd_reset_timer)) { + rte_timer_stop_sync(&sc->scd_reset_timer); + sc->scd_need_reset = true; + } + + if (lag_can_startstop_member(ifp)) + rte_eth_dev_set_link_down(port); + soft_stop_port(ifp); + + /* + * If we're a member of a bonding interface that doesn't + * support starting/stopping members independently then don't + * alter the state of the member - it will shortly be removed + * and it will then be coerced into the right state. + */ + if (!lag_can_startstop_member(ifp)) + return; + + rte_eth_dev_stop(port); + + unassign_queues(port); + + /* + * Some drivers require the HW multicast filter to be reprogrammed when + * the interface is next brought up after being taken down, regardless + * of whether this filter is already active + */ + l2_rx_fltr_set_reprogram(ifp); +} + +/* + * Stop a port when the dataplane port state may not be in sync with + * the dpdk port state, ensuring that either way the dpdk port is + * stopped on return. + */ +void dpdk_eth_if_force_stop_port(struct ifnet *ifp) +{ + struct dpdk_eth_if_softc *sc = ifp->if_softc; + portid_t port = ifp->if_port; + + if (!bitmask_isset(&started_port_mask, port) && + !rte_timer_pending(&sc->scd_reset_timer)) + rte_eth_dev_stop(port); + else + dpdk_eth_if_stop_port(ifp); +} + +/* Stop data transfer */ +void stop_all_ports(void) +{ + struct ifnet *ifp; + unsigned int port; + + for (port = 0; port < DATAPLANE_MAX_PORTS; port++) { + ifp = ifport_table[port]; + if (bitmask_isset(&started_port_mask, port)) + dpdk_eth_if_stop_port(ifp); + } +} + +/* Timer for periodic check of link reset + * + * Note: dp_rcu_read_lock not held here! + * This can be run both via directly in response to a link reset interrupt + * (tim will be NULL) or from an rte_timer callback (tim will be the actual + * timer). 
In both cases it will be run from the main thread. + */ +void dpdk_eth_if_reset_port(struct rte_timer *tim, void *arg) +{ + struct ifnet *ifp = arg; + portid_t port = ifp->if_port; + struct dpdk_eth_if_softc *sc = ifp->if_softc; + int ret; + struct rte_eth_conf dev_conf; + struct rte_eth_dev *eth_dev; + + dpdk_eth_if_stop_port(ifp); + + ret = rte_eth_dev_reset(port); + /* Only VF receives interrupt, bonding int will NOT reset. Also if the + * port is bonded, the bond interface must be restarted AFTER the + * reset call, otherwise bonding will be broken once PF is back up. + */ + if (ifp->aggregator) { + soft_start_port(ifp->aggregator); + if (is_team(ifp->aggregator)) + lag_refresh_actor_state(ifp->aggregator); + } + if (ret == -ENODEV || ret == -EINVAL) { + RTE_LOG(ERR, DATAPLANE, + "rte_eth_dev_reset: invalid port=%u err=%d\n", + port, ret); + } else if (ret == -ENOTSUP) { + RTE_LOG(NOTICE, DATAPLANE, + "rte_eth_dev_reset: no reset on HW port=%u err=%d\n", + port, ret); + } else if (ret == -EAGAIN || ret == -15) { + RTE_LOG(DEBUG, DATAPLANE, + "rte_eth_dev_reset: PF still down port=%u err=%d\n", + port, ret); + + /* reset failed, start timer to check again. If tim is not + * NULL then the call is from the timer, so it's already + * running, no need to start it again + * -15 is a weird IXGBE specific error code + */ + if (!tim && rte_timer_reset(&sc->scd_reset_timer, + config.port_update * rte_get_timer_hz(), + PERIODICAL, rte_get_master_lcore(), + dpdk_eth_if_reset_port, ifp) < 0) + RTE_LOG(ERR, DATAPLANE, "rte_timer_reset failed for " + "reset timer port:%u\n", port); + return; + } else if (ret < 0) { + /* drivers can return weird errors, catch it and log it */ + RTE_LOG(ERR, DATAPLANE, + "rte_eth_dev_reset: reset failed on HW port=%u err=%d\n", + port, ret); + } + + /* Port is inactive, no races are possible. If tim is NULL then this + * is the first call on interrupt and the timer is not running.
+ */ + if (tim) + rte_timer_stop_sync(tim); + + /* stop_port has to set need_reset if the timer is running, but + * setting it from here would cause a loop + */ + eth_dev = &rte_eth_devices[ifp->if_port]; + memcpy(&dev_conf, &eth_dev->data->dev_conf, sizeof(dev_conf)); + + sc->scd_need_reset = false; + reconfigure_port(ifp, &dev_conf, NULL); +} + +void dpdk_eth_if_update_port_queue_state(portid_t port) +{ + unassign_queues(port); + + set_port_queue_state(port); + + if (bitmask_isset(&started_port_mask, port)) + assign_queues(port); +} + +static void reconfigure_member(struct ifnet *ifp, void *arg) +{ + struct rte_eth_conf *conf = arg; + struct rte_eth_conf *member_conf; + struct rte_eth_dev *member_dev; + bool dev_started; + + member_dev = &rte_eth_devices[ifp->if_port]; + member_conf = &member_dev->data->dev_conf; + dev_started = member_dev->data->dev_started; + + /* Ensure member is stopped as stopping the bond port doesn't do this */ + if (dev_started) + rte_eth_dev_stop(ifp->if_port); + + /* + * Update member config to match the aggregate jumbo config + * so that it will accept a jumbo mtu change. + * Leave everything else alone. + * When the bond is restarted, it will configure the member, + * set up its queues, and start it, so don't call + * rte_eth_dev_configure() directly here. + */ + if (conf->rxmode.offloads & DEV_RX_OFFLOAD_SCATTER) + member_conf->rxmode.offloads |= DEV_RX_OFFLOAD_SCATTER; + else + member_conf->rxmode.offloads &= ~(DEV_RX_OFFLOAD_SCATTER); + if (conf->rxmode.offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) + member_conf->rxmode.offloads |= DEV_RX_OFFLOAD_JUMBO_FRAME; + else + member_conf->rxmode.offloads &= ~(DEV_RX_OFFLOAD_JUMBO_FRAME); + + if (dev_started) + rte_eth_dev_start(ifp->if_port); +} + +/* + * Reconfigure a port, stopping the port if necessary and + * performing any necessary work after restarting the port. + * + * reconfigure_port_cb can be used to perform any additional + * operations before the port is restarted.
+ */ +static int reconfigure_port(struct ifnet *ifp, + struct rte_eth_conf *dev_conf, + reconfigure_port_cb_fn reconfigure_port_cb) +{ + portid_t portid = ifp->if_port; + int err; + struct rte_eth_dev *dev = &rte_eth_devices[portid]; + int dev_started = dev->data->dev_started; + + if (dev_started) + dpdk_eth_if_stop_port(ifp); + + err = eth_port_configure(portid, dev_conf); + + if (!err && reconfigure_port_cb) + err = reconfigure_port_cb(ifp, dev_conf); + + /* + * If we brought the port down then bring it back up, even if there + * was an error. + */ + if (dev_started) { + dpdk_eth_if_start_port(ifp); + if (is_team(ifp)) + lag_refresh_actor_state(ifp); + /* Reprogram HW multicast filter after restarting port */ + l2_rx_fltr_state_change(ifp); + } + + if (err && reconfigure_port_cb) + /* + * Try again if it failed when the port was down. Some + * drivers such as igb require the port to be brought + * back up after the jumbo cfg is set before the mtu + * can be set into the jumbo range. + */ + err = reconfigure_port_cb(ifp, dev_conf); + + return err; +} + +static int reconfigure_pkt_len_cb(struct ifnet *ifp, + struct rte_eth_conf *dev_conf) +{ + int err; + + /* Reconfigure members to match aggregate jumbo config */ + if (is_team(ifp)) + lag_walk_team_members(ifp, reconfigure_member, dev_conf); + + err = rte_eth_dev_set_mtu(ifp->if_port, ifp->if_mtu_adjusted); + if (err == -ENOTSUP) + err = 0; + + return err; +} + +/* Change hardware MTU, can only be called if stopped. 
*/ +static int reconfigure_pkt_len(struct ifnet *ifp, uint32_t mtu) +{ + struct rte_eth_conf dev_conf; + struct rte_eth_dev *eth_dev = &rte_eth_devices[ifp->if_port]; + + memcpy(&dev_conf, &eth_dev->data->dev_conf, sizeof(dev_conf)); + + if (mtu > RTE_ETHER_MTU) { + struct rte_eth_dev_info dev_info; + rte_eth_dev_info_get(ifp->if_port, &dev_info); + if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_JUMBO_FRAME) + dev_conf.rxmode.offloads |= DEV_RX_OFFLOAD_JUMBO_FRAME; + if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_SCATTER) + dev_conf.rxmode.offloads |= DEV_RX_OFFLOAD_SCATTER; + } else { + dev_conf.rxmode.offloads &= ~(DEV_RX_OFFLOAD_JUMBO_FRAME | + DEV_RX_OFFLOAD_SCATTER); + } + dev_conf.rxmode.max_rx_pkt_len = mtu + + RTE_ETHER_HDR_LEN + + RTE_ETHER_CRC_LEN; + + return reconfigure_port(ifp, &dev_conf, reconfigure_pkt_len_cb); +} + +static inline bool +is_jumbo_size(uint32_t size) +{ + return size > RTE_ETHER_MTU; +} + +static bool is_device_swport(portid_t portid) +{ + struct rte_eth_dev_info dev_info; + + if (!rte_eth_dev_is_valid_port(portid)) + return false; + + rte_eth_dev_info_get(portid, &dev_info); + if (strstr(dev_info.driver_name, "net_sw_port") == dev_info.driver_name) + return true; + + return false; +} + +static inline bool is_reconfigure_port_required(struct ifnet *ifp) +{ + return !is_device_swport(ifp->if_port); +} + +static int dpdk_eth_if_set_mtu(struct ifnet *ifp, uint32_t mtu) +{ + int err = 0; + int adjusted_mtu = mtu; + + if (ifp->aggregator) { + /* + * This interface is already under control of the + * bonding interface. dev_start() in the bonding + * driver does a rte_eth_dev_configure() for + * each of the members and will update the member + * adapters at that point. But we need to keep + * ifp->if_mtu up to date. + */ + goto out; + } + + if (ifp->qinq_vif_cnt) + adjusted_mtu = adjusted_mtu + 4; + + /* + * MTU changes can affect the burst size used for QoS shaper.
+ * Therefore, QoS needs to be notified of the MTU changes so it can + * make the necessary dynamic changes where possible or alternatively + * stop and then restart QoS to allow it to re-calculate any resultant + * changes in the bucket/burst sizes where dynamic changes aren't + * possible. This is done using dp_event_notify + * + * Some drivers always need the port to be stopped (i40) + * for the mtu to be changed. Some drivers need the port to be + * stopped to transition into/outof jumbo range (ixgbe). + * Unfortunately we can't tell this ahead of time, so try to + * set the mtu, and if we get an error then stop the ports and + * try again. + * + * If we are transitioning into/outof jumbo range then we have to + * reconfigure the port to get the correct jumbo settings. + */ + bool changed = false; + bool mtu_jumbo_change = + (is_jumbo_size(ifp->if_mtu_adjusted) && + !is_jumbo_size(mtu)) || + (is_jumbo_size(mtu) && + !is_jumbo_size(ifp->if_mtu_adjusted)); + + /* + * Certain drivers require the port to be reconfigured when changing + * the MTU to/from jumbo size. Check if this is the case, if not then + * there is no reason to bounce the port. Also if we get an error + * try again with port stopped. + */ + if (!mtu_jumbo_change || !is_reconfigure_port_required(ifp)) { + err = rte_eth_dev_set_mtu(ifp->if_port, adjusted_mtu); + changed = true; + } + + /* + * We must update the interface's adjusted MTU before + * starting the port so that QoS can recalculate its + * token bucket size based upon the new MTU. + * + * Also used in the reconfigure_port callback. + */ + ifp->if_mtu_adjusted = adjusted_mtu; + + /* Try again, but this time after changing the port config */ + if (!changed || err ) { + RTE_LOG(INFO, DATAPLANE, + "reconfiguring %s due to %s\n", + ifp->if_name, + mtu_jumbo_change ?
+ "jumbo length packet change" : + "online MTU setting not supported for this interface"); + err = reconfigure_pkt_len(ifp, adjusted_mtu); + } +out: + if (!err) + ifp->if_mtu = mtu; + + return err; +} + +static int dpdk_eth_if_set_l2_address(struct ifnet *ifp, uint32_t l2_addr_len, + void *l2_addr) +{ + struct rte_ether_addr *macaddr = l2_addr; + char b1[32], b2[32]; + + if (l2_addr_len != RTE_ETHER_ADDR_LEN) { + RTE_LOG(NOTICE, DATAPLANE, + "link address is not ethernet (len=%u)!\n", + l2_addr_len); + return -EINVAL; + } + + if (rte_ether_addr_equal(&ifp->eth_addr, macaddr)) + return 1; + + RTE_LOG(INFO, DATAPLANE, "%s change MAC from %s to %s\n", + ifp->if_name, + ether_ntoa_r(&ifp->eth_addr, b1), + ether_ntoa_r(macaddr, b2)); + + int rc; + + if (ifp->if_team) + rc = lag_set_l2_address(ifp, macaddr); + else + rc = rte_eth_dev_default_mac_addr_set( + ifp->if_port, macaddr); + if (rc != 0) + return rc; + + ifp->eth_addr = *macaddr; + + return 0; +} + +static int dpdk_eth_if_start(struct ifnet *ifp) +{ + dpdk_eth_if_start_port(ifp); + if (if_port_is_bkplane(ifp->if_port)) + ifpromisc(ifp, true); + + return 0; +} + +static int dpdk_eth_if_stop(struct ifnet *ifp) +{ + /* + * If this is a bonding member then it's managed by the + * bonding PMD until the team genetlink removes it from the + * bond. 
+ */ + if (!lag_can_startstop_member(ifp)) + return 0; + + dpdk_eth_if_stop_port(ifp); + if (if_port_is_bkplane(ifp->if_port)) + ifpromisc(ifp, false); + + return 0; +} + +static int +dpdk_eth_if_add_l2_addr(struct ifnet *ifp, void *l2_addr) +{ + return rte_eth_dev_mac_addr_add(ifp->if_port, l2_addr, 0); +} + +static int +dpdk_eth_if_del_l2_addr(struct ifnet *ifp, void *l2_addr) +{ + return rte_eth_dev_mac_addr_remove(ifp->if_port, l2_addr); +} + +static int dpdk_eth_if_init(struct ifnet *ifp, void *ctx) +{ + struct dpdk_eth_if_softc *sc; + portid_t port = *(portid_t *)ctx; + + sc = rte_zmalloc_socket("dpdk softc", sizeof(*sc), 0, ifp->if_socket); + if (!sc) + return -ENOMEM; + + rte_timer_init(&sc->scd_link_timer); + rte_timer_init(&sc->scd_blink_timer); + rte_timer_init(&sc->scd_reset_timer); + + sc->scd_ifp = ifp; + ifp->if_softc = sc; + ifp->if_port = port; + + rte_ether_addr_copy(&ifp->eth_addr, &ifp->perm_addr); + + return 0; +} + +static void dpdk_eth_if_softc_free_rcu(struct rcu_head *head) +{ + struct dpdk_eth_if_softc *sc = + caa_container_of(head, struct dpdk_eth_if_softc, scd_rcu); + + if (sc->scd_vhost_info) + vhost_info_free(sc->scd_vhost_info); + + rte_free(sc); +} + +static void dpdk_eth_if_uninit(struct ifnet *ifp) +{ + struct dpdk_eth_if_softc *sc = ifp->if_softc; + + /* if unplugged, then this has already been done */ + if (!ifp->unplugged) + shadow_uninit_port(ifp->if_port); + + /* to cope with freeing after errors during initialisation of ifp */ + if (!sc) + return; + + rte_timer_stop(&sc->scd_link_timer); + rte_timer_stop(&sc->scd_blink_timer); + rte_timer_stop(&sc->scd_reset_timer); + + rcu_assign_pointer(ifp->if_softc, NULL); + + ifport_table[ifp->if_port] = NULL; + + call_rcu(&sc->scd_rcu, dpdk_eth_if_softc_free_rcu); +} + +static int +dpdk_eth_if_set_vlan_filter(struct ifnet *ifp, uint16_t vlan, bool enable) +{ + struct rte_eth_dev_info dev_info; + int ret = -ENOTSUP; + + rte_eth_dev_info_get(ifp->if_port, &dev_info); + if 
((dev_info.rx_offload_capa & DEV_RX_OFFLOAD_VLAN_FILTER) != 0) + ret = rte_eth_dev_vlan_filter(ifp->if_port, vlan, enable); + + return ret; +} + +static int +dpdk_eth_if_set_vlan_proto(struct ifnet *ifp, + enum if_vlan_header_type type, + uint16_t proto) +{ + enum rte_vlan_type rte_type = ETH_VLAN_TYPE_UNKNOWN; + int ret; + + if (!ifp->if_local_port) + return -ENOTSUP; + + /* + * The Mellanox ConnectX-5 driver uses a very inefficient + * transmit function if VLAN insertion is offloaded. + * Temporarily handle this in the dataplane. + * This should be removed when we up-rev DPDK to 1908 + */ + if (is_device_mlx5(ifp->if_port)) + return -ENOTSUP; + + switch (type) { + case IF_VLAN_HEADER_OUTER: + rte_type = ETH_VLAN_TYPE_OUTER; + break; + case IF_VLAN_HEADER_INNER: + rte_type = ETH_VLAN_TYPE_INNER; + break; + } + + /* + * The vlan protocol is set in the PMD even if setting + * back to 802.1q and offload wasn't supported to + * avoid making assumptions about what the drivers may + * or may not support. I.e. the driver may support + * certain protocols, rather than being an + * all-or-nothing deal. + */ + ret = rte_eth_dev_set_vlan_ether_type(ifp->if_port, rte_type, + proto); + + if (ret == -ENOTSUP && proto == ETH_P_8021Q) { + /* + * Offload for the 802.1q protocol + * type is guaranteed by DPDK to + * always be supported in a PMD, but + * rte_eth_dev_set_vlan_ether_type + * returns -ENOTSUP if the PMD doesn't + * fill in the function pointer. + */ + return 0; + } + + return ret; +} + +static int +dpdk_eth_if_set_broadcast(struct ifnet *ifp, bool enable) +{ + /* + * This interface is under the control of bonding PMD, so + * don't make any changes to it.
+ */ + if (ifp->aggregator) + return 0; + + return ether_if_set_broadcast(ifp, enable); +} + +static int +dpdk_eth_if_set_promisc(struct ifnet *ifp, bool enable) +{ + struct rte_eth_dev_info dev_info; + uint32_t offload_mask; + int ret; + + /* + * This interface is under the control of bonding PMD + * so don't make any changes to it. + */ + if (ifp->aggregator) + return 0; + + if (enable) + ret = rte_eth_promiscuous_enable(ifp->if_port); + else + ret = rte_eth_promiscuous_disable(ifp->if_port); + + rte_eth_dev_info_get(ifp->if_port, &dev_info); + if (!ret && dev_info.rx_offload_capa & DEV_RX_OFFLOAD_VLAN_FILTER) { + offload_mask = + rte_eth_dev_get_vlan_offload(ifp->if_port); + if (enable) + offload_mask &= ~ETH_VLAN_FILTER_OFFLOAD; + else + offload_mask |= ETH_VLAN_FILTER_OFFLOAD; + ret = rte_eth_dev_set_vlan_offload(ifp->if_port, + offload_mask); + } + + return ret; +} + +static void +dpdk_eth_if_show_dev_capabilities(json_writer_t *wr, + const struct rte_eth_dev_info *info) +{ + struct speed_capas { + uint32_t speed_capa; + uint32_t mbps; /* megabits */ + } speed_capas[] = { + { ETH_LINK_SPEED_10M, 10 }, + { ETH_LINK_SPEED_100M, 100 }, + { ETH_LINK_SPEED_1G, 1000 }, + { ETH_LINK_SPEED_2_5G, 2500 }, + { ETH_LINK_SPEED_5G, 5000 }, + { ETH_LINK_SPEED_10G, 10000 }, + { ETH_LINK_SPEED_20G, 20000 }, + { ETH_LINK_SPEED_25G, 25000 }, + { ETH_LINK_SPEED_40G, 40000 }, + { ETH_LINK_SPEED_50G, 50000 }, + { ETH_LINK_SPEED_56G, 56000 }, + { ETH_LINK_SPEED_100G, 100000 }, + }; + struct speed_capas hd_speed_capas[] = { + { ETH_LINK_SPEED_10M_HD, 10 }, + { ETH_LINK_SPEED_100M_HD, 100 }, + }; + unsigned int i; + + jsonw_name(wr, "capabilities"); + jsonw_start_object(wr); + + /* If speed_capa is 0, it's likely it hasn't been set up and we + * have no idea what the hardware/driver actually supports. We + * could add some overrides to dataplane-drivers-default.conf + * to massage what we return here. 
+ */ + jsonw_name(wr, "full-duplex"); + jsonw_start_array(wr); + for (i = 0; i < ARRAY_SIZE(speed_capas); i++) { + if (info->speed_capa & speed_capas[i].speed_capa) + jsonw_uint(wr, speed_capas[i].mbps); + } + jsonw_end_array(wr); + + jsonw_name(wr, "half-duplex"); + jsonw_start_array(wr); + for (i = 0; i < ARRAY_SIZE(hd_speed_capas); i++) { + if (info->speed_capa & hd_speed_capas[i].speed_capa) + jsonw_uint(wr, hd_speed_capas[i].mbps); + } + jsonw_end_array(wr); + + jsonw_end_object(wr); +} + +static bool +dpdk_eth_is_mgmt_port(const struct rte_pci_addr *pci_addr) +{ + struct config_pci_entry *pci_entry; + + LIST_FOREACH(pci_entry, &platform_cfg.mgmt_list, link) + if (!rte_pci_addr_cmp(&pci_entry->pci_addr, pci_addr)) + return true; + + return false; +} + +static void +dpdk_eth_if_show_dev_info(struct ifnet *ifp, json_writer_t *wr) +{ + struct rte_eth_dev_info info; + portid_t port = ifp->if_port; + int hw_switch; + + rte_eth_dev_info_get(port, &info); + + jsonw_name(wr, "dev"); + jsonw_start_object(wr); + if (info.driver_name) + jsonw_string_field(wr, "driver", info.driver_name); + jsonw_uint_field(wr, "node", rte_eth_dev_socket_id(port)); + + if (port < RTE_MAX_ETHPORTS) { /* possibly NO_OWNER */ + struct rte_eth_dev *dev = &rte_eth_devices[port]; + bool settable; + + if (ifp->if_team) + settable = true; + else + settable = dev && dev->dev_ops && + dev->dev_ops->mac_addr_set ? 
true : false; + + jsonw_bool_field(wr, "mac_addr_settable", settable); + jsonw_string_field(wr, "eth_dev_data_name", dev->data->name); + jsonw_bool_field(wr, "dev_started", dev->data->dev_started); + jsonw_bool_field(wr, "scattered_rx", dev->data->scattered_rx); + jsonw_uint_field(wr, "lsc", dev->data->dev_conf.intr_conf.lsc); + /* + * workaround to determine switch id until we have + * a mechanism for retrieving opaque data + */ + if (info.driver_name && + get_switch_dev_info(info.driver_name, dev->data->name, + &hw_switch, NULL)) + jsonw_uint_field(wr, "hw_switch_id", hw_switch); + } + + const struct rte_bus *bus = rte_bus_find_by_device(info.device); + struct rte_pci_device *pci = NULL; + if (bus && streq(bus->name, "pci")) + pci = RTE_DEV_TO_PCI(info.device); + if (pci) { + jsonw_name(wr, "pci"); + jsonw_start_object(wr); + + jsonw_name(wr, "address"); + jsonw_start_object(wr); + jsonw_uint_field(wr, "domain", pci->addr.domain); + jsonw_uint_field(wr, "bus", pci->addr.bus); + jsonw_uint_field(wr, "devid", pci->addr.devid); + jsonw_uint_field(wr, "function", pci->addr.function); + jsonw_end_object(wr); + + jsonw_name(wr, "id"); + jsonw_start_object(wr); + jsonw_uint_field(wr, "vendor", pci->id.vendor_id); + jsonw_uint_field(wr, "device", pci->id.device_id); + jsonw_uint_field(wr, "subsystem_vendor", + pci->id.subsystem_vendor_id); + jsonw_uint_field(wr, "subsystem_device", + pci->id.subsystem_device_id); + jsonw_end_object(wr); + + jsonw_end_object(wr); + } + + dpdk_eth_if_show_dev_capabilities(wr, &info); + + jsonw_uint_field(wr, "min_rx_bufsize", info.min_rx_bufsize); + jsonw_uint_field(wr, "max_rx_pktlen", info.max_rx_pktlen); + jsonw_uint_field(wr, "max_rx_queues", info.max_rx_queues); + jsonw_uint_field(wr, "max_tx_queues", info.max_tx_queues); + jsonw_uint_field(wr, "max_mac_addrs", info.max_mac_addrs); + jsonw_uint_field(wr, "vmdq_queue_base", info.vmdq_queue_base); + jsonw_uint_field(wr, "vmdq_queue_num", info.vmdq_queue_num); + + if (info.driver_name && 
strcasestr(info.driver_name, "net_vhost")) + vhost_devinfo(wr, ifp); + + if (pci && dpdk_eth_is_mgmt_port(&pci->addr)) + jsonw_bool_field(wr, "management", true); + + jsonw_end_object(wr); +} + +/* Device with statistics in hardware */ +static void +dpdk_eth_if_show_stats(struct ifnet *ifp, json_writer_t *wr) +{ + struct rte_eth_stats hwstats; + unsigned int i; + int ret; + + ret = rte_eth_stats_get(ifp->if_port, &hwstats); + if (ret) + return; + + jsonw_uint_field(wr, "rx_missed", hwstats.imissed); + jsonw_uint_field(wr, "rx_nobuffer", hwstats.rx_nombuf); + + jsonw_name(wr, "qstats"); + jsonw_start_array(wr); + for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS; i++) { + jsonw_start_object(wr); + jsonw_uint_field(wr, "ipackets", hwstats.q_ipackets[i]); + jsonw_uint_field(wr, "ibytes", hwstats.q_ibytes[i]); + jsonw_uint_field(wr, "opackets", hwstats.q_opackets[i]); + jsonw_uint_field(wr, "obytes", hwstats.q_obytes[i]); + jsonw_uint_field(wr, "errors", hwstats.q_errors[i]); + jsonw_end_object(wr); + } + jsonw_end_array(wr); +} + +/* Device with extended statistics in hardware (physical port) */ +static void +dpdk_eth_if_show_xstats(struct ifnet *ifp, json_writer_t *wr) +{ + int i, len, ret; + + len = rte_eth_xstats_get_names(ifp->if_port, NULL, 0); + if (len < 1) + return; + + struct rte_eth_xstat_name xstat_names[len]; + struct rte_eth_xstat xstats[len]; + memset(xstat_names, 0, sizeof(xstat_names)); + memset(xstats, 0, sizeof(xstats)); + + ret = rte_eth_xstats_get_names(ifp->if_port, xstat_names, len); + if (ret < 0 || ret > len) + return; + ret = rte_eth_xstats_get(ifp->if_port, xstats, len); + if (ret < 0 || ret > len) + return; + + for (i = 0; i < len; i++) + jsonw_uint_field(wr, xstat_names[xstats[i].id].name, + xstats[i].value); +} + +static void +dpdk_eth_if_show_state(struct ifnet *ifp, json_writer_t *wr) +{ + if (ifp->if_local_port) + jsonw_uint_field(wr, "port", ifp->if_port); +} + +static void dpdk_eth_if_show_xcvr_info(struct ifnet *ifp, json_writer_t *wr) +{ 
+ struct rte_eth_dev_module_info module_info; + struct rte_dev_eeprom_info eeprom_info; + char *buf; + int rv; + + memset(&module_info, 0, sizeof(module_info)); + + rv = rte_eth_dev_get_module_info(ifp->if_port, &module_info); + if (rv) + return; + + eeprom_info.length = + module_info.eeprom_len < MODULE_SFF_8436_AX_LEN ? + module_info.eeprom_len : MODULE_SFF_8436_AX_LEN; + + buf = malloc(eeprom_info.length); + if (!buf) { + DP_DEBUG(LINK, ERR, DATAPLANE, + "Failed to allocate xcvr eeprom info buffer\n"); + return; + } + eeprom_info.data = buf; + eeprom_info.offset = 0; + + rv = rte_eth_dev_get_module_eeprom(ifp->if_port, &eeprom_info); + if (rv) { + free(buf); + return; + } + + if (!module_info.eeprom_len) { + free(buf); + return; + } + + jsonw_name(wr, "xcvr_info"); + jsonw_start_object(wr); + sfp_status(&module_info, &eeprom_info, wr); + jsonw_end_object(wr); + free(buf); +} + +static int +dpdk_eth_if_dump(struct ifnet *ifp, json_writer_t *wr, + enum if_dump_state_type type) +{ + if (!ifp->if_local_port || ifp->unplugged) + return 0; + + switch (type) { + case IF_DS_STATS: + dpdk_eth_if_show_stats(ifp, wr); + break; + case IF_DS_XSTATS: + dpdk_eth_if_show_xstats(ifp, wr); + break; + case IF_DS_DEV_INFO: + dpdk_eth_if_show_dev_info(ifp, wr); + break; + case IF_DS_STATE: + dpdk_eth_if_show_state(ifp, wr); + break; + case IF_DS_STATE_VERBOSE: + dpdk_eth_if_show_xcvr_info(ifp, wr); + break; + default: + break; + } + + return 0; +} + +static void +dpdk_eth_if_get_xstats(struct ifnet *ifp, + struct if_data *stats) +{ +#define NUM_XSTATS 2 + int i, rv, nstats; + const char *xstat_names[NUM_XSTATS] = { + "rx_multicast_packets", + "rx_broadcast_packets" + }; + uint64_t xstat_ids[NUM_XSTATS] = { ~0ull, ~0ull }; + uint64_t rx_mcast_pkts = 0; + + /* retrieve all xstats */ + nstats = rte_eth_xstats_get(ifp->if_port, NULL, 0); + if (nstats < 0) + return; + + struct rte_eth_xstat xstat_values[nstats]; + + rv = rte_eth_xstats_get(ifp->if_port, xstat_values, nstats); + if (rv < 
0) + return; + + /* get stat ids for the ones we are interested in */ + for (i = 0; i < NUM_XSTATS; i++) { + rv = rte_eth_xstats_get_id_by_name(ifp->if_port, + xstat_names[i], + &xstat_ids[i]); + if (rv) + continue; + } + + for (nstats = 0, i = 0; i < NUM_XSTATS; i++) { + if (xstat_ids[i] == ~0ull) + continue; + + nstats++; + rx_mcast_pkts += xstat_values[xstat_ids[i]].value; + } + + if (nstats) + stats->ifi_imulticast = rx_mcast_pkts; +} + +static int +dpdk_eth_if_get_stats(struct ifnet *ifp, struct if_data *stats) +{ + struct rte_eth_stats hwstats; + int ret; + + ret = rte_eth_stats_get(ifp->if_port, &hwstats); + if (ret) + return ret; + + stats->ifi_ipackets = hwstats.ipackets; + stats->ifi_opackets = hwstats.opackets; + stats->ifi_ibytes = hwstats.ibytes; + stats->ifi_obytes = hwstats.obytes; + stats->ifi_ierrors += hwstats.ierrors; + stats->ifi_oerrors += hwstats.oerrors; + + dpdk_eth_if_get_xstats(ifp, stats); + return 0; +} + +/* Timer called (from main) to toggle state of LED. */ +static void dpdk_eth_if_blink_timer(struct rte_timer *tim, void *arg) +{ + struct ifnet *ifp = arg; + struct dpdk_eth_if_softc *sc = ifp->if_softc; + int rc; + + if (sc->scd_blink_on) + rc = rte_eth_led_on(ifp->if_port); + else + rc = rte_eth_led_off(ifp->if_port); + + if (rc < 0) { + DP_DEBUG(LINK, NOTICE, DATAPLANE, + "%s: led %s failed: %s\n", + ifp->if_name, sc->scd_blink_on ? 
"on" : "off", + strerror(-rc)); + rte_timer_stop(tim); + } else + sc->scd_blink_on = !sc->scd_blink_on; +} + +/* Start/stop LED blink timer */ +static int dpdk_eth_if_blink(struct ifnet *ifp, bool on) +{ + struct dpdk_eth_if_softc *sc = ifp->if_softc; + int rc = 0; + + if (!ifp->if_local_port) + return -ENOTSUP; + + if (on) { + rc = rte_eth_led_on(ifp->if_port); + if (rc < 0) + return rc; + + sc->scd_blink_on = 0; + rte_timer_reset(&sc->scd_blink_timer, + rte_get_timer_hz() / 2, + PERIODICAL, rte_get_master_lcore(), + dpdk_eth_if_blink_timer, ifp); + } else { + rte_timer_stop_sync(&sc->scd_blink_timer); + + /* restore proper link state of LED */ + if (if_port_isup(ifp->if_port)) + rte_eth_led_on(ifp->if_port); + else + rte_eth_led_off(ifp->if_port); + } + + return rc; +} + +static int dpdk_eth_if_set_backplane(struct ifnet *ifp, + unsigned int bp_ifindex) +{ + struct dpdk_eth_if_softc *sc = ifp->if_softc; + + sc->bp_ifindex = bp_ifindex; + + return 0; +} + +static int dpdk_eth_if_get_backplane(struct ifnet *ifp, + unsigned int *bp_ifindex) +{ + struct dpdk_eth_if_softc *sc = ifp->if_softc; + + *bp_ifindex = sc->bp_ifindex; + + return 0; +} + +static int +dpdk_eth_if_l3_enable(struct ifnet *ifp) +{ + int ret = 0; + + if (!if_port_is_bkplane(ifp->if_port)) + ret = if_fal_create_l3_intf(ifp); + + return ret; +} + +static int +dpdk_eth_if_l3_disable(struct ifnet *ifp) +{ + /* + * No check for backplane here (unlike in + * dpdk_eth_if_l3_enable) because the port may not be + * valid and if_delete_l3_intf checks whether the L3 interface + * has been created anyway. 
+	 */
+	return if_fal_delete_l3_intf(ifp);
+}
+
+/* Report the interface's hw_forwarding flag. */
+static bool
+dpdk_eth_if_is_hw_switching_enabled(struct ifnet *ifp)
+{
+	return ifp->hw_forwarding;
+}
+
+/*
+ * Apply a speed/duplex/autoneg setting to the port.
+ *
+ * With autoneg the advertised mask is ETH_LINK_SPEED_AUTONEG; otherwise
+ * it is the fixed bitflag for speed and duplex, where a duplex of -1
+ * selects both the half- and full-duplex flags. The port is only
+ * reconfigured when the mask actually changes.
+ *
+ * Returns 0 when nothing needs to change, otherwise the result of
+ * reconfigure_port().
+ */
+static int
+dpdk_eth_if_set_speed(struct ifnet *ifp, bool autoneg,
+		      uint32_t speed, int duplex)
+{
+	struct rte_eth_conf dev_conf;
+	struct rte_eth_dev *eth_dev;
+	uint32_t link_speeds;
+
+	if (autoneg)
+		link_speeds = ETH_LINK_SPEED_AUTONEG;
+	else {
+		if (duplex == -1)
+			/*
+			 * Most speeds don't have a separate half-
+			 * and full-duplex so or'ing their bitflags
+			 * together is harmless.
+			 */
+			link_speeds = rte_eth_speed_bitflag(speed, 0) |
+				rte_eth_speed_bitflag(speed, 1);
+		else
+			link_speeds =
+				rte_eth_speed_bitflag(speed, duplex);
+		link_speeds |= ETH_LINK_SPEED_FIXED;
+	}
+
+	/* work on a copy of the device's current configuration */
+	eth_dev = &rte_eth_devices[ifp->if_port];
+	memcpy(&dev_conf, &eth_dev->data->dev_conf, sizeof(dev_conf));
+
+	/* Some drivers set bits for advertised speeds if autoneg enabled */
+	if (dev_conf.link_speeds == link_speeds ||
+	    (autoneg && !(dev_conf.link_speeds & ETH_LINK_SPEED_FIXED)))
+		return 0;
+
+	dev_conf.link_speeds = link_speeds;
+	return reconfigure_port(ifp, &dev_conf, NULL);
+}
+
+/*
+ * Fill if_link with the current link status, duplex and speed.
+ *
+ * The link structure is zeroed first, so an unplugged port (for which
+ * DPDK is not queried) is reported as down. Always returns 0.
+ */
+static int
+dpdk_eth_if_get_link_status(struct ifnet *ifp,
+			    struct dp_ifnet_link_status *if_link)
+{
+	struct rte_eth_link link;
+
+	memset(&link, 0, sizeof(link));
+
+	/* consider unplugged as down, but don't ask DPDK */
+	if (!ifp->unplugged)
+		rte_eth_link_get_nowait(ifp->if_port, &link);
+
+	if_link->link_status = link.link_status;
+	if_link->link_duplex =
+		link.link_duplex ?
DP_IFNET_LINK_DUPLEX_FULL :
+		DP_IFNET_LINK_DUPLEX_HALF;
+	if_link->link_speed = link.link_speed;
+
+	return 0;
+}
+
+/* IANA interface type: LAG for team interfaces, plain Ethernet otherwise. */
+static enum dp_ifnet_iana_type
+dpdk_eth_iana_type(struct ifnet *ifp)
+{
+	return lag_is_team(ifp) ? DP_IFTYPE_IANA_IEEE8023ADLAG :
+				  DP_IFTYPE_IANA_ETHERNETCSMACD;
+}
+
+/*
+ * Forward a usability change to the LAG code for LAG member ports.
+ * Ports that are not LAG members are left alone and 0 is returned.
+ */
+static int dpdk_eth_if_set_usability(struct ifnet *ifp, bool usable)
+{
+	if (lag_port_is_member(ifp))
+		return lag_set_member_usable(ifp, usable);
+
+	return 0;
+}
+
+/* Operations registered for IFT_ETHER interfaces by dpdk_eth_init(). */
+static const struct ift_ops dpdk_eth_if_ops = {
+	.ifop_set_mtu = dpdk_eth_if_set_mtu,
+	.ifop_set_l2_address = dpdk_eth_if_set_l2_address,
+	.ifop_start = dpdk_eth_if_start,
+	.ifop_stop = dpdk_eth_if_stop,
+	.ifop_add_l2_addr = dpdk_eth_if_add_l2_addr,
+	.ifop_del_l2_addr = dpdk_eth_if_del_l2_addr,
+	.ifop_init = dpdk_eth_if_init,
+	.ifop_uninit = dpdk_eth_if_uninit,
+	.ifop_set_vlan_filter = dpdk_eth_if_set_vlan_filter,
+	.ifop_set_vlan_proto = dpdk_eth_if_set_vlan_proto,
+	.ifop_set_broadcast = dpdk_eth_if_set_broadcast,
+	.ifop_set_promisc = dpdk_eth_if_set_promisc,
+	.ifop_dump = dpdk_eth_if_dump,
+	.ifop_get_stats = dpdk_eth_if_get_stats,
+	.ifop_blink = dpdk_eth_if_blink,
+	.ifop_set_backplane = dpdk_eth_if_set_backplane,
+	.ifop_get_backplane = dpdk_eth_if_get_backplane,
+	.ifop_l3_enable = dpdk_eth_if_l3_enable,
+	.ifop_l3_disable = dpdk_eth_if_l3_disable,
+	.ifop_is_hw_switching_enabled = dpdk_eth_if_is_hw_switching_enabled,
+	.ifop_set_speed = dpdk_eth_if_set_speed,
+	.ifop_get_link_status = dpdk_eth_if_get_link_status,
+	.ifop_iana_type = dpdk_eth_iana_type,
+	.ifop_set_usability = dpdk_eth_if_set_usability,
+};
+
+/*
+ * Startup hook: register the IFT_ETHER operations (panicking if that
+ * fails) and set up the interface-name to port map.
+ */
+static void dpdk_eth_init(void)
+{
+	int ret;
+
+	ret = if_register_type(IFT_ETHER, &dpdk_eth_if_ops);
+	if (ret < 0)
+		rte_panic("Failed to register DPDK ethernet interface type: %s",
+			  strerror(-ret));
+
+	dpdk_name_to_eth_port_map_init();
+}
+
+/* Shutdown hook: tear down the interface-name to port map. */
+static void dpdk_eth_uninit(void)
+{
+	dpdk_name_to_eth_port_map_cleanup();
+}
+
+static const struct dp_event_ops dpdk_eth_if_events = {
+
.init = dpdk_eth_init,
+	.uninit = dpdk_eth_uninit,
+};
+
+DP_STARTUP_EVENT_REGISTER(dpdk_eth_if_events);
+/*
+ * Allocate an IFT_ETHER ifnet for DPDK port portid.
+ *
+ * Returns NULL when the MAC address is all zeroes, when if_alloc()
+ * fails, or when the VLAN storage cannot be set up (the ifnet is freed
+ * again in that case). VLAN insertion offload is switched off for
+ * devices that is_device_mlx5() recognises.
+ */
+static struct ifnet *
+if_hwport_init(const char *if_name, unsigned int portid,
+	       const struct rte_ether_addr *eth, int socketid)
+{
+	struct ifnet *ifp;
+
+	/* device driver couldn't find MAC address */
+	if (rte_is_zero_ether_addr(eth)) {
+		RTE_LOG(NOTICE, DATAPLANE,
+			"%s port %u: address not set!\n", if_name, portid);
+		return NULL;
+	}
+
+	ifp = if_alloc(if_name, IFT_ETHER, RTE_ETHER_MTU, eth, socketid,
+		       &portid);
+	if (!ifp)
+		return NULL;
+
+	/*
+	 * Temporarily turn off VLAN insertion offload for Mellanox
+	 * ConnectX5 devices. This should be removed when DPDK is
+	 * up-reved to 1908
+	 */
+	if (is_device_mlx5(portid))
+		ifp->tpid_offloaded = 0;
+
+	if (!if_setup_vlan_storage(ifp)) {
+		if_free(ifp);
+		return NULL;
+	}
+
+	return ifp;
+}
+
+/*
+ * Allocate and initialize a DPDK ethernet interface
+ *
+ * The MAC address and NUMA socket are read from DPDK for portid and the
+ * ifnet is created through if_hwport_init(). Returns NULL if that
+ * fails, otherwise the new ifnet.
+ */
+struct ifnet *dpdk_eth_if_alloc_w_port(const char *if_name,
+				       unsigned int ifindex, portid_t portid)
+{
+	struct rte_ether_addr mac_addr;
+	struct ifnet *ifp;
+	int socketid;
+
+	socketid = rte_eth_dev_socket_id(portid);
+	rte_eth_macaddr_get(portid, &mac_addr);
+
+	ifp = if_hwport_init(if_name, portid, &mac_addr, socketid);
+	if (!ifp)
+		return NULL;
+
+	/* Can't set ifp->if_dp_id, we have not been told our dp_id yet */
+
+	/* port is on this dataplane, so if_port is valid */
+	ifp->if_local_port = 1;
+
+	/*
+	 * Set mac-address driver filtering as initially
+	 * supported. This will be reset later if any subsequent
+	 * attempt to program filtering in the driver should fail.
+	 */
+	ifp->if_mac_filtr_supported = 1;
+	ifp->if_mac_filtr_reprogram = 0;
+
+	ifp->if_team = lag_is_team(ifp);
+
+	rcu_assign_pointer(ifport_table[portid], ifp);
+
+	if_set_ifindex(ifp, ifindex);
+
+	/* No shadow interfaces for LAG interfaces */
+	if (!is_team(ifp)) {
+		int rc = shadow_init_port(ifp->if_port, ifp->if_name,
+					  &ifp->eth_addr);
+		/*
+		 * A shadow failure is logged and a detach of the device is
+		 * attempted, but ifp is still returned and stays published
+		 * in ifport_table[portid].
+		 * NOTE(review): confirm callers expect a usable ifp here.
+		 */
+		if (rc < 0) {
+			char port_name[RTE_ETH_NAME_MAX_LEN];
+			RTE_LOG(ERR, DATAPLANE,
+				"cannot init shadow interface for %s, port %u\n",
+				ifp->if_name, ifp->if_port);
+			if (rte_eth_dev_get_name_by_port(ifp->if_port,
+							 port_name) < 0)
+				RTE_LOG(ERR, DATAPLANE,
+					"port(%u) to name failed\n",
+					ifp->if_port);
+			else if (detach_device(port_name))
+				RTE_LOG(ERR, DATAPLANE,
+					"detach device %s failed\n",
+					port_name);
+		}
+	}
+
+	return ifp;
+}
+/*
+ * Allocate a DPDK ethernet interface by name.
+ *
+ * The port is looked up with dpdk_name_to_eth_port_map_get(); a result
+ * of DATAPLANE_MAX_PORTS or more is logged as a warning and NULL is
+ * returned. Otherwise the work is done by dpdk_eth_if_alloc_w_port().
+ */
+struct ifnet *dpdk_eth_if_alloc(const char *if_name, unsigned int ifindex)
+{
+	portid_t portid;
+
+	portid = dpdk_name_to_eth_port_map_get(if_name);
+	if (portid >= DATAPLANE_MAX_PORTS) {
+		RTE_LOG(WARNING, DATAPLANE,
+			"DPDK port not known for interface %s, not creating\n",
+			if_name);
+		return NULL;
+	}
+
+	return dpdk_eth_if_alloc_w_port(if_name, ifindex, portid);
+}
diff --git a/src/if/dpdk-eth/dpdk_eth_if.h b/src/if/dpdk-eth/dpdk_eth_if.h
new file mode 100644
index 00000000..a7c5ff50
--- /dev/null
+++ b/src/if/dpdk-eth/dpdk_eth_if.h
@@ -0,0 +1,62 @@
+/*-
+ * Copyright (c) 2019-2020, AT&T Intellectual Property.
+ * All rights reserved.
+ *
+ * SPDX-License-Identifier: LGPL-2.1-only
+ */
+/*
+ * DPDK Ethernet interfaces
+ */
+
+#ifndef DPDK_ETH_IF_H
+#define DPDK_ETH_IF_H
+
+#include
+#include
+
+#include "urcu.h"
+#include "compat.h"
+#include "fal_plugin.h"
+
+struct vhost_info;
+struct ifnet;
+/*
+ * Private state of a DPDK ethernet interface. The code in this patch
+ * reaches it through ifp->if_softc.
+ */
+struct dpdk_eth_if_softc {
+	struct rcu_head scd_rcu;
+	struct rte_timer scd_link_timer;	/* update controller */
+	struct rte_timer scd_blink_timer;	/* blink LED */
+	struct rte_timer scd_reset_timer;	/* reset interface */
+	struct vhost_info *scd_vhost_info;
+	bool scd_need_reset;			/* VF down when PF is down */
+	uint8_t scd_blink_on;			/* LED state the next blink tick applies */
+	bool scd_fal_lag_member_created;
+	unsigned int bp_ifindex;		/* backplane interface */
+	struct ifnet *scd_ifp;			/* back pointer to the ifp */
+	/* Keep track of LAG members */
+	struct cds_list_head scd_fal_lag_members_head;
+	struct cds_list_head scd_fal_lag_member_link;
+	fal_object_t scd_fal_port_lag_obj;	/* Port or LAG FAL object */
+	fal_object_t scd_fal_lag_member_obj;	/* LAG member FAL object */
+	/* LAG configuration */
+	bool has_min_links;
+	uint16_t min_links;
+};
+
+void dpdk_eth_if_start_port(struct ifnet *ifp);
+void dpdk_eth_if_stop_port(struct ifnet *ifp);
+void dpdk_eth_if_force_stop_port(struct ifnet *ifp);
+void stop_all_ports(void);
+void dpdk_eth_if_update_port_queue_state(portid_t port);
+bool dpdk_eth_if_port_started(portid_t port);
+void dpdk_eth_if_reset_port(struct rte_timer *tim, void *arg);
+
+char *dpdk_eth_vplaned_devinfo(portid_t port_id);
+
+int dpdk_name_to_eth_port_map_add(const char *ifname, portid_t port);
+void dpdk_eth_port_map_del_port(portid_t port);
+/*
+ * Interface allocation: by name (port looked up in the name to port
+ * map) or on an explicitly given port. Both return NULL on failure.
+ */
+struct ifnet *dpdk_eth_if_alloc(const char *if_name, unsigned int ifindex);
+struct ifnet *dpdk_eth_if_alloc_w_port(const char *if_name,
+				       unsigned int ifindex, portid_t portid);
+
+#endif /* DPDK_ETH_IF_H */
diff --git a/src/if/dpdk-eth/dpdk_eth_linkwatch.c b/src/if/dpdk-eth/dpdk_eth_linkwatch.c
new file mode 100644
index 00000000..3d94f69f
--- /dev/null
+++
b/src/if/dpdk-eth/dpdk_eth_linkwatch.c
@@ -0,0 +1,390 @@
+/*-
+ * Copyright (c) 2019-2020, AT&T Intellectual Property.
+ * All rights reserved.
+ * Copyright (c) 2011-2016 by Brocade Communications Systems, Inc.
+ * All rights reserved.
+ *
+ * SPDX-License-Identifier: LGPL-2.1-only
+ */
+
+/*
+ * Port link state events
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "bitmask.h"
+#include "compiler.h"
+#include "config_internal.h"
+#include "control.h"
+#include "dpdk_eth_if.h"
+#include "dpdk_eth_linkwatch.h"
+#include "dp_event.h"
+#include "event_internal.h"
+#include "if_var.h"
+#include "ip_forward.h"
+#include "l2_rx_fltr.h"
+#include "lag.h"
+#include "main.h"
+#include "rcu.h"
+#include "vhost.h"
+#include "vplane_debug.h"
+#include "vplane_log.h"
+
+bitmask_t linkup_port_mask __hot_data;		/* link is up */
+static bitmask_t lsc_irq_mask;			/* link interrupt enabled */
+static bitmask_t lsc_irq_pending;		/* link state changed */
+static bitmask_t link_reset_pending;		/* link reset pending */
+static bitmask_t queue_state_pending;		/* queue state change pending */
+
+/*
+ * event file descriptor for link state change, kept as a pointer-sized
+ * value so it can be handed to the event callbacks as their argument
+ */
+static void *lsc_arg;
+
+/* decode DPDK definition of duplex */
+const char *link_duplexstr(unsigned int duplex)
+{
+	switch (duplex) {
+	case ETH_LINK_HALF_DUPLEX: return "half";
+	case ETH_LINK_FULL_DUPLEX: return "full";
+	default: return "unknown?";
+	}
+}
+
+/*
+ * notify qos of link state change
+ *
+ * Logs the new state, sets or clears the port in linkup_port_mask,
+ * recomputes active_port_mask as poll_port_mask AND linkup_port_mask
+ * and raises DP_EVT_IF_LINK_CHANGE for the interface.
+ *
+ * NOTE(review): ifport_table[port] is dereferenced without a NULL
+ * check - presumably callers guarantee the interface exists; confirm.
+ */
+static void notify_port_status(portid_t port,
+			       const struct rte_eth_link *link)
+{
+	struct ifnet *ifp = ifport_table[port];
+
+	if (link->link_status) {
+		RTE_LOG(NOTICE, DATAPLANE,
+			"%s Link up at %d Mbps, %s duplex\n",
+			ifp->if_name,
+			link->link_speed,
+			link_duplexstr(link->link_duplex));
+
+		bitmask_set(&linkup_port_mask, port);
+		bitmask_and(&active_port_mask, &poll_port_mask,
+			    &linkup_port_mask);
+
+
dp_event(DP_EVT_IF_LINK_CHANGE, 0, ifp, + link->link_status, link->link_speed, NULL); + } else { + RTE_LOG(WARNING, DATAPLANE, + "%s Link down\n", ifp->if_name); + bitmask_clear(&linkup_port_mask, port); + bitmask_and(&active_port_mask, &poll_port_mask, + &linkup_port_mask); + + dp_event(DP_EVT_IF_LINK_CHANGE, 0, ifp, + link->link_status, link->link_speed, NULL); + + /* Note: it is probably a good idea to drain the pkt + * ring and burst at this point to avoid stale packets + * going out once the link comes back up. However, + * doing the necessary dp_rcu_synchronize() here could + * potentially have a negative impact on link changes + * for other interfaces and other events so isn't done + * for the moment. + */ + } +} + +/* Timer for peroidic check of link state + * + * Note: dp_rcu_read_lock not held here! + */ +void linkwatch_timer(struct rte_timer *tim __rte_unused, void *arg) +{ + struct ifnet *ifp = arg; + portid_t port = ifp->if_port; + struct rte_eth_link link; + + /* ignore timer when race with admin down */ + if (dpdk_eth_if_port_started(port)) { + rte_eth_link_get_nowait(port, &link); + bitmask_set(&lsc_irq_mask, port); /* re-enable irq */ + + int old_status = if_port_isup(port); + if (link.link_status != old_status) + notify_port_status(port, &link); + + send_port_status(port, &link); + } +} + +/* Check link state */ +void linkwatch_update_port_status(portid_t port, enum linkwatch_flags flags) +{ + struct rte_eth_link link; + int old_status; + + rte_eth_link_get_nowait(port, &link); + /* The kernel needs to be informed that the link is operationally down + * when the port is stopped, so intervene in this case as the link state + * in some if not all DPDK PMDs remains up. 
+	 */
+	if (flags & LINKWATCH_FLAG_FORCE_LINK_DOWN)
+		link.link_status = ETH_LINK_DOWN;
+
+	old_status = if_port_isup(port);
+	if (flags & LINKWATCH_FLAG_FORCE_NOTIFY ||
+	    link.link_status != old_status)
+		notify_port_status(port, &link);
+
+	send_port_status(port, &link);
+}
+
+/* Callback from being woken up on link_fd.
+ * Runs on main thread (via get_next_event)
+ *
+ * irq_mask is used to debounce events so that only one link
+ * state change between timer interval is possible
+ *
+ * For ports that use the queue state events, the queue state was read when
+ * the callback was received, so now we need to bring the state into line
+ * with the configured set of queues here.
+ *
+ * arg is the eventfd number. A failed read of the eventfd is logged
+ * (unless it was EINTR) and the pending bits are processed regardless.
+ * Each pending bit of every valid port is cleared before it is acted
+ * on. Always returns 0.
+ *
+ * Note: dp_rcu_read_lock not held here!
+ */
+static int link_state_event(void *arg)
+{
+	int lsc_fd = (unsigned long) arg;
+	unsigned int port;
+	uint64_t seqno;
+
+	if (read(lsc_fd, &seqno, sizeof(seqno)) < 0) {
+		if (errno != EINTR)
+			RTE_LOG(NOTICE, DATAPLANE,
+				"link state event read error: %s\n",
+				strerror(errno));
+	}
+
+	for (port = 0; port < DATAPLANE_MAX_PORTS; port++) {
+		if (!rte_eth_dev_is_valid_port(port))
+			continue;
+
+		if (bitmask_isset(&lsc_irq_pending, port)) {
+			bitmask_clear(&lsc_irq_pending, port);
+			if (dpdk_eth_if_port_started(port))
+				linkwatch_update_port_status(
+					port, LINKWATCH_FLAG_NONE);
+		}
+
+		if (bitmask_isset(&link_reset_pending, port)) {
+			bitmask_clear(&link_reset_pending, port);
+			if (dpdk_eth_if_port_started(port))
+				dpdk_eth_if_reset_port(NULL,
+						       ifport_table[port]);
+		}
+
+		if (bitmask_isset(&queue_state_pending, port)) {
+			bitmask_clear(&queue_state_pending, port);
+			dpdk_eth_if_update_port_queue_state(port);
+		}
+	}
+
+	return 0;
+}
+
+static const char *linkscan_source = "linkscan";
+/*
+ * Signal a route path state for the interface on port_id, keyed by its
+ * interface index, with "linkscan" as the source. Nothing is signalled
+ * when the port has no ifnet. The calling thread is registered with RCU
+ * for the lookup and put offline again afterwards.
+ */
+static void linkwatch_change_mark_state(portid_t port_id,
+					enum dp_rt_path_state state)
+{
+	struct dp_rt_path_unusable_key key;
+	struct ifnet *ifp;
+
+	dp_rcu_register_thread();
+
+	ifp = ifnet_byport(port_id);
+	if (ifp) {
+		key.ifindex = ifp->if_index;
+
key.type = DP_RT_PATH_UNUSABLE_KEY_INTF; + dp_rt_signal_path_state(linkscan_source, state, &key); + } + + dp_rcu_thread_offline(); +} + +static enum dp_rt_path_state +linkwatch_check_path_state(const struct dp_rt_path_unusable_key *key) +{ + struct rte_eth_link link; + struct ifnet *ifp; + + if (key->type == DP_RT_PATH_UNUSABLE_KEY_INTF) { + ifp = dp_ifnet_byifindex(key->ifindex); + if (!ifp) + return DP_RT_PATH_UNKNOWN; + + if (rte_eth_link_get_nowait(ifp->if_port, &link) < 0) + return DP_RT_PATH_UNKNOWN; + + if (link.link_status == ETH_LINK_DOWN) + return DP_RT_PATH_UNUSABLE; + return DP_RT_PATH_USABLE; + } + + return DP_RT_PATH_UNKNOWN; +} + + +/* Open eventfd handle used to notify main thread + * by callbacks called in interrupt thread. + */ +void link_state_init(void) +{ + int rv; + int fd = eventfd(0, EFD_NONBLOCK); + if (fd < 0) + rte_panic("%s: eventfd failed: %s\n", + __func__, strerror(errno)); + + lsc_arg = (void *) (unsigned long) fd; + register_event_fd(fd, link_state_event, lsc_arg); + + rv = dp_rt_register_path_state(linkscan_source, + linkwatch_check_path_state); + if (rv) + rte_panic("Could not register route state with linkwatch\n"); +} + +/* Port event occurred. 
+ *
+ * Called from another Posix thread therefore can't safely update
+ * port state directly, need to wakeup main thread
+ *
+ * arg carries the eventfd number. Work for the main thread is recorded
+ * in the pending bitmasks (link state change, reset, queue state) and
+ * the eventfd is written when something was recorded. The route path
+ * state is signalled directly from here on a link state change.
+ * Always returns 0.
+ */
+static int
+eth_port_event(portid_t port_id, enum rte_eth_event_type type, void *arg,
+	       __unused void *ret_arg)
+{
+	unsigned long link_fd = (unsigned long) arg;
+	static const uint64_t incr = 1;
+	bool wakeup = false;
+	int rv;
+
+	/* Notify main thread, and debounce */
+	if (type == RTE_ETH_EVENT_INTR_LSC) {
+		struct rte_eth_link link;
+
+		rv = rte_eth_link_get_nowait(port_id, &link);
+		if (rv == 0) {
+			if (link.link_status == ETH_LINK_DOWN)
+				linkwatch_change_mark_state(
+					port_id, DP_RT_PATH_UNUSABLE);
+			else
+				linkwatch_change_mark_state(port_id,
+							    DP_RT_PATH_USABLE);
+		}
+		/*
+		 * If the port uses the queue state events, and it is down
+		 * then we have to clear the enabled queues otherwise we
+		 * can get into an inconsistent state.
+		 */
+		if (get_port_uses_queue_state(port_id)) {
+			if (rv == 0 && link.link_status == ETH_LINK_DOWN)
+				reset_port_enabled_queue_state(port_id);
+		}
+		if (bitmask_isset(&lsc_irq_mask, port_id)) {
+			bitmask_clear(&lsc_irq_mask, port_id);
+			bitmask_set(&lsc_irq_pending, port_id);
+			wakeup = true;
+		}
+	}
+
+	if (type == RTE_ETH_EVENT_INTR_RESET &&
+	    dpdk_eth_if_port_started(port_id)) {
+		bitmask_set(&link_reset_pending, port_id);
+		wakeup = true;
+	}
+
+	if (type == RTE_ETH_EVENT_QUEUE_STATE) {
+		/*
+		 * Pull all the events off the queue, and set the
+		 * enabled queues correctly. The main thread will then
+		 * do the work to actually enable them.
+		 */
+		struct rte_eth_vhost_queue_event event;
+
+		while (rte_eth_vhost_get_queue_event(port_id, &event) == 0)
+			track_port_queue_state(port_id, event.queue_id,
+					       event.rx, event.enable);
+
+		bitmask_set(&queue_state_pending, port_id);
+		wakeup = true;
+	}
+
+	if (wakeup && write(link_fd, &incr, sizeof(incr)) < 0)
+		RTE_LOG(NOTICE, DATAPLANE,
+			"wakeup of link state thread failed: %s\n",
+			strerror(errno));
+
+	return 0;
+}
+/*
+ * Register eth_port_event() for link state change and reset events on
+ * portid, and for queue state events when port_uses_queue_state() says
+ * the port needs them. A failed registration is only logged as a
+ * warning; the function always returns 0.
+ */
+int linkwatch_port_config(portid_t portid)
+{
+	int ret;
+
+	/* Enable Link State Interrupt */
+	ret = rte_eth_dev_callback_register(portid, RTE_ETH_EVENT_INTR_LSC,
+					    eth_port_event, lsc_arg);
+	if (ret < 0)
+		RTE_LOG(WARNING, DATAPLANE,
+			"rte_eth_dev_callback_register(lsc): err=%d, port=%u\n",
+			ret, portid);
+
+	ret = rte_eth_dev_callback_register(portid, RTE_ETH_EVENT_INTR_RESET,
+					    eth_port_event, lsc_arg);
+	if (ret < 0)
+		RTE_LOG(WARNING, DATAPLANE,
+			"rte_eth_dev_callback_register(reset): err=%d, port=%u\n",
+			ret, portid);
+
+	if (port_uses_queue_state(portid)) {
+		set_port_uses_queue_state(portid, true);
+		reset_port_all_queue_state(portid);
+		ret = rte_eth_dev_callback_register(portid,
+						    RTE_ETH_EVENT_QUEUE_STATE,
+						    eth_port_event,
+						    lsc_arg);
+		if (ret < 0)
+			RTE_LOG(WARNING, DATAPLANE,
+				"rte_eth_dev_callback_register(queue state): err=%d, port=%u\n",
+				ret, portid);
+
+	}
+
+	return 0;
+}
+/*
+ * Undo linkwatch_port_config(): unregister all three callbacks (the
+ * queue state one unconditionally) and clear the port's
+ * uses-queue-state flag.
+ */
+void linkwatch_port_unconfig(portid_t portid)
+{
+	/* Disable Link State Interrupt */
+	rte_eth_dev_callback_unregister(portid, RTE_ETH_EVENT_INTR_LSC,
+					eth_port_event, lsc_arg);
+
+	/* Disable Port Reset callback */
+	rte_eth_dev_callback_unregister(portid, RTE_ETH_EVENT_INTR_RESET,
+					eth_port_event, lsc_arg);
+
+	rte_eth_dev_callback_unregister(portid, RTE_ETH_EVENT_QUEUE_STATE,
+					eth_port_event, lsc_arg);
+	set_port_uses_queue_state(portid, false);
+}
diff --git a/src/if/dpdk-eth/dpdk_eth_linkwatch.h b/src/if/dpdk-eth/dpdk_eth_linkwatch.h
new file mode 100644
index 00000000..e329ff2c
--- /dev/null
+++
b/src/if/dpdk-eth/dpdk_eth_linkwatch.h
@@ -0,0 +1,25 @@
+/*-
+ * Copyright (c) 2019-2020, AT&T Intellectual Property.
+ * All rights reserved.
+ *
+ * SPDX-License-Identifier: LGPL-2.1-only
+ */
+/*
+ * DPDK Ethernet link state watching
+ */
+
+#ifndef DPDK_ETH_LINKWATCH_H
+#define DPDK_ETH_LINKWATCH_H
+/*
+ * Flags for linkwatch_update_port_status(). The two FORCE values are
+ * distinct bits and are tested with a bitwise AND.
+ */
+enum linkwatch_flags {
+	LINKWATCH_FLAG_NONE,				/* report the link as read */
+	LINKWATCH_FLAG_FORCE_LINK_DOWN = (1 << 0),	/* report the link as down */
+	LINKWATCH_FLAG_FORCE_NOTIFY = (1 << 1),		/* notify even if unchanged */
+};
+
+void linkwatch_timer(struct rte_timer *tim, void *arg);
+int linkwatch_port_config(portid_t portid);
+void linkwatch_port_unconfig(portid_t portid);
+void linkwatch_update_port_status(portid_t port, enum linkwatch_flags flags);
+
+#endif /* DPDK_ETH_LINKWATCH_H */
diff --git a/src/if/dpdk-eth/dpdk_lag.c b/src/if/dpdk-eth/dpdk_lag.c
new file mode 100644
index 00000000..698ee8e3
--- /dev/null
+++ b/src/if/dpdk-eth/dpdk_lag.c
@@ -0,0 +1,741 @@
+/*-
+ * Copyright (c) 2019-2020, AT&T Intellectual Property. All rights reserved.
+ * Copyright (c) 2013-2016 by Brocade Communications Systems, Inc.
+ * All rights reserved.
+ *
+ * SPDX-License-Identifier: LGPL-2.1-only
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "compiler.h"
+#include "capture.h"
+#include "compat.h"
+#include "dpdk_eth_if.h"
+#include "ether.h"
+#include "if_var.h"
+#include "json_writer.h"
+#include "lag.h"
+#include "main.h"
+#include "controller.h"
+#include "netlink.h"
+#include "pktmbuf_internal.h"
+#include "urcu.h"
+#include "util.h"
+#include "vplane_debug.h"
+#include "vplane_log.h"
+
+#define BOND_DEV_NAME "net_bonding"
+
+struct nlattr;
+
+/*
+ * remember which members are collecting/distributing
+ *
+ * Indexed by the member's DPDK port id.
+ * NOTE(review): assumes member port ids are below LAG_MAX_MEMBERS -
+ * confirm against the definition of LAG_MAX_MEMBERS.
+ */
+static uint8_t enabled[LAG_MAX_MEMBERS];
+
+static int dpdk_lag_member_delete(struct ifnet *team, struct ifnet *ifp);
+/*
+ * Receive callback for slow protocol frames, installed as slowrx_cb
+ * when a bond is switched to 802.3AD mode.
+ *
+ * The frame is dropped if the member port has no ifnet. Otherwise its
+ * metadata is cleared, it is captured if the member is being captured,
+ * and it is handed to local_packet().
+ */
+static void lacp_recv_cb(portid_t member_id, struct rte_mbuf *lacp_pkt)
+{
+	struct ifnet *ifp = ifnet_byport(member_id);
+
+	if (unlikely(ifp == NULL)) {
+		rte_pktmbuf_free(lacp_pkt);
+		return;
+	}
+
+	pktmbuf_mdata_clear_all(lacp_pkt);
+
+	/* local packet capture */
+	if (ifp->capturing)
+		capture_burst(ifp, &lacp_pkt, 1);
+
+	local_packet(ifp, lacp_pkt);
+}
+
+/*
+ * outgoing ether type slow traffic has special handling:
+ * - capture via dataplane member
+ * - send via rte_pmd_bond team interface
+ *
+ * Returns the result of rte_eth_bond_8023ad_ext_slowtx().
+ */
+static int
+dpdk_lag_etype_slow_tx(struct ifnet *team, struct ifnet *ifp,
+		       struct rte_mbuf *lacp_pkt)
+{
+	if (ifp->capturing)
+		capture_burst(ifp, &lacp_pkt, 1);
+
+	return rte_eth_bond_8023ad_ext_slowtx(team->if_port, ifp->if_port,
+					      lacp_pkt);
+}
+
+/*
+ * The rte_pmd_bond might change MAC address of a member port on certain events.
+ * This helper tries to update the MAC address of the DPDK device from the
+ * dataplane ifnet structure.
+ */
+static void
+dpdk_lag_member_sync_mac_address(struct ifnet *ifp)
+{
+	struct rte_ether_addr hwaddr;
+	char buf1[32], buf2[32];
+
+	rte_eth_macaddr_get(ifp->if_port, &hwaddr);
+	if (rte_ether_addr_equal(&ifp->eth_addr, &hwaddr))
+		return;
+
+	DP_DEBUG(LAG, DEBUG, DATAPLANE, "%s updating MAC from %s to %s\n",
+		 ifp->if_name, ether_ntoa_r(&hwaddr, buf2),
+		 ether_ntoa_r(&ifp->eth_addr, buf1));
+
+	int rc = rte_eth_dev_default_mac_addr_set(ifp->if_port, &ifp->eth_addr);
+	if (rc) {
+		/*
+		 * If updating the member's address fails lets update the
+		 * dataplane's address to be in sync!
+		 */
+		DP_DEBUG(LAG, ERR, DATAPLANE, "%s can't set address %s: %s\n",
+			 ifp->if_name, ether_ntoa_r(&ifp->eth_addr, buf1),
+			 strerror(-rc));
+		ifp->eth_addr = hwaddr;
+	}
+}
+
+/*
+ * Create a bonding device and its ifnet from a netlink link message.
+ *
+ * Both IFLA_ADDRESS (exactly RTE_ETHER_ADDR_LEN bytes) and IFLA_IFNAME
+ * are required. The bond is created in active-backup mode under the
+ * name "net_bonding" followed by the interface name, which must fit in
+ * RTE_ETH_NAME_MAX_LEN.
+ *
+ * Returns the new ifnet, or NULL on any failure. A bond device that was
+ * already created is freed again on the later failure paths.
+ */
+static struct ifnet *
+dpdk_lag_create(const struct ifinfomsg *ifi, struct nlattr *tb[])
+{
+	int port_id;
+	const char *ifname;
+	struct rte_ether_addr *macaddr = NULL;
+	struct ifnet *ifp;
+	char bond_name[RTE_ETH_NAME_MAX_LEN];
+	int len;
+
+	if (tb[IFLA_ADDRESS]) {
+		size_t addrlen = mnl_attr_get_payload_len(tb[IFLA_ADDRESS]);
+
+		if (addrlen != RTE_ETHER_ADDR_LEN)
+			return NULL;
+		macaddr = mnl_attr_get_payload(tb[IFLA_ADDRESS]);
+	}
+	if (macaddr == NULL)
+		return NULL;
+
+	if (tb[IFLA_IFNAME])
+		ifname = mnl_attr_get_str(tb[IFLA_IFNAME]);
+	else
+		return NULL;
+
+	/* bond device name must start with "net_bonding" */
+	len = snprintf(bond_name, sizeof(bond_name), "%s%s",
+		       BOND_DEV_NAME, ifname);
+	if (len < 0 || len >= (int)sizeof(bond_name))
+		return NULL;
+
+	port_id = rte_eth_bond_create(bond_name,
+				      BONDING_MODE_ACTIVE_BACKUP,
+				      rte_socket_id());
+	if (port_id < 0)
+		return NULL;
+
+	rte_eth_bond_mac_address_set(port_id, macaddr);
+
+	if (insert_port(port_id) != 0) {
+		/* don't leak the bond device we just created */
+		rte_eth_bond_free(bond_name);
+		return NULL;
+	}
+
+	ifp = dpdk_eth_if_alloc_w_port(ifname, ifi->ifi_index, port_id);
+	if (!ifp) {
+		remove_port(port_id);
+		/* don't leak the bond device we just created */
+		rte_eth_bond_free(bond_name);
+		return NULL;
+	}
+
+	ifp->eth_addr = *macaddr;
+
+	return ifp;
+}
+
+static bool dpdk_eth_if_is_dev_started(struct
ifnet *ifp)
+{
+	struct rte_eth_dev *dev;
+
+	dev = &rte_eth_devices[ifp->if_port];
+	return dev->data->dev_started != 0;
+}
+/*
+ * Attach ifp to the bond device of team.
+ *
+ * Returns -EEXIST when ifp already has an aggregator, the error from
+ * reconfigure_queues() or rte_eth_bond_slave_add() when either fails,
+ * and 0 on success. When the member supports fewer queues than the
+ * team has configured, the team's queue counts are reduced first.
+ */
+static int member_add(struct ifnet *team, struct ifnet *ifp)
+{
+	int rv;
+	struct rte_eth_dev_info member_info, team_info;
+	bool bond_dev_started;
+
+	if (ifp->aggregator) {
+		/* teamd can give us redundant updates, so this is expected */
+		DP_DEBUG(LAG, DEBUG, DATAPLANE,
+			 "%s already member of %s\n", ifp->if_name,
+			 ifp->aggregator->if_name);
+		return -EEXIST;
+	}
+
+	rte_eth_dev_info_get(team->if_port, &team_info);
+	rte_eth_dev_info_get(ifp->if_port, &member_info);
+	bond_dev_started = dpdk_eth_if_is_dev_started(team);
+
+	/* Ignore VMDQ information since we know that the BOND pmd
+	 * will never have support for VMDQ and thus provides a
+	 * reasonable upper bound.
+	 */
+
+	if (member_info.max_rx_queues < team_info.nb_rx_queues ||
+	    member_info.max_tx_queues < team_info.nb_tx_queues) {
+		int nb_rx_queues =
+			MIN(member_info.max_rx_queues, team_info.nb_rx_queues);
+		int nb_tx_queues =
+			MIN(member_info.max_tx_queues, team_info.nb_tx_queues);
+		/*
+		 * NOTE(review): if reconfigure_queues() fails the team
+		 * port stays stopped even when it was running on entry -
+		 * confirm this is intended.
+		 */
+		if (bond_dev_started)
+			dpdk_eth_if_stop_port(team);
+		rv = reconfigure_queues(team->if_port,
+					nb_rx_queues, nb_tx_queues);
+		if (rv)
+			return rv;
+		if (bond_dev_started)
+			dpdk_eth_if_start_port(team);
+	}
+
+	/*
+	 * Queues are assigned again by start_port() call in
+	 * member_remove()
+	 */
+	if_disable_poll_rcu(ifp->if_port);
+	if (ifp->if_flags & IFF_UP)
+		unassign_queues(ifp->if_port);
+
+	/*
+	 * Start the bonding device if not already started
+	 * when adding a member. The member is configured only
+	 * when the bonding device is started.
+ */ + if (!bond_dev_started) + dpdk_eth_if_start_port(team); + rv = rte_eth_bond_slave_add(team->if_port, ifp->if_port); + if (rv < 0) { + if (!bond_dev_started) + dpdk_eth_if_stop_port(team); + if (ifp->if_flags & IFF_UP) + assign_queues(ifp->if_port); + if_enable_poll(ifp->if_port); + return rv; + } + if (!bond_dev_started) + dpdk_eth_if_stop_port(team); + /* + * internals is accessed in the forwarding threads. We stop them + * while we update this, but since there isn't a lock, there isn't a + * barrier to ensure that these updates are visible on the other + * lcores before we resume them. + */ + rte_smp_mb(); + + rcu_assign_pointer(ifp->aggregator, team); + + return 0; +} + +/* + * Assumes that polling is turned off on the team interface, so that + * there's no race with an in-progress rx. + */ +static int member_remove(struct ifnet *team, struct ifnet *ifp) +{ + int rv; + + if (!ifp->aggregator) + return -ENOENT; + + rv = rte_eth_bond_slave_remove(team->if_port, ifp->if_port); + if (rv < 0) + return rv; + /* + * internals is accessed in the forwarding threads. We stop them + * while we update this, but since there isn't a lock, there isn't a + * barrier to ensure that these updates are visible on the other + * lcores before we resume them. + */ + rte_smp_mb(); + + /* clear RCU protected aggregator pointer */ + ifp->aggregator = NULL; + + /* + * Force the port to be stopped since it will have been + * started by bond if not already and there's no guarantee + * that our state is consistent with the DPDK state now. 
+ */ + dpdk_eth_if_force_stop_port(ifp); + + /* enable any queues released by the bonding driver */ + rv = eth_port_config(ifp->if_port); + if (rv < 0) + return rv; + + if (ifp->if_flags & IFF_UP) + dpdk_eth_if_start_port(ifp); + if_enable_poll(ifp->if_port); + + return rv; +} + +static int dpdk_lag_mode_set_balance(struct ifnet *ifp) +{ + struct rte_eth_bond_8023ad_conf conf; + int rv; + int mode = rte_eth_bond_mode_get(ifp->if_port); + bool dev_started; + + if (mode == BONDING_MODE_8023AD) + return 0; + + /* get default configuration */ + rv = rte_eth_bond_8023ad_setup(ifp->if_port, NULL); + if (rv < 0) + return rv; + + rv = rte_eth_bond_8023ad_conf_get(ifp->if_port, &conf); + if (rv < 0) + return rv; + + conf.slowrx_cb = lacp_recv_cb; + + rv = rte_eth_bond_8023ad_setup(ifp->if_port, &conf); + if (rv < 0) + return rv; + + dev_started = dpdk_eth_if_is_dev_started(ifp); + + if (dev_started) + rte_eth_dev_stop(ifp->if_port); + + rv = rte_eth_bond_mode_set(ifp->if_port, BONDING_MODE_8023AD); + if (rv < 0) + return rv; + + if (dev_started) + rte_eth_dev_start(ifp->if_port); + + rte_eth_bond_xmit_policy_set(ifp->if_port, BALANCE_XMIT_POLICY_LAYER34); + + return 0; +} + +static int dpdk_lag_mode_set_activebackup(struct ifnet *ifp) +{ + bool dev_started = dpdk_eth_if_is_dev_started(ifp); + int rv; + + if (dev_started) + rte_eth_dev_stop(ifp->if_port); + + rv = rte_eth_bond_mode_set(ifp->if_port, BONDING_MODE_ACTIVE_BACKUP); + if (rv < 0) + return rv; + + if (dev_started) + rte_eth_dev_start(ifp->if_port); + + return rv; +} + +static int dpdk_lag_select(struct ifnet *ifp, bool sel) +{ + if (ifp->aggregator == NULL) + return -1; + + DP_DEBUG(LAG, DEBUG, DATAPLANE, + "teamd runner %sselected ifindex %d:%s (port %u)\n", + sel ? 
"" : "de", ifp->if_index, ifp->if_name, ifp->if_port); + + int mode = rte_eth_bond_mode_get(ifp->aggregator->if_port); + + enabled[ifp->if_port] = sel; + + if (mode == BONDING_MODE_ACTIVE_BACKUP) + return 0; + + if (rte_eth_bond_8023ad_ext_collect(ifp->aggregator->if_port, + ifp->if_port, + enabled[ifp->if_port])) { + DP_DEBUG(LAG, ERR, DATAPLANE, "cannot set collecting flag\n"); + return -1; + } + + if (rte_eth_bond_8023ad_ext_distrib(ifp->aggregator->if_port, + ifp->if_port, + enabled[ifp->if_port])) { + DP_DEBUG(LAG, ERR, DATAPLANE, "cannot set distributing flag\n"); + return -1; + } + + return 0; +} + +static int +dpdk_lag_set_activeport(struct ifnet *ifp, struct ifnet *ifp_member) +{ + DP_DEBUG(LAG, DEBUG, DATAPLANE, + "teamd runner %s activeport ifindex %d:%s (port %u)\n", + ifp->if_name, ifp_member->if_index, ifp_member->if_name, + ifp_member->if_port); + + int mode = rte_eth_bond_mode_get(ifp->if_port); + + if (mode == BONDING_MODE_ACTIVE_BACKUP) + rte_eth_bond_primary_set(ifp->if_port, ifp_member->if_port); + + return 0; +} + +/* Remove an aggregation. team%d interface went away. 
*/ +static void dpdk_lag_delete(struct ifnet *team_ifp) +{ + portid_t port_id = team_ifp->if_port; + struct rte_eth_dev_info dev_info; + portid_t members[LAG_MAX_MEMBERS]; + int num_members; + int i; + + num_members = rte_eth_bond_slaves_get(port_id, + members, + LAG_MAX_MEMBERS); + if (num_members < 0) + RTE_LOG(ERR, DATAPLANE, + "Unable to get member count for %s\n", + team_ifp->if_name); + + if (num_members > 0) { + for (i = 0; i < num_members; i++) { + int ret; + struct ifnet *sl = ifnet_byport(members[i]); + + RTE_LOG(INFO, DATAPLANE, + "LAG %s still has %d members, removing them\n", + team_ifp->if_name, num_members); + + ret = dpdk_lag_member_delete(team_ifp, sl); + if (ret < 0) { + RTE_LOG(ERR, DATAPLANE, + "Failed to remove %s from LAG %s\n", + sl->if_name, team_ifp->if_name); + return; + } + } + } + if_free(team_ifp); + remove_port(port_id); + + rte_eth_dev_info_get(port_id, &dev_info); + rte_eth_dev_close(port_id); + if (rte_dev_remove(dev_info.device) != 0) + RTE_LOG(ERR, DATAPLANE, + "dpdk_lag_delete(%u): remove failed\n", port_id); +} + +/* + * Returns the number of members associated with this bonding interface. + * + * If not a bonding interface, return -1. + */ +static int member_count(const struct ifnet *ifp) +{ + portid_t members[LAG_MAX_MEMBERS]; + + return rte_eth_bond_slaves_get(ifp->if_port, members, + LAG_MAX_MEMBERS); +} + +static bool dpdk_lag_can_start(const struct ifnet *ifp) +{ + return member_count(ifp) != 0; +} + +static int dpdk_lag_member_add(struct ifnet *team, struct ifnet *ifp) +{ + int count, rv; + + count = member_count(team); + if (count < 0) + return -EINVAL; + + /* access to bonding "internals" structure is not thread-safe */ + if_disable_poll_rcu(team->if_port); + + rv = member_add(team, ifp); + if (rv < 0) + goto out; + + /* We just added the first port, so we might need to finally + * start_port() if this interface is currently IFF_UP. 
+	 */
+	if (count == 0 && team->if_flags & IFF_UP)
+		dpdk_eth_if_start_port(team);
+
+out:
+	if_enable_poll(team->if_port);
+	return rv;
+}
+
+/*
+ * Remove ifp from team with polling of the team port disabled for the
+ * duration.
+ *
+ * Returns -EINVAL when the member list cannot be read, otherwise the
+ * result of member_remove(). When the last member was removed the team
+ * port is stopped and polling is not re-enabled.
+ */
+static int dpdk_lag_member_delete(struct ifnet *team, struct ifnet *ifp)
+{
+	portid_t member_ports[LAG_MAX_MEMBERS];
+	int nmembers;
+	int rv;
+
+	nmembers = rte_eth_bond_slaves_get(team->if_port, member_ports,
+					   LAG_MAX_MEMBERS);
+	if (nmembers < 0)
+		return -EINVAL;
+
+	/* access to bonding "internals" structure is not thread-safe */
+	if_disable_poll_rcu(team->if_port);
+
+	rv = member_remove(team, ifp);
+	if (rv >= 0 && nmembers == 1) {
+		/* that was the last port, so stop the team and its polling */
+		dpdk_eth_if_stop_port(team);
+		return rv;
+	}
+
+	if_enable_poll(team->if_port);
+	return rv;
+}
+
+/*
+ * Add interface to an aggregation or update an existing member interface
+ *
+ * When membership changes (a team appears or goes away) only the MAC
+ * address is synchronised. Otherwise, if the link is running, the
+ * remembered collecting/distributing state is re-applied and the MAC
+ * address synchronised. Returns -1 without an interface, else 0.
+ */
+static int
+dpdk_lag_nl_member_update(const struct ifinfomsg *ifi, struct ifnet *ifp,
+			  struct ifnet *team)
+{
+	bool is_member;
+	bool wants_team;
+
+	if (ifp == NULL)
+		return -1;
+
+	is_member = ifp->aggregator != NULL;
+	wants_team = team != NULL;
+
+	if (is_member != wants_team) {
+		/* team was either set or cleared */
+		dpdk_lag_member_sync_mac_address(ifp);
+		return 0;
+	}
+
+	/* if link up, restore collect/dist flags */
+	if (ifi->ifi_flags & IFF_RUNNING) {
+		dpdk_lag_select(ifp, enabled[ifp->if_port]);
+		dpdk_lag_member_sync_mac_address(ifp);
+	}
+
+	return 0;
+}
+
+/*
+ * Re-apply the remembered selection state of every member of team.
+ * Nothing is done when the member list cannot be read.
+ */
+static void dpdk_lag_refresh_actor_state(struct ifnet *team)
+{
+	portid_t member_ports[LAG_MAX_MEMBERS];
+	int nmembers;
+	int idx;
+
+	nmembers = rte_eth_bond_slaves_get(team->if_port, member_ports,
+					   LAG_MAX_MEMBERS);
+
+	for (idx = 0; idx < nmembers; idx++) {
+		portid_t port = member_ports[idx];
+
+		dpdk_lag_select(ifport_table[port], enabled[port]);
+	}
+}
+
+/* Display names of the bonding modes, indexed by mode value. */
+static const char * const bonding_modes[] = {
+	[BONDING_MODE_ROUND_ROBIN] = "Round Robin",
+	[BONDING_MODE_ACTIVE_BACKUP] = "Active-Backup",
+	[BONDING_MODE_BALANCE] = "Balanced",
+	[BONDING_MODE_BROADCAST] = "Broadcast",
+	[BONDING_MODE_8023AD] = "802.3AD",
+	[BONDING_MODE_TLB] = "Adaptive Transmit",
+	[BONDING_MODE_ALB] = "Adaptive Load Balance",
+};
+
+static const
char * const policy_names[] = {
+	[BALANCE_XMIT_POLICY_LAYER2] = "BALANCE_XMIT_POLICY_LAYER2",
+	[BALANCE_XMIT_POLICY_LAYER23] = "BALANCE_XMIT_POLICY_LAYER23",
+	[BALANCE_XMIT_POLICY_LAYER34] = "BALANCE_XMIT_POLICY_LAYER34"
+};
+
+
+/* Return true if portid appears among the first len entries of active[] */
+static bool lag_member_is_active(const portid_t active[], int len,
+				 uint16_t portid)
+{
+	int i;
+
+	/* a non-positive len (e.g. a failed lookup) matches nothing */
+	for (i = 0; i < len; i++)
+		if (active[i] == portid)
+			return true;
+	return false;
+}
+
+/*
+ * Write a JSON object describing bonding interface node to wr: its
+ * name, team ifindex, whether it runs LACP, its mode and hash policy,
+ * followed by a "members" array with one object per member.
+ */
+static void dpdk_lag_show_detail(struct ifnet *node, json_writer_t *wr)
+{
+	int num_members;
+	int num_active;
+	int i;
+	int primary = rte_eth_bond_primary_get(node->if_port);
+	int mode = rte_eth_bond_mode_get(node->if_port);
+	int policy = rte_eth_bond_xmit_policy_get(node->if_port);
+	const char *policy_str = "n/a";
+	portid_t members[LAG_MAX_MEMBERS];
+	portid_t active[LAG_MAX_MEMBERS];
+
+	jsonw_start_object(wr);
+	jsonw_string_field(wr, "ifname", node->if_name);
+	jsonw_uint_field(wr, "teamdev",
+			 node->if_team ? node->if_index : 0);
+	jsonw_bool_field(wr, "lacp", !!(mode == BONDING_MODE_8023AD));
+	/*
+	 * Bound mode on both sides before indexing bonding_modes[], the
+	 * same way policy is bounded against policy_names[] below; an
+	 * out-of-range mode is reported as "Unknown".
+	 */
+	jsonw_string_field(wr, "mode",
+			   mode >= 0 &&
+			   mode < (int)ARRAY_SIZE(bonding_modes) ?
bonding_modes[mode] : "Unknown");
+
+	/* the hash policy is only reported for 802.3AD mode, else "n/a" */
+	if (mode == BONDING_MODE_8023AD && policy >= 0 &&
+	    policy < (int)ARRAY_SIZE(policy_names))
+		policy_str = policy_names[policy];
+	jsonw_string_field(wr, "hash", policy_str);
+
+	num_active = rte_eth_bond_active_slaves_get(node->if_port,
+						     active,
+						     LAG_MAX_MEMBERS);
+	num_members = rte_eth_bond_slaves_get(node->if_port, members,
+					       LAG_MAX_MEMBERS);
+	jsonw_name(wr, "members");
+	jsonw_start_array(wr);
+	/* a negative num_members (lookup failure) yields an empty array */
+	for (i = 0; i < num_members; i++) {
+		struct ifnet *sl = ifnet_byport(members[i]);
+		struct rte_eth_bond_8023ad_slave_info info;
+		int rc;
+
+		/* member ports without an ifnet are left out of the output */
+		if (!sl)
+			continue;
+
+		bool is_primary = primary == sl->if_port;
+		bool is_active = lag_member_is_active(active, num_active,
+						      sl->if_port);
+		jsonw_start_object(wr);
+		jsonw_string_field(wr, "ifname", sl->if_name);
+		jsonw_bool_field(wr, "primary", is_primary);
+		jsonw_bool_field(wr, "active", is_active);
+		if (mode == BONDING_MODE_8023AD) {
+			rc = rte_eth_bond_8023ad_slave_info(node->if_port,
+					sl->if_port, &info);
+			/* LACP details are emitted only if the query worked */
+			if (rc == 0) {
+				jsonw_name(wr, "802-3ad");
+				jsonw_start_array(wr);
+				jsonw_start_object(wr);
+				jsonw_int_field(wr, "selected",
+						info.selected);
+				jsonw_int_field(wr, "actor-state",
+						info.actor_state);
+				jsonw_int_field(wr, "partner-state",
+						info.partner_state);
+				jsonw_int_field(wr, "agg-port-id",
+						info.agg_port_id);
+				jsonw_end_object(wr);
+				jsonw_end_array(wr);
+			}
+		}
+
+		jsonw_end_object(wr);
+	}
+	jsonw_end_array(wr);
+	jsonw_end_object(wr);
+}
+
+/*
+ * Call iter_func(member, arg) for every member of team ifp that has an
+ * ifnet. The value returned by iter_func is ignored.
+ *
+ * Returns -EINVAL if ifp is not a team, iter_func is NULL or the member
+ * list cannot be read; otherwise 0.
+ */
+static int
+dpdk_lag_walk_team_members(struct ifnet *ifp, dp_ifnet_iter_func_t iter_func,
+			   void *arg)
+{
+	int num_members;
+	portid_t members[LAG_MAX_MEMBERS];
+	int i;
+
+	if (!ifp->if_team || !iter_func)
+		return -EINVAL;
+
+	num_members = rte_eth_bond_slaves_get(ifp->if_port, members,
+					       LAG_MAX_MEMBERS);
+	if (num_members < 0)
+		return -EINVAL;
+
+	for (i = 0; i < num_members; i++) {
+		struct ifnet *sl = ifnet_byport(members[i]);
+
+		if (sl)
+			(iter_func)(sl, arg);
+	}
+
+	return 0;
+}
+
+static bool
+dpdk_lag_is_team(struct ifnet *ifp)
+{
+	struct rte_eth_dev_info dev_info = { 0 };
+	int rc;
+
+	/* without device info or a driver name the port is not a team */
+	rc = rte_eth_dev_info_get(ifp->if_port, &dev_info);
+	if (rc || dev_info.driver_name == NULL)
+		return false;
+
+	DP_DEBUG(INIT, DEBUG, DATAPLANE,
+		 "%d:%s dev_info.driver_name %s\n",
+		 ifp->if_index, ifp->if_name, dev_info.driver_name);
+
+	/* a team is recognised by BOND_DEV_NAME occurring in the driver name */
+	return strstr(dev_info.driver_name, BOND_DEV_NAME) != NULL;
+}
+
+/* A port may be started/stopped on its own only while it has no aggregator */
+static bool
+dpdk_lag_can_startstop_member(struct ifnet *ifp)
+{
+	return !ifp->aggregator;
+}
+
+/* Set the bonding port's MAC address; returns the bonding API result */
+static int
+dpdk_lag_set_l2_address(struct ifnet *ifp, struct rte_ether_addr *macaddr)
+{
+	return rte_eth_bond_mac_address_set(
+		ifp->if_port, macaddr);
+}
+
+/* LAG operations implemented on top of the DPDK bonding functions above */
+const struct lag_ops dpdk_lag_ops = {
+	.lagop_etype_slow_tx = dpdk_lag_etype_slow_tx,
+	.lagop_member_sync_mac_address = dpdk_lag_member_sync_mac_address,
+	.lagop_create = dpdk_lag_create,
+	.lagop_mode_set_balance = dpdk_lag_mode_set_balance,
+	.lagop_mode_set_activebackup = dpdk_lag_mode_set_activebackup,
+	.lagop_select = dpdk_lag_select,
+	.lagop_set_activeport = dpdk_lag_set_activeport,
+	.lagop_delete = dpdk_lag_delete,
+	.lagop_can_start = dpdk_lag_can_start,
+	.lagop_member_add = dpdk_lag_member_add,
+	.lagop_member_delete = dpdk_lag_member_delete,
+	.lagop_nl_member_update = dpdk_lag_nl_member_update,
+	.lagop_refresh_actor_state = dpdk_lag_refresh_actor_state,
+	.lagop_show_detail = dpdk_lag_show_detail,
+	.lagop_walk_team_members = dpdk_lag_walk_team_members,
+	.lagop_is_team = dpdk_lag_is_team,
+	.lagop_can_startstop_member = dpdk_lag_can_startstop_member,
+	.lagop_set_l2_address = dpdk_lag_set_l2_address,
+};
diff --git a/src/if/dpdk-eth/fal_lag.c b/src/if/dpdk-eth/fal_lag.c
new file mode 100644
index 00000000..17873634
--- /dev/null
+++ b/src/if/dpdk-eth/fal_lag.c
@@ -0,0 +1,744 @@
+/*-
+ * Copyright (c) 2019-2020, AT&T Intellectual Property. All rights reserved.
+ *
+ * SPDX-License-Identifier: LGPL-2.1-only
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "dpdk_eth_if.h"
+#include "dp_event.h"
+#include "ether.h"
+#include "fal.h"
+#include "if_var.h"
+#include "lag.h"
+#include "controller.h"
+#include "util.h"
+#include "vplane_debug.h"
+#include "vplane_log.h"
+
+/* Per-port LAG member state kept by this file */
+struct lag_member_state {
+	/* Indicates whether Teamd has selected the member for LAG */
+	bool enabled;
+	/*
+	 * Indicates whether the member is usable based on fault detection
+	 * protocols or not. Assume innocent till proven guilty
+	 */
+	bool usable;
+	/* Last tx hash state successfully set on the FAL member object */
+	bool tx_hash_enabled;
+};
+
+/*
+ * Member state table, indexed by ifp->if_port throughout this file.
+ * NOTE(review): the table holds LAG_MAX_MEMBERS entries; confirm that
+ * this bounds every port id used as an index.
+ */
+static struct lag_member_state fal_lag_member_enabled[LAG_MAX_MEMBERS];
+
+/* Forward declaration: used by fal_lag_delete() before its definition */
+static int fal_lag_member_delete(struct ifnet *team_ifp, struct ifnet *ifp);
+
+/*
+ * Return true when no embellished feature other than LAG membership or
+ * bridge membership is reported for ifp.
+ */
+static bool
+fal_lag_can_create_in_fal(struct ifnet *ifp)
+{
+	/*
+	 * LAG supersedes bridging, and the latter is expected to
+	 * remove itself just prior to the FAL notifications to add the
+	 * LAG member. So if there are no other embellished features,
+	 * then the LAG can be created.
+	 */
+	return !if_check_any_except_emb_feat(ifp, IF_EMB_FEAT_LAG_MEMBER |
+					     IF_EMB_FEAT_BRIDGE_MEMBER);
+}
+
+/*
+ * Transmit a slow-protocol packet on member ifp via if_output(), using
+ * the ethertype taken from the packet's own Ethernet header.
+ * Always returns 0.
+ */
+static int
+fal_lag_etype_slow_tx(struct ifnet *bond __unused, struct ifnet *ifp,
+		      struct rte_mbuf *lacp_pkt)
+{
+	if_output(ifp, lacp_pkt, NULL, ntohs(ethhdr(lacp_pkt)->ether_type));
+
+	return 0;
+}
+
+/* No-op: this implementation does not sync MAC addresses to members */
+static void
+fal_lag_member_sync_mac_address(struct ifnet *ifp __unused)
+{
+	/* not required */
+}
+
+/*
+ * Create a LAG in the FAL and the ifnet that represents it.
+ *
+ * tb[] must carry IFLA_IFNAME and an IFLA_ADDRESS whose payload is
+ * RTE_ETHER_ADDR_LEN bytes long. Returns the new ifnet, or NULL on
+ * failure; once the FAL LAG object exists, later failures delete it
+ * again before returning.
+ */
+static struct ifnet *
+fal_lag_create(const struct ifinfomsg *ifi, struct nlattr *tb[])
+{
+	struct swport_dev_info swport_dev_info;
+	struct rte_eth_dev_info dev_info;
+	struct rte_ether_addr *macaddr = NULL;
+	struct dpdk_eth_if_softc *sc;
+	fal_object_t fal_lag_obj;
+	portid_t dpdk_port;
+	const char *ifname;
+	struct ifnet *ifp;
+	int ret;
+
+	if (tb[IFLA_IFNAME])
+		ifname = mnl_attr_get_str(tb[IFLA_IFNAME]);
+	else {
+		RTE_LOG(ERR, DATAPLANE,
+			"FAL-LAG: missing name for %u\n",
+			ifi->ifi_index);
+		return NULL;
+	}
+
+	/* an address attribute of the wrong length is treated as missing */
+	if (tb[IFLA_ADDRESS]) {
+		size_t addrlen = mnl_attr_get_payload_len(tb[IFLA_ADDRESS]);
+
+		if (addrlen == RTE_ETHER_ADDR_LEN)
+			macaddr = mnl_attr_get_payload(tb[IFLA_ADDRESS]);
+	}
+	if (!macaddr) {
+		RTE_LOG(ERR, DATAPLANE,
+			"FAL-LAG: missing MAC address for %s\n",
+			ifname);
+		return NULL;
+	}
+
+	ret = fal_create_lag(0, NULL, &fal_lag_obj);
+	if (ret < 0) {
+		RTE_LOG(ERR, DATAPLANE,
+			"unable to create FAL LAG interface for %s: %s\n",
+			ifname, strerror(-ret));
+		return NULL;
+	}
+
+	/*
+	 * Find the DPDK port ID for the LAG interface just created in
+	 * the FAL. We do it this way (rather than the port ID being
+	 * returned from the FAL call) to lessen the need for the FAL
+	 * API needing to be aware of there being a DPDK PMD backing
+	 * for certain interfaces.
+	 */
+	for (dpdk_port = 0; dpdk_port < DATAPLANE_MAX_PORTS; dpdk_port++) {
+		if (!rte_eth_dev_is_valid_port(dpdk_port))
+			continue;
+		/*
+		 * Skip ports whose device info or driver name cannot be
+		 * obtained, so that strcmp() never sees a NULL or
+		 * uninitialised name.
+		 */
+		if (rte_eth_dev_info_get(dpdk_port, &dev_info) != 0 ||
+		    dev_info.driver_name == NULL)
+			continue;
+		if (strcmp(dev_info.driver_name, "net_sw_port") != 0)
+			continue;
+		if (sw_port_get_dev_info(dpdk_port, &swport_dev_info) < 0)
+			continue;
+
+		if (swport_dev_info.fal_obj == fal_lag_obj)
+			break;
+	}
+	if (dpdk_port == DATAPLANE_MAX_PORTS) {
+		RTE_LOG(ERR, DATAPLANE,
+			"unable to find sw_port PMD instance for FAL LAG interface %s with object 0x%lx\n",
+			ifname, fal_lag_obj);
+		goto del_fal_lag;
+	}
+
+	/*
+	 * Set the MAC address so when the ifp is created it will be
+	 * filled in with the correct MAC address. A failure is reported
+	 * but, as before, does not abort the create.
+	 */
+	ret = rte_eth_dev_default_mac_addr_set(dpdk_port, macaddr);
+	if (ret < 0)
+		RTE_LOG(ERR, DATAPLANE,
+			"unable to set MAC address for FAL LAG interface %s: %s\n",
+			ifname, strerror(-ret));
+
+	/* create ifp and set it up */
+	if (insert_port(dpdk_port) != 0) {
+		RTE_LOG(ERR, DATAPLANE,
+			"insert port for FAL LAG interface %s failed\n",
+			ifname);
+		goto del_fal_lag;
+	}
+
+	ifp = dpdk_eth_if_alloc_w_port(ifname, ifi->ifi_index, dpdk_port);
+	if (!ifp) {
+		RTE_LOG(ERR, DATAPLANE,
+			"unable to allocate ifnet for FAL LAG interface %s\n",
+			ifname);
+		goto del_rem_port;
+	}
+	sc = ifp->if_softc;
+	sc->scd_fal_port_lag_obj = fal_lag_obj;
+	CDS_INIT_LIST_HEAD(&sc->scd_fal_lag_members_head);
+
+	ifp->hw_forwarding = true;
+
+	return ifp;
+
+	/* error unwinding: undo the steps above in reverse order */
+del_rem_port:
+	remove_port(dpdk_port);
+
+del_fal_lag:
+	ret = fal_delete_lag(fal_lag_obj);
+	if (ret < 0)
+		RTE_LOG(ERR, DATAPLANE,
+			"unable to delete FAL LAG interface for %s during failed create cleanup: %s\n",
+			ifname, strerror(-ret));
+	return NULL;
+}
+
+/* Mode changes are accepted but have no effect here; always returns 0 */
+static int
+fal_lag_mode_set_balance(struct ifnet *ifp __unused)
+{
+	/* ignored - there is no difference in our treatment of these modes */
+	return 0;
+}
+
+/* Mode changes are accepted but have no effect here; always returns 0 */
+static int
+fal_lag_mode_set_activebackup(struct ifnet *ifp __unused)
+{
+	/* ignored - there is no difference in our treatment of these modes */
+	return 0;
+}
+
+/*
+ * Fetch the configured minimum number of links of ifp into *min_links.
+ *
+ * Returns 0 on success, -ENOTSUP if ifp is not IFT_ETHER, and -EINVAL
+ * if there is no softc or no min-links value has been set.
+ */
+static int
+fal_lag_min_links(struct ifnet *ifp, uint16_t *min_links)
+{
+	struct dpdk_eth_if_softc *sc;
+
+	if (ifp->if_type != IFT_ETHER)
+		return -ENOTSUP;
+
+	sc = rcu_dereference(ifp->if_softc);
+
if (!sc || !sc->has_min_links)
+		return -EINVAL;
+	*min_links = sc->min_links;
+	return 0;
+}
+
+/*
+ * Count the members on team_ifp's FAL member list that are currently
+ * both enabled and usable.
+ */
+static int
+fal_lag_get_usable_member_count(struct ifnet *team_ifp)
+{
+	struct dpdk_eth_if_softc *member_sc;
+	struct dpdk_eth_if_softc *sc;
+	struct ifnet *member_ifp;
+	int count = 0;
+
+	sc = team_ifp->if_softc;
+
+	cds_list_for_each_entry_rcu(member_sc,
+				    &sc->scd_fal_lag_members_head,
+				    scd_fal_lag_member_link) {
+		member_ifp = member_sc->scd_ifp;
+		if (fal_lag_member_enabled[member_ifp->if_port].enabled &&
+		    fal_lag_member_enabled[member_ifp->if_port].usable)
+			count++;
+	}
+	return count;
+}
+
+/*
+ * Set the egress-disable and ingress-disable attributes of member ifp
+ * to the inverse of tx_hash_enable and, on success, record the new
+ * state in fal_lag_member_enabled[].
+ *
+ * Returns 0 on success or when the member has not been created in the
+ * FAL yet (in which case nothing is recorded), -1 if setting an
+ * attribute fails.
+ */
+static int
+fal_lag_set_member_tx_hash_state(struct ifnet *ifp, bool tx_hash_enable)
+{
+	struct dpdk_eth_if_softc *member_sc;
+	struct fal_attribute_t attr_list[] = {
+		{
+			.id = FAL_LAG_MEMBER_ATTR_EGRESS_DISABLE,
+			.value.booldata = !tx_hash_enable,
+		},
+		{
+			.id = FAL_LAG_MEMBER_ATTR_INGRESS_DISABLE,
+			.value.booldata = !tx_hash_enable,
+		},
+	};
+	unsigned int i;
+	int ret;
+
+	member_sc = ifp->if_softc;
+
+	/* not yet created so nothing to do */
+	if (!member_sc->scd_fal_lag_member_created)
+		return 0;
+
+	/* attributes are set one at a time; stop at the first failure */
+	for (i = 0; i < ARRAY_SIZE(attr_list); i++) {
+		ret = fal_set_lag_member_attr(
+			member_sc->scd_fal_lag_member_obj, &attr_list[i]);
+		if (ret < 0) {
+			RTE_LOG(ERR, DATAPLANE,
+				"%s: member %s attr %d to %s failed: %s\n",
+				__func__, ifp->if_name, attr_list[i].id,
+				tx_hash_enable ? "enabled" : "disabled",
+				strerror(-ret));
+			return -1;
+		}
+	}
+	fal_lag_member_enabled[ifp->if_port].tx_hash_enabled = tx_hash_enable;
+	return 0;
+}
+
+/*
+ * Bring every member on team_ifp's FAL member list to tx_hash_enable,
+ * skipping members whose recorded state already matches.
+ *
+ * Returns 0 on success, or the error of the first member that fails;
+ * members changed before the failure are not rolled back.
+ */
+static int
+fal_lag_set_all_member_tx_hash_state(struct ifnet *team_ifp,
+				     bool tx_hash_enable)
+{
+	struct dpdk_eth_if_softc *member_sc;
+	struct dpdk_eth_if_softc *sc;
+	struct ifnet *m_ifp;
+	int count = 0;
+	int ret;
+
+	sc = team_ifp->if_softc;
+
+	cds_list_for_each_entry_rcu(member_sc,
+				    &sc->scd_fal_lag_members_head,
+				    scd_fal_lag_member_link) {
+
+		m_ifp = member_sc->scd_ifp;
+		if (fal_lag_member_enabled[m_ifp->if_port].tx_hash_enabled ==
+		    tx_hash_enable)
+			continue;
+
+		ret = fal_lag_set_member_tx_hash_state(m_ifp, tx_hash_enable);
+		if (ret < 0) {
+			RTE_LOG(ERR, DATAPLANE,
+				"%s: Failed to %sable tx hash state for member %s on LAG %s\n",
+				__func__, tx_hash_enable ? "En" : "Dis",
+				m_ifp->if_name, team_ifp->if_name);
+			return ret;
+		}
+		count++;
+	}
+	/* count is the number of members whose state was actually changed */
+	RTE_LOG(INFO, DATAPLANE,
+		"%s:%sabled tx hash state for %d member(s) on LAG %s\n",
+		__func__, tx_hash_enable ? "En" : "Dis",
+		count, team_ifp->if_name);
+	return 0;
+}
+
+/*
+ * Record the new enabled/usable flags of member ifp and update the tx
+ * hash state in the FAL accordingly.
+ *
+ * A member hashes traffic only when it is both enabled and usable. If
+ * the team has a min-links value, crossing it switches all members at
+ * once; otherwise only ifp is updated.
+ *
+ * NOTE(review): the "Enable all" branch is taken whenever the previous
+ * usable count is below min_links, not only when this change reaches
+ * min_links - confirm that this is intended.
+ */
+static int
+fal_lag_set_member_state(struct ifnet *team_ifp, struct ifnet *ifp,
+			 bool enable, bool usable)
+{
+	bool tx_hash_enable;
+	uint16_t min_links = 0;
+	int old_count;
+
+	/* sampled before the flags below change, i.e. the previous count */
+	old_count = fal_lag_get_usable_member_count(team_ifp);
+
+	fal_lag_member_enabled[ifp->if_port].enabled = enable;
+	fal_lag_member_enabled[ifp->if_port].usable = usable;
+
+	tx_hash_enable = (enable && usable);
+
+	/* nothing to change if the recorded state already matches */
+	if (fal_lag_member_enabled[ifp->if_port].tx_hash_enabled ==
+	    tx_hash_enable)
+		return 0;
+
+	/* no min-links value available: only this member is affected */
+	if (fal_lag_min_links(team_ifp, &min_links) < 0)
+		return fal_lag_set_member_tx_hash_state(ifp, tx_hash_enable);
+
+	if (old_count < min_links) {
+		if (tx_hash_enable)
+			/* Enable all */
+			return fal_lag_set_all_member_tx_hash_state(
+				team_ifp, tx_hash_enable);
+	} else if (old_count == min_links) {
+		if (!tx_hash_enable)
+			/* Disable all */
+			return fal_lag_set_all_member_tx_hash_state(
+				team_ifp, tx_hash_enable);
+	}
+	return fal_lag_set_member_tx_hash_state(ifp, tx_hash_enable);
+}
+
+/*
+ * Record whether member ifp has been selected (enable) and apply the
+ * resulting state, keeping the member's current usable flag.
+ *
+ * Returns -1 if ifp is not IFT_ETHER or has no aggregator, 0 if the
+ * selection is unchanged, otherwise the result of
+ * fal_lag_set_member_state().
+ */
+static int
+fal_lag_select(struct ifnet *ifp, bool enable)
+{
+	struct ifnet *team_ifp;
+
+	team_ifp = ifp->aggregator;
+	if (ifp->if_type != IFT_ETHER || !team_ifp)
+		return -1;
+
+	if (fal_lag_member_enabled[ifp->if_port].enabled == enable)
+		return 0;
+
+	DP_DEBUG(LAG, DEBUG, DATAPLANE,
+		 "teamd runner %sselected ifindex %d:%s (port %u)\n",
+		 enable ? "" : "de", ifp->if_index, ifp->if_name, ifp->if_port);
+
+	return fal_lag_set_member_state(
+		team_ifp, ifp, enable,
+		fal_lag_member_enabled[ifp->if_port].usable);
+}
+
+/* Active-port changes are accepted but ignored; always returns 0 */
+static int
+fal_lag_set_activeport(struct ifnet *ifp __unused,
+		       struct ifnet *ifp_member __unused)
+{
+	/*
+	 * ignored - we use the selected indication instead for all of
+	 * active/backup, balanced and LACP config modes.
+	 */
+	return 0;
+}
+
+/*
+ * Delete LAG interface team_ifp: remove every member on its FAL member
+ * list, free the ifnet, release the port and finally delete the FAL
+ * LAG object.
+ *
+ * If a member cannot be removed the delete is abandoned and the LAG is
+ * left in place.
+ */
+static void
+fal_lag_delete(struct ifnet *team_ifp)
+{
+	struct dpdk_eth_if_softc *member_sc, *tmp;
+	struct dpdk_eth_if_softc *sc;
+	fal_object_t fal_lag_obj;
+	char ifname[IFNAMSIZ];
+	portid_t dpdk_port;
+	int ret;
+
+	dpdk_port = team_ifp->if_port;
+	sc = team_ifp->if_softc;
+
+	/*
+	 * Delete all the members first. The _safe iterator is needed
+	 * because removing a member unlinks it from this list.
+	 */
+	cds_list_for_each_entry_safe(member_sc, tmp,
+				     &sc->scd_fal_lag_members_head,
+				     scd_fal_lag_member_link) {
+		RTE_LOG(INFO, DATAPLANE,
+			"Removing member %s from LAG %s as part of LAG delete\n",
+			member_sc->scd_ifp->if_name, team_ifp->if_name);
+		ret = fal_lag_member_delete(team_ifp,
+					    member_sc->scd_ifp);
+		if (ret < 0) {
+			/* say why the LAG is being left behind */
+			RTE_LOG(ERR, DATAPLANE,
+				"Failed to remove %s from LAG %s: %s\n",
+				member_sc->scd_ifp->if_name,
+				team_ifp->if_name, strerror(-ret));
+			return;
+		}
+	}
+
+	/* cache fields before delete */
+	fal_lag_obj = sc->scd_fal_port_lag_obj;
+	snprintf(ifname, sizeof(ifname), "%s", team_ifp->if_name);
+
+	if_free(team_ifp);
+	remove_port(dpdk_port);
+
+	ret = fal_delete_lag(fal_lag_obj);
+	if (ret < 0)
+		RTE_LOG(ERR, DATAPLANE,
+			"unable to delete FAL LAG interface for %s: %s\n",
+			ifname, strerror(-ret));
+}
+
+/* A FAL LAG may always be started, whatever its members */
+static bool
+fal_lag_can_start(const struct ifnet *ifp __unused)
+{
+	return true;
+}
+
+/*
+ * Create the FAL LAG member object for ifp under its aggregator and
+ * link it onto the aggregator's member list. ifp->aggregator must be
+ * set. The member is created disabled unless it is currently both
+ * enabled and usable.
+ *
+ * Returns 0 on success or if the member already exists, otherwise the
+ * error from fal_create_lag_member().
+ */
+static int
+fal_lag_member_apply(struct ifnet *ifp)
+{
+	struct dpdk_eth_if_softc *member_sc;
+	struct dpdk_eth_if_softc *sc;
+	struct fal_attribute_t attr_list[] = {
+		{
+			.id = FAL_LAG_MEMBER_ATTR_IFINDEX,
+			.value.u32 = ifp->if_index,
+		},
+		{
+			/* filled in below once the LAG object is known */
+			.id = FAL_LAG_MEMBER_ATTR_LAG_ID,
+			.value.objid = 0,
+		},
+		{
+			.id = FAL_LAG_MEMBER_ATTR_EGRESS_DISABLE,
+			.value.booldata =
+			!(fal_lag_member_enabled[ifp->if_port].enabled &&
+			  fal_lag_member_enabled[ifp->if_port].usable),
+		},
+		{
+			.id = FAL_LAG_MEMBER_ATTR_INGRESS_DISABLE,
+			.value.booldata =
+			!(fal_lag_member_enabled[ifp->if_port].enabled &&
+			  fal_lag_member_enabled[ifp->if_port].usable),
+		},
+	};
+	int ret;
+
+	sc = ifp->aggregator->if_softc;
+	member_sc = ifp->if_softc;
+
+	if (member_sc->scd_fal_lag_member_created)
+		return 0;
+
+	attr_list[1].value.objid = sc->scd_fal_port_lag_obj;
+
+	ret =
fal_create_lag_member(ARRAY_SIZE(attr_list), attr_list,
+				    &member_sc->scd_fal_lag_member_obj);
+	if (ret < 0) {
+		RTE_LOG(ERR, DATAPLANE,
+			"failed to add FAL member interface %s to FAL LAG interface %s: %s\n",
+			ifp->if_name, ifp->aggregator->if_name,
+			strerror(-ret));
+		return ret;
+	}
+	member_sc->scd_fal_lag_member_created = true;
+	/*
+	 * Record the state the member was just created with. Otherwise a
+	 * value left over from before the member was (re)created could
+	 * make a later state change look like "no change" and be skipped.
+	 */
+	fal_lag_member_enabled[ifp->if_port].tx_hash_enabled =
+		fal_lag_member_enabled[ifp->if_port].enabled &&
+		fal_lag_member_enabled[ifp->if_port].usable;
+	cds_list_add_tail_rcu(&member_sc->scd_fal_lag_member_link,
+			      &sc->scd_fal_lag_members_head);
+
+	return 0;
+}
+
+/*
+ * Make ifp a member of LAG team_ifp.
+ *
+ * Returns -EINVAL if ifp is not IFT_ETHER, -EEXIST if it already has an
+ * aggregator, the error from fal_lag_member_apply() if FAL creation
+ * fails (membership is then undone), otherwise 0. When the member
+ * cannot be created in the FAL yet, it is still recorded as a member.
+ */
+static int
+fal_lag_member_add(struct ifnet *team_ifp, struct ifnet *ifp)
+{
+	int ret;
+
+	if (ifp->if_type != IFT_ETHER)
+		return -EINVAL;
+
+	if (ifp->aggregator) {
+		/* teamd can give us redundant updates, so this is expected */
+		DP_DEBUG(LAG, DEBUG, DATAPLANE,
+			 "%s already a member of %s\n", ifp->if_name,
+			 ifp->aggregator->if_name);
+		return -EEXIST;
+	}
+
+	rcu_assign_pointer(ifp->aggregator, team_ifp);
+	if_notify_emb_feat_change(ifp);
+
+	/*
+	 * NOTE(review): usable is only set when the member can be created
+	 * right away; a member created later through the feature-mode
+	 * event keeps usable == false until set explicitly - confirm.
+	 */
+	if (fal_lag_can_create_in_fal(ifp)) {
+		/* Innocent till proven guilty */
+		fal_lag_member_enabled[ifp->if_port].usable = true;
+		ret = fal_lag_member_apply(ifp);
+		if (ret < 0) {
+			/* undo the membership recorded above */
+			rcu_assign_pointer(ifp->aggregator, NULL);
+			if_notify_emb_feat_change(ifp);
+
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Delete the FAL LAG member object of ifp, if one was created, and
+ * unlink it from the aggregator's member list.
+ *
+ * Returns 0 on success or if there was nothing to delete, otherwise
+ * the error from fal_delete_lag_member().
+ */
+static int
+fal_lag_member_unapply(struct ifnet *ifp)
+{
+	struct dpdk_eth_if_softc *member_sc;
+	int ret;
+
+	member_sc = ifp->if_softc;
+
+	if (!member_sc->scd_fal_lag_member_created)
+		return 0;
+
+	ret = fal_delete_lag_member(member_sc->scd_fal_lag_member_obj);
+	if (ret < 0) {
+		RTE_LOG(ERR, DATAPLANE,
+			"failed to delete FAL member interface %s to FAL LAG interface %s: %s\n",
+			ifp->if_name, ifp->aggregator->if_name,
+			strerror(-ret));
+		return ret;
+	}
+
+	member_sc->scd_fal_lag_member_created = false;
+	cds_list_del_rcu(&member_sc->scd_fal_lag_member_link);
+
+	return 0;
+}
+
+/*
+ * Remove ifp from its LAG.
+ *
+ * Returns -EINVAL if ifp is not IFT_ETHER, -ENOENT if it has no
+ * aggregator, the error from fal_lag_member_unapply() if the FAL
+ * member cannot be deleted (membership is then kept), otherwise 0.
+ */
+static int
+fal_lag_member_delete(struct ifnet *team_ifp __unused, struct ifnet *ifp)
+{
+	int ret;
+
+	if (ifp->if_type != IFT_ETHER)
+		return -EINVAL;
+
+	if (!ifp->aggregator)
+		return -ENOENT;
+
+	ret = fal_lag_member_unapply(ifp);
+	if (ret < 0)
+		return ret;
+
+	/* membership is only cleared once the FAL member is gone */
+	fal_lag_member_enabled[ifp->if_port].usable = false;
+	rcu_assign_pointer(ifp->aggregator, NULL);
+	if_notify_emb_feat_change(ifp);
+
+	return ret;
+}
+
+/*
+ * Add interface to an aggregation or update an existing member interface.
+ * Nothing to do in this implementation; always returns 0.
+ */
+static int
+fal_lag_nl_member_update(const struct ifinfomsg *ifi __unused,
+			 struct ifnet *ifp __unused,
+			 struct ifnet *bond __unused)
+{
+	/*
+	 * Not required, since MAC address syncing from bonding
+	 * interface to member interfaces not done and link flap
+	 * doesn't impact member enabled/disabled state.
+	 */
+	return 0;
+}
+
+/* Nothing to refresh in this implementation */
+static void
+fal_lag_refresh_actor_state(struct ifnet *bond __unused)
+{
+	/*
+	 * Not required, since starting/stopping member doesn't impact
+	 * member enabled/disabled state.
+	 */
+}
+
+/*
+ * Write a JSON object describing LAG node to wr: its name, ifindex,
+ * the min-links value if one has been set, and a "platform_state"
+ * object filled in by fal_dump_lag(). Writes nothing if node is not
+ * IFT_ETHER.
+ */
+static void fal_lag_show_detail(struct ifnet *node, json_writer_t *wr)
+{
+	struct dpdk_eth_if_softc *sc;
+
+	if (node->if_type != IFT_ETHER)
+		return;
+
+	sc = node->if_softc;
+
+	jsonw_start_object(wr);
+	jsonw_string_field(wr, "ifname", node->if_name);
+	jsonw_uint_field(wr, "teamdev", node->if_index);
+	if (sc->has_min_links)
+		jsonw_uint_field(wr, "min-links", sc->min_links);
+
+	jsonw_name(wr, "platform_state");
+	jsonw_start_object(wr);
+	fal_dump_lag(sc->scd_fal_port_lag_obj, wr);
+	jsonw_end_object(wr);
+
+	jsonw_end_object(wr);
+}
+
+/* Member walking is not implemented here; visits nothing, returns 0 */
+static int
+fal_lag_walk_team_members(struct ifnet *ifp __unused,
+			  dp_ifnet_iter_func_t iter_func __unused,
+			  void *arg __unused)
+{
+	/* not required */
+	return 0;
+}
+
+/*
+ * Return true if ifp is a FAL LAG interface: its port must be driven by
+ * "net_sw_port" and the sw_port device info must flag it as a LAG. Any
+ * lookup failure yields false.
+ */
+static bool
+fal_lag_is_team(struct ifnet *ifp)
+{
+	struct swport_dev_info swport_dev_info;
+	struct rte_eth_dev_info dev_info = { 0 };
+	int rc;
+
+	rc = rte_eth_dev_info_get(ifp->if_port, &dev_info);
+	if (rc || dev_info.driver_name == NULL)
+		return false;
+
+	if (strcmp(dev_info.driver_name, "net_sw_port") != 0)
+		return false;
+
+	if (sw_port_get_dev_info(ifp->if_port, &swport_dev_info) < 0)
+		return false;
+
+	return swport_dev_info.is_lag;
+}
+
+/* Members may always be started/stopped on their own */
+static bool
+fal_lag_can_startstop_member(struct ifnet *ifp __unused)
+{
+	/*
+	 * members can be started/stopped independently of bonding
+	 * interface.
+	 */
+	return true;
+}
+
+/* Set the default MAC address of ifp's port; returns the DPDK result */
+static int
+fal_lag_set_l2_address(struct ifnet *ifp, struct rte_ether_addr *macaddr)
+{
+	return rte_eth_dev_default_mac_addr_set(
+		ifp->if_port, macaddr);
+}
+
+/*
+ * Store min_links in ifp's softc and mark it as set. Always returns 0.
+ * The stored value is what fal_lag_min_links() reports.
+ */
+static int
+fal_lag_set_min_links(struct ifnet *ifp, uint16_t min_links)
+{
+	struct dpdk_eth_if_softc *sc = ifp->if_softc;
+
+	sc->min_links = min_links;
+	sc->has_min_links = true;
+	return 0;
+}
+
+/* Return true if ifp is an IFT_ETHER interface that has an aggregator */
+static bool
+fal_lag_port_is_member(struct ifnet *ifp)
+{
+	if (ifp->if_type != IFT_ETHER)
+		return false;
+
+	if (!ifp->aggregator)
+		return false;
+
+	return true;
+}
+
+/*
+ * Record whether member ifp is usable and apply the resulting state,
+ * keeping the member's current enabled flag.
+ *
+ * Returns -1 if ifp is not IFT_ETHER or has no aggregator, 0 if the
+ * usable flag is unchanged, otherwise the result of
+ * fal_lag_set_member_state().
+ */
+static int
+fal_lag_set_member_usable(struct ifnet *ifp, bool usable)
+{
+	struct ifnet *team_ifp;
+
+	team_ifp = ifp->aggregator;
+	if (ifp->if_type != IFT_ETHER || !team_ifp)
+		return -1;
+
+	/* If no change then it's a no-op */
+	if (fal_lag_member_enabled[ifp->if_port].usable == usable)
+		return 0;
+
+	DP_DEBUG(LAG, DEBUG, DATAPLANE,
+		 "Member %d/%s signalled as %susable\n",
+		 ifp->if_index, ifp->if_name, usable ? "" : "un");
+
+	return fal_lag_set_member_state(
+		team_ifp, ifp, fal_lag_member_enabled[ifp->if_port].enabled,
+		usable);
+}
+
+/* LAG operations implemented on top of the FAL functions above */
+const struct lag_ops fal_lag_ops = {
+	.lagop_etype_slow_tx = fal_lag_etype_slow_tx,
+	.lagop_member_sync_mac_address = fal_lag_member_sync_mac_address,
+	.lagop_create = fal_lag_create,
+	.lagop_mode_set_balance = fal_lag_mode_set_balance,
+	.lagop_mode_set_activebackup = fal_lag_mode_set_activebackup,
+	.lagop_select = fal_lag_select,
+	.lagop_set_member_usable = fal_lag_set_member_usable,
+	.lagop_set_activeport = fal_lag_set_activeport,
+	.lagop_delete = fal_lag_delete,
+	.lagop_can_start = fal_lag_can_start,
+	.lagop_member_add = fal_lag_member_add,
+	.lagop_member_delete = fal_lag_member_delete,
+	.lagop_nl_member_update = fal_lag_nl_member_update,
+	.lagop_refresh_actor_state = fal_lag_refresh_actor_state,
+	.lagop_show_detail = fal_lag_show_detail,
+	.lagop_walk_team_members = fal_lag_walk_team_members,
+	.lagop_is_team = fal_lag_is_team,
+	.lagop_can_startstop_member = fal_lag_can_startstop_member,
+	.lagop_set_l2_address = fal_lag_set_l2_address,
+	.lagop_min_links = fal_lag_min_links,
+	.lagop_set_min_links = fal_lag_set_min_links,
+	.lagop_port_is_member = fal_lag_port_is_member,
+};
+
+/*
+ * Interface feature-mode event handler: create or delete the FAL LAG
+ * member object of ifp as its feature mode changes. Only acts on
+ * interfaces whose aggregator is a FAL LAG. The results of the
+ * apply/unapply calls are not checked here.
+ */
+static void
+fal_lag_if_feat_mode_change(struct ifnet *ifp,
+			    enum if_feat_mode_event event)
+{
+	if (!ifp->aggregator || !fal_lag_is_team(ifp->aggregator))
+		/* nothing to do */
+		return;
+
+	switch (event) {
+	case IF_FEAT_MODE_EVENT_L2_FAL_ENABLED:
+		if (fal_lag_can_create_in_fal(ifp))
+			fal_lag_member_apply(ifp);
+		break;
+	case IF_FEAT_MODE_EVENT_L2_FAL_DISABLED:
+		fal_lag_member_unapply(ifp);
+		break;
+	case IF_FEAT_MODE_EVENT_EMB_FEAT_CHANGED:
+		/* the feature set changed: re-evaluate in both directions */
+		if (fal_lag_can_create_in_fal(ifp))
+			fal_lag_member_apply(ifp);
+		else
+			fal_lag_member_unapply(ifp);
+		break;
+	default:
+		break;
+	}
+}
+
+/* Event operations registered below */
+static const struct dp_event_ops fal_lag_events = {
+	.if_feat_mode_change = fal_lag_if_feat_mode_change,
+};
+
+DP_STARTUP_EVENT_REGISTER(fal_lag_events);
diff --git a/src/hotplug.c b/src/if/dpdk-eth/hotplug.c similarity index 71% rename from src/hotplug.c rename to src/if/dpdk-eth/hotplug.c index 69694300..acab121a 100644 --- a/src/hotplug.c +++ b/src/if/dpdk-eth/hotplug.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2019-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015-2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -11,26 +11,25 @@ #include #include #include -#ifdef HAVE_RTE_ETHDEV_DRIVER_H #include -#endif #include #include #include #include #include #include -#include #include "capture.h" #include "commands.h" #include "compat.h" -#include "event.h" +#include "dpdk_eth_if.h" +#include "event_internal.h" #include "hotplug.h" #include "if_var.h" #include "main.h" -#include "master.h" +#include "controller.h" #include "urcu.h" +#include "util.h" #include "vplane_log.h" sigjmp_buf hotplug_jmpbuf; @@ -47,11 +46,7 @@ enum { /* Teardown a device and detach from the DPDK port. 
*/ int detach_device(const char *name) { -#ifdef HAVE_RTE_DEV_REMOVE struct rte_eth_dev_info dev_info; -#else - char detach_name[RTE_ETH_NAME_MAX_LEN]; -#endif struct ifnet *ifp; portid_t port_id; struct rte_eth_dev *dev; @@ -63,7 +58,7 @@ int detach_device(const char *name) if (dev) port_id = dev->data->port_id; else { - ifp = ifnet_byifname(name); + ifp = dp_ifnet_byifname(name); if (!ifp) { RTE_LOG(NOTICE, DATAPLANE, "detach-device(%s): already unplugged and deleted\n", @@ -87,25 +82,33 @@ int detach_device(const char *name) } ifp = ifport_table[port_id]; - if (!ifp) { - RTE_LOG(ERR, DATAPLANE, - "detach-device(%s): no ifp for port id %d\n", - name, port_id); - return -1; - } - CMM_STORE_SHARED(hotplug_inprogress, true); - if (sigsetjmp(hotplug_jmpbuf, 1)) - RTE_LOG(DEBUG, DATAPLANE, - "%s: stop_port() failed!\n", __func__); - else - stop_port(port_id); + if (ifp) { + /* + * The following calls (unassign_queues and + * dpdk_eth_if_stop) both call dp_rcu_synchronize(), and + * setting unplugged needs to be before that call. 
+ */ + ifp->unplugged = 1; + if (sigsetjmp(hotplug_jmpbuf, 1)) { + RTE_LOG(DEBUG, DATAPLANE, + "%s: stop_port() failed!\n", __func__); + + /* if all else fails at least unassign queues */ + unassign_queues(ifp->if_port); + } else + dpdk_eth_if_stop_port(ifp); + + if_notify_emb_feat_change(ifp); + + capture_cancel(ifp); + } - ifp->unplugged = 1; - capture_cancel(ifp); - eth_port_uninit_portid(port_id); teardown_interface_portid(port_id); - ifport_table[port_id] = NULL; + shadow_uninit_port(port_id); + remove_port(port_id); + + rte_eth_dev_info_get(port_id, &dev_info); if (sigsetjmp(hotplug_jmpbuf, 1)) { RTE_LOG(DEBUG, DATAPLANE, @@ -117,12 +120,7 @@ int detach_device(const char *name) RTE_LOG(DEBUG, DATAPLANE, "rte_eth_dev_detach() failed!\n"); else { -#ifdef HAVE_RTE_DEV_REMOVE - rte_eth_dev_info_get(port_id, &dev_info); if (rte_dev_remove(dev_info.device) != 0) { -#else - if (rte_eth_dev_detach(port_id, detach_name) != 0) { -#endif RTE_LOG(ERR, DATAPLANE, "detach-device(%u): detach failed\n", port_id); ret = -1; @@ -151,72 +149,32 @@ int attach_device(const char *name) return 0; } -#ifdef HAVE_RTE_DEV_PROBE if (rte_dev_probe(name) != 0) { -#else - if (rte_eth_dev_attach(name, &port_id) != 0) { -#endif RTE_LOG(ERR, DATAPLANE, "attach-device(%s): attach failed\n", name); return -1; } -#ifdef HAVE_RTE_DEV_PROBE struct rte_dev_iterator iterator; - RTE_ETH_FOREACH_MATCHING_DEV(port_id, name, &iterator) -#endif - rv |= insert_port(port_id); - - if (rv != 0) - RTE_LOG(ERR, DATAPLANE, - "attach-device(%s): failed to insert port\n", - name); - return rv; -} - -/* - * Add or remove a flag from the effective capability set. - * Note the flag must already be present in the permitted set. 
- */ -static int -change_capability(cap_value_t capability, bool on) -{ - cap_t caps; - cap_value_t cap_flag[1]; - int rc; - - if (!cap_valid(capability)) { - RTE_LOG(ERR, DATAPLANE, - "Invalid capability %d\n", capability); - return -1; - } - - caps = cap_get_proc(); - if (caps == NULL) { - RTE_LOG(ERR, DATAPLANE, - "Failed to get current capabilities\n"); - return -1; - } - - cap_flag[0] = capability; - rc = cap_set_flag(caps, CAP_EFFECTIVE, 1, cap_flag, - on ? CAP_SET : CAP_CLEAR); - if (rc < 0) { - RTE_LOG(ERR, DATAPLANE, - "Failed to %s flag for capability %d\n", - on ? "set" : "clear", capability); - goto out; + RTE_ETH_FOREACH_MATCHING_DEV(port_id, name, &iterator) { + rv = insert_port(port_id); + if (rv) { + RTE_LOG(ERR, DATAPLANE, + "attach-device(%s): failed to insert port %u\n", + name, port_id); + break; + } + rv = setup_interface_portid(port_id); + if (rv != 0) { + RTE_LOG(ERR, DATAPLANE, + "attach-device(%s): cannot setup interface (port %u)\n", + name, port_id); + remove_port(port_id); + break; + } } - rc = cap_set_proc(caps); - if (rc < 0) - RTE_LOG(ERR, DATAPLANE, - "Failed to %s capability %d\n", - on ? "enable" : "disable", capability); - -out: - cap_free(caps); - return rc; + return rv; } /* Handle device add/remove events. */ @@ -269,7 +227,7 @@ handle_device_event(void *arg) return zsock_signal(sock, 0); } -/* Send device add/remove to the master thread. */ +/* Send device add/remove to the main thread. 
*/ int send_device_event(const char *name, bool is_add) { @@ -320,13 +278,13 @@ void device_server_init(void) if (!dev_server) rte_panic("cannot bind to vhost socket\n"); - register_event_socket(zsock_resolve(dev_server), + dp_register_event_socket(zsock_resolve(dev_server), handle_device_event, dev_server); } void device_server_destroy(void) { - unregister_event_socket(zsock_resolve(dev_server)); + dp_unregister_event_socket(zsock_resolve(dev_server)); zsock_destroy(&dev_server); } @@ -358,9 +316,9 @@ int cmd_hotplug(FILE *f, int argc, char **argv) return -1; } - rcu_thread_offline(); + dp_rcu_thread_offline(); rc = send_device_event(argv[2], insert); - rcu_thread_online(); + dp_rcu_thread_online(); return rc; } diff --git a/src/hotplug.h b/src/if/dpdk-eth/hotplug.h similarity index 79% rename from src/hotplug.h rename to src/if/dpdk-eth/hotplug.h index 20272367..fa1fd14e 100644 --- a/src/hotplug.h +++ b/src/if/dpdk-eth/hotplug.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, AT&T Intellectual Property. + * Copyright (c) 2019, AT&T Intellectual Property. * All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only diff --git a/src/vhost.c b/src/if/dpdk-eth/vhost.c similarity index 72% rename from src/vhost.c rename to src/if/dpdk-eth/vhost.c index d5375943..1ab6511e 100644 --- a/src/vhost.c +++ b/src/if/dpdk-eth/vhost.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2019, AT&T Intellectual Property. + * Copyright (c) 2019-2020, AT&T Intellectual Property. * All rights reserved. * Copyright (c) 2015-2016 by Brocade Communications Systems, Inc. * All rights reserved. 
@@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -22,7 +23,7 @@ #include #include -#include "config.h" +#include "config_internal.h" #include "dpdk_eth_if.h" #include "dp_event.h" #include "hotplug.h" @@ -35,6 +36,16 @@ #include "vplane_debug.h" #include "vplane_log.h" +#define QMP_RETURN_BUFSIZE 200 + +struct vhost_info_private { + struct rcu_head sc_rcu; /**< Linkage for call_rcu */ + struct cds_list_head list; /**< Linkage for vhost_info_private_list */ + char name[IFNAMSIZ]; /**< DPDK instance name */ + char *qmp_path; /**< Path to QMP connection */ + char *qemu_ifname; /**< QEMU name for guest interface */ +}; + struct vhost_transport { struct rcu_head vt_rcu; /**< Linkage for call_rcu */ struct cds_list_head list; @@ -42,13 +53,13 @@ struct vhost_transport { }; struct vhost_info { - struct rcu_head sc_rcu; /**< Linkage for call_rcu */ - char *qmp_path; /**< Path to QMP connection */ - char *qemu_ifname; /**< QEMU name for guest interface */ struct cds_list_head transport_links; /**< Monitored interfaces -- if any */ }; +static struct cds_list_head vhost_info_private_list = + CDS_LIST_HEAD_INIT(vhost_info_private_list); + /* * Vhost event queue */ @@ -60,7 +71,6 @@ struct vhost_event { static rte_spinlock_t vhost_ev_list_lock = RTE_SPINLOCK_INITIALIZER; struct vhost_event_list vhost_ev_list; -static struct cfg_if_list *vhost_cfg_list; /** * Check to see if an ifp is a vhost interface by examining @@ -94,6 +104,47 @@ static struct vhost_info *get_vhost_info(const struct ifnet *ifp) return rcu_dereference(sc->scd_vhost_info); } +/** + * Get struct vhost_info_private from a vhost ifp. Note that we + * might need to strip the dataplane prefix. 
+ */ +static struct vhost_info_private *vhost_info_by_name(const char *if_name) +{ + struct vhost_info_private *vip; + const char *name; + + name = strstr(if_name, "vhost"); + if (!name) + return NULL; + + cds_list_for_each_entry(vip, &vhost_info_private_list, list) { + if (!strcmp(name, vip->name)) + return vip; + } + + return NULL; +} + +static ssize_t read_timeout(int fd, void *buf, size_t count) +{ + struct pollfd poll_fds[1]; + /* Responses are typically << 100ms, use 500ms to be safe */ + int timeout = 500; + int rc; + + poll_fds[0].fd = fd; + poll_fds[0].events = POLLIN; + + rc = poll(poll_fds, 1, timeout); + if (rc < 1) { + if (!rc) + RTE_LOG(ERR, DATAPLANE, "timeout talking to QMP\n"); + return -1; + } + + return read(fd, buf, count); +} + /** * Send cmd via QEMU Machine Protocol (QMP). */ @@ -102,6 +153,7 @@ static void vhost_qmp_command(const char *path, const char *cmd) int sock; struct sockaddr_un server; const char *cmd_mode = "{ \"execute\": \"qmp_capabilities\" }"; + char buf[QMP_RETURN_BUFSIZE]; ssize_t len; sock = socket(AF_UNIX, SOCK_STREAM, 0); @@ -121,14 +173,39 @@ static void vhost_qmp_command(const char *path, const char *cmd) goto done; } + /* Read the initial server message. + * See https://wiki.qemu.org/Documentation/QMP for details. + */ + len = read_timeout(sock, buf, sizeof(buf)); + if (len < 0) { + DP_DEBUG(VHOST, DEBUG, DATAPLANE, + "%s: read(%s, ...) failed during capability negotiation.\n", + __func__, path); + goto done; + } + + /* Exit capability negotiation and enter command mode. */ len = write(sock, cmd_mode, strlen(cmd_mode)); if (len < 0) DP_DEBUG(VHOST, INFO, DATAPLANE, "%s: write(cmd_mode) failed\n", __func__); + len = read_timeout(sock, buf, sizeof(buf)); + if (len < 0) { + DP_DEBUG(VHOST, DEBUG, DATAPLANE, + "%s: read(%s, ...) 
failed entering command mode.\n", + __func__, path); + goto done; + } + len = write(sock, cmd, strlen(cmd)); if (len < 0) DP_DEBUG(VHOST, INFO, DATAPLANE, "%s: write(set_link) failed\n", __func__); + len = read_timeout(sock, buf, sizeof(buf)); + if (len < 0) + DP_DEBUG(VHOST, DEBUG, DATAPLANE, + "%s: read(%s, ...) failed after sending command.\n", + __func__, path); done: close(sock); @@ -141,43 +218,98 @@ static int vhost_set_link_state(struct ifnet *ifp, bool up) { #define SET_LINK_CMD_STR "{ \"execute\": \"set_link\", " \ "\"arguments\": { \"name\": \"%s\", \"up\" : %s } }" - struct vhost_info *vi; + struct vhost_info_private *vip; char set_link[sizeof(SET_LINK_CMD_STR) + 32 + sizeof("false") + 1]; - vi = get_vhost_info(ifp); - if (!vi || !vi->qmp_path || !vi->qemu_ifname) + vip = vhost_info_by_name(ifp->if_name); + if (!vip || !vip->qmp_path || !vip->qemu_ifname) return -EINVAL; snprintf(set_link, sizeof(set_link), SET_LINK_CMD_STR, - vi->qemu_ifname, up ? "true" : "false"); + vip->qemu_ifname, up ? "true" : "false"); - vhost_qmp_command(vi->qmp_path, set_link); + vhost_qmp_command(vip->qmp_path, set_link); return 0; } -static int vhost_info_alloc(struct ifnet *ifp) +/** + * Create private data for each vhost interface to hold the + * QEMU information. + */ +static struct vhost_info_private *vhost_info_private_create(char *name) +{ + struct vhost_info_private *vip; + + vip = calloc(1, sizeof(*vip)); + if (!vip) + return NULL; + + CDS_INIT_LIST_HEAD(&vip->list); + snprintf(vip->name, IFNAMSIZ, "%s", name); + cds_list_add_tail_rcu(&vip->list, &vhost_info_private_list); + + return vip; +} + +/** + * RCU callback that finally frees the vhost private info. + */ +static void vhost_info_private_free(struct rcu_head *head) +{ + struct vhost_info_private *vip = + caa_container_of(head, struct vhost_info_private, sc_rcu); + + free(vip->qmp_path); + free(vip->qemu_ifname); + free(vip); +} + +/** + * Delete the vhost private information from global list. 
+ * and schedule final free after next RCU. + */ +static void vhost_info_private_delete(char *name) +{ + struct vhost_info_private *vi, *next; + + cds_list_for_each_entry_safe(vi, next, &vhost_info_private_list, list) { + if (!strcmp(name, vi->name)) { + cds_list_del_rcu(&vi->list); + call_rcu(&vi->sc_rcu, vhost_info_private_free); + } + } +} + +/** + * Allocate the vhost_info used by transport-link logic. + */ +static int vhost_info_alloc(const struct ifnet *ifp) { struct dpdk_eth_if_softc *sc = ifp->if_softc; struct vhost_info *vi; + if (sc->scd_vhost_info) + return 0; + vi = zmalloc_aligned(sizeof(*vi)); if (!vi) - return -1; + return -ENOMEM; CDS_INIT_LIST_HEAD(&vi->transport_links); rcu_assign_pointer(sc->scd_vhost_info, vi); return 0; } +/** + * Free the vhost_info associated with the softc. Called when netlink + * indicates an interface is going away. + */ void vhost_info_free(struct vhost_info *vi) { struct vhost_transport *entry, *next; - free(vi->qmp_path); - free(vi->qemu_ifname); cds_list_for_each_entry_safe(entry, next, &vi->transport_links, list) free(entry); - free(vi); } @@ -185,23 +317,27 @@ void vhost_devinfo(json_writer_t *wr, const struct ifnet *ifp) { struct vhost_info *vi; struct vhost_transport *entry; + struct vhost_info_private *vip; - vi = get_vhost_info(ifp); - if (vi) { - if (vi->qmp_path) - jsonw_string_field(wr, "qmp_path", vi->qmp_path); - if (vi->qemu_ifname) + vip = vhost_info_by_name(ifp->if_name); + if (vip) { + if (vip->qmp_path) + jsonw_string_field(wr, "qmp_path", vip->qmp_path); + if (vip->qemu_ifname) jsonw_string_field(wr, "qemu_ifname", - vi->qemu_ifname); - jsonw_name(wr, "transport_links"); - jsonw_start_array(wr); + vip->qemu_ifname); + } + + jsonw_name(wr, "transport_links"); + jsonw_start_array(wr); + vi = get_vhost_info(ifp); + if (vi) cds_list_for_each_entry(entry, &vi->transport_links, list) jsonw_string(wr, entry->ifname); - jsonw_end_array(wr); - } + jsonw_end_array(wr); } -static int cmd_vhost_disable(char 
*ifname, bool on_master) +static int cmd_vhost_disable(char *ifname, bool on_main) { int rc; char *devargs_p; @@ -215,12 +351,14 @@ static int cmd_vhost_disable(char *ifname, bool on_master) if (size == -1) return -1; - if (on_master) { + vhost_info_private_delete(ifname); + + if (on_main) { rc = detach_device(devargs_p); } else { - rcu_thread_offline(); + dp_rcu_thread_offline(); rc = send_device_event(devargs_p, false); - rcu_thread_online(); + dp_rcu_thread_online(); } free(devargs_p); @@ -230,45 +368,19 @@ static int cmd_vhost_disable(char *ifname, bool on_master) static const char dev_basename[] = "/run/dataplane/eth_"; -/** - * Find a vhost interface. If a dp- prefix is given, search - * by name. Otherwise, search by the eth_dev unique name. - */ -static struct ifnet *vhost_byname(char *name) -{ - struct ifnet *ifp; - - if (strncmp(name, "dp", 2) == 0) - ifp = ifnet_byifname(name); - else { - char eth_dev_name[RTE_ETH_NAME_MAX_LEN]; - /* dataplane local name */ - - snprintf(eth_dev_name, sizeof(eth_dev_name), "eth_%s", name); - ifp = ifnet_byethname(eth_dev_name); - } - - return ifp; -} - /** * Set path as the QMP (QEMU Machine Protocol) connection for the vhost ifname. 
*/ static int cmd_vhost_set_qmp_path(char *name, char *path) { - struct ifnet *ifp; - struct vhost_info *vi; + struct vhost_info_private *vip; - ifp = vhost_byname(name); - if (!ifp) + vip = vhost_info_by_name(name); + if (!vip) return -ENODEV; - vi = get_vhost_info(ifp); - if (!vi) - return -ENOMEM; - - free(vi->qmp_path); - vi->qmp_path = strdup(path); + free(vip->qmp_path); + vip->qmp_path = strdup(path); return 0; } @@ -278,32 +390,26 @@ static int cmd_vhost_set_qmp_path(char *name, char *path) */ static int cmd_vhost_set_qemu_ifname(char *name, char *qemu_ifname) { - struct ifnet *ifp; - struct vhost_info *vi; + struct vhost_info_private *vip; - ifp = vhost_byname(name); - if (!ifp) + vip = vhost_info_by_name(name); + if (!vip) return -ENODEV; - vi = get_vhost_info(ifp); - if (!vi) - return -ENOMEM; - - free(vi->qemu_ifname); - vi->qemu_ifname = strdup(qemu_ifname); + free(vip->qemu_ifname); + vip->qemu_ifname = strdup(qemu_ifname); return 0; } static int cmd_vhost_enable(char *ifname, char *queues, char *path, char *alias, - bool on_master, bool is_client) + bool on_main, bool is_client) { int rc; char *devargs_p; char *pathname; char *p; int size; - struct ifnet *ifp; p = strrchr(ifname, 'v'); if (!p) { @@ -323,12 +429,12 @@ static int cmd_vhost_enable(char *ifname, char *queues, char *path, char *alias, DP_DEBUG(VHOST, DEBUG, DATAPLANE, "vhost: sending event ADD, %s\n", ifname); - if (on_master) { + if (on_main) { rc = attach_device(devargs_p); } else { - rcu_thread_offline(); + dp_rcu_thread_offline(); rc = send_device_event(devargs_p, true); - rcu_thread_online(); + dp_rcu_thread_online(); } /* vhost interfaces are created synchronously */ @@ -345,13 +451,18 @@ static int cmd_vhost_enable(char *ifname, char *queues, char *path, char *alias, } if (!rc) { - ifp = vhost_byname(ifname); - if (ifp) { - rc = vhost_info_alloc(ifp); - if (!rc && path) + struct vhost_info_private *vip; + + vip = vhost_info_private_create(ifname); + if (vip) { + if (path) 
cmd_vhost_set_qmp_path(ifname, path); - if (!rc && alias) + if (alias) cmd_vhost_set_qemu_ifname(ifname, alias); + } else { + RTE_LOG(ERR, DATAPLANE, + "vhost_info_private_create failed for %s, transport-link tracking won't work!\n", + ifname); } } @@ -414,7 +525,7 @@ static int vhost_set_update_event(struct ifnet *ifp) rte_spinlock_unlock(&vhost_ev_list_lock); /* Set the event */ - return set_master_worker_vhost_event_fd(); + return set_main_worker_vhost_event_fd(); } /** @@ -441,11 +552,9 @@ static void vhost_link_update_core(struct ifnet *ifp, void *arg, bool process) } vi = get_vhost_info(ifp); - if (!vi) - return; /* No transport links -- The guest's carrier status should be up. */ - if (cds_list_empty(&vi->transport_links)) { + if (!vi || cds_list_empty(&vi->transport_links)) { up = true; update_guest = true; goto out; @@ -458,7 +567,7 @@ static void vhost_link_update_core(struct ifnet *ifp, void *arg, bool process) &vi->transport_links, list) { struct ifnet *transport; - transport = ifnet_byifname(entry->ifname); + transport = dp_ifnet_byifname(entry->ifname); if (transport && ifnet_isrunning(transport)) up = true; if (!updated || strcmp(updated->if_name, entry->ifname) == 0) @@ -484,15 +593,15 @@ static void vhost_link_update_process(char *vhost_name) { struct ifnet *ifp; - rcu_read_lock(); - ifp = vhost_byname(vhost_name); + dp_rcu_read_lock(); + ifp = dp_ifnet_byifname(vhost_name); if (!ifp) { - rcu_read_unlock(); + dp_rcu_read_unlock(); return; } vhost_link_update_core(ifp, NULL, true); - rcu_read_unlock(); + dp_rcu_read_unlock(); } void vhost_event_handler(void) @@ -527,9 +636,9 @@ void vhost_update_guests(struct ifnet *ifp) if (is_vhost(ifp)) vhost_link_update(ifp, NULL); else { - rcu_read_lock(); - ifnet_walk(vhost_link_update, ifp); - rcu_read_unlock(); + dp_rcu_read_lock(); + dp_ifnet_walk(vhost_link_update, ifp); + dp_rcu_read_unlock(); } } @@ -541,64 +650,6 @@ static void vhost_transport_free(struct rcu_head *head) free(entry); } - -static void 
-vhost_event_if_index_set(struct ifnet *ifp, uint32_t ifindex); -static void -vhost_event_if_index_unset(struct ifnet *ifp, uint32_t ifindex); - -static const struct dp_event_ops vhost_event_ops = { - .if_index_set = vhost_event_if_index_set, - .if_index_unset = vhost_event_if_index_unset, -}; - -static void -vhost_event_if_index_set(struct ifnet *ifp, uint32_t ifindex __unused) -{ - struct cfg_if_list_entry *le; - - if (!vhost_cfg_list) - return; - - le = cfg_if_list_lookup(vhost_cfg_list, ifp->if_name); - if (!le) - return; - - DP_DEBUG(VHOST, DEBUG, DATAPLANE, - "Replaying (%s) command for interface %s\n", - le->le_buf, ifp->if_name); - - cmd_vhost_client_cfg(NULL, le->le_argc, le->le_argv); - cfg_if_list_del(vhost_cfg_list, ifp->if_name); - - if (!vhost_cfg_list->if_list_count) - cfg_if_list_destroy(&vhost_cfg_list); -} - -static void -vhost_event_if_index_unset(struct ifnet *ifp, uint32_t ifindex __unused) -{ - if (!vhost_cfg_list) - return; - - cfg_if_list_del(vhost_cfg_list, ifp->if_name); - if (!vhost_cfg_list->if_list_count) { - dp_event_unregister(&vhost_event_ops); - cfg_if_list_destroy(&vhost_cfg_list); - } -} - -static int vhost_replay_init(void) -{ - if (!vhost_cfg_list) { - vhost_cfg_list = cfg_if_list_create(); - if (!vhost_cfg_list) - return -ENOMEM; - } - dp_event_register(&vhost_event_ops); - return 0; -} - /** * Add or remove transport_link to the list of interfaces that name monitors. 
* Expects pre verified string in the following format @@ -606,34 +657,36 @@ static int vhost_replay_init(void) * argv[2] vhost interface name * argv[3] transport-link interface */ -static int cmd_vhost_transport_update(int argc, char **argv, bool add) +static int cmd_vhost_transport_update(char **argv, bool add) { struct ifnet *ifp; - struct vhost_info *vi; struct vhost_transport *entry, *next; + struct vhost_info *vi; + int rc; - ifp = vhost_byname(argv[2]); + ifp = dp_ifnet_byifname(argv[2]); if (!ifp) { - if (vhost_replay_init() < 0) { - RTE_LOG(ERR, DATAPLANE, - "Vhost could not set up replay cache\n"); - return -ENOMEM; - } RTE_LOG(DEBUG, DATAPLANE, - "Caching Vhost transport cmd for %s %s %s\n", - argv[2], argv[3], argv[4]); - cfg_if_list_add(vhost_cfg_list, - argv[2], argc, argv); - - return 0; + "Vhost transport cmd but interface missing %s\n", + argv[2]); + return -1; } - vi = get_vhost_info(ifp); - if (!vi) - return -ENOMEM; DP_DEBUG(VHOST, DEBUG, DATAPLANE, "vhost %s, transport %s action %s\n", argv[2], argv[3], add ? "ADD" : "DEL"); + + rc = vhost_info_alloc(ifp); + if (rc < 0) { + RTE_LOG(ERR, DATAPLANE, "vhost_info_alloc: %s\n", + strerror(-rc)); + return rc; + } + + vi = get_vhost_info(ifp); + if (!vi) + return -ENOENT; + if (add) { struct ifnet *transport_ifp; @@ -646,7 +699,7 @@ static int cmd_vhost_transport_update(int argc, char **argv, bool add) cds_list_add_tail_rcu(&entry->list, &vi->transport_links); /* We might have added an "up" link. 
*/ - transport_ifp = ifnet_byifname(argv[3]); + transport_ifp = dp_ifnet_byifname(argv[3]); if (transport_ifp) vhost_link_update(ifp, transport_ifp); } else { @@ -773,11 +826,9 @@ static int __cmd_vhost_cfg(const char *cmd, rc = cmd_vhost_disable(argv[2], true); else if (strcmp(argv[1], "transport-link") == 0 && argc == 5) { if (strcmp(argv[4], "add") == 0) - rc = cmd_vhost_transport_update(argc, argv, - true); + rc = cmd_vhost_transport_update(argv, true); else if (strcmp(argv[4], "del") == 0) - rc = cmd_vhost_transport_update(argc, argv, - false); + rc = cmd_vhost_transport_update(argv, false); else goto bad_command; } diff --git a/src/vhost.h b/src/if/dpdk-eth/vhost.h similarity index 91% rename from src/vhost.h rename to src/if/dpdk-eth/vhost.h index 577da30e..4ad9f015 100644 --- a/src/vhost.h +++ b/src/if/dpdk-eth/vhost.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019, AT&T Intellectual Property. + * Copyright (c) 2019, AT&T Intellectual Property. * All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only diff --git a/src/gre.c b/src/if/gre.c similarity index 95% rename from src/gre.c rename to src/if/gre.c index a86dd085..15489fed 100644 --- a/src/gre.c +++ b/src/if/gre.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2019-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2011-2016 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -42,7 +42,6 @@ #include #include -#include "bridge.h" #include "capture.h" #include "commands.h" #include "compat.h" @@ -51,6 +50,7 @@ #include "ether.h" #include "fal.h" #include "gre.h" +#include "if/bridge/bridge.h" #include "if_var.h" #include "in_cksum.h" #include "in6.h" @@ -61,7 +61,7 @@ #include "json_writer.h" #include "main.h" #include "netinet6/ip6_funcs.h" -#include "pktmbuf.h" +#include "pktmbuf_internal.h" #include "pl_common.h" #include "pl_fused.h" #include "portmonitor/portmonitor.h" @@ -71,7 +71,7 @@ #include "shadow.h" #include "snmp_mib.h" #include "vplane_log.h" -#include "vrf.h" +#include "vrf_internal.h" #include "fal_plugin.h" #include "ecmp.h" @@ -188,10 +188,10 @@ static inline int gre_info_match(struct cds_lfht_node *node, const void *key) return 0; } else { if (!IN6_ARE_ADDR_EQUAL(&gre_info->iph6.ip6_dst, - &key_gre_info->remote)) + &key_gre_info->remote6)) return 0; if (!IN6_ARE_ADDR_EQUAL(&gre_info->iph6.ip6_src, - &key_gre_info->local)) + &key_gre_info->local6)) return 0; } if (gre_info->flags & GRE_KEY) { @@ -252,8 +252,7 @@ gre_info_lookup(struct gre_infotbl_st *gre_infos, node = cds_lfht_iter_get_node(&iter); if (node) return caa_container_of(node, struct gre_info_st, gre_node); - else - return NULL; + return NULL; } static struct gre_info_st * @@ -321,7 +320,7 @@ mgre_timer(struct rte_timer *tim __rte_unused, void *arg) struct mgre_rt_info *rtinfo; struct cds_lfht_iter iter; - rcu_read_lock(); + dp_rcu_read_lock(); cds_lfht_for_each_entry(sc->scg_rtinfo_hash_nbma, &iter, rtinfo, rtinfo_node_nbma) { /* @@ -342,7 +341,7 @@ mgre_timer(struct rte_timer *tim __rte_unused, void *arg) } CMM_ACCESS_ONCE(rtinfo->rt_info_bits) &= ~RT_INFO_BIT_IS_USED; } - rcu_read_unlock(); + dp_rcu_read_unlock(); } /* mGRE peer management */ @@ -653,7 +652,7 @@ gre_tunnel_add_tracker(struct gre_info_st *greinfo, struct vrf *vrf) } /* Start tracking the tunnel reachability */ - greinfo->ti_info = rt_tracker_add(vrf, &addr, greinfo, + greinfo->ti_info = 
dp_rt_tracker_add(vrf, &addr, greinfo, &gre_tunnel_update_tep); if (!greinfo->ti_info) { RTE_LOG(ERR, GRE, @@ -681,7 +680,7 @@ gre_tunnel_remove_tracker(struct gre_info_st *greinfo, struct vrf *vrf) return; } - rt_tracker_delete(vrf, &addr, greinfo); + dp_rt_tracker_delete(vrf, &addr, greinfo); } /* GRE tunnel setup */ @@ -761,7 +760,7 @@ setup_gre_tunnel(struct ifnet *ifp, struct nlattr *data) greinfo->ifp = ifp; sc->scg_gre_info = greinfo; - if (h_key.remote == INADDR_ANY) { + if (h_key.family == AF_INET && h_key.remote == INADDR_ANY) { /* This is a multipoint tunnel, create the default binding */ mgre_rt_info_table_init(sc); sc->scg_multipoint = 1; @@ -809,7 +808,7 @@ gre_tunnel_update_tep_internal(struct gre_info_st *greinfo, switch (greinfo->family) { case AF_INET: hash = ecmp_iphdr_hash(&greinfo->iph, 0); - ret = nh_lookup_by_index(ti_info->nhindex, hash, + ret = dp_nh_lookup_by_index(ti_info->nhindex, hash, &ip->address.ip_v4.s_addr, &nh_ifindex); if (ip->address.ip_v4.s_addr == INADDR_ANY) @@ -819,7 +818,7 @@ gre_tunnel_update_tep_internal(struct gre_info_st *greinfo, break; case AF_INET6: hash = ecmp_ip6hdr_hash(&greinfo->iph6, 0); - ret = nh6_lookup_by_index(ti_info->nhindex, hash, + ret = dp_nh6_lookup_by_index(ti_info->nhindex, hash, &ip->address.ip_v6, &nh_ifindex); if (IN6_ARE_ADDR_EQUAL(&ip->address.ip_v6, &in6addr_any)) @@ -838,7 +837,7 @@ gre_tunnel_update_tep_internal(struct gre_info_st *greinfo, return NULL; } - nh_ifp = ifnet_byifindex(nh_ifindex); + nh_ifp = dp_ifnet_byifindex(nh_ifindex); if (!nh_ifp) { RTE_LOG(ERR, GRE, "Failed to get NH intf for tun %s\n", @@ -1025,7 +1024,7 @@ gre_tunnel_add_tep(struct ifnet *ifp, struct gre_info_st *tep) /* Create GRE tunnel in response to netlink */ struct ifnet * gre_tunnel_create(int ifindex, const char *ifname, - const struct ether_addr *addr, const unsigned int mtu, + const struct rte_ether_addr *addr, const unsigned int mtu, struct nlattr *data) { struct ifnet *ifp; @@ -1035,7 +1034,7 @@ 
gre_tunnel_create(int ifindex, const char *ifname, return NULL; } - ifp = if_alloc(ifname, IFT_TUNNEL_GRE, mtu, addr, SOCKET_ID_ANY); + ifp = if_alloc(ifname, IFT_TUNNEL_GRE, mtu, addr, SOCKET_ID_ANY, NULL); if (!ifp) { RTE_LOG(ERR, DATAPLANE, "out of memory for gre tunnel ifnet\n"); @@ -1643,7 +1642,7 @@ int ip_gre_tunnel_in(struct rte_mbuf **m0, struct iphdr *ip) set_spath_rx_meta_data(m, tun_ifp, next_prot, TUN_META_FLAGS_DEFAULT); if (unlikely(gre_encap_l2_frame(next_prot))) - rte_pktmbuf_adj(m, decap_size + ETHER_HDR_LEN); + rte_pktmbuf_adj(m, decap_size + RTE_ETHER_HDR_LEN); else if (iptun_eth_hdr_fixup(m, next_prot, decap_size) != 0) return 1; @@ -1673,7 +1672,7 @@ int ip_gre_tunnel_in(struct rte_mbuf **m0, struct iphdr *ip) return 1; break; case ETH_P_TEB: - if (rte_pktmbuf_data_len(m) < sizeof(struct ether_hdr)) { + if (rte_pktmbuf_data_len(m) < sizeof(struct rte_ether_hdr)) { if_incr_error(tun_ifp); rte_pktmbuf_free(m); return 0; @@ -1708,7 +1707,7 @@ gre_tunnel_do_send(struct ifnet *tunnel_ifp, struct rte_mbuf *m) /* Give IPsec first crack. Returns true if packet was consumed */ if (crypto_policy_check_outbound(tunnel_ifp, &m, RT_TABLE_MAIN, - htons(ETHER_TYPE_IPv4), NULL)) + htons(RTE_ETHER_TYPE_IPV4), NULL)) return; outer_ip = iphdr(m); @@ -1720,10 +1719,9 @@ gre_tunnel_do_send(struct ifnet *tunnel_ifp, struct rte_mbuf *m) } /* A tunnel to ourselves. 
*/ - if (unlikely(ip_gre_tunnel_in(&m, outer_ip) == 1)) - goto drop; + if (likely(ip_gre_tunnel_in(&m, outer_ip) == 0)) + return; -drop: rte_pktmbuf_free(m); } @@ -1762,10 +1760,12 @@ gre_tunnel_add_encap(struct ifnet *tunnel_ifp, struct rte_mbuf *m, { struct gre_hdr *gre; struct iphdr *ip = NULL; - struct ether_hdr *eth_hdr; + struct rte_ether_hdr *eth_hdr; /* Copy GRE header into mbuf then set the pak specific fields */ - gre = (struct gre_hdr *)(hdr + ETHER_HDR_LEN + sizeof(struct iphdr)); + gre = (struct gre_hdr *)(hdr + + RTE_ETHER_HDR_LEN + + sizeof(struct iphdr)); if (!gre_tunnel_add_gre_encap(greinfo, gre, proto)) { if_incr_oerror(tunnel_ifp); @@ -1778,7 +1778,7 @@ gre_tunnel_add_encap(struct ifnet *tunnel_ifp, struct rte_mbuf *m, * * FRAGMENT: */ - ip = (struct iphdr *) (hdr + ETHER_HDR_LEN); + ip = (struct iphdr *) (hdr + RTE_ETHER_HDR_LEN); memcpy(ip, outer_ip, sizeof(struct iphdr)); if (ip->ttl == 0) @@ -1807,16 +1807,16 @@ gre_tunnel_add_encap(struct ifnet *tunnel_ifp, struct rte_mbuf *m, /* RFE-196 CS6 marking by default for GRE/NHRP */ if (proto == ETH_P_NHRP) ip->tos |= IPTOS_PREC_INTERNETCONTROL; - ip->id = ip_randomid(0); + ip->id = dp_ip_randomid(0); ip->check = 0; - ip->check = in_cksum_hdr(ip); + ip->check = dp_in_cksum_hdr(ip); - eth_hdr = (struct ether_hdr *)hdr; + eth_hdr = (struct rte_ether_hdr *)hdr; eth_hdr->ether_type = htons(ETH_P_IP); pktmbuf_prepare_encap_out(m); - pktmbuf_l2_len(m) = ETHER_HDR_LEN; - pktmbuf_l3_len(m) = sizeof(struct iphdr); + dp_pktmbuf_l2_len(m) = RTE_ETHER_HDR_LEN; + dp_pktmbuf_l3_len(m) = sizeof(struct iphdr); return true; @@ -1883,8 +1883,8 @@ gre_tunnel_fragment_and_send(struct ifnet *input_ifp, struct ifnet *tunnel_ifp, } case ETH_P_TEB: { - const struct ether_hdr *eh - = rte_pktmbuf_mtod(m, struct ether_hdr *); + const struct rte_ether_hdr *eh + = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); inner_len = rte_pktmbuf_pkt_len(m); dont_frag = true; @@ -1896,7 +1896,7 @@ gre_tunnel_fragment_and_send(struct ifnet 
*input_ifp, struct ifnet *tunnel_ifp, * fragmented */ switch (ntohs(eh->ether_type)) { - case ETHER_TYPE_IPv4: + case RTE_ETHER_TYPE_IPV4: { struct iphdr *ip = iphdr(m); @@ -1911,14 +1911,14 @@ gre_tunnel_fragment_and_send(struct ifnet *input_ifp, struct ifnet *tunnel_ifp, default: break; } - mtu_offset = ETHER_HDR_LEN; + mtu_offset = RTE_ETHER_HDR_LEN; break; } default: if (gre_encap_l2_frame(proto)) inner_len = rte_pktmbuf_pkt_len(m); else - inner_len = rte_pktmbuf_pkt_len(m) - ETHER_HDR_LEN; + inner_len = rte_pktmbuf_pkt_len(m) - RTE_ETHER_HDR_LEN; dont_frag = true; break; } @@ -2039,7 +2039,7 @@ gre_tunnel_encap(struct ifnet *input_ifp, struct ifnet *tunnel_ifp, vrfid_t t_vrfid; unsigned int eh_offset; unsigned int ip_offset; - const struct ether_hdr *eh; + const struct rte_ether_hdr *eh; uint16_t ether_type; sc = rcu_dereference(tunnel_ifp->if_softc); @@ -2075,11 +2075,11 @@ gre_tunnel_encap(struct ifnet *input_ifp, struct ifnet *tunnel_ifp, */ if (gre_encap_l2_frame(proto)) { new_hdr_len = (greinfo->gre_size + sizeof(struct iphdr) + - ETHER_HDR_LEN); + RTE_ETHER_HDR_LEN); len_adjust = 0; } else { new_hdr_len = (greinfo->gre_size + sizeof(struct iphdr)); - len_adjust = ETHER_HDR_LEN; + len_adjust = RTE_ETHER_HDR_LEN; } switch (proto) { @@ -2112,17 +2112,17 @@ gre_tunnel_encap(struct ifnet *input_ifp, struct ifnet *tunnel_ifp, eh_offset = (proto == ETH_P_ERSPAN_TYPEII ? 
sizeof(struct erspan_v2_hdr) : sizeof(struct erspan_v3_hdr)); - eh = rte_pktmbuf_mtod_offset(m, const struct ether_hdr *, + eh = rte_pktmbuf_mtod_offset(m, const struct rte_ether_hdr *, eh_offset); ether_type = eh->ether_type; - ip_offset = eh_offset + ETHER_HDR_LEN; - if (ether_type == htons(ETHER_TYPE_IPv4)) { + ip_offset = eh_offset + RTE_ETHER_HDR_LEN; + if (ether_type == htons(RTE_ETHER_TYPE_IPV4)) { const struct iphdr *ip = rte_pktmbuf_mtod_offset(m, const struct iphdr *, ip_offset); inner_ttl = ip->ttl; inner_tos = ip->tos; - } else if (ether_type == htons(ETHER_TYPE_IPv6)) { + } else if (ether_type == htons(RTE_ETHER_TYPE_IPV6)) { const struct ip6_hdr *ip6 = rte_pktmbuf_mtod_offset(m, const struct ip6_hdr *, ip_offset); @@ -2133,8 +2133,8 @@ gre_tunnel_encap(struct ifnet *input_ifp, struct ifnet *tunnel_ifp, break; case ETH_P_TEB: { - const struct ether_hdr *eh - = rte_pktmbuf_mtod(m, struct ether_hdr *); + const struct rte_ether_hdr *eh + = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); inner_len = rte_pktmbuf_pkt_len(m) - len_adjust; inner_ttl = 0; @@ -2142,7 +2142,7 @@ gre_tunnel_encap(struct ifnet *input_ifp, struct ifnet *tunnel_ifp, inner_df = false; switch (ntohs(eh->ether_type)) { - case ETHER_TYPE_IPv4: + case RTE_ETHER_TYPE_IPV4: { struct iphdr *ip = iphdr(m); @@ -2202,17 +2202,19 @@ gre6_tunnel_add_encap(struct ifnet *tunnel_ifp, struct rte_mbuf *m, { struct gre_hdr *gre; struct ip6_hdr *ip6 = NULL; - struct ether_hdr *eth_hdr; + struct rte_ether_hdr *eth_hdr; /* Copy GRE header into mbuf then set the pak specific fields */ - gre = (struct gre_hdr *)(hdr + ETHER_HDR_LEN + sizeof(struct ip6_hdr)); + gre = (struct gre_hdr *)(hdr + + RTE_ETHER_HDR_LEN + + sizeof(struct ip6_hdr)); if (!gre_tunnel_add_gre_encap(greinfo, gre, proto)) { if_incr_oerror(tunnel_ifp); goto drop; } - ip6 = (struct ip6_hdr *) (hdr + ETHER_HDR_LEN); + ip6 = (struct ip6_hdr *) (hdr + RTE_ETHER_HDR_LEN); memcpy(ip6, outer_ip, sizeof(struct ip6_hdr)); if (ip6->ip6_hlim == 0) @@ 
-2234,12 +2236,12 @@ gre6_tunnel_add_encap(struct ifnet *tunnel_ifp, struct rte_mbuf *m, ip6->ip6_plen = htons(inner_len + greinfo->gre_size); ip6_ip_ecn_encap(&ip6->ip6_flow, inner_tos); - eth_hdr = (struct ether_hdr *)hdr; + eth_hdr = (struct rte_ether_hdr *)hdr; eth_hdr->ether_type = htons(ETH_P_IPV6); pktmbuf_prepare_encap_out(m); - pktmbuf_l2_len(m) = ETHER_HDR_LEN; - pktmbuf_l3_len(m) = sizeof(struct ip6_hdr); + dp_pktmbuf_l2_len(m) = RTE_ETHER_HDR_LEN; + dp_pktmbuf_l3_len(m) = sizeof(struct ip6_hdr); return true; @@ -2303,7 +2305,7 @@ gre6_tunnel_encap(struct ifnet *tunnel_ifp, t_vrfid = greinfo->t_vrfid; new_hdr_len = (greinfo->gre_size + sizeof(struct ip6_hdr)); - len_adjust = ETHER_HDR_LEN; + len_adjust = RTE_ETHER_HDR_LEN; switch (proto) { case ETH_P_IP: @@ -2359,7 +2361,7 @@ gre6_tunnel_do_send(struct ifnet *tunnel_ifp, struct rte_mbuf *m) /* Give IPsec first crack. Returns true if packet was consumed */ if (crypto_policy_check_outbound(tunnel_ifp, &m, RT_TABLE_MAIN, - htons(ETHER_TYPE_IPv6), NULL)) + htons(RTE_ETHER_TYPE_IPV6), NULL)) return; outer_ip = ip6hdr(m); @@ -2372,10 +2374,9 @@ gre6_tunnel_do_send(struct ifnet *tunnel_ifp, struct rte_mbuf *m) } /* A tunnel to ourselves. 
*/ - if (unlikely(ip6_gre_tunnel_in(&m, outer_ip) == 1)) - goto drop; + if (likely(ip6_gre_tunnel_in(&m, outer_ip) == 0)) + return; -drop: rte_pktmbuf_free(m); } @@ -2500,12 +2501,12 @@ static int tun_show_neighbors(FILE *f, struct in_addr *tun_addr, int argc, jsonw_start_array(json); if (argc == 1) { - ifnet_walk(tun_neighbor_dump, json); + dp_ifnet_walk(tun_neighbor_dump, json); goto end; } while (--argc) { - struct ifnet *ifp = ifnet_byifname(*++argv); + struct ifnet *ifp = dp_ifnet_byifname(*++argv); if (!ifp) { err = -1; @@ -2540,12 +2541,12 @@ static void tun_show_tracker(json_writer_t *json, struct gre_info_st *greinfo) if (greinfo->family == AF_INET) { hash = ecmp_iphdr_hash(&greinfo->iph, 0); - ret = nh_lookup_by_index(greinfo->ti_info->nhindex, + ret = dp_nh_lookup_by_index(greinfo->ti_info->nhindex, hash, &nh, &nh_ifindex); if (ret != 0) goto unresolved; - nifp = ifnet_byifindex(nh_ifindex); + nifp = dp_ifnet_byifindex(nh_ifindex); if (!nifp) goto unresolved; jsonw_string_field(json, "state", "reachable"); @@ -2604,7 +2605,7 @@ int cmd_gre(FILE *f, int argc, char **argv) return -1; } --argc, ++argv; /* skip "tracker" */ - ifp = ifnet_byifname(*argv); + ifp = dp_ifnet_byifname(*argv); if (!ifp) { fprintf(f, "Invalid tunnel interface\n"); return -1; @@ -2700,31 +2701,41 @@ gre_tunnel_dump(struct ifnet *ifp, json_writer_t *wr, return 0; } -static void -gre_if_pre_delete(struct ifnet *ifp) +static int +gre_if_l3_disable(struct ifnet *ifp) { /* Delete the tunnel object */ gre_tunnel_remove_tep(ifp, NULL); - if_delete_l3_intf(ifp); + return if_fal_delete_l3_intf(ifp); } -static void -gre_if_create_finished(struct ifnet *ifp, - const struct ether_addr *mac_addr) +static int +gre_if_l3_enable(struct ifnet *ifp) { - if_create_l3_intf(ifp, mac_addr); + int ret; - if (!ifp->fal_l3) - return; + ret = if_fal_create_l3_intf(ifp); + if (ret < 0) + return ret; gre_tunnel_add_tep(ifp, NULL); + + return 0; +} + +static enum dp_ifnet_iana_type +gre_iana_type(struct ifnet 
*ifp __unused) +{ + return DP_IFTYPE_IANA_TUNNEL; } static const struct ift_ops gre_if_ops = { - .ifop_pre_uninit = gre_if_pre_delete, + .ifop_l3_disable = gre_if_l3_disable, .ifop_uninit = gre_tunnel_delete, .ifop_dump = gre_tunnel_dump, - .ifop_create_finished = gre_if_create_finished, + .ifop_l3_enable = gre_if_l3_enable, + .ifop_get_stats = if_fal_l3_get_stats, + .ifop_iana_type = gre_iana_type, }; static void gre_type_init(void) diff --git a/src/gre.h b/src/if/gre.h similarity index 94% rename from src/gre.h rename to src/if/gre.h index 8e37419b..57f19637 100644 --- a/src/gre.h +++ b/src/if/gre.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2019, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015-2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -30,7 +30,7 @@ #include "urcu.h" #include "util.h" -struct ether_addr; +struct rte_ether_addr; struct ifnet; struct ndmsg; struct rte_mbuf; @@ -107,7 +107,7 @@ void gre_table_uninit(struct vrf *vrf); /* GRE Tunnel Intf Functions */ struct ifnet *gre_tunnel_create(int ifindex, const char *ifname, - const struct ether_addr *eth_addr, + const struct rte_ether_addr *eth_addr, const unsigned int mtu, struct nlattr *data); void gre_tunnel_modify(struct ifnet *ifp, struct nlattr *data); @@ -133,7 +133,7 @@ gre_tunnel_encap(struct ifnet *input_ifp, struct ifnet *tunnel_ifp, int mgre_ipv4_neigh_change(struct ifnet *ifp, const struct nlmsghdr *nlh, const struct ndmsg *ndm, struct nlattr *tb[]); const in_addr_t * -mgre_nbma_to_tun_addr(struct ifnet *ifp, const in_addr_t *addr); +mgre_nbma_to_tun_addr(struct ifnet *ifp, const in_addr_t *nbma); static inline bool gre_encap_l2_frame(uint16_t proto) { diff --git a/src/if/ipip_tunnel.c b/src/if/ipip_tunnel.c new file mode 100644 index 00000000..985e784a --- /dev/null +++ b/src/if/ipip_tunnel.c @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2019-2020, AT&T Intellectual Property. 
All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + * + * IPIP tunnel interface implementation + */ + +#include +#include + +#include "dp_event.h" +#include "if_var.h" +#include "ip_ttl.h" +#include "netinet6/ip6_funcs.h" + +void unsup_tunnel_output(struct ifnet *ifp, struct rte_mbuf *m, + struct ifnet *input_ifp, uint16_t proto) +{ + if (!input_ifp) { + rte_pktmbuf_free(m); + if_incr_dropped(ifp); + return; + } + + switch (proto) { + case ETH_P_IP: + /* + * Assume the packet has been forwarded and thus its + * ttl has been decremented. + */ + increment_ttl(iphdr(m)); + ip_local_deliver(ifp, m); + break; + case ETH_P_IPV6: + ip6hdr(m)->ip6_hlim += IPV6_HLIMDEC; + ip6_local_deliver(ifp, m); + break; + default: + local_packet(ifp, m); + break; + } +} + +static enum dp_ifnet_iana_type +ipip_iana_type(struct ifnet *ifp __unused) +{ + return DP_IFTYPE_IANA_TUNNEL; +} + +static const struct ift_ops ipip_tun_if_ops = { + .ifop_iana_type = ipip_iana_type, +}; + +static void ipip_tun_init(void) +{ + int ret = if_register_type(IFT_TUNNEL_OTHER, &ipip_tun_if_ops); + if (ret < 0) + rte_panic("Failed to register IPIP tunnel type: %s", + strerror(-ret)); +} + +static const struct dp_event_ops ipip_tun_events = { + .init = ipip_tun_init, +}; + +DP_STARTUP_EVENT_REGISTER(ipip_tun_events); diff --git a/src/if/loopback.c b/src/if/loopback.c new file mode 100644 index 00000000..23884496 --- /dev/null +++ b/src/if/loopback.c @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2019-2020, AT&T Intellectual Property. All rights reserved. 
+ * + * SPDX-License-Identifier: LGPL-2.1-only + * + * Loopback interface implementation + */ + +#include +#include + +#include "crypto/crypto_forward.h" +#include "dp_event.h" +#include "if_var.h" + +/* Packet on virtual feature point */ +void vfp_output(struct ifnet *ifp, struct rte_mbuf *m, + struct ifnet *input_ifp, uint16_t proto) +{ + struct vfp_softc *vsc = ifp->if_softc; + + switch (vsc->vfp_type) { + case VFP_S2S_CRYPTO: + crypto_policy_post_features_outbound(ifp, input_ifp, m, proto); + break; + case VFP_NONE: + /* Packet on loopback shouldn't reach here */ + assert(0); + rte_pktmbuf_free(m); + if_incr_dropped(ifp); + break; + } +} + +static enum dp_ifnet_iana_type +lo_iana_type(struct ifnet *ifp __unused) +{ + return DP_IFTYPE_IANA_SOFTWARELOOPBACK; +} + +static const struct ift_ops lo_if_ops = { + .ifop_iana_type = lo_iana_type, +}; + +static void lo_type_init(void) +{ + int ret = if_register_type(IFT_LOOP, &lo_if_ops); + if (ret < 0) + rte_panic("Failed to register loopback type: %s", + strerror(-ret)); +} + +static const struct dp_event_ops loopback_events = { + .init = lo_type_init, +}; + +DP_STARTUP_EVENT_REGISTER(loopback_events); diff --git a/src/macvlan.c b/src/if/macvlan.c similarity index 81% rename from src/macvlan.c rename to src/if/macvlan.c index cb35bbf4..f1835e1f 100644 --- a/src/macvlan.c +++ b/src/if/macvlan.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2019-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2011-2017 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -26,7 +26,7 @@ #include "ether.h" #include "if_var.h" #include "macvlan.h" -#include "pktmbuf.h" +#include "pktmbuf_internal.h" #include "urcu.h" #include "util.h" #include "vplane_debug.h" @@ -36,7 +36,6 @@ #define MACVLAN_MODE_VEPA 2 /* talk to other ports through ext bridge */ #define MACVLAN_MODE_BRIDGE 4 /* talk to bridge ports directly */ #define MACVLAN_MODE_PASSTHRU 8 /* take over the underlying device */ -#define MACVLAN_MODE_VRRP 32 /* pass all packets that aren't vrrp */ #define MACVLAN_MAX_NODES 65535 /* macvlans per interface */ @@ -62,10 +61,11 @@ struct mvl_tbl { static struct mvl_tbl *macvlan_table_init(struct ifnet *ifp); static void macvlan_table_free(struct ifnet *ifp, int flush); +static bool is_vrrp_mac_addr(struct rte_ether_addr *ll_addr); /* Given key (ether address) generate a hash using jhash */ static inline unsigned long -macvlan_hash(const struct ether_addr *key) +macvlan_hash(const struct rte_ether_addr *key) { return eth_addr_hash(key, MACVLAN_HASHTBL_BITS); } @@ -77,11 +77,11 @@ macvlan_match(struct cds_lfht_node *node, const void *key) const struct mvl_entry *mvle = caa_container_of(node, const struct mvl_entry, mvl_node); - return ether_addr_equal(&mvle->ifp->eth_addr, key); + return rte_ether_addr_equal(&mvle->ifp->eth_addr, key); } static struct ifnet * -macvlan_lookup(struct mvl_tbl *mvlt, const struct ether_addr *addr, +macvlan_lookup(struct mvl_tbl *mvlt, const struct rte_ether_addr *addr, bool return_parent_if) { /* lookup macvlan in hash by dest macaddr */ @@ -96,7 +96,7 @@ macvlan_lookup(struct mvl_tbl *mvlt, const struct ether_addr *addr, struct mvl_entry *mvle = caa_container_of(node, struct mvl_entry, mvl_node); - if (mvle->mode == MACVLAN_MODE_VRRP && return_parent_if) + if (is_vrrp_mac_addr(&mvle->ifp->eth_addr) && return_parent_if) return mvle->ifp->if_parent; return mvle->ifp; @@ -106,19 +106,19 @@ macvlan_lookup(struct mvl_tbl *mvlt, const struct ether_addr *addr, } static void -macvlan_add_mac(struct ifnet 
*ifp, struct ether_addr *eth_addr) +macvlan_add_mac(struct ifnet *ifp, struct rte_ether_addr *eth_addr) { - MVL_DEBUG("%s adding %s to port %d\n", ifp->if_name, - ether_ntoa(eth_addr), ifp->if_port); - if_add_l2_addr(ifp, eth_addr); + MVL_DEBUG("%s adding %s to parent %s\n", ifp->if_name, + ether_ntoa(eth_addr), ifp->if_parent->if_name); + if_add_l2_addr(ifp->if_parent, eth_addr); } static void -macvlan_del_mac(struct ifnet *ifp, struct ether_addr *eth_addr) +macvlan_del_mac(struct ifnet *ifp, struct rte_ether_addr *eth_addr) { - MVL_DEBUG("%s deleting %s from port %d\n", ifp->if_name, - ether_ntoa(eth_addr), ifp->if_port); - if_del_l2_addr(ifp, eth_addr); + MVL_DEBUG("%s deleting %s from parent %s\n", ifp->if_name, + ether_ntoa(eth_addr), ifp->if_parent->if_name); + if_del_l2_addr(ifp->if_parent, eth_addr); } static int @@ -150,7 +150,7 @@ macvlan_entry_destroy(struct mvl_tbl *mvlt, struct mvl_entry *mvle) struct ifnet * macvlan_create(struct ifnet *ifp, const char *mvl_name, - const struct ether_addr *eth_addr, int ifindex) + const struct rte_ether_addr *eth_addr, int ifindex) { struct ifnet *vifp; int err; @@ -179,7 +179,7 @@ macvlan_create(struct ifnet *ifp, const char *mvl_name, } vifp = if_alloc(mvl_name, IFT_MACVLAN, ifp->if_mtu, eth_addr, - ifp->if_socket); + ifp->if_socket, NULL); if (vifp) { vifp->if_parent = ifp; if_port_inherit(ifp, vifp); @@ -196,8 +196,8 @@ macvlan_create(struct ifnet *ifp, const char *mvl_name, mvle->ifp = vifp; vifp->if_softc = mvle; - /* For now default to VRRP */ - mvle->mode = MACVLAN_MODE_VRRP; + /* For now default to private */ + mvle->mode = MACVLAN_MODE_PRIVATE; err = macvlan_entry_insert(ifp->if_macvlantbl, mvle); if (err) { free(mvle); @@ -209,7 +209,7 @@ macvlan_create(struct ifnet *ifp, const char *mvl_name, } static void -macvlan_change_addr(struct ifnet *ifp, struct ether_addr *eth_addr) +macvlan_change_addr(struct ifnet *ifp, struct rte_ether_addr *eth_addr) { struct ifnet *pifp = ifp->if_parent; struct mvl_entry 
*mvle, *omvle = ifp->if_softc; @@ -222,7 +222,7 @@ macvlan_change_addr(struct ifnet *ifp, struct ether_addr *eth_addr) return; } macvlan_del_mac(ifp, &ifp->eth_addr); - memcpy(&ifp->eth_addr, eth_addr, sizeof(struct ether_addr)); + memcpy(&ifp->eth_addr, eth_addr, sizeof(struct rte_ether_addr)); mvle->ifp = ifp; mvle->mode = omvle->mode; @@ -333,7 +333,7 @@ void macvlan_flood(struct ifnet *ifp, struct rte_mbuf *m) return; cds_lfht_for_each_entry(mvlt->mvlt_hash, &iter, mvle, mvl_node) { - if (mvle->mode != MACVLAN_MODE_VRRP) { + if (!is_vrrp_mac_addr(&mvle->ifp->eth_addr)) { struct rte_mbuf *clone = pktmbuf_clone(m, m->pool); if (clone) ether_input(mvle->ifp, clone); @@ -341,7 +341,7 @@ void macvlan_flood(struct ifnet *ifp, struct rte_mbuf *m) } } -struct ifnet *macvlan_get_vrrp_ip_if(struct ifnet *ifp, in_addr_t target) +struct ifnet *macvlan_get_vrrp_ip_if(struct ifnet *ifp, struct sockaddr *target) { struct mvl_tbl *mvlt = rcu_dereference(ifp->if_macvlantbl); struct mvl_entry *mvle; @@ -352,7 +352,7 @@ struct ifnet *macvlan_get_vrrp_ip_if(struct ifnet *ifp, in_addr_t target) return NULL; cds_lfht_for_each_entry(mvlt->mvlt_hash, &iter, mvle, mvl_node) { - if (mvle->mode != MACVLAN_MODE_VRRP) + if (!is_vrrp_mac_addr(&mvle->ifp->eth_addr)) continue; cds_list_for_each_entry_rcu(ifa, &mvle->ifp->if_addrhead, @@ -360,9 +360,16 @@ struct ifnet *macvlan_get_vrrp_ip_if(struct ifnet *ifp, in_addr_t target) struct sockaddr *sa = (struct sockaddr *) &ifa->ifa_addr; - if (sa->sa_family != AF_INET) + if (sa->sa_family != target->sa_family) continue; - if (target != satosin(sa)->sin_addr.s_addr) + if (target->sa_family == AF_INET && + satosin(target)->sin_addr.s_addr != + satosin(sa)->sin_addr.s_addr) + continue; + if (target->sa_family == AF_INET6 && + !IN6_ARE_ADDR_EQUAL( + satosin6(target)->sin6_addr.s6_addr, + satosin6(sa)->sin6_addr.s6_addr)) continue; return mvle->ifp; @@ -378,8 +385,8 @@ struct ifnet *macvlan_get_vrrp_ip_if(struct ifnet *ifp, in_addr_t target) struct 
ifnet * macvlan_input(struct ifnet *ifp, struct rte_mbuf *m) { - const struct ether_hdr *eth - = rte_pktmbuf_mtod(m, struct ether_hdr *); + const struct rte_ether_hdr *eth + = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); struct mvl_tbl *mvlt = rcu_dereference(ifp->if_macvlantbl); @@ -391,6 +398,21 @@ macvlan_input(struct ifnet *ifp, struct rte_mbuf *m) return ifp; } +static bool +is_vrrp_mac_addr(struct rte_ether_addr *ll_addr) +{ + if (ll_addr->addr_bytes[0] == 0x00 && + ll_addr->addr_bytes[1] == 0x00 && + ll_addr->addr_bytes[2] == 0x5e && + ll_addr->addr_bytes[3] == 0x00 && + ( + ll_addr->addr_bytes[4] == 0x01 || + ll_addr->addr_bytes[4] == 0x02 + )) + return true; + return false; +} + ALWAYS_INLINE struct ifnet *macvlan_check_vrrp_if(struct ifnet *ifp) { @@ -399,7 +421,7 @@ struct ifnet *macvlan_check_vrrp_if(struct ifnet *ifp) if (unlikely(ifp->if_type == IFT_MACVLAN)) { mvle = rcu_dereference(ifp->if_softc); if (likely(mvle != NULL) && - mvle->mode == MACVLAN_MODE_VRRP) + is_vrrp_mac_addr(&mvle->ifp->eth_addr)) return ifp->if_parent; } @@ -407,7 +429,7 @@ struct ifnet *macvlan_check_vrrp_if(struct ifnet *ifp) } struct ifnet *macvlan_get_vrrp_if(const struct ifnet *ifp, - const struct ether_addr *dst_mac) + const struct rte_ether_addr *dst_mac) { struct mvl_tbl *mvlt = rcu_dereference(ifp->if_macvlantbl); @@ -424,17 +446,17 @@ void macvlan_output(struct ifnet *ifp, struct rte_mbuf *mbuf, static int macvlan_if_set_l2_address(struct ifnet *ifp, uint32_t l2_addr_len, void *l2_addr) { - struct ether_addr *macaddr = l2_addr; + struct rte_ether_addr *macaddr = l2_addr; char b1[32], b2[32]; - if (l2_addr_len != ETHER_ADDR_LEN) { + if (l2_addr_len != RTE_ETHER_ADDR_LEN) { RTE_LOG(NOTICE, DATAPLANE, "link address is not ethernet (len=%u)!\n", l2_addr_len); return -EINVAL; } - if (ether_addr_equal(&ifp->eth_addr, macaddr)) + if (rte_ether_addr_equal(&ifp->eth_addr, macaddr)) return 1; RTE_LOG(INFO, DATAPLANE, "%s change MAC from %s to %s\n", @@ -486,12 +508,19 @@ 
macvlan_if_dump(struct ifnet *ifp, json_writer_t *wr, return 0; } +static enum dp_ifnet_iana_type +macvlan_iana_type(struct ifnet *ifp __unused) +{ + return DP_IFTYPE_IANA_OTHER; +} + static const struct ift_ops macvlan_if_ops = { .ifop_set_l2_address = macvlan_if_set_l2_address, .ifop_start = macvlan_if_start, .ifop_stop = macvlan_if_stop, .ifop_uninit = macvlan_delete, .ifop_dump = macvlan_if_dump, + .ifop_iana_type = macvlan_iana_type, }; static void macvlan_init(void) diff --git a/src/macvlan.h b/src/if/macvlan.h similarity index 69% rename from src/macvlan.h rename to src/if/macvlan.h index 5d576b9c..a78c2d26 100644 --- a/src/macvlan.h +++ b/src/if/macvlan.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2019, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2011-2017 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -11,14 +11,14 @@ #include -struct ether_addr; +struct rte_ether_addr; struct ifnet; struct mvl_tbl; struct rte_mbuf; struct ifnet * macvlan_create(struct ifnet *ifp, const char *mvl_name, - const struct ether_addr *eth_addr, int ifindex); + const struct rte_ether_addr *eth_addr, int ifindex); void macvlan_table_flush(struct mvl_tbl *mvlt); @@ -27,7 +27,7 @@ void macvlan_flood(struct ifnet *ifp, struct rte_mbuf *m); struct ifnet * -macvlan_get_vrrp_ip_if(struct ifnet *ifp, in_addr_t target); +macvlan_get_vrrp_ip_if(struct ifnet *ifp, struct sockaddr *target); struct ifnet * macvlan_input(struct ifnet *ifp, struct rte_mbuf *m); @@ -36,7 +36,8 @@ struct ifnet * macvlan_check_vrrp_if(struct ifnet *ifp); struct ifnet * -macvlan_get_vrrp_if(const struct ifnet *ifp, const struct ether_addr *dst_mac); +macvlan_get_vrrp_if(const struct ifnet *ifp, + const struct rte_ether_addr *dst_mac); void macvlan_output(struct ifnet *ifp, struct rte_mbuf *mbuf, struct ifnet *input_ifp, uint16_t proto); diff --git a/src/vlan_if.c b/src/if/vlan/vlan_if.c similarity 
index 84% rename from src/vlan_if.c rename to src/if/vlan/vlan_if.c index dc3c712a..157b96b7 100644 --- a/src/vlan_if.c +++ b/src/if/vlan/vlan_if.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2019-2020, AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only * @@ -15,6 +15,7 @@ #include "qos.h" #include "vlan_if.h" #include "vplane_log.h" +#include "fal.h" /* * Set mac address on a vlan, making sure to register the new mac @@ -24,17 +25,17 @@ static int vlan_if_set_l2_address(struct ifnet *ifp, uint32_t l2_addr_len, void *l2_addr) { - struct ether_addr *macaddr = l2_addr; + struct rte_ether_addr *macaddr = l2_addr; char b1[32], b2[32]; - if (l2_addr_len != ETHER_ADDR_LEN) { + if (l2_addr_len != RTE_ETHER_ADDR_LEN) { RTE_LOG(NOTICE, DATAPLANE, "link address is not ethernet (len=%u)!\n", l2_addr_len); return -EINVAL; } - if (ether_addr_equal(&ifp->eth_addr, macaddr)) + if (rte_ether_addr_equal(&ifp->eth_addr, macaddr)) return 1; RTE_LOG(INFO, DATAPLANE, "%s change MAC from %s to %s\n", @@ -42,14 +43,14 @@ static int vlan_if_set_l2_address(struct ifnet *ifp, uint32_t l2_addr_len, ether_ntoa_r(&ifp->eth_addr, b1), ether_ntoa_r(macaddr, b2)); - if (ether_addr_equal(&ifp->if_parent->eth_addr, macaddr)) { - if (!ether_addr_equal(&ifp->eth_addr, macaddr)) + if (rte_ether_addr_equal(&ifp->if_parent->eth_addr, macaddr)) { + if (!rte_ether_addr_equal(&ifp->eth_addr, macaddr)) if_del_l2_addr(ifp, &ifp->eth_addr); } else { - if (!ether_addr_equal(&ifp->eth_addr, macaddr)) { + if (!rte_ether_addr_equal(&ifp->eth_addr, macaddr)) { if_add_l2_addr(ifp, macaddr); - if (!ether_addr_equal(&ifp->eth_addr, + if (!rte_ether_addr_equal(&ifp->eth_addr, &ifp->if_parent->eth_addr)) if_del_l2_addr(ifp, &ifp->eth_addr); } @@ -109,7 +110,7 @@ vlan_if_delete(struct ifnet *ifp) if_vlan_proto_set(pifp, ETH_P_8021Q); } - if (!ether_addr_equal(&ifp->eth_addr, &pifp->eth_addr)) + if 
(!rte_ether_addr_equal(&ifp->eth_addr, &pifp->eth_addr)) if_del_l2_addr(ifp, &ifp->eth_addr); if (ifp->qinq_inner) { @@ -152,7 +153,7 @@ vlan_if_create(struct ifnet *ifp, uint16_t vid, return vifp; vifp = if_alloc(ifname, IFT_L2VLAN, ifp->if_mtu, &ifp->eth_addr, - ifp->if_socket); + ifp->if_socket, NULL); if (vifp) { if_port_inherit(ifp, vifp); @@ -239,21 +240,25 @@ vlan_if_dump(struct ifnet *ifp, json_writer_t *wr, return 0; } -static void -vlan_if_create_finished(struct ifnet *ifp, - const struct ether_addr *mac_addr) +static int +vlan_if_l3_enable(struct ifnet *ifp) { - if (!ifp->if_parent || - (ifp->if_parent->if_type != IFT_BRIDGE)) - return; + if (!ifp->if_parent) + return 0; - if_create_l3_intf(ifp, mac_addr); + return if_fal_create_l3_intf(ifp); } -static void -vlan_if_pre_delete(struct ifnet *ifp) +static int +vlan_if_l3_disable(struct ifnet *ifp) +{ + return if_fal_delete_l3_intf(ifp); +} + +static enum dp_ifnet_iana_type +vlan_if_iana_type(struct ifnet *ifp __unused) { - if_delete_l3_intf(ifp); + return DP_IFTYPE_IANA_L2VLAN; } static const struct ift_ops vlan_if_ops = { @@ -262,12 +267,14 @@ static const struct ift_ops vlan_if_ops = { .ifop_del_l2_addr = vlan_if_del_l2_addr, .ifop_start = vlan_if_start, .ifop_stop = vlan_if_stop, - .ifop_pre_uninit = vlan_if_pre_delete, + .ifop_l3_disable = vlan_if_l3_disable, .ifop_uninit = vlan_if_delete, .ifop_set_broadcast = ether_if_set_broadcast, .ifop_set_promisc = vlan_if_set_promisc, .ifop_dump = vlan_if_dump, - .ifop_create_finished = vlan_if_create_finished, + .ifop_get_stats = if_fal_l3_get_stats, + .ifop_l3_enable = vlan_if_l3_enable, + .ifop_iana_type = vlan_if_iana_type, }; static void vlan_init(void) @@ -290,7 +297,7 @@ vlan_callback_mac_addr_changed(struct ifnet *ifp, void *arg) if (!ifp->if_vlan || ifp->if_parent != pifp) return; - if (!ether_addr_equal(&ifp->eth_addr, &pifp->eth_addr)) + if (!rte_ether_addr_equal(&ifp->eth_addr, &pifp->eth_addr)) if_add_l2_addr(ifp, &ifp->eth_addr); else 
if_del_l2_addr(ifp, &ifp->eth_addr); @@ -300,7 +307,7 @@ static void vlan_event_mac_addr_change(struct ifnet *ifp, __unused const void *l2_addr) { /* sync vlan interface mac addresses */ - ifnet_walk(vlan_callback_mac_addr_changed, ifp); + dp_ifnet_walk(vlan_callback_mac_addr_changed, ifp); } static const struct dp_event_ops vlan_events = { diff --git a/src/vlan_if.h b/src/if/vlan/vlan_if.h similarity index 93% rename from src/vlan_if.h rename to src/if/vlan/vlan_if.h index 44a288f1..97f16bf9 100644 --- a/src/vlan_if.h +++ b/src/if/vlan/vlan_if.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2018, AT&T Intellectual Property. + * Copyright (c) 2019, AT&T Intellectual Property. * All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only diff --git a/src/vlan_if_netlink.c b/src/if/vlan/vlan_if_netlink.c similarity index 95% rename from src/vlan_if_netlink.c rename to src/if/vlan/vlan_if_netlink.c index 98836ef1..0bf0f9d8 100644 --- a/src/vlan_if_netlink.c +++ b/src/if/vlan/vlan_if_netlink.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2018-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2019-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2011-2016 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -86,7 +86,7 @@ static bool change_vlan(struct ifnet *ifp, struct nlattr *tb[], /* not associated with specific downlink */ return changed; - parent_ifp = ifnet_byifindex(iflink); + parent_ifp = dp_ifnet_byifindex(iflink); } if (mnl_attr_parse_nested(kdata, vlaninfo_attr, vlaninfo) != MNL_CB_OK) @@ -145,12 +145,12 @@ void vlan_nl_modify(struct ifnet *ifp, struct ifnet *pifp = ifp->if_parent; bool vlan_changed = false; struct fal_attribute_t vlan_attr = { - FAL_PORT_ATTR_VLAN_ID, }; + .id = FAL_PORT_ATTR_VLAN_ID, }; if (pifp && kind && kdata) vlan_changed = change_vlan(ifp, tb, kind, kdata, cont_src); else if (tb[IFLA_LINK]) { - pifp = ifnet_byifindex( + pifp = dp_ifnet_byifindex( cont_src_ifindex(cont_src, mnl_attr_get_u32(tb[IFLA_LINK]))); diff --git a/src/vxlan.c b/src/if/vxlan.c similarity index 87% rename from src/vxlan.c rename to src/if/vxlan.c index be112aa7..451b7f71 100644 --- a/src/vxlan.c +++ b/src/if/vxlan.c @@ -1,7 +1,7 @@ /* * VXLAN forwarding database * - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2019-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2011-2017 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -42,14 +42,14 @@ #include #include -#include "bridge_port.h" #include "capture.h" #include "compat.h" #include "compiler.h" -#include "config.h" +#include "config_internal.h" #include "crypto/crypto_forward.h" #include "dp_event.h" #include "ether.h" +#include "if/bridge/bridge_port.h" #include "if_var.h" #include "in6.h" #include "in_cksum.h" @@ -61,9 +61,9 @@ #include "mpls/mpls.h" #include "netinet6/ip6_funcs.h" #include "netinet6/route_v6.h" -#include "nh.h" +#include "nh_common.h" #include "nsh.h" -#include "pktmbuf.h" +#include "pktmbuf_internal.h" #include "pl_common.h" #include "pl_fused.h" #include "route.h" @@ -75,7 +75,7 @@ #include "util.h" #include "vplane_debug.h" #include "vplane_log.h" -#include "vrf.h" +#include "vrf_internal.h" #include "vxlan.h" #define VXLAN_VNI_SIZE 3 @@ -147,17 +147,6 @@ unsigned long vxlan_stats[RTE_MAX_LCORE][VXLAN_STATS_MAX] __rte_cache_aligned; /* Table of active VNIs */ static struct vxlan_vnitbl *vxlans; -/* - * Forward references - */ -static void vxlan_timer(struct rte_timer *, void *); -static void vxlan_rtupdate(struct ifnet *ifp, - struct ip_addr *addr, - const struct ether_addr *dst); -static struct vxlan_rtnode * -vxlan_rtnode_lookup(struct vxlan_softc *sc, - const struct ether_addr *addr); - /* * VNI Table functions */ @@ -187,8 +176,7 @@ vxlan_vni_lookup(uint32_t vni) if (node) return caa_container_of(node, struct vxlan_vninode, vni_node); - else - return NULL; + return NULL; } /* Insert the specified vxlan node into the VNI table. 
*/ @@ -221,16 +209,6 @@ vxlan_vni_destroy(struct vxlan_vninode *vni) call_rcu(&vni->vni_rcu, vxlan_vni_free); } -struct ifnet *vxlan_find_if(uint32_t vni) -{ - struct vxlan_vninode *vni_node = vxlan_vni_lookup(vni); - - if (vni_node) - return vni_node->ifp; - - return NULL; -} - ALWAYS_INLINE uint32_t vxlan_get_vni(struct ifnet *ifp) { @@ -319,8 +297,8 @@ uint16_t vxlan_get_src_port(struct vxlan_vninode *vnode, uint8_t *entropy, static ALWAYS_INLINE int vxlan_ipv4_set_encap(struct vxlan_vninode *vnode, struct rte_mbuf *m, uint8_t tos, struct ip_addr *sip, - struct ip_addr *dip, struct udp_hdr **udp, - struct vxlan_hdr **vxhdr) + struct ip_addr *dip, struct rte_udp_hdr **udp, + struct rte_vxlan_hdr **vxhdr) { uint16_t orig_pkt_data_len = rte_pktmbuf_pkt_len(m); struct iphdr *iph; @@ -333,10 +311,10 @@ int vxlan_ipv4_set_encap(struct vxlan_vninode *vnode, struct rte_mbuf *m, return -ENOMEM; /* Update L2 length in packet as vxlan_ipv4_encap includes ether_hdr */ - pktmbuf_l2_len(m) = ETHER_HDR_LEN; + dp_pktmbuf_l2_len(m) = RTE_ETHER_HDR_LEN; /* ethernet header */ - vhdr->ether_header.ether_type = htons(ETHER_TYPE_IPv4); + vhdr->ether_header.ether_type = htons(RTE_ETHER_TYPE_IPV4); /* IPv4 header construction */ iph = &vhdr->ip_header; @@ -356,12 +334,12 @@ int vxlan_ipv4_set_encap(struct vxlan_vninode *vnode, struct rte_mbuf *m, iph->frag_off = htons(IP_DF); iph->protocol = IPPROTO_UDP; iph->tot_len = htons(sizeof(vhdr->ip_header) + - sizeof(struct udp_hdr) + - sizeof(struct vxlan_hdr) + + sizeof(struct rte_udp_hdr) + + sizeof(struct rte_vxlan_hdr) + orig_pkt_data_len); iph->saddr = sip->address.ip_v4.s_addr; iph->daddr = dip->address.ip_v4.s_addr; - iph->check = in_cksum_hdr(iph); + iph->check = dp_in_cksum_hdr(iph); *udp = &vhdr->udp_header; *vxhdr = &vhdr->vxlan_header; @@ -371,8 +349,8 @@ int vxlan_ipv4_set_encap(struct vxlan_vninode *vnode, struct rte_mbuf *m, static ALWAYS_INLINE int vxlan_ipv6_set_encap(struct vxlan_vninode *vnode, struct rte_mbuf *m, uint8_t 
tc, struct ip_addr *sip, - struct ip_addr *dip, struct udp_hdr **udp, - struct vxlan_hdr **vxhdr) + struct ip_addr *dip, struct rte_udp_hdr **udp, + struct rte_vxlan_hdr **vxhdr) { uint16_t orig_pkt_data_len = rte_pktmbuf_pkt_len(m); struct ip6_hdr *ip6h; @@ -386,10 +364,10 @@ int vxlan_ipv6_set_encap(struct vxlan_vninode *vnode, struct rte_mbuf *m, return -ENOMEM; /* Update L2 length in packet as vxlan_ipv6_encap includes ether_hdr */ - pktmbuf_l2_len(m) = ETHER_HDR_LEN; + dp_pktmbuf_l2_len(m) = RTE_ETHER_HDR_LEN; /* ethernet header */ - vhdr->ether_header.ether_type = htons(ETHER_TYPE_IPv6); + vhdr->ether_header.ether_type = htons(RTE_ETHER_TYPE_IPV6); /* IPv6 header construction */ ip6h = &vhdr->ip6_header; @@ -402,8 +380,8 @@ int vxlan_ipv6_set_encap(struct vxlan_vninode *vnode, struct rte_mbuf *m, ip6h->ip6_hlim = IPV6_DEFAULT_HOPLIMIT; ip6h->ip6_src = sip->address.ip_v6; ip6h->ip6_dst = dip->address.ip_v6; - ip6h->ip6_plen = htons(sizeof(struct udp_hdr) + - sizeof(struct vxlan_hdr) + + ip6h->ip6_plen = htons(sizeof(struct rte_udp_hdr) + + sizeof(struct rte_vxlan_hdr) + orig_pkt_data_len); *udp = &vhdr->udp_header; *vxhdr = &vhdr->vxlan_header; @@ -414,14 +392,14 @@ static ALWAYS_INLINE void vxlan_udp_encap(struct vxlan_vninode *vnode, uint16_t orig_len, struct rte_mbuf *m, uint8_t *entropy, uint32_t entropy_len, - struct udp_hdr *udp, enum vxlan_type vxl_type) + struct rte_udp_hdr *udp, enum vxlan_type vxl_type) { uint16_t pkt_len; /* UDP header */ pkt_len = (uint16_t) - (sizeof(struct udp_hdr) + - sizeof(struct vxlan_hdr) + + (sizeof(struct rte_udp_hdr) + + sizeof(struct rte_vxlan_hdr) + orig_len); /* TBD: With GPE, source port calculation needs to change for other @@ -439,7 +417,7 @@ void vxlan_udp_encap(struct vxlan_vninode *vnode, uint16_t orig_len, static ALWAYS_INLINE int vxlan_vhdr_encap(struct vxlan_vninode *vnode, - struct vxlan_hdr *vhdr, + struct rte_vxlan_hdr *vhdr, enum vxlan_type vxl_type, enum vgpe_nxt_proto nxt_proto, bool oam) @@ -475,8 
+453,8 @@ int vxlan_encap(struct vxlan_vninode *vnode, struct ip_addr *sip, { int err = 0; uint16_t orig_pkt_data_len = rte_pktmbuf_pkt_len(m); - struct udp_hdr *udp; - struct vxlan_hdr *vxh; + struct rte_udp_hdr *udp; + struct rte_vxlan_hdr *vxh; if (dip->type == AF_INET) err = vxlan_ipv4_set_encap(vnode, m, tos_tc, sip, dip, &udp, @@ -506,11 +484,11 @@ int vxlan_select_ipv4_src(struct vxlan_vninode *vnode, struct ip_addr *dip, struct ifnet *dif; /* Lookup destination */ - nxt = rt_lookup(dip->address.ip_v4.s_addr, RT_TABLE_MAIN, m); + nxt = dp_rt_lookup(dip->address.ip_v4.s_addr, RT_TABLE_MAIN, m); if (unlikely(nxt == NULL)) return -ENOENT; - dif = nh4_get_ifp(nxt); + dif = dp_nh_get_ifp(nxt); if (unlikely(dif == NULL)) return -ENOENT; @@ -521,7 +499,7 @@ int vxlan_select_ipv4_src(struct vxlan_vninode *vnode, struct ip_addr *dip, /* Store next hop address */ if (nxt->flags & RTF_GATEWAY) - nhip->address.ip_v4.s_addr = nxt->gateway; + nhip->address.ip_v4.s_addr = nxt->gateway.address.ip_v4.s_addr; else nhip->address.ip_v4.s_addr = dip->address.ip_v4.s_addr; @@ -544,15 +522,15 @@ int vxlan_select_ipv6_src(struct vxlan_vninode *vnode, struct ip_addr *dip, struct ifnet **oifp, struct ip_addr *sip, struct ip_addr *nhip) { - struct next_hop_v6 *nxt6; + struct next_hop *nxt6; const struct in6_addr *saddr_v6; struct ifnet *dif; - nxt6 = rt6_lookup(&dip->address.ip_v6, RT_TABLE_MAIN, m); + nxt6 = dp_rt6_lookup(&dip->address.ip_v6, RT_TABLE_MAIN, m); if (unlikely(nxt6 == NULL)) return -ENOENT; - dif = nh6_get_ifp(nxt6); + dif = dp_nh_get_ifp(nxt6); if (unlikely(dif == NULL)) return -ENOENT; @@ -562,7 +540,7 @@ int vxlan_select_ipv6_src(struct vxlan_vninode *vnode, struct ip_addr *dip, *oifp = dif; if (nxt6->flags & RTF_GATEWAY) - nhip->address.ip_v6 = nxt6->gateway; + nhip->address.ip_v6 = nxt6->gateway.address.ip_v6; else nhip->address.ip_v6 = dip->address.ip_v6; @@ -653,22 +631,22 @@ void vxlan_query_payload_ip4(struct iphdr *iph, uint8_t *tos, } static ALWAYS_INLINE 
-void vxlan_query_payload_eth(struct ether_hdr *eh, uint8_t *tos_tc, +void vxlan_query_payload_eth(struct rte_ether_hdr *eh, uint8_t *tos_tc, uint8_t **entropy, uint32_t *entropy_len) { struct ip6_hdr *ip6h; struct iphdr *iph; - if (eh->ether_type == htons(ETHER_TYPE_IPv4)) { + if (eh->ether_type == htons(RTE_ETHER_TYPE_IPV4)) { iph = (struct iphdr *)((uintptr_t)eh + sizeof(*eh)); vxlan_query_payload_ip4(iph, tos_tc, NULL, NULL); - } else if (eh->ether_type == htons(ETHER_TYPE_IPv6)) { + } else if (eh->ether_type == htons(RTE_ETHER_TYPE_IPV6)) { ip6h = (struct ip6_hdr *)((uintptr_t)eh + sizeof(*eh)); vxlan_query_payload_ip6(ip6h, tos_tc, NULL, NULL); } *entropy = (uint8_t *)eh; - *entropy_len = ETHER_ADDR_LEN * 2; + *entropy_len = RTE_ETHER_ADDR_LEN * 2; } /* @@ -760,21 +738,20 @@ static int vxlan_resolve_send_pak(struct rte_mbuf *m, struct ip_addr *nhip, { if (likely(dip->type == AF_INET)) { struct next_hop nh = {.flags = RTF_GATEWAY, - .gateway = nhip->address.ip_v4.s_addr, + .gateway = *nhip, .u.ifp = dif}; - if (!ip_l2_resolve_and_output(ifp, m, - &nh, ETH_P_IP)) { + if (!dp_ip_l2_nh_output(ifp, m, &nh, ETH_P_IP)) { VXLAN_STAT_INC(VXLAN_STATS_OUTDISCARDS_ARP_FAILED); goto err; } - IPSTAT_INC_IFP(ifp, IPSTATS_MIB_OUTPKTS); + IPSTAT_INC_IFP(dif, IPSTATS_MIB_OUTPKTS); } else if (likely(dip->type == AF_INET6)) { - struct next_hop_v6 nh = {.flags = RTF_GATEWAY, - .gateway = nhip->address.ip_v6, - .u.ifp = dif}; + struct next_hop nh = {.flags = RTF_GATEWAY, + .gateway = *nhip, + .u.ifp = dif}; - if (!ip6_l2_resolve_and_output(ifp, m, &nh, ETH_P_IPV6)) { + if (!dp_ip6_l2_nh_output(ifp, m, &nh, ETH_P_IPV6)) { VXLAN_STAT_INC(VXLAN_STATS_OUTDISCARDS_ND_FAILED); goto err; } @@ -851,63 +828,148 @@ vxlan_send_packet(struct ifnet *ifp, uint32_t vni, struct ip_addr *dip, return -EFAULT; } -/* Use this API for sending already encapped vxlan packet */ -void vxlan_send_encapped(struct rte_mbuf *m, struct ifnet *ifp, uint8_t af) +/* + * FDB functions + */ +/* Given key (ether 
address) generate a hash */ +static inline unsigned long +vxlan_rtnode_hash(const struct rte_ether_addr *key) { - struct ifnet *dif = NULL; - uint32_t vni; - struct vxlan_vninode *vnode; - struct ip_addr sip, dip, nhip; - int err; + return eth_addr_hash(key, VXLAN_RTHASH_BITS); +} - struct iphdr *ip; - struct ip6_hdr *ip6; +/* Test if ether address matches value for this entry */ +static int vxlan_rtnode_match(struct cds_lfht_node *node, + const void *key) +{ + const struct vxlan_rtnode *vxlrt + = caa_container_of(node, const struct vxlan_rtnode, vxlrt_node); - if (!ifp) - return; + return rte_ether_addr_equal(&vxlrt->vxlrt_addr, key); +} - vni = vxlan_get_vni(ifp); +/* Look up a vxlan route node for the specified destination. */ +static struct vxlan_rtnode * +vxlan_rtnode_lookup(struct vxlan_softc *sc, + const struct rte_ether_addr *addr) +{ + struct cds_lfht_iter iter; - if (af == AF_INET) { - ip = iphdr(m); - dip.type = AF_INET; - dip.address.ip_v4.s_addr = ip->daddr; + cds_lfht_lookup(sc->scvx_rthash, + vxlan_rtnode_hash(addr), + vxlan_rtnode_match, addr, &iter); - } else { - ip6 = ip6hdr(m); - dip.type = AF_INET6; - dip.address.ip_v6 = ip6->ip6_dst; + struct cds_lfht_node *node = cds_lfht_iter_get_node(&iter); + + if (node) + return caa_container_of(node, struct vxlan_rtnode, vxlrt_node); + return NULL; +} + +/* Insert the specified vxlan node into the route table. */ +static int +vxlan_rtnode_insert(struct vxlan_softc *sc, struct vxlan_rtnode *vxlrt) +{ + struct cds_lfht_node *ret_node; + + cds_lfht_node_init(&vxlrt->vxlrt_node); + + unsigned long hash = vxlan_rtnode_hash(&vxlrt->vxlrt_addr); + + ret_node = cds_lfht_add_unique(sc->scvx_rthash, hash, + vxlan_rtnode_match, &vxlrt->vxlrt_addr, + &vxlrt->vxlrt_node); + return (ret_node != &vxlrt->vxlrt_node) ? 
EEXIST : 0; +} + +/* Update existing forwarding table entry */ +static void +vxlan_rtupdate(struct ifnet *ifp, + struct ip_addr *addr, + const struct rte_ether_addr *dst) +{ + struct vxlan_softc *sc = ifp->if_softc; + struct vxlan_rtnode *vxlrt; + + /* + * A route for this destination might already exist. If so, + * update it. + */ + vxlrt = vxlan_rtnode_lookup(sc, dst); + if (unlikely(vxlrt == NULL)) { + vxlrt = zmalloc_aligned(sizeof(*vxlrt)); + if (unlikely(vxlrt == NULL)) + return; + + vxlrt->vxlrt_flags = IFBAF_DYNAMIC; + if (addr->type == AF_INET) { + vxlrt->vxlrt_dst = addr->address.ip_v4; + vxlrt->vxlrt_flags |= IFBAF_ADDR_V4; + } else { + vxlrt->vxlrt_dst_v6 = addr->address.ip_v6; + vxlrt->vxlrt_flags |= IFBAF_ADDR_V6; + } + vxlrt->vxlrt_addr = *dst; + vxlrt->vxlrt_expire = 0; + + if (vxlan_rtnode_insert(sc, vxlrt) != 0) { + free(vxlrt); + return; + } + } else if ((vxlrt->vxlrt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) { + if (addr->type == AF_INET) { + vxlrt->vxlrt_dst = addr->address.ip_v4; + vxlrt->vxlrt_flags |= IFBAF_ADDR_V4; + } else { + vxlrt->vxlrt_dst_v6 = addr->address.ip_v6; + vxlrt->vxlrt_flags |= IFBAF_ADDR_V6; + } } - vnode = vxlan_vni_lookup(vni); - if (unlikely(vnode == NULL)) - goto drop; - err = vxlan_select_src(vnode, &dip, m, &dif, &sip, &nhip); - if (unlikely(err != 0)) - goto drop; + /* Entry is marked used */ + rte_atomic32_clear(&vxlrt->vxlrt_unused); +} - (void)vxlan_resolve_send_pak(m, &nhip, &dip, ifp, dif); - return; - drop: - rte_pktmbuf_free(m); - IPSTAT_INC_IFP(ifp, IPSTATS_MIB_OUTDISCARDS); - if_incr_dropped(ifp); +static void +vxlan_rtnode_free(struct rcu_head *head) +{ + free(caa_container_of(head, struct vxlan_rtnode, vxlrt_rcu)); +} + +/* Destroy a vxlan rtnode. */ +static void +vxlan_rtnode_destroy(struct vxlan_rtnode *vxlrt) +{ + call_rcu(&vxlrt->vxlrt_rcu, vxlan_rtnode_free); +} + +/* Create lock free hash table. 
*/ +static void +vxlan_rtable_init(struct vxlan_softc *sc) +{ + sc->scvx_rthash = cds_lfht_new(VXLAN_RTHASH_MIN, + VXLAN_RTHASH_MIN, + VXLAN_RTHASH_MAX, + CDS_LFHT_AUTO_RESIZE, + NULL); + if (sc->scvx_rthash == NULL) + rte_panic("Can't allocate rthash\n"); } static void vxlan_snoop(enum vgpe_nxt_proto nxtproto, struct ifnet *ifp, struct rte_mbuf *m __unused, - uint16_t ether_type, void *l3hdr, struct vxlan_hdr *vxlan) + uint16_t ether_type, void *l3hdr, struct rte_vxlan_hdr *vxlan) { void *vxlan_end = vxlan + 1; struct ip_addr ipaddr; - if (ether_type == htons(ETHER_TYPE_IPv4)) { + if (ether_type == htons(RTE_ETHER_TYPE_IPV4)) { const struct iphdr *oip = l3hdr; ipaddr.type = AF_INET; ipaddr.address.ip_v4.s_addr = oip->saddr; - } else if (ether_type == htons(ETHER_TYPE_IPv6)) { + } else if (ether_type == htons(RTE_ETHER_TYPE_IPV6)) { const struct ip6_hdr *oip6 = l3hdr; ipaddr.type = AF_INET6; @@ -916,7 +978,7 @@ vxlan_snoop(enum vgpe_nxt_proto nxtproto, struct ifnet *ifp, return; /* where is the inner ether header? 
*/ - struct ether_hdr *eh; + struct rte_ether_hdr *eh; if (nxtproto == VGPE_NXT_NONE || nxtproto == VGPE_NXT_ETHER) /* trivial for vxlan, or vxlan-gpe followed by ether */ @@ -937,13 +999,13 @@ vxlan_snoop(enum vgpe_nxt_proto nxtproto, struct ifnet *ifp, /* Don't learn my own address, * other side might be setup the same way */ - if (unlikely(ether_addr_equal(&eh->s_addr, &ifp->eth_addr))) + if (unlikely(rte_ether_addr_equal(&eh->s_addr, &ifp->eth_addr))) return; struct bridge_port *brport = rcu_dereference(ifp->if_brport); if (unlikely(brport && - ether_addr_equal( + rte_ether_addr_equal( &eh->s_addr, &bridge_port_get_bridge(brport)->eth_addr))) return; @@ -960,17 +1022,17 @@ vxlan_recv_encap(struct rte_mbuf *m, uint16_t ether_type, uint32_t vni; int cntr = VXLAN_STATS_INPKTS; unsigned int udp_encap_len; - struct vxlan_hdr *vxhdr; + struct rte_vxlan_hdr *vxhdr; uint32_t vx_flags; uint16_t hdr_len; udp_encap_len = (char *)(udp + 1) - rte_pktmbuf_mtod(m, char *); - hdr_len = udp_encap_len + sizeof(struct vxlan_hdr); + hdr_len = udp_encap_len + sizeof(struct rte_vxlan_hdr); if (rte_pktmbuf_data_len(m) < hdr_len) { cntr = VXLAN_STATS_INDISCARDS_BADHEADER; goto drop; } - vxhdr = (struct vxlan_hdr *)(udp + 1); + vxhdr = (struct rte_vxlan_hdr *)(udp + 1); vni = ntohl(vxhdr->vx_vni); if (vni & 0xff) { @@ -1025,12 +1087,12 @@ vxlan_recv_encap(struct rte_mbuf *m, uint16_t ether_type, switch (vxl_type) { case VXLAN_L2: - if (rte_pktmbuf_data_len(m) < sizeof(struct ether_hdr)) { + if (rte_pktmbuf_data_len(m) < sizeof(struct rte_ether_hdr)) { cntr = VXLAN_STATS_INDISCARDS_SHORTPAYLOAD; goto drop; } - set_spath_rx_meta_data(m, ifp, ETHER_TYPE_TEB, + set_spath_rx_meta_data(m, ifp, RTE_ETHER_TYPE_TEB, TUN_META_FLAGS_DEFAULT); ether_input(ifp, m); return; @@ -1038,18 +1100,18 @@ vxlan_recv_encap(struct rte_mbuf *m, uint16_t ether_type, switch (nxtproto) { case VGPE_NXT_ETHER: if (rte_pktmbuf_data_len(m) < - sizeof(struct ether_hdr)) { + sizeof(struct rte_ether_hdr)) { cntr = 
VXLAN_STATS_INDISCARDS_SHORTPAYLOAD; goto drop; } - set_spath_rx_meta_data(m, ifp, ETHER_TYPE_TEB, + set_spath_rx_meta_data(m, ifp, RTE_ETHER_TYPE_TEB, TUN_META_FLAGS_DEFAULT); ether_input(ifp, m); return; case VGPE_NXT_IPV4: /* The vxlan payload had no L2 header, add one now. */ - if (ethhdr_prepend(m, ETHER_TYPE_IPv4) == NULL) { + if (ethhdr_prepend(m, RTE_ETHER_TYPE_IPV4) == NULL) { cntr = VXLAN_STATS_INDISCARDS_PKT_HEADROOM; goto drop; } @@ -1059,14 +1121,14 @@ vxlan_recv_encap(struct rte_mbuf *m, uint16_t ether_type, .l2_pkt_type = L2_PKT_UNICAST, .in_ifp = ifp, }; - set_spath_rx_meta_data(m, ifp, ETHER_TYPE_IPv4, + set_spath_rx_meta_data(m, ifp, RTE_ETHER_TYPE_IPV4, TUN_META_FLAGS_DEFAULT); if (unlikely(ifp->capturing)) capture_burst(ifp, &m, 1); pipeline_fused_ipv4_validate(&pl_pkt); return; case VGPE_NXT_IPV6: { - if (ethhdr_prepend(m, ETHER_TYPE_IPv6) == NULL) { + if (ethhdr_prepend(m, RTE_ETHER_TYPE_IPV6) == NULL) { cntr = VXLAN_STATS_INDISCARDS_PKT_HEADROOM; goto drop; } @@ -1075,7 +1137,7 @@ vxlan_recv_encap(struct rte_mbuf *m, uint16_t ether_type, .mbuf = m, .in_ifp = ifp, }; - set_spath_rx_meta_data(m, ifp, ETHER_TYPE_IPv6, + set_spath_rx_meta_data(m, ifp, RTE_ETHER_TYPE_IPV6, TUN_META_FLAGS_DEFAULT); if (unlikely(ifp->capturing)) capture_burst(ifp, &m, 1); @@ -1109,7 +1171,7 @@ static int vxlan_recv_encap_ipv4(struct rte_mbuf *m, return 0; } - vxlan_recv_encap(m, htons(ETHER_TYPE_IPv4), ip, udp); + vxlan_recv_encap(m, htons(RTE_ETHER_TYPE_IPV4), ip, udp); return 0; } @@ -1120,7 +1182,7 @@ static int vxlan_recv_encap_ipv6(struct rte_mbuf *m, { struct ip6_hdr *ip6 = l3hdr; - vxlan_recv_encap(m, htons(ETHER_TYPE_IPv6), ip6, udp); + vxlan_recv_encap(m, htons(RTE_ETHER_TYPE_IPV6), ip6, udp); return 0; } @@ -1128,7 +1190,7 @@ static int vxlan_recv_encap_ipv6(struct rte_mbuf *m, void vxlan_output(struct ifnet *ifp, struct rte_mbuf *m, uint16_t proto) { - const struct ether_hdr *eh; + const struct rte_ether_hdr *eh; struct vxlan_softc *sc = ifp->if_softc; 
struct vxlan_rtnode *vxlrt = NULL; struct ip_addr dip; @@ -1154,7 +1216,7 @@ vxlan_output(struct ifnet *ifp, struct rte_mbuf *m, uint16_t proto) eh->ether_type != htons(ETH_P_IPV6))) { nxtproto = (vxl_type == VXLAN_L2) ? VGPE_NXT_NONE : VGPE_NXT_ETHER; - is_multicast = is_multicast_ether_addr(&eh->d_addr); + is_multicast = rte_is_multicast_ether_addr(&eh->d_addr); vxlrt = vxlan_rtnode_lookup(sc, &eh->d_addr); if (vxlrt) { if (vxlrt->vxlrt_flags & IFBAF_ADDR_V4) { @@ -1197,135 +1259,6 @@ vxlan_output(struct ifnet *ifp, struct rte_mbuf *m, uint16_t proto) rte_pktmbuf_free(m); } -/* - * FDB functions - */ -/* Given key (ether address) generate a hash */ -static inline unsigned long -vxlan_rtnode_hash(const struct ether_addr *key) -{ - return eth_addr_hash(key, VXLAN_RTHASH_BITS); -} - -/* Test if ether address matches value for this entry */ -static int vxlan_rtnode_match(struct cds_lfht_node *node, - const void *key) -{ - const struct vxlan_rtnode *vxlrt - = caa_container_of(node, const struct vxlan_rtnode, vxlrt_node); - - return ether_addr_equal(&vxlrt->vxlrt_addr, key); -} - -/* Look up a vxlan route node for the specified destination. */ -static struct vxlan_rtnode * -vxlan_rtnode_lookup(struct vxlan_softc *sc, - const struct ether_addr *addr) -{ - struct cds_lfht_iter iter; - - cds_lfht_lookup(sc->scvx_rthash, - vxlan_rtnode_hash(addr), - vxlan_rtnode_match, addr, &iter); - - struct cds_lfht_node *node = cds_lfht_iter_get_node(&iter); - - if (node) - return caa_container_of(node, struct vxlan_rtnode, vxlrt_node); - else - return NULL; -} - -/* Insert the specified vxlan node into the route table. 
*/ -static int -vxlan_rtnode_insert(struct vxlan_softc *sc, struct vxlan_rtnode *vxlrt) -{ - struct cds_lfht_node *ret_node; - - cds_lfht_node_init(&vxlrt->vxlrt_node); - - unsigned long hash = vxlan_rtnode_hash(&vxlrt->vxlrt_addr); - - ret_node = cds_lfht_add_unique(sc->scvx_rthash, hash, - vxlan_rtnode_match, &vxlrt->vxlrt_addr, - &vxlrt->vxlrt_node); - return (ret_node != &vxlrt->vxlrt_node) ? EEXIST : 0; -} - -/* Update existing forwarding table entry */ -static void -vxlan_rtupdate(struct ifnet *ifp, - struct ip_addr *addr, - const struct ether_addr *dst) -{ - struct vxlan_softc *sc = ifp->if_softc; - struct vxlan_rtnode *vxlrt; - - /* - * A route for this destination might already exist. If so, - * update it. - */ - vxlrt = vxlan_rtnode_lookup(sc, dst); - if (unlikely(vxlrt == NULL)) { - vxlrt = zmalloc_aligned(sizeof(*vxlrt)); - if (unlikely(vxlrt == NULL)) - return; - - vxlrt->vxlrt_flags = IFBAF_DYNAMIC; - if (addr->type == AF_INET) { - vxlrt->vxlrt_dst = addr->address.ip_v4; - vxlrt->vxlrt_flags |= IFBAF_ADDR_V4; - } else { - vxlrt->vxlrt_dst_v6 = addr->address.ip_v6; - vxlrt->vxlrt_flags |= IFBAF_ADDR_V6; - } - vxlrt->vxlrt_addr = *dst; - vxlrt->vxlrt_expire = 0; - - if (vxlan_rtnode_insert(sc, vxlrt) != 0) { - free(vxlrt); - return; - } - } else if ((vxlrt->vxlrt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) { - if (addr->type == AF_INET) { - vxlrt->vxlrt_dst = addr->address.ip_v4; - vxlrt->vxlrt_flags |= IFBAF_ADDR_V4; - } else { - vxlrt->vxlrt_dst_v6 = addr->address.ip_v6; - vxlrt->vxlrt_flags |= IFBAF_ADDR_V6; - } - } - - /* Entry is marked used */ - rte_atomic32_clear(&vxlrt->vxlrt_unused); -} - -static void -vxlan_rtnode_free(struct rcu_head *head) -{ - free(caa_container_of(head, struct vxlan_rtnode, vxlrt_rcu)); -} - -/* Destroy a vxlan rtnode. */ -static void -vxlan_rtnode_destroy(struct vxlan_rtnode *vxlrt) -{ - call_rcu(&vxlrt->vxlrt_rcu, vxlan_rtnode_free); -} - -/* Create lock free hash table. 
*/ -static void -vxlan_rtable_init(struct vxlan_softc *sc) -{ - sc->scvx_rthash = cds_lfht_new(VXLAN_RTHASH_MIN, - VXLAN_RTHASH_MIN, - VXLAN_RTHASH_MAX, - CDS_LFHT_AUTO_RESIZE, - NULL); - if (sc->scvx_rthash == NULL) - rte_panic("Can't allocate rthash\n"); -} - /* Should route entry be expired? * For dynamic entries only, check if it has been used. * for more than VXLAN_RTABLE_EXPIRE intervals. @@ -1351,14 +1284,14 @@ static void vxlan_timer(struct rte_timer *timer __rte_unused, void *arg) struct cds_lfht_iter iter; struct vxlan_rtnode *vxlrt; - rcu_read_lock(); + dp_rcu_read_lock(); cds_lfht_for_each_entry(sc->scvx_rthash, &iter, vxlrt, vxlrt_node) { if (vxlan_rtexpired(vxlrt)) { cds_lfht_del(sc->scvx_rthash, &vxlrt->vxlrt_node); vxlan_rtnode_destroy(vxlrt); } } - rcu_read_unlock(); + dp_rcu_read_unlock(); } @@ -1463,7 +1396,7 @@ vxlaninfo_attr(const struct nlattr *attr, void *data) return MNL_CB_OK; } -static void set_vxlan_params(struct ifnet *ifp, +static bool set_vxlan_params(struct ifnet *ifp, struct vxlan_vninode *vninode, struct nlattr **vxlaninfo, struct nlattr *tb[], uint flags __unused) @@ -1476,9 +1409,17 @@ static void set_vxlan_params(struct ifnet *ifp, if (vxlaninfo[IFLA_VXLAN_LINK]) { uint32_t pifi = mnl_attr_get_u32(vxlaninfo[IFLA_VXLAN_LINK]); - struct ifnet *pifp = ifnet_byifindex(pifi); + struct ifnet *pifp = dp_ifnet_byifindex(pifi); + + if (!pifp) { + RTE_LOG(ERR, VXLAN, + "vxlan %s(%u) missing parent interface\n", + ifp->if_name, ifp->if_index); + return false; + } ifp->if_parent = pifp; + /* Only use link MTU if MTU not explicitly configured */ if (!tb[IFLA_MTU]) ifp->if_mtu = pifp->if_mtu - VXLAN_OVERHEAD; @@ -1518,6 +1459,8 @@ static void set_vxlan_params(struct ifnet *ifp, /* TODO: dynamically allocate source port range */ vninode->port_low = VXLAN_PORT_LOW; vninode->port_high = VXLAN_PORT_HIGH; + + return true; } /* Handle RTM_NEWLINK netlink on existing vxlan interface */ @@ -1547,7 +1490,12 @@ void vxlan_modify(struct ifnet *ifp, uint 
flags, struct nlattr *tb[], ifp->if_name, vni); return; } - set_vxlan_params(ifp, vninode, vxlaninfo, tb, flags); + if (!set_vxlan_params(ifp, + vninode, vxlaninfo, tb, flags)){ + RTE_LOG(ERR, VXLAN, "%s failed to set VXLAN parameters\n", + ifp->if_name); + return; + } } static bool @@ -1564,7 +1512,7 @@ setup_vxlan(struct ifnet *ifp, uint flags, mnl_attr_get_u32(tb[IFLA_LINK])); if (iflink != 0) { - struct ifnet *pifp = ifnet_byifindex(iflink); + struct ifnet *pifp = dp_ifnet_byifindex(iflink); if (pifp) ifp->if_parent = pifp; @@ -1621,16 +1569,22 @@ setup_vxlan(struct ifnet *ifp, uint flags, sc = ifp->if_softc; sc->scvx_vni = vni; - set_vxlan_params(ifp, vninode, vxlaninfo, tb, flags); + if (!set_vxlan_params(ifp, + vninode, vxlaninfo, tb, flags)){ + RTE_LOG(ERR, VXLAN, "%s failed to set vxlan parameters\n", + ifp->if_name); + return false; + } + return true; } /* Create vxlan in response to netlink */ struct ifnet * vxlan_create(const struct ifinfomsg *ifi, const char *ifname, - const struct ether_addr *addr, + const struct rte_ether_addr *addr, struct nlattr *tb[], struct nlattr *data, - enum cont_src_en cont_src, const struct nlmsghdr *nlh) + enum cont_src_en cont_src) { struct nlattr *vxlaninfo[IFLA_VXLAN_MAX+1] = { NULL }; struct ifnet *ifp; @@ -1652,26 +1606,20 @@ vxlan_create(const struct ifinfomsg *ifi, const char *ifname, if_link = vxlaninfo[IFLA_VXLAN_LINK]; if (if_link) { - unsigned int link_idx, if_idx; + unsigned int link_idx; link_idx = cont_src_ifindex(cont_src, mnl_attr_get_u32(if_link)); - if_idx = cont_src_ifindex(cont_src, - ifi->ifi_index); if (link_idx != 0) { - struct ifnet *pifp = ifnet_byifindex(link_idx); + struct ifnet *pifp = dp_ifnet_byifindex(link_idx); - if (!pifp) { - missed_nl_child_link_add(link_idx, - if_idx, - nlh); + if (!pifp) return NULL; - } } } - ifp = if_alloc(ifname, IFT_VXLAN, mtu, addr, SOCKET_ID_ANY); + ifp = if_alloc(ifname, IFT_VXLAN, mtu, addr, SOCKET_ID_ANY, NULL); if (!ifp) { RTE_LOG(ERR, DATAPLANE, "out of memory 
for vxlan_ifnet\n"); @@ -1690,7 +1638,7 @@ vxlan_create(const struct ifinfomsg *ifi, const char *ifname, return ifp; } -static int vxlan_if_init(struct ifnet *ifp) +static int vxlan_if_init(struct ifnet *ifp, void *ctx __unused) { struct vxlan_softc *sc; @@ -1746,15 +1694,14 @@ static uint8_t ndmstate_to_flags(uint16_t state) { if (state & NUD_PERMANENT) return IFBAF_LOCAL; - else if (state & NUD_NOARP) + if (state & NUD_NOARP) return IFBAF_STATIC; - else - return IFBAF_DYNAMIC; + return IFBAF_DYNAMIC; } static void vxlan_newneigh(int ifindex, struct in_addr *addr, - const struct ether_addr *dst, + const struct rte_ether_addr *dst, uint16_t state) { struct ifnet *ifp; @@ -1762,7 +1709,7 @@ static void vxlan_newneigh(int ifindex, struct vxlan_rtnode *vrt; int err; - ifp = ifnet_byifindex(ifindex); + ifp = dp_ifnet_byifindex(ifindex); if (!ifp) return; /* not a DPDK interface */ @@ -1794,12 +1741,12 @@ static void vxlan_newneigh(int ifindex, } } -static void vxlan_delneigh(int ifindex, const struct ether_addr *dst) +static void vxlan_delneigh(int ifindex, const struct rte_ether_addr *dst) { struct ifnet *ifp; struct vxlan_softc *sc; - ifp = ifnet_byifindex(ifindex); + ifp = dp_ifnet_byifindex(ifindex); if (!ifp) return; /* not a DPDK interface */ @@ -1812,7 +1759,7 @@ static void vxlan_delneigh(int ifindex, const struct ether_addr *dst) vxlan_rtnode_destroy(vrt); } else { - rcu_read_unlock(); + dp_rcu_read_unlock(); RTE_LOG(NOTICE, VXLAN, "delneigh for %s but on %s not a in forwarding table\n", ether_ntoa(dst), ifp->if_name); @@ -1823,7 +1770,7 @@ int vxlan_neigh_change(const struct nlmsghdr *nlh, const struct ndmsg *ndm, struct nlattr *tb[]) { - const struct ether_addr *lladdr; + const struct rte_ether_addr *lladdr; struct in_addr ipaddr; in_addr_t *ip; @@ -1859,7 +1806,14 @@ int vxlan_neigh_change(const struct nlmsghdr *nlh, return MNL_CB_ERROR; } - struct ifnet *ifp = ifnet_byifindex(ndm->ndm_ifindex); + struct ifnet *ifp = dp_ifnet_byifindex(ndm->ndm_ifindex); + 
+ if (ifp == NULL) { + RTE_LOG(NOTICE, VXLAN, + "No interface for %d\n", + ndm->ndm_ifindex); + return MNL_CB_ERROR; + } if (is_local_ipv4(if_vrfid(ifp), ipaddr.s_addr)) { RTE_LOG(NOTICE, VXLAN, @@ -1899,7 +1853,7 @@ vxlan_vniable_init(void) } -void +static void vxlan_init(void) { vxlans = malloc(sizeof(struct vxlan_vnitbl)); @@ -1925,7 +1879,7 @@ vxlan_init(void) rte_panic("cannot initialise vxlan-gpe ipv6 handler\n"); } -void vxlan_destroy(void) +static void vxlan_destroy(void) { udp_handler_unregister(AF_INET, htons(VXLAN_PORT)); udp_handler_unregister(AF_INET, htons(VXLAN_GPE_PORT)); @@ -1933,75 +1887,12 @@ void vxlan_destroy(void) udp_handler_unregister(AF_INET6, htons(VXLAN_GPE_PORT)); } -/* - * vxlan_set_flags - * - * set/clear flags that define the behavior of a vxlan - */ -void vxlan_set_flags(struct ifnet *ifp, uint32_t flags, bool set) -{ - struct vxlan_vninode *vnode; - - vnode = vxlan_vni_lookup(vxlan_get_vni(ifp)); - if (vnode) { - if (set) - vnode->flags |= flags; - else - vnode->flags &= (~flags); - } -} - -uint32_t vxlan_get_flags(struct ifnet *ifp) -{ - if (ifp->if_type != IFT_VXLAN) - return 0; - - struct vxlan_vninode *vnode = vxlan_vni_lookup(vxlan_get_vni(ifp)); - - return vnode ? vnode->flags : 0; -} - -/* bind vxlan to specified device. 
Used to allow delayed binding */ -void vxlan_set_device(struct ifnet *vxl_ifp, struct ifnet *ifp) -{ - uint16_t ifmtu = VXLAN_MTU; - - if (ifp) - ifmtu = ifp->if_mtu - VXLAN_OVERHEAD; - - vxl_ifp->if_parent = ifp; - vxl_ifp->if_mtu = ifmtu; -} - /* Update mtu of all VXLAN interfaces bound to specified device */ void vxlan_mtu_update(struct ifnet *ifp) { vxlan_tbl_walk(vxlan_walker_update_mtu, ifp); } -void vxlan_set_t_vrfid(struct ifnet *ifp, vrfid_t t_vrfid) -{ - struct vxlan_vninode *vnode; - - if (ifp == NULL) - return; - vnode = vxlan_vni_lookup(vxlan_get_vni(ifp)); - if (vnode == NULL) - return; - if (vnode->t_vrfid == t_vrfid) - return; - if (t_vrfid == VRF_INVALID_ID) - return; - vrf_delete(vnode->t_vrfid); - - if (vrf_find_or_create(t_vrfid) == NULL) { - vnode->t_vrfid = VRF_INVALID_ID; - RTE_LOG(ERR, VXLAN, "vxlan %s(%u) missing vrf %u\n", - ifp->if_name, ifp->if_index, t_vrfid); - } else - vnode->t_vrfid = t_vrfid; -} - /* * vxlan_show_stats * @@ -2043,7 +1934,7 @@ static void vxlan_show_macs_one(struct vxlan_vninode *vni, char addr_str[INET_ADDRSTRLEN]; uint8_t type; - rcu_read_lock(); + dp_rcu_read_lock(); jsonw_start_object(wr); jsonw_string_field(wr, "intf", ifp->if_name); jsonw_name(wr, "entries"); @@ -2071,7 +1962,7 @@ static void vxlan_show_macs_one(struct vxlan_vninode *vni, } jsonw_end_array(wr); jsonw_end_object(wr); - rcu_read_unlock(); + dp_rcu_read_unlock(); } static void vxlan_show_macs(FILE *f, int argc __unused, char *argv[] __unused) @@ -2113,7 +2004,7 @@ static void vxlan_clear_all_macs(void) } -static int vxlan_clear_one_mac(struct ifnet *ifp, struct ether_addr *mac) +static int vxlan_clear_one_mac(struct ifnet *ifp, struct rte_ether_addr *mac) { struct vxlan_rtnode *vxlrt; struct vxlan_softc *sc = ifp->if_softc; @@ -2130,19 +2021,19 @@ static int vxlan_clear_one_mac(struct ifnet *ifp, struct ether_addr *mac) static void vxlan_clear_macs(FILE *f, int argc, char *argv[]) { struct ifnet *ifp; - struct ether_addr mac; + struct 
rte_ether_addr mac; if (argc == 1) { vxlan_clear_all_macs(); } else if (argc == 2) { - ifp = ifnet_byifname(argv[1]); + ifp = dp_ifnet_byifname(argv[1]); if (!ifp) { fprintf(f, "Could not find interface %s\n", argv[1]); return; } vxlan_clear_intf_macs(ifp); } else if (argc == 3) { - ifp = ifnet_byifname(argv[1]); + ifp = dp_ifnet_byifname(argv[1]); if (!ifp) { fprintf(f, "Could not find interface %s\n", argv[1]); return; @@ -2229,22 +2120,34 @@ vxlan_if_dump(struct ifnet *ifp, json_writer_t *wr, return 0; } +static enum dp_ifnet_iana_type +vxlan_iana_type(struct ifnet *ifp __unused) +{ + return DP_IFTYPE_IANA_TUNNEL; +} + static const struct ift_ops vxlan_if_ops = { .ifop_set_mtu = vxlan_set_mtu, .ifop_init = vxlan_if_init, .ifop_uninit = vxlan_if_uninit, .ifop_dump = vxlan_if_dump, + .ifop_iana_type = vxlan_iana_type, }; static void vxlan_type_init(void) { - int ret = if_register_type(IFT_VXLAN, &vxlan_if_ops); + int ret; + + vxlan_init(); + + ret = if_register_type(IFT_VXLAN, &vxlan_if_ops); if (ret < 0) rte_panic("Failed to register VXLAN type: %s", strerror(-ret)); } static const struct dp_event_ops vxlan_events = { .init = vxlan_type_init, + .uninit = vxlan_destroy, }; DP_STARTUP_EVENT_REGISTER(vxlan_events); diff --git a/src/vxlan.h b/src/if/vxlan.h similarity index 77% rename from src/vxlan.h rename to src/if/vxlan.h index e9bd9cb1..387cdf7e 100644 --- a/src/vxlan.h +++ b/src/if/vxlan.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2019, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2011-2017 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -27,6 +27,7 @@ #include #include #include +#include #include "control.h" #include "ip_addr.h" @@ -80,7 +81,7 @@ struct vxlan_rtnode { rte_atomic32_t vxlrt_unused; /* 0 = used */ uint8_t vxlrt_flags; /* address flags */ uint16_t vxlrt_expire; - struct ether_addr vxlrt_addr; + struct rte_ether_addr vxlrt_addr; struct rcu_head vxlrt_rcu; /* for deletion via rcu */ uint32_t vni; /* destination vni */ }; @@ -113,34 +114,30 @@ struct vxlan_vnitbl { }; struct vxlan_ipv4_encap { - struct ether_hdr ether_header; + struct rte_ether_hdr ether_header; struct iphdr ip_header __attribute__ ((__packed__)); - struct udp_hdr udp_header; - struct vxlan_hdr vxlan_header; -} __attribute__ ((__packed__)); + struct rte_udp_hdr udp_header; + struct rte_vxlan_hdr vxlan_header; +} __attribute__ ((__packed__)) __attribute__((aligned(2))); struct vxlan_ipv6_encap { - struct ether_hdr ether_header; + struct rte_ether_hdr ether_header; struct ip6_hdr ip6_header __attribute__ ((__packed__)); - struct udp_hdr udp_header; - struct vxlan_hdr vxlan_header; -} __attribute__ ((__packed__)); + struct rte_udp_hdr udp_header; + struct rte_vxlan_hdr vxlan_header; +} __attribute__ ((__packed__)) __attribute__((aligned(2))); #define VXLAN_OVERHEAD (sizeof(struct vxlan_ipv6_encap)) #define VXLAN_MTU (1500 - VXLAN_OVERHEAD) /* VXLAN Functions */ -void vxlan_init(void); -void vxlan_destroy(void); void vxlan_output(struct ifnet *ifp, struct rte_mbuf *m, uint16_t proto); struct ifnet *vxlan_create(const struct ifinfomsg *ifi, const char *ifname, - const struct ether_addr *eth_addr, + const struct rte_ether_addr *eth_addr, struct nlattr *tb[], struct nlattr *data, - enum cont_src_en cont_src, - const struct nlmsghdr *nlh); + enum cont_src_en cont_src); void vxlan_modify(struct ifnet *ifp, uint flags, struct nlattr *tb[], struct nlattr *data); -struct ifnet *vxlan_find_if(uint32_t vni); int vxlan_neigh_change(const struct nlmsghdr *nlh, const struct ndmsg *ndm, struct nlattr *tb[]); @@ -151,19 +148,6 @@ void 
vxlan_tbl_walk(vxlan_walker_t walk_func, void *ctx); /* update MTU of all VXLANs bound to specified device */ void vxlan_mtu_update(struct ifnet *ifp); -/* - * vxlan flags - */ -void vxlan_set_flags(struct ifnet *ifp, uint32_t flags, bool set); -uint32_t vxlan_get_flags(struct ifnet *ifp); - -/* associate ethernet device with vxlan */ -void vxlan_set_device(struct ifnet *vxl_ifp, struct ifnet *ifp); -/* set vxlan transport vrf */ -void vxlan_set_t_vrfid(struct ifnet *ifp, vrfid_t t_vrfid); -/* Send already l3 encapped packet for vxlan */ -void vxlan_send_encapped(struct rte_mbuf *m, struct ifnet *ifp, uint8_t af); - uint32_t vxlan_get_vni(struct ifnet *ifp); int cmd_vxlan(FILE *f, int argc, char **argv); diff --git a/src/if_ether.c b/src/if_ether.c index 7018905a..c6835ee8 100644 --- a/src/if_ether.c +++ b/src/if_ether.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2021, AT&T Intellectual Property. All rights reserved. * Copyright (c) 1982, 1986, 1988, 1993 * The Regents of the University of California. All rights reserved. * @@ -50,6 +50,7 @@ #include "arp.h" #include "compat.h" #include "control.h" +#include "dp_event.h" #include "ether.h" #include "fal.h" #include "fal_plugin.h" @@ -58,7 +59,7 @@ #include "if_var.h" #include "main.h" #include "netinet6/nd6_nbr.h" -#include "pktmbuf.h" +#include "pktmbuf_internal.h" #include "urcu.h" #include "util.h" #include "vplane_debug.h" @@ -97,7 +98,7 @@ static const char *lladdr_ntop(struct llentry *la) * There is no atomic 6 byte copy, but 64 bit operations are atomic * on 64 bit CPU's. */ -void ll_addr_set(struct llentry *lle, const struct ether_addr *eth) +void ll_addr_set(struct llentry *lle, const struct rte_ether_addr *eth) { union llentry_addr tmp; @@ -109,9 +110,13 @@ void ll_addr_set(struct llentry *lle, const struct ether_addr *eth) /* Update existing link-layer addr table entry. 
*/ void lladdr_update(struct ifnet *ifp, struct llentry *la, - const struct ether_addr *enaddr, uint16_t flags) + const struct rte_ether_addr *enaddr, uint16_t flags) { + struct rte_mbuf *la_held[ARP_MAXHOLD]; + struct rte_ether_addr old_enaddr; char b1[20], b2[20]; + int la_numheld = 0; + bool was_valid; if (!enaddr) { RTE_LOG(ERR, LLADDR, "update with no mac addr\n"); @@ -119,77 +124,82 @@ void lladdr_update(struct ifnet *ifp, struct llentry *la, } rte_spinlock_lock(&la->ll_lock); - if (la->la_flags & LLE_VALID) { - if (ether_addr_equal(enaddr, &la->ll_addr)) { - rte_spinlock_unlock(&la->ll_lock); - return; - } - - /* static update can update an existing static entry */ + was_valid = la->la_flags & LLE_VALID; + if (!was_valid) + flags |= LLE_VALID; - if (la->la_flags & LLE_STATIC && !(flags & LLE_STATIC)) { - rte_spinlock_unlock(&la->ll_lock); - RTE_LOG(NOTICE, LLADDR, - "%s attempt to modify static entry %s on %s\n", - ether_ntoa_r(enaddr, b1), - lladdr_ntop(la), - ifp->if_name); + /* static update can update an existing static entry */ - return; - } + if (la->la_flags & LLE_STATIC && !(flags & LLE_STATIC)) { + rte_spinlock_unlock(&la->ll_lock); + RTE_LOG(NOTICE, LLADDR, + "%s attempt to modify static entry %s on %s\n", + ether_ntoa_r(enaddr, b1), + lladdr_ntop(la), + ifp->if_name); + return; + } + old_enaddr = la->ll_addr; + if (!was_valid || !rte_ether_addr_equal(enaddr, &la->ll_addr)) { ll_addr_set(la, enaddr); - la->la_flags |= flags; /* - * We have had an address change so it needs to be signalled - * to the hardware, mark it as incomplete in the hardware so - * that the master thread can pick this up and send an update + * Ensure the write to the address is seen by readers + * before the write to the flags below. 
*/ - la->la_flags |= LLE_HW_UPD_PENDING; - rte_timer_reset(&ifp->if_lltable->lle_timer, 0, - SINGLE, rte_get_master_lcore(), - lladdr_timer, ifp->if_lltable); - - rte_spinlock_unlock(&la->ll_lock); - - LLADDR_DEBUG("%s moved from %s to %s on %s\n", - lladdr_ntop(la), - ether_ntoa_r(&la->ll_addr, b1), - ether_ntoa_r(enaddr, b2), - ifp->if_name); - - } else { - ll_addr_set(la, enaddr); rte_wmb(); - la->la_flags |= (LLE_VALID | flags); /* - * Fire the timer for this table immediately on master. It - * doesn't matter if it fails as it will get picked up on - * the next firing in that case. + * We have had an address change so it needs + * to be signalled to the hardware, mark it as + * incomplete in the hardware so that the + * main thread can pick this up and send an + * update */ + if (if_is_features_mode_active( + ifp, IF_FEAT_MODE_EVENT_L3_FAL_ENABLED)) + la->la_flags |= LLE_HW_UPD_PENDING; + + /* fire timer to update hardware and/or install routes */ rte_timer_reset(&ifp->if_lltable->lle_timer, 0, SINGLE, rte_get_master_lcore(), lladdr_timer, ifp->if_lltable); + } + la->la_flags |= flags; - int la_numheld = la->la_numheld; - struct rte_mbuf *la_held[ARP_MAXHOLD]; - + if (!was_valid) { + la_numheld = la->la_numheld; for (int i = 0; i < la_numheld; ++i) { la_held[i] = la->la_held[i]; la->la_held[i] = NULL; } la->la_numheld = 0; - rte_spinlock_unlock(&la->ll_lock); + } + + rte_spinlock_unlock(&la->ll_lock); + + if (was_valid) { + if (!rte_ether_addr_equal(enaddr, &old_enaddr)) + LLADDR_DEBUG("%s moved from %s to %s on %s\n", + lladdr_ntop(la), + ether_ntoa_r(&old_enaddr, b1), + ether_ntoa_r(enaddr, b2), + ifp->if_name); + } else { + LLADDR_DEBUG("entry for %s resolved to %s\n", + lladdr_ntop(la), + ether_ntoa_r(enaddr, b1)); + } + if (!was_valid) { /* now valid: release any pending packets */ for (int i = 0; i < la_numheld; i++) { struct rte_mbuf *m = la_held[i]; - struct ether_hdr *eh; + struct rte_ether_hdr *eh; /* fill in destination in held packet and send it */ 
- eh = rte_pktmbuf_mtod(m, struct ether_hdr *); + eh = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); - ether_addr_copy(enaddr, &eh->d_addr); + rte_ether_addr_copy(enaddr, &eh->d_addr); /* * Note: even though this may be a forwarded * packet, NULL is passed in for the input @@ -200,36 +210,33 @@ void lladdr_update(struct ifnet *ifp, struct llentry *la, */ if_output(ifp, m, NULL, htons(eh->ether_type)); } - - LLADDR_DEBUG("entry for %s resolved to %s\n", - lladdr_ntop(la), - ether_ntoa_r(enaddr, b1)); } /* entry updated */ rte_atomic16_clear(&la->ll_idle); - la->ll_expire = rte_get_timer_cycles() + rte_get_timer_hz() * ARPT_KEEP; + + /* Expiry time is updated in ll_age() for entries not just added */ + if (la->ll_expire) + return; + + la->ll_expire = rte_get_timer_cycles() + + rte_get_timer_hz() * ARP_CFG(arp_aging_time); /* Extend the timeout for locally created proxy entries */ - if (la->la_flags & (LLE_LOCAL | LLE_PROXY)) - la->ll_expire += rte_get_timer_hz() * ARPT_KEEP; + if ((la->la_flags & LLE_LOCAL) && (la->la_flags & LLE_PROXY)) + la->ll_expire += rte_get_timer_hz() * ARP_CFG(arp_aging_time); } static int lladdr_add(struct ifnet *ifp, struct sockaddr *sock, - const struct ether_addr *mac, + const struct rte_ether_addr *mac, uint16_t state, uint8_t ntf_flags) { struct llentry *lle; uint16_t flags = LLE_CREATE; - if (state & NUD_PERMANENT) - flags |= LLE_STATIC; - switch (sock->sa_family) { case AF_INET: - lle = in_lltable_lookup(ifp, flags, - satosin(sock)->sin_addr.s_addr); break; case AF_INET6: @@ -240,27 +247,41 @@ lladdr_add(struct ifnet *ifp, struct sockaddr *sock, return -1; } - if (lle == NULL) { - RTE_LOG(NOTICE, LLADDR, "lladdr_add create failed\n"); - return -1; - } + if (!(state & (NUD_PERMANENT | NUD_NOARP | NUD_REACHABLE | NUD_FAILED))) + return 0; + + /* Do not create an entry as a result of a fail notification */ + if (state & NUD_FAILED) + flags = 0; + + if (state & NUD_PERMANENT) + flags |= LLE_STATIC; + + lle = in_lltable_lookup(ifp, flags, 
satosin(sock)->sin_addr.s_addr); + + if (state & NUD_FAILED) { + + /* Ignore fail notification unless entry exists */ + if (!lle) + return 0; - if (state & (NUD_STALE|NUD_REACHABLE|NUD_NOARP|NUD_PERMANENT)) { + rte_spinlock_lock(&lle->ll_lock); + arp_entry_destroy(ifp->if_lltable, lle); + rte_spinlock_unlock(&lle->ll_lock); + } else { uint16_t new_flags = 0; + if (!lle) { + RTE_LOG(NOTICE, LLADDR, "lladdr add: create failed\n"); + return -1; + } + if (state & NUD_PERMANENT) new_flags |= LLE_STATIC; if (ntf_flags & NTF_PROXY) new_flags |= LLE_PROXY; lladdr_update(ifp, lle, mac, new_flags); - } else { - rte_spinlock_lock(&lle->ll_lock); - lle->la_flags &= ~(LLE_VALID | LLE_STATIC); - - pktmbuf_free_bulk(lle->la_held, lle->la_numheld); - lle->la_numheld = 0; - rte_spinlock_unlock(&lle->ll_lock); } return 0; @@ -290,7 +311,7 @@ lladdr_delete(struct ifnet *ifp, struct sockaddr *addr) */ void lladdr_nl_event(int family, struct ifnet *ifp, uint16_t type, const struct ndmsg *ndm, - const void *dst, const struct ether_addr *lladdr) + const void *dst, const struct rte_ether_addr *lladdr) { struct sockaddr_storage saddr; @@ -330,12 +351,8 @@ void lladdr_nl_event(int family, struct ifnet *ifp, uint16_t type, switch (type) { case RTM_NEWNEIGH: - if (lladdr) - lladdr_add(ifp, (struct sockaddr *) &saddr, lladdr, - ndm->ndm_state, ndm->ndm_flags); - else - RTE_LOG(NOTICE, LLADDR, - "NEWNEIGH without link layer address?\n"); + lladdr_add(ifp, (struct sockaddr *) &saddr, lladdr, + ndm->ndm_state, ndm->ndm_flags); break; case RTM_DELNEIGH: @@ -350,7 +367,7 @@ void lladdr_nl_event(int family, struct ifnet *ifp, uint16_t type, } /* Send a new ll request probe for entry that has not responded. - * Since this runs in master lcore, and that can't send directly, + * Since this runs in main lcore, and that can't send directly, * it intrudes into shadow output ring to send the packet. 
*/ static void ll_probe(struct lltable *llt, struct llentry *la) @@ -381,13 +398,10 @@ static void ll_probe(struct lltable *llt, struct llentry *la) if (m) if_output(ifp, m, NULL, htons(ethhdr(m)->ether_type)); } else { - unsigned int pkts_dropped; - - pkts_dropped = llentry_destroy(llt, la); + arp_entry_destroy(llt, la); rte_spinlock_unlock(&la->ll_lock); ARPSTAT_INC(if_vrfid(ifp), timeouts); - ARPSTAT_ADD(if_vrfid(ifp), dropped, pkts_dropped); LLADDR_DEBUG("retries exhausted for %s\n", lladdr_ntop(la)); } @@ -396,17 +410,19 @@ static void ll_probe(struct lltable *llt, struct llentry *la) static void ll_age(struct lltable *llt, struct llentry *lle, uint64_t cur_time) { if (llentry_has_been_used_and_clear(lle)) { - lle->ll_expire = cur_time + rte_get_timer_hz() * ARPT_KEEP; + lle->ll_expire = + cur_time + rte_get_timer_hz() * ARP_CFG(arp_aging_time); /* Extend the timeout for locally created proxy entries */ - if (lle->la_flags & (LLE_LOCAL | LLE_PROXY)) - lle->ll_expire += rte_get_timer_hz() * ARPT_KEEP; + if ((lle->la_flags & LLE_LOCAL) && (lle->la_flags & LLE_PROXY)) + lle->ll_expire += + rte_get_timer_hz() * ARP_CFG(arp_aging_time); } else if ((int64_t)(cur_time - lle->ll_expire) >= 0) { LLADDR_DEBUG("expire entry for %s, flags %#x\n", lladdr_ntop(lle), lle->la_flags); rte_spinlock_lock(&lle->ll_lock); - llentry_destroy(llt, lle); + arp_entry_destroy(llt, lle); rte_spinlock_unlock(&lle->ll_lock); } } @@ -433,67 +449,34 @@ llentry_routing_install(struct llentry *lle) /* walk the ll addr table and look for entries that have been used */ void lladdr_timer(struct rte_timer *tim __rte_unused, void *arg) { - int ret = 0; - bool new = false; - bool upd = false; - uint32_t attr_count = 0; struct lltable *llt = arg; struct llentry *lle; - char b[INET_ADDRSTRLEN]; - struct sockaddr_in *sin; struct cds_lfht_iter iter; - struct fal_attribute_t attr_list[2]; + bool refresh_timer_expired = false; uint64_t cur_time = rte_get_timer_cycles(); - rcu_read_lock(); + if 
(llt->lle_refresh_expire < cur_time) { + refresh_timer_expired = true; + + /* one second later */ + llt->lle_refresh_expire = cur_time + rte_get_timer_hz(); + } + + dp_rcu_read_lock(); cds_lfht_for_each_entry(llt->llt_hash, &iter, lle, ll_node) { /* * If the delete flag is set (which can be done on any - * core) do the actual delete here on master + * core) do the actual delete here on main */ - sin = (struct sockaddr_in *) ll_sockaddr(lle); if (lle->la_flags & LLE_DELETED) { rte_spinlock_lock(&lle->ll_lock); __llentry_destroy(llt, lle); rte_spinlock_unlock(&lle->ll_lock); continue; } - rte_spinlock_lock(&lle->ll_lock); - if (lle->la_flags & LLE_HW_UPD_PENDING) { - if (lle->la_flags & LLE_VALID) { - lle->la_flags &= ~LLE_HW_UPD_PENDING; - upd = true; - attr_list[0].id = - FAL_NEIGH_ENTRY_ATTR_DST_MAC_ADDRESS; - attr_list[0].value.mac = lle->ll_addr; - attr_count++; - } - if (!(lle->la_flags & LLE_CREATED_IN_HW)) { - new = true; - lle->la_flags |= LLE_CREATED_IN_HW; - } - } - rte_spinlock_unlock(&lle->ll_lock); - if (new) { - ret = fal_ip4_new_neigh(lle->ifp->if_index, - sin, attr_count, attr_list); - if (ret < 0 && ret != -EOPNOTSUPP) { - RTE_LOG(NOTICE, DATAPLANE, - "FAL new neighbour %s, %s failed: %s\n", - inet_ntop(AF_INET, &sin, b, sizeof(b)), - lle->ifp->if_name, strerror(-ret)); - } - } else if (upd) { - ret = fal_ip4_upd_neigh(lle->ifp->if_index, sin, - attr_list); - if (ret < 0) { - RTE_LOG(NOTICE, DATAPLANE, - "FAL neighbour mac update for %s, %s failed: %s\n", - inet_ntop(AF_INET, &sin, b, sizeof(b)), - lle->ifp->if_name, strerror(-ret)); - } - } + llentry_issue_pending_fal_updates(lle); + if ((lle->la_flags & (LLE_VALID | LLE_FWDING)) == LLE_VALID) llentry_routing_install(lle); @@ -502,20 +485,24 @@ void lladdr_timer(struct rte_timer *tim __rte_unused, void *arg) /* retry incomplete entry */ if ((lle->la_flags & (LLE_LOCAL | LLE_VALID)) == LLE_LOCAL) { - if (lltable_probe_timer_is_enabled()) + if (lltable_probe_timer_is_enabled() && + 
refresh_timer_expired) ll_probe(llt, lle); - } else if (lle->ll_expire == 0) { + } else if (lle->ll_expire == 0 || !refresh_timer_expired) { continue; } else if (lle->la_flags & LLE_VALID || lle->la_flags == 0) { ll_age(llt, lle, cur_time); } } - rte_timer_reset(&llt->lle_timer, rte_get_timer_hz(), + cur_time = rte_get_timer_cycles(); + rte_timer_reset(&llt->lle_timer, + llt->lle_refresh_expire < cur_time ? 0 : + llt->lle_refresh_expire - cur_time, SINGLE, rte_get_master_lcore(), lladdr_timer, llt); - rcu_read_unlock(); + dp_rcu_read_unlock(); } static void lladdr_flush(struct ifnet *ifp, void *cont_src_p) @@ -530,5 +517,27 @@ static void lladdr_flush(struct ifnet *ifp, void *cont_src_p) void lladdr_flush_all(enum cont_src_en cont_src) { - ifnet_walk(lladdr_flush, &cont_src); + dp_ifnet_walk(lladdr_flush, &cont_src); } + +static void +lladdr_if_feat_mode_change(struct ifnet *ifp, + enum if_feat_mode_event event) +{ + if (event != IF_FEAT_MODE_EVENT_L3_FAL_ENABLED && + event != IF_FEAT_MODE_EVENT_L3_FAL_DISABLED) + return; + + if (lltable_fal_l3_change( + ifp->if_lltable, + event == IF_FEAT_MODE_EVENT_L3_FAL_ENABLED)) + rte_timer_reset(&ifp->if_lltable->lle_timer, 0, + SINGLE, rte_get_master_lcore(), + lladdr_timer, ifp->if_lltable); +} + +static const struct dp_event_ops lladdr_events = { + .if_feat_mode_change = lladdr_if_feat_mode_change, +}; + +DP_STARTUP_EVENT_REGISTER(lladdr_events); diff --git a/src/if_ether.h b/src/if_ether.h index 3d1c4aaa..9a179ebd 100644 --- a/src/if_ether.h +++ b/src/if_ether.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2011-2016 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -11,7 +11,7 @@ #include -struct ether_addr; +struct rte_ether_addr; struct ifnet; struct llentry; struct ndmsg; @@ -20,15 +20,15 @@ struct rte_timer; enum cont_src_en; void lladdr_update(struct ifnet *ifp, struct llentry *la, - const struct ether_addr *enaddr, uint16_t flags); + const struct rte_ether_addr *enaddr, uint16_t flags); void lladdr_timer(struct rte_timer *, void *arg); void in6_lladdr_timer(struct rte_timer *tim, void *arg); void lladdr_nl_event(int family, struct ifnet *ifp, uint16_t type, const struct ndmsg *ndm, const void *dst, - const struct ether_addr *lladdr); + const struct rte_ether_addr *lladdr); void lladdr_flush_all(enum cont_src_en cont_src); -void ll_addr_set(struct llentry *lle, const struct ether_addr *eth); +void ll_addr_set(struct llentry *lle, const struct rte_ether_addr *eth); /* Call this to link to routes when an lle transitions to VALID */ void llentry_routing_install(struct llentry *lle); diff --git a/src/if_feat.c b/src/if_feat.c index 350ea068..cfa0ae03 100644 --- a/src/if_feat.c +++ b/src/if_feat.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2019-2020, AT&T Intellectual Property. All rights reserved. 
* * SPDX-License-Identifier: LGPL-2.1-only */ @@ -15,9 +15,8 @@ #include #include "util.h" -#include "vrf.h" +#include "vrf_internal.h" #include "if_var.h" -#include "dpi_public.h" #include "pl_node.h" #include "pipeline/nodes/pl_nodes_common.h" #include "vplane_log.h" @@ -143,7 +142,7 @@ if_feat_all_refcnt_incr_cb(struct ifnet *ifp, void *arg) void if_feat_all_refcnt_incr(enum if_feat_flag ffl) { - ifnet_walk(if_feat_all_refcnt_incr_cb, &ffl); + dp_ifnet_walk(if_feat_all_refcnt_incr_cb, &ffl); } /* @@ -160,7 +159,7 @@ if_feat_all_refcnt_decr_cb(struct ifnet *ifp, void *arg) void if_feat_all_refcnt_decr(enum if_feat_flag ffl) { - ifnet_walk(if_feat_all_refcnt_decr_cb, &ffl); + dp_ifnet_walk(if_feat_all_refcnt_decr_cb, &ffl); } /* diff --git a/src/if_feat.h b/src/if_feat.h index e777477a..c3b8e136 100644 --- a/src/if_feat.h +++ b/src/if_feat.h @@ -23,14 +23,17 @@ enum if_feat_enum { IF_FEAT_ACL_IN, IF_FEAT_ACL_OUT, IF_FEAT_DEFRAG, + IF_FEAT_DEFRAG_SPATH, IF_FEAT_FW, + IF_FEAT_FW_ORIG, IF_FEAT_PBR, IF_FEAT_NPTV6, IF_FEAT_DPI, IF_FEAT_CGNAT, + IF_FEAT_NAT64, }; #define IF_FEAT_FIRST IF_FEAT_ACL_IN -#define IF_FEAT_LAST IF_FEAT_CGNAT +#define IF_FEAT_LAST IF_FEAT_NAT64 #define IF_FEAT_COUNT (IF_FEAT_LAST+1) #define IF_FEAT2FLAG(_f) (1 << (_f)) @@ -39,11 +42,14 @@ enum if_feat_flag { IF_FEAT_FLAG_ACL_IN = IF_FEAT2FLAG(IF_FEAT_ACL_IN), IF_FEAT_FLAG_ACL_OUT = IF_FEAT2FLAG(IF_FEAT_ACL_OUT), IF_FEAT_FLAG_DEFRAG = IF_FEAT2FLAG(IF_FEAT_DEFRAG), + IF_FEAT_FLAG_DEFRAG_SPATH = IF_FEAT2FLAG(IF_FEAT_DEFRAG_SPATH), IF_FEAT_FLAG_FW = IF_FEAT2FLAG(IF_FEAT_FW), + IF_FEAT_FLAG_FW_ORIG = IF_FEAT2FLAG(IF_FEAT_FW_ORIG), IF_FEAT_FLAG_PBR = IF_FEAT2FLAG(IF_FEAT_PBR), IF_FEAT_FLAG_NPTV6 = IF_FEAT2FLAG(IF_FEAT_NPTV6), IF_FEAT_FLAG_DPI = IF_FEAT2FLAG(IF_FEAT_DPI), IF_FEAT_FLAG_CGNAT = IF_FEAT2FLAG(IF_FEAT_CGNAT), + IF_FEAT_FLAG_NAT64 = IF_FEAT2FLAG(IF_FEAT_NAT64), }; #define IF_FEAT_IS_SET(_ft, _flags) ((IF_FEAT2FLAG(_ft) & (_flags)) != 0) diff --git a/src/if_llatbl.c b/src/if_llatbl.c index 
9d9cf1f8..96e96d89 100644 --- a/src/if_llatbl.c +++ b/src/if_llatbl.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2004 Luigi Rizzo, Alessandro Cerri. All rights reserved. * Copyright (c) 2004-2008 Qing Li. All rights reserved. * Copyright (c) 2008 Kip Macy. All rights reserved. @@ -44,13 +44,15 @@ #include #include +#include "dp_event.h" #include "fal.h" #include "if_ether.h" #include "if_llatbl.h" #include "if_var.h" +#include "lcore_sched.h" #include "main.h" #include "nd6_nbr.h" -#include "pktmbuf.h" +#include "pktmbuf_internal.h" #include "urcu.h" #include "util.h" #include "vplane_log.h" @@ -109,48 +111,62 @@ llentry_routing_uninstall(struct llentry *lle) } } +static int +llentry_fal_destroy(struct lltable *llt, struct llentry *lle) +{ + struct ifnet *ifp = llt->llt_ifp; + char b[INET6_ADDRSTRLEN]; + int ret = 0; + + llentry_routing_uninstall(lle); + + if (lle->la_flags & LLE_CREATED_IN_HW) { + if (lle->ll_sock.ss_family == AF_INET) { + ret = fal_ip4_del_neigh(ifp->if_index, + ifp->fal_l3, + satosin(ll_sockaddr(lle))); + if (ret < 0) { + RTE_LOG(NOTICE, DATAPLANE, + "FAL neighbour del for %s, %s failed: %s\n", + inet_ntop(lle->ll_sock.ss_family, + &satosin(ll_sockaddr(lle))->sin_addr, + b, sizeof(b)), + ifp->if_name, strerror(-ret)); + } + } else if (lle->ll_sock.ss_family == AF_INET6) { + ret = fal_ip6_del_neigh(ifp->if_index, + ifp->fal_l3, + satosin6(ll_sockaddr(lle))); + if (ret < 0) { + RTE_LOG(NOTICE, DATAPLANE, + "FAL neighbour del for %s, %s failed: %s\n", + inet_ntop(lle->ll_sock.ss_family, + &satosin6( + ll_sockaddr(lle))->sin6_addr, + b, sizeof(b)), + ifp->if_name, strerror(-ret)); + } + } + } + + return ret; +} + /* Drops entry, and frees the pending packets. * Final free done after RCU grace period. 
*/ void __llentry_destroy(struct lltable *llt, struct llentry *lle) { - struct ifnet *ifp = llt->llt_ifp; - char b[INET6_ADDRSTRLEN]; - int ret; + llentry_routing_uninstall(lle); - if (lle->ll_sock.ss_family == AF_INET) { - llentry_routing_uninstall(lle); - ret = fal_ip4_del_neigh(ifp->if_index, - satosin(ll_sockaddr(lle))); - if (ret < 0) { - RTE_LOG(NOTICE, DATAPLANE, - "FAL neighbour del for %s, %s failed: %s\n", - inet_ntop(lle->ll_sock.ss_family, - &satosin(ll_sockaddr(lle))->sin_addr, - b, sizeof(b)), - ifp->if_name, strerror(-ret)); - } - } else if (lle->ll_sock.ss_family == AF_INET6) { - llentry_routing_uninstall(lle); - ret = fal_ip6_del_neigh(ifp->if_index, - satosin6(ll_sockaddr(lle))); - if (ret < 0) { - RTE_LOG(NOTICE, DATAPLANE, - "FAL neighbour del for %s, %s failed: %s\n", - inet_ntop(lle->ll_sock.ss_family, - &satosin6( - ll_sockaddr(lle))->sin6_addr, - b, sizeof(b)), - ifp->if_name, strerror(-ret)); - } - } + llentry_fal_destroy(llt, lle); cds_lfht_del(llt->llt_hash, &lle->ll_node); call_rcu(&lle->ll_rcu, llentry_free_rcu); } -/* Marks entry as DELETED, so that the master thread can then pick it +/* Marks entry as DELETED, so that the main thread can then pick it * up from the timer and complete the deletion. * Must be protected by spinlock. 
*/ @@ -164,14 +180,16 @@ llentry_destroy(struct lltable *llt, struct llentry *lle) pktmbuf_free_bulk(lle->la_held, dropped); lle->la_numheld = 0; - if (is_master_thread()) + if (is_main_thread()) __llentry_destroy(llt, lle); else - /* Fire the timer for this table immediately on master */ + /* Fire the timer for this table immediately on main */ rte_timer_reset(&llt->lle_timer, 0, SINGLE, rte_get_master_lcore(), lladdr_timer, llt); + rte_atomic32_dec(&llt->lle_size); + return dropped; } @@ -247,13 +265,63 @@ lltable_new(struct ifnet *ifp) rte_timer_init(&llt->lle_timer); llt->lle_unrtoken = 0; rte_atomic16_set(&llt->lle_restoken, ND6_RES_TOKEN); - rte_atomic16_clear(&llt->lle_size); + rte_atomic32_clear(&llt->lle_size); return llt; } +static unsigned +lltable_fal_l3_enable_cb(struct lltable *llt __unused, struct llentry *lle, + void *arg) +{ + bool *any_entries = arg; + + lle->la_flags |= LLE_HW_UPD_PENDING; + *any_entries = true; + + return 0; +} + +static unsigned +lltable_fal_l3_disable_cb(struct lltable *llt, struct llentry *lle, + void *arg __unused) +{ + int ret; + + lle->la_flags &= ~LLE_HW_UPD_PENDING; + /* + * Do it straight away rather than deferring to timer callback + * because we are on the main thread and the FAL router + * interface object that these entries depend on is about to + * be deleted. 
+ */ + ret = llentry_fal_destroy(llt, lle); + if (!ret) + lle->la_flags &= ~LLE_CREATED_IN_HW; + + return 0; +} + +bool +lltable_fal_l3_change(struct lltable *llt, bool enable) +{ + bool any_entries = false; + + if (enable) + lltable_walk(llt, lltable_fal_l3_enable_cb, &any_entries); + else + lltable_walk(llt, lltable_fal_l3_disable_cb, NULL); + + return any_entries; +} + void llentry_free(struct llentry *lle) { + if (lle->la_numheld != 0) + RTE_LOG(ERR, DATAPLANE, + "%s(%p) possible mbuf leak (%#x %d)\n", + __func__, lle, lle->la_flags, lle->la_numheld); + rte_free(lle); } @@ -264,6 +332,7 @@ struct llentry *llentry_new(const void *c, size_t len, struct ifnet *ifp) lle = rte_zmalloc_socket("llentry", sizeof(*lle) + len, RTE_CACHE_LINE_SIZE, ifp->if_socket); if (lle) { + cds_lfht_node_init(&lle->ll_node); rte_atomic16_clear(&lle->ll_idle); rte_spinlock_init(&lle->ll_lock); @@ -308,6 +377,7 @@ _llentry_has_been_used(struct llentry *lle, bool clear) ret = fal_ip_get_neigh_attrs( lle->ifp->if_index, + lle->ifp->fal_l3, ll_sockaddr(lle), 1, &attr); if (!ret && attr.value.booldata) @@ -316,6 +386,7 @@ _llentry_has_been_used(struct llentry *lle, bool clear) attr.value.booldata = false; ret = fal_ip_upd_neigh( lle->ifp->if_index, + lle->ifp->fal_l3, ll_sockaddr(lle), &attr); if (ret) { @@ -345,3 +416,71 @@ llentry_has_been_used(struct llentry *lle) { return _llentry_has_been_used(lle, false); } + +void +llentry_issue_pending_fal_updates(struct llentry *lle) +{ + struct fal_attribute_t attr_list[2]; + char b[INET6_ADDRSTRLEN]; + uint32_t attr_count = 0; + bool new = false; + bool upd = false; + void *addr_ptr; + int ret = 0; + + addr_ptr = lle->ll_sock.ss_family == AF_INET ? 
+ (void *)&satosin(ll_sockaddr(lle))->sin_addr : + (void *)&satosin6(ll_sockaddr(lle))->sin6_addr; + + rte_spinlock_lock(&lle->ll_lock); + if (lle->la_flags & LLE_HW_UPD_PENDING) { + if (lle->la_flags & LLE_VALID) { + if (lle->la_flags & LLE_CREATED_IN_HW) + upd = true; + attr_list[0].id = + FAL_NEIGH_ENTRY_ATTR_DST_MAC_ADDRESS; + attr_list[0].value.mac = lle->ll_addr; + attr_count++; + } + if (!(lle->la_flags & LLE_CREATED_IN_HW) && + if_is_features_mode_active( + lle->ifp, + IF_FEAT_MODE_EVENT_L3_FAL_ENABLED)) + new = true; + lle->la_flags &= ~LLE_HW_UPD_PENDING; + } + rte_spinlock_unlock(&lle->ll_lock); + + if (new) { + ret = fal_ip_new_neigh(lle->ifp->if_index, + lle->ifp->fal_l3, + ll_sockaddr(lle), attr_count, + attr_list); + if (ret < 0 && ret != -EOPNOTSUPP) { + RTE_LOG(NOTICE, DATAPLANE, + "FAL new neighbour %s, %s failed: %s\n", + inet_ntop(lle->ll_sock.ss_family, + addr_ptr, + b, sizeof(b)), + lle->ifp->if_name, strerror(-ret)); + } + if (ret >= 0) { + rte_spinlock_lock(&lle->ll_lock); + lle->la_flags |= LLE_CREATED_IN_HW; + rte_spinlock_unlock(&lle->ll_lock); + } + } else if (upd) { + ret = fal_ip_upd_neigh(lle->ifp->if_index, + lle->ifp->fal_l3, + ll_sockaddr(lle), + attr_list); + if (ret < 0) { + RTE_LOG(NOTICE, DATAPLANE, + "FAL neighbour mac update for %s, %s failed: %s\n", + inet_ntop(lle->ll_sock.ss_family, + addr_ptr, + b, sizeof(b)), + lle->ifp->if_name, strerror(-ret)); + } + } +} diff --git a/src/if_llatbl.h b/src/if_llatbl.h index a3bedd56..314baee3 100644 --- a/src/if_llatbl.h +++ b/src/if_llatbl.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2004 Luigi Rizzo, Alessandro Cerri. All rights reserved. * Copyright (c) 2004-2008 Qing Li. All rights reserved. * Copyright (c) 2008 Kip Macy. All rights reserved. 
@@ -49,6 +49,7 @@ #define ARP_MAXHOLD 8 /* packets held until entry resolved */ #define ARP_MAXPROBES 5 /* send at most 5 requests */ +#define ARP_MAX_ENTRY INT32_MAX /* default maximum number of entries */ /* timer values */ #define ARPT_KEEP (20*60) /* once resolved, good for 20 * minutes */ @@ -71,7 +72,7 @@ struct llentry { union llentry_addr { uint64_t lu_addr_flags; struct { - struct ether_addr lu_addr; + struct rte_ether_addr lu_addr; uint16_t lu_flags; }; } ll_u; @@ -81,12 +82,20 @@ struct llentry { uint8_t la_numheld; uint8_t la_asked; uint8_t la_state; + uint8_t pad1[3]; rte_spinlock_t ll_lock; + uint8_t pad2[4]; struct sockaddr_storage ll_sock; + /* --- cacheline 2 boundary (128 bytes) was 48 bytes ago --- */ uint64_t ll_expire; struct rcu_head ll_rcu; + /* --- cacheline 3 boundary (192 bytes) was 8 bytes ago --- */ struct rte_mbuf *la_held[ARP_MAXHOLD]; }; + +static_assert(offsetof(struct llentry, ll_sock) < 64, + "first cache line exceeded"); + LIST_HEAD(llentries, llentry); static inline struct sockaddr *ll_sockaddr(struct llentry *lle) @@ -101,7 +110,8 @@ struct lltable { struct rte_timer lle_timer; uint16_t lle_unrtoken; rte_atomic16_t lle_restoken; - rte_atomic16_t lle_size; + rte_atomic32_t lle_size; + uint64_t lle_refresh_expire; }; /* @@ -127,6 +137,13 @@ struct lltable { #define LLE_DELETE 0x0400 /* delete on a lookup - match LLE_IFADDR */ #define LLE_CREATE 0x0800 /* create on a lookup miss */ +/* + * mask of internal flags, i.e. that are set in the LLE, but shouldn't + * be displayed to the user. 
+ */ +#define LLE_INTERNAL_MASK (LLE_FWDING | LLE_CREATED_IN_HW | \ + LLE_HW_UPD_PENDING) + struct lltable *lltable_new(struct ifnet *ifp); void lltable_stop_timer(struct lltable *); void lltable_free_rcu(struct lltable *); @@ -136,7 +153,9 @@ typedef unsigned int lltable_iter_func_t(struct lltable *, struct llentry *, unsigned int lltable_walk(struct lltable *llt, lltable_iter_func_t func, void *arg); void lltable_flush(struct lltable *); -/* Final destroy on master thread */ +bool lltable_fal_l3_change(struct lltable *llt, bool enable); + +/* Final destroy on main thread */ void __llentry_destroy(struct lltable *llt, struct llentry *lle); /* Destroy on any thread */ unsigned int llentry_destroy(struct lltable *, struct llentry *); @@ -153,7 +172,7 @@ int lla_match(struct cds_lfht_node *node, const void *key) /* * Fast link layer address lookup function. - * Assumes rcu_read_lock + * Assumes dp_rcu_read_lock */ static inline struct llentry * lla_lookup(struct lltable *llt, unsigned long hash, in_addr_t addr) @@ -186,11 +205,13 @@ struct in_addr *ll_ipv4_addr(struct llentry *lle); struct in6_addr *ll_ipv6_addr(struct llentry *lle); static ALWAYS_INLINE bool -llentry_copy_mac(struct llentry *la, struct ether_addr *desten) +llentry_copy_mac(struct llentry *la, struct rte_ether_addr *desten) { if (likely(la && (la->la_flags & LLE_VALID))) { - rte_atomic16_clear(&la->ll_idle); - ether_addr_copy((struct ether_addr *)&la->ll_addr, desten); + if (rte_atomic16_read(&la->ll_idle)) + rte_atomic16_clear(&la->ll_idle); + rte_ether_addr_copy((struct rte_ether_addr *)&la->ll_addr, + desten); return true; } return false; @@ -204,4 +225,12 @@ llentry_has_been_used_and_clear(struct llentry *lle); bool llentry_has_been_used(struct llentry *lle); +/* + * Issue updates that have been deferred from a non-main thread. + * + * Should be called without lle spinlock held. 
+ */ +void +llentry_issue_pending_fal_updates(struct llentry *lle); + #endif /* IF_LLATBL_H */ diff --git a/src/if_name_types.h b/src/if_name_types.h index 54580140..0433b9b8 100644 --- a/src/if_name_types.h +++ b/src/if_name_types.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2011-2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -20,11 +20,11 @@ #include /* - * Is the interface a dataplane port + * Is the interface a dataplane/backplane port */ static inline bool is_dp_intf(const char *ifname) { - return ifname[0] == 'd' && ifname[1] == 'p'; + return (ifname[0] == 'd' || ifname[0] == 'b') && ifname[1] == 'p'; } static inline bool is_l2tpeth(const char *ifname) diff --git a/src/if_var.h b/src/if_var.h index f9ed0d59..1aa0e812 100644 --- a/src/if_var.h +++ b/src/if_var.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2019, AT&T Intellectual Property. + * Copyright (c) 2017-2020, AT&T Intellectual Property. * All rights reserved. * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. 
@@ -42,22 +42,23 @@ #include /* get TAILQ macros */ #include "bitmask.h" -#include "config.h" +#include "config_internal.h" #include "main.h" #include "util.h" -#include "vrf.h" -#include "vxlan.h" +#include "vrf_internal.h" #include #include #include #include -#include "urcu.h" #include "arp_cfg.h" -#include -#include "storm_ctl.h" +#include "fal_plugin.h" #include "if_feat.h" +#include "interface.h" +#include "pl_fused_gen.h" +#include "storm_ctl.h" +#include "urcu.h" #define DATAPLANE_MAX_PORTS RTE_MAX_ETHPORTS #define IF_PORT_ID_INVALID UCHAR_MAX @@ -82,13 +83,13 @@ struct if_addr { struct bridge_softc; struct bridge_port; struct sched_info; -struct flow_counters; struct portmonitor_info; struct npf_if; +struct cgn_intf; +struct egress_map_info; /* - * Software statistics maintained per-core, - * therefore structure should be sizeof cache line (64 bytes) + * Software statistics maintained per-core. */ struct if_data { uint64_t ifi_ipackets; /* packets received on interface */ @@ -110,6 +111,10 @@ struct if_data { uint64_t ifi_unknown; /* packets non-dataplane protocol */ } __rte_cache_aligned; +/* Ensure struct fits in two cache lines */ +static_assert(sizeof(struct if_data) <= 128, + "struct is too large"); + static inline uint64_t ifi_odropped(const struct if_data *data) { return data->ifi_odropped_txring + @@ -151,12 +156,13 @@ enum if_type { IFT_TUNNEL_OTHER, IFT_TUNNEL_GRE, IFT_TUNNEL_VTI, + IFT_TUNNEL_PIMREG, IFT_L2VLAN, IFT_BRIDGE, IFT_VXLAN, IFT_MACVLAN, - IFT_VRFMASTER, + IFT_VRF, }; /* @@ -180,6 +186,33 @@ enum if_role { IF_ROLE_MAX }; +enum if_feat_mode_flags { + IF_FEAT_MODE_FLAG_NONE = 0, + /* notify that L2 will be disabled for the interface */ + IF_FEAT_MODE_FLAG_L2_DISABLE = (1 << 0), + /* request that L3 be disabled for the interface */ + IF_FEAT_MODE_FLAG_L3_DISABLE = (1 << 1), + /* + * request that L2 be enabled in the FAL for the interface. 
+ * Note that this will result in the interface no longer being + * embellished by the IF_EMB_FEAT_HW_SWITCHING_DISABLED + * feature. + */ + IF_FEAT_MODE_FLAG_L2_FAL_ENABLE = (1 << 2), + /* + * request that L2 be disabled in the FAL for the interface. + * Note that this will result in the interface being + * embellished by the IF_EMB_FEAT_HW_SWITCHING_DISABLED + * feature, and this can be queried by features to determine + * whether in this mode. + */ + IF_FEAT_MODE_FLAG_L2_FAL_DISABLE = (1 << 3), + /* interface-embellishing feature changed */ + IF_FEAT_MODE_FLAG_EMB_FEAT_CHANGED = (1 << 4), + /* notify that L2 has been enabled for the interface */ + IF_FEAT_MODE_FLAG_L2_ENABLED = (1 << 5), +}; + /* * TCP MSS Clamping */ @@ -190,21 +223,6 @@ enum tcp_mss_af { #define TCP_MSS_AF_MAX TCP_MSS_V6 #define TCP_MSS_AF_SIZE (TCP_MSS_AF_MAX + 1) -static const uint32_t IF_LINK_SPEED_UNKNOWN = 0; - -enum if_link_duplex_type { - IF_LINK_DUPLEX_HALF = 0, - IF_LINK_DUPLEX_FULL = 1, - IF_LINK_DUPLEX_UNKNOWN = 2, -}; - -struct if_link_status { - bool link_status; - enum if_link_duplex_type link_duplex; - /* Link speed in Mbps */ - uint32_t link_speed; -}; - struct if_vlan_feat { uint16_t vlan; fal_object_t fal_vlan_feat; @@ -231,10 +249,10 @@ struct ifnet { /* Network configuration bits */ uint16_t capturing:1, /* capture in progress */ portmonitor:1, - vlan_modify:1, + padding0:1, qos_software_fwd:1, tpid_offloaded:1, - flow_type:3, + unused:3, ip_proxy_arp:1, ip_mc_forwarding:1, ip6_mc_forwarding:1, @@ -245,12 +263,13 @@ struct ifnet { qinq_inner:1; vrfid_t if_vrfid; /* vrf tag */ - struct lltable *if_lltable; /* IPv4 address mapping */ - struct lltable *if_lltable6; /* IPv6 address mapping */ + void **node_instance_contexts; + uint16_t l2_output_features; + uint16_t padding[3]; struct npf_if *if_npf; /* NPF specific info */ struct ifnet *if_parent; /* real device for vlan */ - struct ether_addr eth_addr; + struct rte_ether_addr eth_addr; uint8_t ip_encap_features; uint8_t 
ip6_encap_features; @@ -276,18 +295,25 @@ struct ifnet { /* --- cacheline 2 boundary (128 bytes) --- */ /* Feature state */ - struct flow_counters *if_sample; struct portmonitor_info *pminfo; /* portmonitor info */ struct cds_lfht *mpls_label_table; + struct cgn_intf *if_cgn; /* CGNAT */ + /* Referenced on local packet to/from kernel path */ struct ifnet *aggregator; /* part of team */ struct cds_lfht *if_mcfltr_hash; /* Table of filtered mcast pkts*/ + struct lltable *if_lltable; /* IPv4 address mapping */ + struct lltable *if_lltable6; /* IPv6 address mapping */ + /* --- cacheline 3 boundary (192 bytes) --- */ uint8_t if_mac_filtr_supported:1, if_mac_filtr_active:1, if_mac_filtr_reprogram:1, - hw_forwarding:1; /* switch port hw fwded*/ + hw_forwarding:1, /* switch port hw fwded */ + if_broken_out:1, /* broken out into separate ifs */ + if_pause:2, /* pause_frame mode */ + spare1:1; int8_t if_socket; /* NUMA node (or -1 for ANY) */ @@ -301,11 +327,13 @@ struct ifnet { uint16_t vif_cnt; unsigned int if_pcount; /* promiscuous mode */ - struct ether_addr perm_addr; /* "permanent" MAC address */ + struct rte_ether_addr perm_addr; /* "permanent" MAC address */ uint16_t mpls_labelspace; + uint8_t padding3[4]; struct cds_list_head if_addrhead; /* list of addresses per if */ + /* --- cacheline 4 boundary (256 bytes) --- */ struct cds_list_head if_list; /* List of all interfaces */ struct cds_lfht_node ifname_hash; /* ifname hash table */ struct cds_lfht_node ifindex_hash; /* ifindex hash table */ @@ -314,7 +342,12 @@ struct ifnet { bool if_poe : 1, /* poe is enabled */ unplugged : 1, /* hot unplug event in progress */ if_team : 1, /* this is a bonding device */ - if_created : 1; /* All i/f build actions done */ + if_created : 1, /* All i/f build actions done */ + if_l3_enabled : 1, /* enabled for L3 use */ + hw_capturing : 1, /* Hardware capture enabled */ + spare2: 1, + spare3: 1; + uint8_t pad[7]; fal_object_t fal_l3; /* Software statistics */ @@ -323,6 +356,7 @@ struct 
ifnet { struct if_perf if_rxpps; /* packets rate */ struct if_perf if_rxbps; /* bandwidth */ struct rte_timer if_stats_timer; /* update performance */ + uint8_t padding4[40]; struct if_data if_data[RTE_MAX_LCORE]; @@ -334,12 +368,14 @@ struct ifnet { /* GARP processing config */ struct garp_cfg ip_garp_op; + uint8_t padding5[7]; /* storm control config */ struct if_storm_ctl_info *sc_info; /* ref counts for pipeline features */ uint16_t if_feat_refcnt[IF_FEAT_COUNT]; + uint8_t padding6[6]; /* vlan feature object table */ struct cds_lfht *vlan_feat_table; @@ -349,8 +385,21 @@ struct ifnet { /* vlan-modify default entry */ struct vlan_mod_tbl_entry *vlan_mod_default; + + /* Egress map object */ + fal_object_t egr_map_obj; + + uint16_t ip_out_spath_features; + uint16_t ip6_out_spath_features; + + struct egress_map_info *egr_map_info; }; +static_assert(offsetof(struct ifnet, if_vlantbl) == 64, + "first cache line exceeded"); +static_assert(offsetof(struct ifnet, pminfo) == 128, + "second cache line exceeded"); + static inline uint16_t if_tpid(const struct ifnet *ifp) { return ifp->tpid; @@ -451,23 +500,21 @@ struct ift_ops { * Called before the interface is started (if admin-up at * create time) and before it is added to interface databases. */ - int (*ifop_init)(struct ifnet *ifp); + int (*ifop_init)(struct ifnet *ifp, void *ctx); /* - * Inform that the interface is about to uninitialised + * Uninitialise the interface * - * Called just before the interface is stopped and removed + * Called with the interface stopped and after it is removed * from interface databases. */ - void (*ifop_pre_uninit)(struct ifnet *ifp); + void (*ifop_uninit)(struct ifnet *ifp); /* - * Uninitialise the interface - * - * Called with the interface stopped and after it is remove - * from interface databases. 
+ * Get the rfc2233 interface type, Assumes valid ifnet pointer + * @return interface type as defined in the ianaiftype-mib */ - void (*ifop_uninit)(struct ifnet *ifp); + enum dp_ifnet_iana_type (*ifop_iana_type)(struct ifnet *ifp); /* * Enable/disable VLAN filtering @@ -498,8 +545,9 @@ struct ift_ops { /* * Enable/disable promiscuous mode * - * Enable/disable reception of all unicast packets vs. only - * for-us unicast packets. + * Enable/disable reception of all unicast and multicast + * packets on all VLANs vs. only for-us unicast packets or + * registered MAC addresses on selected VLANs. */ int (*ifop_set_promisc)(struct ifnet *ifp, bool enable); @@ -536,10 +584,61 @@ struct ift_ops { unsigned int *bp_ifindex); /* - * The interface create has finished in SW + * Enable L3 for the interface + + * Allows the interface to perform any actions specific to the + * type, such as creating a FAL router interface if required. + * + * This may be called during initial creation of the + * interface, or at any time later in the lifetime of the + * interface up until its deletion. */ - void (*ifop_create_finished)(struct ifnet *ifp, - const struct ether_addr *mac_addr); + int (*ifop_l3_enable)(struct ifnet *ifp); + + /* + * Disable L3 for the interface + * + * Allows the interface to perform any actions specific to the + * type, such as deleting a FAL router interface if required. + */ + int (*ifop_l3_disable)(struct ifnet *ifp); + + /* + * Query whether hardware switching is enabled + * + * Query whether hardware switching has been disabled by the + * system. If not specified then this defaults to true. + */ + bool (*ifop_is_hw_switching_enabled)(struct ifnet *ifp); + + /* + * Set the speed and duplex of an interface + */ + int (*ifop_set_speed)(struct ifnet *ifp, bool autoneg, + uint32_t fixed_speed, int duplex); + + /* + * Get link status for the interface + * + * Includes up/down and optionally speed & duplex if known. 
+ * + * Optional function - will fall back to IFF_RUNNING flag + * determined by higher layers in the system, if not + * implemented by the interface type. + */ + int (*ifop_get_link_status)(struct ifnet *ifp, + struct dp_ifnet_link_status *if_link); + + /* + * Set the usability of the interface + * + * The interface can be marked as usable or unusable. + * + * Optional function - This allows link failure detection protocols + * such as BFD/uBFD to influence forwarding decision quicker. It's + * a no-op, if not implemented by the interface type. + */ + int (*ifop_set_usability)(struct ifnet *ifp, bool usable); }; struct lltable *in_domifattach(struct ifnet *); @@ -571,20 +670,58 @@ if_vlan_lookup(const struct ifnet *ifp, uint16_t vid) } struct ifnet *if_alloc(const char *name, enum if_type type, - unsigned int mtu, const struct ether_addr *eth_addr, - int socketid); + unsigned int mtu, const struct rte_ether_addr *eth_addr, + int socketid, void *ctx); void if_set_ifindex(struct ifnet *ifp, unsigned int ifindex); void if_unset_ifindex(struct ifnet *ifp); -struct ifnet *if_hwport_alloc(unsigned int port, - const struct ether_addr *eth_addr, - int socketid); void if_free(struct ifnet *ifp); void netlink_if_free(struct ifnet *ifp); void if_cleanup(enum cont_src_en cont_src); bool if_setup_vlan_storage(struct ifnet *ifp); void if_finish_create(struct ifnet *ifp, const char *ifi_type, const char *kind, - const struct ether_addr *mac_addr); + const struct rte_ether_addr *mac_addr); +enum if_feat_mode_event; +bool if_is_features_mode_active(struct ifnet *ifp, + enum if_feat_mode_event event); +void if_change_features_mode(struct ifnet *ifp, enum if_feat_mode_flags flags); + +/* + * Interface-embellishing features + * + * embellish, verb; make beautiful, decorate + * + * An interface-embellishing feature is defined as one that changes + * the appearance of the interface that is significant to either the + * core interface infra or to another interface feature. 
+ */ +enum if_embellish_feat { + IF_EMB_FEAT_NONE = 0, + IF_EMB_FEAT_BRIDGE_MEMBER = (1 << 0), + IF_EMB_FEAT_HW_SWITCHING_DISABLED = (1 << 1), + IF_EMB_FEAT_UNPLUGGED = (1 << 2), + IF_EMB_FEAT_BREAK_OUT = (1 << 3), + IF_EMB_FEAT_LAG_MEMBER = (1 << 4), +}; + +/* + * If hardware-switching is disabled then the interface is still L3 + * enabled, but all other embellishing features result in it becoming L3 + * disabled. + */ +#define IF_EMB_FEATS_ALLOW_L3 IF_EMB_FEAT_HW_SWITCHING_DISABLED + +/* + * Check for any specified interface-embellishing features being present + */ +bool if_check_any_emb_feat(struct ifnet *ifp, enum if_embellish_feat feat_any); +/* + * Check for any interface-embellishing features being present other + * than those specified. + */ +bool if_check_any_except_emb_feat(struct ifnet *ifp, + enum if_embellish_feat feat_except); +void if_notify_emb_feat_change(struct ifnet *ifp); int if_blink(struct ifnet *ifp, bool on); bool if_stats(struct ifnet *ifp, struct if_data *stats); @@ -592,10 +729,6 @@ void if_mpls_stats(const struct ifnet *ifp, struct if_mpls_data *stats); const char *if_flags2str(char *buf, unsigned int flags); -struct ifnet *ifnet_byifindex(unsigned int ifindex); -struct ifnet *ifnet_byifname(const char *name); -struct ifnet *ifnet_byethname(const char *name); - struct ifnet *ifnet_byifname_cont_src(enum cont_src_en cont_src, const char *ifname); void if_set_vrf(struct ifnet *ifp, vrfid_t vrf_id); @@ -603,8 +736,6 @@ void fal_if_update_forwarding_all(struct ifnet *ifp); void fal_if_update_forwarding(struct ifnet *ifp, uint8_t family, bool multicast); -char *if_port_info(const struct ifnet *ifp); - static inline struct ifnet *ifnet_byport(portid_t port) { if (likely(port < DATAPLANE_MAX_PORTS)) @@ -615,14 +746,14 @@ static inline struct ifnet *ifnet_byport(portid_t port) static inline int ifnet_nametoindex(const char *ifname) { - struct ifnet *ifp = ifnet_byifname(ifname); + struct ifnet *ifp = dp_ifnet_byifname(ifname); return ifp ? 
ifp->if_index : 0; } static inline const char *ifnet_indextoname(int ifindex) { - struct ifnet *ifp = ifnet_byifindex(ifindex); + struct ifnet *ifp = dp_ifnet_byifindex(ifindex); return ifp ? ifp->if_name : NULL; } @@ -641,7 +772,7 @@ void if_disable_poll(portid_t port_id); static inline void if_disable_poll_rcu(portid_t port_id) { if_disable_poll(port_id); - synchronize_rcu(); + dp_rcu_synchronize(); } static inline bool if_port_isup(portid_t portid) @@ -787,6 +918,15 @@ static inline bool is_tunnel(const struct ifnet *ifp) return false; } +static inline bool is_tunnel_pimreg(const struct ifnet *ifp) +{ + switch (ifp->if_type) { + case IFT_TUNNEL_PIMREG: + return true; + } + return false; +} + static inline bool is_bridge(const struct ifnet *ifp) { return ifp->if_type == IFT_BRIDGE; @@ -810,23 +950,28 @@ static inline bool is_team(const struct ifnet *ifp) bool is_lo(const struct ifnet *ifp); bool is_s2s_feat_attach(const struct ifnet *ifp); int cmd_set_vfp(FILE *f, int argc, char **argv); +int cmd_ifconfig(FILE *f, int argc, char **argv); + +int if_node_instance_feat_print(struct pl_command *cmd, + struct pl_node_registration *node_ptr); -typedef void ifnet_iter_func_t(struct ifnet *ifp, void *arg); -void ifnet_walk(ifnet_iter_func_t func, void *arg); +void unsup_tunnel_output(struct ifnet *ifp, struct rte_mbuf *m, + struct ifnet *input_ifp, uint16_t proto); +void vfp_output(struct ifnet *ifp, struct rte_mbuf *m, + struct ifnet *input_ifp, uint16_t proto); int if_vlan_proto_set(struct ifnet *ifp, uint16_t proto); void if_qinq_created(struct ifnet *phy_ifp); void if_qinq_deleted(struct ifnet *phy_ifp); -int if_add_l2_addr(struct ifnet *ifp, struct ether_addr *addr); -int if_del_l2_addr(struct ifnet *ifp, struct ether_addr *addr); +int if_add_l2_addr(struct ifnet *ifp, struct rte_ether_addr *addr); +int if_del_l2_addr(struct ifnet *ifp, struct rte_ether_addr *addr); void ifpromisc(struct ifnet *ifp, int onswitch); -void if_allmulti(struct ifnet *ifp, int onswitch); 
+void if_allmulti(struct ifnet *ifp, int onoff); int if_start(struct ifnet *ifp); int if_stop(struct ifnet *ifp); int if_set_vlan_filter(struct ifnet *ifp, uint16_t vlan, bool enable); int if_set_broadcast(struct ifnet *ifp, bool enable); -void if_create_finished(struct ifnet *ifp, const struct ether_addr *mac_addr); uint64_t if_scaled(uint64_t value); void send_if_stats(const struct ifnet *ifp, const struct if_data *stats); @@ -848,50 +993,29 @@ bool is_ignored_interface(uint32_t ifindex); void incomplete_if_add_ignored(uint32_t ifindex); void incomplete_if_del_ignored(uint32_t ifindex); void incomplete_routes_make_complete(void); -void incomplete_route_add(vrfid_t vrf_id, const void *dst, - uint8_t family, uint8_t depth, uint32_t table, - uint8_t scope, uint8_t proto, - const struct nlmsghdr *nlh); -void incomplete_route_del(vrfid_t vrf_id, const void *dst, +void incomplete_route_add_nl(const void *dst, + uint8_t family, uint8_t depth, uint32_t table, + uint8_t scope, uint8_t proto, + const struct nlmsghdr *nlh); +void incomplete_route_add_pb(const void *dst, + uint8_t family, uint8_t depth, uint32_t table, + uint8_t scope, uint8_t proto, + void *data, size_t size); +void incomplete_route_del(const void *dst, uint8_t family, uint8_t depth, uint32_t table, uint8_t scope, uint8_t proto); -void missed_netlink_replay(unsigned int ifindex); -void missed_nl_unspec_link_add(unsigned int ifindex, - const struct nlmsghdr *nlh); -void missed_nl_unspec_link_del(unsigned int ifindex); -void missed_nl_unspec_addr_add(unsigned int ifindex, - const struct ether_addr *addr, - const struct nlmsghdr *nlh); -void missed_nl_unspec_addr_del(unsigned int ifindex, - const struct ether_addr *addr); -void missed_nl_inet_addr_add(unsigned int ifindex, - unsigned char family, - const void *addr, - const struct nlmsghdr *nlh); -void missed_nl_inet_addr_del(unsigned int ifindex, - unsigned char family, - const void *addr); -void missed_nl_inet_netconf_add(unsigned int ifindex, - unsigned 
char family, - const struct nlmsghdr *nlh); -void missed_nl_inet_netconf_del(unsigned int ifindex, - unsigned char family); -void missed_nl_child_link_add(unsigned int ifindex, - unsigned int child_ifindex, - const struct nlmsghdr *nlh); -void missed_nl_child_link_del(unsigned int ifindex, - unsigned int child_ifindex); void if_set_cont_src(struct ifnet *ifp, enum cont_src_en cont_src); bool if_port_is_uplink(portid_t portid); bool if_is_control_channel(struct ifnet *ifp); -bool if_port_is_owned_by_src(enum cont_src_en cont_src, portid_t portid); bool if_port_is_bkplane(portid_t portid); -void if_create_l3_intf(struct ifnet *ifp, - const struct ether_addr *mac_addr); -void if_delete_l3_intf(struct ifnet *ifp); +int if_fal_create_l3_intf(struct ifnet *ifp); +int if_fal_delete_l3_intf(struct ifnet *ifp); int if_set_l3_intf_attr(struct ifnet *ifp, struct fal_attribute_t *attr); +int if_fal_l3_get_stats(struct ifnet *ifp, struct if_data *stats); +int if_get_l3_intf_attr(struct ifnet *ifp, uint32_t attr_count, + struct fal_attribute_t *attr); /* TODO: Look into consolidating the if_is_uplink and * is_local_controller checks across the codebase. @@ -925,6 +1049,11 @@ int if_register_type(enum if_type type, const struct ift_ops *fns); */ int if_set_mtu(struct ifnet *ifp, uint32_t mtu, bool force_update); +/* + * Set the egress map on an interface + */ +int if_set_egress_map(struct ifnet *ifp, void *qos_mark_map); + /* * Set the Layer2 address of an interface */ @@ -941,12 +1070,11 @@ int if_set_poe(struct ifnet *ifp, bool enable); int if_get_poe(struct ifnet *ifp, bool *admin_status, bool *oper_status); /* - * Get link status. - * - * Up means signal detected and auto-negotiate successfully completed. + * Set the speed and duplex of an interface */ -void if_get_link_status(struct ifnet *ifp, - struct if_link_status *if_link); +int if_set_speed(struct ifnet *ifp, bool autoneg, uint32_t forced_speed, + int duplex); + /* * Dump state for an interface in JSON format. 
*/ @@ -964,11 +1092,15 @@ struct if_vlan_feat *if_vlan_feat_get(struct ifnet *ifp, uint16_t vlan); int if_vlan_feat_delete(struct ifnet *ifp, uint16_t vlan); /* - * APIs to set/get backplane interface + * API to set backplane interface * Currently supported only on ethernet interfaces */ int if_set_backplane(struct ifnet *ifp, unsigned int ifindex); -int if_get_backplane(struct ifnet *ifp, unsigned int *ifindex); + +/* + * Set the usability of an interface + */ +int if_set_usability(struct ifnet *ifp, bool usability); static inline bool if_is_hwport(struct ifnet *ifp) @@ -980,4 +1112,37 @@ if_is_hwport(struct ifnet *ifp) return ifp->if_local_port && !ifp->if_parent; } +/* + * Transmit one packet + */ +void if_output(struct ifnet *ifp, struct rte_mbuf *m, + struct ifnet *input_ifp, uint16_t proto); + +void if_output_internal(struct pl_packet *pkt); + +int if_allocate_feature_space(struct ifnet *ifp, + enum pl_feature_point_id fp); +int +if_node_instance_register_storage(struct pl_node *node, + struct pl_feature_registration *feat, + void *context); +int +if_node_instance_unregister_storage(struct pl_node *node, + struct pl_feature_registration *feat); +void * +if_node_instance_get_storage(struct pl_node *node, + struct pl_feature_registration *feat); + +void * +if_node_instance_get_storage_internal(struct ifnet *ifp, + enum pl_feature_point_id feat_point, + int feat); +int +if_node_instance_set_cleanup_cb(struct pl_feature_registration *feat); + +int if_node_instance_feat_change_all(struct pl_feature_registration *feat, + enum pl_node_feat_action action, + pl_node_feat_change *feat_change); + + #endif /* !IF_VAR_H */ diff --git a/src/in.c b/src/in.c index 6f0885b6..cfcc5eae 100644 --- a/src/in.c +++ b/src/in.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. 
* Copyright (c) 1982, 1986, 1991, 1993 * The Regents of the University of California. All rights reserved. * Copyright (C) 2001 WIDE Project. All rights reserved. @@ -49,11 +49,14 @@ #include #include +#include "arp.h" +#include "dp_event.h" #include "fal.h" #include "if_ether.h" #include "if_llatbl.h" #include "if_var.h" #include "in6_var.h" +#include "lcore_sched.h" #include "pipeline/nodes/pl_nodes_common.h" #include "pl_node.h" #include "urcu.h" @@ -81,11 +84,23 @@ in_lltable_lookup(struct ifnet *ifp, u_int flags, in_addr_t addr) if (!(flags & LLE_CREATE)) return NULL; + if (rte_atomic32_read(&llt->lle_size) >= + ARP_CFG(arp_max_entry)) { + ARPSTAT_INC(if_vrfid(ifp), tablimit); + return NULL; + } + lle = llentry_new(&sin, sizeof(sin), ifp); - if (lle == NULL) + if (unlikely(!lle)) { + ARPSTAT_INC(if_vrfid(ifp), memfail); return NULL; + } + + lle->la_flags = flags & ~LLE_CREATE; + if (if_is_features_mode_active( + ifp, IF_FEAT_MODE_EVENT_L3_FAL_ENABLED)) + lle->la_flags |= LLE_HW_UPD_PENDING; - lle->la_flags = (flags | LLE_HW_UPD_PENDING) & ~LLE_CREATE; struct cds_lfht_node *node; node = cds_lfht_add_unique(llt->llt_hash, hash, lla_match, &addr, &lle->ll_node); @@ -95,9 +110,12 @@ in_lltable_lookup(struct ifnet *ifp, u_int flags, in_addr_t addr) llentry_free(lle); lle = caa_container_of(node, struct llentry, ll_node); } else { - /* if on master thread */ - if (is_master_thread()) { + rte_atomic32_inc(&llt->lle_size); + /* if on main thread */ + if (is_main_thread() && if_is_features_mode_active( + ifp, IF_FEAT_MODE_EVENT_L3_FAL_ENABLED)) { ret = fal_ip4_new_neigh(lle->ifp->if_index, + lle->ifp->fal_l3, &sin, 0, NULL); if (ret < 0 && ret != -EOPNOTSUPP) { RTE_LOG(NOTICE, DATAPLANE, @@ -106,32 +124,35 @@ in_lltable_lookup(struct ifnet *ifp, u_int flags, in_addr_t addr) sizeof(b)), lle->ifp->if_name, strerror(-ret)); - } else { + } + if (ret >= 0) { rte_spinlock_lock(&lle->ll_lock); lle->la_flags |= LLE_CREATED_IN_HW; rte_spinlock_unlock(&lle->ll_lock); } - } else 
{ - /* - * Fire the timer so it can be sourced in - * the hardware on the master thread - */ - rte_timer_reset(&llt->lle_timer, 0, - SINGLE, rte_get_master_lcore(), - lladdr_timer, llt); } + /* + * Fire the timer so it can be sourced in the + * hardware on the main thread and/or + * neighbour-sourced routes installed. + */ + rte_timer_reset(&llt->lle_timer, 0, + SINGLE, rte_get_master_lcore(), + lladdr_timer, llt); } } else if (unlikely(flags & LLE_DELETE)) { /* * Only delete static or idle entries. - * Leave dynamic in-use entries to time out - kernel may - * think they are stale but they may be in active use - * by the dataplane. + * Leave dynamic entries to time out if they are in use or + * created locally (unless the deletion is for local entries). + * An entry that is stale and being removed in the kernel may + * be in active use in the dataplane. */ if ((lle->la_flags & LLE_STATIC) || - !llentry_has_been_used(lle)) { + ((!(lle->la_flags & LLE_LOCAL) || (flags & LLE_LOCAL)) && + !llentry_has_been_used(lle))) { rte_spinlock_lock(&lle->ll_lock); - llentry_destroy(llt, lle); + arp_entry_destroy(llt, lle); rte_spinlock_unlock(&lle->ll_lock); lle = NULL; } @@ -147,6 +168,7 @@ in_domifattach(struct ifnet *ifp) llt = lltable_new(ifp); + llt->lle_refresh_expire = rte_get_timer_cycles() + rte_get_timer_hz(); rte_timer_reset(&llt->lle_timer, rte_get_timer_hz(), SINGLE, rte_get_master_lcore(), lladdr_timer, llt); @@ -199,7 +221,7 @@ static struct if_addr *ifa_find(struct ifnet *ifp, int family, case AF_INET6: sin6 = satosin6(sa); if (memcmp(&sin6->sin6_addr, addr, - sizeof(struct in6_addr))) + sizeof(struct in6_addr)) != 0) continue; break; default: @@ -243,7 +265,7 @@ void ifa_add(int ifindex, int family, uint32_t scope, struct sockaddr_in *sin; struct sockaddr_in6 *sin6; - ifp = ifnet_byifindex(ifindex); + ifp = dp_ifnet_byifindex(ifindex); if (!ifp) return; @@ -335,7 +357,7 @@ void ifa_remove(int ifindex, int family, const void *addr, uint8_t prefixlen) struct ifnet 
*ifp; struct if_addr *ifa; - ifp = ifnet_byifindex(ifindex); + ifp = dp_ifnet_byifindex(ifindex); if (!ifp) return; diff --git a/src/in_cksum.c b/src/in_cksum.c index 9fb2e84c..f81d6c27 100644 --- a/src/in_cksum.c +++ b/src/in_cksum.c @@ -1,10 +1,13 @@ /*- - * Copyright (c) 2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only */ #include +#include + +#include "compiler.h" #include "in_cksum.h" uint16_t in_cksum(const void *addr, int len) @@ -73,8 +76,8 @@ uint16_t in6_cksum(const struct ip6_hdr *ip6, uint8_t next, * Assume l4 hdr is in first segment. */ uint16_t -in4_cksum_mbuf(const struct rte_mbuf *pak, const struct iphdr *ip, - const void *l4_hdr) +dp_in4_cksum_mbuf(const struct rte_mbuf *m, const struct iphdr *ip, + const void *l4_hdr) { uint32_t sum = 0; uint16_t hdr_sum = 0; @@ -99,10 +102,10 @@ in4_cksum_mbuf(const struct rte_mbuf *pak, const struct iphdr *ip, sum = __rte_raw_cksum(&uph, sizeof(uph), sum); } - start_offset = (char *)l4_hdr - rte_pktmbuf_mtod(pak, char *); - len = pak->pkt_len - start_offset; + start_offset = (char *)l4_hdr - rte_pktmbuf_mtod(m, char *); + len = m->pkt_len - start_offset; - rte_raw_cksum_mbuf(pak, start_offset, len, &hdr_sum); + rte_raw_cksum_mbuf(m, start_offset, len, &hdr_sum); sum += hdr_sum; sum = __rte_raw_cksum_reduce(sum); @@ -114,8 +117,8 @@ in4_cksum_mbuf(const struct rte_mbuf *pak, const struct iphdr *ip, * Checksum a TCP, UDP or ICMP IPv6 packet in an mbuf chain. 
*/ uint16_t -in6_cksum_mbuf(const struct rte_mbuf *pak, const struct ip6_hdr *ip6, - const void *l4_hdr) +dp_in6_cksum_mbuf(const struct rte_mbuf *m, const struct ip6_hdr *ip6, + const void *l4_hdr) { uint32_t sum = 0; uint16_t hdr_sum = 0; @@ -125,10 +128,10 @@ in6_cksum_mbuf(const struct rte_mbuf *pak, const struct ip6_hdr *ip6, if (ip6) sum = __in6_cksum(ip6, ip6->ip6_nxt, 0, 0); - start_offset = (char *)l4_hdr - rte_pktmbuf_mtod(pak, char *); - len = pak->pkt_len - start_offset; + start_offset = (char *)l4_hdr - rte_pktmbuf_mtod(m, char *); + len = m->pkt_len - start_offset; - rte_raw_cksum_mbuf(pak, start_offset, len, &hdr_sum); + rte_raw_cksum_mbuf(m, start_offset, len, &hdr_sum); sum += hdr_sum; sum = __rte_raw_cksum_reduce(sum); @@ -136,3 +139,96 @@ in6_cksum_mbuf(const struct rte_mbuf *pak, const struct ip6_hdr *ip6, return ~sum; } + + +#if defined RTE_ARCH_I686 || defined RTE_ARCH_X86_64 +/* + * It is useful to have an Internet checksum routine which is inlineable + * and optimized specifically for the task of computing IP header checksums + * in the normal case (where there are no options and the header length is + * therefore always exactly five 32-bit words). + */ +ALWAYS_INLINE uint32_t dp_in_cksum_hdr(const struct iphdr *ip) +{ + /* + * Avoid violating type aliasing rules + */ + union ip32u { + struct iphdr ip; + uint32_t u32[5]; + } const *ipu = (const union ip32u *)ip; + uint32_t sum = 0; + + asm("addl %1, %0\n" + "adcl %2, %0\n" + "adcl %3, %0\n" + "adcl %4, %0\n" + "adcl %5, %0\n" + "adcl $0, %0" + : "+r" (sum) + : "g" (ipu->u32[0]), + "g" (ipu->u32[1]), + "g" (ipu->u32[2]), + "g" (ipu->u32[3]), + "g" (ipu->u32[4]) + : "cc" + ); + + sum = (sum & 0xffff) + (sum >> 16); + sum = (sum & 0xffff) + (sum >> 16); + + return ~sum; +} + +/* + * Aggressive optimization may result in elision of setting the IPv4 header + * checksum to zero prior to calculating it, resulting in a faulty calculation. 
+ * Ensure this does not happen by setting the IPv4 header checksum without + * relying on its previous value. + */ +ALWAYS_INLINE void dp_set_cksum_hdr(struct iphdr *ip) +{ + unsigned char const *u8 = (void const *)ip; + uint32_t sum = 0; + + asm("movzwl %1, %0\n" + "adcl %2, %0\n" + "adcl %3, %0\n" + "adcl %4, %0\n" + "adcl %5, %0\n" + "adcl $0, %0" + : "=r" (sum) + : "o" (u8[8]), + "o" (u8[0]), + "o" (u8[4]), + "o" (u8[12]), + "o" (u8[16]) + : "cc" + ); + + sum = (sum & 0xffff) + (sum >> 16); + sum = (sum & 0xffff) + (sum >> 16); + ip->check = ~sum; +} + +#else + +#include + +/* Portable version using DPDK checksum code. */ +ALWAYS_INLINE uint16_t dp_in_cksum_hdr(const struct iphdr *ip) +{ + uint16_t sum = rte_raw_cksum(ip, sizeof(*ip)); + + return ~sum; +} + +ALWAYS_INLINE void dp_set_cksum_hdr(struct iphdr *ip) +{ + ip->check = 0; + ip->check = dp_in_cksum_hdr(ip); +} + +#endif + + diff --git a/src/in_cksum.h b/src/in_cksum.h index 70ff6508..55713024 100644 --- a/src/in_cksum.h +++ b/src/in_cksum.h @@ -1,6 +1,6 @@ /*- * - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * * Copyright (c) 1990 The Regents of the University of California. * All rights reserved. @@ -48,6 +48,8 @@ #include #include +#include "ip_checksum.h" + struct ip6_hdr; struct rte_mbuf; @@ -150,58 +152,6 @@ uint16_t in_cksum(const void *addr, int len); uint16_t in6_cksum(const struct ip6_hdr *, uint8_t, uint32_t, uint32_t); -#if defined RTE_ARCH_I686 || defined RTE_ARCH_X86_64 -/* - * It it useful to have an Internet checksum routine which is inlineable - * and optimized specifically for the task of computing IP header checksums - * in the normal case (where there are no options and the header length is - * therefore always exactly five 32-bit words. 
- */ -static inline uint32_t in_cksum_hdr(const struct iphdr *ip) -{ - /* - * Avoid violating type aliasing rules - */ - union ip32u { - struct iphdr ip; - uint32_t u32[5]; - } const *ipu = (const union ip32u *)ip; - uint32_t sum = 0; - - asm("addl %1, %0\n" - "adcl %2, %0\n" - "adcl %3, %0\n" - "adcl %4, %0\n" - "adcl %5, %0\n" - "adcl $0, %0" - : "+r" (sum) - : "g" (ipu->u32[0]), - "g" (ipu->u32[1]), - "g" (ipu->u32[2]), - "g" (ipu->u32[3]), - "g" (ipu->u32[4]) - : "cc" - ); - - sum = (sum & 0xffff) + (sum >> 16); - sum = (sum & 0xffff) + (sum >> 16); - - return ~sum; -} - -#else - -#include - -/* Portable version using DPDK checksum code. */ -static inline uint16_t in_cksum_hdr(const struct iphdr *ip) -{ - uint16_t sum = rte_raw_cksum(ip, sizeof(*ip)); - - return ~sum; -} -#endif - /** * Compute checksum of IP header. * Since IP options are rare, optimize for the case of no options @@ -209,45 +159,9 @@ static inline uint16_t in_cksum_hdr(const struct iphdr *ip) static inline uint16_t ip_checksum(const struct iphdr *ip, uint16_t hlen) { if (likely(hlen == sizeof(struct iphdr))) - return in_cksum_hdr(ip); + return dp_in_cksum_hdr(ip); else return in_cksum(ip, hlen); } -/** - * Checksum a TCP, UDP or ICMP IPv4 packet. - * - * The IPv4 header should not contains options. The layer 4 checksum - * must be set to 0 in the packet by the caller. The l4 header must be - * in the first mbuf. - * - * @param pak [in] Pointer to mbuf chain - * @param ip [in] Pointer to the contiguous IP header. Set to NULL for - * ICMP (the pseudo hdr is not checksummed) - * @param l4_hdr [in] Pointer to the beginning of the L4 header - * - * @return - * The complemented checksum to set in the IPv4 TCP, UDP or ICMP header - */ -uint16_t -in4_cksum_mbuf(const struct rte_mbuf *, const struct iphdr *, - const void *); - -/** - * Checksum a TCP, UDP or ICMP IPv6 packet. - * - * The layer 4 checksum must be set to 0 in the packet by the - * caller. The l4 header must be in the first mbuf. 
- * - * @param pak [in] Pointer to mbuf chain - * @param ip [in] Pointer to the contiguous IPv6 header. - * @param l4_hdr [in] Pointer to the beginning of the L4 header - * - * @return - * The complemented checksum to set in the IPv4 TCP, UDP or ICMPv6 header - */ -uint16_t -in6_cksum_mbuf(const struct rte_mbuf *, const struct ip6_hdr *, - const void *); - #endif /* IN_CKSUM_H */ diff --git a/src/ip_addr.h b/src/ip_addr.h index 12edae88..ca4668de 100644 --- a/src/ip_addr.h +++ b/src/ip_addr.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015-2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -13,29 +13,9 @@ #include #include +#include "ip.h" #define IP_ADDR_LEN sizeof(struct in_addr) -/* structure to be used by functions that can take either IPv4 or IPv6 addr */ -struct ip_addr { - uint32_t type; - union { - struct in_addr ip_v4; - struct in6_addr ip_v6; - } address; -}; - -static inline bool addr_eq(const struct ip_addr *addr1, - const struct ip_addr *addr2) -{ - if (addr1->type == AF_INET && addr2->type == AF_INET) - return addr1->address.ip_v4.s_addr == - addr2->address.ip_v4.s_addr; - else if (addr1->type == AF_INET6 && addr2->type == AF_INET6) - return IN6_ARE_ADDR_EQUAL(&addr1->address.ip_v6, - &addr2->address.ip_v6); - return false; -} - /* * Checks if address is set, true if set, false otherwise */ diff --git a/src/ip_commands.c b/src/ip_commands.c index b1542a36..fb573c2d 100644 --- a/src/ip_commands.c +++ b/src/ip_commands.c @@ -1,7 +1,7 @@ /* * IP Commands * - * Copyright (c) 2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2018-2019, AT&T Intellectual Property. All rights reserved. 
* * SPDX-License-Identifier: LGPL-2.1-only */ diff --git a/src/ip_forward.c b/src/ip_forward.c index 04f28a65..dc9af0c2 100644 --- a/src/ip_forward.c +++ b/src/ip_forward.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2011-2017 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -28,31 +28,33 @@ #include #include "arp.h" -#include "bridge_port.h" #include "compat.h" #include "compiler.h" -#include "config.h" +#include "config_internal.h" #include "crypto/crypto.h" #include "crypto/crypto_forward.h" -#include "gre.h" +#include "if/bridge/bridge_port.h" +#include "if/gre.h" +#include "if/macvlan.h" #include "if_llatbl.h" #include "if_var.h" #include "in_cksum.h" +#include "ip_forward.h" #include "ip_funcs.h" #include "ip_icmp.h" #include "ip_mcast.h" #include "ip_options.h" #include "ip_ttl.h" #include "l2tp/l2tpeth.h" -#include "macvlan.h" #include "main.h" #include "mpls/mpls.h" #include "mpls/mpls_forward.h" -#include "nh.h" +#include "nh_common.h" #include "npf/npf.h" #include "npf/npf_if.h" +#include "npf/zones/npf_zone_public.h" #include "npf_shim.h" -#include "pktmbuf.h" +#include "pktmbuf_internal.h" #include "pl_common.h" #include "pl_fused.h" #include "route.h" @@ -61,7 +63,7 @@ #include "udp_handler.h" #include "urcu.h" #include "ip_addr.h" -#include "vrf.h" +#include "vrf_internal.h" /* MTU cutoff for PMTU in IP processing */ unsigned int slowpath_mtu; @@ -69,20 +71,27 @@ unsigned int slowpath_mtu; /* SNMP statistics for UDP in tunnels */ uint64_t udpstats[UDP_MIB_MAX]; +static int ip_output_filter(struct ifnet *ifp, struct rte_mbuf **mp); + ALWAYS_INLINE bool -ip_l2_resolve_and_output(struct ifnet *in_ifp, struct rte_mbuf *m, - struct next_hop *nh, uint16_t proto) +dp_ip_l2_nh_output(struct ifnet *in_ifp, struct rte_mbuf *m, + struct next_hop *nh, uint16_t proto) { struct pl_packet pl_pkt = { 
.mbuf = m, .l2_pkt_type = pkt_mbuf_get_l2_traffic_type(m), .l3_hdr = iphdr(m), .in_ifp = in_ifp, - .out_ifp = nh4_get_ifp(nh), + .out_ifp = dp_nh_get_ifp(nh), .nxt.v4 = nh, .l2_proto = proto, }; + /* Do not call spath features if this is a forwarded pkt */ + if (!in_ifp && proto == ETH_P_IP && + ip_output_filter(pl_pkt.out_ifp, &m)) + return false; + if (!pipeline_fused_ipv4_encap_only(&pl_pkt)) return false; @@ -91,11 +100,23 @@ ip_l2_resolve_and_output(struct ifnet *in_ifp, struct rte_mbuf *m, return true; } +ALWAYS_INLINE bool +dp_ip_l2_intf_output(struct ifnet *in_ifp, struct rte_mbuf *m, + struct ifnet *out_ifp, uint16_t proto) +{ + struct next_hop nh; + + memset(&nh, 0, sizeof(nh)); + nh_set_ifp(&nh, out_ifp); + + return dp_ip_l2_nh_output(in_ifp, m, &nh, proto); +} + /* * l2tp can't use any of the ports registered via udp_handler_register */ -static int ip_udp_tunnel_in(struct rte_mbuf **m, struct iphdr *ip, - struct ifnet *ifp) +int ip_udp_tunnel_in(struct rte_mbuf **m, struct iphdr *ip, + struct ifnet *ifp) { struct rte_mbuf *m0 = *m; @@ -116,40 +137,43 @@ static int ip_udp_tunnel_in(struct rte_mbuf **m, struct iphdr *ip, */ int l4_input(struct rte_mbuf **m, struct ifnet *ifp) { - struct iphdr *ip = iphdr(*m); - int rc, spi; - - if (crypto_policy_check_inbound_terminating(ifp, m, - htons(ETHER_TYPE_IPv4))) - return 0; - - switch (ip->protocol) { - case IPPROTO_UDP: - rc = ip_udp_tunnel_in(m, ip, ifp); - break; + struct pl_packet pl_pkt = { + .mbuf = *m, + .in_ifp = ifp, + }; - case IPPROTO_L2TPV3: - rc = l2tp_ipv4_recv_encap(*m, ip); - break; + pipeline_fused_ipv4_l4(&pl_pkt); - case IPPROTO_GRE: - rc = ip_gre_tunnel_in(m, ip); - break; + return 0; +} - case IPPROTO_ESP: - spi = crypto_retrieve_spi((unsigned char *)ip + - pktmbuf_l3_len(*m)); - rc = crypto_enqueue_inbound_v4(*m, ip, ifp, spi); - break; +/* + * IPv4 originate slow path filter. + * + * Run the originating firewall, and drop the packet if required. 
+ * + * Return an indication of if the packet was consumed. + * 0 => Not filtered + * 1 => Filtered + */ +int ipv4_originate_filter_flags(struct ifnet *out_ifp, struct rte_mbuf *m, + uint16_t npf_flags) +{ + if (out_ifp == NULL) + return 0; - default: + if (npf_originate_fw(out_ifp, npf_flags, + &m, htons(RTE_ETHER_TYPE_IPV4))) { + IPSTAT_INC_VRF(if_vrf(out_ifp), IPSTATS_MIB_OUTDISCARDS); + rte_pktmbuf_free(m); return 1; } + return 0; +} - if (rc < 0) { - IPSTAT_INC_IFP(ifp, IPSTATS_MIB_INDISCARDS); - } - return rc; +int ipv4_originate_filter(struct ifnet *out_ifp, struct rte_mbuf *m) +{ + return ipv4_originate_filter_flags(out_ifp, m, NPF_FLAG_FROM_US); } /* @@ -183,7 +207,7 @@ ip_local_deliver(struct ifnet *ifp, struct rte_mbuf *m) * * Run the local firewall, and discard if so instructed. */ - if (npf_local_fw(ifp, &m, htons(ETHER_TYPE_IPv4))) + if (npf_local_fw(ifp, &m, htons(RTE_ETHER_TYPE_IPV4))) goto discard; IPSTAT_INC_VRF(vrf, IPSTATS_MIB_INDELIVERS); @@ -219,7 +243,7 @@ struct next_hop *ip_lookup(struct rte_mbuf *m, struct ifnet *ifp, /* * Lookup route */ - nxt = rt_lookup(ip->daddr, tbl_id, m); + nxt = dp_rt_lookup(ip->daddr, tbl_id, m); /* * No route to destination? @@ -268,7 +292,7 @@ void ip_out_features(struct rte_mbuf *m, struct ifnet *ifp, }; /* nxt->ifp may be changed by netlink messages. */ - struct ifnet *nxt_ifp = nh4_get_ifp(nxt); + struct ifnet *nxt_ifp = dp_nh_get_ifp(nxt); /* Destination device is not up? */ if (!nxt_ifp || !(nxt_ifp->if_flags & IFF_UP)) { @@ -288,9 +312,16 @@ void ip_out_features(struct rte_mbuf *m, struct ifnet *ifp, */ if (unlikely(nxt_ifp == ifp)) { if (ip_same_network(ifp, addr, ip->saddr) && - ip_redirects_get()) + ip_redirects_get()) { icmp_error(ifp, m, ICMP_REDIRECT, ICMP_REDIR_HOST, addr); + /* + * Cache will have been used for handling the + * ICMP redirect, so ensure it is created again + * when continuing with the original packet. 
+ */ + pl_pkt.npf_flags |= NPF_FLAG_CACHE_EMPTY; + } } } @@ -305,7 +336,6 @@ void ip_out_features(struct rte_mbuf *m, struct ifnet *ifp, drop: __cold_label; rte_pktmbuf_free(m); - return; } static ALWAYS_INLINE @@ -330,15 +360,14 @@ void ip_switch(struct rte_mbuf *m, struct ifnet *ifp, /* MPLS imposition required because nh has given us a label */ if (unlikely(nh_outlabels_present(&nxt->outlabels))) { - union next_hop_v4_or_v6_ptr mpls_nh = { .v4 = nxt }; - - mpls_unlabeled_input(ifp, m, NH_TYPE_V4GW, mpls_nh, ip->ttl); + mpls_unlabeled_input(ifp, m, MPT_IPV4, NH_TYPE_V4GW, nxt, + ip->ttl); return; } /* Store next hop address */ if (nxt->flags & RTF_GATEWAY) - addr = nxt->gateway; + addr = nxt->gateway.address.ip_v4.s_addr; else addr = ip->daddr; @@ -362,7 +391,7 @@ enum ip_packet_validity ip_validate_packet( unsigned int len, ip_len, pkt_len; uint16_t hlen; - assert(pktmbuf_l2_len(m) == + assert(dp_pktmbuf_l2_len(m) == (const char *)ip - rte_pktmbuf_mtod(m, char *)); *needs_slow_path = false; @@ -371,7 +400,7 @@ enum ip_packet_validity ip_validate_packet( * Is packet big enough. * (i.e is there a valid IP header in first segment) */ - len = rte_pktmbuf_data_len(m) - pktmbuf_l2_len(m); + len = rte_pktmbuf_data_len(m) - dp_pktmbuf_l2_len(m); if (len < sizeof(struct iphdr)) goto bad_hdr; @@ -387,7 +416,7 @@ enum ip_packet_validity ip_validate_packet( hlen = ip->ihl << 2; if (hlen < sizeof(struct iphdr) || hlen > len) goto bad_hdr; - pktmbuf_l3_len(m) = hlen; + dp_pktmbuf_l3_len(m) = hlen; /* * Checksum correct? @@ -410,7 +439,7 @@ enum ip_packet_validity ip_validate_packet( ip_dooptions(m, needs_slow_path)) goto bad_hdr; - pkt_len = rte_pktmbuf_pkt_len(m) - pktmbuf_l2_len(m); + pkt_len = rte_pktmbuf_pkt_len(m) - dp_pktmbuf_l2_len(m); /* * Is IP length longer than packet we have got? 
@@ -521,6 +550,9 @@ ip_lookup_and_originate(struct rte_mbuf *m, struct ifnet *in_ifp) if (!nxt) return; + if (ipv4_originate_filter(dp_nh4_get_ifp(nxt), m)) + return; + enum ip4_features ip4_feat = IP4_FEA_ORIGINATE; ip_switch(m, in_ifp, ip, nxt, ip4_feat, NPF_FLAG_CACHE_EMPTY); } @@ -588,17 +620,25 @@ static void ip_spath_frag_output(struct ifnet *ifp __unused, */ static int ip_spath_filter_internal(struct ifnet *ifp, struct ifnet *l2_ifp, - struct rte_mbuf **mp) + struct rte_mbuf **mp, bool from_us) { uint16_t npf_flags = NPF_FLAG_CACHE_EMPTY; struct rte_mbuf *m = *mp; struct iphdr *ip = iphdr(m); - pktmbuf_l3_len(m) = ip->ihl << 2; + dp_pktmbuf_l3_len(m) = ip->ihl << 2; - /* The kernel can still forward some packets, identify them. */ - if (!ip->saddr || is_local_ipv4(if_vrfid(ifp), ip->saddr)) + /* + * The kernel can still forward some packets, identify them. Some + * locally generated ICMP packets may use a CGNAT or SNAT address, + * hence we use the from_us parameter to ensure we set the npf flags + * appropriately. + */ + if (from_us || !ip->saddr || is_local_ipv4(if_vrfid(ifp), ip->saddr)) { npf_flags |= NPF_FLAG_FROM_US | NPF_FLAG_FROM_LOCAL; + if (npf_zone_local_is_set()) + npf_flags |= NPF_FLAG_FROM_ZONE; + } /* * The kernel can L2 forward some bridged packets (i.e. 
IP broadcasts @@ -615,9 +655,10 @@ ip_spath_filter_internal(struct ifnet *ifp, struct ifnet *l2_ifp, .npf_flags = npf_flags, .in_ifp = NULL, .out_ifp = ifp, + .l2_proto = ETH_P_IP, }; - if (!pipeline_fused_ipv4_defrag_out_spath(&pl_pkt)) + if (!pipeline_fused_ipv4_out_spath(&pl_pkt)) return 1; if (unlikely(m != pl_pkt.mbuf)) @@ -639,7 +680,12 @@ ip_spath_filter(struct ifnet *l2_ifp, struct rte_mbuf **mp) else ifp = l2_ifp; - return ip_spath_filter_internal(ifp, l2_ifp, mp); + return ip_spath_filter_internal(ifp, l2_ifp, mp, false); +} + +static int ip_output_filter(struct ifnet *ifp, struct rte_mbuf **mp) +{ + return ip_spath_filter_internal(ifp, ifp, mp, true); } /* @@ -660,7 +706,7 @@ ip_spath_output(struct ifnet *l2_ifp, struct rte_mbuf *m) else ifp = l2_ifp; - if (ip_spath_filter_internal(ifp, l2_ifp, &m)) + if (ip_spath_filter_internal(ifp, l2_ifp, &m, false)) return 0; /* filtered or reassembled */ /* re-frag if needed */ diff --git a/src/ip_funcs.h b/src/ip_funcs.h index d110d47d..305ebc22 100644 --- a/src/ip_funcs.h +++ b/src/ip_funcs.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2011-2016 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -15,8 +15,9 @@ #include #include "compiler.h" +#include "ip.h" #include "iptun_common.h" -#include "pktmbuf.h" +#include "pktmbuf_internal.h" #include "util.h" /* Is this address limited broadcast */ @@ -49,7 +50,7 @@ struct next_hop; static inline struct iphdr *iphdr(const struct rte_mbuf *m) { - return pktmbuf_mtol3(m, struct iphdr *); + return dp_pktmbuf_mtol3(m, struct iphdr *); } static inline bool ip_is_fragment(const struct iphdr *ip) @@ -94,11 +95,27 @@ int ip_spath_output(struct ifnet *, struct rte_mbuf *); int ip6_spath_filter(struct ifnet *, struct rte_mbuf **); int ip6_spath_output(struct ifnet *, struct rte_mbuf *); +int ipv4_originate_filter(struct ifnet *ifp, struct rte_mbuf *m); +int ipv4_originate_filter_flags(struct ifnet *out_ifp, struct rte_mbuf *m, + uint16_t npf_flags); + void ip_local_deliver(struct ifnet *ifp, struct rte_mbuf *m) __cold_func; int l4_input(struct rte_mbuf **m, struct ifnet *ifp); +int ip_udp_tunnel_in(struct rte_mbuf **m, struct iphdr *ip, + struct ifnet *ifp); + +void ip_forward_egress(struct ifnet *out_ifp, struct rte_mbuf *, + in_addr_t nh_addr, struct ifnet *in_ifp) + __hot_func; +int +ip_spath_output_with_eth_encap(struct ifnet *out_ifp, struct rte_mbuf *m, + in_addr_t nh_addr); + +struct nlattr; + static inline void ip_tos_ecn_clear(uint8_t *tos) { *tos = *tos & ~IPTOS_ECN_MASK; @@ -165,7 +182,6 @@ static inline void ip_tos_set_ecn_ce(struct iphdr *ip) } void ip_id_init(void); -uint16_t ip_randomid(uint16_t salt); u_int16_t icmp_common_exthdr(struct rte_mbuf *m, uint16_t cnum, uint8_t ctype, void *buf, void *ip_hdr, int hlen, u_int16_t ip_total_len, void *dataun, @@ -173,10 +189,6 @@ u_int16_t icmp_common_exthdr(struct rte_mbuf *m, uint16_t cnum, uint8_t ctype, bool ip_l2_resolve(struct ifnet *in_ifp, struct rte_mbuf *m, struct next_hop *nh, uint16_t proto); -bool ip_l2_resolve_and_output(struct ifnet *in_ifp, - struct rte_mbuf *m, - struct next_hop *nh, - uint16_t proto); bool ip_validate_packet_and_count(struct 
rte_mbuf *m, const struct iphdr *ip, struct ifnet *ifp, bool *needs_slow_path); diff --git a/src/ip_icmp.c b/src/ip_icmp.c index 8f904fca..7b342b93 100644 --- a/src/ip_icmp.c +++ b/src/ip_icmp.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2021, AT&T Intellectual Property. All rights reserved. * Copyright (c) 1982, 1986, 1988, 1993 * The Regents of the University of California. All rights reserved. * @@ -49,18 +49,19 @@ #include #include +#include "if/macvlan.h" #include "if_var.h" #include "in_cksum.h" #include "ip_funcs.h" #include "ip_icmp.h" -#include "pktmbuf.h" +#include "pktmbuf_internal.h" #include "route.h" #include "snmp_mib.h" #include "urcu.h" #include "util.h" +#include "nh_common.h" #include "npf/npf_nat.h" -#include "npf/cgnat/cgn.h" -#include "macvlan.h" +#include "npf/cgnat/cgn_public.h" #include "fal.h" #include "netinet6/ip6_funcs.h" #include "vplane_log.h" @@ -161,7 +162,7 @@ icmp_prepare_send(struct rte_mbuf *m) ip = iphdr(m); hlen = ip->ihl << 2; - ip->id = ip_randomid(0); + ip->id = dp_ip_randomid(0); ip->check = 0; ip->check = in_cksum(ip, hlen); @@ -192,6 +193,44 @@ icmp_send(struct rte_mbuf *m, bool srced_forus) ip_output(m, srced_forus); } +/* + * Send an ICMP packet *without* doing a route lookup. Assumes that the dest + * ether address already contains the next-hop ether address. 
+ */ +static bool +icmp_send_no_route(struct rte_mbuf *m, struct ifnet *out_ifp) +{ + struct iphdr *ip; + int hlen; + struct icmphdr *icp; + struct next_hop singlehop_nh; + struct next_hop *nh = NULL; + + if (!(out_ifp->if_flags & IFF_UP)) { + rte_pktmbuf_free(m); + return false; + } + + icmp_prepare_send(m); + + ip = iphdr(m); + hlen = ip->ihl << 2; + icp = (struct icmphdr *) ((char *)ip + hlen); + + icmp_out_inc(pktmbuf_get_vrf(m), icp->type); + + memset(&singlehop_nh, 0, sizeof(singlehop_nh)); + nh_set_ifp(&singlehop_nh, out_ifp); + nh = &singlehop_nh; + + if (dp_ip_l2_nh_output(NULL, m, nh, ETH_P_IP)) { + IPSTAT_INC_IFP(out_ifp, IPSTATS_MIB_OUTPKTS); + return true; + } + + return false; +} + /* * Reflect the ip packet back to the source * @@ -361,10 +400,12 @@ icmp_do_error(struct rte_mbuf *n, int type, int code, uint32_t info, struct rte_mbuf *m; unsigned int icmplen, icmpelen, pktlen; - if (n->ol_flags & PKT_RX_SEEN_BY_CRYPTO) - return NULL; + if (n->ol_flags & PKT_RX_SEEN_BY_CRYPTO) { + if (!inif || (inif->if_type != IFT_TUNNEL_VTI)) + return NULL; + } - oiphlen = pktmbuf_l3_len(n); + oiphlen = dp_pktmbuf_l3_len(n); /* * Don't send error: @@ -385,7 +426,7 @@ icmp_do_error(struct rte_mbuf *n, int type, int code, uint32_t info, return NULL; /* Drop if IP header plus 8 bytes is not contiguous in first mbuf. */ - pktlen = rte_pktmbuf_data_len(n) - pktmbuf_l2_len(n); + pktlen = rte_pktmbuf_data_len(n) - dp_pktmbuf_l2_len(n); if (oiphlen + sizeof(struct icmphdr) > pktlen) return NULL; @@ -451,8 +492,9 @@ icmp_do_error(struct rte_mbuf *n, int type, int code, uint32_t info, * reply should bypass as well. 
*/ mlen = sizeof(struct iphdr) + sizeof(struct icmphdr) + icmplen; - rte_pktmbuf_pkt_len(m) = rte_pktmbuf_data_len(m) = mlen + pktmbuf_l2_len(n); - pktmbuf_l2_len(m) = pktmbuf_l2_len(n); + rte_pktmbuf_pkt_len(m) = rte_pktmbuf_data_len(m) = + mlen + dp_pktmbuf_l2_len(n); + dp_pktmbuf_l2_len(m) = dp_pktmbuf_l2_len(n); nip = iphdr(m); icp = (struct icmphdr *) ((char *) nip + sizeof(struct iphdr)); @@ -492,14 +534,15 @@ icmp_do_error(struct rte_mbuf *n, int type, int code, uint32_t info, static void icmp_do_reflect(const struct ifnet *rcvif, struct rte_mbuf *m_in, struct rte_mbuf *m_out) { - struct ether_hdr *eh; + struct rte_ether_hdr *eh; struct ifnet *vrrp_ifp; if (!m_out) return; - eh = rte_pktmbuf_mtod(m_in, struct ether_hdr *); - vrrp_ifp = macvlan_get_vrrp_if(rcvif, (struct ether_addr *)&eh->d_addr); + eh = rte_pktmbuf_mtod(m_in, struct rte_ether_hdr *); + vrrp_ifp = macvlan_get_vrrp_if(rcvif, + (struct rte_ether_addr *)&eh->d_addr); if (vrrp_ifp) icmp_reflect(vrrp_ifp, m_out); else @@ -528,6 +571,105 @@ icmp_error_out(const struct ifnet *rcvif, struct rte_mbuf *n, icmp_do_reflect(rcvif, n, m); } +/* + * Send ICMP echo reply out the rcv interface in response to an echo request. + */ +static struct rte_mbuf * +icmp_do_echo_reply(struct ifnet *ifp, struct rte_mbuf *n, bool reflect) +{ + const struct iphdr *oip = iphdr(n); + struct iphdr *nip; + struct icmphdr *nicmp; + struct rte_mbuf *m; + uint pktlen; + + /* Drop if there are any IP options */ + if (dp_pktmbuf_l3_len(n) > sizeof(struct iphdr)) + return NULL; + + /* Drop if IP header plus 8 bytes is not contiguous in first mbuf. */ + pktlen = rte_pktmbuf_data_len(n) - dp_pktmbuf_l2_len(n); + + if (sizeof(struct iphdr) + sizeof(struct icmphdr) > pktlen) + return NULL; + + /* Make a copy of the ICMP Request packet */ + m = pktmbuf_copy(n, n->pool); + if (m == NULL) + return NULL; + + /* + * Drop if the new packet is not all in the one mbuf. 
The ICMP + * checksum is calculated over the ICMP header and ICMP data, and this + * assumes these are contiguous. + */ + if (rte_pktmbuf_data_len(m) != rte_pktmbuf_pkt_len(m)) { + rte_pktmbuf_free(m); + return NULL; + } + + /* preserve the input port number for use by shadow interface */ + m->port = n->port; + + struct rte_ether_hdr *neh = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); + + /* Ethernet source addr is interface address */ + rte_ether_addr_copy(&ifp->eth_addr, &neh->s_addr); + + if (reflect) { + /* Echo req source ether is echo reply dest ether */ + struct rte_ether_hdr *oeh = + rte_pktmbuf_mtod(n, struct rte_ether_hdr *); + rte_ether_addr_copy(&oeh->s_addr, &neh->d_addr); + } + + nip = iphdr(m); + + /* Swap source and dest IP addrs from icmp request */ + nip->saddr = oip->daddr; + nip->daddr = oip->saddr; + + nip->ihl = 5; + nip->version = IPVERSION; + nip->tos = 0; + nip->tot_len = oip->tot_len; + nip->frag_off = 0; + nip->protocol = IPPROTO_ICMP; + nip->ttl = IPDEFTTL; + + /* Change the icmp type to ICMP_ECHOREPLY */ + nicmp = (struct icmphdr *)((char *)nip + sizeof(struct iphdr)); + nicmp->type = ICMP_ECHOREPLY; + + pktmbuf_mdata_set(m, PKT_MDATA_FROM_US); + return m; +} + +/* + * Send ICMP echo reply out the receive interface in response to an echo + * request. + * + * Returns true if successful. 
+ */ +bool icmp_echo_reply_out(struct ifnet *rcvifp, struct rte_mbuf *n, + bool reflect) +{ + struct rte_mbuf *m; + bool rv = true; + + m = icmp_do_echo_reply(rcvifp, n, reflect); + if (!m) + return false; + + if (reflect) + /* Reflect reply directly back to sender */ + rv = icmp_send_no_route(m, rcvifp); + else + icmp_send(m, false); + + return rv; +} + u_int16_t icmp_common_exthdr(struct rte_mbuf *m, uint16_t cnum, uint8_t ctype, void *buf, void *ip_hdr, int hlen, u_int16_t ip_total_len, @@ -578,7 +720,7 @@ icmp_common_exthdr(struct rte_mbuf *m, uint16_t cnum, uint8_t ctype, ieh->ieh_cksum = in_cksum(ieh, sizeof(hdr) + len); rte_pktmbuf_pkt_len(m) = rte_pktmbuf_data_len(m) = - hlen + off + sizeof(hdr) + len + pktmbuf_l2_len(m); + hlen + off + sizeof(hdr) + len + dp_pktmbuf_l2_len(m); return hlen + off + sizeof(hdr) + len; } @@ -592,7 +734,7 @@ icmp_do_exthdr(struct rte_mbuf *m, uint16_t class, uint8_t ctype, void *buf, struct icmphdr *icp; int hlen; - hlen = pktmbuf_l3_len(m); + hlen = dp_pktmbuf_l3_len(m); icp = (struct icmphdr *) ((char *) ip + hlen); if (icp->type != ICMP_TIME_EXCEEDED && icp->type != ICMP_DEST_UNREACH && icp->type != ICMP_PARAMETERPROB) diff --git a/src/ip_icmp.h b/src/ip_icmp.h index da1f017f..5c35244a 100644 --- a/src/ip_icmp.h +++ b/src/ip_icmp.h @@ -1,7 +1,7 @@ /* * Public functions defined in ip_icmp.c * - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2021, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2011-2016 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -14,6 +14,8 @@ #include #include +#include "compiler.h" + struct ifnet; struct rte_mbuf; @@ -24,16 +26,18 @@ void ip_redirects_set(bool enable); bool ip_redirects_get(void); void icmp_error(const struct ifnet *rcvif, struct rte_mbuf *n, int type, int code, uint32_t info) - __attribute__((cold)); + __cold_func; void icmp_error_out(const struct ifnet *rcvif, struct rte_mbuf *n, int type, int code, uint32_t info, const struct ifnet *outif) - __attribute__((cold)); + __cold_func; struct rte_mbuf *icmp_do_error(struct rte_mbuf *n, int type, int code, uint32_t info, const struct ifnet *in, const struct ifnet *out); int icmp_do_exthdr(struct rte_mbuf *m, uint16_t class, uint8_t ctype, void *buf, unsigned int len); void icmp_prepare_send(struct rte_mbuf *m); +bool icmp_echo_reply_out(struct ifnet *rcvifp, struct rte_mbuf *n, + bool reflect); #endif diff --git a/src/ip_id.c b/src/ip_id.c index 41b1e5ac..221fb43e 100644 --- a/src/ip_id.c +++ b/src/ip_id.c @@ -3,7 +3,7 @@ /*- * Copyright (c) 2008 The NetBSD Foundation, Inc. * All rights reserved. - * Copyright (c) 2017, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017,2019-2020, AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: (LGPL-2.1-only AND BSD-2-Clause-NETBSD) * @@ -86,7 +86,7 @@ void ip_id_init(void) } } -uint16_t ip_randomid(uint16_t salt) +uint16_t dp_ip_randomid(uint16_t salt) { uint32_t r, k, id; diff --git a/src/ip_mcast.c b/src/ip_mcast.c index 3535f9bd..a80a3c39 100644 --- a/src/ip_mcast.c +++ b/src/ip_mcast.c @@ -6,7 +6,7 @@ * * Copyright (c) 2014-2016 by Brocade Communications Systems, Inc. * All rights reserved. - * Copyright (c) 2017, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017,2019-2020, AT&T Intellectual Property. All rights reserved. 
* * SPDX-License-Identifier: LGPL-2.1-only */ @@ -25,67 +25,18 @@ #include #include -#include "gre.h" +#include "if/gre.h" #include "if_var.h" #include "ip6_mroute.h" #include "ip_icmp.h" #include "ip_mcast.h" #include "main.h" #include "netinet/ip_mroute.h" -#include "pktmbuf.h" +#include "pktmbuf_internal.h" #include "snmp_mib.h" #include "vplane_debug.h" #include "vplane_log.h" -/* destination ethernet address is unique for each mcast group */ -static void mcast_eth_output(struct rte_mbuf *m, struct ifnet *ifp, - struct ifnet *rcvif) -{ - - struct ether_hdr *eth_hdr; - - /* set ethernet source address */ - eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *); - ether_addr_copy(&ifp->eth_addr, ð_hdr->s_addr); - - IPSTAT_INC_IFP(ifp, IPSTATS_MIB_OUTMCASTPKTS); - if_output(ifp, m, rcvif, ETH_P_IP); -} - -static void mcast_ip_fragment(struct rte_mbuf *mm, struct ifnet *ifp) -{ - DP_DEBUG(MULTICAST, INFO, MCAST, - "Multicast packet fragmentation unsupported on %s.\n", - ifp->if_name); - IPSTAT_INC_IFP(ifp, IPSTATS_MIB_OUTDISCARDS); - rte_pktmbuf_free(mm); -} - -/* fast-path output */ -int mc_ip_output(struct ifnet *rcvif, - struct rte_mbuf *m, struct ifnet *ifp, struct iphdr *ip) -{ - /* Destination device is not up? */ - if (unlikely(!(ifp->if_flags & IFF_UP))) - return -1; - - /* - * is fragmentation necessary - */ - if (likely(ntohs(ip->tot_len) <= ifp->if_mtu)) - mcast_eth_output(m, ifp, rcvif); /* send it */ - else if (ip->frag_off & htons(IP_DF)) { - /* Handle with icmp reply needfrag for TCP MTU discovery */ - IPSTAT_INC_IFP(rcvif, IPSTATS_MIB_FRAGFAILS); - icmp_error(rcvif, m, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, - htons(ifp->if_mtu)); - rte_pktmbuf_free(m); - } else - mcast_ip_fragment(m, ifp); /* needs fragmentation */ - - return 0; -} - /* * Display old and new value of interface flags of interest to * multicast, after receipt of RTM_NEWLINK or RTM_DELLINK message. 
@@ -117,18 +68,6 @@ void mc_debug_if_flags(struct ifnet *ifp, unsigned int new_flags, (new_flags & IFF_ALLMULTI) ? "set" : "clr"); } -void mc_del_if(int ifindex) -{ - del_vif(ifindex); - del_m6if(ifindex); -} - -void mrt_purge(struct ifnet *ifp) -{ - mrt4_purge(ifp); - mrt6_purge(ifp); -} - void mc_dumpall(FILE *f, struct vrf *vrf) { mvif_dump(f, vrf); @@ -166,11 +105,12 @@ struct rte_mbuf *mcast_create_l2l3_header(struct rte_mbuf *m_header, m_newheader = pktmbuf_alloc(m_header->pool, pktmbuf_get_vrf(m_header)); if (m_newheader) { - char *hdr_ptr = rte_pktmbuf_append(m_newheader, - pktmbuf_l2_len(m_header) + iphdrlen); + char *hdr_ptr = rte_pktmbuf_append( + m_newheader, + dp_pktmbuf_l2_len(m_header) + iphdrlen); memcpy(hdr_ptr, rte_pktmbuf_mtod(m_header, char *), - pktmbuf_l2_len(m_header) + iphdrlen); - pktmbuf_l2_len(m_newheader) = pktmbuf_l2_len(m_header); + dp_pktmbuf_l2_len(m_header) + iphdrlen); + dp_pktmbuf_l2_len(m_newheader) = dp_pktmbuf_l2_len(m_header); /* Attach mew header mbuf to data mbuf. Increment * ref count on data mbuf due to new attachment. @@ -243,3 +183,46 @@ mcast_mgre_tunnel_endpoint_send(struct ifnet *out_ifp, gre_tunnel_fragment_and_send(in_ifp, out_ifp, tun_endpoint_addr, m_header, proto); } + +/* + * allocate a per-vrf index for the multicast VIF. This allows us to have up + * to 256 multicast enabled interfaces per vrf. 
+ */ +int mcast_iftable_get_free_slot(struct if_set *mfc_ifset, int ifindex, + unsigned char *vif_index) +{ + unsigned char index = (unsigned char)ifindex; + int i; + + if (!mfc_ifset) + return -1; + + /* if the mod 8 of the ifindex is available use it */ + if (!IF_ISSET(index, mfc_ifset)) { + IF_SET(index, mfc_ifset); + *vif_index = index; + return 0; + } + + /* iterate up to limit of the if_set to find a free slot */ + for (i = index + 1; i < IF_SETSIZE; i++) { + if (!IF_ISSET(i, mfc_ifset)) { + IF_SET(i, mfc_ifset); + *vif_index = i; + return 0; + } + } + + /* start iterating up from 0 if we have not already done so */ + if (index) { + for (i = 0; i < index; i++) { + if (!IF_ISSET(i, mfc_ifset)) { + IF_SET(i, mfc_ifset); + *vif_index = i; + return 0; + } + } + } + + return -ENOSPC; +} diff --git a/src/ip_mcast.h b/src/ip_mcast.h index 3236f23a..52b5d382 100644 --- a/src/ip_mcast.h +++ b/src/ip_mcast.h @@ -69,7 +69,7 @@ mcast_mgre_tunnel_endpoint_send(struct ifnet *out_ifp, typedef union { uint64_t as_int; - struct ether_addr as_addr; + struct rte_ether_addr as_addr; } mcast_dst_eth_addr_t; /* @@ -120,10 +120,6 @@ struct rte_mbuf *mcast_create_l2l3_header(struct rte_mbuf *m_header, struct vif *get_vif_by_ifindex(unsigned int ifindex); struct mif6 *get_mif_by_ifindex(unsigned int ifindex); -int mc_ip_output(struct ifnet *rcvif, - struct rte_mbuf *m, struct ifnet *oifp, - struct iphdr *ip); - struct vmfcctl; struct vmf6cctl; @@ -139,9 +135,6 @@ int del_m6if(mifi_t); void mc_debug_if_flags(struct ifnet *ifp, unsigned int new_flags, unsigned int msg_type); -void mrt_purge(struct ifnet *ifp); -void mrt4_purge(struct ifnet *ifp); -void mrt6_purge(struct ifnet *ifp); /* Multicast Fastpath Route Lookup */ int mcast_ip(struct iphdr *, struct ifnet *, struct rte_mbuf *); @@ -151,12 +144,6 @@ int mcast_ip6(struct ip6_hdr *, struct ifnet *, struct rte_mbuf *); void mcast_ip_deliver(struct ifnet *ifp, struct rte_mbuf *m); void mcast_ip6_deliver(struct ifnet *ifp, struct 
rte_mbuf *m); -void mcast_init_ipv4(void); -void mcast_init_ipv6(void); - -int mcast_stop_ipv4(void); -int mcast_stop_ipv6(void); - int mcast_vrf_init(struct vrf *vrf); void mcast_vrf_uninit(struct vrf *vrf); @@ -183,4 +170,6 @@ void mvif6_dump(FILE *f, struct vrf *vrf); void send_sg_cnt(struct sioc_sg_req *rq, vrfid_t vrf_id, uint32_t flags); void send_sg6_cnt(struct sioc_sg_req6 *rq, vrfid_t vrf_id, uint32_t flags); +int mcast_iftable_get_free_slot(struct if_set *mfc_ifset, int ifindex, + unsigned char *vif_index); #endif diff --git a/src/ip_mcast_fal_interface.c b/src/ip_mcast_fal_interface.c index 4e50a578..bfb3e2c4 100644 --- a/src/ip_mcast_fal_interface.c +++ b/src/ip_mcast_fal_interface.c @@ -1,7 +1,11 @@ -/* Copyright (c) 2019, AT&T Intellectual Property. */ +/* + * Copyright (c) 2019-2020, AT&T Intellectual Property. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ #include "fal.h" #include "fal_plugin.h" -#include "vrf.h" +#include "vrf_internal.h" #include "vplane_debug.h" #include "ip_mcast_fal_interface.h" @@ -216,6 +220,9 @@ static int fal_mcast_int_disable(struct cds_lfht *viftable, vrfid_t vrf_id, { int ret; + if (!*rpf_lst) + return 0; + if ((*rpf_lst)->count == 1) { fal_cleanup_ipmc_rpf_group(fal_rpf, rpf_lst); diff --git a/src/ip_mcast_fal_interface.h b/src/ip_mcast_fal_interface.h index 94dbcd85..f6cd7d13 100644 --- a/src/ip_mcast_fal_interface.h +++ b/src/ip_mcast_fal_interface.h @@ -1,4 +1,8 @@ -/* Copyright (c) 2019, AT&T Intellectual Property. */ +/* + * Copyright (c) 2019, AT&T Intellectual Property. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ #ifndef __IP_MCAST_FAL_INTERFACE_H__ #define __IP_MCAST_FAL_INTERFACE_H__ diff --git a/src/ip_netlink.c b/src/ip_netlink.c index 54233ae0..7a4875c5 100644 --- a/src/ip_netlink.c +++ b/src/ip_netlink.c @@ -1,7 +1,7 @@ /* * Handle IPv4 rtnetlink events * - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. 
All rights reserved. * Copyright (c) 2011-2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -33,15 +34,15 @@ #include "address.h" #include "control.h" -#include "ecmp.h" #include "dp_event.h" -#include "gre.h" +#include "ecmp.h" +#include "if/gre.h" #include "if_ether.h" #include "if_var.h" #include "ip_addr.h" #include "ip_funcs.h" #include "ip_mcast.h" -#include "master.h" +#include "controller.h" #include "mpls/mpls.h" #include "netinet/ip_mroute.h" #include "netinet6/ip6_funcs.h" @@ -56,10 +57,17 @@ #include "util.h" #include "vplane_debug.h" #include "vplane_log.h" -#include "vrf.h" +#include "vrf_internal.h" #include "vrf_if.h" -static const char anyaddr[16]; +#define IN6_SET_ADDR_V4MAPPED(a6, a4) { \ + (a6)->s6_addr32[0] = 0; \ + (a6)->s6_addr32[1] = 0; \ + (a6)->s6_addr32[2] = htonl(0xffff); \ + (a6)->s6_addr32[3] = (a4); \ + } + +static const struct in6_addr anyaddr; /* Callback to process neighbor messages */ static int inet_neigh_change(const struct nlmsghdr *nlh, @@ -69,12 +77,12 @@ static int inet_neigh_change(const struct nlmsghdr *nlh, { struct ifnet *ifp; const void *lladdr = NULL; - struct ether_addr ea; - const void *dst = anyaddr; + struct rte_ether_addr ea; + const void *dst = &anyaddr; size_t llen = 0; /* ignore neighbor updates for non DPDK interfaces */ - ifp = ifnet_byifindex(cont_src_ifindex(cont_src, ndm->ndm_ifindex)); + ifp = dp_ifnet_byifindex(cont_src_ifindex(cont_src, ndm->ndm_ifindex)); if (!ifp) return MNL_CB_OK; @@ -89,7 +97,7 @@ static int inet_neigh_change(const struct nlmsghdr *nlh, dst = mnl_attr_get_payload(tb[NDA_DST]); if (llen) { - if (llen > ETHER_ADDR_LEN) { + if (llen > RTE_ETHER_ADDR_LEN) { /* We do not support neighbours with IPv6 as the NH.*/ RTE_LOG(DEBUG, ROUTE, "neighbor message with addrlen = %zd not processed\n", @@ -98,9 +106,9 @@ static int inet_neigh_change(const struct nlmsghdr *nlh, } lladdr = 
mnl_attr_get_payload(tb[NDA_LLADDR]); - if (llen < ETHER_ADDR_LEN && lladdr != NULL) { - memset(&ea, 0, ETHER_ADDR_LEN); - /* Don't use ether_addr_copy here */ + if (llen < RTE_ETHER_ADDR_LEN && lladdr != NULL) { + memset(&ea, 0, RTE_ETHER_ADDR_LEN); + /* Don't use rte_ether_addr_copy here */ lladdr = memcpy(&ea, lladdr, llen); RTE_LOG(DEBUG, ROUTE, "neighbor message with addrlen = %zd %s\n", @@ -113,6 +121,555 @@ static int inet_neigh_change(const struct nlmsghdr *nlh, return MNL_CB_OK; } +/* Callback to store route attributes */ +static int route_attr(const struct nlattr *attr, void *data) +{ + const struct nlattr **tb = data; + unsigned int type = mnl_attr_get_type(attr); + + if (type <= RTA_MAX) + tb[type] = attr; + + return MNL_CB_OK; +} + +/* Fill nexthop struct */ +static bool nexthop_fill(struct nlattr *ntb_gateway, struct nlattr *ntb_encap, + struct rtnexthop *nhp, struct next_hop *next) +{ + label_t labels[NH_MAX_OUT_LABELS]; + uint16_t num_labels = 0; + void *labels_ptr; + uint32_t len; + int err; + struct ifnet *ifp; + + nh_outlabels_set(&next->outlabels, 0, NULL); + + nh_set_ifp(next, dp_ifnet_byifindex(nhp->rtnh_ifindex)); + if (!dp_nh_get_ifp(next) && !is_ignored_interface(nhp->rtnh_ifindex)) + return true; + if (ntb_gateway) { + next->gateway.address.ip_v4.s_addr = + mnl_attr_get_u32(ntb_gateway); + next->flags = RTF_GATEWAY; + } else { + next->gateway.address.ip_v4.s_addr = INADDR_ANY; + next->flags = 0; + } + next->gateway.type = AF_INET; + + if (ntb_encap) { + len = mnl_attr_get_payload_len(ntb_encap); + labels_ptr = mnl_attr_get_payload(ntb_encap); + err = rta_encap_get_labels(labels_ptr, len, + ARRAY_SIZE(labels), + labels, &num_labels); + if (err) { + RTE_LOG(NOTICE, MPLS, + "malformed label stack in netlink message\n"); + return false; + } + nh_outlabels_set(&next->outlabels, num_labels, labels); + } + + ifp = dp_nh_get_ifp(next); + if ((!ifp || ifp->if_type == IFT_LOOP) && + num_labels == 0) + /* no dp interface or via loopback */ + 
next->flags |= RTF_SLOWPATH; + + if (num_labels > 0 && !is_lo(ifp)) + /* Output label rather than local label */ + next->flags |= RTF_OUTLABEL; + + return false; +} + +static int mpls_payload_attr(const struct nlattr *attr, void *data) +{ + const struct nlattr **tb = data; + int type = mnl_attr_get_type(attr); + + if (mnl_attr_type_valid(attr, RTMPA_NH_FLAGS) < 0) + return MNL_CB_OK; + + switch (type) { + case RTMPA_TYPE: + case RTMPA_NH_FLAGS: + if (mnl_attr_validate(attr, MNL_TYPE_U32) < 0) { + RTE_LOG(NOTICE, MPLS, + "invalid mpls payload attribute %d\n", type); + return MNL_CB_ERROR; + } + break; + } + + tb[type] = attr; + return MNL_CB_OK; +} + +static bool nexthop_fill_mpls_common(const struct nlattr *ntb_newdst, + union next_hop_outlabels *outlabels, + bool bos_only) +{ + label_t labels[NH_MAX_OUT_LABELS]; + uint16_t num_labels = 0; + void *labels_ptr; + uint32_t len; + int ret; + + if (ntb_newdst) { + len = mnl_attr_get_payload_len(ntb_newdst); + labels_ptr = mnl_attr_get_payload(ntb_newdst); + ret = rta_encap_get_labels(labels_ptr, len, + ARRAY_SIZE(labels), + labels, &num_labels); + if (ret) { + RTE_LOG(NOTICE, MPLS, + "malformed label stack in netlink message\n"); + return false; + } + nh_outlabels_set(outlabels, num_labels, labels); + } + + /* + * If there are no labels and BOS_ONLY not + * set, then this implies the implicit-null + * label. This won't go out on the wire and is + * for signaling only. + */ + if (num_labels == 0 && !bos_only) { + label_t lbl[1] = { MPLS_LABEL_IMPLNULL }; + + nh_outlabels_set(outlabels, 1, lbl); + } + + return false; +} + +/* + * Fill nh struct from an mpls route add netlink - which uses different + * attributes - via, newdest instead of gateway, encap. 
+ */ +static bool nexthop_fill_mpls(struct nlattr *ntb_via, struct nlattr *ntb_newdst, + struct nlattr *ntb_payload, + struct rtnexthop *nhp, struct next_hop *next) +{ + const struct nlattr *pl_tb[RTMPA_NH_FLAGS+1]; + bool bos_only = false; + int ret; + + if (ntb_payload) { + ret = mnl_attr_parse_nested(ntb_payload, mpls_payload_attr, + &pl_tb); + if (ret == MNL_CB_OK && pl_tb[RTMPA_NH_FLAGS]) + bos_only = (mnl_attr_get_u32(pl_tb[RTMPA_NH_FLAGS]) & + RTMPNF_BOS_ONLY) != 0; + } + + /* initialize out labels to NULL */ + nh_outlabels_set(&next->outlabels, 0, NULL); + + nh_set_ifp(next, dp_ifnet_byifindex(nhp->rtnh_ifindex)); + if (!dp_nh_get_ifp(next) && !is_ignored_interface(nhp->rtnh_ifindex)) + return true; + if (ntb_via) { + const struct rtvia *via; + in_addr_t nh = INADDR_NONE; + + via = mnl_attr_get_payload(ntb_via); + if (via->rtvia_family == AF_INET) { + memcpy(&nh, &via->rtvia_addr, sizeof(nh)); + next->flags = RTF_GATEWAY; + } else { + RTE_LOG(NOTICE, MPLS, + "unsupported via AF %d in netlink message\n", + via->rtvia_family); + } + + next->gateway.address.ip_v4.s_addr = nh; + } else { + next->gateway.address.ip_v4.s_addr = INADDR_ANY; + next->flags = 0; + } + next->gateway.type = AF_INET; + + ret = nexthop_fill_mpls_common(ntb_newdst, &next->outlabels, bos_only); + if (!dp_nh_get_ifp(next)) + next->flags |= RTF_SLOWPATH; + + return ret; +} + +/* + * Fill nh6 struct from an mpls route add netlink. 
+ */ +static bool nexthop6_fill_mpls(const struct nlattr *ntb_via, + const struct nlattr *ntb_newdst, + const struct nlattr *ntb_payload, + const struct rtnexthop *nhp, + struct next_hop *next) +{ + const struct nlattr *pl_tb[RTMPA_NH_FLAGS+1]; + struct in6_addr nh6 = IN6ADDR_ANY_INIT; + bool bos_only = false; + int ret; + + if (ntb_payload) { + ret = mnl_attr_parse_nested(ntb_payload, mpls_payload_attr, + &pl_tb); + if (ret == MNL_CB_OK && pl_tb[RTMPA_NH_FLAGS]) + bos_only = (mnl_attr_get_u32(pl_tb[RTMPA_NH_FLAGS]) & + RTMPNF_BOS_ONLY) != 0; + } + + /* initialise out labels to NULL */ + nh_outlabels_set(&next->outlabels, 0, NULL); + + nh_set_ifp(next, dp_ifnet_byifindex(nhp->rtnh_ifindex)); + if (!dp_nh_get_ifp(next) && !is_ignored_interface(nhp->rtnh_ifindex)) + return true; + if (ntb_via) { + const struct rtvia *via; + in_addr_t nh = INADDR_NONE; + + via = mnl_attr_get_payload(ntb_via); + if (via->rtvia_family == AF_INET) { + memcpy(&nh, &via->rtvia_addr, sizeof(nh)); + IN6_SET_ADDR_V4MAPPED(&nh6, nh); + } else if (via->rtvia_family == AF_INET6) { + memcpy(&nh6, &via->rtvia_addr, sizeof(nh6)); + } else { + RTE_LOG(NOTICE, MPLS, + "unsupported via AF %d in netlink message\n", + via->rtvia_family); + } + + next->gateway.address.ip_v6 = nh6; + next->flags = RTF_GATEWAY; + if (IN6_IS_ADDR_V4MAPPED(&nh6)) + next->flags |= RTF_MAPPED_IPV6; + } else { + next->gateway.address.ip_v6 = nh6; + next->flags = 0; + } + next->gateway.type = AF_INET6; + + ret = nexthop_fill_mpls_common(ntb_newdst, &next->outlabels, bos_only); + if (!dp_nh_get_ifp(next)) + next->flags |= RTF_SLOWPATH; + + return ret; +} + +static int mpls_attr(const struct nlattr *attr, void *data) +{ + const struct nlattr **tb = data; + int type = mnl_attr_get_type(attr); + + if (mnl_attr_type_valid(attr, MPLS_IPTUNNEL_MAX) < 0) + return MNL_CB_OK; + + tb[type] = attr; + return MNL_CB_OK; +} + +/* Create nexthop struct */ +static struct next_hop * +ecmp_create(struct nlattr *mpath, uint32_t *count, bool 
*missing_ifp) +{ + size_t size = 0, i; + struct next_hop *next, *n; + void *vnhp; + + /* + * Need to loop over the paths to find out how many there are + * as the size is not fixed because the gateway is optional. + */ + mnl_attr_for_each_nested(vnhp, mpath) { + size++; + } + + if (!size) + return NULL; + + n = next = calloc(sizeof(struct next_hop), size); + if (!next) + return NULL; + + mnl_attr_for_each_nested(vnhp, mpath) { + struct rtnexthop *nhp = vnhp; + + if (nhp->rtnh_len == sizeof(*nhp)) { + /* There is a NH with no extra attrs */ + if (nexthop_fill(NULL, NULL, nhp, n)) + goto missing; + n++; + + } else if (nhp->rtnh_len > sizeof(*nhp)) { + struct nlattr *ntb[RTA_MAX+1] = { NULL }; + struct nlattr *mpls_ntb[MPLS_IPTUNNEL_MAX+1] = { NULL }; + + int res = mnl_attr_parse_payload(RTNH_DATA(vnhp), + nhp->rtnh_len - sizeof(*nhp), + route_attr, ntb); + + if (res != MNL_CB_OK) + goto failed; + + if (ntb[RTA_ENCAP] && ntb[RTA_ENCAP_TYPE] && + (mnl_attr_get_u16(ntb[RTA_ENCAP_TYPE]) == + LWTUNNEL_ENCAP_MPLS)) { + res = mnl_attr_parse_nested(ntb[RTA_ENCAP], + mpls_attr, + mpls_ntb); + if (res != MNL_CB_OK) { + RTE_LOG(NOTICE, DATAPLANE, + "unparseable mpls attributes\n"); + goto failed; + } + } + + res = mnl_attr_parse_payload( + RTNH_DATA(vnhp), nhp->rtnh_len - sizeof(*nhp), + route_attr, ntb); + + if (res != MNL_CB_OK) + goto failed; + + if (ntb[RTA_VIA]) { + if (nexthop_fill_mpls(ntb[RTA_VIA], + ntb[RTA_NEWDST], + ntb[RTA_MPLS_PAYLOAD], + nhp, n)) { + goto missing; + } + } else { + if (nexthop_fill(ntb[RTA_GATEWAY], + mpls_ntb[MPLS_IPTUNNEL_DST], + nhp, n)) { + goto missing; + } + } + n++; + } + } + + *count = n - next; + + return next; + +missing: + *missing_ifp = true; +failed: + size = n - next; + for (i = 0; i < size; i++) + nh_outlabels_destroy(&next[i].outlabels); + free(next); + return NULL; +} + +/* Fill nexthop struct */ +static bool nexthop6_fill(struct nlattr *ntb_gateway, + struct nlattr *ntb_encap, + struct rtnexthop *nhp, struct next_hop *next) +{ + 
label_t labels[NH_MAX_OUT_LABELS]; + uint16_t num_labels = 0; + void *labels_ptr; + uint32_t len; + int err; + struct ifnet *ifp; + + nh_outlabels_set(&next->outlabels, 0, NULL); + + nh_set_ifp(next, dp_ifnet_byifindex(nhp->rtnh_ifindex)); + if (!dp_nh_get_ifp(next) && !is_ignored_interface(nhp->rtnh_ifindex)) + return true; + + if (ntb_gateway) { + next->gateway.address.ip_v6 = + *(struct in6_addr *)mnl_attr_get_payload(ntb_gateway); + next->flags = RTF_GATEWAY; + if (IN6_IS_ADDR_V4MAPPED(&next->gateway.address.ip_v6)) + next->flags |= RTF_MAPPED_IPV6; + } else { + next->gateway.address.ip_v6 = anyaddr; + next->flags = 0; + } + next->gateway.type = AF_INET6; + + if (ntb_encap) { + len = mnl_attr_get_payload_len(ntb_encap); + labels_ptr = mnl_attr_get_payload(ntb_encap); + err = rta_encap_get_labels(labels_ptr, len, + ARRAY_SIZE(labels), + labels, &num_labels); + if (err) { + RTE_LOG(NOTICE, MPLS, + "malformed label stack in netlink message\n"); + return false; + } + nh_outlabels_set(&next->outlabels, num_labels, labels); + } + + ifp = dp_nh_get_ifp(next); + if ((!ifp || ifp->if_type == IFT_LOOP) && + num_labels == 0) + /* no dp interface or via loopback */ + next->flags |= RTF_SLOWPATH; + + if (num_labels > 0 && !is_lo(ifp)) + /* Output label rather than local label */ + next->flags |= RTF_OUTLABEL; + + return false; +} + +/* Create nexthop struct */ +static struct next_hop * +ecmp6_create(struct nlattr *mpath, uint32_t *count, bool *missing_ifp) +{ + size_t size = 0, i; + struct next_hop *next, *n; + void *vnhp; + + /* + * Need to loop over the paths to find out how many there are + * as the size is not fixed because the gateway is optional. 
+ */ + mnl_attr_for_each_nested(vnhp, mpath) { + size++; + } + + if (size == 0) + return NULL; + + n = next = calloc(sizeof(struct next_hop), size); + if (!next) + return NULL; + + mnl_attr_for_each_nested(vnhp, mpath) { + struct rtnexthop *nhp = vnhp; + + if (nhp->rtnh_len == sizeof(*nhp)) { + /* There is a NH with no extra attrs */ + if (nexthop6_fill(NULL, NULL, nhp, n)) + goto missing; + n++; + + } else if (nhp->rtnh_len > sizeof(*nhp)) { + struct nlattr *ntb[RTA_MAX+1] = { NULL }; + struct nlattr *mpls_ntb[MPLS_IPTUNNEL_MAX+1] = { NULL }; + + int res = mnl_attr_parse_payload(RTNH_DATA(vnhp), + nhp->rtnh_len - sizeof(*nhp), + route_attr, ntb); + + if (res != MNL_CB_OK) + goto failed; + + if (ntb[RTA_ENCAP] && ntb[RTA_ENCAP_TYPE] && + (mnl_attr_get_u16(ntb[RTA_ENCAP_TYPE]) == + LWTUNNEL_ENCAP_MPLS)) { + res = mnl_attr_parse_nested(ntb[RTA_ENCAP], + mpls_attr, + mpls_ntb); + if (res != MNL_CB_OK) { + RTE_LOG(NOTICE, DATAPLANE, + "unparseable mpls attributes\n"); + goto failed; + } + } + + res = mnl_attr_parse_payload( + RTNH_DATA(vnhp), nhp->rtnh_len - sizeof(*nhp), + route_attr, ntb); + + if (res != MNL_CB_OK) + goto failed; + + if (ntb[RTA_VIA]) { + if (nexthop6_fill_mpls(ntb[RTA_VIA], + ntb[RTA_NEWDST], + ntb[RTA_MPLS_PAYLOAD], + nhp, n)) { + goto missing; + } + } else { + if (nexthop6_fill(ntb[RTA_GATEWAY], + mpls_ntb[MPLS_IPTUNNEL_DST], + nhp, n)) { + goto missing; + } + } + n++; + } + } + + *count = n - next; + + return next; + +missing: + *missing_ifp = true; +failed: + size = n - next; + for (i = 0; i < size; i++) + nh_outlabels_destroy(&next[i].outlabels); + free(next); + return NULL; +} + +/* Create nexthop struct */ +struct next_hop *ecmp_mpls_create(struct nlattr *mpath, + uint32_t *count, + enum nh_type *nh_type, + bool *missing_ifp) +{ + struct next_hop *nh = NULL; + size_t size = 0; + void *vnhp; + struct nlattr *attr; + + /* + * Need to loop over the paths to find out how many there are + * and what type of nexthop we need. 
+ */ + *nh_type = NH_TYPE_V4GW; + mnl_attr_for_each_nested(vnhp, mpath) { + struct rtnexthop *nhp = vnhp; + + mnl_attr_for_each_payload((void *)RTNH_DATA(nhp), + nhp->rtnh_len - sizeof(*nhp)) { + /* + * If at least one of the vias is an IPv6 + * address, then all nexthops are represented + * as IPv6. + */ + if (attr->nla_type == RTA_VIA) { + const struct rtvia *via = RTA_DATA(attr); + + if (via->rtvia_family == AF_INET6) + *nh_type = NH_TYPE_V6GW; + break; + } + } + size++; + } + + switch (*nh_type) { + case NH_TYPE_V4GW: + nh = ecmp_create(mpath, count, missing_ifp); + break; + case NH_TYPE_V6GW: + nh = ecmp6_create(mpath, count, missing_ifp); + break; + } + return nh; +} + static int handle_route(vrfid_t vrf_id, uint16_t type, const struct rtmsg *rtm, uint32_t table, const void *dest, const void *nexthop, uint32_t ifindex, uint8_t scope, @@ -144,15 +701,16 @@ static int handle_route(vrfid_t vrf_id, uint16_t type, const struct rtmsg *rtm, /* May resize route tables and that code calls defer_rcu * which is not safe inside RCU read lock */ - rcu_read_unlock(); + dp_rcu_read_unlock(); if (type == RTM_NEWROUTE) { - struct ifnet *ifp = ifnet_byifindex(ifindex); + struct ifnet *ifp = dp_ifnet_byifindex(ifindex); uint32_t gw = 0; uint32_t flags = 0; struct next_hop *next; uint32_t size; bool exp_ifp = true; + struct ip_addr ip_addr; if (rtm->rtm_type == RTN_BLACKHOLE) { flags |= RTF_BLACKHOLE; @@ -175,7 +733,7 @@ static int handle_route(vrfid_t vrf_id, uint16_t type, const struct rtmsg *rtm, /* Output label rather than local label */ flags |= RTF_OUTLABEL; - if (nexthop != anyaddr) { + if (nexthop != &anyaddr) { flags |= RTF_GATEWAY; gw = *(const uint32_t *) nexthop; } @@ -184,21 +742,23 @@ static int handle_route(vrfid_t vrf_id, uint16_t type, const struct rtmsg *rtm, assert(num_labels == 0); next = ecmp_create(mpath, &size, &missing_ifp); if (missing_ifp) { - rcu_read_lock(); + dp_rcu_read_lock(); return -1; } } else { if (exp_ifp && !ifp && 
!is_ignored_interface(ifindex)) { - rcu_read_lock(); + dp_rcu_read_lock(); return -1; } size = 1; - next = nexthop_create(ifp, gw, flags, + ip_addr.type = AF_INET; + ip_addr.address.ip_v4.s_addr = gw; + next = nexthop_create(ifp, &ip_addr, flags, num_labels, labels); } if (unlikely(!next)) { - rcu_read_lock(); + dp_rcu_read_lock(); return 0; /* no memory */ } @@ -209,7 +769,109 @@ static int handle_route(vrfid_t vrf_id, uint16_t type, const struct rtmsg *rtm, } else if (type == RTM_DELROUTE) { rt_delete(vrf_id, dst, depth, table, scope); } - rcu_read_lock(); + dp_rcu_read_lock(); + return 0; +} + +static int handle_route6(vrfid_t vrf_id, uint16_t type, + const struct rtmsg *rtm, uint32_t table, + const void *dest, const void *gateway, + unsigned int ifindex, uint8_t scope, + struct nlattr *mpath, uint32_t nl_flags, + uint16_t num_labels, label_t *labels) +{ + uint32_t depth = rtm->rtm_dst_len; + struct in6_addr dst = *(const struct in6_addr *)dest; + struct ifnet *ifp = dp_ifnet_byifindex(ifindex); + struct ip_addr ip_addr = { + .type = AF_INET6, + .address.ip_v6 = *(struct in6_addr *)gateway, + }; + struct next_hop *next; + uint32_t size; + uint32_t flags = 0; + bool missing_ifp = false; + bool exp_ifp = true; + + if (rtm->rtm_type != RTN_UNICAST && + rtm->rtm_type != RTN_LOCAL && + rtm->rtm_type != RTN_BLACKHOLE && + rtm->rtm_type != RTN_UNREACHABLE) + return 0; + + if (rtm->rtm_family != AF_INET6) + return 0; + + if (IN6_IS_ADDR_LOOPBACK(&dst)) + return 0; + + if (IN6_IS_ADDR_UNSPEC_LINKLOCAL(&dst)) + return 0; + + /* + * If LOCAL unicast then ensure we replace any connected + * /128 which may have preceded it unless it's linklocal + * which need not be unique. + * Also ignore any ff00::/8 summary routes for multicast. 
+ */ + if (rtm->rtm_type == RTN_LOCAL) { + if (!IN6_IS_ADDR_LINKLOCAL(&dst)) + nl_flags |= NLM_F_REPLACE; + } else if (rtm->rtm_type == RTN_UNICAST && + IN6_IS_ADDR_MULTICAST(&dst) && depth == 8) { + return 0; + } + + if (type == RTM_NEWROUTE) { + if (rtm->rtm_type == RTN_BLACKHOLE) { + flags |= RTF_BLACKHOLE; + exp_ifp = false; + } else if (rtm->rtm_type == RTN_UNREACHABLE) { + flags |= RTF_REJECT; + exp_ifp = false; + } else if (rtm->rtm_type == RTN_LOCAL) { + flags |= RTF_LOCAL; + /* no need to store ifp for local routes */ + ifp = NULL; + exp_ifp = false; + } else if ((num_labels == 0) && + (!ifp || is_lo(ifp))) { + flags |= RTF_SLOWPATH; + } + + if (num_labels > 0 && !is_lo(ifp)) + /* Output label rather than local label */ + flags |= RTF_OUTLABEL; + + if (!(nl_flags & NL_FLAG_ANY_ADDR)) + flags |= RTF_GATEWAY; + + if (mpath) { + next = ecmp6_create(mpath, &size, &missing_ifp); + if (missing_ifp) + return -1; + } else { + if (exp_ifp && !ifp && !is_ignored_interface(ifindex)) + return -1; + size = 1; + if (IN6_IS_ADDR_V4MAPPED(&ip_addr.address.ip_v6)) + flags |= RTF_MAPPED_IPV6; + next = nexthop_create(ifp, &ip_addr, flags, num_labels, + labels); + } + + if (unlikely(!next)) + return 0; + + dp_rcu_read_unlock(); + rt6_add(vrf_id, &dst, depth, table, scope, next, size); + dp_rcu_read_lock(); + free(next); + } else if (type == RTM_DELROUTE) { + rt6_delete(vrf_id, &dst, depth, table, scope, + rtm->rtm_type == RTN_LOCAL); + } + return 0; } @@ -274,7 +936,7 @@ static int inet_mroute_ifset(struct nlattr *tb[], struct vmfcctl *mfcc) continue; } vifp->v_threshold = nhp->rtnh_hops; - IF_SET(vifp->v_if_index, &mfcc->mfcc_ifset); + IF_SET(vifp->v_vif_index, &mfcc->mfcc_ifset); if_count++; } mfcc->if_count = if_count; @@ -324,7 +986,7 @@ static int inet_mroute6_ifset(struct nlattr *tb[], struct vmf6cctl *mf6cc) */ continue; } - IF_SET(nhp->rtnh_ifindex, &mf6cc->mf6cc_ifset); + IF_SET(mifp->m6_mif_index, &mf6cc->mf6cc_ifset); if_count++; } mf6cc->if_count = if_count; @@ 
-450,18 +1112,6 @@ static char *mpls_labels_to_str(label_t *labels, uint16_t num_labels, return buf; } -static int mpls_attr(const struct nlattr *attr, void *data) -{ - const struct nlattr **tb = data; - int type = mnl_attr_get_type(attr); - - if (mnl_attr_type_valid(attr, MPLS_IPTUNNEL_MAX) < 0) - return MNL_CB_OK; - - tb[type] = attr; - return MNL_CB_OK; -} - static int inet_route_change(const struct nlmsghdr *nlh, const struct rtmsg *rtm, struct nlattr *tb[], @@ -474,36 +1124,50 @@ static int inet_route_change(const struct nlmsghdr *nlh, label_t labels[NH_MAX_OUT_LABELS]; uint16_t num_labels = 0; vrfid_t vrf_id = VRF_DEFAULT_ID; + uint32_t kernel_table; uint32_t table; if (tb[RTA_TABLE]) - table = mnl_attr_get_u32(tb[RTA_TABLE]); + kernel_table = mnl_attr_get_u32(tb[RTA_TABLE]); else - table = rtm->rtm_table; - - if (vrf_id == VRF_DEFAULT_ID) { - struct ifnet *vrf_master = vrfmaster_lookup_by_tableid(table); - if (vrf_master) { - vrf_id = vrfmaster_get_vrfid(vrf_master); - table = RT_TABLE_MAIN; + kernel_table = rtm->rtm_table; + + table = kernel_table; + + if (rtm->rtm_type == RTN_MULTICAST) { + /* + * Multicast routes do not come down out of order + * w.r.t. netlink link updates, since they don't use + * the route broker and the controller ensures that + * during replay the link updates are played out + * first. So they don't need incomplete route handling + * when either the VRF or the interfaces contained in + * the route update don't exist - instead these are + * logged and treated as an error. 
+ */ + if (vrf_is_vrf_table_id(kernel_table) && + vrf_lookup_by_tableid(kernel_table, &vrf_id, + &table) < 0) { + RTE_LOG(NOTICE, ROUTE, + "unknown VRF table %d\n", kernel_table); + return MNL_CB_ERROR; } - } - if (!netlink_uplink_vrf(cont_src, &vrf_id)) - return MNL_CB_ERROR; + if (!netlink_uplink_vrf(cont_src, &vrf_id)) + return MNL_CB_ERROR; - if (rtm->rtm_type == RTN_MULTICAST) return inet_mroute_change(vrf_id, nlh, rtm, tb); + } if (tb[RTA_DST]) dest = mnl_attr_get_payload(tb[RTA_DST]); else - dest = anyaddr; + dest = &anyaddr; if (tb[RTA_GATEWAY]) nexthop = mnl_attr_get_payload(tb[RTA_GATEWAY]); else { - nexthop = anyaddr; + nexthop = &anyaddr; nl_flags |= NL_FLAG_ANY_ADDR; } @@ -557,23 +1221,43 @@ static int inet_route_change(const struct nlmsghdr *nlh, * Delete any existing entry for this prefix in the incomplete cache. * If still incomplete it will get re-added with correct details */ - incomplete_route_del(vrf_id, dest, rtm->rtm_family, - rtm->rtm_dst_len, table, + incomplete_route_del(dest, rtm->rtm_family, + rtm->rtm_dst_len, kernel_table, rtm->rtm_scope, rtm->rtm_protocol); + if (vrf_is_vrf_table_id(kernel_table) && + vrf_lookup_by_tableid(kernel_table, &vrf_id, &table) < 0) { + /* + * Route came down before the vrf device + * RTM_NEWLINK - defer route installation until it + * arrives. 
+ */ + incomplete_route_add_nl(dest, + rtm->rtm_family, + rtm->rtm_dst_len, + kernel_table, + rtm->rtm_scope, + rtm->rtm_protocol, + nlh); + return MNL_CB_OK; + } + + if (!netlink_uplink_vrf(cont_src, &vrf_id)) + return MNL_CB_ERROR; + switch (rtm->rtm_family) { case AF_INET: if (handle_route(vrf_id, nlh->nlmsg_type, rtm, table, dest, nexthop, ifindex, rtm->rtm_scope, tb[RTA_MULTIPATH], nlh->nlmsg_flags, num_labels, labels) < 0) { - incomplete_route_add(vrf_id, dest, - rtm->rtm_family, - rtm->rtm_dst_len, - table, - rtm->rtm_scope, - rtm->rtm_protocol, - nlh); + incomplete_route_add_nl(dest, + rtm->rtm_family, + rtm->rtm_dst_len, + kernel_table, + rtm->rtm_scope, + rtm->rtm_protocol, + nlh); } break; @@ -583,14 +1267,13 @@ static int inet_route_change(const struct nlmsghdr *nlh, rtm->rtm_scope, tb[RTA_MULTIPATH], nlh->nlmsg_flags | nl_flags, num_labels, labels) < 0) { - incomplete_route_add(vrf_id, - dest, - rtm->rtm_family, - rtm->rtm_dst_len, - table, - rtm->rtm_scope, - rtm->rtm_protocol, - nlh); + incomplete_route_add_nl(dest, + rtm->rtm_family, + rtm->rtm_dst_len, + kernel_table, + rtm->rtm_scope, + rtm->rtm_protocol, + nlh); } break; @@ -607,11 +1290,13 @@ static int inet_addr_change(const struct nlmsghdr *nlh, { const void *addr, *broadcast = NULL; int ifindex = cont_src_ifindex(cont_src, ifa->ifa_index); - struct ifnet *ifp = ifnet_byifindex(ifindex); + struct ifnet *ifp = dp_ifnet_byifindex(ifindex); switch (nlh->nlmsg_type) { case RTM_NEWADDR: - if (tb[IFA_ADDRESS]) { + if (tb[IFA_LOCAL]) { + addr = mnl_attr_get_payload(tb[IFA_LOCAL]); + } else if (tb[IFA_ADDRESS]) { addr = mnl_attr_get_payload(tb[IFA_ADDRESS]); } else { RTE_LOG(ERR, ROUTE, "missing address in RTM_NEWADDR\n"); @@ -626,11 +1311,6 @@ static int inet_addr_change(const struct nlmsghdr *nlh, if (ifp) { ifa_add(ifindex, ifa->ifa_family, ifa->ifa_scope, addr, ifa->ifa_prefixlen, broadcast); - } else { - if (!is_ignored_interface(ifindex)) - missed_nl_inet_addr_add(ifindex, - ifa->ifa_family, - 
addr, nlh); } dp_event(DP_EVT_IF_ADDR_ADD, cont_src, ifp, ifindex, @@ -639,7 +1319,9 @@ static int inet_addr_change(const struct nlmsghdr *nlh, break; case RTM_DELADDR: - if (tb[IFA_ADDRESS]) { + if (tb[IFA_LOCAL]) { + addr = mnl_attr_get_payload(tb[IFA_LOCAL]); + } else if (tb[IFA_ADDRESS]) { addr = mnl_attr_get_payload(tb[IFA_ADDRESS]); } else { RTE_LOG(ERR, ROUTE, "missing address in RTM_DELADDR\n"); @@ -649,11 +1331,6 @@ static int inet_addr_change(const struct nlmsghdr *nlh, if (ifp) { ifa_remove(ifindex, ifa->ifa_family, addr, ifa->ifa_prefixlen); - } else { - if (!is_ignored_interface(ifindex)) - missed_nl_inet_addr_del(ifindex, - ifa->ifa_family, - addr); } dp_event(DP_EVT_IF_ADDR_DEL, cont_src, ifp, ifindex, @@ -683,15 +1360,10 @@ static void inet_netconf_change_mroute(int ifindex, struct nlattr *tb[], return; } - if ((unsigned int)ifindex < MFC_MAX_MVIFS) { - if (af == AF_INET) - add_vif(ifindex); - else - add_m6if(ifindex); - } else - DP_DEBUG(MULTICAST, ERR, MCAST, - "interface %s ifindex (%d) out of multicast range\n", - ifnet_indextoname(ifindex), ifindex); + if (af == AF_INET) + add_vif(ifindex); + else + add_m6if(ifindex); } /* Attribute changed */ @@ -760,16 +1432,7 @@ static int inet_netconf_change(const struct nlmsghdr *nlh, return MNL_CB_OK; /* NETCONFA_IFINDEX_ALL */ unsigned int ifindex = cont_src_ifindex(cont_src, signed_ifindex); - ifp = ifnet_byifindex(ifindex); - if (!ifp && !is_ignored_interface(ifindex)) { - if (nlh->nlmsg_type == RTM_NEWNETCONF) - missed_nl_inet_netconf_add(ifindex, - ncm->ncm_family, - nlh); - else - missed_nl_inet_netconf_del(ifindex, - ncm->ncm_family); - } + ifp = dp_ifnet_byifindex(ifindex); /* * Only given just before a delete, so we'll just let the diff --git a/src/ip_options.c b/src/ip_options.c index 37277ecd..46c3786e 100644 --- a/src/ip_options.c +++ b/src/ip_options.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, AT&T Intellectual Property. All rights reserved. 
+ * Copyright (c) 2017,2019, AT&T Intellectual Property. All rights reserved. * Copyright (c) 1982, 1986, 1988, 1993 * The Regents of the University of California. * Copyright (c) 2005 Andre Oppermann, Internet Business Solutions AG. diff --git a/src/ip_options.h b/src/ip_options.h index 0ac43781..ddfc221e 100644 --- a/src/ip_options.h +++ b/src/ip_options.h @@ -1,7 +1,7 @@ /* * Public functions defined in ip_options.c * - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2011-2016 by Brocade Communications Systems, Inc. * All rights reserved. * diff --git a/src/ip_output.c b/src/ip_output.c index a103d2d9..da4898fc 100644 --- a/src/ip_output.c +++ b/src/ip_output.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2011-2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -34,8 +34,8 @@ #include "ip_funcs.h" #include "mpls/mpls.h" #include "mpls/mpls_forward.h" -#include "nh.h" -#include "pktmbuf.h" +#include "nh_common.h" +#include "pktmbuf_internal.h" #include "route.h" #include "route_flags.h" #include "snmp_mib.h" @@ -47,15 +47,15 @@ void ip_output(struct rte_mbuf *m, bool srced_forus) { struct next_hop *nxt; - struct ether_hdr *eh = ethhdr(m); + struct rte_ether_hdr *eh = ethhdr(m); struct iphdr *ip = iphdr(m); struct ifnet *ifp; - eh->ether_type = htons(ETHER_TYPE_IPv4); + eh->ether_type = htons(RTE_ETHER_TYPE_IPV4); /* Do route lookup */ - nxt = rt_lookup(srced_forus ? ip->saddr : ip->daddr, - RT_TABLE_MAIN, m); + nxt = dp_rt_lookup(srced_forus ? ip->saddr : ip->daddr, + RT_TABLE_MAIN, m); if (!nxt) { /* * Since there is no output interface count against @@ -66,13 +66,12 @@ void ip_output(struct rte_mbuf *m, bool srced_forus) } /* ifp can be changed by nxt->ifp. 
use protected deref. */ - ifp = nh4_get_ifp(nxt); + ifp = dp_nh_get_ifp(nxt); /* MPLS imposition required because nh has given us a label */ if (nh_outlabels_present(&nxt->outlabels)) { - union next_hop_v4_or_v6_ptr mpls_nh = { .v4 = nxt }; - - mpls_unlabeled_input(ifp, m, NH_TYPE_V4GW, mpls_nh, ip->ttl); + mpls_unlabeled_input(ifp, m, MPT_IPV4, NH_TYPE_V4GW, nxt, + ip->ttl); return; } @@ -90,7 +89,7 @@ void ip_output(struct rte_mbuf *m, bool srced_forus) goto drop; if (srced_forus) { - ether_addr_copy(&ifp->eth_addr, &eh->d_addr); + rte_ether_addr_copy(&ifp->eth_addr, &eh->d_addr); /* * We want the kernel to believe this came from the interface * that we failed the mtu check on. @@ -100,7 +99,7 @@ void ip_output(struct rte_mbuf *m, bool srced_forus) return; } - if (ip_l2_resolve_and_output(NULL, m, nxt, ETH_P_IP)) + if (dp_ip_l2_nh_output(NULL, m, nxt, ETH_P_IP)) IPSTAT_INC_IFP(ifp, IPSTATS_MIB_OUTPKTS); return; @@ -225,7 +224,7 @@ void ip_fragment_mtu(struct ifnet *ifp, unsigned int mtu, struct rte_mbuf *m0, { struct iphdr *ip = iphdr(m0); struct vrf *vrf = if_vrf(ifp); - unsigned int hlen = pktmbuf_l3_len(m0); + unsigned int hlen = dp_pktmbuf_l3_len(m0); uint16_t iplen = ntohs(ip->tot_len); unsigned int len = (mtu - hlen) & ~7; /* size of payload */ struct rte_mbuf *m; @@ -258,7 +257,7 @@ void ip_fragment_mtu(struct ifnet *ifp, unsigned int mtu, struct rte_mbuf *m0, } m = pktmbuf_allocseg(m0->pool, pktmbuf_get_vrf(m0), - sz + ETHER_HDR_LEN + hlen); + sz + RTE_ETHER_HDR_LEN + hlen); if (m == NULL) goto drop; @@ -266,7 +265,7 @@ void ip_fragment_mtu(struct ifnet *ifp, unsigned int mtu, struct rte_mbuf *m0, memcpy(rte_pktmbuf_mtod(m, char *), rte_pktmbuf_mtod(m0, char *), - pktmbuf_l2_len(m0) + mhlen); + dp_pktmbuf_l2_len(m0) + mhlen); mhip = iphdr(m); if (hlen > sizeof(struct iphdr)) { @@ -274,8 +273,8 @@ void ip_fragment_mtu(struct ifnet *ifp, unsigned int mtu, struct rte_mbuf *m0, mhip->version = IPVERSION; mhip->ihl = mhlen >> 2; } - pktmbuf_l3_len(m) = mhlen; 
- rte_pktmbuf_data_len(m) = pktmbuf_l2_len(m) + mhlen; + dp_pktmbuf_l3_len(m) = mhlen; + rte_pktmbuf_data_len(m) = dp_pktmbuf_l2_len(m) + mhlen; rte_pktmbuf_pkt_len(m) = rte_pktmbuf_data_len(m); mhip->frag_off = htons(((len * frag_number) >> 3) + ntohs(ip->frag_off)); @@ -286,7 +285,7 @@ void ip_fragment_mtu(struct ifnet *ifp, unsigned int mtu, struct rte_mbuf *m0, mhip->check = 0; mhip->check = in_cksum(mhip, mhlen); - if (ip_mbuf_copy(m, m0, off + pktmbuf_l2_len(m0), sz) < 0) { + if (ip_mbuf_copy(m, m0, off + dp_pktmbuf_l2_len(m0), sz) < 0) { rte_pktmbuf_free(m); goto drop; } @@ -300,16 +299,16 @@ void ip_fragment_mtu(struct ifnet *ifp, unsigned int mtu, struct rte_mbuf *m0, * Copy first fragment and update header. */ m = pktmbuf_allocseg(m0->pool, pktmbuf_get_vrf(m0), - len + pktmbuf_l2_len(m0) + hlen); + len + dp_pktmbuf_l2_len(m0) + hlen); if (m == NULL) goto drop; pktmbuf_copy_meta(m, m0); memcpy(rte_pktmbuf_mtod(m, char *), - rte_pktmbuf_mtod(m0, char *), pktmbuf_l2_len(m0) + hlen); - pktmbuf_l3_len(m) = hlen; - rte_pktmbuf_data_len(m) = pktmbuf_l2_len(m0) + hlen; + rte_pktmbuf_mtod(m0, char *), dp_pktmbuf_l2_len(m0) + hlen); + dp_pktmbuf_l3_len(m) = hlen; + rte_pktmbuf_data_len(m) = dp_pktmbuf_l2_len(m0) + hlen; rte_pktmbuf_pkt_len(m) = rte_pktmbuf_data_len(m); mhip = iphdr(m); @@ -319,7 +318,7 @@ void ip_fragment_mtu(struct ifnet *ifp, unsigned int mtu, struct rte_mbuf *m0, mhip->check = 0; mhip->check = in_cksum(mhip, hlen); - int res = ip_mbuf_copy(m, m0, pktmbuf_l2_len(m0) + hlen, len); + int res = ip_mbuf_copy(m, m0, dp_pktmbuf_l2_len(m0) + hlen, len); if (res < 0) { rte_pktmbuf_free(m); goto drop; diff --git a/src/ip_route.c b/src/ip_route.c new file mode 100644 index 00000000..b9af2969 --- /dev/null +++ b/src/ip_route.c @@ -0,0 +1,142 @@ +/* + * Copyright (c) 2020, AT&T Intellectual Property. All rights reserved. 
+ * + * SPDX-License-Identifier: LGPL-2.1-only + */ + +#include +#include + +#include "dp_event.h" +#include "if_var.h" +#include "ip_forward.h" +#include "ip_route.h" +#include "urcu.h" +#include "vplane_debug.h" + +static struct cds_list_head rt_signal_unusable_list_head = + CDS_LIST_HEAD_INIT(rt_signal_unusable_list_head); + +struct rt_signal_unusable_client { + const char *source; + dp_rt_get_path_state_fn *get_state_fn; + struct cds_list_head list_entry; +}; + +static void dp_rt_path_state_uninit(void) +{ + struct cds_list_head *this_entry, *next; + struct rt_signal_unusable_client *client; + + cds_list_for_each_safe(this_entry, next, + &rt_signal_unusable_list_head) { + client = cds_list_entry(this_entry, + struct rt_signal_unusable_client, + list_entry); + free((char *)client->source); + free(client); + } +} + +struct dp_event_ops rt_signal_dp_event_ops = { + .uninit = dp_rt_path_state_uninit, +}; + +/* + * Provide a function that can be used to query the path state. + */ +int dp_rt_register_path_state(const char *source, + dp_rt_get_path_state_fn *get_state_fn) +{ + struct rt_signal_unusable_client *client; + static int initialised; + + cds_list_for_each_entry_rcu(client, &rt_signal_unusable_list_head, + list_entry) { + if (strcmp(source, client->source) == 0) + return -EINVAL; + } + + client = malloc(sizeof(*client)); + if (!client) + return -ENOMEM; + + client->source = strdup(source); + client->get_state_fn = get_state_fn; + if (!client->source) { + free(client); + return -ENOMEM; + } + cds_list_add_rcu(&client->list_entry, &rt_signal_unusable_list_head); + + if (!initialised) { + initialised = true; + dp_event_register(&rt_signal_dp_event_ops); + } + return 0; +} + +enum dp_rt_path_state +dp_rt_signal_check_paths_state(const struct dp_rt_path_unusable_key *key) +{ + struct rt_signal_unusable_client *client; + enum dp_rt_path_state state; + + cds_list_for_each_entry_rcu(client, &rt_signal_unusable_list_head, + list_entry) { + state = 
client->get_state_fn(key); + if (state == DP_RT_PATH_USABLE || + state == DP_RT_PATH_UNUSABLE) + return state; + } + + return DP_RT_PATH_UNKNOWN; +} + +static const char *dp_rt_path_state_to_str(enum dp_rt_path_state state) +{ + switch (state) { + case DP_RT_PATH_USABLE: + return "usable"; + case DP_RT_PATH_UNUSABLE: + return "unusable"; + default: + return "unknown"; + } +}; + + +void dp_rt_signal_path_state(const char *source, + enum dp_rt_path_state state, + const struct dp_rt_path_unusable_key *key) +{ + char buf[INET6_ADDRSTRLEN]; + + if (key->type == DP_RT_PATH_UNUSABLE_KEY_INTF) { + struct ifnet *ifp; + + ifp = dp_ifnet_byifindex(key->ifindex); + if (!ifp) + return; + + DP_DEBUG(ROUTE, DEBUG, ROUTE, + "paths using if %s marked %s by %s\n", + ifnet_indextoname(key->ifindex), + dp_rt_path_state_to_str(state), + source); + if_set_usability(ifp, (state == DP_RT_PATH_USABLE) ? + true : false); + } else + DP_DEBUG(ROUTE, DEBUG, ROUTE, + "paths using if %s, gw %s marked %s by %s\n", + ifnet_indextoname(key->ifindex), + inet_ntop(key->nexthop.type, + &key->nexthop.address, + buf, sizeof(buf)), + dp_rt_path_state_to_str(state), + source); + + if (state == DP_RT_PATH_USABLE || + state == DP_RT_PATH_UNUSABLE) + next_hop_mark_path_state(state, key); +} diff --git a/src/ip_route.h b/src/ip_route.h new file mode 100644 index 00000000..47a35807 --- /dev/null +++ b/src/ip_route.h @@ -0,0 +1,15 @@ +/* + * Copyright (c) 2020, AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ + +#ifndef IP_ROUTE +#define IP_ROUTE + +#include "ip_forward.h" + +enum dp_rt_path_state +dp_rt_signal_check_paths_state(const struct dp_rt_path_unusable_key *key); + +#endif /* IP_ROUTE */ diff --git a/src/ip_rt_protobuf.c b/src/ip_rt_protobuf.c new file mode 100644 index 00000000..d08a8fd7 --- /dev/null +++ b/src/ip_rt_protobuf.c @@ -0,0 +1,520 @@ +/* + * Copyright (c) 2020-2021, AT&T Intellectual Property. All rights reserved. 
+ * + * SPDX-License-Identifier: LGPL-2.1-only + * + * Route update protobuf handling for IP & MPLS + */ + +#include "if_var.h" +#include "ip_rt_protobuf.h" +#include "mpls/mpls_label_table.h" +#include "netinet6/in6.h" +#include "netinet6/route_v6.h" +#include "netlink.h" +#include "nh_common.h" +#include "protobuf/RibUpdate.pb-c.h" +#include "route.h" +#include "vplane_debug.h" +#include "vplane_log.h" +#include "vrf_if.h" +#include "vrf_internal.h" + +#define IN6_SET_ADDR_V4MAPPED(a6, a4) { \ + (a6)->s6_addr32[0] = 0; \ + (a6)->s6_addr32[1] = 0; \ + (a6)->s6_addr32[2] = htonl(0xffff); \ + (a6)->s6_addr32[3] = (a4); \ + } + +static bool nexthop_fill_common(struct next_hop *next, Path *path, + bool *missing_ifp) +{ + struct ifnet *ifp; + bool exp_ifp = true; + + ifp = dp_ifnet_byifindex(path->ifindex); + + switch (path->type) { + case PATH__PATH_TYPE__BLACKHOLE: + next->flags |= RTF_BLACKHOLE; + exp_ifp = false; + break; + case PATH__PATH_TYPE__UNREACHABLE: + next->flags |= RTF_REJECT; + exp_ifp = false; + break; + case PATH__PATH_TYPE__LOCAL: + next->flags |= RTF_LOCAL; + /* no need to store ifp for local routes */ + ifp = NULL; + exp_ifp = false; + break; + case PATH__PATH_TYPE__UNICAST: + break; + default: + RTE_LOG(NOTICE, DATAPLANE, + "unexpected path type %d in RibUpdate protobuf message\n", + path->type); + return false; + } + + nh_set_ifp(next, ifp); + if (!ifp && exp_ifp && !is_ignored_interface(path->ifindex)) { + *missing_ifp = true; + return false; + } + + nh_outlabels_set(&next->outlabels, path->n_mpls_labels, + path->mpls_labels); + + if ((!ifp || (is_lo(ifp) && path->n_mpls_labels == 0)) && + exp_ifp) + /* no dp interface or via loopback */ + next->flags |= RTF_SLOWPATH; + + if (path->n_mpls_labels > 0 && !is_lo(ifp)) + /* Output label rather than local label */ + next->flags |= RTF_OUTLABEL; + + if (path->backup) + next->flags |= RTF_BACKUP; + + return true; +} + +/* + * Returns true on success, false on failure. Failure includes a + * missing ifp. 
+ */ +static bool nexthop_fill(struct next_hop *next, Path *path, bool *missing_ifp) +{ + if (path->nexthop) { + /* Cannot store IPv4 routes with IPv6 nexthops */ + if (path->nexthop->address_oneof_case != + IPADDRESS__ADDRESS_ONEOF_IPV4_ADDR) { + RTE_LOG(NOTICE, DATAPLANE, + "unexpected nexthop address %d in IPv4 RibUpdate protobuf message\n", + path->nexthop->address_oneof_case); + return false; + } + next->gateway.address.ip_v4.s_addr = path->nexthop->ipv4_addr; + next->gateway.type = AF_INET; + next->flags |= RTF_GATEWAY; + } + + if (!nexthop_fill_common(next, path, missing_ifp)) + return false; + + return true; +} + +/* + * Returns true on success, false on failure. Failure includes a + * missing ifp. + */ +static bool nexthop6_fill(struct next_hop *next, Path *path, + bool *missing_ifp) +{ + if (path->nexthop) { + if (path->nexthop->address_oneof_case == + IPADDRESS__ADDRESS_ONEOF_IPV6_ADDR && + path->nexthop->ipv6_addr.len == + sizeof(next->gateway.address)) { + memcpy(&next->gateway.address, + path->nexthop->ipv6_addr.data, + sizeof(next->gateway.address)); + } else if (path->nexthop->address_oneof_case == + IPADDRESS__ADDRESS_ONEOF_IPV4_ADDR) { + IN6_SET_ADDR_V4MAPPED(&next->gateway.address.ip_v6, + path->nexthop->ipv4_addr); + } else { + RTE_LOG(NOTICE, DATAPLANE, + "path nexthop address type %d in RibUpdate protobuf message\n", + path->nexthop->address_oneof_case); + return false; + } + next->gateway.type = AF_INET6; + + if (IN6_IS_ADDR_V4MAPPED(&next->gateway.address.ip_v6)) + next->flags |= RTF_MAPPED_IPV6; + next->flags |= RTF_GATEWAY; + } + + if (!nexthop_fill_common(next, path, missing_ifp)) + return false; + + if (path->backup) + next->flags |= RTF_BACKUP; + + return true; +} + +static struct next_hop * +nexthop_list_create(Route *route, enum nh_type nh_type, bool *missing_ifp) +{ + struct next_hop *next, *n; + size_t size; + Path *path; + size_t i; + + next = calloc(sizeof(*next), route->n_paths); + if (!next) + return NULL; + + for (i = 0; i < 
route->n_paths; i++) { + path = route->paths[i]; + n = &next[i]; + + if (nh_type == NH_TYPE_V4GW) { + if (!nexthop_fill(n, path, missing_ifp)) + goto fail; + } else { + if (!nexthop6_fill(n, path, missing_ifp)) + goto fail; + } + } + + return next; + +fail: + size = i; + for (i = 0; i < size; i++) + nh_outlabels_destroy(&next[i].outlabels); + free(next); + return NULL; +} + +static bool ip_rt_pb_table_to_vrf( + RibUpdate *rtupdate, enum cont_src_en cont_src, + bool *add_incomplete, uint32_t *table, vrfid_t *vrf_id) +{ + *vrf_id = VRF_DEFAULT_ID; + + if (vrf_is_vrf_table_id(*table) && + vrf_lookup_by_tableid(*table, vrf_id, table) < 0) { + /* + * Route came down before the vrf device + * RTM_NEWLINK - defer route installation until it + * arrives. + */ + if (rtupdate->action == RIB_UPDATE__ACTION__UPDATE) + *add_incomplete = true; + return false; + } + + if (!netlink_uplink_vrf(cont_src, vrf_id)) { + if (rtupdate->action == RIB_UPDATE__ACTION__UPDATE) + *add_incomplete = true; + return false; + } + + return true; +} + +static int ipv4_route_pb_handler(RibUpdate *rtupdate, + enum cont_src_en cont_src, + bool *add_incomplete) +{ + Route *route = rtupdate->route; + uint32_t table = route->table_id; + char b1[INET6_ADDRSTRLEN]; + struct next_hop *next; + vrfid_t vrf_id; + + if (!ip_rt_pb_table_to_vrf(rtupdate, cont_src, add_incomplete, + &table, &vrf_id)) + return 0; + + DP_DEBUG_W_VRF(NETLINK_ROUTE, INFO, ROUTE, vrf_id, + "%s table %u dst %s/%u scope %u proto %u num_paths %lu\n", + rtupdate->action == RIB_UPDATE__ACTION__DELETE ? 
+ "delete" : "add/update", + table, + inet_ntop(AF_INET, &route->prefix->ipv4_addr, b1, + sizeof(b1)), + route->prefix_length, route->scope, + route->routing_protocol, route->n_paths); + + if (rtupdate->action == RIB_UPDATE__ACTION__DELETE) { + rt_delete(vrf_id, route->prefix->ipv4_addr, + route->prefix_length, table, route->scope); + return 0; + } + if (rtupdate->action != RIB_UPDATE__ACTION__UPDATE) { + RTE_LOG(NOTICE, DATAPLANE, + "unexpected action %d in RibUpdate protobuf message\n", + rtupdate->action); + return 0; + } + + next = nexthop_list_create(route, NH_TYPE_V4GW, add_incomplete); + if (!next && *add_incomplete) + return 0; + if (!next) + return -1; + + rt_insert(vrf_id, route->prefix->ipv4_addr, + route->prefix_length, table, route->scope, + route->routing_protocol, next, route->n_paths, true); + + free(next); + + return 0; +} + +static int ipv6_route_pb_handler(RibUpdate *rtupdate, + enum cont_src_en cont_src, + bool *add_incomplete) +{ + Route *route = rtupdate->route; + uint32_t table = route->table_id; + char b1[INET6_ADDRSTRLEN]; + struct next_hop *next; + struct in6_addr dst; + vrfid_t vrf_id; + + if (!ip_rt_pb_table_to_vrf(rtupdate, cont_src, add_incomplete, + &table, &vrf_id)) + return 0; + + memcpy(&dst, route->prefix->ipv6_addr.data, sizeof(dst)); + + DP_DEBUG_W_VRF(NETLINK_ROUTE, INFO, ROUTE, vrf_id, + "%s table %u dst %s/%u scope %u proto %u num_paths %lu\n", + rtupdate->action == RIB_UPDATE__ACTION__DELETE ? 
+ "delete" : "add/update", + table, + inet_ntop(AF_INET6, &dst, b1, sizeof(b1)), + route->prefix_length, route->scope, + route->routing_protocol, route->n_paths); + + if (rtupdate->action == RIB_UPDATE__ACTION__DELETE) { + bool local; + + local = route->n_paths == 1 && + route->paths[0]->type == PATH__PATH_TYPE__LOCAL; + + rt6_delete(vrf_id, &dst, route->prefix_length, table, + route->scope, local); + return 0; + } + if (rtupdate->action != RIB_UPDATE__ACTION__UPDATE) { + RTE_LOG(NOTICE, DATAPLANE, + "unexpected action %d in RibUpdate protobuf message\n", + rtupdate->action); + return 0; + } + + next = nexthop_list_create(route, NH_TYPE_V6GW, add_incomplete); + if (!next && *add_incomplete) + return 0; + if (!next) + return -1; + + rt6_add(vrf_id, &dst, route->prefix_length, table, + route->scope, next, route->n_paths); + + free(next); + + return 0; +} + +static int mpls_route_pb_handler(RibUpdate *rtupdate, bool *add_incomplete) +{ + enum nh_type nh_type = NH_TYPE_V4GW; + uint32_t payload_type = MPT_UNSPEC; + Route *route = rtupdate->route; + struct next_hop *next; + Path *path; + size_t i; + + if (rtupdate->action == RIB_UPDATE__ACTION__DELETE) { + mpls_label_table_remove_label(global_label_space_id, + route->prefix->mpls_label); + return 0; + } + if (rtupdate->action != RIB_UPDATE__ACTION__UPDATE) { + RTE_LOG(NOTICE, DATAPLANE, + "unexpected action %d in RibUpdate protobuf message\n", + rtupdate->action); + return 0; + } + + switch (route->payload_type) { + case ROUTE__PAYLOAD_TYPE__IPV4: + payload_type = MPT_IPV4; + break; + case ROUTE__PAYLOAD_TYPE__IPV6: + payload_type = MPT_IPV6; + break; + case ROUTE__PAYLOAD_TYPE__UNSPEC: + break; + default: + RTE_LOG(NOTICE, DATAPLANE, + "unexpected payload_type %d in RibUpdate protobuf message for label %u\n", + route->payload_type, route->prefix->mpls_label); + return -1; + } + + for (i = 0; i < route->n_paths; i++) { + path = route->paths[i]; + + /* + * MPLS route uses IPv6 addresses if there is at least + * one IPv6 
nexthop present, or if it's a deagg for an + * IPv6 payload. This is done for compatibility + * reasons since the RIB previously sent down a + * nexthop of IPv6 unspecified in this case, but no + * longer does. + */ + if ((path->nexthop && path->nexthop->address_oneof_case == + IPADDRESS__ADDRESS_ONEOF_IPV6_ADDR) || + (payload_type == MPT_IPV6 && + is_lo(dp_ifnet_byifindex(path->ifindex)))) { + nh_type = NH_TYPE_V6GW; + break; + } + } + + next = nexthop_list_create(route, nh_type, add_incomplete); + if (!next && *add_incomplete) + return 0; + if (!next) + return -1; + + for (i = 0; i < route->n_paths; i++) { + if (!route->paths[i]->mpls_bos_only && + route->paths[i]->n_mpls_labels == 0) { + /* + * If there are no labels and BOS_ONLY not + * set, then this implies the implicit-null + * label. This won't go out on the wire and is + * for signaling only. + */ + label_t lbl[1] = { MPLS_LABEL_IMPLNULL }; + + nh_outlabels_set(&next[i].outlabels, 1, lbl); + } + + /* + * Also remove setting of RTF_SLOWPATH done by + * nexthop[6]_fill, since it doesn't know that it's + * called from MPLS context and thus route with no + * labels via loopback means de-agg. 
+ * In addition, set the gateway flag for preserving + * compatibility of show output where we showed for a + * deagg: + * + * in label: 53760, fec:ipv6 + * nexthop via ::, vrfred + * + */ + if (is_lo(dp_nh_get_ifp(&next[i]))) { + next[i].flags &= ~RTF_SLOWPATH; + next[i].flags |= RTF_GATEWAY; + } + } + + mpls_label_table_insert_label(global_label_space_id, + route->prefix->mpls_label, nh_type, + payload_type, next, + route->n_paths); + + free(next); + + return 0; +} + +int ip_route_pb_handler(void *data, size_t len, enum cont_src_en cont_src) +{ + bool add_incomplete = false; + RibUpdate *rtupdate; + Route *route; + int rc = -1; + void *dest; + int af; + + rtupdate = rib_update__unpack(NULL, len, data); + if (!rtupdate) { + RTE_LOG(ERR, DATAPLANE, + "failed to read RibUpdate protobuf message\n"); + return -1; + } + + if (!rtupdate->route) { + RTE_LOG(NOTICE, DATAPLANE, + "missing route in RibUpdate protobuf message\n"); + goto free_msg; + } + + if (!rtupdate->route->prefix) { + RTE_LOG(NOTICE, DATAPLANE, + "missing prefix in RibUpdate protobuf message\n"); + goto free_msg; + } + + if (rtupdate->route->n_paths == 0 && + rtupdate->action != RIB_UPDATE__ACTION__DELETE) { + RTE_LOG(NOTICE, DATAPLANE, + "Invalid n_paths in RibUpdate protobuf message\n"); + goto free_msg; + } + + route = rtupdate->route; + + switch (rtupdate->route->prefix->address_oneof_case) { + case IPADDRESS_OR_LABEL__ADDRESS_ONEOF_IPV4_ADDR: + af = AF_INET; + dest = &rtupdate->route->prefix->ipv4_addr; + break; + case IPADDRESS_OR_LABEL__ADDRESS_ONEOF_IPV6_ADDR: + if (route->prefix->ipv6_addr.len != sizeof(struct in6_addr)) { + RTE_LOG(NOTICE, DATAPLANE, + "bad prefix address length %lu in RibUpdate protobuf message\n", + route->prefix->ipv6_addr.len); + rc = -1; + goto free_msg; + } + + af = AF_INET6; + dest = rtupdate->route->prefix->ipv6_addr.data; + break; + case IPADDRESS_OR_LABEL__ADDRESS_ONEOF_MPLS_LABEL: + af = AF_MPLS; + dest = &rtupdate->route->prefix->mpls_label; + break; + default: + rc 
= -2; + goto free_msg; + } + + incomplete_route_del(dest, af, route->prefix_length, + route->table_id, route->scope, + route->routing_protocol); + + switch (rtupdate->route->prefix->address_oneof_case) { + case IPADDRESS_OR_LABEL__ADDRESS_ONEOF_IPV4_ADDR: + rc = ipv4_route_pb_handler(rtupdate, cont_src, &add_incomplete); + break; + case IPADDRESS_OR_LABEL__ADDRESS_ONEOF_IPV6_ADDR: + rc = ipv6_route_pb_handler(rtupdate, cont_src, &add_incomplete); + break; + case IPADDRESS_OR_LABEL__ADDRESS_ONEOF_MPLS_LABEL: + rc = mpls_route_pb_handler(rtupdate, &add_incomplete); + break; + default: + break; + } + + if (!rc && add_incomplete) + incomplete_route_add_pb(dest, af, + route->prefix_length, + route->table_id, route->scope, + route->routing_protocol, data, + len); + +free_msg: + rib_update__free_unpacked(rtupdate, NULL); + return rc; +} diff --git a/src/ip_rt_protobuf.h b/src/ip_rt_protobuf.h new file mode 100644 index 00000000..4e960533 --- /dev/null +++ b/src/ip_rt_protobuf.h @@ -0,0 +1,15 @@ +/*- + * Copyright (c) 2020, AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ +#ifndef IP_RT_PROTOBUF_H +#define IP_RT_PROTOBUF_H + +#include + +#include "control.h" + +int ip_route_pb_handler(void *data, size_t len, enum cont_src_en cont_src); + +#endif /* IP_RT_PROTOBUF_H */ diff --git a/src/ip_ttl.h b/src/ip_ttl.h index e37408c8..bbb4d71c 100644 --- a/src/ip_ttl.h +++ b/src/ip_ttl.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2018-2019, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2011-2016 by Brocade Communications Systems, Inc. * All rights reserved. * diff --git a/src/ipip_tunnel.c b/src/ipip_tunnel.c deleted file mode 100644 index 719045b0..00000000 --- a/src/ipip_tunnel.c +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (c) 2018, AT&T Intellectual Property. All rights reserved. 
- * - * SPDX-License-Identifier: LGPL-2.1-only - * - * IPIP tunnel interface implementation - */ - -#include -#include - -#include "dp_event.h" -#include "if_var.h" - -static const struct ift_ops ipip_tun_if_ops = { -}; - -static void ipip_tun_init(void) -{ - int ret = if_register_type(IFT_TUNNEL_OTHER, &ipip_tun_if_ops); - if (ret < 0) - rte_panic("Failed to register IPIP tunnel type: %s", - strerror(-ret)); -} - -static const struct dp_event_ops ipip_tun_events = { - .init = ipip_tun_init, -}; - -DP_STARTUP_EVENT_REGISTER(ipip_tun_events); diff --git a/src/iptun_common.c b/src/iptun_common.c index 1c0387a2..728406b3 100644 --- a/src/iptun_common.c +++ b/src/iptun_common.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015-2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -22,7 +22,7 @@ #include "ip6_funcs.h" #include "ip_funcs.h" #include "iptun_common.h" -#include "pktmbuf.h" +#include "pktmbuf_internal.h" #include "fal_plugin.h" #include "fal.h" #include "vplane_log.h" @@ -37,7 +37,7 @@ ip_tos_ecn_set_inner(void *inner_hdr, uint16_t prot, uint8_t new_inner) } void -ip6_tos_copy_inner(uint32_t *outer_flow, uint32_t *inner_flow) +ip6_tos_copy_inner(uint32_t *outer_flow, const uint32_t *inner_flow) { uint32_t outer = ntohl(*outer_flow); uint32_t inner = ntohl(*inner_flow); @@ -48,7 +48,7 @@ ip6_tos_copy_inner(uint32_t *outer_flow, uint32_t *inner_flow) } /* Copy dscp from inner Ipv6 to outer IPv4 */ -void ip_ip6_dscp_copy_inner(uint8_t *outer_tos, uint32_t *inner_flow) +void ip_ip6_dscp_copy_inner(uint8_t *outer_tos, const uint32_t *inner_flow) { uint32_t inner = ntohl(*inner_flow); @@ -66,7 +66,7 @@ void ip6_ip_dscp_copy_inner(uint32_t *outer_flow, uint8_t inner_tos) } void -ip6_tos_copy_outer_noecn(uint32_t *outer_flow, uint32_t *inner_flow) +ip6_tos_copy_outer_noecn(const uint32_t *outer_flow, 
uint32_t *inner_flow) { uint32_t outer = ntohl(*outer_flow); uint32_t inner = ntohl(*inner_flow); @@ -95,7 +95,7 @@ ip_tos_ecn_encap(uint8_t *outer_tos, uint8_t inner_tos) } void -ip6_tos_ecn_encap(uint32_t *outer_flow, uint32_t *inner_flow) +ip6_tos_ecn_encap(uint32_t *outer_flow, const uint32_t *inner_flow) { uint32_t outer = ntohl(*outer_flow); uint32_t inner = ntohl(*inner_flow); @@ -124,7 +124,7 @@ ip6_ip_ecn_encap(uint32_t *outer_flow, uint8_t inner_tos) /* ecn encap when outer is IPv4 and inner is IPv6 */ void -ip_ip6_ecn_encap(uint8_t *outer_tos, uint32_t *inner_flow) +ip_ip6_ecn_encap(uint8_t *outer_tos, const uint32_t *inner_flow) { uint32_t inner = ntohl(*inner_flow); @@ -207,12 +207,12 @@ ip_tos_ecn_decap(uint8_t outer_tos, char *inner_hdr, uint16_t prot) */ char * mbuf_get_inner_ip(struct rte_mbuf *m, const char *outer, char *inner, - uint16_t *next_prot) + const uint16_t *next_prot) { unsigned int len; /* Is there enough data in the first segment to find the inner hdr. */ - len = rte_pktmbuf_data_len(m) - pktmbuf_l2_len(m); + len = rte_pktmbuf_data_len(m) - dp_pktmbuf_l2_len(m); if (*next_prot == ETH_P_IP) { if (len >= (inner - outer) + sizeof(struct iphdr)) @@ -230,22 +230,23 @@ int iptun_eth_hdr_fixup(struct rte_mbuf *m, uint16_t next_prot, uint16_t decap_size) { - struct ether_hdr *orig_eth; - struct ether_hdr *new_eth; + struct rte_ether_hdr *orig_eth; + struct rte_ether_hdr *new_eth; /* * Have found a tunnel, so remove the outer IP and other * protocol headers. Need to leave the mbuf such that if we * were to pass the packet to the kernel, the L2 header * protocol accurately reflects the next header and has a - * correct dest addr set. + * correct source and dest addr set. 
*/ - orig_eth = rte_pktmbuf_mtod(m, struct ether_hdr *); - new_eth = (struct ether_hdr *)rte_pktmbuf_adj(m, decap_size); + orig_eth = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); + new_eth = (struct rte_ether_hdr *)rte_pktmbuf_adj(m, decap_size); if (unlikely(!new_eth)) return -1; new_eth->ether_type = htons(next_prot); + new_eth->s_addr = orig_eth->s_addr; new_eth->d_addr = orig_eth->d_addr; return 0; } @@ -310,8 +311,7 @@ void iptun_create_fal_tep(struct ifnet *ifp, struct tun_info_st *tun_info, } ret = fal_create_tunnel(l3_nattrs, l3_attrs, obj); - if (((ret < 0) && (ret != -EOPNOTSUPP)) || - (!*obj)) + if ((ret < 0 || !*obj) && (ret != -EOPNOTSUPP)) RTE_LOG(ERR, DATAPLANE, "Failed to create FAL tun object for GRE tun: %s\n", ifp->if_name); diff --git a/src/iptun_common.h b/src/iptun_common.h index 304844aa..8221af38 100644 --- a/src/iptun_common.h +++ b/src/iptun_common.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015-2016 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -12,9 +12,10 @@ #define IPTUN_COMMON_H #include -#include "vrf.h" -#include "if_var.h" +#include "util.h" #include "fal_plugin.h" +#include "interface.h" +#include "ip_addr.h" struct iphdr; struct rte_mbuf; @@ -43,18 +44,18 @@ struct tun_info_st { #define IPV6_FLOW_ECN_ECT0 0x00200000 #define IPV6_FLOW_TOS_SHIFT 20 -void ip6_tos_copy_inner(uint32_t *outer_flow, uint32_t *inner_flow); +void ip6_tos_copy_inner(uint32_t *outer_flow, const uint32_t *inner_flow); void ip6_ip_dscp_copy_inner(uint32_t *outer_flow, uint8_t inner_tos); -void ip_ip6_dscp_copy_inner(uint8_t *outer_tos, uint32_t *inner_flow); -void ip6_tos_copy_outer_noecn(uint32_t *outer_flow, uint32_t *inner_flow); +void ip_ip6_dscp_copy_inner(uint8_t *outer_tos, const uint32_t *inner_flow); +void ip6_tos_copy_outer_noecn(const uint32_t *outer_flow, uint32_t *inner_flow); void ip_tos_ecn_encap(uint8_t *outer_tos, uint8_t inner_tos); -void ip6_tos_ecn_encap(uint32_t *outer_flow, uint32_t *inner_flow); +void ip6_tos_ecn_encap(uint32_t *outer_flow, const uint32_t *inner_flow); void ip6_ip_ecn_encap(uint32_t *outer_flow, uint8_t inner_tos); -void ip_ip6_ecn_encap(uint8_t *outer_tos, uint32_t *inner_flow); +void ip_ip6_ecn_encap(uint8_t *outer_tos, const uint32_t *inner_flow); int ip_tos_ecn_decap(uint8_t outer_tos, char *inner_hdr, uint16_t prot); char * mbuf_get_inner_ip(struct rte_mbuf *m, const char *outer, char *inner, - uint16_t *next_prot); + const uint16_t *next_prot); int iptun_eth_hdr_fixup(struct rte_mbuf *m, uint16_t next_prot, uint16_t decap_size); diff --git a/src/json_writer.c b/src/json_writer.c index 5a205c15..e39a3c81 100644 --- a/src/json_writer.c +++ b/src/json_writer.c @@ -4,7 +4,7 @@ * This takes care of the annoying bits of JSON syntax like the commas * after elements * - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. 
* Copyright (c) 2011-2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -235,10 +235,10 @@ void jsonw_bool_field(json_writer_t *self, const char *prop, bool val) jsonw_bool(self, val); } -void jsonw_float_field(json_writer_t *self, const char *prop, double val) +void jsonw_float_field(json_writer_t *self, const char *prop, double num) { jsonw_name(self, prop); - jsonw_float(self, val); + jsonw_float(self, num); } void jsonw_uint_field(json_writer_t *self, const char *prop, uint64_t num) diff --git a/src/l2_rx_fltr.c b/src/l2_rx_fltr.c index d325e9c1..4265e8d9 100644 --- a/src/l2_rx_fltr.c +++ b/src/l2_rx_fltr.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015-2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -59,7 +59,7 @@ l2_mcfltr_set_hw(struct ifnet *ifp) { struct cds_lfht_iter iter; struct l2_mcfltr_node *l2mf; - struct ether_addr l2mf_addr[L2_MCFLTRHASH_MAX]; + struct rte_ether_addr l2mf_addr[L2_MCFLTRHASH_MAX]; int count = 0; int ret; @@ -108,7 +108,7 @@ l2_mcfltr_set_hw(struct ifnet *ifp) * Add/update an mcast filter table entry */ static void -l2_mcfltr_add_entry(struct ifnet *ifp, const struct ether_addr *dst) +l2_mcfltr_add_entry(struct ifnet *ifp, const struct rte_ether_addr *dst) { struct l2_mcfltr_node *bmf; @@ -191,7 +191,7 @@ l2_rx_fltr_init(struct ifnet *ifp) } static void -l2_mcfltr_del_entry(struct ifnet *ifp, const struct ether_addr *dst) +l2_mcfltr_del_entry(struct ifnet *ifp, const struct rte_ether_addr *dst) { struct l2_mcfltr_node *bmf = l2_mcfltr_node_lookup(ifp, dst); @@ -305,23 +305,23 @@ void l2_rx_fltr_state_change(struct ifnet *ifp) } /* - * Add/update an mcast filter table entry to a bonded slave physical interface + * Add/update an mcast filter table entry to a bonded member physical interface */ static void l2_mcfltr_add_bonded_entry(struct 
ifnet *ifp, void *grp) { - const struct ether_addr *dst = grp; + const struct rte_ether_addr *dst = grp; l2_mcfltr_add_entry(ifp, dst); } /* - * Delete a mcast filter table entry from a bonded slave physical interface + * Delete a mcast filter table entry from a bonded member physical interface */ static void l2_mcfltr_del_bonded_entry(struct ifnet *ifp, void *grp) { - const struct ether_addr *dst = grp; + const struct rte_ether_addr *dst = grp; l2_mcfltr_del_entry(ifp, dst); } @@ -329,27 +329,35 @@ l2_mcfltr_del_bonded_entry(struct ifnet *ifp, void *grp) /* * Process a netlink add address message */ -void l2_rx_fltr_add_addr(struct ifnet *ifp, const struct ether_addr *dst) +void l2_rx_fltr_add_addr(struct ifnet *ifp, const struct rte_ether_addr *dst) { DP_DEBUG(MULTICAST, INFO, MCAST, "Processing RTM_NEWADDR for %s; MAC address: %s.\n", ifp->if_name, ether_ntoa(dst)); - if (is_multicast_ether_addr(dst)) { + /* + * If unplugged the interface will shortly be removed, so + * don't even update the software state. 
+ */ + if (ifp->unplugged) + return; + + if (rte_is_multicast_ether_addr(dst)) { l2_mcfltr_add_entry(ifp, dst); /* If adding to a vlan also add to parent (real IF) */ if (ifp->if_parent) l2_mcfltr_add_entry(ifp->if_parent, dst); - /* If adding to a bonded IF add to slaves (physical IF) */ + /* If adding to a bonded IF add to members (physical IF) */ if (ifp->if_team) { int err; - struct ether_addr *grp = (struct ether_addr *)dst; - err = lag_walk_bond_slaves + struct rte_ether_addr *grp = + (struct rte_ether_addr *) dst; + err = lag_walk_team_members (ifp, l2_mcfltr_add_bonded_entry, grp); if (err < 0) DP_DEBUG(MULTICAST, INFO, MCAST, - "Failure to insert %s into slave " + "Failure to insert %s into member " "filter tables for %s.\n", ether_ntoa(dst), ifp->if_name); } @@ -362,22 +370,30 @@ void l2_rx_fltr_add_addr(struct ifnet *ifp, const struct ether_addr *dst) /* * Process a delete address netlink message */ -void l2_rx_fltr_del_addr(struct ifnet *ifp, const struct ether_addr *dst) +void l2_rx_fltr_del_addr(struct ifnet *ifp, const struct rte_ether_addr *dst) { DP_DEBUG(MULTICAST, INFO, MCAST, "Processing RTM_DELADDR for %s; MAC address: %s.\n", ifp->if_name, ether_ntoa(dst)); - if (is_multicast_ether_addr(dst)) { - /* If deleting from a bonded IF delete from slaves (phys IF) */ + /* + * If unplugged the interface will shortly be removed, so + * don't even update the software state. 
+ */ + if (ifp->unplugged) + return; + + if (rte_is_multicast_ether_addr(dst)) { + /* If deleting from a bonded IF delete from members (phys IF) */ if (ifp->if_team) { int err; - struct ether_addr *grp = (struct ether_addr *)dst; - err = lag_walk_bond_slaves + struct rte_ether_addr *grp = + (struct rte_ether_addr *) dst; + err = lag_walk_team_members (ifp, l2_mcfltr_del_bonded_entry, grp); if (err < 0) DP_DEBUG(MULTICAST, INFO, MCAST, - "Failure to remove %s from slave " + "Failure to remove %s from member " "filter tables for %s.\n", ether_ntoa(dst), ifp->if_name); } diff --git a/src/l2_rx_fltr.h b/src/l2_rx_fltr.h index b5521bf8..4bc811c4 100644 --- a/src/l2_rx_fltr.h +++ b/src/l2_rx_fltr.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015-2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -25,7 +25,7 @@ struct l2_mcfltr_node { struct rcu_head l2mf_rcu; /* for deletion via rcu */ struct cds_lfht_node l2mf_node; /* hash table node */ struct ifnet *l2mf_if; /* associated if */ - struct ether_addr l2mf_addr; /* multicast mac address */ + struct rte_ether_addr l2mf_addr; /* multicast mac address */ int16_t l2mf_ref; /* ref count (overloaded 24 bits) */ }; @@ -33,7 +33,7 @@ struct l2_mcfltr_node { #define L2_MCFLTRHASH_BITS 13 static inline unsigned long -l2_mcfltr_node_hash(const struct ether_addr *key) +l2_mcfltr_node_hash(const struct rte_ether_addr *key) { return eth_addr_hash(key, L2_MCFLTRHASH_BITS); } @@ -46,7 +46,7 @@ l2_mcfltr_node_match(struct cds_lfht_node *node, const void *key) = caa_container_of(node, const struct l2_mcfltr_node, l2mf_node); - return ether_addr_equal(&bmf->l2mf_addr, key); + return rte_ether_addr_equal(&bmf->l2mf_addr, key); } /* @@ -55,7 +55,8 @@ l2_mcfltr_node_match(struct cds_lfht_node *node, const void *key) * Look up a mcast filter node for the specified destination. 
*/ static inline struct l2_mcfltr_node * -l2_mcfltr_node_lookup(const struct ifnet *ifp, const struct ether_addr *addr) +l2_mcfltr_node_lookup(const struct ifnet *ifp, + const struct rte_ether_addr *addr) { struct cds_lfht_iter iter; @@ -76,9 +77,9 @@ int l2_rx_fltr_init(struct ifnet *ifp); void l2_rx_fltr_cleanup(struct ifnet *ifp); void l2_rx_fltr_delete_rcu(struct ifnet *ifp); -void l2_rx_fltr_add_addr(struct ifnet *ifp, const struct ether_addr *dst); +void l2_rx_fltr_add_addr(struct ifnet *ifp, const struct rte_ether_addr *dst); -void l2_rx_fltr_del_addr(struct ifnet *ifp, const struct ether_addr *dst); +void l2_rx_fltr_del_addr(struct ifnet *ifp, const struct rte_ether_addr *dst); void l2_rx_fltr_state_change(struct ifnet *ifp); diff --git a/src/l2tp/l2tpeth.h b/src/l2tp/l2tpeth.h index 3c971a79..3ba580aa 100644 --- a/src/l2tp/l2tpeth.h +++ b/src/l2tp/l2tpeth.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2014-2016 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -146,20 +146,20 @@ struct l2tpv3_ip_encap { } __attribute__((packed)); struct l2tpv3_encap { - struct ether_hdr ether_header; + struct rte_ether_hdr ether_header; char iphdr[0]; -} __attribute__((packed)); +} __attribute__((packed)) __attribute__((aligned(2))); typedef void l2tp_iter_func_t(void *, void *arg); struct ifnet *l2tpeth_create(int ifindex, const char *ifname, unsigned int mtu, - const struct ether_addr *addr); + const struct rte_ether_addr *addr); struct l2tp_session *l2tp_session_byid(uint32_t session_id); void l2tp_init_stats(struct l2tp_session *sess); void l2tp_session_walk(l2tp_iter_func_t func, void *arg); void l2tp_tunnel_walk(l2tp_iter_func_t func, void *arg); -void l2tp_output(struct ifnet *ifp, struct rte_mbuf *m, uint16_t rx_vlan); +void l2tp_output(struct ifnet *ifp, struct rte_mbuf *m); void l2tp_stats(const struct l2tp_session *session, struct l2tp_stats *stats); int l2tp_set_xconnect(char *cmd, char *, char*, char *); diff --git a/src/l2tp/l2tpeth_decap.c b/src/l2tp/l2tpeth_decap.c index ee192434..3f38e8d0 100644 --- a/src/l2tp/l2tpeth_decap.c +++ b/src/l2tp/l2tpeth_decap.c @@ -1,7 +1,7 @@ /* * Functions for handling l2tpeth data path operations. * - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2014-2016 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -21,16 +21,16 @@ #include #include -#include "bridge.h" -#include "bridge_port.h" #include "ether.h" +#include "if/bridge/bridge.h" +#include "if/bridge/bridge_port.h" #include "if_var.h" #include "in_cksum.h" #include "l2tpeth.h" -#include "pktmbuf.h" +#include "pktmbuf_internal.h" #include "urcu.h" #include "util.h" -#include "vrf.h" +#include "vrf_internal.h" #define chk_bit(x, m1, m2) (((x) & (m1)) == (m2)) @@ -80,24 +80,25 @@ static inline bool l2tp_seq_after(uint32_t ns, uint32_t olds) */ static void l2tp_decap(struct rte_mbuf *m, uint32_t offset) { - struct ether_hdr *eth = NULL; + struct rte_ether_hdr *eth = NULL; rte_pktmbuf_adj(m, offset); - eth = rte_pktmbuf_mtod(m, struct ether_hdr *); - if (eth->ether_type != htons(ETHER_TYPE_VLAN)) { + eth = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); + if (eth->ether_type != htons(RTE_ETHER_TYPE_VLAN)) { m->ol_flags &= ~PKT_RX_VLAN; return; } /* Adjust vlan information */ - struct vlan_hdr *vh = (struct vlan_hdr *) (eth + 1); + struct rte_vlan_hdr *vh = (struct rte_vlan_hdr *) (eth + 1); m->vlan_tci = ntohs(vh->vlan_tci); m->ol_flags |= PKT_RX_VLAN; - memmove((char *)eth + sizeof(struct vlan_hdr), eth, 2 * ETHER_ADDR_LEN); - rte_pktmbuf_adj(m, sizeof(struct vlan_hdr)); + memmove((char *)eth + sizeof(struct rte_vlan_hdr), + eth, 2 * RTE_ETHER_ADDR_LEN); + rte_pktmbuf_adj(m, sizeof(struct rte_vlan_hdr)); } @@ -139,7 +140,7 @@ static int l2tp_recv_encap(struct rte_mbuf *m, return 1; if (s->peer_cookie_len != 0 - && memcmp(l2tp, s->peer_cookie, s->peer_cookie_len)) { + && memcmp(l2tp, s->peer_cookie, s->peer_cookie_len) != 0) { ++stats->rx_cookie_discards; return -1; /* discard - bad cookie */ } @@ -162,11 +163,11 @@ static int l2tp_recv_encap(struct rte_mbuf *m, if (unlikely(ifp == NULL)) return -1; + pktmbuf_prepare_decap_reswitch(m); l2tp_decap(m, offset); if_incr_in(ifp, m); - pktmbuf_prepare_decap_reswitch(m); - if (rte_pktmbuf_data_len(m) < sizeof(struct ether_hdr)) { + if (rte_pktmbuf_data_len(m) < 
sizeof(struct rte_ether_hdr)) { if_incr_error(ifp); rte_pktmbuf_free(m); return 0; @@ -265,7 +266,7 @@ int l2tp_udpv4_recv_encap(struct rte_mbuf *m, const struct iphdr *ip, int l2tp_ipv4_recv_encap(struct rte_mbuf *m, const struct iphdr *ip) { - const uint8_t *l2tp = pktmbuf_mtol4(m, const uint8_t *); + const uint8_t *l2tp = dp_pktmbuf_mtol4(m, const uint8_t *); struct l2tp_session *s; vrfid_t vrfid = pktmbuf_get_vrf(m); unsigned int offset; @@ -381,12 +382,13 @@ int l2tp_undo_decap(const struct ifnet *ifp, struct rte_mbuf *m) return -1; uint16_t len = rte_pktmbuf_pkt_len(m) + session->hdr_len; - if (rte_pktmbuf_prepend(m, session->hdr_len + ETHER_HDR_LEN) == NULL) + if (rte_pktmbuf_prepend(m, session->hdr_len + + RTE_ETHER_HDR_LEN) == NULL) return -1; /* fix outer IP length to account for possible trimming */ - struct ether_hdr *eth = ethhdr(m); - if (eth->ether_type == htons(ETHER_TYPE_IPv4)) { + struct rte_ether_hdr *eth = ethhdr(m); + if (eth->ether_type == htons(RTE_ETHER_TYPE_IPV4)) { struct iphdr *ip = (struct iphdr *)(eth + 1); uint16_t ip_len = htons(len); @@ -405,7 +407,7 @@ int l2tp_undo_decap(const struct ifnet *ifp, struct rte_mbuf *m) udp->check = 0; } } - } else if (eth->ether_type == htons(ETHER_TYPE_IPv6)) { + } else if (eth->ether_type == htons(RTE_ETHER_TYPE_IPV6)) { struct ip6_hdr *ip6 = (struct ip6_hdr *)(eth + 1); uint16_t plen = htons(len - sizeof(*ip6)); @@ -481,7 +483,7 @@ int l2tp_undo_decap_br(const struct ifnet *brif, struct rte_mbuf *m) if (ifp->if_parent && l2tp_undo_vlan_decap(m, ifp->if_parent) < 0) return -1; - if (!rte_pktmbuf_prepend(m, s->hdr_len + ETHER_HDR_LEN)) + if (!rte_pktmbuf_prepend(m, s->hdr_len + RTE_ETHER_HDR_LEN)) return -1; return 0; diff --git a/src/l2tp/l2tpeth_dp.c b/src/l2tp/l2tpeth_dp.c index 656f1de7..aef808f8 100644 --- a/src/l2tp/l2tpeth_dp.c +++ b/src/l2tp/l2tpeth_dp.c @@ -1,7 +1,7 @@ /* * L2TPETH Forwarding * - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. 
+ * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2014-2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -30,72 +30,24 @@ #include "ip_funcs.h" #include "l2tpeth.h" #include "netinet6/ip6_funcs.h" -#include "pktmbuf.h" +#include "pktmbuf_internal.h" #include "urcu.h" #include "vplane_debug.h" #include "vplane_log.h" -#include "vrf.h" +#include "vrf_internal.h" #define L2TP_HDR_VER_3 0x0003 -static int -l2tp_undo_encap(struct ifnet *ifp, struct rte_mbuf *m, - struct l2tp_session *session, uint16_t rx_vlan, - bool tx_vlan) -{ - struct ether_hdr *eh; - uint16_t vlan = 0; - - if (tx_vlan) { - struct ether_vlan_hdr *vhdr = (struct ether_vlan_hdr *) - (rte_pktmbuf_mtod(m, char *) + - session->hdr_len + ETHER_HDR_LEN); - eh = (struct ether_hdr *) - ((char *)vhdr + sizeof(struct vlan_hdr)); - - memmove(&vhdr->eh, eh, 2 * ETHER_ADDR_LEN); - vlan = sizeof(struct vlan_hdr); - } else - eh = (struct ether_hdr *) - (rte_pktmbuf_mtod(m, char *) + - session->hdr_len + ETHER_HDR_LEN); - - /* Replace the dest addr to be the shadow if's if we have - replaced the original ether-hdr in routed case. 
- */ - if (ether_addr_equal(&eh->s_addr, &ifp->eth_addr)) { - const struct ifnet *dp_ifp = ifnet_byport(m->port); - - if (dp_ifp) - ether_addr_copy(&dp_ifp->eth_addr, &eh->d_addr); - else - return -1; - } - - if (rte_pktmbuf_adj(m, session->hdr_len + ETHER_HDR_LEN + vlan) - == NULL) - return -1; - - if (rx_vlan) { - m->ol_flags |= PKT_RX_VLAN; - m->ol_flags &= ~PKT_TX_VLAN_PKT; - m->vlan_tci &= ~VLAN_VID_MASK; - m->vlan_tci |= rx_vlan; - } - - return 0; -} - static int l2tp_add_vlan(struct rte_mbuf *m, uint8_t offset) { struct ether_vlan_hdr *vhdr = (struct ether_vlan_hdr *) (rte_pktmbuf_mtod(m, char *) + offset); - struct ether_hdr *eh = (struct ether_hdr *) - ((char *)vhdr + sizeof(struct vlan_hdr)); + struct rte_ether_hdr *eh = (struct rte_ether_hdr *) + ((char *)vhdr + sizeof(struct rte_vlan_hdr)); - memmove(&vhdr->eh, eh, 2 * ETHER_ADDR_LEN); - vhdr->eh.ether_type = htons(ETHER_TYPE_VLAN); + memmove(&vhdr->eh, eh, 2 * RTE_ETHER_ADDR_LEN); + vhdr->eh.ether_type = htons(RTE_ETHER_TYPE_VLAN); vhdr->vh.vlan_tci = htons(m->vlan_tci); vhdr->vh.eth_proto = eh->ether_type; m->ol_flags &= ~PKT_TX_VLAN_PKT; @@ -105,16 +57,15 @@ static int l2tp_add_vlan(struct rte_mbuf *m, uint8_t offset) /* Send a packet out. 
*/ void -l2tp_output(struct ifnet *ifp, struct rte_mbuf *m, uint16_t rx_vlan) +l2tp_output(struct ifnet *ifp, struct rte_mbuf *m) { uint8_t ip_hdr_len = sizeof(struct iphdr); uint8_t flags = 0; - struct ether_hdr *orig_ethhdr = ethhdr(m); + struct rte_ether_hdr *orig_ethhdr = ethhdr(m); uint16_t etype = ntohs(orig_ethhdr->ether_type); struct iphdr *orig_ip = iphdr(m); struct l2tp_session *session; char *l2tp_hdr; - struct ifnet *orig_ifp = ifp; bool tx_vlan = m->ol_flags & PKT_TX_VLAN_PKT; struct l2tp_softc *sc = rcu_dereference(ifp->if_softc); @@ -130,16 +81,16 @@ l2tp_output(struct ifnet *ifp, struct rte_mbuf *m, uint16_t rx_vlan) goto drop; } - uint8_t encap_len = session->hdr_len + ETHER_HDR_LEN; + uint8_t encap_len = session->hdr_len + RTE_ETHER_HDR_LEN; struct l2tpv3_encap *encap = (struct l2tpv3_encap *) rte_pktmbuf_prepend(m, encap_len + - (tx_vlan ? sizeof(struct vlan_hdr) : 0)); + (tx_vlan ? sizeof(struct rte_vlan_hdr) : 0)); if (unlikely(encap == NULL)) { DP_DEBUG(L2TP, ERR, L2TP, "Not enough space in mbuf to allocate l2tp hdr\n"); goto drop; } - pktmbuf_l2_len(m) = ETHER_HDR_LEN; + dp_pktmbuf_l2_len(m) = RTE_ETHER_HDR_LEN; /* * L2tp interface supports only default VRF as of yet @@ -160,12 +111,12 @@ l2tp_output(struct ifnet *ifp, struct rte_mbuf *m, uint16_t rx_vlan) uint8_t proto = 0; uint8_t offset = 0; - if (etype == ETHER_TYPE_IPv4) { + if (etype == RTE_ETHER_TYPE_IPV4) { tos = orig_ip->tos; ttl = orig_ip->ttl; proto = orig_ip->protocol; offset = sizeof(struct iphdr); - } else if (etype == ETHER_TYPE_IPv6) { + } else if (etype == RTE_ETHER_TYPE_IPV6) { tos = ip6_tclass(*(uint32_t *)orig_ip); ttl = ((struct ip6_hdr *)orig_ip)->ip6_hlim; proto = ((struct ip6_hdr *)orig_ip)->ip6_nxt; @@ -178,7 +129,7 @@ l2tp_output(struct ifnet *ifp, struct rte_mbuf *m, uint16_t rx_vlan) if (flags & L2TP_ENCAP_IPV4) { struct iphdr *ip_header = (struct iphdr *)encap->iphdr; - encap->ether_header.ether_type = htons(ETHER_TYPE_IPv4); + encap->ether_header.ether_type = 
htons(RTE_ETHER_TYPE_IPV4); ip_header->ihl = sizeof(struct iphdr) >> 2; ip_header->version = IPVERSION; @@ -192,13 +143,12 @@ l2tp_output(struct ifnet *ifp, struct rte_mbuf *m, uint16_t rx_vlan) memcpy(&ip_header->saddr, &session->s_addr, sizeof(uint32_t)); memcpy(&ip_header->daddr, &session->d_addr, sizeof(uint32_t)); - ip_header->check = 0; - ip_header->check = in_cksum_hdr(ip_header); - pktmbuf_l3_len(m) = ip_header->ihl << 2; + dp_set_cksum_hdr(ip_header); + dp_pktmbuf_l3_len(m) = ip_header->ihl << 2; } else { struct ip6_hdr *ip_header = (struct ip6_hdr *)encap->iphdr; - encap->ether_header.ether_type = htons(ETHER_TYPE_IPv6); + encap->ether_header.ether_type = htons(RTE_ETHER_TYPE_IPV6); ip_hdr_len = sizeof(struct ip6_hdr); ip6_ver_tc_flow_hdr(ip_header, tos, 0); @@ -215,9 +165,9 @@ l2tp_output(struct ifnet *ifp, struct rte_mbuf *m, uint16_t rx_vlan) /* udp hdr */ uint16_t *udp_cksum = NULL; - uint16_t orig_cksum; + uint16_t orig_cksum = 0; struct ip6_hdr *ip6hdr = NULL; - struct udp_hdr *udp_header = (struct udp_hdr *) + struct rte_udp_hdr *udp_header = (struct rte_udp_hdr *) ((char *)encap->iphdr + ip_hdr_len); if (unlikely(flags & L2TP_ENCAP_UDP)) { uint16_t pkt_len = session->hdr_len - ip_hdr_len + orig_pkt_len; @@ -229,10 +179,10 @@ l2tp_output(struct ifnet *ifp, struct rte_mbuf *m, uint16_t rx_vlan) udp_header->dgram_cksum = 0; else { if (proto == IPPROTO_TCP) - orig_cksum = ((struct tcp_hdr *) + orig_cksum = ((struct rte_tcp_hdr *) ((char *)orig_ip + offset))->cksum; else if (proto == IPPROTO_UDP) - orig_cksum = ((struct udp_hdr *) + orig_cksum = ((struct rte_udp_hdr *) ((char *)orig_ip + offset))->dgram_cksum; ip6hdr = (struct ip6_hdr *)encap->iphdr; @@ -244,7 +194,8 @@ l2tp_output(struct ifnet *ifp, struct rte_mbuf *m, uint16_t rx_vlan) /* l2tp header */ if (flags & L2TP_ENCAP_UDP) { struct l2tpv3_udp_hdr *v3udp_hdr = (struct l2tpv3_udp_hdr *) - ((char *)encap->iphdr + ip_hdr_len + sizeof(struct udp_hdr)); + ((char *)encap->iphdr + + ip_hdr_len + 
sizeof(struct rte_udp_hdr)); v3udp_hdr->ver = htons(L2TP_HDR_VER_3); v3udp_hdr->zero = 0; l2tp_hdr = (char *)&v3udp_hdr->session_id; @@ -285,30 +236,13 @@ l2tp_output(struct ifnet *ifp, struct rte_mbuf *m, uint16_t rx_vlan) bool is_ipv4 = flags & L2TP_ENCAP_IPV4; - uint16_t eth_type - = is_ipv4 ? htons(ETHER_TYPE_IPv4) : htons(ETHER_TYPE_IPv6); - struct ifnet *dp_ifp = NULL; - - if (crypto_policy_outbound_match(ifp, &m, eth_type)) { - dp_ifp = ifnet_byport(m->port); - if (unlikely(l2tp_undo_encap(ifp, m, session, - rx_vlan, tx_vlan) < 0)) - goto drop; - - /* - * The following drop must happen in the context - * of a crypto match. Common to outer if, else - * but do not move it outside. - */ - if (!dp_ifp) - goto drop; - } + uint16_t eth_type = is_ipv4 ? htons(RTE_ETHER_TYPE_IPV4) : + htons(RTE_ETHER_TYPE_IPV6); + + if (crypto_policy_check_outbound(ifp, &m, RT_TABLE_MAIN, + eth_type, NULL)) + return; - /* - * Over-ride intf due to crypto match or fallback to - * original interface (e.g. child vif interface) while routing. - */ - ifp = dp_ifp ? dp_ifp : orig_ifp; pktmbuf_prepare_encap_out(m); if_incr_out(ifp, m); if (is_ipv4) diff --git a/src/l2tp/l2tpeth_netlink.c b/src/l2tp/l2tpeth_netlink.c index 44f2d01b..b2d3a1b5 100644 --- a/src/l2tp/l2tpeth_netlink.c +++ b/src/l2tp/l2tpeth_netlink.c @@ -1,7 +1,7 @@ /* * Handle L2TPv3 GeNetlink events * - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2014-2016 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -103,9 +103,7 @@ l2tp_session_byid(uint32_t session_id) if (likely(node != NULL)) return caa_container_of(node, struct l2tp_session, session_node); - else - return NULL; - + return NULL; } void l2tp_session_walk(l2tp_iter_func_t func, void *arg) @@ -139,7 +137,7 @@ static void l2tp_xconnect_update(struct ifnet *dpifp, struct ifnet *old_dpifp; old_session->flags |= L2TP_LNS_MODE; - old_dpifp = ifnet_byifindex(old_session->xconnect_ifidx); + old_dpifp = dp_ifnet_byifindex(old_session->xconnect_ifidx); if (old_dpifp) { cross_connect_unlink(old_dpifp, true); cross_connect_unlink(l2tpifp, true); @@ -261,7 +259,7 @@ l2tp_session_set_info(struct l2tp_session *session, uint32_t session_id, hdr_len += 4; session->flags &= ~L2TP_ENCAP_UDP; } else { - hdr_len += 8 + sizeof(struct udp_hdr); + hdr_len += 8 + sizeof(struct rte_udp_hdr); session->flags |= L2TP_ENCAP_UDP; } session->flags |= L2TP_LNS_MODE; @@ -339,7 +337,8 @@ l2tp_genl_session_create_modify(uint32_t tunnel_id, uint32_t session_id, if (old_session) { if (old_session->xconnect_ifidx) l2tp_xconnect_update( - ifnet_byifindex(old_session->xconnect_ifidx), + dp_ifnet_byifindex( + old_session->xconnect_ifidx), old_session, session, session->ifp, @@ -580,7 +579,7 @@ rtnl_process_l2tp_session(const struct nlmsghdr *nlh) */ uint16_t mtu = tb[L2TP_ATTR_MTU] ? 
- mnl_attr_get_u16(tb[L2TP_ATTR_MTU]) : ETHER_MTU; + mnl_attr_get_u16(tb[L2TP_ATTR_MTU]) : RTE_ETHER_MTU; switch (genlhdr->cmd) { case L2TP_CMD_SESSION_GET: @@ -631,11 +630,11 @@ rtnl_process_l2tp(const struct nlmsghdr *nlh, static struct ifnet * l2tpeth_create_internal(const char *ifname, unsigned int mtu, - const struct ether_addr *addr) + const struct rte_ether_addr *addr) { struct ifnet *ifp; - ifp = if_alloc(ifname, IFT_L2TPETH, mtu, addr, SOCKET_ID_ANY); + ifp = if_alloc(ifname, IFT_L2TPETH, mtu, addr, SOCKET_ID_ANY, NULL); if (!ifp) goto bad; @@ -652,7 +651,7 @@ l2tpeth_attach_session(const char *ifname, struct l2tp_session *session, { struct ifnet *ifp; - ifp = ifnet_byifname(ifname); + ifp = dp_ifnet_byifname(ifname); if (!ifp) { ifp = l2tpeth_create_internal(ifname, mtu, NULL); if (!ifp) @@ -670,7 +669,7 @@ l2tpeth_attach_session(const char *ifname, struct l2tp_session *session, } static struct ifnet * -l2tpeth_reuse(struct ifnet *ifp, const struct ether_addr *addr) +l2tpeth_reuse(struct ifnet *ifp, const struct rte_ether_addr *addr) { if (ifp->if_type != IFT_L2TPETH) { RTE_LOG(ERR, L2TP, "mismatch type for %s\n", ifp->if_name); @@ -681,14 +680,14 @@ l2tpeth_reuse(struct ifnet *ifp, const struct ether_addr *addr) if_unset_ifindex(ifp); /* if_set_ifindex does this. 
*/ - ether_addr_copy(addr, &ifp->eth_addr); + rte_ether_addr_copy(addr, &ifp->eth_addr); return ifp; } struct ifnet * l2tpeth_create(int ifindex, const char *ifname, unsigned int mtu, - const struct ether_addr *addr) + const struct rte_ether_addr *addr) { struct ifnet *ifp; @@ -698,7 +697,7 @@ l2tpeth_create(int ifindex, const char *ifname, unsigned int mtu, } /* Try to reuse an existing interface */ - ifp = ifnet_byifname(ifname); + ifp = dp_ifnet_byifname(ifname); if (ifp) ifp = l2tpeth_reuse(ifp, addr); else @@ -712,7 +711,7 @@ l2tpeth_create(int ifindex, const char *ifname, unsigned int mtu, return ifp; } -static int l2tpeth_if_init(struct ifnet *ifp) +static int l2tpeth_if_init(struct ifnet *ifp, void *ctx __unused) { struct l2tp_softc *sc = NULL; @@ -800,7 +799,7 @@ void l2tp_init_stats(struct l2tp_session *session) int l2tp_set_xconnect(char *cmd, char *dpifname, char *l2tpifname, char *ttl) { - struct ifnet *dpifp = ifnet_byifname(dpifname); + struct ifnet *dpifp = dp_ifnet_byifname(dpifname); if (unlikely(dpifp == NULL)) { RTE_LOG(ERR, L2TP, @@ -809,7 +808,7 @@ int l2tp_set_xconnect(char *cmd, char *dpifname, char *l2tpifname, char *ttl) return -1; } - struct ifnet *l2tpifp = ifnet_byifname(l2tpifname); + struct ifnet *l2tpifp = dp_ifnet_byifname(l2tpifname); if (unlikely(l2tpifp == NULL)) { RTE_LOG(ERR, L2TP, diff --git a/src/lag.c b/src/lag.c index 218d8663..eeafbccf 100644 --- a/src/lag.c +++ b/src/lag.c @@ -1,712 +1,312 @@ /*- - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2013-2016 by Brocade Communications Systems, Inc. * All rights reserved. 
* * SPDX-License-Identifier: LGPL-2.1-only */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include #include +#include #include -#include -#include -#include - -#include "compiler.h" -#include "capture.h" -#include "compat.h" -#include "ether.h" + +#include "dp_event.h" +#include "if/dpdk-eth/dpdk_eth_if.h" #include "if_var.h" -#include "json_writer.h" #include "lag.h" -#include "main.h" -#include "master.h" -#include "netlink.h" -#include "pktmbuf.h" -#include "urcu.h" -#include "util.h" +#include "protobuf.h" +#include "protobuf/LAGConfig.pb-c.h" #include "vplane_debug.h" -#include "vplane_log.h" - -struct nlattr; -/* remember which slaves are collecting/distributing */ -static uint8_t enabled[LAG_MAX_SLAVES]; +static const struct lag_ops *current_lag_ops; struct ifnet *ifnet_byteam(int ifindex) { - struct ifnet *ifp = ifnet_byifindex(ifindex); + struct ifnet *ifp = dp_ifnet_byifindex(ifindex); if (ifp && ifp->if_team) return ifp; return NULL; } -static void lacp_recv_cb(portid_t slave_id, struct rte_mbuf *lacp_pkt) +int lag_etype_slow_tx(struct ifnet *team, struct ifnet *ifp, + struct rte_mbuf *lacp_pkt) { - struct ifnet *ifp = ifnet_byport(slave_id); - - if (unlikely(ifp == NULL)) { - rte_pktmbuf_free(lacp_pkt); - return; - } - - pktmbuf_mdata_clear_all(lacp_pkt); - - /* local packet capture */ - if (ifp->capturing) - capture_burst(ifp, &lacp_pkt, 1); - - local_packet(ifp, lacp_pkt); + return current_lag_ops->lagop_etype_slow_tx(team, ifp, lacp_pkt); } -/* - * outgoing ether type slow traffic has special handling: - * - capture via dataplane slave - * - send via rte_pmd_bond master - */ -int lag_etype_slow_tx(struct ifnet *master, struct ifnet *ifp, - struct rte_mbuf *lacp_pkt) +void lag_member_sync_mac_address(struct ifnet *ifp) { - if (ifp->capturing) - capture_burst(ifp, &lacp_pkt, 1); - - return rte_eth_bond_8023ad_ext_slowtx(master->if_port, ifp->if_port, - lacp_pkt); + 
current_lag_ops->lagop_member_sync_mac_address(ifp); } -/* - * The rte_pmd_bond might change MAC address of a slave port on certain events. - * This helper tries to update the MAC address of the DPDK device from the - * dataplane ifnet structure. - */ -void lag_slave_sync_mac_address(struct ifnet *ifp) +struct ifnet *lag_create(const struct ifinfomsg *ifi, struct nlattr *tb[]) { - struct ether_addr hwaddr; - char buf1[32], buf2[32]; - - rte_eth_macaddr_get(ifp->if_port, &hwaddr); - if (ether_addr_equal(&ifp->eth_addr, &hwaddr)) - return; - - DP_DEBUG(LAG, DEBUG, DATAPLANE, "%s updating MAC from %s to %s\n", - ifp->if_name, ether_ntoa_r(&hwaddr, buf2), - ether_ntoa_r(&ifp->eth_addr, buf1)); - - int rc = rte_eth_dev_default_mac_addr_set(ifp->if_port, &ifp->eth_addr); - if (rc) { - /* - * If updating the slave's address fails lets update the - * dataplane's address to be in sync! - */ - DP_DEBUG(LAG, ERR, DATAPLANE, "%s can't set address %s: %s\n", - ifp->if_name, ether_ntoa_r(&ifp->eth_addr, buf1), - strerror(-rc)); - ifp->eth_addr = hwaddr; - } + return current_lag_ops->lagop_create(ifi, tb); } -struct ifnet *lag_create(const struct ifinfomsg *ifi, struct nlattr *tb[]) +int lag_mode_set_balance(struct ifnet *ifp) { - int port_id; - const char *ifname; - struct ether_addr *macaddr = NULL; - uint8_t i; - struct ifnet *ifp; - char bond_name[RTE_ETH_NAME_MAX_LEN]; - int len; - - if (tb[IFLA_ADDRESS]) { - size_t addrlen = mnl_attr_get_payload_len(tb[IFLA_ADDRESS]); - - if (addrlen != ETHER_ADDR_LEN) - return NULL; - macaddr = mnl_attr_get_payload(tb[IFLA_ADDRESS]); - } - if (macaddr == NULL) - return NULL; - - if (tb[IFLA_IFNAME]) - ifname = mnl_attr_get_str(tb[IFLA_IFNAME]); - else - return NULL; - - struct rte_eth_dev *eth_dev = NULL; - - /* bond device name must start with "net_bonding" */ - len = snprintf(bond_name, sizeof(bond_name), "%s%s", - BOND_DEV_NAME, ifname); - if (len < 0 || len >= (int)sizeof(bond_name)) - return NULL; - - for (i = 0; i < 
DATAPLANE_MAX_PORTS; i++) { - if (!rte_eth_dev_is_valid_port(i)) - continue; - if (strcmp(rte_eth_devices[i].data->name, bond_name) == 0) { - eth_dev = &rte_eth_devices[i]; - break; - } - } - - if (eth_dev) { - /* teamd was restarted. Tell controller about new ifindex */ - port_id = eth_dev->data->port_id; - setup_interface_portid(port_id); - } else { - port_id = rte_eth_bond_create(bond_name, - BONDING_MODE_ACTIVE_BACKUP, - rte_socket_id()); - } - if (port_id < 0) - return NULL; - - rte_eth_bond_mac_address_set(port_id, macaddr); - - if (!eth_dev) { - if (insert_port(port_id) != 0) - return NULL; - } - - ifp = ifport_table[port_id]; - if (!ifp) - return NULL; - - ifp->if_flags = ifi->ifi_flags; - ifp->eth_addr = *macaddr; - - if_set_ifindex(ifp, ifi->ifi_index); - ifp->if_team = 1; - return ifp; + return current_lag_ops->lagop_mode_set_balance(ifp); } -static int slave_add(struct ifnet *master, struct ifnet *ifp) +int lag_mode_set_activebackup(struct ifnet *ifp) { - int rv; - struct rte_eth_dev_info slave_info, master_info; - - if (ifp->aggregator) { - DP_DEBUG(LAG, ERR, DATAPLANE, - "%s already slave of %s\n", ifp->if_name, - ifp->aggregator->if_name); - return -EEXIST; - } - - rte_eth_dev_info_get(master->if_port, &master_info); - rte_eth_dev_info_get(ifp->if_port, &slave_info); - - /* Ignore VMDQ information since we know that the BOND pmd - * will never have support for VMDQ and thus provides a - * reasonable upper bound. 
- */ - - if (slave_info.max_rx_queues < master_info.nb_rx_queues || - slave_info.max_tx_queues < master_info.nb_tx_queues) { - struct rte_eth_dev *master_dev = - &rte_eth_devices[master->if_port]; - int master_dev_started = master_dev->data->dev_started; - int nb_rx_queues = - MIN(slave_info.max_rx_queues, master_info.nb_rx_queues); - int nb_tx_queues = - MIN(slave_info.max_tx_queues, master_info.nb_tx_queues); - - if (master_dev_started) - stop_port(master->if_port); - rv = reconfigure_queues(master->if_port, - nb_rx_queues, nb_tx_queues); - if (rv) - return rv; - if (master_dev_started) - start_port(master->if_port, master->if_flags); - } - - /* - * Queues are assigned again by start_port() call in - * slave_remove() - */ - if_disable_poll_rcu(ifp->if_port); - if (ifp->if_flags & IFF_UP) - unassign_queues(ifp->if_port); - - rv = rte_eth_bond_slave_add(master->if_port, ifp->if_port); - if (rv < 0) { - if (ifp->if_flags & IFF_UP) - assign_queues(ifp->if_port); - if_enable_poll(ifp->if_port); - return rv; - } - /* - * internals is accessed in the forwarding threads. We stop them - * while we update this, but since there isn't a lock, there isn't a - * barrier to ensure that these updates are visible on the other - * lcores before we resume them. - */ - rte_smp_mb(); - - rcu_assign_pointer(ifp->aggregator, master); - - return 0; + return current_lag_ops->lagop_mode_set_activebackup(ifp); } -/* - * Assumes that polling is turned off on master interface, so that - * there's no race with an in-progress rx. - */ -static int slave_remove(struct ifnet *master, struct ifnet *ifp) +int lag_select(struct ifnet *ifp, bool enable) { - int rv; - - if (!ifp->aggregator) - return -ENOENT; - - rv = rte_eth_bond_slave_remove(master->if_port, ifp->if_port); - if (rv < 0) - return rv; - /* - * internals is accessed in the forwarding threads. 
We stop them - * while we update this, but since there isn't a lock, there isn't a - * barrier to ensure that these updates are visible on the other - * lcores before we resume them. - */ - rte_smp_mb(); - - /* clear RCU protected aggregator pointer */ - ifp->aggregator = NULL; - - /* - * Force the port to be stopped since it will have been - * started by bond if not already and there's no guarantee - * that our state is consistent with the DPDK state now. - */ - force_stop_port(ifp->if_port); - - /* enable any queues released by the bonding driver */ - rv = eth_port_config(ifp->if_port); - if (rv < 0) - return rv; - - /* - * There's no guarantee what order the slave removal team - * message and link update rtnl messages will arrive in, so - * attempt to start the port here. If it still has the - * IFF_SLAVE flag set then it won't be started here and - * instead it'll be started when the link update rtnl message - * removing that flag subsequently arrives. - */ - if (ifp->if_flags & IFF_UP) - start_port(ifp->if_port, ifp->if_flags); - if_enable_poll(ifp->if_port); - - return rv; + return current_lag_ops->lagop_select(ifp, enable); } -int lag_mode_set_balance(struct ifnet *ifp) +int lag_set_member_usable(struct ifnet *ifp, bool usable) { - struct rte_eth_bond_8023ad_conf conf; - int rv; - int mode = rte_eth_bond_mode_get(ifp->if_port); - - if (mode == BONDING_MODE_8023AD) - return 0; - - /* get default configuration */ - rv = rte_eth_bond_8023ad_setup(ifp->if_port, NULL); - if (rv < 0) - return rv; - - rv = rte_eth_bond_8023ad_conf_get(ifp->if_port, &conf); - if (rv < 0) - return rv; - - conf.slowrx_cb = lacp_recv_cb; - - rv = rte_eth_bond_8023ad_setup(ifp->if_port, &conf); - if (rv < 0) - return rv; - - struct rte_eth_dev *dev = &rte_eth_devices[ifp->if_port]; - uint8_t dev_started = dev->data->dev_started; - - if (dev_started) - rte_eth_dev_stop(ifp->if_port); - - rv = rte_eth_bond_mode_set(ifp->if_port, BONDING_MODE_8023AD); - if (rv < 0) - return rv; - - if 
(dev_started) - rte_eth_dev_start(ifp->if_port); - - - rte_eth_bond_xmit_policy_set(ifp->if_port, BALANCE_XMIT_POLICY_LAYER34); + if (current_lag_ops->lagop_set_member_usable) + return current_lag_ops->lagop_set_member_usable(ifp, usable); return 0; } -int lag_mode_set_activebackup(struct ifnet *ifp) +int lag_set_activeport(struct ifnet *ifp, struct ifnet *ifp_member) { - struct rte_eth_dev *dev = &rte_eth_devices[ifp->if_port]; - uint8_t dev_started = dev->data->dev_started; - int rv; - - if (dev_started) - rte_eth_dev_stop(ifp->if_port); - - rv = rte_eth_bond_mode_set(ifp->if_port, BONDING_MODE_ACTIVE_BACKUP); - if (rv < 0) - return rv; - - if (dev_started) - rte_eth_dev_start(ifp->if_port); - - return rv; + return current_lag_ops->lagop_set_activeport(ifp, ifp_member); } -int lag_select(struct ifnet *ifp, int sel) +void lag_nl_team_delete(const struct ifinfomsg *ifi __unused, + struct ifnet *team_ifp) { - if (ifp->aggregator == NULL) - return -1; - - DP_DEBUG(LAG, DEBUG, DATAPLANE, - "teamd runner %sselected ifindex %d:%s (port %u)\n", - sel ? 
"" : "de", ifp->if_index, ifp->if_name, ifp->if_port); - - int mode = rte_eth_bond_mode_get(ifp->aggregator->if_port); - - enabled[ifp->if_port] = !!sel; - - if (mode == BONDING_MODE_ACTIVE_BACKUP) - return 0; - - if (rte_eth_bond_8023ad_ext_collect(ifp->aggregator->if_port, - ifp->if_port, - enabled[ifp->if_port])) { - DP_DEBUG(LAG, ERR, DATAPLANE, "cannot set collecting flag\n"); - return -1; - } + return current_lag_ops->lagop_delete(team_ifp); +} - if (rte_eth_bond_8023ad_ext_distrib(ifp->aggregator->if_port, - ifp->if_port, - enabled[ifp->if_port])) { - DP_DEBUG(LAG, ERR, DATAPLANE, "cannot set distributing flag\n"); - return -1; - } +bool lag_can_start(const struct ifnet *ifp) +{ + if (!ifp->if_team) + return true; - return 0; + return current_lag_ops->lagop_can_start(ifp); } -int lag_activeport(struct ifnet *ifp, struct ifnet *ifp_slave) +int lag_member_add(struct ifnet *team, struct ifnet *ifp) { - DP_DEBUG(LAG, DEBUG, DATAPLANE, - "teamd runner %s activeport ifindex %d:%s (port %u)\n", - ifp->if_name, ifp_slave->if_index, ifp_slave->if_name, - ifp_slave->if_port); - - int mode = rte_eth_bond_mode_get(ifp->if_port); + int rc; - if (mode == BONDING_MODE_ACTIVE_BACKUP) - rte_eth_bond_primary_set(ifp->if_port, ifp_slave->if_port); + rc = current_lag_ops->lagop_member_add(team, ifp); + if (!rc) + dp_event(DP_EVT_IF_LAG_ADD_MEMBER, 0, team, 0, 0, ifp); - return 0; + return rc; } -/* Remove an aggregation. team%d interface went away. */ -void lag_nl_master_delete(const struct ifinfomsg *ifi __unused, - struct ifnet *master_ifp) +int lag_member_delete(struct ifnet *team, struct ifnet *ifp) { - /* librte_pmd_bond doesn't allow removal of interfaces, - * so just delete evidence of team interface. 
- */ - teardown_interface_portid(master_ifp->if_port); - if_unset_ifindex(master_ifp); + dp_event(DP_EVT_IF_LAG_DELETE_MEMBER, 0, team, 0, 0, ifp); + return current_lag_ops->lagop_member_delete(team, ifp); } -/* - * Returns the number of slaves associated with this bonding interface. - * - * If not a bonding interface, return -1. - */ -int slave_count(const struct ifnet *ifp) +/* Add interface to an aggregation or update an existing member interface */ +int lag_nl_member_update(const struct ifinfomsg *ifi, + struct ifnet *ifp, struct ifnet *team) { - portid_t slaves[LAG_MAX_SLAVES]; + return current_lag_ops->lagop_nl_member_update(ifi, ifp, team); +} - if (!ifp->if_team) - return -1; +void lag_refresh_actor_state(struct ifnet *team) +{ + return current_lag_ops->lagop_refresh_actor_state(team); +} - return rte_eth_bond_slaves_get(ifp->if_port, slaves, - LAG_MAX_SLAVES); +static void show_lag(struct ifnet *ifp, void *arg) +{ + if (is_team(ifp)) + current_lag_ops->lagop_show_detail(ifp, arg); } -int lag_slave_add(struct ifnet *master, struct ifnet *ifp) +int lag_summary(FILE *fp) { - int count, rv; + json_writer_t *wr = jsonw_new(fp); - count = slave_count(master); - if (count < 0) - return -EINVAL; + if (!wr) + return -1; - /* access to bonding "internals" structure is not thread-safe */ - if_disable_poll_rcu(master->if_port); + jsonw_pretty(wr, true); + jsonw_name(wr, "lag"); + jsonw_start_array(wr); - rv = slave_add(master, ifp); - if (rv < 0) - goto out; + dp_ifnet_walk(show_lag, wr); - /* We just added the first port, so we might need to finally - * start_port() if this interface is currently IFF_UP. 
- */ - if (count == 0 && master->if_flags & IFF_UP) - start_port(master->if_port, master->if_flags); + jsonw_end_array(wr); + jsonw_destroy(&wr); -out: - if_enable_poll(master->if_port); - return rv; + return 0; } -int lag_slave_delete(struct ifnet *master, struct ifnet *ifp) +int +lag_walk_team_members(struct ifnet *ifp, dp_ifnet_iter_func_t iter_func, + void *arg) { - portid_t slaves[LAG_MAX_SLAVES]; - int count, rv; - - count = rte_eth_bond_slaves_get(master->if_port, slaves, - LAG_MAX_SLAVES); - if (count < 0) - return -EINVAL; - - /* access to bonding "internals" structure is not thread-safe */ - if_disable_poll_rcu(master->if_port); - - rv = slave_remove(master, ifp); - if (rv < 0) - goto out; + return current_lag_ops->lagop_walk_team_members(ifp, iter_func, arg); +} - /* we just remove the last port, so lets stop polling */ - if (count == 1) { - stop_port(master->if_port); - return rv; - } +bool +lag_is_team(struct ifnet *ifp) +{ + if (ifp->if_type != IFT_ETHER) + return false; -out: - if_enable_poll(master->if_port); - return rv; + return current_lag_ops->lagop_is_team(ifp); } -bool lag_port_is_slave(struct ifnet *master, struct ifnet *ifp) +bool +lag_port_is_member(struct ifnet *ifp) { - portid_t slaves[LAG_MAX_SLAVES]; - int count, i; + if (current_lag_ops->lagop_port_is_member) + return current_lag_ops->lagop_port_is_member(ifp); - count = rte_eth_bond_slaves_get(master->if_port, slaves, - LAG_MAX_SLAVES); - - for (i = 0; i < count; i++) - if (slaves[i] == ifp->if_port) - return true; return false; } -/* Add interface to an aggregation or update an already enslaved interface */ -int lag_nl_slave_update(const struct ifinfomsg *ifi, struct ifnet *ifp, - struct ifnet *master) +int lag_can_startstop_member(struct ifnet *ifp) { - if (ifi == NULL || ifp == NULL) - return -1; + return current_lag_ops->lagop_can_startstop_member(ifp); +} - if ((!ifp->aggregator && master) || (ifp->aggregator && !master)) { - /* master was either set or cleared */ - 
lag_slave_sync_mac_address(ifp); - } else { - /* if link up, restore collect/dist flags */ - if (ifi->ifi_flags & IFF_RUNNING) { - lag_select(ifp, enabled[ifp->if_port]); - lag_slave_sync_mac_address(ifp); - } - } +int lag_set_l2_address(struct ifnet *ifp, struct rte_ether_addr *macaddr) +{ + return current_lag_ops->lagop_set_l2_address(ifp, macaddr); +} - return 0; +int lag_min_links(struct ifnet *ifp, uint16_t *min_links) +{ + if (current_lag_ops->lagop_min_links) + return current_lag_ops->lagop_min_links(ifp, min_links); + return -ENOTSUP; } -void lag_refresh_actor_state(struct ifnet *master) +int lag_set_min_links(struct ifnet *ifp, uint16_t min_links) { - portid_t slaves[LAG_MAX_SLAVES]; - int count, i; + if (current_lag_ops->lagop_set_min_links) + return current_lag_ops->lagop_set_min_links(ifp, min_links); + return -ENOTSUP; +} - count = rte_eth_bond_slaves_get(master->if_port, slaves, - LAG_MAX_SLAVES); +fal_object_t dp_ifnet_fal_lag_member(const struct ifnet *ifp) +{ + struct dpdk_eth_if_softc *member_sc; - for (i = 0; i < count; i++) - lag_select(ifport_table[slaves[i]], enabled[slaves[i]]); -} + if (ifp->if_type != IFT_ETHER) + return FAL_NULL_OBJECT_ID; -static const char * const bonding_modes[] = { - [BONDING_MODE_ROUND_ROBIN] = "Round Robin", - [BONDING_MODE_ACTIVE_BACKUP] = "Active-Backup", - [BONDING_MODE_BALANCE] = "Balanced", - [BONDING_MODE_BROADCAST] = "Broadcast", - [BONDING_MODE_8023AD] = "802.3AD", - [BONDING_MODE_TLB] = "Adaptive Transmit", - [BONDING_MODE_ALB] = "Adaptive Load Balance", -}; + if (!ifp->aggregator) + return FAL_NULL_OBJECT_ID; -static const char * const policy_names[] = { - [BALANCE_XMIT_POLICY_LAYER2] = "BALANCE_XMIT_POLICY_LAYER2", - [BALANCE_XMIT_POLICY_LAYER23] = "BALANCE_XMIT_POLICY_LAYER23", - [BALANCE_XMIT_POLICY_LAYER34] = "BALANCE_XMIT_POLICY_LAYER34" -}; + member_sc = ifp->if_softc; + if (!member_sc->scd_fal_lag_member_created) + return FAL_NULL_OBJECT_ID; + return member_sc->scd_fal_lag_member_obj; +} -static bool 
lag_slave_is_active(portid_t active[], int len, uint16_t portid) +static void lag_init(void) { - int i; - - for (i = 0; i < len; i++) - if (active[i] == portid) - return true; - return false; + if (platform_cfg.hardware_lag) + current_lag_ops = &fal_lag_ops; + else + current_lag_ops = &dpdk_lag_ops; } -static void show_lag_detail(struct ifnet *node, void *arg) +static const struct dp_event_ops lag_events = { + .init = lag_init, +}; + +DP_STARTUP_EVENT_REGISTER(lag_events); + +static int +lag_pb_create_handler(LAGConfig__LagCreate *lag_create) { - json_writer_t *wr = arg; - int num_slaves; - int num_active; - int i; - int primary = rte_eth_bond_primary_get(node->if_port); - int mode = rte_eth_bond_mode_get(node->if_port); - int policy = rte_eth_bond_xmit_policy_get(node->if_port); - const char *policy_str = "n/a"; - portid_t slaves[LAG_MAX_SLAVES]; - portid_t active[LAG_MAX_SLAVES]; - - jsonw_start_object(wr); - jsonw_string_field(wr, "ifname", node->if_name); - jsonw_uint_field(wr, "teamdev", - node->if_team ? node->if_index : 0); - jsonw_bool_field(wr, "lacp", !!(mode == BONDING_MODE_8023AD)); - jsonw_string_field(wr, "mode", - mode >= 0 ? 
bonding_modes[mode] : "Unknown"); - - if (mode == BONDING_MODE_8023AD && policy >= 0 && - policy < (int)ARRAY_SIZE(policy_names)) - policy_str = policy_names[policy]; - jsonw_string_field(wr, "hash", policy_str); - - num_active = rte_eth_bond_active_slaves_get(node->if_port, - active, - LAG_MAX_SLAVES); - num_slaves = rte_eth_bond_slaves_get(node->if_port, slaves, - LAG_MAX_SLAVES); - jsonw_name(wr, "slaves"); - jsonw_start_array(wr); - for (i = 0; i < num_slaves; i++) { - struct ifnet *sl = ifnet_byport(slaves[i]); - struct rte_eth_bond_8023ad_slave_info info; - int rc; - - if (!sl) - continue; - - bool is_primary = primary == sl->if_port; - bool is_active = lag_slave_is_active(active, num_active, - sl->if_port); - jsonw_start_object(wr); - jsonw_string_field(wr, "ifname", sl->if_name); - jsonw_bool_field(wr, "primary", is_primary); - jsonw_bool_field(wr, "active", is_active); - if (mode == BONDING_MODE_8023AD) { - rc = rte_eth_bond_8023ad_slave_info(node->if_port, - sl->if_port, &info); - if (rc == 0) { - jsonw_name(wr, "802-3ad"); - jsonw_start_array(wr); - jsonw_start_object(wr); - jsonw_int_field(wr, "selected", - info.selected); - jsonw_int_field(wr, "actor-state", - info.actor_state); - jsonw_int_field(wr, "partner-state", - info.partner_state); - jsonw_int_field(wr, "agg-port-id", - info.agg_port_id); - jsonw_end_object(wr); - jsonw_end_array(wr); - } - } + struct ifnet *ifp; + + if (!lag_create->ifname) + return -EINVAL; - jsonw_end_object(wr); + ifp = dp_ifnet_byifname(lag_create->ifname); + if (!ifp) { + /* We will get another update when + * the interface eventually appears. 
+ */ + return 0; } - jsonw_end_array(wr); - jsonw_end_object(wr); -} -static void show_lag(struct ifnet *ifp, void *arg) -{ - if (is_team(ifp)) - show_lag_detail(ifp, arg); -} + if (lag_create->has_minimum_links) { + uint16_t minimum_links; + int ret; -int lag_summary(FILE *fp) -{ - json_writer_t *wr = jsonw_new(fp); + if (lag_create->minimum_links > UINT16_MAX) + return -EINVAL; - if (!wr) - return -1; + ret = lag_min_links(ifp, &minimum_links); + if (ret == -ENOTSUP) + return 0; - jsonw_pretty(wr, true); - jsonw_name(wr, "lag"); - jsonw_start_array(wr); + /* If min links was never set, lag_min_links will fail, + * but this isn't a problem. + */ + if (ret == -EINVAL) { + minimum_links = 0; + ret = 0; + } - ifnet_walk(show_lag, wr); + if (ret < 0) { + RTE_LOG(ERR, DATAPLANE, "%s: lag_min_links failed: %d\n", + __func__, ret); + return ret; + } - jsonw_end_array(wr); - jsonw_destroy(&wr); + if (lag_create->minimum_links != minimum_links) { + lag_set_min_links(ifp, lag_create->minimum_links); + dp_event(DP_EVT_IF_LAG_CHANGE, 0, ifp, + DP_IF_LAG_EVENT_MIN_LINKS_CHANGE, 0, NULL); + } + } return 0; } -void -lag_set_phy_qinq_mtu_slave(struct ifnet *sl, void *arg) +static int +lag_pb_delete_handler(LAGConfig__LagDelete *lag_delete) { - if (!sl || arg) - return; + if (!lag_delete->ifname) + return -EINVAL; - if_set_mtu(sl, sl->if_mtu, true); + return 0; } -int -lag_walk_bond_slaves(struct ifnet *ifp, ifnet_iter_func_t iter_func, void *arg) +static int +lag_pb_handler(struct pb_msg *msg) { - int num_slaves; - portid_t slaves[LAG_MAX_SLAVES]; - int i; - - if (!ifp->if_team || !iter_func) - return -EINVAL; - - num_slaves = rte_eth_bond_slaves_get(ifp->if_port, slaves, - LAG_MAX_SLAVES); - if (num_slaves < 0) - return -EINVAL; - - for (i = 0; i < num_slaves; i++) { - struct ifnet *sl = ifnet_byport(slaves[i]); - - if (sl) - (iter_func)(sl, arg); + LAGConfig *lag = lagconfig__unpack(NULL, msg->msg_len, msg->msg); + int ret; + + switch (lag->mtype_case) { + case 
LAGCONFIG__MTYPE_LAG_CREATE: + ret = lag_pb_create_handler(lag->lag_create); + break; + case LAGCONFIG__MTYPE_LAG_DELETE: + ret = lag_pb_delete_handler(lag->lag_delete); + break; + default: + RTE_LOG(ERR, DATAPLANE, "unhandled LAG message type %d\n", + lag->mtype_case); + ret = 0; } - return 0; + lagconfig__free_unpacked(lag, NULL); + return ret; } + +PB_REGISTER_CMD(lag_create_cmd) = { + .cmd = "vyatta:lag", + .handler = lag_pb_handler, +}; + diff --git a/src/lag.h b/src/lag.h index 08c919f7..475ecfee 100644 --- a/src/lag.h +++ b/src/lag.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only * @@ -15,29 +15,68 @@ struct rte_mbuf; -#define BOND_DEV_NAME "net_bonding" -#define LAG_MAX_SLAVES RTE_MAX_ETHPORTS +#define LAG_MAX_MEMBERS RTE_MAX_ETHPORTS + +struct lag_ops { + int (*lagop_etype_slow_tx)(struct ifnet *team, struct ifnet *ifp, + struct rte_mbuf *lacp_pkt); + struct ifnet *(*lagop_create)(const struct ifinfomsg *ifi, + struct nlattr *tb[]); + int (*lagop_member_add)(struct ifnet *team, struct ifnet *ifp); + int (*lagop_member_delete)(struct ifnet *team, struct ifnet *ifp); + void (*lagop_delete)(struct ifnet *ifp); + int (*lagop_nl_member_update)(const struct ifinfomsg *ifi, + struct ifnet *ifp, + struct ifnet *team); + int (*lagop_mode_set_balance)(struct ifnet *ifp); + int (*lagop_mode_set_activebackup)(struct ifnet *ifp); + int (*lagop_select)(struct ifnet *ifp, bool sel); + int (*lagop_set_member_usable)(struct ifnet *ifp, bool usable); + int (*lagop_set_activeport)(struct ifnet *ifp, + struct ifnet *ifp_member); + void (*lagop_refresh_actor_state)(struct ifnet *team); + void (*lagop_show_detail)(struct ifnet *ifp, json_writer_t *wr); + void (*lagop_member_sync_mac_address)(struct ifnet *ifp); + int (*lagop_walk_team_members)(struct ifnet *ifp, + dp_ifnet_iter_func_t func, void *arg); + bool 
(*lagop_can_start)(const struct ifnet *ifp); + bool (*lagop_port_is_member)(struct ifnet *ifp); + bool (*lagop_is_team)(struct ifnet *ifp); + bool (*lagop_can_startstop_member)(struct ifnet *ifp); + int (*lagop_set_l2_address)(struct ifnet *ifp, + struct rte_ether_addr *macaddr); + int (*lagop_min_links)(struct ifnet *ifp, uint16_t *min_links); + int (*lagop_set_min_links)(struct ifnet *ifp, uint16_t min_links); +}; + +extern const struct lag_ops dpdk_lag_ops; +extern const struct lag_ops fal_lag_ops; struct ifnet *ifnet_byteam(int ifindex); -int lag_etype_slow_tx(struct ifnet *master, struct ifnet *ifp, +int lag_etype_slow_tx(struct ifnet *team, struct ifnet *ifp, struct rte_mbuf *lacp_pkt); struct ifnet *lag_create(const struct ifinfomsg *ifi, struct nlattr *tb[]); -int lag_slave_add(struct ifnet *master, struct ifnet *ifp); -int lag_slave_delete(struct ifnet *master, struct ifnet *ifp); -int lag_lsupdate(struct ifnet *ifp); -void lag_nl_master_delete(const struct ifinfomsg *ifi, struct ifnet *ifp); -int lag_nl_slave_update(const struct ifinfomsg *ifi, struct ifnet *ifp, - struct ifnet *master); +int lag_member_add(struct ifnet *team, struct ifnet *ifp); +int lag_member_delete(struct ifnet *team, struct ifnet *ifp); +void lag_nl_team_delete(const struct ifinfomsg *ifi, struct ifnet *ifp); +int lag_nl_member_update(const struct ifinfomsg *ifi, struct ifnet *ifp, + struct ifnet *team); int lag_mode_set_balance(struct ifnet *ifp); int lag_mode_set_activebackup(struct ifnet *ifp); -int lag_select(struct ifnet *ifp, int sel); -int lag_activeport(struct ifnet *ifp, struct ifnet *ifp_slave); -void lag_refresh_actor_state(struct ifnet *master); +int lag_select(struct ifnet *ifp, bool enable); +int lag_set_member_usable(struct ifnet *ifp, bool usable); +int lag_set_activeport(struct ifnet *ifp, struct ifnet *ifp_member); +void lag_refresh_actor_state(struct ifnet *team); int lag_summary(FILE *fp); -void lag_slave_sync_mac_address(struct ifnet *ifp); -void 
lag_set_phy_qinq_mtu_slave(struct ifnet *sl, void *unused); -int lag_walk_bond_slaves(struct ifnet *ifp, ifnet_iter_func_t func, void *arg); -int slave_count(const struct ifnet *ifp); -bool lag_port_is_slave(struct ifnet *master, struct ifnet *ifp); +void lag_member_sync_mac_address(struct ifnet *ifp); +int lag_walk_team_members(struct ifnet *ifp, dp_ifnet_iter_func_t func, + void *arg); +bool lag_can_start(const struct ifnet *ifp); +bool lag_is_team(struct ifnet *ifp); +bool lag_port_is_member(struct ifnet *ifp); +int lag_can_startstop_member(struct ifnet *ifp); +int lag_set_l2_address(struct ifnet *ifp, struct rte_ether_addr *macaddr); +int lag_min_links(struct ifnet *ifp, uint16_t *min_links); +int lag_set_min_links(struct ifnet *ifp, uint16_t min_links); #endif diff --git a/src/lcore_sched.c b/src/lcore_sched.c new file mode 100644 index 00000000..a8a37cd9 --- /dev/null +++ b/src/lcore_sched.c @@ -0,0 +1,155 @@ +/* + * Copyright (c) 2020, AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ + +#include + +#include "main.h" +#include "lcore_sched.h" +#include "lcore_sched_internal.h" +#include "util.h" +#include "vplane_log.h" + +int dp_foreach_lcore(int (*dp_per_lcore_fn)(unsigned int lcore, void *arg), + void *arg) +{ + unsigned int i; + int rv; + + for (i = 0; i <= get_lcore_max(); i++) { + rv = dp_per_lcore_fn(i, arg); + if (rv) + return rv; + } + return 0; +} + +int dp_foreach_forwarding_lcore(int (*dp_per_lcore_fn)(unsigned int lcore, + void *arg), + void *arg) +{ + int i, rv; + + /* + * Loop over all forwarding lcores. In the single cpu case return + * the main as that will also be doing forwarding. 
+ */ + for (i = rte_get_next_lcore(-1, !single_cpu, 0); + i < RTE_MAX_LCORE; + i = rte_get_next_lcore((i), !single_cpu, 0)) { + + rv = dp_per_lcore_fn(i, arg); + if (rv) + return rv; + } + + return 0; +} + +static pthread_mutex_t dp_lcore_events_mutex = PTHREAD_MUTEX_INITIALIZER; + +struct dp_lcore_events_internal { + const struct dp_lcore_events *events; + void *arg; + LIST_ENTRY(dp_lcore_events_internal) list_entry; +}; + +LIST_HEAD(dp_lcore_events_list_head, dp_lcore_events_internal); + +struct dp_lcore_events_list_head dp_lcore_events_list = + LIST_HEAD_INITIALIZER(dp_lcore_events_list); + +int dp_lcore_events_register(const struct dp_lcore_events *events, + void *arg) +{ + struct dp_lcore_events_internal *entry; + + ASSERT_MAIN(); + + if (!events) + return -EINVAL; + + entry = malloc(sizeof(*entry)); + if (!entry) + return -ENOMEM; + + entry->events = events; + entry->arg = arg; + + pthread_mutex_lock(&dp_lcore_events_mutex); + LIST_INSERT_HEAD(&dp_lcore_events_list, entry, list_entry); + pthread_mutex_unlock(&dp_lcore_events_mutex); + + return 0; +} + +int dp_lcore_events_unregister(const struct dp_lcore_events *events) +{ + struct dp_lcore_events_internal *entry; + + ASSERT_MAIN(); + + if (!events) + return -EINVAL; + + pthread_mutex_lock(&dp_lcore_events_mutex); + + LIST_FOREACH(entry, &dp_lcore_events_list, list_entry) { + if (entry->events == events) { + LIST_REMOVE(entry, list_entry); + pthread_mutex_unlock(&dp_lcore_events_mutex); + + free(entry); + return 0; + } + } + + pthread_mutex_unlock(&dp_lcore_events_mutex); + return -ENOENT; +} + +void dp_lcore_events_init(unsigned int lcore_id) +{ + struct dp_lcore_events_internal *entry; + int rv; + + pthread_mutex_lock(&dp_lcore_events_mutex); + + LIST_FOREACH(entry, &dp_lcore_events_list, list_entry) { + if (entry->events->dp_lcore_events_init_fn) { + rv = entry->events->dp_lcore_events_init_fn( + lcore_id, + entry->arg); + if (rv) + RTE_LOG(INFO, DATAPLANE, + "Failed to init per lcore on lcore %d (%d)\n", 
+ lcore_id, rv); + } + } + + pthread_mutex_unlock(&dp_lcore_events_mutex); +} + +void dp_lcore_events_teardown(unsigned int lcore_id) +{ + struct dp_lcore_events_internal *entry; + int rv; + + pthread_mutex_lock(&dp_lcore_events_mutex); + + LIST_FOREACH(entry, &dp_lcore_events_list, list_entry) { + if (entry->events->dp_lcore_events_teardown_fn) { + rv = entry->events->dp_lcore_events_teardown_fn( + lcore_id, + entry->arg); + if (rv) + RTE_LOG(INFO, DATAPLANE, + "Failed to teardown per lcore on lcore %d (%d)\n", + lcore_id, rv); + } + } + + pthread_mutex_unlock(&dp_lcore_events_mutex); +} diff --git a/src/lcore_sched_internal.h b/src/lcore_sched_internal.h new file mode 100644 index 00000000..05594179 --- /dev/null +++ b/src/lcore_sched_internal.h @@ -0,0 +1,22 @@ +/* + * Copyright (c) 2020, AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ + +#ifndef LCORE_SCHED_INTERNAL_H +#define LCORE_SCHED_INTERNAL_H + +#include + +/* + * Run all the registered per lcore init functions. + */ +void dp_lcore_events_init(unsigned int lcore_id); + +/* + * Run all the registered per lcore teardown functions. + */ +void dp_lcore_events_teardown(unsigned int lcore_id); + +#endif /* LCORE_SCHED_INTERNAL_H */ diff --git a/src/linkwatch.c b/src/linkwatch.c deleted file mode 100644 index c148a6bb..00000000 --- a/src/linkwatch.c +++ /dev/null @@ -1,607 +0,0 @@ -/*- - * Copyright (c) 2017-2019, AT&T Intellectual Property. - * All rights reserved. - * Copyright (c) 2011-2016 by Brocade Communications Systems, Inc. - * All rights reserved. 
- * - * SPDX-License-Identifier: LGPL-2.1-only - */ - -/* - * Port link state events - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "bitmask.h" -#include "compiler.h" -#include "config.h" -#include "control.h" -#include "dpdk_eth_if.h" -#include "dp_event.h" -#include "event.h" -#include "if_var.h" -#include "l2_rx_fltr.h" -#include "lag.h" -#include "main.h" -#include "qos.h" -#include "urcu.h" -#include "vhost.h" -#include "vplane_debug.h" -#include "vplane_log.h" - -static bitmask_t started_port_mask; /* port has been started */ -bitmask_t linkup_port_mask __hot_data; /* link is up */ -static bitmask_t lsc_irq_mask; /* link interrupt enabled */ -static bitmask_t lsc_irq_pending; /* link state changed */ -static bitmask_t link_reset_pending; /* link reset pending */ -static bitmask_t queue_state_pending; /* queue state change pending */ - -/* event file descriptor for link state change */ -static void *lsc_arg; - -/* decode DPDK definition of duplex */ -const char *link_duplexstr(unsigned int duplex) -{ - switch (duplex) { - case ETH_LINK_HALF_DUPLEX: return "half"; - case ETH_LINK_FULL_DUPLEX: return "full"; - default: return "unknown?"; - } -} - -/* notify qos of link state change */ -static void notify_port_status(portid_t port, - const struct rte_eth_link *link) -{ - struct ifnet *ifp = ifport_table[port]; - - if (link->link_status) { - RTE_LOG(NOTICE, DATAPLANE, - "%s Link up at %d Mbps, %s duplex\n", - ifp->if_name, - link->link_speed, - link_duplexstr(link->link_duplex)); - - bitmask_set(&linkup_port_mask, port); - bitmask_and(&active_port_mask, &poll_port_mask, - &linkup_port_mask); - - dp_event(DP_EVT_IF_LINK_CHANGE, 0, ifp, - link->link_status, link->link_speed, NULL); - } else { - RTE_LOG(WARNING, DATAPLANE, - "%s Link down\n", ifp->if_name); - bitmask_clear(&linkup_port_mask, port); - bitmask_and(&active_port_mask, 
&poll_port_mask, - &linkup_port_mask); - - dp_event(DP_EVT_IF_LINK_CHANGE, 0, ifp, - link->link_status, link->link_speed, NULL); - - /* Note: it is probably a good idea to drain the pkt - * ring and burst at this point to avoid stale packets - * going out once the link comes back up. However, - * doing the necessary synchronize_rcu() here could - * potentially have a negative impact on link changes - * for other interfaces and other events so isn't done - * for the moment. - */ - } -} - -/* Timer for peroidic check of link state - * - * Note: rcu_read_lock not held here! - */ -static void linkwatch_timer(struct rte_timer *tim __rte_unused, void *arg) -{ - struct ifnet *ifp = arg; - portid_t port = ifp->if_port; - struct rte_eth_link link; - - /* ignore timer when race with admin down */ - if (bitmask_isset(&started_port_mask, port)) { - rte_eth_link_get_nowait(port, &link); - bitmask_set(&lsc_irq_mask, port); /* re-enable irq */ - - int old_status = if_port_isup(port); - if (link.link_status != old_status) - notify_port_status(port, &link); - - send_port_status(port, &link); - } -} - -/* Check link state */ -static void update_port_status(portid_t port, bool link_down) -{ - struct rte_eth_link link; - - rte_eth_link_get_nowait(port, &link); - /* The kernel needs to be informed that the link is operationally down - * when the port is stopped, so intervene in this case as the link state - * in some if not all DPDK PMDs remains up. - */ - if (link_down) - link.link_status = ETH_LINK_DOWN; - notify_port_status(port, &link); - send_port_status(port, &link); -} - -static void soft_stop_port(portid_t port) -{ - struct ifnet *ifp = ifport_table[port]; - struct dpdk_eth_if_softc *sc = ifp->if_softc; - - if (!bitmask_isset(&started_port_mask, port)) - return; /* already inactive */ - - /* if we're about to yank one of the slaves out from under the bonding - * driver, stop the bonding interface first. 
- */ - if (ifp->aggregator) - soft_stop_port(ifp->aggregator->if_port); - - bitmask_clear(&started_port_mask, port); - rte_eth_led_off(port); - - update_port_status(port, true); - - /* Stop monitoring port */ - rte_timer_stop(&sc->scd_link_timer); - - qos_sched_stop(ifp); - - /* make sure cores have drained */ - synchronize_rcu(); - - /* free any leftovers */ - pkt_ring_empty(port); -} - -static void soft_start_port(portid_t port) -{ - struct ifnet *ifp = ifport_table[port]; - struct dpdk_eth_if_softc *sc = ifp->if_softc; - - if (bitmask_isset(&started_port_mask, port)) - return; /* already active */ - - rte_eth_led_on(port); - - bitmask_set(&started_port_mask, port); - bitmask_set(&lsc_irq_mask, port); - update_port_status(port, false); - - /* Start timer to send keepalive messages */ - if (rte_timer_reset(&sc->scd_link_timer, - config.port_update * rte_get_timer_hz(), - PERIODICAL, rte_get_master_lcore(), - linkwatch_timer, ifp) < 0) - RTE_LOG(ERR, DATAPLANE, - "rte_timer_reset failed for linkwatch timer port:%u\n", - port); -} - -/* Timer for periodic check of link reset - * - * Note: rcu_read_lock not held here! - * This can be run both via directly in response to a link reset interrupt - * (tim will be NULL) or from an rte_timer callback (tim will be the actual - * timer). In both cases it will be ran from the master thread. - */ -static void reset_port(struct rte_timer *tim, void *arg) -{ - struct ifnet *ifp = arg; - portid_t port = ifp->if_port; - struct dpdk_eth_if_softc *sc = ifp->if_softc; - int ret; -#if RTE_VERSION >= RTE_VERSION_NUM(18, 5, 0, 0) - struct rte_eth_conf dev_conf; - struct rte_eth_dev *eth_dev; -#endif - -#if RTE_VERSION >= RTE_VERSION_NUM(18,5,0,0) - stop_port(port); -#else - soft_stop_port(port); - - /* PF down -> VF down -> VF up -> PF up - when VF comes back up and PF - * is still down, rte_eth_dev_start will fail, and the reset will fail - * because of that. 
Only way to fix this case is to run _start before - * _reset when PF is back up too. - */ - struct rte_eth_link link; - rte_eth_link_get(port, &link); - if (!link.link_status) { - ret = rte_eth_dev_start(port); - if (ret < 0) - RTE_LOG(DEBUG, DATAPLANE, "reset_port failed to start " - "device: port=%u err=%d\n", port, ret); - } -#endif - -#ifdef HAVE_RTE_ETH_DEV_RESET_2_ARGS - ret = rte_eth_dev_reset(port, 0); -#else - ret = rte_eth_dev_reset(port); -#endif - /* Only VF receives interrupt, bonding int will NOT reset. Also if the - * port is bonded, the bond interface must be restarted AFTER the - * reset call, otherwise bonding will be broken once PF is back up. - */ - if (ifp->aggregator) { - soft_start_port(ifp->aggregator->if_port); - if (is_team(ifp->aggregator)) - lag_refresh_actor_state(ifp->aggregator); - } - if (ret == -ENODEV || ret == -EINVAL) { - RTE_LOG(ERR, DATAPLANE, - "rte_eth_dev_reset: invalid port=%u err=%d\n", - port, ret); - } else if (ret == -ENOTSUP) { - RTE_LOG(NOTICE, DATAPLANE, - "rte_eth_dev_reset: no reset on HW port=%u err=%d\n", - port, ret); - } else if (ret == -EAGAIN || ret == -15) { - RTE_LOG(DEBUG, DATAPLANE, - "rte_eth_dev_reset: PF still down port=%u err=%d\n", - port, ret); - - /* reset failed, start timer to check again. If tim is not - * NULL then the call is from the timer, so it's already - * running, no need to start it again - * -15 is a weird IXGBE specific error code - */ - if (!tim && rte_timer_reset(&sc->scd_reset_timer, - config.port_update * rte_get_timer_hz(), - PERIODICAL, rte_get_master_lcore(), - reset_port, ifp) < 0) - RTE_LOG(ERR, DATAPLANE, "rte_timer_reset failed for " - "reset timer port:%u\n", port); - return; - } else if (ret < 0) { - /* drivers can return weird errors, catch it and log it */ - RTE_LOG(ERR, DATAPLANE, - "rte_eth_dev_reset: reset failed on HW port=%u err=%d\n", - port, ret); - } - - /* Port is inactive, no races are possible. 
If tim is NULL then this - * is the first call on interrupt and the timer is not running. - */ - if (tim) - rte_timer_stop_sync(tim); - -#if RTE_VERSION >= RTE_VERSION_NUM(18,5,0,0) - /* stop_port has to set need_reset if the timer is running, but - * setting it from here would cause a loop - */ - eth_dev = &rte_eth_devices[ifp->if_port]; - memcpy(&dev_conf, &eth_dev->data->dev_conf, sizeof(dev_conf)); - - sc->scd_need_reset = false; - reconfigure_port(ifp, &dev_conf, NULL); -#else - soft_start_port(port); -#endif -} - -static void update_queue_state(struct ifnet *ifp) -{ - unassign_queues(ifp->if_port); - - set_port_queue_state(ifp->if_port); - - if (bitmask_isset(&started_port_mask, ifp->if_port)) - assign_queues(ifp->if_port); -} - -/* Callback from being woken up on link_fd. - * Runs on master thread (via get_next_event) - * - * irq_mask is used to debounce events so that only one link - * state change between timer interval is possible - * - * For ports that use the queue state events, the queue state was read when - * the callback was received, so now we need to bring the state into line - * with the configured set of queues here. - * - * Note: rcu_read_lock not held here! 
- */ -static int link_state_event(void *arg) -{ - int lsc_fd = (unsigned long) arg; - unsigned int port; - uint64_t seqno; - - if (read(lsc_fd, &seqno, sizeof(seqno)) < 0) { - if (errno != EINTR) - RTE_LOG(NOTICE, DATAPLANE, - "link state event read error: %s\n", - strerror(errno)); - } - - for (port = 0; port < DATAPLANE_MAX_PORTS; port++) { - if (!rte_eth_dev_is_valid_port(port)) - continue; - - if (bitmask_isset(&lsc_irq_pending, port)) { - bitmask_clear(&lsc_irq_pending, port); - if (bitmask_isset(&started_port_mask, port)) - update_port_status(port, false); - } - - if (bitmask_isset(&link_reset_pending, port)) { - bitmask_clear(&link_reset_pending, port); - if (bitmask_isset(&started_port_mask, port)) - reset_port(NULL, ifport_table[port]); - } - - if (bitmask_isset(&queue_state_pending, port)) { - bitmask_clear(&queue_state_pending, port); - update_queue_state(ifport_table[port]); - } - } - - return 0; -} - -/* Open eventfd handle used to notify master thread - * by callbacks called in interrupt thread. - */ -void link_state_init(void) -{ - int fd = eventfd(0, EFD_NONBLOCK); - if (fd < 0) - rte_panic("%s: eventfd failed: %s\n", - __func__, strerror(errno)); - - lsc_arg = (void *) (unsigned long) fd; - register_event_fd(fd, link_state_event, lsc_arg); -} - -/* Start device (admin up) */ -void start_port(portid_t port, unsigned int flags) -{ - struct ifnet *ifp = ifport_table[port]; - struct dpdk_eth_if_softc *sc = ifp->if_softc; - int ret; - - if (slave_count(ifp) == 0) { - /* A bonding interface might not have any slaves yet. Don't - * try to start it since this will result in an error from - * rte_eth_dev_start(). Instead, lag_slave_add() will start - * the interface (if necessary) when the first slave is added. 
- */ - RTE_LOG(DEBUG, DATAPLANE, - "no slaves on bonding device %s", ifp->if_name); - return; - } - - if (bitmask_isset(&started_port_mask, port)) - return; /* already active */ - - /* bonding driver will start slave device when ready */ - if (!(flags & IFF_SLAVE)) { - if (assign_queues(port)) - return; /* failure */ - - if (sc->scd_need_reset) - reset_port(NULL, ifp); - - ret = rte_eth_dev_start(port); - if (ret < 0 && !sc->scd_need_reset) { - RTE_LOG(ERR, DATAPLANE, - "rte_eth_dev_start: port=%u err=%d\n", - port, ret); - unassign_queues(port); - return; - } - - sc->scd_need_reset = false; - } - - soft_start_port(port); -} - -/* Stop device (admin down) */ -void stop_port(portid_t port) -{ - struct ifnet *ifp = ifport_table[port]; - struct dpdk_eth_if_softc *sc = ifp->if_softc; - - if (!bitmask_isset(&started_port_mask, port) && - !rte_timer_pending(&sc->scd_reset_timer)) - return; /* already inactive */ - - /* if the PF is down when the port is stopped, then it will not work - * once it restarts unless rte_eth_dev_reset is called. - * But if the timer is simply left running then port will be set to UP - * when the PF goes back online, even if it should still be stopped - */ - if (rte_timer_pending(&sc->scd_reset_timer)) { - rte_timer_stop_sync(&sc->scd_reset_timer); - sc->scd_need_reset = true; - } - - soft_stop_port(port); - - /* if we're about to yank one of the slaves out from under the bonding - * driver, stop the bonding interface first. 
- */ - if (ifp->aggregator) { - rte_eth_dev_stop(ifp->aggregator->if_port); - unassign_queues(ifp->aggregator->if_port); - } - - rte_eth_dev_stop(port); - - unassign_queues(port); - - /* - * Some drivers require the HW multicast filter to be reprogrammed when - * the interface is next brought up after being taken down, regardless - * of whether this filter is already active - */ - l2_rx_fltr_set_reprogram(ifp); -} - -/* - * Stop a port when the dataplane port state may not be in sync with - * the dpdk port state, ensuring that either way the dpdk port is - * stopped on return. - */ -void force_stop_port(portid_t port) -{ - struct ifnet *ifp = ifport_table[port]; - struct dpdk_eth_if_softc *sc = ifp->if_softc; - - if (!bitmask_isset(&started_port_mask, port) && - !rte_timer_pending(&sc->scd_reset_timer)) - rte_eth_dev_stop(port); - else - stop_port(port); -} - -/* Stop data transfer */ -void stop_all_ports(void) -{ - unsigned int port; - - for (port = 0; port < DATAPLANE_MAX_PORTS; port++) { - if (bitmask_isset(&started_port_mask, port)) - stop_port(port); - } -} - -/* Port event occurred. - * - * Called from another Posix thread therefore can't safely update - * port state directly, need to wakeup master thread - */ -static int -eth_port_event(portid_t port_id, enum rte_eth_event_type type, void *arg, - __unused void *ret_arg) -{ - unsigned long link_fd = (unsigned long) arg; - static const uint64_t incr = 1; - bool wakeup = false; - - /* Notify master thread, and debounce */ - if (type == RTE_ETH_EVENT_INTR_LSC) { - /* - * If the port uses the queue state events, and it is down - * then we have to clear the enabled queues otherwise we - * can get into an inconsistent state. 
- */ - if (get_port_uses_queue_state(port_id)) { - struct rte_eth_link link; - - rte_eth_link_get_nowait(port_id, &link); - if (link.link_status == ETH_LINK_DOWN) - reset_port_enabled_queue_state(port_id); - } - if (bitmask_isset(&lsc_irq_mask, port_id)) { - bitmask_clear(&lsc_irq_mask, port_id); - bitmask_set(&lsc_irq_pending, port_id); - wakeup = true; - } - } - - if (type == RTE_ETH_EVENT_INTR_RESET && - bitmask_isset(&started_port_mask, port_id)) { - bitmask_set(&link_reset_pending, port_id); - wakeup = true; - } - - if (type == RTE_ETH_EVENT_QUEUE_STATE) { - /* - * Pull all the events off the queue, and set the - * enabled queus correctly. The master thread will then - * do the work to actually enable them. - */ - struct rte_eth_vhost_queue_event event; - - while (rte_eth_vhost_get_queue_event(port_id, &event) == 0) - track_port_queue_state(port_id, event.queue_id, - event.rx, event.enable); - - bitmask_set(&queue_state_pending, port_id); - wakeup = true; - } - - if (wakeup && write(link_fd, &incr, sizeof(incr)) < 0) - RTE_LOG(NOTICE, DATAPLANE, - "wakeup of link state thread failed: %s\n", - strerror(errno)); - - return 0; -} - -int linkwatch_port_config(portid_t portid) -{ - int ret; - - /* Enable Link State Interrupt */ - ret = rte_eth_dev_callback_register(portid, RTE_ETH_EVENT_INTR_LSC, - eth_port_event, lsc_arg); - if (ret < 0) - RTE_LOG(WARNING, DATAPLANE, - "rte_eth_dev_callback_register(lsc): err=%d, port=%u\n", - ret, portid); - - ret = rte_eth_dev_callback_register(portid, RTE_ETH_EVENT_INTR_RESET, - eth_port_event, lsc_arg); - if (ret < 0) - RTE_LOG(WARNING, DATAPLANE, - "rte_eth_dev_callback_register(reset): err=%d, port=%u\n", - ret, portid); - - if (port_uses_queue_state(portid)) { - set_port_uses_queue_state(portid, true); - reset_port_all_queue_state(portid); - ret = rte_eth_dev_callback_register(portid, - RTE_ETH_EVENT_QUEUE_STATE, - eth_port_event, - lsc_arg); - if (ret < 0) - RTE_LOG(WARNING, DATAPLANE, - 
"rte_eth_dev_callback_register(queue state): err=%d, port=%u\n", - ret, portid); - - } - - return 0; -} - -void linkwatch_port_unconfig(portid_t portid) -{ - /* Disable Link State Interrupt */ - rte_eth_dev_callback_unregister(portid, RTE_ETH_EVENT_INTR_LSC, - eth_port_event, lsc_arg); - - /* Disable Port Reset callback */ - rte_eth_dev_callback_unregister(portid, RTE_ETH_EVENT_INTR_RESET, - eth_port_event, lsc_arg); - - rte_eth_dev_callback_unregister(portid, RTE_ETH_EVENT_QUEUE_STATE, - eth_port_event, lsc_arg); - set_port_uses_queue_state(portid, false); -} diff --git a/src/log.c b/src/log.c index 155f3f32..fcbfc0e1 100644 --- a/src/log.c +++ b/src/log.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2021, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2013-2017 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -13,10 +13,11 @@ #include #include -#include "config.h" +#include "config_internal.h" +#include "main.h" static ssize_t -do_log_write(__attribute__((unused)) void *c, const char *buf, size_t bufsize) +do_log_write(__unused void *c, const char *buf, size_t bufsize) { const char *cp, *ep; uint32_t loglevel; diff --git a/src/loopback.c b/src/loopback.c deleted file mode 100644 index cd3f211f..00000000 --- a/src/loopback.c +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (c) 2018, AT&T Intellectual Property. All rights reserved. 
- * - * SPDX-License-Identifier: LGPL-2.1-only - * - * Loopback interface implementation - */ - -#include -#include - -#include "dp_event.h" -#include "if_var.h" - -static const struct ift_ops lo_if_ops = { -}; - -static void lo_type_init(void) -{ - int ret = if_register_type(IFT_LOOP, &lo_if_ops); - if (ret < 0) - rte_panic("Failed to register loopback type: %s", - strerror(-ret)); -} - -static const struct dp_event_ops loopback_events = { - .init = lo_type_init, -}; - -DP_STARTUP_EVENT_REGISTER(loopback_events); diff --git a/src/lpm/lpm.c b/src/lpm/lpm.c index 8133552f..c16de870 100644 --- a/src/lpm/lpm.c +++ b/src/lpm/lpm.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2018-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2018-2021, AT&T Intellectual Property. All rights reserved. * * BSD LICENSE * @@ -102,8 +102,8 @@ struct lpm { { \ .valid = VALID, \ .ext_entry = 0, \ - .depth = n_depth, \ - .next_hop = nhop, \ + .depth = (n_depth), \ + .next_hop = (nhop), \ } #define MAX_DEPTH_TBL24 24 @@ -137,10 +137,9 @@ static inline int rules_cmp(const struct lpm_rule *r1, { if (r1->ip < r2->ip) return -1; - else if (r1->ip > r2->ip) + if (r1->ip > r2->ip) return 1; - else - return r1->scope - r2->scope; + return r1->scope - r2->scope; } static inline int tracker_cmp(const struct rt_tracker_info *r1, @@ -151,12 +150,10 @@ static inline int tracker_cmp(const struct rt_tracker_info *r1, sizeof(r1->dst_addr.address.ip_v4.s_addr)); } -#ifndef __clang_analyzer__ /* Generate internal functions and make them static. */ RB_GENERATE_STATIC(lpm_rules_tree, lpm_rule, link, rules_cmp) RB_GENERATE_STATIC(lpm_tracker_tree, rt_tracker_info, rti_tree_node, tracker_cmp) -#endif /* __clang_analyzer__ */ /* * Converts a given depth value to its corresponding mask value. 
@@ -341,6 +338,7 @@ rule_add(struct lpm *lpm, uint32_t ip_masked, uint8_t depth, r->next_hop = next_hop; r->scope = scope; r->tracker_count = 0; + memset(&r->pd_state, 0, sizeof(r->pd_state)); RB_INIT(&r->tracker_head); old = RB_INSERT(lpm_rules_tree, head, r); @@ -789,7 +787,7 @@ lpm_tracker_find_next(struct lpm_rule *rule, struct rt_tracker_info key; uint32_t ip_masked; - ip_masked = (ip & lpm_depth_to_mask(depth)); + ip_masked = htonl(ip & lpm_depth_to_mask(depth)); key.dst_addr.type = AF_INET; key.dst_addr.address.ip_v4.s_addr = ip_masked; @@ -877,7 +875,6 @@ lpm_tracker_rule_changed(struct lpm *lpm, struct rt_tracker_info *ti_info, RTE_LOG(ERR, LPM, "LPM failed to update tracker\n"); ti_info->rti_cb_func(ti_info); - return; } /* @@ -966,6 +963,17 @@ lpm_tracker_update(struct lpm *lpm, struct lpm_rule *old_rule, } } +static void lpm_tracker_call_cbs(struct lpm_rule *rule) +{ + struct rt_tracker_info *ti_iter, *next; + + if (rule->tracker_count == 0) + return; + + RB_FOREACH_SAFE(ti_iter, lpm_tracker_tree, &rule->tracker_head, next) + ti_iter->rti_cb_func(ti_iter); +} + int lpm_tracker_get_cover_ip_and_depth(struct rt_tracker_info *ti_info, uint32_t *ip, uint8_t *depth) @@ -1375,11 +1383,17 @@ lpm_delete_all(struct lpm *lpm, lpm_walk_func_t func, void *arg) for (depth = 0; depth < LPM_MAX_DEPTH; ++depth) { struct lpm_rules_tree *head = &lpm->rules[depth]; struct lpm_rule *r, *n; + struct lpm_walk_params params; RB_FOREACH_SAFE(r, lpm_rules_tree, head, n) { - if (func) - func(lpm, r->ip, depth, r->scope, - r->next_hop, r->pd_state, arg); + if (func) { + params.ip = r->ip; + params.depth = depth; + params.scope = r->scope; + params.next_hop = r->next_hop; + + func(lpm, &params, &r->pd_state, arg); + } rule_delete(lpm, r, depth); } } @@ -1397,6 +1411,7 @@ lpm_walk(struct lpm *lpm, lpm_walk_func_t func, uint32_t rule_cnt = 0; uint32_t ip_masked; bool len_match = true; + struct lpm_walk_params params; for (; depth < LPM_MAX_DEPTH; depth++) { struct lpm_rule *r, *n; @@ 
-1416,8 +1431,16 @@ lpm_walk(struct lpm *lpm, lpm_walk_func_t func, continue; RB_FOREACH_FROM(r, lpm_rules_tree, n) { - func(lpm, r->ip, depth, r->scope, r->next_hop, - r->pd_state, r_arg->walk_arg); + params.ip = r->ip; + params.depth = depth; + params.scope = r->scope; + params.next_hop = r->next_hop; + params.call_tracker_cbs = false; + + func(lpm, &params, &r->pd_state, r_arg->walk_arg); + + if (params.call_tracker_cbs) + lpm_tracker_call_cbs(r); if (r_arg->is_segment && (++rule_cnt == r_arg->cnt)) return rule_cnt; } @@ -1442,6 +1465,22 @@ lpm_tbl8_count(const struct lpm *lpm) return count; } +int +lpm_nexthop_lookup(struct lpm *lpm, uint32_t ip, uint8_t depth, + int16_t scope, uint32_t *next_hop) +{ + struct lpm_rule *r; + uint32_t ip_masked; + + ip_masked = (ip & lpm_depth_to_mask(depth)); + r = rule_find(lpm, ip_masked, depth, scope); + if (!r) + return -ENOENT; + + *next_hop = r->next_hop; + return 0; +} + int lpm_lookup_exact(struct lpm *lpm, uint32_t ip, uint8_t depth, uint32_t *next_hop) diff --git a/src/lpm/lpm.h b/src/lpm/lpm.h index baad82fd..e5a2fb3e 100644 --- a/src/lpm/lpm.h +++ b/src/lpm/lpm.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2018-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2018-2021, AT&T Intellectual Property. All rights reserved. 
* * BSD LICENSE * @@ -108,7 +108,7 @@ struct lpm_walk_arg { /* A scope that is even lower than RT_SCOPE_UNIVERSE */ #define LPM_SCOPE_PAN_DIMENSIONAL -1 -ALWAYS_INLINE uint32_t +static ALWAYS_INLINE uint32_t lpm_tbl24_get_next_hop_idx(struct lpm_tbl24_entry *entry) { return entry->next_hop; @@ -248,11 +248,23 @@ lpm_delete(struct lpm *lpm, uint32_t ip, uint8_t depth, uint32_t *new_next_hop, struct pd_obj_state_and_flags **new_pd_state); +struct lpm_walk_params { + uint32_t ip; + uint8_t depth; + int16_t scope; + uint32_t next_hop; + /* + * Set this to true in the walker callback to have the callbacks of + * any trackers on this entry called after the walker callback func + * has been called. + */ + bool call_tracker_cbs; +}; + /** iterator function for LPM rule */ typedef void (*lpm_walk_func_t)(struct lpm *lpm, - uint32_t ip, uint8_t depth, int16_t scope, - uint32_t next_hop, - struct pd_obj_state_and_flags pd_state, + struct lpm_walk_params *params, + struct pd_obj_state_and_flags *pd_state, void *arg); /** @@ -281,6 +293,25 @@ lpm_delete_all(struct lpm *lpm, lpm_walk_func_t func, void *arg); int lpm_lookup(const struct lpm *lpm, uint32_t ip, uint32_t *next_hop); +/* + * Lookup an IP in the LPM table and return exact match + * @param lpm + * LPM object handle + * @param ip + * IP to be looked up in the LPM table + * @param depth + * Prefix length + * @param scope + * Scope of the rule + * @param next_hop + * Next hop of the best exact match (valid on lookup hit only) + * @return + * -EINVAL for incorrect arguments, -ENOENT on lookup miss, 0 on lookup hit + */ +int +lpm_nexthop_lookup(struct lpm *lpm, uint32_t ip, + uint8_t depth, int16_t scope, uint32_t *next_hop); + /** * Lookup an IP in the LPM table and return exact match * diff --git a/src/lpm/lpm6.c b/src/lpm/lpm6.c index db4cd173..afb62b0a 100644 --- a/src/lpm/lpm6.c +++ b/src/lpm/lpm6.c @@ -159,12 +159,10 @@ static inline int tracker_cmp(const struct rt_tracker_info *r1, &r2->dst_addr.address.ip_v6, 
LPM6_IPV6_ADDR_SIZE); } -#ifndef __clang_analyzer__ /* Generate internal functions and make them static. */ RB_GENERATE_STATIC(lpm6_rules_tree, lpm6_rule, link, rules_cmp) RB_GENERATE_STATIC(lpm6_tracker_tree, rt_tracker_info, rti_tree_node, tracker_cmp) -#endif /* __clang_analyzer__ */ /* * Takes an array of uint8_t (IPv6 address) and masks it using the depth. @@ -305,7 +303,7 @@ rule_find_any(struct lpm6 *lpm, const uint8_t *ip, uint8_t depth) else r = RB_MAX(lpm6_rules_tree, &lpm->rules[depth]); - if (r && memcmp(ip, r->ip, LPM6_IPV6_ADDR_SIZE)) + if (r && memcmp(ip, r->ip, LPM6_IPV6_ADDR_SIZE) != 0) return NULL; return r; @@ -334,6 +332,7 @@ rule_add(struct lpm6 *lpm, uint8_t *ip, uint32_t next_hop, r->next_hop = next_hop; r->scope = scope; r->tracker_count = 0; + memset(&r->pd_state, 0, sizeof(r->pd_state)); RB_INIT(&r->tracker_head); old = RB_INSERT(lpm6_rules_tree, head, r); @@ -788,17 +787,16 @@ lookup_step(const struct lpm6 *lpm, const struct lpm6_tbl_entry *tbl, /* If it is valid and extended we calculate the new pointer to return. */ if (!tbl_entry.valid) return -ENOENT; - else if (tbl_entry.ext_entry) { + if (tbl_entry.ext_entry) { uint32_t tbl8_index = ip[first_byte-1] + tbl_entry.next_hop * LPM6_TBL8_GROUP_NUM_ENTRIES; *tbl_next = &lpm->tbl8[tbl8_index]; return 1; - } else { - /* If not extended then we can have a match. */ - *next_hop = tbl_entry.next_hop; - return 0; } + /* If not extended then we can have a match. 
*/ + *next_hop = tbl_entry.next_hop; + return 0; } /* @@ -1035,7 +1033,7 @@ tbl8_recycle_check(const struct lpm6_tbl_entry *tbl8, return -EINVAL; } -static void tbl8_recycle(struct lpm6 *lpm, uint32_t indices[], +static void tbl8_recycle(struct lpm6 *lpm, const uint32_t indices[], int index_count) { uint32_t i, j; @@ -1176,9 +1174,10 @@ delete_rule(struct lpm6 *lpm, const uint8_t *ip, uint8_t depth, /* Continue inspecting levels until success or failure */ tbl_entry = CMM_ACCESS_ONCE(*tbl); - if (!tbl_entry.valid) { + if (!tbl_entry.valid) return; - } else if (tbl_entry.ext_entry) { + + if (tbl_entry.ext_entry) { /* find next tbl8 */ tbl8_index = tbl_entry.next_hop * LPM6_TBL8_GROUP_NUM_ENTRIES + @@ -1202,7 +1201,7 @@ delete_rule(struct lpm6 *lpm, const uint8_t *ip, uint8_t depth, * be everything that is valid but with a depth > the * depth being removed. */ - if (depth < 24) { + if (depth <= 24) { tbl24_range = depth_to_range(depth); /* @@ -1350,7 +1349,8 @@ static int rule_replace(struct lpm6 *lpm, struct lpm6_rule *old_rule, * with a lower scope. Otherwise it is not, and we * need to check a different depth. 
*/ - if (memcmp(old_rule->ip, sub_rule->ip, LPM6_IPV6_ADDR_SIZE)) + if (memcmp(old_rule->ip, + sub_rule->ip, LPM6_IPV6_ADDR_SIZE) != 0) sub_rule = NULL; else sub_depth = depth; @@ -1394,7 +1394,7 @@ static int rule_replace(struct lpm6 *lpm, struct lpm6_rule *old_rule, */ int lpm6_delete(struct lpm6 *lpm, const uint8_t *ip, uint8_t depth, - uint32_t *index, int16_t scope, + uint32_t *next_hop, int16_t scope, struct pd_obj_state_and_flags *pd_state, uint32_t *new_next_hop, struct pd_obj_state_and_flags **new_pd_state) @@ -1423,8 +1423,8 @@ lpm6_delete(struct lpm6 *lpm, const uint8_t *ip, uint8_t depth, if (pd_state) *pd_state = rule_to_delete->pd_state; - if (index) - *index = rule_to_delete->next_hop; + if (next_hop) + *next_hop = rule_to_delete->next_hop; /* Replace with next level up rule */ rc = rule_replace(lpm, rule_to_delete, ip, depth, &new_rule); @@ -1438,6 +1438,16 @@ lpm6_delete(struct lpm6 *lpm, const uint8_t *ip, uint8_t depth, return rc; } +static void lpm6_tracker_call_cbs(struct lpm6_rule *rule) +{ + struct rt_tracker_info *ti_iter, *next; + + if (rule->tracker_count == 0) + return; + + RB_FOREACH_SAFE(ti_iter, lpm6_tracker_tree, &rule->tracker_head, next) + ti_iter->rti_cb_func(ti_iter); +} /* * Delete all rules from the LPM table. 
@@ -1464,11 +1474,17 @@ lpm6_delete_all(struct lpm6 *lpm, lpm6_walk_func_t func, void *arg) for (depth = 0; depth <= LPM6_MAX_DEPTH; ++depth) { struct lpm6_rules_tree *head = &lpm->rules[depth]; struct lpm6_rule *r, *n; + struct lpm6_walk_params params; RB_FOREACH_SAFE(r, lpm6_rules_tree, head, n) { - if (func) - func(r->ip, depth, r->scope, r->next_hop, - r->pd_state, arg); + if (func) { + memcpy(¶ms.prefix, r->ip, + LPM6_IPV6_ADDR_SIZE); + params.pr_len = depth; + params.scope = r->scope; + params.next_hop = r->next_hop; + func(¶ms, &r->pd_state, arg); + } rule_delete(lpm, r, depth); } } @@ -1485,6 +1501,7 @@ lpm6_walk(struct lpm6 *lpm, lpm6_walk_func_t func, for (; depth <= LPM6_MAX_DEPTH; ++depth) { struct lpm6_rule *r, *n; + struct lpm6_walk_params params; if (r_arg->get_next && len_match) { mask_ip6(masked_ip, r_arg->addr.s6_addr, depth); @@ -1501,11 +1518,16 @@ lpm6_walk(struct lpm6 *lpm, lpm6_walk_func_t func, continue; RB_FOREACH_FROM(r, lpm6_rules_tree, n) { - uint8_t tmp_ip[LPM6_IPV6_ADDR_SIZE]; - memcpy(tmp_ip, r->ip, sizeof(tmp_ip)); + memcpy(¶ms.prefix, r->ip, LPM6_IPV6_ADDR_SIZE); + params.pr_len = depth; + params.scope = r->scope; + params.next_hop = r->next_hop; + params.call_tracker_cbs = false; + + func(¶ms, &r->pd_state, r_arg->walk_arg); + if (params.call_tracker_cbs) + lpm6_tracker_call_cbs(r); - func(tmp_ip, depth, r->scope, r->next_hop, - r->pd_state, r_arg->walk_arg); if (r_arg->is_segment && (++rule_cnt == r_arg->cnt)) return rule_cnt; } @@ -1539,7 +1561,8 @@ void lpm6_subtree_walk(struct lpm6 *lpm, RB_FOREACH_FROM(r, lpm6_rules_tree, n) { mask_ip6(masked_ip, r->ip, root_depth); - if (memcmp(masked_ip, root_ip, LPM6_IPV6_ADDR_SIZE)) + if (memcmp(masked_ip, + root_ip, LPM6_IPV6_ADDR_SIZE) != 0) break; /* * Have to take a copy of the IP addr as the callback @@ -1716,7 +1739,7 @@ lpm6_tracker_update(struct lpm6 *lpm, struct lpm6_rule *old_rule, mask_ip6(masked_ip, (const uint8_t *)&ti_iter->dst_addr.address.ip_v6, depth); - if 
(memcmp(masked_ip, ip, LPM6_IPV6_ADDR_SIZE)) + if (memcmp(masked_ip, ip, LPM6_IPV6_ADDR_SIZE) != 0) break; /* Tracker changed ?*/ @@ -1735,7 +1758,7 @@ lpm6_tracker_update(struct lpm6 *lpm, struct lpm6_rule *old_rule, (const uint8_t *) &ti_iter->dst_addr.address.ip_v6, depth); - if (memcmp(masked_ip, ip, LPM6_IPV6_ADDR_SIZE)) + if (memcmp(masked_ip, ip, LPM6_IPV6_ADDR_SIZE) != 0) break; /* Tracker changed ? */ @@ -1745,7 +1768,6 @@ lpm6_tracker_update(struct lpm6 *lpm, struct lpm6_rule *old_rule, } } - int lpm6_tracker_get_cover_ip_and_depth(struct rt_tracker_info *ti_info, uint8_t *ip, uint8_t *depth) diff --git a/src/lpm/lpm6.h b/src/lpm/lpm6.h index 8591b081..2b29a394 100644 --- a/src/lpm/lpm6.h +++ b/src/lpm/lpm6.h @@ -185,10 +185,22 @@ lpm6_delete(struct lpm6 *lpm, const uint8_t *ip, uint8_t depth, uint32_t *new_next_hop, struct pd_obj_state_and_flags **new_pd_state); +struct lpm6_walk_params { + uint8_t prefix[LPM6_IPV6_ADDR_SIZE]; + uint32_t pr_len; + int16_t scope; + uint32_t next_hop; + /* + * Set this to true in the walker callback to have the callbacks of + * any trackers on this entry called after the walker callback func + * has been called. + */ + bool call_tracker_cbs; +}; + /** iterator function for LPM rule */ -typedef void (*lpm6_walk_func_t)(const uint8_t *prefix, uint32_t pr_len, - int16_t scope, uint32_t next_hop, - struct pd_obj_state_and_flags pd_state, +typedef void (*lpm6_walk_func_t)(struct lpm6_walk_params *params, + struct pd_obj_state_and_flags *pd_state, void *arg); /** diff --git a/src/lpm/test/Makefile b/src/lpm/test/Makefile deleted file mode 100644 index 84a459ec..00000000 --- a/src/lpm/test/Makefile +++ /dev/null @@ -1,2 +0,0 @@ -default: - gcc -o v6lpm -gstabs -DDEBUG_MODE ../rte_v6_lpm.c diff --git a/src/lpm/test/delete.txt b/src/lpm/test/delete.txt deleted file mode 100644 index 6eedcb0c..00000000 --- a/src/lpm/test/delete.txt +++ /dev/null @@ -1,6 +0,0 @@ -# Delete one of the entries that hashes to the same prefix-length. 
-ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe;127 -# Delete a original entry that is marker and original. (after this it will be only a marker). -ffff:ffff:0000:0000:0000:0000:0000:0000;32 -# Delete a non-exisitent entry. -abcd:ef01:0000:0000:0000:0000:0000:0000;32 diff --git a/src/lpm/test/routes.txt b/src/lpm/test/routes.txt deleted file mode 100644 index e383334c..00000000 --- a/src/lpm/test/routes.txt +++ /dev/null @@ -1,14 +0,0 @@ -# Route at the right most prefix-lenght. -ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe;127;1 -# route at the same hash entry. -ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffd;127;2 -# route for a random prefix-length right of the root. -ffff:ffff:ffff:ffff:ffff:ffff:ffff:fff0;120;3 -# route for a left tree prefix-legnth. -afff:ffff:ffff:ffff:ffff:ffff:ffff:ffff;4;4 -# Conversion of a marker entry into original route. -ffff:ffff:0000:0000:0000:0000:0000:0000;32;5 -# Add a 128 bit route -abcd:1234:5678:abcd:ef01:2345:6789:abcd;128;6 -# Add a 1 bit prefix-length route -8000:0000:0000:0000:0000:0000:0000:0000;1;7 diff --git a/src/lpm/test/search.txt b/src/lpm/test/search.txt deleted file mode 100644 index 06156f67..00000000 --- a/src/lpm/test/search.txt +++ /dev/null @@ -1,9 +0,0 @@ -ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe;1 -ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffd;2 -ffff:ffff:ffff:ffff:ffff:ffff:ffff:fff0;3 -ffff:ffff:ffff:ffff:0000:0000:0000:0000;5 -ffff:ffff:ffff:ffff:ffff:ffff:00ff:0000;5 -ffff:ffff:ffff:ffff:ffff:ffff:ffff:fcfd;5 -afff:0000:0000:0000:0000:0000:0000:0000;4 -#Search for a marker... 
-ffff:fff0:0000:0000:0000:0000:0000:0000;-1 diff --git a/src/lpm/test/search2.txt b/src/lpm/test/search2.txt deleted file mode 100644 index ad358a31..00000000 --- a/src/lpm/test/search2.txt +++ /dev/null @@ -1 +0,0 @@ -ffff:ffff:ffff:ffff:0000:0000:0000:0000;4 diff --git a/src/lpm/test/search_last.txt b/src/lpm/test/search_last.txt deleted file mode 100644 index db78574e..00000000 --- a/src/lpm/test/search_last.txt +++ /dev/null @@ -1,9 +0,0 @@ -ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe;3 -ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffd;2 -ffff:ffff:ffff:ffff:ffff:ffff:ffff:fff0;3 -ffff:ffff:ffff:ffff:0000:0000:0000:0000;-1 -afff:0000:0000:0000:0000:0000:0000:0000;4 -#Search for a marker... -ffff:fff0:0000:0000:0000:0000:0000:0000;-1 -abcd:1234:5678:abcd:ef01:2345:6789:abcd;6 -8000:0000:0000:0000:0000:0000:0000:1234;7 diff --git a/src/mac_limit.c b/src/mac_limit.c new file mode 100644 index 00000000..c6f87536 --- /dev/null +++ b/src/mac_limit.c @@ -0,0 +1,784 @@ +/*- + * Copyright (c) 2020, AT&T Intellectual Property. + * All rights reserved. 
+ * + * SPDX-License-Identifier: LGPL-2.1-only + * + * mac limit feature handling + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "feature_commands.h" +#include "mac_limit.h" +#include "if/bridge/bridge.h" +#include "protobuf.h" +#include "protobuf/MacLimitConfig.pb-c.h" + +static struct cds_lfht *mac_limit_profile_tbl; +static struct cds_list_head *mac_limit_list; + +#define MAC_LIMIT_PROFILE_TABLE_MIN 8 +#define MAC_LIMIT_PROFILE_TABLE_MAX 1024 + +struct mac_limit_profile { + struct cds_lfht_node mlp_node; + struct cds_list_head mlp_list; + struct rcu_head mlp_rcu; + char *mlp_name; + uint32_t mlp_limit; +}; + +struct mac_limit_entry { + struct cds_list_head mle_list; + struct cds_list_head mle_profile_list; + struct ifnet *mle_ifp; + uint16_t mle_vlan; + struct mac_limit_profile *mle_profile; + struct rcu_head mle_rcu; +}; + +static bool mac_limit_check_vlan(struct ifnet *ifp, uint16_t vlan) +{ + return ifp->if_brport && + bridge_port_is_vlan_member(ifp->if_brport, vlan); +} + +static int mac_limit_fal_apply(struct mac_limit_entry *entry, + bool update) +{ + int rv = 0; + uint32_t limit; + struct if_vlan_feat *vlan_feat; + uint16_t vlan = entry->mle_vlan; + struct ifnet *ifp = entry->mle_ifp; + struct mac_limit_profile *profile = entry->mle_profile; + + /* + * If vlan does not yet exist, we'll handle it when created. 
+ */ + if (!mac_limit_check_vlan(ifp, vlan)) + return 0; + + DP_DEBUG(MAC_LIMIT, DEBUG, MAC_LIMIT, + "%s update %d int %s profile %s limit %d\n", + __func__, update, ifp->if_name, profile->mlp_name, + profile->mlp_limit); + + limit = profile->mlp_limit; + + struct fal_attribute_t vlan_attr[3] = { + { .id = FAL_VLAN_FEATURE_INTERFACE_ID, + .value.u32 = entry->mle_ifp->if_index }, + { .id = FAL_VLAN_FEATURE_VLAN_ID, + .value.u16 = vlan }, + { .id = FAL_VLAN_FEATURE_ATTR_MAC_LIMIT, + .value.u32 = limit } + }; + + vlan_feat = if_vlan_feat_get(ifp, vlan); + if (!vlan_feat) { + DP_DEBUG(MAC_LIMIT, DEBUG, MAC_LIMIT, + "Create vlan feature for Intf: %s, vlan: %d\n", + ifp->if_name, vlan); + rv = if_vlan_feat_create(ifp, vlan, FAL_NULL_OBJECT_ID); + if (rv) { + RTE_LOG(ERR, MAC_LIMIT, + "Could not create VLAN feature block for intf %s, vlan %d\n", + ifp->if_name, vlan); + return rv; + } + vlan_feat = if_vlan_feat_get(ifp, vlan); + if (!vlan_feat) + return -ENOENT; + + rv = fal_vlan_feature_create(ARRAY_SIZE(vlan_attr), + vlan_attr, + &vlan_feat->fal_vlan_feat); + if (rv) { + if (rv != -EOPNOTSUPP) { + RTE_LOG(ERR, MAC_LIMIT, + "Could not create vlan_feat for vlan %d in fal (%d)\n", + vlan, rv); + if_vlan_feat_delete(ifp, vlan); + return rv; + } + } + } else { + DP_DEBUG(MAC_LIMIT, DEBUG, MAC_LIMIT, "Found vlan feature\n"); + rv = fal_vlan_feature_set_attr(vlan_feat->fal_vlan_feat, + &vlan_attr[2]); + if (rv) { + RTE_LOG(ERR, MAC_LIMIT, + "Could not associate mac limit for intf %s vlan %d\n", + ifp->if_name, vlan); + return rv; + } + } + if (!update) + vlan_feat->refcount++; + + return rv; +} + +static int mac_limit_fal_unapply(struct mac_limit_entry *entry) +{ + int rv = 0; + struct if_vlan_feat *vlan_feat; + uint16_t vlan = entry->mle_vlan; + struct ifnet *ifp = entry->mle_ifp; + + /* + * vlan may never have existed and so limit was never applied. 
+ */ + if (!mac_limit_check_vlan(entry->mle_ifp, vlan)) + return 0; + + struct fal_attribute_t vlan_attr[1] = { + { .id = FAL_VLAN_FEATURE_ATTR_MAC_LIMIT, + .value.u32 = 0 } + }; + + /* + * Remove the vlan feature. + */ + vlan_feat = if_vlan_feat_get(ifp, vlan); + if (!vlan_feat) { + RTE_LOG(ERR, MAC_LIMIT, + "Could not find vlan feat for intf %s vlan %d\n", + ifp->if_name, vlan); + return -ENOENT; + } + + rv = fal_vlan_feature_set_attr(vlan_feat->fal_vlan_feat, + &vlan_attr[0]); + if (rv) { + RTE_LOG(ERR, MAC_LIMIT, + "Could not disassociate mac limit for intf %s vlan %d\n", + ifp->if_name, vlan); + return rv; + } + + vlan_feat->refcount--; + + if (vlan_feat && !vlan_feat->refcount) { + DP_DEBUG(MAC_LIMIT, DEBUG, MAC_LIMIT, "Remove vlan feature\n"); + rv = fal_vlan_feature_delete(vlan_feat->fal_vlan_feat); + if (rv) { + RTE_LOG(ERR, MAC_LIMIT, + "Could not destroy fal vlan feature obj for %s vlan %d (%d)\n", + ifp->if_name, vlan, rv); + return rv; + } + + rv = if_vlan_feat_delete(ifp, vlan); + if (rv) { + RTE_LOG(ERR, MAC_LIMIT, + "Could not destroy vlan feature obj for %s vlan %d (%d)\n", + ifp->if_name, vlan, rv); + return rv; + } + RTE_LOG(INFO, MAC_LIMIT, + "Destroyed vlan feature obj for %s vlan %d\n", + ifp->if_name, vlan); + } + + return rv; +} + +/* MAC limit profile functions*/ +static int mac_limit_setup_profile_table(void) +{ + mac_limit_profile_tbl = cds_lfht_new(MAC_LIMIT_PROFILE_TABLE_MIN, + MAC_LIMIT_PROFILE_TABLE_MIN, + MAC_LIMIT_PROFILE_TABLE_MAX, + CDS_LFHT_AUTO_RESIZE, + NULL); + if (!mac_limit_profile_tbl) { + RTE_LOG(ERR, MAC_LIMIT, + "Could not allocate mac limit profile table\n"); + return -ENOMEM; + } + return 0; +} + +static inline uint32_t mac_limit_profile_hash(const char *profile_name) +{ + int len = strlen(profile_name); + char copy[len+3]; + + memcpy(copy, profile_name, len); + return rte_jhash(copy, len, 0); +} + +static inline int mac_limit_profile_match_fn(struct cds_lfht_node *node, + const void *arg) +{ + const char *profile_name 
= arg; + const struct mac_limit_profile *profile; + + profile = caa_container_of(node, const struct mac_limit_profile, + mlp_node); + + if (strcmp(profile_name, profile->mlp_name) == 0) + return 1; + + return 0; +} + +static struct mac_limit_profile * +mac_limit_add_profile(const char *name) +{ + struct mac_limit_profile *profile = NULL; + unsigned long name_hash; + struct cds_lfht_node *ret_node; + int rc; + + if (!mac_limit_profile_tbl) { + rc = mac_limit_setup_profile_table(); + if (rc) { + RTE_LOG(ERR, MAC_LIMIT, + "Failed to add profile %s, no profile tbl\n", + name); + return NULL; + } + } + + profile = calloc(1, sizeof(*profile)); + if (!profile) { + RTE_LOG(ERR, MAC_LIMIT, + "Could not allocate mac limit profile %s\n", + name); + return NULL; + } + + profile->mlp_name = strdup(name); + if (!profile->mlp_name) { + free(profile); + RTE_LOG(ERR, MAC_LIMIT, + "Could not allocate mac limit profile %s\n", + name); + return NULL; + } + + cds_lfht_node_init(&profile->mlp_node); + name_hash = mac_limit_profile_hash(name); + ret_node = cds_lfht_add_unique(mac_limit_profile_tbl, name_hash, + mac_limit_profile_match_fn, name, + &profile->mlp_node); + + if (ret_node != &profile->mlp_node) { + free(profile->mlp_name); + free(profile); + profile = caa_container_of(ret_node, struct mac_limit_profile, + mlp_node); + DP_DEBUG(MAC_LIMIT, DEBUG, MAC_LIMIT, + "Found an existing profile %s (%lx)\n", + name, (unsigned long)profile); + } else { + CDS_INIT_LIST_HEAD(&profile->mlp_list); + DP_DEBUG(MAC_LIMIT, DEBUG, MAC_LIMIT, + "Added profile %s (%lx)\n", name, + (unsigned long)profile); + } + + return profile; +} + + +static void mac_limit_free_profile(struct rcu_head *head) +{ + struct mac_limit_profile *profile; + + profile = caa_container_of(head, struct mac_limit_profile, mlp_rcu); + free(profile->mlp_name); + free(profile); +} + +static void mac_limit_delete_profile(struct mac_limit_profile *profile) +{ + RTE_LOG(INFO, MAC_LIMIT, "Delete profile %s\n", profile->mlp_name); + + 
cds_lfht_del(mac_limit_profile_tbl, &profile->mlp_node); + call_rcu(&profile->mlp_rcu, mac_limit_free_profile); +} + +static struct mac_limit_profile * +mac_limit_find_profile(const char *name) +{ + struct mac_limit_profile *profile = NULL; + struct cds_lfht_iter iter; + struct cds_lfht_node *node; + + if (!mac_limit_profile_tbl) + return NULL; + + cds_lfht_lookup(mac_limit_profile_tbl, + mac_limit_profile_hash(name), + mac_limit_profile_match_fn, + name, &iter); + + node = cds_lfht_iter_get_node(&iter); + if (node) + profile = caa_container_of(node, struct mac_limit_profile, + mlp_node); + + return profile; +} + +static void +mac_limit_profile_set_limit(struct mac_limit_profile *profile, uint32_t limit) +{ + struct mac_limit_entry *entry; + + profile->mlp_limit = limit; + + DP_DEBUG(MAC_LIMIT, DEBUG, MAC_LIMIT, + "mac limit profile %s %s limit %d\n", + profile->mlp_name, limit ? "set" : "delete", + profile->mlp_limit); + + /* For all the places where the profile is bound */ + cds_list_for_each_entry_rcu(entry, &profile->mlp_list, + mle_profile_list) { + if (limit == 0) + mac_limit_fal_unapply(entry); + else + mac_limit_fal_apply(entry, true); + } +} + +static int mac_limit_set_profile(MacLimitConfig__MacLimitProfileConfig *cfg) +{ + char *profile_name; + struct mac_limit_profile *profile; + bool set = cfg->action == MAC_LIMIT_CONFIG__ACTION__SET; + + profile_name = cfg->profile; + if (!profile_name) { + RTE_LOG(ERR, MAC_LIMIT, + "Missing profile name in profile update\n"); + return 0; + } + + profile = mac_limit_find_profile(profile_name); + if (!profile) { + if (set) { + profile = mac_limit_add_profile(profile_name); + if (!profile) { + RTE_LOG(INFO, MAC_LIMIT, + "Could not create mac limit profile %s\n", + profile_name); + return -ENOMEM; + } + } else { + RTE_LOG(INFO, MAC_LIMIT, + "Could not find profile %s\n", profile_name); + return -ENOENT; + } + } + + if (!set) + mac_limit_profile_set_limit(profile, 0); + else + mac_limit_profile_set_limit(profile, 
cfg->limit); + + if (!set) { + if (!cds_list_empty(&profile->mlp_list)) { + DP_DEBUG(MAC_LIMIT, DEBUG, MAC_LIMIT, + "Not deleting profile %s, list not EMPTY\n", + profile->mlp_name); + return 0; + } + /* Delete the profile if it is not referred to by anything */ + mac_limit_delete_profile(profile); + } + + return 0; +} + +/* + * MAC limit Entry functions + */ +static struct mac_limit_entry *mle_find_entry(struct ifnet *ifp, + uint16_t vlan) +{ + struct mac_limit_entry *entry; + struct mac_limit_entry *next; + + if (!mac_limit_list) + return NULL; + + cds_list_for_each_entry_safe(entry, next, mac_limit_list, mle_list) { + if ((entry->mle_ifp == ifp) && (entry->mle_vlan == vlan)) + return entry; + } + return NULL; +} + +static struct mac_limit_entry *mle_add_entry(struct ifnet *ifp, + uint16_t vlan) +{ + struct mac_limit_entry *entry; + + if (!mac_limit_list) { + mac_limit_list = calloc(1, sizeof(*mac_limit_list)); + if (!mac_limit_list) + return NULL; + + CDS_INIT_LIST_HEAD(mac_limit_list); + } + entry = calloc(1, sizeof(*entry)); + if (!entry) { + RTE_LOG(ERR, MAC_LIMIT, + "Failed to alloc mac limit_list entry"); + return NULL; + } + entry->mle_vlan = vlan; + entry->mle_ifp = ifp; + cds_list_add_tail(&entry->mle_list, mac_limit_list); + DP_DEBUG(MAC_LIMIT, DEBUG, MAC_LIMIT, + "Allocated entry %lx for Intf: %s, vlan: %d\n", + (unsigned long)entry, ifp->if_name, vlan); + return entry; +} + +static void mle_entry_free(struct rcu_head *head) +{ + struct mac_limit_entry *entry; + + entry = caa_container_of(head, struct mac_limit_entry, mle_rcu); + free(entry); +} + +static void mle_delete_entry(struct mac_limit_entry *entry) +{ + if (!entry) + return; + + cds_list_del(&entry->mle_list); + cds_list_del(&entry->mle_profile_list); + DP_DEBUG(MAC_LIMIT, DEBUG, MAC_LIMIT, "Freeing entry %lx\n", + (unsigned long)entry); + call_rcu(&entry->mle_rcu, mle_entry_free); +} + +/* + * mac-limit + */ +static int mac_limit_set_intf_cfg(MacLimitConfig__MacLimitIfVLANConfig *cfg) +{ + 
bool set = cfg->action == MAC_LIMIT_CONFIG__ACTION__SET; + char *ifname, *pname; + struct ifnet *ifp = NULL; + struct mac_limit_entry *entry = NULL; + struct mac_limit_profile *profile = NULL; + uint16_t vlan; + bool update = false; + + ifname = cfg->ifname; + vlan = cfg->vlan; + pname = cfg->profile; + + DP_DEBUG(MAC_LIMIT, DEBUG, MAC_LIMIT, + "set_intf_cfg: %s intf %s vlan %u profile %s\n", + set ? "Set" : "Delete", + ifname, vlan, pname); + + ifp = dp_ifnet_byifname(ifname); + if (!ifp) { + RTE_LOG(ERR, MAC_LIMIT, "No interface %s\n", ifname); + return -1; + } + + entry = mle_find_entry(ifp, vlan); + + if (set) { + profile = mac_limit_find_profile(pname); + if (!profile) { + RTE_LOG(ERR, MAC_LIMIT, "Invalid profile %s\n", pname); + return -1; + } + if (entry) { + update = true; + /* + * Existing entry. If the profile name differs from the + * one we are adding, need to undo the existing one + * first. + */ + if (entry->mle_profile != profile) { + cds_list_del(&entry->mle_profile_list); + entry->mle_profile = NULL; + } + } else { + entry = mle_add_entry(ifp, vlan); + if (!entry) + return -1; + } + entry->mle_profile = profile; + cds_list_add_rcu(&entry->mle_profile_list, + &profile->mlp_list); + mac_limit_fal_apply(entry, update); + } else { + /* Nothing to delete */ + if (!entry) + return 0; + + if (entry->mle_profile->mlp_limit) + mac_limit_fal_unapply(entry); + + mle_delete_entry(entry); + } + + return 0; +} + +/* + * mac-limit SET profile + * mac-limit DELETE profile + * + * mac-limit SET + * mac-limit DELETE + * + */ +static int +cmd_mac_limit_cfg(struct pb_msg *msg) +{ + MacLimitConfig *mlmsg = mac_limit_config__unpack(NULL, msg->msg_len, + msg->msg); + int ret; + + if (!mlmsg) { + RTE_LOG(ERR, DATAPLANE, + "failed to read MacLimitConfig protobuf command\n"); + return -1; + } + + switch (mlmsg->mtype_case) { + case MAC_LIMIT_CONFIG__MTYPE_PROFILE: + ret = mac_limit_set_profile(mlmsg->profile); + break; + case MAC_LIMIT_CONFIG__MTYPE_IFVLAN: + ret = 
mac_limit_set_intf_cfg(mlmsg->ifvlan); + break; + default: + RTE_LOG(INFO, MAC_LIMIT, + "unhandled MacLimitConfig message type %d\n", + mlmsg->mtype_case); + ret = 0; + break; + } + + mac_limit_config__free_unpacked(mlmsg, NULL); + return ret; +} + +PB_REGISTER_CMD(maclimit_cmd) = { + .cmd = "vyatta:maclimit", + .handler = cmd_mac_limit_cfg, +}; + +static int mac_limit_entry_get_count(struct mac_limit_entry *entry) +{ + uint16_t vlan; + struct ifnet *ifp; + struct if_vlan_feat *vlan_feat; + struct fal_attribute_t vlan_attr; + + vlan = entry->mle_vlan; + ifp = entry->mle_ifp; + vlan_attr.id = FAL_VLAN_FEATURE_ATTR_MAC_COUNT; + + vlan_feat = if_vlan_feat_get(ifp, vlan); + if (!vlan_feat) { + RTE_LOG(ERR, MAC_LIMIT, + "Failed to retrieve mac count for intf %s vlan %d\n", + ifp->if_name, vlan); + return 0; + } + if (!fal_vlan_feature_get_attr(vlan_feat->fal_vlan_feat, 1, + &vlan_attr)) + return vlan_attr.value.u32; + + return 0; +} + +/* + * Dump all structures or specific info. + */ +static void mac_limit_dump(FILE *f, const char *intf, + uint16_t vlan, const char *profile) +{ + struct mac_limit_entry *entry; + struct mac_limit_entry *next; + struct mac_limit_profile *instance; + struct cds_lfht_iter iter; + json_writer_t *wr; + + if (!intf || !profile) + return; + + if (!mac_limit_profile_tbl) + return; + + if (f == NULL) + f = stderr; + + wr = jsonw_new(f); + jsonw_name(wr, "mac-limit"); + jsonw_start_object(wr); + if (strcmp(intf, "none") != 0) { + jsonw_name(wr, "instance"); + if (mac_limit_list) { + jsonw_start_array(wr); + cds_list_for_each_entry_safe(entry, next, + mac_limit_list, mle_list) { + if (!strcmp(intf, "all") || + (!strcmp(intf, + entry->mle_ifp->if_name) && + entry->mle_vlan == vlan)) { + jsonw_start_object(wr); + jsonw_string_field( + wr, "interface", + entry->mle_ifp->if_name); + jsonw_uint_field(wr, "vlan", + entry->mle_vlan); + jsonw_string_field( + wr, "profile", + entry->mle_profile->mlp_name); + jsonw_end_object(wr); + } + } + 
jsonw_end_array(wr); + } + } + + if (strcmp(profile, "none") != 0) { + jsonw_name(wr, "profile"); + jsonw_start_array(wr); + cds_lfht_for_each_entry(mac_limit_profile_tbl, &iter, + instance, mlp_node) { + if (!strcmp(profile, "all") + || !strcmp(instance->mlp_name, profile)) { + jsonw_start_object(wr); + jsonw_string_field(wr, "name", + instance->mlp_name); + jsonw_uint_field(wr, "limit", + instance->mlp_limit); + jsonw_end_object(wr); + } + } + jsonw_end_array(wr); + } + jsonw_end_object(wr); + jsonw_destroy(&wr); +} + +/* + * mac-limit show status + * mac-limit dump (internal use) + */ +int cmd_mac_limit_op(FILE *f, int argc, char **argv) +{ + int count; + char *ifname; + uint16_t vlan; + struct ifnet *ifp; + json_writer_t *wr; + struct mac_limit_profile *mlp; + struct mac_limit_entry *mac_limit; + + if (argc < 5) + goto error; + + if (!strcmp(argv[1], "dump")) { + mac_limit_dump(f, argv[2], atoi(argv[3]), argv[4]); + return 0; + } + + if ((strcmp(argv[1], "show") != 0) || (strcmp(argv[2], "status") != 0)) + goto error; + + ifname = argv[3]; + vlan = atoi(argv[4]); + + ifp = dp_ifnet_byifname(ifname); + if (!ifp) { + fprintf(f, "No interface %s\n", ifname); + return -1; + } + + mac_limit = mle_find_entry(ifp, vlan); + if (!mac_limit) { + fprintf(f, "No mac-limit configuration found for %s %d\n", + ifname, vlan); + return -1; + } + + mlp = mac_limit->mle_profile; + if (mlp == NULL) { + fprintf(f, "Failed to find profile intf:%s, vlan %d\n", + ifname, vlan); + return -1; + } + + count = mac_limit_entry_get_count(mac_limit); + wr = jsonw_new(f); + jsonw_name(wr, "statistics"); + jsonw_start_object(wr); + jsonw_uint_field(wr, "limit", mlp->mlp_limit); + jsonw_uint_field(wr, "count", count); + jsonw_end_object(wr); + jsonw_destroy(&wr); + + return 0; + +error: + fprintf(f, "Usage: mac-limit show "); + return -1; +} + +static void +mac_limit_if_vlan_add(struct ifnet *ifp, uint16_t vlan) +{ + struct mac_limit_entry *entry; + + entry = mle_find_entry(ifp, vlan); + if 
(!entry) + return; + + DP_DEBUG(MAC_LIMIT, DEBUG, MAC_LIMIT, + "%s: Found entry for intf %s, vlan %d\n", + __func__, ifp->if_name, vlan); + + mac_limit_fal_apply(entry, false); +} + +static void +mac_limit_if_vlan_del(struct ifnet *ifp, uint16_t vlan) +{ + struct mac_limit_entry *entry; + + entry = mle_find_entry(ifp, vlan); + if (!entry) + return; + + DP_DEBUG(MAC_LIMIT, DEBUG, MAC_LIMIT, + "%s: Found entry for intf %s vlan %d\n", + __func__, ifp->if_name, vlan); + + mac_limit_fal_unapply(entry); +} + +static const struct dp_event_ops mac_limit_events = { + .if_vlan_add = mac_limit_if_vlan_add, + .if_vlan_del = mac_limit_if_vlan_del, +}; + +DP_STARTUP_EVENT_REGISTER(mac_limit_events); diff --git a/src/mac_limit.h b/src/mac_limit.h new file mode 100644 index 00000000..6cc4a0e9 --- /dev/null +++ b/src/mac_limit.h @@ -0,0 +1,14 @@ +/*- + * Copyright (c) 2020, AT&T Intellectual Property. + * All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + * + * MAC limit feature handling + */ +#ifndef MAC_LIMIT_H +#define MAC_LIMIT_H + +int cmd_mac_limit_op(FILE *f, int argc, char **argv); + +#endif diff --git a/src/main.c b/src/main.c index a714af5c..3401d349 100644 --- a/src/main.c +++ b/src/main.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2011-2017 by Brocade Communications Systems, Inc. * All rights reserved. 
*/ @@ -89,57 +89,50 @@ #include "address.h" #include "bitmask.h" -#include "bridge.h" #include "capture.h" #include "commands.h" #include "compat.h" #include "compiler.h" -#include "config.h" +#include "config_internal.h" #include "crypto/crypto_forward.h" #include "crypto/crypto_main.h" -#include "crypto/vti.h" #include "dp_event.h" #include "ether.h" -#include "event.h" +#include "event_internal.h" #include "fal.h" -#include "gre.h" +#include "feature_plugin_internal.h" +#include "if/dpdk-eth/dpdk_eth_if.h" +#include "if/dpdk-eth/dpdk_eth_linkwatch.h" +#include "if/dpdk-eth/vhost.h" #include "if_llatbl.h" #include "if_var.h" #include "ip_funcs.h" -#include "ip_mcast.h" #include "ip_ttl.h" #include "json_writer.h" #include "l2_rx_fltr.h" -#include "l2tp/l2tpeth.h" -#include "lag.h" -#include "macvlan.h" #include "main.h" -#include "master.h" +#include "controller.h" #include "mpls/mpls_label_table.h" #include "netinet6/ip6_funcs.h" #include "npf/fragment/ipv4_rsmbl.h" #include "npf_shim.h" #include "pipeline/pl_internal.h" -#include "pktmbuf.h" +#include "pktmbuf_internal.h" #include "portmonitor/portmonitor.h" #include "power.h" #include "qos.h" +#include "rcu.h" #include "route.h" #include "session/session.h" -#include "shadow.h" +#include "lcore_sched.h" +#include "lcore_sched_internal.h" #include "udp_handler.h" -#include "urcu.h" #include "util.h" #include "version.h" -#include "vhost.h" #include "vplane_debug.h" #include "vplane_log.h" -#include "vrf.h" -#include "vxlan.h" -#include "pipeline/nodes/pppoe/pppoe.h" +#include "vrf_internal.h" #include "backplane.h" -#include "vlan_modify.h" -#include "dpdk_eth_if.h" packet_input_t packet_input_func __hot_data = ether_input_no_dyn_feats; @@ -160,8 +153,8 @@ packet_input_t packet_input_func __hot_data = ether_input_no_dyn_feats; * 2. 
Rx_desc must be power of 2 */ -/* For bond interface, maximum number of slave interfaces */ -#define DATAPLANE_SLAVE_MULTIPLIER 2 +/* For bond interface, maximum number of member interfaces */ +#define DATAPLANE_MEMBER_MULTIPLIER 2 static struct rxtx_param *driver_param; @@ -215,6 +208,7 @@ struct lcore_conf { uint16_t high_txq; /* highest index assigned to tx_poll */ uint8_t tx_qid; /* my tx queue for multi-queue devices */ uint8_t do_crypto; /* thread is tasked with doing crypto */ + uint8_t crypto_fwd; /* post-crypto forwarding workload present */ /* receive queues this cpu should check for input */ struct lcore_rx_queue { @@ -252,6 +246,14 @@ struct lcore_conf { struct rate_stats rx_poll_stats[MAX_RX_QUEUE_PER_CORE]; struct rate_stats tx_poll_stats[MAX_TX_QUEUE_PER_CORE]; struct rate_stats crypt_stats; + struct rate_stats crypt_fwd_stats; + bool ded_to_feature; + + /* State for when a feature has registered to use this core */ + uint8_t do_feature; + struct dp_lcore_feat feat; + struct rate_stats feat_rx_stats; + struct rate_stats feat_tx_stats; } __rte_cache_aligned; static struct lcore_conf *lcore_conf[RTE_MAX_LCORE]; @@ -263,34 +265,58 @@ static const uint8_t NO_OWNER = 255; /* Port configuration */ static struct port_conf { - struct rte_ring *pkt_ring[MAX_TX_QUEUE_PER_PORT]; - int8_t socketid; /* NUMA socket */ - uint8_t rx_queues; - uint8_t tx_queues; - uint8_t nrings; - bool percoreq; - uint8_t max_rings; - uint16_t rx_desc; - uint16_t tx_desc; - uint16_t buffers; - uint32_t buf_size; - bitmask_t rx_cpu_affinity; - bitmask_t tx_cpu_affinity; - bitmask_t tx_enabled_queues; - bitmask_t rx_enabled_queues; - bool uses_queue_state; - - struct rte_mempool *rx_pool; /* Receive buffer pool */ - struct rte_eth_txconf tx_conf; - struct rte_eth_rxconf rx_conf; + struct rte_ring *pkt_ring[MAX_TX_QUEUE_PER_PORT]; /* 0 32 */ + uint8_t nrings; /* 32 1 */ + uint8_t max_rings; /* 33 1 */ + bool percoreq; /* 34 1 */ + + /* XXX 5 bytes hole, try to pack. 
*/ + + bitmask_t tx_enabled_queues; /* 40 16 */ + bitmask_t rx_enabled_queues; /* 56 16 */ + + /* size: 128, cachlines: 2, members: 6 */ + /* sum members: 57, holes: 1, sum holes: 5 */ + /* padding: 56 */ } __rte_cache_aligned port_config[DATAPLANE_MAX_PORTS] __hot_data; +/* Port allocations */ +static struct port_alloc { + uint64_t dev_flags; /* 0 8 */ + uint32_t buf_size; /* 8 4 */ + uint16_t rx_desc; /* 12 2 */ + uint16_t tx_desc; /* 14 2 */ + uint32_t buffers; /* 16 4 */ + uint8_t rx_queues; /* 20 1 */ + uint8_t tx_queues; /* 20 1 */ + int8_t socketid; /* 22 1 */ + bool uses_queue_state; /* 23 1 */ + bitmask_t rx_cpu_affinity; /* 24 16 */ + bitmask_t tx_cpu_affinity; /* 40 16 */ + struct rte_eth_txconf tx_conf; /* 56 56 */ + /* --- cacheline 1 boundary (64 bytes) was 48 bytes ago --- */ + struct rte_eth_rxconf rx_conf; /* 112 48 */ + /* --- cacheline 2 boundary (128 bytes) was 32 bytes ago --- */ + enum rte_eth_rx_mq_mode rx_mq_mode; /* 160 4 */ + + /* XXX 4 bytes hole, try to packet */ + + struct rte_mempool *rx_pool; /* 168 8 */ + + /* size: 176, cachelines: 3, members: 15 */ + /* sum members: 172, holes: 1, sum holes: 4 */ + /* last cacheline: 48 bytes */ +} port_allocations[DATAPLANE_MAX_PORTS]; + /* Per socket mbuf pool */ static struct rte_mempool *numa_pool[RTE_MAX_NUMA_NODES]; /* Single CPU forwarding thread */ static pthread_t single_forward_thread; +/* DPDK owner for ports */ +struct rte_eth_dev_owner owner = { .id = RTE_ETH_DEV_NO_OWNER }; + /* Program name for log and usage message */ char *progname; @@ -300,28 +326,25 @@ volatile bool running = true; uid_t dataplane_uid; gid_t dataplane_gid; -static bitmask_t enabled_port_mask; /* port is admin UP */ +bitmask_t enabled_port_mask; /* port is valid */ bitmask_t poll_port_mask; /* should be polled */ /* port should be polled and is link up */ bitmask_t active_port_mask __hot_data; -uint64_t dp_debug = DP_DBG_DEFAULT; +uint16_t nb_ports_total; /* highest DPDK portid + 1 */ static bool daemon_mode; /* 
become daemon */ static unsigned int avail_cores; /* number of forwarding cores */ -static bool single_cpu; /* is dataplane running on uP */ -static const char *pid_file; /* record pid of master thread */ -static const char *config_file = VYATTA_SYSCONF_DIR"/dataplane.conf"; +bool single_cpu; /* is dataplane running on uP */ +static const char *pid_file; /* record pid of main thread */ static const char *drv_cfg_file = VYATTA_DATA_DIR"/dataplane-drivers-default.conf"; static const char *drv_override_cfg_file = VYATTA_SYSCONF_DIR"/dataplane-drivers.conf"; -static pthread_t master_pthread; +static pthread_t main_pthread; -/* Modified version of RTE_FOREACH_SLAVE which - * which accounts for case of uP - */ +/* Modified version of DPDK routine which accounts for case of uP. */ #define FOREACH_FORWARD_LCORE(i) \ for ((i) = rte_get_next_lcore(-1, !single_cpu, 0); \ (i) < RTE_MAX_LCORE; \ @@ -330,21 +353,20 @@ static pthread_t master_pthread; /* * Default Ethernet configuration * Modified as needed to support different MTU + * + * We may need to transmit a jumbo frame, or prepend to a + * cloned packet and both of these require multiple segment + * support for TX, so request it. */ static const struct rte_eth_conf eth_base_conf = { .rxmode = { .mq_mode = ETH_MQ_RX_RSS, - .max_rx_pkt_len = ETHER_MAX_LEN, + .max_rx_pkt_len = RTE_ETHER_MAX_LEN, .split_hdr_size = 0, -#if RTE_VERSION < RTE_VERSION_NUM(18,8,0,0) - .header_split = 0, /**< Header Split disabled */ - .hw_ip_checksum = 0, /**< IP checksum offload disabled */ - .hw_vlan_filter = 1, - .hw_vlan_strip = 1, - .jumbo_frame = 0, - .hw_strip_crc = 1, - .enable_scatter = 0, -#endif + }, + .txmode = { + .offloads = DEV_TX_OFFLOAD_MULTI_SEGS | + DEV_TX_OFFLOAD_VLAN_INSERT, }, .rx_adv_conf = { .rss_conf = { @@ -411,19 +433,19 @@ eth_tx_burst(struct ifnet *ifp, uint16_t queue_id, } /* Used to send burst of packets when only one queue available. - * Since multiple pthreads run on master core, need a mutex. 
+ * Since multiple pthreads run on main core, need a mutex. */ static int -master_eth_tx(struct ifnet *ifp, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) +main_eth_tx(struct ifnet *ifp, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) { int ret; - static pthread_mutex_t master_tx_lock = PTHREAD_MUTEX_INITIALIZER; + static pthread_mutex_t main_tx_lock = PTHREAD_MUTEX_INITIALIZER; eth_tx_run_post_qos_features(ifp, tx_pkts, nb_pkts); - pthread_mutex_lock(&master_tx_lock); + pthread_mutex_lock(&main_tx_lock); ret = rte_eth_tx_burst(ifp->if_port, 0, tx_pkts, nb_pkts); - pthread_mutex_unlock(&master_tx_lock); + pthread_mutex_unlock(&main_tx_lock); return ret; } @@ -500,7 +522,7 @@ static void pkt_burst_init(unsigned int lcore_id, uint16_t qid) } -void pkt_burst_free(void) +void dp_pkt_burst_free(void) { unsigned int lcore_id = rte_lcore_id(); @@ -511,7 +533,7 @@ void pkt_burst_free(void) rte_free(RTE_PER_LCORE(pkt_burst)); } -void pkt_burst_setup(void) +void dp_pkt_burst_setup(void) { unsigned int lcore_id = rte_lcore_id(); @@ -602,7 +624,7 @@ static __hot_func void pkt_ring_drain(void) crypto_send(cpb); } -static __hot_func +ALWAYS_INLINE __hot_func void pkt_ring_output(struct ifnet *ifp, struct rte_mbuf *m) { portid_t portid = ifp->if_port; @@ -630,8 +652,7 @@ void pkt_ring_output(struct ifnet *ifp, struct rte_mbuf *m) if (likely(pb != NULL)) { if (unlikely(ifp->portmonitor) && - __use_directpath(pb->port, - ifp->qos_software_fwd)) + __use_directpath(portid, ifp->qos_software_fwd)) portmonitor_src_phy_tx_output(ifp, &m, 1); /* If changing flows to another port */ @@ -651,7 +672,7 @@ void pkt_ring_output(struct ifnet *ifp, struct rte_mbuf *m) if (unlikely(ifp->portmonitor)) portmonitor_src_phy_tx_output(ifp, &m, 1); - if (!master_eth_tx(ifp, &m, 1)) + if (!main_eth_tx(ifp, &m, 1)) goto full_hwq; } else { /* must be lcore 0 */ @@ -672,10 +693,9 @@ full_txring: __cold_label; full_hwq: __cold_label; if_incr_full_hwq(ifp, 1); rte_pktmbuf_free(m); - return; } -void 
pkt_burst_flush(void) +void dp_pkt_burst_flush(void) { unsigned int lcore_id = rte_lcore_id(); @@ -688,145 +708,6 @@ void pkt_burst_flush(void) pkt_ring_burst(pb, true); } -static struct rte_mbuf * -if_output_features(struct ifnet *ifp, struct rte_mbuf **m) -{ - if (unlikely(ifp->vlan_modify)) - if (unlikely(!vlan_modify_egress(ifp, m))) - return NULL; - - if (unlikely(ifp->portmonitor)) - portmonitor_src_vif_tx_output(ifp, m); - - if (unlikely(ifp->capturing) && - capture_if_use_common_cap_points(ifp)) - capture_burst(ifp, m, 1); - - return *m; -} - -static void unsup_tunnel_output(struct ifnet *ifp, struct rte_mbuf *m, - struct ifnet *input_ifp, uint16_t proto) -{ - if (!input_ifp) { - rte_pktmbuf_free(m); - if_incr_dropped(ifp); - return; - } - - switch (proto) { - case ETH_P_IP: - /* - * Assume the packet has been forwarded and thus its - * ttl has been decremented. - */ - increment_ttl(iphdr(m)); - ip_local_deliver(ifp, m); - break; - case ETH_P_IPV6: - ip6hdr(m)->ip6_hlim += IPV6_HLIMDEC; - ip6_local_deliver(ifp, m); - break; - default: - local_packet(ifp, m); - break; - } -} - -/* Packet on virtual feature point */ -static void vfp_output(struct ifnet *ifp, struct rte_mbuf *m, - struct ifnet *input_ifp, uint16_t proto) -{ - struct vfp_softc *vsc = ifp->if_softc; - - switch (vsc->vfp_type) { - case VFP_S2S_CRYPTO: - crypto_policy_post_features_outbound(ifp, input_ifp, m, proto); - break; - case VFP_NONE: - /* Packet on loopback shouldn't reach here */ - assert(0); - rte_pktmbuf_free(m); - if_incr_dropped(ifp); - break; - } -} - -/* - * Transmit one packet - * - * The expectation is that for !IFF_NOARP interfaces then the packet - * will be properly L2 encapsulated at this point such that it can be - * sent to the L2 neighbour. - * - * For IFF_NOARP interfaces then the packet will be L2 encapsulated - * during send. 
- * - * The reason for this asymmetry is to keep the address resolution - * above this layer for multipoint interfaces, yet to keep things - * simple and fast for point-to-point interfaces to avoid needing to - * perform an extra encap step before calling this function. - * - * The proto passed in is the link-layer protocol used for - * point-to-point interfaces. - */ -__hot_func __rte_cache_aligned -void if_output(struct ifnet *ifp, struct rte_mbuf *m, - struct ifnet *input_ifp, uint16_t proto) -{ - uint16_t rx_vlan = pktmbuf_get_rxvlanid(m); - - if (ifp->if_type == IFT_L2VLAN) { - if_add_vlan(ifp, &m); - - if (!if_output_features(ifp, &m)) - goto out; - - ifp = ifp->if_parent; - - /* for the case where original ifp was for QinQ */ - if (ifp->if_type == IFT_L2VLAN) { - if (!if_output_features(ifp, &m)) - goto out; - ifp = ifp->if_parent; - } - } - - if (!if_output_features(ifp, &m)) - goto out; - - if (likely(ifp->if_type == IFT_ETHER)) - pkt_ring_output(ifp, m); - else if (ifp->if_type == IFT_BRIDGE) - bridge_output(ifp, m, input_ifp); - else if (ifp->if_type == IFT_VXLAN) - vxlan_output(ifp, m, proto); - else if (ifp->if_type == IFT_L2TPETH) - l2tp_output(ifp, m, rx_vlan); - else if (ifp->if_type == IFT_TUNNEL_GRE) - gre_tunnel_send(input_ifp, ifp, m, proto); - else if (ifp->if_type == IFT_TUNNEL_VTI) - vti_tunnel_out(input_ifp, ifp, m, proto); - else if (ifp->if_type == IFT_PPP) - ppp_tunnel_output(ifp, m, input_ifp, proto); - else if (ifp->if_type == IFT_TUNNEL_OTHER) - unsup_tunnel_output(ifp, m, input_ifp, proto); - else if (ifp->if_type == IFT_LOOP) - vfp_output(ifp, m, input_ifp, proto); - else if (ifp->if_type == IFT_MACVLAN) - macvlan_output(ifp, m, input_ifp, proto); - else { - /* - * Packets for other interface types shouldn't reach - * this point. 
- */ -out: - assert(0); - rte_pktmbuf_free(m); - if_incr_dropped(ifp); - } -} - static __hot_func void process_burst(portid_t portid, struct rte_mbuf *pkts[], uint16_t nb) { @@ -1138,7 +1019,9 @@ forwarding_loop(unsigned int lcore_id) enum lcore_state state; RTE_PER_LCORE(_dp_lcore_id) = lcore_id; - dp_crypto_per_lcore_init(lcore_id); + dp_lcore_events_init(lcore_id); + + crypto_create_fwd_queue(lcore_id); pkt_burst_init(lcore_id, conf->tx_qid); @@ -1151,11 +1034,11 @@ forwarding_loop(unsigned int lcore_id) conf->do_crypto ? "and crypto " : "", lcore_id); /* Each thread containing read-side critical sections must be registered - * with rcu_register_thread() before calling rcu_read_lock(). + * with rcu_register_thread() before calling dp_rcu_read_lock(). */ - rcu_register_thread(); + dp_rcu_register_thread(); do { - rcu_read_lock(); + dp_rcu_read_lock(); pm = get_current_pm(); for (i = 0; i < pm->idle_thresh ; i++) { @@ -1165,6 +1048,8 @@ forwarding_loop(unsigned int lcore_id) process_crypto(conf); if (CMM_LOAD_SHARED(conf->num_txq) > 0) poll_transmit_queues(conf); + if (CMM_LOAD_SHARED(conf->crypto_fwd)) + crypto_fwd_processed_packets(); } /* Move leftover packets */ @@ -1172,7 +1057,7 @@ forwarding_loop(unsigned int lcore_id) state = lcore_next_state(conf, pm, &us); - rcu_read_unlock(); + dp_rcu_read_unlock(); switch (state) { case LCORE_STATE_EXIT: @@ -1180,22 +1065,23 @@ forwarding_loop(unsigned int lcore_id) lcore_id); break; case LCORE_STATE_POLL: - rcu_quiescent_state(); + dp_rcu_quiescent_state(lcore_id); break; case LCORE_STATE_POWERSAVE: - rcu_quiescent_state(); + dp_rcu_quiescent_state(lcore_id); usleep(us); break; case LCORE_STATE_IDLE: - rcu_thread_offline(); + dp_rcu_thread_offline(); sleep(LCORE_IDLE_SLEEP_SECS); - rcu_thread_online(); + dp_rcu_thread_online(); break; } } while (likely(state != LCORE_STATE_EXIT)); - rcu_unregister_thread(); + dp_rcu_unregister_thread(); - pkt_burst_free(); + dp_lcore_events_teardown(lcore_id); + dp_pkt_burst_free(); 
RTE_LOG(DEBUG, DATAPLANE, "stopped core %d\n", lcore_id); @@ -1213,7 +1099,18 @@ launch_one_lcore(void *arg __unused) RTE_LOG(DEBUG, DATAPLANE, "start core %d\n", lcore); - forwarding_loop(lcore); + renice(-20); + + if (CMM_LOAD_SHARED(lcore_conf[lcore]->do_feature)) { + RTE_PER_LCORE(_dp_lcore_id) = lcore; + dp_lcore_events_init(lcore); + + lcore_conf[lcore]->feat.dp_lcore_feat_fn(lcore, NULL); + + dp_lcore_events_teardown(lcore); + } else { + forwarding_loop(lcore); + } return 0; } @@ -1233,6 +1130,7 @@ int reconfigure_queues(portid_t portid, uint16_t nb_rx_queues, uint16_t nb_tx_queues) { struct port_conf *port_conf = &port_config[portid]; + struct port_alloc *port_alloc = &port_allocations[portid]; struct rte_eth_dev *eth_dev = &rte_eth_devices[portid]; struct rte_eth_conf dev_conf; int ret; @@ -1240,7 +1138,7 @@ int reconfigure_queues(portid_t portid, /* The device may have changed its configuration since * we last configured. This is typical for bonding which - * must use a subset of the capabilities of the slaves. + * must use a subset of the capabilities of the members. 
*/ memcpy(&dev_conf, &eth_dev->data->dev_conf, sizeof(dev_conf)); @@ -1253,38 +1151,39 @@ int reconfigure_queues(portid_t portid, goto out; } - port_conf->rx_queues = nb_rx_queues; - port_conf->tx_queues = nb_tx_queues; + port_alloc->rx_queues = nb_rx_queues; + port_alloc->tx_queues = nb_tx_queues; bitmask_zero(&port_conf->tx_enabled_queues); bitmask_zero(&port_conf->rx_enabled_queues); - for (q = 0; q < port_conf->tx_queues; q++) + for (q = 0; q < port_alloc->tx_queues; q++) bitmask_set(&port_conf->tx_enabled_queues, q); - for (q = 0; q < port_conf->rx_queues; q++) + for (q = 0; q < port_alloc->rx_queues; q++) bitmask_set(&port_conf->rx_enabled_queues, q); out: return ret; } -static int +int eth_port_configure(portid_t portid, struct rte_eth_conf *dev_conf) { - struct port_conf *port_conf = &port_config[portid]; + struct port_alloc *port_alloc = &port_allocations[portid]; uint16_t queueid; int ret; int8_t socketid; - if (port_conf->socketid == SOCKET_ID_ANY) + if (port_alloc->socketid == SOCKET_ID_ANY) socketid = 0; else - socketid = port_conf->socketid; + socketid = port_alloc->socketid; DP_DEBUG(INIT, DEBUG, DATAPLANE, "Configure port %u (rxq %u, txq %u, socket %d)\n", - portid, port_conf->rx_queues, port_conf->tx_queues, socketid); + portid, port_alloc->rx_queues, + port_alloc->tx_queues, socketid); - ret = rte_eth_dev_configure(portid, port_conf->rx_queues, - port_conf->tx_queues, dev_conf); + ret = rte_eth_dev_configure(portid, port_alloc->rx_queues, + port_alloc->tx_queues, dev_conf); if (ret < 0) { RTE_LOG(ERR, DATAPLANE, "Cannot configure device: err=%d, port=%u\n", @@ -1292,11 +1191,11 @@ eth_port_configure(portid_t portid, struct rte_eth_conf *dev_conf) return -1; } - for (queueid = 0; queueid < port_conf->rx_queues; ++queueid) { + for (queueid = 0; queueid < port_alloc->rx_queues; ++queueid) { ret = rte_eth_rx_queue_setup(portid, queueid, - port_conf->rx_desc, - socketid, &port_conf->rx_conf, - port_conf->rx_pool); + port_alloc->rx_desc, + socketid, 
&port_alloc->rx_conf, + port_alloc->rx_pool); if (ret < 0) { RTE_LOG(ERR, DATAPLANE, "rte_eth_rx_queue_setup: err=%d, port=%u\n", @@ -1305,10 +1204,10 @@ eth_port_configure(portid_t portid, struct rte_eth_conf *dev_conf) } } - for (queueid = 0; queueid < port_conf->tx_queues; ++queueid) { + for (queueid = 0; queueid < port_alloc->tx_queues; ++queueid) { ret = rte_eth_tx_queue_setup(portid, queueid, - port_conf->tx_desc, - socketid, &port_conf->tx_conf); + port_alloc->tx_desc, + socketid, &port_alloc->tx_conf); if (ret < 0) { RTE_LOG(ERR, DATAPLANE, "rte_eth_tx_queue_setup: err=%d, port=%u\n", @@ -1320,167 +1219,6 @@ eth_port_configure(portid_t portid, struct rte_eth_conf *dev_conf) return 0; } -static void reconfigure_slave(struct ifnet *ifp, void *arg) -{ - struct rte_eth_conf *conf = arg; - struct rte_eth_conf *slave_conf; - struct rte_eth_dev *slave_dev; - - /* Ensure slave is stopped as stopping master does not do this */ - rte_eth_dev_stop(ifp->if_port); - - /* - * Update slave config to match the master jumbo config - * so that it will accept a jumbo mtu change. - * Leave everything else alone. - * When the master is restarted, it will configure the slave, - * set up its queues, and start it, so don't call - * rte_eth_dev_configure() directly here. 
- */ - slave_dev = &rte_eth_devices[ifp->if_port]; - slave_conf = &slave_dev->data->dev_conf; -#if RTE_VERSION >= RTE_VERSION_NUM(18,8,0,0) - if (conf->rxmode.offloads & DEV_RX_OFFLOAD_SCATTER) - slave_conf->rxmode.offloads |= DEV_RX_OFFLOAD_SCATTER; - else - slave_conf->rxmode.offloads &= ~(DEV_RX_OFFLOAD_SCATTER); - if (conf->rxmode.offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) - slave_conf->rxmode.offloads |= DEV_RX_OFFLOAD_JUMBO_FRAME; - else - slave_conf->rxmode.offloads &= ~(DEV_RX_OFFLOAD_JUMBO_FRAME); -#else - slave_conf->rxmode.enable_scatter = conf->rxmode.enable_scatter; - slave_conf->rxmode.jumbo_frame = conf->rxmode.jumbo_frame; -#endif -} - -/* - * Reconfigure a port, stopping the port if necessary and - * performing any necessary work after restarting the port. - * - * reconfigure_port_cb can be used to perform any additional - * operations before the port is restarted. - */ -int reconfigure_port(struct ifnet *ifp, - struct rte_eth_conf *dev_conf, - reconfigure_port_cb_fn reconfigure_port_cb) -{ - portid_t portid = ifp->if_port; - int err; - struct rte_eth_dev *dev = &rte_eth_devices[portid]; - int dev_started = dev->data->dev_started; - - if (dev_started) - stop_port(ifp->if_port); - - err = eth_port_configure(portid, dev_conf); - - if (!err && reconfigure_port_cb) - err = reconfigure_port_cb(ifp, dev_conf); - - /* - * If we brought the port down then bring it back up, even if there - * was an error. - */ - if (dev_started) { - start_port(ifp->if_port, ifp->if_flags); - if (is_team(ifp)) - lag_refresh_actor_state(ifp); - /* Reprogram HW multicast filter after restarting port */ - l2_rx_fltr_state_change(ifp); - } - - if (err && reconfigure_port_cb) - /* - * Try again if it failed when the port was down. Some - * drivers such as igb require the port to be brought - * back up after the jumbo cfg is set before the mtu - * can be set into the jumbo range. 
- */ - err = reconfigure_port_cb(ifp, dev_conf); - - return err; -} - -static int reconfigure_pkt_len_cb(struct ifnet *ifp, - struct rte_eth_conf *dev_conf) -{ - int err; - - /* Reconfigure slaves to match master jumbo config */ - if (is_team(ifp)) - lag_walk_bond_slaves(ifp, reconfigure_slave, dev_conf); - - err = rte_eth_dev_set_mtu(ifp->if_port, ifp->if_mtu_adjusted); - if (err == -ENOTSUP) - err = 0; - - return err; -} - -/* Change hardware MTU, can only be called if stopped. */ -int reconfigure_pkt_len(struct ifnet *ifp, uint32_t mtu) -{ - struct rte_eth_conf dev_conf; - struct rte_eth_dev *eth_dev = &rte_eth_devices[ifp->if_port]; - - memcpy(&dev_conf, &eth_dev->data->dev_conf, sizeof(dev_conf)); - -#if RTE_VERSION >= RTE_VERSION_NUM(18,8,0,0) - if (mtu > ETHER_MTU) { - struct rte_eth_dev_info dev_info; - rte_eth_dev_info_get(ifp->if_port, &dev_info); - if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_JUMBO_FRAME) - dev_conf.rxmode.offloads |= DEV_RX_OFFLOAD_JUMBO_FRAME; - if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_SCATTER) - dev_conf.rxmode.offloads |= DEV_RX_OFFLOAD_SCATTER; - } else { - dev_conf.rxmode.offloads &= ~(DEV_RX_OFFLOAD_JUMBO_FRAME | - DEV_RX_OFFLOAD_SCATTER); - } -#else - if (mtu > ETHER_MTU) { - dev_conf.rxmode.jumbo_frame = 1; - dev_conf.rxmode.enable_scatter = 1; - } else { - dev_conf.rxmode.jumbo_frame = 0; - dev_conf.rxmode.enable_scatter = 0; - } -#endif - dev_conf.rxmode.max_rx_pkt_len = mtu + ETHER_HDR_LEN + ETHER_CRC_LEN; - - return reconfigure_port(ifp, &dev_conf, reconfigure_pkt_len_cb); -} - -void set_speed(struct ifnet *ifp, uint32_t link_speeds) -{ - struct rte_eth_conf dev_conf; - struct rte_eth_dev *eth_dev; - - if (ifp->if_type != IFT_ETHER) { - RTE_LOG(ERR, DATAPLANE, - "%s: %s not valid local port\n", - __func__, ifp->if_name); - return; - } - - if (ifp->unplugged) - return; - - eth_dev = &rte_eth_devices[ifp->if_port]; - memcpy(&dev_conf, &eth_dev->data->dev_conf, sizeof(dev_conf)); - - if (dev_conf.link_speeds == link_speeds) - 
return; - - RTE_LOG(INFO, DATAPLANE, - "%s: setting %s to link_speeds 0x%x\n", - __func__, ifp->if_name, link_speeds); - - dev_conf.link_speeds = link_speeds; - reconfigure_port(ifp, &dev_conf, NULL); -} - uint64_t get_link_modes(struct ifnet *ifp) { struct rte_eth_dev *eth_dev; @@ -1518,13 +1256,28 @@ uint64_t get_link_modes(struct ifnet *ifp) return link_modes; } -/* Shutdown DPDK on port. */ -static void close_all_ports(void) +/* Shutdown all regular (not including backplane) DPDK ports */ +static void close_all_regular_ports(void) { unsigned int portid; for (portid = 0; portid < DATAPLANE_MAX_PORTS; ++portid) { - if (rte_eth_dev_is_valid_port(portid)) + if (bitmask_isset(&enabled_port_mask, portid) && + rte_eth_dev_is_valid_port(portid) && + !if_port_is_bkplane(portid)) + rte_eth_dev_close(portid); + } +} + +/* Shutdown all DPDK backplane ports */ +static void close_all_backplane_ports(void) +{ + unsigned int portid; + + for (portid = 0; portid < DATAPLANE_MAX_PORTS; ++portid) { + if (bitmask_isset(&enabled_port_mask, portid) && + rte_eth_dev_is_valid_port(portid) && + if_port_is_bkplane(portid)) rte_eth_dev_close(portid); } } @@ -1539,6 +1292,7 @@ usage(int status) " OPTIONS:\n" " -d, --daemon Run in daemon mode.\n" " -f, --file FILE Use configuration file\n" + " -F, --feat_plugin_dir Extra directory to check for feat plugins\n" " -h, --help Display this help and exit\n" " -i, --pid_file FILE Set process id file name\n" " -p, --port_mask PORTMASK Bitmask of ports to configure\n" @@ -1579,17 +1333,19 @@ parse_args(int argc, char **argv) { "port_mask", required_argument, NULL, 'p' }, { "daemon", no_argument, NULL, 'd' }, { "file", required_argument, NULL, 'f' }, + { "feat_plugin_dir", required_argument, NULL, 'F' }, { "user", required_argument, NULL, 'u' }, { "group", required_argument, NULL, 'g' }, { "debug", required_argument, NULL, 'D' }, { "console", required_argument, NULL, 'C' }, { "config", required_argument, NULL, 'c' }, + { "platform_file ", 
required_argument, NULL, 'P' }, { "list_cmd_versions", no_argument, NULL, ARGS_LIST_CMDS }, { "list_msg_versions", no_argument, NULL, ARGS_LIST_MSGS }, { NULL, 0, NULL, 0} }; - while ((opt = getopt_long(argc, argv, "hvdi:p:u:g:o:f:c:N:D:C:s", + while ((opt = getopt_long(argc, argv, "hvdi:p:u:g:o:f:c:N:D:C:sF:P:", lgopts, &option_index)) != EOF) { switch (opt) { @@ -1607,8 +1363,16 @@ parse_args(int argc, char **argv) enabled_port_mask = pm; break; + case 'P': + set_platform_cfg_file(optarg); + break; + case 'f': - config_file = optarg; + set_config_file(optarg); + break; + + case 'F': + set_feat_plugin_dir(optarg); break; case 'c': @@ -1641,7 +1405,6 @@ parse_args(int argc, char **argv) case 'v': printf("%s version %s\n", DATAPLANE_PROGNAME, DATAPLANE_VERSION); - printf("%s\n", DATAPLANE_COPYRIGHT); exit(0); case 'h': @@ -1655,6 +1418,7 @@ parse_args(int argc, char **argv) case 'D': dp_debug = strtoul(optarg, NULL, 0); + dp_debug_init = dp_debug; break; case ARGS_LIST_CMDS: @@ -1686,26 +1450,30 @@ static void init_rate_stats(struct rate_stats *stats) gettimeofday(&stats->last_time, NULL); } -void scale_rate_stats(struct rate_stats *stats, uint64_t *packets, - uint64_t *bytes) +void scale_rate_stats(struct rate_stats *stats, const uint64_t *packets, + const uint64_t *bytes) { struct timeval now, diff; uint64_t scaled; + uint64_t time_diff_usec; gettimeofday(&now, NULL); timersub(&now, &stats->last_time, &diff); stats->last_time = now; + + time_diff_usec = diff.tv_sec * USEC_PER_SEC + diff.tv_usec; + if (time_diff_usec == 0) + time_diff_usec = 1; + /* scale the packts to reflect 1 second */ scaled = *packets - stats->last_packets; - scaled = (scaled * USEC_PER_SEC) / - (diff.tv_sec * USEC_PER_SEC + diff.tv_usec); + scaled = (scaled * USEC_PER_SEC) / time_diff_usec; stats->packet_rate = scaled; stats->last_packets = *packets; if (bytes) { scaled = *bytes - stats->last_bytes; - scaled = (scaled * USEC_PER_SEC) / - (diff.tv_sec * USEC_PER_SEC + diff.tv_usec); + scaled = 
(scaled * USEC_PER_SEC) / time_diff_usec; stats->byte_rate = scaled; stats->last_bytes = *bytes; } @@ -1723,7 +1491,7 @@ static void stop_one_cpu(unsigned int lcore) if (!single_cpu) { RTE_LOG(ERR, DATAPLANE, - "attempt to stop forwarding thread for master core in non-single-cpu case\n"); + "attempt to stop forwarding thread for main core in non-single-cpu case\n"); return; } @@ -1738,6 +1506,8 @@ static void stop_one_cpu(unsigned int lcore) RTE_LOG(ERR, DATAPLANE, "core %d wait failed\n", lcore); } + + crypto_destroy_fwd_queue(); } conf->running = false; } @@ -1751,7 +1521,7 @@ static void stop_cpus(void) const struct lcore_conf *conf = lcore_conf[lcore]; if (forwarding_or_crypto_engine_lcore(conf) || - !conf->running) + !conf->running || conf->ded_to_feature) continue; stop_one_cpu(lcore); @@ -1830,22 +1600,23 @@ void unassign_queues(portid_t portid) unassign_port_receive_queues(portid, conf); } - synchronize_rcu(); + dp_rcu_synchronize(); pkt_ring_empty(portid); stop_cpus(); } -static bool any_assigned_queues(portid_t portid) +void enable_crypto_fwd(unsigned int lcore) { - unsigned int lcore; + struct lcore_conf *conf = lcore_conf[lcore]; - FOREACH_FORWARD_LCORE(lcore) { - struct lcore_conf *conf = lcore_conf[lcore]; + CMM_STORE_SHARED(conf->crypto_fwd, 1); +} - if (bitmask_isset(&conf->portmask, portid)) - return true; - } - return false; +void disable_crypto_fwd(unsigned int lcore) +{ + struct lcore_conf *conf = lcore_conf[lcore]; + + CMM_STORE_SHARED(conf->crypto_fwd, 0); } /* Compute load based on how much work CPU core is doing @@ -1905,21 +1676,57 @@ static int next_available_lcore(int socket_id, return best; } +static bitmask_t online_lcores_mask(void) +{ + unsigned int lcore; + bitmask_t online; + + memset(&online, 0, sizeof(online)); + + FOREACH_FORWARD_LCORE(lcore) { + if (lcore_conf[lcore]->ded_to_feature) + continue; + + bitmask_set(&online, lcore); + } + + return online; +} + +/* + * Ensures only online forwarding cores are in the mask returned. 
+ * Note that if there are none in the set, then all online cores + * are returned in the mask. + */ +static bitmask_t cpu_affinity_online(const bitmask_t *cpu_affinity_mask) +{ + bitmask_t mask = online_lcores_mask(); + + bitmask_and(&mask, cpu_affinity_mask, &mask); + + if (bitmask_isempty(&mask)) + mask = online_lcores_mask(); + + return mask; +} + /* Assign all receive queues for a port */ -static int assign_port_receive_queues(portid_t portid, bitmask_t *allowed) +static int assign_port_receive_queues(portid_t portid) { struct port_conf *port_conf = &port_config[portid]; + struct port_alloc *port_alloc = &port_allocations[portid]; unsigned int q; + bitmask_t allowed = cpu_affinity_online(&port_alloc->rx_cpu_affinity); - for (q = 0; q < port_conf->rx_queues; q++) { + for (q = 0; q < port_alloc->rx_queues; q++) { struct lcore_conf *conf; int i, lcore; if (!bitmask_isset(&port_conf->rx_enabled_queues, q)) continue; - lcore = next_available_lcore(port_conf->socketid, - allowed, + lcore = next_available_lcore(port_alloc->socketid, + &allowed, false); if (lcore < 0) { RTE_LOG(ERR, DATAPLANE, @@ -1939,18 +1746,19 @@ static int assign_port_receive_queues(portid_t portid, bitmask_t *allowed) else { RTE_LOG(ERR, DATAPLANE, "Socket %d has no unused rx queues\n", - port_conf->socketid); + port_alloc->socketid); return -ENOMEM; } found: - bitmask_clear(allowed, lcore); - if (bitmask_isempty(allowed)) - *allowed = port_conf->rx_cpu_affinity; /* start over */ + bitmask_clear(&allowed, lcore); + if (bitmask_isempty(&allowed)) + allowed = cpu_affinity_online( /* start over */ + &port_alloc->rx_cpu_affinity); _CMM_STORE_SHARED(conf->num_rxq, conf->num_rxq + 1); DP_DEBUG(INIT, DEBUG, DATAPLANE, "Assign RX port %u queue %u to core %u (node %u)\n", - portid, q, lcore, port_conf->socketid); + portid, q, lcore, port_alloc->socketid); struct lcore_rx_queue *rxq = &conf->rx_poll[i]; struct rate_stats *rxq_stats = &conf->rx_poll_stats[i]; @@ -1971,9 +1779,11 @@ static int 
assign_port_receive_queues(portid_t portid, bitmask_t *allowed) } /* Assign lcores that will handle transmit queues (bottom half) */ -static int assign_port_transmit_queues(portid_t portid, bitmask_t *allowed) +static int assign_port_transmit_queues(portid_t portid) { struct port_conf *port_conf = &port_config[portid]; + struct port_alloc *port_alloc = &port_allocations[portid]; + bitmask_t allowed = cpu_affinity_online(&port_alloc->tx_cpu_affinity); struct ifnet *ifp = ifport_table[portid]; uint16_t q; uint8_t r; @@ -1983,7 +1793,7 @@ static int assign_port_transmit_queues(portid_t portid, bitmask_t *allowed) * gaps for not-enabled rings. */ for (r = 0, q = 0; - r < port_conf->nrings && q < port_conf->tx_queues; + r < port_conf->nrings && q < port_alloc->tx_queues; q++) { struct lcore_conf *conf; int i, lcore; @@ -1991,8 +1801,8 @@ static int assign_port_transmit_queues(portid_t portid, bitmask_t *allowed) if (!bitmask_isset(&port_conf->tx_enabled_queues, q)) continue; - lcore = next_available_lcore(port_conf->socketid, - allowed, + lcore = next_available_lcore(port_alloc->socketid, + &allowed, true); if (lcore < 0) { RTE_LOG(ERR, DATAPLANE, @@ -2012,14 +1822,15 @@ static int assign_port_transmit_queues(portid_t portid, bitmask_t *allowed) else { RTE_LOG(ERR, DATAPLANE, "Socket %d has no unused tx queues\n", - port_conf->socketid); + port_alloc->socketid); return -ENOMEM; } found: - bitmask_clear(allowed, lcore); - if (bitmask_isempty(allowed)) - *allowed = port_conf->tx_cpu_affinity; /* start over */ + bitmask_clear(&allowed, lcore); + if (bitmask_isempty(&allowed)) + allowed = cpu_affinity_online( /* start over */ + &port_alloc->tx_cpu_affinity); _CMM_STORE_SHARED(conf->num_txq, conf->num_txq + 1); struct lcore_tx_queue *txq = &conf->tx_poll[i]; @@ -2044,7 +1855,7 @@ static int assign_port_transmit_queues(portid_t portid, bitmask_t *allowed) DP_DEBUG(INIT, DEBUG, DATAPLANE, "Assign TX port %u queue %u to core %u (node %u)\n", - portid, q, lcore, 
port_conf->socketid); + portid, q, lcore, port_alloc->socketid); bitmask_set(&conf->portmask, portid); } @@ -2064,7 +1875,7 @@ static bool start_one_cpu(unsigned int lcore) if (lcore == rte_get_master_lcore()) { if (!single_cpu) { RTE_LOG(ERR, DATAPLANE, - "attempt to create forwarding thread for master core in non-single-cpu case\n"); + "attempt to create forwarding thread for main core in non-single-cpu case\n"); return false; } @@ -2092,7 +1903,8 @@ static void start_cpus(void) FOREACH_FORWARD_LCORE(lcore) { const struct lcore_conf *conf = lcore_conf[lcore]; - if (!forwarding_or_crypto_engine_lcore(conf) || conf->running) + if ((!forwarding_or_crypto_engine_lcore(conf) && + !conf->ded_to_feature) || conf->running) continue; (void)start_one_cpu(lcore); @@ -2122,12 +1934,9 @@ static bool transmit_thread_running(portid_t portid) /* Start queues for new port. */ int assign_queues(portid_t portid) { - const struct port_conf *port_conf = &port_config[portid]; - bitmask_t rxtmpmask = port_conf->rx_cpu_affinity; - bitmask_t txtmpmask = port_conf->tx_cpu_affinity; int rc; - rc = assign_port_receive_queues(portid, &rxtmpmask); + rc = assign_port_receive_queues(portid); if (rc != 0) goto exit; @@ -2135,14 +1944,14 @@ int assign_queues(portid_t portid) if (transmit_thread_running(portid)) goto startcpus; - rc = assign_port_transmit_queues(portid, &txtmpmask); + rc = assign_port_transmit_queues(portid); if (rc != 0) { unsigned int lcore; FOREACH_FORWARD_LCORE(lcore) { struct lcore_conf *conf = lcore_conf[lcore]; unassign_port_receive_queues(portid, conf); - synchronize_rcu(); + dp_rcu_synchronize(); pkt_ring_empty(portid); stop_cpus(); } @@ -2159,14 +1968,15 @@ int assign_queues(portid_t portid) /* Called from QoS when transmit needs to be activated. 
*/ int enable_transmit_thread(portid_t portid) { - const struct port_conf *port_conf = &port_config[portid]; - bitmask_t tmpmask = port_conf->tx_cpu_affinity; int ret; + if (!dpdk_eth_if_port_started(portid)) + return -1; + if (transmit_thread_running(portid)) return 0; - ret = assign_port_transmit_queues(portid, &tmpmask); + ret = assign_port_transmit_queues(portid); if (ret == 0) start_cpus(); @@ -2188,14 +1998,14 @@ void disable_transmit_thread(portid_t portid) unassign_port_transmit_queues(portid, conf); } - synchronize_rcu(); + dp_rcu_synchronize(); pkt_ring_empty(portid); stop_cpus(); } bool port_uses_queue_state(uint16_t portid) { - struct port_conf *port_conf = &port_config[portid]; + struct port_alloc *port_alloc = &port_allocations[portid]; struct rte_eth_dev *dev = &rte_eth_devices[portid]; /* @@ -2204,21 +2014,21 @@ bool port_uses_queue_state(uint16_t portid) */ return !strncmp(dev->data->name, "eth_vhost", 9) && - port_conf->tx_queues > 1 && port_conf->rx_queues > 1; + port_alloc->tx_queues > 1 && port_alloc->rx_queues > 1; } void set_port_uses_queue_state(uint16_t portid, bool val) { - struct port_conf *port_conf = &port_config[portid]; + struct port_alloc *port_alloc = &port_allocations[portid]; - CMM_STORE_SHARED(port_conf->uses_queue_state, val); + CMM_STORE_SHARED(port_alloc->uses_queue_state, val); } bool get_port_uses_queue_state(uint16_t portid) { - struct port_conf *port_conf = &port_config[portid]; + struct port_alloc *port_alloc = &port_allocations[portid]; - return CMM_ACCESS_ONCE(port_conf->uses_queue_state); + return CMM_ACCESS_ONCE(port_alloc->uses_queue_state); } void reset_port_all_queue_state(uint16_t portid) @@ -2277,6 +2087,7 @@ void track_port_queue_state(uint16_t portid, uint16_t queue_id, bool rx, void set_port_queue_state(uint16_t portid) { struct port_conf *port_conf = &port_config[portid]; + struct port_alloc *port_alloc = &port_allocations[portid]; unsigned int lcore; bool percoreq = true; unsigned int q = 0; @@ -2286,7 
+2097,7 @@ void set_port_queue_state(uint16_t portid) bitmask_copy(&temp_mask, &port_conf->tx_enabled_queues); RTE_LCORE_FOREACH(lcore) { - if (q >= port_conf->tx_queues || + if (q >= port_alloc->tx_queues || !bitmask_isset(&temp_mask, q)) { percoreq = false; break; @@ -2312,9 +2123,9 @@ void set_port_queue_state(uint16_t portid) /* Steal mbuf pool on an existing port (for use creating control packets) */ struct rte_mempool *mbuf_pool(unsigned int portid) { - const struct port_conf *port_conf = &port_config[portid]; + const struct port_alloc *port_alloc = &port_allocations[portid]; - return port_conf->rx_pool; + return port_alloc->rx_pool; } /* Create a standard mbuf pool */ @@ -2362,21 +2173,23 @@ static uint16_t mbuf_pool_init(void) } /* How many mbufs are needed for each device? */ - for (portid = 0; portid < DATAPLANE_MAX_PORTS; ++portid) { - const struct port_conf *port_conf = &port_config[portid]; + for (portid = 0; portid < nb_ports_total; ++portid) { + const struct port_alloc *port_alloc = &port_allocations[portid]; - if (!rte_eth_dev_is_valid_port(portid)) + if (!bitmask_isset(&enabled_port_mask, portid) || + !rte_eth_dev_is_valid_port(portid)) continue; - socketid = port_conf->socketid; + socketid = port_alloc->socketid; - bufs_per_socket[socketid] += port_conf->buffers; + bufs_per_socket[socketid] += + port_alloc->buffers + SHADOW_IO_RING_SIZE; /* device may need larger buffer size */ - if (port_conf->buf_size > buf_size[socketid]) { - buf_size[socketid] = port_conf->buf_size; - if (max_mbuf_sz < port_conf->buf_size) - max_mbuf_sz = port_conf->buf_size; + if (port_alloc->buf_size > buf_size[socketid]) { + buf_size[socketid] = port_alloc->buf_size; + if (max_mbuf_sz < port_alloc->buf_size) + max_mbuf_sz = port_alloc->buf_size; } } @@ -2390,6 +2203,9 @@ static uint16_t mbuf_pool_init(void) bufs_per_socket[socketid] += (num_fwd_lcores + 1) * NUMA_POOL_MBUF_CACHE_SIZE; + /* account for buffers in ring for spathintf */ + bufs_per_socket[socketid] += 
SHADOW_IO_RING_SIZE; + /* Align to optimum size for mempool */ unsigned int nbufs = rte_align32pow2(bufs_per_socket[socketid]) - 1; @@ -2402,6 +2218,11 @@ static uint16_t mbuf_pool_init(void) pool = mbuf_pool_create(name, nbufs, NUMA_POOL_MBUF_CACHE_SIZE, bufsz, socketid); if (pool == NULL) { + RTE_LOG(NOTICE, DATAPLANE, + "Failed to create pool %s of %u mbufs size %uM in socket %d\n", + name, nbufs, (bufsz * nbufs) / (1024*1024u), + socketid); + if (rte_errno != ENOMEM) rte_panic("mbuf %s create failed: %s\n", name, rte_strerror(rte_errno)); @@ -2410,29 +2231,24 @@ static uint16_t mbuf_pool_init(void) rte_panic("mbuf %s no space for %u bufs\n", name, nbufs); - RTE_LOG(NOTICE, DATAPLANE, - "Not enough memory for pool of %u mbufs size %uM\n", - nbufs, (bufsz * nbufs) / (1024*1024u)); nbufs /= 2; goto retry; } - DP_DEBUG(INIT, DEBUG, DATAPLANE, - "Created %s mbuf pool size %u %uM\n", name, - nbufs, (bufsz * nbufs) / (1024*1024u)); + RTE_LOG(INFO, DATAPLANE, + "Created %s mbuf pool size %u %uM in socket %d\n", name, + nbufs, (bufsz * nbufs) / (1024*1024u), socketid); numa_pool[socketid] = pool; } /* Assign mbuf pool for each device */ -#ifdef HAVE_RTE_ETH_DEV_COUNT_AVAIL - for (portid = 0; portid < rte_eth_dev_count_avail(); ++portid) { -#else - for (portid = 0; portid < rte_eth_dev_count(); ++portid) { -#endif - struct port_conf *port_conf = &port_config[portid]; - - port_conf->rx_pool = numa_pool[port_conf->socketid]; + for (portid = 0; portid < nb_ports_total; ++portid) { + struct port_alloc *port_alloc = &port_allocations[portid]; + + if (!bitmask_isset(&enabled_port_mask, portid)) + continue; + port_alloc->rx_pool = numa_pool[port_alloc->socketid]; } return max_mbuf_sz; @@ -2441,30 +2257,30 @@ static uint16_t mbuf_pool_init(void) /* Initialize interface specific mbuf pool. 
*/ int mbuf_pool_init_portid(const portid_t portid) { - struct port_conf *port_conf = &port_config[portid]; + struct port_alloc *port_alloc = &port_allocations[portid]; /* * mbuf pool can't be destroyed so may exist * from previous use of this port. */ - if (port_conf->rx_pool == NULL) { + if (port_alloc->rx_pool == NULL) { unsigned int buf_size = RTE_MBUF_DEFAULT_BUF_SIZE; - int socketid = port_conf->socketid; + int socketid = port_alloc->socketid; - if (port_conf->buf_size > buf_size) - buf_size = port_conf->buf_size; + if (port_alloc->buf_size > buf_size) + buf_size = port_alloc->buf_size; /* Align to optimum size for mempool */ - unsigned int nbufs = rte_align32pow2(port_conf->buffers) - 1; + unsigned int nbufs = rte_align32pow2(port_alloc->buffers) - 1; char name[RTE_MEMPOOL_NAMESIZE]; snprintf(name, RTE_MEMPOOL_NAMESIZE, "mbuf_%u", portid); - port_conf->rx_pool = mbuf_pool_create(name, nbufs, + port_alloc->rx_pool = mbuf_pool_create(name, nbufs, MBUF_CACHE_SIZE_DEFAULT, buf_size, socketid); - if (port_conf->rx_pool == NULL) { + if (port_alloc->rx_pool == NULL) { RTE_LOG(ERR, DATAPLANE, "could not create pool %s with %u bufs\n", name, nbufs); @@ -2554,18 +2370,17 @@ get_driver_param(const char *driver_name, uint32_t speed_capa) return param; } -static bitmask_t online_slave_mask(void) +static bitmask_t all_lcores_mask(void) { unsigned int lcore; - bitmask_t online; + bitmask_t all; - memset(&online, 0, sizeof(online)); + memset(&all, 0, sizeof(all)); - FOREACH_FORWARD_LCORE(lcore) { - bitmask_set(&online, lcore); - } + FOREACH_FORWARD_LCORE(lcore) + bitmask_set(&all, lcore); - return online; + return all; } static bitmask_t fwding_core_mask(void) @@ -2578,7 +2393,7 @@ static bitmask_t fwding_core_mask(void) FOREACH_FORWARD_LCORE(lcore) { conf = lcore_conf[lcore]; - if (forwarding_lcore(conf)) + if (forwarding_lcore(conf) || conf->ded_to_feature) bitmask_set(&fwding, lcore); } @@ -2608,9 +2423,9 @@ static void cpuset_update(void) } } -static int 
master_worker_event_handler(zloop_t *loop __unused, - zmq_pollitem_t *item, - void *arg __unused) +static int main_worker_event_handler(zloop_t *loop __unused, + zmq_pollitem_t *item, + void *arg __unused) { uint64_t seqno; @@ -2639,35 +2454,35 @@ static int master_worker_event_handler(zloop_t *loop __unused, strerror(errno)); return -1; } - rcu_thread_online(); + dp_rcu_thread_online(); /* Call vhost event handler */ vhost_event_handler(); - rcu_thread_offline(); + dp_rcu_thread_offline(); } return 0; } -static pthread_t master_worker_thread; -static struct master_worker_thread_info { +static pthread_t main_worker_thread; +static struct main_worker_thread_info { int cpushield_fd; int vhost_fd; -} master_worker_info; +} main_worker_info; /* Handle thread cancellation */ -static void master_worker_cleanup(void *arg __unused) +static void main_worker_cleanup(void *arg __unused) { - rcu_unregister_thread(); + dp_rcu_unregister_thread(); } -static void *master_worker_thread_fn(void *args) +static void *main_worker_thread_fn(void *args) { - struct master_worker_thread_info *info = - (struct master_worker_thread_info *)args; + struct main_worker_thread_info *info = + (struct main_worker_thread_info *)args; - pthread_setname_np(pthread_self(), "dataplane/master_worker"); - pthread_cleanup_push(master_worker_cleanup, NULL); + pthread_setname_np(pthread_self(), "dataplane/main_worker"); + pthread_cleanup_push(main_worker_cleanup, NULL); - /* poll event fd to wakeup master_worker thread*/ + /* poll event fd to wakeup main_worker thread*/ zmq_pollitem_t event_poll[] = { {.fd = info->cpushield_fd, .events = ZMQ_POLLIN, @@ -2680,19 +2495,19 @@ static void *master_worker_thread_fn(void *args) }; int ev_count = ARRAY_SIZE(event_poll); - rcu_register_thread(); - rcu_thread_offline(); + dp_rcu_register_thread(); + dp_rcu_thread_offline(); while (!zsys_interrupted) { if (zmq_poll(event_poll, ev_count, -1) < 0) { if (errno == EINTR) continue; RTE_LOG(ERR, DATAPLANE, - "master_worker 
poll failed: %s\n", + "main_worker poll failed: %s\n", strerror(errno)); break; /* error detected */ } - (void)master_worker_event_handler(NULL, event_poll, NULL); + (void)main_worker_event_handler(NULL, event_poll, NULL); } pthread_cleanup_pop(1); return NULL; @@ -2700,36 +2515,36 @@ static void *master_worker_thread_fn(void *args) /* * Create a new thread to handle CPU shield changes. We do this as we call out - * to an external script, and we do not want that to block the master thread. + * to an external script, and we do not want that to block the main thread. */ -static void master_worker_thread_init(void) +static void main_worker_thread_init(void) { - master_worker_info.cpushield_fd = eventfd(0, EFD_NONBLOCK); - if (master_worker_info.cpushield_fd < 0) - rte_panic("Cannot open cpu_shield fd for master_worker\n"); + main_worker_info.cpushield_fd = eventfd(0, EFD_NONBLOCK); + if (main_worker_info.cpushield_fd < 0) + rte_panic("Cannot open cpu_shield fd for main_worker\n"); - master_worker_info.vhost_fd = eventfd(0, EFD_NONBLOCK); - if (master_worker_info.vhost_fd < 0) - rte_panic("Cannot open vhost fd for master_worker\n"); + main_worker_info.vhost_fd = eventfd(0, EFD_NONBLOCK); + if (main_worker_info.vhost_fd < 0) + rte_panic("Cannot open vhost fd for main_worker\n"); vhost_event_init(); - if (pthread_create(&master_worker_thread, NULL, - master_worker_thread_fn, &master_worker_info) < 0) + if (pthread_create(&main_worker_thread, NULL, + main_worker_thread_fn, &main_worker_info) < 0) rte_panic("cpu_shield thread creation failed\n"); } -static void master_worker_thread_cleanup(void) +static void main_worker_thread_cleanup(void) { int join_rc; - pthread_cancel(master_worker_thread); - join_rc = pthread_join(master_worker_thread, NULL); + pthread_cancel(main_worker_thread); + join_rc = pthread_join(main_worker_thread, NULL); if (join_rc != 0) RTE_LOG(ERR, DATAPLANE, - "master_worker thread join failed, rc %i\n", join_rc); - 
close(master_worker_info.cpushield_fd); - close(master_worker_info.vhost_fd); + "main_worker thread join failed, rc %i\n", join_rc); + close(main_worker_info.cpushield_fd); + close(main_worker_info.vhost_fd); } /* @@ -2748,33 +2563,33 @@ static void cpuset_init(void) void register_forwarding_cores(void) { - /* wake up master_worker thread for cpu_shield fd */ + /* wake up main_worker thread for cpu_shield fd */ static const uint64_t incr = 1; - if (write(master_worker_info.cpushield_fd, &incr, sizeof(incr)) < 0) + if (write(main_worker_info.cpushield_fd, &incr, sizeof(incr)) < 0) RTE_LOG(NOTICE, DATAPLANE, - "master_worker cpu shield event write failed: %s\n", + "main_worker cpu shield event write failed: %s\n", strerror(errno)); } -int set_master_worker_vhost_event_fd(void) +int set_main_worker_vhost_event_fd(void) { - /* wake up master_worker thread for vhost fd */ + /* wake up main_worker thread for vhost fd */ static const uint64_t incr = 1; - if (write(master_worker_info.vhost_fd, &incr, sizeof(incr)) < 0) { + if (write(main_worker_info.vhost_fd, &incr, sizeof(incr)) < 0) { RTE_LOG(NOTICE, DATAPLANE, - "master_worker vhost event fd write failed: %s\n", + "main_worker vhost event fd write failed: %s\n", strerror(errno)); return -1; } return 0; } -static int port_initial_conf(portid_t portid, struct rte_eth_conf *dev_conf) +static int port_conf_final(portid_t portid, struct rte_eth_conf *dev_conf) { - struct rte_eth_dev *dev = &rte_eth_devices[portid]; struct rte_eth_dev_info dev_info; + struct port_alloc *port_alloc = &port_allocations[portid]; if (!dev_conf) return -1; @@ -2783,49 +2598,34 @@ static int port_initial_conf(portid_t portid, struct rte_eth_conf *dev_conf) rte_eth_dev_info_get(portid, &dev_info); - dev_conf->intr_conf.lsc = (dev->data->dev_flags & + dev_conf->intr_conf.lsc = (port_alloc->dev_flags & RTE_ETH_DEV_INTR_LSC) ? 
1 : 0; + dev_conf->rxmode.offloads = port_alloc->rx_conf.offloads; + dev_conf->rxmode.mq_mode = port_alloc->rx_mq_mode; + /* DPDK 18.08 errors if offload flags don't match PMD caps */ -#if RTE_VERSION >= RTE_VERSION_NUM(18, 8, 0, 0) if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_VLAN_FILTER) dev_conf->rxmode.offloads |= DEV_RX_OFFLOAD_VLAN_FILTER; if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_VLAN_STRIP) dev_conf->rxmode.offloads |= DEV_RX_OFFLOAD_VLAN_STRIP; - /* Default in 18.11, flag is gone */ -#if RTE_VERSION < RTE_VERSION_NUM(18, 11, 0, 0) - if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_CRC_STRIP) - dev_conf->rxmode.offloads |= DEV_RX_OFFLOAD_CRC_STRIP; -#endif dev_conf->rx_adv_conf.rss_conf.rss_hf &= dev_info.flow_type_rss_offloads; -#endif - /* Want VLAN offload, refcount and multisegment */ -#if RTE_VERSION >= RTE_VERSION_NUM(18, 8, 0, 0) - dev_conf->txmode.offloads &= ~(DEV_TX_OFFLOAD_SCTP_CKSUM | - DEV_TX_OFFLOAD_TCP_CKSUM | DEV_TX_OFFLOAD_UDP_CKSUM); - /* - * We may need to transmit a jumbo frame, or prepend to a - * cloned packet and both of these require multiple segment - * support for TX, so request it. - * - * However, not on net_ring - it doesn't care about - * multi-segment packets, but doesn't advertise support ether. + /* If we want VLAN offload, but don't have it, + * continue but issue a warning. 
*/ - if (strcmp(dev_info.driver_name, "net_ring") && - strcmp(dev_info.driver_name, "net_bonding")) - dev_conf->txmode.offloads |= - DEV_TX_OFFLOAD_MULTI_SEGS; - - if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_VLAN_INSERT) { - if (!is_device_mlx5(portid)) - dev_conf->txmode.offloads |= DEV_TX_OFFLOAD_VLAN_INSERT; - } else - RTE_LOG(ERR, DATAPLANE, - "Driver %s missing vlan insertion capability\n", - dev_info.driver_name); -#endif + if (port_alloc->tx_conf.offloads & DEV_TX_OFFLOAD_VLAN_INSERT) { + if (!(dev_info.tx_offload_capa & DEV_TX_OFFLOAD_VLAN_INSERT)) { + port_alloc->tx_conf.offloads &= + ~DEV_TX_OFFLOAD_VLAN_INSERT; + RTE_LOG(WARNING, DATAPLANE, + "Driver %s missing hardware VLAN insertion capability; performance may be reduced.\n", + dev_info.driver_name); + } + } + + dev_conf->txmode.offloads = port_alloc->tx_conf.offloads; DP_DEBUG(INIT, INFO, DATAPLANE, "Port %d, tx_offloads 0x%lx, rx_offloads 0x%lx\n", @@ -2834,9 +2634,11 @@ static int port_initial_conf(portid_t portid, struct rte_eth_conf *dev_conf) return 0; } -static int port_conf_init_portid(portid_t portid) +static int port_conf_init(portid_t portid) { + struct rte_eth_dev *dev = &rte_eth_devices[portid]; struct port_conf *port_conf = &port_config[portid]; + struct port_alloc *port_alloc = &port_allocations[portid]; int socketid = rte_eth_dev_socket_id(portid); struct rte_eth_dev_info dev_info; const struct rxtx_param *parm; @@ -2845,44 +2647,51 @@ static int port_conf_init_portid(portid_t portid) uint16_t q; uint8_t r; uint16_t pf_max_rx_queues, pf_max_tx_queues; + uint8_t tx_desc_vm_multiplier; if (socketid < 0) /* SOCKET_ID_ANY */ socketid = 0; - port_conf->socketid = socketid; + port_alloc->socketid = socketid; rte_eth_dev_info_get(portid, &dev_info); parm = get_driver_param(dev_info.driver_name, dev_info.speed_capa); - port_conf->rx_desc = parm->rx_desc; - port_conf->tx_desc = parm->tx_desc; + port_alloc->rx_desc = parm->rx_desc; + port_alloc->tx_desc = parm->tx_desc; if (hypervisor_id() && - 
!(parm->drv_flags & DRV_PARAM_VIRTUAL)) - port_conf->tx_desc = 8 * port_conf->tx_desc; - if (port_conf->rx_desc > dev_info.rx_desc_lim.nb_max) { - port_conf->rx_desc = dev_info.rx_desc_lim.nb_max; + !(parm->drv_flags & DRV_PARAM_VIRTUAL)) { + if (parm->tx_desc_vm_multiplier) + tx_desc_vm_multiplier = parm->tx_desc_vm_multiplier; + else + tx_desc_vm_multiplier = MAX_TX_DESC_VM_MULTIPLIER; + port_alloc->tx_desc = tx_desc_vm_multiplier * + port_alloc->tx_desc; + } + if (port_alloc->rx_desc > dev_info.rx_desc_lim.nb_max) { + port_alloc->rx_desc = dev_info.rx_desc_lim.nb_max; DP_DEBUG(INIT, INFO, DATAPLANE, "Lowering rx buf to max supported %d for port %u\n", dev_info.tx_desc_lim.nb_max, portid); } - if (port_conf->tx_desc > dev_info.tx_desc_lim.nb_max) { - port_conf->tx_desc = dev_info.tx_desc_lim.nb_max; + if (port_alloc->tx_desc > dev_info.tx_desc_lim.nb_max) { + port_alloc->tx_desc = dev_info.tx_desc_lim.nb_max; DP_DEBUG(INIT, INFO, DATAPLANE, "Lowering tx buf to max supported %d for port %u\n", dev_info.tx_desc_lim.nb_max, portid); } - port_conf->rx_queues = parm->max_rxq; - port_conf->tx_queues = rte_lcore_count(); + port_alloc->rx_queues = parm->max_rxq; + port_alloc->tx_queues = rte_lcore_count(); - port_conf->buf_size = dev_info.min_rx_bufsize + MBUF_OVERHEAD; - port_conf->rx_cpu_affinity = online_slave_mask(); - port_conf->tx_cpu_affinity = online_slave_mask(); + port_alloc->buf_size = dev_info.min_rx_bufsize + MBUF_OVERHEAD; + port_alloc->rx_cpu_affinity = all_lcores_mask(); + port_alloc->tx_cpu_affinity = all_lcores_mask(); bitmask_zero(&port_conf->tx_enabled_queues); bitmask_zero(&port_conf->rx_enabled_queues); /* reduce Rx queues if limited by device or system */ - if (port_conf->rx_queues > avail_cores) - port_conf->rx_queues = avail_cores; + if (port_alloc->rx_queues > avail_cores) + port_alloc->rx_queues = avail_cores; /* If an adapter has VMDQ support, the start of the virtual * machine queues may not overlap with the non-VMDQ queues. 
@@ -2898,16 +2707,16 @@ static int port_conf_init_portid(portid_t portid) pf_max_tx_queues = dev_info.max_tx_queues; } - if (port_conf->rx_queues > pf_max_rx_queues || + if (port_alloc->rx_queues > pf_max_rx_queues || parm->drv_flags & DRV_PARAM_USE_ALL_RXQ) - port_conf->rx_queues = pf_max_rx_queues; + port_alloc->rx_queues = pf_max_rx_queues; /* Account for worst case Rx buffers */ - port_conf->buffers = port_conf->rx_queues * + port_alloc->buffers = port_alloc->rx_queues * (parm->rx_desc + parm->extra); if (parm->match && strstr(parm->match, "bond")) - port_conf->buffers *= DATAPLANE_SLAVE_MULTIPLIER; + port_alloc->buffers *= DATAPLANE_MEMBER_MULTIPLIER; /* If device does not have enough TX queues for each lcore * then disable percoreq mode. @@ -2915,10 +2724,10 @@ static int port_conf_init_portid(portid_t portid) * Further, some devices, flagged as DRV_PARAM_LIMITTXQ, * cannot support more TX queues than RX queues. */ - if (port_conf->tx_queues > pf_max_tx_queues || + if (port_alloc->tx_queues > pf_max_tx_queues || parm->drv_flags & DRV_PARAM_NO_DIRECT || ((parm->drv_flags & DRV_PARAM_LIMITTXQ) && - port_conf->tx_queues > port_conf->rx_queues)) { + port_alloc->tx_queues > port_alloc->rx_queues)) { if (parm->drv_flags & DRV_PARAM_USE_ALL_TXQ) { port_conf->max_rings = pf_max_tx_queues; } else { @@ -2934,7 +2743,7 @@ static int port_conf_init_portid(portid_t portid) * so reduce the number of queues to use * accordingly. */ - max_txq = port_conf->rx_queues / 2; + max_txq = port_alloc->rx_queues / 2; /* * Make sure that we still have at least one @@ -2954,19 +2763,19 @@ static int port_conf_init_portid(portid_t portid) if (parm->drv_flags & DRV_PARAM_LIMITTXQ) max_txq = RTE_MIN(max_txq, - RTE_MIN(port_conf->tx_queues, - port_conf->rx_queues)); + RTE_MIN(port_alloc->tx_queues, + port_alloc->rx_queues)); else /* * Don't ask for more queues than there * are cores, since they won't be used. 
*/ max_txq = RTE_MIN(max_txq, - port_conf->tx_queues); + port_alloc->tx_queues); port_conf->max_rings = max_txq; } - port_conf->tx_queues = port_conf->max_rings; + port_alloc->tx_queues = port_conf->max_rings; port_conf->percoreq = false; } else { port_conf->percoreq = true; @@ -2974,15 +2783,15 @@ static int port_conf_init_portid(portid_t portid) } port_conf->nrings = port_conf->max_rings; - for (q = 0; q < port_conf->tx_queues; q++) + for (q = 0; q < port_alloc->tx_queues; q++) bitmask_set(&port_conf->tx_enabled_queues, q); - for (q = 0; q < port_conf->rx_queues; q++) + for (q = 0; q < port_alloc->rx_queues; q++) bitmask_set(&port_conf->rx_enabled_queues, q); DP_DEBUG(INIT, DEBUG, DATAPLANE, "Port %u %s rx_queues %d tx_queues %d percoreq %d\n", portid, dev_info.driver_name, - port_conf->rx_queues, port_conf->tx_queues, + port_alloc->rx_queues, port_alloc->tx_queues, port_conf->percoreq ? 1 : 0); tx_pkt_ring_size = parm->tx_pkt_ring_size ? parm->tx_pkt_ring_size : @@ -3006,36 +2815,94 @@ static int port_conf_init_portid(portid_t portid) /* If not percoreq or QoS is enabled then there will * need to be a pkt ring. 
*/ - port_conf->buffers += tx_pkt_ring_size * port_conf->max_rings; + port_alloc->buffers += tx_pkt_ring_size * port_conf->max_rings; /* Overhead of every Tx queue being full */ - port_conf->buffers += port_conf->tx_desc * port_conf->tx_queues; + port_alloc->buffers += port_alloc->tx_desc * port_alloc->tx_queues; - /* Want VLAN offload, refcount and multisegment */ - port_conf->tx_conf = dev_info.default_txconf; -#if RTE_VERSION < RTE_VERSION_NUM(18, 8, 0, 0) - port_conf->tx_conf.txq_flags = ETH_TXQ_FLAGS_NOXSUMS; -#endif + /* Defaults from PMD and eth_base_conf */ + port_alloc->tx_conf = dev_info.default_txconf; + port_alloc->tx_conf.offloads |= eth_base_conf.txmode.offloads; + port_alloc->rx_conf = dev_info.default_rxconf; + port_alloc->rx_conf.offloads |= eth_base_conf.rxmode.offloads; + port_alloc->rx_mq_mode = eth_base_conf.rxmode.mq_mode; /* This avoids head of line blocking when one queue is overloaded. */ - port_conf->rx_conf = dev_info.default_rxconf; - port_conf->rx_conf.rx_drop_en = 1; + port_alloc->rx_conf.rx_drop_en = 1; + + /* Set offloads from conf file */ + port_alloc->rx_conf.offloads |= parm->rx_offloads; + port_alloc->rx_conf.offloads &= ~parm->neg_rx_offloads; + port_alloc->tx_conf.offloads |= parm->tx_offloads; + port_alloc->tx_conf.offloads &= ~parm->neg_tx_offloads; + if (parm->rx_mq_mode_set) + port_alloc->rx_mq_mode = parm->rx_mq_mode; + + /* Potentially restrict device capabilities */ + port_alloc->dev_flags = dev->data->dev_flags; + port_alloc->dev_flags |= parm->dev_flags; + port_alloc->dev_flags &= ~parm->neg_dev_flags; DP_DEBUG(INIT, INFO, DATAPLANE, "Port %u %s on socket %d (mbufs %u) (rx %u) (tx %u)\n", - portid, dev_info.driver_name, port_conf->socketid, - port_conf->buffers, port_conf->rx_desc, port_conf->tx_desc); + portid, dev_info.driver_name, port_alloc->socketid, + port_alloc->buffers, port_alloc->rx_desc, port_alloc->tx_desc); return 0; } +/* setup data structures per-port */ +static int eth_port_init(portid_t portid) +{ + 
int rc; + + rc = rte_eth_dev_owner_set(portid, &owner); + if (rc < 0) { + RTE_LOG(NOTICE, DATAPLANE, "Port%d failed to set owner!\n", + portid); + goto fail; + } + + rc = eth_port_config(portid); + if (rc < 0) { + RTE_LOG(NOTICE, DATAPLANE, + "Port%d failed to configure!\n", portid); + goto fail; + } + + return 0; + +fail: + bitmask_clear(&enabled_port_mask, portid); + return rc; +} + +/* teardown data structures per-portid */ +static void eth_port_uninit(portid_t portid) +{ + uint8_t q; + struct port_conf *port_conf = &port_config[portid]; + int rc; + + linkwatch_port_unconfig(portid); + bitmask_clear(&enabled_port_mask, portid); + + for (q = 0; q < MAX_TX_QUEUE_PER_PORT; q++) { + if (port_conf->pkt_ring[q]) { + rte_ring_free(port_conf->pkt_ring[q]); + port_conf->pkt_ring[q] = NULL; + } + } + + rc = rte_eth_dev_owner_unset(portid, owner.id); + if (rc < 0) + RTE_LOG(NOTICE, DATAPLANE, "Port%d failed to unset owner!\n", + portid); +} + int insert_port(portid_t port_id) { -#ifdef HAVE_RTE_DEV_REMOVE struct rte_eth_dev_info dev_info; -#else - char name[RTE_ETH_NAME_MAX_LEN]; -#endif if (port_id >= DATAPLANE_MAX_PORTS) { RTE_LOG(ERR, DATAPLANE, @@ -3048,7 +2915,7 @@ int insert_port(portid_t port_id) bitmask_clear(&linkup_port_mask, port_id); if_enable_poll(port_id); - if (port_conf_init_portid(port_id) < 0) { + if (port_conf_init(port_id) < 0) { RTE_LOG(ERR, DATAPLANE, "insert_port(%u): ring init failed\n", port_id); goto failed; @@ -3060,32 +2927,25 @@ int insert_port(portid_t port_id) goto failed; } - if (eth_port_init(port_id, 1) != 0) { + if (eth_port_init(port_id) != 0) { RTE_LOG(ERR, DATAPLANE, "insert_port(%u): failed to init port\n", port_id); goto failed; } - if (setup_interface_portid(port_id) != 0) { - RTE_LOG(ERR, DATAPLANE, - "insert_port(%u): cannot setup interface\n", - port_id); - eth_port_uninit_portid(port_id); - goto failed; - } - return 0; failed: -#ifdef HAVE_RTE_DEV_REMOVE rte_eth_dev_info_get(port_id, &dev_info); rte_dev_remove(dev_info.device); 
-#else - rte_eth_dev_detach(port_id, name); -#endif return -1; } +void remove_port(portid_t port_id) +{ + eth_port_uninit(port_id); +} + static bitmask_t generate_crypto_engine_set(void) { bitmask_t cores, fwding_cores, crypto_cores; @@ -3093,7 +2953,7 @@ static bitmask_t generate_crypto_engine_set(void) memset(&crypto_cores, 0, sizeof(crypto_cores)); - cores = online_slave_mask(); + cores = online_lcores_mask(); fwding_cores = fwding_core_mask(); RTE_LCORE_FOREACH(i) { @@ -3121,7 +2981,7 @@ unsigned int probe_crypto_engines(bool *sticky) } crypto_cpus = generate_crypto_engine_set(); if (bitmask_isempty(&crypto_cpus)) - crypto_cpus = online_slave_mask(); + crypto_cpus = online_lcores_mask(); bitmask_sprint(&crypto_cpus, tmp, sizeof(tmp)); DP_DEBUG(INIT, INFO, DATAPLANE, @@ -3136,14 +2996,14 @@ unsigned int probe_crypto_engines(bool *sticky) * set for future allocations. If no mask or an empty mask is * passed, then auto probe the system, disabling stickyness. */ -int set_crypto_engines(const char *str, bool *sticky) +int set_crypto_engines(const uint8_t *bytes, uint8_t len, bool *sticky) { bitmask_t cores; int rc; char tmp[BITMASK_STRSZ]; bool tmp_sticky; - rc = str ? bitmask_parse(&cores, str) : 1; + rc = bitmask_parse_bytes(&cores, bytes, len); if (rc || bitmask_isempty(&cores)) { crypto_sticky = false; @@ -3161,10 +3021,8 @@ int set_crypto_engines(const char *str, bool *sticky) *sticky = crypto_sticky; return bitmask_numset(&crypto_cpus); } -/* - * Assign one forwarding thread to have crypto processing role - */ -int crypto_assign_engine(int crypto_dev_id) + +int next_available_crypto_lcore(void) { int lcore; @@ -3175,6 +3033,14 @@ int crypto_assign_engine(int crypto_dev_id) return -1; } + return lcore; +} + +/* + * Assign one forwarding thread to have crypto processing role + */ +int crypto_assign_engine(int crypto_dev_id, int lcore) +{ /* we have a winner! 
*/ struct lcore_conf *conf = lcore_conf[lcore]; if (crypto_attach_pmd(&conf->crypt.pmd_list, crypto_dev_id, lcore) < 0) @@ -3218,6 +3084,21 @@ void crypto_unassign_from_engine(int lcore) } } +static void +reassign_queues_for_all_ports(void) +{ + portid_t portid; + + for (portid = 0; portid < DATAPLANE_MAX_PORTS; ++portid) { + struct port_alloc *port_alloc = &port_allocations[portid]; + + if (!bitmask_isset(&enabled_port_mask, portid)) + continue; + set_port_affinity(portid, &port_alloc->rx_cpu_affinity, + &port_alloc->tx_cpu_affinity); + } +} + /* Configure ethernet port */ int eth_port_config(portid_t portid) { @@ -3229,7 +3110,7 @@ int eth_port_config(portid_t portid) }; struct rte_eth_conf dev_conf; - ret = port_initial_conf(portid, &dev_conf); + ret = port_conf_final(portid, &dev_conf); if (ret < 0) return ret; @@ -3447,7 +3328,7 @@ static void set_privilege(void) caps = cap_from_text( "cap_net_admin=pe cap_net_raw=pe cap_chown=pe " "cap_dac_override=pe cap_ipc_lock=pe " - "cap_sys_admin=p"); + "cap_sys_admin=p cap_sys_nice=pe"); if (!caps) rte_panic("%s: cap_from_text failed:%s\n", __func__, strerror(errno)); @@ -3457,7 +3338,7 @@ static void set_privilege(void) cap_free(caps); } -static void port_conf_init(uint8_t start_id, uint8_t num_ports) +static void init_port_configurations(uint8_t start_id, uint8_t num_ports) { portid_t portid; @@ -3466,7 +3347,7 @@ static void port_conf_init(uint8_t start_id, uint8_t num_ports) if (!bitmask_isset(&enabled_port_mask, portid)) continue; - if (port_conf_init_portid(portid) < 0) { + if (port_conf_init(portid) < 0) { RTE_LOG(ERR, DATAPLANE, "port_conf_init failed for port %u\n", portid); bitmask_clear(&enabled_port_mask, portid); @@ -3475,7 +3356,7 @@ static void port_conf_init(uint8_t start_id, uint8_t num_ports) } /* setup data structures per-port */ -int eth_port_init(uint8_t start_id, uint8_t num_ports) +static void init_eth_ports(uint8_t start_id, uint8_t num_ports) { portid_t portid; int rc; @@ -3488,57 +3369,12 @@ 
int eth_port_init(uint8_t start_id, uint8_t num_ports) continue; } - rc = eth_port_config(portid); + rc = eth_port_init(portid); if (rc < 0) { RTE_LOG(NOTICE, DATAPLANE, "Port%d failed to configure!\n", portid); continue; } - - struct ether_addr mac_addr; - rte_eth_macaddr_get(portid, &mac_addr); - - int socketid = port_config[portid].socketid; - struct ifnet *ifp - = if_hwport_alloc(portid, &mac_addr, socketid); - - if (!ifp) { - /* only happens if name lookup is confused. */ - RTE_LOG(ERR, DATAPLANE, - "Failed to create ifp for port %u\n", portid); - rc = -EINVAL; - goto fail; - } - - const struct rte_eth_dev *dev = &rte_eth_devices[portid]; - DP_DEBUG(INIT, DEBUG, DATAPLANE, - "%u: %s address %s\n", portid, - dev->data->name, ether_ntoa(&mac_addr)); - - rcu_assign_pointer(ifport_table[portid], ifp); - } - return 0; - -fail: - bitmask_clear(&enabled_port_mask, portid); - return rc; -} - -/* teardown data structures per-portid */ -void eth_port_uninit_portid(portid_t portid) -{ - uint8_t q; - struct port_conf *port_conf = &port_config[portid]; - - shadow_uninit_port(portid); - linkwatch_port_unconfig(portid); - bitmask_clear(&enabled_port_mask, portid); - - for (q = 0; q < MAX_TX_QUEUE_PER_PORT; q++) { - if (port_conf->pkt_ring[q]) { - rte_ring_free(port_conf->pkt_ring[q]); - port_conf->pkt_ring[q] = NULL; - } } } @@ -3557,15 +3393,16 @@ static void pkt_ring_destroy(void) } } -bool is_master_thread(void) +bool is_main_thread(void) { pthread_t self; self = pthread_self(); - return pthread_equal(self, master_pthread); + return pthread_equal(self, main_pthread); } -uint16_t __externally_visible +__FOR_EXPORT +uint16_t fal_tx_pkt_burst(uint16_t tx_port, struct rte_mbuf **bufs, uint16_t nb_bufs) { uint16_t n; @@ -3592,9 +3429,9 @@ fal_tx_pkt_burst(uint16_t tx_port, struct rte_mbuf **bufs, uint16_t nb_bufs) } else { /* * If we have reached here then we must have come - * though master_eth_tx, and a mutex taken, and so if + * though main_eth_tx, and a mutex taken, and so 
if * directpath is subsequently taken there will be no - * contention for queue 0 with other master core pthreads + * contention for queue 0 with other main core pthreads */ queue = 0; } @@ -3615,19 +3452,22 @@ full_hwq: __cold_label; } -void __externally_visible +__FOR_EXPORT +void fal_pkt_mark_set_framed(struct rte_mbuf *m) { pktmbuf_mdata_set(m, PKT_MDATA_FAL_FRAMED); } -bool __externally_visible +__FOR_EXPORT +bool fal_pkt_mark_is_framed(struct rte_mbuf *m) { return pktmbuf_mdata_exists(m, PKT_MDATA_FAL_FRAMED); } -int __externally_visible +__FOR_EXPORT +int fal_prepare_for_header_change(struct rte_mbuf **m, uint16_t header_len) { return pktmbuf_prepare_for_header_change(m, header_len); @@ -3637,13 +3477,14 @@ int main(int argc, char **argv) { int ret; - portid_t nb_ports; uint16_t max_mbuf_sz = RTE_MBUF_DEFAULT_BUF_SIZE; zactor_t *vplane_auth = NULL; struct call_rcu_data *rcu_data; pthread_t rcu_thread; unsigned int i; char tmp[BITMASK_STRSZ]; + bitmask_t default_enabled_port_mask; + uint16_t nb_ports = 0; /* * Ensure this is not fully buffered, such that do not get lines @@ -3666,9 +3507,9 @@ main(int argc, char **argv) argc -= ret; argv += ret; - parse_config(config_file); + parse_config(); - parse_platform_config(PLATFORM_FILE); + parse_platform_config(get_platform_cfg_file()); /* Must be before any threads are created, and before eal_init */ cpuset_init(); @@ -3686,8 +3527,8 @@ main(int argc, char **argv) /* Setup signal handlers */ set_signal_handlers(); - /* keep track of master thread for consistency checking */ - master_pthread = pthread_self(); + /* keep track of main thread for consistency checking */ + main_pthread = pthread_self(); ret = backplane_init(&platform_cfg.bp_list); if (ret < 0) @@ -3721,10 +3562,11 @@ main(int argc, char **argv) } open_log(); + debug_init(); RTE_LOG(INFO, DATAPLANE, - "%s version %s - %s\n", - DATAPLANE_PROGNAME, DATAPLANE_VERSION, DATAPLANE_COPYRIGHT); + "%s version %s\n", + DATAPLANE_PROGNAME, DATAPLANE_VERSION); 
interface_init(); incomplete_interface_init(); @@ -3739,61 +3581,66 @@ main(int argc, char **argv) rte_timer_subsystem_init(); + snprintf(owner.name, RTE_ETH_MAX_OWNER_NAME_LEN, "%s", progname); + ret = rte_eth_dev_owner_new(&owner.id); + if (ret < 0) + rte_panic("Can't get owner id\n"); + parse_driver_config(&driver_param, drv_cfg_file); parse_driver_config(&driver_param, drv_override_cfg_file); -#ifdef HAVE_RTE_ETH_DEV_COUNT_AVAIL - nb_ports = rte_eth_dev_count_avail(); -#else - nb_ports = rte_eth_dev_count(); -#endif - if (nb_ports > DATAPLANE_MAX_PORTS) { - DP_DEBUG(INIT, NOTICE, DATAPLANE, - "Too many Ethernet ports %u, downgrade to %u\n", - nb_ports, DATAPLANE_MAX_PORTS); - nb_ports = DATAPLANE_MAX_PORTS; + + bitmask_zero(&default_enabled_port_mask); + RTE_ETH_FOREACH_DEV(i) { + nb_ports_total = MAX(nb_ports_total, i + 1); + if (nb_ports_total > DATAPLANE_MAX_PORTS) { + DP_DEBUG(INIT, NOTICE, DATAPLANE, + "Too many Ethernet ports %u, downgrade to %u\n", + nb_ports, DATAPLANE_MAX_PORTS); + nb_ports_total = DATAPLANE_MAX_PORTS; + break; + } + nb_ports++; + bitmask_set(&default_enabled_port_mask, i); } /* default to enabling all ports */ if (bitmask_numset(&enabled_port_mask) == 0 && nb_ports != 0) - for (i = 0; i < nb_ports; i++) - bitmask_set(&enabled_port_mask, i); + bitmask_copy(&enabled_port_mask, &default_enabled_port_mask); for (i = 0; i < RTE_MAX_ETHPORTS; i++) bitmask_set(&poll_port_mask, i); bitmask_sprint(&enabled_port_mask, tmp, sizeof(tmp)); DP_DEBUG(INIT, INFO, DATAPLANE, - "%u ports available (enabled mask %s)\n", - nb_ports, tmp); + "%u ports (%u total) available (enabled mask %s)\n", + nb_ports, nb_ports_total, tmp); random_init(); lcore_init(); link_state_init(); - port_conf_init(0, nb_ports); + if (dp_rcu_setup()) + rte_panic("Setting up dataplane RCU environment failed\n"); + + init_port_configurations(0, nb_ports_total); if (nb_ports) max_mbuf_sz = mbuf_pool_init(); - dp_event(DP_EVT_INIT, 0, NULL, 0, 0, NULL); + udp_handler_init(); - 
pl_load_plugins(); + feature_load_plugins(); pl_graph_validate(); - shadow_init(); + + dp_event(DP_EVT_INIT, 0, NULL, 0, 0, NULL); + npf_init(); session_init(); nexthop_tbl_init(); ip6_init(); - - if (eth_port_init(0, nb_ports) != 0) - rte_panic("Can't init eth ports\n"); - - udp_handler_init(); + init_eth_ports(0, nb_ports_total); fragment_tables_timer_init(); - mcast_init_ipv4(); - mcast_init_ipv6(); mpls_init(); - vxlan_init(); ip_id_init(); inet_netlink_init(); @@ -3805,12 +3652,14 @@ main(int argc, char **argv) dp_crypto_init(); vrf_init(); qos_init(); - master_worker_thread_init(); + main_worker_thread_init(); + /* needs to be after features have had a chance to register */ + dp_lcore_events_init(rte_lcore_id()); console_setup(); device_server_init(); - rcu_register_thread(); + dp_rcu_register_thread(); if (rcu_defer_register_thread()) rte_panic("rcu defer register thread failed\n"); @@ -3824,44 +3673,43 @@ main(int argc, char **argv) DP_DEBUG(INIT, INFO, DATAPLANE, "naming of rcu thread failed\n"); - master_loop(); + main_loop(); crypto_pmd_remove_all(); stop_all_ports(); - close_all_ports(); - dp_crypto_shutdown(); - - vxlan_destroy(); device_server_destroy(); - shadow_destroy(); console_destroy(); zactor_destroy(&vplane_auth); - udp_handler_destroy(); interface_cleanup(); incomplete_interface_cleanup(); pkt_ring_destroy(); vrf_cleanup(); npf_cleanup(); - mcast_stop_ipv6(); - mcast_stop_ipv4(); + capture_destroy(); dp_event(DP_EVT_UNINIT, 0, NULL, 0, 0, NULL); + close_all_regular_ports(); + dp_lcore_events_teardown(rte_lcore_id()); + feature_unload_plugins(); platform_config_cleanup(); fal_cleanup(); - master_worker_thread_cleanup(); + close_all_backplane_ports(); + main_worker_thread_cleanup(); /* wait for all RCU handlers */ - rcu_barrier(); + dp_rcu_barrier(); + + dp_crypto_shutdown(); + udp_handler_destroy(); + rcu_defer_unregister_thread(); - rcu_unregister_thread(); + dp_rcu_unregister_thread(); lcore_cleanup(); -#ifdef HAVE_RTE_EAL_CLEANUP 
rte_eal_cleanup(); -#endif RTE_LOG(NOTICE, DATAPLANE, "normal exit\n"); @@ -3906,6 +3754,23 @@ void load_estimator(void) scale_rate_stats(&conf->crypt_stats, &packets, NULL); dp_crypto_periodic(&conf->crypt.pmd_list); } + + if (conf->do_feature) { + if (conf->feat.dp_lcore_feat_get_rx) { + conf->feat.dp_lcore_feat_get_rx(id, &packets); + scale_rate_stats(&conf->feat_rx_stats, &packets, + NULL); + } + if (conf->feat.dp_lcore_feat_get_tx) { + conf->feat.dp_lcore_feat_get_tx(id, &packets); + scale_rate_stats(&conf->feat_tx_stats, &packets, + NULL); + } + + } + + packets = crypto_fwd[id].fwd_cnt; + scale_rate_stats(&conf->crypt_fwd_stats, &packets, NULL); } } @@ -3919,6 +3784,7 @@ void show_per_core(FILE *f) unsigned int id, i; char tmp[BITMASK_STRSZ]; bitmask_t fwding_cores; + char feat_name[DP_LCORE_FEAT_MAX_NAME_SIZE + 2]; if (!wr) return; @@ -3959,6 +3825,21 @@ void show_per_core(FILE *f) : "no"); jsonw_end_object(wr); } + + if (conf->do_feature && conf->feat.dp_lcore_feat_get_rx) { + jsonw_start_object(wr); + snprintf(feat_name, sizeof(feat_name), "[%s]", + conf->feat.name); + jsonw_string_field(wr, "interface", feat_name); + jsonw_uint_field(wr, "queue", 0); + jsonw_uint_field(wr, "packets", + conf->feat_rx_stats.last_packets); + jsonw_uint_field(wr, "rate", + conf->feat_rx_stats.packet_rate); + jsonw_uint_field(wr, "idle", 0); + jsonw_string_field(wr, "directpath", "no"); + jsonw_end_object(wr); + } jsonw_end_array(wr); jsonw_name(wr, "tx"); @@ -3998,6 +3879,27 @@ void show_per_core(FILE *f) jsonw_uint_field(wr, "idle", cpq->gov.nap); jsonw_end_object(wr); } + if (conf->do_feature && conf->feat.dp_lcore_feat_get_tx) { + jsonw_start_object(wr); + snprintf(feat_name, sizeof(feat_name), "[%s]", + conf->feat.name); + jsonw_string_field(wr, "interface", feat_name); + jsonw_uint_field(wr, "packets", + conf->feat_tx_stats.last_packets); + jsonw_uint_field(wr, "rate", + conf->feat_tx_stats.packet_rate); + jsonw_uint_field(wr, "idle", 0); + jsonw_end_object(wr); + } + + 
if (conf->crypt_fwd_stats.packet_rate) { + jsonw_start_object(wr); + jsonw_string_field(wr, "interface", "[crypt-fwd]"); + jsonw_uint_field(wr, "rate", + conf->crypt_fwd_stats.packet_rate); + jsonw_uint_field(wr, "idle", 0); + jsonw_end_object(wr); + } jsonw_end_array(wr); jsonw_end_object(wr); } @@ -4016,7 +3918,7 @@ void show_per_core(FILE *f) static void show_ifp_affinity(struct ifnet *ifp, void *arg) { - const struct port_conf *port_conf = &port_config[ifp->if_port]; + const struct port_alloc *port_alloc = &port_allocations[ifp->if_port]; char tmp[BITMASK_STRSZ]; unsigned int lcore, q; json_writer_t *wr = arg; @@ -4040,16 +3942,20 @@ static void show_ifp_affinity(struct ifnet *ifp, void *arg) jsonw_name(wr, ifp->if_name); jsonw_start_object(wr); + bitmask_t affinity_online; bitmask_t cpu_affinity; - bitmask_or(&cpu_affinity, &port_conf->rx_cpu_affinity, - &port_conf->tx_cpu_affinity); - bitmask_sprint(&cpu_affinity, tmp, sizeof(tmp)); + bitmask_or(&cpu_affinity, &port_alloc->rx_cpu_affinity, + &port_alloc->tx_cpu_affinity); + affinity_online = cpu_affinity_online(&cpu_affinity); + bitmask_sprint(&affinity_online, tmp, sizeof(tmp)); jsonw_string_field(wr, "affinity", tmp); - bitmask_sprint(&port_conf->rx_cpu_affinity, tmp, sizeof(tmp)); + affinity_online = cpu_affinity_online(&port_alloc->rx_cpu_affinity); + bitmask_sprint(&affinity_online, tmp, sizeof(tmp)); jsonw_string_field(wr, "rx_affinity", tmp); - bitmask_sprint(&port_conf->tx_cpu_affinity, tmp, sizeof(tmp)); + affinity_online = cpu_affinity_online(&port_alloc->tx_cpu_affinity); + bitmask_sprint(&affinity_online, tmp, sizeof(tmp)); jsonw_string_field(wr, "tx_affinity", tmp); bitmask_sprint(&rx_mask, tmp, sizeof(tmp)); @@ -4071,10 +3977,10 @@ int show_affinity(FILE *f, int argc, char **argv) jsonw_pretty(wr, true); if (argc == 1) - ifnet_walk(show_ifp_affinity, wr); + dp_ifnet_walk(show_ifp_affinity, wr); else { while (--argc > 0) { - struct ifnet *ifp = ifnet_byifname(*++argv); + struct ifnet *ifp = 
dp_ifnet_byifname(*++argv); if (!ifp) { fprintf(f, "Unknown interface: %s\n", *argv); @@ -4092,27 +3998,20 @@ int show_affinity(FILE *f, int argc, char **argv) void set_port_affinity(portid_t portid, const bitmask_t *rx_mask, const bitmask_t *tx_mask) { - struct port_conf *port_conf = &port_config[portid]; - bitmask_t online_rx_mask; - bitmask_t online_tx_mask; - - online_rx_mask = online_tx_mask = online_slave_mask(); - if (rx_mask) { - bitmask_and(&online_rx_mask, rx_mask, &online_rx_mask); - if (bitmask_isempty(&online_rx_mask)) - online_rx_mask = online_slave_mask(); - } - if (tx_mask) { - bitmask_and(&online_tx_mask, tx_mask, &online_tx_mask); - if (bitmask_isempty(&online_tx_mask)) - online_tx_mask = online_slave_mask(); - } + struct port_alloc *port_alloc = &port_allocations[portid]; - port_conf->rx_cpu_affinity = online_rx_mask; - port_conf->tx_cpu_affinity = online_tx_mask; + if (rx_mask) + port_alloc->rx_cpu_affinity = *rx_mask; + else + port_alloc->rx_cpu_affinity = all_lcores_mask(); + + if (tx_mask) + port_alloc->tx_cpu_affinity = *tx_mask; + else + port_alloc->tx_cpu_affinity = all_lcores_mask(); /* reassign queues to make the affinity take effect */ - if (any_assigned_queues(portid)) { + if (dpdk_eth_if_port_started(portid)) { unassign_queues(portid); assign_queues(portid); } @@ -4132,3 +4031,181 @@ switch_port_process_burst(portid_t portid, struct rte_mbuf *pkts[], uint16_t nb) { process_burst(portid, pkts, nb); } + +bool dp_lcore_is_active(unsigned int lcore) +{ + const struct lcore_conf *conf; + + if (lcore >= get_lcore_max()) + return false; + + conf = lcore_conf[lcore]; + if (CMM_LOAD_SHARED(conf->running)) + return true; + + return false; +} + +enum dp_lcore_use dp_lcore_get_current_use(unsigned int lcore) +{ + struct lcore_conf *conf; + + if (lcore > get_lcore_max()) + return DP_LCORE_INVALID; + + if (lcore == rte_get_master_lcore()) + return DP_LCORE_MAIN; + + conf = lcore_conf[lcore]; + /* + * Crypto is not considered to be 'FEATURE' as it 
can run alongside + * forwarders, i.e crypto does not take dedicated control of the core + */ + if (conf->do_feature) + return DP_LCORE_FEATURE; + + return DP_LCORE_FORWARDER; +} + +int +dp_allocate_lcore_to_feature(unsigned int lcore, + struct dp_lcore_feat *feat) +{ + struct lcore_conf *conf; + portid_t portid; + unsigned int id; + enum dp_lcore_use core_use; + int core_count; + const bitmask_t all_lcores = all_lcores_mask(); + + if (!feat->dp_lcore_feat_fn) + return -EINVAL; + + if (lcore > get_lcore_max()) { + RTE_LOG(ERR, DATAPLANE, + "Request to allocate invalid core %d to feature\n", + lcore); + return -EINVAL; + } + + conf = lcore_conf[lcore]; + if (dp_lcore_get_current_use(lcore) == DP_LCORE_MAIN) { + RTE_LOG(ERR, DATAPLANE, + "Request to allocate main core %d to feature\n", + lcore); + return -EINVAL; + } + + if (dp_lcore_get_current_use(lcore) == DP_LCORE_FEATURE) { + RTE_LOG(ERR, DATAPLANE, + "Request to allocate feature core %d to feature\n", + lcore); + return -EINVAL; + } + + /* + * If crypto is on this core (either due to config or arbitrary + * allocation then reject). This may change with some of the crypto + * rework being planned. + */ + if (conf->do_crypto) { + RTE_LOG(ERR, DATAPLANE, + "Request to allocate crypto core %d to feature\n", + lcore); + return -EINVAL; + } + + /* Check all ports to see if one is configured for this core */ + for (portid = 0; portid < DATAPLANE_MAX_PORTS; ++portid) { + struct port_alloc *port_alloc = &port_allocations[portid]; + + /* + * If the affinity is the same as the all_lcores_mask + * then not configured. 
+ */ + if (bitmask_equal(&all_lcores, &port_alloc->rx_cpu_affinity) && + bitmask_equal(&all_lcores, &port_alloc->tx_cpu_affinity)) + continue; + + if (bitmask_isset(&port_alloc->rx_cpu_affinity, lcore) || + bitmask_isset(&port_alloc->tx_cpu_affinity, lcore)) { + RTE_LOG(ERR, DATAPLANE, + "Request to allocate cfged forwarding core %d to feature\n", + lcore); + return -EBUSY; + } + } + + /* Must have at least one forwarder left after this change. */ + core_count = 0; + FOREACH_FORWARD_LCORE(id) { + if (id == lcore) + continue; + core_use = dp_lcore_get_current_use(id); + if (core_use == DP_LCORE_FORWARDER) + core_count++; + } + if (core_count == 0) { + RTE_LOG(ERR, DATAPLANE, + "Request to allocate feature core %d would leave no forwarders\n", + lcore); + return -EINVAL; + } + + /* + * Indicate that the core is dedicated to a feature, and so should + * not be used for forwarding. + */ + conf->ded_to_feature = true; + + /* + * Cause the ports to be reassigned, so that the forwarding thread + * will no longer use this core + */ + reassign_queues_for_all_ports(); + + conf->feat = *feat; + CMM_STORE_SHARED(conf->do_feature, true); + + /* wait for the core to be available to start a thread on it again */ + stop_one_cpu(lcore); + start_one_cpu(lcore); + + return 0; +} + +int dp_unallocate_lcore_from_feature(unsigned int lcore) +{ + struct lcore_conf *conf; + + if (lcore > get_lcore_max()) { + RTE_LOG(ERR, DATAPLANE, + "Request to unallocate invalid core %d from feature\n", + lcore); + return -EINVAL; + } + + conf = lcore_conf[lcore]; + if (dp_lcore_get_current_use(lcore) != DP_LCORE_FEATURE) { + RTE_LOG(ERR, DATAPLANE, + "Request to unallocate feature core %d, but not a feature core\n", + lcore); + return -EINVAL; + } + + if (CMM_LOAD_SHARED(conf->do_feature)) { + CMM_STORE_SHARED(conf->do_feature, false); + memset(&conf->feat, 0, sizeof(conf->feat)); + conf->ded_to_feature = false; + + stop_one_cpu(lcore); + + reassign_queues_for_all_ports(); + } else { + RTE_LOG(ERR, 
DATAPLANE, + "Request to unallocate feature core %d, but not a feature core\n", + lcore); + return -EINVAL; + } + return 0; +} diff --git a/src/main.h b/src/main.h index 8a692f19..dd7918b1 100644 --- a/src/main.h +++ b/src/main.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2011-2017 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -37,12 +37,13 @@ extern volatile bool running; extern bitmask_t linkup_port_mask; /* has carrier */ extern bitmask_t active_port_mask; extern bitmask_t poll_port_mask; +extern bitmask_t enabled_port_mask; extern unsigned int slowpath_mtu; struct rte_mempool *mbuf_pool(unsigned int portid); struct rte_mempool *mbuf_pool_create(const char *name, unsigned int n, - unsigned int cache_sz, + unsigned int cache_size, unsigned long roomsz, int socket_id); @@ -75,24 +76,16 @@ enum l2_packet_type { extern sigjmp_buf hotplug_jmpbuf; extern bool hotplug_inprogress; -void if_output(struct ifnet *ifp, struct rte_mbuf *m, - struct ifnet *input_ifp, uint16_t proto); void local_packet(struct ifnet *ifp, struct rte_mbuf *m); +void local_packet_internal(struct ifnet *ifp, struct rte_mbuf *m); -void start_port(portid_t port, unsigned int flags); -void stop_port(portid_t port); -void force_stop_port(portid_t port); -void stop_all_ports(void); const char *link_duplexstr(unsigned int duplex); void link_state_init(void); void send_port_status(uint32_t port_id, const struct rte_eth_link *link); int show_affinity(FILE *f, int argc, char **argv); void set_port_affinity(portid_t portid, const bitmask_t *rx_mask, const bitmask_t *tx_mask); -void set_speed(struct ifnet *ifp, uint32_t link_speeds); uint64_t get_link_modes(struct ifnet *ifp); -int linkwatch_port_config(portid_t portid); -void linkwatch_port_unconfig(portid_t portid); int assign_queues(portid_t portid); void unassign_queues(portid_t 
portid); @@ -103,26 +96,23 @@ void reset_port_all_queue_state(uint16_t port); bool port_uses_queue_state(uint16_t port); int mbuf_pool_init_portid(const portid_t portid); void pkt_ring_empty(portid_t portid); -int eth_port_init(uint8_t start_id, uint8_t num_ports); +void pkt_ring_output(struct ifnet *ifp, struct rte_mbuf *m); int insert_port(portid_t port_id); -void eth_port_uninit_portid(portid_t portid); +void remove_port(portid_t port_id); int launch_one_lcore(void *arg); int send_device_event(const char *name, bool is_add); void device_server_init(void); void device_server_destroy(void); int eth_port_config(portid_t portid); +int eth_port_configure(portid_t portid, struct rte_eth_conf *dev_conf); unsigned int probe_crypto_engines(bool *sticky); -int set_crypto_engines(const char *str, bool *sticky); -int crypto_assign_engine(int crypto_dev_id); +int set_crypto_engines(const uint8_t *bytes, uint8_t len, bool *sticky); +int crypto_assign_engine(int crypto_dev_id, int lcore); void crypto_unassign_from_engine(int lcore); void register_forwarding_cores(void); -int reconfigure_queues(portid_t portid, uint16_t nb_rx_qs, uint16_t nb_tx_qs); -int reconfigure_pkt_len(struct ifnet *ifp, uint32_t mtu); -typedef int (*reconfigure_port_cb_fn)(struct ifnet *ifp, - struct rte_eth_conf *dev_conf); -int reconfigure_port(struct ifnet *ifp, - struct rte_eth_conf *dev_conf, - reconfigure_port_cb_fn reconfigure_port_cb); +int reconfigure_queues(portid_t portid, uint16_t nb_rx_queues, + uint16_t nb_tx_queues); + /* Rate states */ struct rate_stats { uint32_t packet_rate; @@ -131,8 +121,8 @@ struct rate_stats { uint64_t last_bytes; struct timeval last_time; }; -void scale_rate_stats(struct rate_stats *stats, uint64_t *packets, - uint64_t *bytes); +void scale_rate_stats(struct rate_stats *stats, const uint64_t *packets, + const uint64_t *bytes); #define DRV_PARAM_LIMITTXQ (1<<0) /* size of rxq == size of txq */ #define DRV_PARAM_VIRTUAL (1<<1) /* is a "virtual" device */ @@ -149,6 
+139,15 @@ struct rxtx_param { uint16_t extra; uint16_t drv_flags; uint16_t tx_pkt_ring_size; + uint64_t rx_offloads; + uint64_t neg_rx_offloads; + uint8_t tx_desc_vm_multiplier; + uint64_t tx_offloads; + uint64_t neg_tx_offloads; + uint64_t dev_flags; + uint64_t neg_dev_flags; + uint64_t rx_mq_mode; + bool rx_mq_mode_set; }; #define MAX_RX_QUEUE_PER_PORT 20 @@ -160,13 +159,9 @@ struct rxtx_param { #define MAX_RX_DESC_PER_QUEUE 65536 #define MAX_TX_DESC_PER_QUEUE 65536 -#define MBUF_CACHE_SIZE_DEFAULT 32 /* per-core buffer cache size */ - -bool is_master_thread(void); +#define MAX_TX_DESC_VM_MULTIPLIER 8 -#define ASSERT_MASTER() \ -{ if (!is_master_thread()) rte_panic("not on master thread\n"); \ -} +#define MBUF_CACHE_SIZE_DEFAULT 32 /* per-core buffer cache size */ void set_port_uses_queue_state(uint16_t portid, bool val); bool get_port_uses_queue_state(uint16_t portid); @@ -175,10 +170,15 @@ void track_port_queue_state(uint16_t portid, uint16_t queue_id, bool rx, bool enable); void switch_port_process_burst(portid_t portid, struct rte_mbuf *pkts[], uint16_t nb); -int set_master_worker_vhost_event_fd(void); +int set_main_worker_vhost_event_fd(void); + +extern bool single_cpu; + +extern uint16_t nb_ports_total; + +int next_available_crypto_lcore(void); -void pkt_burst_setup(void); -void pkt_burst_flush(void); -void pkt_burst_free(void); +void enable_crypto_fwd(unsigned int lcore); +void disable_crypto_fwd(unsigned int lcore); #endif /* MAIN_H */ diff --git a/src/meson.build b/src/meson.build new file mode 100644 index 00000000..820b462b --- /dev/null +++ b/src/meson.build @@ -0,0 +1,355 @@ +# SPDX-License-Identifier: LGPL-2.1-only +# Copyright (c) 2020-2021, AT&T Intellectual Property. All rights reserved. 
+ +jsonw_library = shared_library( + 'vyatta-jsonw', + sources: ['json_writer.c'], + include_directories: public_include, + install: true, + version: '1.0.0' +) + +internal_inc = include_directories( + '.', + 'npf', + 'netinet6', + 'pipeline' +) + +crypto_sources = files( + 'crypto/crypto.c', + 'crypto/crypto_engine.c', + 'crypto/crypto_policy.c', + 'crypto/crypto_rte_pmd.c', + 'crypto/crypto_sadb.c', + 'crypto/esp.c', + 'crypto/vti.c', + 'crypto/xfrm_client.c', + 'crypto/crypto_pmd.c' +) + +ipv6_sources = files( + 'netinet6/in6.c', + 'netinet6/ip6_commands.c', + 'netinet6/ip6_forward.c', + 'netinet6/ip6_icmp.c', + 'netinet6/ip6_main.c', + 'netinet6/ip6_mroute.c', + 'netinet6/ip6_options.c', + 'netinet6/nd6_nbr.c', + 'netinet6/route_v6.c', + 'netinet6/scope6.c' +) + +mpls_sources = files( + 'mpls/mpls_commands.c', + 'mpls/mpls_forward.c', + 'mpls/mpls_label_table.c', + 'mpls/mpls_netlink.c' +) + +session_sources = files( + 'session/session.c', + 'session/session_cmds.c', + 'session/session_feature.c', + 'session/session_op.c', + 'session/session_watch.c' +) + +core_sources = files( + 'arp.c', + 'backplane.c', + 'bpf_filter.c', + 'bridge_vlan_set.c', + 'commands.c', + 'protobuf.c', + 'protobuf_util.c', + 'config.c', + 'control.c', + 'cpp_rate_limiter.c', + 'dealer.c', + 'debug.c', + 'dp_event.c', + 'ecmp.c', + 'ether.c', + 'event.c', + 'fal.c', + 'feature_plugin.c', + 'flow_cache.c', + 'if/bridge/bridge.c', + 'if/bridge/bridge_netlink.c', + 'if/bridge/bridge_port.c', + 'if/bridge/switch.c', + 'if/dpdk-eth/hotplug.c', + 'if/dpdk-eth/devinfo.c', + 'if/dpdk-eth/dpdk_eth_if.c', + 'if/dpdk-eth/dpdk_eth_linkwatch.c', + 'if/dpdk-eth/dpdk_lag.c', + 'if/dpdk-eth/fal_lag.c', + 'if/dpdk-eth/vhost.c', + 'if/gre.c', + 'if/ipip_tunnel.c', + 'if/loopback.c', + 'if/macvlan.c', + 'if/vlan/vlan_if.c', + 'if/vlan/vlan_if_netlink.c', + 'if/vxlan.c', + 'if.c', + 'if_ether.c', + 'if_feat.c', + 'if_llatbl.c', + 'in.c', + 'in_cksum.c', + 'ip_forward.c', + 'ip_commands.c', + 'ip_icmp.c', + 
'ip_mcast.c', + 'ip_mcast_fal_interface.c', + 'ip_netlink.c', + 'ip_options.c', + 'ip_output.c', + 'ip_route.c', + 'ip_rt_protobuf.c', + 'iptun_common.c', + 'l2_rx_fltr.c', + 'l2tp/l2tpeth_decap.c', + 'l2tp/l2tpeth_dp.c', + 'l2tp/l2tpeth_netlink.c', + 'lag.c', + 'lcore_sched.c', + 'log.c', + 'lpm/lpm.c', + 'lpm/lpm6.c', + 'mac_limit.c', + 'main.c', + 'controller.c', + 'mstp.c', + 'netinet/ip_mroute.c', + 'netlink.c', + 'nh_common.c', + 'nsh.c', + 'pd_show.c', + 'pktmbuf.c', + 'pathmonitor/pathmonitor_cmds.c', + 'portmonitor/portmonitor_cmds.c', + 'portmonitor/portmonitor_dp.c', + 'power.c', + 'ptp.c', + 'qos_sched.c', + 'qos_dpdk.c', + 'qos_ext_buf_monitor.c', + 'qos_hw.c', + 'qos_hw_show.c', + 'qos_obj_db.c', + 'rcu.c', + 'route.c', + 'route_broker.c', + 'rt_commands.c', + 'rt_tracker.c', + 'storm_ctl.c', + 'sfp.c', + 'switchport.c', + 'udp_handler.c', + 'util.c', + 'vlan_modify.c', + 'vrf.c', + 'shadow.c', + 'zmq_dp.c' +) + +not_for_test_sources = files( + 'capture.c', + 'ip_id.c', + 'team.c', + 'shadow_receive.c' +) + +gpc_sources = files( + 'gpc/gpc_op_mode.c', + 'gpc/gpc_pb_config.c', + 'gpc/gpc_util.c' +) + +npf_sources = files( + 'npf/alg/alg_apt.c', + 'npf/apm/apm.c', + 'npf/alg/alg.c', + 'npf/alg/alg_npf.c', + 'npf/alg/alg_ftp.c', + 'npf/alg/alg_tftp.c', + 'npf/alg/alg_rpc.c', + 'npf/alg/alg_sip.c', + 'npf/alg/sip/sip_request.c', + 'npf/alg/sip/sip_response.c', + 'npf/alg/sip/sip_parse.c', + 'npf/alg/sip/sip_translate.c', + 'npf/cgnat/cgn.c', + 'npf/cgnat/cgn_cmd_cfg.c', + 'npf/cgnat/cgn_cmd_op.c', + 'npf/cgnat/cgn_if.c', + 'npf/cgnat/cgn_log.c', + 'npf/cgnat/cgn_log_rte.c', + 'npf/cgnat/cgn_log_protobuf_zmq.c', + 'npf/cgnat/cgn_map.c', + 'npf/cgnat/cgn_mbuf.c', + 'npf/cgnat/cgn_policy.c', + 'npf/cgnat/cgn_rc.c', + 'npf/cgnat/cgn_session.c', + 'npf/cgnat/cgn_sess2.c', + 'npf/cgnat/cgn_sess_state.c', + 'npf/cgnat/cgn_source.c', + 'npf/cgnat/cgn_test.c', + 'npf/config/gpc_acl_cli.c', + 'npf/config/gpc_cntr.c', + 'npf/config/gpc_db.c', + 
'npf/config/gpc_hw.c', + 'npf/config/npf_attach_point.c', + 'npf/config/npf_auto_attach.c', + 'npf/config/npf_config.c', + 'npf/config/npf_config_state.c', + 'npf/config/npf_dump.c', + 'npf/config/npf_gen_ruleset.c', + 'npf/config/npf_rule_group.c', + 'npf/config/npf_ruleset_type.c', + 'npf/config/pmf_rule.c', + 'npf/config/pmf_parse.c', + 'npf/config/pmf_dump.c', + 'npf/config/pmf_att_rlgrp.c', + 'npf/fragment/ipv4_frag_tbl.c', + 'npf/fragment/ipv4_rsmbl.c', + 'npf/fragment/ipv6_rsmbl.c', + 'npf/fragment/ipv6_rsmbl_tbl.c', + 'npf/nat/nat_cmd_cfg.c', + 'npf/nat/nat_cmd_op.c', + 'npf/nat/nat_pool.c', + 'npf/nat/nat_pool_event.c', + 'npf/grouper2.c', + 'npf/npf_nat64.c', + 'npf/npf_addrgrp.c', + 'npf/npf_apm.c', + 'npf/npf_cache.c', + 'npf/npf_cidr_util.c', + 'npf/npf_cmd.c', + 'npf/npf_cmd_cfg.c', + 'npf/npf_cmd_op.c', + 'npf/npf_dataplane_session.c', + 'npf/npf_disassemble.c', + 'npf/npf_event.c', + 'npf/npf_grouper.c', + 'npf/npf_icmp.c', + 'npf/npf_if.c', + 'npf/npf_if_feat.c', + 'npf/npf_instr.c', + 'npf/npf_match.c', + 'npf/npf_mbuf.c', + 'npf/npf_nat.c', + 'npf/npf_ncgen.c', + 'npf/npf_pack.c', + 'npf/npf_unpack.c', + 'npf/npf_processor.c', + 'npf/npf_ptree.c', + 'npf/npf_rc.c', + 'npf/npf_rte_acl.c', + 'npf/npf_rule_gen.c', + 'npf/npf_ruleset.c', + 'npf/npf_session.c', + 'npf/npf_state.c', + 'npf/npf_state_tcp.c', + 'npf/npf_tblset.c', + 'npf/npf_timeouts.c', + 'npf/npf_vrf.c', + 'npf/rldb.c', + 'npf/zones/npf_zone_private.c', + 'npf/zones/npf_zone_public.c', + 'npf/rproc/npf_ext_action_group.c', + 'npf/rproc/npf_ext_counter.c', + 'npf/rproc/npf_ext_ctrdef.c', + 'npf/rproc/npf_ext_ctrref.c', + 'npf/rproc/npf_ext_log.c', + 'npf/rproc/npf_ext_mark.c', + 'npf/rproc/npf_ext_nat64.c', + 'npf/rproc/npf_ext_nptv6.c', + 'npf/rproc/npf_ext_policer.c', + 'npf/rproc/npf_ext_pathmon.c', + 'npf/rproc/npf_ext_session_limit.c', + 'npf/rproc/npf_ext_setvrf.c', + 'npf/rproc/npf_ext_tag.c', + 'npf/rproc/npf_rproc.c', + 'npf_shim.c' +) + +dpi_sources = files( + 
'npf/dpi/app_cmds.c', + 'npf/dpi/dpi_public.c', + 'npf/dpi/dpi_user.c', + 'npf/dpi/npf_appdb.c', + 'npf/dpi/npf_typedb.c', + 'npf/rproc/npf_ext_app.c', + 'npf/dpi/dpi.c', + 'npf/rproc/npf_ext_appfw.c', + 'npf/rproc/npf_ext_dpi.c', + 'npf/dpi/ndpi.c' +) + +app_group_sources = files( + 'npf/app_group/app_group_cmd.c', + 'npf/app_group/app_group_db.c', + 'npf/app_group/app_group.c' +) + +dataplane_deps = [ + cap_dep, + crypto_dep, + czmq_dep, + dl_dep, + dpdk_dep, + ini_dep, + m_dep, + mnl_dep, + ndpi_dep, + osip2_dep, + pcap_dep, + pipeline_dep, + protobuf_generated_c_dependency, + proto_c_dep, + rte_bus_vdev_dep, + rte_pmd_bond_dep, + rte_pmd_vhost_dep, + swport_dep, + systemd_dep, + threads_dep, + urcu_cds_dep, + urcu_dep, + urcu_qsbr_dep, + zmq_dep +] + +dataplane_common_sources = [ + app_group_sources, + core_sources, + crypto_sources, + dpi_sources, + gpc_sources, + ipv6_sources, + mpls_sources, + npf_sources, + pipeline_node_sources, + pipeline_sources, + pl_fused_gen_c, + protobuf_generated_c, + session_sources +] + +executable( + 'dataplane', + sources: [dataplane_common_sources, not_for_test_sources], + override_options: [ + 'b_ndebug=true' + ], + dependencies: dataplane_deps, + include_directories: [public_include, internal_inc], + link_with: [jsonw_library], + export_dynamic: true, + install: true, + install_dir: get_option('prefix') / get_option('sbindir') +) diff --git a/src/mpls/mpls.h b/src/mpls/mpls.h index 67dfea7f..312189d7 100644 --- a/src/mpls/mpls.h +++ b/src/mpls/mpls.h @@ -1,7 +1,7 @@ /* * MPLS label stack encoding/decoding * - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -11,6 +11,8 @@ #ifndef MPLS_H #define MPLS_H +#include +#include #include #ifndef MPLS_LS_LABEL_MASK diff --git a/src/mpls/mpls_commands.c b/src/mpls/mpls_commands.c index 223bdd34..f421054e 100644 --- a/src/mpls/mpls_commands.c +++ b/src/mpls/mpls_commands.c @@ -3,7 +3,7 @@ * * Copyright (c) 2015 by Brocade Communications Systems, Inc. * All rights reserved. - * Copyright (c) 2017, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017,2019-2020, AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only */ @@ -92,12 +92,12 @@ static int cmd_mifconfig(FILE *f, int argc, char **argv) jsonw_name(wr, "interfaces"); jsonw_start_array(wr); if (argc == 3) - ifnet_walk(mifconfig_up, wr); + dp_ifnet_walk(mifconfig_up, wr); else if (argc > 3 && strcmp(argv[3], "-a") == 0) - ifnet_walk(mifconfig, wr); + dp_ifnet_walk(mifconfig, wr); else { while (--argc > 0) { - struct ifnet *ifp = ifnet_byifname(*++argv); + struct ifnet *ifp = dp_ifnet_byifname(*++argv); if (ifp) mifconfig(ifp, wr); @@ -198,27 +198,36 @@ int cmd_mpls(FILE *f, int argc, char **argv) mpls_label_table_resize(global_label_space_id, size); return 0; - } else if (argc == 3 && !strcmp(argv[1], "ipttlpropagate")) { + } + if (argc == 3 && !strcmp(argv[1], "ipttlpropagate")) { bool enable = !strcmp(argv[2], "enable"); mpls_global_set_ipttlpropagate(enable); return 0; - } else if (argc == 3 && !strcmp(argv[1], "defaultttl")) { + } + if (argc == 3 && !strcmp(argv[1], "defaultttl")) { int ttl = atoi(argv[2]); mpls_global_set_defaultttl(ttl); return 0; - } else if (argc >= 3 && !strcmp(argv[1], "show")) { + } + if (argc >= 3 && !strcmp(argv[1], "show")) { if (!strcmp(argv[2], "tables")) { - mpls_label_table_set_dump(f, -1); + mpls_label_table_set_dump(f, -1, MPLS_LABEL_ALL); return 0; - } else if (!strcmp(argv[2], "ifconfig")) { + } + if (!strcmp(argv[2], "ifconfig")) { cmd_mifconfig(f, argc, argv); return 0; - } else if (!strcmp(argv[2], "config")) { + } + if 
(!strcmp(argv[2], "config")) { mpls_config_dump(f); return 0; } + if (!strcmp(argv[2], "label") && argc >= 4) { + mpls_label_table_set_dump(f, -1, atoi(argv[3])); + return 0; + } } else if (argc >= 3 && !strcmp(argv[1], "oam")) { #define MAX_OAM_LABEL_STACK_DEPTH (MAX_MP_SELECT_LABELS + 1) int opt, opt_index = 0, labelspace = 0; diff --git a/src/mpls/mpls_forward.c b/src/mpls/mpls_forward.c index ce068389..3b5e21bc 100644 --- a/src/mpls/mpls_forward.c +++ b/src/mpls/mpls_forward.c @@ -1,7 +1,7 @@ /* * MPLS forwarder * - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015-2017 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -30,6 +30,7 @@ #include "compat.h" #include "ecmp.h" #include "ether.h" +#include "fal.h" #include "if_var.h" #include "in6.h" #include "in_cksum.h" @@ -42,10 +43,10 @@ #include "mpls/mpls.h" #include "mpls_forward.h" #include "mpls_label_table.h" -#include "nh.h" +#include "nh_common.h" #include "npf/npf.h" #include "npf_shim.h" -#include "pktmbuf.h" +#include "pktmbuf_internal.h" #include "route.h" #include "route_flags.h" #include "route_v6.h" @@ -54,7 +55,7 @@ #include "util.h" #include "vplane_debug.h" #include "vplane_log.h" -#include "vrf.h" +#include "vrf_internal.h" struct mplshdr; @@ -94,7 +95,7 @@ struct mplshdr { * send. The input labels remain in the packet for as long as possible in case * they are needed for ICMP generation or if the packet needs to be punted. * Popped labels are handled by increasing the L2 len. The original packet - * is recovered by restoring the L2 len to ETHER_HDR_LEN. + * is recovered by restoring the L2 len to RTE_ETHER_HDR_LEN. 
* * Pkt: Ethernet hdr | Popped lbls (0..Np) | Remaining lbls (0..Nb) | IP hdr * <--------- l2 len ----------------> @@ -128,7 +129,20 @@ bool mpls_global_get_ipttlpropagate(void) void mpls_global_set_ipttlpropagate(bool enable) { + struct fal_attribute_t sw_attr = { + .id = FAL_SWITCH_ATTR_MPLS_IP_TTL_MODE, + .value.i32 = enable ? FAL_MPLS_TTL_MODE_UNIFORM : + FAL_MPLS_TTL_MODE_PIPE, + }; + int ret; + propagate_ttl = enable ? TTL_PROPAGATE_ENABLED : TTL_PROPAGATE_DISABLED; + + ret = fal_set_switch_attr(&sw_attr); + if (ret < 0 && ret != -EOPNOTSUPP) + RTE_LOG(ERR, MPLS, + "FAL error setting TTL propagation to %s: %s\n", + enable ? "enabled" : "disabled", strerror(-ret)); } int mpls_global_get_defaultttl(void) @@ -138,11 +152,25 @@ int mpls_global_get_defaultttl(void) void mpls_global_set_defaultttl(int ttl) { + struct fal_attribute_t sw_attr; + int ret; + cfg_default_ttl = ttl; if (cfg_default_ttl != -1) default_ttl = ttl; else default_ttl = MAX_TTL; + + sw_attr = (struct fal_attribute_t) { + .id = FAL_SWITCH_ATTR_MPLS_PIPE_TTL, + .value.u8 = default_ttl, + }; + + ret = fal_set_switch_attr(&sw_attr); + if (ret < 0 && ret != -EOPNOTSUPP) + RTE_LOG(ERR, MPLS, + "FAL error setting pipe TTL to %u: %s\n", + default_ttl, strerror(-ret)); } static inline struct mplshdr * @@ -161,7 +189,7 @@ mplshdr_safe(struct rte_mbuf *m) * Validate that the remaining non-L2 data in the first * segment is long enough to contain at least one label */ - len = rte_pktmbuf_data_len(m) - pktmbuf_l2_len(m); + len = rte_pktmbuf_data_len(m) - dp_pktmbuf_l2_len(m); if (unlikely(len < sizeof(struct mplshdr))) return NULL; @@ -203,7 +231,7 @@ mpls_label_cache_adjust(struct rte_mbuf *m, struct mpls_label_cache *cache, uint8_t l2_len) { return (cache->num_labels * sizeof(struct mplshdr)) - - (pktmbuf_l2_len(m) - l2_len); + (dp_pktmbuf_l2_len(m) - l2_len); } /* @@ -248,7 +276,7 @@ mpls_label_cache_write(struct rte_mbuf *m, hdr->ls = label.ls; } - pktmbuf_l2_len(m) = l2_len; + dp_pktmbuf_l2_len(m) = 
l2_len; /* * If not pushing any labels, update TTL in top-most label @@ -296,7 +324,7 @@ swap_labels(struct rte_mbuf *m, /* * Make swapped label part of l2_len as we don't care about it anymore */ - pktmbuf_l2_len(m) += sizeof(struct mplshdr); + dp_pktmbuf_l2_len(m) += sizeof(struct mplshdr); return true; } @@ -309,12 +337,11 @@ pop_label(struct rte_mbuf *m) /* * Make this label part of l2_len as we don't care about it anymore */ - pktmbuf_l2_len(m) += sizeof(struct mplshdr); + dp_pktmbuf_l2_len(m) += sizeof(struct mplshdr); if (mpls_ls_get_bos(hdr->ls)) return true; - else - return false; + return false; } static inline bool @@ -354,7 +381,7 @@ is_mpls_oam(const struct ifnet *ifp, const struct rte_mbuf *m) hdr = mplshdr(m); lssize = 1; - len = rte_pktmbuf_data_len(m) - pktmbuf_l2_len(m) - sizeof(*hdr); + len = rte_pktmbuf_data_len(m) - dp_pktmbuf_l2_len(m) - sizeof(*hdr); while (!mpls_ls_get_bos(hdr->ls)) { if (++lssize > MAX_LABEL_STACK_DEPTH) return false; @@ -402,10 +429,10 @@ mpls_error(struct ifnet *ifp, struct rte_mbuf *m, /* * Note offset of popped labels and restore original packet */ - pop_offset = pktmbuf_l2_len(m) - ETHER_HDR_LEN; - pktmbuf_l2_len(m) = ETHER_HDR_LEN; + pop_offset = dp_pktmbuf_l2_len(m) - RTE_ETHER_HDR_LEN; + dp_pktmbuf_l2_len(m) = RTE_ETHER_HDR_LEN; - len = rte_pktmbuf_data_len(m) - pktmbuf_l2_len(m); + len = rte_pktmbuf_data_len(m) - dp_pktmbuf_l2_len(m); /* * Find the IP header, within reason @@ -469,8 +496,8 @@ mpls_error(struct ifnet *ifp, struct rte_mbuf *m, switch (payload_type) { case MPT_IPV4: { - pktmbuf_l2_len(m) += (lssize * sizeof(struct mplshdr)); - pktmbuf_l3_len(m) = ip->ihl << 2; + dp_pktmbuf_l2_len(m) += (lssize * sizeof(struct mplshdr)); + dp_pktmbuf_l3_len(m) = ip->ihl << 2; if (!ip_valid_packet(m, ip)) { DBG_MPLS_PKTERR(ifp, m, @@ -482,7 +509,7 @@ mpls_error(struct ifnet *ifp, struct rte_mbuf *m, if (n == NULL) return NULL; - pktmbuf_l3_len(n) = pktmbuf_l3_len(m); + dp_pktmbuf_l3_len(n) = dp_pktmbuf_l3_len(m); if 
(icmp_do_exthdr(n, ICMP_EXT_MPLS_LS, ICMP_EXT_MPLS_LS_INCOMING, lstack, lssize * sizeof(struct mplshdr))) { @@ -513,14 +540,14 @@ mpls_error(struct ifnet *ifp, struct rte_mbuf *m, icmp_prepare_send(n); /* restore original l2 length */ - pktmbuf_l2_len(n) -= (lssize * sizeof(struct mplshdr)); + dp_pktmbuf_l2_len(n) -= (lssize * sizeof(struct mplshdr)); break; } case MPT_IPV6: - pktmbuf_l2_len(m) += (lssize * sizeof(struct mplshdr)); - pktmbuf_l3_len(m) = sizeof(*ip6); + dp_pktmbuf_l2_len(m) += (lssize * sizeof(struct mplshdr)); + dp_pktmbuf_l3_len(m) = sizeof(*ip6); ip6 = (struct ip6_hdr *)ip; if (!ip6_valid_packet(m, ip6)) { @@ -535,7 +562,7 @@ mpls_error(struct ifnet *ifp, struct rte_mbuf *m, if (n == NULL) return NULL; - pktmbuf_l3_len(n) = pktmbuf_l3_len(m); + dp_pktmbuf_l3_len(n) = dp_pktmbuf_l3_len(m); if (icmp6_do_exthdr(n, ICMP_EXT_MPLS_LS, ICMP_EXT_MPLS_LS_INCOMING, lstack, lssize * sizeof(struct mplshdr))) { @@ -548,7 +575,7 @@ mpls_error(struct ifnet *ifp, struct rte_mbuf *m, icmp6_prepare_send(n); /* restore original l2 length */ - pktmbuf_l2_len(n) -= (lssize * sizeof(struct mplshdr)); + dp_pktmbuf_l2_len(n) -= (lssize * sizeof(struct mplshdr)); break; default: @@ -559,8 +586,8 @@ mpls_error(struct ifnet *ifp, struct rte_mbuf *m, * Copy layer 2 header and label stack to new packet * Restore pop offset in new packet */ - memcpy(ethhdr(n), ethhdr(m), pktmbuf_l2_len(m)); - pktmbuf_l2_len(n) += pop_offset; + memcpy(ethhdr(n), ethhdr(m), dp_pktmbuf_l2_len(m)); + dp_pktmbuf_l2_len(n) += pop_offset; /* * Set default TTL in all labels in the packet - those in label cache @@ -603,7 +630,7 @@ mpls_oam_ip_exception(struct rte_mbuf *m) unsigned int len; /* Check for OAM packet when pop last label and forward to IP */ - len = rte_pktmbuf_data_len(m) - pktmbuf_l2_len(m); + len = rte_pktmbuf_data_len(m) - dp_pktmbuf_l2_len(m); if (!is_mpls_ip_oam(iphdr(m), len)) return false; @@ -612,12 +639,23 @@ mpls_oam_ip_exception(struct rte_mbuf *m) * punt to OAM daemon (if 
listening) or else drop the packet rather * than forward it. */ - pktmbuf_l2_len(m) = ETHER_HDR_LEN; + dp_pktmbuf_l2_len(m) = RTE_ETHER_HDR_LEN; hdr = mplshdr(m); mpls_ls_set_ttl(&hdr->ls, 1); return true; } +enum nh_fwd_ret { + NH_FWD_FAILURE = -1, + NH_FWD_SUCCESS = 0, + NH_FWD_RESWITCH_IPv4 = 2, + NH_FWD_RESWITCH_IPv6 = 3, + NH_FWD_RESWITCH_MPLS = 4, + NH_FWD_SLOWPATH, + NH_FWD_IPv4, + NH_FWD_IPv6, +}; + /* * Forward an mpls packet to a nexthop. m is a buffer that is known * to hold an MPLS packet encapsulating a payload packet of type @@ -629,7 +667,7 @@ mpls_oam_ip_exception(struct rte_mbuf *m) * the offset of the new top of stack label (if any). */ static inline enum nh_fwd_ret -nh_fwd_mpls(enum nh_type nht, union next_hop_v4_or_v6_ptr nh, +nh_fwd_mpls(struct next_hop *nh, struct rte_mbuf *m, bool have_labels, enum mpls_payload_type payload_type, struct mpls_label_cache *cache, bool *pop) @@ -638,13 +676,13 @@ nh_fwd_mpls(enum nh_type nht, union next_hop_v4_or_v6_ptr nh, const union next_hop_outlabels *labels; unsigned int num_labels; - if (unlikely(nh_get_flags(nht, nh) & RTF_SLOWPATH)) + if (unlikely(nh_get_flags(nh) & RTF_SLOWPATH)) return NH_FWD_SLOWPATH; /* * Impose outlabels, if any */ - labels = nh_get_labels(nht, nh); + labels = nh_get_labels(nh); new_label = nh_outlabels_get_value(labels, 0); num_labels = nh_outlabels_get_cnt(labels); @@ -652,7 +690,7 @@ nh_fwd_mpls(enum nh_type nht, union next_hop_v4_or_v6_ptr nh, *pop = (num_labels == 0); if (new_label == MPLS_IMPLICITNULL || num_labels == 0) { - struct ifnet *ifp = nh_get_if(nht, nh); + struct ifnet *ifp = dp_nh_get_ifp(nh); /* imp-null should be the only outlabel */ assert(num_labels <= 1); @@ -680,29 +718,29 @@ nh_fwd_mpls(enum nh_type nht, union next_hop_v4_or_v6_ptr nh, return NH_FWD_SLOWPATH; return ifp && !is_lo(ifp) ? 
NH_FWD_IPv4 : NH_FWD_RESWITCH_IPv4; - } else if (likely(payload_type == MPT_IPV6)) { + } + if (likely(payload_type == MPT_IPV6)) { if (have_labels && num_labels == 0 && unlikely(mpls_oam_ip_exception(m))) return NH_FWD_SLOWPATH; return ifp && !is_lo(ifp) ? NH_FWD_IPv6 : NH_FWD_RESWITCH_IPv6; - } else - return NH_FWD_FAILURE; - } else { - /* Non-bottom of stack */ + } + return NH_FWD_FAILURE; + } + /* Non-bottom of stack */ - /* - * If the nexthop is unlabeled then drop the packet - */ - if (unlikely(!num_labels)) - return NH_FWD_FAILURE; + /* + * If the nexthop is unlabeled then drop the packet + */ + if (unlikely(!num_labels)) + return NH_FWD_FAILURE; - if (!have_labels || ifp) - return NH_FWD_SUCCESS; - else - return NH_FWD_RESWITCH_MPLS; - } - } else if (have_labels) { + if (!have_labels || ifp) + return NH_FWD_SUCCESS; + return NH_FWD_RESWITCH_MPLS; + } + if (have_labels) { if (!swap_labels(m, labels, cache)) return NH_FWD_FAILURE; } else { @@ -737,7 +775,7 @@ mpls_ecmp_hash(const struct rte_mbuf *m) bos = mpls_ls_get_bos(hdr->ls); hdr++; - len = rte_pktmbuf_data_len(m) - pktmbuf_l2_len(m); + len = rte_pktmbuf_data_len(m) - dp_pktmbuf_l2_len(m); for (label_cnt = 0; label_cnt < MAX_MP_SELECT_LABELS && !bos; label_cnt++) { @@ -762,12 +800,14 @@ mpls_ecmp_hash(const struct rte_mbuf *m) likely(len >= l3offs + sizeof(struct iphdr))) hash = rte_jhash_1word( hash, - ecmp_ipv4_hash(m, pktmbuf_l2_len(m) + l3offs)); + ecmp_ipv4_hash(m, + dp_pktmbuf_l2_len(m) + l3offs)); else if (v4hdr->version == 6 && likely(len >= l3offs + sizeof(struct ip6_hdr))) hash = rte_jhash_1word( hash, - ecmp_ipv6_hash(m, pktmbuf_l2_len(m) + l3offs)); + ecmp_ipv6_hash(m, + dp_pktmbuf_l2_len(m) + l3offs)); } return hash; } @@ -784,19 +824,20 @@ mpls_ecmp_hash(const struct rte_mbuf *m) * remaining labels, or both. 
*/ static inline void nh_eth_output_mpls(enum nh_type nh_type, - union next_hop_v4_or_v6_ptr nh, + struct next_hop *nh, uint8_t ttl, struct rte_mbuf *m, struct mpls_label_cache *cache, struct ifnet *input_ifp) { - struct ether_hdr *hdr; + struct rte_ether_hdr *hdr; unsigned int len; /* * Replace any popped labels with any labels in the cache */ - if (unlikely(!mpls_label_cache_write(m, cache, ttl, ETHER_HDR_LEN))) { - mpls_if_incr_out_errors(nh_get_if(nh_type, nh)); + if (unlikely(!mpls_label_cache_write(m, + cache, ttl, RTE_ETHER_HDR_LEN))) { + mpls_if_incr_out_errors(dp_nh_get_ifp(nh)); rte_pktmbuf_free(m); return; } @@ -804,7 +845,7 @@ static inline void nh_eth_output_mpls(enum nh_type nh_type, /* * Start of buffer should be one eth hdr before the current label. */ - assert(pktmbuf_l2_len(m) == ETHER_HDR_LEN); + assert(dp_pktmbuf_l2_len(m) == RTE_ETHER_HDR_LEN); /* * Set the ethertype (the src and dest mac addrs will be in * the output function. @@ -814,41 +855,64 @@ static inline void nh_eth_output_mpls(enum nh_type nh_type, len = rte_pktmbuf_pkt_len(m); if (nh_type == NH_TYPE_V6GW) { - if (unlikely((nh.v6->flags & RTF_MAPPED_IPV6))) { + + /* must at least have an IPv6 header */ + if (len < sizeof(struct ip6_hdr)) { + mpls_if_incr_out_errors(dp_nh_get_ifp(nh)); + rte_pktmbuf_free(m); + return; + } + + if (unlikely((nh->flags & RTF_MAPPED_IPV6))) { struct next_hop v4nh = { .flags = RTF_GATEWAY, - .gateway = V4MAPPED_IPV6_TO_IPV4( - nh.v6->gateway), - .u.ifp = nh6_get_ifp(nh.v6), + .gateway.address.ip_v4.s_addr = + V4MAPPED_IPV6_TO_IPV4( + nh->gateway.address.ip_v6), + .gateway.type = AF_INET, + .u.ifp = dp_nh_get_ifp(nh), }; - if (ip_l2_resolve_and_output(input_ifp, m, &v4nh, - ETH_P_MPLS_UC)) - mpls_if_incr_out_ucastpkts(nh6_get_ifp(nh.v6), - len); + if (dp_ip_l2_nh_output(input_ifp, m, &v4nh, + ETH_P_MPLS_UC)) + mpls_if_incr_out_ucastpkts( + dp_nh_get_ifp(nh), + len); } else { - struct next_hop_v6 v6nh = { + struct next_hop v6nh = { .flags = RTF_GATEWAY, - 
.gateway = nh.v6->gateway, - .u.ifp = nh6_get_ifp(nh.v6), + .gateway.address.ip_v6 = + nh->gateway.address.ip_v6, + .gateway.type = AF_INET6, + .u.ifp = dp_nh_get_ifp(nh), }; - if (ip6_l2_resolve_and_output(input_ifp, m, - &v6nh, ETH_P_MPLS_UC)) - mpls_if_incr_out_ucastpkts(nh6_get_ifp(nh.v6), - len); + if (dp_ip6_l2_nh_output(input_ifp, m, + &v6nh, ETH_P_MPLS_UC)) + mpls_if_incr_out_ucastpkts( + dp_nh_get_ifp(nh), + len); } } else { assert(nh_type == NH_TYPE_V4GW); struct next_hop v4nh = { .flags = RTF_GATEWAY, - .gateway = nh.v4->gateway, - .u.ifp = nh4_get_ifp(nh.v4), + .gateway.address.ip_v4.s_addr = + nh->gateway.address.ip_v4.s_addr, + .gateway.type = AF_INET, + .u.ifp = dp_nh_get_ifp(nh), }; - if (ip_l2_resolve_and_output(input_ifp, m, &v4nh, - ETH_P_MPLS_UC)) - mpls_if_incr_out_ucastpkts(nh4_get_ifp(nh.v4), len); + /* must at least have an IPv4 header */ + if (len < sizeof(struct iphdr)) { + mpls_if_incr_out_errors(dp_nh_get_ifp(nh)); + rte_pktmbuf_free(m); + return; + } + + if (dp_ip_l2_nh_output(input_ifp, m, &v4nh, + ETH_P_MPLS_UC)) + mpls_if_incr_out_ucastpkts(dp_nh_get_ifp(nh), len); } } @@ -860,7 +924,7 @@ struct mpls_frag_obj_cb { unsigned int pop_offset; uint8_t ttl; enum nh_type nht; - union next_hop_v4_or_v6_ptr nh; + struct next_hop *nh; struct mpls_label_cache *cache; struct mplshdr *remaining_labels; struct ifnet *input_ifp; @@ -887,7 +951,7 @@ nh_mpls_frag_out(struct ifnet *out_ifp, struct rte_mbuf *m, void *obj) } /* Copy any remaining labels into the fragment */ - pktmbuf_l2_len(m) = ETHER_HDR_LEN + fobj->pop_offset; + dp_pktmbuf_l2_len(m) = RTE_ETHER_HDR_LEN + fobj->pop_offset; hdr = mplshdr(m); memcpy(hdr, fobj->remaining_labels, offset - fobj->pop_offset); @@ -898,7 +962,7 @@ nh_mpls_frag_out(struct ifnet *out_ifp, struct rte_mbuf *m, void *obj) static void nh_mpls_ip_fragment(struct ifnet *out_ifp, enum mpls_payload_type payload_type, - enum nh_type nht, union next_hop_v4_or_v6_ptr nh, + enum nh_type nht, struct next_hop *nh, bool 
have_labels, int adjust, uint8_t ttl, struct rte_mbuf *m, struct mpls_label_cache *cache, struct ifnet *input_ifp) @@ -915,11 +979,11 @@ nh_mpls_ip_fragment(struct ifnet *out_ifp, enum mpls_payload_type payload_type, * Note offset of popped labels and reset pkt back to original state */ fobj.remaining_labels = mplshdr(m); - fobj.pop_offset = pktmbuf_l2_len(m) - ETHER_HDR_LEN; - pktmbuf_l2_len(m) = ETHER_HDR_LEN; + fobj.pop_offset = dp_pktmbuf_l2_len(m) - RTE_ETHER_HDR_LEN; + dp_pktmbuf_l2_len(m) = RTE_ETHER_HDR_LEN; if (have_labels) { - len = rte_pktmbuf_data_len(m) - pktmbuf_l2_len(m); + len = rte_pktmbuf_data_len(m) - dp_pktmbuf_l2_len(m); hdr = mplshdr(m); for (num_labels = 1; ; hdr++, num_labels++) { @@ -950,11 +1014,11 @@ nh_mpls_ip_fragment(struct ifnet *out_ifp, enum mpls_payload_type payload_type, if (payload_type == MPT_IPV4) { const struct iphdr *ip; - pktmbuf_l2_len(m) += offset; + dp_pktmbuf_l2_len(m) += offset; ip = iphdr(m); if (!ip_valid_packet(m, ip)) { - pktmbuf_l2_len(m) -= offset; + dp_pktmbuf_l2_len(m) -= offset; DBG_MPLS_PKTERR(out_ifp, m, "Packet needing fragmentation not valid\n"); mpls_if_incr_out_errors(out_ifp); @@ -964,7 +1028,7 @@ nh_mpls_ip_fragment(struct ifnet *out_ifp, enum mpls_payload_type payload_type, /* check for ip df bit */ if (ip->frag_off & htons(IP_DF)) { - pktmbuf_l2_len(m) = pktmbuf_l2_len(m) - + dp_pktmbuf_l2_len(m) = dp_pktmbuf_l2_len(m) - offset + fobj.pop_offset; if (have_labels) { icmp = mpls_icmp_df(out_ifp, m, cache, @@ -994,8 +1058,8 @@ nh_mpls_ip_fragment(struct ifnet *out_ifp, enum mpls_payload_type payload_type, rte_pktmbuf_free(m); return; } - pktmbuf_l2_len(m) = ETHER_HDR_LEN; - pktmbuf_l3_len(m) = ip->ihl << 2; + dp_pktmbuf_l2_len(m) = RTE_ETHER_HDR_LEN; + dp_pktmbuf_l3_len(m) = ip->ihl << 2; mpls_if_incr_out_fragment_pkts(out_ifp); @@ -1023,7 +1087,7 @@ nh_mpls_ip_fragment(struct ifnet *out_ifp, enum mpls_payload_type payload_type, */ static inline void nh_mpls_forward(enum mpls_payload_type payload_type, - 
enum nh_type nht, union next_hop_v4_or_v6_ptr nh, + enum nh_type nht, struct next_hop *nh, bool have_labels, uint8_t ttl, struct rte_mbuf *m, struct mpls_label_cache *cache, struct ifnet *input_ifp) @@ -1031,18 +1095,18 @@ nh_mpls_forward(enum mpls_payload_type payload_type, struct ifnet *out_ifp; int adjust; - assert(pktmbuf_l2_len(m) >= ETHER_HDR_LEN); + assert(dp_pktmbuf_l2_len(m) >= RTE_ETHER_HDR_LEN); /* * Check for fragmentation * adjust pkt len for difference between cached and popped labels */ - out_ifp = nh_get_if(nht, nh); - adjust = mpls_label_cache_adjust(m, cache, ETHER_HDR_LEN); - if (likely(rte_pktmbuf_pkt_len(m) + adjust - ETHER_HDR_LEN <= + out_ifp = dp_nh_get_ifp(nh); + adjust = mpls_label_cache_adjust(m, cache, RTE_ETHER_HDR_LEN); + if (likely(rte_pktmbuf_pkt_len(m) + adjust - RTE_ETHER_HDR_LEN <= out_ifp->if_mtu)) { nh_eth_output_mpls(nht, nh, ttl, m, cache, - input_ifp); + input_ifp); } else nh_mpls_ip_fragment(out_ifp, payload_type, nht, nh, have_labels, adjust, ttl, m, cache, @@ -1101,15 +1165,15 @@ mpls_forward_to_ipv4(struct ifnet *ifp, bool local, /* * Disposition to ipv4. */ - assert(pktmbuf_l2_len(m) >= ETHER_HDR_LEN); + assert(dp_pktmbuf_l2_len(m) >= RTE_ETHER_HDR_LEN); /* * Fixup mbuf before we give it back to ip. * Adjust the pkt start to be one eth hdr in * front of current l2 offset - to componsate * for any pops. */ - pop_offset = pktmbuf_l2_len(m) - ETHER_HDR_LEN; - pktmbuf_l2_len(m) = ETHER_HDR_LEN; + pop_offset = dp_pktmbuf_l2_len(m) - RTE_ETHER_HDR_LEN; + dp_pktmbuf_l2_len(m) = RTE_ETHER_HDR_LEN; if (!rte_pktmbuf_adj(m, pop_offset)) { DBG_MPLS_PKTERR(ifp, m, @@ -1121,13 +1185,13 @@ mpls_forward_to_ipv4(struct ifnet *ifp, bool local, return; } - ethhdr(m)->ether_type = htons(ETHER_TYPE_IPv4); + ethhdr(m)->ether_type = htons(RTE_ETHER_TYPE_IPV4); /* * Is packet big enough. 
* (i.e is there a valid IP header in first segment) */ - len = rte_pktmbuf_data_len(m) - pktmbuf_l2_len(m); + len = rte_pktmbuf_data_len(m) - dp_pktmbuf_l2_len(m); if (unlikely(len < sizeof(struct iphdr))) { DBG_MPLS_PKTERR(ifp, m, "Truncated packet during forward as IPv4 (%u). Dropping...\n", @@ -1138,7 +1202,7 @@ mpls_forward_to_ipv4(struct ifnet *ifp, bool local, return; } - pktmbuf_set_vrf(m, if_vrfid(nh4_get_ifp(v4nh))); + pktmbuf_set_vrf(m, if_vrfid(dp_nh_get_ifp(v4nh))); ip = iphdr(m); if (!local && unlikely(!mpls_propagate_ttl_to_ip(ip, ttl, pop))) { @@ -1156,19 +1220,19 @@ mpls_forward_to_ipv4(struct ifnet *ifp, bool local, * ifp must be non-NULL, but all we've got is the * output ifp so use that. */ - ifp = nh4_get_ifp(v4nh); + ifp = dp_nh_get_ifp(v4nh); if (!ifp) { rte_pktmbuf_free(m); return; } } - ip_out_features(m, ifp, ip, v4nh, v4nh->gateway, ip4_feat, - NPF_FLAG_CACHE_EMPTY); + ip_out_features(m, ifp, ip, v4nh, v4nh->gateway.address.ip_v4.s_addr, + ip4_feat, NPF_FLAG_CACHE_EMPTY); } static void mpls_forward_to_ipv6(struct ifnet *ifp, bool local, struct rte_mbuf *m, - struct next_hop_v6 *v6nh, + struct next_hop *v6nh, uint8_t ttl, bool pop) { uint32_t pop_offset; @@ -1178,7 +1242,7 @@ static void mpls_forward_to_ipv6(struct ifnet *ifp, bool local, /* * Disposition to ipv6. */ - assert(pktmbuf_l2_len(m) >= ETHER_HDR_LEN); + assert(dp_pktmbuf_l2_len(m) >= RTE_ETHER_HDR_LEN); /* * Fixup mbuf before we give it back to ip. @@ -1186,8 +1250,8 @@ static void mpls_forward_to_ipv6(struct ifnet *ifp, bool local, * front of current l2 offset - to componsate * for any pops. 
*/ - pop_offset = pktmbuf_l2_len(m) - ETHER_HDR_LEN; - pktmbuf_l2_len(m) = ETHER_HDR_LEN; + pop_offset = dp_pktmbuf_l2_len(m) - RTE_ETHER_HDR_LEN; + dp_pktmbuf_l2_len(m) = RTE_ETHER_HDR_LEN; if (!rte_pktmbuf_adj(m, pop_offset)) { DBG_MPLS_PKTERR(ifp, m, @@ -1199,13 +1263,13 @@ static void mpls_forward_to_ipv6(struct ifnet *ifp, bool local, return; } - ethhdr(m)->ether_type = htons(ETHER_TYPE_IPv6); + ethhdr(m)->ether_type = htons(RTE_ETHER_TYPE_IPV6); /* * Is packet big enough. * (i.e is there a valid IPv6 header in first segment) */ - len = rte_pktmbuf_data_len(m) - pktmbuf_l2_len(m); + len = rte_pktmbuf_data_len(m) - dp_pktmbuf_l2_len(m); if (unlikely(len < sizeof(struct ip6_hdr))) { DBG_MPLS_PKTERR(ifp, m, "Truncated packet during forward as IPv6 (%u). Dropping...\n", @@ -1216,7 +1280,7 @@ static void mpls_forward_to_ipv6(struct ifnet *ifp, bool local, return; } - pktmbuf_set_vrf(m, if_vrfid(nh6_get_ifp(v6nh))); + pktmbuf_set_vrf(m, if_vrfid(dp_nh_get_ifp(v6nh))); ip6 = ip6hdr(m); if (!local && unlikely(!mpls_propagate_ttl_to_ip6(ip6, ttl, pop))) { @@ -1242,7 +1306,7 @@ static void mpls_forward_to_ipv6(struct ifnet *ifp, bool local, * ifp must be non-NULL, but all we've got is the * output ifp so use that. */ - ifp = nh6_get_ifp(v6nh); + ifp = dp_nh_get_ifp(v6nh); if (!ifp) { rte_pktmbuf_free(m); return; @@ -1282,7 +1346,7 @@ static void mpls_vpnv4_local_deliver(struct ifnet *ifp, struct rte_mbuf *m) * * Run the local firewall, and discard if so instructed. */ - if (npf_local_fw(ifp, &m, htons(ETHER_TYPE_IPv4))) + if (npf_local_fw(ifp, &m, htons(RTE_ETHER_TYPE_IPV4))) goto discard; IPSTAT_INC_VRF(vrf, IPSTATS_MIB_INDELIVERS); @@ -1331,8 +1395,8 @@ static bool mpls_reswitch_as_ipv4(struct ifnet *input_ifp, * means setting the ether header to be in front of our * current label and setting the ethertype to be ip. 
*/ - assert(pktmbuf_l2_len(m) >= ETHER_HDR_LEN); - pop_offset = pktmbuf_l2_len(m) - ETHER_HDR_LEN; + assert(dp_pktmbuf_l2_len(m) >= RTE_ETHER_HDR_LEN); + pop_offset = dp_pktmbuf_l2_len(m) - RTE_ETHER_HDR_LEN; memmove((uint8_t *)ethhdr(m) + pop_offset, ethhdr(m), ETH_HLEN); if (!rte_pktmbuf_adj(m, pop_offset)) { @@ -1341,8 +1405,8 @@ static bool mpls_reswitch_as_ipv4(struct ifnet *input_ifp, __func__); return false; } - ethhdr(m)->ether_type = htons(ETHER_TYPE_IPv4); - pktmbuf_l2_len(m) = ETHER_HDR_LEN; + ethhdr(m)->ether_type = htons(RTE_ETHER_TYPE_IPV4); + dp_pktmbuf_l2_len(m) = RTE_ETHER_HDR_LEN; if (likely(input_ifp != NULL)) { /* Propagate or decrement (if not local) the ttl */ @@ -1393,7 +1457,7 @@ static void mpls_vpnv6_local_deliver(struct ifnet *ifp, struct rte_mbuf *m) * * Run the local firewall, and discard if so instructed. */ - if (npf_local_fw(ifp, &m, htons(ETHER_TYPE_IPv6))) + if (npf_local_fw(ifp, &m, htons(RTE_ETHER_TYPE_IPV6))) goto discard; IP6STAT_INC_IFP(ifp, IPSTATS_MIB_INDELIVERS); @@ -1442,8 +1506,8 @@ static bool mpls_reswitch_as_ipv6(struct ifnet *input_ifp, * means setting the ether header to be in front of our * current label and setting the ethertype to be ipv6. 
*/ - assert(pktmbuf_l2_len(m) >= ETHER_HDR_LEN); - pop_offset = pktmbuf_l2_len(m) - ETHER_HDR_LEN; + assert(dp_pktmbuf_l2_len(m) >= RTE_ETHER_HDR_LEN); + pop_offset = dp_pktmbuf_l2_len(m) - RTE_ETHER_HDR_LEN; memmove((uint8_t *)ethhdr(m) + pop_offset, ethhdr(m), ETH_HLEN); if (!rte_pktmbuf_adj(m, pop_offset)) { @@ -1452,8 +1516,8 @@ static bool mpls_reswitch_as_ipv6(struct ifnet *input_ifp, __func__); return false; } - ethhdr(m)->ether_type = htons(ETHER_TYPE_IPv6); - pktmbuf_l2_len(m) = ETHER_HDR_LEN; + ethhdr(m)->ether_type = htons(RTE_ETHER_TYPE_IPV6); + dp_pktmbuf_l2_len(m) = RTE_ETHER_HDR_LEN; if (likely(input_ifp != NULL)) { /* Propagate or decrement (if not local) the ttl */ @@ -1474,7 +1538,7 @@ static bool mpls_reswitch_as_ipv6(struct ifnet *input_ifp, /* Is it for a local address on this host? */ if (is_local) { - ip6_local_deliver(input_ifp, m); + ip6_l4_input(m, input_ifp); return true; } @@ -1499,7 +1563,7 @@ mpls_labeled_forward(struct ifnet *input_ifp, bool local, uint32_t in_label; enum nh_type nht; uint8_t ttl; - union next_hop_v4_or_v6_ptr nh; + struct next_hop *nh; bool pop; mpls_label_cache_init(&cache); @@ -1512,7 +1576,7 @@ mpls_labeled_forward(struct ifnet *input_ifp, bool local, if (unlikely(!hdr)) { DBG_MPLS_PKTERR(input_ifp, m, "mpls_labeled_input truncated packet %u if %s(%d)\n", - rte_pktmbuf_data_len(m) - pktmbuf_l2_len(m), + rte_pktmbuf_data_len(m) - dp_pktmbuf_l2_len(m), local ? "(local)" : input_ifp->if_name, local ? 
0 : input_ifp->if_index); goto drop; @@ -1559,7 +1623,7 @@ mpls_labeled_forward(struct ifnet *input_ifp, bool local, nh = mpls_label_table_lookup(label_table, in_label, m, ETH_P_MPLS_UC, &nht, &payload_type); - if (unlikely(!nh.v4)) { + if (unlikely(!nh)) { if (!local && label_table) { DBG_MPLS_PKTERR(input_ifp, m, "label table entry not found\n"); @@ -1570,35 +1634,39 @@ mpls_labeled_forward(struct ifnet *input_ifp, bool local, break; } - ret = nh_fwd_mpls(nht, nh, m, true, payload_type, &cache, &pop); - + ret = nh_fwd_mpls(nh, m, true, payload_type, &cache, &pop); if (likely(ret == NH_FWD_IPv4)) { - mpls_forward_to_ipv4(input_ifp, local, m, nh.v4, ttl, + mpls_forward_to_ipv4(input_ifp, local, m, nh, ttl, pop); return; - } else if (likely(ret == NH_FWD_SUCCESS)) { + } + if (likely(ret == NH_FWD_SUCCESS)) { nh_mpls_forward(payload_type, nht, nh, true, ttl, m, &cache, input_ifp); return; - } else if (likely(ret == NH_FWD_IPv6)) { - mpls_forward_to_ipv6(input_ifp, local, m, nh.v6, ttl, + } + if (likely(ret == NH_FWD_IPv6)) { + mpls_forward_to_ipv6(input_ifp, local, m, nh, ttl, pop); return; - } else if (unlikely(ret == NH_FWD_RESWITCH_IPv4)) { + } + if (unlikely(ret == NH_FWD_RESWITCH_IPv4)) { if (!mpls_reswitch_as_ipv4( - input_ifp, m, nh4_get_ifp(nh.v4) ? - if_vrfid(nh6_get_ifp(nh.v6)) : + input_ifp, m, dp_nh_get_ifp(nh) ? + if_vrfid(dp_nh_get_ifp(nh)) : VRF_DEFAULT_ID, ttl)) goto drop; return; - } else if (unlikely(ret == NH_FWD_RESWITCH_IPv6)) { + } + if (unlikely(ret == NH_FWD_RESWITCH_IPv6)) { if (!mpls_reswitch_as_ipv6( - input_ifp, m, nh6_get_ifp(nh.v6) ? - if_vrfid(nh6_get_ifp(nh.v6)) : + input_ifp, m, dp_nh_get_ifp(nh) ? + if_vrfid(dp_nh_get_ifp(nh)) : VRF_DEFAULT_ID, ttl)) goto drop; return; - } else if (unlikely(ret == NH_FWD_SLOWPATH)) { + } + if (unlikely(ret == NH_FWD_SLOWPATH)) { /* * Put the packet back to its newly arrived * state. 
NOTE: we are assuming that we @@ -1606,7 +1674,7 @@ mpls_labeled_forward(struct ifnet *input_ifp, bool local, * labels here so that we can simply reset * the L2 len to ethernet. */ - pktmbuf_l2_len(m) = ETHER_HDR_LEN; + dp_pktmbuf_l2_len(m) = RTE_ETHER_HDR_LEN; if (unlikely(local)) break; /* @@ -1618,9 +1686,9 @@ mpls_labeled_forward(struct ifnet *input_ifp, bool local, break; local_packet(input_ifp, m); return; - } else if (unlikely(ret == NH_FWD_FAILURE)) { - break; } + if (unlikely(ret == NH_FWD_FAILURE)) + break; /* * We don't support push/swap and lookup semantics so we * cannot currently have labels in the label cache when we get @@ -1651,12 +1719,12 @@ static void mpls_output(struct rte_mbuf *m) } void mpls_unlabeled_input(struct ifnet *input_ifp, struct rte_mbuf *m, + enum mpls_payload_type payload_type, enum nh_type ip_nh_type, - union next_hop_v4_or_v6_ptr ip_nh, + struct next_hop *ip_nh, uint8_t ttl) { const union next_hop_outlabels *labels; - enum mpls_payload_type payload_type; struct mpls_label_cache cache; unsigned int num_labels; uint32_t local_label; @@ -1664,7 +1732,7 @@ void mpls_unlabeled_input(struct ifnet *input_ifp, struct rte_mbuf *m, enum nh_fwd_ret ret; unsigned int i = 0; enum nh_type nht; - union next_hop_v4_or_v6_ptr nh; + struct next_hop *nh; label_t label; uint8_t bos; @@ -1673,7 +1741,7 @@ void mpls_unlabeled_input(struct ifnet *input_ifp, struct rte_mbuf *m, if (propagate_ttl != TTL_PROPAGATE_ENABLED) ttl = default_ttl; - if (unlikely(nh_get_flags(ip_nh_type, ip_nh) & RTF_OUTLABEL)) { + if (unlikely(nh_get_flags(ip_nh) & RTF_OUTLABEL)) { /* * Output labels are provided * Payload type is not required for imposition but needs to be @@ -1681,12 +1749,11 @@ void mpls_unlabeled_input(struct ifnet *input_ifp, struct rte_mbuf *m, */ nht = ip_nh_type; nh = ip_nh; - payload_type = MPT_UNSPEC; } else { /* * Push all except the top (local) label onto the label cache */ - labels = nh_get_labels(ip_nh_type, ip_nh); + labels = 
nh_get_labels(ip_nh); num_labels = nh_outlabels_get_cnt(labels); assert(num_labels); bos = true; @@ -1706,10 +1773,10 @@ void mpls_unlabeled_input(struct ifnet *input_ifp, struct rte_mbuf *m, /* Assumes nexthop address family == link address family */ if (ip_nh_type == NH_TYPE_V6GW) - ether_type = ETHER_TYPE_IPv6; + ether_type = RTE_ETHER_TYPE_IPV6; else { assert(ip_nh_type == NH_TYPE_V4GW); - ether_type = ETHER_TYPE_IPv4; + ether_type = RTE_ETHER_TYPE_IPV4; } /* @@ -1719,7 +1786,7 @@ void mpls_unlabeled_input(struct ifnet *input_ifp, struct rte_mbuf *m, rcu_dereference(global_label_table), local_label, m, ether_type, &nht, &payload_type); - if (unlikely(!nh.v4)) { + if (unlikely(!nh)) { DBG_MPLS_PKTERR(input_ifp, m, "%s %s: no route for %d\n", __func__, input_ifp ? input_ifp->if_name : "(local)", @@ -1728,21 +1795,23 @@ void mpls_unlabeled_input(struct ifnet *input_ifp, struct rte_mbuf *m, } } - ret = nh_fwd_mpls(nht, nh, m, false, payload_type, &cache, NULL); - + ret = nh_fwd_mpls(nh, m, false, payload_type, &cache, NULL); if (likely(ret == NH_FWD_SUCCESS)) { nh_mpls_forward(payload_type, nht, nh, false, ttl, m, &cache, input_ifp); return; - } else if (likely(ret == NH_FWD_IPv4)) { + } + if (likely(ret == NH_FWD_IPv4)) { mpls_forward_to_ipv4(input_ifp, input_ifp == NULL, m, - nh.v4, ttl, false); + nh, ttl, false); return; - } else if (likely(ret == NH_FWD_IPv6)) { + } + if (likely(ret == NH_FWD_IPv6)) { mpls_forward_to_ipv6(input_ifp, input_ifp == NULL, m, - nh.v6, ttl, false); + nh, ttl, false); return; - } else if (unlikely(ret == NH_FWD_SLOWPATH)) { + } + if (unlikely(ret == NH_FWD_SLOWPATH)) { /* * Put the packet back to its newly arrived * state. NOTE: we are assuming that we @@ -1750,7 +1819,7 @@ void mpls_unlabeled_input(struct ifnet *input_ifp, struct rte_mbuf *m, * labels here so that we can simply reset * the L2 len to ethernet. 
*/ - pktmbuf_l2_len(m) = ETHER_HDR_LEN; + dp_pktmbuf_l2_len(m) = RTE_ETHER_HDR_LEN; if (nht == NH_TYPE_V4GW) { struct iphdr *ip = iphdr(m); diff --git a/src/mpls/mpls_forward.h b/src/mpls/mpls_forward.h index b90c675c..acd3bf98 100644 --- a/src/mpls/mpls_forward.h +++ b/src/mpls/mpls_forward.h @@ -1,7 +1,7 @@ /* * MPLS Forwarding * - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2021, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -11,13 +11,15 @@ #ifndef MPLS_FORWARD_H #define MPLS_FORWARD_H +#include #include #include #include +#include "compiler.h" #include "if_var.h" #include "mpls.h" -#include "nh.h" +#include "nh_common.h" struct ifnet; struct rte_mbuf; @@ -25,6 +27,12 @@ struct rte_mbuf; #define ETH_P_MPLS_UC 0x8847 /* MPLS Unicast traffic */ #define ETH_P_MPLS_MC 0x8848 /* MPLS Multicast traffic */ +enum mpls_payload_type { + MPT_UNSPEC = RTMPT_IP, + MPT_IPV4 = RTMPT_IPV4, + MPT_IPV6 = RTMPT_IPV6, +}; + bool mpls_global_get_ipttlpropagate(void); void mpls_global_set_ipttlpropagate(bool enable); int mpls_global_get_defaultttl(void); @@ -33,10 +41,11 @@ void mpls_global_set_defaultttl(int ttl); uint32_t mpls_ecmp_hash(const struct rte_mbuf *m); void mpls_labeled_input(struct ifnet *ifp, struct rte_mbuf *m) - __attribute__((hot)); + __hot_func; void mpls_unlabeled_input(struct ifnet *ifp, struct rte_mbuf *m, + enum mpls_payload_type payload_type, enum nh_type nh_type, - union next_hop_v4_or_v6_ptr ip_nh, uint8_t ttl) - __attribute__((hot)); + struct next_hop *ip_nh, uint8_t ttl) + __hot_func; #endif diff --git a/src/mpls/mpls_label_table.c b/src/mpls/mpls_label_table.c index b6c88d3a..e621d5ef 100644 --- a/src/mpls/mpls_label_table.c +++ b/src/mpls/mpls_label_table.c @@ -1,7 +1,7 @@ /* * MPLS label table manipulation * - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. 
+ * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -28,11 +28,13 @@ #include #include +#include "fal.h" #include "json_writer.h" #include "main.h" #include "mpls/mpls.h" #include "mpls_label_table.h" -#include "pktmbuf.h" +#include "pd_show.h" +#include "pktmbuf_internal.h" #include "route.h" #include "route_flags.h" #include "route_v6.h" @@ -40,7 +42,7 @@ #include "util.h" #include "vplane_debug.h" #include "vplane_log.h" -#include "vrf.h" +#include "vrf_internal.h" struct cds_lfht; @@ -54,6 +56,9 @@ struct label_table_node { uint32_t next_hop; /* idx of output info */ uint8_t nh_type; uint8_t payload_type; + uint16_t pd_state : 15; + uint16_t pd_created : 1; + uint32_t padding1; struct cds_lfht_node node; struct rcu_head rcu_head; } __rte_cache_aligned; @@ -78,6 +83,8 @@ struct label_table_set_entry { struct rcu_head rcu_head; }; +static uint32_t mpls_route_hw_stats[PD_OBJ_STATE_LAST]; + /* * Setup mbuf pool for oam lookup. 
*/ @@ -146,10 +153,10 @@ static void free_label_table_node(struct label_table_node *label_table_entry) { switch (label_table_entry->nh_type) { case NH_TYPE_V4GW: - nexthop_put(label_table_entry->next_hop); + nexthop_put(AF_INET, label_table_entry->next_hop); break; case NH_TYPE_V6GW: - nexthop6_put(label_table_entry->next_hop); + nexthop_put(AF_INET6, label_table_entry->next_hop); break; } @@ -176,18 +183,95 @@ free_label_table_set_entry_rcu(struct rcu_head *head) free(ls_entry); } +static void +mpls_label_table_fal_create_or_upd(struct label_table_node *label_table_node, + bool added_new) +{ + struct fal_attribute_t attr_list[] = { + { + .id = FAL_MPLS_ROUTE_ATTR_PACKET_ACTION, + }, + { + .id = FAL_MPLS_ROUTE_ATTR_NEXT_HOP_GROUP, + }, + }; + struct fal_mpls_route_t fal_mpls_route = { + .label = label_table_node->in_label, + }; + enum pd_obj_state nhl_pd_state; + bool update_pd_state = true; + struct next_hop *hops; + size_t size; + int family = label_table_node->nh_type == NH_TYPE_V4GW ? 
+ AF_INET : AF_INET6; + int rc; + + size = next_hop_list_get_fal_nhs(family, label_table_node->next_hop, + &hops); + + attr_list[0].value.u32 = fal_next_hop_group_packet_action(size, hops); + attr_list[1].value.objid = next_hop_list_get_fal_obj( + family, label_table_node->next_hop, &nhl_pd_state); + + if (!added_new) + mpls_route_hw_stats[label_table_node->pd_state]--; + + if (nhl_pd_state != PD_OBJ_STATE_FULL && + nhl_pd_state != PD_OBJ_STATE_NOT_NEEDED) { + label_table_node->pd_state = nhl_pd_state; + update_pd_state = false; + } + + if (!label_table_node->pd_created) { + rc = fal_create_mpls_route(&fal_mpls_route, + ARRAY_SIZE(attr_list), attr_list); + if (rc < 0) { + if (rc != -EOPNOTSUPP) { + RTE_LOG(ERR, MPLS, + "FAL create of label %d failed: %s\n", + label_table_node->in_label, + strerror(-rc)); + } + } else + label_table_node->pd_created = true; + if (update_pd_state) + label_table_node->pd_state = fal_state_to_pd_state(rc); + mpls_route_hw_stats[label_table_node->pd_state]++; + } else { + rc = fal_set_mpls_route_attr(&fal_mpls_route, &attr_list[0]); + if (update_pd_state) + label_table_node->pd_state = fal_state_to_pd_state(rc); + if (rc < 0) { + RTE_LOG(ERR, MPLS, + "FAL set of label %d forwarding action failed: %s\n", + label_table_node->in_label, strerror(-rc)); + } + rc = fal_set_mpls_route_attr(&fal_mpls_route, &attr_list[1]); + if (rc < 0) { + RTE_LOG(ERR, MPLS, + "FAL set of label %d next hop group failed: %s\n", + label_table_node->in_label, strerror(-rc)); + if (update_pd_state) + label_table_node->pd_state = + fal_state_to_pd_state(rc); + } + mpls_route_hw_stats[label_table_node->pd_state]++; + } +} + static bool mpls_label_table_ins_lbl_internal(struct cds_lfht *label_table, uint32_t in_label, enum nh_type nh_type, enum mpls_payload_type payload_type, - union next_hop_v4_or_v6_ptr hops, + struct next_hop *hops, size_t size) { + struct label_table_node *old_label_table_node; struct label_table_node *label_table_node; struct cds_lfht_node *node; 
+ bool added_new = false; uint32_t nextu_idx; int rc; - bool added_new = false; if (!label_table) { RTE_LOG(ERR, MPLS, @@ -209,26 +293,8 @@ mpls_label_table_ins_lbl_internal(struct cds_lfht *label_table, switch (nh_type) { case NH_TYPE_V4GW: - rc = nexthop_new(hops.v4, size, RTPROT_UNSPEC, &nextu_idx); - if (rc < 0) { - RTE_LOG(ERR, MPLS, - "Failed to create nexthops for label table entry: %s\n", - strerror(-rc)); - free(label_table_node); - return false; - } - break; - case NH_TYPE_V6GW: { - rc = nexthop6_new(hops.v6, size, &nextu_idx); - if (rc < 0) { - RTE_LOG(ERR, MPLS, - "Failed to create nexthops for label table entry: %s\n", - strerror(-rc)); - free(label_table_node); - return false; - } + case NH_TYPE_V6GW: break; - } default: RTE_LOG(ERR, MPLS, "Unsupported nh type %d\n", nh_type); @@ -236,13 +302,26 @@ mpls_label_table_ins_lbl_internal(struct cds_lfht *label_table, return false; } + rc = nexthop_new(nh_type == NH_TYPE_V4GW ? AF_INET : AF_INET6, + hops, size, RTPROT_UNSPEC, + FAL_NHG_USE_MPLS_LABEL_SWITCH, + &nextu_idx); + if (rc < 0) { + RTE_LOG(ERR, MPLS, + "Failed to create nexthops for label table entry: %s\n", + strerror(-rc)); + free(label_table_node); + return false; + } + label_table_node->next_hop = nextu_idx; cds_lfht_node_init(&label_table_node->node); label_table_node->in_label = in_label; label_table_node->nh_type = nh_type; label_table_node->payload_type = (uint8_t)payload_type; + label_table_node->pd_created = false; - rcu_read_lock(); + dp_rcu_read_lock(); node = cds_lfht_add_replace(label_table, mpls_label_table_node_hash( label_table_node), @@ -252,16 +331,22 @@ mpls_label_table_ins_lbl_internal(struct cds_lfht *label_table, DP_DEBUG(MPLS_CTRL, DEBUG, MPLS, "Free the old label table entry for label %d\n", in_label); - label_table_node = caa_container_of(node, - struct label_table_node, - node); - free_label_table_node(label_table_node); + old_label_table_node = caa_container_of(node, + struct label_table_node, + node); + 
label_table_node->pd_created = old_label_table_node->pd_created; + label_table_node->pd_state = old_label_table_node->pd_state; + + free_label_table_node(old_label_table_node); } else { added_new = true; } + + mpls_label_table_fal_create_or_upd(label_table_node, added_new); + DP_DEBUG(MPLS_CTRL, DEBUG, MPLS, "%s count = %lu\n", __func__, mpls_label_table_count(label_table)); - rcu_read_unlock(); + dp_rcu_read_unlock(); return added_new; } @@ -270,12 +355,15 @@ static int mpls_label_table_rem_lbl_internal(struct cds_lfht *label_table, uint32_t in_label) { + struct fal_mpls_route_t fal_mpls_route = { + .label = in_label, + }; struct label_table_node *out, in; struct cds_lfht_iter iter; struct cds_lfht_node *node; int rc; - rcu_read_lock(); + dp_rcu_read_lock(); in.in_label = in_label; cds_lfht_lookup(label_table, mpls_label_table_node_hash(&in), @@ -283,8 +371,21 @@ mpls_label_table_rem_lbl_internal(struct cds_lfht *label_table, node = cds_lfht_iter_get_node(&iter); if (node) { out = caa_container_of(node, struct label_table_node, node); + + mpls_route_hw_stats[out->pd_state]--; + + if (out->pd_created) { + rc = fal_delete_mpls_route(&fal_mpls_route); + if (rc < 0) { + RTE_LOG(ERR, MPLS, + "FAL delete of label %d failed: %s\n", + in_label, strerror(-rc)); + } + } + if (!cds_lfht_del(label_table, &out->node)) free_label_table_node(out); + rc = 0; } else { rc = -ENOENT; @@ -292,7 +393,7 @@ mpls_label_table_rem_lbl_internal(struct cds_lfht *label_table, DP_DEBUG(MPLS_CTRL, DEBUG, MPLS, "%s rc = %d count = %lu\n", __func__, rc, mpls_label_table_count(label_table)); - rcu_read_unlock(); + dp_rcu_read_unlock(); return rc; } @@ -313,7 +414,11 @@ mpls_label_table_del_reserved_labels(struct cds_lfht *table) static bool mpls_label_table_add_reserved_labels(struct cds_lfht *table) { - union next_hop_v4_or_v6_ptr nhop; + struct next_hop *nhop; + struct ip_addr addr_any = { + .type = AF_INET, + .address.ip_v4.s_addr = INADDR_ANY, + }; /* * IPv4/6 Exp NULL @@ -322,8 +427,8 @@ 
mpls_label_table_add_reserved_labels(struct cds_lfht *table) */ label_t outlabels[] = {MPLS_IMPLICITNULL}; - nhop.v4 = nexthop_create(NULL, INADDR_ANY, 0, 1, outlabels); - if (!nhop.v4) + nhop = nexthop_create(NULL, &addr_any, 0, 1, outlabels); + if (!nhop) goto error; mpls_label_table_ins_lbl_internal(table, MPLS_IPV4EXPLICITNULL, NH_TYPE_V4GW, MPT_IPV4, @@ -331,14 +436,14 @@ mpls_label_table_add_reserved_labels(struct cds_lfht *table) mpls_label_table_ins_lbl_internal(table, MPLS_IPV6EXPLICITNULL, NH_TYPE_V4GW, MPT_IPV6, nhop, 1); - free(nhop.v4); + free(nhop); - nhop.v4 = nexthop_create(NULL, INADDR_ANY, RTF_SLOWPATH, 1, outlabels); - if (!nhop.v4) + nhop = nexthop_create(NULL, &addr_any, RTF_SLOWPATH, 1, outlabels); + if (!nhop) goto error; mpls_label_table_ins_lbl_internal(table, MPLS_ROUTERALERT, NH_TYPE_V4GW, 0, nhop, 1); - free(nhop.v4); + free(nhop); return true; @@ -447,10 +552,10 @@ mpls_label_table_get_and_lock(int labelspace) */ if (labelspace == global_label_space_id) { assert(!global_label_table); - rcu_read_lock(); + dp_rcu_read_lock(); rcu_assign_pointer(global_label_table, ls_entry->label_table); - rcu_read_unlock(); + dp_rcu_read_unlock(); } return ls_entry->label_table; } @@ -512,7 +617,7 @@ void mpls_label_table_unlock(int labelspace) void mpls_label_table_insert_label(int labelspace, uint32_t in_label, enum nh_type nh_type, enum mpls_payload_type payload_type, - union next_hop_v4_or_v6_ptr hops, + struct next_hop *hops, size_t size) { struct cds_lfht *label_table = @@ -548,20 +653,30 @@ mpls_label_table_lookup_internal(struct cds_lfht *label_table, return NULL; } -union next_hop_v4_or_v6_ptr +static inline int nh_type_to_address_family(enum nh_type type) +{ + if (type == NH_TYPE_V6GW) + return AF_INET6; + + return AF_INET; +} + +struct next_hop * mpls_label_table_lookup(struct cds_lfht *label_table, uint32_t in_label, const struct rte_mbuf *m, uint16_t ether_type, enum nh_type *nht, enum mpls_payload_type *payload_type) { struct 
label_table_node *out; - union next_hop_v4_or_v6_ptr nh = { NULL }; + struct next_hop *nh = NULL; out = mpls_label_table_lookup_internal(label_table, in_label); if (likely(out != NULL)) { *nht = out->nh_type; *payload_type = out->payload_type; - return nh_select(*nht, out->next_hop, m, ether_type); + nh = nexthop_select(nh_type_to_address_family(*nht), + out->next_hop, m, ether_type); + return nh; } return nh; } @@ -590,11 +705,11 @@ void mpls_label_table_resize(int labelspace, uint32_t max_label) DP_DEBUG(MPLS_CTRL, INFO, MPLS, "mpls label table resize to %u\n", max_label); - rcu_read_lock(); + dp_rcu_read_lock(); ls_entry = mpls_label_space_entry_get(labelspace); if (!ls_entry) { - rcu_read_unlock(); + dp_rcu_read_unlock(); return; } @@ -612,29 +727,44 @@ void mpls_label_table_resize(int labelspace, uint32_t max_label) } } - rcu_read_unlock(); + dp_rcu_read_unlock(); } static void -mpls_label_table_dump(struct cds_lfht *label_table, json_writer_t *json) +mpls_label_table_dump(struct cds_lfht *label_table, json_writer_t *json, + enum pd_obj_state pd_state, uint32_t label_filter) { struct label_table_node *label_table_entry; + enum rt_print_nexthop_verbosity nh_v = + label_filter == MPLS_LABEL_ALL ? 
RT_PRINT_NH_BRIEF : + RT_PRINT_NH_DETAIL; struct cds_lfht_iter iter; if (!mpls_label_table_count(label_table)) return; jsonw_name(json, "mpls_routes"); jsonw_start_array(json); - rcu_read_lock(); + dp_rcu_read_lock(); cds_lfht_for_each_entry(label_table, &iter, label_table_entry, node) { + /* use PD_OBJ_STATE_LAST to mean wildcard any state */ + if (pd_state != PD_OBJ_STATE_LAST && + pd_state != label_table_entry->pd_state) + continue; + + if (label_filter != MPLS_LABEL_ALL && + label_table_entry->in_label != label_filter) + continue; + jsonw_start_object(json); jsonw_uint_field(json, "address", label_table_entry->in_label); switch (label_table_entry->nh_type) { case NH_TYPE_V4GW: - rt_print_nexthop(json, label_table_entry->next_hop); + rt_print_nexthop(json, label_table_entry->next_hop, + nh_v); break; case NH_TYPE_V6GW: - rt6_print_nexthop(json, label_table_entry->next_hop); + rt6_print_nexthop(json, label_table_entry->next_hop, + nh_v); break; } jsonw_uint_field(json, "payload", @@ -644,12 +774,12 @@ mpls_label_table_dump(struct cds_lfht *label_table, json_writer_t *json) jsonw_end_object(json); } - rcu_read_unlock(); + dp_rcu_read_unlock(); jsonw_end_array(json); } void -mpls_label_table_set_dump(FILE *fp, const int labelspace) +mpls_label_table_set_dump(FILE *fp, int labelspace, uint32_t label_filter) { struct label_table_set_entry *ls_entry; json_writer_t *json = jsonw_new(fp); @@ -662,13 +792,30 @@ mpls_label_table_set_dump(FILE *fp, const int labelspace) jsonw_start_object(json); jsonw_uint_field(json, "lblspc", ls_entry->labelspace); - mpls_label_table_dump(ls_entry->label_table, json); + mpls_label_table_dump(ls_entry->label_table, json, + PD_OBJ_STATE_LAST, label_filter); jsonw_end_object(json); } jsonw_end_array(json); jsonw_destroy(&json); } +int mpls_label_table_get_pd_subset_data(json_writer_t *json, + enum pd_obj_state subset) +{ + struct label_table_set_entry *ls_entry; + + cds_list_for_each_entry_rcu(ls_entry, &label_table_set, entry) { + 
jsonw_start_object(json); + jsonw_uint_field(json, "lblspc", ls_entry->labelspace); + mpls_label_table_dump(ls_entry->label_table, json, + subset, MPLS_LABEL_ALL); + jsonw_end_object(json); + } + + return 0; +} + void mpls_oam_v4_lookup(int labelspace, uint8_t nlabels, const label_t *labels, uint32_t saddr, uint32_t daddr, @@ -677,12 +824,12 @@ mpls_oam_v4_lookup(int labelspace, uint8_t nlabels, const label_t *labels, struct mpls_oam_outinfo outinfo[], unsigned int max_fanout) { - union next_hop_v4_or_v6_ptr nh; + struct next_hop *nh; struct cds_lfht *label_table; struct label_table_node *out; struct next_hop *paths; struct rte_mbuf *m; - struct ether_hdr *eth; + struct rte_ether_hdr *eth; label_t *lbl_stack; struct iphdr *ip; struct udphdr *udp; @@ -691,49 +838,49 @@ mpls_oam_v4_lookup(int labelspace, uint8_t nlabels, const label_t *labels, uint16_t payload, hlen = 0; unsigned int oi; - rcu_read_lock(); + dp_rcu_read_lock(); label_table = mpls_label_table_get_rcu(labelspace); if (!label_table) { - rcu_read_unlock(); + dp_rcu_read_unlock(); return; } out = mpls_label_table_lookup_internal(label_table, labels[0]); if (!out) { - rcu_read_unlock(); + dp_rcu_read_unlock(); return; } if (out->nh_type != NH_TYPE_V4GW) { - rcu_read_unlock(); + dp_rcu_read_unlock(); return; } m = pktmbuf_alloc(mpls_oam_pool, VRF_DEFAULT_ID); if (!m) { - rcu_read_unlock(); + dp_rcu_read_unlock(); return; } hlen = nlabels * sizeof(label_t); payload = sizeof(struct udphdr) + sizeof(struct iphdr); - if (!rte_pktmbuf_append(m, sizeof(struct ether_hdr) + hlen + + if (!rte_pktmbuf_append(m, sizeof(struct rte_ether_hdr) + hlen + payload)) { rte_pktmbuf_free(m); - rcu_read_unlock(); + dp_rcu_read_unlock(); return; } - eth = rte_pktmbuf_mtod(m, struct ether_hdr *); + eth = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); memset(eth, 0, sizeof(*eth)); eth->ether_type = htons(ETH_P_MPLS_UC); - m->l2_len = sizeof(struct ether_hdr); + m->l2_len = sizeof(struct rte_ether_hdr); /* * MPLS Incoming Label Stack 
*/ - lbl_stack = pktmbuf_mtol3(m, label_t *); + lbl_stack = dp_pktmbuf_mtol3(m, label_t *); for (i = 0; i < nlabels; i++) if (i == nlabels - 1) *lbl_stack++ = htonl(labels[i] << MPLS_LS_LABEL_SHIFT | @@ -747,7 +894,7 @@ mpls_oam_v4_lookup(int labelspace, uint8_t nlabels, const label_t *labels, /* * IP Header */ - ip = pktmbuf_mtol3(m, struct iphdr *); + ip = dp_pktmbuf_mtol3(m, struct iphdr *); ip->ihl = 5; ip->version = 4; ip->tot_len = htons(payload); @@ -760,7 +907,7 @@ mpls_oam_v4_lookup(int labelspace, uint8_t nlabels, const label_t *labels, /* * UDP MPLS Echo Request */ - udp = pktmbuf_mtol4(m, struct udphdr *); + udp = dp_pktmbuf_mtol4(m, struct udphdr *); memset(udp, 0, sizeof(struct udphdr)); udp->source = htons(sport); udp->dest = htons(dport); @@ -768,19 +915,20 @@ mpls_oam_v4_lookup(int labelspace, uint8_t nlabels, const label_t *labels, /* * Reset L2 header to the end of the ethernet header */ - m->l2_len = ETHER_HDR_LEN; + m->l2_len = RTE_ETHER_HDR_LEN; npaths = 0; paths = nexthop_get(out->next_hop, &npaths); for (i = 0; i < npaths; i++) { - nh.v4 = paths + i; - if (nh.v4->flags & RTF_DEAD) + nh = paths + i; + if (nh->flags & RTF_DEAD) continue; for (oi = 0; oi < max_fanout; oi++) { if (!outinfo[oi].inuse) { - outinfo[oi].ifp = nh4_get_ifp(nh.v4); - outinfo[oi].gateway = nh.v4->gateway; - outinfo[oi].outlabels = nh.v4->outlabels; + outinfo[oi].ifp = dp_nh_get_ifp(nh); + outinfo[oi].gateway = + nh->gateway.address.ip_v4.s_addr; + outinfo[oi].outlabels = nh->outlabels; outinfo[oi].bitmask = 0; outinfo[oi].inuse = true; break; @@ -796,23 +944,26 @@ mpls_oam_v4_lookup(int labelspace, uint8_t nlabels, const label_t *labels, ip->daddr = htonl(daddr + addr_index); ip->check = 0; - nh = nh_select(out->nh_type, out->next_hop, m, ETH_P_MPLS_UC); - if (!nh.v4) + nh = nexthop_select(nh_type_to_address_family(out->nh_type), + out->next_hop, m, ETH_P_MPLS_UC); + if (!nh) continue; for (oi = 0; oi < max_fanout; oi++) { if (!outinfo[oi].inuse) { - outinfo[oi].ifp = 
nh4_get_ifp(nh.v4); - outinfo[oi].gateway = nh.v4->gateway; - outinfo[oi].outlabels = nh.v4->outlabels; + outinfo[oi].ifp = dp_nh_get_ifp(nh); + outinfo[oi].gateway = + nh->gateway.address.ip_v4.s_addr; + outinfo[oi].outlabels = nh->outlabels; outinfo[oi].bitmask = ((uint64_t)1 << i); outinfo[oi].inuse = true; break; } - if ((outinfo[oi].ifp == nh4_get_ifp(nh.v4)) && - (outinfo[oi].gateway == nh.v4->gateway) && + if ((outinfo[oi].ifp == dp_nh_get_ifp(nh)) && + (outinfo[oi].gateway == + nh->gateway.address.ip_v4.s_addr) && nh_outlabels_cmpfn(&outinfo[oi].outlabels, - &nh.v4->outlabels)) { + &nh->outlabels)) { outinfo[oi].bitmask |= ((uint64_t)1 << i); break; @@ -821,5 +972,35 @@ mpls_oam_v4_lookup(int labelspace, uint8_t nlabels, const label_t *labels, } rte_pktmbuf_free(m); - rcu_read_unlock(); + dp_rcu_read_unlock(); +} + +uint32_t *mpls_label_table_hw_stats_get(void) +{ + return mpls_route_hw_stats; +} + +void +mpls_update_all_routes_for_nh_change(int family, uint32_t nhl_idx) +{ + struct label_table_node *label_table_entry; + struct label_table_set_entry *ls_entry; + struct cds_lfht_iter iter; + + cds_list_for_each_entry_rcu(ls_entry, &label_table_set, entry) { + cds_lfht_for_each_entry(ls_entry->label_table, &iter, + label_table_entry, node) { + if (family == AF_INET && + label_table_entry->nh_type != NH_TYPE_V4GW) + continue; + if (family == AF_INET6 && + label_table_entry->nh_type != NH_TYPE_V6GW) + continue; + if (nhl_idx != label_table_entry->next_hop) + continue; + + mpls_label_table_fal_create_or_upd(label_table_entry, + false); + } + } } diff --git a/src/mpls/mpls_label_table.h b/src/mpls/mpls_label_table.h index c9a0275f..6ff358a0 100644 --- a/src/mpls/mpls_label_table.h +++ b/src/mpls/mpls_label_table.h @@ -1,7 +1,7 @@ /* * MPLS Label Table * - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2021, AT&T Intellectual Property. All rights reserved. 
* Copyright (c) 2015 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -19,18 +19,16 @@ #include #include -#include "nh.h" +#include "compiler.h" +#include "nh_common.h" #include "route.h" +#include "mpls_forward.h" + +#define MPLS_LABEL_ALL (1 << 20) struct cds_lfht; struct rte_mbuf; -enum mpls_payload_type { - MPT_UNSPEC = RTMPT_IP, - MPT_IPV4 = RTMPT_IPV4, - MPT_IPV6 = RTMPT_IPV6, -}; - #define MPLS_OAM_MAX_FANOUT (16) struct mpls_oam_outinfo { bool inuse; @@ -51,19 +49,20 @@ void mpls_label_table_unlock(int labelspace); void mpls_label_table_insert_label(int labelspace, uint32_t in_label, enum nh_type nh_type, enum mpls_payload_type payload_type, - union next_hop_v4_or_v6_ptr hops, + struct next_hop *hops, size_t size); void mpls_label_table_remove_label(int labelspace, uint32_t in_label); -union next_hop_v4_or_v6_ptr +struct next_hop * mpls_label_table_lookup(struct cds_lfht *label_table, uint32_t in_label, const struct rte_mbuf *m, uint16_t ether_type, enum nh_type *nht, enum mpls_payload_type *payload_type) - __attribute__((hot)); + __hot_func; void mpls_label_table_resize(int labelspace, uint32_t max_label); -void mpls_label_table_set_dump(FILE *fp, const int labelspace); +void mpls_label_table_set_dump(FILE *fp, int labelspace, + uint32_t label_filter); void mpls_oam_v4_lookup(int labelspace, uint8_t nlabels, const label_t *labels, uint32_t saddr, uint32_t daddr, @@ -72,4 +71,10 @@ void mpls_oam_v4_lookup(int labelspace, uint8_t nlabels, struct mpls_oam_outinfo outinfo[], unsigned int max_fanout); +uint32_t *mpls_label_table_hw_stats_get(void); +int mpls_label_table_get_pd_subset_data(json_writer_t *json, + enum pd_obj_state subset); + +void mpls_update_all_routes_for_nh_change(int family, uint32_t nhl_idx); + #endif diff --git a/src/mpls/mpls_netlink.c b/src/mpls/mpls_netlink.c index 29ed31eb..5db87930 100644 --- a/src/mpls/mpls_netlink.c +++ b/src/mpls/mpls_netlink.c @@ -1,7 +1,7 @@ /* * Handle MPLS Netlink events * - * Copyright (c) 
2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015-2017 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -33,7 +33,7 @@ #include "mpls/mpls.h" #include "mpls_label_table.h" #include "netlink.h" -#include "nh.h" +#include "nh_common.h" #include "route.h" #include "route_flags.h" #include "route_v6.h" @@ -41,7 +41,7 @@ #include "util.h" #include "vplane_debug.h" #include "vplane_log.h" -#include "vrf.h" +#include "vrf_internal.h" #ifndef MIN # define MIN(a, b) ((a) < (b) ? (a) : (b)) @@ -103,7 +103,11 @@ nh_outlabels_copy(union next_hop_outlabels *old, union next_hop_outlabels *copy) return false; memcpy(labels, old->lbl_blk.labels, sizeof(label_t) * count); copy->lbl_blk.labels = labels; + copy->lbl_blk.lb_count = old->lbl_blk.lb_count; + } else { + memcpy(copy, old, sizeof(*copy)); } + return true; } @@ -202,7 +206,8 @@ static int mpls_route_change(const struct nlmsghdr *nlh, in_addr_t v4; struct in6_addr v6; } nh = { INADDR_ANY }; - union next_hop_v4_or_v6_ptr nhops; + struct ip_addr ip_addr; + struct next_hop *nhops; uint32_t size = 0; struct ifnet *oifp = NULL; uint32_t flags = 0; @@ -229,8 +234,7 @@ static int mpls_route_change(const struct nlmsghdr *nlh, * Delete any existing entry for this label in the incomplete cache. 
* If still incomplete it will get re-added with correct details */ - incomplete_route_del(VRF_DEFAULT_ID, - &in_label, + incomplete_route_del(&in_label, rtm->rtm_family, rtm->rtm_dst_len, rtm->rtm_table, @@ -269,14 +273,13 @@ static int mpls_route_change(const struct nlmsghdr *nlh, &size, &nh_type, &missing_ifp); if (missing_ifp) { - incomplete_route_add(VRF_DEFAULT_ID, - &in_label, - rtm->rtm_family, - rtm->rtm_dst_len, - rtm->rtm_table, - rtm->rtm_scope, - rtm->rtm_protocol, - nlh); + incomplete_route_add_nl(&in_label, + rtm->rtm_family, + rtm->rtm_dst_len, + rtm->rtm_table, + rtm->rtm_scope, + rtm->rtm_protocol, + nlh); return MNL_CB_OK; } } else { @@ -307,7 +310,7 @@ static int mpls_route_change(const struct nlmsghdr *nlh, if (tb[RTA_OIF]) { ifindex = cont_src_ifindex(cont_src, mnl_attr_get_u32(tb[RTA_OIF])); - oifp = ifnet_byifindex(ifindex); + oifp = dp_ifnet_byifindex(ifindex); } if (out_label_count > MAX_LABEL_STACK_DEPTH) { @@ -335,35 +338,40 @@ static int mpls_route_change(const struct nlmsghdr *nlh, if (!oifp) { flags |= RTF_SLOWPATH; if (!is_ignored_interface(ifindex)) { - incomplete_route_add(VRF_DEFAULT_ID, - &in_label, - rtm->rtm_family, - rtm->rtm_dst_len, - rtm->rtm_table, - rtm->rtm_scope, - rtm->rtm_protocol, - nlh); + incomplete_route_add_nl( + &in_label, + rtm->rtm_family, + rtm->rtm_dst_len, + rtm->rtm_table, + rtm->rtm_scope, + rtm->rtm_protocol, + nlh); return MNL_CB_OK; } } if (!via || via->rtvia_family == AF_INET) { - nhops.v4 = nexthop_create(oifp, nh.v4, - flags, - out_label_count, - hl_out_labels); + ip_addr.type = AF_INET; + ip_addr.address.ip_v4.s_addr = nh.v4; + nhops = nexthop_create(oifp, &ip_addr, + flags, + out_label_count, + hl_out_labels); } else if (via->rtvia_family == AF_INET6) { nh_type = NH_TYPE_V6GW; - nhops.v6 = nexthop6_create(oifp, - &nh.v6, - flags, - out_label_count, - hl_out_labels); + ip_addr.type = AF_INET6; + ip_addr.address.ip_v6 = nh.v6; + + nhops = nexthop_create(oifp, + &ip_addr, + flags, + out_label_count, + 
hl_out_labels); } else { RTE_LOG(INFO, MPLS, "unsupported via address in route change message: %u\n", via->rtvia_family); - nhops.v4 = NULL; + nhops = NULL; } } @@ -379,7 +387,7 @@ static int mpls_route_change(const struct nlmsghdr *nlh, via ? inet_ntop(via->rtvia_family, via->rtvia_addr, b3, sizeof(b3)) : "none"); - if (nhops.v4 == NULL) + if (nhops == NULL) RTE_LOG(ERR, MPLS, "No next-hops for route change message\n"); else @@ -388,10 +396,7 @@ static int mpls_route_change(const struct nlmsghdr *nlh, payload_type, nhops, size); - if (nh_type == NH_TYPE_V6GW) - free(nhops.v6); - else - free(nhops.v4); + free(nhops); } else { mpls_label_table_remove_label(global_label_space_id, in_label); } @@ -439,7 +444,7 @@ static int mpls_netconf_change(const struct nlmsghdr *nlh, return MNL_CB_OK; /* NETCONFA_IFINDEX_ALL */ unsigned int ifindex = cont_src_ifindex(cont_src, signed_ifindex); - ifp = ifnet_byifindex(ifindex); + ifp = dp_ifnet_byifindex(ifindex); if (!ifp) /* not local to DP */ return MNL_CB_OK; diff --git a/src/mstp.c b/src/mstp.c index 20ee1dfe..a5fc8253 100644 --- a/src/mstp.c +++ b/src/mstp.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019, AT&T Intellectual Property. + * Copyright (c) 2018-2020, AT&T Intellectual Property. * All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only @@ -10,8 +10,9 @@ #include #include "assert.h" -#include "bridge.h" +#include "if/bridge/bridge.h" #include "bridge_flags.h" +#include "dp_event.h" #include "if_var.h" #include "mstp.h" #include "vplane_debug.h" @@ -348,9 +349,6 @@ mstp_msti_update(struct ifnet *bridge, uint16_t mstid, bridge->if_name, mstid, mstidindex, update ? 
"update" : "create"); - assert(STP_INST_COUNT == MSTP_MSTI_COUNT); - assert(STP_INST_IST == MSTP_MSTI_IST); - struct mstp_bridge *mstp = sc->scbr_mstp; int ret = 0; @@ -1022,7 +1020,7 @@ mstp_setup_cmd(FILE *f, int argc, char **argv, int minargs, const char *func, } argc--, argv++; /* skip 'mstp' */ - bridge = ifnet_byifname(argv[0]); + bridge = dp_ifnet_byifname(argv[0]); if (!bridge || !bridge->if_softc || bridge->if_type != IFT_BRIDGE) { fprintf(f, "Unknown bridge: %s\n", argv[0]); @@ -1045,6 +1043,7 @@ mstp_setup_cmd(FILE *f, int argc, char **argv, int minargs, const char *func, cmd->revision = 0; cmd->mstid = -1; cmd->msti_vlan_count = 0; + cmd->msti_state = __STP_IFSTATE_MAX; return 0; } @@ -1093,3 +1092,46 @@ cmd_mstp_ut(FILE *f, int argc, char **argv) { return cmd_mstp(f, argc, argv); } + +static void +mstp_if_feat_mode_change(struct ifnet *ifp, + enum if_feat_mode_event event) +{ + struct bridge_softc *sc; + + if (!is_bridge(ifp)) + /* nothing to do */ + return; + + sc = ifp->if_softc; + + switch (event) { + case IF_FEAT_MODE_EVENT_L2_CREATED: { + const struct fal_attribute_t attr_list[] = { + {FAL_STP_ATTR_INSTANCE, .value.u8 = STP_INST_IST}, + {FAL_STP_ATTR_MSTI, .value.u16 = MSTP_MSTI_IST} + }; + + int rc = fal_stp_create(ifp->if_index, ARRAY_SIZE(attr_list), + &attr_list[0], &sc->stp); + if (rc < 0) + RTE_LOG(ERR, BRIDGE, + "FAL(%u): failed to create STP: '%s'\n", + ifp->if_index, strerror(-rc)); + + break; + } + case IF_FEAT_MODE_EVENT_L2_DELETED: + if (sc->stp) + fal_stp_delete(sc->stp); + break; + default: + break; + } +} + +static const struct dp_event_ops mstp_events = { + .if_feat_mode_change = mstp_if_feat_mode_change, +}; + +DP_STARTUP_EVENT_REGISTER(mstp_events); diff --git a/src/mstp.h b/src/mstp.h index 4e68a1c7..63c0720f 100644 --- a/src/mstp.h +++ b/src/mstp.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, AT&T Intellectual Property. + * Copyright (c) 2018-2019, AT&T Intellectual Property. * All rights reserved. 
* * SPDX-License-Identifier: LGPL-2.1-only @@ -10,8 +10,8 @@ #ifndef MSTP_H #define MSTP_H -#include "bridge.h" #include "bridge_flags.h" +#include "if/bridge/bridge.h" #include "if_var.h" /* @@ -22,6 +22,11 @@ #define MSTP_MSTI_COUNT (STP_INST_COUNT) #define MSTP_MSTI_IST (STP_INST_IST) +static_assert(STP_INST_COUNT == MSTP_MSTI_COUNT, + "stp and mstp values don't match"); +static_assert(STP_INST_IST == MSTP_MSTI_IST, + "stp and mstp values don't match"); + struct mstp_vlan2mstiindex { int8_t vlan2mstiindex[VLAN_N_VID]; struct rcu_head rcu; diff --git a/src/netinet/ip_mroute.c b/src/netinet/ip_mroute.c index f5d0e5d3..4eafb511 100644 --- a/src/netinet/ip_mroute.c +++ b/src/netinet/ip_mroute.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2021, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2014-2016 by Brocade Communications Systems, Inc. * All rights reserved. */ @@ -93,8 +93,8 @@ #include #include -#include "crypto/vti.h" -#include "gre.h" +#include "dp_event.h" +#include "if/gre.h" #include "if_var.h" #include "ip_funcs.h" #include "ip_icmp.h" @@ -105,16 +105,19 @@ #include "json_writer.h" #include "netinet/ip_mroute.h" #include "pd_show.h" -#include "pktmbuf.h" +#include "pktmbuf_internal.h" #include "route_flags.h" #include "snmp_mib.h" #include "urcu.h" #include "util.h" #include "vplane_debug.h" #include "vplane_log.h" -#include "vrf.h" +#include "vrf_internal.h" #include "fal.h" #include "ip_mcast_fal_interface.h" +#include "pl_common.h" +#include "pl_fused.h" +#include "npf.h" /* * Multicast packets are punted to the slow path when they cannot be @@ -131,24 +134,20 @@ static struct rte_meter_srtcm_params mfc_meter_params = { .ebs = PUNT_FUZZ /* effectively zero */ }; -#ifdef HAVE_RTE_METER_SRTCM_PROFILE_CONFIG static struct rte_meter_srtcm_profile mfc_meter_profile; -#endif - -static struct cds_lfht *viftable; static struct rte_timer mrt_stats_timer; -static void 
mrt_stats(struct rte_timer *, void *arg); +static void mrt_stats(struct rte_timer *rtetm, void *arg); #define UPCALL_TIMER 1 #ifdef UPCALL_TIMER static struct rte_timer expire_upcalls_ch; -static void expire_upcalls(struct rte_timer *, void *arg); +static void expire_upcalls(struct rte_timer *rtetm, void *arg); #endif -static void expire_mfc(struct vrf *, struct mfc *); -static int ip_mdq(struct mcast_vrf *, struct rte_mbuf *, struct ip *ip, - struct ifnet *, struct mfc *); +static void expire_mfc(struct vrf *vrf, struct mfc *rt); +static int ip_mdq(struct mcast_vrf *mvrf, struct rte_mbuf *m, + struct ip *ip, struct ifnet *in_ifp, struct mfc *rt); static void sg_cnt_update(struct vrf *vrf, struct mfc *rt, bool last_mfc_deletion); @@ -196,7 +195,7 @@ static void rt_show_subset(struct vrf *vrf, struct mfc *rt, void *arg) subset->vrf = vrf->v_id; jsonw_start_object(subset->json); jsonw_uint_field(subset->json, "vrf_id", - vrf_get_external_id(vrf->v_id)); + dp_vrf_get_external_id(vrf->v_id)); jsonw_end_object(subset->json); } @@ -309,7 +308,7 @@ static inline struct mfc *mfc_find(struct mcast_vrf *mvrf, } /* - * This MUST be called with a rcu_read_lock and only unlocked after vif is + * This MUST be called with a dp_rcu_read_lock and only unlocked after vif is * no longer used. 
*/ struct vif *get_vif_by_ifindex(unsigned int ifindex) @@ -317,8 +316,18 @@ struct vif *get_vif_by_ifindex(unsigned int ifindex) struct vif *vifp = NULL; struct cds_lfht_iter iter; struct cds_lfht_node *retnode; + struct ifnet *ifp = dp_ifnet_byifindex(ifindex); + struct vrf *vrf; + + if (!ifp) + return NULL; + + vrf = vrf_get_rcu(if_vrfid(ifp)); + if (!vrf) + return NULL; - cds_lfht_lookup(viftable, ifindex, vif_match, &ifindex, &iter); + cds_lfht_lookup(vrf->v_mvrf4.viftable, ifindex, vif_match, &ifindex, + &iter); retnode = cds_lfht_iter_get_node(&iter); if (retnode) { vifp = caa_container_of(retnode, struct vif, node); @@ -327,7 +336,7 @@ struct vif *get_vif_by_ifindex(unsigned int ifindex) } -void mrt4_purge(struct ifnet *ifp) +static void mrt4_purge(struct ifnet *ifp) { struct vif *vifp; unsigned int v_if_index; @@ -368,12 +377,12 @@ void mrt4_purge(struct ifnet *ifp) "%s is input interface so delete MFC.", ifp->if_name); expire_mfc(vrf, rt); - } else if (IF_ISSET(v_if_index, &rt->mfc_ifset)) { + } else if (IF_ISSET(vifp->v_vif_index, &rt->mfc_ifset)) { mfc_debug(vrf->v_id, &rt->mfc_origin, &rt->mfc_mcastgrp, "Removing %s from olist.", ifp->if_name); - IF_CLR(v_if_index, &rt->mfc_ifset); + IF_CLR(vifp->v_vif_index, &rt->mfc_ifset); } } del_vif(v_if_index); @@ -385,8 +394,22 @@ void mrt4_purge(struct ifnet *ifp) int add_vif(int ifindex) { struct vif *vifp; - struct ifnet *ifp; + struct ifnet *ifp = dp_ifnet_byifindex(ifindex); struct cds_lfht_node *retnode; + struct cds_lfht *viftable; + unsigned char vif_index; + struct vrf *vrf; + + if (!ifp) { + DP_DEBUG(MULTICAST, ERR, MCAST, + "Failure adding IPv4 VIF index %d.\n", ifindex); + return -EINVAL; + } + + vrf = vrf_get_rcu(if_vrfid(ifp)); + + if (!vrf) + return -EINVAL; if (ifindex <= 0) return -EINVAL; @@ -394,37 +417,47 @@ int add_vif(int ifindex) if (get_vif_by_ifindex(ifindex)) return -EEXIST; - DP_DEBUG(MULTICAST, INFO, MCAST, "Adding IPv4 VIF %s.\n", - ifnet_indextoname(ifindex)); + viftable = 
vrf->v_mvrf4.viftable; - vifp = malloc(sizeof(struct vif)); - if (!vifp) - return -ENOMEM; - memset(vifp, 0, sizeof(*vifp)); + if (!viftable) + return -EINVAL; + + if (mcast_iftable_get_free_slot(&vrf->v_mvrf4.mfc_ifset, ifindex, + &vif_index) != 0) + return -EDQUOT; - ifp = ifnet_byifindex(ifindex); + DP_DEBUG(MULTICAST, INFO, MCAST, "Adding IPv4 VIF to slot %d (%d).\n", + vif_index, ifindex); + + vifp = calloc(1, sizeof(struct vif)); + if (!vifp) { + IF_CLR(vif_index, &vrf->v_mvrf4.mfc_ifset); + return -ENOMEM; + } vifp->v_if_index = ifindex; + vifp->v_vif_index = vif_index; vifp->v_ifp = ifp; vifp->v_threshold = 1; vifp->v_flags = VIFF_USE_IFINDEX; - vifp->v_flags |= (ifp) ? 0:VIFF_REGISTER; - vifp->v_flags |= (ifp && is_tunnel(ifp)) ? VIFF_TUNNEL:0; + vifp->v_flags |= (is_tunnel_pimreg(ifp)) ? VIFF_REGISTER:0; + vifp->v_flags |= (is_tunnel(ifp)) ? VIFF_TUNNEL:0; cds_lfht_node_init(&vifp->node); retnode = cds_lfht_add_replace(viftable, vifp->v_if_index, vif_match, &vifp->v_if_index, &vifp->node); if (retnode) { vifp = caa_container_of(retnode, struct vif, node); + IF_CLR(vifp->v_vif_index, &vrf->v_mvrf4.mfc_ifset); call_rcu(&vifp->rcu_head, vif_free); } - if (ifp) { - ip_mcast_fal_int_enable(vifp, viftable); - if (!(ifp->if_flags & IFF_MULTICAST)) - return -EOPNOTSUPP; - if_allmulti(ifp, 1); - } + + ip_mcast_fal_int_enable(vifp, viftable); + if (!(ifp->if_flags & IFF_MULTICAST)) + return -EOPNOTSUPP; + if_allmulti(ifp, 1); + return 0; } @@ -435,6 +468,16 @@ int add_vif(int ifindex) int del_vif(vifi_t vifi) { struct vif *vifp; + struct ifnet *ifp = dp_ifnet_byifindex(vifi); + struct vrf *vrf; + + if (!ifp) + return -EINVAL; + + vrf = vrf_get_rcu(if_vrfid(ifp)); + + if (!vrf) + return -EINVAL; vifp = get_vif_by_ifindex(vifi); if (!vifp) @@ -447,8 +490,9 @@ int del_vif(vifi_t vifi) if (vifp->v_ifp) if_allmulti(vifp->v_ifp, 0); - if (!cds_lfht_del(viftable, &vifp->node)) { - ip_mcast_fal_int_disable(vifp, viftable); + IF_CLR(vifp->v_vif_index, 
&vrf->v_mvrf4.mfc_ifset); + if (!cds_lfht_del(vrf->v_mvrf4.viftable, &vifp->node)) { + ip_mcast_fal_int_disable(vifp, vrf->v_mvrf4.viftable); call_rcu(&vifp->rcu_head, vif_free); } return 0; @@ -460,12 +504,22 @@ static void debug_update_mfc_count(vrfid_t vrf_id, struct mfc *rt, struct vif *vifp; struct cds_lfht_iter iter; int i; + struct vrf *vrf = vrf_get_rcu(vrf_id); + struct cds_lfht *viftable; + + if (vrf == NULL) { + mfc_debug(vrf_id, &rt->mfc_origin, &rt->mfc_mcastgrp, + "MFC invalid vrf ID"); + return; + } + + viftable = vrf->v_mvrf4.viftable; mfc_debug(vrf_id, &rt->mfc_origin, &rt->mfc_mcastgrp, "MFC count parameters being updated/initialised."); cds_lfht_for_each_entry(viftable, &iter, vifp, node) { - i = vifp->v_if_index; + i = vifp->v_vif_index; if (IF_ISSET(i, &rt->mfc_ifset) != IF_ISSET(i, &mfccp->mfcc_ifset)) { @@ -474,7 +528,7 @@ static void debug_update_mfc_count(vrfid_t vrf_id, struct mfc *rt, mfc_debug(vrf_id, &rt->mfc_origin, &rt->mfc_mcastgrp, "%s added to olist (new olist size is %u).", - ifnet_indextoname(i), + ifnet_indextoname(vifp->v_if_index), rt->mfc_olist_size); } else { if (rt->mfc_olist_size) @@ -482,14 +536,14 @@ static void debug_update_mfc_count(vrfid_t vrf_id, struct mfc *rt, mfc_debug(vrf_id, &rt->mfc_origin, &rt->mfc_mcastgrp, "%s removed from olist (new olist size is %u).", - ifnet_indextoname(i), + ifnet_indextoname(vifp->v_if_index), rt->mfc_olist_size); } } else if (IF_ISSET(i, &rt->mfc_ifset)) { mfc_debug(vrf_id, &rt->mfc_origin, &rt->mfc_mcastgrp, "%s already present in olist (size is %u).", - ifnet_indextoname(i), + ifnet_indextoname(vifp->v_if_index), rt->mfc_olist_size); } } @@ -498,13 +552,17 @@ static void debug_update_mfc_count(vrfid_t vrf_id, struct mfc *rt, * update an mfc entry without resetting counters and S,G addresses. 
*/ static void update_mfc_params(vrfid_t vrf_id, struct mfc *rt, - struct vmfcctl *mfccp) + struct vmfcctl *mfccp) { int controller = 0; struct vif *vifp; + struct vrf *vrf = vrf_get_rcu(vrf_id); struct cds_lfht_iter iter; int i; + if (!vrf) + return; + if (rt->mfc_parent != mfccp->mfcc_parent) { mfc_debug(vrf_id, &rt->mfc_origin, &rt->mfc_mcastgrp, "Input interface changed from %s (%u) to %s (%u)", @@ -517,8 +575,8 @@ static void update_mfc_params(vrfid_t vrf_id, struct mfc *rt, rt->mfc_parent = mfccp->mfcc_parent; rt->mfc_ifset = mfccp->mfcc_ifset; - cds_lfht_for_each_entry(viftable, &iter, vifp, node) { - i = vifp->v_if_index; + cds_lfht_for_each_entry(vrf->v_mvrf4.viftable, &iter, vifp, node) { + i = vifp->v_vif_index; if (!IF_ISSET(i, &rt->mfc_ifset)) continue; @@ -529,7 +587,7 @@ static void update_mfc_params(vrfid_t vrf_id, struct mfc *rt, mfc_debug(vrf_id, &rt->mfc_origin, &rt->mfc_mcastgrp, "%s is register VIF.", - ifnet_indextoname(i)); + ifnet_indextoname(vifp->v_if_index)); continue; } @@ -538,12 +596,12 @@ static void update_mfc_params(vrfid_t vrf_id, struct mfc *rt, * Punt stream to controller to let PIM do wrong-vif * processing to fix this problem. 
*/ - if (i == rt->mfc_parent) { + if (vifp->v_if_index == rt->mfc_parent) { controller++; mfc_debug(vrf_id, &rt->mfc_origin, &rt->mfc_mcastgrp, "%s is both incoming and outgoing interface.", - ifnet_indextoname(i)); + ifnet_indextoname(vifp->v_if_index)); } } @@ -585,14 +643,10 @@ static bool init_mfc_params(vrfid_t vrf_id, debug_update_mfc_count(vrf_id, rt, mfccp); update_mfc_params(vrf_id, rt, mfccp); -#ifdef HAVE_RTE_METER_SRTCM_PROFILE_CONFIG ret = rte_meter_srtcm_profile_config(&mfc_meter_profile, &mfc_meter_params); if (ret == 0) ret = rte_meter_srtcm_config(&rt->meter, &mfc_meter_profile); -#else - ret = rte_meter_srtcm_config(&rt->meter, &mfc_meter_params); -#endif if (ret != 0) { RTE_LOG(NOTICE, MCAST, "Failure configuring metering algorithm; pkts will not be punted to slow path (Err = %d)\n", @@ -648,6 +702,10 @@ ip_mroute_add_fal_objects(vrfid_t vrf_id, struct vmfcctl *mfccp, struct mfc *rt) { enum pd_obj_state old_pd_state; int rc; + struct vrf *vrf = vrf_get_rcu(vrf_id); + + if (!vrf) + return; old_pd_state = rt->mfc_pd_state; if (rt->mfc_fal_obj) { @@ -655,7 +713,7 @@ ip_mroute_add_fal_objects(vrfid_t vrf_id, struct vmfcctl *mfccp, struct mfc *rt) "Updating FAL object 0x%lx for mroute", rt->mfc_fal_obj); rc = fal_ip4_upd_mroute(rt->mfc_fal_obj, rt, mfccp, - viftable); + vrf->v_mvrf4.viftable); if (rc && rc != -EOPNOTSUPP) mfc_debug(vrf_id, &rt->mfc_origin, &rt->mfc_mcastgrp, @@ -667,7 +725,8 @@ ip_mroute_add_fal_objects(vrfid_t vrf_id, struct vmfcctl *mfccp, struct mfc *rt) &rt->mfc_mcastgrp, "Creating FAL object for mroute"); - rc = fal_ip4_new_mroute(vrf_id, mfccp, rt, viftable); + rc = fal_ip4_new_mroute(vrf_id, mfccp, rt, + vrf->v_mvrf4.viftable); if (rc && rc != -EOPNOTSUPP) mfc_debug(vrf_id, &rt->mfc_origin, &rt->mfc_mcastgrp, @@ -798,7 +857,7 @@ int del_mfc(vrfid_t vrf_id, struct vmfcctl *mfccp) */ static bool ip_punt_rate_limit(struct mfc *rt) { - enum rte_meter_color color; + enum rte_color color; #ifdef PUNT_RATE_LIMIT_DEBUG char 
oa[INET_ADDRSTRLEN]; @@ -808,12 +867,10 @@ static bool ip_punt_rate_limit(struct mfc *rt) inet_ntop(AF_INET, &rt->mfc_mcastgrp, ga, sizeof(ga)); #endif color = rte_meter_srtcm_color_blind_check(&rt->meter, -#ifdef HAVE_RTE_METER_SRTCM_PROFILE_CONFIG &mfc_meter_profile, -#endif rte_rdtsc(), PUNT_1PKT); - if (color == e_RTE_METER_GREEN) { + if (color == RTE_COLOR_GREEN) { rt->mfc_punted++; #ifdef PUNT_RATE_LIMIT_DEBUG @@ -822,7 +879,8 @@ static bool ip_punt_rate_limit(struct mfc *rt) (uint)rt->mfc_punts_dropped); #endif return false; - } else if (color == e_RTE_METER_YELLOW) { + } + if (color == RTE_COLOR_YELLOW) { rt->mfc_punted++; #ifdef PUNT_RATE_LIMIT_DEBUG @@ -831,16 +889,16 @@ static bool ip_punt_rate_limit(struct mfc *rt) (uint)rt->mfc_punted); #endif return false; - } else { - rt->mfc_punts_dropped++; + } + + rt->mfc_punts_dropped++; #ifdef PUNT_RATE_LIMIT_DEBUG - RTE_LOG(INFO, METER, "RTE_METER_RED %s %s drop %d, punt %d\n", - oa, ga, (uint)rt->mfc_punts_dropped, - (uint)rt->mfc_punted); + RTE_LOG(INFO, METER, "RTE_METER_RED %s %s drop %d, punt %d\n", + oa, ga, (uint)rt->mfc_punts_dropped, + (uint)rt->mfc_punted); #endif - return true; - } + return true; } /* @@ -960,8 +1018,8 @@ static int ip_mforward(vrfid_t vrf_id, struct mcast_vrf *mvrf, #ifdef UPCALL_TIMER /* Clean up the cache entry if upcall is not serviced */ -static void expire_upcalls(__attribute__((unused)) struct rte_timer *rtetm, - __attribute__((unused)) void *arg) +static void expire_upcalls(__unused struct rte_timer *rtetm, + __unused void *arg) { struct mfc *rt; struct cds_lfht_iter iter; @@ -985,78 +1043,53 @@ static void expire_upcalls(__attribute__((unused)) struct rte_timer *rtetm, } #endif -static int mcast_ethernet_send(struct ifnet *in_ifp, - struct vif *out_vifp, - struct rte_mbuf *m, int plen) -{ - struct iphdr *ip; - struct ether_hdr *eth_hdr; - - ip = iphdr(m); - decrement_ttl(ip); - - mcast_dst_eth_addr_t eth_daddr = mcast_dst_eth_addr(ip->daddr); - eth_hdr = rte_pktmbuf_mtod(m, 
struct ether_hdr *); - ether_addr_copy(&eth_daddr.as_addr, &eth_hdr->d_addr); - - mc_ip_output(in_ifp, m, out_vifp->v_ifp, ip); - out_vifp->v_pkt_out++; - out_vifp->v_bytes_out += plen; - return 0; -} - static void mcast_tunnel_send(struct ifnet *in_ifp, struct vif *out_vifp, struct rte_mbuf *m, int plen) { struct ifnet *out_ifp; - struct vrf *vrf; - struct iphdr *ip; struct mcast_mgre_tun_walk_ctx mgre_tun_walk_ctx; out_ifp = out_vifp->v_ifp; - ip = iphdr(m); - switch (out_ifp->if_type) { - case IFT_TUNNEL_GRE: - decrement_ttl(ip); + /* Call GRE API which will invoke specified callback + * for each end point in P2P or P2MP tunnel + */ + mgre_tun_walk_ctx.proto = ETH_P_IP; + mgre_tun_walk_ctx.mbuf = m; + mgre_tun_walk_ctx.in_ifp = in_ifp; + mgre_tun_walk_ctx.pkt_len = plen; + mgre_tun_walk_ctx.out_vif = out_vifp; + mgre_tun_walk_ctx.hdr_len = sizeof(struct iphdr); + gre_tunnel_peer_walk(out_ifp, mcast_mgre_tunnel_endpoint_send, + &mgre_tun_walk_ctx); + /* + * Decrement ref count on original mbuf as new mbuf + * was transmitted in replication loop. + */ + rte_pktmbuf_free(m); +} - /* Call GRE API which will invoke specified callback - * for each end point in P2P or P2MP tunnel - */ - mgre_tun_walk_ctx.proto = ETH_P_IP; - mgre_tun_walk_ctx.mbuf = m; - mgre_tun_walk_ctx.in_ifp = in_ifp; - mgre_tun_walk_ctx.pkt_len = plen; - mgre_tun_walk_ctx.out_vif = out_vifp; - mgre_tun_walk_ctx.hdr_len = sizeof(struct iphdr); - gre_tunnel_peer_walk(out_ifp, - mcast_mgre_tunnel_endpoint_send, - &mgre_tun_walk_ctx); - /* - * Decrement ref count on original mbuf as new mbuf - * was transmitted in replication loop. - */ - rte_pktmbuf_free(m); - return; - case IFT_TUNNEL_VTI: - decrement_ttl(ip); - out_vifp->v_pkt_out++; - out_vifp->v_bytes_out += plen; - IPSTAT_INC_VRF(if_vrf(in_ifp), IPSTATS_MIB_OUTMCASTPKTS); - vti_tunnel_out(in_ifp, out_ifp, m, ETH_P_IP); - return; - default: - /* - * Punt for any tunnels unsupported in data plane.
- * Note that if packet successfully switched out - * of some other interfaces in the olist in the - * data plane, a duplicate packet may be sent out - * of these interfaces by the kernel. Essentially, - * as things stand, option is potentially duplicate - * packets on some interfaces or fail to transmit - * packets on other interfaces in the olist. - */ - vrf = vrf_get_rcu(if_vrfid(in_ifp)); +/* + * Packet transmission routine for VIF in olist. Select appropriate send + * function based on underlying interface type. + */ +static void vif_send(struct ifnet *in_ifp, struct vif *out_vifp, + struct rte_mbuf *m, int plen) +{ + struct ifnet *out_ifp = out_vifp->v_ifp; + + /* + * Punt for any tunnels unsupported in data plane. + * + * Note that if a packet is successfully switched out of some + * other interfaces in the olist in the data plane, a duplicate + * packet may be sent out of these interfaces by the kernel. + * Essentially, as things stand, the option is to potentially + * duplicate packets on some interfaces or fail to transmit + * packets on other interfaces in the olist. + */ + if (unlikely(out_ifp->if_type == IFT_TUNNEL_OTHER)) { + struct vrf *vrf = vrf_get_rcu(if_vrfid(in_ifp)); if (vrf) { struct mcast_vrf *mvrf = &vrf->v_mvrf4; MRTSTAT_INC(mvrf, mrts_slowpath); @@ -1064,30 +1097,55 @@ static void mcast_tunnel_send(struct ifnet *in_ifp, struct vif *out_vifp, out_vifp->v_pkt_out_punt++; out_vifp->v_bytes_out_punt += plen; mcast_ip_deliver(in_ifp, m); + return; } -} -/* - * Packet transmission routine for VIF in olist. Select appropriate send - * function based on underlying interface type. - */ -static void vif_send(struct ifnet *in_ifp, struct vif *out_vifp, - struct rte_mbuf *m, int plen) -{ - if (unlikely(out_vifp->v_flags & VIFF_TUNNEL)) { + struct iphdr *ip = iphdr(m); + + /* + * Time to decrement ttl since packet is being forwarded, not + * just punted. 
It was previously tested to ensure it is greater + * than 1 so there is no need to test for ttl expire here. + */ + decrement_ttl(ip); + + if (unlikely(out_ifp->if_type == IFT_TUNNEL_GRE && + !(out_ifp->if_flags & IFF_NOARP))) { mcast_tunnel_send(in_ifp, out_vifp, m, plen); return; } - mcast_ethernet_send(in_ifp, out_vifp, m, plen); + /* OIL replication counts */ + out_vifp->v_pkt_out++; + out_vifp->v_bytes_out += plen; + + /* + * Send the packet down the pipeline graph. + */ + struct next_hop nh = { + .flags = RTF_MULTICAST, + .u.ifp = out_ifp, + }; + struct pl_packet pl_pkt = { + .mbuf = m, + .l2_pkt_type = pkt_mbuf_get_l2_traffic_type(m), + .l3_hdr = ip, + .in_ifp = in_ifp, + .out_ifp = out_ifp, + .nxt.v4 = &nh, + .l2_proto = ETH_P_IP, + .npf_flags = NPF_FLAG_CACHE_EMPTY, + }; + + pipeline_fused_ipv4_out(&pl_pkt); } /* * Packet forwarding routine once entry in the cache is made */ static int ip_mdq(struct mcast_vrf *mvrf, struct rte_mbuf *m, struct ip *ip, - struct ifnet *ifp, struct mfc *rt) + struct ifnet *in_ifp, struct mfc *rt) { struct vif *vifp; int plen = ntohs(ip->ip_len); @@ -1096,7 +1154,7 @@ static int ip_mdq(struct mcast_vrf *mvrf, struct rte_mbuf *m, struct ip *ip, /* Don't forward if it didn't arrive on parent vif for its origin. 
*/ vifp = get_vif_by_ifindex(rt->mfc_parent); - if (!vifp || (vifp->v_if_index != ifp->if_index)) { + if (!vifp || (vifp->v_if_index != in_ifp->if_index)) { MRTSTAT_INC(mvrf, mrts_wrong_if); ++rt->mfc_wrong_if; @@ -1104,10 +1162,9 @@ static int ip_mdq(struct mcast_vrf *mvrf, struct rte_mbuf *m, struct ip *ip, if (ip_punt_rate_limit(rt)) { MRTSTAT_INC(mvrf, mrts_upq_ovflw); return RTF_BLACKHOLE; - } else { - rt->mfc_ctrl_pkts++; - return RTF_SLOWPATH; } + rt->mfc_ctrl_pkts++; + return RTF_SLOWPATH; } /* Rate limit this punted packet */ @@ -1116,9 +1173,8 @@ static int ip_mdq(struct mcast_vrf *mvrf, struct rte_mbuf *m, struct ip *ip, if (ip_punt_rate_limit(rt)) { MRTSTAT_INC(mvrf, mrts_upq_ovflw); return RTF_BLACKHOLE; - } else { - return RTF_SLOWPATH; } + return RTF_SLOWPATH; } vifp->v_pkt_in++; @@ -1133,22 +1189,27 @@ static int ip_mdq(struct mcast_vrf *mvrf, struct rte_mbuf *m, struct ip *ip, if (!md) return -ENOBUFS; - rte_pktmbuf_adj(md, pktmbuf_l2_len(md) + sizeof(struct iphdr)); + rte_pktmbuf_adj(md, dp_pktmbuf_l2_len(md) + sizeof(struct iphdr)); /* For each dataplane vif, forward if: * - the ifset bit is set for this interface. 
* - there are group members downstream on interface */ - cds_lfht_for_each_entry(viftable, &iter, vifp, node) { - if (IF_ISSET(vifp->v_if_index, &rt->mfc_ifset) && + cds_lfht_for_each_entry(mvrf->viftable, &iter, vifp, node) { + if (IF_ISSET(vifp->v_vif_index, &rt->mfc_ifset) && ip->ip_ttl > vifp->v_threshold) { - if (!vifp->v_ifp) + struct ifnet *out_ifp = vifp->v_ifp; + + if (!out_ifp) + continue; + const bool if_up = (out_ifp->if_flags & IFF_UP); + if (!if_up) continue; mh = mcast_create_l2l3_header(m, md, sizeof(struct iphdr)); if (mh) { /* send the newly created packet chain */ - vif_send(ifp, vifp, mh, plen); + vif_send(in_ifp, vifp, mh, plen); } else { rte_pktmbuf_free(md); return -ENOBUFS; @@ -1209,7 +1270,7 @@ static void sg_cnt_update(struct vrf *vrf, struct mfc *rt, flags = 1; } - send_sg_cnt(&req, vrf_get_external_id(vrf->v_id), flags); + send_sg_cnt(&req, dp_vrf_get_external_id(vrf->v_id), flags); } /* @@ -1231,8 +1292,8 @@ static void sg_cnt_dump(void) } } -static void mrt_stats(__attribute__((unused)) struct rte_timer *rtetm, - __attribute__((unused)) void *arg) +static void mrt_stats(__unused struct rte_timer *rtetm, + __unused void *arg) { sg_cnt_dump(); } @@ -1258,12 +1319,13 @@ void mrt_dump(FILE *f, struct vrf *vrf) jsonw_start_array(wr); memset(olist_buf, 0, (IFNAMSIZ+1) * MFC_MAX_MVIFS); - + cds_lfht_for_each_entry(vrf->v_mvrf4.mfchashtbl, &iter, rt, node) { olist_index = 0; - cds_lfht_for_each_entry(viftable, &iter_vif, vifp, node) { - if (IF_ISSET(vifp->v_if_index, &rt->mfc_ifset)) { + cds_lfht_for_each_entry(vrf->v_mvrf4.viftable, &iter_vif, + vifp, node) { + if (IF_ISSET(vifp->v_vif_index, &rt->mfc_ifset)) { olist_index += snprintf(olist_buf + olist_index, sizeof(olist_buf) - olist_index, @@ -1370,7 +1432,7 @@ void mrt_stat(FILE *f, struct vrf *vrf) jsonw_destroy(&wr); } -void mvif_dump(FILE *f, __attribute__((unused)) struct vrf *vrf) +void mvif_dump(FILE *f, __unused struct vrf *vrf) { struct cds_lfht_iter iter; struct vif *vifp; @@ 
-1383,11 +1445,11 @@ void mvif_dump(FILE *f, __attribute__((unused)) struct vrf *vrf) jsonw_name(wr, "mif"); jsonw_start_array(wr); - cds_lfht_for_each_entry(viftable, &iter, vifp, node) { + cds_lfht_for_each_entry(vrf->v_mvrf4.viftable, &iter, vifp, node) { jsonw_start_object(wr); jsonw_string_field(wr, "interface", vifp->v_ifp ? vifp->v_ifp->if_name : "non-vplane"); - jsonw_int_field(wr, "if_index", vifp->v_if_index); + jsonw_int_field(wr, "if_index", vifp->v_vif_index); jsonw_int_field(wr, "threshold", vifp->v_threshold); jsonw_int_field(wr, "flags", vifp->v_flags); jsonw_uint_field(wr, "pkt_in", vifp->v_pkt_in); @@ -1466,6 +1528,15 @@ int mcast_vrf_init(struct vrf *vrf) vrf->v_mvrf4.v_fal_rpf_lst = NULL; vrf->v_mvrf4.mfchashtbl = mfctbl; + vrf->v_mvrf4.viftable = cds_lfht_new(MFC_MAX_MVIFS, MFC_MAX_MVIFS, + MFC_MAX_MVIFS, CDS_LFHT_ACCOUNTING, + NULL); + if (!vrf->v_mvrf4.viftable) { + RTE_LOG(ERR, MCAST, "%s: cds_lfht_new viftable failed vrf %s\n", + __func__, vrf->v_name); + return -1; + } + memset(&(vrf->v_mvrf4.mfc_ifset), 0, sizeof(struct if_set)); return 0; } @@ -1482,28 +1553,30 @@ void mcast_vrf_uninit(struct vrf *vrf) dp_ht_destroy_deferred(vrf->v_mvrf4.mfchashtbl); vrf->v_mvrf4.mfchashtbl = NULL; + + dp_ht_destroy_deferred(vrf->v_mvrf4.viftable); + vrf->v_mvrf4.viftable = NULL; + } -int mcast_stop_ipv4(void) +static void mcast_stop_ipv4(void) { #ifdef UPCALL_TIMER rte_timer_stop(&expire_upcalls_ch); #endif rte_timer_stop(&mrt_stats_timer); - - if (cds_lfht_destroy(viftable, NULL)) - RTE_LOG(ERR, MCAST, - "Destroying IPv4 VIF table failed.\n"); - - return 0; } -void mcast_init_ipv4(void) +static const struct ift_ops pimreg_if_ops = { +}; + +static void mcast_init_ipv4(void) { - viftable = cds_lfht_new(MFC_MAX_MVIFS, MFC_MAX_MVIFS, MFC_MAX_MVIFS, - CDS_LFHT_ACCOUNTING, NULL); - if (!viftable) - rte_panic("%s: cds_lfht_new viftable failed\n", __func__); + int ret; + + ret = if_register_type(IFT_TUNNEL_PIMREG, &pimreg_if_ops); + if (ret < 0) + 
rte_panic("Failed to register PIMREG type: %s", strerror(-ret)); #ifdef UPCALL_TIMER rte_timer_init(&expire_upcalls_ch); @@ -1514,3 +1587,26 @@ void mcast_init_ipv4(void) rte_timer_reset(&mrt_stats_timer, SG_CNT_INTERVAL, PERIODICAL, rte_get_master_lcore(), mrt_stats, NULL); } + +static void mrt4_if_delete(struct ifnet *ifp) +{ + del_vif(ifp->if_index); +} + +static void mrt4_if_admin_status_change(struct ifnet *ifp, bool up) +{ + /* not interested in admin-up events */ + if (up) + return; + + mrt4_purge(ifp); +} + +static const struct dp_event_ops ip_mroute_events = { + .init = mcast_init_ipv4, + .uninit = mcast_stop_ipv4, + .if_delete = mrt4_if_delete, + .if_admin_status_change = mrt4_if_admin_status_change, +}; + +DP_STARTUP_EVENT_REGISTER(ip_mroute_events); diff --git a/src/netinet/ip_mroute.h b/src/netinet/ip_mroute.h index 8661b411..95f079fa 100644 --- a/src/netinet/ip_mroute.h +++ b/src/netinet/ip_mroute.h @@ -113,6 +113,8 @@ struct mrtstat { struct mcast_vrf { struct cds_lfht *mfchashtbl; struct mrtstat stat; + struct cds_lfht *viftable; + struct if_set mfc_ifset; /* set of multicast ifs */ fal_object_t v_fal_obj; /* fal object */ fal_object_t v_fal_rpf; /* fal rpf group object */ struct fal_object_list_t *v_fal_rpf_lst;/* fal rpf members object */ @@ -133,6 +135,7 @@ struct vif { unsigned char v_threshold; /* min ttl required to fwd on vif */ struct ifnet *v_ifp; /* pointer to interface */ uint32_t v_if_index; /* interface device index */ + unsigned char v_vif_index; /* per vrf vif index */ uint64_t v_pkt_in; /* # pkts in on interface */ uint64_t v_pkt_out; /* # pkts out on interface */ uint64_t v_pkt_out_punt; /* # pkts punted at output intf */ diff --git a/src/netinet6/in6.c b/src/netinet6/in6.c index 1c407df0..66396dc0 100644 --- a/src/netinet6/in6.c +++ b/src/netinet6/in6.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved.
* Copyright (c) 2014-2016 by Brocade Communications Systems, Inc. * All rights reserved. */ @@ -86,7 +86,7 @@ #include "in6_var.h" #include "ip6_funcs.h" #include "pipeline/nodes/pl_nodes_common.h" -#include "pktmbuf.h" +#include "pktmbuf_internal.h" #include "pl_node.h" #include "urcu.h" @@ -153,7 +153,8 @@ in6ifa_ifplocaladdr(const struct ifnet *ifp, const struct in6_addr *addr) continue; struct sockaddr_in6 *sin6 = satosin6(sa); - if (in6_prefix_eq(addr, &sin6->sin6_addr, ifa->ifa_prefixlen)) + if (dp_in6_prefix_eq(addr, &sin6->sin6_addr, + ifa->ifa_prefixlen)) return ifa; } @@ -202,6 +203,7 @@ in6_domifattach(struct ifnet *ifp) llt = lltable_new(ifp); + llt->lle_refresh_expire = rte_get_timer_cycles() + rte_get_timer_hz(); rte_timer_reset(&llt->lle_timer, rte_get_timer_hz(), SINGLE, rte_get_master_lcore(), in6_lladdr_timer, llt); @@ -310,7 +312,7 @@ uint16_t ip6_findpayload(struct rte_mbuf *m, uint16_t *offset) struct ip6_ext *ip6e; struct ip6_frag *fh; - uint16_t off = pktmbuf_l2_len(m) + sizeof(*ip6); + uint16_t off = dp_pktmbuf_l2_len(m) + sizeof(*ip6); uint16_t proto = ip6->ip6_nxt; for (;;) { diff --git a/src/netinet6/in6.h b/src/netinet6/in6.h index ec327031..a5ae2d81 100644 --- a/src/netinet6/in6.h +++ b/src/netinet6/in6.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. 
* @@ -74,10 +74,10 @@ #include #include +#include "ip.h" + struct ifnet; -#define IPV6_VERSION 0x60 -#define IPV6_VERSION_MASK 0xf0 #if __BYTE_ORDER == __BIG_ENDIAN #define IPV6_FLOWINFO_MASK 0x0fffffff /* flow info (28 bits) */ #define IPV6_FLOWLABEL_MASK 0x000fffff /* flow label (20 bits) */ @@ -90,9 +90,6 @@ struct ifnet; # error "Please include " #endif - -#define IPV6_DEFAULT_HOPLIMIT 64 - /* * Local definition for masks */ @@ -191,7 +188,7 @@ static inline void *ip6_exthdr(struct rte_mbuf *m, uint16_t offs, size_t len) struct if_addr *in6ifa_ifplocaladdr(const struct ifnet *ifp, const struct in6_addr *addr); struct if_addr *in6_ifawithifp(struct ifnet *ifp, struct in6_addr *dst); struct lltable *in6_domifattach(struct ifnet *ifp); -uint32_t in6_addr_hash(const void *key, uint32_t key_len, uint32_t init_val); +uint32_t in6_addr_hash(const void *key, uint32_t key_len, uint32_t seed); uint16_t ip6_findprevoff(struct rte_mbuf *m); uint16_t ip6_findpayload(struct rte_mbuf *m, uint16_t *offset); diff --git a/src/netinet6/in6_var.h b/src/netinet6/in6_var.h index 26ed5a53..1ffa54dd 100644 --- a/src/netinet6/in6_var.h +++ b/src/netinet6/in6_var.h @@ -28,7 +28,7 @@ */ /* - * Copyright (c) 2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2018-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 1985, 1986, 1993 * The Regents of the University of California. All rights reserved. * @@ -90,30 +90,6 @@ #define IN6_IFF_AUTOCONF 0x40 /* autoconfigurable address. */ #define IN6_IFF_TEMPORARY 0x80 /* temporary (anonymous) address. 
*/ - -/* fast prefix compare */ -static inline bool in6_prefix_eq(const struct in6_addr *a1, - const struct in6_addr *a2, - unsigned int prefix_len) -{ - const uint32_t *p1 = a1->s6_addr32; - const uint32_t *p2 = a2->s6_addr32; - - while (prefix_len >= 32) { - if (*p1++ != *p2++) - return false; - prefix_len -= 32; - } - - if (likely(prefix_len == 0)) - return true; - - uint32_t m = htonl(~0ul << (32 - prefix_len)); - - /* find bits that differ, and mask in network byte order */ - return ((*p1 ^ *p2) & m) == 0; -} - /* fast IPv6 prefix copy */ static inline void in6_prefix_cpy(struct in6_addr *dest, const struct in6_addr *src, diff --git a/src/netinet6/ip6_commands.c b/src/netinet6/ip6_commands.c index 44e6ce9f..1415be87 100644 --- a/src/netinet6/ip6_commands.c +++ b/src/netinet6/ip6_commands.c @@ -1,30 +1,40 @@ /* * IPv6 Commands * - * Copyright (c) 2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2018-2019, AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only */ -#include -#include -#include -#include -#include -#include -#include - -#include "commands.h" #include "ip6_funcs.h" +#include "vplane_log.h" + +#include "protobuf.h" +#include "protobuf/IP6RedirectsConfig.pb-c.h" -int cmd_ip6(FILE *f, int argc, char **argv) +static int +ip6_cmd_handler(struct pb_msg *msg) { - if (argc == 3 && !strcmp(argv[1], "redirects")) { - bool enable = !strcmp(argv[2], "enable"); + void *payload = (void *)((char *)msg->msg); + int len = msg->msg_len; - ip6_redirects_set(enable); - return 0; + IP6RedirectsConfig *smsg = + ip6_redirects_config__unpack(NULL, len, payload); + + if (!smsg) { + RTE_LOG(ERR, DATAPLANE, + "failed to read IP6RedirectsConfig protobuf command\n"); + return -1; } - fprintf(f, "ip6 command invalid\n"); - return -1; + + ip6_redirects_set(smsg->enable_redirects); + + ip6_redirects_config__free_unpacked(smsg, NULL); + + return 0; } + +PB_REGISTER_CMD(ip6_cmd) = { + .cmd = "vyatta:ip6", + .handler = 
ip6_cmd_handler, +}; diff --git a/src/netinet6/ip6_forward.c b/src/netinet6/ip6_forward.c index 561cec4c..033c7e5f 100644 --- a/src/netinet6/ip6_forward.c +++ b/src/netinet6/ip6_forward.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2011-2017 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -29,30 +29,32 @@ #include #include -#include "bridge_port.h" #include "compat.h" #include "compiler.h" #include "crypto/crypto.h" #include "crypto/crypto_forward.h" #include "ether.h" +#include "if/bridge/bridge_port.h" +#include "if/macvlan.h" #include "if_var.h" #include "in6.h" +#include "ip_forward.h" #include "ip6_funcs.h" #include "ip_funcs.h" #include "ip_mcast.h" #include "l2tp/l2tpeth.h" -#include "macvlan.h" #include "main.h" #include "mpls/mpls.h" #include "mpls/mpls_forward.h" #include "nd6_nbr.h" -#include "nh.h" +#include "nh_common.h" #include "npf/npf.h" #include "npf/fragment/ipv6_rsmbl.h" #include "npf/npf_cache.h" #include "npf/npf_if.h" +#include "npf/zones/npf_zone_public.h" #include "npf_shim.h" -#include "pktmbuf.h" +#include "pktmbuf_internal.h" #include "pl_common.h" #include "pipeline/nodes/pl_nodes_common.h" #include "pl_fused.h" @@ -64,7 +66,7 @@ #include "urcu.h" #include "util.h" #include "vplane_log.h" -#include "vrf.h" +#include "vrf_internal.h" enum ip6_packet_validity { IP6_PKT_VALID, @@ -75,19 +77,17 @@ enum ip6_packet_validity { /* * Resolve the L3 nexthop and add the L2 encap - * - * Returns true if the packet should be sent, false if consumed. 
*/ ALWAYS_INLINE bool -ip6_l2_resolve_and_output(struct ifnet *in_ifp, struct rte_mbuf *m, - struct next_hop_v6 *nh, uint16_t proto) +dp_ip6_l2_nh_output(struct ifnet *in_ifp, struct rte_mbuf *m, + struct next_hop *nh, uint16_t proto) { struct pl_packet pl_pkt = { .mbuf = m, .l2_pkt_type = pkt_mbuf_get_l2_traffic_type(m), .l3_hdr = ip6hdr(m), .in_ifp = in_ifp, - .out_ifp = nh6_get_ifp(nh), + .out_ifp = dp_nh_get_ifp(nh), .nxt.v6 = nh, .l2_proto = proto, }; @@ -100,10 +100,24 @@ ip6_l2_resolve_and_output(struct ifnet *in_ifp, struct rte_mbuf *m, return true; } +/* + * Returns true if the packet should be sent, false if consumed. + */ +ALWAYS_INLINE bool +dp_ip6_l2_intf_output(struct ifnet *in_ifp, struct rte_mbuf *m, + struct ifnet *out_ifp, uint16_t proto) +{ + struct next_hop nh6; + + memset(&nh6, 0, sizeof(nh6)); + nh_set_ifp(&nh6, out_ifp); + return dp_ip6_l2_nh_output(in_ifp, m, &nh6, proto); +} + /* * l2tp can't use any of the ports registered via udp_handler_register */ -static int ip6_udp_tunnel_in(struct rte_mbuf *m, struct ifnet *ifp) +int ip6_udp_tunnel_in(struct rte_mbuf *m, struct ifnet *ifp) { return udp_input(m, AF_INET6, ifp); } @@ -118,66 +132,52 @@ static int ip6_udp_tunnel_in(struct rte_mbuf *m, struct ifnet *ifp) * 1 not consumed, */ -static int ip6_tunnel_input(struct rte_mbuf *m, struct ifnet *ifp) +int ip6_l4_input(struct rte_mbuf *m, struct ifnet *ifp) { - struct ip6_hdr *ip6 = ip6hdr(m); - int rc = -1; - uint32_t spi; - struct ip6_frag *ip6_frag; + struct pl_packet pl_pkt = { + .mbuf = m, + .in_ifp = ifp, + }; + + pipeline_fused_ipv6_l4(&pl_pkt); - if (crypto_policy_check_inbound_terminating(ifp, &m, - htons(ETHER_TYPE_IPv6))) + return 0; +} + +/* + * IPv6 originate slow path filter. + * + * Run the originating firewall, and drop the packet if required. + * + * Return an indication of if the packet was consumed. 
+ * 0 => Not filtered + * 1 => Filtered + */ +int ipv6_originate_filter_flags(struct ifnet *out_ifp, struct rte_mbuf *m, + uint16_t npf_flags) +{ + if (out_ifp == NULL) return 0; - switch (ip6->ip6_nxt) { - case IPPROTO_UDP: - rc = ip6_udp_tunnel_in(m, ifp); - break; - case IPPROTO_L2TPV3: - rc = l2tp_ipv6_recv_encap(m, ip6, - (unsigned char *)ip6 + sizeof(struct ip6_hdr)); - break; - case IPPROTO_GRE: - rc = ip6_gre_tunnel_in(&m, ip6); - break; - case IPPROTO_ESP: - spi = crypto_retrieve_spi((unsigned char *)ip6 + - pktmbuf_l3_len(m)); - rc = crypto_enqueue_inbound_v6(m, ifp, spi); - break; - case IPPROTO_FRAGMENT: - /* - * If it is a fragment, and the next proto is ESP send - * to the crypto code. It will reassemble it and then find - * the SPI, so pass in 0. - */ - ip6_frag = (struct ip6_frag *)(ip6 + 1); - if (ip6_frag->ip6f_nxt == IPPROTO_ESP) { - rc = crypto_enqueue_inbound_v6(m, ifp, 0); - break; - } - return 1; - default: - return 1; - /* other protocols */ - } - if (rc < 0) { - IP6STAT_INC_IFP(ifp, IPSTATS_MIB_INDISCARDS); + if (npf_originate_fw(out_ifp, npf_flags, &m, + htons(RTE_ETHER_TYPE_IPV6))) { + IPSTAT_INC_VRF(if_vrf(out_ifp), IPSTATS_MIB_OUTDISCARDS); rte_pktmbuf_free(m); + return 1; } - return rc; + return 0; } +static int ipv6_originate_filter(struct ifnet *ifp, struct rte_mbuf *m) +{ + return ipv6_originate_filter_flags(ifp, m, NPF_FLAG_FROM_US); +} /* * Deliver local destined packet to slow path */ void __cold_func ip6_local_deliver(struct ifnet *ifp, struct rte_mbuf *m) { - /* Check for l2tp tunnels */ - if (unlikely(ip6_tunnel_input(m, ifp) <= 0)) - return; - /* Check if the nd will take care of the packet. */ if (nd6_input(ifp, m) == 0) return; @@ -198,7 +198,7 @@ ip6_local_deliver(struct ifnet *ifp, struct rte_mbuf *m) * * Run the local firewall, and discard if so instructed. 
*/ - if (npf_local_fw(ifp, &m, htons(ETHER_TYPE_IPv6))) + if (npf_local_fw(ifp, &m, htons(RTE_ETHER_TYPE_IPV6))) goto discard; IP6STAT_INC_IFP(ifp, IPSTATS_MIB_INDELIVERS); @@ -229,7 +229,7 @@ int ip6_fragment_mtu(struct ifnet *ifp, unsigned int mtu_size, { struct rte_mbuf *m_table[IPV6_MAX_FRAGS]; struct ip6_hdr *in_ip6, *frag_ip6; - struct ether_hdr *eth_hdr; + struct rte_ether_hdr *eth_hdr; struct ip6_frag *ip6f; struct rte_mbuf *m_frag; uint32_t fh_id = random(); @@ -264,7 +264,7 @@ int ip6_fragment_mtu(struct ifnet *ifp, unsigned int mtu_size, m_frag->data_len += IPV6_FRAG_OVRHD; m_frag->pkt_len += IPV6_FRAG_OVRHD; if (ip_mbuf_copy(m_frag, m_in, - pktmbuf_l2_len(m_in) + + dp_pktmbuf_l2_len(m_in) + sizeof(struct ip6_hdr) + frag_off, copy_len)) { rte_pktmbuf_free(m_frag); @@ -297,10 +297,10 @@ int ip6_fragment_mtu(struct ifnet *ifp, unsigned int mtu_size, /* * Fixup fragment L2 header */ - rte_pktmbuf_prepend(m_frag, sizeof(struct ether_hdr)); - eth_hdr = rte_pktmbuf_mtod(m_frag, struct ether_hdr *); - eth_hdr->ether_type = htons(ETHER_TYPE_IPv6); - m_frag->l2_len = sizeof(struct ether_hdr); + rte_pktmbuf_prepend(m_frag, sizeof(struct rte_ether_hdr)); + eth_hdr = rte_pktmbuf_mtod(m_frag, struct rte_ether_hdr *); + eth_hdr->ether_type = htons(RTE_ETHER_TYPE_IPV6); + m_frag->l2_len = sizeof(struct rte_ether_hdr); m_table[nfrags++] = m_frag; } @@ -336,7 +336,7 @@ ip6_refragment_packet(struct ifnet *o_ifp, struct rte_mbuf *m, void *ctx, ip6_output_fn_t output_fn) { struct rte_mbuf *m_table[IPV6_MAX_FRAGS_PER_SET]; - struct ether_hdr *eth_hdr, eth_copy; + struct rte_ether_hdr *eth_hdr, eth_copy; uint16_t l2_len; int32_t nfrags; uint32_t fh_id; @@ -356,10 +356,10 @@ ip6_refragment_packet(struct ifnet *o_ifp, struct rte_mbuf *m, * addresses, but we don't know that just yet so just copy the * complete header. 
*/ - eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *); + eth_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); - ether_addr_copy(&eth_hdr->d_addr, &eth_copy.d_addr); - ether_addr_copy(&eth_hdr->s_addr, &eth_copy.s_addr); + rte_ether_addr_copy(&eth_hdr->d_addr, &eth_copy.d_addr); + rte_ether_addr_copy(&eth_hdr->s_addr, &eth_copy.s_addr); eth_copy.ether_type = eth_hdr->ether_type; /* @@ -406,20 +406,20 @@ ip6_refragment_packet(struct ifnet *o_ifp, struct rte_mbuf *m, */ fh = (struct ip6_frag *) (rte_pktmbuf_mtod(m, char *) + - sizeof(struct ipv6_hdr)); + sizeof(struct rte_ipv6_hdr)); fh->ip6f_ident = htonl(fh_id); /* Prepend space for l2 hdr */ - rte_pktmbuf_prepend(m, sizeof(struct ether_hdr)); - m->l2_len = sizeof(struct ether_hdr); + rte_pktmbuf_prepend(m, sizeof(struct rte_ether_hdr)); + m->l2_len = sizeof(struct rte_ether_hdr); /* * Write the ethernet header */ - eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *); + eth_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); - ether_addr_copy(&eth_copy.d_addr, &eth_hdr->d_addr); - ether_addr_copy(&eth_copy.s_addr, &eth_hdr->s_addr); + rte_ether_addr_copy(&eth_copy.d_addr, &eth_hdr->d_addr); + rte_ether_addr_copy(&eth_copy.s_addr, &eth_hdr->s_addr); eth_hdr->ether_type = eth_copy.ether_type; (*output_fn)(o_ifp, m, ctx); @@ -432,14 +432,14 @@ ip6_refragment_packet(struct ifnet *o_ifp, struct rte_mbuf *m, * If NULL is returned, the packet has been consumed */ static ALWAYS_INLINE -struct next_hop_v6 *ip6_lookup(struct rte_mbuf *m, struct ifnet *ifp, +struct next_hop *ip6_lookup(struct rte_mbuf *m, struct ifnet *ifp, struct ip6_hdr *ip6, uint32_t tbl_id, bool hlim_decremented) { - struct next_hop_v6 *nxt; + struct next_hop *nxt; /* Lookup route */ - nxt = rt6_lookup(&ip6->ip6_dst, tbl_id, m); + nxt = dp_rt6_lookup(&ip6->ip6_dst, tbl_id, m); /* no nexthop found, send icmp error */ if (unlikely(!nxt)) { @@ -456,7 +456,7 @@ struct next_hop_v6 *ip6_lookup(struct rte_mbuf *m, struct ifnet *ifp, slow_path: __cold_label; if (hlim_decremented) ip6->ip6_hlim += IPV6_HLIMDEC;
- ip6_local_deliver(ifp, m); + ip6_l4_input(m, ifp); return NULL; } @@ -470,7 +470,7 @@ struct next_hop_v6 *ip6_lookup(struct rte_mbuf *m, struct ifnet *ifp, */ ALWAYS_INLINE void ip6_out_features(struct rte_mbuf *m, struct ifnet *ifp, - struct ip6_hdr *ip6, struct next_hop_v6 *nxt, + struct ip6_hdr *ip6, struct next_hop *nxt, enum ip6_features ip6_feat, uint16_t npf_flags) { struct pl_packet pl_pkt = { @@ -482,7 +482,7 @@ void ip6_out_features(struct rte_mbuf *m, struct ifnet *ifp, }; /* nxt->ifp may be changed by netlink messages. */ - struct ifnet *nxt_ifp = nh6_get_ifp(nxt); + struct ifnet *nxt_ifp = dp_nh_get_ifp(nxt); /* Destination device is not up? */ if (!nxt_ifp || !(nxt_ifp->if_flags & IFF_UP)) { @@ -521,6 +521,13 @@ void ip6_out_features(struct rte_mbuf *m, struct ifnet *ifp, return; } icmp6_redirect(ifp, m, nxt); + /* + * Cache will have been used for handling + * the ICMPv6 redirect, so ensure it is created + * again when continuing with the original + * packet. + */ + pl_pkt.npf_flags |= NPF_FLAG_CACHE_EMPTY; } } @@ -531,12 +538,11 @@ void ip6_out_features(struct rte_mbuf *m, struct ifnet *ifp, pl_pkt.npf_flags |= NPF_FLAG_FROM_US; pipeline_fused_ipv6_out(&pl_pkt); - return; } static ALWAYS_INLINE void ip6_switch(struct rte_mbuf *m, struct ifnet *ifp, - struct ip6_hdr *ip6, struct next_hop_v6 *nxt, + struct ip6_hdr *ip6, struct next_hop *nxt, enum ip6_features ip6_feat, uint16_t npf_flags) { /* Immediately drop blackholed traffic. */ @@ -553,9 +559,7 @@ void ip6_switch(struct rte_mbuf *m, struct ifnet *ifp, /* MPLS imposition required because nh has given us a label */ if (unlikely(nh_outlabels_present(&nxt->outlabels))) { - union next_hop_v4_or_v6_ptr mpls_nh = { .v6 = nxt }; - - mpls_unlabeled_input(ifp, m, NH_TYPE_V6GW, mpls_nh, + mpls_unlabeled_input(ifp, m, MPT_IPV6, NH_TYPE_V6GW, nxt, ip6->ip6_hops); return; } @@ -573,7 +577,7 @@ ip6_validate_packet(struct rte_mbuf *m, const struct ip6_hdr *ip6) * Is packet big enough. 
* (i.e is there a valid IP header in first segment) */ - if (rte_pktmbuf_data_len(m) < pktmbuf_l2_len(m) + sizeof(*ip6)) + if (rte_pktmbuf_data_len(m) < dp_pktmbuf_l2_len(m) + sizeof(*ip6)) goto bad_packet; /* @@ -583,14 +587,14 @@ ip6_validate_packet(struct rte_mbuf *m, const struct ip6_hdr *ip6) goto bad_packet; /* Runt? */ - len = rte_pktmbuf_pkt_len(m) - pktmbuf_l2_len(m) - sizeof(*ip6); + len = rte_pktmbuf_pkt_len(m) - dp_pktmbuf_l2_len(m) - sizeof(*ip6); ip6_len = ntohs(ip6->ip6_plen); /* Packet is less than what the ip header tell us */ if (unlikely(len < ip6_len)) goto pkt_truncated; - pktmbuf_l3_len(m) = sizeof(*ip6); + dp_pktmbuf_l3_len(m) = sizeof(*ip6); /* * Is packet longer than IP header tells us? @@ -600,22 +604,15 @@ ip6_validate_packet(struct rte_mbuf *m, const struct ip6_hdr *ip6) /* * RFC 4291 - Source address sanity checks. - * The following are not allowed: multicast, loopback, V4 mapped. + * The following are not allowed: multicast, loopback + * draft-itojun-v6ops-v4mapped-harmful-02: + * Don't allow V4 mapped source either. */ if (unlikely(IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) || unlikely(IN6_IS_ADDR_LOOPBACK(&ip6->ip6_src)) || unlikely(IN6_IS_ADDR_V4MAPPED(&ip6->ip6_src))) goto bad_addr; - /* - * RFC 4291 - Unicast destination address sanity checks. - * The following are not allowed: unspecified, loopback, V4 mapped. - */ - if (unlikely(IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_dst)) || - unlikely(IN6_IS_ADDR_LOOPBACK(&ip6->ip6_dst)) || - unlikely(IN6_IS_ADDR_V4MAPPED(&ip6->ip6_dst))) - goto bad_addr; - /* * RFC 4291 - Multicast destination address sanity checks. * The following are not allowed: m/c scope of 0 or 1. 
@@ -707,13 +704,13 @@ void ip6_input_from_ipsec(struct ifnet *ifp, struct rte_mbuf *m) /* * Check if forwarding is enabled */ - if (unlikely(pl_node_is_feature_enabled( + if (unlikely(pl_node_is_feature_enabled_by_inst( &ipv6_in_no_forwarding_feat, ifp))) goto drop; /* Give IPsec a chance to consume it */ if (unlikely(crypto_policy_check_outbound(ifp, &m, RT_TABLE_MAIN, - htons(ETHER_TYPE_IPv6), + htons(RTE_ETHER_TYPE_IPV6), NULL))) return; @@ -726,19 +723,19 @@ void ip6_input_from_ipsec(struct ifnet *ifp, struct rte_mbuf *m) return; slow_path: __cold_label; - ip6_local_deliver(ifp, m); + ip6_l4_input(m, ifp); } ALWAYS_INLINE void ip6_output(struct rte_mbuf *m, bool srced_forus) { struct ip6_hdr *ip6 = ip6hdr(m); - struct next_hop_v6 *nxt; + struct next_hop *nxt; struct ifnet *ifp; /* Lookup route */ - nxt = rt6_lookup(srced_forus ? &ip6->ip6_src : &ip6->ip6_dst, - RT_TABLE_MAIN, m); + nxt = dp_rt6_lookup(srced_forus ? &ip6->ip6_src : &ip6->ip6_dst, + RT_TABLE_MAIN, m); if (!nxt) { /* * Since there is no output interface count against @@ -749,7 +746,7 @@ void ip6_output(struct rte_mbuf *m, bool srced_forus) } /* ifp can be changed by nxt->ifp. use protected deref. 
*/ - ifp = nh6_get_ifp(nxt); + ifp = dp_nh_get_ifp(nxt); if (unlikely(ifp == NULL)) { if (net_ratelimit()) { @@ -787,8 +784,8 @@ void ip6_lookup_and_originate(struct rte_mbuf *m, struct ifnet *in_ifp) { struct ip6_hdr *ip6 = ip6hdr(m); - struct next_hop_v6 *nxt; - struct next_hop_v6 ll_nh; + struct next_hop *nxt; + struct next_hop ll_nh; /* * RFC 4291 - Do not try to transmit to unspecified or loopback @@ -800,7 +797,7 @@ ip6_lookup_and_originate(struct rte_mbuf *m, struct ifnet *in_ifp) } if (unlikely(IN6_IS_ADDR_LINKLOCAL(&ip6->ip6_dst))) { - ll_nh = (struct next_hop_v6) { + ll_nh = (struct next_hop) { .u.ifp = in_ifp, }; nxt = &ll_nh; @@ -811,6 +808,13 @@ ip6_lookup_and_originate(struct rte_mbuf *m, struct ifnet *in_ifp) } } + /* + * This hook shall cover: ESPv6, GREv6, LTP2v6 outer header remark + * ICMPv6 error, redirect + */ + if (ipv6_originate_filter(dp_nh6_get_ifp(nxt), m)) + return; + enum ip6_features ip6_feat = IP6_FEA_ORIGINATE; ip6_switch(m, in_ifp, ip6, nxt, ip6_feat, NPF_FLAG_CACHE_EMPTY); } @@ -826,7 +830,7 @@ ip6_lookup_and_forward(struct rte_mbuf *m, struct ifnet *in_ifp, bool hlim_decremented, uint16_t npf_flags) { struct ip6_hdr *ip6 = ip6hdr(m); - struct next_hop_v6 *nxt; + struct next_hop *nxt; /* * RFC 4291 - Source address of unspecified must never be forwarded. @@ -894,8 +898,11 @@ ip6_spath_filter_internal(struct ifnet *ifp, struct ifnet *l2_ifp, */ if (IN6_IS_ADDR_LINKLOCAL(&ip6->ip6_src) || IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src) || - is_local_ipv6(if_vrfid(ifp), &ip6->ip6_src)) + is_local_ipv6(if_vrfid(ifp), &ip6->ip6_src)) { npf_flags |= NPF_FLAG_FROM_US | NPF_FLAG_FROM_LOCAL; + if (npf_zone_local_is_set()) + npf_flags |= NPF_FLAG_FROM_ZONE; + } /* * The kernel can L2 forward some bridged packets (i.e. 
IP broadcasts @@ -912,9 +919,10 @@ ip6_spath_filter_internal(struct ifnet *ifp, struct ifnet *l2_ifp, .npf_flags = npf_flags, .in_ifp = NULL, .out_ifp = ifp, + .l2_proto = ETH_P_IPV6, }; - if (!pipeline_fused_ipv6_defrag_out_spath(&pl_pkt)) + if (!pipeline_fused_ipv6_out_spath(&pl_pkt)) return 1; if (unlikely(m != pl_pkt.mbuf)) diff --git a/src/netinet6/ip6_funcs.h b/src/netinet6/ip6_funcs.h index 85e6df54..37884ba8 100644 --- a/src/netinet6/ip6_funcs.h +++ b/src/netinet6/ip6_funcs.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2011-2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -19,10 +19,9 @@ #include "compiler.h" #include "ip_funcs.h" -#include "vrf.h" +#include "ip_forward.h" #define IPV6_HLIMDEC 1 -#define IPV6_DEF_HLIM 64 #define V4MAPPED_IPV6_TO_IPV4(A) ((A).s6_addr32[3]) @@ -40,7 +39,7 @@ struct ifnet; static inline struct ip6_hdr *ip6hdr(const struct rte_mbuf *m) { - return pktmbuf_mtol3(m, struct ip6_hdr *); + return dp_pktmbuf_mtol3(m, struct ip6_hdr *); } static inline void ip6_ver_tc_flow_hdr(struct ip6_hdr *hdr, uint32_t tc, @@ -99,9 +98,10 @@ icmp6_do_exthdr(struct rte_mbuf *m, uint16_t class, uint8_t ctype, void *buf, unsigned int len); void icmp6_prepare_send(struct rte_mbuf *m); -struct next_hop_v6; -void icmp6_redirect(struct ifnet *ifp, struct rte_mbuf *m, - const struct next_hop_v6 *nxt); +struct next_hop; +/* Send icmp6 redirect without modifying original packet */ +void icmp6_redirect(struct ifnet *ifp, struct rte_mbuf *n, + const struct next_hop *nxt); void ip6_redirects_set(bool enable); bool ip6_redirects_get(void); typedef void (*ip6_output_fn_t)(struct ifnet *, struct rte_mbuf *, void *); @@ -121,7 +121,7 @@ void ip6_lookup_and_forward(struct rte_mbuf *m, struct ifnet *ifp, bool hlim_decremented, uint16_t npf_flags) __hot_func; void ip6_out_features(struct rte_mbuf 
*m, struct ifnet *ifp, - struct ip6_hdr *ip6, struct next_hop_v6 *nxt, + struct ip6_hdr *ip6, struct next_hop *nxt, enum ip6_features ip6_feat, uint16_t npf_flags); int ip6_hopopts_input(struct rte_mbuf *m, struct ifnet *iif, @@ -134,18 +134,20 @@ ip6_select_source(struct ifnet *ifp, const struct in6_addr *addr); void ip6_unreach(struct ifnet *ifp, struct rte_mbuf *m); +int ipv6_originate_filter_flags(struct ifnet *ifp, struct rte_mbuf *m, + uint16_t npf_flags); + void ip6_local_deliver(struct ifnet *ifp, struct rte_mbuf *m) __cold_func; bool ip6_l2_resolve(struct ifnet *in_ifp, struct rte_mbuf *m, - const struct next_hop_v6 *nh, uint16_t proto); -bool -ip6_l2_resolve_and_output(struct ifnet *in_ifp, struct rte_mbuf *m, - struct next_hop_v6 *nh, uint16_t proto); - + const struct next_hop *nh, uint16_t proto); void ip6_refragment_packet(struct ifnet *o_ifp, struct rte_mbuf *m, void *ctx, ip6_output_fn_t output_fn); + +int ip6_udp_tunnel_in(struct rte_mbuf *m, struct ifnet *ifp); +int ip6_l4_input(struct rte_mbuf *m, struct ifnet *ifp); #endif /*IP6_FUNCS_H*/ diff --git a/src/netinet6/ip6_icmp.c b/src/netinet6/ip6_icmp.c index abe990df..e03da109 100644 --- a/src/netinet6/ip6_icmp.c +++ b/src/netinet6/ip6_icmp.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2021, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2011-2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -35,7 +35,7 @@ #include "ip6_funcs.h" #include "ip_funcs.h" #include "nd6_nbr.h" -#include "pktmbuf.h" +#include "pktmbuf_internal.h" #include "route_flags.h" #include "route_v6.h" #include "snmp_mib.h" @@ -48,7 +48,7 @@ /* Option Formats Length 8-bit unsigned integer. The length of the option (including the type and length fields) in units of 8 octets. 
*/ -#define ICMP6_OPT_LEN(opttype, len) (((sizeof(struct opttype) + len) + 7) / 8) +#define ICMP6_OPT_LEN(opttype, len) (((sizeof(struct opttype) + (len)) + 7) / 8) static bool ip6_redirects = true; uint64_t icmp6stats[ICMP6_MIB_MAX]; @@ -132,9 +132,9 @@ ip6_select_source(struct ifnet *ifp, const struct in6_addr *addr) void icmp6_reflect(struct ifnet *ifp, struct rte_mbuf *m) { - struct ether_hdr *eh = ethhdr(m); + struct rte_ether_hdr *eh = ethhdr(m); - eh->ether_type = htons(ETHER_TYPE_IPv6); + eh->ether_type = htons(RTE_ETHER_TYPE_IPV6); ICMP6STAT_INC(pktmbuf_get_vrf(m), ICMP6_MIB_OUTMSGS); ip6_lookup_and_originate(m, ifp); @@ -186,7 +186,7 @@ icmp6_do_exthdr(struct rte_mbuf *m, uint16_t class, uint8_t ctype, void *buf, u_int16_t total_len; int hlen; - hlen = pktmbuf_l3_len(m); + hlen = dp_pktmbuf_l3_len(m); icmpv6 = (struct icmp6_hdr *) ((char *) ip6 + hlen); switch (icmpv6->icmp6_type) { case ICMP6_TIME_EXCEEDED: @@ -289,7 +289,7 @@ struct rte_mbuf *icmp6_do_error(struct ifnet *rcvif, struct rte_mbuf *n, .rcvif = rcvif, .saddr = NULL, }; - ifnet_walk(icmp6_lookup_minscope, &ctx); + dp_ifnet_walk(icmp6_lookup_minscope, &ctx); saddr = ctx.saddr; } @@ -303,14 +303,19 @@ struct rte_mbuf *icmp6_do_error(struct ifnet *rcvif, struct rte_mbuf *n, return NULL; /* Copy up to ICPMV6_PLD_MAXLEN bytes from the orignal packet */ - unsigned int icmplen = RTE_MIN(ICMP6_PLD_MAXLEN - sizeof(struct icmp6_hdr), - (unsigned int) rte_pktmbuf_data_len(n) - pktmbuf_l2_len(n)); + unsigned int icmplen = RTE_MIN(ICMP6_PLD_MAXLEN - + sizeof(struct icmp6_hdr), + (unsigned int) rte_pktmbuf_data_len(n) + - dp_pktmbuf_l2_len(n)); uint16_t plen = sizeof(struct icmp6_hdr) + icmplen; if (!rte_pktmbuf_append(m, - pktmbuf_l2_len(n) + sizeof(struct ip6_hdr) + plen)) + dp_pktmbuf_l2_len(n) + sizeof(struct ip6_hdr) + plen)) rte_panic("out of space to append icmp\n"); - pktmbuf_l2_len(m) = pktmbuf_l2_len(n); + dp_pktmbuf_l2_len(m) = dp_pktmbuf_l2_len(n); + + /* preserve the input port number for use 
by shadow interface */ + m->port = n->port; /* * OK, ICMP6 can be generated. @@ -404,7 +409,7 @@ inline bool ip6_redirects_get(void) * Does not modify original packet (n) */ void icmp6_redirect(struct ifnet *ifp, struct rte_mbuf *n, - const struct next_hop_v6 *nxt) + const struct next_hop *nxt) { const struct ip6_hdr *sip6 = ip6hdr(n); struct in6_addr saddr6 = sip6->ip6_src; @@ -434,7 +439,7 @@ void icmp6_redirect(struct ifnet *ifp, struct rte_mbuf *n, /* get ip6 linklocal address for the router. */ struct in6_addr taddr; if (nxt->flags & RTF_GATEWAY) { - taddr = nxt->gateway; + taddr = nxt->gateway.address.ip_v6; if (!IN6_IS_ADDR_LINKLOCAL(&taddr)) return; } else @@ -447,11 +452,12 @@ void icmp6_redirect(struct ifnet *ifp, struct rte_mbuf *n, /* offset for the Redirected Header option */ uint16_t rd_hdr_off = sizeof(struct nd_redirect); if (add_target_ll) - rd_hdr_off += ICMP6_OPT_LEN(nd_opt_hdr, ETHER_ADDR_LEN) << 3; + rd_hdr_off += ICMP6_OPT_LEN(nd_opt_hdr, + RTE_ETHER_ADDR_LEN) << 3; /* how much of the original packet can we fit? 
*/ uint16_t origoff = rd_hdr_off + sizeof(struct nd_opt_rd_hdr); - uint16_t origlen = rte_pktmbuf_data_len(n) - ETHER_HDR_LEN; + uint16_t origlen = rte_pktmbuf_data_len(n) - RTE_ETHER_HDR_LEN; origlen = RTE_MIN(origlen, ICMP6_PLD_MAXLEN - origoff); /* RFC4861 and comments in KAME say the original packet should be @@ -463,7 +469,7 @@ void icmp6_redirect(struct ifnet *ifp, struct rte_mbuf *n, origlen -= origlen % 8; uint16_t plen = origoff + origlen + origpad; - uint16_t totallen = ETHER_HDR_LEN + sizeof(struct ip6_hdr) + plen; + uint16_t totallen = RTE_ETHER_HDR_LEN + sizeof(struct ip6_hdr) + plen; struct rte_mbuf *m = pktmbuf_alloc(n->pool, pktmbuf_get_vrf(n)); @@ -472,7 +478,10 @@ void icmp6_redirect(struct ifnet *ifp, struct rte_mbuf *n, if (!rte_pktmbuf_append(m, totallen)) rte_panic("out of space to append icmp\n"); - pktmbuf_l2_len(m) = pktmbuf_l2_len(n); + dp_pktmbuf_l2_len(m) = dp_pktmbuf_l2_len(n); + + /* preserve the input port number for use by shadow interface */ + m->port = n->port; /* ip6 */ struct ip6_hdr *ip6 = ip6hdr(m); @@ -482,7 +491,7 @@ void icmp6_redirect(struct ifnet *ifp, struct rte_mbuf *n, ip6->ip6_flow |= htonl(IPTOS_PREC_NETCONTROL << 20); ip6->ip6_vfc = IPV6_VERSION; ip6->ip6_nxt = IPPROTO_ICMPV6; - ip6->ip6_hlim = IPV6_DEFAULT_HOPLIMIT; + ip6->ip6_hlim = IPV6_ONLINK_HOPLIMIT; ip6->ip6_plen = htons(plen); /* ND Redirect */ @@ -497,8 +506,9 @@ void icmp6_redirect(struct ifnet *ifp, struct rte_mbuf *n, if (add_target_ll) { struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)(nd_rd + 1); nd_opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; - nd_opt->nd_opt_len = ICMP6_OPT_LEN(nd_opt_hdr, ETHER_ADDR_LEN); - ether_addr_copy(&ln->ll_addr, (void *)(nd_opt + 1)); + nd_opt->nd_opt_len = ICMP6_OPT_LEN(nd_opt_hdr, + RTE_ETHER_ADDR_LEN); + rte_ether_addr_copy(&ln->ll_addr, (void *)(nd_opt + 1)); } /* Add Redirected Header option */ diff --git a/src/netinet6/ip6_mroute.c b/src/netinet6/ip6_mroute.c index 6586466b..e759c389 100644 --- 
a/src/netinet6/ip6_mroute.c +++ b/src/netinet6/ip6_mroute.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2021, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2014-2016 by Brocade Communications Systems, Inc. * All rights reserved. */ @@ -117,8 +117,8 @@ #include #include -#include "crypto/vti.h" -#include "gre.h" +#include "dp_event.h" +#include "if/gre.h" #include "if_var.h" #include "in6.h" #include "in6_var.h" @@ -130,16 +130,19 @@ #include "main.h" #include "netinet6/ip6_mroute.h" #include "pd_show.h" -#include "pktmbuf.h" +#include "pktmbuf_internal.h" #include "route_flags.h" #include "snmp_mib.h" #include "urcu.h" #include "util.h" #include "vplane_debug.h" #include "vplane_log.h" -#include "vrf.h" +#include "vrf_internal.h" #include "fal.h" #include "ip_mcast_fal_interface.h" +#include "pl_common.h" +#include "pl_fused.h" +#include "npf.h" /* * Multicast packets are punted to the slow path when they cannot be @@ -156,11 +159,7 @@ static struct rte_meter_srtcm_params mfc_meter_params = { .ebs = PUNT_FUZZ /* effectively zero */ }; -#ifdef HAVE_RTE_METER_SRTCM_PROFILE_CONFIG static struct rte_meter_srtcm_profile mfc_meter_profile; -#endif - -static struct cds_lfht *mif6table; #define UPCALL_TIMER 1 #ifdef UPCALL_TIMER @@ -168,10 +167,10 @@ static struct rte_timer expire_upcalls_ch; static void expire_upcalls(struct rte_timer *rtetm, void *arg); #endif static struct rte_timer mrt6_stats_timer; -static void mrt6_stats(struct rte_timer *, void *arg); +static void mrt6_stats(struct rte_timer *rtetm, void *arg); -static int ip6_mdq(struct mcast6_vrf *, struct rte_mbuf *, - struct ifnet *, struct mf6c *); +static int ip6_mdq(struct mcast6_vrf *mvrf6, struct rte_mbuf *m, + struct ifnet *in_ifp, struct mf6c *rt); static void expire_mf6c(struct vrf *vrf, struct mf6c *rt); static void sg6_cnt_update(struct vrf *vrf, struct mf6c *rt, bool last_mfc_deletion); @@ -220,7 +219,7 @@ 
static void rt6_show_subset(struct vrf *vrf, struct mf6c *rt, void *arg) subset->vrf = vrf->v_id; jsonw_start_object(subset->json); jsonw_uint_field(subset->json, "vrf_id", - vrf_get_external_id(vrf->v_id)); + dp_vrf_get_external_id(vrf->v_id)); jsonw_end_object(subset->json); } @@ -335,7 +334,7 @@ static struct mf6c *mf6c_find(struct mcast6_vrf *mvrf6, } /* - * This MUST be called with a rcu_read_lock and only unlocked after mif6 is + * This MUST be called with a dp_rcu_read_lock and only unlocked after mif6 is * no longer used. */ struct mif6* get_mif_by_ifindex(unsigned int ifindex) @@ -343,8 +342,18 @@ struct mif6* get_mif_by_ifindex(unsigned int ifindex) struct mif6 *mifp = NULL; struct cds_lfht_iter iter; struct cds_lfht_node *retnode; + struct ifnet *ifp = dp_ifnet_byifindex(ifindex); + struct vrf *vrf; + + if (!ifp) + return NULL; - cds_lfht_lookup(mif6table, ifindex, mif6_match, &ifindex, &iter); + vrf = vrf_get_rcu(if_vrfid(ifp)); + if (!vrf) + return NULL; + + cds_lfht_lookup(vrf->v_mvrf6.mif6table, ifindex, mif6_match, &ifindex, + &iter); retnode = cds_lfht_iter_get_node(&iter); if (retnode) mifp = caa_container_of(retnode, struct mif6, node); @@ -352,7 +361,7 @@ struct mif6* get_mif_by_ifindex(unsigned int ifindex) return mifp; } -void mrt6_purge(struct ifnet *ifp) +static void mrt6_purge(struct ifnet *ifp) { struct mif6 *mifp; struct mf6c *rt; @@ -387,12 +396,12 @@ void mrt6_purge(struct ifnet *ifp) "%s is input interface so delete MFC.", ifp->if_name); expire_mf6c(vrf, rt); - } else if (IF_ISSET(mifp->m6_if_index, &rt->mf6c_ifset)) { + } else if (IF_ISSET(mifp->m6_mif_index, &rt->mf6c_ifset)) { mfc6_debug(vrf->v_id, &rt->mf6c_origin, &rt->mf6c_mcastgrp, "Removing %s from olist.", ifp->if_name); - IF_CLR(mifp->m6_if_index, &rt->mf6c_ifset); + IF_CLR(mifp->m6_mif_index, &rt->mf6c_ifset); } } del_m6if(mifp->m6_if_index); @@ -404,8 +413,22 @@ void mrt6_purge(struct ifnet *ifp) int add_m6if(mifi_t ifindex) { struct mif6 *mifp; - struct ifnet *ifp; + 
struct ifnet *ifp = dp_ifnet_byifindex(ifindex); struct cds_lfht_node *retnode; + struct cds_lfht *mif6table; + unsigned char mif6_index; + struct vrf *vrf; + + if (!ifp) { + DP_DEBUG(MULTICAST, ERR, MCAST, + "Failure adding IPv6 MIF index %d.\n", ifindex); + return -EINVAL; + } + + vrf = vrf_get_rcu(if_vrfid(ifp)); + + if (!vrf) + return -EINVAL; if (ifindex <= 0) return -EINVAL; @@ -413,36 +436,44 @@ int add_m6if(mifi_t ifindex) if (get_mif_by_ifindex(ifindex)) return -EEXIST; - DP_DEBUG(MULTICAST, INFO, MCAST, "Adding IPv6 VIF %s.\n", - ifnet_indextoname(ifindex)); + mif6table = vrf->v_mvrf6.mif6table; + if (!mif6table) + return -EINVAL; - mifp = malloc(sizeof(struct mif6)); - if (!mifp) - return -ENOMEM; - memset(mifp, 0, sizeof(*mifp)); + if (mcast_iftable_get_free_slot(&vrf->v_mvrf6.mf6c_ifset, ifindex, + &mif6_index) != 0) + return -EDQUOT; + + DP_DEBUG(MULTICAST, INFO, MCAST, "Adding IPv6 VIF to slot %d (%d).\n", + mif6_index, ifindex); - ifp = ifnet_byifindex(ifindex); + mifp = calloc(1, sizeof(struct mif6)); + if (!mifp) { + IF_CLR(mif6_index, &vrf->v_mvrf6.mf6c_ifset); + return -ENOMEM; + } mifp->m6_if_index = ifindex; + mifp->m6_mif_index = mif6_index; mifp->m6_ifp = ifp; mifp->m6_flags = VIFF_USE_IFINDEX; - mifp->m6_flags |= (ifp) ? 0:VIFF_REGISTER; - mifp->m6_flags |= - (ifp && is_tunnel(ifp)) ? VIFF_TUNNEL:0; + mifp->m6_flags |= (is_tunnel_pimreg(ifp)) ? VIFF_REGISTER:0; + mifp->m6_flags |= (ifp && is_tunnel(ifp)) ? 
VIFF_TUNNEL:0; cds_lfht_node_init(&mifp->node); retnode = cds_lfht_add_replace(mif6table, mifp->m6_if_index, mif6_match, &mifp->m6_if_index, &mifp->node); if (retnode) { mifp = caa_container_of(retnode, struct mif6, node); + IF_CLR(mifp->m6_mif_index, &vrf->v_mvrf6.mf6c_ifset); call_rcu(&mifp->rcu_head, mif6_free); } - if (ifp) { - ip6_mcast_fal_int_enable(mifp, mif6table); - if (!(ifp->if_flags & IFF_MULTICAST)) - return -EOPNOTSUPP; - if_allmulti(ifp, 1); - } + + ip6_mcast_fal_int_enable(mifp, mif6table); + if (!(ifp->if_flags & IFF_MULTICAST)) + return -EOPNOTSUPP; + if_allmulti(ifp, 1); + return 0; } @@ -452,9 +483,19 @@ static void update_mfc6_count(vrfid_t vrf_id, struct mf6c *rt, struct mif6 *mifp; struct cds_lfht_iter iter; int i; + struct vrf *vrf = vrf_get_rcu(vrf_id); + struct cds_lfht *mif6table; + + if (vrf == NULL) { + DP_DEBUG(MULTICAST, ERR, MCAST, "MFC invalid vrf ID %d\n", + vrf_id); + return; + } + + mif6table = vrf->v_mvrf6.mif6table; cds_lfht_for_each_entry(mif6table, &iter, mifp, node) { - i = mifp->m6_if_index; + i = mifp->m6_mif_index; if (IF_ISSET(i, &rt->mf6c_ifset) != IF_ISSET(i, &mfccp->mf6cc_ifset)) { @@ -507,16 +548,20 @@ static void update_mfc6_params(vrfid_t vrf_id, struct mf6c *rt, { int controller = 0; struct mif6 *mifp; + struct vrf *vrf = vrf_get_rcu(vrf_id); struct cds_lfht_iter iter; int i; + if (!vrf) + return; + debug_update_mfc6_params(vrf_id, rt, mfccp); rt->mf6c_parent = mfccp->mf6cc_parent; rt->mf6c_ifset = mfccp->mf6cc_ifset; - cds_lfht_for_each_entry(mif6table, &iter, mifp, node) { - i = mifp->m6_if_index; + cds_lfht_for_each_entry(vrf->v_mvrf6.mif6table, &iter, mifp, node) { + i = mifp->m6_mif_index; if (!IF_ISSET(i, &rt->mf6c_ifset)) continue; @@ -564,7 +609,16 @@ static void update_mfc6_params(vrfid_t vrf_id, struct mf6c *rt, int del_m6if(mifi_t mifi) { struct mif6 *mifp; - struct ifnet *ifp; + struct ifnet *ifp = dp_ifnet_byifindex(mifi); + struct vrf *vrf; + + if (!ifp) + return -EINVAL; + + vrf = 
vrf_get_rcu(if_vrfid(ifp)); + + if (!vrf) + return -EINVAL; mifp = get_mif_by_ifindex(mifi); if (mifp == NULL) @@ -574,12 +628,11 @@ int del_m6if(mifi_t mifi) "Deleting IPv6 VIF %s.\n", ifnet_indextoname(mifi)); - ifp = mifp->m6_ifp; - if (ifp) - if_allmulti(ifp, 0); + if_allmulti(ifp, 0); - if (!cds_lfht_del(mif6table, &mifp->node)) { - ip6_mcast_fal_int_disable(mifp, mif6table); + IF_CLR(mifp->m6_mif_index, &vrf->v_mvrf6.mf6c_ifset); + if (!cds_lfht_del(vrf->v_mvrf6.mif6table, &mifp->node)) { + ip6_mcast_fal_int_disable(mifp, vrf->v_mvrf6.mif6table); call_rcu(&mifp->rcu_head, mif6_free); } @@ -613,14 +666,10 @@ static bool init_m6fc_params(vrfid_t vrf_id, struct mf6c *rt, update_mfc6_count(vrf_id, rt, mfccp); update_mfc6_params(vrf_id, rt, mfccp); -#ifdef HAVE_RTE_METER_SRTCM_PROFILE_CONFIG ret = rte_meter_srtcm_profile_config(&mfc_meter_profile, &mfc_meter_params); if (ret == 0) ret = rte_meter_srtcm_config(&rt->meter, &mfc_meter_profile); -#else - ret = rte_meter_srtcm_config(&rt->meter, &mfc_meter_params); -#endif if (ret != 0) { RTE_LOG(NOTICE, MCAST, "Failure configuring metering algorithm; pkts will not be punted to slow path (Err = %d)\n", @@ -646,6 +695,10 @@ ip6_mroute_add_fal_objects(vrfid_t vrf_id, struct vmf6cctl *mfccp, { enum pd_obj_state old_pd_state; int rc; + struct vrf *vrf = vrf_get_rcu(vrf_id); + + if (!vrf) + return; old_pd_state = rt->mfc_pd_state; @@ -654,7 +707,7 @@ ip6_mroute_add_fal_objects(vrfid_t vrf_id, struct vmf6cctl *mfccp, "Updating FAL object 0x%lx for mroute", rt->mf6c_fal_obj); rc = fal_ip6_upd_mroute(rt->mf6c_fal_obj, rt, mfccp, - mif6table); + vrf->v_mvrf6.mif6table); if (rc && rc != -EOPNOTSUPP) mfc6_debug(vrf_id, &rt->mf6c_origin, &rt->mf6c_mcastgrp, @@ -666,7 +719,8 @@ ip6_mroute_add_fal_objects(vrfid_t vrf_id, struct vmf6cctl *mfccp, &rt->mf6c_mcastgrp, "Creating FAL objects for mroute"); - rc = fal_ip6_new_mroute(vrf_id, mfccp, rt, mif6table); + rc = fal_ip6_new_mroute(vrf_id, mfccp, rt, + vrf->v_mvrf6.mif6table); if (rc 
&& rc != -EOPNOTSUPP) mfc6_debug(vrf_id, &rt->mf6c_origin, &rt->mf6c_mcastgrp, @@ -799,21 +853,19 @@ int del_m6fc(vrfid_t vrf_id, struct vmf6cctl *mfccp) */ static bool ip6_punt_rate_limit(struct mf6c *rt) { - enum rte_meter_color color; + enum rte_color color; color = rte_meter_srtcm_color_blind_check(&rt->meter, -#ifdef HAVE_RTE_METER_SRTCM_PROFILE_CONFIG &mfc_meter_profile, -#endif rte_rdtsc(), PUNT_1PKT); - if (color != e_RTE_METER_RED) { + if (color != RTE_COLOR_RED) { rt->mf6c_punted++; return false; - } else { - rt->mf6c_punts_dropped++; - return true; } + + rt->mf6c_punts_dropped++; + return true; } /* @@ -914,8 +966,8 @@ static int ip6_mforward(vrfid_t vrf_id, struct mcast6_vrf *mvrf6, #ifdef UPCALL_TIMER /* Clean up cache entries if upcalls are not serviced */ -static void expire_upcalls(__attribute__((unused)) struct rte_timer *rtetm, - __attribute__((unused)) void *arg) +static void expire_upcalls(__unused struct rte_timer *rtetm, + __unused void *arg) { struct mf6c *mfc; struct cds_lfht_iter iter; @@ -924,7 +976,7 @@ static void expire_upcalls(__attribute__((unused)) struct rte_timer *rtetm, VRF_FOREACH(vrf, vrf_id) { struct mcast6_vrf *mvrf6 = &vrf->v_mvrf6; - rcu_read_lock(); + dp_rcu_read_lock(); cds_lfht_for_each_entry(mvrf6->mf6ctable, &iter, mfc, node) { /* Skip real cache entries. Make sure it wasn't * marked to not expire (shouldn't happen) @@ -939,131 +991,113 @@ static void expire_upcalls(__attribute__((unused)) struct rte_timer *rtetm, MRT6STAT_INC(mvrf6, mrt6s_cache_cleanups); } } - rcu_read_unlock(); + dp_rcu_read_unlock(); } } #endif -static int mcast6_ethernet_send(struct mif6 *mifp, struct rte_mbuf *m, - struct ifnet *in_ifp) -{ - struct ifnet *ifp = mifp->m6_ifp; - struct ip6_hdr *ip6 = ip6hdr(m); - struct ether_hdr *eth_hdr; - mcast_dst_eth_addr_t eth_daddr; - - if (unlikely(rte_pktmbuf_pkt_len(m) > ifp->if_mtu)) - return ICMP6_PACKET_TOO_BIG; - - /* - * Time to decrement ttl since packet is being forwarded, not - * just punted. 
It was previously tested to ensure it is greater - * than 1 so there is no need to test for ttl expire here. - */ - ip6->ip6_hlim--; - - eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *); - eth_daddr = mcast6_dst_eth_addr(&ip6->ip6_dst); - ether_addr_copy(ð_daddr.as_addr, ð_hdr->d_addr); - ether_addr_copy(&ifp->eth_addr, ð_hdr->s_addr); - - if_output(ifp, m, in_ifp, ETH_P_IPV6); - return 0; -} - static void mcast6_tunnel_send(struct ifnet *in_ifp, struct mif6 *out_mifp, struct rte_mbuf *m, int plen) { struct ifnet *out_ifp; - struct vrf *vrf; - struct ip6_hdr *ip6; struct mcast_mgre_tun_walk_ctx mgre_tun_walk_ctx; out_ifp = out_mifp->m6_ifp; - ip6 = ip6hdr(m); - switch (out_ifp->if_type) { - case IFT_TUNNEL_GRE: - ip6->ip6_hlim--; + /* Call GRE API which will invoke specified callback + * for each end point in P2P or P2MP tunnel + */ + mgre_tun_walk_ctx.proto = ETH_P_IPV6; + mgre_tun_walk_ctx.mbuf = m; + mgre_tun_walk_ctx.in_ifp = in_ifp; + mgre_tun_walk_ctx.pkt_len = plen; + mgre_tun_walk_ctx.out_vif = out_mifp; + mgre_tun_walk_ctx.hdr_len = sizeof(struct ip6_hdr); + gre_tunnel_peer_walk(out_ifp, mcast_mgre_tunnel_endpoint_send, + &mgre_tun_walk_ctx); + /* + * Decrement ref count on original mbuf as new mbuf + * was transmitted in replication loop. + */ + rte_pktmbuf_free(m); +} - /* Call GRE API which will invoke specified callback - * for each end point in P2P or P2MP tunnel - */ - mgre_tun_walk_ctx.proto = ETH_P_IPV6; - mgre_tun_walk_ctx.mbuf = m; - mgre_tun_walk_ctx.in_ifp = in_ifp; - mgre_tun_walk_ctx.pkt_len = plen; - mgre_tun_walk_ctx.out_vif = out_mifp; - mgre_tun_walk_ctx.hdr_len = sizeof(struct ip6_hdr); - gre_tunnel_peer_walk(out_ifp, - mcast_mgre_tunnel_endpoint_send, - &mgre_tun_walk_ctx); - /* - * Decrement ref count on original mbuf as new mbuf - * was transmitted in replication loop. - */ - rte_pktmbuf_free(m); - return; +/* + * Packet transmission routine for VIF in olist. Select appropriate send + * function based on underlying interface type. 
+ */ +static void mif6_send(struct ifnet *in_ifp, struct mif6 *out_mifp, + struct rte_mbuf *m, int plen) +{ + struct ifnet *out_ifp = out_mifp->m6_ifp; - case IFT_TUNNEL_VTI: - ip6->ip6_hlim--; - out_mifp->m6_pkt_out++; - out_mifp->m6_bytes_out += plen; - IP6STAT_INC_IFP(out_ifp, IPSTATS_MIB_OUTMCASTPKTS); - vti_tunnel_out(in_ifp, out_ifp, m, ETH_P_IPV6); - return; - default: - /* - * Punt for any tunnels unsupported in data plane. - * Note that if packet successfully switched out - * of some other interfaces in the olist in the - * data plane, a duplicate packet may be sent out - * of these interfaces by the kernel. Essentially, - * as things stand, option is potentially duplicate - * packets on some interfaces or fail to transmit - * packets on other interfaces in the olist. - */ - mcast_ip6_deliver(in_ifp, m); - vrf = vrf_get_rcu(if_vrfid(in_ifp)); + /* + * Punt for any tunnels unsupported in data plane. + * + * Note that if a packet is successfully switched out of some + * other interfaces in the olist in the data plane, a duplicate + * packet may be sent out of these interfaces by the kernel. + * Essentially, as things stand, the option is to potentially + * duplicate packets on some interfaces or fail to transmit + * packets on other interfaces in the olist. + */ + if (unlikely(out_ifp->if_type == IFT_TUNNEL_OTHER)) { + struct vrf *vrf = vrf_get_rcu(if_vrfid(in_ifp)); if (vrf) { struct mcast6_vrf *mvrf6 = &vrf->v_mvrf6; MRT6STAT_INC(mvrf6, mrt6s_slowpath); } out_mifp->m6_pkt_out_punt++; out_mifp->m6_bytes_out_punt += plen; + mcast_ip6_deliver(in_ifp, m); + return; } -} -/* - * Packet transmission routine for VIF in olist. Select appropriate send - * function based on underlying interface type. 
- */ -static void mif6_send(struct ifnet *in_ifp, struct mif6 *out_mifp, - struct rte_mbuf *m, int plen) -{ - struct vrf *vrf; + struct ip6_hdr *ip6 = ip6hdr(m); - if (unlikely(out_mifp->m6_flags & VIFF_TUNNEL)) { + /* + * Time to decrement ttl since packet is being forwarded, not + * just punted. It was previously tested to ensure it is greater + * than 1 so there is no need to test for ttl expire here. + */ + ip6->ip6_hlim--; + + if (unlikely(out_ifp->if_type == IFT_TUNNEL_GRE && + !(out_ifp->if_flags & IFF_NOARP))) { mcast6_tunnel_send(in_ifp, out_mifp, m, plen); return; } - if (mcast6_ethernet_send(out_mifp, m, in_ifp) == ICMP6_PACKET_TOO_BIG) { - vrf = vrf_get_rcu(if_vrfid(in_ifp)); - if (vrf) { - struct mcast6_vrf *mvrf6 = &vrf->v_mvrf6; - MRT6STAT_INC(mvrf6, mrt6s_pkttoobig); - } - icmp6_error(in_ifp, m, ICMP6_PACKET_TOO_BIG, - 0, htonl(out_mifp->m6_ifp->if_mtu)); - } + /* OIL replication counts */ + out_mifp->m6_pkt_out++; + out_mifp->m6_bytes_out += plen; + + /* + * Send the packet down the pipeline graph. + */ + struct next_hop nh = { + .flags = RTF_MULTICAST, + .u.ifp = out_ifp, + }; + struct pl_packet pl_pkt = { + .mbuf = m, + .l2_pkt_type = pkt_mbuf_get_l2_traffic_type(m), + .l3_hdr = ip6, + .in_ifp = in_ifp, + .out_ifp = out_ifp, + .nxt.v6 = &nh, + .l2_proto = ETH_P_IPV6, + .npf_flags = NPF_FLAG_CACHE_EMPTY, + }; + + pipeline_fused_ipv6_out(&pl_pkt); } /* * Packet forwarding routine once entry in the cache is made */ static int ip6_mdq(struct mcast6_vrf *mvrf6, struct rte_mbuf *m, - struct ifnet *ifp, struct mf6c *rt) + struct ifnet *in_ifp, struct mf6c *rt) { struct ip6_hdr *ip6 = ip6hdr(m); struct mif6 *mifp; @@ -1074,7 +1108,7 @@ static int ip6_mdq(struct mcast6_vrf *mvrf6, struct rte_mbuf *m, /* Don't forward if it didn't arrive on parent mif* for its origin. 
*/ mifp = get_mif_by_ifindex(rt->mf6c_parent); - if (mifp == NULL || mifp->m6_if_index != ifp->if_index) { + if (mifp == NULL || mifp->m6_if_index != in_ifp->if_index) { /* if wrong iif */ MRT6STAT_INC(mvrf6, mrt6s_wrong_if); rt->mf6c_wrong_if++; @@ -1083,18 +1117,15 @@ static int ip6_mdq(struct mcast6_vrf *mvrf6, struct rte_mbuf *m, if (ip6_punt_rate_limit(rt)) { MRT6STAT_INC(mvrf6, mrt6s_upq_ovflw); return RTF_BLACKHOLE; - } else { - return RTF_SLOWPATH; } + return RTF_SLOWPATH; } /* Rate limit this punted packet */ if (rt->mf6c_controller) { - if (ip6_punt_rate_limit(rt)) { + if (ip6_punt_rate_limit(rt)) return RTF_BLACKHOLE; - } else { - return RTF_SLOWPATH; - } + return RTF_SLOWPATH; } /* @@ -1104,7 +1135,7 @@ static int ip6_mdq(struct mcast6_vrf *mvrf6, struct rte_mbuf *m, */ if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src) || IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_dst)) { - IP6STAT_INC_IFP(ifp, IPSTATS_MIB_INADDRERRORS); + IP6STAT_INC_IFP(in_ifp, IPSTATS_MIB_INADDRERRORS); return RTF_BLACKHOLE; } @@ -1116,8 +1147,8 @@ static int ip6_mdq(struct mcast6_vrf *mvrf6, struct rte_mbuf *m, * Drop packets not on loopback interface that have a loopback source * or destination address. */ - if (in6_setscope(&ip6->ip6_src, ifp, &iszone) || - in6_setscope(&ip6->ip6_dst, ifp, &idzone)) + if (in6_setscope(&ip6->ip6_src, in_ifp, &iszone) || + in6_setscope(&ip6->ip6_dst, in_ifp, &idzone)) return RTF_REJECT; mifp->m6_pkt_in++; @@ -1132,22 +1163,25 @@ static int ip6_mdq(struct mcast6_vrf *mvrf6, struct rte_mbuf *m, if (!md) return -ENOBUFS; - rte_pktmbuf_adj(md, pktmbuf_l2_len(md) + sizeof(struct ip6_hdr)); + rte_pktmbuf_adj(md, dp_pktmbuf_l2_len(md) + sizeof(struct ip6_hdr)); /* For each mif, forward a copy of the packet if there are group * members downstream on the interface. 
*/ - cds_lfht_for_each_entry(mif6table, &iter, mifp, node) { - if (IF_ISSET(mifp->m6_if_index, &rt->mf6c_ifset)) { - mifp->m6_pkt_out++; - mifp->m6_bytes_out += plen; - if (!mifp->m6_ifp) + cds_lfht_for_each_entry(mvrf6->mif6table, &iter, mifp, node) { + if (IF_ISSET(mifp->m6_mif_index, &rt->mf6c_ifset)) { + struct ifnet *out_ifp = mifp->m6_ifp; + + if (!out_ifp) + continue; + const bool if_up = (out_ifp->if_flags & IFF_UP); + if (!if_up) continue; mh = mcast_create_l2l3_header(m, md, sizeof(struct ip6_hdr)); if (mh) { /* send the newly created packet chain */ - mif6_send(ifp, mifp, mh, plen); + mif6_send(in_ifp, mifp, mh, plen); } else { rte_pktmbuf_free(md); return -ENOBUFS; @@ -1167,7 +1201,6 @@ static void sg6_cnt_update(struct vrf *vrf, struct mf6c *rt, { struct sioc_sg_req6 sr; uint32_t flags = 0; - enum fal_ip_mcast_entry_stat_type cntr_ids[] = { FAL_IP_MCAST_GROUP_STAT_IN_PACKETS, FAL_IP_MCAST_GROUP_STAT_IN_OCTETS @@ -1175,6 +1208,8 @@ static void sg6_cnt_update(struct vrf *vrf, struct mf6c *rt, uint64_t cntrs[ARRAY_SIZE(cntr_ids)]; int ret; + memset(&sr, 0, sizeof(sr)); + ret = fal_ip_mcast_get_stats(rt->mf6c_fal_obj, ARRAY_SIZE(cntr_ids), &cntr_ids[0], &cntrs[0]); if (ret < 0) { @@ -1206,7 +1241,7 @@ static void sg6_cnt_update(struct vrf *vrf, struct mf6c *rt, flags = 1; } - send_sg6_cnt(&sr, vrf_get_external_id(vrf->v_id), flags); + send_sg6_cnt(&sr, dp_vrf_get_external_id(vrf->v_id), flags); } /* @@ -1228,8 +1263,8 @@ static void sg6_cnt_dump(void) } } -static void mrt6_stats(__attribute__((unused)) struct rte_timer *rtetm, - __attribute__((unused)) void *arg) +static void mrt6_stats(__unused struct rte_timer *rtetm, + __unused void *arg) { sg6_cnt_dump(); } @@ -1261,8 +1296,9 @@ void mrt6_dump(FILE *f, struct vrf *vrf) olist_index = 0; olist_buf[olist_index] = '\0'; - cds_lfht_for_each_entry(mif6table, &iter_mif, mifp, node) { - if (IF_ISSET(mifp->m6_if_index, &mfc->mf6c_ifset)) { + cds_lfht_for_each_entry(vrf->v_mvrf6.mif6table, &iter_mif, + mifp, 
node) { + if (IF_ISSET(mifp->m6_mif_index, &mfc->mf6c_ifset)) { olist_index += snprintf(olist_buf + olist_index, sizeof(olist_buf) - @@ -1372,7 +1408,7 @@ void mrt6_stat(FILE *f, struct vrf *vrf) jsonw_destroy(&wr); } -void mvif6_dump(FILE *f, __attribute__((unused)) struct vrf *vrf) +void mvif6_dump(FILE *f, __unused struct vrf *vrf) { struct cds_lfht_iter iter; struct mif6 *mifp; @@ -1385,12 +1421,12 @@ void mvif6_dump(FILE *f, __attribute__((unused)) struct vrf *vrf) jsonw_name(wr, "mif6"); jsonw_start_array(wr); - cds_lfht_for_each_entry(mif6table, &iter, mifp, node) { + cds_lfht_for_each_entry(vrf->v_mvrf6.mif6table, &iter, mifp, node) { if (mifp->m6_flags) { jsonw_start_object(wr); jsonw_string_field(wr, "interface", mifp->m6_ifp ? mifp->m6_ifp->if_name : "non-vplane"); - jsonw_int_field(wr, "if_index", mifp->m6_if_index); + jsonw_int_field(wr, "if_index", mifp->m6_mif_index); jsonw_int_field(wr, "flags", mifp->m6_flags); jsonw_uint_field(wr, "pkt_in", mifp->m6_pkt_in); jsonw_uint_field(wr, "pkt_out", mifp->m6_pkt_out); @@ -1469,6 +1505,17 @@ int mcast6_vrf_init(struct vrf *vrf) vrf->v_mvrf6.v_fal_rpf_lst = NULL; vrf->v_mvrf6.mf6ctable = mf6ctable; + + vrf->v_mvrf6.mif6table = cds_lfht_new(MFC_MAX_MVIFS, MFC_MAX_MVIFS, + MFC_MAX_MVIFS, CDS_LFHT_ACCOUNTING, + NULL); + if (!vrf->v_mvrf6.mif6table) { + RTE_LOG(ERR, MCAST, + "%s: cds_lfht_new mif6table failed vrf %s\n", __func__, + vrf->v_name); + return -1; + } + memset(&(vrf->v_mvrf6.mf6c_ifset), 0, sizeof(struct if_set)); return 0; } @@ -1485,19 +1532,17 @@ void mcast6_vrf_uninit(struct vrf *vrf) dp_ht_destroy_deferred(vrf->v_mvrf6.mf6ctable); vrf->v_mvrf6.mf6ctable = NULL; + + dp_ht_destroy_deferred(vrf->v_mvrf6.mif6table); + vrf->v_mvrf6.mif6table = NULL; + } -int mcast_stop_ipv6(void) +static void mcast_stop_ipv6(void) { #ifdef UPCALL_TIMER rte_timer_stop(&expire_upcalls_ch); #endif - - if (cds_lfht_destroy(mif6table, NULL)) - RTE_LOG(ERR, MCAST, - "Destroying IPv6 VIF table failed.\n"); - - return 0; } 
static void expire_mf6c(struct vrf *vrf, struct mf6c *rt) @@ -1530,12 +1575,9 @@ static void expire_mf6c(struct vrf *vrf, struct mf6c *rt) call_rcu(&rt->rcu_head, mf6c_free); } -void mcast_init_ipv6(void) +static void mcast_init_ipv6(void) { - mif6table = cds_lfht_new(MFC_MAX_MVIFS, MFC_MAX_MVIFS, MFC_MAX_MVIFS, - CDS_LFHT_ACCOUNTING, NULL); - if (!mif6table) - rte_panic("mcast_init_ipv6: cds_lfht_new failed\n"); + #ifdef UPCALL_TIMER rte_timer_init(&expire_upcalls_ch); rte_timer_reset(&expire_upcalls_ch, EXPIRE_TIMEOUT, PERIODICAL, @@ -1545,3 +1587,26 @@ void mcast_init_ipv6(void) rte_timer_reset(&mrt6_stats_timer, SG_CNT_INTERVAL, PERIODICAL, rte_get_master_lcore(), mrt6_stats, NULL); } + +static void mrt6_if_delete(struct ifnet *ifp) +{ + del_m6if(ifp->if_index); +} + +static void mrt6_if_admin_status_change(struct ifnet *ifp, bool up) +{ + /* not interested in admin-up events */ + if (up) + return; + + mrt6_purge(ifp); +} + +static const struct dp_event_ops ip6_mroute_events = { + .init = mcast_init_ipv6, + .uninit = mcast_stop_ipv6, + .if_delete = mrt6_if_delete, + .if_admin_status_change = mrt6_if_admin_status_change, +}; + +DP_STARTUP_EVENT_REGISTER(ip6_mroute_events); diff --git a/src/netinet6/ip6_mroute.h b/src/netinet6/ip6_mroute.h index 853b81bf..91e78730 100644 --- a/src/netinet6/ip6_mroute.h +++ b/src/netinet6/ip6_mroute.h @@ -97,6 +97,8 @@ struct mrt6stat { struct mcast6_vrf { struct cds_lfht *mf6ctable; struct mrt6stat stat; + struct cds_lfht *mif6table; + struct if_set mf6c_ifset; fal_object_t v_fal_obj; /* fal object */ fal_object_t v_fal_rpf; /* fal rpf group object */ struct fal_object_list_t *v_fal_rpf_lst;/* fal rpf members object */ @@ -114,6 +116,7 @@ struct mif6 { unsigned char m6_flags; /* MIFF flags defined above */ struct ifnet *m6_ifp; /* pointer to interface */ unsigned int m6_if_index; /* interface device index */ + unsigned char m6_mif_index; /* per-vrf mif index */ uint64_t m6_pkt_in; /* # pkts in on interface */ uint64_t m6_pkt_out; 
/* # pkts out on interface */ uint64_t m6_pkt_out_punt; /* # pkts punted at output */ diff --git a/src/netinet6/ip6_options.c b/src/netinet6/ip6_options.c index 39dd23ef..50940241 100644 --- a/src/netinet6/ip6_options.c +++ b/src/netinet6/ip6_options.c @@ -1,7 +1,7 @@ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. - * Copyright (c) 2017, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2021, AT&T Intellectual Property. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -68,9 +68,10 @@ #include #include +#include "compiler.h" #include "if_var.h" #include "ip6_funcs.h" -#include "pktmbuf.h" +#include "pktmbuf_internal.h" #include "snmp_mib.h" /* @@ -79,8 +80,9 @@ * which is necessary if the IPv6 header the and option header and IPv6 header * is not continuous in order to return an ICMPv6 error. */ -static int __attribute__((cold)) -ip6_unknown_opt(uint8_t *optp, struct rte_mbuf *m, struct ifnet *iif, int off) +static int __cold_func +ip6_unknown_opt(const uint8_t *optp, struct rte_mbuf *m, + struct ifnet *iif, int off) { struct ip6_hdr *ip6; @@ -197,7 +199,7 @@ ip6_hopopts_input(struct rte_mbuf *m, struct ifnet *iif, uint32_t *rtalertp) struct ip6_hbh *hbh; /* validation of the length of the header */ - l3_data_len = rte_pktmbuf_data_len(m) - pktmbuf_l2_len(m); + l3_data_len = rte_pktmbuf_data_len(m) - dp_pktmbuf_l2_len(m); if (l3_data_len - sizeof(struct ip6_hdr) < sizeof(*hbh)) { IP6STAT_INC(if_vrfid(iif), IPSTATS_MIB_INHDRERRORS); rte_pktmbuf_free(m); diff --git a/src/netinet6/nd6.h b/src/netinet6/nd6.h index c2f40f59..399a5ef2 100644 --- a/src/netinet6/nd6.h +++ b/src/netinet6/nd6.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2018-2019, AT&T Intellectual Property. All rights reserved. 
* Copyright (c) 2014-2016 by Brocade Communications Systems, Inc. * All rights reserved. */ diff --git a/src/netinet6/nd6_nbr.c b/src/netinet6/nd6_nbr.c index c6d8d387..29e2b23c 100644 --- a/src/netinet6/nd6_nbr.c +++ b/src/netinet6/nd6_nbr.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2021, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2014-2016 by Brocade Communications Systems, Inc. * All rights reserved. */ @@ -65,9 +65,11 @@ #include "arp.h" #include "compiler.h" +#include "dp_event.h" #include "ether.h" #include "fal.h" #include "fal_plugin.h" +#include "if/macvlan.h" #include "if_ether.h" #include "if_llatbl.h" #include "if_var.h" @@ -75,10 +77,13 @@ #include "in6_var.h" #include "in_cksum.h" #include "ip6_funcs.h" +#include "lcore_sched.h" #include "main.h" #include "nd6.h" #include "nd6_nbr.h" -#include "pktmbuf.h" +#include "pktmbuf_internal.h" +#include "protobuf.h" +#include "protobuf/NbrResConfig.pb-c.h" #include "snmp_mib.h" #include "urcu.h" #include "util.h" @@ -113,10 +118,22 @@ static const char *const nd6_dbgstate[] = { static const struct in6_addr in6addr_allnodes = IN6ADDR_LINKLOCAL_ALLNODES_INIT; -static const char in6ether_allnodes[ETHER_ADDR_LEN] = { +static const char in6ether_allnodes[RTE_ETHER_ADDR_LEN] = { 0x33, 0x33, 0x00, 0x00, 0x00, 0x01}; -static void nd6_log_conflict(struct ifnet *ifp, const struct ether_addr *lladdr, +static struct nd6_nbr_cfg nd6_cfg = { + .nd6_ns_retries = ND6_NS_RETRIES, + .nd6_reachable_time = ND6_REACHABLE_TIME, + .nd6_scavenge_time = ND6_SCAVENGE_TIME, + .nd6_delay_time = ND6_DELAY_TIME, + .nd6_max_entry = ND6_MAX_ENTRY, + .nd6_res_token = ND6_RES_TOKEN, + .nd6_unr_token = ND6_UNR_TOKEN, + .nd6_maxhold = ND6_MAXHOLD, +}; + +static void nd6_log_conflict(struct ifnet *ifp, + const struct rte_ether_addr *lladdr, struct in6_addr *saddr) { char b1[ETH_ADDR_STR_LEN]; @@ -222,7 +239,8 @@ nd6_option(union nd_opts *ndopts) 
/* option overruns the end of buffer, invalid */ memset(ndopts, 0, sizeof(*ndopts)); return NULL; - } else if (ndopts->nd_opts_search == ndopts->nd_opts_last) { + } + if (ndopts->nd_opts_search == ndopts->nd_opts_last) { /* reached the end of options chain */ ndopts->nd_opts_done = 1; ndopts->nd_opts_search = NULL; @@ -325,28 +343,26 @@ nd6_change_state(struct ifnet *ifp, struct llentry *lle, uint8_t state, lle->ll_expire = rte_get_timer_cycles() + expire * rte_get_timer_hz(); } +/* should be called with la->ll_lock held */ static inline void nd6_update_lla(struct ifnet *ifp, struct llentry *la, - const struct ether_addr *enaddr) + const struct rte_ether_addr *enaddr) { - struct sockaddr_in6 *sin6 = satosin6(ll_sockaddr(la)); - struct fal_attribute_t dst_attr = { - FAL_NEIGH_ENTRY_ATTR_DST_MAC_ADDRESS, .value.mac = *enaddr }; char buf[ETH_ADDR_STR_LEN]; - char b[INET6_ADDRSTRLEN]; - int ret; ND6_DEBUG("%s/%s LLA %s\n", ifp->if_name, lladdr_ntop6(la), ether_ntoa_r(enaddr, buf)); ll_addr_set(la, enaddr); - ret = fal_ip6_upd_neigh(ifp->if_index, sin6, &dst_attr); - if (ret < 0) { - RTE_LOG(NOTICE, DATAPLANE, - "FAL neighbour MAC update for %s, %s failed: %s\n", - inet_ntop(AF_INET6, &sin6->sin6_addr, b, sizeof(b)), - ifp->if_name, strerror(-ret)); - } + la->la_flags |= LLE_HW_UPD_PENDING; + + /* + * Fire the timer for this table immediately on the main + * thread so that FAL updates can be issued. 
+ */ + rte_timer_reset(&ifp->if_lltable6->lle_timer, 0, + SINGLE, rte_get_master_lcore(), + in6_lladdr_timer, ifp->if_lltable6); } /* @@ -355,7 +371,7 @@ nd6_update_lla(struct ifnet *ifp, struct llentry *la, */ static inline void nd6_entry_amend(struct ifnet *ifp, struct llentry *la, uint8_t state, - const struct ether_addr *enaddr, uint16_t secs, u_int flags) + const struct rte_ether_addr *enaddr, uint16_t secs, u_int flags) { struct lltable *llt = ifp->if_lltable6; @@ -373,7 +389,7 @@ nd6_entry_amend(struct ifnet *ifp, struct llentry *la, uint8_t state, /* * Valid entry, update MAC. */ - if (!ether_addr_equal(enaddr, &la->ll_addr)) + if (!rte_ether_addr_equal(enaddr, &la->ll_addr)) nd6_update_lla(ifp, la, enaddr); la->la_flags |= flags; @@ -384,7 +400,8 @@ nd6_entry_amend(struct ifnet *ifp, struct llentry *la, uint8_t state, /* * Invalid (incomplete) entry. */ - if (rte_atomic16_read(&llt->lle_restoken) < ND6_RES_TOKEN) + if (rte_atomic16_read(&llt->lle_restoken) < + nd6_cfg.nd6_res_token) rte_atomic16_inc(&llt->lle_restoken); nd6_update_lla(ifp, la, enaddr); @@ -402,16 +419,16 @@ nd6_entry_amend(struct ifnet *ifp, struct llentry *la, uint8_t state, for (i = 0; i < la->la_numheld; ++i) { struct rte_mbuf *m = la->la_held[i]; - struct ether_hdr *eh; + struct rte_ether_hdr *eh; if (!m) break; eh = rte_pktmbuf_mtod(m, - struct ether_hdr *); + struct rte_ether_hdr *); la->la_held[i] = NULL; - ether_addr_copy(enaddr, &eh->d_addr); + rte_ether_addr_copy(enaddr, &eh->d_addr); if_output(ifp, m, NULL, ntohs(eh->ether_type)); } la->la_numheld = 0; @@ -469,7 +486,7 @@ lla_match6(struct cds_lfht_node *node, const void *key) */ static struct llentry * nd6_create_valid(struct ifnet *ifp, const struct in6_addr *addr, - uint8_t state, const struct ether_addr *enaddr, + uint8_t state, const struct rte_ether_addr *enaddr, uint16_t secs, u_int flags) { struct lltable *llt = ifp->if_lltable6; @@ -485,8 +502,10 @@ nd6_create_valid(struct ifnet *ifp, const struct in6_addr *addr, char 
b[INET6_ADDRSTRLEN]; int ret; - if (rte_atomic16_read(&llt->lle_size) >= ND6_MAX_ENTRY) + if (rte_atomic32_read(&llt->lle_size) >= nd6_cfg.nd6_max_entry) { + ND6NBR_INC(tablimit); return NULL; + } lle = llentry_new(&sin6, sizeof(sin6), ifp); if (!lle) @@ -519,20 +538,33 @@ nd6_create_valid(struct ifnet *ifp, const struct in6_addr *addr, nd6_entry_amend(ifp, lle, ND6_LLINFO_REACHABLE, enaddr, secs, (LLE_VALID | flags)); } else { - rte_atomic16_inc(&llt->lle_size); - ret = fal_ip6_new_neigh(ifp->if_index, &sin6, - RTE_DIM(attr_list), attr_list); - if (ret < 0 && ret != -EOPNOTSUPP) { - RTE_LOG(NOTICE, DATAPLANE, - "FAL new neighbour for %s, %s failed: %s\n", - inet_ntop(AF_INET6, &sin6.sin6_addr, - b, sizeof(b)), - ifp->if_name, strerror(-ret)); + rte_atomic32_inc(&llt->lle_size); + if (is_main_thread() && if_is_features_mode_active( + lle->ifp, IF_FEAT_MODE_EVENT_L3_FAL_ENABLED)) { + ret = fal_ip6_new_neigh(ifp->if_index, ifp->fal_l3, + &sin6, + RTE_DIM(attr_list), attr_list); + if (ret < 0 && ret != -EOPNOTSUPP) { + RTE_LOG(NOTICE, DATAPLANE, + "FAL new neighbour for %s, %s failed: %s\n", + inet_ntop(AF_INET6, &sin6.sin6_addr, + b, sizeof(b)), + ifp->if_name, strerror(-ret)); + } + if (ret >= 0) { + rte_spinlock_lock(&lle->ll_lock); + lle->la_flags |= LLE_CREATED_IN_HW; + rte_spinlock_unlock(&lle->ll_lock); + } + } else { + rte_spinlock_lock(&lle->ll_lock); + lle->la_flags |= LLE_HW_UPD_PENDING; + rte_spinlock_unlock(&lle->ll_lock); } } /* - * Fire the timer for this table immediately on master. It + * Fire the timer for this table immediately on main. It * doesn't matter if it fails as it will get picked up on * the next firing in that case. 
*/ @@ -547,13 +579,13 @@ nd6_create_valid(struct ifnet *ifp, const struct in6_addr *addr, * Send an NA packet */ static void -nd6_na_output(struct ifnet *ifp, const struct ether_addr *lladdr, +nd6_na_output(struct ifnet *ifp, const struct rte_ether_addr *lladdr, const struct in6_addr *daddr6, const struct in6_addr *taddr6, uint32_t flags, int tlladdr) { struct rte_mbuf *m; - struct ether_hdr *eh; + struct rte_ether_hdr *eh; struct ip6_hdr *ip6; struct nd_neighbor_advert *nd_na; const struct in6_addr *src; @@ -570,26 +602,27 @@ nd6_na_output(struct ifnet *ifp, const struct ether_addr *lladdr, return; } - pktmbuf_l2_len(m) = ETHER_HDR_LEN; + dp_pktmbuf_l2_len(m) = RTE_ETHER_HDR_LEN; paylen = sizeof(*nd_na); if (tlladdr) { - optlen = (sizeof(struct nd_opt_hdr) + ETHER_ADDR_LEN + 7) & ~7; + optlen = (sizeof(struct nd_opt_hdr) + + RTE_ETHER_ADDR_LEN + 7) & ~7; paylen += optlen; } pktlen = sizeof(*eh) + sizeof(*ip6) + paylen; - eh = (struct ether_hdr *)rte_pktmbuf_append(m, pktlen); - ether_addr_copy(&ifp->eth_addr, &eh->s_addr); + eh = (struct rte_ether_hdr *)rte_pktmbuf_append(m, pktlen); + rte_ether_addr_copy(&ifp->eth_addr, &eh->s_addr); if (lladdr) - ether_addr_copy(lladdr, &eh->d_addr); - eh->ether_type = htons(ETHER_TYPE_IPv6); + rte_ether_addr_copy(lladdr, &eh->d_addr); + eh->ether_type = htons(RTE_ETHER_TYPE_IPV6); ip6 = (struct ip6_hdr *)(eh + 1); ip6->ip6_flow = htonl(IPTOS_PREC_INTERNETCONTROL << 20); ip6->ip6_vfc &= ~IPV6_VERSION_MASK; ip6->ip6_vfc |= IPV6_VERSION; ip6->ip6_nxt = IPPROTO_ICMPV6; - ip6->ip6_hlim = 255; + ip6->ip6_hlim = IPV6_ONLINK_HOPLIMIT; ip6->ip6_plen = htons((uint16_t)paylen); ip6->ip6_dst = *daddr6; @@ -618,7 +651,8 @@ nd6_na_output(struct ifnet *ifp, const struct ether_addr *lladdr, memset((void *)nd_opt, 0, optlen); nd_opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; nd_opt->nd_opt_len = optlen >> 3; - memcpy((void *)(nd_opt + 1), &ifp->eth_addr, ETHER_ADDR_LEN); + memcpy((void *)(nd_opt + 1), + &ifp->eth_addr, RTE_ETHER_ADDR_LEN); } else { 
flags &= ~ND_NA_FLAG_OVERRIDE; } @@ -631,6 +665,9 @@ nd6_na_output(struct ifnet *ifp, const struct ether_addr *lladdr, lladdr ? 1 : 0); ND6NBR_INC(natx); + if (ip6_spath_filter(ifp, &m)) + return; + /* * Send NA. If we don't have dest MAC then resolve it */ @@ -652,13 +689,17 @@ nd6_ns_input(struct ifnet *ifp, struct rte_mbuf *m, unsigned int off, struct in6_addr daddr6 = ip6->ip6_dst; struct in6_addr taddr6; const struct in6_addr *dest; - const struct ether_addr *lladdr = NULL; + const struct rte_ether_addr *lladdr = NULL; union nd_opts ndopts; char buf0[INET6_ADDRSTRLEN], buf1[INET6_ADDRSTRLEN], buf2[INET6_ADDRSTRLEN]; uint32_t flags; int rc; bool punt = false; + struct ifnet *vrrp_ifp; + struct sockaddr_storage sock_storage; + struct sockaddr_in6 *ip_storage = + (struct sockaddr_in6 *) &sock_storage; ND6NBR_INC(nsrx); @@ -692,9 +733,9 @@ nd6_ns_input(struct ifnet *ifp, struct rte_mbuf *m, unsigned int off, goto bad; if (ndopts.nd_opts_src_lladdr) { - lladdr = (const struct ether_addr *) + lladdr = (const struct rte_ether_addr *) (ndopts.nd_opts_src_lladdr + 1); - if (!is_valid_assigned_ether_addr(lladdr)) { + if (!rte_is_valid_assigned_ether_addr(lladdr)) { char buf[ETH_ADDR_STR_LEN]; ND6_DEBUG("Bad MAC %s\n", ether_ntoa_r(lladdr, buf)); @@ -711,7 +752,11 @@ nd6_ns_input(struct ifnet *ifp, struct rte_mbuf *m, unsigned int off, * Check for us. 
Also detect impersonators */ rc = nd6_forus(ifp, &saddr6, &taddr6); - if (unlikely(rc != 0)) { + ip_storage->sin6_family = AF_INET6; + ip_storage->sin6_addr = taddr6; + vrrp_ifp = macvlan_get_vrrp_ip_if(ifp, + (struct sockaddr *)&sock_storage); + if (unlikely(rc != 0 && vrrp_ifp == NULL)) { if (rc == -EADDRINUSE) { nd6_log_conflict(ifp, lladdr, &saddr6); } else { @@ -737,13 +782,14 @@ nd6_ns_input(struct ifnet *ifp, struct rte_mbuf *m, unsigned int off, la = in6_lltable_lookup(ifp, 0, &saddr6); if (!la) { nd6_create_valid(ifp, &saddr6, ND6_LLINFO_STALE, - lladdr, ND6_SCAVENGE_TIME, 0); + lladdr, nd6_cfg.nd6_scavenge_time, 0); if (nd6_sync) punt = true; } else { - if (!ether_addr_equal(lladdr, &la->ll_addr)) { + if (!rte_ether_addr_equal(lladdr, &la->ll_addr)) { nd6_entry_amend(ifp, la, ND6_LLINFO_STALE, - lladdr, ND6_SCAVENGE_TIME, + lladdr, + nd6_cfg.nd6_scavenge_time, LLE_VALID); if (nd6_sync) punt = true; @@ -762,7 +808,7 @@ nd6_ns_input(struct ifnet *ifp, struct rte_mbuf *m, unsigned int off, */ if (IN6_IS_ADDR_UNSPECIFIED(&saddr6)) { dest = &in6addr_allnodes; - lladdr = (const struct ether_addr *)in6ether_allnodes; + lladdr = (const struct rte_ether_addr *)in6ether_allnodes; flags &= ~ND_NA_FLAG_SOLICITED; } else { dest = &saddr6; @@ -800,11 +846,11 @@ nd6_na_input(struct ifnet *ifp, struct rte_mbuf *m, int is_solicited; int is_override; int is_mcast; - const struct ether_addr *lladdr = NULL; + const struct rte_ether_addr *lladdr = NULL; int lladdrlen = 0; struct if_addr *ifa; union nd_opts ndopts; - int mac_addrlen = sizeof(struct ether_addr); + int mac_addrlen = sizeof(struct rte_ether_addr); char buf0[INET6_ADDRSTRLEN], buf1[INET6_ADDRSTRLEN], buf2[INET6_ADDRSTRLEN]; bool punt = false; @@ -844,10 +890,10 @@ nd6_na_input(struct ifnet *ifp, struct rte_mbuf *m, } if (ndopts.nd_opts_tgt_lladdr) { - lladdr = (const struct ether_addr *) + lladdr = (const struct rte_ether_addr *) (ndopts.nd_opts_tgt_lladdr + 1); lladdrlen = ndopts.nd_opts_tgt_lladdr->nd_opt_len << 
3; - if (!is_valid_assigned_ether_addr(lladdr)) { + if (!rte_is_valid_assigned_ether_addr(lladdr)) { char buf[ETH_ADDR_STR_LEN]; ND6_DEBUG("Bad MAC %s\n", @@ -861,7 +907,7 @@ nd6_na_input(struct ifnet *ifp, struct rte_mbuf *m, */ ifa = in6ifa_ifpwithaddr(ifp, &taddr6); if (ifa) { - nd6_log_conflict(ifp, (const struct ether_addr *)lladdr, + nd6_log_conflict(ifp, (const struct rte_ether_addr *)lladdr, &taddr6); goto freeit; } @@ -903,7 +949,7 @@ nd6_na_input(struct ifnet *ifp, struct rte_mbuf *m, if (is_solicited) { state = ND6_LLINFO_REACHABLE; - time = ND6_REACHABLE_TIME; + time = nd6_cfg.nd6_reachable_time; } else { state = ND6_LLINFO_STALE; time = ND6_SCAVENGE_TIME; @@ -933,12 +979,12 @@ nd6_na_input(struct ifnet *ifp, struct rte_mbuf *m, (la->la_flags & LLE_CTRL); llchange = (lladdr && - !ether_addr_equal(lladdr, &la->ll_addr)); + !rte_ether_addr_equal(lladdr, &la->ll_addr)); if (!is_override && llchange) { if (la->la_state == ND6_LLINFO_REACHABLE) nd6_change_state(ifp, la, ND6_LLINFO_STALE, - ND6_SCAVENGE_TIME); + nd6_cfg.nd6_scavenge_time); rte_spinlock_unlock(&la->ll_lock); goto done; } @@ -948,7 +994,7 @@ nd6_na_input(struct ifnet *ifp, struct rte_mbuf *m, nd6_update_lla(ifp, la, lladdr); if (!is_solicited) nd6_change_state(ifp, la, ND6_LLINFO_STALE, - ND6_SCAVENGE_TIME); + nd6_cfg.nd6_scavenge_time); } if (is_solicited && (la->la_state != ND6_LLINFO_REACHABLE)) nd6_change_state(ifp, la, ND6_LLINFO_REACHABLE, @@ -978,10 +1024,10 @@ nd6_na_input(struct ifnet *ifp, struct rte_mbuf *m, static struct rte_mbuf * nd6_ns_build(struct ifnet *ifp, const struct in6_addr *res_src, const struct in6_addr *taddr6, - const struct ether_addr *dst_mac) + const struct rte_ether_addr *dst_mac) { struct rte_mbuf *m; - struct ether_hdr *eh; + struct rte_ether_hdr *eh; struct ip6_hdr *ip6; struct nd_neighbor_solicit *nd_ns; const struct in6_addr *src; @@ -999,21 +1045,21 @@ nd6_ns_build(struct ifnet *ifp, const struct in6_addr *res_src, return NULL; } - pktmbuf_l2_len(m) = 
ETHER_HDR_LEN; - optlen = (sizeof(struct nd_opt_hdr) + ETHER_ADDR_LEN + 7) & ~7; + dp_pktmbuf_l2_len(m) = RTE_ETHER_HDR_LEN; + optlen = (sizeof(struct nd_opt_hdr) + RTE_ETHER_ADDR_LEN + 7) & ~7; paylen = sizeof(*nd_ns) + optlen; pktlen = sizeof(*eh) + sizeof(*ip6) + paylen; - eh = (struct ether_hdr *)rte_pktmbuf_append(m, pktlen); - ether_addr_copy(&ifp->eth_addr, &eh->s_addr); - eh->ether_type = htons(ETHER_TYPE_IPv6); + eh = (struct rte_ether_hdr *)rte_pktmbuf_append(m, pktlen); + rte_ether_addr_copy(&ifp->eth_addr, &eh->s_addr); + eh->ether_type = htons(RTE_ETHER_TYPE_IPV6); ip6 = (struct ip6_hdr *)(eh + 1); ip6->ip6_flow = htonl(IPTOS_PREC_INTERNETCONTROL << 20); ip6->ip6_vfc &= ~IPV6_VERSION_MASK; ip6->ip6_vfc |= IPV6_VERSION; ip6->ip6_nxt = IPPROTO_ICMPV6; - ip6->ip6_hlim = 255; + ip6->ip6_hlim = IPV6_ONLINK_HOPLIMIT; ip6->ip6_plen = htons((uint16_t)paylen); /* @@ -1022,7 +1068,7 @@ nd6_ns_build(struct ifnet *ifp, const struct in6_addr *res_src, */ if (dst_mac) { ip6->ip6_dst = *taddr6; - ether_addr_copy(dst_mac, &eh->d_addr); + rte_ether_addr_copy(dst_mac, &eh->d_addr); } else { ip6->ip6_dst.s6_addr16[0] = IPV6_ADDR_INT16_MLL; ip6->ip6_dst.s6_addr16[1] = 0; @@ -1057,7 +1103,7 @@ nd6_ns_build(struct ifnet *ifp, const struct in6_addr *res_src, memset((void *)nd_opt, 0, optlen); nd_opt->nd_opt_type = ND_OPT_SOURCE_LINKADDR; nd_opt->nd_opt_len = optlen >> 3; - ether_addr_copy(&ifp->eth_addr, (void *)(nd_opt + 1)); + rte_ether_addr_copy(&ifp->eth_addr, (void *)(nd_opt + 1)); nd_ns->nd_ns_cksum = 0; nd_ns->nd_ns_cksum = @@ -1072,13 +1118,16 @@ nd6_ns_build(struct ifnet *ifp, const struct in6_addr *res_src, static void nd6_ns_output(struct ifnet *ifp, const struct in6_addr *res_src, const struct in6_addr *taddr6, - const struct ether_addr *dst_mac) + const struct rte_ether_addr *dst_mac) { struct rte_mbuf *m; m = nd6_ns_build(ifp, res_src, taddr6, dst_mac); - if (m) - if_output(ifp, m, NULL, ETH_P_IPV6); + + if (m) { + if (!ip6_spath_filter(ifp, &m)) + 
if_output(ifp, m, NULL, ETH_P_IPV6); + } } /* @@ -1087,7 +1136,7 @@ static void nd6_ns_output(struct ifnet *ifp, */ int nd6_resolve(struct ifnet *in_ifp, struct ifnet *ifp, struct rte_mbuf *m, const struct in6_addr *addr, - struct ether_addr *desten) + struct rte_ether_addr *desten) { struct lltable *llt = ifp->if_lltable6; struct llentry *la; @@ -1099,7 +1148,7 @@ int nd6_resolve(struct ifnet *in_ifp, struct ifnet *ifp, if (likely(la && (la->la_flags & LLE_VALID))) { resolved: rte_atomic16_clear(&la->ll_idle); - ether_addr_copy(&la->ll_addr, desten); + rte_ether_addr_copy(&la->ll_addr, desten); return 0; } @@ -1114,8 +1163,8 @@ int nd6_resolve(struct ifnet *in_ifp, struct ifnet *ifp, rte_pktmbuf_free(m); return -ENOMEM; } - if (unlikely(rte_atomic16_read(&llt->lle_size) >= - ND6_MAX_ENTRY)) { + if (unlikely(rte_atomic32_read(&llt->lle_size) >= + nd6_cfg.nd6_max_entry)) { ND6NBR_INC(tablimit); rte_pktmbuf_free(m); return -ENOMEM; @@ -1141,7 +1190,7 @@ int nd6_resolve(struct ifnet *in_ifp, struct ifnet *ifp, goto resolved; } - if (unlikely(la->la_flags == LLE_DELETED)) { + if (unlikely(la->la_flags & LLE_DELETED)) { rte_spinlock_unlock(&la->ll_lock); goto lookup; } @@ -1152,12 +1201,12 @@ int nd6_resolve(struct ifnet *in_ifp, struct ifnet *ifp, */ if (in_ifp) pktmbuf_save_ifp(m, in_ifp); - if (la->la_numheld >= ND6_MAXHOLD) { + if (la->la_numheld >= nd6_cfg.nd6_maxhold) { ND6NBR_INC(dropped); rte_pktmbuf_free(la->la_held[0]); memmove(&la->la_held[0], &la->la_held[1], - (ND6_MAXHOLD - 1) * sizeof(la->la_held[0])); - la->la_held[ND6_MAXHOLD - 1] = m; + (nd6_cfg.nd6_maxhold - 1) * sizeof(la->la_held[0])); + la->la_held[nd6_cfg.nd6_maxhold - 1] = m; } else { la->la_held[la->la_numheld++] = m; } @@ -1193,7 +1242,7 @@ int nd6_input(struct ifnet *ifp, struct rte_mbuf *m) if (ip6->ip6_nxt != IPPROTO_ICMPV6) return 1; - off = pktmbuf_l2_len(m) + sizeof(*ip6); + off = dp_pktmbuf_l2_len(m) + sizeof(*ip6); icmp6 = ip6_exthdr(m, off, sizeof(*icmp6)); if (unlikely(!icmp6)) { 
IP6STAT_INC(if_vrfid(ifp), IPSTATS_MIB_INDISCARDS); @@ -1211,7 +1260,7 @@ int nd6_input(struct ifnet *ifp, struct rte_mbuf *m) return 0; } - if (unlikely(ip6->ip6_hlim != 255)) { + if (unlikely(ip6->ip6_hlim != IPV6_ONLINK_HOPLIMIT)) { ND6NBR_INC(badpkt); rte_pktmbuf_free(m); return 0; @@ -1243,47 +1292,9 @@ int nd6_input(struct ifnet *ifp, struct rte_mbuf *m) } } -/* - * Check if the packet is an ND solicited NA. - * return true if solicited NA - */ -bool nd6_is_sol_na(struct rte_mbuf *m) -{ - struct ip6_hdr *ip6 = ip6hdr(m); - struct icmp6_hdr *icmp6; - unsigned int off, icmp6len; - struct nd_neighbor_advert *nd_na; - uint32_t flags; - - if (!ip6_valid_packet(m, ip6)) - return false; - - if (likely(ip6->ip6_nxt != IPPROTO_ICMPV6)) - return false; - - off = pktmbuf_l2_len(m) + sizeof(*ip6); - icmp6 = ip6_exthdr(m, off, sizeof(*icmp6)); - if (unlikely(!icmp6)) - return false; - - if (icmp6->icmp6_type != ND_NEIGHBOR_ADVERT) - return false; - - icmp6len = rte_pktmbuf_pkt_len(m) - off; - nd_na = ip6_exthdr(m, off, icmp6len); - if (!nd_na) - return false; - - flags = nd_na->nd_na_flags_reserved; - if ((flags & ND_NA_FLAG_SOLICITED) == 0) - return false; - - return true; -} - /* * Walk the ND6 table. - * Only called by console (master thread); + * Only called by console (main thread); * Can not be called safely from forwarding loop. 
*/ void @@ -1314,18 +1325,17 @@ nd6_entry_destroy(struct lltable *llt, struct llentry *lle) * Update resolution tokens */ if (!(lle->la_flags & LLE_VALID) && - rte_atomic16_read(&llt->lle_restoken) < ND6_RES_TOKEN) + rte_atomic16_read(&llt->lle_restoken) < nd6_cfg.nd6_res_token) rte_atomic16_inc(&llt->lle_restoken); pkts_dropped = llentry_destroy(llt, lle); ND6NBR_ADD(dropped, pkts_dropped); - rte_atomic16_dec(&llt->lle_size); } /* * Fast link layer address lookup function for IPv6 - * Assumes rcu_read_lock + * Assumes dp_rcu_read_lock */ struct llentry * lla_lookup6(struct lltable *llt, const struct in6_addr *addr) @@ -1389,17 +1399,43 @@ in6_lltable_lookup(struct ifnet *ifp, u_int flags, llentry_free(lle); lle = caa_container_of(node, struct llentry, ll_node); } else { - rte_atomic16_inc(&llt->lle_size); - ret = fal_ip6_new_neigh(ifp->if_index, &sin6, - RTE_DIM(attr_list), attr_list); - if (ret < 0 && ret != -EOPNOTSUPP) { - RTE_LOG(NOTICE, DATAPLANE, - "FAL new neighbour for %s, %s failed: %s\n", - inet_ntop(AF_INET6, &sin6.sin6_addr, - b, sizeof(b)), - ifp->if_name, strerror(-ret)); + rte_atomic32_inc(&llt->lle_size); + if (is_main_thread() && if_is_features_mode_active( + lle->ifp, + IF_FEAT_MODE_EVENT_L3_FAL_ENABLED)) { + ret = fal_ip6_new_neigh(ifp->if_index, + ifp->fal_l3, &sin6, + RTE_DIM(attr_list), + attr_list); + if (ret < 0 && ret != -EOPNOTSUPP) { + RTE_LOG(NOTICE, DATAPLANE, + "FAL new neighbour for %s, %s failed: %s\n", + inet_ntop(AF_INET6, + &sin6.sin6_addr, + b, sizeof(b)), + ifp->if_name, strerror(-ret)); + } + if (ret >= 0) { + rte_spinlock_lock(&lle->ll_lock); + lle->la_flags |= LLE_CREATED_IN_HW; + rte_spinlock_unlock(&lle->ll_lock); + } + } else { + rte_spinlock_lock(&lle->ll_lock); + lle->la_flags |= LLE_HW_UPD_PENDING; + rte_spinlock_unlock(&lle->ll_lock); } + /* + * Fire the timer for this table immediately + * on main. It doesn't matter if it fails as + * it will get picked up on the next firing in + * that case. 
+ */ + rte_timer_reset(&ifp->if_lltable6->lle_timer, 0, + SINGLE, rte_get_master_lcore(), + in6_lladdr_timer, ifp->if_lltable6); + /* * Count outstanding resolutions */ @@ -1408,13 +1444,14 @@ in6_lltable_lookup(struct ifnet *ifp, u_int flags, } } else if (flags & LLE_DELETE) { /* - * Only delete static or idle entries. + * Only delete static entries or stale entries that are idle. * Leave dynamic in-use entries to time out - kernel may * think they are stale but they may be in active use * by the dataplane. */ if ((lle->la_flags & LLE_STATIC) || - !llentry_has_been_used(lle)) { + ((lle->la_state == ND6_LLINFO_STALE) && + !llentry_has_been_used(lle))) { ND6_DEBUG("%s/%s Delete\n", ifp->if_name, ip6_sprintf(addr)); @@ -1429,12 +1466,12 @@ in6_lltable_lookup(struct ifnet *ifp, u_int flags, } /* - * Called from master thread + * Called from main thread * Handle ND cache change notification from control plane */ int nd6_lladdr_add(struct ifnet *ifp, struct in6_addr *addr, - const struct ether_addr *mac, uint16_t state, + const struct rte_ether_addr *mac, uint16_t state, uint8_t ntf_flags) { struct llentry *lle; @@ -1452,7 +1489,7 @@ nd6_lladdr_add(struct ifnet *ifp, struct in6_addr *addr, if (!(state & (NUD_PERMANENT | NUD_REACHABLE | NUD_FAILED))) return 0; - rcu_read_lock(); + dp_rcu_read_lock(); lle = in6_lltable_lookup(ifp, 0, addr); @@ -1460,7 +1497,7 @@ nd6_lladdr_add(struct ifnet *ifp, struct in6_addr *addr, flags = LLE_STATIC; } else if (state & NUD_REACHABLE) { flags = LLE_CTRL; - secs = ND6_REACHABLE_TIME; + secs = nd6_cfg.nd6_reachable_time; } if (state & NUD_FAILED) { @@ -1487,7 +1524,7 @@ nd6_lladdr_add(struct ifnet *ifp, struct in6_addr *addr, } } - rcu_read_unlock(); + dp_rcu_read_unlock(); return 0; } @@ -1561,7 +1598,7 @@ nd6_resolve_timeout(struct lltable *llt, struct llentry *lle, { struct ifnet *ifp = llt->llt_ifp; - if (++lle->la_asked <= ND6_NS_RETRIES) { + if (++lle->la_asked <= nd6_cfg.nd6_ns_retries) { struct sockaddr_in6 *sin6 = 
satosin6(ll_sockaddr(lle)); /* @@ -1571,21 +1608,21 @@ nd6_resolve_timeout(struct lltable *llt, struct llentry *lle, return nd6_ns_build(ifp, NULL, &sin6->sin6_addr, nud ? &lle->ll_addr : NULL); - } else { - /* - * Reached retry limit. Delete entry - */ - ND6_DEBUG("%s/%s Retry limit\n", ifp->if_name, - lladdr_ntop6(lle)); + } - if (nud) - ND6NBR_INC(nudfail); - else - ND6NBR_INC(timeouts); + /* + * Reached retry limit. Delete entry + */ + ND6_DEBUG("%s/%s Retry limit\n", ifp->if_name, + lladdr_ntop6(lle)); - nd6_unreachable(ifp, lle, m_for_icmp_unreach, - ifp_for_icmp_unreach); - } + if (nud) + ND6NBR_INC(nudfail); + else + ND6NBR_INC(timeouts); + + nd6_unreachable(ifp, lle, m_for_icmp_unreach, + ifp_for_icmp_unreach); return NULL; } @@ -1594,7 +1631,7 @@ nd6_reachable_timeout(struct lltable *llt, struct llentry *lle) { struct ifnet *ifp = llt->llt_ifp; - nd6_change_state(ifp, lle, ND6_LLINFO_STALE, ND6_SCAVENGE_TIME); + nd6_change_state(ifp, lle, ND6_LLINFO_STALE, nd6_cfg.nd6_scavenge_time); return NULL; } @@ -1642,7 +1679,7 @@ in6_ll_age(struct lltable *llt, struct llentry *lle, uint64_t cur_time) rte_spinlock_lock(&lle->ll_lock); if (lle->la_state == ND6_LLINFO_STALE) nd6_change_state(ifp, lle, ND6_LLINFO_DELAY, - ND6_DELAY_TIME); + nd6_cfg.nd6_delay_time); rte_spinlock_unlock(&lle->ll_lock); return; @@ -1685,8 +1722,10 @@ in6_ll_age(struct lltable *llt, struct llentry *lle, uint64_t cur_time) } rte_spinlock_unlock(&lle->ll_lock); - if (m) - if_output(ifp, m, NULL, ETH_P_IPV6); + if (m) { + if (!ip6_spath_filter(ifp, &m)) + if_output(ifp, m, NULL, ETH_P_IPV6); + } if (m_for_icmp_unreach) icmp6_error(ifp_for_icmp_unreach, @@ -1710,7 +1749,7 @@ nd6_cache_purge(struct lltable *llt) } static void -nd6_cache_age(struct lltable *llt) +nd6_cache_age(struct lltable *llt, bool refresh_timer_expired) { struct llentry *lle; struct cds_lfht_iter iter; @@ -1724,16 +1763,18 @@ nd6_cache_age(struct lltable *llt) continue; } + llentry_issue_pending_fal_updates(lle); + if 
((lle->la_flags & (LLE_VALID | LLE_FWDING)) == LLE_VALID) llentry_routing_install(lle); if (lle->la_flags & LLE_STATIC) continue; - if (lle->ll_expire == 0) + if (lle->ll_expire == 0 || !refresh_timer_expired) continue; - else - in6_ll_age(llt, lle, cur_time); + + in6_ll_age(llt, lle, cur_time); } } @@ -1744,21 +1785,162 @@ void in6_lladdr_timer(struct rte_timer *tim __rte_unused, void *arg) { struct lltable *llt = arg; struct ifnet *ifp = llt->llt_ifp; + bool refresh_timer_fired = false; + uint64_t cur_time = rte_get_timer_cycles(); - /* - * Refresh resolution tokens - */ - rte_atomic16_set(&llt->lle_restoken, ND6_RES_TOKEN); - llt->lle_unrtoken = ND6_UNR_TOKEN; + if (llt->lle_refresh_expire < cur_time) { + refresh_timer_fired = true; + + /* + * Refresh resolution tokens + */ + rte_atomic16_set(&llt->lle_restoken, nd6_cfg.nd6_res_token); + llt->lle_unrtoken = nd6_cfg.nd6_unr_token; + + /* one second later */ + llt->lle_refresh_expire = cur_time + rte_get_timer_hz(); + } - rcu_read_lock(); + dp_rcu_read_lock(); if (!(ifp->if_flags & IFF_UP)) nd6_cache_purge(llt); else - nd6_cache_age(llt); + nd6_cache_age(llt, refresh_timer_fired); - rte_timer_reset(&llt->lle_timer, rte_get_timer_hz(), + cur_time = rte_get_timer_cycles(); + rte_timer_reset(&llt->lle_timer, + llt->lle_refresh_expire < cur_time ? 
0 : + llt->lle_refresh_expire - cur_time, SINGLE, rte_get_master_lcore(), in6_lladdr_timer, llt); - rcu_read_unlock(); + dp_rcu_read_unlock(); } + +/* + * nd6-cfg ND6 {SET|DELETE} + */ +static int cmd_nd6_cfg_handler(struct pb_msg *pbmsg) +{ + NbrResConfig *msg = nbr_res_config__unpack(NULL, pbmsg->msg_len, + pbmsg->msg); + uint32_t val; + char *ifname; + int ret = -1; + bool set; + + if (!msg) { + RTE_LOG(ERR, ND6, + "Cfg failed to read NbrResConfig protobuf cmd\n"); + return ret; + } + if (msg->prot != NBR_RES_CONFIG__PROT__ND6) { + RTE_LOG(ERR, ND6, + "Cfg incorrect protocol (%d)\n", msg->prot); + goto end; + } + ifname = msg->ifname; + if (ifname && (*ifname != '\0' && strncmp("all", ifname, 4) != 0)) { + RTE_LOG(ERR, ND6, + "Cfg per-interface config not yet supported\n"); + goto end; + } + set = msg->action == NBR_RES_CONFIG__ACTION__SET; + val = msg->value; + + switch (msg->param) { + case NBR_RES_CONFIG__PARAM__MAX_ENTRY: + /* + * Changes to cache size only impact subsequent resolutions. + * So if cache size is reduced to less than the number of + * entries for an interface, then the latter number decreases + * only as entries fail to re-resolve. + */ + if (set && (int)val <= 0) { + RTE_LOG(ERR, ND6, + "Cfg max entry value %d out of range\n", val); + goto end; + } + nd6_cfg.nd6_max_entry = set ? val : ND6_MAX_ENTRY; + ND6_DEBUG("Cfg param nd6_max_entry (cache size) set to: %d\n", + nd6_cfg.nd6_max_entry); + break; + case NBR_RES_CONFIG__PARAM__RES_TOKEN: + /* + * Changes to resolution throttling only impact subsequent + * resolutions. So if this limit is reduced to less than the + * number of pending resolutions for an interface in a given + * second, these are not affected. Value must be a +ve int16_t. + */ + if (set && (val == 0 || val >= 1 << 15)) { + RTE_LOG(ERR, ND6, + "Cfg res token value %d out of range\n", val); + goto end; + } + nd6_cfg.nd6_res_token = set ? 
val : ND6_RES_TOKEN; + ND6_DEBUG("Cfg param nd6_res_token (resolution throttling) set " + "to: %d\n", nd6_cfg.nd6_res_token); + break; + default: + RTE_LOG(ERR, ND6, + "Cfg parameter not supported (%d)\n", msg->param); + goto end; + } + + ret = 0; +end: + nbr_res_config__free_unpacked(msg, NULL); + return ret; +} + +PB_REGISTER_CMD(nd6_cfg_cmd) = { + .cmd = "vyatta:nd6", + .handler = cmd_nd6_cfg_handler, +}; + +int cmd_nd6_get_cfg(FILE *f) +{ + json_writer_t *wr = jsonw_new(f); + + if (!wr) { + RTE_LOG(NOTICE, DATAPLANE, + "nd6: Error creating JSON object for cfg params\n"); + return -1; + } + + jsonw_pretty(wr, true); + + jsonw_uint_field(wr, "NS retries", nd6_cfg.nd6_ns_retries); + jsonw_uint_field(wr, "Reachable time", nd6_cfg.nd6_reachable_time); + jsonw_uint_field(wr, "Scavenge time", nd6_cfg.nd6_scavenge_time); + jsonw_uint_field(wr, "Delay time", nd6_cfg.nd6_delay_time); + jsonw_int_field(wr, "Max entries", nd6_cfg.nd6_max_entry); + jsonw_int_field(wr, "Resolution tokens", nd6_cfg.nd6_res_token); + jsonw_uint_field(wr, "Unreachable tokens", nd6_cfg.nd6_unr_token); + jsonw_uint_field(wr, "Max hold", nd6_cfg.nd6_maxhold); + + jsonw_destroy(&wr); + + return 0; +} + +static void +nd6_lladdr_if_feat_mode_change(struct ifnet *ifp, + enum if_feat_mode_event event) +{ + if (event != IF_FEAT_MODE_EVENT_L3_FAL_ENABLED && + event != IF_FEAT_MODE_EVENT_L3_FAL_DISABLED) + return; + + if (lltable_fal_l3_change( + ifp->if_lltable6, + event == IF_FEAT_MODE_EVENT_L3_FAL_ENABLED)) + rte_timer_reset(&ifp->if_lltable6->lle_timer, 0, + SINGLE, rte_get_master_lcore(), + in6_lladdr_timer, ifp->if_lltable6); +} + +static const struct dp_event_ops nd6_lladdr_events = { + .if_feat_mode_change = nd6_lladdr_if_feat_mode_change, +}; + +DP_STARTUP_EVENT_REGISTER(nd6_lladdr_events); diff --git a/src/netinet6/nd6_nbr.h b/src/netinet6/nd6_nbr.h index 5dce9b5e..c4e1d37e 100644 --- a/src/netinet6/nd6_nbr.h +++ b/src/netinet6/nd6_nbr.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2018, AT&T 
Intellectual Property. All rights reserved. + * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2011-2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -26,14 +26,25 @@ struct rte_mbuf; #define ND6_REACHABLE_TIME 30 #define ND6_SCAVENGE_TIME (20 * 60) /* Remove stale entries after 20 minutes */ #define ND6_DELAY_TIME 5 -#define ND6_MAX_ENTRY 16384 -#define ND6_RES_TOKEN 128 +#define ND6_MAX_ENTRY 8192 /* Same as yang default for consistency */ +#define ND6_RES_TOKEN 100 #define ND6_UNR_TOKEN 2 #define ND6_MAXHOLD ARP_MAXHOLD -struct ether_addr; +struct rte_ether_addr; struct ifnet; +struct nd6_nbr_cfg { + uint8_t nd6_ns_retries; + uint16_t nd6_reachable_time; + uint16_t nd6_scavenge_time; + uint16_t nd6_delay_time; + int32_t nd6_max_entry; + int16_t nd6_res_token; + uint16_t nd6_unr_token; + uint8_t nd6_maxhold; +}; + struct nd6_nbr_stats { uint64_t received; /* # of ND packets received by this host. */ uint64_t rxignored; /* # of requests ignored (wrong net) */ @@ -56,7 +67,7 @@ extern struct nd6_nbr_stats nd6nbrstat; int nd6_resolve(struct ifnet *in_ifp, struct ifnet *ifp, struct rte_mbuf *m, const struct in6_addr *addr, - struct ether_addr *desten); + struct rte_ether_addr *desten); int nd6_input(struct ifnet *ifp, struct rte_mbuf *m); void nd6_nbr_walk(const struct ifnet *, ll_walkhash_f_t *, void *); @@ -67,9 +78,9 @@ struct llentry *in6_lltable_lookup(struct ifnet *ifp, u_int flags, struct llentry * lla_lookup6(struct lltable *llt, const struct in6_addr *addr); int nd6_lladdr_add(struct ifnet *ifp, struct in6_addr *addr, - const struct ether_addr *mac, uint16_t state, + const struct rte_ether_addr *mac, uint16_t state, uint8_t ntf_flags); -bool nd6_is_sol_na(struct rte_mbuf *m); +int cmd_nd6_get_cfg(FILE *f); /* Minimized inline link address lookup */ static inline struct llentry * @@ -85,14 +96,14 @@ in6_lltable_find(struct ifnet *ifp, const struct in6_addr *addr) */ static inline int 
nd6_resolve_fast(struct ifnet *in_ifp, struct ifnet *ifp, struct rte_mbuf *m, - const struct in6_addr *addr, struct ether_addr *desten) + const struct in6_addr *addr, struct rte_ether_addr *desten) { struct llentry *la; la = in6_lltable_find(ifp, addr); if (likely(la && (la->la_flags & LLE_VALID))) { rte_atomic16_clear(&la->ll_idle); - ether_addr_copy(&la->ll_addr, desten); + rte_ether_addr_copy(&la->ll_addr, desten); return 0; } diff --git a/src/netinet6/route_v6.c b/src/netinet6/route_v6.c index a0ef95f8..419c2f0f 100644 --- a/src/netinet6/route_v6.c +++ b/src/netinet6/route_v6.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2011-2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -37,23 +37,25 @@ #include "dp_event.h" #include "ecmp.h" #include "fal.h" +#include "ip_forward.h" #include "if_var.h" #include "in6_var.h" #include "ip6_funcs.h" #include "json_writer.h" +#include "lcore_sched.h" #include "lpm/lpm6.h" +#include "mpls/mpls_label_table.h" #include "netinet6/route_v6.h" #include "netinet6/nd6_nbr.h" -#include "pktmbuf.h" +#include "pktmbuf_internal.h" #include "route_flags.h" #include "route_v6.h" #include "urcu.h" #include "vplane_debug.h" #include "vplane_log.h" -#include "vrf.h" +#include "vrf_internal.h" +#include "vrf_if.h" -#define NEXTHOP6_HASH_TBL_MIN (UINT8_MAX + 1) -#define NEXTHOP6_HASH_TBL_SIZE RTE_FBK_HASH_ENTRIES_MAX /* Use */ #define IPV6_NH_HASH_KEY_SIZE 6 @@ -73,54 +75,28 @@ * * addr +----------+ * ---->| | - * | L P M 6 | idx +-----------+ - * | +---->| nexthop_u | - * | | +-----------+ - * | | | | - * +----------+ +-----------+ - * |nexthop_v6 | - * | 0 | - * +-----------+ - * | ... 
| - * +-----------+ - * |nexthop_v6 | - * | count - 1 | - * +-----------+ + * | L P M 6 | idx +--------------+ + * | +---->| nexthop_list | + * | | +--------------+ + * | | | | + * +----------+ +--------------+ + * |nexthop_v6 | + * | 0 | + * +--------------+ + * | ... | + * +--------------+ + * |nexthop_v6 | + * | count - 1 | + * +--------------+ */ -struct next_hop_v6_u { - struct next_hop_v6 *siblings; - uint32_t nsiblings; /* size of next_hop array */ - uint32_t refcount; /* # of LPM entries referring */ - uint32_t index; - struct next_hop_v6 hop0; /* optimization for non-ECMP */ - struct cds_lfht_node nh_node; - enum pd_obj_state pd_state; - fal_object_t nhg_fal_obj; /* FAL handle for next_hop_group */ - fal_object_t *nh_fal_obj; /* Per-nh FAL handles */ - struct rcu_head rcu; -} __rte_cache_aligned; - static struct cds_lfht *nexthop6_hash; -struct nexthop6_hash_key { - const struct next_hop_v6 *nh; - size_t size; -}; - -struct nexthop6_table { - uint32_t in_use; /* # of table entries */ - uint32_t rover; /* last used slot */ - struct next_hop_v6_u *entry[NEXTHOP6_HASH_TBL_SIZE]; - uint32_t neigh_present; - uint32_t neigh_created; -}; - /* Nexthop entry table, could be per-namespace */ -static struct nexthop6_table nh6_tbl; +static struct nexthop_table nh6_tbl; -/* Well-known blackhole next_hop_v6_u for failure cases */ -static struct next_hop_v6_u *nextu6_blackhole; +/* Well-known blackhole next_hop_u for failure cases */ +static struct next_hop_list *nextl6_blackhole; static pthread_mutex_t route6_mutex = PTHREAD_MUTEX_INITIALIZER; @@ -128,6 +104,8 @@ static pthread_mutex_t route6_mutex = PTHREAD_MUTEX_INITIALIZER; static uint32_t route6_sw_stats[PD_OBJ_STATE_LAST]; static uint32_t route6_hw_stats[PD_OBJ_STATE_LAST]; +#define IN6ADDR_V4MAPPED_INIT { { { 0,0,0,0,0,0,0,0,0,0,0xff,0xff,0,0,0,0 } } } + static const struct reserved_route { struct in6_addr addr; int prefix_length; @@ -140,19 +118,55 @@ static const struct reserved_route { .flags = RTF_NOROUTE | 
RTF_REJECT, .scope = LPM_SCOPE_PAN_DIMENSIONAL, }, + /* + * RFC 4291 - Unicast destination address sanity checks. + * The following are not allowed: unspecified, loopback + * draft-itojun-v6ops-v4mapped-harmful-02: + * Don't allow V4 mapped either. + */ + { + .addr = IN6ADDR_ANY_INIT, + .prefix_length = 128, + .flags = RTF_BLACKHOLE, + .scope = RT_SCOPE_HOST, + }, + { + .addr = IN6ADDR_LOOPBACK_INIT, + .prefix_length = 128, + .flags = RTF_BLACKHOLE, + .scope = RT_SCOPE_HOST, + }, + { + .addr = IN6ADDR_V4MAPPED_INIT, + .prefix_length = 96, + .flags = RTF_BLACKHOLE, + .scope = RT_SCOPE_HOST, + }, }; +static struct nexthop_table *route6_get_nh_table(void) +{ + return &nh6_tbl; +} + + +static struct cds_lfht *route6_get_nh_hash_table(void) +{ + return nexthop6_hash; +} + /* - * Wrapper round the nexthop6_new function. This one keeps track of the + * Wrapper round the nexthop_new function. This one keeps track of the * failures and successes. */ static int -route_nexthop6_new(struct next_hop_v6 *nh, uint16_t size, +route_nexthop6_new(struct next_hop *nh, uint16_t size, uint32_t *slot) { int rc; - rc = nexthop6_new(nh, size, slot); + rc = nexthop_new(AF_INET6, nh, size, RTPROT_UNSPEC, FAL_NHG_USE_IP, + slot); if (rc >= 0) return rc; @@ -175,18 +189,20 @@ route_nexthop6_new(struct next_hop_v6 *nh, uint16_t size, * failures and successes. 
*/ static int -route_lpm6_add(vrfid_t vrf_id, struct lpm6 *lpm, +route_lpm6_add(vrfid_t vrf_id, fal_object_t vrf_obj, struct lpm6 *lpm, const struct in6_addr *ip, uint8_t depth, uint32_t next_hop, int16_t scope, uint32_t tableid) { int rc; struct pd_obj_state_and_flags *pd_state; struct pd_obj_state_and_flags *old_pd_state; + enum pd_obj_state nhl_pd_state; + fal_object_t nhg_fal_obj; + struct next_hop *hops; uint32_t old_nh; bool demoted = false; - struct next_hop_v6_u *nextu = - rcu_dereference(nh6_tbl.entry[next_hop]); bool update_pd_state = true; + size_t size; rc = lpm6_add(lpm, ip->s6_addr, depth, next_hop, scope, &pd_state, &old_nh, &old_pd_state); @@ -216,28 +232,25 @@ route_lpm6_add(vrfid_t vrf_id, struct lpm6 *lpm, return rc; } - if (nextu->pd_state != PD_OBJ_STATE_FULL) { - pd_state->state = nextu->pd_state; - nextu = nextu6_blackhole; + size = next_hop_list_get_fal_nhs(AF_INET6, next_hop, &hops); + nhg_fal_obj = next_hop_list_get_fal_obj( + AF_INET6, next_hop, &nhl_pd_state); + + if (nhl_pd_state != PD_OBJ_STATE_FULL && + nhl_pd_state != PD_OBJ_STATE_NOT_NEEDED) { + pd_state->state = nhl_pd_state; update_pd_state = false; } if (demoted) { - struct next_hop_v6_u *nextu = - rcu_dereference(nh6_tbl.entry[next_hop]); - if (old_pd_state->created) { - rc = fal_ip6_upd_route(vrf_id, ip, depth, + rc = fal_ip6_upd_route(vrf_id, vrf_obj, ip, depth, tableid, - nextu->siblings, - nextu->nsiblings, - nextu->nhg_fal_obj); + hops, size, nhg_fal_obj); } else { - rc = fal_ip6_new_route(vrf_id, ip, depth, + rc = fal_ip6_new_route(vrf_id, vrf_obj, ip, depth, tableid, - nextu->siblings, - nextu->nsiblings, - nextu->nhg_fal_obj); + hops, size, nhg_fal_obj); } if (update_pd_state) pd_state->state = fal_state_to_pd_state(rc); @@ -254,10 +267,8 @@ route_lpm6_add(vrfid_t vrf_id, struct lpm6 *lpm, * We have successfully added to the lpm, and now need to update the * platform, if there is one. 
*/ - rc = fal_ip6_new_route(vrf_id, ip, depth, tableid, - nextu->siblings, - nextu->nsiblings, - nextu->nhg_fal_obj); + rc = fal_ip6_new_route(vrf_id, vrf_obj, ip, depth, tableid, + hops, size, nhg_fal_obj); if (update_pd_state) pd_state->state = fal_state_to_pd_state(rc); if (!rc) @@ -273,7 +284,7 @@ route_lpm6_add(vrfid_t vrf_id, struct lpm6 *lpm, } static int -route_lpm6_delete(vrfid_t vrf_id, struct lpm6 *lpm, +route_lpm6_delete(vrfid_t vrf_id, fal_object_t vrf_obj, struct lpm6 *lpm, const struct in6_addr *ip, uint8_t depth, uint32_t *index, int16_t scope) { @@ -305,29 +316,30 @@ route_lpm6_delete(vrfid_t vrf_id, struct lpm6 *lpm, } if (promoted) { - struct next_hop_v6_u *nextu = - rcu_dereference(nh6_tbl.entry[new_nh]); - bool update_new_pd_state = true; - - if (nextu->pd_state != PD_OBJ_STATE_FULL) { - new_pd_state->state = nextu->pd_state; - nextu = nextu6_blackhole; + enum pd_obj_state nhl_pd_state; + fal_object_t nhg_fal_obj; + struct next_hop *hops; + size_t size; + + size = next_hop_list_get_fal_nhs(AF_INET6, new_nh, &hops); + nhg_fal_obj = next_hop_list_get_fal_obj( + AF_INET6, new_nh, &nhl_pd_state); + + if (nhl_pd_state != PD_OBJ_STATE_FULL && + nhl_pd_state != PD_OBJ_STATE_NOT_NEEDED) { + new_pd_state->state = nhl_pd_state; update_new_pd_state = false; } if (pd_state.created) { - rc = fal_ip6_upd_route(vrf_id, ip, depth, + rc = fal_ip6_upd_route(vrf_id, vrf_obj, ip, depth, lpm6_get_id(lpm), - nextu->siblings, - nextu->nsiblings, - nextu->nhg_fal_obj); + hops, size, nhg_fal_obj); } else { - rc = fal_ip6_new_route(vrf_id, ip, depth, + rc = fal_ip6_new_route(vrf_id, vrf_obj, ip, depth, lpm6_get_id(lpm), - nextu->siblings, - nextu->nsiblings, - nextu->nhg_fal_obj); + hops, size, nhg_fal_obj); } if (update_new_pd_state) new_pd_state->state = fal_state_to_pd_state(rc); @@ -340,7 +352,8 @@ route_lpm6_delete(vrfid_t vrf_id, struct lpm6 *lpm, /* successfully removed and no lower scope promoted */ if (pd_state.created) { - rc = fal_ip6_del_route(vrf_id, ip, 
depth, lpm6_get_id(lpm)); + rc = fal_ip6_del_route(vrf_id, vrf_obj, ip, depth, + lpm6_get_id(lpm)); switch (rc) { case 0: route6_hw_stats[pd_state.state]--; @@ -357,14 +370,11 @@ route_lpm6_delete(vrfid_t vrf_id, struct lpm6 *lpm, } static int -route_lpm6_update(vrfid_t vrf_id __unused, struct lpm6 *lpm, +route_lpm6_update(vrfid_t vrf_id, fal_object_t vrf_obj, struct lpm6 *lpm, const struct in6_addr *ip, uint8_t depth, uint32_t *old_nh, uint32_t next_hop, int16_t scope, - uint32_t tableid __unused, - struct next_hop_v6 hops[] __unused, - size_t nhops __unused, - fal_object_t nhg_object __unused) + uint32_t tableid) { int rc; struct pd_obj_state_and_flags pd_state; @@ -373,6 +383,10 @@ route_lpm6_update(vrfid_t vrf_id __unused, struct lpm6 *lpm, uint32_t new_nh; uint32_t dummy_old_nh; bool update_new_pd_state = true; + enum pd_obj_state nhl_pd_state; + fal_object_t nhg_fal_obj; + struct next_hop *hops; + size_t size; /* * Remove an old entry from the lpm, and add a new one. lpm @@ -427,23 +441,22 @@ route_lpm6_update(vrfid_t vrf_id __unused, struct lpm6 *lpm, route6_sw_stats[PD_OBJ_STATE_ERROR]++; } - struct next_hop_v6_u *nextu = - rcu_dereference(nh6_tbl.entry[next_hop]); + size = next_hop_list_get_fal_nhs(AF_INET6, next_hop, &hops); + nhg_fal_obj = next_hop_list_get_fal_obj( + AF_INET6, next_hop, &nhl_pd_state); - if (nextu->pd_state != PD_OBJ_STATE_FULL) { - new_pd_state->state = nextu->pd_state; - nextu = nextu6_blackhole; + if (nhl_pd_state != PD_OBJ_STATE_FULL && + nhl_pd_state != PD_OBJ_STATE_NOT_NEEDED) { + new_pd_state->state = nhl_pd_state; update_new_pd_state = false; } if (pd_state.created) { - rc = fal_ip6_upd_route(vrf_id, ip, depth, tableid, - nextu->siblings, nextu->nsiblings, - nextu->nhg_fal_obj); + rc = fal_ip6_upd_route(vrf_id, vrf_obj, ip, depth, tableid, + hops, size, nhg_fal_obj); } else { - rc = fal_ip6_new_route(vrf_id, ip, depth, tableid, - nextu->siblings, nextu->nsiblings, - nextu->nhg_fal_obj); + rc = fal_ip6_new_route(vrf_id, vrf_obj, 
ip, depth, tableid, + hops, size, nhg_fal_obj); } route6_hw_stats[pd_state.state]--; @@ -524,19 +537,23 @@ rt6_lpm_add_reserved_routes(struct lpm6 *lpm, struct vrf *vrf) { char b[INET_ADDRSTRLEN]; unsigned int rt_idx; + struct ip_addr addr_any = { + .type = AF_INET6, + .address.ip_v6 = in6addr_any, + }; if (vrf->v_id == VRF_INVALID_ID) return true; for (rt_idx = 0; rt_idx < ARRAY_SIZE(reserved_routes); rt_idx++) { const struct in6_addr *addr = &reserved_routes[rt_idx].addr; - struct next_hop_v6 *nhop; + struct next_hop *nhop; uint32_t nh_idx; int err_code; - nhop = nexthop6_create(NULL, &in6addr_any, - reserved_routes[rt_idx].flags, - 0, NULL); + nhop = nexthop_create(NULL, &addr_any, + reserved_routes[rt_idx].flags, + 0, NULL); if (!nhop) return false; @@ -555,6 +572,7 @@ rt6_lpm_add_reserved_routes(struct lpm6 *lpm, struct vrf *vrf) err_code = route_lpm6_add( vrf->v_id, + vrf->v_fal_obj, lpm, addr, reserved_routes[rt_idx].prefix_length, @@ -571,7 +589,7 @@ rt6_lpm_add_reserved_routes(struct lpm6 *lpm, struct vrf *vrf) } free(nhop); if (err_code != 0) { - nexthop6_put(nh_idx); + nexthop_put(AF_INET6, nh_idx); return false; } } @@ -595,6 +613,7 @@ rt6_lpm_del_reserved_routes(struct lpm6 *lpm, struct vrf *vrf) err_code = route_lpm6_delete( vrf->v_id, + vrf->v_fal_obj, lpm, addr, reserved_routes[rt_idx].prefix_length, @@ -610,7 +629,7 @@ rt6_lpm_del_reserved_routes(struct lpm6 *lpm, struct vrf *vrf) nh_idx, err_code); return false; } - nexthop6_put(nh_idx); + nexthop_put(AF_INET6, nh_idx); } return true; @@ -641,104 +660,9 @@ static struct lpm6 *rt6_create_lpm(uint32_t id, struct vrf *vrf) rcu_assign_pointer(vrf->v_rt6_head.rt6_table[id], lpm); - if (vrf->v_id == VRF_DEFAULT_ID && id != RT_TABLE_MAIN) { - struct vrf *dst_vrf; - vrfid_t vrf_id; - - VRF_FOREACH_KERNEL(dst_vrf, vrf_id) { - if (rt_lpm_v6_resize(&dst_vrf->v_rt6_head, id) < 0) - return NULL; - - rcu_assign_pointer(dst_vrf->v_rt6_head.rt6_table[id], - lpm); - } - } - return lpm; } - -int 
route_v6_init(struct vrf *vrf) -{ - struct lpm6 *lpm; - - lpm = rt6_create_lpm(RT_TABLE_MAIN, vrf); - if (!lpm) { - DP_LOG_W_VRF(ERR, ROUTE6, vrf->v_id, - "rte_route_v6_init: can't create ipv6 LPM\n"); - return -1; - } - - /* - * All tables other than default alias tables in the default VRF. - * This is necessary in order to easily support PBR setvrf + tableid. - */ - if (is_nondefault_vrf(vrf->v_id)) { - struct vrf *default_vrf = vrf_get_rcu(VRF_DEFAULT_ID); - struct route6_head *rt6_head = &default_vrf->v_rt6_head; - uint32_t id; - - /* - * Stash the main table LPM point so it can be freed - * and also so it can be assigned over RT_TABLE_MAIN - * when unaliasing later. - */ - rcu_assign_pointer(vrf->v_rt6_head.rt6_table[RT_TABLE_UNSPEC], - lpm); - - for (id = 1; id < rt6_head->rt6_rtm_max; id++) { - struct lpm6 *src_lpm = rt6_head->rt6_table[id]; - - if (!src_lpm || id == RT_TABLE_MAIN) - continue; - - if (rt_lpm_v6_resize(&vrf->v_rt6_head, id) < 0) - return -1; - - rcu_assign_pointer(vrf->v_rt6_head.rt6_table[id], - src_lpm); - } - } - - return 0; -} - -void route_v6_uninit(struct vrf *vrf, struct route6_head *rt6_head) -{ - uint32_t id; - - if (rt6_head == NULL) - return; - - for (id = 0; id < rt6_head->rt6_rtm_max; id++) { - struct lpm6 *lpm = rt6_head->rt6_table[id]; - - /* - * Only free the unspec table as all the others - * could be aliases. 
- */ - if (is_nondefault_vrf(vrf->v_id) && - id != RT_TABLE_UNSPEC) - continue; - - if (lpm) { - if (!lpm6_is_empty(lpm)) { - if (!rt6_lpm_is_empty(lpm)) { - RTE_LOG(ERR, ROUTE, - "%s:non empty lpm vrf %u table %u\n", - __func__, vrf->v_id, id); - return; - } - rt6_lpm_del_reserved_routes(lpm, vrf); - } - lpm6_free(lpm); - } - } - free_huge(rt6_head->rt6_table, (rt6_head->rt6_rtm_max * - sizeof(struct lpm6 *))); - rt6_head->rt6_table = NULL; -} - static struct lpm6 *rt6_get_lpm(struct route6_head *rt6_head, uint32_t id) { if (unlikely(id >= rt6_head->rt6_rtm_max)) @@ -747,44 +671,6 @@ static struct lpm6 *rt6_get_lpm(struct route6_head *rt6_head, uint32_t id) return rcu_dereference(rt6_head->rt6_table[id]); } -bool route6_link_vrf_to_table(struct vrf *vrf, uint32_t tableid) -{ - struct vrf *default_vrf = vrf_get_rcu(VRF_DEFAULT_ID); - struct route6_head *rt6_head = &default_vrf->v_rt6_head; - struct lpm6 *new_lpm; - - new_lpm = rt6_get_lpm(rt6_head, tableid); - if (!new_lpm) { - new_lpm = rt6_create_lpm(tableid, default_vrf); - if (!new_lpm) - return false; - } - - /* - * Alias the main table to the given tableid in the default - * VRF. - */ - rcu_assign_pointer(vrf->v_rt6_head.rt6_table[RT_TABLE_MAIN], - new_lpm); - - return true; -} - -bool route6_unlink_vrf_from_table(struct vrf *vrf) -{ - struct lpm6 *old_main_lpm = vrf->v_rt6_head.rt6_table[ - RT_TABLE_UNSPEC]; - - /* - * Unalias the main table. We require the pointer to be valid - * so we use the table we initially created the VRF with. 
- */ - rcu_assign_pointer(vrf->v_rt6_head.rt6_table[RT_TABLE_MAIN], - old_main_lpm); - - return true; -} - /* Do lookahead into route table and get first prefix match table */ void rt6_prefetch_fast(const struct rte_mbuf *m, const struct in6_addr *dst) { @@ -810,76 +696,32 @@ void rt6_prefetch(const struct rte_mbuf *m, const struct in6_addr *dst) lpm6_prefetch(lpm, dst->s6_addr); } -ALWAYS_INLINE -struct next_hop_v6 *nexthop6_select_internal(struct next_hop_v6 *next, - uint32_t size, - uint32_t hash) -{ - uint32_t path; - - if (ecmp_max_path && ecmp_max_path < size) - size = ecmp_max_path; - - path = ecmp_lookup(size, hash); - if (unlikely(next[path].flags & RTF_DEAD)) { - /* retry to find a good path */ - for (path = 0; path < size; path++) { - if (!(next[path].flags & RTF_DEAD)) - break; - } - - if (path == size) - return NULL; - } - return next + path; -} - -ALWAYS_INLINE -struct next_hop_v6 *nexthop6_select(uint32_t index, const struct rte_mbuf *m, - uint16_t ether_type) -{ - struct next_hop_v6_u *nextu; - struct next_hop_v6 *next; - uint32_t size; - - nextu = rcu_dereference(nh6_tbl.entry[index]); - if (unlikely(!nextu)) - return NULL; - size = nextu->nsiblings; - next = nextu->siblings; - if (likely(size == 1)) - return next; - - return nexthop6_select_internal(next, size, - ecmp_mbuf_hash(m, ether_type)); -} - -int nh6_lookup_by_index(uint32_t nhindex, uint32_t hash, +int dp_nh6_lookup_by_index(uint32_t nhindex, uint32_t hash, struct in6_addr *nh, uint32_t *ifindex) { - const struct next_hop_v6_u *nextu; - struct next_hop_v6 *next; + const struct next_hop_list *nextl; + struct next_hop *next; struct ifnet *ifp; uint32_t size; - nextu = rcu_dereference(nh6_tbl.entry[nhindex]); - if (nextu == NULL) + nextl = rcu_dereference(nh6_tbl.entry[nhindex]); + if (nextl == NULL) return -1; - next = nextu->siblings; + next = nextl->siblings; if (!next) return -1; - size = nextu->nsiblings; + size = nextl->nsiblings; if (size > 1) - next = nexthop6_select_internal(next, 
size, hash); + next = nexthop_mp_select(nextl, next, size, hash); if (next->flags & RTF_GATEWAY) - *nh = next->gateway; + *nh = next->gateway.address.ip_v6; else *nh = in6addr_any; - ifp = nh6_get_ifp(next); + ifp = dp_nh_get_ifp(next); if (!ifp) return -1; @@ -904,7 +746,7 @@ bool rt6_valid_tblid(vrfid_t vrfid, uint32_t tbl_id) * Returns RCU protected nexthop structure or NULL. */ ALWAYS_INLINE -struct next_hop_v6 *rt6_lookup(const struct in6_addr *dst, uint32_t tbl_id, +struct next_hop *dp_rt6_lookup(const struct in6_addr *dst, uint32_t tbl_id, const struct rte_mbuf *m) { vrfid_t vrfid = pktmbuf_get_vrf(m); @@ -922,13 +764,13 @@ struct next_hop_v6 *rt6_lookup(const struct in6_addr *dst, uint32_t tbl_id, * Returns RCU protected nexthop structure or NULL. */ ALWAYS_INLINE -struct next_hop_v6 *rt6_lookup_fast(struct vrf *vrf, - const struct in6_addr *dst, - uint32_t tbl_id, - const struct rte_mbuf *m) +struct next_hop *rt6_lookup_fast(struct vrf *vrf, + const struct in6_addr *dst, + uint32_t tbl_id, + const struct rte_mbuf *m) { const struct lpm6 *lpm; - struct next_hop_v6 *nh; + struct next_hop *nh; uint32_t index = 0; lpm = rcu_dereference(vrf->v_rt6_head.rt6_table[tbl_id]); @@ -936,86 +778,22 @@ struct next_hop_v6 *rt6_lookup_fast(struct vrf *vrf, if (unlikely(lpm6_lookup(lpm, dst->s6_addr, &index) != 0)) return NULL; - nh = nexthop6_select(index, m, ETHER_TYPE_IPv6); + nh = nexthop_select(AF_INET6, index, m, RTE_ETHER_TYPE_IPV6); if (nh && unlikely(nh->flags & RTF_NOROUTE)) return NULL; return nh; } -/* - * Modifying a NH in non atomic way, so this must be atomically swapped - * into the forwarding state when ready - */ -static void nh6_set_neigh_present(struct next_hop_v6 *next_hop, - struct llentry *lle) -{ - assert((next_hop->flags & RTF_NEIGH_PRESENT) == 0); - next_hop->flags |= RTF_NEIGH_PRESENT; - next_hop->u.lle = lle; - nh6_tbl.neigh_present++; -} - -static void nh6_clear_neigh_present(struct next_hop_v6 *next_hop) -{ - assert(next_hop->flags & 
RTF_NEIGH_PRESENT); - next_hop->flags &= ~RTF_NEIGH_PRESENT; - next_hop->u.ifp = next_hop->u.lle->ifp; - nh6_tbl.neigh_present--; -} - -static void nh6_set_neigh_created(struct next_hop_v6 *next_hop, - struct llentry *lle) -{ - assert((next_hop->flags & RTF_NEIGH_CREATED) == 0); - next_hop->flags |= RTF_NEIGH_CREATED; - next_hop->u.lle = lle; - nh6_tbl.neigh_created++; -} - -static void nh6_clear_neigh_created(struct next_hop_v6 *next_hop) -{ - assert(next_hop->flags & RTF_NEIGH_CREATED); - next_hop->flags &= ~RTF_NEIGH_CREATED; - next_hop->u.ifp = next_hop->u.lle->ifp; - nh6_tbl.neigh_created--; -} - -static bool nh6_is_connected(const struct next_hop_v6 *nh) -{ - if (nh->flags & (RTF_BLACKHOLE | RTF_REJECT | - RTF_SLOWPATH | RTF_GATEWAY | - RTF_LOCAL | RTF_NOROUTE)) - return false; - - return true; -} - -static bool nh6_is_local(const struct next_hop_v6 *nh) -{ - if (nh->flags & RTF_LOCAL) - return true; - - return false; -} - -static bool nh6_is_gw(const struct next_hop_v6 *nh) -{ - if (nh->flags & RTF_GATEWAY) - return true; - - return false; -} - static inline bool rt6_is_nh_local(int nhindex) { - struct next_hop_v6_u *nextu; - struct next_hop_v6 *next; + struct next_hop_list *nextl; + struct next_hop *next; - nextu = rcu_dereference(nh6_tbl.entry[nhindex]); - if (unlikely(!nextu)) + nextl = rcu_dereference(nh6_tbl.entry[nhindex]); + if (unlikely(!nextl)) return false; - next = rcu_dereference(nextu->siblings); + next = rcu_dereference(nextl->siblings); if (next && next->flags & RTF_LOCAL) return true; @@ -1038,55 +816,9 @@ inline bool is_local_ipv6(vrfid_t vrf_id, const struct in6_addr *dst) return rt6_is_nh_local(index); } -struct next_hop_v6 * -nexthop6_create(struct ifnet *ifp, const struct in6_addr *gw, uint32_t flags, - uint16_t num_labels, label_t *labels) -{ - struct next_hop_v6 *next = malloc(sizeof(struct next_hop_v6)); - - if (next) { - next->gateway = *gw; - next->flags = flags; - nh6_set_ifp(next, ifp); - if (!nh_outlabels_set(&next->outlabels, 
num_labels, labels)) { - RTE_LOG(ERR, ROUTE, - "Failed to set outlabels for nexthop with %u labels\n", - num_labels); - free(next); - return NULL; - } - } - return next; -} - -/* - * Create an array of next_hops based on the hops in the NHU. - */ -static struct next_hop_v6 *nexthop6_create_copy(struct next_hop_v6_u *nhu, - int *size) -{ - struct next_hop_v6 *next, *n; - struct next_hop_v6 *array = rcu_dereference(nhu->siblings); - uint32_t i; - - *size = nhu->nsiblings; - n = next = calloc(sizeof(struct next_hop_v6), *size); - if (!next) - return NULL; - - for (i = 0; i < nhu->nsiblings; i++) { - struct next_hop_v6 *nhu_next = array + i; - - memcpy(n, nhu_next, sizeof(struct next_hop_v6)); - nh_outlabels_copy(&nhu_next->outlabels, &n->outlabels); - n++; - } - return next; -} - /* Reuse existing next hop entry */ static int -nexthop6_hashfn(const struct nexthop6_hash_key *key, +nexthop6_hashfn(const struct nexthop_hash_key *key, unsigned long seed __rte_unused) { size_t size = key->size; @@ -1095,9 +827,9 @@ nexthop6_hashfn(const struct nexthop6_hash_key *key, uint16_t i, j = 0; for (i = 0; i < size; i++, j += IPV6_NH_HASH_KEY_SIZE) { - memcpy(&hash_keys[j], &key->nh[i].gateway, - sizeof(key->nh[i].gateway)); - ifp = nh6_get_ifp(&key->nh[i]); + memcpy(&hash_keys[j], &key->nh[i].gateway.address.ip_v6, + sizeof(key->nh[i].gateway.address.ip_v6)); + ifp = dp_nh_get_ifp(&key->nh[i]); hash_keys[j+4] = ifp ? 
ifp->if_index : 0; hash_keys[j+5] = key->nh[i].flags & NH_FLAGS_CMP_MASK; } @@ -1108,328 +840,109 @@ nexthop6_hashfn(const struct nexthop6_hash_key *key, static int nexthop6_cmpfn(struct cds_lfht_node *node, const void *key) { - const struct nexthop6_hash_key *h_key = key; - const struct next_hop_v6_u *nu = - caa_container_of(node, const struct next_hop_v6_u, nh_node); + const struct nexthop_hash_key *h_key = key; + const struct next_hop_list *nl = + caa_container_of(node, const struct next_hop_list, nh_node); uint16_t i; - if (h_key->size != nu->nsiblings) + if (h_key->size != nl->nsiblings || + h_key->use != nl->use) return false; for (i = 0; i < h_key->size; i++) { - if ((nh6_get_ifp(&nu->siblings[i]) != - nh6_get_ifp(&h_key->nh[i])) || - (!IN6_ARE_ADDR_EQUAL(&nu->siblings[i].gateway, - &h_key->nh[i].gateway)) || - ((nu->siblings[i].flags & NH_FLAGS_CMP_MASK) != + if ((dp_nh_get_ifp(&nl->siblings[i]) != + dp_nh_get_ifp(&h_key->nh[i])) || + (!IN6_ARE_ADDR_EQUAL(&nl->siblings[i].gateway.address, + &h_key->nh[i].gateway.address)) || + ((nl->siblings[i].flags & NH_FLAGS_CMP_MASK) != (h_key->nh[i].flags & NH_FLAGS_CMP_MASK)) || - !nh_outlabels_cmpfn(&nu->siblings[i].outlabels, + !nh_outlabels_cmpfn(&nl->siblings[i].outlabels, &h_key->nh[i].outlabels)) return false; } return true; } -static struct next_hop_v6_u * -nexthop6_lookup(const struct nexthop6_hash_key *key) -{ - struct cds_lfht_iter iter; - struct cds_lfht_node *node; - - cds_lfht_lookup(nexthop6_hash, - nexthop6_hashfn(key, 0), - nexthop6_cmpfn, key, &iter); - node = cds_lfht_iter_get_node(&iter); - if (node) - return caa_container_of(node, struct next_hop_v6_u, nh_node); - else - return NULL; -} - -static int -nexthop6_hash_insert(struct next_hop_v6_u *nu, - const struct nexthop6_hash_key *key) -{ - struct cds_lfht_node *ret_node; - - cds_lfht_node_init(&nu->nh_node); - unsigned long hash = nexthop6_hashfn(key, 0); - - ret_node = cds_lfht_add_unique(nexthop6_hash, hash, - nexthop6_cmpfn, key, - 
&nu->nh_node); - - return (ret_node != &nu->nh_node) ? EEXIST : 0; -} - -/* - * Remove the old NH from the hash and add the new one. Can not - * use a call to cds_lfht_add_replace() or any of the variants - * as the key for the new NH may be very different in the case - * where there are a different number of paths. - */ -static int -nexthop6_hash_del_add(struct next_hop_v6_u *old_nu, - struct next_hop_v6_u *new_nu) -{ - struct nexthop6_hash_key key = {.nh = new_nu->siblings, - .size = new_nu->nsiblings}; - int rc; - - /* Remove old one */ - rc = cds_lfht_del(nexthop6_hash, &old_nu->nh_node); - assert(rc == 0); - if (rc != 0) - return rc; - - /* add new one */ - return nexthop6_hash_insert(new_nu, &key); -} - -static struct next_hop_v6_u * -nexthop6_reuse(const struct nexthop6_hash_key *key, uint32_t *slot) -{ - struct next_hop_v6_u *nextu; - - nextu = nexthop6_lookup(key); - if (!nextu) - return NULL; - - *slot = nextu->index; - ++nextu->refcount; - - DP_DEBUG(ROUTE, DEBUG, ROUTE, - "nexthop6 reuse: nexthop %u, refs %u\n", - nextu->index, nextu->refcount); - - return nextu; -} - -static struct next_hop_v6_u *nexthop6_alloc(int size) -{ - struct next_hop_v6_u *nextu; - - nextu = calloc(1, sizeof(*nextu)); - if (unlikely(!nextu)) { - RTE_LOG(ERR, ROUTE, "can't alloc next_hop_v6_u\n"); - return false; - } - - nextu->nh_fal_obj = calloc(size, sizeof(*nextu->nh_fal_obj)); - if (!nextu->nh_fal_obj) { - free(nextu); - return NULL; - } - - if (size == 1) { - nextu->siblings = &nextu->hop0; - } else { - nextu->siblings = calloc(1, size * sizeof(struct next_hop_v6)); - if (unlikely(nextu->siblings == NULL)) { - free(nextu->nh_fal_obj); - free(nextu); - return NULL; - } - } - nextu->nsiblings = size; - return nextu; -} - -static void __nexthop6_destroy(struct next_hop_v6_u *nextu) -{ - unsigned int i; - - for (i = 0; i < nextu->nsiblings; i++) - nh_outlabels_destroy(&nextu->siblings[i].outlabels); - - if (nextu->siblings != &nextu->hop0) - free(nextu->siblings); - - 
free(nextu->nh_fal_obj); - free(nextu); -} - -/* callback from RCU after all other threads are done. */ -static void nexthop6_destroy(struct rcu_head *head) -{ - struct next_hop_v6_u *nextu = - caa_container_of(head, struct next_hop_v6_u, rcu); - - __nexthop6_destroy(nextu); -} - -/* Look (or create) nexthop based on gateway */ -int -nexthop6_new(struct next_hop_v6 *nh, size_t size, uint32_t *slot) +int route_v6_init(struct vrf *vrf) { - struct next_hop_v6_u *nextu; - struct nexthop6_hash_key key = {.nh = nh, .size = size }; - uint32_t rover = nh6_tbl.rover; - uint32_t nh6_iter; - int ret; - - nextu = nexthop6_reuse(&key, slot); - if (nextu) - return 0; - - if (unlikely(nh6_tbl.in_use == NEXTHOP6_HASH_TBL_SIZE)) { - RTE_LOG(ERR, ROUTE, "V6 Next Hop tbl is full\n"); - return -ENOSPC; - } + struct lpm6 *lpm; - nextu = nexthop6_alloc(size); - if (!nextu) { - RTE_LOG(ERR, ROUTE, "can't alloc next_hop_v6_u\n"); - return -ENOMEM; - } - nextu->nsiblings = size; - nextu->refcount = 1; - nextu->index = rover; - if (size == 1) { - nextu->hop0 = *nh; - } else { - memcpy(nextu->siblings, nh, size * sizeof(struct next_hop_v6)); - } - if (unlikely(nexthop6_hash_insert(nextu, &key))) { - __nexthop6_destroy(nextu); - return -ENOMEM; + lpm = rt6_create_lpm(RT_TABLE_MAIN, vrf); + if (!lpm) { + DP_LOG_W_VRF(ERR, ROUTE6, vrf->v_id, + "rte_route_v6_init: can't create ipv6 LPM\n"); + return -1; } - ret = fal_ip6_new_next_hops(nextu->nsiblings, nextu->siblings, - &nextu->nhg_fal_obj, - nextu->nh_fal_obj); - if (ret < 0) { - if (ret != -EOPNOTSUPP) - RTE_LOG(ERR, ROUTE, - "FAL IPv6 next-hop-group create failed: %s\n", - strerror(-ret)); - nextu->pd_state = fal_state_to_pd_state(ret); - } else - nextu->pd_state = PD_OBJ_STATE_FULL; - - nh6_iter = rover; - do { - nh6_iter++; - if (nh6_iter >= NEXTHOP6_HASH_TBL_SIZE) - nh6_iter = 0; - } while ((rcu_dereference(nh6_tbl.entry[nh6_iter]) != NULL) && - likely(nh6_iter != rover)); - - nh6_tbl.rover = nh6_iter; - *slot = rover; - nh6_tbl.in_use++; 
- - rcu_assign_pointer(nh6_tbl.entry[rover], nextu); - return 0; } -static int nextu6_nc_count(const struct next_hop_v6_u *nhu) +void route_v6_uninit(struct vrf *vrf, struct route6_head *rt6_head) { - int count = 0; - uint32_t i; - struct next_hop_v6 *array = rcu_dereference(nhu->siblings); - - for (i = 0; i < nhu->nsiblings; i++) { - struct next_hop_v6 *next = array + i; - - if (nh6_is_neigh_created(next)) - count++; - } - return count; -} + uint32_t id; -static struct next_hop_v6 *nextu6_find_path_using_ifp(struct next_hop_v6_u *nhu, - struct ifnet *ifp, - int *sibling) -{ - uint32_t i; - struct next_hop_v6 *array = rcu_dereference(nhu->siblings); + if (rt6_head == NULL) + return; - for (i = 0; i < nhu->nsiblings; i++) { - struct next_hop_v6 *next = array + i; + for (id = 0; id < rt6_head->rt6_rtm_max; id++) { + struct lpm6 *lpm = rt6_head->rt6_table[id]; - if (nh6_get_ifp(next) == ifp) { - *sibling = i; - return next; + if (lpm) { + if (!lpm6_is_empty(lpm)) { + if (!rt6_lpm_is_empty(lpm)) { + RTE_LOG(ERR, ROUTE, + "%s:non empty lpm vrf %u table %u\n", + __func__, vrf->v_id, id); + return; + } + rt6_lpm_del_reserved_routes(lpm, vrf); + } + lpm6_free(lpm); } } - return NULL; + free_huge(rt6_head->rt6_table, (rt6_head->rt6_rtm_max * + sizeof(struct lpm6 *))); + rt6_head->rt6_table = NULL; } -static bool nextu6_is_any_connected(const struct next_hop_v6_u *nhu) +static struct next_hop_list * +route6_get_nh_blackhole(void) { - uint32_t i; - struct next_hop_v6 *array = rcu_dereference(nhu->siblings); - - for (i = 0; i < nhu->nsiblings; i++) { - struct next_hop_v6 *next = array + i; - - if (nh6_is_connected(next)) - return true; - } - return false; + return nextl6_blackhole; } -void nexthop6_put(uint32_t idx) -{ - struct next_hop_v6_u *nextu = rcu_dereference(nh6_tbl.entry[idx]); - - if (--nextu->refcount == 0) { - struct next_hop_v6 *array = nextu->siblings; - uint32_t i; - int ret; - - nh6_tbl.entry[idx] = NULL; - --nh6_tbl.in_use; - - for (i = 0; i < 
nextu->nsiblings; i++) { - struct next_hop_v6 *nh = array + i; - - if (nh6_is_neigh_present(nh)) - nh6_tbl.neigh_present--; - if (nh6_is_neigh_created(nh)) - nh6_tbl.neigh_created--; - } - - if (nextu->pd_state == PD_OBJ_STATE_FULL) { - ret = fal_ip6_del_next_hops(nextu->nhg_fal_obj, - nextu->nsiblings, - nextu->siblings, - nextu->nh_fal_obj); - if (ret < 0) { - RTE_LOG(ERR, ROUTE, - "FAL IPv6 next-hop-group delete failed: %s\n", - strerror(-ret)); - } - } - - cds_lfht_del(nexthop6_hash, &nextu->nh_node); - call_rcu(&nextu->rcu, nexthop6_destroy); - } -} +struct nh_common nh6_common = { + .nh_hash = nexthop6_hashfn, + .nh_compare = nexthop6_cmpfn, + .nh_get_hash_tbl = route6_get_nh_hash_table, + .nh_get_nh_tbl = route6_get_nh_table, + .nh_get_blackhole = route6_get_nh_blackhole, +}; void nexthop_v6_tbl_init(void) { - struct next_hop_v6 nh_drop = { + struct next_hop nh_drop = { .flags = RTF_BLACKHOLE, }; uint32_t idx; - nexthop6_hash = cds_lfht_new(NEXTHOP6_HASH_TBL_MIN, - NEXTHOP6_HASH_TBL_MIN, - NEXTHOP6_HASH_TBL_SIZE, + nexthop6_hash = cds_lfht_new(NEXTHOP_HASH_TBL_MIN, + NEXTHOP_HASH_TBL_MIN, + NEXTHOP_HASH_TBL_SIZE, CDS_LFHT_AUTO_RESIZE, NULL); if (nexthop6_hash == NULL) rte_panic("rte_route_v6_init: can't create nexthop6 hash\n"); + nh_common_register(AF_INET6, &nh6_common); + /* reserve a drop nexthop */ - if (nexthop6_new(&nh_drop, 1, &idx)) + if (nexthop_new(AF_INET6, &nh_drop, 1, RTPROT_UNSPEC, FAL_NHG_USE_IP, + &idx)) rte_panic("%s: can't create drop nexthop\n", __func__); - nextu6_blackhole = + nextl6_blackhole = rcu_dereference(nh6_tbl.entry[idx]); - if (!nextu6_blackhole) + if (!nextl6_blackhole) rte_panic("%s: can't create drop nexthop\n", __func__); } @@ -1448,17 +961,17 @@ static void subtree_walk_route_cleanup_cb(struct lpm6 *lpm, void *arg) { struct subtree_walk_arg *changing = arg; - struct next_hop_v6_u *nextu = rcu_dereference(nh6_tbl.entry[idx]); + struct next_hop_list *nextl = rcu_dereference(nh6_tbl.entry[idx]); uint8_t 
cover_ip[LPM6_IPV6_ADDR_SIZE]; struct in6_addr inaddr; uint8_t cover_depth; uint32_t cover_nh_idx; int neigh_created = 0; - if (!nextu) + if (!nextl) return; - neigh_created = nextu6_nc_count(nextu); + neigh_created = next_hop_list_nc_count(nextl); if (neigh_created == 0) return; @@ -1500,10 +1013,9 @@ static void subtree_walk_route_cleanup_cb(struct lpm6 *lpm, * entry is recreaetd with correct values. */ memcpy(&inaddr.s6_addr, masked_ip, sizeof(inaddr.s6_addr)); - route_lpm6_delete(changing->vrf->v_id, lpm, - &inaddr, 128, - &cover_nh_idx, RT_SCOPE_LINK); - nexthop6_put(idx); + route_lpm6_delete(changing->vrf->v_id, changing->vrf->v_fal_obj, + lpm, &inaddr, 128, &cover_nh_idx, RT_SCOPE_LINK); + nexthop_put(AF_INET6, idx); } static unsigned int lle_routing_insert_neigh_cb(struct lltable *llt __unused, @@ -1530,67 +1042,66 @@ enum nh_change { * then remove it. */ static uint32_t -route6_nh_replace(struct next_hop_v6_u *nextu, uint32_t nh_idx, - struct llentry *lle, uint32_t *new_nextu_idx_for_del, - enum nh_change (*nh_processing_cb)(struct next_hop_v6 *next, +route6_nh_replace(int family, struct next_hop_list *nextl, uint32_t nh_idx, + struct llentry *lle, uint32_t *new_nextl_idx_for_del, + enum nh_change (*nh_processing_cb)(struct next_hop *next, int sibling, void *arg), void *arg) { - struct next_hop_v6_u *new_nextu = NULL; - struct next_hop_v6 *old_array; - struct next_hop_v6 *new_array = NULL; + struct next_hop_list *new_nextl = NULL; + struct next_hop *old_array; + struct next_hop *new_array = NULL; enum nh_change nh_change; bool any_change = false; uint32_t i; uint32_t deleted = 0; - ASSERT_MASTER(); + ASSERT_MAIN(); /* walk all the NHs, copying as we go */ - old_array = rcu_dereference(nextu->siblings); - - new_nextu = nexthop6_alloc(nextu->nsiblings); - if (!new_nextu) + old_array = rcu_dereference(nextl->siblings); + new_nextl = next_hop_list_create_copy_start(AF_INET6, nextl); + if (!new_nextl) return 0; - new_nextu->index = nextu->index; - 
new_nextu->refcount = nextu->refcount; - new_array = rcu_dereference(new_nextu->siblings); + new_array = rcu_dereference(new_nextl->siblings); - for (i = 0; i < nextu->nsiblings; i++) { - struct next_hop_v6 *next = old_array + i; - struct next_hop_v6 *new_next = new_array + i - deleted; + for (i = 0; i < nextl->nsiblings; i++) { + struct next_hop *next = old_array + i; + struct next_hop *new_next = new_array + i - deleted; nh_change = nh_processing_cb(next, i, arg); /* Copy across old NH */ - memcpy(new_next, next, sizeof(struct next_hop_v6)); - nh_outlabels_copy(&next->outlabels, &new_next->outlabels); + if (next_hop_copy(next, new_next) < 0) { + __nexthop_destroy(new_nextl); + return 0; + } switch (nh_change) { case NH_NO_CHANGE: break; case NH_SET_NEIGH_CREATED: any_change = true; - nh6_set_neigh_created(new_next, lle); + nh_set_neigh_created(family, new_next, lle); break; case NH_CLEAR_NEIGH_CREATED: any_change = true; - nh6_clear_neigh_created(new_next); + nh_clear_neigh_created(family, new_next); break; case NH_SET_NEIGH_PRESENT: any_change = true; - nh6_set_neigh_present(new_next, lle); + nh_set_neigh_present(family, new_next, lle); break; case NH_CLEAR_NEIGH_PRESENT: any_change = true; - nh6_clear_neigh_present(new_next); + nh_clear_neigh_present(AF_INET6, new_next); break; case NH_DELETE: - if (!new_nextu_idx_for_del) { - __nexthop6_destroy(new_nextu); + if (!new_nextl_idx_for_del) { + __nexthop_destroy(new_nextl); return -1; } any_change = true; @@ -1601,76 +1112,62 @@ route6_nh_replace(struct next_hop_v6_u *nextu, uint32_t nh_idx, /* Did we make any changes? If not then we can return */ if (!any_change) { - __nexthop6_destroy(new_nextu); + __nexthop_destroy(new_nextl); return 0; } if (deleted) { /* * We are deleting at least one nh - create a new - * nextu for caller to deal with. + * nextl for caller to deal with. 
*/ - if (deleted != nextu->nsiblings && - route_nexthop6_new(nextu->siblings, nextu->nsiblings, - new_nextu_idx_for_del) < 0) - deleted = nextu->nsiblings; - __nexthop6_destroy(new_nextu); + if (deleted != nextl->nsiblings && + route_nexthop6_new(nextl->siblings, nextl->nsiblings, + new_nextl_idx_for_del) < 0) + deleted = nextl->nsiblings; + __nexthop_destroy(new_nextl); return deleted; } - if (nexthop6_hash_del_add(nextu, new_nextu)) { - __nexthop6_destroy(new_nextu); + if (next_hop_list_create_copy_finish(AF_INET6, nextl, new_nextl, + nh_idx) < 0) RTE_LOG(ERR, ROUTE, "nh6 replace failed\n"); - return 0; - } - - /* - * It's safe to copy over the FAL objects without - * notifications as there are no FAL-visible changes to the - * object - it maintains its own linkage to the neighbour - */ - new_nextu->nhg_fal_obj = nextu->nhg_fal_obj; - memcpy(new_nextu->nh_fal_obj, nextu->nh_fal_obj, - new_nextu->nsiblings * sizeof(*new_nextu->nh_fal_obj)); - - assert(nh6_tbl.entry[nh_idx] == nextu); - rcu_xchg_pointer(&nh6_tbl.entry[nh_idx], new_nextu); - call_rcu(&nextu->rcu, nexthop6_destroy); return 0; } -static void route6_change_process_nh(struct next_hop_v6_u *nhu, +static void route6_change_process_nh(struct next_hop_list *nhl, enum nh_change (*upd_neigh_present_cb)( - struct next_hop_v6 *next, + struct next_hop *next, int sibling, void *arg)) { - const struct next_hop_v6 *array; + const struct next_hop *array; int index; uint i; - index = nhu->index; - array = rcu_dereference(nhu->siblings); - for (i = 0; i < nhu->nsiblings; i++) { - const struct next_hop_v6 *next = array + i; - const struct ifnet *ifp = nh6_get_ifp(next); + index = nhl->index; + array = rcu_dereference(nhl->siblings); + for (i = 0; i < nhl->nsiblings; i++) { + const struct next_hop *next = array + i; + const struct ifnet *ifp = dp_nh_get_ifp(next); if (!ifp) /* happens for local routes */ continue; - if (!nh6_is_gw(next)) + if (!nh_is_gw(next)) continue; /* * Is there an lle on this interface with a * 
matching address. */ - struct llentry *lle = in6_lltable_find((struct ifnet *)ifp, - &next->gateway); + struct llentry *lle = in6_lltable_find( + (struct ifnet *)ifp, + &next->gateway.address.ip_v6); if (lle) { - route6_nh_replace(nhu, nhu->index, lle, NULL, + route6_nh_replace(AF_INET6, nhl, nhl->index, lle, NULL, upd_neigh_present_cb, lle); /* @@ -1678,8 +1175,8 @@ static void route6_change_process_nh(struct next_hop_v6_u *nhu, * replaced by prev func, and will not * then be found in hash table. */ - nhu = rcu_dereference(nh6_tbl.entry[index]); - if (!nhu) + nhl = rcu_dereference(nh6_tbl.entry[index]); + if (!nhl) break; } } @@ -1687,19 +1184,19 @@ static void route6_change_process_nh(struct next_hop_v6_u *nhu, static void walk_nh6s_for_route6_change(enum nh_change (*upd_neigh_present_cb)( - struct next_hop_v6 *next, + struct next_hop *next, int sibling, void *arg)) { - struct next_hop_v6_u *nhu; + struct next_hop_list *nhl; struct cds_lfht_iter iter; struct cds_lfht_node *node; - ASSERT_MASTER(); + ASSERT_MAIN(); cds_lfht_for_each(nexthop6_hash, &iter, node) { - nhu = caa_container_of(node, struct next_hop_v6_u, nh_node); - route6_change_process_nh(nhu, upd_neigh_present_cb); + nhl = caa_container_of(node, struct next_hop_list, nh_node); + route6_change_process_nh(nhl, upd_neigh_present_cb); } } @@ -1707,7 +1204,7 @@ walk_nh6s_for_route6_change(enum nh_change (*upd_neigh_present_cb)( * On an arp add, should we set NEIGH_PRESENT from this NH. 
*/ static enum nh_change -routing_neigh_add_gw_nh_replace_cb(struct next_hop_v6 *next, +routing_neigh_add_gw_nh_replace_cb(struct next_hop *next, int sibling __unused, void *arg) { @@ -1715,12 +1212,12 @@ routing_neigh_add_gw_nh_replace_cb(struct next_hop_v6 *next, struct in6_addr *ip = ll_ipv6_addr(lle); struct ifnet *ifp = rcu_dereference(lle->ifp); - if (!nh6_is_gw(next) || !IN6_ARE_ADDR_EQUAL(&next->gateway, - &ip->s6_addr)) + if (!nh_is_gw(next) || + !IN6_ARE_ADDR_EQUAL(&next->gateway.address.ip_v6, &ip->s6_addr)) return NH_NO_CHANGE; - if (nh6_get_ifp(next) != ifp) + if (dp_nh_get_ifp(next) != ifp) return NH_NO_CHANGE; - if (nh6_is_local(next) || nh6_is_neigh_present(next)) + if (nh_is_local(next) || nh_is_neigh_present(next)) return NH_NO_CHANGE; return NH_SET_NEIGH_PRESENT; @@ -1739,8 +1236,8 @@ route_change_link_neigh(struct vrf *vrf, struct lpm6 *lpm, uint32_t next_hop, int16_t scope __unused) { uint32_t i; - const struct next_hop_v6_u *nextu; - const struct next_hop_v6 *array; + const struct next_hop_list *nextl; + const struct next_hop *array; struct subtree_walk_arg subtree_arg = { .vrf = vrf, .table_id = table_id, @@ -1750,7 +1247,7 @@ route_change_link_neigh(struct vrf *vrf, struct lpm6 *lpm, uint8_t cover_ip[LPM6_IPV6_ADDR_SIZE]; uint8_t cover_depth; uint32_t cover_idx; - const struct next_hop_v6_u *cover_nextu; + const struct next_hop_list *cover_nextl; /* * If the entry we have just created is connected OR its @@ -1763,8 +1260,8 @@ route_change_link_neigh(struct vrf *vrf, struct lpm6 *lpm, * as the cover need to be checked to see if they are still accurate, * and removed if not. 
*/ - nextu = rcu_dereference(nh6_tbl.entry[next_hop]); - if (nextu6_is_any_connected(nextu)) { + nextl = rcu_dereference(nh6_tbl.entry[next_hop]); + if (next_hop_list_is_any_connected(nextl)) { memcpy(&subtree_arg.ip, ip, LPM6_IPV6_ADDR_SIZE); lpm6_subtree_walk( @@ -1773,8 +1270,8 @@ route_change_link_neigh(struct vrf *vrf, struct lpm6 *lpm, &subtree_arg); } else if (lpm6_find_cover(lpm, ip, depth, (uint8_t *)&cover_ip, &cover_depth, &cover_idx) == 0) { - cover_nextu = rcu_dereference(nh6_tbl.entry[cover_idx]); - if (nextu6_is_any_connected(cover_nextu)) { + cover_nextl = rcu_dereference(nh6_tbl.entry[cover_idx]); + if (next_hop_list_is_any_connected(cover_nextl)) { memcpy(&subtree_arg.ip, ip, LPM6_IPV6_ADDR_SIZE); lpm6_subtree_walk( @@ -1785,10 +1282,10 @@ route_change_link_neigh(struct vrf *vrf, struct lpm6 *lpm, } /* Walk all the interface neigh entries to do /128 processing */ - array = rcu_dereference(nextu->siblings); - for (i = 0; i < nextu->nsiblings; i++) { - const struct next_hop_v6 *next = array + i; - const struct ifnet *ifp = nh6_get_ifp(next); + array = rcu_dereference(nextl->siblings); + for (i = 0; i < nextl->nsiblings; i++) { + const struct next_hop *next = array + i; + const struct ifnet *ifp = dp_nh_get_ifp(next); if (!ifp) /* happens for local routes */ @@ -1812,7 +1309,7 @@ route_delete_unlink_neigh(struct vrf *vrf, struct lpm6 *lpm, uint32_t table_id, const uint8_t *ip, uint8_t depth) { - const struct next_hop_v6_u *nextu; + const struct next_hop_list *nextl; uint32_t nh_idx; struct subtree_walk_arg subtree_arg = { .vrf = vrf, @@ -1838,8 +1335,8 @@ route_delete_unlink_neigh(struct vrf *vrf, struct lpm6 *lpm, if (lpm6_lookup_exact(lpm, ip, depth, &nh_idx)) return; - nextu = rcu_dereference(nh6_tbl.entry[nh_idx]); - if (nextu6_is_any_connected(nextu)) { + nextl = rcu_dereference(nh6_tbl.entry[nh_idx]); + if (next_hop_list_is_any_connected(nextl)) { memcpy(&subtree_arg.ip, ip, LPM6_IPV6_ADDR_SIZE); subtree_walk_route_cleanup_cb(lpm, (uint8_t 
*)ip, depth, nh_idx, @@ -1849,11 +1346,11 @@ route_delete_unlink_neigh(struct vrf *vrf, struct lpm6 *lpm, &subtree_arg); } else if (lpm6_find_cover(lpm, ip, depth, cover_ip, &cover_depth, &cover_idx) == 0) { - const struct next_hop_v6_u *cover_nextu; + const struct next_hop_list *cover_nextl; - cover_nextu = rcu_dereference( + cover_nextl = rcu_dereference( nh6_tbl.entry[cover_idx]); - if (nextu6_is_any_connected(cover_nextu)) { + if (next_hop_list_is_any_connected(cover_nextl)) { memcpy(&subtree_arg.ip, ip, LPM6_IPV6_ADDR_SIZE); subtree_walk_route_cleanup_cb(lpm, (uint8_t *)ip, @@ -1872,11 +1369,11 @@ route_delete_unlink_neigh(struct vrf *vrf, struct lpm6 *lpm, static void route_delete_relink_neigh(struct lpm6 *lpm, uint8_t *ip, uint8_t depth) { - const struct next_hop_v6_u *nextu; + const struct next_hop_list *nextl; uint8_t cover_ip[LPM6_IPV6_ADDR_SIZE]; uint8_t cover_depth; uint32_t cover_nh_idx; - const struct next_hop_v6 *array; + const struct next_hop *array; uint32_t i; /* @@ -1889,16 +1386,16 @@ route_delete_relink_neigh(struct lpm6 *lpm, uint8_t *ip, uint8_t depth) } /* Walk all the interfaces neigh entries to do /128 processing */ - nextu = rcu_dereference(nh6_tbl.entry[cover_nh_idx]); - array = rcu_dereference(nextu->siblings); - for (i = 0; i < nextu->nsiblings; i++) { - const struct next_hop_v6 *next = array + i; - const struct ifnet *ifp = nh6_get_ifp(next); + nextl = rcu_dereference(nh6_tbl.entry[cover_nh_idx]); + array = rcu_dereference(nextl->siblings); + for (i = 0; i < nextl->nsiblings; i++) { + const struct next_hop *next = array + i; + const struct ifnet *ifp = dp_nh_get_ifp(next); if (!ifp) /* happens for local routes */ continue; - if (nh6_is_connected(next)) + if (nh_is_connected(next)) lltable_walk(ifp->if_lltable6, lle_routing_insert_neigh_cb, NULL); } @@ -1907,9 +1404,9 @@ route_delete_relink_neigh(struct lpm6 *lpm, uint8_t *ip, uint8_t depth) walk_nh6s_for_route6_change(routing_neigh_add_gw_nh_replace_cb); } -static int 
rt6_delete(vrfid_t vrf_id, const struct in6_addr *dst, - uint8_t prefix_len, uint32_t id, uint16_t scope, - bool is_local) +int rt6_delete(vrfid_t vrf_id, const struct in6_addr *dst, + uint8_t prefix_len, uint32_t id, uint16_t scope, + bool is_local) { struct lpm6 *lpm; uint32_t id_in = id; @@ -1958,25 +1455,17 @@ static int rt6_delete(vrfid_t vrf_id, const struct in6_addr *dst, if (!err) { route_delete_unlink_neigh(vrf, lpm, id, dst->s6_addr, prefix_len); - err = route_lpm6_delete(vrf->v_id, lpm, dst, prefix_len, - &index, scope); + err = route_lpm6_delete(vrf->v_id, vrf->v_fal_obj, lpm, dst, + prefix_len, &index, scope); if (err >= 0) { /* A delete now always gets rid of all NHs */ - nexthop6_put(index); + nexthop_put(AF_INET6, index); route_delete_relink_neigh(lpm, (uint8_t *)dst, prefix_len); } } pthread_mutex_unlock(&route6_mutex); - /*unlock the VRF if this is the last route in this LPM*/ - if (rt6_lpm_is_empty(lpm)) { - if (!is_nondefault_vrf(vrf->v_id)) - vrf_delete_by_ptr(vrf); - else - vrf_delete(VRF_DEFAULT_ID); - } - if (err) /* * Expected now we get all deletes from RIB and still act on @@ -1996,7 +1485,7 @@ static int rt6_delete(vrfid_t vrf_id, const struct in6_addr *dst, static int rt6_insert(struct vrf *vrf, struct lpm6 *lpm, uint32_t table_id, const struct in6_addr *dst, uint8_t prefix_len, - int16_t scope, struct next_hop_v6 hops[], + int16_t scope, struct next_hop hops[], size_t size, uint32_t *idx, bool replace) { char b[INET6_ADDRSTRLEN]; @@ -2025,7 +1514,8 @@ static int rt6_insert(struct vrf *vrf, struct lpm6 *lpm, if (hops[i].flags & RTF_GATEWAY) continue; - hops[i].gateway = *dst; + hops[i].gateway.address.ip_v6 = *dst; + hops[i].gateway.type = AF_INET6; } err = route_nexthop6_new(hops, size, idx); @@ -2037,21 +1527,20 @@ static int rt6_insert(struct vrf *vrf, struct lpm6 *lpm, route_delete_unlink_neigh(vrf, lpm, table_id, dst->s6_addr, prefix_len); if (replace) - err = route_lpm6_update(vrf->v_id, lpm, dst, prefix_len, - &old_index, *idx, 
scope, table_id, - hops, size, - nh6_tbl.entry[*idx]->nhg_fal_obj); + err = route_lpm6_update(vrf->v_id, vrf->v_fal_obj, lpm, dst, + prefix_len, &old_index, *idx, scope, + table_id); else - err = route_lpm6_add(vrf->v_id, lpm, dst, prefix_len, *idx, - scope, table_id); + err = route_lpm6_add(vrf->v_id, vrf->v_fal_obj, lpm, dst, + prefix_len, *idx, scope, table_id); if (err < 0) { RTE_LOG(ERR, ROUTE, "route insert %s/%u scope %u failed (%d)\n", inet_ntop(AF_INET6, dst, b, sizeof(b)), prefix_len, scope, err); - nexthop6_put(*idx); + nexthop_put(AF_INET6, *idx); } else { if (replace) - nexthop6_put(old_index); + nexthop_put(AF_INET6, old_index); route_change_link_neigh(vrf, lpm, table_id, dst->s6_addr, prefix_len, *idx, scope); DP_DEBUG(ROUTE, INFO, ROUTE, @@ -2064,9 +1553,9 @@ static int rt6_insert(struct vrf *vrf, struct lpm6 *lpm, } /* Add, replace or append a new entry */ -static int rt6_add(vrfid_t vrf_id, struct in6_addr *dst, uint32_t prefix_len, - uint32_t table, int16_t scope, struct next_hop_v6 hops[], - size_t size) +int rt6_add(vrfid_t vrf_id, struct in6_addr *dst, uint32_t prefix_len, + uint32_t table, int16_t scope, struct next_hop hops[], + size_t size) { struct lpm6 *lpm; struct vrf *vrf = NULL; @@ -2086,35 +1575,15 @@ static int rt6_add(vrfid_t vrf_id, struct in6_addr *dst, uint32_t prefix_len, return -ENOENT; vrf = vrf_get_rcu(vrf_id); - lpm = vrf ? 
rt6_get_lpm(&vrf->v_rt6_head, table) : NULL; + if (!vrf) + return -ENOENT; + lpm = rt6_get_lpm(&vrf->v_rt6_head, table); if (lpm == NULL) { - if (is_nondefault_vrf(vrf_id)) { - /* - * Should have a VRF by this point since we - * needed it to find the VRF ID from the table ID - */ - if (!vrf) - return -ENOENT; - /* Add refcount on default VRF */ - if (!vrf_find_or_create(VRF_DEFAULT_ID)) - return -ENOENT; - } else { - vrf = vrf_find_or_create(vrf_id); - if (vrf == NULL) - return -ENOENT; - } - lpm = rt6_create_lpm(table, vrf); if (lpm == NULL) { err_code = -ENOENT; goto err; } - } else if (rt6_lpm_is_empty(lpm)) { - /* incr ref count when first route is added*/ - if (!is_nondefault_vrf(vrf->v_id)) - vrf = vrf_find_or_create(vrf_id); - else if (!vrf_find_or_create(VRF_DEFAULT_ID)) - return -ENOENT; } pthread_mutex_lock(&route6_mutex); @@ -2131,26 +1600,17 @@ static int rt6_add(vrfid_t vrf_id, struct in6_addr *dst, uint32_t prefix_len, goto err; return 0; -err: - if (vrf && (lpm == NULL || (lpm && rt6_lpm_is_empty(lpm)))) { - if (!is_nondefault_vrf(vrf->v_id)) - vrf_delete_by_ptr(vrf); - else - vrf_delete(VRF_DEFAULT_ID); - } +err: return err_code; } /* Gleaner for the next hop */ -static void flush6_cleanup(const uint8_t *prefix __rte_unused, - uint32_t pr_len __rte_unused, - int16_t scope __rte_unused, - uint32_t next_hop, - struct pd_obj_state_and_flags pd_state __rte_unused, +static void flush6_cleanup(struct lpm6_walk_params *params, + struct pd_obj_state_and_flags *pd_state __rte_unused, void *arg __rte_unused) { - nexthop6_put(next_hop); + nexthop_put(AF_INET6, params->next_hop); } static void rt6_flush(struct vrf *vrf) @@ -2158,14 +1618,13 @@ static void rt6_flush(struct vrf *vrf) unsigned int id; struct route6_head rt6_head = vrf->v_rt6_head; + if (vrf->v_id == VRF_INVALID_ID) + return; + pthread_mutex_lock(&route6_mutex); for (id = 0; id < rt6_head. 
rt6_rtm_max; id++) { struct lpm6 *lpm = rt6_head.rt6_table[id]; - if (is_nondefault_vrf(vrf->v_id) && - id != RT_TABLE_UNSPEC) - continue; - if (lpm != NULL && !rt6_lpm_is_empty(lpm)) { lpm6_delete_all(lpm, flush6_cleanup, NULL); if (!rt6_lpm_add_reserved_routes(lpm, vrf)) { @@ -2173,10 +1632,6 @@ static void rt6_flush(struct vrf *vrf) "Failed to replace v6 reserved routes %s\n", vrf->v_name); } - if (!is_nondefault_vrf(vrf->v_id)) - vrf_delete_by_ptr(vrf); - else - vrf_delete(VRF_DEFAULT_ID); } } pthread_mutex_unlock(&route6_mutex); @@ -2196,23 +1651,47 @@ void rt6_flush_all(enum cont_src_en cont_src) } -void rt6_print_nexthop(json_writer_t *json, uint32_t next_hop) +void rt6_print_nexthop(json_writer_t *json, uint32_t next_hop, + enum rt_print_nexthop_verbosity v) { - const struct next_hop_v6_u *nextu = + const struct next_hop_list *nextl = rcu_dereference(nh6_tbl.entry[next_hop]); - const struct next_hop_v6 *array; + const struct next_hop *array; unsigned int i, j; - + const char *use_str = NULL; jsonw_uint_field(json, "nh_index", next_hop); - if (unlikely(!nextu)) + if (unlikely(!nextl)) return; - array = rcu_dereference(nextu->siblings); - jsonw_uint_field(json, "nh_refcount", nextu->refcount); + array = rcu_dereference(nextl->siblings); + jsonw_uint_field(json, "nh_refcount", nextl->refcount); + switch (nextl->use) { + case FAL_NHG_USE_IP: + use_str = "ip"; + break; + case FAL_NHG_USE_MPLS_LABEL_SWITCH: + use_str = "mpls-lswitch"; + break; + } + if (use_str) + jsonw_string_field(json, "use", use_str); + if (v == RT_PRINT_NH_DETAIL && + fal_state_is_obj_present(nextl->pd_state)) { + /* + * name disambuigates between next-hop-group state + * and possible future route state given we don't have a + * separate JSON object for the two. 
+ */ + jsonw_name(json, "nhg_platform_state"); + jsonw_start_object(json); + fal_ip_dump_next_hop_group(nextl->nhg_fal_obj, json); + jsonw_end_object(json); + } + nexthop_map_display(nextl, json); jsonw_name(json, "next_hop"); jsonw_start_array(json); - for (i = 0; i < nextu->nsiblings; i++) { - const struct next_hop_v6 *next = array + i; + for (i = 0; i < nextl->nsiblings; i++) { + const struct next_hop *next = array + i; const struct ifnet *ifp; jsonw_start_object(json); @@ -2233,12 +1712,15 @@ void rt6_print_nexthop(json_writer_t *json, uint32_t next_hop) jsonw_string_field(json, "state", "gateway"); - if (IN6_IS_ADDR_V4MAPPED(&next->gateway)) { - v4nhop = V4MAPPED_IPV6_TO_IPV4(next->gateway); + if (IN6_IS_ADDR_V4MAPPED( + &next->gateway.address.ip_v6)) { + v4nhop = V4MAPPED_IPV6_TO_IPV4( + next->gateway.address.ip_v6); nhop = inet_ntop(AF_INET, &v4nhop, b1, sizeof(b1)); } else { - nhop = inet_ntop(AF_INET6, &next->gateway, + nhop = inet_ntop(AF_INET6, + &next->gateway.address.ip_v6, b1, sizeof(b1)); } jsonw_string_field(json, "via", nhop); @@ -2249,8 +1731,10 @@ void rt6_print_nexthop(json_writer_t *json, uint32_t next_hop) jsonw_bool_field(json, "neigh_present", true); if (next->flags & RTF_NEIGH_CREATED) jsonw_bool_field(json, "neigh_created", true); + if (next->flags & RTF_BACKUP) + jsonw_bool_field(json, "backup", true); - ifp = nh6_get_ifp(next); + ifp = dp_nh_get_ifp(next); if (ifp && !(next->flags & RTF_DEAD)) jsonw_string_field(json, "ifname", ifp->if_name); @@ -2266,6 +1750,19 @@ void rt6_print_nexthop(json_writer_t *json, uint32_t next_hop) jsonw_end_array(json); } + /* + * FAL may access hardware which may be slow or may otherwise + * increase the data returned greatly, so only output this + * information if requested. 
+ */ + if (v == RT_PRINT_NH_DETAIL && + fal_state_is_obj_present(nextl->pd_state)) { + jsonw_name(json, "platform_state"); + jsonw_start_object(json); + fal_ip_dump_next_hop(nextl->nh_fal_obj[i], json); + jsonw_end_object(json); + } + jsonw_end_object(json); } jsonw_end_array(json); @@ -2273,7 +1770,7 @@ void rt6_print_nexthop(json_writer_t *json, uint32_t next_hop) static void __rt6_display(json_writer_t *json, const uint8_t *addr, uint32_t prefix_len, int16_t scope, - const struct next_hop_v6_u *nextu __unused, + const struct next_hop_list *nextl __unused, uint32_t next_hop) { char b1[INET6_ADDRSTRLEN]; @@ -2286,7 +1783,7 @@ static void __rt6_display(json_writer_t *json, const uint8_t *addr, jsonw_string_field(json, "prefix", buf); jsonw_uint_field(json, "scope", scope); - rt6_print_nexthop(json, next_hop); + rt6_print_nexthop(json, next_hop, RT_PRINT_NH_BRIEF); jsonw_end_object(json); } @@ -2294,67 +1791,66 @@ static void __rt6_display(json_writer_t *json, const uint8_t *addr, /* * Walk FIB table. */ -static void rt6_display(const uint8_t *addr, uint32_t prefix_len, int16_t scope, - uint32_t next_hop, - struct pd_obj_state_and_flags pd_state __rte_unused, +static void rt6_display(struct lpm6_walk_params *params, + struct pd_obj_state_and_flags *pd_state __rte_unused, void *arg) { json_writer_t *json = arg; - const struct next_hop_v6_u *nextu = - rcu_dereference(nh6_tbl.entry[next_hop]); - const struct next_hop_v6 *next; + const struct next_hop_list *nextl = + rcu_dereference(nh6_tbl.entry[params->next_hop]); + const struct next_hop *next; - if (unlikely(!nextu)) + if (unlikely(!nextl)) return; - next = rcu_dereference(nextu->siblings); + next = rcu_dereference(nextl->siblings); /* Filter local route being displayed */ if (next->flags & RTF_LOCAL) return; /* Don't show if any paths are NEIGH_CREATED. 
*/ - if (nextu6_nc_count(nextu)) + if (next_hop_list_nc_count(nextl)) return; - if (rt6_is_reserved(addr, prefix_len, scope)) + if (rt6_is_reserved(params->prefix, params->pr_len, params->scope)) return; - __rt6_display(json, addr, prefix_len, scope, nextu, next_hop); + __rt6_display(json, params->prefix, params->pr_len, params->scope, + nextl, params->next_hop); } -static void rt6_display_all(const uint8_t *addr, uint32_t prefix_len, - int16_t scope, uint32_t next_hop, - struct pd_obj_state_and_flags pd_state __rte_unused, +static void rt6_display_all(struct lpm6_walk_params *params, + struct pd_obj_state_and_flags *pd_state __rte_unused, void *arg) { json_writer_t *json = arg; - const struct next_hop_v6_u *nextu = - rcu_dereference(nh6_tbl.entry[next_hop]); + const struct next_hop_list *nextl = + rcu_dereference(nh6_tbl.entry[params->next_hop]); - if (unlikely(!nextu)) + if (unlikely(!nextl)) return; - __rt6_display(json, addr, prefix_len, scope, nextu, next_hop); + __rt6_display(json, params->prefix, params->pr_len, params->scope, + nextl, params->next_hop); } static void rt6_local_display( - const uint8_t *addr, - uint32_t prefix_len, - int16_t scope, uint32_t next_hop, - struct pd_obj_state_and_flags pd_state __rte_unused, + struct lpm6_walk_params *params, + struct pd_obj_state_and_flags *pd_state __rte_unused, void *arg) { FILE *f = arg; char b1[INET6_ADDRSTRLEN]; - const struct next_hop_v6_u *nextu = - rcu_dereference(nh6_tbl.entry[next_hop]); - const struct next_hop_v6 *next; + const struct next_hop_list *nextl = + rcu_dereference(nh6_tbl.entry[params->next_hop]); + const struct next_hop *next; - if (unlikely(!nextu)) + if (unlikely(!nextl)) return; - next = rcu_dereference(nextu->siblings); + next = rcu_dereference(nextl->siblings); if (next->flags & RTF_LOCAL && - !rt6_is_reserved(addr, prefix_len, scope)) - fprintf(f, "\t%s\n", inet_ntop(AF_INET6, addr, b1, sizeof(b1))); + !rt6_is_reserved(params->prefix, params->pr_len, params->scope)) + fprintf(f, 
"\t%s\n", inet_ntop(AF_INET6, params->prefix, + b1, sizeof(b1))); } /* Route rule list (RB-tree) is not RCU safe */ @@ -2389,28 +1885,25 @@ struct rt6_vrf_lpm_walk_ctx { struct vrf *vrf; uint32_t table_id; void (*func)(struct vrf *vrf, uint32_t table_id, - const uint8_t *addr, uint32_t prefix_len, - int16_t scope, uint32_t next_hop, - struct pd_obj_state_and_flags pd_state, void *arg); + struct lpm6_walk_params *params, + struct pd_obj_state_and_flags *pd_state, void *arg); void *arg; }; -static void rt6_vrf_lpm_walk_cb(const uint8_t *addr, uint32_t prefix_len, - int16_t scope, uint32_t next_hop, - struct pd_obj_state_and_flags pd_state, +static void rt6_vrf_lpm_walk_cb(struct lpm6_walk_params *params, + struct pd_obj_state_and_flags *pd_state, void *arg) { const struct rt6_vrf_lpm_walk_ctx *ctx = arg; - ctx->func(ctx->vrf, ctx->table_id, addr, prefix_len, scope, next_hop, + ctx->func(ctx->vrf, ctx->table_id, params, pd_state, ctx->arg); } static void rt6_lpm_walk_util( void (*func)(struct vrf *vrf, uint32_t table_id, - const uint8_t *addr, uint32_t prefix_len, - int16_t scope, uint32_t next_hop, - struct pd_obj_state_and_flags pd_state, + struct lpm6_walk_params *params, + struct pd_obj_state_and_flags *pd_state, void *arg), void *arg) { @@ -2428,37 +1921,29 @@ static void rt6_lpm_walk_util( .arg = arg, }; - if (lpm && !rt6_lpm_is_empty(lpm)) { + if (lpm && !rt6_lpm_is_empty(lpm)) lpm6_walk_all_safe(lpm, rt6_vrf_lpm_walk_cb, &ctx); - if (rt6_lpm_is_empty(lpm)) { - if (!is_nondefault_vrf(vrf->v_id)) - vrf_delete_by_ptr(vrf); - else - vrf_delete(VRF_DEFAULT_ID); - } - } } } } static void rt6_if_dead(struct vrf *vrf, uint32_t table_id, - const uint8_t *addr, uint32_t prefix_len, - int16_t scope, uint32_t next_hop, - struct pd_obj_state_and_flags pd_state __rte_unused, + struct lpm6_walk_params *params, + struct pd_obj_state_and_flags *pd_state __rte_unused, void *arg) { - struct next_hop_v6_u *nextu = - rcu_dereference(nh6_tbl.entry[next_hop]); + struct next_hop_list 
*nextl = + rcu_dereference(nh6_tbl.entry[params->next_hop]); const struct ifnet *ifp = arg; unsigned int i, matches = 0; struct in6_addr inaddr; struct lpm6 *lpm; - for (i = 0; i < nextu->nsiblings; i++) { - struct next_hop_v6 *nh = nextu->siblings + i; + for (i = 0; i < nextl->nsiblings; i++) { + struct next_hop *nh = nextl->siblings + i; - if (nh6_get_ifp(nh) == ifp) { + if (dp_nh_get_ifp(nh) == ifp) { /* No longer check if connected, as kernel will not * signal explicitly for flushing */ @@ -2479,31 +1964,28 @@ static void rt6_if_dead(struct vrf *vrf, uint32_t table_id, * behaviour. */ lpm = rcu_dereference(vrf->v_rt6_head.rt6_table[table_id]); - memcpy(&inaddr.s6_addr, addr, sizeof(inaddr.s6_addr)); - route_lpm6_delete(vrf->v_id, lpm, &inaddr, - prefix_len, NULL, scope); - nexthop6_put(next_hop); + memcpy(&inaddr.s6_addr, ¶ms->prefix, sizeof(inaddr.s6_addr)); + route_lpm6_delete(vrf->v_id, vrf->v_fal_obj, lpm, &inaddr, + params->pr_len, NULL, params->scope); + nexthop_put(AF_INET6, params->next_hop); } static void rt6_if_clear_slowpath_flag( struct vrf *vrf __unused, uint32_t table_id __unused, - const uint8_t *addr __unused, - uint32_t prefix_len __unused, - int16_t scope __unused, - uint32_t next_hop, - struct pd_obj_state_and_flags pd_state __rte_unused, + struct lpm6_walk_params *params, + struct pd_obj_state_and_flags *pd_state __rte_unused, void *arg) { - const struct next_hop_v6_u *nextu = - rcu_dereference(nh6_tbl.entry[next_hop]); + const struct next_hop_list *nextl = + rcu_dereference(nh6_tbl.entry[params->next_hop]); const struct ifnet *ifp = arg; unsigned int i; - for (i = 0; i < nextu->nsiblings; i++) { - struct next_hop_v6 *nh = nextu->siblings + i; + for (i = 0; i < nextl->nsiblings; i++) { + struct next_hop *nh = nextl->siblings + i; - if (nh6_get_ifp(nh) == ifp) + if (dp_nh_get_ifp(nh) == ifp) nh->flags &= ~RTF_SLOWPATH; } } @@ -2511,21 +1993,19 @@ static void rt6_if_clear_slowpath_flag( static void rt6_if_set_slowpath_flag( struct vrf *vrf 
__unused, uint32_t table_id __unused, - const uint8_t *addr __unused, - uint32_t prefix_len __unused, - int16_t scope __unused, uint32_t next_hop, - struct pd_obj_state_and_flags pd_state __rte_unused, + struct lpm6_walk_params *params, + struct pd_obj_state_and_flags *pd_state __rte_unused, void *arg) { - const struct next_hop_v6_u *nextu = - rcu_dereference(nh6_tbl.entry[next_hop]); + const struct next_hop_list *nextl = + rcu_dereference(nh6_tbl.entry[params->next_hop]); const struct ifnet *ifp = arg; unsigned int i; - for (i = 0; i < nextu->nsiblings; i++) { - struct next_hop_v6 *nh = nextu->siblings + i; + for (i = 0; i < nextl->nsiblings; i++) { + struct next_hop *nh = nextl->siblings + i; - if (nh6_get_ifp(nh) == ifp) + if (dp_nh_get_ifp(nh) == ifp) nh->flags |= RTF_SLOWPATH; } } @@ -2618,29 +2098,27 @@ void rt6_local_show(struct route6_head *rt6_head, FILE *f) lpm6_walk_all_safe(lpm, rt6_local_display, f); } -static void rt6_summarize(const uint8_t *addr, - uint32_t prefix_len, int16_t scope, - uint32_t next_hop, - struct pd_obj_state_and_flags pd_state __rte_unused, +static void rt6_summarize(struct lpm6_walk_params *params, + struct pd_obj_state_and_flags *pd_state __rte_unused, void *arg __rte_unused) { - const struct next_hop_v6_u *nextu; - const struct next_hop_v6 *nh; + const struct next_hop_list *nextl; + const struct next_hop *nh; uint32_t *rt_used = arg; - nextu = rcu_dereference(nh6_tbl.entry[next_hop]); - if (unlikely(!nextu)) + nextl = rcu_dereference(nh6_tbl.entry[params->next_hop]); + if (unlikely(!nextl)) return; - nh = rcu_dereference(nextu->siblings); + nh = rcu_dereference(nextl->siblings); /* Filter local route being displayed */ if (nh->flags & RTF_LOCAL) return; - if (rt6_is_reserved(addr, prefix_len, scope)) + if (rt6_is_reserved(params->prefix, params->pr_len, params->scope)) return; - ++rt_used[prefix_len]; + ++rt_used[params->pr_len]; } static int rt6_stats(struct route6_head *rt6_head, json_writer_t *json, @@ -2677,7 +2155,7 @@ 
static int rt6_stats(struct route6_head *rt6_head, json_writer_t *json, jsonw_name(json, "nexthop"); jsonw_start_object(json); jsonw_uint_field(json, "used", nh6_tbl.in_use); - jsonw_uint_field(json, "free", NEXTHOP6_HASH_TBL_SIZE - nh6_tbl.in_use); + jsonw_uint_field(json, "free", NEXTHOP_HASH_TBL_SIZE - nh6_tbl.in_use); jsonw_uint_field(json, "neigh_present", nh6_tbl.neigh_present); jsonw_uint_field(json, "neigh_created", nh6_tbl.neigh_created); jsonw_end_object(json); @@ -2697,10 +2175,10 @@ static int rt6_stats(struct route6_head *rt6_head, json_writer_t *json, } static int rt6_show(struct route6_head *rt6_head, json_writer_t *json, - uint32_t tbl_id, const char *addr) + uint32_t tbl_id, const struct in6_addr *dst) { struct lpm6 *lpm6 = rt6_get_lpm(rt6_head, tbl_id); - struct in6_addr dst; + char b1[INET6_ADDRSTRLEN]; uint32_t next_hop; if (lpm6 == NULL) { @@ -2708,18 +2186,42 @@ static int rt6_show(struct route6_head *rt6_head, json_writer_t *json, return 0; } - if (inet_pton(AF_INET6, addr, &dst) == 0) { - RTE_LOG(ERR, ROUTE6, "invalid address\n"); - return -1; + jsonw_start_object(json); + jsonw_string_field(json, "address", + inet_ntop(AF_INET6, dst, b1, sizeof(b1))); + + if (lpm6_lookup(lpm6, dst->s6_addr, &next_hop) != 0) + jsonw_string_field(json, "state", "nomatch"); + else + rt6_print_nexthop(json, next_hop, RT_PRINT_NH_DETAIL); + jsonw_end_object(json); + + return 0; +} + +static int rt6_show_exact(struct route6_head *rt6_head, json_writer_t *json, + uint32_t tbl_id, const struct in6_addr *dst, + uint8_t plen) +{ + struct lpm6 *lpm6 = rt6_get_lpm(rt6_head, tbl_id); + char b2[INET6_ADDRSTRLEN + sizeof("/255")]; + char b1[INET6_ADDRSTRLEN]; + uint32_t next_hop; + + if (lpm6 == NULL) { + RTE_LOG(ERR, ROUTE6, "Unknown route table\n"); + return 0; } jsonw_start_object(json); - jsonw_string_field(json, "address", addr); + sprintf(b2, "%s/%u", + inet_ntop(AF_INET6, dst, b1, sizeof(b1)), plen); + jsonw_string_field(json, "prefix", b2); - if (lpm6_lookup(lpm6, 
dst.s6_addr, &next_hop) != 0) + if (lpm6_lookup_exact(lpm6, dst->s6_addr, plen, &next_hop) != 0) jsonw_string_field(json, "state", "nomatch"); else - rt6_print_nexthop(json, next_hop); + rt6_print_nexthop(json, next_hop, RT_PRINT_NH_DETAIL); jsonw_end_object(json); return 0; @@ -2737,12 +2239,6 @@ int cmd_route6(FILE *f, int argc, char **argv) argv += 2; } - vrf = vrf_get_rcu_from_external(vrf_id); - if (vrf == NULL) { - fprintf(f, "no vrf exist\n"); - return -1; - } - if (argc > 1 && strcmp(argv[1], "table") == 0) { if (argc == 2) { fprintf(f, "missing table id\n"); @@ -2762,6 +2258,21 @@ int cmd_route6(FILE *f, int argc, char **argv) argv += 2; } + if (vrf_is_vrf_table_id(tblid)) { + if (vrf_lookup_by_tableid(tblid, &vrf_id, &tblid) < 0) { + fprintf(f, "no vrf exists for table %u\n", tblid); + return -1; + } + vrf = vrf_get_rcu(vrf_id); + } else { + vrf = dp_vrf_get_rcu_from_external(vrf_id); + } + + if (vrf == NULL) { + fprintf(f, "no vrf exists\n"); + return -1; + } + json_writer_t *json = jsonw_new(f); int err = -1; @@ -2822,21 +2333,77 @@ int cmd_route6(FILE *f, int argc, char **argv) err = rt6_stats(&vrf->v_rt6_head, json, tblid); jsonw_end_object(json); } else if (strcmp(argv[1], "lookup") == 0) { + struct in6_addr in6; + long plen = -1; + if (argc == 2) { fprintf(f, "missing address\n"); goto error; } + if (inet_pton(AF_INET6, argv[2], &in6) == 0) { + RTE_LOG(ERR, ROUTE6, "invalid address\n"); + goto error; + } + + if (argc > 3) { + plen = strtol(argv[3], NULL, 10); + if (plen < 0 || plen > 128) { + fprintf(f, "invalid prefix length\n"); + goto error; + } + } + jsonw_name(json, "route6_lookup"); jsonw_start_array(json); - err = rt6_show(&vrf->v_rt6_head, json, tblid, argv[2]); + if (plen >= 0) + err = rt6_show_exact(&vrf->v_rt6_head, json, tblid, + &in6, plen); + else + err = rt6_show(&vrf->v_rt6_head, json, tblid, &in6); + jsonw_end_array(json); + } else if (strcmp(argv[1], "platform") == 0) { + + long cnt = UINT32_MAX; + + if (argc > 2) { + cnt = 
strtol(argv[2], NULL, 10); + if (cnt < 0 || cnt > UINT32_MAX) { + fprintf(f, "invalid count\n"); + goto error; + } + } + struct fal_attribute_t attr_list[] = { + { FAL_ROUTE_WALK_ATTR_VRFID, + .value.u32 = vrf_id }, + { FAL_ROUTE_WALK_ATTR_TABLEID, + .value.u32 = tblid }, + { FAL_ROUTE_WALK_ATTR_CNT, + .value.u32 = cnt }, + { FAL_ROUTE_WALK_ATTR_FAMILY, + .value.u32 = FAL_IP_ADDR_FAMILY_IPV6 }, + { FAL_ROUTE_WALK_ATTR_TYPE, + .value.u32 = FAL_ROUTE_WALK_TYPE_ALL }, + }; + + jsonw_name(json, "route6_platform_show"); + + jsonw_start_array(json); + + err = fal_ip_walk_routes(rt_show_platform_routes, + RTE_DIM(attr_list), + attr_list, json); jsonw_end_array(json); + + /*TODO For scale, get_next from a prefix can be added */ + } else { fprintf(f, "Usage: route6 [vrf_id ID] [table N] [show]\n" " route6 [vrf_id ID] [table N] all\n" " route6 [vrf_id ID] [table N] summary\n" - " route6 [vrf_id ID] [table N] lookup ADDR\n"); + " route6 [vrf_id ID] [table N] lookup ADDR \n" + " route6 [vrf_id ID] [table N] platform [cnt]\n"); } error: @@ -2844,105 +2411,10 @@ int cmd_route6(FILE *f, int argc, char **argv) return err; } -int handle_route6(vrfid_t vrf_id, uint16_t type, const struct rtmsg *rtm, - uint32_t table, const void *dest, const void *gateway, - unsigned int ifindex, uint8_t scope, struct nlattr *mpath, - uint32_t nl_flags, uint16_t num_labels, label_t *labels) -{ - uint32_t depth = rtm->rtm_dst_len; - struct in6_addr dst = *(const struct in6_addr *)dest; - struct ifnet *ifp = ifnet_byifindex(ifindex); - struct in6_addr gw = *(struct in6_addr *)gateway; - struct next_hop_v6 *next; - uint32_t size; - uint32_t flags = 0; - bool missing_ifp = false; - bool exp_ifp = true; - - if (rtm->rtm_type != RTN_UNICAST && - rtm->rtm_type != RTN_LOCAL && - rtm->rtm_type != RTN_BLACKHOLE && - rtm->rtm_type != RTN_UNREACHABLE) - return 0; - - if (rtm->rtm_family != AF_INET6) - return 0; - - if (IN6_IS_ADDR_LOOPBACK(&dst)) - return 0; - - if (IN6_IS_ADDR_UNSPEC_LINKLOCAL(&dst)) - return 
0; - - /* - * If LOCAL unicast then ensure we replace any connected - * /128 which may have preceded it unless it's linklocal - * which need not be unique. - * Also ignore any ff00::/8 summary routes for multicast. - */ - if (rtm->rtm_type == RTN_LOCAL) { - if (!IN6_IS_ADDR_LINKLOCAL(&dst)) - nl_flags |= NLM_F_REPLACE; - } else if (rtm->rtm_type == RTN_UNICAST && - IN6_IS_ADDR_MULTICAST(&dst) && depth == 8) { - return 0; - } - - if (type == RTM_NEWROUTE) { - if (rtm->rtm_type == RTN_BLACKHOLE) { - flags |= RTF_BLACKHOLE; - exp_ifp = false; - } else if (rtm->rtm_type == RTN_UNREACHABLE) { - flags |= RTF_REJECT; - exp_ifp = false; - } else if (rtm->rtm_type == RTN_LOCAL) { - flags |= RTF_LOCAL; - /* no need to store ifp for local routes */ - ifp = NULL; - exp_ifp = false; - } else if ((num_labels == 0) && - (!ifp || is_lo(ifp))) { - flags |= RTF_SLOWPATH; - } - - if (num_labels > 0 && !is_lo(ifp)) - /* Output label rather than local label */ - flags |= RTF_OUTLABEL; - - if (!(nl_flags & NL_FLAG_ANY_ADDR)) - flags |= RTF_GATEWAY; - - if (mpath) { - next = ecmp6_create(mpath, &size, &missing_ifp); - if (missing_ifp) - return -1; - } else { - if (exp_ifp && !ifp && !is_ignored_interface(ifindex)) - return -1; - size = 1; - next = nexthop6_create(ifp, &gw, flags, num_labels, - labels); - } - - if (unlikely(!next)) - return 0; - - rcu_read_unlock(); - rt6_add(vrf_id, &dst, depth, table, scope, next, size); - rcu_read_lock(); - free(next); - } else if (type == RTM_DELROUTE) { - rt6_delete(vrf_id, &dst, depth, table, scope, - rtm->rtm_type == RTN_LOCAL); - } - - return 0; -} - /* * Get egress interface for destination address. * - * Must only be used on master thread. + * Must only be used on main thread. * Note for multipath routes, the first interface is always returned. 
*/ struct ifnet *nhif_dst_lookup6(const struct vrf *vrf, @@ -2950,25 +2422,25 @@ struct ifnet *nhif_dst_lookup6(const struct vrf *vrf, bool *connected) { struct ifnet *ifp; - const struct next_hop_v6_u *nextu; - const struct next_hop_v6 *next; + const struct next_hop_list *nextl; + const struct next_hop *next; uint32_t nhindex; if (lpm6_lookup(vrf->v_rt6_head.rt6_table[RT_TABLE_MAIN], dst->s6_addr, &nhindex) != 0) return NULL; - nextu = nh6_tbl.entry[nhindex]; - if (nextu == NULL) + nextl = nh6_tbl.entry[nhindex]; + if (nextl == NULL) return NULL; - next = nextu->siblings; + next = nextl->siblings; if (next == NULL) return NULL; - ifp = nh6_get_ifp(next); + ifp = dp_nh_get_ifp(next); if (ifp && connected) - *connected = nh6_is_connected(next); + *connected = nh_is_connected(next); return ifp; } @@ -2978,50 +2450,52 @@ route6_create_neigh(struct vrf *vrf, struct lpm6 *lpm, uint32_t table_id, struct in6_addr *ip, struct llentry *lle) { - struct next_hop_v6_u *nextu; + struct next_hop_list *nextl; uint32_t nh_idx; - struct next_hop_v6 *nh; - struct next_hop_v6 *cover_nh; + struct next_hop *nh; + struct next_hop *cover_nh; struct ifnet *ifp = rcu_dereference(lle->ifp); int sibling; int size; if (lpm6_lookup(lpm, ip->s6_addr, &nh_idx) == 0) { - nextu = rcu_dereference(nh6_tbl.entry[nh_idx]); + nextl = rcu_dereference(nh6_tbl.entry[nh_idx]); /* * Note that this does not support a connected with multiple * paths that use the same ifp. */ - cover_nh = nextu6_find_path_using_ifp(nextu, ifp, &sibling); - if (cover_nh && nh6_is_connected(cover_nh)) { + cover_nh = next_hop_list_find_path_using_ifp(nextl, ifp, + &sibling); + if (cover_nh && nh_is_connected(cover_nh)) { /* * Have a connected cover so create a new entry for * this. Will only be 1 NEIGH_CREATED path, but * need to inherit other paths from the cover. */ - nh = nexthop6_create_copy(nextu, &size); + nh = next_hop_list_copy_next_hops(nextl, &size); if (!nh) return; /* * Set the correct NH to be NEIGH_CREATED. 
As this - * is copied from the cover nextu, the sibling gives + * is copied from the cover nextl, the sibling gives * the NH for the correct interface */ - nh6_set_neigh_created(&nh[sibling], lle); + nh_set_neigh_created(AF_INET6, &nh[sibling], lle); /* * This is a /128 we are creating, therefore not a GW. * Set the GW (but not the flag) so that we do not * share with non /128 routes such as the connected * cover. */ - nh[sibling].gateway = *ip; + nh[sibling].gateway.address.ip_v6 = *ip; + nh[sibling].gateway.type = AF_INET6; if (route_nexthop6_new(nh, size, &nh_idx) < 0) { free(nh); return; } - route_lpm6_add(vrf->v_id, lpm, ip, 128, + route_lpm6_add(vrf->v_id, vrf->v_fal_obj, lpm, ip, 128, nh_idx, RT_SCOPE_LINK, table_id); free(nh); } @@ -3032,7 +2506,7 @@ route6_create_neigh(struct vrf *vrf, struct lpm6 *lpm, * On an arp del, should we clear NEIGH_PRESENT from this NH. */ static enum nh_change -routing_neigh_del_gw_nh_replace_cb(struct next_hop_v6 *next, +routing_neigh_del_gw_nh_replace_cb(struct next_hop *next, int sibling __unused, void *arg) { @@ -3040,12 +2514,12 @@ routing_neigh_del_gw_nh_replace_cb(struct next_hop_v6 *next, struct in6_addr *ip = ll_ipv6_addr(lle); struct ifnet *ifp = rcu_dereference(lle->ifp); - if (!nh6_is_gw(next) || !IN6_ARE_ADDR_EQUAL(&next->gateway, - &ip->s6_addr)) + if (!nh_is_gw(next) || !IN6_ARE_ADDR_EQUAL(&next->gateway.address.ip_v6, + &ip->s6_addr)) return NH_NO_CHANGE; - if (nh6_get_ifp(next) != ifp) + if (dp_nh_get_ifp(next) != ifp) return NH_NO_CHANGE; - if (nh6_is_local(next) || !nh6_is_neigh_present(next)) + if (nh_is_local(next) || !nh_is_neigh_present(next)) return NH_NO_CHANGE; return NH_CLEAR_NEIGH_PRESENT; @@ -3054,19 +2528,19 @@ routing_neigh_del_gw_nh_replace_cb(struct next_hop_v6 *next, static void walk_nhs_for_neigh_change(struct llentry *lle, enum nh_change (*upd_neigh_present_cb)( - struct next_hop_v6 *next, + struct next_hop *next, int sibling, void *arg)) { - struct next_hop_v6_u *nhu; + struct next_hop_list 
*nhl; struct cds_lfht_iter iter; struct cds_lfht_node *node; - ASSERT_MASTER(); + ASSERT_MAIN(); cds_lfht_for_each(nexthop6_hash, &iter, node) { - nhu = caa_container_of(node, struct next_hop_v6_u, nh_node); - route6_nh_replace(nhu, nhu->index, lle, NULL, + nhl = caa_container_of(node, struct next_hop_list, nh_node); + route6_nh_replace(AF_INET6, nhl, nhl->index, lle, NULL, upd_neigh_present_cb, lle); } } @@ -3084,18 +2558,18 @@ struct neigh_add_nh_replace_arg { * it only exists due to the neigh entry, so this hop can become NEIGH_CREATED * too. */ -static enum nh_change routing_neigh_add_nh_replace_cb(struct next_hop_v6 *next, +static enum nh_change routing_neigh_add_nh_replace_cb(struct next_hop *next, int sibling __unused, void *arg) { struct neigh_add_nh_replace_arg *args = arg; - if (!nh6_is_connected(next)) + if (!nh_is_connected(next)) return NH_NO_CHANGE; - if (nh6_is_neigh_present(next) || nh6_is_neigh_created(next)) + if (nh_is_neigh_present(next) || nh_is_neigh_created(next)) return NH_NO_CHANGE; - if (args->ifp != nh6_get_ifp(next)) + if (args->ifp != dp_nh_get_ifp(next)) return NH_NO_CHANGE; if (args->count) @@ -3107,22 +2581,22 @@ static enum nh_change routing_neigh_add_nh_replace_cb(struct next_hop_v6 *next, /* * On a neigh del NEIGH_PRESENT from this NH. 
*/ -static enum nh_change routing_neigh_del_nh_replace_cb(struct next_hop_v6 *next, +static enum nh_change routing_neigh_del_nh_replace_cb(struct next_hop *next, int sibling __unused, void *arg) { struct ifnet *ifp = arg; - if (!nh6_is_connected(next) || !nh6_is_neigh_present(next)) + if (!nh_is_connected(next) || !nh_is_neigh_present(next)) return NH_NO_CHANGE; - if (ifp != nh6_get_ifp(next)) + if (ifp != dp_nh_get_ifp(next)) return NH_NO_CHANGE; return NH_CLEAR_NEIGH_PRESENT; } struct neigh_remove_purge_arg { - int count; /* Count of number of NEIGH_CREATED in parent nextu */ + int count; /* Count of number of NEIGH_CREATED in parent nextl */ int sibling; /* Sibling that had the arp entry removed */ }; @@ -3135,7 +2609,7 @@ struct neigh_remove_purge_arg { * NEIGH_CREATED removed and revert back to inheriting from the cover. */ static enum nh_change -neigh_removal_nh_purge_cb(struct next_hop_v6 *next __unused, +neigh_removal_nh_purge_cb(struct next_hop *next __unused, int sibling, void *arg) { @@ -3144,8 +2618,7 @@ neigh_removal_nh_purge_cb(struct next_hop_v6 *next __unused, if (sibling == args->sibling) { if (args->count > 1) return NH_CLEAR_NEIGH_CREATED; - else - return NH_DELETE; + return NH_DELETE; } if (args->count > 1) @@ -3159,17 +2632,17 @@ void routing6_insert_neigh_safe(struct llentry *lle, bool neigh_change) struct in6_addr *ip = ll_ipv6_addr(lle); struct vrf *vrf = get_vrf(if_vrfid(lle->ifp)); struct lpm6 *lpm; - struct next_hop_v6_u *nextu; + struct next_hop_list *nextl; uint32_t nh_idx; struct ifnet *ifp = rcu_dereference(lle->ifp); - struct next_hop_v6 *nh; + struct next_hop *nh; int sibling; lpm = rcu_dereference(vrf->v_rt6_head.rt6_table[RT_TABLE_MAIN]); pthread_mutex_lock(&route6_mutex); if (lpm6_lookup_exact(lpm, ip->s6_addr, 128, &nh_idx) == 0) { /* We already have a /128 so add the shortcut if connected */ - nextu = rcu_dereference(nh6_tbl.entry[nh_idx]); + nextl = rcu_dereference(nh6_tbl.entry[nh_idx]); /* * Do we already have a nh for this 
interface? @@ -3178,14 +2651,14 @@ void routing6_insert_neigh_safe(struct llentry *lle, bool neigh_change) * modify the set of NHs, to reflect the ones the * cover has. */ - nh = nextu6_find_path_using_ifp(nextu, ifp, &sibling); + nh = next_hop_list_find_path_using_ifp(nextl, ifp, &sibling); if (nh) { struct neigh_add_nh_replace_arg arg = { .ifp = ifp, - .count = nextu6_nc_count(nextu), + .count = next_hop_list_nc_count(nextl), }; - route6_nh_replace(nextu, nh_idx, lle, NULL, + route6_nh_replace(AF_INET6, nextl, nh_idx, lle, NULL, routing_neigh_add_nh_replace_cb, &arg); } @@ -3216,52 +2689,58 @@ void routing6_remove_neigh_safe(struct llentry *lle) struct in6_addr *ip = ll_ipv6_addr(lle); struct vrf *vrf = get_vrf(if_vrfid(lle->ifp)); struct lpm6 *lpm; - struct next_hop_v6_u *nextu; + struct next_hop_list *nextl; uint32_t nh_idx; struct ifnet *ifp = rcu_dereference(lle->ifp); int sibling; - struct next_hop_v6 *nh; + struct next_hop *nh; lpm = rcu_dereference(vrf->v_rt6_head.rt6_table[RT_TABLE_MAIN]); pthread_mutex_lock(&route6_mutex); if (lpm6_lookup_exact(lpm, ip->s6_addr, 128, &nh_idx) == 0) { /* We have a /128 so unlink the arp (if there) */ - nextu = rcu_dereference(nh6_tbl.entry[nh_idx]); + nextl = rcu_dereference(nh6_tbl.entry[nh_idx]); + if (unlikely(!nextl)) + goto unlock; /* Do we already have a nh for this interface? 
*/ - nh = nextu6_find_path_using_ifp(nextu, ifp, &sibling); - if (nh && nh6_is_neigh_created(nh)) { + nh = next_hop_list_find_path_using_ifp(nextl, ifp, &sibling); + if (nh && nh_is_neigh_created(nh)) { /* Are we removing a path or the entire NH */ - if (nextu->nsiblings == 1) { - route_lpm6_delete(vrf->v_id, lpm, ip, 128, - &nh_idx, RT_SCOPE_LINK); - nexthop6_put(nh_idx); + if (nextl->nsiblings == 1) { + route_lpm6_delete(vrf->v_id, vrf->v_fal_obj, + lpm, ip, 128, + &nh_idx, RT_SCOPE_LINK); + nexthop_put(AF_INET6, nh_idx); } else { struct neigh_remove_purge_arg args = { - .count = nextu6_nc_count(nextu), + .count = next_hop_list_nc_count(nextl), .sibling = sibling, }; uint32_t del; uint32_t new_nh_idx; del = route6_nh_replace( - nextu, nh_idx, lle, + AF_INET6, + nextl, nh_idx, lle, &new_nh_idx, neigh_removal_nh_purge_cb, &args); /* Can not delete a subset of paths here */ - if (del == nextu->nsiblings) { - route_lpm6_delete(vrf->v_id, lpm, - ip, 128, &nh_idx, - RT_SCOPE_LINK); - nexthop6_put(nh_idx); + if (del == nextl->nsiblings) { + route_lpm6_delete(vrf->v_id, + vrf->v_fal_obj, lpm, + ip, 128, &nh_idx, + RT_SCOPE_LINK); + nexthop_put(AF_INET6, nh_idx); } } } else { - route6_nh_replace(nextu, nh_idx, NULL, NULL, + route6_nh_replace(AF_INET6, nextl, nh_idx, NULL, NULL, routing_neigh_del_nh_replace_cb, ifp); } } +unlock: pthread_mutex_unlock(&route6_mutex); /* @@ -3289,9 +2768,8 @@ struct rt6_show_subset { }; static void rt6_show_subset(struct vrf *vrf, uint32_t tableid, - const uint8_t *ip, uint32_t depth, int16_t scope, - uint32_t idx, - struct pd_obj_state_and_flags pd_state, + struct lpm6_walk_params *params, + struct pd_obj_state_and_flags *pd_state, void *arg) { struct rt6_show_subset *subset = arg; @@ -3300,15 +2778,14 @@ static void rt6_show_subset(struct vrf *vrf, uint32_t tableid, subset->vrf = vrf->v_id; jsonw_start_object(subset->json); jsonw_uint_field(subset->json, "vrf_id", - vrf_get_external_id(vrf->v_id)); + dp_vrf_get_external_id(vrf->v_id)); 
jsonw_uint_field(subset->json, "table", tableid); jsonw_end_object(subset->json); } - if (subset->subset == pd_state.state) - rt6_display_all(ip, depth, scope, idx, pd_state, - subset->json); + if (subset->subset == pd_state->state) + rt6_display_all(params, pd_state, subset->json); } @@ -3325,8 +2802,97 @@ int route6_get_pd_subset_data(json_writer_t *json, return 0; } +static void route6_fal_upd_for_changed_nhl( + struct vrf *vrf, uint32_t table_id, + struct lpm6_walk_params *params, + struct pd_obj_state_and_flags *pd_state, + void *arg) +{ + const uint32_t *filter_nhl_index = arg; + struct in6_addr ip; + int rc; + + if (params->next_hop != *filter_nhl_index) + return; + + if (pd_state->state != PD_OBJ_STATE_FULL) + return; + + struct next_hop_list *nextl = + rcu_dereference(nh6_tbl.entry[params->next_hop]); + + memcpy(&ip.s6_addr, params->prefix, sizeof(ip.s6_addr)); + + rc = fal_ip6_upd_route(vrf->v_id, vrf->v_fal_obj, &ip, params->pr_len, + table_id, nextl->siblings, + nextl->nsiblings, nextl->nhg_fal_obj); + + pd_state->state = fal_state_to_pd_state(rc); + + /* Kick trackers so that clients can learn about FAL changes */ + params->call_tracker_cbs = true; +} + +static void +route6_handle_fal_l3_enable_change(struct ifnet *ifp) +{ + struct cds_lfht_node *node; + fal_object_t *old_nh_objs; + struct next_hop_list *nhl; + uint32_t nhls_updated = 0; + struct cds_lfht_iter iter; + fal_object_t old_nhg_obj; + + cds_lfht_for_each(nexthop6_hash, &iter, node) { + nhl = caa_container_of(node, struct next_hop_list, nh_node); + + if (!next_hop_list_fal_l3_enable_changed(AF_INET6, + nhl, ifp, + &old_nhg_obj, + &old_nh_objs)) + continue; + + /* + * This is going to be very expensive if there are a + * lot of routes present in the system. The only + * consolation is that this is only anticipated to be + * done on major changes such as interface creation + * and removal. 
+ */ + rt6_lpm_walk_util(route6_fal_upd_for_changed_nhl, + &nhl->index); + mpls_update_all_routes_for_nh_change(AF_INET6, nhl->index); + + next_hop_list_fal_l3_enable_changed_finish( + AF_INET6, nhl, old_nhg_obj, old_nh_objs); + + nhls_updated++; + } + + if (nhls_updated) + RTE_LOG(DEBUG, ROUTE, + "Updated %u IPv6 next hop lists due to FAL L3 state change of interface %s\n", + nhls_updated, ifp->if_name); +} + +static void +rt6_if_feat_mode_change(struct ifnet *ifp, + enum if_feat_mode_event event) +{ + switch (event) { + case IF_FEAT_MODE_EVENT_L3_FAL_ENABLED: + case IF_FEAT_MODE_EVENT_L3_FAL_DISABLED: + route6_handle_fal_l3_enable_change(ifp); + break; + default: + break; + } +} + static const struct dp_event_ops route6_events = { .if_index_unset = rt6_if_delete, + .if_feat_mode_change = rt6_if_feat_mode_change, + .vrf_delete = rt6_flush, }; DP_STARTUP_EVENT_REGISTER(route6_events); diff --git a/src/netinet6/route_v6.h b/src/netinet6/route_v6.h index 81c7d16b..bbe70106 100644 --- a/src/netinet6/route_v6.h +++ b/src/netinet6/route_v6.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2011-2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -15,6 +15,7 @@ #include #include "compiler.h" +#include "nh_common.h" #include "json_writer.h" #include "mpls/mpls.h" #include "pd_show.h" @@ -40,39 +41,20 @@ struct route6_head { struct lpm6 **rt6_table; }; -/* This is the nexthop information result of route lookup. 
*/ -struct next_hop_v6 { - union { - struct ifnet *ifp; /* target interface */ - struct llentry *lle; /* lle entry to use when sending */ - } u; - uint32_t flags; /* routing flags */ - union next_hop_outlabels outlabels; - struct in6_addr gateway; /* nexthop IPv6 address */ -}; - void nexthop_v6_tbl_init(void); int route_v6_init(struct vrf *vrf); void route_v6_uninit(struct vrf *vrf, struct route6_head *rt6_head); -bool route6_link_vrf_to_table(struct vrf *vrf, uint32_t tableid); -bool route6_unlink_vrf_from_table(struct vrf *vrf); struct rte_mbuf; -struct next_hop_v6 *rt6_lookup(const struct in6_addr *dst, uint32_t tbl_id, - const struct rte_mbuf *m); -struct next_hop_v6 *nexthop6_select(uint32_t nh_idx, - const struct rte_mbuf *m, - uint16_t ether_type); - -struct next_hop_v6 *nexthop6_create( - struct ifnet *ifp, const struct in6_addr *gw, uint32_t flags, - uint16_t num_labels, label_t *labels); -void nexthop6_put(uint32_t idx); -int nexthop6_new(struct next_hop_v6 *nh, size_t size, uint32_t *slot); -void rt6_print_nexthop(json_writer_t *json, uint32_t next_hop); -struct next_hop_v6 *rt6_lookup_fast(struct vrf *vrf, - const struct in6_addr *dst, uint32_t tbl_id, - const struct rte_mbuf *m); +struct next_hop *nexthop6_select(int family, uint32_t nh_idx, + const struct rte_mbuf *m, + uint16_t ether_type); +void rt6_print_nexthop(json_writer_t *json, uint32_t next_hop, + enum rt_print_nexthop_verbosity v); + +struct next_hop *rt6_lookup_fast(struct vrf *vrf, + const struct in6_addr *dst, uint32_t tbl_id, + const struct rte_mbuf *m); void rt6_prefetch(const struct rte_mbuf *m, const struct in6_addr *dst); void rt6_prefetch_fast(const struct rte_mbuf *m, const struct in6_addr *dst) @@ -81,10 +63,12 @@ bool rt6_valid_tblid(vrfid_t vrfid, uint32_t tbl_id) __hot_func; struct rtmsg; -int handle_route6(vrfid_t vrf_id, uint16_t type, const struct rtmsg *rtm, - uint32_t table, const void *dest, const void *nexthop, - unsigned int ifindex, uint8_t scope, struct nlattr 
*mpath, - uint32_t nl_flags, uint16_t num_labels, label_t *labels); +int rt6_add(vrfid_t vrf_id, struct in6_addr *dst, uint32_t prefix_len, + uint32_t table, int16_t scope, struct next_hop hops[], + size_t size); +int rt6_delete(vrfid_t vrf_id, const struct in6_addr *dst, + uint8_t prefix_len, uint32_t id, uint16_t scope, + bool is_local); bool is_local_ipv6(vrfid_t vrf_id, const struct in6_addr *dst); void rt6_flush_all(enum cont_src_en cont_src); @@ -96,8 +80,6 @@ void rt6_if_punt_to_slowpath(struct ifnet *ifp); struct ifnet *nhif_dst_lookup6(const struct vrf *vrf, const struct in6_addr *dst, bool *connected); -int nh6_lookup_by_index(uint32_t nhindex, uint32_t hash, - struct in6_addr *nh, uint32_t *ifindex); void routing6_insert_neigh_safe(struct llentry *lle, bool neigh_change); void routing6_remove_neigh_safe(struct llentry *lle); diff --git a/src/netinet6/scope6.c b/src/netinet6/scope6.c index 2df66262..7d09c20e 100644 --- a/src/netinet6/scope6.c +++ b/src/netinet6/scope6.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only * @@ -41,11 +41,10 @@ in6_setscope(struct in6_addr *in6, const struct ifnet *ifp, uint32_t *ret_id) if (IN6_IS_ADDR_LOOPBACK(in6)) { if (!(ifp->if_flags & IFF_LOOPBACK)) return EINVAL; - else { - if (ret_id != NULL) - *ret_id = 0; /* there's no ambiguity */ - return 0; - } + + if (ret_id != NULL) + *ret_id = 0; /* there's no ambiguity */ + return 0; } if (ret_id != NULL) diff --git a/src/netlink.c b/src/netlink.c index 31750ba9..237c2a7c 100644 --- a/src/netlink.c +++ b/src/netlink.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2011-2016 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -30,16 +30,14 @@ #include #include #include -#include #include #include #include #include -#include "bridge.h" #include "compat.h" #include "compiler.h" -#include "config.h" +#include "config_internal.h" #include "crypto/crypto.h" #include "crypto/crypto_policy.h" #include "crypto/crypto_sadb.h" @@ -48,27 +46,30 @@ #include "ether.h" #include "fal.h" #include "fal_plugin.h" -#include "gre.h" +#include "if/bridge/bridge.h" +#include "if/dpdk-eth/dpdk_eth_if.h" +#include "if/dpdk-eth/vhost.h" +#include "if/gre.h" +#include "if/macvlan.h" +#include "if/vlan/vlan_if.h" +#include "if/vxlan.h" #include "if_name_types.h" #include "if_var.h" #include "ip_mcast.h" #include "l2_rx_fltr.h" #include "l2tp/l2tpeth.h" #include "lag.h" -#include "macvlan.h" #include "main.h" #include "netlink.h" #include "pipeline/nodes/pppoe/pppoe.h" #include "route.h" #include "util.h" -#include "vhost.h" -#include "vlan_if.h" #include "vplane_debug.h" #include "vplane_log.h" -#include "vrf.h" +#include "vrf_internal.h" #include "vrf_if.h" -#include "vxlan.h" #include "vlan_modify.h" +#include "crypto/xfrm_client.h" static int linkinfo_attr(const struct nlattr *attr, void *data) { @@ -103,7 +104,7 @@ struct ifnet *lo_or_dummy_create(enum cont_src_en cont_src, unsigned int flags, const char *ifname, unsigned int mtu, - const struct ether_addr *eth_addr) + const struct rte_ether_addr *eth_addr) { struct ifnet *ifp; struct vfp_softc *vsc; @@ -117,7 +118,7 @@ struct ifnet *lo_or_dummy_create(enum cont_src_en cont_src, return ifp; } - ifp = if_alloc(ifname, IFT_LOOP, mtu, eth_addr, SOCKET_ID_ANY); + ifp = if_alloc(ifname, IFT_LOOP, mtu, eth_addr, SOCKET_ID_ANY, NULL); if (!ifp) rte_panic("out of memory for lo ifnet\n"); @@ -141,7 +142,7 @@ static struct ifnet *lo_create(enum cont_src_en cont_src, unsigned int flags, const char *ifname, unsigned int mtu, - const struct ether_addr *eth_addr) + const struct rte_ether_addr *eth_addr) { struct ifnet *lo; @@ -156,18 +157,18 @@ static struct ifnet 
*lo_create(enum cont_src_en cont_src, static struct ifnet *ppp_create(unsigned int ifindex, const char *ifname, unsigned int mtu, - const struct ether_addr *eth_addr) + const struct rte_ether_addr *eth_addr) { struct ifnet *ifp; - ifp = ifnet_byifname(ifname); + ifp = dp_ifnet_byifname(ifname); if (ifp != NULL) { if_unset_ifindex(ifp); if_set_ifindex(ifp, ifindex); return ifp; } - ifp = if_alloc(ifname, IFT_PPP, mtu, eth_addr, SOCKET_ID_ANY); + ifp = if_alloc(ifname, IFT_PPP, mtu, eth_addr, SOCKET_ID_ANY, NULL); if (!ifp) rte_panic("out of memory for ppp ifnet\n"); @@ -178,11 +179,11 @@ static struct ifnet *ppp_create(unsigned int ifindex, const char *ifname, static struct ifnet *other_tunnel_create(unsigned int ifindex, const char *ifname, unsigned int mtu, - const struct ether_addr *eth_addr) + const struct rte_ether_addr *eth_addr) { struct ifnet *ifp; - ifp = ifnet_byifname(ifname); + ifp = dp_ifnet_byifname(ifname); if (ifp != NULL) { if_unset_ifindex(ifp); if_set_ifindex(ifp, ifindex); @@ -190,7 +191,30 @@ static struct ifnet *other_tunnel_create(unsigned int ifindex, } ifp = if_alloc(ifname, IFT_TUNNEL_OTHER, mtu, eth_addr, - SOCKET_ID_ANY); + SOCKET_ID_ANY, NULL); + if (!ifp) + rte_panic("out of memory for tunnel ifnet\n"); + + if_set_ifindex(ifp, ifindex); + return ifp; +} + +static struct ifnet *pimreg_tunnel_create(unsigned int ifindex, + const char *ifname, + unsigned int mtu, + const struct rte_ether_addr *eth_addr) +{ + struct ifnet *ifp; + + ifp = dp_ifnet_byifname(ifname); + if (ifp != NULL) { + if_unset_ifindex(ifp); + if_set_ifindex(ifp, ifindex); + return ifp; + } + + ifp = if_alloc(ifname, IFT_TUNNEL_PIMREG, mtu, eth_addr, + SOCKET_ID_ANY, NULL); if (!ifp) rte_panic("out of memory for tunnel ifnet\n"); @@ -290,16 +314,16 @@ vrf_link_create(const struct ifinfomsg *ifi, const char *ifname, vrfinfo_attr, vrfinfo) != MNL_CB_OK) { RTE_LOG(ERR, DATAPLANE, "Could not get vrfinfo for: %s\n", ifname); - return false; + return NULL; } if 
(!vrfinfo[IFLA_VRF_TABLE]) { RTE_LOG(ERR, DATAPLANE, "Missing VRF table attribute for: %s\n", ifname); - return false; + return NULL; } - return vrfmaster_create(ifname, ifi->ifi_index, + return vrf_if_create(ifname, ifi->ifi_index, mnl_attr_get_u32(vrfinfo[IFLA_VRF_TABLE])); } @@ -307,34 +331,20 @@ vrf_link_create(const struct ifinfomsg *ifi, const char *ifname, static struct ifnet * dataplane_tuntap_create(unsigned int if_idx, const char *ifname) { - struct ifnet *ifp = ifnet_byifname(ifname); - - /* - * Is it a local port created previously? - */ - if (ifp && ifp->if_local_port) { - /* newly learned if_index? */ - if (!ifp->if_index) { - if_set_ifindex(ifp, if_idx); - return ifp; - } else - return NULL; - } - return NULL; + return dpdk_eth_if_alloc(ifname, if_idx); } /* * Handle creation of software interfaces (tunnels, etc) * in response to netlink create message. */ -static struct ifnet *unspec_link_create(const struct nlmsghdr *nlh, - const struct ifinfomsg *ifi, +static struct ifnet *unspec_link_create(const struct ifinfomsg *ifi, const char *ifname, struct nlattr *tb[], const char *kind, struct nlattr *kdata, enum cont_src_en cont_src) { - struct ether_addr *macaddr = NULL; - unsigned int mtu = ETHER_MTU; + struct rte_ether_addr *macaddr = NULL; + unsigned int mtu = RTE_ETHER_MTU; const uint16_t arphrd = ifi->ifi_type; struct ifnet *parent_ifp = NULL; unsigned int if_idx = cont_src_ifindex(cont_src, ifi->ifi_index); @@ -404,6 +414,8 @@ static struct ifnet *unspec_link_create(const struct nlmsghdr *nlh, if (!strcmp(ifname, "sit0")) return NULL; return other_tunnel_create(if_idx, ifname, mtu, macaddr); + case ARPHRD_PIMREG: + return pimreg_tunnel_create(if_idx, ifname, mtu, macaddr); case ARPHRD_NONE: if (!strcmp(kind, "tun")) { /* We do not want an interface for this */ @@ -416,7 +428,7 @@ static struct ifnet *unspec_link_create(const struct nlmsghdr *nlh, if (!strcmp(kind, "vxlan")) return vxlan_create(ifi, ifname, macaddr, tb, kdata, - cont_src, nlh); + 
cont_src); return NULL; default: @@ -445,8 +457,8 @@ static struct ifnet *unspec_link_create(const struct nlmsghdr *nlh, /* Lower for virtual bridge (e.g. vxl-vbr4) */ if (!strcmp(kind, "vxlan")) - return vxlan_create(ifi, ifname, macaddr, tb, kdata, cont_src, - nlh); + return vxlan_create(ifi, ifname, macaddr, tb, kdata, + cont_src); /* * Used by a local tunnel for a GRE bridge. @@ -477,7 +489,7 @@ static struct ifnet *unspec_link_create(const struct nlmsghdr *nlh, if (!strcmp(kind, "tun")) { if (is_dp_intf(ifname)) return dataplane_tuntap_create(if_idx, ifname); - else if (strncmp(ifname, "vtun", 4) == 0) + if (strncmp(ifname, "vtun", 4) == 0) return other_tunnel_create(if_idx, ifname, mtu, macaddr); } @@ -496,16 +508,13 @@ static struct ifnet *unspec_link_create(const struct nlmsghdr *nlh, /* Nested types follow */ if (parent_idx) { - parent_ifp = ifnet_byifindex(parent_idx); + parent_ifp = dp_ifnet_byifindex(parent_idx); if (!parent_ifp) { if (is_ignored_interface(parent_idx)) RTE_LOG(INFO, DATAPLANE, "ignoring link %u not on top of" " dataplane interface\n", parent_idx); - else - missed_nl_child_link_add(parent_idx, if_idx, - nlh); return NULL; } @@ -550,22 +559,22 @@ static vrfid_t netlink_get_link_vrf(struct ifnet *ifp, enum cont_src_en cont_src, struct nlattr *tb[]) { - struct ifnet *master_ifp; + struct ifnet *team_ifp; if (tb[IFLA_MASTER]) { - uint32_t master; - - master = cont_src_ifindex(cont_src, - mnl_attr_get_u32(tb[IFLA_MASTER])); - master_ifp = ifnet_byifindex(master); - if (master_ifp && master_ifp->if_type == IFT_VRFMASTER) - return vrfmaster_get_vrfid(master_ifp); - } else if (ifp->if_type == IFT_VRFMASTER) { + uint32_t team; + + team = cont_src_ifindex(cont_src, + mnl_attr_get_u32(tb[IFLA_MASTER])); + team_ifp = dp_ifnet_byifindex(team); + if (team_ifp && team_ifp->if_type == IFT_VRF) + return vrf_if_get_vrfid(team_ifp); + } else if (ifp->if_type == IFT_VRF) { /* - * VRF master devices should also be considered to be + * VRF devices should also 
be considered to be * inside a VRF */ - return vrfmaster_get_vrfid(ifp); + return vrf_if_get_vrfid(ifp); } return VRF_DEFAULT_ID; @@ -580,16 +589,16 @@ static void unspec_link_modify(struct ifnet *ifp, struct nlattr *kdata, enum cont_src_en cont_src) { - struct ifnet *master_ifp = NULL; + struct ifnet *team_ifp = NULL; unsigned int flags = ifi->ifi_flags; vrfid_t vrf_id, old_vrfid = ifp->if_vrfid; /* handle device rename */ - if (strncmp(ifp->if_name, ifname, IFNAMSIZ)) { + if (strncmp(ifp->if_name, ifname, IFNAMSIZ) != 0) { if_rename(ifp, ifname); struct fal_attribute_t name_attr = { - FAL_PORT_ATTR_NAME}; + .id = FAL_PORT_ATTR_NAME}; snprintf(name_attr.value.if_name, sizeof(name_attr.value.if_name), @@ -614,8 +623,8 @@ static void unspec_link_modify(struct ifnet *ifp, if (old_vrfid != ifp->if_vrfid) { struct fal_attribute_t l3_vrf_attr = { - .id = FAL_ROUTER_INTERFACE_ATTR_VRF_ID, - .value.u32 = vrf_id, + .id = FAL_ROUTER_INTERFACE_ATTR_VRF_OBJ, + .value.objid = get_vrf(ifp->if_vrfid)->v_fal_obj, }; if_set_l3_intf_attr(ifp, &l3_vrf_attr); @@ -636,22 +645,22 @@ static void unspec_link_modify(struct ifnet *ifp, case IFT_ETHER: if (tb[IFLA_MASTER]) { - uint32_t master; + uint32_t if_index; - master = cont_src_ifindex(cont_src, + if_index = cont_src_ifindex(cont_src, mnl_attr_get_u32(tb[IFLA_MASTER])); - master_ifp = ifnet_byifindex(master); + team_ifp = dp_ifnet_byifindex(if_index); - if (master_ifp == NULL) { + if (team_ifp == NULL) { DP_DEBUG(NETLINK_IF, ERR, DATAPLANE, - "%s couldn't find master ifindex %d\n", - ifp->if_name, master); + "%s couldn't find bridge or team if_index %d\n", + ifp->if_name, if_index); return; } } - if (is_team(master_ifp) || ifp->aggregator) - lag_nl_slave_update(ifi, ifp, master_ifp); + if (is_team(team_ifp) || ifp->aggregator) + lag_nl_member_update(ifi, ifp, team_ifp); break; case IFT_TUNNEL_GRE: @@ -752,7 +761,8 @@ static int unspec_link_change(const struct nlmsghdr *nlh, } ifindex = cont_src_ifindex(cont_src, ifi->ifi_index); - ifp 
= ifnet_byifindex(ifindex); + ifp = dp_ifnet_byifindex(ifindex); + if (nlh->nlmsg_type == RTM_NEWLINK) msg = (ifp) ? "MOD" : "NEW"; else if (nlh->nlmsg_type == RTM_DELLINK) @@ -786,7 +796,7 @@ static int unspec_link_change(const struct nlmsghdr *nlh, unspec_link_modify(ifp, ifi, ifname, tb, kind, kdata, cont_src); } else { - ifp = unspec_link_create(nlh, ifi, ifname, tb, kind, + ifp = unspec_link_create(ifi, ifname, tb, kind, kdata, cont_src); if (ifp) { vrfid_t vrf_id; @@ -798,8 +808,6 @@ static int unspec_link_change(const struct nlmsghdr *nlh, if_set_vrf(ifp, vrf_id); if_set_cont_src(ifp, cont_src); ifp->if_flags = ifi->ifi_flags; - if (ifp->if_flags & IFF_UP) - if_start(ifp); if_set_broadcast(ifp, ifp->if_flags & IFF_BROADCAST); if_finish_create( @@ -808,36 +816,37 @@ static int unspec_link_change(const struct nlmsghdr *nlh, tb[IFLA_ADDRESS] ? mnl_attr_get_payload(tb[IFLA_ADDRESS]) : NULL); + if (ifp->if_flags & IFF_UP) + if_start(ifp); } else { - if (is_dp_intf(ifname)) - missed_nl_unspec_link_add(ifindex, nlh); - else + if (is_dp_intf(ifname)) { + RTE_LOG(WARNING, DATAPLANE, + "%u:%s link (%s/%s/%c) Not created\n", + ifindex, ifname, + ifitype_name(ifi->ifi_type), + kind ? kind : "-", + kdata ? 'y' : 'n'); + } else { incomplete_if_add_ignored(ifindex); - DP_DEBUG(NETLINK_IF, DEBUG, DATAPLANE, - "%u:%s NUL link (%s/%s/%c) Not created\n", - ifindex, ifname, - ifitype_name(ifi->ifi_type), - (kind) ? kind : "-", - (kdata) ? 'y' : 'n'); + DP_DEBUG(NETLINK_IF, DEBUG, DATAPLANE, + "%u:%s NUL link (%s/%s/%c) Not created\n", + ifindex, ifname, + ifitype_name(ifi->ifi_type), + (kind) ? kind : "-", + (kdata) ? 
'y' : 'n'); + } } } break; case RTM_DELLINK: if (is_team(ifp)) - lag_nl_master_delete(ifi, ifp); + lag_nl_team_delete(ifi, ifp); else { - mc_del_if(ifindex); if (ifp) netlink_if_free(ifp); - else { - missed_nl_unspec_link_del(ifindex); + else incomplete_if_del_ignored(ifindex); - if (tb[IFLA_LINK]) - missed_nl_child_link_del( - mnl_attr_get_u32(tb[IFLA_LINK]), - ifindex); - } } break; } @@ -855,7 +864,7 @@ static int unspec_addr_change(const struct nlmsghdr *nlh, struct ifnet *ifp; unsigned int ifindex = cont_src_ifindex(cont_src, ifa->ifa_index); - ifp = ifnet_byifindex(cont_src_ifindex(cont_src, ifindex)); + ifp = dp_ifnet_byifindex(cont_src_ifindex(cont_src, ifindex)); if (tb[IFA_ADDRESS]) addr = mnl_attr_get_payload(tb[IFA_ADDRESS]); @@ -882,7 +891,11 @@ static int unspec_addr_change(const struct nlmsghdr *nlh, l2_rx_fltr_add_addr(ifp, addr); } else { if (!is_ignored_interface(ifindex)) - missed_nl_unspec_addr_add(ifindex, addr, nlh); + RTE_LOG(ERR, DATAPLANE, + "(%s) unspec addr %s missing interface with index %u\n", + cont_src_name(cont_src), + nlmsg_type(nlh->nlmsg_type), + ifindex); } break; case RTM_DELADDR: @@ -890,7 +903,11 @@ static int unspec_addr_change(const struct nlmsghdr *nlh, l2_rx_fltr_del_addr(ifp, addr); } else { if (!is_ignored_interface(ifindex)) - missed_nl_unspec_addr_del(ifindex, addr); + RTE_LOG(ERR, DATAPLANE, + "(%s) unspec addr %s missing interface with index %u\n", + cont_src_name(cont_src), + nlmsg_type(nlh->nlmsg_type), + ifindex); } break; default: @@ -1266,16 +1283,16 @@ xfrm_attr_vrf(struct xfrm_selector *sel, vrfid_t *vrfid, uint32_t *ifindex) { if (sel && sel->ifindex) { /* - * If the ifindex is a vrf master then it represents the vrf. - * If it is not a vrf master, then it means that it is part of - * the selector. In this case the vrf will be the vrf master + * If the ifindex is a vrf then it represents the vrf. + * If it is not a vrf, then it means that it is part of + * the selector. 
In this case the vrf will be the vrf * of the given ifindex if set, otherwise the DEFAULT vrf. */ - struct ifnet *ifp = ifnet_byifindex(sel->ifindex); + struct ifnet *ifp = dp_ifnet_byifindex(sel->ifindex); if (ifp) { - if (ifp->if_type == IFT_VRFMASTER) { - *vrfid = vrfmaster_get_vrfid(ifp); + if (ifp->if_type == IFT_VRF) { + *vrfid = vrf_if_get_vrfid(ifp); RTE_LOG(INFO, DATAPLANE, "XFRM using VRF %u\n", *vrfid); } else { @@ -1323,14 +1340,14 @@ xfrm_nl_policy_decode(const struct nlmsghdr *nlh, /* also checks that mnl_nlmsg_get_payload() below works */ if (offset > len) { RTE_LOG(ERR, DATAPLANE, "Can't parse XFRM attributes\n"); - return -1; + return -EINVAL; } /* xfrm_attr should always return successful */ ret = mnl_attr_parse(nlh, offset, xfrm_attr, tb); if (ret != MNL_CB_OK) { RTE_LOG(ERR, DATAPLANE, "Failed parsing XFRM attributes\n"); - return -1; + return -EINVAL; } switch (nlh->nlmsg_type) { @@ -1385,7 +1402,7 @@ xfrm_nl_policy_decode(const struct nlmsghdr *nlh, RTE_LOG(ERR, DATAPLANE, "xfrm: unexpected netlink policy msg %u\n", nlh->nlmsg_type); - return -1; + return -EINVAL; } return 0; @@ -1454,23 +1471,31 @@ int rtnl_process_xfrm(const struct nlmsghdr *nlh, void *data) struct xfrm_userpolicy_id tmp_id; const struct xfrm_selector *sel; uint8_t dir; - int ret, status = MNL_CB_OK; + int ret, status = 0; const xfrm_address_t *peer = NULL; const struct xfrm_mark *mark = NULL; vrfid_t vrfid; uint32_t ifindex = 0; + struct xfrm_client_aux_data *xfrm_aux; + + xfrm_aux = (struct xfrm_client_aux_data *)data; + + vrfid = *xfrm_aux->vrf; + xfrm_aux->ack_msg = false; - vrfid = *(vrfid_t *)data; ret = xfrm_nl_policy_decode(nlh, &id, &policy, &tmpl, &mark, &vrfid, &ifindex); if (ret < 0) { RTE_LOG(ERR, DATAPLANE, "Failed to decode XFRM Policy message\n"); - return MNL_CB_ERROR; + return ret; } - if (policy == NULL && id == NULL) - return MNL_CB_OK; + if (policy == NULL && id == NULL) { + xfrm_aux->ack_msg = true; + status = -EINVAL; + goto out; + } if (policy) { sel 
= &policy->sel; @@ -1486,14 +1511,21 @@ int rtnl_process_xfrm(const struct nlmsghdr *nlh, void *data) */ crypto_incmpl_xfrm_policy_del(ifindex, nlh, sel, mark); + /* + * If the interface a policy depends upon has not yet arrived + * then the policy is rejected and strongswan will retry. + * later. + */ if (crypto_incmpl_xfrm(ifindex)) { - crypto_incmpl_xfrm_policy_add(ifindex, nlh, sel, mark); - return MNL_CB_OK; + RTE_LOG(NOTICE, DATAPLANE, "XFRM policy missing interface\n"); + xfrm_aux->ack_msg = true; + status = -EINVAL; + goto out; } /* * If we are in a non default vrf, and the selector ifindex is a - * VRF master, then set it to 0, as we do not want to compare in the + * VRF, then set it to 0, as we do not want to compare in the * fastpath for this case. We do this here, instead of when parsing * the attributes, as changing the values before the incomplete * processing can lead to stuff not being removed from the incomplete @@ -1501,11 +1533,11 @@ int rtnl_process_xfrm(const struct nlmsghdr *nlh, void *data) */ if (vrfid != VRF_DEFAULT_ID) { if (sel->ifindex) { - struct ifnet *ifp = ifnet_byifindex(sel->ifindex); + struct ifnet *ifp = dp_ifnet_byifindex(sel->ifindex); struct xfrm_selector *new_sel; new_sel = (struct xfrm_selector *)sel; - if (ifp && ifp->if_type == IFT_VRFMASTER) + if (ifp && ifp->if_type == IFT_VRF) new_sel->ifindex = 0; } } @@ -1524,21 +1556,25 @@ int rtnl_process_xfrm(const struct nlmsghdr *nlh, void *data) /* * Ignore _FWD policies since We only create IN and OUT policies. 
*/ - if (dir & ~(XFRM_POLICY_IN|XFRM_POLICY_OUT)) - return MNL_CB_OK; + if (dir & ~(XFRM_POLICY_IN|XFRM_POLICY_OUT)) { + xfrm_aux->ack_msg = true; + goto out; + } switch (nlh->nlmsg_type) { case XFRM_MSG_NEWPOLICY: - if (crypto_policy_add(policy, peer, tmpl, mark, vrfid) < 0) { + status = crypto_policy_add(policy, peer, tmpl, mark, vrfid, + nlh->nlmsg_seq, + &xfrm_aux->ack_msg); + if (status < 0) RTE_LOG(ERR, DATAPLANE, "NEWPOLICY failure\n"); - status = MNL_CB_ERROR; - } break; case XFRM_MSG_UPDPOLICY: - if (crypto_policy_update(policy, peer, tmpl, mark, vrfid) < 0) { + status = crypto_policy_update(policy, peer, tmpl, mark, vrfid, + nlh->nlmsg_seq, + &xfrm_aux->ack_msg); + if (status < 0) RTE_LOG(ERR, DATAPLANE, "UPDPOLICY failure\n"); - status = MNL_CB_ERROR; - } break; case XFRM_MSG_POLEXPIRE: memcpy(&tmp_id.sel, sel, sizeof(tmp_id.sel)); @@ -1547,14 +1583,15 @@ int rtnl_process_xfrm(const struct nlmsghdr *nlh, void *data) id = &tmp_id; /* fall through */ case XFRM_MSG_DELPOLICY: - crypto_policy_delete(id, mark, vrfid); + status = crypto_policy_delete(id, mark, vrfid, nlh->nlmsg_seq, + &xfrm_aux->ack_msg); break; default: RTE_LOG(ERR, DATAPLANE, "Unhandled XFRM policy message\n"); - status = MNL_CB_ERROR; + status = -EINVAL; break; } - +out: return status; } @@ -1563,19 +1600,21 @@ int rtnl_process_xfrm(const struct nlmsghdr *nlh, void *data) * * Process an XFRM new or update SA message. */ -static void process_xfrm_newsa(struct xfrm_usersa_info *sa_info, +static int process_xfrm_newsa(struct xfrm_usersa_info *sa_info, const char *msg_type_str, struct nlattr **attrs, vrfid_t vrf_id, uint32_t *ifindex) { struct xfrm_algo_aead *aead_algo; - struct xfrm_algo_auth *auth_algo; + struct xfrm_algo_auth *auth_trunc_algo; + struct xfrm_algo *auth_algo; struct xfrm_algo *crypto_algo = NULL; struct xfrm_encap_tmpl *tmpl = NULL; struct xfrm_mark *mark; uint32_t mark_val; uint32_t extra_flags = 0; + int rc = 0; /* * VRF. 
Use topic default if no attribute @@ -1583,7 +1622,7 @@ static void process_xfrm_newsa(struct xfrm_usersa_info *sa_info, xfrm_attr_vrf(&sa_info->sel, &vrf_id, ifindex); if (crypto_incmpl_xfrm(*ifindex)) - return; + return -EINVAL; /* * AEAD/crypto algorithm @@ -1593,12 +1632,9 @@ static void process_xfrm_newsa(struct xfrm_usersa_info *sa_info, /* * Authentication algorithm - * - * As in the kernel, prefer AUTH_TRUNC over AUTH. */ - auth_algo = get_nl_attr_payload(attrs[XFRMA_ALG_AUTH_TRUNC]); - if (!auth_algo) - auth_algo = get_nl_attr_payload(attrs[XFRMA_ALG_AUTH]); + auth_trunc_algo = get_nl_attr_payload(attrs[XFRMA_ALG_AUTH_TRUNC]); + auth_algo = get_nl_attr_payload(attrs[XFRMA_ALG_AUTH]); /* * TODO: Currently SADB doesn't cope with no AUTH algo, should it? @@ -1607,6 +1643,7 @@ static void process_xfrm_newsa(struct xfrm_usersa_info *sa_info, RTE_LOG(ERR, DATAPLANE, "Missing XFRMA_ALG_* attribute on XFRM %s message\n", msg_type_str); + rc = -EINVAL; goto scrub; } @@ -1615,6 +1652,7 @@ static void process_xfrm_newsa(struct xfrm_usersa_info *sa_info, if (!mark) { RTE_LOG(ERR, DATAPLANE, "Could not decode MARK attr\n"); + rc = -EINVAL; goto scrub; } mark_val = mark->v; @@ -1629,6 +1667,7 @@ static void process_xfrm_newsa(struct xfrm_usersa_info *sa_info, if (!tmpl) { RTE_LOG(ERR, DATAPLANE, "Could not decode ENCAP attr\n"); + rc = EINVAL; goto scrub; } } @@ -1641,11 +1680,13 @@ static void process_xfrm_newsa(struct xfrm_usersa_info *sa_info, crypto_algo->alg_key_len = aead_algo->alg_key_len; memcpy(crypto_algo->alg_key, aead_algo->alg_key, aead_algo->alg_key_len / 8); - auth_algo = (struct xfrm_algo_auth *)aead_algo; + auth_trunc_algo = (struct xfrm_algo_auth *)aead_algo; } - crypto_sadb_new_sa(sa_info, crypto_algo, auth_algo, tmpl, - mark_val, extra_flags, vrf_id); + rc = crypto_sadb_new_sa(sa_info, crypto_algo, auth_trunc_algo, + auth_algo, tmpl, mark_val, extra_flags, + vrf_id); + /* The above failure case needs to fall into scrub */ scrub: /* @@ -1666,13 
+1707,15 @@ static void process_xfrm_newsa(struct xfrm_usersa_info *sa_info, memset(crypto_algo->alg_key, 0xff, (crypto_algo->alg_key_len >> 3)); - auth_algo = get_nl_attr_payload(attrs[XFRMA_ALG_AUTH_TRUNC]); - if (auth_algo) - memset(auth_algo->alg_key, 0xff, (auth_algo->alg_key_len >> 3)); + auth_trunc_algo = get_nl_attr_payload(attrs[XFRMA_ALG_AUTH_TRUNC]); + if (auth_trunc_algo) + memset(auth_trunc_algo->alg_key, 0xff, + (auth_trunc_algo->alg_key_len >> 3)); auth_algo = get_nl_attr_payload(attrs[XFRMA_ALG_AUTH]); if (auth_algo) memset(auth_algo->alg_key, 0xff, (auth_algo->alg_key_len >> 3)); + return rc; } /* @@ -1680,15 +1723,28 @@ static void process_xfrm_newsa(struct xfrm_usersa_info *sa_info, * * Process an XFRM delete SA message. */ -static void process_xfrm_delsa(struct xfrm_usersa_info *sa_info, +static int process_xfrm_delsa(struct xfrm_usersa_info *sa_info, vrfid_t vrfid, uint32_t *ifindex) { xfrm_attr_vrf(sa_info ? &sa_info->sel : NULL, &vrfid, ifindex); if (crypto_incmpl_xfrm(*ifindex)) - return; + return -EINVAL; + + return crypto_sadb_del_sa(sa_info, vrfid); +} + +static int +process_xfrm_getsa(const struct xfrm_usersa_id *sa_id, + vrfid_t vrf_id, uint32_t seq) +{ + struct crypto_sadb_stats sa; + + if (!crypto_sadb_get_stats(vrf_id, sa_id->daddr, + sa_id->family, sa_id->spi, &sa)) + return -1; - crypto_sadb_del_sa(sa_info, vrfid); + return xfrm_client_send_sa_stats(seq, sa_id->spi, &sa); } /* @@ -1709,9 +1765,14 @@ int rtnl_process_xfrm_sa(const struct nlmsghdr *nlh, void *data) const struct xfrm_usersa_id *sa_id; const char *msg_type_str; vrfid_t vrf_id; - uint32_t ifindex = 0; + uint32_t seq, ifindex = 0; + struct xfrm_client_aux_data *xfrm_aux; + int rc = 0; - vrf_id = *(vrfid_t *)data; + xfrm_aux = (struct xfrm_client_aux_data *)data; + vrf_id = *xfrm_aux->vrf; + seq = xfrm_aux->seq; + xfrm_aux->ack_msg = true; switch (nlh->nlmsg_type) { @@ -1722,7 +1783,7 @@ int rtnl_process_xfrm_sa(const struct nlmsghdr *nlh, void *data) if (payload_size < 
sizeof(*sa_info)) { RTE_LOG(ERR, DATAPLANE, "xfrm: too short for %s\n", msg_type_str); - return MNL_CB_ERROR; + return -EINVAL; } sa_info = mnl_nlmsg_get_payload(nlh); if (mnl_attr_parse(nlh, sizeof(*sa_info), @@ -1730,47 +1791,77 @@ int rtnl_process_xfrm_sa(const struct nlmsghdr *nlh, void *data) RTE_LOG(ERR, DATAPLANE, "xfrm: can't parse attributes to %s\n", msg_type_str); - return MNL_CB_ERROR; + return -EINVAL; } - process_xfrm_newsa(sa_info, msg_type_str, attrs, vrf_id, - &ifindex); + rc = process_xfrm_newsa(sa_info, msg_type_str, attrs, vrf_id, + &ifindex); + if (rc != 0) + return rc; break; case XFRM_MSG_DELSA: if (payload_size < sizeof(*sa_id)) { RTE_LOG(ERR, DATAPLANE, "xfrm: too short for DELSA\n"); - return MNL_CB_ERROR; + return -EINVAL; } mnl_nlmsg_get_payload(nlh); if (mnl_attr_parse(nlh, sizeof(*sa_id), xfrm_attr, attrs) != MNL_CB_OK) { RTE_LOG(ERR, DATAPLANE, "xfrm: can't parse attributes to DELSA\n"); - return MNL_CB_ERROR; + return -EINVAL; } sa_info = get_nl_attr_payload(attrs[XFRMA_SA]); - if (!sa_info) + if (!sa_info) { RTE_LOG(ERR, DATAPLANE, "Could not decode DELSA XFRM_SA attribute\n"); - - process_xfrm_delsa(sa_info, vrf_id, &ifindex); + return -EINVAL; + } + rc = process_xfrm_delsa(sa_info, vrf_id, &ifindex); + if (rc != 0) + return rc; break; case XFRM_MSG_EXPIRE: if (payload_size < sizeof(*expire)) { RTE_LOG(ERR, DATAPLANE, "xfrm: too short for EXPIRE\n"); - return MNL_CB_ERROR; + return -EINVAL; } expire = mnl_nlmsg_get_payload(nlh); if (expire->hard) - crypto_sadb_del_sa(&expire->state, vrf_id); + rc = crypto_sadb_del_sa(&expire->state, vrf_id); break; + case XFRM_MSG_GETSA: + if (payload_size < sizeof(*sa_id)) { + RTE_LOG(ERR, DATAPLANE, "xfrm: too short for GETSA\n"); + return -EINVAL; + } + + sa_id = mnl_nlmsg_get_payload(nlh); + if (mnl_attr_parse(nlh, sizeof(*sa_id), + xfrm_attr, attrs) != MNL_CB_OK) { + RTE_LOG(ERR, DATAPLANE, + "xfrm: can't parse attributes to GETA\n"); + return -EINVAL; + } + + rc = process_xfrm_getsa(sa_id, 
vrf_id, seq); + if (rc != 0) + return rc; + /* + * If we have successfully processed the stats request + * then we do not need to send an ack back, as the + * stats response message is in effect the ack. + */ + xfrm_aux->ack_msg = false; + + return rc; default: RTE_LOG(ERR, DATAPLANE, "xfrm: unexpected netlink SA msg %u\n", nlh->nlmsg_type); - return MNL_CB_ERROR; + return -EINVAL; } /* @@ -1781,5 +1872,5 @@ int rtnl_process_xfrm_sa(const struct nlmsghdr *nlh, void *data) if (crypto_incmpl_xfrm(ifindex)) crypto_incmpl_xfrm_sa_add(ifindex, nlh, sa_info); - return MNL_CB_OK; + return rc; } diff --git a/src/netlink.h b/src/netlink.h index 66f2834d..d5091c1a 100644 --- a/src/netlink.h +++ b/src/netlink.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2011-2017 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -46,8 +46,6 @@ struct netlink_handler { enum cont_src_en cont_src); }; -void mc_del_if(int ifindex); - void register_netlink_handler(uint8_t, const struct netlink_handler *); int rtnl_process(const struct nlmsghdr *nlh, void *data); int rtnl_process_xfrm(const struct nlmsghdr *nlh, void *data); @@ -66,6 +64,6 @@ struct ifnet *lo_or_dummy_create(enum cont_src_en cont_src, unsigned int flags, const char *ifname, unsigned int mtu, - const struct ether_addr *eth_addr); + const struct rte_ether_addr *eth_addr); #endif /* NETLINK_H */ diff --git a/src/nh.h b/src/nh.h deleted file mode 100644 index 30d9be0c..00000000 --- a/src/nh.h +++ /dev/null @@ -1,171 +0,0 @@ -/* - * Common nexthop and nexthop_u processing - * - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. - * Copyright (c) 2015 by Brocade Communications Systems, Inc. - * All rights reserved. 
- * - * SPDX-License-Identifier: LGPL-2.1-only - */ -#ifndef NH_H -#define NH_H - -#include "compiler.h" -#include "if_llatbl.h" -#include "netinet6/route_v6.h" -#include "route.h" - -enum nh_fwd_ret { - NH_FWD_FAILURE = -1, - NH_FWD_SUCCESS = 0, - NH_FWD_RESWITCH_IPv4 = 2, - NH_FWD_RESWITCH_IPv6 = 3, - NH_FWD_RESWITCH_MPLS = 4, - NH_FWD_SLOWPATH, - NH_FWD_IPv4, - NH_FWD_IPv6, -}; - -enum nh_type { - NH_TYPE_V4GW, /* struct next_hop */ - NH_TYPE_V6GW, /* struct next_hop_v6 */ -}; - -union next_hop_v4_or_v6_ptr { - struct next_hop *v4; - struct next_hop_v6 *v6; -}; - -#define NH_STRING_MAX 100 - -/* - * funcs for manipulating abstract nh and nh set structs - */ - -/* accessors */ -static inline const union next_hop_outlabels * -nh_get_labels(enum nh_type nh_type, union next_hop_v4_or_v6_ptr nh) -{ - if (nh_type == NH_TYPE_V6GW) - return &nh.v6->outlabels; - - assert(nh_type == NH_TYPE_V4GW); - return &nh.v4->outlabels; -} - -static inline uint32_t -nh_get_flags(enum nh_type nh_type, union next_hop_v4_or_v6_ptr nh) -{ - if (nh_type == NH_TYPE_V6GW) - return nh.v6->flags; - - assert(nh_type == NH_TYPE_V4GW); - return nh.v4->flags; -} - -static ALWAYS_INLINE struct ifnet * -nh4_get_ifp(const struct next_hop *next_hop) -{ - if (next_hop->flags & (RTF_NEIGH_CREATED | RTF_NEIGH_PRESENT)) - return rcu_dereference(next_hop->u.lle->ifp); - - return rcu_dereference(next_hop->u.ifp); -} - -static ALWAYS_INLINE void -nh4_set_ifp(struct next_hop *next_hop, struct ifnet *ifp) -{ - if (next_hop->flags & (RTF_NEIGH_CREATED | RTF_NEIGH_PRESENT)) { - rte_panic("Can't set interface for NH with linked arp"); - return; - } - - rcu_assign_pointer(next_hop->u.ifp, ifp); -} - -static ALWAYS_INLINE bool -nh4_is_neigh_created(const struct next_hop *next_hop) -{ - return next_hop->flags & RTF_NEIGH_CREATED; -} - -static ALWAYS_INLINE bool -nh4_is_neigh_present(const struct next_hop *next_hop) -{ - return next_hop->flags & RTF_NEIGH_PRESENT; -} - -static ALWAYS_INLINE struct llentry * 
-nh4_get_lle(const struct next_hop *next_hop) -{ - if (next_hop->flags & (RTF_NEIGH_CREATED | RTF_NEIGH_PRESENT)) - return rcu_dereference(next_hop->u.lle); - - return NULL; -} - -static ALWAYS_INLINE void -nh6_set_ifp(struct next_hop_v6 *next_hop, struct ifnet *ifp) -{ - if (next_hop->flags & (RTF_NEIGH_CREATED | RTF_NEIGH_PRESENT)) { - rte_panic("Can't set interface for NH6 with linked neigh"); - return; - } - next_hop->u.ifp = ifp; -} - -static ALWAYS_INLINE struct ifnet * -nh6_get_ifp(const struct next_hop_v6 *next_hop) -{ - if (next_hop->flags & (RTF_NEIGH_CREATED | RTF_NEIGH_PRESENT)) - return next_hop->u.lle->ifp; - return next_hop->u.ifp; -} - -static ALWAYS_INLINE bool -nh6_is_neigh_created(const struct next_hop_v6 *next_hop) -{ - return next_hop->flags & RTF_NEIGH_CREATED; -} - -static ALWAYS_INLINE bool -nh6_is_neigh_present(const struct next_hop_v6 *next_hop) -{ - return next_hop->flags & RTF_NEIGH_PRESENT; -} - -static ALWAYS_INLINE struct llentry * -nh6_get_lle(const struct next_hop_v6 *next_hop) -{ - if (next_hop->flags & (RTF_NEIGH_CREATED | RTF_NEIGH_PRESENT)) - return next_hop->u.lle; - - return NULL; -} - -static ALWAYS_INLINE struct ifnet * -nh_get_if(enum nh_type nh_type, union next_hop_v4_or_v6_ptr nh) -{ - if (nh_type == NH_TYPE_V6GW) - return nh6_get_ifp(nh.v6); - - assert(nh_type == NH_TYPE_V4GW); - return nh4_get_ifp(nh.v4); -} - -static inline union next_hop_v4_or_v6_ptr -nh_select(enum nh_type nh_type, uint16_t nh_idx, - const struct rte_mbuf *m, uint16_t ether_type) -{ - union next_hop_v4_or_v6_ptr nh; - - if (nh_type == NH_TYPE_V6GW) - nh.v6 = nexthop6_select(nh_idx, m, ether_type); - else { - assert(nh_type == NH_TYPE_V4GW); - nh.v4 = nexthop_select(nh_idx, m, ether_type); - } - return nh; -} - -#endif /* NH_H */ diff --git a/src/nh_common.c b/src/nh_common.c new file mode 100644 index 00000000..6f7e4deb --- /dev/null +++ b/src/nh_common.c @@ -0,0 +1,1903 @@ +/* + * Copyright (c) 2020, AT&T Intellectual Property. All rights reserved. 
+ * + * SPDX-License-Identifier: LGPL-2.1-only + */ + +#include +#include + +#include "ecmp.h" +#include "fal.h" +#include "if_llatbl.h" +#include "ip_route.h" +#include "lcore_sched.h" +#include "nh_common.h" +#include "urcu.h" +#include "vplane_debug.h" + +static struct cds_lfht *next_hop_intf_hash; + +/* + * use entry 0 for AF_INET + * use entry 1 for AF_INET6 + */ +struct nh_common nh_common_af[2]; + +void nh_common_register(int family, struct nh_common *nh_common) +{ + if (family == AF_INET) { + nh_common_af[0] = *nh_common; + return; + } + + if (family == AF_INET6) { + nh_common_af[1] = *nh_common; + return; + } + + rte_panic("Invalid family %d for nh registration\n", family); +} + +static int af_family_to_family(int af_family) +{ + if (af_family == AF_INET) + return 0; + if (af_family == AF_INET6) + return 1; + + return -1; +} + +static struct cds_lfht *nh_common_get_hash_table(int af_family) +{ + int family = af_family_to_family(af_family); + if (family < 0) + return NULL; + + if (nh_common_af[family].nh_get_hash_tbl) + return nh_common_af[family].nh_get_hash_tbl(); + + return NULL; +} + +static nh_common_hash_fn *nh_common_get_hash_fn(int af_family) +{ + int family = af_family_to_family(af_family); + if (family < 0) + return NULL; + + if (nh_common_af[family].nh_hash) + return nh_common_af[family].nh_hash; + + return NULL; +} + +static nh_common_cmp_fn *nh_common_get_hash_cmp_fn(int af_family) +{ + int family = af_family_to_family(af_family); + if (family < 0) + return NULL; + + if (nh_common_af[family].nh_compare) + return nh_common_af[family].nh_compare; + + return NULL; +} + +static struct nexthop_table *nh_common_get_nh_table(int af_family) +{ + int family = af_family_to_family(af_family); + if (family < 0) + return NULL; + + if (nh_common_af[family].nh_get_nh_tbl) + return nh_common_af[family].nh_get_nh_tbl(); + + return NULL; +} + +static struct next_hop_list *nh_common_get_blackhole(int af_family) +{ + int family = af_family_to_family(af_family); + 
if (family < 0) + return NULL; + + if (nh_common_af[family].nh_get_blackhole) + return nh_common_af[family].nh_get_blackhole(); + + return NULL; +} + +ALWAYS_INLINE struct ifnet * +dp_nh_get_ifp(const struct next_hop *next_hop) +{ + if (next_hop->flags & (RTF_NEIGH_CREATED | RTF_NEIGH_PRESENT)) + return rcu_dereference(next_hop->u.lle->ifp); + + return rcu_dereference(next_hop->u.ifp); +} + +ALWAYS_INLINE struct ifnet * +dp_nh4_get_ifp(const struct next_hop *next_hop) +{ + return dp_nh_get_ifp(next_hop); +} + +ALWAYS_INLINE struct ifnet * +dp_nh6_get_ifp(const struct next_hop *next_hop) +{ + return dp_nh_get_ifp(next_hop); +} + +ALWAYS_INLINE void +nh_set_ifp(struct next_hop *next_hop, struct ifnet *ifp) +{ + if (next_hop->flags & (RTF_NEIGH_CREATED | RTF_NEIGH_PRESENT)) { + rte_panic("Can't set interface for NH with linked neigh"); + return; + } + + rcu_assign_pointer(next_hop->u.ifp, ifp); +} + +static int next_hop_list_backup_count(const struct next_hop_list *nhl) +{ + int count = 0; + int i; + struct next_hop *array = rcu_dereference(nhl->siblings); + + for (i = 0; i < nhl->nsiblings; i++) { + struct next_hop *next = array + i; + + if (next->flags & RTF_BACKUP) + count++; + } + return count; +} + +static int next_hop_list_primary_count(const struct next_hop_list *nhl) +{ + int backups = next_hop_list_backup_count(nhl); + + if (backups) + return nhl->nsiblings - backups; + + return 0; +} + +static int next_hop_list_num_primaries_usable(struct next_hop_list *nextl) +{ + struct next_hop *array; + int i, count = 0; + bool backup = false; + + array = nextl->siblings; + for (i = 0; i < nextl->nsiblings; i++) { + struct next_hop *next = array + i; + + if (next->flags & RTF_BACKUP) { + backup = true; + continue; + } + if (next->flags & RTF_UNUSABLE) + continue; + count++; + } + + if (backup) + return count; + + /* No backups, therefore no primaries */ + return 0; +} + +static void next_hop_map_use_backups(struct next_hop_list *nextl) +{ + int i, j; + int backups = 
nextl->nsiblings - nextl->primaries; + int slots[backups]; + struct next_hop *array; + + /* find the slots that we need to loop over */ + j = 0; + array = nextl->siblings; + for (i = 0; i < nextl->nsiblings; i++) { + struct next_hop *next = array + i; + + if (next->flags & RTF_BACKUP) { + slots[j] = i; + j++; + } + } + + j = 0; + for (i = 0; i < nextl->nh_map->count; i++) { + CMM_STORE_SHARED(nextl->nh_map->index[i], slots[j]); + j++; + if (j >= backups) + j = 0; + } +} + +static int next_hop_bitmap_get_usable_count(uint64_t bitmap) +{ + int i, usable_num = 0; + + for (i = 0; i < 64; i++) + if ((1ull << i) & bitmap) + usable_num++; + + return usable_num; +} + +static int next_hop_bitmap_find_next(uint64_t bitmap, int start) +{ + int i; + + for (i = start; i < 64; i++) + if ((1ull << i) & bitmap) + return i; + + return -1; +} + +static void next_hop_map_reinit(struct next_hop_list *nextl, + uint64_t usable_nhs) +{ + int next_to_write; + int i; + + next_to_write = next_hop_bitmap_find_next(usable_nhs, 0); + for (i = 0; i < nextl->nh_map->count; i++) { + + CMM_STORE_SHARED(nextl->nh_map->index[i], + next_to_write); + + next_to_write = next_hop_bitmap_find_next(usable_nhs, + ++next_to_write); + if (next_to_write < 0) + next_to_write = next_hop_bitmap_find_next(usable_nhs, + 0); + } +} + +static void +next_hop_list_update_map_usable(struct next_hop_list *nextl, + uint64_t usable_nhs, + uint64_t orig_nhs, + int loops) +{ + int i; + int entries_per_path; + uint64_t added_nhs = usable_nhs ^ orig_nhs; + int idx_to_swap; + int next_to_write; + int added = 0; + int usable_num; + int loop_count = 0; + + /* + * If a path has become usable then we need to put some entries + * in the map for it. Work out how many entries need to be added + * and then walk through the existing entries. Change the entry to + * the new usable based on round robin over the existing usable paths + * so that we maintain fairness as much as possible. 
+ * + * For example, if the list was: 2, 2, 3, 3, 2, 3 and we were + * enabling 1 this would become: 1, 2, 1, 3, 2, 3, i.e + * we should have 2 entries added, so swap out the first 2, and + * the first 3. + * + * If loops is non 0 then we are coming round again, which + * means we lost race with another thread, so refill it as + * if it was the initial init + */ + if (loops) { + next_hop_map_reinit(nextl, usable_nhs); + return; + } + + usable_num = next_hop_bitmap_get_usable_count(usable_nhs); + entries_per_path = nextl->nh_map->count / usable_num; + next_to_write = next_hop_bitmap_find_next(added_nhs, 0); + + idx_to_swap = next_hop_bitmap_find_next(orig_nhs, 0); + + do { + for (i = 0; i < nextl->nh_map->count; i++) { + if (idx_to_swap >= 0 && + !(CMM_ACCESS_ONCE(nextl->nh_map->index[i]) == + idx_to_swap)) + continue; + + CMM_STORE_SHARED(nextl->nh_map->index[i], + next_to_write); + + next_to_write = next_hop_bitmap_find_next( + added_nhs, ++next_to_write); + if (next_to_write < 0) + next_to_write = next_hop_bitmap_find_next( + added_nhs, 0); + added++; + + if (orig_nhs) { + /* were not previously using backup paths */ + idx_to_swap = next_hop_bitmap_find_next( + orig_nhs, ++idx_to_swap); + if (idx_to_swap < 0) + idx_to_swap = next_hop_bitmap_find_next( + orig_nhs, 0); + } + if (added == entries_per_path) + return; + } + + /* + * There is a case where the initial loop will not add + * all the entries due to the way the entries in the + * map are laid out. In this case redo the loop until + * we have done them all. An example of this is when + * the distribution is: + * 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0. + * However make sure we don't go round too many times + * as another thread could overwrite the entries we + * are looking for meaning we will never find them. + * In this case it will be fixed up by the thread that + * goes round the outer loop again. 
+ */ + loop_count++; + if (loop_count > added) + return; + } while (added < entries_per_path); +} + +static void +next_hop_list_update_map_unusable(struct next_hop_list *nextl, + uint64_t usable_nhs, + int loops) +{ + int i, j; + int new_index = 0; + + /* + * Update the map based on the given index being unusable. Walk + * across all entries, and if the map contains the index then + * replace it with the next value. The next value is based on a + * round robin over the remaining usable primary paths. + * + * If loops is non 0 then we are coming round again, which + * means we lost race with another thread, so refill it as + * if it was the initial init + */ + if (loops) { + next_hop_map_reinit(nextl, usable_nhs); + return; + } + + for (i = 0; i < nextl->nh_map->count; i++) { + if (!((1ull << nextl->nh_map->index[i]) & usable_nhs)) { + /* Was using the now unusable path */ + for (j = 0; j < nextl->nsiblings; j++) { + /* + * loop through the usable paths to + * get the next one to use. + */ + if (!(usable_nhs & (1ull << new_index))) { + /* not usable */ + new_index++; + if (new_index >= nextl->nsiblings) + new_index = 0; + continue; + } + CMM_STORE_SHARED(nextl->nh_map->index[i], + new_index); + new_index++; + if (new_index >= nextl->nsiblings) + new_index = 0; + break; + } + } + } +} + +/* + * Called to update a map when a path has become unusable. + */ +static void next_hop_list_update_map(struct next_hop_list *nextl, int index, + bool usable) +{ + uint64_t usable_nhs; + uint64_t orig_nhs; + int loops = 0; + + /* + * To update (add or remove) we maintain an atomic bitmask of the + * usable paths, and verify at the end of the changes that we were + * the only writer. If another index has changed then we need to go + * round the loop again as we lost the race to another thread. + * Second and later times round it will refill it as if it was the + * initial init. 
This is because it is in an unknown state due to + * possible collisions the first time round which can lead to + * unfairness in the allocation if we continue to update based on + * the current state. + */ + do { + usable_nhs = rte_atomic64_read(&nextl->usable_prim_nh_bitmask); + orig_nhs = usable_nhs; + + if (usable) + usable_nhs |= (1ull << index); + else + usable_nhs &= ~(1ull << index); + + if (next_hop_bitmap_get_usable_count(usable_nhs) == 0) + next_hop_map_use_backups(nextl); + else + if (usable) + next_hop_list_update_map_usable(nextl, + usable_nhs, + orig_nhs, + loops); + else + next_hop_list_update_map_unusable(nextl, + usable_nhs, + loops); + loops++; + } while (!rte_atomic64_cmpset((volatile uint64_t *) + &nextl->usable_prim_nh_bitmask, + orig_nhs, + usable_nhs)); +} + +static struct next_hop_list *nexthop_lookup(int family, + const struct nexthop_hash_key *key) +{ + struct cds_lfht_iter iter; + struct cds_lfht_node *node; + struct cds_lfht *hash_tbl = nh_common_get_hash_table(family); + nh_common_hash_fn *hash_fn = nh_common_get_hash_fn(family); + nh_common_cmp_fn *cmp_fn = nh_common_get_hash_cmp_fn(family); + + if (!hash_tbl || !hash_fn || !cmp_fn) + return NULL; + + cds_lfht_lookup(hash_tbl, + hash_fn(key, 0), + cmp_fn, key, &iter); + node = cds_lfht_iter_get_node(&iter); + if (node) + return caa_container_of(node, struct next_hop_list, nh_node); + return NULL; +} + +/* Reuse existing next hop entry */ +static struct next_hop_list *nexthop_reuse(int family, + const struct nexthop_hash_key *key, + uint32_t *slot) +{ + struct next_hop_list *nhl; + int index; + + nhl = nexthop_lookup(family, key); + if (!nhl) + return NULL; + + index = nhl->index; + + *slot = index; + ++nhl->refcount; + + DP_DEBUG(ROUTE, DEBUG, ROUTE, + "%s nexthop reuse: nexthop %d, refs %u\n", + family == AF_INET ? 
"IPv4" : "IPv6", + index, nhl->refcount); + + return nhl; +} + +static int nexthop_hash_insert(int family, struct next_hop_list *nhl, + const struct nexthop_hash_key *key) +{ + struct cds_lfht_node *ret_node; + unsigned long hash; + struct cds_lfht *hash_tbl = nh_common_get_hash_table(family); + nh_common_hash_fn *hash_fn = nh_common_get_hash_fn(family); + nh_common_cmp_fn *cmp_fn = nh_common_get_hash_cmp_fn(family); + + cds_lfht_node_init(&nhl->nh_node); + hash = hash_fn(key, 0); + + ret_node = cds_lfht_add_unique(hash_tbl, hash, + cmp_fn, key, + &nhl->nh_node); + + return (ret_node != &nhl->nh_node) ? EEXIST : 0; +} + +struct next_hop_list *nexthop_alloc(int size) +{ + struct next_hop_list *nextl; + + nextl = calloc(1, sizeof(*nextl)); + if (unlikely(!nextl)) { + RTE_LOG(ERR, ROUTE, "can't alloc next_hop_list\n"); + return NULL; + } + + nextl->nh_fal_obj = calloc(size, sizeof(*nextl->nh_fal_obj)); + if (!nextl->nh_fal_obj) { + free(nextl); + return NULL; + } + + if (size == 1) { + /* Optimize for non-ECMP case by staying in cache line */ + nextl->siblings = &nextl->hop0; + } else { + nextl->siblings = calloc(1, size * sizeof(struct next_hop)); + if (unlikely(nextl->siblings == NULL)) { + free(nextl->nh_fal_obj); + free(nextl); + return NULL; + } + } + nextl->nsiblings = size; + return nextl; +} + +void __nexthop_destroy(struct next_hop_list *nextl) +{ + unsigned int i; + + for (i = 0; i < nextl->nsiblings; i++) + nh_outlabels_destroy(&nextl->siblings[i].outlabels); + if (nextl->siblings != &nextl->hop0) + free(nextl->siblings); + if (nextl->nh_map) + free(nextl->nh_map); + + free(nextl->nh_fal_obj); + free(nextl); +} + +/* Callback from RCU after all other threads are done. */ +void nexthop_destroy(struct rcu_head *head) +{ + struct next_hop_list *nextl + = caa_container_of(head, struct next_hop_list, rcu); + + __nexthop_destroy(nextl); +} + +/* + * Structure to store in the top level interface hash. 
Part of the 2 level + * hash to allow lookups of all the NHs using an interface/gateway pair. + */ +struct next_hop_intf_entry { + uint32_t ifindex; + struct cds_lfht_node intf_hash_tbl_node; + struct cds_lfht *gw_hash_tbl; + struct rcu_head rcu; +}; + +static void next_hop_intf_entry_free(struct rcu_head *head) +{ + struct next_hop_intf_entry *if_entry + = caa_container_of(head, struct next_hop_intf_entry, rcu); + + free(if_entry); +} + +static int next_hop_intf_hash_cmp_fn(struct cds_lfht_node *node, + const void *key) +{ + struct next_hop_intf_entry *entry; + uint32_t *ifindex = (uint32_t *)key; + + entry = caa_container_of(node, struct next_hop_intf_entry, + intf_hash_tbl_node); + + if (entry->ifindex == *ifindex) + return 1; + + return 0; +} + +/* + * Return the hash table that hold next_hops for the given interface. + */ +static struct next_hop_intf_entry * +next_hop_intf_hash_lookup(const struct ifnet *ifp) +{ + struct cds_lfht_iter iter; + struct cds_lfht_node *node; + + if (!next_hop_intf_hash) + return NULL; + + cds_lfht_lookup(next_hop_intf_hash, ifp->if_index, + next_hop_intf_hash_cmp_fn, &ifp->if_index, &iter); + + node = cds_lfht_iter_get_node(&iter); + if (node) + return caa_container_of(node, struct next_hop_intf_entry, + intf_hash_tbl_node); + return NULL; +} + + +#define NH_INTF_HASH_TBL_MIN_SIZE 8 +#define NH_INTF_HASH_TBL_MAX_SIZE 1024 + +static int next_hop_intf_hash_init(void) +{ + if (next_hop_intf_hash) + return 0; + + next_hop_intf_hash = cds_lfht_new(NH_INTF_HASH_TBL_MIN_SIZE, + NH_INTF_HASH_TBL_MIN_SIZE, + NH_INTF_HASH_TBL_MAX_SIZE, + CDS_LFHT_AUTO_RESIZE, + NULL); + if (!next_hop_intf_hash) + return -ENOMEM; + return 0; +} + +static struct next_hop_intf_entry * +next_hop_intf_hash_add(const struct next_hop *next) +{ + struct next_hop_intf_entry *entry; + struct ifnet *ifp = dp_nh_get_ifp(next); + struct cds_lfht_node *ret_node; + unsigned long hash; + + if (next_hop_intf_hash_init()) + return NULL; + + entry = malloc(sizeof(*entry)); + 
if (!entry) + return NULL; + + entry->ifindex = ifp->if_index; + entry->gw_hash_tbl = cds_lfht_new(NH_INTF_HASH_TBL_MIN_SIZE, + NH_INTF_HASH_TBL_MIN_SIZE, + NH_INTF_HASH_TBL_MAX_SIZE, + CDS_LFHT_AUTO_RESIZE, + NULL); + if (!entry->gw_hash_tbl) { + free(entry); + return NULL; + } + + cds_lfht_node_init(&entry->intf_hash_tbl_node); + hash = ifp->if_index; + + ret_node = cds_lfht_add_unique(next_hop_intf_hash, hash, + next_hop_intf_hash_cmp_fn, + &entry->ifindex, + &entry->intf_hash_tbl_node); + + if (ret_node != &entry->intf_hash_tbl_node) { + /* This entry exists - this should not happen */ + cds_lfht_destroy(entry->gw_hash_tbl, NULL); + free(entry); + return caa_container_of(ret_node, + struct next_hop_intf_entry, + intf_hash_tbl_node); + } + + return entry; +} + +/* + * Structure to store in the 2nd level hash. Part of the 2 level + * hash to allow lookups of all the NHs using an interface/gateway pair. + */ +struct next_hop_gw_entry { + struct ip_addr addr; + struct cds_lfht_node gw_hash_tbl_node; + struct cds_list_head intf_gw_nh_list; + struct rcu_head rcu; +}; + +static unsigned long next_hop_gw_hash_fn(const struct ip_addr *key) +{ + int words = 1; /* for key->type */ + + if (key->type == AF_INET) + words += sizeof(struct in_addr) / 4; + else if (key->type == AF_INET6) + words += sizeof(struct in6_addr) / 4; + return rte_jhash_32b((uint32_t *)key, words, 0); +} + +static int next_hop_gw_hash_cmp_fn(struct cds_lfht_node *node, + const void *key) +{ + struct next_hop_gw_entry *gw_entry; + const struct ip_addr *addr = key; + + gw_entry = caa_container_of(node, struct next_hop_gw_entry, + gw_hash_tbl_node); + + if (dp_addr_eq(addr, &gw_entry->addr)) + return 1; + + return 0; +} + +static struct next_hop_gw_entry * +next_hop_gw_hash_lookup(struct cds_lfht *gw_hash_tbl, + const struct ip_addr *gw) +{ + struct cds_lfht_iter iter; + struct cds_lfht_node *node; + + cds_lfht_lookup(gw_hash_tbl, + next_hop_gw_hash_fn(gw), + next_hop_gw_hash_cmp_fn, gw, &iter); + + node 
= cds_lfht_iter_get_node(&iter); + if (node) + return caa_container_of(node, struct next_hop_gw_entry, + gw_hash_tbl_node); + return NULL; +} + +static struct next_hop_gw_entry * +next_hop_gw_hash_add(struct cds_lfht *gw_hash_tbl, + const struct ip_addr *gw) +{ + struct cds_lfht_node *ret_node; + struct next_hop_gw_entry *gw_entry; + + gw_entry = malloc(sizeof(*gw_entry)); + if (!gw_entry) + return NULL; + + gw_entry->addr = *gw; + CDS_INIT_LIST_HEAD(&gw_entry->intf_gw_nh_list); + + ret_node = cds_lfht_add_unique(gw_hash_tbl, + next_hop_gw_hash_fn(&gw_entry->addr), + next_hop_gw_hash_cmp_fn, &gw_entry->addr, + &gw_entry->gw_hash_tbl_node); + + if (ret_node != &gw_entry->gw_hash_tbl_node) { + /* This entry exists - this should not happen */ + free(gw_entry); + return caa_container_of(ret_node, + struct next_hop_gw_entry, + gw_hash_tbl_node); + } + + return gw_entry; +} + +typedef void (next_hop_usability_change_cb)(struct next_hop *next, + enum dp_rt_path_state state); + +static void next_hop_usability_check_update_cb(struct next_hop *next, + enum dp_rt_path_state state) +{ + struct next_hop_list *nextl = next->nhl; + int rc; + bool usable; + + usable = state == DP_RT_PATH_USABLE; + rc = fal_ip_upd_next_hop_state(nextl->nh_fal_obj, + next - nextl->siblings, + usable); + if (rc < 0 && (rc != -EOPNOTSUPP)) { + struct ifnet *ifp = dp_nh_get_ifp(next); + char b[INET6_ADDRSTRLEN]; + + RTE_LOG(ERR, ROUTE, + "FAL Unable to mark next hop unusable %s %s (%s)\n", + ifp ? 
ifp->if_name : "no interface", + inet_ntop(next->gateway.type, + &next->gateway.address, + b, sizeof(b)), + strerror(-rc)); + } + next_hop_list_update_map(nextl, next - nextl->siblings, usable); +} + +static void +next_hop_intf_gw_list_mark_path_state(struct cds_list_head *list_head, + enum dp_rt_path_state state) +{ + struct cds_list_head *list_entry, *next; + struct next_hop *nh; + + cds_list_for_each_safe(list_entry, next, list_head) { + nh = cds_list_entry(list_entry, struct next_hop, + if_gw_list_entry); + + + if (state == DP_RT_PATH_USABLE) { + if (!(CMM_ACCESS_ONCE(nh->flags) & RTF_UNUSABLE)) + /* Ignore if no change to state */ + continue; + CMM_STORE_SHARED(nh->flags, nh->flags & ~RTF_UNUSABLE); + } else { + if ((CMM_ACCESS_ONCE(nh->flags) & RTF_UNUSABLE)) + /* Ignore if no change to state */ + continue; + CMM_STORE_SHARED(nh->flags, nh->flags | RTF_UNUSABLE); + } + + next_hop_usability_check_update_cb(nh, state); + } +} + +void next_hop_mark_path_state(enum dp_rt_path_state state, + const struct dp_rt_path_unusable_key *key) +{ + struct ifnet *ifp = dp_ifnet_byifindex(key->ifindex); + struct next_hop_intf_entry *intf_entry; + struct next_hop_gw_entry *gw_entry; + struct cds_lfht_iter iter; + + /* + * The interface may have been deleted and since we may not be + * running in the main thread, then it is expected there could + * be a window of time between the interface being deleted and + * sources updating their datastructures to reflect this. 
+ */ + if (!ifp) + return; + + intf_entry = next_hop_intf_hash_lookup(ifp); + if (!intf_entry) + return; + + if (key->type == DP_RT_PATH_UNUSABLE_KEY_INTF) { + cds_lfht_for_each_entry(intf_entry->gw_hash_tbl, + &iter, gw_entry, gw_hash_tbl_node) { + /* All NHs using this interface are unusable */ + next_hop_intf_gw_list_mark_path_state( + &gw_entry->intf_gw_nh_list, + state); + } + } else if (key->type == DP_RT_PATH_UNUSABLE_KEY_INTF_NEXTHOP) { + gw_entry = next_hop_gw_hash_lookup(intf_entry->gw_hash_tbl, + &key->nexthop); + if (!gw_entry) + return; + next_hop_intf_gw_list_mark_path_state( + &gw_entry->intf_gw_nh_list, + state); + } +} + +static void +next_hop_list_check_usability(struct next_hop_list *nextl, + next_hop_usability_change_cb change_cb) +{ + int backups = next_hop_list_backup_count(nextl); + int primaries = nextl->nsiblings - backups; + struct next_hop *array; + enum dp_rt_path_state state; + struct dp_rt_path_unusable_key key; + int i; + struct ifnet *ifp; + + if (!backups) { + nextl->primaries = 0; + return; + } + + nextl->primaries = primaries; + + array = nextl->siblings; + for (i = 0; i < nextl->nsiblings; i++) { + struct next_hop *next = array + i; + + ifp = dp_nh_get_ifp(next); + if (!ifp) + continue; + key.ifindex = ifp->if_index; + if (nh_is_gw(next)) { + key.type = DP_RT_PATH_UNUSABLE_KEY_INTF_NEXTHOP; + key.nexthop = next->gateway; + } else { + key.type = DP_RT_PATH_UNUSABLE_KEY_INTF; + } + + state = dp_rt_signal_check_paths_state(&key); + + if (state == DP_RT_PATH_UNUSABLE) { + if (!(CMM_ACCESS_ONCE(next->flags) & RTF_UNUSABLE)) { + CMM_STORE_SHARED(next->flags, + next->flags | RTF_UNUSABLE); + if (change_cb) + change_cb(next, state); + } + } else if (state == DP_RT_PATH_USABLE) { + if ((CMM_ACCESS_ONCE(next->flags) & RTF_UNUSABLE)) { + CMM_STORE_SHARED(next->flags, + next->flags & ~RTF_UNUSABLE); + if (change_cb) + change_cb(next, state); + } + } + } +} + +static int next_hop_track_protected_nh(struct next_hop *next) +{ + struct 
next_hop_intf_entry *if_entry; + struct next_hop_gw_entry *gw_entry; + struct ifnet *ifp = dp_nh_get_ifp(next); + + if (!ifp || next->flags & RTF_BACKUP) + return 0; + + if_entry = next_hop_intf_hash_lookup(ifp); + if (!if_entry) { + if_entry = next_hop_intf_hash_add(next); + if (!if_entry) { + RTE_LOG(ERR, ROUTE, + "Failed to add protected NH to intf hash %d\n", + ifp->if_index); + return -1; + } + } + + gw_entry = next_hop_gw_hash_lookup(if_entry->gw_hash_tbl, + &next->gateway); + if (!gw_entry) { + gw_entry = next_hop_gw_hash_add(if_entry->gw_hash_tbl, + &next->gateway); + if (!gw_entry) { + RTE_LOG(ERR, ROUTE, + "Failed to add protected NH to gw hash %d\n", + ifp->if_index); + return -1; + } + } + + cds_list_add_rcu(&next->if_gw_list_entry, &gw_entry->intf_gw_nh_list); + + return 0; +} + +static void next_hop_gw_entry_free(struct rcu_head *head) +{ + struct next_hop_gw_entry *gw_entry + = caa_container_of(head, struct next_hop_gw_entry, rcu); + + free(gw_entry); +} + +static int next_hop_untrack_protected_nh(struct next_hop *next) +{ + struct next_hop_intf_entry *if_entry; + struct next_hop_gw_entry *gw_entry; + struct ifnet *ifp = dp_nh_get_ifp(next); + struct cds_lfht_iter iter; + struct cds_lfht_node *node; + + if (!ifp || next->flags & RTF_BACKUP) + return 0; + + if_entry = next_hop_intf_hash_lookup(ifp); + if (!if_entry) + return 0; + + gw_entry = next_hop_gw_hash_lookup(if_entry->gw_hash_tbl, + &next->gateway); + if (!gw_entry) + return 0; + + cds_list_del_rcu(&next->if_gw_list_entry); + + /* Delete the list if it is empty - no more with matching addr. 
*/ + if (cds_list_empty(&gw_entry->intf_gw_nh_list)) { + cds_lfht_del(if_entry->gw_hash_tbl, + &gw_entry->gw_hash_tbl_node); + call_rcu(&gw_entry->rcu, next_hop_gw_entry_free); + + } + + /* Delete the 2nd level hash table if empty - no more on interface */ + cds_lfht_first(if_entry->gw_hash_tbl, &iter); + node = cds_lfht_iter_get_node(&iter); + if (!node) { + cds_lfht_destroy(if_entry->gw_hash_tbl, NULL); + cds_lfht_del(next_hop_intf_hash, + &if_entry->intf_hash_tbl_node); + call_rcu(&if_entry->rcu, next_hop_intf_entry_free); + } + + /* Delete the 1st level hash table if empty - no more protected nhs. */ + cds_lfht_first(next_hop_intf_hash, &iter); + node = cds_lfht_iter_get_node(&iter); + if (!node) { + cds_lfht_destroy(next_hop_intf_hash, NULL); + next_hop_intf_hash = NULL; + } + + return 0; +} + +static void next_hop_list_track_protected_nh(struct next_hop_list *nextl) +{ + int i; + struct next_hop *array; + + if (nextl->primaries) { + + array = nextl->siblings; + for (i = 0; i < nextl->nsiblings; i++) { + struct next_hop *next = array + i; + + (void)next_hop_track_protected_nh(next); + } + } + + /* + * Now we need to recheck the usability to catch the case where + * the next_hop became unusable after installed in the fal but + * before it was installed in the hash table + */ + next_hop_list_check_usability(nextl, + next_hop_usability_check_update_cb); +} + +static void next_hop_list_untrack_protected_nh(struct next_hop_list *nextl) +{ + int i; + struct next_hop *array; + + if (nextl->primaries) { + + array = nextl->siblings; + for (i = 0; i < nextl->nsiblings; i++) { + struct next_hop *next = array + i; + + (void)next_hop_untrack_protected_nh(next); + } + } +} + +/* + * When building a new next_hop_list to swap into the forwarding state + * we need to make sure that the lists at the end of the 2 stage hash + * contain the new entry not the old one. 
+ */ +static void next_hop_fixup_protected_tracking(struct next_hop_list *old, + struct next_hop_list *new) +{ + struct next_hop *old_array; + struct next_hop *new_array; + struct ifnet *ifp; + struct next_hop_intf_entry *intf_entry; + struct next_hop_gw_entry *gw_entry; + int i; + + ASSERT_MAIN(); + + if (!new->primaries) + return; + + old_array = old->siblings; + new_array = new->siblings; + for (i = 0; i < new->nsiblings; i++) { + struct next_hop *next_old = old_array + i; + struct next_hop *next_new = new_array + i; + + ifp = dp_nh_get_ifp(next_old); + if (!ifp || next_old->flags & RTF_BACKUP) + continue; + + intf_entry = next_hop_intf_hash_lookup(ifp); + if (!intf_entry) + return; + + gw_entry = next_hop_gw_hash_lookup(intf_entry->gw_hash_tbl, + &next_old->gateway); + if (!gw_entry) + return; + /* + * Take the old entry out of the list, then add the new + * one. + */ + cds_list_del_rcu(&next_old->if_gw_list_entry); + cds_list_add_rcu(&next_new->if_gw_list_entry, + &gw_entry->intf_gw_nh_list); + } +} +/* + * Create the nh_map for the list. Only use the map if there are backup paths. + * We use (#primary_paths * (#primary_paths -1)) as the initial size of the map + * as this gives us fairness on the first cutover of a path. This is limited + * to a max of 64 entries to make sure it stays in a cache line. + */ +static int next_hop_list_init_map(struct next_hop_list *nextl) +{ + int num_entries; + int primary_num = 0; + int i, j = 0; + struct next_hop *array; + int primaries; + int usable_prim = 0; + uint64_t usable = 0; + + /* + * Check usability of NHs before we build the map as we do not + * want unusable ones in there showing as usable. 
+ */ + next_hop_list_check_usability(nextl, NULL); + + if (nextl->primaries == 0) + return 0; + + nextl->nh_map = malloc_aligned(sizeof(*nextl->nh_map)); + if (!nextl->nh_map) + return -ENOMEM; + + rte_atomic64_set(&nextl->usable_prim_nh_bitmask, 0); + + /* + * Use the amount of usable primaries to work out the size so + * we still get fairness after another one goes down. + */ + usable_prim = next_hop_list_num_primaries_usable(nextl); + primaries = nextl->primaries; + if (primaries > 1) + num_entries = primaries * (primaries - 1); + else + num_entries = 1; + + if (num_entries > NH_MAP_MAX_ENTRIES) + num_entries = NH_MAP_MAX_ENTRIES; + + if (num_entries < (nextl->nsiblings - nextl->primaries)) + /* Make sure we have enough entries for primary and backup */ + num_entries = (nextl->nsiblings - nextl->primaries); + + nextl->nh_map->count = num_entries; + if (usable_prim == 0) { + next_hop_map_use_backups(nextl); + return 0; + } + + array = nextl->siblings; + for (i = 0; i < nextl->nsiblings; i++) { + struct next_hop *next = array + i; + + if (next->flags & (RTF_BACKUP | RTF_UNUSABLE)) + continue; + + for (j = 0; j < primaries; j++) + nextl->nh_map->index[j * usable_prim + primary_num] = i; + primary_num++; + usable |= (1ull << i); + } + rte_atomic64_set(&nextl->usable_prim_nh_bitmask, usable); + + return 0; +} + +static void next_hop_list_setup_back_ptrs(struct next_hop_list *nextl) +{ + int i; + struct next_hop *array; + + array = nextl->siblings; + for (i = 0; i < nextl->nsiblings; i++) { + struct next_hop *next = array + i; + + next->nhl = nextl; + } +} + +/* Lookup (or create) nexthop based on hop information */ +int nexthop_new(int family, const struct next_hop *nh, uint16_t size, + uint8_t proto, enum fal_next_hop_group_use use, uint32_t *slot) +{ + struct nexthop_hash_key key = { + .nh = nh, + .size = size, + .proto = proto, + .use = use, + }; + struct next_hop_list *nextl; + uint32_t rover; + uint32_t nh_iter; + int ret; + struct nexthop_table *nh_table = 
nh_common_get_nh_table(family); + + if (!nh_table) { + RTE_LOG(ERR, ROUTE, "Invalid family %d for new nexthop\n", + family); + return -EINVAL; + } + + rover = nh_table->rover; + nextl = nexthop_reuse(family, &key, slot); + if (nextl) + return 0; + + if (unlikely(nh_table->in_use == NEXTHOP_HASH_TBL_SIZE)) { + RTE_LOG(ERR, ROUTE, "IPv%d next hop table full\n", + family == AF_INET ? 4 : 6); + return -ENOSPC; + } + + nextl = nexthop_alloc(size); + if (!nextl) { + RTE_LOG(ERR, ROUTE, "IPv%d next hop table alloc failed\n", + family == AF_INET ? 4 : 6); + return -ENOMEM; + } + + nextl->nsiblings = size; + nextl->refcount = 1; + nextl->index = rover; + nextl->proto = proto; + nextl->use = use; + if (size == 1) + nextl->hop0 = *nh; + else + memcpy(nextl->siblings, nh, size * sizeof(struct next_hop)); + next_hop_list_setup_back_ptrs(nextl); + + if (next_hop_list_init_map(nextl)) { + __nexthop_destroy(nextl); + return -ENOMEM; + } + + if (unlikely(nexthop_hash_insert(family, nextl, &key))) { + __nexthop_destroy(nextl); + return -ENOMEM; + } + + nextl->primaries = next_hop_list_primary_count(nextl); + + ret = fal_ip_new_next_hops(nextl->use, nextl->nsiblings, + nextl->siblings, &nextl->nhg_fal_obj, + nextl->nh_fal_obj); + if (ret < 0 && ret != -EOPNOTSUPP) + RTE_LOG(ERR, ROUTE, + "FAL IPv4 next-hop-group create failed: %s\n", + strerror(-ret)); + nextl->pd_state = fal_state_to_pd_state(ret); + + next_hop_list_track_protected_nh(nextl); + + nh_iter = rover; + do { + nh_iter++; + if (nh_iter >= NEXTHOP_HASH_TBL_SIZE) + nh_iter = 0; + } while ((rcu_dereference(nh_table->entry[nh_iter]) != NULL) && + likely(nh_iter != rover)); + + nh_table->rover = nh_iter; + *slot = rover; + nh_table->in_use++; + + rcu_assign_pointer(nh_table->entry[rover], nextl); + + return 0; +} + +struct next_hop * +nexthop_create(struct ifnet *ifp, struct ip_addr *gw, uint32_t flags, + uint16_t num_labels, label_t *labels) +{ + struct next_hop *next = malloc(sizeof(struct next_hop)); + + if (next) { + /* 
Copying the v6 addr guarantees all bits are copied */ + next->gateway = *gw; + next->flags = flags; + nh_set_ifp(next, ifp); + + if (!nh_outlabels_set(&next->outlabels, num_labels, + labels)) { + RTE_LOG(ERR, ROUTE, + "Failed to set outlabels for nexthop with %u labels\n", + num_labels); + free(next); + return NULL; + } + } + return next; +} + +void nexthop_put(int family, uint32_t idx) +{ + struct next_hop_list *nextl; + struct nexthop_table *nh_table = nh_common_get_nh_table(family); + struct cds_lfht *hash_tbl = nh_common_get_hash_table(family); + + if (!nh_table) { + RTE_LOG(ERR, ROUTE, "Invalid family %d for nexthop put\n", + family); + return; + } + + nextl = rcu_dereference(nh_table->entry[idx]); + if (unlikely(!nextl)) + return; + if (--nextl->refcount == 0) { + struct next_hop *array = nextl->siblings; + int ret; + int i; + + rcu_assign_pointer(nh_table->entry[idx], NULL); + --nh_table->in_use; + + for (i = 0; i < nextl->nsiblings; i++) { + struct next_hop *nh = array + i; + + if (nh_is_neigh_present(nh)) + nh_table->neigh_present--; + if (nh_is_neigh_created(nh)) + nh_table->neigh_created--; + } + + if (fal_state_is_obj_present(nextl->pd_state)) { + ret = fal_ip_del_next_hops(nextl->nhg_fal_obj, + nextl->nsiblings, + nextl->nh_fal_obj); + if (ret < 0) { + RTE_LOG(ERR, ROUTE, + "FAL IPv%d next-hop-group delete failed: %s\n", + family == AF_INET ? 4 : 6, + strerror(-ret)); + } + } + + next_hop_list_untrack_protected_nh(nextl); + + cds_lfht_del(hash_tbl, &nextl->nh_node); + call_rcu(&nextl->rcu, nexthop_destroy); + } +} + +int next_hop_copy(struct next_hop *old, struct next_hop *new) +{ + bool success; + + new->u = old->u; + new->flags = old->flags; + new->gateway = old->gateway; + success = nh_outlabels_copy(&old->outlabels, &new->outlabels); + + if (success) + return 0; + + return -ENOMEM; +} + +/* + * Create an array of next_hops based on the hops in the next_hop_list. 
+ */ +struct next_hop * +next_hop_list_copy_next_hops(struct next_hop_list *nhl, int *size) +{ + struct next_hop *next, *n; + struct next_hop *array = rcu_dereference(nhl->siblings); + int i; + + *size = nhl->nsiblings; + n = next = calloc(sizeof(struct next_hop), *size); + if (!next) + return NULL; + + for (i = 0; i < nhl->nsiblings; i++) { + struct next_hop *nhl_next = array + i; + + if (next_hop_copy(nhl_next, n) < 0) + goto fail; + n++; + } + return next; + +fail: + /* Copy of a nh failed so cleanup */ + n = next; + for (i = 0; i < nhl->nsiblings; i++) + nh_outlabels_destroy(&n->outlabels); + free(next); + return NULL; +} + +int +nexthop_hash_del_add(int family, + struct next_hop_list *old_nhl, + struct next_hop_list *new_nhl) +{ + struct nexthop_hash_key key = { + .nh = new_nhl->siblings, + .size = new_nhl->nsiblings, + .proto = new_nhl->proto, + .use = new_nhl->use + }; + struct cds_lfht *hash_tbl = nh_common_get_hash_table(family); + + if (!hash_tbl) { + RTE_LOG(ERR, ROUTE, "Invalid family %d for nh hash del add\n", + family); + return -EINVAL; + } + + int rc; + + /* Remove old one */ + rc = cds_lfht_del(hash_tbl, &old_nhl->nh_node); + assert(rc == 0); + if (rc != 0) + return rc; + + /* add new one */ + return nexthop_hash_insert(family, new_nhl, &key); +} + +bool nh_is_connected(const struct next_hop *nh) +{ + if (nh->flags & (RTF_BLACKHOLE | RTF_REJECT | + RTF_SLOWPATH | RTF_GATEWAY | + RTF_LOCAL | RTF_NOROUTE)) + return false; + + return true; +} + +bool nh_is_local(const struct next_hop *nh) +{ + if (nh->flags & RTF_LOCAL) + return true; + + return false; +} + +bool nh_is_gw(const struct next_hop *nh) +{ + if (nh->flags & RTF_GATEWAY) + return true; + + return false; +} + +void nh_set_neigh_present(int family, + struct next_hop *next_hop, + struct llentry *lle) +{ + struct nexthop_table *nh_table = nh_common_get_nh_table(family); + + if (!nh_table) { + RTE_LOG(ERR, ROUTE, + "Invalid family %d for set neigh present\n", + family); + return; + } + + 
assert((next_hop->flags & RTF_NEIGH_PRESENT) == 0); + next_hop->flags |= RTF_NEIGH_PRESENT; + next_hop->u.lle = lle; + nh_table->neigh_present++; +} + +void nh_clear_neigh_present(int family, + struct next_hop *next_hop) +{ + struct nexthop_table *nh_table = nh_common_get_nh_table(family); + + if (!nh_table) { + RTE_LOG(ERR, ROUTE, + "Invalid family %d for clear neigh present\n", + family); + return; + } + + assert(next_hop->flags & RTF_NEIGH_PRESENT); + next_hop->flags &= ~RTF_NEIGH_PRESENT; + next_hop->u.ifp = next_hop->u.lle->ifp; + nh_table->neigh_present--; +} + +void nh_set_neigh_created(int family, + struct next_hop *next_hop, + struct llentry *lle) +{ + struct nexthop_table *nh_table = nh_common_get_nh_table(family); + + if (!nh_table) { + RTE_LOG(ERR, ROUTE, + "Invalid family %d for set neigh created\n", + family); + return; + } + + assert((next_hop->flags & RTF_NEIGH_CREATED) == 0); + next_hop->flags |= RTF_NEIGH_CREATED; + next_hop->u.lle = lle; + nh_table->neigh_created++; +} + +void nh_clear_neigh_created(int family, + struct next_hop *next_hop) +{ + struct nexthop_table *nh_table = nh_common_get_nh_table(family); + + if (!nh_table) { + RTE_LOG(ERR, ROUTE, + "Invalid family %d for clear neigh created\n", + family); + return; + } + assert(next_hop->flags & RTF_NEIGH_CREATED); + next_hop->flags &= ~RTF_NEIGH_CREATED; + next_hop->u.ifp = next_hop->u.lle->ifp; + nh_table->neigh_created--; +} + +int next_hop_list_nc_count(const struct next_hop_list *nhl) +{ + int count = 0; + int i; + struct next_hop *array = rcu_dereference(nhl->siblings); + + for (i = 0; i < nhl->nsiblings; i++) { + struct next_hop *next = array + i; + + if (nh_is_neigh_created(next)) + count++; + } + return count; +} + +struct next_hop *next_hop_list_find_path_using_ifp(struct next_hop_list *nhl, + struct ifnet *ifp, + int *sibling) +{ + uint32_t i; + struct next_hop *array; + + if (unlikely(!nhl)) + return NULL; + + array = rcu_dereference(nhl->siblings); + for (i = 0; i < 
nhl->nsiblings; i++) { + struct next_hop *next = array + i; + + if (dp_nh_get_ifp(next) == ifp) { + *sibling = i; + return next; + } + } + return NULL; +} + +bool next_hop_list_is_any_connected(const struct next_hop_list *nhl) +{ + uint32_t i; + struct next_hop *array; + + if (unlikely(!nhl)) + return false; + + array = rcu_dereference(nhl->siblings); + for (i = 0; i < nhl->nsiblings; i++) { + struct next_hop *next = array + i; + + if (nh_is_connected(next)) + return true; + } + return false; +} + +ALWAYS_INLINE struct next_hop * +nexthop_mp_select(const struct next_hop_list *nextl, + struct next_hop *next, + uint32_t size, + uint32_t hash) +{ + uint16_t path; + int index; + + if (nextl->nh_map) { + index = hash % nextl->nh_map->count; + return next + (nextl->nh_map->index[index]); + } + + path = ecmp_lookup(size, hash); + if (unlikely(next[path].flags & RTF_DEAD)) { + /* retry to find a good path */ + for (path = 0; path < size; path++) { + if (!(next[path].flags & RTF_DEAD)) + break; + } + + if (path == size) + return NULL; + } + return next + path; +} + +ALWAYS_INLINE struct next_hop *nexthop_select(int family, uint32_t nh_idx, + const struct rte_mbuf *m, + uint16_t ether_type) +{ + struct next_hop_list *nextl; + struct next_hop *next; + uint32_t size; + struct nexthop_table *nh_table = nh_common_get_nh_table(family); + + nextl = rcu_dereference(nh_table->entry[nh_idx]); + if (unlikely(!nextl)) + return NULL; + + size = nextl->nsiblings; + next = nextl->siblings; + + if (likely(size == 1)) + return next; + + return nexthop_mp_select(nextl, next, size, + ecmp_mbuf_hash(m, ether_type)); +} + +struct next_hop_list * +next_hop_list_create_copy_start(int family __unused, + struct next_hop_list *old) +{ + struct next_hop_list *new_nextl; + + new_nextl = nexthop_alloc(old->nsiblings); + if (!new_nextl) + return NULL; + + if (old->nh_map) { + new_nextl->nh_map = malloc(sizeof(*new_nextl->nh_map)); + if (!new_nextl->nh_map) { + __nexthop_destroy(new_nextl); + return 
NULL; + } + } + + new_nextl->proto = old->proto; + new_nextl->primaries = old->primaries; + new_nextl->index = old->index; + new_nextl->refcount = old->refcount; + new_nextl->use = old->use; + + return new_nextl; +} + +int +next_hop_list_create_copy_finish(int family, + struct next_hop_list *old, + struct next_hop_list *new, + uint32_t old_idx) +{ + int rc; + struct nexthop_table *nh_table = nh_common_get_nh_table(family); + int i; + struct next_hop *array; + uint64_t usable = 0; + + rc = nexthop_hash_del_add(family, old, new); + if (rc < 0) { + __nexthop_destroy(new); + return rc; + } + + if (old->nh_map) + memcpy(new->nh_map, old->nh_map, sizeof(*new->nh_map)); + /* + * Set the usable nh bitmask. Scan the copies of the NHs + * in case there was a change to the original + */ + array = new->siblings; + for (i = 0; i < new->nsiblings; i++) { + struct next_hop *next = array + i; + + if (next->flags & (RTF_BACKUP | RTF_UNUSABLE)) + continue; + usable |= (1ull << i); + } + rte_atomic64_set(&new->usable_prim_nh_bitmask, usable); + + next_hop_list_setup_back_ptrs(new); + + /* + * It's safe to copy over the FAL objects without + * notifications as there are no FAL-visible changes to the + * object - it maintains its own linkage to the neighbour + */ + new->nhg_fal_obj = old->nhg_fal_obj; + memcpy(new->nh_fal_obj, old->nh_fal_obj, + new->nsiblings * sizeof(*new->nh_fal_obj)); + new->pd_state = old->pd_state; + + assert(nh_table->entry[old_idx] == old); + rcu_xchg_pointer(&nh_table->entry[old_idx], new); + + next_hop_fixup_protected_tracking(old, new); + /* + * Now we need to recheck the usability to catch the case where + * a next_hop became unusable and was some way through processing + * an update to the old map as we were updating it here. + */ + next_hop_list_check_usability(new, + next_hop_usability_check_update_cb); + + call_rcu(&old->rcu, nexthop_destroy); + + return 0; +} + +/* + * This is kept for backwards compatibility. 
+ */ +ALWAYS_INLINE const struct in_addr * +dp_nh4_get_addr(const struct next_hop *next_hop) +{ + return &next_hop->gateway.address.ip_v4; +} + +/* + * This is kept for backwards compatibility. + */ +ALWAYS_INLINE const struct in6_addr * +dp_nh6_get_addr(const struct next_hop *next_hop) +{ + return &next_hop->gateway.address.ip_v6; +} + +void nexthop_map_display(const struct next_hop_list *nextl, + json_writer_t *jsonw) +{ + int i; + + if (!nextl->nh_map) + return; + + jsonw_uint_field(jsonw, "nh_map_count", nextl->nh_map->count); + jsonw_name(jsonw, "nh_map"); + jsonw_start_array(jsonw); + for (i = 0; i < nextl->nh_map->count; i++) + jsonw_uint(jsonw, nextl->nh_map->index[i]); + jsonw_end_array(jsonw); +} + +bool +next_hop_list_fal_l3_enable_changed(int family, + struct next_hop_list *nextl, + struct ifnet *ifp, + fal_object_t *old_nhg_obj, + fal_object_t **old_nh_objs) +{ + enum fal_packet_action_t action; + bool contains_ifp = false; + unsigned int i; + int ret; + + for (i = 0; i < nextl->nsiblings; i++) { + struct next_hop *nh = nextl->siblings + i; + + if (dp_nh_get_ifp(nh) == ifp) { + contains_ifp = true; + break; + } + } + + if (!contains_ifp) + return false; + + action = fal_next_hop_group_packet_action(nextl->nsiblings, + nextl->siblings); + /* + * Check if there is a change in whether the nhg FAL object is + * needed - if the packet action is something other than + * FORWARD then the nhg FAL object isn't needed. 
+ */ + if ((action == FAL_PACKET_ACTION_FORWARD) == + (nextl->pd_state != PD_OBJ_STATE_NOT_NEEDED)) + return false; + + *old_nh_objs = calloc(nextl->nsiblings, sizeof(**old_nh_objs)); + if (!*old_nh_objs) { + RTE_LOG(ERR, ROUTE, + "Out of memory during FAL intf L3 state change\n"); + return false; + } + + /* save old objects for later freeing */ + *old_nhg_obj = nextl->nhg_fal_obj; + memcpy(*old_nh_objs, nextl->nh_fal_obj, + sizeof(**old_nh_objs) * nextl->nsiblings); + + ret = fal_ip_new_next_hops(nextl->use, nextl->nsiblings, + nextl->siblings, &nextl->nhg_fal_obj, + nextl->nh_fal_obj); + if (ret < 0 && ret != -EOPNOTSUPP) { + RTE_LOG(ERR, ROUTE, + "FAL IPv%d next-hop-group create for FAL intf L3 state change failed: %s\n", + family == AF_INET ? 4 : 6, + strerror(-ret)); + /* + * clear out stale object handles, although with + * pd_state set accordingly these shouldn't be used + * anyway. + */ + nextl->nhg_fal_obj = FAL_NULL_OBJECT_ID; + memset(nextl->nh_fal_obj, 0, + sizeof(*nextl->nh_fal_obj) * nextl->nsiblings); + } + + nextl->pd_state = fal_state_to_pd_state(ret); + + return true; +} + +/* + * Second stage of a FAL L3 enable-state change: delete the old + * next-hop-group objects passed in old_nhg_obj/old_nh_objs and release + * the old_nh_objs array. The array is released on every path, + * including when there is no old next-hop-group object to delete. + */ +void +next_hop_list_fal_l3_enable_changed_finish(int family, + struct next_hop_list *nextl, + fal_object_t old_nhg_obj, + fal_object_t *old_nh_objs) +{ + int ret; + + /* + * Next hop group object may not have been created previously, + * which is expected if an interface was previously not FAL L3 + * enabled. The saved handle array still has to be freed, + * otherwise it is leaked on this path. + */ + if (old_nhg_obj == FAL_NULL_OBJECT_ID) { + free(old_nh_objs); + return; + } + + ret = fal_ip_del_next_hops(old_nhg_obj, + nextl->nsiblings, + old_nh_objs); + if (ret < 0) { + RTE_LOG(ERR, ROUTE, + "FAL IPv%d next-hop-group delete for FAL intf L3 state change failed: %s\n", + family == AF_INET ?
4 : 6, + strerror(-ret)); + } + + free(old_nh_objs); +} + +fal_object_t next_hop_list_get_fal_obj(int family, uint32_t nhl_idx, + enum pd_obj_state *pd_state) +{ + struct nexthop_table *nh_table = nh_common_get_nh_table(family); + struct next_hop_list *nextl; + + nextl = rcu_dereference(nh_table->entry[nhl_idx]); + *pd_state = nextl->pd_state; + + if (nextl->pd_state != PD_OBJ_STATE_FULL && + nextl->pd_state != PD_OBJ_STATE_NOT_NEEDED) + nextl = nh_common_get_blackhole(family); + + return nextl->nhg_fal_obj; +} + +size_t +next_hop_list_get_fal_nhs(int family, uint32_t nhl_idx, + struct next_hop **hops) +{ + struct nexthop_table *nh_table = nh_common_get_nh_table(family); + struct next_hop_list *nextl; + + nextl = rcu_dereference(nh_table->entry[nhl_idx]); + + if (nextl->pd_state != PD_OBJ_STATE_FULL && + nextl->pd_state != PD_OBJ_STATE_NOT_NEEDED) + nextl = nh_common_get_blackhole(family); + + *hops = nextl->siblings; + return nextl->nsiblings; +} diff --git a/src/nh_common.h b/src/nh_common.h new file mode 100644 index 00000000..e107c5ee --- /dev/null +++ b/src/nh_common.h @@ -0,0 +1,436 @@ +/* + * Copyright (c) 2020, AT&T Intellectual Property. All rights reserved. 
+ * + * SPDX-License-Identifier: LGPL-2.1-only + */ +#ifndef NH_COMMON_H +#define NH_COMMON_H + +#include +#include + +#include "compiler.h" +#include "fal_plugin.h" +#include "ip_addr.h" +#include "ip_forward.h" +#include "json_writer.h" +#include "mpls/mpls.h" +#include "pd_show.h" +#include "route_flags.h" +#include "urcu.h" + +struct ifnet; +struct llentry; + +#define NH_MAP_MAX_ENTRIES 64 + +struct nh_map { + uint8_t index[NH_MAP_MAX_ENTRIES]; + int count; +}; + +/* Output information associated with a single nexthop */ +struct next_hop { + union { + struct ifnet *ifp; /* target interface */ + struct llentry *lle; /* lle entry to use when sending */ + } u; + uint32_t flags; /* routing flags */ + union next_hop_outlabels outlabels; + struct ip_addr gateway; + struct cds_list_head if_gw_list_entry; + struct next_hop_list *nhl; /* ptr back to the next hop list */ +}; + +/* + * This is the nexthop information result of route lookup - allows for + * multiple nexthops in the case of ECMP + */ +struct next_hop_list { + struct next_hop *siblings; /* array of next_hop */ + uint8_t nsiblings; /* # of next_hops */ + uint8_t proto; /* routing protocol */ + uint8_t primaries; /* number of primary next hops */ + uint8_t padding; + uint32_t index; + struct nh_map *nh_map; + struct next_hop hop0; /* optimization for non-ECMP */ + uint32_t refcount; /* # of LPM's referring */ + enum pd_obj_state pd_state; + enum fal_next_hop_group_use use; + struct cds_lfht_node nh_node; + fal_object_t nhg_fal_obj; /* FAL handle for next_hop_group */ + fal_object_t *nh_fal_obj; /* Per-nh FAL handles */ + struct rcu_head rcu; + rte_atomic64_t usable_prim_nh_bitmask; +} __rte_cache_aligned; + +/* + * key for hashing an array of NHs. Size is the number of NHs in the array. 
+ */ +struct nexthop_hash_key { + const struct next_hop *nh; + size_t size; + uint8_t proto; + enum fal_next_hop_group_use use; +}; + +/* + + * The nexthop in LPM is 22 bits but dpdk hash tables currently have a + * limit of 2^20 entries. + */ +#define NEXTHOP_HASH_TBL_SIZE RTE_FBK_HASH_ENTRIES_MAX +#define NEXTHOP_HASH_TBL_MIN (UINT8_MAX + 1) + +struct nexthop_table { + uint32_t in_use; /* # of entries used */ + uint32_t rover; /* next free slot to look at */ + struct next_hop_list *entry[NEXTHOP_HASH_TBL_SIZE]; /* entry array */ + uint32_t neigh_present; + uint32_t neigh_created; +}; + +enum nh_type { + NH_TYPE_V4GW, /* struct next_hop */ + NH_TYPE_V6GW, /* struct next_hop */ +}; + +void nh_set_ifp(struct next_hop *next_hop, struct ifnet *ifp); + +struct next_hop_list *nexthop_alloc(int size); + +void __nexthop_destroy(struct next_hop_list *nextl); + +void nexthop_destroy(struct rcu_head *head); + +int nexthop_new(int family, const struct next_hop *nh, uint16_t size, + uint8_t proto, enum fal_next_hop_group_use use, + uint32_t *slot); + +/* + * Create a next_hop based on the given information. This nexthop will then + * be used as the argument to nexthop_new. + * + * @param[in] ifp The interface the nexthop uses. + * @param[in] gw The gateway for the nexthop + * @param[in] flags The flags to set in the nexthop + * @param[in] num_labels The number of labels for the nexthop + * @param[in] labels An array of labels, of size 'num_labels' + * + * @return A next_hop on success + * @return NULL on failure. + */ +struct next_hop * +nexthop_create(struct ifnet *ifp, struct ip_addr *gw, uint32_t flags, + uint16_t num_labels, label_t *labels); + +void nexthop_put(int family, uint32_t idx); + +/* + * Copy the contents of the old next hop into the new next hop. It does + * not copy things like list ptrs and hash entries. + * + * @param[in] old The nexthop to copy. + * @param[out] new The nexthop to copy into. 
+ * + * @return 0 on success + * -ve on error + */ +int next_hop_copy(struct next_hop *old, struct next_hop *new); + +/* + * Given an next_hop_list create a copy of the nexthops in an array + * + * @param[in] nhl The fully formed nhl + * @param[out] size Store the size of the created array here. + * + * @return Pointer to array of nexthops on success + * @return NULL on failure + */ +struct next_hop * +next_hop_list_copy_next_hops(struct next_hop_list *nhl, int *size); + +/* + * Given a next_hop_list that is in the nh table, start the process of doing + * a modify so that we can replace the existing next_hop_list with the new one. + * + * This function is used when a next_hop_list that is being used in the + * forwarding path needs to be modified in a non rcu friendly way. + * All memory for the new next_hop_list is allocated by this function. + * + * @param[in] family The address family the nexthop is using + * @param[in] old The nexthop that is currently being used + * + * @return a pointer to a partially constructed next_hop_list that is copied + * from 'old' + * + * This function allocates the memory for the next_hops, but does not + * populate them. That is left to the caller who should make changes to + * the contents of the NHs as required, and should finish the switch by calling + * next_hop_list_create_copy_finish. + */ +struct next_hop_list * +next_hop_list_create_copy_start(int family, + struct next_hop_list *old); + +/* + * After having called next_hop_list_create_copy_start, the user will have + * an old and a new next_hop_list. This function is called to finish the + * switch over to using the new version. + * + * The number of next_hops in old and new must be the same. This function + * is used only when modifying the contents of a next_hop - for example + * when one of them has been modified due to becoming 'neigh_present' + * + * It will modify hashtable entries, internal pointers, nh_maps and copy + * over fal objects as required. 
+ * + * @param[in] family The address family the nexthop is using + * @param[in] old The nexthop that is currently being used. Once the + * switchover is complete this will be freed as any references + * will be to the new one. + * @param[in] new The new nexthop that is currently being created. It + * will be inserted into the forwarding path and used for + * forwarding. + * @param[in] old_idx The index in the nh table that old is at. + * + * @return 0 on success + * -ve on failure + */ +int +next_hop_list_create_copy_finish(int family, + struct next_hop_list *old, + struct next_hop_list *new, + uint32_t old_idx); + +/* + * Remove the old NH from the hash and add the new one. Cannot + * use a call to cds_lfht_add_replace() or any of the variants + * as the key for the new NH may be very different in the case + * where there are a different number of paths. + * + * @param[in] family The address family for this nexthop + * @param[in] old_nhl The old next_hop_list to remove from the hash + * @param[in] new_nhl The new next_hop_list to add to the hash + * + * @retval 0 on success + * -ve on failure + */ +int +nexthop_hash_del_add(int family, + struct next_hop_list *old_nhl, + struct next_hop_list *new_nhl); + +/* + * Modify a NH to mark it as neigh present. This is done in a non atomic + * way, so this must be atomically swapped into the forwarding state when + * ready. + * + * @param[in] family The family the nh is using. + * @param[out] next_hop The next_hop to modify + * @param[in] lle The lle entry that the next_hop needs to link to. + * + */ +void nh_set_neigh_present(int family __unused, + struct next_hop *next_hop, + struct llentry *lle); + +/* + * Modify a NH to clear the neigh. + * + * @param[in] family The family the nh is using. + * @param[out] next_hop The next_hop to modify + */ +void nh_clear_neigh_present(int family, + struct next_hop *next_hop); + +/* + * Modify a NH to mark it as neigh created.
This is done in a non atomic + * way, so this must be atomically swapped into the forwarding state when + * ready. + * + * @param[in] family The family the nh is using. + * @param[out] nh The next_hop to modify + * @param[in] lle The lle entry that the next_hop needs to link to. + */ +void nh_set_neigh_created(int family, + struct next_hop *next_hop, + struct llentry *lle); + +/* + * Modify a NH to clear the fact that it was neigh created. + * + * @param[in] family The family the nh is using. + * @param[out] nh The next_hop to modify + */ +void nh_clear_neigh_created(int family, + struct next_hop *next_hop); + +/* + * Get the number of neighbour created entries in the next_hop_list + * + * @param[in] nhl The next_hop_list to check + * + * @return the count of neighbour created entries in the nhl. + */ +int next_hop_list_nc_count(const struct next_hop_list *nhl); + +/* + * Given a next_hop_list and an ifp, find the next_hop within the + * next_hop_list that uses the given interface. + * + * @param[in] nhl The next_hop_list to check + * @param[in] ifp The ifp to look for + * @param[out] sibling Store the index of the returned nexthop + * + * @return A ptr to the next_hop if one matched + * Null if no match found. 
+ */ +struct next_hop *next_hop_list_find_path_using_ifp(struct next_hop_list *nhl, + struct ifnet *ifp, + int *sibling); + +/* + * Given a next_hop_list check if any of the hops are connected + * + * @param[in] nhl The next_hop_list to check + * + * @return True if there is a connected nexthop + * @return False if there is not a connected nexthop + */ +bool next_hop_list_is_any_connected(const struct next_hop_list *nhl); + +struct next_hop *nexthop_mp_select(const struct next_hop_list *nextl, + struct next_hop *next, + uint32_t size, + uint32_t hash); + +struct next_hop *nexthop_select(int family, uint32_t nh_idx, + const struct rte_mbuf *m, + uint16_t ether_type); + +bool nh_is_connected(const struct next_hop *nh); +bool nh_is_local(const struct next_hop *nh); +bool nh_is_gw(const struct next_hop *nh); + +static ALWAYS_INLINE bool +nh_is_neigh_present(const struct next_hop *next_hop) +{ + return next_hop->flags & RTF_NEIGH_PRESENT; +} + +static ALWAYS_INLINE bool +nh_is_neigh_created(const struct next_hop *next_hop) +{ + return next_hop->flags & RTF_NEIGH_CREATED; +} + +static ALWAYS_INLINE struct llentry * +nh_get_lle(const struct next_hop *next_hop) +{ + if (next_hop->flags & (RTF_NEIGH_CREATED | RTF_NEIGH_PRESENT)) + return rcu_dereference(next_hop->u.lle); + + return NULL; +} + +static inline const union next_hop_outlabels * +nh_get_labels(struct next_hop *nh) +{ + return &nh->outlabels; +} + +static inline uint32_t +nh_get_flags(struct next_hop *nh) +{ + return nh->flags; +} + +/* + * Display the next_hop map from a next_hop list in JSON format. + */ +void nexthop_map_display(const struct next_hop_list *nextl, + json_writer_t *json); + +/* + * mark all next_hops indicated by the key as unusable. + */ +void next_hop_mark_path_state(enum dp_rt_path_state state, + const struct dp_rt_path_unusable_key *key); + +/* + * Update a next hop list if required following a change in the FAL L3 + * enable state for an interface.
+ * + * Returns true if the FAL NHG object is updated and false otherwise. + */ +bool +next_hop_list_fal_l3_enable_changed(int family, + struct next_hop_list *nextl, + struct ifnet *ifp, + fal_object_t *old_nhg_obj, + fal_object_t **old_nh_objs); +/* + * Finish the update following a change in the FAL L3 + * enable state for an interface. + * + * This should only be done after next_hop_list_fal_l3_enable_changed + * has returned true, and after all routes referring to the old NHG + * object have been updated to use the new NHG object. + */ +void +next_hop_list_fal_l3_enable_changed_finish(int family, + struct next_hop_list *nextl, + fal_object_t old_nhg_obj, + fal_object_t *old_nh_objs); + +/* + * Get the FAL next-hop-group object for the given next-hop-list index + */ +fal_object_t next_hop_list_get_fal_obj(int family, uint32_t nhl_idx, + enum pd_obj_state *pd_state); + +size_t +next_hop_list_get_fal_nhs(int family, uint32_t nhl_idx, + struct next_hop **hops); + +/* + * Per AF hash function for a nexthop. + */ +typedef int (nh_common_hash_fn)(const struct nexthop_hash_key *key, + unsigned long seed); + +/* + * Per AF function to compare nexthops + */ +typedef int (nh_common_cmp_fn)(struct cds_lfht_node *node, const void *key); + +/* + * Get the hash table used to track NHs and whether a new one can be reused. + */ +typedef struct cds_lfht *(nh_common_get_hash_tbl_fn)(void); + +/* + * Get the table that the NHs are stored in. + */ +typedef struct nexthop_table *(nh_common_get_nh_tbl_fn)(void); + +/* + * Structure to hold all the function pointers required to do the + * NH processing that differs between address families. + */ +struct nh_common { + nh_common_hash_fn *nh_hash; + nh_common_cmp_fn *nh_compare; + nh_common_get_hash_tbl_fn *nh_get_hash_tbl; + nh_common_get_nh_tbl_fn *nh_get_nh_tbl; + struct next_hop_list *(*nh_get_blackhole)(void); +}; + +/* + * Register AF specific behaviour for processing NHs.
+ */ +void nh_common_register(int family, struct nh_common *nh_common); + +#endif /* NH_COMMON_H */ diff --git a/src/npf/alg/alg.c b/src/npf/alg/alg.c new file mode 100644 index 00000000..d7d4d6db --- /dev/null +++ b/src/npf/alg/alg.c @@ -0,0 +1,1185 @@ +/* + * Copyright (c) 2018-2021, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2016 by Brocade Communications Systems, Inc. + * All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ + +/*- + * Copyright (c) 2010 The NetBSD Foundation, Inc. + * Copyright (c) 2013-2016 by Brocade Communications Systems, Inc. + * All rights reserved. + * + * SPDX-License-Identifier: (LGPL-2.1-only AND BSD-2-Clause-NETBSD) + * + * Substantially re-written from the original BSD source by Brocade. + * + * This material is based upon work partially supported by The + * NetBSD Foundation under a contract with Mindaugas Rasiukevicius. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * NPF interface for application level gateways (ALGs). + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "if_var.h" +#include "json_writer.h" +#include "npf/npf.h" +#include "npf/alg/alg.h" +#include "npf/npf_nat.h" +#include "npf/npf_session.h" +#include "npf/npf_cache.h" +#include "npf/npf_vrf.h" +#include "vplane_log.h" +#include "vrf_internal.h" + +struct rte_mbuf; + +/* Minimum lifetime for a tuple */ +#define NPF_ALG_MIN_TIMEOUT 5 + +/* Retry count for tuple insertions. */ +#define NPF_ALG_RETRY_COUNT 10 + +/* ALG periodic timer - for GC */ +static struct rte_timer alg_timer; +#define ALG_INTERVAL 5 + +/* + * We need to store disable requests for ALGs in VRFs not yet seen. + * So we have a hash for each unseen VRF (by external-id), that + * points to a list of algs which should be disabled. 
+ */ +struct alg_late_vrf { + struct alg_late_vrf *nv_prev; + struct alg_late_vrf *nv_next; + char nv_key[32]; + uint32_t nv_vrfid; + zhash_t *nv_algs; +}; + +static struct alg_late_vrf *alg_late_vrfs; +static zhash_t *alg_late_vrf_hash; + +static void +npf_alg_late_vrfs_destroy(void) +{ + struct alg_late_vrf *late_vrf; + struct alg_late_vrf *next_vrf; + + for (late_vrf = alg_late_vrfs; late_vrf; late_vrf = next_vrf) { + next_vrf = late_vrf->nv_next; + zhash_destroy(&late_vrf->nv_algs); + + zhash_delete(alg_late_vrf_hash, late_vrf->nv_key); + } + + alg_late_vrfs = NULL; +} + +static struct alg_late_vrf * +npf_alg_late_vrf_find(uint32_t ext_vrfid) +{ + char hash_key[32]; + snprintf(hash_key, sizeof(hash_key), "%x", ext_vrfid); + + return zhash_lookup(alg_late_vrf_hash, hash_key); +} + +static struct alg_late_vrf * +npf_alg_late_vrf_add(uint32_t ext_vrfid) +{ + struct alg_late_vrf *late_vrf = malloc(sizeof(*late_vrf)); + if (!late_vrf) + return NULL; + + late_vrf->nv_vrfid = ext_vrfid; + late_vrf->nv_algs = zhash_new(); + snprintf(late_vrf->nv_key, sizeof(late_vrf->nv_key), "%x", ext_vrfid); + + late_vrf->nv_prev = NULL; + late_vrf->nv_next = alg_late_vrfs; + if (alg_late_vrfs) + alg_late_vrfs->nv_prev = late_vrf; + alg_late_vrfs = late_vrf; + + zhash_insert(alg_late_vrf_hash, late_vrf->nv_key, late_vrf); + + return late_vrf; +} + +static void +npf_alg_late_vrf_del(struct alg_late_vrf *late_vrf) +{ + if (!late_vrf) + return; + + if (late_vrf->nv_next) + late_vrf->nv_next->nv_prev = late_vrf->nv_prev; + if (late_vrf->nv_prev) + late_vrf->nv_prev->nv_next = late_vrf->nv_next; + else + alg_late_vrfs = late_vrf->nv_next; + + zhash_destroy(&late_vrf->nv_algs); + + zhash_delete(alg_late_vrf_hash, late_vrf->nv_key); +} + +static void +npf_alg_late_vrf_set_alg(struct alg_late_vrf *late_vrf, char const *name, + bool on) +{ + bool *alg_on = malloc(sizeof(*alg_on)); + if (!alg_on) + return; + + *alg_on = on; + + int rc = zhash_insert(late_vrf->nv_algs, name, alg_on); + if (!rc) 
+ return; /* Insert ok */ + + free(alg_on); + + /* Duplicate - probably will never occur */ + if (rc == -1) { + alg_on = zhash_lookup(late_vrf->nv_algs, name); + if (alg_on) + *alg_on = on; + } +} + +/* Set ALG private data */ +void +npf_alg_session_set_private(struct npf_session *se, void *data) +{ + struct npf_session_alg *sa = npf_session_get_alg_ptr(se); + + if (sa) + sa->sa_private = data; +} + +/* Get ALG private data */ +void * +npf_alg_session_get_private(const struct npf_session *se) +{ + struct npf_session_alg *sa = npf_session_get_alg_ptr(se); + + if (sa) + return sa->sa_private; + return NULL; +} + +/* Get previous ALG private data, and set new value as one operation */ +void * +npf_alg_session_get_and_set_private(const npf_session_t *se, void *data) +{ + struct npf_session_alg *sa = npf_session_get_alg_ptr(se); + if (sa) + return rcu_xchg_pointer(&(sa->sa_private), data); + return NULL; +} + +/* Test flag */ +int +npf_alg_session_test_flag(const struct npf_session *se, uint32_t flag) +{ + struct npf_session_alg *sa = npf_session_get_alg_ptr(se); + + if (sa) + return sa->sa_flags & flag; + return 0; +} + +/* Set flag */ +void +npf_alg_session_set_flag(struct npf_session *se, uint32_t flag) +{ + struct npf_session_alg *sa = npf_session_get_alg_ptr(se); + + if (sa) + sa->sa_flags |= flag; +} + +/* Get all flags */ +uint32_t +npf_alg_session_get_flags(const struct npf_session *se) +{ + struct npf_session_alg *sa = npf_session_get_alg_ptr(se); + + if (sa) + return sa->sa_flags; + return 0; +} + +/* Get inspect */ +bool +npf_alg_session_inspect(struct npf_session *se) +{ + struct npf_session_alg *sa = npf_session_get_alg_ptr(se); + + if (sa) + return sa->sa_inspect; + return false; +} + +/* Set inspect */ +void +npf_alg_session_set_inspect(struct npf_session *se, bool v) +{ + struct npf_session_alg *sa = npf_session_get_alg_ptr(se); + + if (sa) + sa->sa_inspect = v; +} + +/* Get the alg from this session */ +struct npf_alg * +npf_alg_session_get_alg(const 
struct npf_session *se) +{ + struct npf_session_alg *sa = npf_session_get_alg_ptr(se); + + if (sa) + return (struct npf_alg *)sa->sa_alg; + return NULL; +} + +/* + * Allocate ALG data on the session handle + */ +int +npf_alg_session_set_alg(struct npf_session *se, const struct npf_alg *alg) +{ + + struct npf_session_alg *sa = malloc(sizeof(struct npf_session_alg)); + + if (!sa) + return -ENOMEM; + + sa->sa_alg = npf_alg_get((struct npf_alg *)alg); + sa->sa_private = NULL; + sa->sa_flags = 0; + sa->sa_inspect = false; + + npf_session_set_alg_ptr(se, sa); + + return 0; +} + +/* + * Populate a tuple match key from the npc cache + */ +static void alg_npc_to_tuple_key(const npf_cache_t *npc, uint32_t ifx, + struct apt_match_key *m) +{ + m->m_ifx = ifx; + m->m_proto = npf_cache_ipproto(npc); + m->m_srcip = npf_cache_srcip(npc); + m->m_dstip = npf_cache_dstip(npc); + m->m_alen = npc->npc_alen; + + /* Get ports if applicable */ + if (npf_iscached(npc, NPC_L4PORTS)) { + const struct npf_ports *ports = &npc->npc_l4.ports; + + m->m_dport = ports->d_port; + m->m_sport = ports->s_port; + } else { + m->m_dport = 0; + m->m_sport = 0; + } +} + +/* + * Lookup all tuple tables. Do not change cache. + */ +struct apt_tuple * +alg_lookup_every_table(const struct ifnet *ifp, const npf_cache_t *npc) +{ + struct npf_alg_instance *ai; + struct apt_match_key key; + struct apt_tuple *at; + + ai = vrf_get_npf_alg_rcu(ifp->if_vrfid); + if (!ai) + return NULL; + + alg_npc_to_tuple_key(npc, ifp->if_index, &key); + at = apt_tuple_lookup_all_any_dport(ai->ai_apt, &key); + + return at; +} + +/* + * Lookup destination port tuple table + */ +static struct apt_tuple * +alg_lookup(struct npf_alg_instance *ai, npf_cache_t *npc, uint32_t ifx) +{ + struct apt_match_key m; + struct apt_tuple *at; + + /* Is table empty? 
*/ + if (apt_table_count(ai->ai_apt, APT_MATCH_DPORT) == 0) + return NULL; + + alg_npc_to_tuple_key(npc, ifx, &m); + at = apt_tuple_lookup_dport(ai->ai_apt, &m); + + return at; +} + +/* Lookup by npc */ +struct apt_tuple *alg_lookup_npc(struct npf_alg_instance *ai, + npf_cache_t *npc, uint32_t ifx) +{ + struct apt_tuple *at = npf_cache_get_tuple(npc); + + if (!npf_iscached(npc, NPC_ALG_TLUP)) { + npc->npc_info |= NPC_ALG_TLUP; + at = alg_lookup(ai, npc, ifx); + npf_cache_set_tuple(npc, (void *)at); + } + return at; +} + +/* + * Expire tuples containing this session. + * + * Its possible that the alg vrf instance has been deleted, in which case + * alg->na_ai will be NULL. Just return in these cases. + */ +void alg_expire_session_tuples(const struct npf_alg *alg, npf_session_t *se) +{ + if (alg->na_ai) + alg_apt_instance_expire_session(alg->na_ai->ai_apt, se); +} + +/* + * Delete any tuples created by the given session + */ +void alg_destroy_session_tuples(const struct npf_alg *alg, npf_session_t *se) +{ + if (alg->na_ai) + alg_apt_instance_destroy_session(alg->na_ai->ai_apt, se); +} + +/* Get alg from name */ +static struct npf_alg *alg_name_to_alg(struct npf_alg_instance *ai, + const char *name) +{ + if (ai->ai_ftp && !strcmp(ai->ai_ftp->na_ops->name, name)) + return ai->ai_ftp; + if (ai->ai_tftp && !strcmp(ai->ai_tftp->na_ops->name, name)) + return ai->ai_tftp; + if (ai->ai_sip && !strcmp(ai->ai_sip->na_ops->name, name)) + return ai->ai_sip; + if (ai->ai_rpc && !strcmp(ai->ai_rpc->na_ops->name, name)) + return ai->ai_rpc; + return NULL; +} + +static int +alg_add_port(struct npf_alg *na, const struct npf_alg_config_item *ci) +{ + + struct apt_match_key m = { 0 }; + struct apt_tuple *at; + bool keep = false; + + m.m_proto = ci->ci_proto; + m.m_dport = htons(ci->ci_datum); + + if (ci->ci_flags & NPF_TUPLE_MATCH_PROTO_PORT) + m.m_match = APT_MATCH_DPORT; + else if (ci->ci_flags & NPF_TUPLE_MATCH_ALL) + m.m_match = APT_MATCH_ALL; + else if (ci->ci_flags & 
NPF_TUPLE_MATCH_ANY_SPORT) + m.m_match = APT_MATCH_ANY_SPORT; + + if (ci->ci_flags & NPF_TUPLE_KEEP) + keep = true; + + assert(m.m_match == APT_MATCH_DPORT); + assert(keep); + + at = apt_tuple_create_and_insert(na->na_ai->ai_apt, &m, + npf_alg_get(na), + ci->ci_alg_flags, + npf_alg_id2name(na->na_id), + false, keep); + + if (!at) { + npf_alg_put(na); + return -ENOMEM; + } + + return 0; +} + +/* Lookup tuple an mark as expired */ +static int alg_delete_port(struct npf_alg *na, + const struct npf_alg_config_item *ci) +{ + int rc; + + struct apt_match_key m = { 0 }; + + m.m_proto = ci->ci_proto; + m.m_dport = htons(ci->ci_datum); + + if (ci->ci_flags & NPF_TUPLE_MATCH_PROTO_PORT) + m.m_match = APT_MATCH_DPORT; + else if (ci->ci_flags & NPF_TUPLE_MATCH_ALL) + m.m_match = APT_MATCH_ALL; + else if (ci->ci_flags & NPF_TUPLE_MATCH_ANY_SPORT) + m.m_match = APT_MATCH_ANY_SPORT; + + rc = alg_apt_tuple_lookup_and_expire(na->na_ai->ai_apt, &m); + + return rc; +} + +int npf_alg_port_handler(struct npf_alg *na, int op, + const struct npf_alg_config_item *ci) +{ + int rc; + + switch (op) { + case NPF_ALG_CONFIG_SET: + rc = alg_add_port(na, ci); + break; + case NPF_ALG_CONFIG_DELETE: + rc = alg_delete_port(na, ci); + break; + default: + rc = -EINVAL; + } + return rc; +} + +/* Manage the default config as a unit */ +static int alg_manage_config(struct npf_alg *na, int op, + struct npf_alg_config *ac) +{ + int rc = 0; + int i; + + if ((op == NPF_ALG_CONFIG_SET) && ac->ac_default_set) + return 0; + + if ((op == NPF_ALG_CONFIG_DELETE) && !ac->ac_default_set) + return 0; + + for (i = 0; i < ac->ac_item_cnt; i++) { + /* Handler for default config is optional */ + if (ac->ac_handler) { + rc = ac->ac_handler(na, op, &ac->ac_items[i]); + if (rc) + return rc; + } + } + + /* Keep track of whether the default config is installed */ + if (!rc) + ac->ac_default_set = (op == NPF_ALG_CONFIG_SET) ? true : false; + + return rc; +} + +/* Called to reset an alg to a known state. 
*/ +static int alg_reset_alg(struct npf_alg *alg, bool hard) +{ + uint8_t i; + int rc = 0; + + /* + * Returns 0 on success, the error from the alg's own reset op if + * that fails, or the first error seen while re-installing the + * default configs. + */ + + /* First let the alg do whatever it needs */ + if (alg_has_op(alg, reset)) { + rc = alg->na_ops->reset(alg, hard); + if (rc) + return rc; + } + + /* Delete 'keep' tuples; expire non-keep tuples */ + alg_apt_instance_client_reset(alg->na_ai->ai_apt, alg); + + /* Now reset the state of the configs and re-install. */ + for (i = 0; i < alg->na_num_configs; i++) { + struct npf_alg_config *ac = &alg->na_configs[i]; + int cfg_rc; + + ac->ac_cli_refcnt = 0; + ac->ac_default_set = false; + cfg_rc = alg_manage_config(alg, NPF_ALG_CONFIG_SET, ac); + /* + * Keep installing the remaining configs, but do not let a + * later success hide an earlier failure. + */ + if (cfg_rc && !rc) + rc = cfg_rc; + } + + /* Now reset state to default of enabled */ + if (!alg->na_enabled) + alg->na_enabled = true; + + return rc; +} + + +static void alg_reset_alg_module(struct npf_alg *alg, bool hard) +{ + int rc; + + if (!alg) + rte_panic("reset called on null alg"); + + rc = alg_reset_alg(alg, hard); + if (rc) + RTE_LOG(ERR, FIREWALL, "ALG: Reset: %s hard: %s rc: %d\n", + alg->na_ops->name, + hard ? "true" : "false", -rc); +} + +/* Reset a specific alg instance */ +void +alg_reset_instance(struct vrf *vrf, struct npf_alg_instance *ai, bool hard) +{ + + uint32_t count; + + if (!ai) + return; + + /* 'ai' may be freed */ + count = ai->ai_ref_count; + ai->ai_ref_count = 0; + + alg_reset_alg_module(ai->ai_ftp, hard); + alg_reset_alg_module(ai->ai_tftp, hard); + alg_reset_alg_module(ai->ai_sip, hard); + alg_reset_alg_module(ai->ai_rpc, hard); + + while (count--) + vrf_delete_by_ptr(vrf); +} + +/* Called by algs to manage a CLI config item */ +int npf_alg_manage_config_item(struct npf_alg *na, struct npf_alg_config *ac, + int op, struct npf_alg_config_item *ci) +{ + int rc; + + /* make sure the default config is deleted */ + if (op == NPF_ALG_CONFIG_SET) { + rc = alg_manage_config(na, NPF_ALG_CONFIG_DELETE, ac); + if (rc) + return rc; + } + + /* There must be a config item handler */ + rc = ac->ac_handler(na, op, ci); + if (rc) + goto reset; + + /* manage ref counts.
*/ + switch (op) { + case NPF_ALG_CONFIG_SET: + ac->ac_cli_refcnt++; + break; + case NPF_ALG_CONFIG_DELETE: + ac->ac_cli_refcnt--; + /* Restore default config? */ + if (!ac->ac_cli_refcnt) + (void) alg_manage_config(na, NPF_ALG_CONFIG_SET, ac); + break; + } + + return rc; + +reset: + /* + * Best attempt to restore default config. + * But only if no other CLI config is present. + */ + if ((op == NPF_ALG_CONFIG_SET) && !ac->ac_cli_refcnt) + (void) alg_manage_config(na, NPF_ALG_CONFIG_SET, ac); + + return rc; +} + +/* Free a reserved translation */ +int npf_alg_free_translation(npf_session_t *se, npf_addr_t *addr, + in_port_t port) +{ + npf_nat_t *nat = npf_session_get_nat(se); + npf_natpolicy_t *np = npf_nat_get_policy(nat); + npf_rule_t *rl = npf_nat_get_rule(nat); + uint32_t map_flags; + vrfid_t vrfid = npf_session_get_vrfid(se); + + /* Currently, all algs use a mapped port */ + map_flags = NPF_NAT_MAP_PORT; + + return npf_nat_free_map(np, rl, map_flags, npf_session_get_proto(se), + vrfid, *addr, port); +} + +/* Reserve translations for an alg. */ +int npf_alg_reserve_translations(npf_session_t *parent, int nr_ports, + bool start_even, uint8_t alen, + npf_addr_t *addr, in_port_t *port) +{ + npf_nat_t *pnat = npf_session_get_nat(parent); + npf_natpolicy_t *np = npf_nat_get_policy(pnat); + npf_rule_t *rl = npf_nat_get_rule(pnat); + in_port_t tmp; + npf_addr_t paddr; + uint32_t nat_flags; + vrfid_t vrfid = npf_session_get_vrfid(parent); + uint8_t ip_prot = npf_session_get_proto(parent); + int i; + int rc; + + /* Currently, all algs need a mapped port */ + nat_flags = NPF_NAT_MAP_PORT; + + if ((npf_nat_get_map_flags(pnat) & NPF_NAT_PA_SEQ) != 0) + nat_flags |= NPF_NAT_PA_SEQ; + + /* Start on even boundary? 
*/ + if (start_even) + nat_flags |= NPF_NAT_MAP_EVEN_PORT; + + /* allocate from parent translation addr */ + npf_nat_get_trans(pnat, addr, &tmp); + paddr = *addr; + + rc = npf_nat_alloc_map(np, rl, nat_flags, ip_prot, vrfid, addr, + port, nr_ports); + if (rc) + return rc; + + /* + * Ensure that the translations come from the same + * (parent) translation address. + */ + if (memcmp(addr, &paddr, alen) != 0) { + tmp = ntohs(*port); + for (i = 0; i < nr_ports; i++) + npf_nat_free_map(np, rl, nat_flags, ip_prot, vrfid, + *addr, htons(tmp + i)); + return -ENOSPC; + } + + return 0; +} + +/* + * Create and assign a nat struct to a session handle. + * + * Used by algs to create nat structs for reverse secondary flows. + * On success, will consume the alg nat params. Otherwise we leave + * that to tuple destroy. (There may be a reservation) + * + * Called as desired by algs during their npf_alg_session_init(). + * + * There are two instances (sip and tftp) where 'nt' is NULL and 'an' is + * passed into the function instead. + */ +int npf_alg_session_nat(npf_session_t *se, npf_nat_t *pnat, npf_cache_t *npc, + const int di, struct apt_tuple *nt, + struct npf_alg_nat *an) +{ + npf_nat_t *nat; + + if (!an && nt) + an = apt_tuple_get_nat(nt); + + /* + * Only if we have an alg nat on the tuple. + * May be called in fw stateful matches for algs. 
(eg: non-nat) + */ + if (!an) + return 0; + + /* Must have a parent nat */ + if (!pnat) + return -ENOENT; + + /* Create the nat, possibly reversed of the pnat */ + nat = npf_nat_custom_nat(pnat, an->an_flags); + if (!nat) + return -ENOMEM; + + if (an->an_flags & NPF_NAT_REVERSE) { + npf_nat_set_trans(nat, &an->an_oaddr, an->an_oport); + npf_nat_set_orig(nat, &an->an_taddr, an->an_tport); + } else { + npf_nat_set_trans(nat, &an->an_taddr, an->an_tport); + npf_nat_set_orig(nat, &an->an_oaddr, an->an_oport); + } + + npf_nat_finalise(npc, se, di, nat); + + npf_session_setnat(se, nat, true); + + /* Mark as consumed so tuple destroy doesn't see it */ + if (nt) + apt_tuple_set_nat(nt, NULL); + free(an); + + return 0; +} + +/* + * Register an application protocol alg. + * + * - Create the tuple hash tables + * - Insert default config(s) + * + * Do not attempt to recover from partial success. Failure to + * register a specific ALG will result in failure of the + * ALG instance creation, which will result in complete cleanup.
+ */
+int npf_alg_register(struct npf_alg *na)
+{
+	struct npf_alg_config *ac = na->na_configs;
+	int rc = 0;
+	int i;
+
+	/* Install each config in array order; stop at the first failure */
+	for (i = 0; i < na->na_num_configs; i++) {
+		rc = alg_manage_config(na, NPF_ALG_CONFIG_SET, ac);
+		if (rc)
+			break;
+		ac++;
+	}
+
+	if (rc)
+		RTE_LOG(ERR, FIREWALL, "ALG: register: %s failed: rc: %d\n",
+			na->na_ops->name, rc);
+
+	return rc;
+}
+
+/*
+ * Resolve 'name' to an alg within the instance and hand the request to that
+ * alg's 'config' op.
+ *
+ * The name is asserted to resolve.  Returns the op's result, or -ENOENT when
+ * alg_has_op() reports that the alg has no 'config' op.
+ */
+static int alg_config(struct npf_alg_instance *ai, const char *name, int op,
+		int argc, char **argv)
+{
+	struct npf_alg *alg;
+	int rc = -ENOENT;
+
+	alg = alg_name_to_alg(ai, name);
+	assert(alg);
+	if (alg_has_op(alg, config))
+		rc = alg->na_ops->config(alg, op, argc, argv);
+
+	return rc;
+}
+
+/*
+ * config() - Set/delete options to an alg
+ *
+ * A vrf reference is taken with vrf_find_or_create() for the duration of the
+ * call and released with vrf_delete_by_ptr() on every exit path after it.
+ *
+ * Each successful SET takes one additional vrf reference and records it in
+ * ai_ref_count; each successful DELETE releases one and removes it from
+ * ai_ref_count.  alg_reset_instance() calls vrf_delete_by_ptr() once per
+ * unit of ai_ref_count, so the counter must track the references actually
+ * outstanding.
+ *
+ * Returns 0 on success, -EINVAL if the vrf or its alg instance is not
+ * available, otherwise the value returned by alg_config().
+ */
+int npf_alg_config(uint32_t ext_vrfid, const char *name, int op,
+		int argc, char **argv)
+{
+	struct vrf *vrf;
+	struct npf_alg_instance *ai;
+	int rc;
+
+	vrf = vrf_find_or_create(ext_vrfid); /* Bug */
+	if (!vrf)
+		return -EINVAL;
+
+	ai = vrf_get_npf_alg(vrf);
+	if (!ai) {
+		/* Release the reference taken above instead of leaking it */
+		rc = -EINVAL;
+		goto out;
+	}
+
+	rc = alg_config(ai, name, op, argc, argv);
+	if (!rc) {
+		switch (op) {
+		case NPF_ALG_CONFIG_SET:
+			vrf_find_or_create(ext_vrfid); /* Bug */
+			ai->ai_ref_count++;
+			break;
+		case NPF_ALG_CONFIG_DELETE:
+			vrf_delete_by_ptr(vrf);
+			/* One config reference fewer is now outstanding */
+			ai->ai_ref_count--;
+			break;
+		}
+	}
+
+out:
+	vrf_delete_by_ptr(vrf);
+	return rc;
+}
+
+/*
+ * alg_search_all_then_any_sport()
+ *
+ * Certain algs (sip notably) can add multiple tuples that can match a
+ * single packet. This is because they may wild-card the sport (eg: set to
+ * zero) due to the connection-less nature of UDP.
+ *
+ * We need to ensure that a tuple containing both a sport and dport is matched
+ * prior to a tuple with a matching dport and sport == 0, so search both in
+ * that order.
+ */ +struct apt_tuple * +alg_search_all_then_any_sport(struct npf_alg_instance *ai, + struct npf_cache *npc, uint32_t ifx) +{ + struct apt_match_key m; + struct apt_tuple *at; + + alg_npc_to_tuple_key(npc, ifx, &m); + at = apt_tuple_lookup_all_any(ai->ai_apt, &m); + + return at; +} + +/* Get the base parent's nat struct */ +struct npf_nat *npf_alg_parent_nat(npf_session_t *se) +{ + return npf_session_get_nat(npf_session_get_base_parent(se)); +} + +static void alg_info_json(struct npf_alg *alg, json_writer_t *json) +{ + if (alg) { + jsonw_start_object(json); + jsonw_string_field(json, "name", alg->na_ops->name); + jsonw_bool_field(json, "enabled", alg->na_enabled); + jsonw_end_object(json); + } +} + +int +alg_dump(struct npf_alg_instance *ai, vrfid_t vrfid, json_writer_t *json) +{ + jsonw_start_object(json); + jsonw_uint_field(json, "vrfid", dp_vrf_get_external_id(vrfid)); + + jsonw_name(json, "algs"); + jsonw_start_array(json); + alg_info_json(ai->ai_ftp, json); + alg_info_json(ai->ai_tftp, json); + alg_info_json(ai->ai_sip, json); + alg_info_json(ai->ai_rpc, json); + jsonw_end_array(json); + + alg_apt_instance_jsonw(ai->ai_apt, json); + + jsonw_end_object(json); + return 0; +} + +/* alg enable */ +int npf_alg_state_set(uint32_t ext_vrfid, const char *name, int op) +{ + struct vrf *vrf; + struct npf_alg_instance *ai; + struct npf_alg *alg; + + vrf = dp_vrf_get_rcu_from_external(ext_vrfid); + if (!vrf) { + struct alg_late_vrf *late_vrf + = npf_alg_late_vrf_find(ext_vrfid); + if (!late_vrf) + late_vrf = npf_alg_late_vrf_add(ext_vrfid); + + const bool off = (op == NPF_ALG_CONFIG_DISABLE); + npf_alg_late_vrf_set_alg(late_vrf, name, !off); + + return 0; + } + + ai = vrf_get_npf_alg(vrf); + if (!ai) + return -EINVAL; + + alg = alg_name_to_alg(ai, name); + if (!alg) + return -ENOENT; + + /* + * Note that algs are enabled by default + */ + switch (op) { + case NPF_ALG_CONFIG_ENABLE: + if (!alg->na_enabled) + alg->na_enabled = true; + break; + case NPF_ALG_CONFIG_DISABLE: 
+ if (alg->na_enabled) + alg->na_enabled = false; + break; + default: + return -EINVAL; + } + + return 0; +} + +const char *npf_alg_id2name(enum npf_alg_id id) +{ + switch (id) { + case NPF_ALG_ID_FTP: + return NPF_ALG_FTP_NAME; + case NPF_ALG_ID_TFTP: + return NPF_ALG_TFTP_NAME; + case NPF_ALG_ID_RPC: + return NPF_ALG_RPC_NAME; + case NPF_ALG_ID_SIP: + return NPF_ALG_SIP_NAME; + }; + return "-"; +} + +void npf_alg_destroy_alg(struct npf_alg *alg) +{ + + alg->na_enabled = false; + alg->na_ops = NULL; + + alg_apt_instance_put(alg->na_ai_apt); + alg->na_ai_apt = NULL; + + free(alg); +} + +struct npf_alg * +npf_alg_create_alg(struct npf_alg_instance *ai, enum npf_alg_id id) +{ + struct npf_alg *alg; + + alg = zmalloc_aligned(sizeof(struct npf_alg)); + if (!alg) + return NULL; + + rte_atomic32_set(&alg->na_refcnt, 0); + alg->na_ai = ai; + alg->na_id = id; + alg->na_enabled = true; + + alg->na_ai_apt = alg_apt_instance_get(ai->ai_apt); + + return alg; +} + +static void +alg_periodic(struct rte_timer *timer __rte_unused, void *data __rte_unused) +{ + struct npf_alg_instance *ai; + struct vrf *vrf; + vrfid_t vrfid; + + VRF_FOREACH(vrf, vrfid) { + ai = vrf_get_npf_alg(vrf); + if (ai) { + /* Call an alg's periodic routine */ + if (alg_has_op(ai->ai_sip, periodic)) + ai->ai_sip->na_ops->periodic(ai->ai_sip); + } + } + + /* Until we graceful shutdown the dataplane */ + if (running) + npf_alg_timer_reset(); +} + +/* + * Create a per-vrf ALG instance + */ +struct npf_alg_instance *npf_alg_create_instance(uint32_t ext_vrfid) +{ + struct npf_alg_instance *ai; + + ai = zmalloc_aligned(sizeof(struct npf_alg_instance)); + if (!ai) + return NULL; + + /* Create tuple instance */ + ai->ai_apt = alg_apt_instance_create(ext_vrfid); + alg_apt_instance_get(ai->ai_apt); + + ai->ai_vrfid = ext_vrfid; + + /* Now specific alg instances */ + ai->ai_ftp = npf_alg_ftp_create_instance(ai); + if (!ai->ai_ftp) + goto out; + ai->ai_tftp = npf_alg_tftp_create_instance(ai); + if (!ai->ai_tftp) + goto 
out; + ai->ai_sip = npf_alg_sip_create_instance(ai); + if (!ai->ai_sip) + goto out; + ai->ai_rpc = npf_alg_rpc_create_instance(ai); + if (!ai->ai_rpc) + goto out; + + /* Find any disabled ALGs on the lookaside list */ + struct alg_late_vrf *late_vrf + = npf_alg_late_vrf_find(ext_vrfid); + if (late_vrf) { + zhash_t *algs = late_vrf->nv_algs; + bool *on_p; + for (on_p = zhash_first(algs); on_p; on_p = zhash_next(algs)) { + char const *name = zhash_cursor(algs); + struct npf_alg *alg = alg_name_to_alg(ai, name); + if (!alg) + continue; + alg->na_enabled = *on_p; + } + npf_alg_late_vrf_del(late_vrf); + } + + return ai; +out: + npf_alg_destroy_instance(ai); + return NULL; +} + +/* + * Notification from APT manager that a tuple is being deleted + */ +static void npf_alg_apt_delete_evt(struct apt_tuple *at) +{ + npf_session_t *se; + npf_nat_t *nat; + npf_natpolicy_t *np; + npf_rule_t *rl; + struct npf_alg_nat *an; + + + an = apt_tuple_get_nat(at); + + + if (an) { + apt_tuple_set_nat(at, NULL); + + /* Free port reserved for the secondary flow */ + se = apt_tuple_get_session(at); + nat = npf_session_get_nat(se); + np = npf_nat_get_policy(nat); + rl = npf_nat_get_rule(nat); + npf_nat_free_map(np, rl, NPF_NAT_MAP_PORT, + npf_session_get_proto(se), + an->an_vrfid, an->an_taddr, an->an_tport); + + free(an); + } + + /* + * Notify ALG that one of its tuples has been deleted + */ + struct npf_alg *alg; + + alg = (struct npf_alg *)apt_tuple_get_client_handle(at); + if (alg && alg_has_op(alg, tuple_delete)) + alg->na_ops->tuple_delete(at); + + /* + * client_data should now be NULL + */ + assert(!apt_tuple_get_client_data(at)); + + /* + * We can now release the reference that we took on the alg when the + * tuple was added. 
+ */ + if (alg) { + npf_alg_put(alg); + apt_tuple_clear_client_handle(at); + } +} + +static const struct apt_event_ops alg_apt_event_ops = { + .apt_delete = npf_alg_apt_delete_evt, +}; + +/* + * ALG GC timer + * + * We can never safely free an allocated timer, so + * create a global one for all ALG instances. + */ +void +npf_alg_timer_init(void) +{ + rte_timer_init(&alg_timer); + + alg_late_vrf_hash = zhash_new(); + + /* One-time registration to get tuple delete notifications */ + apt_event_register(&alg_apt_event_ops); +} + +void +npf_alg_timer_uninit(void) +{ + rte_timer_stop_sync(&alg_timer); + + npf_alg_late_vrfs_destroy(); + zhash_destroy(&alg_late_vrf_hash); +} + +void +npf_alg_timer_reset(void) +{ + rte_timer_reset(&alg_timer, ALG_INTERVAL * rte_get_timer_hz(), + SINGLE, rte_get_master_lcore(), alg_periodic, NULL); +} + +/* + * Called from whole dp unit-tests to delete all non-keep or multimatch + * tuples, and any expired 'keep' tuples. + */ +void npf_alg_flush_all(void) +{ + struct npf_alg_instance *ai; + struct apt_instance *ai_apt; + struct vrf *vrf; + vrfid_t vrfid; + + VRF_FOREACH(vrf, vrfid) { + ai = vrf_get_npf_alg(vrf); + if (!ai) + continue; + + ai_apt = ai->ai_apt; + if (!ai_apt) + continue; + + alg_apt_instance_flush(ai_apt); + } +} diff --git a/src/npf/alg/npf_alg_private.h b/src/npf/alg/alg.h similarity index 57% rename from src/npf/alg/npf_alg_private.h rename to src/npf/alg/alg.h index 7856e0bd..518f857b 100644 --- a/src/npf/alg/npf_alg_private.h +++ b/src/npf/alg/alg.h @@ -1,13 +1,13 @@ /* - * Copyright (c) 2018-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2018-2021, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2011-2016 by Brocade Communications Systems, Inc. * All rights reserved. 
* * SPDX-License-Identifier: LGPL-2.1-only */ -#ifndef NPF_ALG_PRIVATE -#define NPF_ALG_PRIVATE +#ifndef _ALG_H_ +#define _ALG_H_ typedef struct npf_alg npf_alg_t; @@ -26,6 +26,8 @@ typedef struct npf_alg npf_alg_t; #include "npf/npf_session.h" #include "urcu.h" #include "util.h" +#include "npf/alg/alg_apt.h" +#include "npf/alg/sip/sip.h" /* Forward Declarations */ struct npf_session; @@ -34,8 +36,6 @@ struct npf_alg; struct rte_mbuf; struct npf_nat; -/* IANA defined IP protocols */ -#define NPF_ALG_MAX_PROTOS 143 /* ALG Nat */ struct npf_alg_nat { @@ -47,31 +47,10 @@ struct npf_alg_nat { vrfid_t an_vrfid; }; -/* The protocol hash table set */ -struct alg_ht { - struct cds_lfht *a_ht; /* Hash table */ - rte_atomic64_t a_cnt; /* Counter */ -}; - -struct alg_protocol_tuples { - rte_spinlock_t apt_lock; - struct alg_ht apt_all; /* NPF_TUPLE_MATCH_ALL */ - struct alg_ht apt_any_sport; /* NPF_TUPLE_MATCH_ANY_SPORT */ - struct alg_ht apt_port; /* NPF_TUPLE_MATCH_PROTO_PORT */ - struct alg_ht apt_proto; /* NPF_TUPLE_MATCH_PROTO */ -}; - -/* For resetting an alg's config */ -struct npf_alg_reset { - const char *ar_name; - bool ar_hard; -}; - -struct npf_alg; - /* ALG Instance */ struct npf_alg_instance { - struct alg_protocol_tuples *ai_apts[NPF_ALG_MAX_PROTOS+1]; + struct apt_instance *ai_apt; + uint32_t ai_vrfid; uint32_t ai_ref_count; struct npf_alg *ai_ftp; struct npf_alg *ai_tftp; @@ -79,40 +58,10 @@ struct npf_alg_instance { struct npf_alg *ai_rpc; }; -/* The ALG tuple */ -struct npf_alg_tuple { - /* Private fields, don't touch these */ - struct cds_lfht_node nt_node; /* for CDS hash table */ - struct rcu_head nt_rcu_head; /* for rcu call */ - uint64_t nt_exp_ts; /* Expire timestamp */ - void *nt_aht; /* hash table */ - npf_session_t *nt_se; /* For a session handle */ - - /* ALG specific fields, touch these */ - const struct npf_alg *nt_alg; /* Alg handle for this tuple */ - struct npf_alg_nat *nt_nat; /* Nat for secondary flows */ - uint32_t nt_ifx; /* Interface 
index */ - uint32_t nt_alg_flags; /* Alg specific flags */ - uint16_t nt_flags; /* flags */ - uint16_t nt_timeout; /* Timeout in seconds */ - uint8_t nt_proto; /* IP protocol */ - uint8_t nt_alen; /* addr len */ - uint16_t nt_sport; /* src port */ - uint16_t nt_dport; /* dst port */ - npf_addr_t nt_srcip; /* src addr */ - npf_addr_t nt_dstip; /* dst addr */ - void *nt_data; /* Optional data */ - void (*nt_reap)(void *data); /* Reap for 'data' */ - void *nt_paired; /* Part of a pair? */ -}; - -/* Forward ref for *config */ -struct npf_alg; - /* ALG operations struct */ struct npf_alg_ops { int (*se_init)(npf_session_t *, npf_cache_t *, - struct npf_alg_tuple *, const int di); + struct apt_tuple *, const int di); void (*se_destroy)(npf_session_t *se); void (*se_expire)(npf_session_t *se); void (*inspect)(npf_session_t *, npf_cache_t *, @@ -127,6 +76,8 @@ struct npf_alg_ops { int (*nat_out)(npf_session_t *, npf_cache_t *, struct rte_mbuf *, struct npf_nat *); void (*periodic)(struct npf_alg *); + void (*tuple_delete)(struct apt_tuple *at); + void (*se_json)(json_writer_t *json, npf_session_t *se); const char *name; }; @@ -169,6 +120,7 @@ struct npf_alg { void *na_private; const struct npf_alg_ops *na_ops; struct npf_alg_instance *na_ai; + struct apt_instance *na_ai_apt; rte_atomic32_t na_refcnt; uint8_t na_num_configs; struct npf_alg_config na_configs[NPF_ALG_MAX_CONFIG]; @@ -182,24 +134,6 @@ struct npf_session_alg { bool sa_inspect; /* inspect packets? */ }; -/* ALG tuple flags */ -/* - * NB: flags values for KEEP, REMOVING, and EXPIRED are used directly by - * script vyatta-dp-npf-show-alg-state in package vplane-config-npf, so - * if these values change then the script will need updating. 
- */ -#define NPF_TUPLE_KEEP (1<<0) -#define NPF_TUPLE_MATCH_PROTO (1<<1) -#define NPF_TUPLE_MATCH_PROTO_PORT (1<<2) -#define NPF_TUPLE_MATCH_ALL (1<<3) -#define NPF_TUPLE_MATCH_ANY_SPORT (1<<4) -#define NPF_TUPLE_REMOVING (1<<5) -#define NPF_TUPLE_EXPIRED (1<<6) -#define NPF_TUPLE_MULTIMATCH (1<<7) -#define NPF_TUPLE_MATCH_MASK (NPF_TUPLE_MATCH_PROTO | \ - NPF_TUPLE_MATCH_PROTO_PORT | \ - NPF_TUPLE_MATCH_ALL | \ - NPF_TUPLE_MATCH_ANY_SPORT) /* ALG names */ #define NPF_ALG_FTP_NAME "ftp" @@ -215,6 +149,9 @@ struct npf_session_alg { const char *npf_alg_id2name(enum npf_alg_id id); +struct apt_tuple *alg_lookup_every_table(const struct ifnet *ifp, + const npf_cache_t *npc); + /* Convenience macros to get various instances from an alg instance */ #define alg_to_alg_inst(a) ((a)->na_ai) #define alg_to_npf_inst(a) (alg_to_alg_inst(a)->ai_ni) @@ -230,17 +167,14 @@ uint32_t npf_alg_session_get_flags(const struct npf_session *se); bool npf_alg_session_inspect(struct npf_session *se); void npf_alg_session_set_inspect(struct npf_session *se, bool v); int npf_alg_session_set_alg(struct npf_session *se, const struct npf_alg *alg); -const struct npf_alg *npf_alg_session_get_alg(const struct npf_session *se); +struct npf_alg *npf_alg_session_get_alg(const struct npf_session *se); -struct alg_protocol_tuples *alg_get_apt(struct npf_alg_instance *ai, - uint8_t proto); -struct npf_alg_tuple *alg_search_all_then_any_sport( - struct alg_protocol_tuples *apt, struct npf_cache *npc, - const struct ifnet *ifp); -void apt_expire_tuple(struct npf_alg_tuple *nt); +struct apt_tuple *alg_search_all_then_any_sport(struct npf_alg_instance *ai, + struct npf_cache *npc, + uint32_t ifx); /* Protos */ -int npf_alg_register(struct npf_alg *np); +int npf_alg_register(struct npf_alg *na); void alg_reset_instance(struct vrf *vrf, struct npf_alg_instance *ai, bool hard); int npf_alg_manage_config_item(struct npf_alg *na, struct npf_alg_config *ac, @@ -248,9 +182,9 @@ int 
npf_alg_manage_config_item(struct npf_alg *na, struct npf_alg_config *ac, int npf_alg_port_handler(struct npf_alg *na, int op, const struct npf_alg_config_item *ci); int npf_alg_session_nat(npf_session_t *se, struct npf_nat *nat, - npf_cache_t *npc, const int di, - struct npf_alg_tuple *nt); -int npf_alg_reserve_translations(npf_session_t *se, int nr_ports, + npf_cache_t *npc, const int di, struct apt_tuple *nt, + struct npf_alg_nat *an); +int npf_alg_reserve_translations(npf_session_t *parent, int nr_ports, bool start_even, uint8_t alen, npf_addr_t *addr, in_port_t *port); int npf_alg_free_translation(npf_session_t *se, npf_addr_t *addr, @@ -261,22 +195,10 @@ struct npf_alg *npf_alg_create_alg(struct npf_alg_instance *ai, void npf_alg_timer_init(void); void npf_alg_timer_uninit(void); void npf_alg_timer_reset(void); -void alg_destroy_apts(struct npf_alg_instance *ai); -void npf_alg_tuple_pair(struct npf_alg_tuple *nt1, struct npf_alg_tuple *nt2); -void npf_alg_tuple_unpair(struct npf_alg_tuple *nt); -void npf_alg_tuple_expire_pair(struct npf_alg_tuple *nt); void alg_expire_session_tuples(const struct npf_alg *alg, npf_session_t *se); -void npf_alg_tuple_expire(struct npf_alg_tuple *nt); -struct npf_alg_tuple *npf_alg_tuple_alloc(void); -void npf_alg_tuple_free(struct npf_alg_tuple *nt); -int npf_alg_tuple_add_replace(struct npf_alg_instance *ai, - struct npf_alg_tuple *nt); -int npf_alg_tuple_insert(struct npf_alg_instance *ai, struct npf_alg_tuple *nt); -struct npf_alg_tuple *npf_alg_tuple_lookup(struct npf_alg_instance *ai, - struct npf_alg_tuple *nt); -void npf_alg_tuple_get(struct npf_alg_tuple *nt); -struct npf_alg_tuple *alg_lookup_npc(struct npf_alg_instance *ai, - npf_cache_t *npc, uint32_t ifx); +void alg_destroy_session_tuples(const struct npf_alg *alg, npf_session_t *se); +struct apt_tuple *alg_lookup_npc(struct npf_alg_instance *ai, + npf_cache_t *npc, uint32_t ifx); struct npf_nat *npf_alg_parent_nat(npf_session_t *se); @@ -285,15 +207,19 @@ int 
alg_dump(struct npf_alg_instance *ai, vrfid_t vrfid, int npf_alg_config(uint32_t ext_vrfid, const char *name, int op, int argc, char **argv); int npf_alg_state_set(uint32_t ext_vrfid, const char *name, int op); + struct npf_alg *npf_alg_tftp_create_instance(struct npf_alg_instance *ai); -void npf_alg_tftp_destroy_instance(struct npf_alg *na); +void npf_alg_tftp_destroy_instance(struct npf_alg *tftp); + struct npf_alg *npf_alg_ftp_create_instance(struct npf_alg_instance *ai); -void npf_alg_ftp_destroy_instance(struct npf_alg *na); +void npf_alg_ftp_destroy_instance(struct npf_alg *ftp); + struct npf_alg *npf_alg_sip_create_instance(struct npf_alg_instance *ai); -void npf_alg_sip_destroy_instance(struct npf_alg *na); +void npf_alg_sip_destroy_instance(struct npf_alg *sip); + struct npf_alg *npf_alg_rpc_create_instance(struct npf_alg_instance *ai); -void npf_alg_rpc_destroy_instance(struct npf_alg *na); +void npf_alg_rpc_destroy_instance(struct npf_alg *rpc); + void npf_alg_flush_all(void); -void npf_alg_purge(struct npf_alg_instance *ai); -#endif /* End of NPF_ALG_PRIVATE */ +#endif /* End of _ALG_H_ */ diff --git a/src/npf/alg/alg_apt.c b/src/npf/alg/alg_apt.c new file mode 100644 index 00000000..fe96feaf --- /dev/null +++ b/src/npf/alg/alg_apt.c @@ -0,0 +1,1370 @@ +/* + * Copyright (c) 2019-2021, AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + * + * ALG Protocol Tuple (APT) Database + */ + +#include +#include "json_writer.h" +#include "vplane_log.h" +#include "util.h" + +#include "npf/npf_vrf.h" +#include "npf/alg/alg_apt.h" + +/* Reqd until apt instance is moved from alg instance to vrf */ +#include "npf/alg/alg.h" + +/* + * ALG tuple hash table. + * + * The ALG framework consists of an API, executed at certain points along a + * packets path throughout NPF, as well as an expected flow tuple database. 
+ * + * The tuple database consists of a set of three hash tables, each table + * representing the type of tuple match. The tables represent 'wildcard' + * matching of various parts of a possible 6-tuple: + * + * 2-tuple (proto and dest port). + * 5-tuple (intf, proto, saddr, daddr, dport). Aka 'any source port' table. + * 6-tuple (intf, proto, saddr, daddr, dport, sport). Aka 'all' table. + * + * Matches must be made in a 'most-restrictive' to 'least-restrictive' manner, + * meaning a match for a 6-tuple must be made prior to a match for a 5-tuple. + * + * When a packet enters the framework, a lookup into the tuple database is + * performed and if a match is made, the packet is forwarded to the alg set in + * the tuple. + * + * In practice, we can divide tuples into two types: 'keep' and 'non-keep'. + * + * 'keep' tuples are added via ALG configuration, and consist of a protocol + * and well known port. These are typically added to the 2-tuple (proto and + * dest port) table, and are used to identify the initial flow of an ALG. + * + * One exception is SIP, which adds a 'keep' tuple to the 'any src port' table + * to detect a secondary flow. It does this since it uses its own mechanism + * to time out SIP Requests, and hence the tuple. + * + * 'non-keep' tuples are added by the ALGs themselves when they determine + * secondary flow information from the initial flow packets. These are added + * to either the 'all' table or the 'any source port' table. They always + * match on protocol, and usually all match on interface, i.e. they are + * 5-tuple or 6-tuple. + */ + + +/* + * Each apt instance contains three hash tables. Each table has the following + * parameters. + */ +#define APT_INIT 32 +#define APT_MIN 256 +#define APT_MAX (16*1024) + +/* Retry count for tuple insertions.
*/ +#define APT_RETRY_COUNT 10 + +/* Default timeout */ +#define APT_TIMEOUT 10 + +/* Tuple garbage collector */ +#define APT_GC_INTERVAL 5 +static struct rte_timer apt_timer; + +static rte_atomic32_t apt_instance_count; +static rte_atomic32_t apt_init_flag; + +/* + * APT Hash Table + */ +struct apt_table { + struct cds_lfht *at_ht; + rte_atomic32_t at_count; +}; + +/* + * APT Instance + * + * Currently hangs off the alg instance structure 'struct npf_alg_instance' + * (which, in turn, hangs off the vrf instance). + */ +struct apt_instance { + rte_atomic32_t ai_refcnt; + uint32_t ai_vrfid; + + rte_spinlock_t ai_lock; /* Used by paired tuples */ + + /* Hash tables */ + struct apt_table ai_dport; /* 2-tuple (pcol, dest port) */ + struct apt_table ai_any_sport; /* 5-tuple (any src port) */ + struct apt_table ai_all; /* 6-tuple */ +}; + +/* + * Longest name is 'tftp'. at_client_name is only used when generating json, + * and it avoids us having to get the name from the client. + */ +#define APT_NAME_MAX 5 + +/* + * Tuple + */ +struct apt_tuple { + /* Private fields */ + struct cds_lfht_node at_node; /* hash table node */ + struct rcu_head at_rcu_head; + uint64_t at_exp_ts; /* Expire timestamp */ + struct apt_table *at_ht; /* back ptr to hash table */ + rte_atomic32_t at_refcnt; + + /* Public fields */ + + /* + * Client (ALG) handle and context. 'at_client' is a pointer to an + * 'struct npf_alg' structure for npf. Only sip and rsh have + * client_data. + */ + void *at_client; + uint32_t at_client_flags; + void *at_client_data; + char at_client_name[APT_NAME_MAX]; + + /* + * npf context. + * + * 'at_session' is a pointer to the 'parent' session that created the + * tuple. When a packet matches this tuple then a new 'child' session + * is created and linked to the parent tuple, after which the tuple is + * deleted. + * + * 'at_nat' is a short-lived data structure that contains some basic + * npf nat information about the parent session. 
+ */ + void *at_session; + void *at_nat; /* struct npf_alg_nat */ + + /* + * 'at_paired' points to a paired tuple. This is used by SIP when the + * first packet of a secondary flow may occur from either direction. + * First tuple of the pair to see this flow 'wins'. + */ + struct apt_tuple *at_paired; + + uint8_t at_keep:1; /* Do not timeout */ + uint8_t at_expired:1; + uint8_t at_removing:1; + + /* + * 'multimatch' tuples are special SIP tuples. They are 5-tuples (any + * src port) with the well known SIP dest port(5060). They are not + * expired after the initial packet has matched the tuple, and hence + * are used to match multiple flows. They are expired when the parent + * session is expired. + */ + uint8_t at_multimatch:1; + uint16_t at_timeout; /* Timeout in seconds */ + + /* Match type */ + enum apt_match_table at_match; + + /* Match key */ + uint8_t at_proto; /* IP protocol */ + uint8_t at_alen; /* addr len */ + uint16_t at_sport; /* src port */ + uint16_t at_dport; /* dst port */ + uint32_t at_ifx; /* Interface index */ + npf_addr_t at_srcip; /* src addr */ + npf_addr_t at_dstip; /* dst addr */ +}; + + +/* Forward references */ +static struct apt_tuple *apt_tuple_lookup_key(struct apt_instance *ai, + struct apt_match_key *m); +static void apt_init(void); +static void apt_uninit(void); + + +/* + * APT client registration. Used for notification of APT database events to + * ALGs. + * + * Currently the only notification is when a tuple is being deleted in order + * to allow ALGs to cleanup. 
+ */ +static struct apt_event_ops *apt_ops[APT_EVENT_MAX_OPS]; + +void apt_event_register(const struct apt_event_ops *ops) +{ + uint32_t i; + + for (i = 0; i < ARRAY_SIZE(apt_ops); i++) { + const struct apt_event_ops *tmp; + tmp = rcu_dereference(apt_ops[i]); + + /* Do not register same thing twice */ + if (tmp && tmp == ops) + return; + } + + for (i = 0; i < ARRAY_SIZE(apt_ops); i++) { + if (!rcu_cmpxchg_pointer(&apt_ops[i], NULL, + (struct apt_event_ops *)ops)) + return; + } +} + +/* + * Hash table matching function + */ +static int apt_table_match(struct cds_lfht_node *node, const void *key) +{ + const struct apt_match_key *m = key; + struct apt_tuple *at; + + at = caa_container_of(node, struct apt_tuple, at_node); + + /* Never match expired tuples */ + if (at->at_expired) + return 0; + + /* + * Interface match is optional. This is not used for 2-tuple (proto, + * dest port) matches, but is used for 'all' and 'any sport' matches. + */ + if (at->at_ifx && (at->at_ifx != m->m_ifx)) + return 0; + + /* Always match on protocol */ + if (at->at_proto != m->m_proto) + return 0; + + /* Which table are we matching in? */ + switch (m->m_match) { + case APT_MATCH_DPORT: + /* + * Match on destination port. This is used to detect the + * primary ALG flow. + */ + if (at->at_dport != m->m_dport) + return 0; + break; + + case APT_MATCH_ANY_SPORT: + /* fall through */ + case APT_MATCH_ALL: + /* + * 6-tuple of 5-tuple matches are typically used to detect the + * secondary ALG flow. For some protocols, we do not know the + * source address. 
+ */ + if (at->at_alen != m->m_alen) + return 0; + if (m->m_match == APT_MATCH_ALL && + at->at_sport != m->m_sport) + return 0; + if (at->at_dport != m->m_dport) + return 0; + if (memcmp(&at->at_srcip, m->m_srcip, m->m_alen) != 0) + return 0; + if (memcmp(&at->at_dstip, m->m_dstip, m->m_alen) != 0) + return 0; + break; + + case APT_MATCH_NONE: + return 1; + } + + return 1; +} + +/* + * Tuple hash function + */ +static uint32_t apt_table_hash(const struct apt_match_key *m) +{ + const uint32_t *src; + const uint32_t *dst; + uint32_t hash = 0; + + switch (m->m_match) { + case APT_MATCH_DPORT: + hash = (m->m_dport << 16) | m->m_proto; + break; + + case APT_MATCH_ANY_SPORT: + /* fall through */ + case APT_MATCH_ALL: + /* Don't use sport, it can be wildcarded */ + src = m->m_srcip->s6_addr32; + dst = m->m_dstip->s6_addr32; + + hash = rte_jhash_2words(m->m_dport, m->m_proto, 0); + + if (m->m_alen == 4) + return rte_jhash_2words(src[0], dst[0], hash); + + const uint32_t sz = m->m_alen >> 2; + + hash = rte_jhash_32b(src, sz, hash); + hash = rte_jhash_32b(dst, sz, hash); + break; + + case APT_MATCH_NONE: + break; + } + + return hash; +} + +/* + * Select table from match enum + */ +static struct apt_table * +apt_table_select(struct apt_instance *ai, enum apt_match_table match) +{ + switch (match) { + case APT_MATCH_DPORT: + return &ai->ai_dport; + case APT_MATCH_ALL: + return &ai->ai_all; + case APT_MATCH_ANY_SPORT: + return &ai->ai_any_sport; + case APT_MATCH_NONE: + return NULL; + } + return NULL; +} + +/* Get number of entries (expired and unexpired) in a table */ +uint32_t apt_table_count(struct apt_instance *ai, enum apt_match_table tt) +{ + struct apt_table *tbl; + + tbl = apt_table_select(ai, tt); + if (!tbl) + return 0; + + return rte_atomic32_read(&tbl->at_count); +} + + +/* Create a tuple */ +static struct apt_tuple *apt_tuple_create(void) +{ + struct apt_tuple *at; + + at = zmalloc_aligned(sizeof(*at)); + + if (at) { + rte_atomic32_init(&at->at_refcnt); + 
at->at_client_name[0] = '\0'; + } + + return at; +} + +/* Free tuple */ +static void apt_tuple_free(struct rcu_head *head) +{ + struct apt_tuple *at = + caa_container_of(head, struct apt_tuple, at_rcu_head); + free(at); +} + +/* + * Take reference on tuple + * + * A reference is taken: + * 1. When a tuple is added to a hash table, or + * 2. When a tuple is paired with another tuple. + */ +static struct apt_tuple *apt_tuple_get(struct apt_tuple *at) +{ + if (at) + rte_atomic32_inc(&at->at_refcnt); + return at; +} + +/* + * Release reference on tuple + */ +static void apt_tuple_put(struct apt_tuple *at) +{ + if (at && rte_atomic32_dec_and_test(&at->at_refcnt)) + call_rcu(&at->at_rcu_head, apt_tuple_free); +} + +/* + * Link two tuples together. Used when the first packet of a secondary flow + * can occur in either direction. + */ +int alg_apt_tuple_pair(struct apt_tuple *at1, struct apt_tuple *at2) +{ + assert(at1->at_paired == NULL); + assert(at2->at_paired == NULL); + + if (!at1->at_paired && !at2->at_paired) { + at1->at_paired = apt_tuple_get(at2); + at2->at_paired = apt_tuple_get(at1); + return 0; + } + return -EINVAL; +} + +/* + * Unlink two tuples + */ +static void apt_tuple_unpair(struct apt_tuple *at1, struct apt_tuple *at2) +{ + if (at1) { + apt_tuple_put(at1->at_paired); + at1->at_paired = NULL; + } + if (at2) { + apt_tuple_put(at2->at_paired); + at2->at_paired = NULL; + } +} + +/* + * Unlink two tuples, and expire them + */ +static void apt_tuple_unpair_and_expire(struct apt_tuple *at1) +{ + if (!at1) + return; + + struct apt_tuple *at2 = at1->at_paired; + + apt_tuple_unpair(at1, at2); + alg_apt_tuple_expire(at1); + alg_apt_tuple_expire(at2); +} + +/* + * Verify the tuple of a secondary flow before creating a child session. 
+ * Called from npf_alg_session + */ +bool apt_tuple_verify_and_expire(struct apt_instance *ai, struct apt_tuple *at) +{ + bool do_drop = false; + + /* + * There is one race we are concerned with: Possible receipt of both a + * forward and reverse packet. + * + * This race is problematic. We could wind up with two session + * handles, one containing the forward sentry and one containing its + * backward sentry. We cannot allow that. So detect and drop on the + * basis of tuple expiration. + * + * Regardless, expire all tuples for this match. + */ + if (unlikely(at->at_paired)) { + /* Paired tuple */ + rte_spinlock_lock(&ai->ai_lock); + + if (at->at_expired) + do_drop = true; + apt_tuple_unpair_and_expire(at); + + rte_spinlock_unlock(&ai->ai_lock); + } else { + /* Not paired */ + if (!at->at_multimatch) + alg_apt_tuple_expire(at); + } + + return do_drop; +} + +/* + * Insert tuple into a table + */ +static int apt_tuple_insert(struct apt_instance *ai, struct apt_tuple *at, + bool replace) +{ + struct apt_match_key m; + struct apt_table *tbl; + uint32_t hash; + int rc, retry; + + /* Select table from match flags */ + tbl = apt_table_select(ai, at->at_match); + if (!tbl) + return -ENOENT; + + if (unlikely(rte_atomic32_read(&tbl->at_count) >= APT_MAX)) { + if (net_ratelimit()) + RTE_LOG(DEBUG, FIREWALL, + "APT: table full\n"); + return -ENOSPC; + } + + rcu_assign_pointer(at->at_ht, tbl); + + if (!at->at_keep) + apt_tuple_set_timeout(at, APT_TIMEOUT); + + /* Fill-in the match structure */ + m.m_match = at->at_match; + m.m_srcip = &at->at_srcip; + m.m_dstip = &at->at_dstip; + m.m_ifx = at->at_ifx; + m.m_dport = at->at_dport; + m.m_sport = at->at_sport; + m.m_proto = at->at_proto; + m.m_alen = at->at_alen; + + cds_lfht_node_init(&at->at_node); + hash = apt_table_hash(&m); + + /* + * If 'replace' is true, then the alg is attempting to replace an + * existing tuple. Do this by expiring the existing tuple and + * retrying for a limited number of times. 
+ */ + rc = -EEXIST; + retry = APT_RETRY_COUNT; + + while (retry--) { + struct cds_lfht_node *node; + + node = cds_lfht_add_unique(tbl->at_ht, hash, apt_table_match, + &m, &at->at_node); + if (node == &at->at_node) { + /* Success */ + apt_tuple_get(at); + rte_atomic32_inc(&tbl->at_count); + + return 0; + } + + /* Tuple already exists. Expire if necessary */ + if (replace) { + struct apt_tuple *old; + old = caa_container_of(node, struct apt_tuple, + at_node); + alg_apt_tuple_expire(old); + } else + break; + } + + return rc; +} + +/* + * Create a tuple, and insert into table + */ +struct apt_tuple * +apt_tuple_create_and_insert(struct apt_instance *ai, struct apt_match_key *m, + void *client, uint32_t client_flags, + const char *client_name, bool replace, bool keep) +{ + struct apt_tuple *at; + int rc; + + at = apt_tuple_create(); + if (!at) + return NULL; + + at->at_match = m->m_match; + if (client_name) { + strncpy(at->at_client_name, client_name, APT_NAME_MAX); + at->at_client_name[APT_NAME_MAX-1] = '\0'; + } + + if (m->m_srcip && m->m_alen) + memcpy(&at->at_srcip.s6_addr, &m->m_srcip->s6_addr, m->m_alen); + + if (m->m_dstip && m->m_alen) + memcpy(&at->at_dstip.s6_addr, &m->m_dstip->s6_addr, m->m_alen); + + at->at_ifx = m->m_ifx; + at->at_dport = m->m_dport; + at->at_sport = m->m_sport; + at->at_proto = m->m_proto; + at->at_alen = m->m_alen; + + at->at_keep = keep; + at->at_client = client; + at->at_client_flags = client_flags; + + rc = apt_tuple_insert(ai, at, replace); + if (rc < 0) { + free(at); + return NULL; + } + + return at; +} + +/* + * Expire a tuple + */ +void alg_apt_tuple_expire(struct apt_tuple *at) +{ + if (!at) + return; + + at->at_expired = true; + + /* Unpair only. NOTE(review): the peer is not marked expired here */ + if (at->at_paired) + apt_tuple_unpair(at, at->at_paired); +} + +/* + * Lookup tuple and expire if found + */ +int alg_apt_tuple_lookup_and_expire(struct apt_instance *ai, + struct apt_match_key *m) +{ + struct apt_tuple *at; + + at = 
apt_tuple_lookup_key(ai, m); + if (!at) + return -ENOENT; + + alg_apt_tuple_expire(at); + return 0; +} + +/* + * Notify interested clients (the ALG infra) that a tuple has been deleted + */ +static void apt_tuple_delete_event(struct apt_tuple *at) +{ + uint32_t i; + struct apt_event_ops *ops; + + for (i = 0; i < ARRAY_SIZE(apt_ops); i++) { + ops = rcu_dereference(apt_ops[i]); + if (ops && ops->apt_delete) + ops->apt_delete(at); + } +} + +/* + * Delete a tuple + */ +static void apt_tuple_delete(struct apt_table *tbl, struct apt_tuple *at) +{ + if (!at) + return; + + if (!tbl) + tbl = rcu_dereference(at->at_ht); + + /* Mark expired, if not already expired */ + alg_apt_tuple_expire(at); + + if (tbl && !cds_lfht_del(tbl->at_ht, &at->at_node)) { + /* Successfully removed from hash table */ + rte_atomic32_dec(&tbl->at_count); + + /* Notify interested clients */ + apt_tuple_delete_event(at); + + /* Release reference */ + apt_tuple_put(at); + } +} + +/* + * Lookup one or more hash tables in an apt instance. + */ +static struct apt_tuple * +apt_tuple_lookup(struct apt_instance *ai, struct apt_match_key *m, + const enum apt_match_table *match_tbls, uint ntables) +{ + struct apt_table *tbl; + struct cds_lfht_node *node; + struct cds_lfht_iter iter; + struct apt_tuple *at = NULL; + uint i; + + /* For each table to lookup ... */ + for (i = 0; i < ntables; i++) { + m->m_match = match_tbls[i]; + + /* Select table from instance and match enum */ + tbl = apt_table_select(ai, m->m_match); + if (!tbl) + continue; + + if (rte_atomic32_read(&tbl->at_count) == 0) + continue; + + cds_lfht_lookup(tbl->at_ht, apt_table_hash(m), apt_table_match, + m, &iter); + + node = cds_lfht_iter_get_node(&iter); + if (node) { + at = caa_container_of(node, struct apt_tuple, at_node); + return at; + } + } + + return NULL; +} + +/* + * Lookup 6-tuple (all) table, then lookup 5-tuple (any source port) table, + * then any dest port table. 
+ */ +struct apt_tuple * +apt_tuple_lookup_all_any_dport(struct apt_instance *ai, + struct apt_match_key *m) +{ + enum apt_match_table match_tbl[] = {APT_MATCH_ALL, + APT_MATCH_ANY_SPORT, + APT_MATCH_DPORT}; + + return apt_tuple_lookup(ai, m, match_tbl, ARRAY_SIZE(match_tbl)); +} + +/* + * Lookup 6-tuple (all) table, then lookup 5-tuple (any source port) table. + */ +struct apt_tuple * +apt_tuple_lookup_all_any(struct apt_instance *ai, struct apt_match_key *m) +{ + enum apt_match_table match_tbl[] = {APT_MATCH_ALL, + APT_MATCH_ANY_SPORT}; + + return apt_tuple_lookup(ai, m, match_tbl, ARRAY_SIZE(match_tbl)); +} + +/* + * Lookup destination port table + */ +struct apt_tuple * +apt_tuple_lookup_dport(struct apt_instance *ai, struct apt_match_key *m) +{ + enum apt_match_table match_tbl[] = {APT_MATCH_DPORT}; + + return apt_tuple_lookup(ai, m, match_tbl, ARRAY_SIZE(match_tbl)); +} + +/* + * Lookup table specified in the match key object, m_match + */ +static struct apt_tuple * +apt_tuple_lookup_key(struct apt_instance *ai, struct apt_match_key *m) +{ + enum apt_match_table match_tbl[] = {m->m_match}; + + return apt_tuple_lookup(ai, m, match_tbl, ARRAY_SIZE(match_tbl)); +} + +/******************************************************************* + * Tuple accessors start + */ + +/* Get client handle */ +void *apt_tuple_get_client_handle(struct apt_tuple *at) +{ + return at->at_client; +} + +/* + * Set the client handle to NULL when the tuple is being deleted, and we have + * released the handle. 
+ */ +void apt_tuple_clear_client_handle(struct apt_tuple *at) +{ + at->at_client = NULL; +} + +/* Get client flags */ +uint32_t apt_tuple_get_client_flags(struct apt_tuple *at) +{ + return at->at_client_flags; +} + +/* Get client data */ +void *apt_tuple_get_client_data(struct apt_tuple *at) +{ + return at->at_client_data; +} + +/* Set client data */ +void apt_tuple_set_client_data(struct apt_tuple *at, void *data) +{ + at->at_client_data = data; +} + +/* Set session handle */ +void apt_tuple_set_session(struct apt_tuple *at, void *session) +{ + at->at_session = session; +} + +/* Get session handle */ +void *apt_tuple_get_session(struct apt_tuple *at) +{ + return at->at_session; +} + +/* Get session handle only if active */ +void *apt_tuple_get_active_session(struct apt_tuple *at) +{ + void *se = apt_tuple_get_session(at); + + if (!se) + return NULL; + + return npf_session_is_active(se) ? se : NULL; +} + +/* Set NAT handle */ +void apt_tuple_set_nat(struct apt_tuple *at, void *nat) +{ + at->at_nat = nat; +} + +/* Get NAT handle */ +void *apt_tuple_get_nat(struct apt_tuple *at) +{ + return at->at_nat; +} + +/* Set tuple timeout */ +void apt_tuple_set_timeout(struct apt_tuple *at, uint32_t timeout) +{ + at->at_timeout = timeout; + at->at_exp_ts = get_time_uptime() + timeout; +} + +/* Set multimatch attribute */ +void apt_tuple_set_multimatch(struct apt_tuple *at, bool val) +{ + at->at_multimatch = val; +} + +/* Get tuple table type */ +enum apt_match_table apt_tuple_get_table_type(struct apt_tuple *at) +{ + return at->at_match; +} + +/* + * Tuple accessors end + *******************************************************************/ + +/* + * Destroy an apt instance table + */ +static void apt_table_destroy(struct apt_table *tbl) +{ + struct cds_lfht_iter iter; + struct apt_tuple *at; + + if (!tbl->at_ht) + return; + + /* + * For each tuple in hash table: remove from hash table, mark as + * expired, and rcu-free. 
+ */ + cds_lfht_for_each_entry(tbl->at_ht, &iter, at, at_node) + apt_tuple_delete(tbl, at); + + dp_ht_destroy_deferred(tbl->at_ht); + tbl->at_ht = NULL; +} + +/* + * Destroy an apt instance + */ +static void apt_instance_destroy(struct apt_instance *ai) +{ + if (ai) { + apt_table_destroy(&ai->ai_all); + apt_table_destroy(&ai->ai_any_sport); + apt_table_destroy(&ai->ai_dport); + free(ai); + + /* Last instance? */ + if (rte_atomic32_dec_and_test(&apt_instance_count)) { + apt_uninit(); + rte_atomic32_clear(&apt_init_flag); + } + } +} + +/* + * Create an apt instance hash table + */ +static struct cds_lfht *apt_table_create(void) +{ + return cds_lfht_new(APT_INIT, APT_MIN, APT_MAX, + CDS_LFHT_AUTO_RESIZE | CDS_LFHT_ACCOUNTING, NULL); +} + +/* + * Create an apt instance + */ +struct apt_instance *alg_apt_instance_create(uint32_t ext_vrfid) +{ + struct apt_instance *ai; + + ai = zmalloc_aligned(sizeof(*ai)); + if (!ai) + return NULL; + + rte_atomic32_inc(&apt_instance_count); + + /* First instance? */ + if (rte_atomic32_test_and_set(&apt_init_flag)) + apt_init(); + + rte_atomic32_init(&ai->ai_refcnt); + ai->ai_vrfid = ext_vrfid; + + rte_spinlock_init(&ai->ai_lock); + + ai->ai_all.at_ht = apt_table_create(); + if (!ai->ai_all.at_ht) + goto error; + + ai->ai_any_sport.at_ht = apt_table_create(); + if (!ai->ai_any_sport.at_ht) + goto error; + + ai->ai_dport.at_ht = apt_table_create(); + if (!ai->ai_dport.at_ht) + goto error; + + return ai; + +error: + apt_instance_destroy(ai); + return NULL; +} + +/* + * Take reference on instance. + * + * A reference is taken by the ALG instance plus each ALG on that ALG + * instance. 
+ */ +struct apt_instance *alg_apt_instance_get(struct apt_instance *ai) +{ + if (ai) + rte_atomic32_inc(&ai->ai_refcnt); + return ai; +} + +/* Release reference on instance */ +void alg_apt_instance_put(struct apt_instance *ai) +{ + if (ai && rte_atomic32_dec_and_test(&ai->ai_refcnt)) + apt_instance_destroy(ai); +} + +/* + * Write json for one tuple + */ +static void apt_tuple_jsonw(struct apt_tuple *at, json_writer_t *json) +{ + int family = 0; + char buf[INET6_ADDRSTRLEN]; + + /* Only display initialized fields */ + + jsonw_start_object(json); + jsonw_string_field(json, "alg", at->at_client_name); + + if (at->at_exp_ts) + jsonw_uint_field(json, "timestamp", at->at_exp_ts); + if (at->at_proto) + jsonw_uint_field(json, "protocol", at->at_proto); + if (at->at_session) + jsonw_bool_field(json, "session", true); + if (at->at_ifx) + jsonw_uint_field(json, "if_index", at->at_ifx); + if (at->at_client_flags) + jsonw_uint_field(json, "alg_flags", at->at_client_flags); + + if (at->at_timeout) + jsonw_uint_field(json, "timeout", at->at_timeout); + + /* + * Create old-style flags bitmap until config scripts are updated. 
+ */ + uint32_t at_flags = 0; + + switch (at->at_match) { + case APT_MATCH_DPORT: + at_flags |= NPF_TUPLE_MATCH_PROTO_PORT; + break; + case APT_MATCH_ALL: + at_flags |= NPF_TUPLE_MATCH_ALL; + break; + case APT_MATCH_ANY_SPORT: + at_flags |= NPF_TUPLE_MATCH_ANY_SPORT; + break; + case APT_MATCH_NONE: + break; + } + + if (at->at_keep) + at_flags |= NPF_TUPLE_KEEP; + if (at->at_removing) + at_flags |= NPF_TUPLE_REMOVING; + if (at->at_expired) + at_flags |= NPF_TUPLE_EXPIRED; + if (at->at_multimatch) + at_flags |= NPF_TUPLE_MULTIMATCH; + + if (at_flags) + jsonw_uint_field(json, "flags", at_flags); + + if (at->at_sport) + jsonw_uint_field(json, "sport", ntohs(at->at_sport)); + if (at->at_dport) + jsonw_uint_field(json, "dport", ntohs(at->at_dport)); + + switch (at->at_alen) { + case 4: + family = AF_INET; + break; + case 16: + family = AF_INET6; + break; + default: + family = 0; + + } + + if (family) { + inet_ntop(family, &at->at_srcip, buf, sizeof(buf)); + jsonw_string_field(json, "srcip", buf); + inet_ntop(family, &at->at_dstip, buf, sizeof(buf)); + jsonw_string_field(json, "dstip", buf); + jsonw_uint_field(json, "alen", at->at_alen); + } + + if (at->at_client_data) + jsonw_bool_field(json, "tuple_data", true); + + jsonw_end_object(json); +} + +/* + * Write json for an apt table + */ +static void apt_table_jsonw(struct apt_table *tbl, json_writer_t *json) +{ + struct cds_lfht_iter iter; + struct apt_tuple *at; + + if (!tbl->at_ht) + return; + + if (rte_atomic32_read(&tbl->at_count) == 0) + return; + + cds_lfht_for_each_entry(tbl->at_ht, &iter, at, at_node) + apt_tuple_jsonw(at, json); +} + +/* + * Write json for an apt instance + */ +void alg_apt_instance_jsonw(struct apt_instance *ai, json_writer_t *json) +{ + jsonw_name(json, "tuples"); + jsonw_start_array(json); + + apt_table_jsonw(&ai->ai_dport, json); + apt_table_jsonw(&ai->ai_any_sport, json); + apt_table_jsonw(&ai->ai_all, json); + + jsonw_end_array(json); +} + +/* + * Expire tuples for a given session + */ 
+static void apt_table_expire_session(struct apt_table *tbl, const void *session) +{ + struct cds_lfht_iter iter; + struct apt_tuple *at; + + if (rte_atomic32_read(&tbl->at_count) == 0) + return; + + cds_lfht_for_each_entry(tbl->at_ht, &iter, at, at_node) { + if (at->at_session != session) + continue; + + alg_apt_tuple_expire(at); + } +} + +/* + * Notification that a session has been expired. + * + * Expire all tuples that contain this session handle. Only applies to + * 'all' and 'any_sport' tables since these are the only ones created via a + * session. (The 'dport' table is managed via config) + */ +void alg_apt_instance_expire_session(struct apt_instance *ai, + const void *session) +{ + if (ai) { + /* dest port table tuples will never have a session */ + apt_table_expire_session(&ai->ai_all, session); + apt_table_expire_session(&ai->ai_any_sport, session); + } +} + +static void +apt_table_destroy_session(struct apt_table *tbl, const void *session) +{ + struct cds_lfht_iter iter; + struct apt_tuple *at; + + if (rte_atomic32_read(&tbl->at_count) == 0) + return; + + cds_lfht_for_each_entry(tbl->at_ht, &iter, at, at_node) { + if (at->at_session != session) + continue; + + apt_tuple_delete(tbl, at); + } +} + +/* + * Delete any tuples created by the given session. Only applies to the 'all' + * and 'any_sport' tables since these are the only ones created via a session. + * (The 'dport' table is managed via config) + */ +void alg_apt_instance_destroy_session(struct apt_instance *ai, + const void *session) +{ + if (ai) { + apt_table_destroy_session(&ai->ai_all, session); + apt_table_destroy_session(&ai->ai_any_sport, session); + } +} + +/* + * Reset tuples for a specific client and instance. + * + * Delete 'keep' tuples and expire non 'keep' tuples. Typically this is + * called when a client (alg) is reset, in which case it will re-add its + * 'keep' tuples immediately after this call. 
+ */ +static void apt_table_client_reset(struct apt_table *tbl, const void *client) +{ + struct cds_lfht_iter iter; + struct apt_tuple *at; + + if (rte_atomic32_read(&tbl->at_count) == 0) + return; + + cds_lfht_for_each_entry(tbl->at_ht, &iter, at, at_node) { + if (at->at_client != client) + continue; + + if (at->at_keep) + apt_tuple_delete(tbl, at); + else + alg_apt_tuple_expire(at); + } +} + +/* + * Reset tuples for a specific client and instance. + */ +void +alg_apt_instance_client_reset(struct apt_instance *ai, const void *client) +{ + apt_table_client_reset(&ai->ai_all, client); + apt_table_client_reset(&ai->ai_any_sport, client); + apt_table_client_reset(&ai->ai_dport, client); +} + +/* + * Delete all tuples for a specific client. + */ +static void apt_table_client_destroy(struct apt_table *tbl, const void *client) +{ + struct cds_lfht_iter iter; + struct apt_tuple *at; + + if (rte_atomic32_read(&tbl->at_count) == 0) + return; + + cds_lfht_for_each_entry(tbl->at_ht, &iter, at, at_node) { + if (at->at_client != client) + continue; + + apt_tuple_delete(tbl, at); + } +} + +/* + * Delete all tuples for a specific client and instance. + */ +void +alg_apt_instance_client_destroy(struct apt_instance *ai, const void *client) +{ + apt_table_client_destroy(&ai->ai_all, client); + apt_table_client_destroy(&ai->ai_any_sport, client); + apt_table_client_destroy(&ai->ai_dport, client); +} + +/* + * Called from whole dp unit-tests to delete all non-keep or multimatch + * tuples, and any expired 'keep' tuples. + */ +static void apt_table_flush(struct apt_table *tbl) +{ + struct cds_lfht_iter iter; + struct apt_tuple *at; + + if (rte_atomic32_read(&tbl->at_count) == 0) + return; + + /* + * For each qualifying tuple in each hash table: remove from hash + * table, mark as expired, and rcu-free. 
+ */ + cds_lfht_for_each_entry(tbl->at_ht, &iter, at, at_node) { + if (at->at_multimatch || !at->at_keep || at->at_expired) + apt_tuple_delete(tbl, at); + } +} + +/* + * Called from whole dp unit-tests to delete all non-keep or multimatch + * tuples, and any expired 'keep' tuples. + */ +void alg_apt_instance_flush(struct apt_instance *ai) +{ + apt_table_flush(&ai->ai_all); + apt_table_flush(&ai->ai_any_sport); + apt_table_flush(&ai->ai_dport); +} + +/* + * Is tuple expired or timed-out? A timed-out tuple is marked expired here. + */ +static bool apt_tuple_is_expired(struct apt_tuple *at, uint64_t current) +{ + if (at->at_expired) + return true; + + /* + * 'keep' entries never time out. They must be explicitly expired + * and/or deleted. + */ + if (at->at_keep) + return false; + + if (current > at->at_exp_ts) { + alg_apt_tuple_expire(at); + return true; + } + return false; +} + +/* + * Garbage collect an APT table + * + * A timed-out entry is marked expired and 'removing' in the same pass, and + * is removed from the hash table on the next pass. An entry that was already + * expired is marked 'removing' on one pass and removed on the following one. + * + * Expired entries are no longer found when doing a table lookup. + * + * 'keep' entries never time out. They must be explicitly expired and/or + * deleted. 
+ */ +static void apt_table_gc(struct apt_table *tbl, uint64_t current) +{ + struct cds_lfht_iter iter; + struct apt_tuple *at; + + if (rte_atomic32_read(&tbl->at_count) == 0) + return; + + cds_lfht_for_each_entry(tbl->at_ht, &iter, at, at_node) { + if (apt_tuple_is_expired(at, current)) { + if (at->at_removing) + apt_tuple_delete(tbl, at); + else + at->at_removing = true; + } + } +} + +/* + * Garbage collect instance + */ +static void apt_instance_gc(struct apt_instance *ai) +{ + uint64_t current = get_time_uptime(); + + apt_table_gc(&ai->ai_all, current); + apt_table_gc(&ai->ai_any_sport, current); + apt_table_gc(&ai->ai_dport, current); +} + +/* + * Garbage collector + */ +static void apt_gc(struct rte_timer *timer __unused, void *arg __unused) +{ + struct npf_alg_instance *ai; + struct apt_instance *ai_apt; + struct vrf *vrf; + vrfid_t vrfid; + + VRF_FOREACH(vrf, vrfid) { + ai = vrf_get_npf_alg(vrf); + if (!ai) + continue; + + ai_apt = ai->ai_apt; + if (!ai_apt) + continue; + + apt_instance_gc(ai_apt); + } + + /* Restart timer if dataplane still running */ + if (running) + rte_timer_reset(&apt_timer, + APT_GC_INTERVAL * rte_get_timer_hz(), + SINGLE, rte_get_master_lcore(), apt_gc, + NULL); +} + +/* + * APT timer is started when first instance is created, and stopped when last + * instance is destroyed. + */ +static void apt_init(void) +{ + rte_timer_init(&apt_timer); + rte_timer_reset(&apt_timer, + APT_GC_INTERVAL * rte_get_timer_hz(), + SINGLE, rte_get_master_lcore(), apt_gc, + NULL); +} + +static void apt_uninit(void) +{ + rte_timer_stop_sync(&apt_timer); +} diff --git a/src/npf/alg/alg_apt.h b/src/npf/alg/alg_apt.h new file mode 100644 index 00000000..1a1fed57 --- /dev/null +++ b/src/npf/alg/alg_apt.h @@ -0,0 +1,145 @@ +/* + * Copyright (c) 2019-2021, AT&T Intellectual Property. All rights reserved. 
+ * + * SPDX-License-Identifier: LGPL-2.1-only + * + * ALG Protocol Tuple Database + */ + +#ifndef _ALG_TUPLE_H_ +#define _ALG_TUPLE_H_ + +#include +#include + +#include "urcu.h" +#include "util.h" +#include "npf/npf.h" + +struct apt_instance; +struct apt_tuple; + + +/* + * Old ALG tuple flags. Keep until config scripts are updated. + * + * NB: flags values for KEEP, REMOVING, and EXPIRED are used directly by + * script vyatta-dp-npf-show-alg-state in package vplane-config-npf, so + * if these values change then the script will need updating. + */ +#define NPF_TUPLE_KEEP (1<<0) +#define NPF_TUPLE_MATCH_PROTO_PORT (1<<2) +#define NPF_TUPLE_MATCH_ALL (1<<3) +#define NPF_TUPLE_MATCH_ANY_SPORT (1<<4) +#define NPF_TUPLE_REMOVING (1<<5) +#define NPF_TUPLE_EXPIRED (1<<6) +#define NPF_TUPLE_MULTIMATCH (1<<7) +#define NPF_TUPLE_MATCH_MASK (NPF_TUPLE_MATCH_PROTO_PORT | \ + NPF_TUPLE_MATCH_ALL | \ + NPF_TUPLE_MATCH_ANY_SPORT) + +/* Match table type */ +enum apt_match_table { + APT_MATCH_NONE = 0, + APT_MATCH_DPORT, + APT_MATCH_ALL, + APT_MATCH_ANY_SPORT +}; +#define APT_MATCH_FIRST APT_MATCH_DPORT +#define APT_MATCH_LAST APT_MATCH_ANY_SPORT +#define APT_MATCH_SZ (APT_MATCH_LAST + 1) + +/* Hash table match key */ +struct apt_match_key { + enum apt_match_table m_match; + uint8_t m_proto; + uint8_t m_alen; + uint16_t m_sport; + uint16_t m_dport; + uint32_t m_ifx; + const npf_addr_t *m_srcip; + const npf_addr_t *m_dstip; +}; + +struct apt_tuple *apt_tuple_lookup_all_any_dport(struct apt_instance *ai, + struct apt_match_key *m); + +/* Lookup ALL table then ANY_SPORT table */ +struct apt_tuple *apt_tuple_lookup_all_any(struct apt_instance *ai, + struct apt_match_key *m); + +/* Lookup proto and dest port table */ +struct apt_tuple *apt_tuple_lookup_dport(struct apt_instance *ai, + struct apt_match_key *m); + +struct apt_tuple *apt_tuple_create_and_insert(struct apt_instance *ai, + struct apt_match_key *m, + void *client, + uint32_t client_flags, + const char *client_name, + bool 
replace, bool keep); + +/* Get number of entries (expired and unexpired) in a table */ +uint32_t apt_table_count(struct apt_instance *ai, enum apt_match_table tt); + +void alg_apt_tuple_expire(struct apt_tuple *at); +int alg_apt_tuple_lookup_and_expire(struct apt_instance *ai, + struct apt_match_key *m); +bool apt_tuple_verify_and_expire(struct apt_instance *ai, struct apt_tuple *at); +int alg_apt_tuple_pair(struct apt_tuple *at1, struct apt_tuple *at2); + +/* + * Accessors + */ +void *apt_tuple_get_client_handle(struct apt_tuple *at); +void apt_tuple_clear_client_handle(struct apt_tuple *at); +uint32_t apt_tuple_get_client_flags(struct apt_tuple *at); +void *apt_tuple_get_client_data(struct apt_tuple *at); +void apt_tuple_set_client_data(struct apt_tuple *at, void *data); + +void apt_tuple_set_session(struct apt_tuple *at, void *session); +void *apt_tuple_get_session(struct apt_tuple *at); +void *apt_tuple_get_active_session(struct apt_tuple *at); +void apt_tuple_set_nat(struct apt_tuple *at, void *nat); +void *apt_tuple_get_nat(struct apt_tuple *at); +void apt_tuple_set_timeout(struct apt_tuple *at, uint32_t timeout); +void apt_tuple_set_multimatch(struct apt_tuple *at, bool val); +enum apt_match_table apt_tuple_get_table_type(struct apt_tuple *at); + +/* + * APT Instance + */ +struct apt_instance *alg_apt_instance_create(uint32_t ext_vrfid); +struct apt_instance *alg_apt_instance_get(struct apt_instance *ai); +void alg_apt_instance_put(struct apt_instance *ai); +void alg_apt_instance_jsonw(struct apt_instance *ai, json_writer_t *json); + +void alg_apt_instance_expire_session(struct apt_instance *ai, + const void *session); +void alg_apt_instance_destroy_session(struct apt_instance *ai, + const void *session); +void alg_apt_instance_client_reset(struct apt_instance *ai, const void *client); +void alg_apt_instance_client_destroy(struct apt_instance *ai, + const void *client); + +/* Unit-test only */ +void alg_apt_instance_flush(struct apt_instance *ai); + +/* + * 
APT registration + */ + +/* Max size of the event operations structs array */ +#define APT_EVENT_MAX_OPS 4 + +enum apt_evt { + APT_EVT_DELETE = 1, +}; + +struct apt_event_ops { + void (*apt_delete)(struct apt_tuple *at); +}; + +void apt_event_register(const struct apt_event_ops *ops); + +#endif /* _ALG_TUPLE_H_ */ diff --git a/src/npf/alg/alg_feat.h b/src/npf/alg/alg_feat.h new file mode 100644 index 00000000..e2c2220b --- /dev/null +++ b/src/npf/alg/alg_feat.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2020, AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ + +#ifndef _ALG_FEAT_H_ +#define _ALG_FEAT_H_ + +/* + * The features that use ALGs. This enum is used to differentiate npf and + * cgnat alg and apt data structures and table entries. + */ +enum alg_feat { + ALG_FEAT_NPF, + ALG_FEAT_CGNAT, +}; + +#define ALG_FEAT_FIRST ALG_FEAT_NPF +#define ALG_FEAT_LAST ALG_FEAT_CGNAT +#define ALG_FEAT_MAX (ALG_FEAT_LAST + 1) + +#define ALG_FEAT_ALL ALG_FEAT_MAX + +/* + * ALG feature names + */ +static inline const char *alg_feat_name(enum alg_feat feat) +{ + switch (feat) { + case ALG_FEAT_NPF: + return "npf"; + case ALG_FEAT_CGNAT: + return "cgnat"; + }; + return "unknown"; +} + +#endif /* _ALG_FEAT_H_ */ diff --git a/src/npf/alg/npf_alg_ftp.c b/src/npf/alg/alg_ftp.c similarity index 87% rename from src/npf/alg/npf_alg_ftp.c rename to src/npf/alg/alg_ftp.c index b896b0dc..f3179369 100644 --- a/src/npf/alg/npf_alg_ftp.c +++ b/src/npf/alg/alg_ftp.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2013-2016 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -30,7 +30,7 @@ #include "compiler.h" #include "npf/npf.h" -#include "npf/alg/npf_alg_private.h" +#include "npf/alg/alg.h" #include "npf/npf_cache.h" #include "npf/npf_nat.h" #include "npf/npf_session.h" @@ -96,7 +96,7 @@ static int ftp_parse_port(struct ftp_parse *fp, char *sptr, int dlen) if (dlen < 18) return -ENOENT; - if (memcmp("PORT ", sptr, 5)) + if (memcmp("PORT ", sptr, 5) != 0) return -ENOENT; /* scan rest, make sure we stop */ @@ -130,7 +130,7 @@ static int ftp_parse_229(struct ftp_parse *fp, npf_cache_t *npc, if (dlen < 11) return -ENOENT; - if (strncmp("229 ", sptr, 4)) + if (strncmp("229 ", sptr, 4) != 0) return -ENOENT; /* advance to '(' */ @@ -168,7 +168,7 @@ static int ftp_parse_eprt(struct ftp_parse *fp, char *sptr, int dlen) if (dlen < 18) return -ENOENT; - if (memcmp("EPRT ", sptr, 5)) + if (memcmp("EPRT ", sptr, 5) != 0) return -ENOENT; rc = sscanf(sptr+5, "%c%d%c%64[0-9.a-fA-f:]%c%lu", @@ -221,7 +221,7 @@ static int ftp_parse_227(struct ftp_parse *fp, char *sptr, int dlen) if (dlen < 17) return -ENOENT; - if (strncmp("227 ", sptr, 4)) + if (strncmp("227 ", sptr, 4) != 0) return -ENOENT; /* @@ -382,7 +382,7 @@ static int ftp_alg_config(struct npf_alg *ftp, int op, int argc, }; /* Only ports */ - if (strcmp(argv[0], "port")) + if (strcmp(argv[0], "port") != 0) return -EINVAL; argc--; argv++; @@ -429,46 +429,51 @@ static int ftp_alg_translate_payload(npf_session_t *se, return npf_payload_update(se, npc, nbuf, payload, di, nplen); } -static int ftp_alg_tuple_insert(const struct npf_alg *ftp, - npf_cache_t *npc, npf_session_t *se, - const npf_addr_t *saddr, in_port_t sport, - const npf_addr_t *daddr, in_port_t dport, - uint32_t alg_flags, struct npf_alg_nat *an) +static int ftp_alg_tuple_insert(struct npf_alg *ftp, + npf_cache_t *npc, npf_session_t *se, + const npf_addr_t *saddr, in_port_t sport, + const npf_addr_t *daddr, in_port_t dport, + uint32_t alg_flags, struct npf_alg_nat *an) { - struct npf_alg_tuple *nt; - int rc = -ENOMEM; + struct 
apt_match_key m = { 0 }; + struct apt_tuple *at; + + m.m_proto = IPPROTO_TCP; + m.m_ifx = npf_session_get_if_index(se); + m.m_alen = npc->npc_alen; + m.m_dport = dport; + m.m_sport = sport; + m.m_dstip = daddr; + m.m_srcip = saddr; + + if (sport) + m.m_match = APT_MATCH_ALL; + else + m.m_match = APT_MATCH_ANY_SPORT; - nt = npf_alg_tuple_alloc(); - if (nt) { - nt->nt_se = se; - nt->nt_nat = an; - if (sport) - nt->nt_flags = NPF_TUPLE_MATCH_ALL; - else - nt->nt_flags = NPF_TUPLE_MATCH_ANY_SPORT; - nt->nt_proto = IPPROTO_TCP; - nt->nt_ifx = npf_session_get_if_index(se); - nt->nt_alg_flags = alg_flags; - nt->nt_alg = ftp; - nt->nt_alen = npc->npc_alen; - nt->nt_dport = dport; - nt->nt_sport = sport; - nt->nt_timeout = FTP_TUPLE_TIMEOUT; - nt->nt_dstip = *daddr; - nt->nt_srcip = *saddr; - rc = npf_alg_tuple_add_replace(ftp->na_ai, nt); - if (rc) { - RTE_LOG(ERR, FIREWALL, "FTP: tuple insert:%d\n", rc); - npf_alg_tuple_free(nt); - } + /* Tuple takes a reference on the alg */ + at = apt_tuple_create_and_insert(ftp->na_ai->ai_apt, &m, + npf_alg_get(ftp), + alg_flags, + NPF_ALG_FTP_NAME, + true, false); + + if (!at) { + RTE_LOG(ERR, FIREWALL, "FTP: tuple insert\n"); + npf_alg_put(ftp); + return -EINVAL; } - return rc; + apt_tuple_set_session(at, se); + apt_tuple_set_nat(at, an); + apt_tuple_set_timeout(at, FTP_TUPLE_TIMEOUT); + + return 0; } static int ftp_alg_snat_passive(npf_session_t *parent, npf_cache_t *npc, npf_nat_t *pnat, struct ftp_parse *fp) { - const struct npf_alg *ftp = npf_alg_session_get_alg(parent); + struct npf_alg *ftp = npf_alg_session_get_alg(parent); npf_addr_t oaddr; in_port_t tmp; @@ -482,7 +487,7 @@ static int ftp_alg_dnat_passive(npf_session_t *parent, npf_cache_t *npc, struct rte_mbuf *nbuf, char *payload, npf_nat_t *pnat, struct ftp_parse *fp) { - const struct npf_alg *ftp = npf_alg_session_get_alg(parent); + struct npf_alg *ftp = npf_alg_session_get_alg(parent); npf_addr_t oaddr; in_port_t tmp; int rc; @@ -507,7 +512,7 @@ static int 
ftp_alg_snat_active(npf_session_t *parent, npf_cache_t *npc, struct rte_mbuf *nbuf, struct ftp_parse *fp, char *payload) { - const struct npf_alg *ftp = npf_alg_session_get_alg(parent); + struct npf_alg *ftp = npf_alg_session_get_alg(parent); in_port_t port; struct npf_alg_nat *an = NULL; npf_addr_t addr; @@ -558,7 +563,7 @@ static int ftp_alg_dnat_active(npf_session_t *parent, npf_cache_t *npc, npf_nat_t *ns, struct ftp_parse *fp) { in_port_t tmp; - const struct npf_alg *ftp = npf_alg_session_get_alg(parent); + struct npf_alg *ftp = npf_alg_session_get_alg(parent); struct npf_alg_nat *an; int rc; @@ -648,7 +653,7 @@ static void ftp_alg_inspect(npf_session_t *parent, npf_cache_t *npc, in_port_t dport; int rc; int plen; - const struct npf_alg *ftp = npf_alg_session_get_alg(parent); + struct npf_alg *ftp = npf_alg_session_get_alg(parent); /* If we already natted, nothing to do */ if (npf_iscached(npc, NPC_NATTED)) @@ -680,27 +685,45 @@ static void ftp_alg_inspect(npf_session_t *parent, npf_cache_t *npc, * Session init */ static int ftp_alg_session_init(npf_session_t *se, npf_cache_t *npc, - struct npf_alg_tuple *nt, const int di) + struct apt_tuple *nt, const int di) { + npf_session_t *parent; + uint32_t alg_flags; int rc = 0; - switch (nt->nt_flags & NPF_TUPLE_MATCH_MASK) { - case NPF_TUPLE_MATCH_PROTO_PORT: + switch (apt_tuple_get_table_type(nt)) { + case APT_MATCH_DPORT: + /* Parent flow */ npf_alg_session_set_inspect(se, true); npf_alg_session_set_flag(se, FTP_ALG_CNTL); break; - case NPF_TUPLE_MATCH_ALL: - case NPF_TUPLE_MATCH_ANY_SPORT: - rc = npf_alg_session_nat(se, npf_alg_parent_nat(nt->nt_se), - npc, di, nt); + + case APT_MATCH_ALL: + case APT_MATCH_ANY_SPORT: + /* Child flow */ + parent = apt_tuple_get_active_session(nt); + if (!parent) { + rc = -ENOENT; + break; + } + + rc = npf_alg_session_nat(se, npf_alg_parent_nat(parent), + npc, di, nt, NULL); if (!rc) { - npf_alg_session_set_flag(se, nt->nt_alg_flags); - npf_session_link_child(nt->nt_se, se); - 
npf_alg_session_set_private(se, nt->nt_data); - nt->nt_data = NULL; + /* Transfer alg_flags from tuple to child session */ + alg_flags = apt_tuple_get_client_flags(nt); + npf_alg_session_set_flag(se, alg_flags); + + /* Link parent and child sessions */ + npf_session_link_child(parent, se); } break; + + default: + rc = -EINVAL; + break; } + return rc; } @@ -779,10 +802,15 @@ struct npf_alg *npf_alg_ftp_create_instance(struct npf_alg_instance *ai) void npf_alg_ftp_destroy_instance(struct npf_alg *ftp) { - if (ftp) { - ftp->na_enabled = false; - ftp->na_ai = NULL; - /* Release reference on an alg application instance */ - npf_alg_put(ftp); - } + if (!ftp) + return; + + /* Expire or delete tuples */ + alg_apt_instance_client_destroy(ftp->na_ai->ai_apt, ftp); + + ftp->na_enabled = false; + ftp->na_ai = NULL; + + /* Release reference on an alg application instance */ + npf_alg_put(ftp); } diff --git a/src/npf/alg/npf_alg_public.c b/src/npf/alg/alg_npf.c similarity index 62% rename from src/npf/alg/npf_alg_public.c rename to src/npf/alg/alg_npf.c index 220854e3..261d585f 100644 --- a/src/npf/alg/npf_alg_public.c +++ b/src/npf/alg/alg_npf.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2018-2021, AT&T Intellectual Property. All rights reserved. 
* * SPDX-License-Identifier: LGPL-2.1-only */ @@ -7,15 +7,16 @@ #include #include "util.h" #include "vplane_log.h" -#include "vrf.h" +#include "vrf_internal.h" #include "npf/npf_cmd.h" #include "npf/npf_nat.h" +#include "npf/npf_rc.h" #include "npf/npf_session.h" #include "npf/npf_vrf.h" -#include "npf/alg/npf_alg_public.h" -#include "npf/alg/npf_alg_private.h" +#include "npf/alg/alg_npf.h" +#include "npf/alg/alg.h" #ifndef NALG @@ -42,24 +43,39 @@ void npf_alg_uninit(void) */ void npf_alg_destroy_instance(struct npf_alg_instance *ai) { - npf_alg_tftp_destroy_instance(ai->ai_tftp); - npf_alg_ftp_destroy_instance(ai->ai_ftp); - npf_alg_sip_destroy_instance(ai->ai_sip); - npf_alg_rpc_destroy_instance(ai->ai_rpc); + if (!ai) + return; + + if (ai->ai_tftp) + npf_alg_tftp_destroy_instance(ai->ai_tftp); + + if (ai->ai_ftp) + npf_alg_ftp_destroy_instance(ai->ai_ftp); + + if (ai->ai_sip) + npf_alg_sip_destroy_instance(ai->ai_sip); + + if (ai->ai_rpc) + npf_alg_rpc_destroy_instance(ai->ai_rpc); ai->ai_tftp = NULL; ai->ai_ftp = NULL; ai->ai_sip = NULL; ai->ai_rpc = NULL; - alg_destroy_apts(ai); + /* apt instance will be destroyed when last reference is removed */ + alg_apt_instance_put(ai->ai_apt); + ai->ai_apt = NULL; + free(ai); } /* Take reference on an alg application instance */ struct npf_alg *npf_alg_get(struct npf_alg *alg) { - rte_atomic32_inc(&alg->na_refcnt); + if (alg) + rte_atomic32_inc(&alg->na_refcnt); + return alg; } @@ -79,7 +95,7 @@ npf_alg_session_init(struct npf_session *se, struct npf_cache *npc, const int di) { const struct npf_alg *alg; - struct npf_alg_tuple *nt; + struct apt_tuple *nt; int rc = 0; /* Ensure we have an instance struct for the VRF */ @@ -96,7 +112,7 @@ npf_alg_session_init(struct npf_session *se, struct npf_cache *npc, if (!nt) return 0; - alg = nt->nt_alg; + alg = apt_tuple_get_client_handle(nt); if (alg_has_op(alg, se_init) && alg->na_enabled) { rc = npf_alg_session_set_alg(se, alg); if (!rc) @@ -113,6 +129,45 @@ 
npf_alg_session_init(struct npf_session *se, struct npf_cache *npc, return rc; } +/* + * Bypass CGNAT out if packet matches ALG tuple or SNAT session. Called + * *only* from the CGNAT output pipeline node. + */ +bool npf_alg_bypass_cgnat(const struct ifnet *ifp, struct rte_mbuf *m) +{ + npf_cache_t npc_local, *npc; + npf_session_t *se; + bool sforw = false; + + /* Initialize local npf packet cache. */ + npc = &npc_local; + npf_cache_init(npc); + + /* Cache packet */ + if (unlikely(!npf_cache_all_nogpr(npc, m, htons(RTE_ETHER_TYPE_IPV4)))) + return false; + + if (npf_iscached(npc, NPC_ICMP_ERR)) + return false; + + /* Does pkt match an ALG session? */ + se = npf_session_find(m, PFIL_OUT, ifp, &sforw, NULL); + if (se && npf_session_get_alg_ptr(se)) + return true; + + /* + * Does pkt match an ALG tuple? + * + * Note that this does *not* set the NPC_ALG_TLUP cache flag or set + * npc_tuple, since it may not be the correct type of tuple when + * fw_out first does a lookup. + */ + if (alg_lookup_every_table(ifp, npc) != NULL) + return true; + + return false; +} + /* * npf_alg_session. Lookup ALG tuple database, and create an ALG secondary * session if packet matches an expected ALG secondary flow. 
@@ -124,29 +179,27 @@ struct npf_session * npf_alg_session(struct npf_cache *npc, struct rte_mbuf *nbuf, const struct ifnet *ifp, const int di, int *error) { - struct alg_protocol_tuples *apt; - struct npf_alg_tuple *nt; + struct apt_tuple *nt; struct npf_session *se; - bool do_drop = false; + bool do_drop; /* Ensure we have an instance struct for the VRF */ struct npf_alg_instance *ai = vrf_get_npf_alg_rcu(ifp->if_vrfid); if (!ai) return NULL; - apt = alg_get_apt(ai, npf_cache_ipproto(npc)); - if (!apt) - return NULL; - /* * Search the 'all' ht for an exact match, then the any_sport for a * wildcarded sport */ - nt = alg_search_all_then_any_sport(apt, npc, ifp); + nt = alg_search_all_then_any_sport(ai, npc, ifp->if_index); if (!nt) return NULL; /* + * Verify the tuple, then expire it so that no other packets find + * it. (Note, some tuples are *not* expired here) + * * There is one race we are concerned with: * * - Possible receipt of both a forward and reverse packet. @@ -158,20 +211,11 @@ npf_alg_session(struct npf_cache *npc, struct rte_mbuf *nbuf, * * Regardless, expire all tuples for this match. */ - if (unlikely(!nt->nt_paired)) { - if (!(nt->nt_flags & NPF_TUPLE_MULTIMATCH)) - apt_expire_tuple(nt); - } else { - rte_spinlock_lock(&apt->apt_lock); - if (nt->nt_flags & NPF_TUPLE_EXPIRED) - do_drop = true; - npf_alg_tuple_expire_pair(nt); - rte_spinlock_unlock(&apt->apt_lock); - } + do_drop = apt_tuple_verify_and_expire(ai->ai_apt, nt); /* Decide whether we need to drop the racing packet(s). */ if (do_drop) { - *error = -EEXIST; + *error = -NPF_RC_ALG_EEXIST; return NULL; } @@ -179,6 +223,10 @@ npf_alg_session(struct npf_cache *npc, struct rte_mbuf *nbuf, * Add the tuple to the npc, since establish will call session init. * session_init will init the ALG portion of the handle and link to * parent. + * + * Note, we are adding an expired tuple to the cache without taking a + * reference on it. 
This relies on the tuple sticking around until at + * least the second garbage collection run after this point. */ npc->npc_info |= NPC_ALG_TLUP; npf_cache_set_tuple(npc, (void *)nt); @@ -192,6 +240,7 @@ npf_alg_session(struct npf_cache *npc, struct rte_mbuf *nbuf, * the above we only search said child tuples. */ se = npf_session_establish(npc, nbuf, ifp, di, error); + return se; } @@ -208,7 +257,7 @@ npf_alg_inspect(struct npf_session *se, struct npf_cache *npc, if (!npf_alg_session_inspect(se)) return; - const struct npf_alg *alg = npf_alg_session_get_alg(se); + struct npf_alg *alg = npf_alg_session_get_alg(se); /* Call inspect function */ if (alg_has_op(alg, inspect)) @@ -221,12 +270,12 @@ npf_alg_inspect(struct npf_session *se, struct npf_cache *npc, */ void npf_alg_nat_inspect(struct npf_session *se, struct npf_cache *npc, - struct npf_nat *nt, int di) + struct npf_nat *nat, int di) { - const struct npf_alg *alg = npf_alg_session_get_alg(se); + struct npf_alg *alg = npf_alg_session_get_alg(se); - if (nt && alg_has_op(alg, nat_inspect)) - alg->na_ops->nat_inspect(se, npc, nt, di); + if (nat && alg_has_op(alg, nat_inspect)) + alg->na_ops->nat_inspect(se, npc, nat, di); } /* @@ -235,15 +284,15 @@ npf_alg_nat_inspect(struct npf_session *se, struct npf_cache *npc, */ int npf_alg_nat(struct npf_session *se, struct npf_cache *npc, - struct rte_mbuf *nbuf, struct npf_nat *nt, const int di) + struct rte_mbuf *nbuf, struct npf_nat *nat, const int di) { - const struct npf_alg *alg = npf_nat_getalg(nt); + const struct npf_alg *alg = npf_nat_getalg(nat); int rc = 0; if (alg_has_op(alg, nat_out) && di == PFIL_OUT) - rc = alg->na_ops->nat_out(se, npc, nbuf, nt); + rc = alg->na_ops->nat_out(se, npc, nbuf, nat); else if (alg_has_op(alg, nat_in) && di == PFIL_IN) - rc = alg->na_ops->nat_in(se, npc, nbuf, nt); + rc = alg->na_ops->nat_in(se, npc, nbuf, nat); return rc; } @@ -282,6 +331,9 @@ npf_alg_session_destroy(struct npf_session *se, struct npf_session_alg *sa) if 
(alg_has_op(alg, se_destroy)) alg->na_ops->se_destroy(se); + /* Delete any tuples (pinholes) created by this session */ + alg_destroy_session_tuples(alg, se); + sa->sa_alg = NULL; npf_alg_put((struct npf_alg *)alg); } @@ -378,6 +430,105 @@ npf_alg_cfg(FILE *f, int argc, char **argv) return -1; } +struct npf_alg_child_json_ctx { + json_writer_t *json; + struct session *s; +}; + +/* + * Add per-child json for a parent session + */ +static void npf_alg_child_session_json(struct session *child, void *data) +{ + struct npf_alg_child_json_ctx *ctx = data; + json_writer_t *json = ctx->json; + struct session *parent = NULL; + + /* + * The walk function also calls the callback for the parent session + * and grandchild sessions, so we skip them here. We are only + * interested in children. + */ + if (child == ctx->s) + return; + + if (child->se_link) + parent = child->se_link->sl_parent; + + /* Only return children, not grandchildren */ + if (!parent || parent != ctx->s) + return; + + jsonw_start_object(json); + jsonw_uint_field(json, "id", child->se_id); + jsonw_end_object(json); +} + +/* + * Add ALG info to session json + */ +int npf_alg_session_json(json_writer_t *json, + struct npf_session *se, + struct npf_session_alg *sa __unused) +{ + const char *name; + struct npf_session *parent; + struct npf_session *base_parent; + struct npf_alg_child_json_ctx ctx = { + .json = json, + .s = npf_session_get_dp_session(se), + }; + + /* Name of specific alg */ + name = npf_alg_name(se); + if (!name) + name = "unknown"; + + /* Will return NULL if this is a parent */ + parent = npf_session_get_parent(se); + + /* If this is the base parent then base_parent will equal se */ + base_parent = (struct npf_session *)npf_session_get_base_parent(se); + + jsonw_name(json, "alg"); + jsonw_start_object(json); + + jsonw_string_field(json, "name", name); + + if (parent) + jsonw_uint_field(json, "parent", + npf_session_get_id(parent)); + + if (base_parent != se && base_parent != parent) { + 
jsonw_uint_field(json, "base_parent", + npf_session_get_id(base_parent)); + + /* Is base parent the grandparent? */ + bool bp_is_gp; + + bp_is_gp = (npf_session_get_parent(parent) == base_parent); + jsonw_bool_field(json, "bp_is_gp", bp_is_gp); + } + + /* Walk children */ + jsonw_name(json, "children"); + jsonw_start_array(json); + + session_link_walk(npf_session_get_dp_session(se), false, + npf_alg_child_session_json, &ctx); + + jsonw_end_array(json); + + /* ALG-specific session json */ + struct npf_alg *alg = npf_alg_session_get_alg(se); + + if (alg_has_op(alg, se_json)) + alg->na_ops->se_json(json, se); + + jsonw_end_object(json); + return 0; +} + /* * Dump contents of alg tuple tables */ @@ -417,7 +568,7 @@ void npf_alg_dump(FILE *fp, vrfid_t vrfid) */ const char *npf_alg_name(struct npf_session *se) { - const struct npf_alg *npf_alg = npf_alg_session_get_alg(se); + struct npf_alg *npf_alg = npf_alg_session_get_alg(se); if (npf_alg) return npf_alg->na_ops->name; diff --git a/src/npf/alg/npf_alg_public.h b/src/npf/alg/alg_npf.h similarity index 90% rename from src/npf/alg/npf_alg_public.h rename to src/npf/alg/alg_npf.h index 71b57345..61de16ff 100644 --- a/src/npf/alg/npf_alg_public.h +++ b/src/npf/alg/alg_npf.h @@ -1,11 +1,11 @@ /* - * Copyright (c) 2018-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2018-2020, AT&T Intellectual Property. All rights reserved. 
* * SPDX-License-Identifier: LGPL-2.1-only */ -#ifndef NPF_ALG_PUBLIC -#define NPF_ALG_PUBLIC +#ifndef _ALG_NPF_H_ +#define _ALG_NPF_H_ #include #include "json_writer.h" @@ -41,14 +41,19 @@ int npf_alg_nat(struct npf_session *se, struct npf_cache *npc, struct rte_mbuf *nbuf, struct npf_nat *nat, const int di) __cold_func; +bool npf_alg_bypass_cgnat(const struct ifnet *ifp, struct rte_mbuf *m); + int npf_alg_session_init(struct npf_session *se, struct npf_cache *npc, const int di); -struct npf_session *npf_alg_session(struct npf_cache *npc, struct rte_mbuf *m, +struct npf_session *npf_alg_session(struct npf_cache *npc, + struct rte_mbuf *nbuf, const struct ifnet *ifp, const int di, int *error); void npf_alg_session_expire(struct npf_session *se, struct npf_session_alg *sa); void npf_alg_session_destroy(struct npf_session *se, struct npf_session_alg *sa); +int npf_alg_session_json(json_writer_t *json, struct npf_session *se, + struct npf_session_alg *sa); void npf_alg_reset(bool hard); int npf_alg_cfg(FILE *f, int argc, char **argv); @@ -172,4 +177,4 @@ static const char *npf_alg_name(struct npf_session *se) #endif /* NALG */ -#endif /* End of NPF_ALG_PUBLIC */ +#endif /* End of _ALG_NPF_H_ */ diff --git a/src/npf/alg/npf_alg_rpc.c b/src/npf/alg/alg_rpc.c similarity index 87% rename from src/npf/alg/npf_alg_rpc.c rename to src/npf/alg/alg_rpc.c index d10ec05c..12a265f7 100644 --- a/src/npf/alg/npf_alg_rpc.c +++ b/src/npf/alg/alg_rpc.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2014-2016 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -23,7 +23,7 @@ #include "compiler.h" #include "npf/npf.h" -#include "npf/alg/npf_alg_private.h" +#include "npf/alg/alg.h" #include "npf/npf_cache.h" #include "npf/npf_nat.h" #include "npf/npf_session.h" @@ -37,7 +37,7 @@ struct rte_mbuf; #define RPC_MAX_PORT 65535 /* Skip over words in an rpc msg */ -#define SKIP(a, b) ((uint32_t *)((uint8_t *)a + b)) +#define SKIP(a, b) ((uint32_t *)((uint8_t *)(a) + (b))) #define RPC_PORT_CONFIG 0 #define RPC_PROG_CONFIG 1 @@ -191,40 +191,41 @@ static int rpc_alg_program_handler(struct npf_alg *rpc, int op, /* Insert a tuple */ -static int rpc_tuple_insert(const struct npf_alg *rpc, - npf_cache_t *npc, npf_session_t *se, - const npf_addr_t *srcip, - const npf_addr_t *dstip, uint16_t dport) +static int rpc_tuple_insert(struct npf_alg *rpc, + npf_cache_t *npc, npf_session_t *se, + const npf_addr_t *srcip, + const npf_addr_t *dstip, uint16_t dport) { - struct npf_alg_tuple *nt; - int rc = -ENOMEM; - - nt = npf_alg_tuple_alloc(); - if (nt) { - nt->nt_alg = rpc; - nt->nt_ifx = npf_session_get_if_index(se); - nt->nt_flags = NPF_TUPLE_MATCH_ANY_SPORT; - nt->nt_timeout = 10; - nt->nt_se = se; - nt->nt_proto = npc->npc_next_proto; - nt->nt_alen = npc->npc_alen; - memcpy(&nt->nt_dstip, dstip, nt->nt_alen); - memcpy(&nt->nt_srcip, srcip, nt->nt_alen); - nt->nt_sport = 0; - nt->nt_dport = dport; - - rc = npf_alg_tuple_add_replace(rpc->na_ai, nt); - if (rc) { - npf_alg_tuple_free(nt); - RTE_LOG(ERR, FIREWALL, "RPC: tuple insert: %d\n", rc); - } + struct apt_match_key m = { 0 }; + struct apt_tuple *at; + + m.m_proto = npf_cache_ipproto(npc); + m.m_ifx = npf_session_get_if_index(se); + m.m_alen = npc->npc_alen; + m.m_dport = dport; + m.m_sport = 0; + m.m_dstip = dstip; + m.m_srcip = srcip; + m.m_match = APT_MATCH_ANY_SPORT; + + at = apt_tuple_create_and_insert(rpc->na_ai->ai_apt, &m, + npf_alg_get(rpc), + 0, NPF_ALG_RPC_NAME, + true, false); + + if (!at) { + npf_alg_put(rpc); + RTE_LOG(ERR, FIREWALL, "RPC: tuple insert\n"); + return 
-EINVAL; } - return rc; + apt_tuple_set_session(at, se); + + return 0; } /* Parse a RPC request msg */ static int rpc_parse_request(struct rpc_request *rr, uint32_t xid, - uint32_t *rpc_data, uint8_t *buf_start, + uint32_t *rpc_data, const uint8_t *buf_start, uint32_t rpc_len) { uint32_t field_len; @@ -292,8 +293,8 @@ static int rpc_verify_request(const struct npf_alg *rpc, struct rpc_request *rr) /* Parse a RPC reply msg */ static int rpc_parse_reply(struct rpc_request *rr, struct rpc_reply *rp, - uint32_t *rpc_data, uint32_t xid, uint8_t *buf_start, - uint32_t rpc_len) + uint32_t *rpc_data, uint32_t xid, + const uint8_t *buf_start, uint32_t rpc_len) { uint32_t field_len; @@ -337,7 +338,7 @@ static int rpc_manage_request(npf_session_t *se, uint32_t xid, int rc; struct rpc_request r; struct rpc_request *sr; - const struct npf_alg *rpc = npf_alg_session_get_alg(se); + struct npf_alg *rpc = npf_alg_session_get_alg(se); rc = rpc_parse_request(&r, xid, rpc_data, buf_start, rpc_len); if (rc) @@ -488,7 +489,7 @@ static int rpc_handle_packet(npf_cache_t *npc, npf_session_t *se, { int rc; uint16_t port = 0; - const struct npf_alg *rpc = npf_alg_session_get_alg(se); + struct npf_alg *rpc = npf_alg_session_get_alg(se); rc = rpc_parse_packet(npc, se, nbuf, &port); if (rc) @@ -512,20 +513,41 @@ static void rpc_alg_inspect(npf_session_t *se, npf_cache_t *npc, /* ALG session initialization */ static int rpc_alg_session_init(npf_session_t *se, npf_cache_t *npc __unused, - struct npf_alg_tuple *nt, const int di __unused) + struct apt_tuple *nt, const int di __unused) { + npf_session_t *parent; + uint32_t alg_flags; + int rc = 0; npf_alg_session_set_inspect(se, true); - switch (nt->nt_flags & NPF_TUPLE_MATCH_MASK) { - case NPF_TUPLE_MATCH_PROTO_PORT: + + switch (apt_tuple_get_table_type(nt)) { + case APT_MATCH_DPORT: + /* Parent flow */ + break; + + case APT_MATCH_ANY_SPORT: + /* Child flow */ + parent = apt_tuple_get_active_session(nt); + if (!parent) { + rc = -ENOENT; + break; + 
} + + /* Transfer alg_flags from tuple to child session */ + alg_flags = apt_tuple_get_client_flags(nt); + npf_alg_session_set_flag(se, alg_flags); + + /* Link parent and child sessions */ + npf_session_link_child(parent, se); break; - case NPF_TUPLE_MATCH_ANY_SPORT: - /* Pass along the flags from the tuple */ - npf_alg_session_set_flag(se, nt->nt_alg_flags); - npf_session_link_child(nt->nt_se, se); + + default: + rc = -EINVAL; break; } - return 0; + + return rc; } /* ALG session destroy */ @@ -670,14 +692,19 @@ struct npf_alg *npf_alg_rpc_create_instance(struct npf_alg_instance *ai) void npf_alg_rpc_destroy_instance(struct npf_alg *rpc) { - if (rpc) { - rpc_destroy_list(rpc); - free(rpc->na_private); - rpc->na_private = NULL; - rpc->na_enabled = false; - rpc->na_ai = NULL; - - /* Release reference on an alg application instance */ - npf_alg_put(rpc); - } + if (!rpc) + return; + + /* Expire or delete tuples */ + alg_apt_instance_client_destroy(rpc->na_ai->ai_apt, rpc); + + rpc_destroy_list(rpc); + free(rpc->na_private); + + rpc->na_private = NULL; + rpc->na_enabled = false; + rpc->na_ai = NULL; + + /* Release reference on an alg application instance */ + npf_alg_put(rpc); } diff --git a/src/npf/alg/alg_sip.c b/src/npf/alg/alg_sip.c new file mode 100644 index 00000000..b18a4cfa --- /dev/null +++ b/src/npf/alg/alg_sip.c @@ -0,0 +1,917 @@ +/* + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2013-2016 by Brocade Communications Systems, Inc. + * All rights reserved. 
+ * + * SPDX-License-Identifier: LGPL-2.1-only + */ + +/* + * NPF ALG for SIP + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "compiler.h" +#include "in_cksum.h" +#include "npf/npf.h" +#include "npf/alg/alg.h" +#include "npf/npf_cache.h" +#include "npf/npf_nat.h" +#include "npf/npf_session.h" +#include "urcu.h" +#include "util.h" +#include "vplane_log.h" + +#include "npf/alg/sip/sip.h" + +struct ifnet; +struct rte_mbuf; +struct sip_alg_request; + +/* default port */ +#define SIP_DEFAULT_PORT 5060 + +/* For one-time initialization of libosip. */ +static osip_t *sip_osip; + + +/* + * sip_addr_from_str() - Convert a string addr into an Ipv4 or IPv6 addr + */ +void sip_addr_from_str(const char *saddr, npf_addr_t *addr, uint8_t *alen) +{ + int af = AF_INET; + + if (strchr(saddr, ':')) + af = AF_INET6; + + *alen = 0; + if (inet_pton(af, saddr, addr)) { + if (af == AF_INET) + *alen = 4; + else + *alen = 16; + } +} + +/* + * Convert an address to an (allocated) string + */ +char *sip_addr_to_str(npf_addr_t *a, uint8_t alen) +{ + char buf[INET6_ADDRSTRLEN]; + int af; + + if (alen == 4) + af = AF_INET; + else if (alen == 16) + af = AF_INET6; + else + return NULL; + + if (inet_ntop(af, a, buf, sizeof(buf))) + return osip_strdup(buf); + return NULL; +} + +/* + * Convert a port to an (allocated) string + */ +char *sip_port_to_str(in_port_t n) +{ + char buf[8]; + int rc; + + rc = snprintf(buf, 8, "%hu", n); + if (rc < 0 || rc > 6) + return NULL; + return osip_strdup(buf); +} + +static int sip_alg_private_session_init(npf_session_t *se) +{ + struct sip_alg_session *ss; + + ss = npf_alg_session_get_private(se); + if (ss) + return -EINVAL; + ss = calloc(sizeof(struct sip_alg_session), 1); + if (!ss) + return -ENOMEM; + npf_alg_session_set_private(se, ss); + + return 0; +} + +static void sip_alg_private_session_free(npf_session_t *se) +{ + 
struct sip_alg_session *ss; + + + ss = npf_alg_session_get_private(se); + if (ss) + sip_expire_session_request(se); + free(ss); +} + +/* + * sip_alg_verify() - Some cursory checks before dealing with this packet. + */ +int sip_alg_verify(struct sip_alg_request *sr) +{ + /* + * We only check whether required headers have been + * parsed, we do not verify the contents. + */ + if (!sr->sr_sip->to) + return -EINVAL; + if (!sr->sr_sip->from) + return -EINVAL; + if (!sr->sr_sip->cseq) + return -EINVAL; + if (!sr->sr_sip->call_id) + return -EINVAL; + if (osip_list_size(&sr->sr_sip->vias) < 1) + return -EINVAL; + + return 0; +} + +/* Create an alg nat object */ +static struct npf_alg_nat * +sip_create_nat(vrfid_t vrfid, uint32_t flags, bool reserved, + npf_addr_t oaddr, in_port_t oport, + npf_addr_t taddr, in_port_t tport) +{ + struct npf_alg_nat *an = malloc(sizeof(struct npf_alg_nat)); + + if (an) { + an->an_oaddr = oaddr; + an->an_oport = oport; + an->an_taddr = taddr; + an->an_tport = tport; + an->an_flags = flags; + an->an_vrfid = vrfid; + if (reserved) + an->an_flags |= NPF_NAT_CLONE_APM | NPF_NAT_MAP_PORT; + } + return an; +} + +/* + * Called from sip_alg_session_init when an ALT_CNTL tuple is matched. + */ +static int sip_session_nat_alt_cntl(npf_session_t *se, npf_cache_t *npc, + const int di, struct apt_tuple *nt, + npf_session_t *parent) +{ + npf_nat_t *pnat; + npf_addr_t oaddr; + npf_addr_t taddr; + in_port_t oport, tport; + int ntype; + uint masq; + struct npf_alg_nat *an; + struct npf_alg *sip; + int rc; + + /* Only if parent is natted */ + pnat = npf_session_get_nat(parent); + if (!pnat) + return 0; + + /* Get parent NAT translation address and port */ + if (!npf_nat_info(pnat, &ntype, &taddr, &tport, &masq)) + return -EINVAL; + + /* Only for SNAT */ + if (ntype != NPF_NATOUT) + return -EINVAL; + + /* + * All we are doing here is creating a reverse nat using the + * parent's original src addr/port. 
We just want this flow to + * translate back to the original parent. We use a dummy tuple + * struct to pass the alg nat struct for nat creation. + */ + npf_nat_get_orig(pnat, &oaddr, &oport); + + an = sip_create_nat(npf_session_get_vrfid(se), NPF_NAT_REVERSE, + false, oaddr, oport, taddr, tport); + if (!an) + return -ENOMEM; + + /* Consumes 'an' if successful. */ + rc = npf_alg_session_nat(se, pnat, npc, di, NULL, an); + + if (!rc) { + sip = apt_tuple_get_client_handle(nt); + npf_nat_setalg(npf_session_get_nat(se), sip); + } else + free(an); + + return rc; +} + +/* + * calculate L4 + L3 checksum deltas. + */ +static void +sip_calculate_checksum_deltas(const void *oaddr, const void *naddr, + uint16_t oport, uint16_t nport, + uint16_t *l3_delta, uint16_t *l4_delta) +{ + const uint32_t *oip32 = oaddr; + const uint32_t *nip32 = naddr; + + uint16_t delta = ip_fixup32_cksum(0xffff, *oip32, *nip32); + *l3_delta = delta ^ 0xffff; + + delta = ip_fixup16_cksum(0xffff, oport, nport); + *l4_delta = delta ^ 0xffff; +} + +/* + * An RTP or RTCP tuple has been matched. Called from sip_alg_session_init. + * Compare with sip_session_nat_alt_cntl. + */ +static int sip_session_nat_media(npf_session_t *se, npf_cache_t *npc, + const int di, struct apt_tuple *nt) +{ + struct sip_tuple_data *td; + struct sip_alg_media *m = NULL; + struct npf_alg_nat *an; + npf_session_t *parent; + uint32_t nat_flags = 0; + uint32_t alg_flags; + int rc; + + parent = apt_tuple_get_active_session(nt); + if (!parent) + return -ENOENT; + + td = apt_tuple_get_client_data(nt); + alg_flags = apt_tuple_get_client_flags(nt); + + /* + * Create the nat(s). In SIP's case we always allocate a nat + * since we likely allocated consecutive rtp/rtcp ports. + * + * We have 4 (possible) cases to deal with. We don't know which + * direction the rtp and rtcp flows will originate from and + * we will add 4 tuples for those. 
+ * + * Even though these might be forward flows that match a nat rule, + * we already allocated ports during control msg parsing. + * + * All we do here is create and set the nat struct, unless this + * is merely a stateful rule flow set. + */ + if (!(alg_flags & SIP_ALG_NAT) || !td) + return 0; + + /* We may have to reverse the nat */ + if (alg_flags & SIP_ALG_REVERSE) + nat_flags = NPF_NAT_REVERSE; + + + /* Select proper side, either invite or response */ + if (td_is_snat(td)) + m = td->td_mi; + else if (td_is_dnat(td)) + m = td->td_mr; + else + return -EINVAL; + + rc = -ENOMEM; + vrfid_t vrfid = npf_session_get_vrfid(se); + + switch (alg_flags & SIP_ALG_MASK) { + case SIP_ALG_RTP_FLOW: + an = sip_create_nat(vrfid, nat_flags, + m->m_rtp_reserved, + m->m_rtp_addr, htons(m->m_rtp_port), + m->m_trtp_addr, htons(m->m_trtp_port)); + if (an) { + rc = npf_alg_session_nat(se, npf_alg_parent_nat(parent), + npc, di, NULL, an); + if (!rc) + m->m_rtp_reserved = false; + else + free(an); + } + break; + case SIP_ALG_RTCP_FLOW: + an = sip_create_nat(vrfid, nat_flags, + m->m_rtcp_reserved, + m->m_rtcp_addr, htons(m->m_rtcp_port), + m->m_trtcp_addr, htons(m->m_trtcp_port)); + if (an) { + rc = npf_alg_session_nat(se, npf_alg_parent_nat(parent), + npc, di, NULL, an); + if (!rc) + m->m_rtcp_reserved = false; + else + free(an); + } + break; + default: + return -EINVAL; + } + + return rc; +} + +/* + * Manage the SIP message. Used for both NATd and non-NATd pkts. + * + * For non-NATd flow, tsr == sr. + */ +int sip_alg_manage_sip(npf_session_t *se, npf_cache_t *npc, + struct sip_alg_request *sr, + struct sip_alg_request *tsr, + npf_nat_t *nat, bool *consumed) +{ + int rc = -EINVAL; + + /* + * Handle (thus far) valid requests and responses, all garbage will + * result in a drop packet. 
+ */ + if (MSG_IS_REQUEST(tsr->sr_sip)) + rc = sip_manage_request(se, npc, sr, tsr, nat, consumed); + else if (MSG_IS_RESPONSE(tsr->sr_sip)) + rc = sip_manage_response(se, npc, sr, tsr, nat); + + return rc; +} + +/* sip_alg_nat_out() - packet NAT (SNAT) out */ +static int sip_alg_nat_out(npf_session_t *se, npf_cache_t *npc, + struct rte_mbuf *nbuf, npf_nat_t *ns) +{ + /* This can only be the SIP flow */ + return sip_alg_translate_packet(se, npc, ns, nbuf, PFIL_OUT); +} + +/* sip_alg_nat_in() - Packet NAT in */ +static int sip_alg_nat_in(npf_session_t *se, npf_cache_t *npc, + struct rte_mbuf *nbuf, npf_nat_t *ns) +{ + /* This can only be the SIP flow */ + return sip_alg_translate_packet(se, npc, ns, nbuf, PFIL_IN); +} + +/* + * Translate reply path *after* IP header has been translated. Called from + * the ao_inspect api function. + * + * While most SIP implementations set the VIA to match the port/addr of the + * initiator, the SIP RFC states that reply packets must be routed to the + * addr/port in the VIA. + * + * Newer Cisco phones implement the RFC exactly. They use a high numbered + * sport for sending out msgs and expect reply packets on the default SIP port + * (5060). + * + * The situation we have here, according to the RFC is: + * + * a1:p1 --> a2:p2 + * a3:p3 <-- + * + * But this screws up our session handles, which were added to a1:p1 - a2:p2, + * so we need to do this translation outside of the nat engine to maintain a + * sane view of session handles (as well as return the reply appropriately). + * + * Only do this for UDP. + */ +static void sip_translate_reply_path(npf_session_t *se, int di __unused, + struct rte_mbuf *nbuf, npf_cache_t *npc) +{ + /* + * This function is called from the ALG .inspect callback, and we don't + * know if this packet is SIP Request or a SIP Response. We only want + * to rewrite the IP dest for SIP Responses on the reply path. 
+ */ + return; + + + struct sip_alg_session *ss = npf_alg_session_get_private(se); + void *n_ptr = npf_iphdr(nbuf); + struct udphdr *uh = &npc->npc_l4.udp; + + if (!ss) + return; + + /* Only if this is a response msg */ + if (npc->npc_alg_flags != SIP_NPC_RESPONSE) + return; + + /* Only udp */ + if (npf_cache_ipproto(npc) != IPPROTO_UDP) + return; + + /* + * Dont rewrite the IP header if we failed to get a return address + * from the Via in the Invite, e.g. it may have been a FQDN. + */ + if (ss->ss_via_alen == 0) + return; + + if (uh->dest == ss->ss_via_port) + return; /* Nothing to do */ + + /* Calculate the L3 and L4 checksum delta's */ + uint16_t l3_delta, l4_delta; + + sip_calculate_checksum_deltas(npf_cache_dstip(npc), &ss->ss_via_addr, + uh->dest, ss->ss_via_port, + &l3_delta, &l4_delta); + + /* + * re-write IP and UDP cksums first. + */ + if (npf_v4_rwrcksums(npc, nbuf, n_ptr, l3_delta, l4_delta) < 0) + return; + + /* Now translate */ + if (npf_rwrip(npc, nbuf, n_ptr, PFIL_IN, &ss->ss_via_addr) < 0) + return; + + /* Now the port */ + npf_rwrport(npc, nbuf, n_ptr, PFIL_IN, ss->ss_via_port); +} + +/* + * Inspect for non-NATd pkts + */ +static void sip_alg_inspect_packet(npf_session_t *se, npf_cache_t *npc, + struct rte_mbuf *nbuf, int di) +{ + struct sip_alg_request *sr; + struct npf_alg *sip = npf_alg_session_get_alg(se); + bool consumed = false; + + sr = sip_alg_parse(sip, npc, npf_session_get_if_index(se), nbuf); + if (!sr) + return; + + if (sip_alg_verify(sr)) { + sip_alg_request_free(sip, sr); + return; + } + + sip_init_nat(sr, false, NULL, NULL, 0, 0, di); + + sip_alg_manage_sip(se, npc, sr, sr, NULL, &consumed); + + if (!consumed) + sip_alg_request_free(sip, sr); +} + +/* + * Inspect (mostly) non-NATd flow. 
+ */ +static void sip_alg_inspect(npf_session_t *se, npf_cache_t *npc, + struct rte_mbuf *nbuf, struct ifnet *ifp __unused, + int di) +{ + uint32_t flags = npf_alg_session_get_flags(se); + + /* sanity - can only be CNTL flow */ + if (!(flags & (SIP_ALG_CNTL_FLOW | SIP_ALG_ALT_CNTL_FLOW))) + return; + + /* + * In some circumstances we want to adjust the packet's destination IP + * address and port in the IP header of SIP Responses after NAT. + */ + if (npf_iscached(npc, NPC_NATTED)) { + sip_translate_reply_path(se, di, nbuf, npc); + return; + } + + /* + * Inspect for non-NATd pkts + */ + sip_alg_inspect_packet(se, npc, nbuf, di); +} + +/* + * New session has matched a tuple. + */ +static int sip_alg_session_init(npf_session_t *se, npf_cache_t *npc, + struct apt_tuple *nt, const int di) +{ + npf_session_t *parent; + uint32_t alg_flags; + int rc = 0; + + /* Transfer alg_flags from tuple to child session */ + alg_flags = apt_tuple_get_client_flags(nt); + npf_alg_session_set_flag(se, alg_flags); + + switch (alg_flags & SIP_ALG_MASK) { + case SIP_ALG_CNTL_FLOW: + npf_alg_session_set_inspect(se, true); + rc = sip_alg_private_session_init(se); + break; + + case SIP_ALG_ALT_CNTL_FLOW: + parent = apt_tuple_get_active_session(nt); + if (!parent) { + rc = -ENOENT; + break; + } + + npf_alg_session_set_inspect(se, true); + npf_alg_session_set_flag(se, SIP_ALG_REVERSE); + rc = sip_session_nat_alt_cntl(se, npc, di, nt, parent); + + if (!rc) + npf_session_link_child(parent, se); + break; + + case SIP_ALG_RTP_FLOW: + parent = apt_tuple_get_active_session(nt); + if (!parent) { + rc = -ENOENT; + break; + } + + rc = sip_session_nat_media(se, npc, di, nt); + if (!rc) { + struct sip_tuple_data *td; + + td = apt_tuple_get_client_data(nt); + sip_alg_create_rtcp_tuples(se, npc, td); + + npf_session_link_child(parent, se); + } + break; + + case SIP_ALG_RTCP_FLOW: + parent = apt_tuple_get_active_session(nt); + if (!parent) { + rc = -ENOENT; + break; + } + + rc = sip_session_nat_media(se, npc, 
di, nt); + if (!rc) + npf_session_link_child(parent, se); + break; + default: + rc = -EINVAL; + break; + } + + return rc; +} + +/* + * An SIP alg session has been expired. + * + * Expire any requests in the hash table that are associated with this + * session. We know this from the list of call IDs stored in the session + * context private data. + */ +static void sip_alg_session_expire(npf_session_t *se) +{ + if (npf_alg_session_test_flag(se, SIP_ALG_CNTL_FLOW)) + sip_expire_session_request(se); +} + +/* + * An SIP alg session is being destroyed. + */ +static void sip_alg_session_destroy(npf_session_t *se) +{ + if (npf_alg_session_test_flag(se, SIP_ALG_CNTL_FLOW)) + sip_alg_private_session_free(se); +} + +/* + * Called after NAT session is created, and IP header is translated. Called + * for first packet in a NATd flow. + * + * If it's a parent session then sip_alg_session_init will already have set the + * SIP_ALG_CNTL_FLOW flag. + */ +static void sip_alg_nat_inspect(npf_session_t *se, npf_cache_t *npc __unused, + npf_nat_t *nt, int di __unused) +{ + if (npf_alg_session_test_flag(se, SIP_ALG_CNTL_FLOW | + SIP_ALG_ALT_CNTL_FLOW)) + npf_nat_setalg(nt, npf_alg_session_get_alg(se)); +} + +/* sip_alg_config() - Config routine for sip */ +static int sip_alg_config(struct npf_alg *sip, int op, int argc, + char * const argv[]) +{ + struct npf_alg_config_item ci = { + .ci_flags = NPF_TUPLE_KEEP | NPF_TUPLE_MATCH_PROTO_PORT, + .ci_alg_flags = SIP_ALG_CNTL_FLOW + }; + int rc; + int i; + + /* Only ports, skip */ + if (strcmp(argv[0], "port") != 0) + return 0; + argc--; argv++; + + for (i = 0; i < argc; i++) { + ci.ci_datum = npf_port_from_str(argv[i]); + if (!ci.ci_datum) + continue; + + /* + * Treat ports as a protocol pair + * (Really should be separate CLI) + */ + ci.ci_proto = IPPROTO_UDP; + rc = npf_alg_manage_config_item(sip, &sip->na_configs[0], + op, &ci); + if (rc) + return rc; + + ci.ci_proto = IPPROTO_TCP; + rc = npf_alg_manage_config_item(sip, &sip->na_configs[0], + 
op, &ci); + if (rc) { + /* unwind if possible */ + ci.ci_proto = IPPROTO_UDP; + npf_alg_manage_config_item(sip, &sip->na_configs[0], + NPF_ALG_CONFIG_DELETE, &ci); + return rc; + } + } + + return 0; +} + +static void sip_alg_periodic(struct npf_alg *sip) +{ + sip_ht_gc(sip); +} + +static void +sip_alg_session_media_json(json_writer_t *json, struct sip_alg_media *m) +{ + char buf[INET6_ADDRSTRLEN]; + int af; + + jsonw_start_object(json); + + if (m->m_proto == sdp_proto_udp) + jsonw_string_field(json, "proto", "udp"); + else if (m->m_proto == sdp_proto_rtp) + jsonw_string_field(json, "proto", "rtp"); + else + jsonw_string_field(json, "proto", "unknown"); + + if (m->m_rtp_alen) { + af = (m->m_rtp_alen == 4) ? AF_INET : AF_INET6; + inet_ntop(af, &m->m_rtp_addr, buf, sizeof(buf)); + jsonw_string_field(json, "rtp_addr", buf); + } + if (m->m_rtp_port) + jsonw_uint_field(json, "rtp_port", m->m_rtp_port); + + if (m->m_rtcp_alen) { + af = (m->m_rtcp_alen == 4) ? AF_INET : AF_INET6; + inet_ntop(af, &m->m_rtcp_addr, buf, sizeof(buf)); + jsonw_string_field(json, "rtcp_addr", buf); + } + if (m->m_rtcp_port) + jsonw_uint_field(json, "rtcp_port", m->m_rtcp_port); + + if (m->m_trtp_alen) { + af = (m->m_trtp_alen == 4) ? AF_INET : AF_INET6; + inet_ntop(af, &m->m_trtp_addr, buf, sizeof(buf)); + jsonw_string_field(json, "trtp_addr", buf); + } + if (m->m_trtp_port) + jsonw_uint_field(json, "trtp_port", m->m_trtp_port); + + if (m->m_trtcp_alen) { + af = (m->m_trtcp_alen == 4) ? 
AF_INET : AF_INET6; + inet_ntop(af, &m->m_trtcp_addr, buf, sizeof(buf)); + jsonw_string_field(json, "trtcp_addr", buf); + } + if (m->m_trtcp_port) + jsonw_uint_field(json, "trtcp_port", m->m_trtcp_port); + + jsonw_end_object(json); +} + +static void +sip_alg_session_callid_json(json_writer_t *json, npf_session_t *se, + osip_call_id_t *call_id) +{ + char *number, *host; + struct npf_alg *sip = npf_alg_session_get_alg(se); + struct sip_alg_request *sr; + struct sip_alg_media *m, *tmp; + uint32_t if_idx = npf_session_get_if_index(se); + char buf[100]; + + number = osip_call_id_get_number(call_id); + host = osip_call_id_get_host(call_id); + + if (!number) + return; + + jsonw_start_object(json); + + if (!host) + snprintf(buf, sizeof(buf), "%s", number); + else + snprintf(buf, sizeof(buf), "%s@%s", number, host); + jsonw_string_field(json, "number", buf); + + sr = sip_request_lookup_by_call_id(sip, if_idx, call_id); + if (!sr) { + jsonw_end_object(json); + return; + } + + jsonw_name(json, "media"); + jsonw_start_array(json); + + cds_list_for_each_entry_safe(m, tmp, &sr->sr_media_list_head, m_node) + sip_alg_session_media_json(json, m); + + jsonw_end_array(json); + jsonw_end_object(json); +} + +static void +sip_alg_session_json(json_writer_t *json, npf_session_t *se) +{ + struct sip_alg_session *ss; + char buf[INET6_ADDRSTRLEN]; + + if (!json || !se) + return; + + ss = npf_alg_session_get_private(se); + if (!ss) + return; + + jsonw_name(json, "sip"); + jsonw_start_object(json); + + if (ss->ss_via_alen) + jsonw_string_field( + json, "via_addr", + inet_ntop(ss->ss_via_alen == 4 ? 
AF_INET : AF_INET6, + &ss->ss_via_addr, + buf, sizeof(buf))); + + if (ss->ss_via_port) + jsonw_uint_field(json, "via_port", ntohs(ss->ss_via_port)); + + if (ss->ss_call_id_count > 0) { + int i; + + jsonw_name(json, "callids"); + jsonw_start_array(json); + + for (i = 0; i < ss->ss_call_id_count; i++) + sip_alg_session_callid_json(json, se, + ss->ss_call_ids[i]); + + jsonw_end_array(json); + } + + jsonw_end_object(json); +} + +/* alg struct */ +static const struct npf_alg_ops sip_ops = { + .name = NPF_ALG_SIP_NAME, + .se_init = sip_alg_session_init, + .se_destroy = sip_alg_session_destroy, + .se_expire = sip_alg_session_expire, + .se_json = sip_alg_session_json, + .inspect = sip_alg_inspect, + .config = sip_alg_config, + .nat_inspect = sip_alg_nat_inspect, + .nat_in = sip_alg_nat_in, + .nat_out = sip_alg_nat_out, + .periodic = sip_alg_periodic, + .tuple_delete = sip_tuple_data_detach, +}; + +static const struct npf_alg_config_item sip_ports[] = { + { IPPROTO_TCP, (NPF_TUPLE_KEEP | NPF_TUPLE_MATCH_PROTO_PORT), + SIP_ALG_CNTL_FLOW, SIP_DEFAULT_PORT }, + { IPPROTO_UDP, (NPF_TUPLE_KEEP | NPF_TUPLE_MATCH_PROTO_PORT), + SIP_ALG_CNTL_FLOW, SIP_DEFAULT_PORT }, +}; + +struct npf_alg *npf_alg_sip_create_instance(struct npf_alg_instance *ai) +{ + struct npf_alg *sip; + struct sip_private *sp = NULL; + int rc = -ENOMEM; + + sip = npf_alg_create_alg(ai, NPF_ALG_ID_SIP); + if (!sip) + goto bad; + + sip->na_ops = &sip_ops; + + /* setup default config */ + sip->na_num_configs = 1; + sip->na_configs[0].ac_items = sip_ports; + sip->na_configs[0].ac_item_cnt = ARRAY_SIZE(sip_ports); + sip->na_configs[0].ac_handler = npf_alg_port_handler; + + sp = zmalloc_aligned(sizeof(struct sip_private)); + if (!sp) + goto bad; + + rc = sip_ht_create(sp); + if (rc < 0) + goto bad; + + rte_spinlock_init(&sp->sp_media_lock); + CDS_INIT_LIST_HEAD(&sp->sp_dead_media); + + sip->na_private = sp; + + rc = npf_alg_register(sip); + if (rc) + goto bad; + + /* Take reference on an alg application instance */ + 
npf_alg_get(sip); + + return sip; + +bad: + if (net_ratelimit()) + RTE_LOG(ERR, FIREWALL, "ALG: SIP instance failed: %d\n", rc); + + if (sp && sp->sp_ht) + cds_lfht_destroy(sp->sp_ht, NULL); + free(sp); + free(sip); + return NULL; +} + +/* + * Destroy - we are guaranteed no access and a rcu quiesce period has + * passed. + */ +void npf_alg_sip_destroy_instance(struct npf_alg *sip) +{ + if (!sip) + return; + + + /* Expire or delete tuples */ + alg_apt_instance_client_destroy(sip->na_ai->ai_apt, sip); + + sip_destroy_ht(sip); + + free(sip->na_private); + sip->na_private = NULL; + + sip->na_enabled = false; + sip->na_ai = NULL; + + /* Release reference on an alg application instance */ + npf_alg_put(sip); +} + +/* + * Constructor for one-time libosip initialization + */ +static void npf_alg_sip_init(void) __attribute__ ((__constructor__)); + +static void npf_alg_sip_init(void) +{ + osip_init(&sip_osip); +} diff --git a/src/npf/alg/npf_alg_tftp.c b/src/npf/alg/alg_tftp.c similarity index 77% rename from src/npf/alg/npf_alg_tftp.c rename to src/npf/alg/alg_tftp.c index c69b36a8..32099056 100644 --- a/src/npf/alg/npf_alg_tftp.c +++ b/src/npf/alg/alg_tftp.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2013-2016 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -23,7 +23,7 @@ #include "compiler.h" #include "npf/npf.h" -#include "npf/alg/npf_alg_private.h" +#include "npf/alg/alg.h" #include "npf/npf_cache.h" #include "npf/npf_nat.h" #include "npf/npf_session.h" @@ -55,7 +55,7 @@ static int tftp_alg_config(struct npf_alg *tftp, int op, int argc, .ci_flags = (NPF_TUPLE_KEEP | NPF_TUPLE_MATCH_PROTO_PORT) }; - if (strcmp(argv[0], "port")) + if (strcmp(argv[0], "port") != 0) return -EINVAL; argc--; argv++; @@ -72,36 +72,37 @@ static int tftp_alg_config(struct npf_alg *tftp, int op, int argc, } /* Create and insert a tuple for an expected flow */ -static int tftp_alg_tuple_insert(const struct npf_alg *tftp, +static int tftp_alg_tuple_insert(struct npf_alg *tftp, npf_cache_t *npc, npf_session_t *se, const npf_addr_t *saddr, in_port_t sport, const npf_addr_t *daddr, in_port_t dport, uint32_t alg_flags) { - struct npf_alg_tuple *nt; - int rc = -ENOMEM; - - nt = npf_alg_tuple_alloc(); - if (nt) { - nt->nt_se = se; - nt->nt_flags = NPF_TUPLE_MATCH_ANY_SPORT; - nt->nt_proto = IPPROTO_UDP; - nt->nt_ifx = npf_session_get_if_index(se); - nt->nt_alg = tftp; - nt->nt_alg_flags = alg_flags; - nt->nt_alen = npc->npc_alen; - nt->nt_dport = dport; - nt->nt_sport = sport; - nt->nt_timeout = 10; - memcpy(&nt->nt_srcip, saddr, nt->nt_alen); - memcpy(&nt->nt_dstip, daddr, nt->nt_alen); - rc = npf_alg_tuple_add_replace(tftp->na_ai, nt); - if (rc) { - npf_alg_tuple_free(nt); - RTE_LOG(ERR, FIREWALL, "TFTP: tuple insert:%d\n", rc); - } + struct apt_match_key m = { 0 }; + struct apt_tuple *at; + + m.m_proto = IPPROTO_UDP; + m.m_ifx = npf_session_get_if_index(se); + m.m_alen = npc->npc_alen; + m.m_dport = dport; + m.m_sport = sport; + m.m_dstip = daddr; + m.m_srcip = saddr; + m.m_match = APT_MATCH_ANY_SPORT; + + at = apt_tuple_create_and_insert(tftp->na_ai->ai_apt, &m, + npf_alg_get(tftp), + alg_flags, NPF_ALG_TFTP_NAME, + true, false); + + if (!at) { + npf_alg_put(tftp); + RTE_LOG(ERR, FIREWALL, "TFTP: tuple insert\n"); + return -EINVAL; } - 
return rc; + apt_tuple_set_session(at, se); + + return 0; } /* @@ -150,7 +151,7 @@ static int tftp_alg_nat_out(npf_session_t *se, npf_cache_t *npc, struct rte_mbuf *nbuf __unused, npf_nat_t *nat) { npf_addr_t taddr; - const struct npf_alg *tftp = npf_alg_session_get_alg(se); + struct npf_alg *tftp = npf_alg_session_get_alg(se); in_port_t tport; bool insert = false; int rc; @@ -171,7 +172,7 @@ static int tftp_alg_nat_in(npf_session_t *se, npf_cache_t *npc, struct rte_mbuf *nbuf __unused, npf_nat_t *nat) { npf_addr_t addr; - const struct npf_alg *tftp = npf_alg_session_get_alg(se); + struct npf_alg *tftp = npf_alg_session_get_alg(se); in_port_t port; struct udphdr *uh = &npc->npc_l4.udp; bool insert = false; @@ -193,7 +194,7 @@ static int tftp_alg_nat_in(npf_session_t *se, npf_cache_t *npc, * first data packet - we need the server src port */ static int tftp_create_nat(npf_session_t *se, npf_nat_t *pnat, npf_cache_t *npc, - const int di, struct npf_alg_tuple *nt) + const int di, struct apt_tuple *nt) { struct npf_ports *p; npf_addr_t taddr; @@ -201,9 +202,13 @@ static int tftp_create_nat(npf_session_t *se, npf_nat_t *pnat, npf_cache_t *npc, in_port_t oport; in_port_t tport; struct npf_alg_nat *an; + uint32_t alg_flags; + int rc; + + alg_flags = apt_tuple_get_client_flags(nt); /* Ignore stateful sessions */ - if (!(nt->nt_alg_flags & (TFTP_ALG_SNAT | TFTP_ALG_DNAT))) + if (!(alg_flags & (TFTP_ALG_SNAT | TFTP_ALG_DNAT))) return 0; an = zmalloc_aligned(sizeof(struct npf_alg_nat)); @@ -220,17 +225,22 @@ static int tftp_create_nat(npf_session_t *se, npf_nat_t *pnat, npf_cache_t *npc, an->an_oaddr = oaddr; an->an_vrfid = npf_session_get_vrfid(se); - if (nt->nt_alg_flags & TFTP_ALG_DNAT) { + if (alg_flags & TFTP_ALG_DNAT) { /* Only translate the address, port comes from server */ an->an_tport = an->an_oport = p->s_port; - } else if (nt->nt_alg_flags & TFTP_ALG_SNAT) { + } else if (alg_flags & TFTP_ALG_SNAT) { /* Translate both addr and port */ an->an_tport = tport; 
an->an_oport = oport; } - nt->nt_nat = an; - return npf_alg_session_nat(se, pnat, npc, di, nt); + /* Consumes 'an' if successful. */ + rc = npf_alg_session_nat(se, pnat, npc, di, NULL, an); + + if (rc < 0) + free(an); + + return rc; } /* Nat inspect */ @@ -247,7 +257,7 @@ static void tftp_alg_inspect(npf_session_t *se, npf_cache_t *npc, struct rte_mbuf *nbuf, struct ifnet *ifp __unused, int di __unused) { - const struct npf_alg *tftp = npf_alg_session_get_alg(se); + struct npf_alg *tftp = npf_alg_session_get_alg(se); struct udphdr *uh = &npc->npc_l4.udp; bool insert = false; @@ -269,23 +279,38 @@ static void tftp_alg_inspect(npf_session_t *se, npf_cache_t *npc, * Session init */ static int tftp_alg_session_init(npf_session_t *se, npf_cache_t *npc, - struct npf_alg_tuple *nt, const int di) + struct apt_tuple *nt, const int di) { + npf_session_t *parent; int rc = 0; npf_alg_session_set_inspect(se, true); - switch (nt->nt_flags & NPF_TUPLE_MATCH_MASK) { - case NPF_TUPLE_MATCH_PROTO_PORT: /* parent flow */ + switch (apt_tuple_get_table_type(nt)) { + case APT_MATCH_DPORT: + /* Parent flow */ npf_alg_session_set_flag(se, TFTP_ALG_CNTL); break; - case NPF_TUPLE_MATCH_ANY_SPORT: /* child flow */ - rc = tftp_create_nat(se, npf_alg_parent_nat(nt->nt_se), - npc, di, nt); + + case APT_MATCH_ANY_SPORT: + /* Child flow */ + parent = apt_tuple_get_active_session(nt); + if (!parent) { + rc = -ENOENT; + break; + } + + rc = tftp_create_nat(se, npf_alg_parent_nat(parent), + npc, di, nt); if (!rc) - npf_session_link_child(nt->nt_se, se); + npf_session_link_child(parent, se); + break; + + default: + rc = -EINVAL; break; } + return rc; } @@ -341,11 +366,15 @@ struct npf_alg *npf_alg_tftp_create_instance(struct npf_alg_instance *ai) void npf_alg_tftp_destroy_instance(struct npf_alg *tftp) { - if (tftp) { - tftp->na_enabled = false; - tftp->na_ai = NULL; - /* Release reference on an alg application instance */ - npf_alg_put(tftp); - } + if (!tftp) + return; + + /* Expire or delete tuples 
*/ + alg_apt_instance_client_destroy(tftp->na_ai->ai_apt, tftp); + + tftp->na_enabled = false; + tftp->na_ai = NULL; + /* Release reference on an alg application instance */ + npf_alg_put(tftp); } diff --git a/src/npf/alg/npf_alg_private.c b/src/npf/alg/npf_alg_private.c deleted file mode 100644 index 39155189..00000000 --- a/src/npf/alg/npf_alg_private.c +++ /dev/null @@ -1,2008 +0,0 @@ -/* - * Copyright (c) 2018-2019, AT&T Intellectual Property. All rights reserved. - * Copyright (c) 2016 by Brocade Communications Systems, Inc. - * All rights reserved. - */ - -/*- - * Copyright (c) 2010 The NetBSD Foundation, Inc. - * Copyright (c) 2013-2016 by Brocade Communications Systems, Inc. - * All rights reserved. - * - * SPDX-License-Identifier: (LGPL-2.1-only AND BSD-2-Clause-NETBSD) - * - * Substantially re-written from the original BSD source by Brocade. - * - * This material is based upon work partially supported by The - * NetBSD Foundation under a contract with Mindaugas Rasiukevicius. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS - * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/* - * NPF interface for application level gateways (ALGs). - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "if_var.h" -#include "json_writer.h" -#include "npf/npf.h" -#include "npf/alg/npf_alg_private.h" -#include "npf/npf_nat.h" -#include "npf/npf_session.h" -#include "npf/npf_cache.h" -#include "npf/npf_vrf.h" -#include "vplane_log.h" -#include "vrf.h" - -struct rte_mbuf; - -/* Minimum lifetime for a tuple */ -#define NPF_ALG_MIN_TIMEOUT 5 - -/* Retry count for tuple insertions. */ -#define NPF_ALG_RETRY_COUNT 10 - -/* ALG periodic timer - for GC */ -static struct rte_timer alg_timer; -#define ALG_INTERVAL 5 - -/* A zero addr */ -static npf_addr_t zero_addr; - -/* - * We need to store disable requests for ALGs in VRFs not yet seen. - * So we have a hash for each unseen VRF (by external-id), that - * points to a list of algs which should be disabled. 
- */ -struct alg_late_vrf { - struct alg_late_vrf *nv_prev; - struct alg_late_vrf *nv_next; - char nv_key[32]; - uint32_t nv_vrfid; - zhash_t *nv_algs; -}; - -static struct alg_late_vrf *alg_late_vrfs; -static zhash_t *alg_late_vrf_hash; - -static void -npf_alg_late_vrfs_destroy(void) -{ - struct alg_late_vrf *late_vrf; - struct alg_late_vrf *next_vrf; - - for (late_vrf = alg_late_vrfs; late_vrf; late_vrf = next_vrf) { - next_vrf = late_vrf->nv_next; - zhash_destroy(&late_vrf->nv_algs); - - zhash_delete(alg_late_vrf_hash, late_vrf->nv_key); - } - - alg_late_vrfs = NULL; -} - -static struct alg_late_vrf * -npf_alg_late_vrf_find(uint32_t ext_vrfid) -{ - char hash_key[32]; - snprintf(hash_key, sizeof(hash_key), "%x", ext_vrfid); - - return zhash_lookup(alg_late_vrf_hash, hash_key); -} - -static struct alg_late_vrf * -npf_alg_late_vrf_add(uint32_t ext_vrfid) -{ - struct alg_late_vrf *late_vrf = malloc(sizeof(*late_vrf)); - if (!late_vrf) - return NULL; - - late_vrf->nv_vrfid = ext_vrfid; - late_vrf->nv_algs = zhash_new(); - snprintf(late_vrf->nv_key, sizeof(late_vrf->nv_key), "%x", ext_vrfid); - - late_vrf->nv_prev = NULL; - late_vrf->nv_next = alg_late_vrfs; - if (alg_late_vrfs) - alg_late_vrfs->nv_prev = late_vrf; - alg_late_vrfs = late_vrf; - - zhash_insert(alg_late_vrf_hash, late_vrf->nv_key, late_vrf); - - return late_vrf; -} - -static void -npf_alg_late_vrf_del(struct alg_late_vrf *late_vrf) -{ - if (!late_vrf) - return; - - if (late_vrf->nv_next) - late_vrf->nv_next->nv_prev = late_vrf->nv_prev; - if (late_vrf->nv_prev) - late_vrf->nv_prev->nv_next = late_vrf->nv_next; - else - alg_late_vrfs = late_vrf->nv_next; - - zhash_destroy(&late_vrf->nv_algs); - - zhash_delete(alg_late_vrf_hash, late_vrf->nv_key); -} - -static void -npf_alg_late_vrf_set_alg(struct alg_late_vrf *late_vrf, char const *name, - bool on) -{ - bool *alg_on = malloc(sizeof(*alg_on)); - if (!alg_on) - return; - - *alg_on = on; - - int rc = zhash_insert(late_vrf->nv_algs, name, alg_on); - if (!rc) 
- return; /* Insert ok */ - - free(alg_on); - - /* Duplicate - probably will never occur */ - if (rc == -1) { - alg_on = zhash_lookup(late_vrf->nv_algs, name); - if (alg_on) - *alg_on = on; - } -} - -/* - * ALG tuple hash table. - * - * The ALG framework consists of an API, executed at certain points - * along a packets path throughout NPF, as well as an expected flow - * tuple database. - * - * The tuple database consists of a specialized hash table. A set of - * hash tables are associated with a IP protocol, each table representing - * the type of tuple match. The tables represent 'wildcard' matching of - * various parts of a possible 6-tuple (proto/interface/addrs/ports). - * - * Tuple matching is a perfect candidate for a grouper2 match, however - * grouper2 does not allow for dynamic sorted-insertion/deletion of rows during - * runtime. Matches must be made in a 'most-restrictive' - * to 'least-restrictive' manner, meaning a match for a 4-5 tuple must - * be made prior to a match for a 3 tuple within the same protocol. - * - * When a packet enters the framework, a lookup into its protocol struct - * is performed and if a match is made, the packet is forwarded to the - * alg set in the tuple. Matches are made against the incoming packet's - * 'npc' struct directly. - * - * When algs register with the framework, they tell the framework which - * IP protocols they use, and the framework initializes the protocols if - * needed. Various ALGs may share the same IP protocols, they are not - * unique to an ALG. - * - * During a disable, delete all alg-specific tuples. We also set - * a disable flag on the alg struct so future incoming packets are - * prevented from reaching the alg. This mechanism also allows packets - * in-flight at the time of the disable to complete their path through - * the alg. - * - * On an enable, the config system will send down all configuration data - * to all ALGs and they will re-populate the expected tuples. 
- */ - -#define APT_INIT 32 -#define APT_MIN 128 -#define APT_MAX (8*1024) - -/* Max number of nodes in a protocol, inserts fail after this */ -#define APT_MAX_NODES (64*1024) - -/* For hash table matching */ -struct apt_match { - npf_addr_t *m_srcip; - npf_addr_t *m_dstip; - uint32_t m_ifx; - in_port_t m_dport; - in_port_t m_sport; - uint16_t m_flag; - uint8_t m_proto; - uint8_t m_alen; -}; - -/* For a walking the protos to reset an alg */ -struct alg_walk_params { - struct npf_alg *ap_alg; - bool ap_all; -}; - -/* typedef for a list walking function */ -typedef void (algwalk_t)(struct alg_ht *ht, struct npf_alg_tuple *, void *); - -/* Set ALG private data */ -void -npf_alg_session_set_private(struct npf_session *se, void *data) -{ - struct npf_session_alg *sa = npf_session_get_alg_ptr(se); - - if (sa) - sa->sa_private = data; -} - -/* Get ALG private data */ -void * -npf_alg_session_get_private(const struct npf_session *se) -{ - struct npf_session_alg *sa = npf_session_get_alg_ptr(se); - - if (sa) - return sa->sa_private; - return NULL; -} - -/* Get previous ALG private data, and set new value as one operation */ -void * -npf_alg_session_get_and_set_private(const npf_session_t *se, void *data) -{ - struct npf_session_alg *sa = npf_session_get_alg_ptr(se); - if (sa) - return rcu_xchg_pointer(&(sa->sa_private), data); - return NULL; -} - -/* Test flag */ -int -npf_alg_session_test_flag(const struct npf_session *se, uint32_t flag) -{ - struct npf_session_alg *sa = npf_session_get_alg_ptr(se); - - if (sa) - return sa->sa_flags & flag; - return 0; -} - -/* Set flag */ -void -npf_alg_session_set_flag(struct npf_session *se, uint32_t flag) -{ - struct npf_session_alg *sa = npf_session_get_alg_ptr(se); - - if (sa) - sa->sa_flags |= flag; -} - -/* Get all flags */ -uint32_t -npf_alg_session_get_flags(const struct npf_session *se) -{ - struct npf_session_alg *sa = npf_session_get_alg_ptr(se); - - if (sa) - return sa->sa_flags; - return 0; -} - -/* Get inspect */ -bool 
-npf_alg_session_inspect(struct npf_session *se) -{ - struct npf_session_alg *sa = npf_session_get_alg_ptr(se); - - if (sa) - return sa->sa_inspect; - return false; -} - -/* Set inspect */ -void -npf_alg_session_set_inspect(struct npf_session *se, bool v) -{ - struct npf_session_alg *sa = npf_session_get_alg_ptr(se); - - if (sa) - sa->sa_inspect = v; -} - -/* Get the alg from this session */ -const struct npf_alg * -npf_alg_session_get_alg(const struct npf_session *se) -{ - struct npf_session_alg *sa = npf_session_get_alg_ptr(se); - - if (sa) - return sa->sa_alg; - return NULL; -} - -/* - * Allocate ALG data on the session handle - */ -int -npf_alg_session_set_alg(struct npf_session *se, const struct npf_alg *alg) -{ - - struct npf_session_alg *sa = malloc(sizeof(struct npf_session_alg)); - - if (!sa) - return -ENOMEM; - - sa->sa_alg = npf_alg_get((struct npf_alg *)alg); - sa->sa_private = NULL; - sa->sa_flags = 0; - sa->sa_inspect = false; - - npf_session_set_alg_ptr(se, sa); - - return 0; -} - -/* Get a proto struct */ -struct alg_protocol_tuples *alg_get_apt(struct npf_alg_instance *ai, - uint8_t proto) -{ - if (proto <= NPF_ALG_MAX_PROTOS) - return rcu_dereference(ai->ai_apts[proto]); - return NULL; -} - -/* Allocate an apt */ -static struct alg_protocol_tuples *apt_alloc(void) -{ - struct alg_protocol_tuples *apt = - zmalloc_aligned(sizeof(struct alg_protocol_tuples)); - if (!apt) - return NULL; - - rte_spinlock_init(&apt->apt_lock); - - apt->apt_any_sport.a_ht = cds_lfht_new(APT_INIT, APT_MIN, APT_MAX, - CDS_LFHT_AUTO_RESIZE | CDS_LFHT_ACCOUNTING, NULL); - if (!apt->apt_any_sport.a_ht) - goto out; - - apt->apt_all.a_ht = cds_lfht_new(APT_INIT, APT_MIN, APT_MAX, - CDS_LFHT_AUTO_RESIZE | CDS_LFHT_ACCOUNTING, NULL); - if (!apt->apt_all.a_ht) - goto any_sport; - - apt->apt_port.a_ht = cds_lfht_new(APT_INIT, APT_MIN, APT_MAX, - CDS_LFHT_AUTO_RESIZE | CDS_LFHT_ACCOUNTING, NULL); - if (!apt->apt_port.a_ht) - goto all; - - apt->apt_proto.a_ht = cds_lfht_new(APT_INIT, 
APT_MIN, APT_MAX, - CDS_LFHT_AUTO_RESIZE | CDS_LFHT_ACCOUNTING, NULL); - if (!apt->apt_proto.a_ht) - goto port; - - return apt; - -port: - cds_lfht_destroy(apt->apt_port.a_ht, NULL); -all: - cds_lfht_destroy(apt->apt_all.a_ht, NULL); -any_sport: - cds_lfht_destroy(apt->apt_any_sport.a_ht, NULL); -out: - free(apt); - return NULL; -} - -/* Get the specific hash table based on flag */ -static struct alg_ht *apt_ht(struct alg_protocol_tuples *apt, uint16_t flag) -{ - struct alg_ht *a; - - switch (flag & NPF_TUPLE_MATCH_MASK) { - case NPF_TUPLE_MATCH_ANY_SPORT: - a = &apt->apt_any_sport; - break; - case NPF_TUPLE_MATCH_ALL: - a = &apt->apt_all; - break; - case NPF_TUPLE_MATCH_PROTO_PORT: - a = &apt->apt_port; - break; - case NPF_TUPLE_MATCH_PROTO: - a = &apt->apt_proto; - break; - default: - a = NULL; - } - - return a; -} - -/* Matching function */ -static int apt_matcher(struct cds_lfht_node *node, const void *key) -{ - const struct apt_match *m = key; - struct npf_alg_tuple *nt; - - nt = caa_container_of(node, struct npf_alg_tuple, nt_node); - - /* Never return if in expired state */ - if (nt->nt_flags & NPF_TUPLE_EXPIRED) - return 0; - - /* interface index, optional */ - if (nt->nt_ifx && (nt->nt_ifx != m->m_ifx)) - return 0; - - /* flag */ - if (!(nt->nt_flags & m->m_flag)) - return 0; - - switch (m->m_flag) { - case NPF_TUPLE_MATCH_ANY_SPORT: - case NPF_TUPLE_MATCH_ALL: - if (m->m_alen != nt->nt_alen) - return 0; - if (m->m_flag == NPF_TUPLE_MATCH_ALL) - if (nt->nt_sport != m->m_sport) - return 0; - if (nt->nt_dport != m->m_dport) - return 0; - if (memcmp(&nt->nt_srcip, m->m_srcip, m->m_alen)) - return 0; - if (memcmp(&nt->nt_dstip, m->m_dstip, m->m_alen)) - return 0; - break; - case NPF_TUPLE_MATCH_PROTO_PORT: - if (nt->nt_dport != m->m_dport) - return 0; - break; - case NPF_TUPLE_MATCH_PROTO: - if (nt->nt_proto != m->m_proto) - return 0; - break; - default: - return 0; /* wtf?? 
*/ - } - - return 1; -} - -/* Hash table node count */ -static inline int64_t apt_ht_count(struct alg_ht *a) -{ - return rte_atomic64_read(&a->a_cnt); -} - -/* Protocol node count */ -static int64_t apt_count(struct alg_protocol_tuples *apt) -{ - int64_t count = apt_ht_count(&apt->apt_all); - - count += apt_ht_count(&apt->apt_port); - count += apt_ht_count(&apt->apt_proto); - return count; -} - -/* Hash generator */ -static unsigned long apt_ht_hash(struct apt_match *m) -{ - uint32_t hash; - const uint32_t *src; - const uint32_t *dst; - - switch (m->m_flag) { - case NPF_TUPLE_MATCH_PROTO: - hash = m->m_proto; - break; - case NPF_TUPLE_MATCH_PROTO_PORT: - hash = (m->m_dport << 16) | m->m_proto; - break; - case NPF_TUPLE_MATCH_ANY_SPORT: /* Fall through */ - case NPF_TUPLE_MATCH_ALL: - src = m->m_srcip->s6_addr32; - dst = m->m_dstip->s6_addr32; - /* Don't use sport, it can be wildcarded */ - hash = rte_jhash_2words(m->m_dport, m->m_proto, 0); - - if (m->m_alen == 4) - return rte_jhash_2words(src[0], dst[0], hash); - - const uint32_t sz = m->m_alen >> 2; - - hash = rte_jhash_32b(src, sz, hash); - hash = rte_jhash_32b(dst, sz, hash); - break; - default: - hash = 0; - break; - } - - assert(hash); - - return (unsigned long) hash; -} - -/* Search a list for a match */ -static struct npf_alg_tuple *apt_search_ht(struct alg_ht *a, - struct apt_match *m) -{ - struct npf_alg_tuple *nt; - struct cds_lfht_iter iter; - struct cds_lfht_node *node; - - cds_lfht_lookup(a->a_ht, apt_ht_hash(m), apt_matcher, m, &iter); - node = cds_lfht_iter_get_node(&iter); - if (node) - nt = caa_container_of(node, struct npf_alg_tuple, nt_node); - else - nt = NULL; - - return nt; -} - -/* Fill a match struct based on npc */ -static void alg_fill_match(npf_cache_t *npc, uint8_t proto, - uint32_t ifx, struct apt_match *m) -{ - /* Fill in the match struct */ - m->m_ifx = ifx; - m->m_proto = proto; - m->m_srcip = npf_cache_srcip(npc); - m->m_dstip = npf_cache_dstip(npc); - m->m_alen = npc->npc_alen; - 
- /* Get ports if applicable */ - if (npf_iscached(npc, NPC_L4PORTS)) { - struct npf_ports *ports = &npc->npc_l4.ports; - - m->m_dport = ports->d_port; - m->m_sport = ports->s_port; - } else { - m->m_dport = 0; - m->m_sport = 0; - } -} - -/* lookup by npc */ -static struct npf_alg_tuple *alg_lookup(struct npf_alg_instance *ai, - npf_cache_t *npc, uint32_t ifx) -{ - struct alg_protocol_tuples *apt; - struct npf_alg_tuple *nt; - struct apt_match m; - uint8_t proto = npf_cache_ipproto(npc); - - apt = alg_get_apt(ai, proto); - if (!apt) - return NULL; - - if (!apt_ht_count(&apt->apt_port) && !apt_ht_count(&apt->apt_proto)) - return NULL; - - alg_fill_match(npc, proto, ifx, &m); - - /* Search on dport */ - m.m_flag = NPF_TUPLE_MATCH_PROTO_PORT; - nt = apt_search_ht(&apt->apt_port, &m); - if (nt) - return nt; - - /* Search on proto */ - m.m_flag = NPF_TUPLE_MATCH_PROTO; - nt = apt_search_ht(&apt->apt_proto, &m); - - return nt; -} - -/* Lookup by tuple */ -struct npf_alg_tuple * -npf_alg_tuple_lookup(struct npf_alg_instance *ai, struct npf_alg_tuple *nt) -{ - struct alg_protocol_tuples *apt; - struct apt_match m; - struct alg_ht *a; - struct cds_lfht_node *node; - struct cds_lfht_iter iter; - struct npf_alg_tuple *result = NULL; - - apt = alg_get_apt(ai, nt->nt_proto); - if (!apt) - return NULL; - - /* Fill in the match struct */ - m.m_flag = (nt->nt_flags & NPF_TUPLE_MATCH_MASK); - m.m_ifx = nt->nt_ifx; - m.m_proto = nt->nt_proto; - m.m_dport = nt->nt_dport; - m.m_sport = nt->nt_sport; - m.m_srcip = &nt->nt_srcip; - m.m_dstip = &nt->nt_dstip; - m.m_alen = nt->nt_alen; - - a = apt_ht(apt, nt->nt_flags); - if (a) { - cds_lfht_lookup(a->a_ht, apt_ht_hash(&m), apt_matcher, - &m, &iter); - node = cds_lfht_iter_get_node(&iter); - if (node) { - result = caa_container_of(node, struct npf_alg_tuple, - nt_node); - } - } - - return result; -} - -/* Lookup by npc */ -struct npf_alg_tuple *alg_lookup_npc(struct npf_alg_instance *ai, - npf_cache_t *npc, uint32_t ifx) -{ - struct 
npf_alg_tuple *nt = npf_cache_get_tuple(npc); - - if (!npf_iscached(npc, NPC_ALG_TLUP)) { - npc->npc_info |= NPC_ALG_TLUP; - nt = alg_lookup(ai, npc, ifx); - npf_cache_set_tuple(npc, (void *)nt); - } - return nt; -} - -/* Free tuple */ -static void apt_free_tuple(struct rcu_head *head) -{ - struct npf_alg_tuple *nt = caa_container_of(head, - struct npf_alg_tuple, nt_rcu_head); - free(nt); -} - - -static void apt_release_node_nat(struct npf_alg_tuple *nt) -{ - npf_nat_t *nat; - npf_natpolicy_t *np; - npf_rule_t *rl; - - if (nt->nt_nat) { - nat = npf_session_get_nat(nt->nt_se); - np = npf_nat_get_policy(nat); - rl = npf_nat_get_rule(nat); - npf_nat_free_map(np, rl, NPF_NAT_MAP_PORT, nt->nt_nat->an_vrfid, - nt->nt_nat->an_taddr, nt->nt_nat->an_tport); - } - free(nt->nt_nat); -} - -static void apt_release_node(struct npf_alg_tuple *nt) -{ - if (nt) { - if (nt->nt_data && nt->nt_reap) - nt->nt_reap(nt->nt_data); - if (nt->nt_nat) - apt_release_node_nat(nt); - call_rcu(&nt->nt_rcu_head, apt_free_tuple); - } -} - -/* Set tuple expired */ -void apt_expire_tuple(struct npf_alg_tuple *nt) -{ - if (nt) { - struct alg_ht *a = rcu_dereference(nt->nt_aht); - uint16_t exp = nt->nt_flags & ~NPF_TUPLE_EXPIRED; - - /* Only expire once */ - if (a && rte_atomic16_cmpset(&nt->nt_flags, exp, - (exp | NPF_TUPLE_EXPIRED))) - rte_atomic64_dec(&a->a_cnt); - } -} - -/* internal delete tuple */ -static void apt_del_tuple(struct alg_ht *a, struct npf_alg_tuple *nt) -{ - if (a && !cds_lfht_del(a->a_ht, &nt->nt_node)) { - apt_expire_tuple(nt); - apt_release_node(nt); - } -} - -/* internal tuple insert */ -static int apt_insert_tuple(struct npf_alg_instance *ai, - struct npf_alg_tuple *nt, bool addreplace) -{ - struct alg_protocol_tuples *apt; - struct cds_lfht_node *node; - struct apt_match m; - int rc; - int retry; - struct alg_ht *a; - - /* Proto exists? */ - apt = alg_get_apt(ai, nt->nt_proto); - if (!apt) - return -ENOENT; - - /* At max? 
*/ - if (unlikely(apt_count(apt) >= APT_MAX_NODES)) { - if (net_ratelimit()) - RTE_LOG(DEBUG, FIREWALL, - "ALG: Expected flow table full\n"); - return -ENOSPC; - } - - /* Fill in the match struct */ - m.m_flag = (nt->nt_flags & NPF_TUPLE_MATCH_MASK); - m.m_ifx = nt->nt_ifx; - m.m_proto = nt->nt_proto; - m.m_dport = nt->nt_dport; - m.m_sport = nt->nt_sport; - m.m_srcip = &nt->nt_srcip; - m.m_dstip = &nt->nt_dstip; - m.m_alen = nt->nt_alen; - - cds_lfht_node_init(&nt->nt_node); - - rc = -ENOENT; - a = apt_ht(apt, nt->nt_flags); - if (!a) - return rc; - - /* - * If 'addreplace', then the alg is attemping to replace an - * existing tuple. Do this by expiring the existing tuple and - * retrying for a limited number of times. - */ - rc = -EEXIST; - retry = NPF_ALG_RETRY_COUNT; - while (retry--) { - node = cds_lfht_add_unique(a->a_ht, apt_ht_hash(&m), - apt_matcher, &m, &nt->nt_node); - if (node == &nt->nt_node) { - rc = 0; - break; - } - - /* Expire if necessary */ - if (addreplace) - apt_expire_tuple(caa_container_of(node, - struct npf_alg_tuple, nt_node)); - else - break; - } - - if (!rc) { - rte_atomic64_inc(&a->a_cnt); - rcu_assign_pointer(nt->nt_aht, a); - } - - return rc; -} - -/* walk a proto list and apply 'func' */ -static void apt_walk_proto(struct alg_protocol_tuples *apt, algwalk_t func, - void *data) -{ - struct cds_lfht_iter iter; - struct npf_alg_tuple *nt; - - if (!apt) - return; - - cds_lfht_for_each_entry(apt->apt_any_sport.a_ht, &iter, nt, nt_node) { - func(&apt->apt_any_sport, nt, data); - } - - cds_lfht_for_each_entry(apt->apt_all.a_ht, &iter, nt, nt_node) { - func(&apt->apt_all, nt, data); - } - - cds_lfht_for_each_entry(apt->apt_port.a_ht, &iter, nt, nt_node) { - func(&apt->apt_port, nt, data); - } - - cds_lfht_for_each_entry(apt->apt_proto.a_ht, &iter, nt, nt_node) { - func(&apt->apt_proto, nt, data); - } -} - -/* reset tuples */ -static void apt_reset_tuples(struct alg_ht *a, struct npf_alg_tuple *nt, - void *data) -{ - struct alg_walk_params *ap 
= data; - - if (nt->nt_alg != ap->ap_alg) - return; - - /* delete all KEEP tuples, optionally delete ALL tuples */ - if (nt->nt_flags & NPF_TUPLE_KEEP) - apt_del_tuple(a, nt); - else if (ap->ap_all) - apt_expire_tuple(nt); -} - -/* delete all tuples */ -static void alg_del_tuples_all(struct alg_ht *a, struct npf_alg_tuple *nt, - void *data __unused) -{ - apt_del_tuple(a, nt); -} - -/* Destroy all apt hash tables */ -void alg_destroy_apts(struct npf_alg_instance *ai) -{ - int i; - - for (i = 0; i < NPF_ALG_MAX_PROTOS; i++) { - apt_walk_proto(ai->ai_apts[i], alg_del_tuples_all, NULL); - if (ai->ai_apts[i]) { - cds_lfht_destroy(ai->ai_apts[i]->apt_any_sport.a_ht, - NULL); - cds_lfht_destroy(ai->ai_apts[i]->apt_all.a_ht, NULL); - cds_lfht_destroy(ai->ai_apts[i]->apt_port.a_ht, NULL); - cds_lfht_destroy(ai->ai_apts[i]->apt_proto.a_ht, NULL); - - ai->ai_apts[i]->apt_any_sport.a_ht = NULL; - ai->ai_apts[i]->apt_all.a_ht = NULL; - ai->ai_apts[i]->apt_port.a_ht = NULL; - ai->ai_apts[i]->apt_proto.a_ht = NULL; - } - free(ai->ai_apts[i]); - } -} - -/* - * Expire tuples containing this session. - * - * Its possible that the alg vrf instance has been deleted, in which case - * alg->na_ai will be NULL. Just return in these cases. 
- */ -static void alg_expire_se_tuples(npf_session_t *se, uint8_t proto) -{ - const struct npf_alg *alg = npf_alg_session_get_alg(se); - struct alg_protocol_tuples *apt; - struct cds_lfht_iter iter; - struct npf_alg_tuple *nt; - - if (!proto || !alg->na_ai) - return; - - apt = alg_get_apt(alg->na_ai, proto); - if (!apt) - return; - - /* Walk the 'all' hash table and expire matching tuples */ - if (apt_ht_count(&apt->apt_all) && apt->apt_all.a_ht) { - cds_lfht_for_each_entry(apt->apt_all.a_ht, - &iter, nt, nt_node) { - if (nt->nt_se == se) - apt_expire_tuple(nt); - } - } - - /* Now the 'any_sport' hash table and expire matching tuples */ - if (apt_ht_count(&apt->apt_any_sport) && apt->apt_any_sport.a_ht) { - cds_lfht_for_each_entry(apt->apt_any_sport.a_ht, - &iter, nt, nt_node) { - if (nt->nt_se == se) - apt_expire_tuple(nt); - } - } -} - -/* expire tuples by session */ -void alg_expire_session_tuples(const struct npf_alg *alg, npf_session_t *se) -{ - int i; - int n; - - /* - * Walk the configs to get all protocols this alg - * references. 
- */ - for (i = 0; i < alg->na_num_configs; i++) { - const struct npf_alg_config *ac = &alg->na_configs[i]; - - for (n = 0; n < ac->ac_item_cnt; n++) - alg_expire_se_tuples(se, ac->ac_items[n].ci_proto); - } -} - -/* Dump tuple */ -static void apt_tuple_dump(struct alg_ht *a __unused, - struct npf_alg_tuple *nt, void *data) -{ - json_writer_t *json = data; - int family = 0; - char buf[INET6_ADDRSTRLEN]; - - /* Only display initialized fields */ - - jsonw_start_object(json); - jsonw_string_field(json, "alg", nt->nt_alg->na_ops->name); - - if (nt->nt_exp_ts) - jsonw_uint_field(json, "timestamp", nt->nt_exp_ts); - if (nt->nt_proto) - jsonw_uint_field(json, "protocol", nt->nt_proto); - if (nt->nt_se) - jsonw_bool_field(json, "session", true); - if (nt->nt_ifx) - jsonw_uint_field(json, "if_index", nt->nt_ifx); - if (nt->nt_alg_flags) - jsonw_uint_field(json, "alg_flags", nt->nt_alg_flags); - - if (nt->nt_timeout) - jsonw_uint_field(json, "timeout", nt->nt_timeout); - if (nt->nt_flags) - jsonw_uint_field(json, "flags", nt->nt_flags); - - if (nt->nt_sport) - jsonw_uint_field(json, "sport", ntohs(nt->nt_sport)); - if (nt->nt_dport) - jsonw_uint_field(json, "dport", ntohs(nt->nt_dport)); - - switch (nt->nt_alen) { - case 4: - family = AF_INET; - break; - case 16: - family = AF_INET6; - break; - default: - family = 0; - - } - - if (family) { - inet_ntop(family, &nt->nt_srcip, buf, sizeof(buf)); - jsonw_string_field(json, "srcip", buf); - inet_ntop(family, &nt->nt_dstip, buf, sizeof(buf)); - jsonw_string_field(json, "dstip", buf); - jsonw_uint_field(json, "alen", nt->nt_alen); - } - - if (nt->nt_data) - jsonw_bool_field(json, "tuple_data", true); - if (nt->nt_reap) - jsonw_bool_field(json, "reap", true); - jsonw_end_object(json); -} - -/* - * A tuples' NPF_TUPLE_EXPIRED may be set in three ways: - * 1. When the current time exceeds the tuples timestamp (non-KEEP tuples), or - * 2. When a tuple is deleted unconditionally (e.g. KEEP tuple is deleted via - * config) - * 3. 
Manually by an alg. - */ -static bool apt_tuple_is_expired(uint64_t current, struct npf_alg_tuple *nt) -{ - if (nt->nt_flags & NPF_TUPLE_EXPIRED) - return true; - - /* Do not timeout KEEP tuples */ - if (nt->nt_flags & NPF_TUPLE_KEEP) - return false; - - if (current > nt->nt_exp_ts) { - apt_expire_tuple(nt); - return true; - } - return false; -} - -/* tuple garbage collection */ -static void apt_gc(struct alg_ht *a, struct npf_alg_tuple *nt, - void *data) -{ - uint64_t current = *((uint64_t *) data); - - /* - * Manually expired or timed out? - * - * Two passes to reclaim. First sets removing flag - */ - if (apt_tuple_is_expired(current, nt)) { - if (nt->nt_flags & NPF_TUPLE_REMOVING) - apt_del_tuple(a, nt); - else - nt->nt_flags |= NPF_TUPLE_REMOVING; - } -} - -/* Flush all non-config tuples - Expressly for UT's. */ -static void apt_flush_tuples(struct alg_ht *a, struct npf_alg_tuple *nt, - void *data __unused) -{ - /* - * N.B. MULTIMATCH tuples have a dependency on a - * session handle, which are flushed after - * algs in the UTs. So expire them now. 
- */ - if ((nt->nt_flags & NPF_TUPLE_MULTIMATCH) || - !(nt->nt_flags & NPF_TUPLE_KEEP) || - (nt->nt_flags & NPF_TUPLE_EXPIRED)) - apt_del_tuple(a, nt); -} - -/* Purge all tuples - Used during instance destroy */ -static void apt_tuple_purge(struct alg_ht *a, struct npf_alg_tuple *nt, - void *data __unused) -{ - apt_del_tuple(a, nt); -} - -/* Get alg from name */ -static struct npf_alg *alg_name_to_alg(struct npf_alg_instance *ai, - const char *name) -{ - if (ai->ai_ftp && !strcmp(ai->ai_ftp->na_ops->name, name)) - return ai->ai_ftp; - else if (ai->ai_tftp && !strcmp(ai->ai_tftp->na_ops->name, name)) - return ai->ai_tftp; - else if (ai->ai_sip && !strcmp(ai->ai_sip->na_ops->name, name)) - return ai->ai_sip; - else if (ai->ai_rpc && !strcmp(ai->ai_rpc->na_ops->name, name)) - return ai->ai_rpc; - return NULL; -} - -/* Periodic delete expired tuples */ -static void apt_worker(struct npf_alg_instance *ai) -{ - uint8_t i; - struct alg_protocol_tuples *apt; - uint64_t current = get_time_uptime(); - - for (i = 0; i <= NPF_ALG_MAX_PROTOS; i++) { - apt = rcu_dereference(ai->ai_apts[i]); - apt_walk_proto(apt, apt_gc, ¤t); - } -} - -/* Sanity check on tuples. */ -static int alg_tuple_sanity(struct npf_alg_tuple *nt) -{ - /* No alg? 
*/ - if (!nt->nt_alg) - return -EINVAL; - - /* Unsupported proto */ - if (nt->nt_proto > NPF_ALG_MAX_PROTOS) - return -EINVAL; - - /* - * start with most restrictive first - */ - - switch (nt->nt_flags & NPF_TUPLE_MATCH_MASK) { - case NPF_TUPLE_MATCH_ANY_SPORT: - case NPF_TUPLE_MATCH_ALL: - if (!nt->nt_ifx) - return -EINVAL; - if (!nt->nt_alen) - return -EINVAL; - if (!memcmp(&nt->nt_srcip, &zero_addr, nt->nt_alen)) - return -EINVAL; - if (!memcmp(&nt->nt_dstip, &zero_addr, nt->nt_alen)) - return -EINVAL; - if (!nt->nt_dport) - return -EINVAL; - break; - case NPF_TUPLE_MATCH_PROTO_PORT: - if (!nt->nt_dport) - return -EINVAL; - break; - case NPF_TUPLE_MATCH_PROTO: - break; - default: - /* Too many flags set, can only be one of the above */ - return -EINVAL; - } - - /* non-keep needs a reasonable timeout */ - if (!(nt->nt_flags & NPF_TUPLE_KEEP) && - nt->nt_timeout < NPF_ALG_MIN_TIMEOUT) - nt->nt_timeout = NPF_ALG_MIN_TIMEOUT; - - return 0; -} - -/* Expire a tuple */ -void npf_alg_tuple_expire(struct npf_alg_tuple *nt) -{ - apt_expire_tuple(nt); -} - -/* - * Allocate a tuple - */ -struct npf_alg_tuple *npf_alg_tuple_alloc(void) -{ - struct npf_alg_tuple *nt = zmalloc_aligned( - sizeof(struct npf_alg_tuple)); - return nt; -} - -/* - * free a tuple - Must NOT be inserted in the hash - */ -void npf_alg_tuple_free(struct npf_alg_tuple *nt) -{ - free(nt); -} - -/* link two tuples together */ -void npf_alg_tuple_pair(struct npf_alg_tuple *nt1, struct npf_alg_tuple *nt2) -{ - nt1->nt_paired = nt2; - nt2->nt_paired = nt1; -} - -/* Unpair a set of tuples */ -void npf_alg_tuple_unpair(struct npf_alg_tuple *nt) -{ - struct npf_alg_tuple *nt2 = nt->nt_paired; - - nt->nt_paired = NULL; - if (nt2) - nt2->nt_paired = NULL; -} - -/* Expire a set of paired tuples */ -void npf_alg_tuple_expire_pair(struct npf_alg_tuple *nt) -{ - struct npf_alg_tuple *nt2 = nt->nt_paired; - - apt_expire_tuple(nt); - nt->nt_paired = NULL; - apt_expire_tuple(nt2); - if (nt2) - nt2->nt_paired = NULL; -} - 
-/* - * Insert a tuple into the hash table. - */ -int npf_alg_tuple_insert(struct npf_alg_instance *ai, struct npf_alg_tuple *nt) -{ - /* Sanity check the tuple */ - if (alg_tuple_sanity(nt)) - return -EINVAL; - - if (!(nt->nt_flags & NPF_TUPLE_KEEP)) - nt->nt_exp_ts = get_time_uptime() + nt->nt_timeout; - - return apt_insert_tuple(ai, nt, false); -} - -/* tuple add/replace */ -int npf_alg_tuple_add_replace(struct npf_alg_instance *ai, - struct npf_alg_tuple *nt) -{ - /* Sanity check the tuple */ - if (alg_tuple_sanity(nt)) - return -EINVAL; - - if (!(nt->nt_flags & NPF_TUPLE_KEEP)) - nt->nt_exp_ts = get_time_uptime() + nt->nt_timeout; - - return apt_insert_tuple(ai, nt, true); -} - -/* Delete a tuple, unconditionally */ -static int npf_alg_tuple_delete(struct npf_alg_instance *ai, - struct npf_alg_tuple *nt) -{ - struct alg_protocol_tuples *apt; - struct apt_match m; - struct alg_ht *a; - struct cds_lfht_node *node; - struct cds_lfht_iter iter; - int rc; - - apt = alg_get_apt(ai, nt->nt_proto); - if (!apt) - return -ENOENT; - - /* Fill in the match struct */ - m.m_flag = (nt->nt_flags & NPF_TUPLE_MATCH_MASK); - m.m_ifx = nt->nt_ifx; - m.m_proto = nt->nt_proto; - m.m_dport = nt->nt_dport; - m.m_sport = nt->nt_sport; - m.m_srcip = &nt->nt_srcip; - m.m_dstip = &nt->nt_dstip; - m.m_alen = nt->nt_alen; - - rc = -ENOENT; - a = apt_ht(apt, nt->nt_flags); - if (a) { - cds_lfht_lookup(a->a_ht, apt_ht_hash(&m), apt_matcher, - &m, &iter); - node = cds_lfht_iter_get_node(&iter); - if (node) { - nt = caa_container_of(node, struct npf_alg_tuple, - nt_node); - apt_expire_tuple(nt); - rc = 0; - } - } - - return rc; -} - -static int alg_add_port(struct npf_alg *na, - const struct npf_alg_config_item *ci) -{ - struct npf_alg_tuple *nt = npf_alg_tuple_alloc(); - int rc = -ENOMEM; - - if (nt) { - nt->nt_proto = ci->ci_proto; - nt->nt_dport = htons(ci->ci_datum); - nt->nt_alg_flags = ci->ci_alg_flags; - nt->nt_flags = ci->ci_flags; - nt->nt_alg = na; - rc = 
npf_alg_tuple_insert(na->na_ai, nt); - if (rc) - npf_alg_tuple_free(nt); - } - return rc; -} - -static int alg_delete_port(struct npf_alg *na, - const struct npf_alg_config_item *ci) -{ - struct npf_alg_tuple nt; - - nt.nt_proto = ci->ci_proto; - nt.nt_dport = htons(ci->ci_datum); - nt.nt_flags = ci->ci_flags; - nt.nt_alg = na; - nt.nt_ifx = 0; - nt.nt_sport = 0; - nt.nt_alen = 0; - return npf_alg_tuple_delete(na->na_ai, &nt); -} - -int npf_alg_port_handler(struct npf_alg *na, int op, - const struct npf_alg_config_item *ci) -{ - int rc; - - switch (op) { - case NPF_ALG_CONFIG_SET: - rc = alg_add_port(na, ci); - break; - case NPF_ALG_CONFIG_DELETE: - rc = alg_delete_port(na, ci); - break; - default: - return -EINVAL; - } - return rc; -} - -/* Manage the default config as a unit */ -static int alg_manage_config(struct npf_alg *na, int op, - struct npf_alg_config *ac) -{ - int rc = 0; - int i; - - if ((op == NPF_ALG_CONFIG_SET) && ac->ac_default_set) - return 0; - - if ((op == NPF_ALG_CONFIG_DELETE) && !ac->ac_default_set) - return 0; - - for (i = 0; i < ac->ac_item_cnt; i++) { - /* Handler for default config is optional */ - if (ac->ac_handler) { - rc = ac->ac_handler(na, op, &ac->ac_items[i]); - if (rc) - return rc; - } - } - - /* Keep track of whether the default config is installed */ - if (!rc) - ac->ac_default_set = (op == NPF_ALG_CONFIG_SET) ? true : false; - - return rc; -} - -/* Called to reset an alg to a known state. 
*/ -static int alg_reset_alg(struct npf_alg *alg, bool hard) -{ - struct alg_walk_params ap; - struct alg_protocol_tuples *apt; - uint8_t i; - int rc; - - /* First let the alg do whatever it needs */ - if (alg_has_op(alg, reset)) { - rc = alg->na_ops->reset(alg, hard); - if (rc) - return rc; - } - - /* - * Delete all KEEP tuples and if 'hard' is set, - * all tuples for this alg - */ - ap.ap_alg = alg; - ap.ap_all = hard; - for (i = 0; i < NPF_ALG_MAX_PROTOS; i++) { - apt = alg_get_apt(alg->na_ai, i); - apt_walk_proto(apt, apt_reset_tuples, &ap); - } - - /* Now reset the state of the configs and re-install. */ - for (i = 0; i < alg->na_num_configs; i++) { - struct npf_alg_config *ac = &alg->na_configs[i]; - - ac->ac_cli_refcnt = 0; - ac->ac_default_set = false; - rc = alg_manage_config(alg, NPF_ALG_CONFIG_SET, ac); - } - - /* Now reset state to default of enabled */ - if (!alg->na_enabled) - alg->na_enabled = true; - - return rc; -} - - -static void alg_reset_alg_module(struct npf_alg *alg, bool hard) -{ - int rc; - - if (!alg) - rte_panic("reset called on null alg"); - - rc = alg_reset_alg(alg, hard); - if (rc) - RTE_LOG(ERR, FIREWALL, "ALG: Reset: %s hard: %s rc: %d\n", - alg->na_ops->name, - hard ? 
"true" : "false", -rc); -} - -/* Reset a specific alg instance */ -void -alg_reset_instance(struct vrf *vrf, struct npf_alg_instance *ai, bool hard) -{ - - uint32_t count; - - if (!ai) - return; - - /* 'ai' may be freed */ - count = ai->ai_ref_count; - ai->ai_ref_count = 0; - - alg_reset_alg_module(ai->ai_ftp, hard); - alg_reset_alg_module(ai->ai_tftp, hard); - alg_reset_alg_module(ai->ai_sip, hard); - alg_reset_alg_module(ai->ai_rpc, hard); - - while (count--) - vrf_delete_by_ptr(vrf); -} - -/* Called by algs to manage a CLI config item */ -int npf_alg_manage_config_item(struct npf_alg *na, struct npf_alg_config *ac, - int op, struct npf_alg_config_item *ci) -{ - int rc; - - /* make sure the default config is deleted */ - if (op == NPF_ALG_CONFIG_SET) { - rc = alg_manage_config(na, NPF_ALG_CONFIG_DELETE, ac); - if (rc) - return rc; - } - - /* There must be a config item handler */ - rc = ac->ac_handler(na, op, ci); - if (rc) - goto reset; - - /* manage ref counts. */ - switch (op) { - case NPF_ALG_CONFIG_SET: - ac->ac_cli_refcnt++; - break; - case NPF_ALG_CONFIG_DELETE: - ac->ac_cli_refcnt--; - /* Restore default config? */ - if (!ac->ac_cli_refcnt) - (void) alg_manage_config(na, NPF_ALG_CONFIG_SET, ac); - break; - } - - return rc; - -reset: - /* - * Best attempt to restore default config. - * But only if no other CLI config is present. 
- */ - if ((op == NPF_ALG_CONFIG_SET) && !ac->ac_cli_refcnt) - (void) alg_manage_config(na, NPF_ALG_CONFIG_SET, ac); - - return rc; -} - -/* register protocols */ -static int alg_register_protos(struct npf_alg *na, - const struct npf_alg_config *ac) -{ - struct alg_protocol_tuples *apt; - uint8_t proto; - int i; - - for (i = 0; i < ac->ac_item_cnt; i++) { - proto = ac->ac_items[i].ci_proto; - if (!proto) /* Non-protocol config item */ - continue; - if (proto >= NPF_ALG_MAX_PROTOS) - rte_panic("ALG unsupported protocol %u\n", proto); - apt = alg_get_apt(na->na_ai, proto); - if (!apt) { - rcu_assign_pointer(na->na_ai->ai_apts[proto], - apt_alloc()); - if (!na->na_ai->ai_apts[proto]) - return -ENOMEM; - } - } - return 0; -} - -/* Free a reserved translation */ -int npf_alg_free_translation(npf_session_t *se, npf_addr_t *addr, - in_port_t port) -{ - npf_nat_t *nat = npf_session_get_nat(se); - npf_natpolicy_t *np = npf_nat_get_policy(nat); - npf_rule_t *rl = npf_nat_get_rule(nat); - uint32_t map_flags; - vrfid_t vrfid = npf_session_get_vrfid(se); - - /* Currently, all algs use a mapped port */ - map_flags = NPF_NAT_MAP_PORT; - - return npf_nat_free_map(np, rl, map_flags, vrfid, *addr, port); -} - -/* Reserve translations for an alg. */ -int npf_alg_reserve_translations(npf_session_t *parent, int nr_ports, - bool start_even, uint8_t alen, - npf_addr_t *addr, in_port_t *port) -{ - npf_nat_t *pnat = npf_session_get_nat(parent); - npf_natpolicy_t *np = npf_nat_get_policy(pnat); - npf_rule_t *rl = npf_nat_get_rule(pnat); - in_port_t tmp; - npf_addr_t paddr; - uint32_t nat_flags; - vrfid_t vrfid = npf_session_get_vrfid(parent); - int i; - int rc; - - /* Currently, all algs need a mapped port */ - nat_flags = NPF_NAT_MAP_PORT; - - /* Start on even boundary? 
*/ - if (start_even) - nat_flags |= NPF_NAT_MAP_EVEN_PORT; - - /* allocate from parent translation addr */ - npf_nat_get_trans(pnat, addr, &tmp); - paddr = *addr; - - rc = npf_nat_alloc_map(np, rl, nat_flags, vrfid, addr, - port, nr_ports); - if (rc) - return rc; - - /* - * Ensure that the translations come from the same - * (parent) translation address. - */ - if (memcmp(addr, &paddr, alen)) { - tmp = ntohs(*port); - for (i = 0; i < nr_ports; i++) - npf_nat_free_map(np, rl, nat_flags, vrfid, - *addr, htons(tmp + i)); - return -ENOSPC; - } - - return 0; -} - -/* - * Create and assign a nat struct to a session handle. - * - * Used by algs to create nat structs for reverse secondary flows. - * On success, will consume the alg nat params. Otherwise we leave - * that to tuple destroy. (There may be a reservation) - * - * Called as desired by algs during their npf_alg_session_init(). - * - */ -int npf_alg_session_nat(npf_session_t *se, npf_nat_t *pnat, npf_cache_t *npc, - const int di, struct npf_alg_tuple *nt) -{ - struct npf_alg_nat *an = nt->nt_nat; - npf_nat_t *nat; - - /* - * Only if we have an alg nat on the tuple. - * May be called in fw stateful matches for algs. (eg: non-nat) - */ - if (!an) - return 0; - - /* Must have a parent nat */ - if (!pnat) - return -ENOENT; - - /* Create the nat, possibly reversed of the pnat */ - nat = npf_nat_custom_nat(pnat, an->an_flags); - if (!nat) - return -ENOMEM; - - if (an->an_flags & NPF_NAT_REVERSE) { - npf_nat_set_trans(nat, &an->an_oaddr, an->an_oport); - npf_nat_set_orig(nat, &an->an_taddr, an->an_tport); - } else { - npf_nat_set_trans(nat, &an->an_taddr, an->an_tport); - npf_nat_set_orig(nat, &an->an_oaddr, an->an_oport); - } - - npf_nat_finalise(npc, se, di, nat); - - npf_session_setnat(se, nat, true); - - /* Mark as consumed so tuple destroy doesn't see it */ - free(nt->nt_nat); - nt->nt_nat = NULL; - - return 0; -} - -/* - * Register a application protocol alg. 
- * - * - Create the tuple hash tables - * - Insert default config(s) - * - * Do not attempt to recover from partial success. Failure to - * register a specific ALG will result in failure of the - * ALG instance creation, will result in complete cleanup. - */ -int npf_alg_register(struct npf_alg *na) -{ - struct npf_alg_config *ac = na->na_configs; - int rc = 0; - int i; - - for (i = 0; i < na->na_num_configs; i++) { - rc = alg_register_protos(na, ac); - if (rc) - break; - - rc = alg_manage_config(na, NPF_ALG_CONFIG_SET, ac); - if (rc) - break; - ac++; - } - - if (rc) - RTE_LOG(ERR, FIREWALL, "ALG: register: %s failed: rc: %d\n", - na->na_ops->name, rc); - - return rc; -} - -static int alg_config(struct npf_alg_instance *ai, const char *name, int op, - int argc, char **argv) -{ - struct npf_alg *alg; - - alg = alg_name_to_alg(ai, name); - if (alg_has_op(alg, config)) - return alg->na_ops->config(alg, op, argc, argv); - return -ENOENT; -} - -/* config() - Set/delete options to an alg */ -int npf_alg_config(uint32_t ext_vrfid, const char *name, int op, - int argc, char **argv) -{ - struct vrf *vrf; - struct npf_alg_instance *ai; - int rc; - - vrf = vrf_find_or_create(ext_vrfid); /* Bug */ - if (!vrf) - return -EINVAL; - ai = vrf_get_npf_alg(vrf); - if (!ai) - return -EINVAL; - - rc = alg_config(ai, name, op, argc, argv); - if (!rc) { - switch (op) { - case NPF_ALG_CONFIG_SET: - vrf_find_or_create(ext_vrfid); /* Bug */ - ai->ai_ref_count++; - break; - case NPF_ALG_CONFIG_DELETE: - vrf_delete_by_ptr(vrf); - ai->ai_ref_count++; - break; - } - } - - vrf_delete_by_ptr(vrf); - return rc; -} - -/* - * alg_search_all_and_any_sport() - * - * Certain algs (sip notably) can add multiple tuples that can match a - * single packet. This is because they may wild-card the sport (eg: set to - * zero) due to the connection-less nature of UDP. 
- * - * We need to ensure that a tuple containing both a sport and dport is matched - * prior to a tuple with a matching dport and sport == 0, so search both in - * that order. - */ -struct npf_alg_tuple * -alg_search_all_then_any_sport(struct alg_protocol_tuples *apt, - struct npf_cache *npc, const struct ifnet *ifp) -{ - struct apt_match m; - struct npf_alg_tuple *nt; - uint64_t all_count; - uint64_t any_sport; - - - /* Ensure we have some in either */ - all_count = apt_ht_count(&apt->apt_all); - any_sport = apt_ht_count(&apt->apt_any_sport); - if (!all_count && !any_sport) - return NULL; - - alg_fill_match(npc, npf_cache_ipproto(npc), ifp->if_index, &m); - - /* Search 'all' first */ - if (all_count) { - m.m_flag = NPF_TUPLE_MATCH_ALL; - nt = apt_search_ht(&apt->apt_all, &m); - if (nt) - return nt; - } - - /* Not found, try the 'any_sport' */ - if (any_sport) { - m.m_flag = NPF_TUPLE_MATCH_ANY_SPORT; - nt = apt_search_ht(&apt->apt_any_sport, &m); - } - - return nt; -} - -/* Get the base parent's nat struct */ -struct npf_nat *npf_alg_parent_nat(npf_session_t *se) -{ - return npf_session_get_nat(npf_session_get_base_parent(se)); -} - -static void alg_info_json(struct npf_alg *alg, json_writer_t *json) -{ - if (alg) { - jsonw_start_object(json); - jsonw_string_field(json, "name", alg->na_ops->name); - jsonw_bool_field(json, "enabled", alg->na_enabled); - jsonw_end_object(json); - } -} - -int -alg_dump(struct npf_alg_instance *ai, vrfid_t vrfid, json_writer_t *json) -{ - struct alg_protocol_tuples *apt; - uint8_t i; - - jsonw_start_object(json); - jsonw_uint_field(json, "vrfid", vrf_get_external_id(vrfid)); - - jsonw_name(json, "algs"); - jsonw_start_array(json); - alg_info_json(ai->ai_ftp, json); - alg_info_json(ai->ai_tftp, json); - alg_info_json(ai->ai_sip, json); - alg_info_json(ai->ai_rpc, json); - jsonw_end_array(json); - - jsonw_name(json, "tuples"); - jsonw_start_array(json); - for (i = 0; i < NPF_ALG_MAX_PROTOS; i++) { - apt = alg_get_apt(ai, i); - 
apt_walk_proto(apt, apt_tuple_dump, json); - } - jsonw_end_array(json); - jsonw_end_object(json); - return 0; -} - -/* alg enable */ -int npf_alg_state_set(uint32_t ext_vrfid, const char *name, int op) -{ - struct vrf *vrf; - struct npf_alg_instance *ai; - struct npf_alg *alg; - - vrf = vrf_get_rcu_from_external(ext_vrfid); - if (!vrf) { - struct alg_late_vrf *late_vrf - = npf_alg_late_vrf_find(ext_vrfid); - if (!late_vrf) - late_vrf = npf_alg_late_vrf_add(ext_vrfid); - - const bool off = (op == NPF_ALG_CONFIG_DISABLE); - npf_alg_late_vrf_set_alg(late_vrf, name, !off); - - return 0; - } - - ai = vrf_get_npf_alg(vrf); - if (!ai) - return -EINVAL; - - alg = alg_name_to_alg(ai, name); - if (!alg) - return -ENOENT; - - /* - * Note that algs are enabled by default - */ - switch (op) { - case NPF_ALG_CONFIG_ENABLE: - if (!alg->na_enabled) - alg->na_enabled = true; - break; - case NPF_ALG_CONFIG_DISABLE: - if (alg->na_enabled) - alg->na_enabled = false; - break; - default: - return -EINVAL; - } - - return 0; -} - -const char *npf_alg_id2name(enum npf_alg_id id) -{ - switch (id) { - case NPF_ALG_ID_FTP: - return NPF_ALG_FTP_NAME; - case NPF_ALG_ID_TFTP: - return NPF_ALG_TFTP_NAME; - case NPF_ALG_ID_RPC: - return NPF_ALG_RPC_NAME; - case NPF_ALG_ID_SIP: - return NPF_ALG_SIP_NAME; - }; - return "-"; -} - -void npf_alg_destroy_alg(struct npf_alg *alg) -{ - alg->na_enabled = false; - alg->na_ops = NULL; - free(alg); -} - -struct npf_alg * -npf_alg_create_alg(struct npf_alg_instance *ai, enum npf_alg_id id) -{ - struct npf_alg *alg; - - alg = zmalloc_aligned(sizeof(struct npf_alg)); - if (!alg) - return NULL; - - rte_atomic32_set(&alg->na_refcnt, 0); - alg->na_ai = ai; - alg->na_id = id; - alg->na_enabled = true; - - return alg; -} - -static void -alg_periodic(struct rte_timer *timer __rte_unused, void *data __rte_unused) -{ - struct npf_alg_instance *ai; - struct vrf *vrf; - vrfid_t vrfid; - - VRF_FOREACH(vrf, vrfid) { - ai = vrf_get_npf_alg(vrf); - if (ai) { - /* Call an 
alg's periodic routine */ - if (alg_has_op(ai->ai_sip, periodic)) - ai->ai_sip->na_ops->periodic(ai->ai_sip); - apt_worker(ai); - } - } - - /* Until we graceful shutdown the dataplane */ - if (running) - npf_alg_timer_reset(); -} - -/* - * Create a per-vrf ALG instance - */ -struct npf_alg_instance *npf_alg_create_instance(uint32_t ext_vrfid) -{ - struct npf_alg_instance *ai; - - ai = zmalloc_aligned(sizeof(struct npf_alg_instance)); - if (!ai) - return NULL; - - /* Now specific alg instances */ - ai->ai_tftp = npf_alg_tftp_create_instance(ai); - if (!ai->ai_tftp) - goto out; - ai->ai_ftp = npf_alg_ftp_create_instance(ai); - if (!ai->ai_ftp) - goto out; - ai->ai_sip = npf_alg_sip_create_instance(ai); - if (!ai->ai_sip) - goto out; - ai->ai_rpc = npf_alg_rpc_create_instance(ai); - if (!ai->ai_rpc) - goto out; - - /* Find any disabled ALGs on the lookaside list */ - struct alg_late_vrf *late_vrf - = npf_alg_late_vrf_find(ext_vrfid); - if (late_vrf) { - zhash_t *algs = late_vrf->nv_algs; - bool *on_p; - for (on_p = zhash_first(algs); on_p; on_p = zhash_next(algs)) { - char const *name = zhash_cursor(algs); - struct npf_alg *alg = alg_name_to_alg(ai, name); - if (!alg) - continue; - alg->na_enabled = *on_p; - } - npf_alg_late_vrf_del(late_vrf); - } - - return ai; -out: - npf_alg_destroy_instance(ai); - return NULL; -} - -/* - * ALG GC timer - * - * We can never safely free an allocated timer, so - * create a global one for all ALG instances. 
- */ -void -npf_alg_timer_init(void) -{ - rte_timer_init(&alg_timer); - - alg_late_vrf_hash = zhash_new(); -} - -void -npf_alg_timer_uninit(void) -{ - rte_timer_stop_sync(&alg_timer); - - npf_alg_late_vrfs_destroy(); - zhash_destroy(&alg_late_vrf_hash); -} - -void -npf_alg_timer_reset(void) -{ - rte_timer_reset(&alg_timer, ALG_INTERVAL * rte_get_timer_hz(), - SINGLE, rte_get_master_lcore(), alg_periodic, NULL); -} - -/* - * Called from whole dp unit-tests to delete all non-KEEP tuples - */ -static void npf_alg_flush(struct npf_alg_instance *ai) -{ - uint8_t i; - struct alg_protocol_tuples *apt; - - if (!ai) - return; - - for (i = 0; i <= NPF_ALG_MAX_PROTOS; i++) { - apt = rcu_dereference(ai->ai_apts[i]); - apt_walk_proto(apt, apt_flush_tuples, NULL); - } -} - -/* - * Called from whole dp unit-tests to delete all non-KEEP tuples - */ -void npf_alg_flush_all(void) -{ - struct vrf *vrf; - vrfid_t vrfid; - - VRF_FOREACH(vrf, vrfid) { - npf_alg_flush(vrf_get_npf_alg_rcu(vrfid)); - } -} - -/* - * ALG tuple purge - Delete all tuples. - * - * Called during an NPF instance delete. - */ -void npf_alg_purge(struct npf_alg_instance *ai) -{ - uint8_t i; - struct alg_protocol_tuples *apt; - - for (i = 0; i <= NPF_ALG_MAX_PROTOS; i++) { - apt = rcu_dereference(ai->ai_apts[i]); - apt_walk_proto(apt, apt_tuple_purge, NULL); - } -} diff --git a/src/npf/alg/npf_alg_sip.c b/src/npf/alg/npf_alg_sip.c deleted file mode 100644 index 590a6555..00000000 --- a/src/npf/alg/npf_alg_sip.c +++ /dev/null @@ -1,3643 +0,0 @@ -/* - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. - * Copyright (c) 2013-2016 by Brocade Communications Systems, Inc. - * All rights reserved. 
- * - * SPDX-License-Identifier: LGPL-2.1-only - */ - -/* - * NPF ALG for SIP - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "compiler.h" -#include "in_cksum.h" -#include "npf/npf.h" -#include "npf/alg/npf_alg_private.h" -#include "npf/npf_cache.h" -#include "npf/npf_nat.h" -#include "npf/npf_session.h" -#include "urcu.h" -#include "util.h" -#include "vplane_log.h" - -struct ifnet; -struct rte_mbuf; -struct sip_alg_request; - -/* default port */ -#define SIP_DEFAULT_PORT 5060 - -/* - * Minimum msg size. - * - * While the protocol does not define a minimum size directly, the Osip - * parser assumes minimum of 4 bytes during parsing. - * - * A 'real' SIP message must have multiple header fields to be valid and - * a minimum ACK msg with options stripped out will be > 200 bytes, - * so let's use that as our min msg size. - */ -#define SIP_MSG_MIN_LENGTH 200 - -/* - * Default lifetime for a request in the hash table. - */ -#define SIP_DEFAULT_REQUEST_TIMEOUT 32 - -/* - * Flags defining the types of SIP/media - * flows. Note that a SIP media UDP flow - * is handled as a RTP flow. - */ -#define SIP_ALG_CNTL_FLOW 0x01 -#define SIP_ALG_ALT_CNTL_FLOW 0x02 -#define SIP_ALG_RTP_FLOW 0x04 -#define SIP_ALG_RTCP_FLOW 0x08 -#define SIP_ALG_REVERSE 0x10 -#define SIP_ALG_NAT 0x20 -#define SIP_ALG_ALT_TUPLE_SET 0x40 -#define SIP_ALG_MASK (SIP_ALG_CNTL_FLOW | SIP_ALG_ALT_CNTL_FLOW | \ - SIP_ALG_RTP_FLOW | SIP_ALG_RTCP_FLOW) - -/* Hash table config */ -#define SIP_HT_INIT 32 -#define SIP_HT_MIN 32 -#define SIP_HT_MAX 1024 - -/* SIP per-packet flags. */ -#define SIP_NPC_REQUEST 0x01 -#define SIP_NPC_RESPONSE 0x02 - -/* For one-time initialization of libosip. 
*/ -static osip_t *sip_osip; - -/* - * We manage Invites and responses by using a hash table. - * New invites are added to the table, and corresponding responses - * pull them from the hash table. - */ -struct sip_private { - struct cds_lfht *sp_ht; - rte_spinlock_t sp_media_lock; /* For media */ - struct cds_list_head sp_dead_media; /* for freeing media */ -}; - -/* - * Max media connections per INVITE. - */ -#define SDP_MAX_MEDIA 8 - -/* - * There are two types of media that we are interested in: UDP and RTP. - * (RTP includes secure RTP) - */ -enum sdp_proto { - sdp_proto_udp = 1, - sdp_proto_rtp, - sdp_proto_unknown -}; - -/* - * Type of nat being performed. - */ -enum sip_nat_type { - sip_nat_snat = 1, - sip_nat_dnat, - sip_nat_inspect -}; - -/* - * Struct for holding nat info. - */ -struct sip_nat { - char sn_taddr[INET6_ADDRSTRLEN];/* trans addr */ - char sn_oaddr[INET6_ADDRSTRLEN];/* orig addr */ - char sn_tport[8]; /* trans port */ - enum sip_nat_type sn_type; /* type of nat */ - bool sn_forw; /* forward? 
*/ - int sn_di; /* direction */ - uint8_t sn_alen; /* addr len */ -}; - -#define sip_nat_type(sr) ((sr)->sr_nat.sn_type) -#define sip_is_snat(sr) (sip_nat_type(sr) == sip_nat_snat) -#define sip_is_dnat(sr) (sip_nat_type(sr) == sip_nat_dnat) -#define sip_is_inspect(sr) (sip_nat_type(sr) == sip_nat_inspect) -#define sip_forw(sr) ((sr)->sr_nat.sn_forw) -#define sip_taddr(sr) ((sr)->sr_nat.sn_taddr) -#define sip_oaddr(sr) ((sr)->sr_nat.sn_oaddr) -#define sip_tport(sr) ((sr)->sr_nat.sn_tport) -#define sip_di(sr) ((sr)->sr_nat.sn_di) - -/* Macros for accessing SIP instance datum */ -#define sip_alg_instance(sip) ((sip)->na_ai) - -#define SIP_REQUEST_EXPIRED 0x1 -#define SIP_REQUEST_REMOVING 0x2 - -/* - * SIP request struct - */ -struct sip_alg_request { - struct cds_lfht_node sr_node; - uint64_t sr_timeout; - osip_message_t *sr_sip; - sdp_message_t *sr_sdp; - struct sip_nat sr_nat; - uint32_t sr_if_idx; - struct cds_list_head sr_media; - uint8_t sr_flags; - const struct npf_alg *sr_sip_alg; - struct rcu_head sr_rcu_head; -}; - -/* - * Struct for matching hash table requests - */ -struct sip_request_match { - osip_call_id_t *sm_call_id; - uint32_t sm_if_idx; -}; - - -/* - * Struct for managing rtp translation data. Note - * these ports are maintained in host order. - * - * We have to save both the original and translation - * ports and addresses until we create the tuples. - */ -struct sip_alg_media { - struct cds_list_head m_list; /* list head */ - - enum sdp_proto m_proto; - enum sip_nat_type m_type; - - /* Original */ - in_port_t m_rtp_port; - npf_addr_t m_rtp_addr; - uint8_t m_rtp_alen; - in_port_t m_rtcp_port; - npf_addr_t m_rtcp_addr; - uint8_t m_rtcp_alen; - - /* Translated */ - in_port_t m_trtp_port; - npf_addr_t m_trtp_addr; - uint8_t m_trtp_alen; - in_port_t m_trtcp_port; - npf_addr_t m_trtcp_addr; - uint8_t m_trtcp_alen; - - npf_natpolicy_t *m_np; - npf_rule_t *m_rl; - uint32_t m_nat_flags; - vrfid_t m_vrfid; - bool m_rtp_reserved; /* ports from pool? 
*/ - bool m_rtcp_reserved; -}; - -/* - * struct for parsing the rtcp attribute (RFC3605) - */ -struct sip_rtcp { - char *rtcp_port; - char *rtcp_nettype; - char *rtcp_addrtype; - char *rtcp_addr; -}; - -/* - * Struct for managing tuple data. These are added to media (RTP and RTCP) - * tuples. - * - * Note ports are in host format. - */ -struct sip_tuple_data { - const struct npf_alg *td_sip; - struct sip_nat td_nat; - struct sip_alg_media *td_mi; - struct sip_alg_media *td_mr; - struct npf_alg_tuple *td_nt1; /* For the tuples */ - struct npf_alg_tuple *td_nt2; - rte_atomic32_t td_refcnt; - bool td_is_reverse; /* Reverse flow? */ -}; -#define td_nat_type(sr) ((td)->td_nat.sn_type) -#define td_is_snat(td) ((td)->td_nat.sn_type == sip_nat_snat) -#define td_is_dnat(td) ((td)->td_nat.sn_type == sip_nat_dnat) -#define td_is_inspect(td) ((td)->td_nat.sn_type == sip_nat_inspect) -#define td_is_reverse(td) ((td)->td_is_reverse) -#define td_forw(td) ((td)->td_nat.sn_forw) - -/* - * SIP ALG session. - */ -struct sip_alg_session { - in_port_t ss_via_port; - uint8_t ss_via_alen; - uint32_t ss_ifx; - npf_addr_t ss_via_addr; - int ss_call_id_count; - osip_call_id_t **ss_call_ids; -}; - -/* - * sip_alg_release_translation() - Release preallocated translation data. - */ -static void sip_alg_release_translation(struct sip_alg_media *m, - npf_addr_t taddr, in_port_t port) -{ - - if (m->m_np) - npf_nat_free_map(m->m_np, m->m_rl, - m->m_nat_flags, m->m_vrfid, taddr, htons(port)); -} - -/* - * Free a ports struct, if the ports were - * allocated from a nat pool, return them. 
- */ -static void sip_media_free(void *_m) -{ - struct sip_alg_media *m = _m; - - if (!m) - return; - if (m->m_type != sip_nat_inspect) { - if (m->m_rtp_reserved) - sip_alg_release_translation(m, - m->m_trtp_addr, m->m_trtp_port); - if (m->m_rtcp_reserved) - sip_alg_release_translation(m, - m->m_trtcp_addr, m->m_trtcp_port); - } - free(m); -} - -/* - * sip_tuple_data_alloc() - Alloc a tuple data struct - */ -static struct sip_tuple_data *sip_tuple_data_alloc(const struct npf_alg *sip, - struct sip_alg_request *sr, struct sip_alg_media *mi, - struct sip_alg_media *mr) -{ - struct sip_tuple_data *td = calloc(1, sizeof(struct sip_tuple_data)); - - if (td) { - memcpy(&td->td_nat, &sr->sr_nat, sizeof(struct sip_nat)); - td->td_sip = sip; - td->td_mi = mi; - td->td_mr = mr; - rte_atomic32_set(&td->td_refcnt, 1); - td->td_is_reverse = false; - } - return td; -} - -/* - * sip_tuple_data_get() - */ -static inline void sip_tuple_data_get(struct sip_tuple_data *td) -{ - rte_atomic32_inc(&td->td_refcnt); -} - -/* - * stip_tuple_data_put() - */ -static void sip_tuple_data_put(struct sip_tuple_data *td) -{ - if (rte_atomic32_dec_and_test(&td->td_refcnt)) { - sip_media_free(td->td_mi); - sip_media_free(td->td_mr); - free(td); - } -} - -/* - * sip_media_count() - */ -static int sip_media_count(struct cds_list_head *h) -{ - struct cds_list_head *p; - int i = 0; - - cds_list_for_each(p, h) - i++; - - return i; -} - -/* - * sip_addr_from_str() - Convert a string addr into an Ipv4 or IPv6 addr - */ -static void sip_addr_from_str(const char *saddr, npf_addr_t *addr, - uint8_t *alen) -{ - int af = AF_INET; - - if (strchr(saddr, ':')) - af = AF_INET6; - - *alen = 0; - if (inet_pton(af, saddr, addr)) { - if (af == AF_INET) - *alen = 4; - else - *alen = 16; - } -} - -/* - * sip_addr_to_str() Convert npf addr to a string - */ -static char *sip_addr_to_str(npf_addr_t *a, uint8_t alen) -{ - char buf[INET6_ADDRSTRLEN]; - int af; - - if (alen == 4) - af = AF_INET; - else if (alen == 16) - af = 
AF_INET6; - else - return NULL; - - if (inet_ntop(af, a, buf, sizeof(buf))) - return osip_strdup(buf); - return NULL; -} - -/* - * Convert a port to an (allocated) string - */ -static char *port_to_str(in_port_t n) -{ - char buf[8]; - int rc; - - rc = snprintf(buf, 8, "%hu", n); - if (rc < 0 || rc > 6) - return NULL; - return osip_strdup(buf); -} - -/* Free dead media structs from the instance */ -static void sip_free_dead_media(struct sip_private *sp) -{ - struct sip_alg_media *m; - struct sip_alg_media *tmp; - - if (!sp) - return; - - rte_spinlock_lock(&sp->sp_media_lock); - cds_list_for_each_entry_safe(m, tmp, &sp->sp_dead_media, m_list) { - cds_list_del(&m->m_list); - sip_media_free(m); - } - rte_spinlock_unlock(&sp->sp_media_lock); -} - -static void sip_request_free_rcu(struct rcu_head *head) -{ - struct sip_alg_request *sr = caa_container_of(head, - struct sip_alg_request, sr_rcu_head); - struct sip_private *sp = sr->sr_sip_alg->na_private; - - /* - * Move medias to the instance for deletion - * via the sip GC - */ - if (sp) { - rte_spinlock_lock(&sp->sp_media_lock); - cds_list_splice(&sr->sr_media, &sp->sp_dead_media); - rte_spinlock_unlock(&sp->sp_media_lock); - } - - if (sr->sr_sip) - osip_message_free(sr->sr_sip); - if (sr->sr_sdp) - sdp_message_free(sr->sr_sdp); - free(sr); -} - -/* - * Free a sip msg, always via RCU. - */ -static void sip_alg_request_free(const struct npf_alg *sip, - struct sip_alg_request *sr) -{ - if (sr) { - sr->sr_sip_alg = sip; - call_rcu(&sr->sr_rcu_head, sip_request_free_rcu); - } -} - -/* - * Synchronously free a sip msg. - * Used when destroying the sip instance. - */ -static void sip_alg_request_free_sync(const struct npf_alg *sip, - struct sip_alg_request *sr) -{ - if (sr) { - sr->sr_sip_alg = sip; - /* Call the rcu free variant synchronously */ - sip_request_free_rcu(&sr->sr_rcu_head); - } -} -/* - * sip_alg_body_is_sdp() - Do we have an SDP message? 
- */ -static bool sip_alg_body_is_sdp(struct sip_alg_request *sr) -{ - osip_content_type_t *ct; - - ct = osip_message_get_content_type(sr->sr_sip); - if (!ct) - return false; - - if (ct->type && !strstr(ct->type, "application")) - return false; - - if (ct->subtype && !strstr(ct->subtype, "sdp")) - return false; - - return true; -} - -/* - * sip_alg_get_sdp() - */ -static int sip_alg_get_sdp(struct sip_alg_request *sr) -{ - osip_body_t *sdp_body; - sdp_message_t *sdp; - int rc; - - if (!sip_alg_body_is_sdp(sr)) - return 0; - - rc = osip_message_get_body(sr->sr_sip, 0, &sdp_body); - if (rc >= 0) { - rc = sdp_message_init(&sdp); - if (rc < 0) - return rc; - rc = sdp_message_parse(sdp, sdp_body->body); - if (!rc) - sr->sr_sdp = sdp; - else - sdp_message_free(sdp); - } - - return rc; -} - -/* - * sip_alg_request_alloc() - */ -static struct sip_alg_request *sip_alg_request_alloc(bool init_sip, - uint32_t if_idx) -{ - struct sip_alg_request *sr; - - sr = calloc(1, sizeof(struct sip_alg_request)); - if (!sr) - return NULL; - - CDS_INIT_LIST_HEAD(&sr->sr_media); - sr->sr_if_idx = if_idx; - - if (init_sip && osip_message_init(&sr->sr_sip)) { - free(sr); - sr = NULL; - } - - return sr; -} - -/* - * Parse a sip packet - */ -static struct sip_alg_request *sip_alg_parse(const struct npf_alg *sip, - npf_cache_t *npc, uint32_t if_idx, struct rte_mbuf *nbuf) -{ - struct sip_alg_request *sr = NULL; - uint16_t plen; - char payload[SIP_MESSAGE_MAX_LENGTH + 1]; - int rc; - - plen = npf_payload_fetch(npc, nbuf, payload, - SIP_MSG_MIN_LENGTH, SIP_MESSAGE_MAX_LENGTH); - if (!plen) - return NULL; - - /* Make the payload a string */ - payload[plen] = '\0'; - - sr = sip_alg_request_alloc(true, if_idx); - if (!sr) - return NULL; - - rc = osip_message_parse(sr->sr_sip, payload, plen); - if (rc != 0) - goto bad; - - /* Get the sdp portion if present */ - rc = sip_alg_get_sdp(sr); - if (rc) - goto bad; - - return sr; - -bad: - sip_alg_request_free(sip, sr); - return NULL; -} - -/* - * 
sip_ht_match() - Match function for hash table - */ -static int sip_ht_match(struct cds_lfht_node *node, const void *key) -{ - const struct sip_alg_request *sr = caa_container_of(node, - struct sip_alg_request, sr_node); - const struct sip_request_match *sm = key; - - if (sr->sr_flags & SIP_REQUEST_EXPIRED) - return 0; - - if (sm->sm_if_idx != sr->sr_if_idx) - return 0; - - return !osip_call_id_match(osip_message_get_call_id(sr->sr_sip), - sm->sm_call_id); -} - -/* - * sip_request_expire() - Expire an invite from the hash table. - */ -static void sip_request_expire(struct sip_alg_request *sr) -{ - if (!(sr->sr_flags & SIP_REQUEST_EXPIRED)) - sr->sr_flags |= SIP_REQUEST_EXPIRED; -} - -/* - * sip_delete_request() - Delete an invite from the hash table. - */ -static void sip_delete_request(struct npf_alg *sip, - struct sip_alg_request *sr) -{ - struct sip_private *sp = sip->na_private; - - if (sr && sp && !cds_lfht_del(sp->sp_ht, &sr->sr_node)) - sip_alg_request_free(sip, sr); -} - -static bool sip_ht_expired(uint64_t curr, struct sip_alg_request *sr) -{ - if (sr->sr_flags & SIP_REQUEST_EXPIRED) - return true; - if (sr->sr_timeout < curr) { - sip_request_expire(sr); - return true; - } - return false; -} - -/* - * sip_ht_gc() - Clean stale entries from the hash table. - */ -static void sip_ht_gc(struct npf_alg *sip) -{ - struct cds_lfht_iter iter; - struct sip_alg_request *sr; - uint64_t current = rte_get_timer_cycles(); - struct sip_private *sp = sip->na_private; - - if (!sp) - return; - - /* Always free any medias first */ - sip_free_dead_media(sp); - - cds_lfht_for_each_entry(sp->sp_ht, &iter, sr, sr_node) { - if (!sip_ht_expired(current, sr)) - continue; - - if (sr->sr_flags & SIP_REQUEST_REMOVING) - sip_delete_request(sip, sr); - else - sr->sr_flags |= SIP_REQUEST_REMOVING; - } -} - -/* - * sip_alg_hash() - Create a hash out of the Call-ID number. This is unique. 
- */ -static unsigned long sip_alg_hash(struct sip_request_match *sm) -{ - char *tmp; - unsigned long hash = 0; - - if (!sm->sm_call_id) - return hash; - - tmp = osip_call_id_get_number(sm->sm_call_id); - if (tmp) - hash = rte_jhash(tmp, strlen(tmp), hash); - - tmp = osip_call_id_get_host(sm->sm_call_id); - if (tmp) - hash = rte_jhash(tmp, strlen(tmp), hash); - - return hash ? rte_jhash_1word(sm->sm_if_idx, hash) : 0; -} - -/* - * sip_request_lookup_by_call_id() - Lookup by call id. - */ -static struct sip_alg_request *sip_request_lookup_by_call_id( - const struct npf_alg *sip, uint32_t if_idx, - osip_call_id_t *call_id) -{ - struct cds_lfht_iter iter; - struct cds_lfht_node *node; - unsigned long hash; - struct sip_alg_request *sr; - struct sip_private *sp; - struct sip_request_match sm = { - .sm_call_id = call_id, - .sm_if_idx = if_idx - }; - - if (!sip) - return NULL; - - sp = sip->na_private; - if (!sp) - return NULL; - - hash = sip_alg_hash(&sm); - if (!hash) - return NULL; - - cds_lfht_lookup(sp->sp_ht, hash, sip_ht_match, &sm, &iter); - node = cds_lfht_iter_get_node(&iter); - if (node) - sr = caa_container_of(node, struct sip_alg_request, sr_node); - else - sr = NULL; - - return sr; -} - -/* - * sip_request_lookup() - Lookup a request - */ -static struct sip_alg_request *sip_request_lookup(const struct npf_alg *sip, - struct sip_alg_request *incoming) -{ - osip_call_id_t *call_id; - - call_id = osip_message_get_call_id(incoming->sr_sip); - return sip_request_lookup_by_call_id(sip, incoming->sr_if_idx, call_id); -} - -/* Expire all SIP requests on this session handle */ -static void sip_expire_session_request(npf_session_t *se) -{ - struct sip_alg_session *ss = npf_alg_session_get_private(se); - const struct npf_alg *sip = npf_alg_session_get_alg(se); - struct sip_alg_request *sr; - uint32_t if_idx = npf_session_get_if_index(se); - int i; - - if (!ss) - return; - - for (i = 0 ; i < ss->ss_call_id_count; i++) { - sr = sip_request_lookup_by_call_id(sip, if_idx, 
- ss->ss_call_ids[i]); - if (sr) - sip_request_expire(sr); - /* free this call id */ - osip_call_id_free(ss->ss_call_ids[i]); - } - - /* reset so expire/destroy doesn't repeat */ - ss->ss_call_id_count = 0; - free(ss->ss_call_ids); - ss->ss_call_ids = NULL; -} - -/* - * sip_request_lookup_and_expire() - Expire an invite from the hash table. - */ -static void sip_request_lookup_and_expire(const struct npf_alg *sip, - struct sip_alg_request *incoming) -{ - struct sip_alg_request *sr; - - if (incoming) { - sr = sip_request_lookup(sip, incoming); - if (sr) - sip_request_expire(sr); - } -} - -/* - * sip_alg_expires() - Get an expiration time for this request - */ -static uint64_t sip_alg_expires(struct sip_alg_request *sr) -{ - osip_header_t *expires; - unsigned long timeout = 0; - char *end; - int rc; - - /* Does the request have an expires field? */ - rc = osip_message_get_expires(sr->sr_sip, 0, &expires); - if (rc >= 0 && expires->hvalue) { - timeout = strtoul(expires->hvalue, &end, 10); - if (*end) - timeout = 0; - } - - /* - * If unset or bogus, or greater than 24h, set a default. - */ - if (!timeout || timeout > 84600) - timeout = SIP_DEFAULT_REQUEST_TIMEOUT; - - return (uint64_t) timeout; -} - -/* - * sip_alg_add_invite() - Add an invite to the hash table. 
- */ -static int sip_alg_add_invite(const struct npf_alg *sip, - struct sip_alg_request *sr) -{ - unsigned long hash; - struct cds_lfht_node *node; - struct sip_request_match sm; - struct sip_private *sp = sip->na_private; - - if (!MSG_IS_INVITE(sr->sr_sip)) - return -EINVAL; - - if (!sp) - return -EINVAL; - - sm.sm_call_id = osip_message_get_call_id(sr->sr_sip); - sm.sm_if_idx = sr->sr_if_idx; - hash = sip_alg_hash(&sm); - if (!hash) - return -EINVAL; - - cds_lfht_node_init(&sr->sr_node); - sr->sr_timeout = sip_alg_expires(sr); - sr->sr_timeout *= rte_get_timer_hz(); /* to cycles */ - sr->sr_timeout += rte_get_timer_cycles(); /* add current time */ - - node = cds_lfht_add_unique(sp->sp_ht, hash, sip_ht_match, &sm, - &sr->sr_node); - if (node != &sr->sr_node) - return -EEXIST; - - return 0; -} - -/* - * Create and add a tuple from a session, but with a wildcard source port. - * This tuple is subsequently expired when the SIP Request is expired. - */ -static int sip_alg_add_cntl_tuple(npf_session_t *se, npf_cache_t *npc) -{ - struct npf_alg_tuple *nt; - const struct npf_alg *sip = npf_alg_session_get_alg(se); - int rc; - - nt = npf_alg_tuple_alloc(); - if (!nt) - return -ENOMEM; - - nt->nt_se = se; - nt->nt_alg = sip; - nt->nt_ifx = npf_session_get_if_index(se); - nt->nt_flags = NPF_TUPLE_MATCH_ANY_SPORT | NPF_TUPLE_KEEP | - NPF_TUPLE_MULTIMATCH; - nt->nt_alg_flags = SIP_ALG_ALT_CNTL_FLOW; - nt->nt_proto = IPPROTO_UDP; - nt->nt_alen = 4; - - nt->nt_sport = 0; /* Any source port */ - nt->nt_srcip = *npf_cache_dstip(npc); - npf_nat_get_trans(npf_session_get_nat(se), &nt->nt_dstip, - &nt->nt_dport); - - rc = npf_alg_tuple_insert(sip_alg_instance(sip), nt); - if (rc) - npf_alg_tuple_free(nt); - else - npf_alg_session_set_flag(se, SIP_ALG_ALT_TUPLE_SET); - return rc; -} - -/* - * Add a control tuple if we are using SNAT. Cisco SIP Gateways send SIP - * response messages with a (per-call?) random source port. 
This sets up a - * tuple in the reverse direction (c/w with REQUEST) that matches on any - * source port. - */ -static int sip_alg_manage_cntl(npf_session_t *se, npf_cache_t *npc, - struct sip_alg_request *sr) -{ - struct sip_alg_session *ss; - uint32_t flags = npf_alg_session_get_flags(se); - - /* Already added? */ - if (flags & SIP_ALG_ALT_TUPLE_SET) - return 0; - - /* Only if this is a UDP connection. */ - if (npf_session_get_proto(se) != IPPROTO_UDP) - return 0; - - /* Only add from a CNTL session. */ - if (!(flags & SIP_ALG_CNTL_FLOW)) - return 0; - - ss = npf_alg_session_get_private(se); - if (!ss) - return 0; - - /* Only in forward direction */ - if (!sip_forw(sr)) - return 0; - - /* Currently only supports SNAT */ - if (!sip_is_snat(sr)) - return 0; - - return sip_alg_add_cntl_tuple(se, npc); -} - -static int sip_alg_private_session_init(npf_session_t *se) -{ - struct sip_alg_session *ss; - - ss = npf_alg_session_get_private(se); - if (ss) - return -EINVAL; - ss = calloc(sizeof(struct sip_alg_session), 1); - if (!ss) - return -ENOMEM; - npf_alg_session_set_private(se, ss); - - return 0; -} - -static void sip_alg_private_session_free(npf_session_t *se) -{ - struct sip_alg_session *ss; - - ss = npf_alg_session_get_private(se); - if (ss) - sip_expire_session_request(se); - free(ss); -} - -/* - * sip_alg_handle_error_response() - */ -static bool sip_alg_handle_error_response(const struct npf_alg *sip, - struct sip_alg_request *sr) -{ - /* - * These responses imply a failure and/or a future re-submit of the - * invite request, so delete the one we currently have and let - * the protocol try again. - */ - if (MSG_IS_STATUS_3XX(sr->sr_sip) || - MSG_IS_STATUS_4XX(sr->sr_sip) || - MSG_IS_STATUS_5XX(sr->sr_sip) || - MSG_IS_STATUS_6XX(sr->sr_sip)) { - sip_request_lookup_and_expire(sip, sr); - return true; - } - return false; -} - -/* - * sip_alg_verify() - Some cursory checks before dealing with this packet. 
- */ -static int sip_alg_verify(struct sip_alg_request *sr) -{ - /* - * We only check whether required headers have been - * parsed, we do not verify the contents. - */ - if (!sr->sr_sip->to) - return -EINVAL; - if (!sr->sr_sip->from) - return -EINVAL; - if (!sr->sr_sip->cseq) - return -EINVAL; - if (!sr->sr_sip->call_id) - return -EINVAL; - if (osip_list_size(&sr->sr_sip->vias) < 1) - return -EINVAL; - - return 0; -} - - -/* - * sip_alg_update_payload() - Update a packet payload - */ -static int sip_alg_update_payload(npf_session_t *se, npf_cache_t *npc, - const int di, struct rte_mbuf *nbuf, - struct sip_alg_request *tsr) -{ - char *payload; - char *sdp; - osip_body_t *body; - uint16_t new_plen; - char ebuf[64]; - size_t sz; - int rc; - - /* - * If we have an SDP, get the string and replace the body in - * the SIP. - */ - if (tsr->sr_sdp) { - rc = sdp_message_to_str(tsr->sr_sdp, &sdp); - if (rc) { - rc = -ENOMEM; - goto done; - } - - /* Replace this body at pos = 0 */ - rc = osip_message_get_body(tsr->sr_sip, 0, &body); - if (rc < 0) { - rc = -ENOENT; - osip_free(sdp); - goto done; - } - osip_free(body->body); - body->body = sdp; - body->length = strlen(sdp); - } - - osip_message_force_update(tsr->sr_sip); - rc = osip_message_to_str(tsr->sr_sip, &payload, &sz); - if (rc) { - rc = -ENOMEM; - goto done; - } - - new_plen = (uint16_t) sz; - - rc = npf_payload_update(se, npc, nbuf, payload, di, new_plen); - osip_free(payload); - -done: - if (rc) { - if (net_ratelimit()) - RTE_LOG(ERR, FIREWALL, - "NPF ALG: SIP payload update: %s\n", - strerror_r(-rc, ebuf, sizeof(ebuf))); - } - - return rc; -} - -/* - * sip_translate_addr_reqd() - Do we want to translate this addr? 
- */ -static inline bool sip_translate_addr_reqd(const char *addr, const char *oaddr) -{ - if (!addr || !oaddr) - return false; - - /* Only translate if the address matches the NAT target address */ - if (strcmp(addr, oaddr) != 0) - return false; - - return true; -} - -/* - * Only translate a port if it is present in the url, and is different from - * tport - */ -static inline bool sip_translate_port_reqd(const char *port, const char *tport) -{ - if (!port || !tport) - return false; - - if (strcmp(port, tport) == 0) - return false; - - return true; -} - -/* - * sip_alg_translate_url() - */ -static int sip_alg_translate_url(osip_uri_t *u, const char *oaddr, - const char *taddr, const char *port) -{ - if (!u) - return 0; - - if (!sip_translate_addr_reqd(u->host, oaddr)) - return 0; - - osip_free(u->host); - u->host = osip_strdup(taddr); - - /* translate the port if present */ - if (sip_translate_port_reqd(u->port, port)) { - osip_free(u->port); - u->port = osip_strdup(port); - } - return 0; -} - -/* - * sip_alg_translate_from - Translate a From header - */ -static int sip_alg_translate_from(struct sip_alg_request *tsr, - const char *taddr, const char *tport) -{ - osip_uri_t *from; - int rc = -1; - - from = osip_from_get_url(osip_message_get_from(tsr->sr_sip)); - if (from) - rc = sip_alg_translate_url(from, sip_oaddr(tsr), taddr, tport); - return rc; -} - -/* - * sip_alg_translate_to - Translate a To header - */ -static int sip_alg_translate_to(struct sip_alg_request *tsr, - const char *taddr, const char *tport) -{ - osip_uri_t *to; - int rc = -1; - - to = osip_from_get_url(osip_message_get_to(tsr->sr_sip)); - if (to) - rc = sip_alg_translate_url(to, sip_oaddr(tsr), taddr, tport); - return rc; -} - -/* - * sip_alg_translate_call_id - Translate a Call-Id header - */ -static int sip_alg_translate_call_id(struct sip_alg_request *tsr, - const char *addr) -{ - osip_call_id_t *cid = osip_message_get_call_id(tsr->sr_sip); - char *p; - - if (cid) { - p = 
osip_call_id_get_host(cid); - if (sip_translate_addr_reqd(p, sip_oaddr(tsr))) { - osip_free(p); - osip_call_id_set_host(cid, osip_strdup(addr)); - } - } - return 0; -} - -/* - * sip_alg_translate_user_agent() - Translate User-Agent header - */ -static int sip_alg_translate_user_agent(struct sip_alg_request *tsr, - const char *taddr) -{ - osip_header_t *h; - int rc = 0; - int n; - char *p; - - osip_message_get_user_agent(tsr->sr_sip, 0, &h); - if (h) { - /* - * This can contain anything. But we only need to - * replace the original address, if present, with - * the taddr - */ - p = strstr(h->hvalue, sip_oaddr(tsr)); - if (!p) - return 0; /* Nothing to do */ - - /* Ensure enough space */ - rc = strlen(h->hvalue) + strlen(taddr); - - char buf[rc + 1]; /* avoid 0 bounds VLA */ - - n = p - h->hvalue; - memset(buf, '\0', sizeof(buf)); - memcpy(buf, h->hvalue, n); - strcat(buf, taddr); - n += strlen(sip_oaddr(tsr)); - strcat(buf, &h->hvalue[n]); - p = osip_strdup(buf); - if (!p) - return -ENOMEM; - osip_free(h->hvalue); - h->hvalue = p; - rc = 0; - } - return rc; -} - -/* - * sip_alg_translate_via_addr - Translate a Via header address and/or port - */ -static int sip_alg_translate_via_addr(osip_via_t *v, const char *oaddr, - const char *taddr, const char *tport) -{ - char *p; - - p = osip_via_get_host(v); - if (!sip_translate_addr_reqd(p, oaddr)) - return 0; - - osip_free(p); - osip_via_set_host(v, osip_strdup(taddr)); - - p = osip_via_get_port(v); - if (sip_translate_port_reqd(p, tport)) { - osip_free(p); - osip_via_set_port(v, osip_strdup(tport)); - } - return 0; -} - -/* - * sip_alg_translate_via - Translate Via header(s) - */ -static int sip_alg_translate_via(struct sip_alg_request *tsr, - const char *taddr, const char *tport) -{ - osip_via_t *v = NULL; - int i = 0; - int rc = 0; - - while (osip_message_get_via(tsr->sr_sip, i, &v) >= 0) { - rc = sip_alg_translate_via_addr(v, sip_oaddr(tsr), - taddr, tport); - if (rc) - return rc; - i++; - } - return rc; -} - -/* - * 
sip_alg_translate_contact - Translate a Contact header - */ -static int sip_alg_translate_contact(struct sip_alg_request *tsr, - const char *taddr, const char *tport) -{ - osip_contact_t *c = NULL; - int i = 0; - int rc = 0; - - while (osip_message_get_contact(tsr->sr_sip, i, &c) >= 0) { - rc = sip_alg_translate_url(osip_contact_get_url(c), - sip_oaddr(tsr), taddr, tport); - if (rc) - return rc; - i++; - } - return rc; -} - -/* - * sip_alg_translate_record_route - Translate Record-Route header(s) - */ -static int sip_alg_translate_record_route(struct sip_alg_request *tsr, - const char *taddr, const char *tport) -{ - osip_record_route_t *rr; - int i = 0; - int rc = 0; - - while (osip_message_get_record_route(tsr->sr_sip, i, &rr) >= 0) { - rc = sip_alg_translate_url(osip_record_route_get_url(rr), - sip_oaddr(tsr), taddr, tport); - if (rc) - return rc; - i++; - } - return rc; -} - -/* - * sip_alg_translate_route - Translate a Route header - * - * The osip library parses either of the following two forms: - * - * "Route: ,\r\n" - * - * or - * - * "Route: \r\n" - * "Route: \r\n" - * - * It always generates the second form on output (translating the url as where - * relevant). - */ -static int sip_alg_translate_route(struct sip_alg_request *tsr, - const char *taddr, const char *tport) -{ - osip_route_t *r = NULL; - int i = 0; - int rc = 0; - - while (osip_message_get_route(tsr->sr_sip, i, &r) >= 0) { - rc = sip_alg_translate_url(osip_route_get_url(r), - sip_oaddr(tsr), taddr, tport); - if (rc) - return rc; - i++; - } - return rc; -} - -/* - * sip_alg_translate_request_uri - Translate a Request-Uri header - */ -static int sip_alg_translate_request_uri(struct sip_alg_request *tsr, - const char *taddr, const char *tport) -{ - osip_uri_t *r = tsr->sr_sip->req_uri; - - return sip_alg_translate_url(r, sip_oaddr(tsr), taddr, tport); -} - -/* - * Translate a generic SIP header that has not been parsed by the osip - * library. 
- * - * Replace NAT target address with the translation address. Also replaces the - * port if 1. a port is present in the url, and 2. the header address matched - * the target address. - */ -static int sip_alg_translate_header(osip_header_t *h, const char *oaddr, - const char *taddr, const char *tport) -{ - const char *p; - - if (!h) - return 0; - - /* - * If the header does not contain NAT target address then there is - * nothing to be done - */ - p = strstr(h->hvalue, oaddr); - if (!p) - return 0; - - size_t oaddr_len = strlen(oaddr); - size_t taddr_len = strlen(taddr); - size_t tport_len = tport ? strlen(tport) : 0; - size_t hval_len = strlen(h->hvalue); - - /* Ensure more than enough space */ - char buf[hval_len + taddr_len + tport_len + 1]; - char *insert_point = buf; - - /* copy part before oaddr */ - memcpy(insert_point, h->hvalue, p - h->hvalue); - insert_point += p - h->hvalue; - *insert_point = '\0'; - - /* insert taddr */ - strncat(insert_point, taddr, taddr_len); - insert_point += taddr_len; - - /* set p to point to just after oaddr */ - p += oaddr_len; - - /* - * replace port if tport specified by the caller and if a port is - * present in the header - */ - if (tport && *p == ':') { - uint hport; - const char *pp = p + 1; - - /* Look for a number at a point after the colon */ - if (sscanf(pp, "%5u", &hport) > 0 && hport <= 65535) { - char hport_str[6]; - char *hportp; - - /* - * convert number to string, and locate in the - * original header string - */ - snprintf(hport_str, sizeof(hport_str), "%u", hport); - hportp = strstr(pp, hport_str); - - /* - * Check that port string is immediately after the - * colon. Only replace if header port is different - * than tport. 
- */ - if (hportp == pp && strcmp(tport, hport_str)) { - /* insert colon and tport */ - strcat(insert_point, ":"); - insert_point += 1; - - strncat(insert_point, tport, tport_len); - insert_point += tport_len; - - /* - * set p to point just after the port in the - * original header string - */ - p = hportp + strlen(hport_str); - } - } - } - - /* - * copy part after oaddr (or after port, if present), and NULL - * terminate - */ - strcat(insert_point, p); - - /* replace hvalue */ - char *new = osip_strdup(buf); - if (!new) - return -ENOMEM; - - osip_free(h->hvalue); - h->hvalue = new; - - return 0; -} - -/* - * Translate all headers of the given name *if* the url contains oaddr. - */ -static int sip_alg_translate_header_byname(struct sip_alg_request *tsr, - const char *name, - const char *taddr, - const char *tport) -{ - osip_header_t *h; - const char *oaddr = sip_oaddr(tsr); - int i = 0; - int rc; - - while ((i = osip_message_header_get_byname(tsr->sr_sip, - name, - i, &h)) >= 0) { - rc = sip_alg_translate_header(h, oaddr, taddr, tport); - - if (rc < 0) - return rc; - i++; - } - return 0; -} - -/* - * Translate all P-asserted-identity headers - */ -static int sip_alg_translate_p_asserted_id(struct sip_alg_request *tsr, - const char *taddr, - const char *tport) -{ - return sip_alg_translate_header_byname(tsr, - "P-asserted-identity", - taddr, tport); -} - -/* - * Translate all P-preferred-identity headers - */ -static int sip_alg_translate_p_preferred_id(struct sip_alg_request *tsr, - const char *taddr, - const char *tport) -{ - return sip_alg_translate_header_byname(tsr, - "P-preferred-identity", - taddr, tport); -} - -/* - * sip_alg_get_sdp_attribute() - */ -static sdp_attribute_t *sip_alg_get_sdp_attribute(struct sip_alg_request *sr, - int pos, const char *name) -{ - int i = 0; - sdp_attribute_t *a; - - while ((a = sdp_message_attribute_get(sr->sr_sdp, pos, i)) != NULL) { - if (!strncmp(a->a_att_field, name, strlen(name))) - return a; - i++; - } - return NULL; -} 
- -/* - * sip_parse_rtcp() - Get port and (optional) addr from an rtcp attribute. - * - * Example RTCP SDP attributes: - * a=rtcp:53020 - * a=rtcp:53020 IN IP4 126.16.64.4 - * a=rtcp:53020 IN IP6 2001:2345:6789:ABCD:EF01:2345:6789:ABCD - * - * The "rtcp:" part is stripped off sip_alg_get_sdp_attribute, and the - * attribute value passed in here is of the form "53020 IN IP4 126.16.64.4". - * Note that only the port number is mandatory. - */ -static int sip_parse_rtcp(const char *value, - in_port_t *port, npf_addr_t *addr, uint8_t *alen) -{ - char *cport = NULL; - char *cnettype = NULL; - char *caddrtype = NULL; - char *caddr = NULL; - int i; - int rc = 0; - - i = sscanf(value, "%5ms %2ms %3ms %46ms", - &cport, &cnettype, &caddrtype, &caddr); - if (i > 0) { - *port = npf_port_from_str(cport); - /* Address is optional, verify if present */ - if (*port && (i == 4)) { - sip_addr_from_str(caddr, addr, alen); - if (!*alen) - rc = -EINVAL; - } - } - free(cport); - free(cnettype); - free(caddrtype); - free(caddr); - - return rc; -} - -/* - * sip_alg_set_rtcp_attribute() - Update "rtcp" attribute if present - */ -static int sip_alg_sdp_set_rtcp_attribute(struct sip_alg_request *sr, - int pos, npf_addr_t *taddr, uint8_t alen, in_port_t tport) -{ - sdp_attribute_t *a; - int rc = 0; /* Not an error if ENOENT */ - - /* Only if the rtcp port exists */ - if (!tport) - return 0; - - a = sip_alg_get_sdp_attribute(sr, pos, "rtcp"); - if (a) { - char *cport = NULL; - char *cnettype = NULL; - char *caddrtype = NULL; - char *caddr = NULL; - int i; - char value[70]; - char *naddr; - - naddr = sip_addr_to_str(taddr, alen); - if (!naddr) - return -ENOMEM; - - i = sscanf(a->a_att_value, "%5ms %2ms %3ms %46ms", - &cport, &cnettype, &caddrtype, &caddr); - - if (i <= 0) { - osip_free(naddr); - return -EINVAL; - } - - if (i == 1) - rc = snprintf(value, sizeof(value), "%hu", tport); - else if (i == 4) - rc = snprintf(value, sizeof(value), "%hu %s %s %s", - tport, cnettype, caddrtype, naddr); - 
else - rc = -EINVAL; - - free(cport); - free(cnettype); - free(caddrtype); - free(caddr); - osip_free(naddr); - - if (rc > 0 && (uint)rc < sizeof(value)) { - osip_free(a->a_att_value); - a->a_att_value = osip_strdup(value); - rc = 0; - } else { - rc = -ENOMEM; - } - } - return rc; -} - -/* - * sip_alg_sdp_get_rtcp_attribute() - Get the rtcp attribute params if present. - */ -static int sip_alg_sdp_get_rtcp_attribute(struct sip_alg_request *sr, - struct sip_alg_media *m, int pos) -{ - sdp_attribute_t *a; - int rc = 0; - - a = sip_alg_get_sdp_attribute(sr, pos, "rtcp"); - if (a) { - rc = sip_parse_rtcp(a->a_att_value, &m->m_rtcp_port, - &m->m_rtcp_addr, &m->m_rtcp_alen); - if (rc) - return rc; - } - - /* - * Now default for addr if not set. - */ - if (m->m_rtcp_port && IN6_IS_ADDR_UNSPECIFIED(&m->m_rtcp_addr)) { - m->m_rtcp_addr = m->m_rtp_addr; - m->m_rtcp_alen = m->m_rtp_alen; - } - - return rc; -} - -/* - * sip_alg_sdp_get_media_proto() - */ -static int sip_alg_sdp_get_media_proto(struct sip_alg_request *sr, int pos) -{ - char *proto = sdp_message_m_proto_get(sr->sr_sdp, pos); - - if (!proto) - return -1; - - if (strstr(proto, "UDP")) - return sdp_proto_udp; - else if (strstr(proto, "RTP")) - return sdp_proto_rtp; - return sdp_proto_unknown; -} - -/* - * sip_alg_set_rtcp_media() - Init/finalize media addr/ports - */ -static void sip_alg_set_rtcp_media(struct sip_alg_media *m) -{ - /* - * If an rtcp attribute was sent, use it - */ - if (IN6_IS_ADDR_UNSPECIFIED(&m->m_rtcp_addr)) { - m->m_rtcp_addr = m->m_rtp_addr; - m->m_rtcp_port = m->m_rtp_port + 1; - m->m_rtcp_alen = m->m_rtp_alen; - } - - m->m_trtcp_addr = m->m_rtcp_addr; - m->m_trtcp_port = m->m_rtcp_port; - m->m_trtcp_alen = m->m_rtcp_alen; -} - -/* - * sip_alg_set_dnat_rtcp_media() - Init/finalize media addr/ports - */ -static void sip_alg_dnat_rtcp_media(struct sip_alg_media *m, npf_nat_t *nat) -{ - in_port_t tmp; - bool do_rtcp = false; - - if (!IN6_IS_ADDR_UNSPECIFIED(&m->m_rtcp_addr) && - 
memcmp(&m->m_rtp_addr, &m->m_rtcp_addr, m->m_rtp_alen) == 0) - do_rtcp = true; - - /* Reset to original address */ - npf_nat_get_orig(nat, &m->m_rtp_addr, &tmp); - - /* - * If an rtcp attribute was sent, use it - */ - if (IN6_IS_ADDR_UNSPECIFIED(&m->m_rtcp_addr)) { - m->m_rtcp_addr = m->m_rtp_addr; - m->m_rtcp_port = m->m_rtp_port + 1; - m->m_rtcp_alen = m->m_rtp_alen; - m->m_trtcp_addr = m->m_trtp_addr; - m->m_trtcp_port = m->m_rtcp_port; - m->m_trtcp_alen = m->m_rtcp_alen; - } else { - m->m_trtcp_addr = m->m_rtcp_addr; - m->m_trtcp_port = m->m_rtcp_port; - m->m_trtcp_alen = m->m_rtcp_alen; - if (do_rtcp) - m->m_rtcp_addr = m->m_rtp_addr; - } -} - -/* - * sip_alg_reserve_ports() - */ -static int sip_alg_reserve_ports(npf_session_t *se, - struct sip_alg_media *m, npf_nat_t *ns) -{ - int n = 1; - bool start_even = false; - in_port_t port; - int rc; - npf_natpolicy_t *np = npf_nat_get_policy(ns); - uint32_t nat_flags = NPF_NAT_MAP_PORT; - npf_rule_t *rl = npf_nat_get_rule(ns); - vrfid_t vrfid = npf_session_get_vrfid(se); - - /* - * If we do not have an rtcp attribute, then we need - * to allocate 2 consecutive ports, starting on an even - * boundary. Otherwise, one port will do. - */ - if (m->m_proto == sdp_proto_rtp && - IN6_IS_ADDR_UNSPECIFIED(&m->m_rtcp_addr)) { - start_even = true; - n = 2; - } - - port = htons(m->m_trtp_port); - rc = npf_alg_reserve_translations(se, n, start_even, m->m_rtp_alen, - &m->m_trtp_addr, &port); - if (rc) - return rc; - m->m_trtp_port = ntohs(port); - m->m_rtp_reserved = true; - - /* If the proto is not rtp, we are done. 
*/ - if (m->m_proto != sdp_proto_rtp) - return rc; - - /* - * If we didn't have an rtcp attribute, then - * default the rtcp members use the allocated port - * - * Otherwise, we have an rtcp attribute with an addr, - * (which may be the same as the rtp addr) - * and we need to get a distinct mapping for that - * addr/port pair - */ - if (IN6_IS_ADDR_UNSPECIFIED(&m->m_rtcp_addr)) { - m->m_rtcp_addr = m->m_rtp_addr; - m->m_rtcp_port = m->m_rtp_port+1; - m->m_rtcp_alen = m->m_rtp_alen; - - m->m_trtcp_port = ntohs(port) + 1; - m->m_trtcp_addr = m->m_trtp_addr; - m->m_trtcp_alen = m->m_trtp_alen; - m->m_rtcp_reserved = true; - } else { - /* - * If the rtcp addr is the same as the rtp addr, - * then we need to allocate a port. Otherwise, - * this is a remote host. - */ - if (!memcmp(&m->m_rtcp_addr, &m->m_rtp_addr, m->m_rtp_alen)) { - m->m_trtcp_addr = m->m_trtp_addr; - port = htons(m->m_rtcp_port); - rc = npf_nat_alloc_map(np, rl, nat_flags, - vrfid, &m->m_trtcp_addr, &port, 1); - if (rc) - return rc; - m->m_trtcp_port = ntohs(port); - m->m_trtcp_alen = m->m_rtcp_alen; - m->m_rtcp_reserved = true; - } else{ - m->m_trtcp_addr = m->m_rtcp_addr; - m->m_trtcp_port = m->m_rtcp_port; - m->m_trtcp_alen = m->m_rtcp_alen; - } - } - - return rc; -} - -static int sip_alg_parse_media_ports(struct sip_alg_media *m, - struct sip_alg_request *sr, int pos) -{ - char *cport = sdp_message_m_port_get(sr->sr_sdp, pos); - int rc = 0; - - /* Must have a port in the media */ - if (!cport) - return -EINVAL; - - switch (m->m_proto) { - case sdp_proto_udp: - m->m_rtp_port = npf_port_from_str(cport); - if (!m->m_rtp_port) - rc = -EINVAL; - break; - case sdp_proto_rtp: - m->m_rtp_port = npf_port_from_str(cport); - if (!m->m_rtp_port) { - rc = -EINVAL; - break; - } - /* Default rtcp port for inspect */ - if (sip_is_inspect(sr)) - m->m_rtcp_port = m->m_rtp_port+1; - /* Do we have an attribute rtcp port? 
*/ - rc = sip_alg_sdp_get_rtcp_attribute(sr, m, pos); - break; - default: - rc = -EINVAL; - break; - } - - return rc; -} - -static int sip_alg_translate_media_connect(sdp_connection_t *c, - char *addr) -{ - /* - * Performs basic sanity as well. - * - * If we do not have a translation address, we are not - * translating this packet. Do nothing. - * - * Do not translate IPv6 - */ - if (!addr) - return 0; - - if (strcmp(c->c_nettype, "IN")) - return -EINVAL; - - if (!strcmp(c->c_addrtype, "IP6")) - return 0; - - if (strcmp(c->c_addrtype, "IP4")) - return -EINVAL; - - osip_free(c->c_addr); - c->c_addr = addr; - return 0; -} - -static int sip_alg_translate_media_port(struct sip_alg_request *sr, - int pos, in_port_t port) -{ - char *cport; - - cport = port_to_str(port); - if (!cport) - return -ENOMEM; - - if (sdp_message_m_port_set(sr->sr_sdp, pos, cport)) { - osip_free(cport); - return -EINVAL; - } - return 0; -} - -static int sip_alg_update_media(struct sip_alg_request *sr, - int pos, npf_addr_t *taddr, uint8_t alen, in_port_t port) -{ - sdp_connection_t *c; - int rc = 0; - char *addr; - - /* - * Update the connection ("c=") and media ("m=") with - * the translation address and port. - */ - c = sdp_message_connection_get(sr->sr_sdp, pos, 0); - - if (c) { - if (!sip_translate_addr_reqd(c->c_addr, sip_oaddr(sr))) - return 0; /* Nothing to do */ - - addr = sip_addr_to_str(taddr, alen); - if (!addr) - return -EINVAL; - - rc = sip_alg_translate_media_connect(c, addr); - if (rc) { - osip_free(addr); - return rc; - } - } else { - /* - * There is no media connection address. Only translate media - * port if session connection address matches the NAT target - * address. 
- */ - c = sdp_message_connection_get(sr->sr_sdp, -1, 0); - - if (!c || !sip_translate_addr_reqd(c->c_addr, sip_oaddr(sr))) - return 0; /* Nothing to do */ - } - - return sip_alg_translate_media_port(sr, pos, port); -} - -/* - * Translate the SDP session (global) connection address - */ -static void -sip_alg_update_session_media(struct sip_alg_request *sr) -{ - sdp_connection_t *c; - char *addr; - - c = sdp_message_connection_get(sr->sr_sdp, -1, 0); - if (!c) - return; - - if (!sip_translate_addr_reqd(c->c_addr, sip_oaddr(sr))) - return; - - addr = osip_strdup(sip_taddr(sr)); - if (addr) - sip_alg_translate_media_connect(c, addr); -} - -/* - * sip_alg_sdp_update_origin() - Update the "o=" field. - */ -static int sip_alg_sdp_update_origin(struct sip_alg_request *sr) -{ - char *nettype = sdp_message_o_nettype_get(sr->sr_sdp); - char *addrtype = sdp_message_o_addrtype_get(sr->sr_sdp); - - if (!nettype || strcmp(nettype, "IN")) - return -EINVAL; - - if (addrtype && !strcmp(addrtype, "IP6")) - return 0; /* Ignore IPv6 */ - - if (!addrtype || strcmp(addrtype, "IP4")) - return -EINVAL; /* Unknown/unsupported */ - - if (!sip_translate_addr_reqd(sr->sr_sdp->o_addr, sip_oaddr(sr))) - return 0; /* Nothing to do */ - - /* no api */ - osip_free(sr->sr_sdp->o_addr); - sr->sr_sdp->o_addr = osip_strdup(sip_taddr(sr)); - if (!sr->sr_sdp->o_addr) - return -ENOMEM; - return 0; -} - -/* - * Parse the sdp session (global) media address - */ -static int -sip_alg_parse_session_media_addr(struct sip_alg_request *sr, - npf_addr_t *addr, uint8_t *alen) -{ - sdp_connection_t *c; - int rc = -1; - - c = sdp_message_connection_get(sr->sr_sdp, -1, 0); - if (!c) - return 0; - - sip_addr_from_str(c->c_addr, addr, alen); - if (*alen) - rc = 0; - - return rc; -} - -/* - * sip_alg_parse_media_addr() - */ -static int sip_alg_parse_media_addr(struct sip_alg_media *m, - struct sip_alg_request *sr, int pos) -{ - sdp_connection_t *c; - int rc = -1; - - c = sdp_message_connection_get(sr->sr_sdp, pos, 0); - 
if (c) { - sip_addr_from_str(c->c_addr, &m->m_rtp_addr, &m->m_rtp_alen); - if (m->m_rtp_alen) - rc = 0; - } - return rc; -} - -/* - * sip_media_alloc() - Allocate a media struct - */ -static struct sip_alg_media *sip_media_alloc(npf_session_t *se, - struct sip_alg_request *sr, int pos) -{ - struct sip_alg_media *m; - npf_nat_t *nat = npf_session_get_nat(se); - - m = calloc(1, sizeof(struct sip_alg_media)); - if (!m) - return NULL; - - CDS_INIT_LIST_HEAD(&m->m_list); - m->m_np = npf_nat_get_policy(nat); - if (m->m_np) { - m->m_rl = npf_nat_get_rule(nat); - m->m_nat_flags = npf_nat_get_map_flags(nat); - m->m_vrfid = npf_session_get_vrfid(se); - } - - m->m_proto = sip_alg_sdp_get_media_proto(sr, pos); - m->m_type = sip_nat_type(sr); - return m; -} - -static bool sip_do_translate(const struct sip_alg_request *sr) -{ - switch (sip_nat_type(sr)) { - case sip_nat_snat: - if (sip_forw(sr)) - return true; - break; - case sip_nat_dnat: - if (!sip_forw(sr)) - return true; - break; - default: /* Hush up gcc */ - break; - } - - return false; -} - -/* - * sip_media_translations() - Get media translations if needed. - */ -static int sip_media_translations(npf_session_t *se, - struct sip_alg_media *m, struct sip_alg_request *sr, - npf_nat_t *nat) -{ - int rc = 0; - - /* Set defaults for translation addrs/ports. */ - m->m_trtp_port = m->m_rtp_port; - m->m_trtp_addr = m->m_rtp_addr; - m->m_trtp_alen = m->m_rtp_alen; - - /* If IPv6 or inspection */ - if (m->m_rtp_alen > 4 || sip_is_inspect(sr)) { - sip_alg_set_rtcp_media(m); - return rc; - } - - /* - * Handle both INVITE and OK's for both SNAT and DNAT. - * Only SNAT must reserve ports. 
- */ - switch (sip_nat_type(sr)) { - case sip_nat_snat: - if (sip_forw(sr)) - rc = sip_alg_reserve_ports(se, m, nat); - else - sip_alg_set_rtcp_media(m); - break; - case sip_nat_dnat: - if (sip_forw(sr)) - sip_alg_set_rtcp_media(m); - else - sip_alg_dnat_rtcp_media(m, nat); - break; - default: - return -EINVAL; - } - return rc; -} - -static int sip_alg_translate_media(struct sip_alg_request *sr, - struct sip_alg_media *m, int pos) -{ - int rc; - - /* - * N.B.: If this is dnat, then this 'm' is generated off - * a response from the server. This means that we received the - * dnat translation port. IOW, we need to do a 'reverse' translation - * on this msg. - * - * So make sure we re-write the packet with the correct port. - */ - if (sip_is_snat(sr)) - rc = sip_alg_update_media(sr, pos, &m->m_trtp_addr, - m->m_trtp_alen, m->m_trtp_port); - else - rc = sip_alg_update_media(sr, pos, &m->m_rtp_addr, - m->m_rtp_alen, m->m_rtp_port); - if (!rc) { - if (sip_is_snat(sr)) - rc = sip_alg_sdp_set_rtcp_attribute(sr, pos, - &m->m_trtcp_addr, m->m_trtcp_alen, - m->m_trtcp_port); - else - rc = sip_alg_sdp_set_rtcp_attribute(sr, pos, - &m->m_rtcp_addr, m->m_rtcp_alen, - m->m_rtcp_port); - } - return rc; -} - -/* - * calculate L4 + L3 checksum deltas. - */ -static void -sip_calculate_checksum_deltas(const void *oaddr, const void *naddr, - uint16_t oport, uint16_t nport, - uint16_t *l3_delta, uint16_t *l4_delta) -{ - const uint32_t *oip32 = oaddr; - const uint32_t *nip32 = naddr; - - uint16_t delta = ip_fixup32_cksum(0xffff, *oip32, *nip32); - *l3_delta = delta ^ 0xffff; - - delta = ip_fixup16_cksum(0xffff, oport, nport); - *l4_delta = delta ^ 0xffff; -} - -/* - * sip_parse_reply_path() - Parse the first VIA for reply path parameters. - */ -static int sip_parse_reply_path(struct sip_alg_request *sr, npf_session_t *se) -{ - struct sip_alg_session *ss = npf_alg_session_get_private(se); - int rc; - - /* - * The SIP RFC states that responses are always routed to the - * VIA path. 
In the case of newer Cicso phones, a high number - * sport is used with replies expected on the SIP default port - * (5060). See the inspect and nat routines for more details. - * - * This means we may need to translate all replies after - * receiving the first msg. So grab the needed addr/port here and - * save it in the session handle. - */ - - if (!ss) - return -ENOENT; - - rc = 0; - if (!ss->ss_via_port) { - osip_via_t *v = NULL; - - ss->ss_ifx = npf_session_get_if_index(se); - osip_message_get_via(sr->sr_sip, 0, &v); - if (v) { - ss->ss_via_port = htons(npf_port_from_str( - osip_via_get_port(v))); - if (!ss->ss_via_port) - return -EINVAL; - - /* - * Note, this my fail if the VIA address is a FQDN, in - * which case ss_via_alen will be left at 0. - */ - ss->ss_via_alen = 0; - sip_addr_from_str(osip_via_get_host(v), - &ss->ss_via_addr, &ss->ss_via_alen); - rc = 0; - - } else - rc = -EINVAL; - } - return rc; -} - -/* - * sip_alg_manage_media() - Parse a media line, reserve addr/port and - */ -static int sip_alg_manage_media(npf_session_t *se, npf_nat_t *nat, - struct sip_alg_request *sr) -{ - int rc; - int pos; - struct sip_alg_media *m; - - /* - * Update session connection - */ - if (sip_is_snat(sr)) { - rc = sip_alg_sdp_update_origin(sr); - if (rc) - return rc; - } - - npf_addr_t s_rtp_addr; - uint8_t s_rtp_alen = 0; - - sip_alg_parse_session_media_addr(sr, &s_rtp_addr, &s_rtp_alen); - - for (pos = 0; !sdp_message_endof_media(sr->sr_sdp, pos) && - pos < SDP_MAX_MEDIA; pos++) { - - rc = -ENOMEM; - m = sip_media_alloc(se, sr, pos); - if (!m) - goto bad; - - rc = sip_alg_parse_media_addr(m, sr, pos); - if (rc) { - /* No media addr. 
Use SDP session addr */ - if (!s_rtp_alen) - goto bad; - m->m_rtp_addr = s_rtp_addr; - m->m_rtp_alen = s_rtp_alen; - } - - rc = sip_alg_parse_media_ports(m, sr, pos); - if (rc) - goto bad; - - rc = sip_media_translations(se, m, sr, nat); - if (rc) - goto bad; - - if (sip_do_translate(sr)) { - rc = sip_alg_translate_media(sr, m, pos); - if (rc) - goto bad; - } - - cds_list_add_tail(&sr->sr_media, &m->m_list); - } - - /* - * Translate the session connection address - */ - if (s_rtp_alen && sip_do_translate(sr)) - sip_alg_update_session_media(sr); - - return 0; -bad: - sip_media_free(m); - return rc; -} - -/* - * sip_tuple_data_free() - ALG tuple callback for tuple private data. - */ -static void sip_tuple_data_free(void *data) -{ - struct sip_tuple_data *td = data; - - if (td) - sip_tuple_data_put(td); -} - -static void sip_alg_tuple_init(struct npf_alg_tuple *nt, npf_session_t *se, - void *data, uint8_t alen) -{ - const struct npf_alg *sip = npf_alg_session_get_alg(se); - - /* session ref count is incremented by apt_insert_tuple */ - nt->nt_se = se; - nt->nt_alg = sip; - nt->nt_ifx = npf_session_get_if_index(se); - nt->nt_flags = NPF_TUPLE_MATCH_ALL; - nt->nt_timeout = 10; - nt->nt_proto = IPPROTO_UDP; - nt->nt_alen = alen; - - if (data) { - nt->nt_data = data; - nt->nt_reap = sip_tuple_data_free; - } -} - -static int sip_alloc_tuple_pair(struct npf_alg_tuple **forward, - struct npf_alg_tuple **reverse) -{ - *reverse = NULL; - *forward = npf_alg_tuple_alloc(); - if (!*forward) - goto bad; - - *reverse = npf_alg_tuple_alloc(); - if (!*reverse) - goto bad; - - return 0; - -bad: - npf_alg_tuple_free(*forward); - npf_alg_tuple_free(*reverse); - *forward = NULL; - *reverse = NULL; - return -ENOMEM; -} - -/* - * sip_alg_create_rtcp_tuple() - */ -static void sip_alg_create_rtcp_tuples(npf_session_t *se, npf_cache_t *npc, - struct sip_tuple_data *td) -{ - int rc; - struct sip_alg_media *mi = td->td_mi; - struct sip_alg_media *mr = td->td_mr; - const struct npf_alg *sip = 
npf_alg_session_get_alg(se); - struct npf_alg_tuple *forward; - struct npf_alg_tuple *reverse; - - /* - * If the rtcp ports are zero, we have nothing to do. - */ - if (!mi->m_rtcp_port || !mr->m_rtcp_port) - return; - - /* - * If this is a UDP SDP proto, then we are done. - */ - if (mi->m_proto == sdp_proto_udp) - return; - - /* Allocate a pair of tuples */ - if (sip_alloc_tuple_pair(&forward, &reverse)) - return; - - /* Common init */ - sip_alg_tuple_init(forward, se, td, npc->npc_alen); - sip_alg_tuple_init(reverse, se, td, npc->npc_alen); - forward->nt_alg_flags = SIP_ALG_RTCP_FLOW; - reverse->nt_alg_flags = SIP_ALG_RTCP_FLOW; - - /* Set ports/addrs/flags */ - switch (td_nat_type(td)) { - case sip_nat_snat: - forward->nt_alg_flags |= SIP_ALG_NAT; - forward->nt_srcip = mi->m_rtcp_addr; - forward->nt_sport = htons(mi->m_rtcp_port); - forward->nt_dstip = mr->m_trtcp_addr; - forward->nt_dport = htons(mr->m_trtcp_port); - - reverse->nt_alg_flags |= SIP_ALG_NAT; - reverse->nt_srcip = mr->m_rtcp_addr; - reverse->nt_sport = htons(mr->m_rtcp_port); - reverse->nt_dstip = mi->m_trtcp_addr; - reverse->nt_dport = htons(mi->m_trtcp_port); - break; - case sip_nat_dnat: - forward->nt_alg_flags |= SIP_ALG_NAT; - forward->nt_srcip = mi->m_rtcp_addr; - forward->nt_sport = htons(mi->m_rtcp_port); - forward->nt_dstip = mr->m_rtcp_addr; - forward->nt_dport = htons(mr->m_rtcp_port); - - reverse->nt_alg_flags |= SIP_ALG_NAT; - reverse->nt_srcip = mr->m_trtcp_addr; - reverse->nt_sport = htons(mr->m_trtcp_port); - reverse->nt_dstip = mi->m_rtcp_addr; - reverse->nt_dport = htons(mi->m_rtcp_port); - break; - case sip_nat_inspect: - forward->nt_srcip = mi->m_rtcp_addr; - forward->nt_sport = htons(mi->m_rtcp_port); - forward->nt_dstip = mr->m_rtcp_addr; - forward->nt_dport = htons(mr->m_rtcp_port); - - reverse->nt_srcip = mr->m_rtcp_addr; - reverse->nt_sport = htons(mr->m_rtcp_port); - reverse->nt_dstip = mi->m_rtcp_addr; - reverse->nt_dport = htons(mi->m_rtcp_port); - break; - default: - 
npf_alg_tuple_free(forward); - npf_alg_tuple_free(reverse); - sip_tuple_data_put(td); - return; - } - - sip_tuple_data_get(td); - - rc = npf_alg_tuple_add_replace(sip_alg_instance(sip), forward); - if (rc) { - npf_alg_tuple_free(forward); - npf_alg_tuple_free(reverse); - sip_tuple_data_put(td); - return; - } - - /* Now deal with the reverse tuple */ - npf_alg_tuple_pair(forward, reverse); - sip_tuple_data_get(td); - reverse->nt_alg_flags |= SIP_ALG_REVERSE; - - rc = npf_alg_tuple_add_replace(sip_alg_instance(sip), reverse); - if (rc) { - npf_alg_tuple_unpair(reverse); - npf_alg_tuple_expire(forward); - npf_alg_tuple_free(reverse); - sip_tuple_data_put(td); - } -} - -/* - * sip_alg_create_rtp_tuples() - Create the RTP or UDP tuples. Note that this - * traffic is di-directional, so we need to create - * one for each possible direction. - */ -static int sip_alg_create_rtp_tuples(npf_session_t *se, - const struct npf_alg *sip, - struct sip_alg_request *sr, struct sip_alg_media *mi, - struct sip_alg_media *mr) -{ - struct sip_tuple_data *td = NULL; - int rc; - struct npf_alg_tuple *forward; - struct npf_alg_tuple *reverse; - - /* Allocate a pair of tuples */ - if (sip_alloc_tuple_pair(&forward, &reverse)) { - sip_media_free(mi); - sip_media_free(mr); - return -ENOMEM; - } - - /* - * Set a private data field for the rtp/udp tuples. This flow - * will create tuples for the rtcp flow if needed. - * - * allocated ports may be reclaimed when the tuples are deleted. 
- */ - - td = sip_tuple_data_alloc(sip, sr, mi, mr); - if (!td) { - sip_media_free(mi); - sip_media_free(mr); - npf_alg_tuple_free(forward); - npf_alg_tuple_free(reverse); - return -ENOMEM; - } - - /* Common init */ - sip_alg_tuple_init(forward, se, td, mi->m_rtp_alen); - sip_alg_tuple_init(reverse, se, td, mi->m_rtp_alen); - forward->nt_alg_flags = SIP_ALG_RTP_FLOW; - reverse->nt_alg_flags = SIP_ALG_RTP_FLOW; - - /* Set ports/addrs/flags */ - switch (td_nat_type(td)) { - case sip_nat_snat: - forward->nt_alg_flags |= SIP_ALG_NAT; - forward->nt_sport = htons(mi->m_rtp_port); - forward->nt_srcip = mi->m_rtp_addr; - forward->nt_dport = htons(mr->m_rtp_port); - forward->nt_dstip = mr->m_rtp_addr; - - reverse->nt_alg_flags |= SIP_ALG_NAT; - reverse->nt_sport = htons(mr->m_trtp_port); - reverse->nt_srcip = mr->m_trtp_addr; - reverse->nt_dport = htons(mi->m_trtp_port); - reverse->nt_dstip = mi->m_trtp_addr; - break; - case sip_nat_dnat: - forward->nt_alg_flags |= SIP_ALG_NAT; - forward->nt_sport = htons(mi->m_rtp_port); - forward->nt_srcip = mi->m_rtp_addr; - forward->nt_dport = htons(mr->m_rtp_port); - forward->nt_dstip = mr->m_rtp_addr; - - reverse->nt_alg_flags |= SIP_ALG_NAT; - reverse->nt_sport = htons(mr->m_trtp_port); - reverse->nt_srcip = mr->m_trtp_addr; - reverse->nt_dport = htons(mi->m_rtp_port); - reverse->nt_dstip = mi->m_rtp_addr; - break; - case sip_nat_inspect: - forward->nt_sport = htons(mi->m_rtp_port); - forward->nt_srcip = mi->m_rtp_addr; - forward->nt_dport = htons(mr->m_rtp_port); - forward->nt_dstip = mr->m_rtp_addr; - - reverse->nt_sport = htons(mr->m_rtp_port); - reverse->nt_srcip = mr->m_rtp_addr; - reverse->nt_dport = htons(mi->m_rtp_port); - reverse->nt_dstip = mi->m_rtp_addr; - break; - default: - sip_tuple_data_put(td); - npf_alg_tuple_free(forward); - npf_alg_tuple_free(reverse); - return -EINVAL; - } - - rc = npf_alg_tuple_add_replace(sip_alg_instance(sip), forward); - if (rc) { - sip_tuple_data_put(td); - npf_alg_tuple_free(forward); - 
npf_alg_tuple_free(reverse); - return rc; - } - - /* Now deal with the reverse tuple */ - npf_alg_tuple_pair(forward, reverse); - sip_tuple_data_get(td); - reverse->nt_alg_flags |= SIP_ALG_REVERSE; - - rc = npf_alg_tuple_add_replace(sip_alg_instance(sip), reverse); - if (rc) { - sip_tuple_data_put(td); - npf_alg_tuple_unpair(reverse); - npf_alg_tuple_expire(forward); - npf_alg_tuple_free(reverse); - } - - return rc; -} - -/* - * sip_alg_resolve_media() - sync up invite/response and create tuples. - */ -static int sip_alg_resolve_media(npf_session_t *se, - struct sip_alg_request *invite, - struct sip_alg_request *response) -{ - int rc = 0; - int pos; - const struct npf_alg *sip = npf_alg_session_get_alg(se); - int size = sip_media_count(&invite->sr_media); - - /* - * If the invite and response port lists are different sizes, - * then we had a bad SDP packet in either - They must be the - * same size. - */ - if (size != sip_media_count(&response->sr_media)) - return -1; - - /* - * Prepare for creating tuples out of each media definition - * from the invite and response. - */ - for (pos = 0; pos < size; pos++) { - struct sip_alg_media *i; - struct sip_alg_media *r; - - i = cds_list_first_entry(&invite->sr_media, - struct sip_alg_media, m_list); - cds_list_del(&i->m_list); - - r = cds_list_first_entry(&response->sr_media, - struct sip_alg_media, m_list); - cds_list_del(&r->m_list); - - /* This consumes the medias */ - rc = sip_alg_create_rtp_tuples(se, sip, invite, i, r); - if (rc) - break; - } - - return rc; -} - -/* - * sip_alg_translate_snat() - Translate for SNAT. - */ -static int sip_alg_translate_snat(struct sip_alg_request *tsr, bool forw, - const char *taddr, const char *tport) -{ - /* - * Translation fields depend upon both stream - * direction and msg type. 
- */ - if (MSG_IS_REQUEST(tsr->sr_sip) && forw) { - if (sip_alg_translate_from(tsr, taddr, tport)) - return -1; - if (sip_alg_translate_user_agent(tsr, taddr)) - return -1; - if (sip_alg_translate_call_id(tsr, taddr)) - return -1; - if (sip_alg_translate_via(tsr, taddr, tport)) - return -1; - if (sip_alg_translate_contact(tsr, taddr, tport)) - return -1; - if (sip_alg_translate_record_route(tsr, taddr, tport)) - return -1; - if (sip_alg_translate_route(tsr, taddr, tport)) - return -1; - if (sip_alg_translate_p_asserted_id(tsr, taddr, tport)) - return -1; - if (sip_alg_translate_p_preferred_id(tsr, taddr, tport)) - return -1; - } else if (MSG_IS_REQUEST(tsr->sr_sip) && !forw) { - if (sip_alg_translate_request_uri(tsr, taddr, tport)) - return -1; - if (sip_alg_translate_to(tsr, taddr, tport)) - return -1; - if (sip_alg_translate_call_id(tsr, taddr)) - return -1; - } else if (MSG_IS_RESPONSE(tsr->sr_sip) && forw) { - if (sip_alg_translate_to(tsr, taddr, tport)) - return -1; - if (sip_alg_translate_contact(tsr, taddr, tport)) - return -1; - if (sip_alg_translate_record_route(tsr, taddr, tport)) - return -1; - if (sip_alg_translate_from(tsr, taddr, tport)) - return -1; - if (sip_alg_translate_call_id(tsr, taddr)) - return -1; - if (sip_alg_translate_via(tsr, taddr, tport)) - return -1; - } else if (MSG_IS_RESPONSE(tsr->sr_sip) && !forw) { - if (sip_alg_translate_from(tsr, taddr, tport)) - return -1; - if (sip_alg_translate_call_id(tsr, taddr)) - return -1; - if (sip_alg_translate_via(tsr, taddr, tport)) - return -1; - if (sip_alg_translate_record_route(tsr, taddr, tport)) - return -1; - if (sip_alg_translate_route(tsr, taddr, tport)) - return -1; - } - return 0; -} - -/* - * sip_alg_translate_dnat() - Translate for DNAT. - */ -static int sip_alg_translate_dnat(struct sip_alg_request *tsr, bool forw, - const char *taddr, const char *tport) -{ - /* - * Translation fields depend upon both stream - * direction and msg type. 
- */ - if (MSG_IS_REQUEST(tsr->sr_sip) && forw) { - if (sip_alg_translate_request_uri(tsr, taddr, tport)) - return -1; - if (sip_alg_translate_to(tsr, taddr, tport)) - return -1; - } else if (MSG_IS_REQUEST(tsr->sr_sip) && !forw) { - if (sip_alg_translate_request_uri(tsr, taddr, tport)) - return -1; - if (sip_alg_translate_contact(tsr, taddr, tport)) - return -1; - if (sip_alg_translate_to(tsr, taddr, tport)) - return -1; - if (sip_alg_translate_p_asserted_id(tsr, taddr, tport)) - return -1; - if (sip_alg_translate_p_preferred_id(tsr, taddr, tport)) - return -1; - } else if (MSG_IS_RESPONSE(tsr->sr_sip) && forw) { - if (sip_alg_translate_to(tsr, taddr, tport)) - return -1; - if (sip_alg_translate_record_route(tsr, taddr, tport)) - return -1; - if (sip_alg_translate_route(tsr, taddr, tport)) - return -1; - } else if (MSG_IS_RESPONSE(tsr->sr_sip) && !forw) { - if (sip_alg_translate_request_uri(tsr, taddr, tport)) - return -1; - if (sip_alg_translate_to(tsr, taddr, tport)) - return -1; - if (sip_alg_translate_contact(tsr, taddr, tport)) - return -1; - if (sip_alg_translate_record_route(tsr, taddr, tport)) - return -1; - if (sip_alg_translate_route(tsr, taddr, tport)) - return -1; - } - return 0; -} - -/* - * sip_alg_translate_message() - Translate a sip msg. - */ -static int sip_alg_translate_message(const struct npf_alg *sip, - struct sip_alg_request *sr, struct sip_alg_request **_tsr) -{ - int rc; - struct sip_alg_request *tsr; - - *_tsr = NULL; - - tsr = sip_alg_request_alloc(false, sr->sr_if_idx); - if (!tsr) - return -ENOMEM; - - /* - * Clone the SIP and SDP messages. 
- */ - rc = osip_message_clone(sr->sr_sip, &tsr->sr_sip); - if (rc) - goto bad; - - memcpy(&tsr->sr_nat, &sr->sr_nat, sizeof(struct sip_nat)); - - if (sr->sr_sdp) { - rc = sdp_message_clone(sr->sr_sdp, &tsr->sr_sdp); - if (rc) - goto bad; - } - - if (sip_is_snat(tsr)) { - rc = sip_alg_translate_snat(tsr, sip_forw(tsr), - sip_taddr(tsr), sip_tport(tsr)); - } else if (sip_is_dnat(tsr)) { - rc = sip_alg_translate_dnat(tsr, sip_forw(tsr), - sip_taddr(tsr), sip_tport(tsr)); - } - if (rc) - goto bad; - - *_tsr = tsr; - - return 0; -bad: - sip_alg_request_free(sip, tsr); - return rc; -} - -/* - * Add the call id on the session handle private data. - * We will expire these then the session handle is expired. - * - * Note this is non-fatal if we cannot add it. All it means is - * that the INVITES will timeout/expire. - */ -static void sip_alg_add_session_call_id(npf_session_t *se, - struct sip_alg_request *sr) -{ - osip_call_id_t *cid; - struct sip_alg_session *ss; - int i; - size_t sz; - - /* Only CNTL sessions have private data */ - ss = npf_alg_session_get_private(se); - if (!ss) - return; - - cid = osip_message_get_call_id(sr->sr_sip); - if (!cid) - return; - - /* Only add unique, ignore re-transmissions... */ - for (i = 0; i < ss->ss_call_id_count; i++) { - if (osip_call_id_match(cid, ss->ss_call_ids[i]) - == OSIP_SUCCESS) - return; - } - - sz = sizeof(osip_call_id_t *) * (ss->ss_call_id_count + 1); - ss->ss_call_ids = realloc(ss->ss_call_ids, sz); - if (!ss->ss_call_ids) - return; - if (osip_call_id_clone(cid, &ss->ss_call_ids[ss->ss_call_id_count]) != - OSIP_SUCCESS) - return; - ss->ss_call_id_count++; - return; -} - -/* - * Verify that the call ID in 'sr' matches the given session. 
- */ -static bool sip_alg_verify_session_call_id(npf_session_t *se, - struct sip_alg_request *sr) -{ - osip_call_id_t *cid; - struct sip_alg_session *ss; - int i; - - /* Only CNTL sessions have private data */ - ss = npf_alg_session_get_private(se); - if (!ss) - return false; - - cid = osip_message_get_call_id(sr->sr_sip); - if (!cid) - return false; - - for (i = 0; i < ss->ss_call_id_count; i++) { - if (osip_call_id_match(cid, ss->ss_call_ids[i]) - == OSIP_SUCCESS) - return true; - } - return false; -} - -/* - * Manage all SIP requests. If appropriate, add - * an INVITE to the sip hash table. - * - * Note that all call-ids for INVITEs are added to the - * CNTL session handle, and are expired (if they exist) - * when the session handle is expired. - * - * We will also add the ALT CNTL tuple in here, but only - * once. - */ -static int sip_manage_request(npf_session_t *se, - npf_cache_t *npc, - struct sip_alg_request *sr, - struct sip_alg_request *tsr, - npf_nat_t *nat, bool *consumed) -{ - const struct npf_alg *sip = npf_alg_session_get_alg(se); - int rc; - - /* Set per-packet info */ - npc->npc_alg_flags = SIP_NPC_REQUEST; - - if (MSG_IS_CANCEL(tsr->sr_sip) || MSG_IS_BYE(tsr->sr_sip)) { - sip_request_lookup_and_expire(sip, tsr); - return 0; - } - - /* - * Get the reply path from the VIA header, must be present. Note that - * this call returns an error for Requests in the reverse direction, - * hence we handle the CANCEL and BYE Requests above. 
- */ - rc = sip_parse_reply_path(sr, se); - if (rc) - return rc; - - /* This will only add the alt cntl tuple once */ - rc = sip_alg_manage_cntl(se, npc, tsr); - if (rc) - return rc; - - /* Either parse and add the INVITE, or handle a BYE/etc */ - if (MSG_IS_INVITE(tsr->sr_sip)) { - if (!sr->sr_sdp) - return -EINVAL; - - rc = sip_alg_manage_media(se, nat, tsr); - if (rc) - return rc; - - rc = sip_alg_add_invite(sip, tsr); - if (!rc) { - sip_alg_add_session_call_id(se, tsr); - *consumed = true; - } - - } - - return rc; -} - -/* - * Manage all sip responses. - * - * Here we associate the previously received INVITE (if applicable) with this - * 200 or 183 response, and eventually create the rtp tuples for the media - * flows. - * - * A '183 Session Progress' may be used if early media (RTP traffic before the - * call is answered) is present. This response (like the '200 OK') includes - * an SDP message part containing media information. - * - * The first response received containing SDP media information is used to - * create the RTP/RTCP tuples. After that, we can expire the SIP Request - * message that we have been holding onto. - * - * However if the original Invite is expired upon receipt of a 183 message - * then we still need to ensure that we translate the SDP media fields in the - * '200 OK' message. - * - * (Early media typically includes dial tones and/or recorded messages.) - * - * Note we also do some sanity checking on this response and we can return an - * error to drop the packet. - */ -static int sip_manage_response(npf_session_t *se, npf_cache_t *npc, - struct sip_alg_request *sr, struct sip_alg_request *tsr, - npf_nat_t *nat) - -{ - struct sip_alg_request *osr; - const struct npf_alg *sip = npf_alg_session_get_alg(se); - int rc; - - /* Set per-packet info */ - npc->npc_alg_flags = SIP_NPC_RESPONSE; - - /* - * Handle all error responses now, no need to continue - * if this is an error response. 
- */ - if (sip_alg_handle_error_response(sip, tsr)) - return 0; - - /* - * If this is a '200 Ok', or a '183 Session Progress', then we - * may need to resolve the media flows for this sip call. - */ - rc = 0; - if (MSG_IS_RESPONSE_FOR(tsr->sr_sip, "INVITE") && - (MSG_TEST_CODE(tsr->sr_sip, 200) || - MSG_TEST_CODE(tsr->sr_sip, 183))) { - - /* - * We are only interested in backwards Responses, - * eg: reply to a forward request - */ - if (sip_forw(tsr)) - return 0; - - /* ignore non-sdp 200 responses */ - if (!sr->sr_sdp && MSG_TEST_CODE(tsr->sr_sip, 200)) - return 0; - - /* But not 183's... */ - if (!sr->sr_sdp && MSG_TEST_CODE(tsr->sr_sip, 183)) - return -EINVAL; - - osr = sip_request_lookup(sip, sr); - if (osr) { - /* Translate SDP media fields */ - rc = sip_alg_manage_media(se, nat, tsr); - if (!rc) - rc = sip_alg_resolve_media(se, osr, tsr); - - /* Always expire the INVITE, UA can resend */ - sip_request_expire(osr); - } else { - /* - * The original INVITE may have been resolved by a 183 - * Response. If so, we still need to translate the - * SDP media in the '200 OK' message. Verify the - * call-ID matches the session before doing so. - */ - if (!MSG_TEST_CODE(tsr->sr_sip, 200)) - return 0; - - if (!sip_alg_verify_session_call_id(se, sr)) - return 0; - - rc = sip_alg_manage_media(se, nat, tsr); - } - } - - return rc; -} - -/* - * sip_alg_manage_sip() - Manage the SIP message. - * Used both both nat and inspect. - */ -static int sip_alg_manage_sip(npf_session_t *se, npf_cache_t *npc, - struct sip_alg_request *sr, struct sip_alg_request *tsr, - npf_nat_t *nat, bool *consumed) -{ - int rc = -EINVAL; - - /* - * Handle (thus far) valid requests and responses, - * all garbage will result in a drop packet. 
- */ - if (MSG_IS_REQUEST(tsr->sr_sip)) - rc = sip_manage_request(se, npc, sr, tsr, nat, consumed); - else if (MSG_IS_RESPONSE(tsr->sr_sip)) - rc = sip_manage_response(se, npc, sr, tsr, nat); - - return rc; -} - -/* - * sip_alg_manage_packet() - manage and translate SIP packets - */ -static int sip_alg_manage_packet(npf_session_t *se, struct sip_alg_request *sr, - npf_cache_t *npc, struct rte_mbuf *nbuf, npf_nat_t *nat) -{ - struct sip_alg_request *tsr = NULL; - const struct npf_alg *sip = npf_alg_session_get_alg(se); - int rc; - bool consumed = false; - - rc = sip_alg_translate_message(sip, sr, &tsr); - if (rc) - goto done; - - rc = sip_alg_manage_sip(se, npc, sr, tsr, nat, &consumed); - if (rc) - goto done; - - rc = sip_alg_update_payload(se, npc, sip_di(tsr), nbuf, tsr); - if (rc) { - if (consumed) { - sip_request_lookup_and_expire(sip, tsr); - consumed = false; - } - } - -done: - if (!consumed) - sip_alg_request_free(sip, tsr); - - sip_alg_request_free(sip, sr); - - return rc; -} - -/* - * sip_init_nat() - Init the 'nat' params for this request - */ -static void sip_init_nat(struct sip_alg_request *sr, bool forw, - const npf_addr_t *taddr, const npf_addr_t *oaddr, - uint8_t alen, in_port_t tport, const int di) -{ - struct sip_nat *sn = &sr->sr_nat; - int rc; - - /* Port and addr from nat struct for CNTL session */ - rc = snprintf(sn->sn_tport, 8, "%hu", ntohs(tport)); - if (rc < 0 || rc >= 8) - return; - - if (taddr) { - inet_ntop(AF_INET, taddr, sn->sn_taddr, sizeof(sn->sn_taddr)); - sn->sn_alen = alen; - } - if (oaddr) - inet_ntop(AF_INET, oaddr, sn->sn_oaddr, sizeof(sn->sn_oaddr)); - sn->sn_di = di; - sn->sn_forw = forw; - - if (taddr) { - if (di == PFIL_IN && forw) - sn->sn_type = sip_nat_dnat; - else if (di == PFIL_OUT && forw) - sn->sn_type = sip_nat_snat; - else if (di == PFIL_IN && !forw) - sn->sn_type = sip_nat_snat; - else if (di == PFIL_OUT && !forw) - sn->sn_type = sip_nat_dnat; - } else { - sn->sn_type = sip_nat_inspect; - } -} - -/* - * 
sip_alg_translate_packet() - */ -static int sip_alg_translate_packet(npf_session_t *se, npf_cache_t *npc, - npf_nat_t *ns, struct rte_mbuf *nbuf, const int di) -{ - npf_addr_t taddr; - const struct npf_alg *sip = npf_alg_session_get_alg(se); - in_port_t tport; - npf_addr_t oaddr; - in_port_t oport; - bool forw; - struct sip_alg_request *sr; - - /* Don't manipulate (TCP) packets w/o data */ - if (!npf_payload_len(npc)) - return 0; - - /* - * Parsed msg may have been placed into session provate data by tuple - * inspect - */ - sr = sip_alg_parse(sip, npc, npf_session_get_if_index(se), nbuf); - if (!sr) - return -EINVAL; - - if (sip_alg_verify(sr)) { - sip_alg_request_free(sip, sr); - return -EINVAL; - } - - (void) npf_session_retnat(se, di, &forw); - - /* - * We need both sets of addrs, in opposite order - */ - if (forw) { - npf_nat_get_trans(ns, &taddr, &tport); - npf_nat_get_orig(ns, &oaddr, &oport); - } else { - npf_nat_get_orig(ns, &taddr, &tport); - npf_nat_get_trans(ns, &oaddr, &oport); - } - - /* - * For the SIP alt cntl session, 'forw' is true since the session was - * created in this direction. However from the SIP translation POV, - * we want to use the parent session to get the 'forw' setting, since - * the SIP packet flow is relative to it. This is used to set sn_forw - * and sn_type in the sip_nat struct that hangs of the SIP request - * struct. 
- */ - if (npf_session_get_parent(se) && - npf_alg_session_test_flag(se, SIP_ALG_REVERSE)) - forw = !forw; - - sip_init_nat(sr, forw, &taddr, &oaddr, npc->npc_alen, tport, di); - - return sip_alg_manage_packet(se, sr, npc, nbuf, ns); -} - -/* - * sip_alg_inspect_packet() - Prep for packet inspection - */ -static void sip_alg_inspect_packet(npf_session_t *se, npf_cache_t *npc, - struct rte_mbuf *nbuf, int di) -{ - struct sip_alg_request *sr; - const struct npf_alg *sip = npf_alg_session_get_alg(se); - bool consumed = false; - - sr = sip_alg_parse(sip, npc, npf_session_get_if_index(se), nbuf); - if (!sr) - return; - - if (sip_alg_verify(sr)) { - sip_alg_request_free(sip, sr); - return; - } - - sip_init_nat(sr, false, NULL, NULL, 0, 0, di); - - sip_alg_manage_sip(se, npc, sr, sr, NULL, &consumed); - - if (!consumed) - sip_alg_request_free(sip, sr); -} - -/* - * sip_translate_reply_path() - */ -static void sip_translate_reply_path(npf_session_t *se, int di __unused, - struct rte_mbuf *nbuf, npf_cache_t *npc) -{ - /* - * We *dont* want to rewrite the dest IP address until VRVDR-31954 is - * resolved. - * - * This function is called from the ALG .inspect callback, and we dont - * know if this packet is SIP Request or a SIP Response. We only want - * to rewrite the IP dest for SIP Responses on the reply path. - */ - return; - - - struct sip_alg_session *ss = npf_alg_session_get_private(se); - void *n_ptr = npf_iphdr(nbuf); - struct udphdr *uh = &npc->npc_l4.udp; - - if (!ss) - return; - - /* Only if this is a response msg */ - if (npc->npc_alg_flags != SIP_NPC_RESPONSE) - return; - - /* Only udp */ - if (npf_cache_ipproto(npc) != IPPROTO_UDP) - return; - - /* - * While most SIP implementations set the VIA to match - * the port/addr of the initiator, the SIP RFC states that - * reply packets must be routed to the addr/port in the VIA. - * - * Newer Cisco phones implement the RFC exactly. 
They use a - * high numbered sport for sending out msgs and expect - * reply packets on the default SIP port (5060). - * - * The situation we have here, according to the RFC is: - * - * a1:p1 --> a2:p2 - * a3:p3 <-- - * - * But this screws up our session handles, which were added to - * a1:p1 - a2:p2, so we need to do this translation outside - * of the nat engine to maintain a sane view of session - * handles (as well as return the reply appropriately. - * - * Only do this for UDP. - */ - - - /* - * Dont rewrite the IP header if we failed to get a return address - * from the Via in the Invite, e.g. it may have been a FQDN. - */ - if (ss->ss_via_alen == 0) - return; - - if (uh->dest == ss->ss_via_port) - return; /* Nothing to do */ - - /* Calculate the L3 and L4 checksum delta's */ - uint16_t l3_delta, l4_delta; - - sip_calculate_checksum_deltas(npf_cache_dstip(npc), &ss->ss_via_addr, - uh->dest, ss->ss_via_port, - &l3_delta, &l4_delta); - - /* - * re-write IP and UDP cksums first. - */ - if (!npf_v4_rwrcksums(npc, nbuf, n_ptr, l3_delta, l4_delta)) - return; - - /* Now translate */ - if (!npf_rwrip(npc, nbuf, n_ptr, PFIL_IN, &ss->ss_via_addr)) - return; - - /* Now the port */ - npf_rwrport(npc, nbuf, n_ptr, PFIL_IN, ss->ss_via_port); -} - -/* - * sip_alg_inspect() - Inspect non-natted flow - */ -static void sip_alg_inspect(npf_session_t *se, npf_cache_t *npc, - struct rte_mbuf *nbuf, struct ifnet *ifp __unused, - int di) -{ - uint32_t flags = npf_alg_session_get_flags(se); - - /* sanity - can only be CNTL flow */ - if (!(flags & (SIP_ALG_CNTL_FLOW | SIP_ALG_ALT_CNTL_FLOW))) - return; - - if (npf_iscached(npc, NPC_IP4)) - sip_translate_reply_path(se, di, nbuf, npc); - - if (!npf_iscached(npc, NPC_NATTED)) - sip_alg_inspect_packet(se, npc, nbuf, di); -} - -/* sip_alg_natout() - packet NAT (SNAT) out*/ -static int sip_alg_nat_out(npf_session_t *se, npf_cache_t *npc, - struct rte_mbuf *nbuf, npf_nat_t *ns) -{ - /* This can only be the SIP flow */ - return 
sip_alg_translate_packet(se, npc, ns, nbuf, PFIL_OUT); -} - -/* sip_alg_nat_in() - Packet NAT in */ -static int sip_alg_nat_in(npf_session_t *se, npf_cache_t *npc, - struct rte_mbuf *nbuf, npf_nat_t *ns) -{ - /* This can only be the SIP flow */ - return sip_alg_translate_packet(se, npc, ns, nbuf, PFIL_IN); -} - -/* sip_alg_session_destroy() - session handle destroy */ -static void sip_alg_session_destroy(npf_session_t *se) -{ - /* Only the reply datum for cntl */ - if (npf_alg_session_test_flag(se, SIP_ALG_CNTL_FLOW)) - sip_alg_private_session_free(se); -} - -/* sip_alg_session_expire() - session handle expire */ -static void sip_alg_session_expire(npf_session_t *se) -{ - if (npf_alg_session_test_flag(se, SIP_ALG_CNTL_FLOW)) - sip_expire_session_request(se); -} - -/* - * sip_alg_nat_inspect() - Inspect and assign the nat struct. - */ -static void sip_alg_nat_inspect(npf_session_t *se, npf_cache_t *npc __unused, - npf_nat_t *nt, int di __unused) -{ - if (npf_alg_session_test_flag(se, SIP_ALG_CNTL_FLOW | - SIP_ALG_ALT_CNTL_FLOW)) - npf_nat_setalg(nt, npf_alg_session_get_alg(se)); -} - -/* Create an alg nat object */ -static struct npf_alg_nat * -sip_create_nat(vrfid_t vrfid, uint32_t flags, bool reserved, - npf_addr_t oaddr, in_port_t oport, - npf_addr_t taddr, in_port_t tport) -{ - struct npf_alg_nat *an = malloc(sizeof(struct npf_alg_nat)); - - if (an) { - an->an_oaddr = oaddr; - an->an_oport = oport; - an->an_taddr = taddr; - an->an_tport = tport; - an->an_flags = flags; - an->an_vrfid = vrfid; - if (reserved) - an->an_flags |= NPF_NAT_CLONE_APM | NPF_NAT_MAP_PORT; - } - return an; -} - -static int sip_session_nat_media(npf_session_t *se, npf_cache_t *npc, - const int di, struct npf_alg_tuple *nt) -{ - struct sip_tuple_data *td = nt->nt_data; - struct sip_alg_media *m = NULL; - uint32_t nat_flags = 0; - int rc; - - /* - * Create the nat(s). In SIP's case we always allocate a nat - * since we likely allocated consecutive rtp/rtcp ports. 
- * - * We have 4 (possible) cases to deal with. We don't know which - * direction the rtp and rtcp flows will originate from and - * we will add 4 tuples for those. - * - * Even though these might be forward flows that match a nat rule, - * we already allocated ports during control msg parsing. - * - * All we do here is create and set the nat struct, unless this - * is merely a stateful rule flow set. - */ - if (!(nt->nt_alg_flags & SIP_ALG_NAT) || !td) - return 0; - - /* We may have to reverse the nat */ - if (nt->nt_alg_flags & SIP_ALG_REVERSE) - nat_flags = NPF_NAT_REVERSE; - - - /* Select proper side, either invite or response */ - if (td_is_snat(td)) - m = td->td_mi; - else if (td_is_dnat(td)) - m = td->td_mr; - else - return -EINVAL; - - rc = -ENOMEM; - vrfid_t vrfid = npf_session_get_vrfid(se); - - switch (nt->nt_alg_flags & SIP_ALG_MASK) { - case SIP_ALG_RTP_FLOW: - nt->nt_nat = sip_create_nat(vrfid, nat_flags, - m->m_rtp_reserved, - m->m_rtp_addr, htons(m->m_rtp_port), - m->m_trtp_addr, htons(m->m_trtp_port)); - if (nt->nt_nat) { - rc = npf_alg_session_nat(se, - npf_alg_parent_nat(nt->nt_se), - npc, di, nt); - if (!rc) - m->m_rtp_reserved = false; - } - break; - case SIP_ALG_RTCP_FLOW: - nt->nt_nat = sip_create_nat(vrfid, nat_flags, - m->m_rtcp_reserved, - m->m_rtcp_addr, htons(m->m_rtcp_port), - m->m_trtcp_addr, htons(m->m_trtcp_port)); - if (nt->nt_nat) { - rc = npf_alg_session_nat(se, - npf_alg_parent_nat(nt->nt_se), - npc, di, nt); - if (!rc) - m->m_rtcp_reserved = false; - } - break; - default: - return -EINVAL; - } - - return rc; -} - -/* - * Setup NAT for the alt control session. 
- */ -static int sip_session_nat_alt_cntl(npf_session_t *se, npf_cache_t *npc, - const int di, struct npf_alg_tuple *nt) -{ - npf_session_t *parent = nt->nt_se; - npf_nat_t *pnat = npf_session_get_nat(parent); - npf_addr_t oaddr; - npf_addr_t taddr; - in_port_t oport, tport; - int ntype; - uint masq; - struct npf_alg_tuple dummy; - int rc; - - /* Only if parent is natted */ - if (!pnat) - return 0; - - /* Get parent NAT translation address and port */ - if (!npf_nat_info(pnat, &ntype, &taddr, &tport, &masq)) - return -EINVAL; - - /* Only for SNAT */ - if (ntype != NPF_NATOUT) - return -EINVAL; - - /* - * All we are doing here is creating a reverse nat using the - * parent's original src addr/port. We just want this flow to - * translate back to the original parent. We use a dummy tuple - * struct to pass the alg nat struct for nat creation. - */ - npf_nat_get_orig(pnat, &oaddr, &oport); - - dummy.nt_nat = sip_create_nat(npf_session_get_vrfid(se), - NPF_NAT_REVERSE, false, oaddr, oport, - taddr, tport); - if (!dummy.nt_nat) - return -ENOMEM; - - rc = npf_alg_session_nat(se, pnat, npc, di, &dummy); - if (!rc) - npf_nat_setalg(npf_session_get_nat(se), nt->nt_alg); - - return rc; -} - -static int sip_alg_session_init(npf_session_t *se, npf_cache_t *npc, - struct npf_alg_tuple *nt, const int di) -{ - int rc = 0; - - npf_alg_session_set_flag(se, nt->nt_alg_flags); - - switch (nt->nt_alg_flags & SIP_ALG_MASK) { - case SIP_ALG_CNTL_FLOW: - npf_alg_session_set_inspect(se, true); - rc = sip_alg_private_session_init(se); - break; - - case SIP_ALG_ALT_CNTL_FLOW: - npf_alg_session_set_inspect(se, true); - npf_alg_session_set_flag(se, SIP_ALG_REVERSE); - rc = sip_session_nat_alt_cntl(se, npc, di, nt); - if (!rc) - npf_session_link_child(nt->nt_se, se); - break; - - case SIP_ALG_RTP_FLOW: - rc = sip_session_nat_media(se, npc, di, nt); - if (!rc) { - struct sip_tuple_data *td = nt->nt_data; - - sip_alg_create_rtcp_tuples(se, npc, td); - npf_session_link_child(nt->nt_se, se); - } - 
break; - - case SIP_ALG_RTCP_FLOW: - rc = sip_session_nat_media(se, npc, di, nt); - if (!rc) - npf_session_link_child(nt->nt_se, se); - break; - } - - return rc; -} - -/* sip_alg_config() - Config routine for sip */ -static int sip_alg_config(struct npf_alg *sip, int op, int argc, - char * const argv[]) -{ - struct npf_alg_config_item ci = { - .ci_flags = NPF_TUPLE_KEEP | NPF_TUPLE_MATCH_PROTO_PORT, - .ci_alg_flags = SIP_ALG_CNTL_FLOW - }; - int rc; - int i; - - /* Only ports, skip */ - if (strcmp(argv[0], "port")) - return 0; - argc--; argv++; - - for (i = 0; i < argc; i++) { - ci.ci_datum = npf_port_from_str(argv[i]); - if (!ci.ci_datum) - continue; - - /* - * Treat ports are a protocol pair - * (Really should be separate CLI) - */ - ci.ci_proto = IPPROTO_UDP; - rc = npf_alg_manage_config_item(sip, &sip->na_configs[0], - op, &ci); - if (rc) - return rc; - - ci.ci_proto = IPPROTO_TCP; - rc = npf_alg_manage_config_item(sip, &sip->na_configs[0], - op, &ci); - if (rc) { - /* unwind if possible */ - ci.ci_proto = IPPROTO_UDP; - npf_alg_manage_config_item(sip, &sip->na_configs[0], - NPF_ALG_CONFIG_DELETE, &ci); - return rc; - } - } - - return 0; -} - -static void sip_alg_periodic(struct npf_alg *sip) -{ - sip_ht_gc(sip); -} - -/* alg struct */ -static const struct npf_alg_ops sip_ops = { - .name = NPF_ALG_SIP_NAME, - .se_init = sip_alg_session_init, - .se_destroy = sip_alg_session_destroy, - .se_expire = sip_alg_session_expire, - .inspect = sip_alg_inspect, - .config = sip_alg_config, - .nat_inspect = sip_alg_nat_inspect, - .nat_in = sip_alg_nat_in, - .nat_out = sip_alg_nat_out, - .periodic = sip_alg_periodic, -}; - -static const struct npf_alg_config_item sip_ports[] = { - { IPPROTO_TCP, (NPF_TUPLE_KEEP | NPF_TUPLE_MATCH_PROTO_PORT), - SIP_ALG_CNTL_FLOW, SIP_DEFAULT_PORT }, - { IPPROTO_UDP, (NPF_TUPLE_KEEP | NPF_TUPLE_MATCH_PROTO_PORT), - SIP_ALG_CNTL_FLOW, SIP_DEFAULT_PORT }, -}; - -struct npf_alg *npf_alg_sip_create_instance(struct npf_alg_instance *ai) -{ - struct 
npf_alg *sip; - struct sip_private *sp = NULL; - int rc = -ENOMEM; - - sip = npf_alg_create_alg(ai, NPF_ALG_ID_SIP); - if (!sip) - goto bad; - - sip->na_ops = &sip_ops; - - /* setup default config */ - sip->na_num_configs = 1; - sip->na_configs[0].ac_items = sip_ports; - sip->na_configs[0].ac_item_cnt = ARRAY_SIZE(sip_ports); - sip->na_configs[0].ac_handler = npf_alg_port_handler; - - sp = zmalloc_aligned(sizeof(struct sip_private)); - if (!sp) - goto bad; - - sp->sp_ht = cds_lfht_new(SIP_HT_INIT, SIP_HT_MIN, SIP_HT_MAX, - CDS_LFHT_AUTO_RESIZE | CDS_LFHT_ACCOUNTING, NULL); - if (!sp->sp_ht) - goto bad; - - rte_spinlock_init(&sp->sp_media_lock); - CDS_INIT_LIST_HEAD(&sp->sp_dead_media); - - sip->na_private = sp; - - rc = npf_alg_register(sip); - if (rc) - goto bad; - - /* Take reference on an alg application instance */ - npf_alg_get(sip); - - return sip; - -bad: - if (net_ratelimit()) - RTE_LOG(ERR, FIREWALL, "ALG: SIP instance failed: %d\n", rc); - - if (sp && sp->sp_ht) - cds_lfht_destroy(sp->sp_ht, NULL); - free(sp); - free(sip); - return NULL; -} - -static void sip_destroy_ht(struct npf_alg *sip) -{ - struct cds_lfht_iter iter; - struct sip_alg_request *sr; - struct sip_private *sp = sip->na_private; - int rc; - - if (!sp) - return; - - /* - * Free each request synchronously - ensures we - * sync return APM mappings prior tp APM instance destroy - */ - cds_lfht_for_each_entry(sp->sp_ht, &iter, sr, sr_node) { - if (!cds_lfht_del(sp->sp_ht, &sr->sr_node)) - sip_alg_request_free_sync(sip, sr); - } - - rcu_read_unlock(); - rc = cds_lfht_destroy(sp->sp_ht, NULL); - rcu_read_lock(); - if (rc) - RTE_LOG(ERR, FIREWALL, "ALG: SIP cds_lfht_destroy\n"); - - /* Destroy any dead media added during ht destroy */ - sip_free_dead_media(sp); -} - -/* - * Destroy - we are guaranteed no access and a rcu quiesce period has - * passed. 
- */ -void npf_alg_sip_destroy_instance(struct npf_alg *sip) -{ - if (sip) { - sip_destroy_ht(sip); - free(sip->na_private); - sip->na_private = NULL; - sip->na_enabled = false; - sip->na_ai = NULL; - - /* Release reference on an alg application instance */ - npf_alg_put(sip); - } -} - -/* Constructor for one-time libosip initialization */ -static void npf_alg_sip_init(void) __attribute__ ((__constructor__)); - -static void npf_alg_sip_init(void) -{ - osip_init(&sip_osip); -} diff --git a/src/npf/alg/sip/sip.h b/src/npf/alg/sip/sip.h new file mode 100644 index 00000000..5497360e --- /dev/null +++ b/src/npf/alg/sip/sip.h @@ -0,0 +1,274 @@ +/* + * Copyright (c) 2020, AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ + +/* + * Private header for SIP alg files. + */ + +#ifndef _SIP_H_ +#define _SIP_H_ + +#include +#include +#include "json_writer.h" +#include "util.h" + +#include "npf/alg/alg.h" +#include "npf/alg/sip/sip_osip.h" +#include "npf/alg/sip/sip_request.h" +#include "npf/alg/sip/sip_response.h" +#include "npf/alg/sip/sip_parse.h" +#include "npf/alg/sip/sip_translate.h" + +/* + * SIP private data. + * + * We manage Invites and responses by using a hash table. New invites are + * added to the table, and corresponding responses pull them from the hash + * table. + */ +struct sip_private { + struct cds_lfht *sp_ht; /* sip_alg_request hash */ + rte_spinlock_t sp_media_lock; /* For media */ + struct cds_list_head sp_dead_media; /* for freeing media */ +}; + +/* + * Type of nat being performed. + */ +enum sip_nat_type { + sip_nat_snat = 1, + sip_nat_dnat, + sip_nat_inspect +}; + +static inline const char *sip_nat_type_str(enum sip_nat_type type) +{ + switch (type) { + case sip_nat_snat: + return "SIP_NAT_SNAT"; + case sip_nat_dnat: + return "SIP_NAT_DNAT"; + case sip_nat_inspect: + return "SIP_NAT_INSPECT"; + } + return "Unknown"; +} + +/* + * There are two types of media that we are interested in: UDP and RTP. 
+ * (RTP includes secure RTP) + */ +enum sdp_proto { + sdp_proto_udp = 1, + sdp_proto_rtp, + sdp_proto_unknown +}; + +/* + * Struct for holding nat info. + */ +struct sip_nat { + char sn_taddr[INET6_ADDRSTRLEN];/* trans addr */ + char sn_oaddr[INET6_ADDRSTRLEN];/* orig addr */ + char sn_tport[8]; /* trans port */ + enum sip_nat_type sn_type; /* type of nat */ + bool sn_forw; /* forward? */ + int sn_di; /* direction */ + uint8_t sn_alen; /* addr len */ +}; + +#define sip_nat_type(sr) ((sr)->sr_nat.sn_type) +#define sip_is_snat(sr) (sip_nat_type(sr) == sip_nat_snat) +#define sip_is_dnat(sr) (sip_nat_type(sr) == sip_nat_dnat) +#define sip_is_inspect(sr) (sip_nat_type(sr) == sip_nat_inspect) +#define sip_forw(sr) ((sr)->sr_nat.sn_forw) +#define sip_taddr(sr) ((sr)->sr_nat.sn_taddr) +#define sip_oaddr(sr) ((sr)->sr_nat.sn_oaddr) +#define sip_tport(sr) ((sr)->sr_nat.sn_tport) +#define sip_di(sr) ((sr)->sr_nat.sn_di) + +/* Macros for accessing SIP instance datum */ +#define sip_alg_instance(sip) ((sip)->na_ai) + +/* + * SIP request struct + * + * Created when we parse a SIP request message. sr_sip and sr_sdp are the + * two fields that are initialized and populated by the osip parser. + * + * Two objects of this type are typically passed around - sr and tsr. sr is + * the original request, and tsr is a copy of the original request but with + * NAT translations applied. + * + * Stored in a hash table in sip_private, which is on the ALG instance + * private data, alg->na_private. + */ +struct sip_alg_request { + struct cds_lfht_node sr_node; + uint64_t sr_timeout; + osip_message_t *sr_sip; + sdp_message_t *sr_sdp; + struct sip_nat sr_nat; + uint32_t sr_if_idx; + struct cds_list_head sr_media_list_head; /* media list head */ + uint8_t sr_flags; + const struct npf_alg *sr_sip_alg; + struct rcu_head sr_rcu_head; +}; + +/* sr_flags */ +#define SIP_REQUEST_EXPIRED 0x1 +#define SIP_REQUEST_REMOVING 0x2 + +/* + * Struct for managing rtp translation data. 
Note these ports are maintained + * in host order. + * + * There are multiple media parts in the SDP message of the Invite and + * Response packets. A list of media structures is stored in the respective + * Invite and Response sip_alg_request structures. The Invite request is + * stored in the sip alg hash table until a matching Response is received. + * + * When an Invite request is matched to a Response request then the media + * information is used to create tuples, and the sip_alg_media structures are + * then stored in the tuple private data. + * + * A list of these structures is stored in sip_alg_request sr_media_list_head. + */ +struct sip_alg_media { + /* node in sr_media_list_head or sp_dead_media list */ + struct cds_list_head m_node; + + enum sdp_proto m_proto; + enum sip_nat_type m_type; + uint8_t m_ip_prot; + + /* Original */ + in_port_t m_rtp_port; + npf_addr_t m_rtp_addr; + uint8_t m_rtp_alen; + in_port_t m_rtcp_port; + npf_addr_t m_rtcp_addr; + uint8_t m_rtcp_alen; + + /* Translated */ + in_port_t m_trtp_port; + npf_addr_t m_trtp_addr; + uint8_t m_trtp_alen; + in_port_t m_trtcp_port; + npf_addr_t m_trtcp_addr; + uint8_t m_trtcp_alen; + + npf_natpolicy_t *m_np; + npf_rule_t *m_rl; + uint32_t m_nat_flags; + vrfid_t m_vrfid; + bool m_rtp_reserved; /* ports from pool? */ + bool m_rtcp_reserved; +}; + + +/* + * SIP alg flags + * + * Flags defining the types of SIP/media flows. Note that a SIP media UDP + * flow is handled as a RTP flow. + */ +#define SIP_ALG_CNTL_FLOW 0x01 +#define SIP_ALG_ALT_CNTL_FLOW 0x02 +#define SIP_ALG_RTP_FLOW 0x04 +#define SIP_ALG_RTCP_FLOW 0x08 +#define SIP_ALG_REVERSE 0x10 +#define SIP_ALG_NAT 0x20 +#define SIP_ALG_ALT_TUPLE_SET 0x40 +#define SIP_ALG_MASK (SIP_ALG_CNTL_FLOW | SIP_ALG_ALT_CNTL_FLOW | \ + SIP_ALG_RTP_FLOW | SIP_ALG_RTCP_FLOW) + + +/* + * SIP private session data. 
+ */ +struct sip_alg_session { + in_port_t ss_via_port; + uint8_t ss_via_alen; + uint32_t ss_ifx; + npf_addr_t ss_via_addr; + int ss_call_id_count; + osip_call_id_t **ss_call_ids; +}; + +/* + * Struct for managing tuple data. These are added to media (RTP and RTCP) + * tuples. + * + * Note ports are in host format. + */ +struct sip_tuple_data { + const struct npf_alg *td_sip; + struct sip_nat td_nat; + struct sip_alg_media *td_mi; /* Invite-side media */ + struct sip_alg_media *td_mr; /* Response-side media */ + rte_atomic32_t td_refcnt; + bool td_is_reverse; /* Reverse flow? */ +}; +#define td_nat_type(td) ((td)->td_nat.sn_type) +#define td_is_snat(td) ((td)->td_nat.sn_type == sip_nat_snat) +#define td_is_dnat(td) ((td)->td_nat.sn_type == sip_nat_dnat) +#define td_is_inspect(td) ((td)->td_nat.sn_type == sip_nat_inspect) +#define td_is_reverse(td) ((td)->td_is_reverse) +#define td_forw(td) ((td)->td_nat.sn_forw) + + +/* + * Minimum msg size. + * + * While the protocol does not define a minimum size directly, the Osip + * parser assumes a minimum of 4 bytes during parsing. + * + * A 'real' SIP message must have multiple header fields to be valid and + * a minimum ACK msg with options stripped out will be > 200 bytes, + * so let's use that as our min msg size. + */ +#define SIP_MSG_MIN_LENGTH 200 + + +/* SIP per-packet flags. */ +#define SIP_NPC_REQUEST 0x01 +#define SIP_NPC_RESPONSE 0x02 + + +/* + * sip_translate_addr_reqd() - Do we want to translate this addr? 
+ */ +static inline bool sip_translate_addr_reqd(const char *addr, const char *oaddr) +{ + if (!addr || !oaddr) + return false; + + /* Only translate if the address matches the one NAT replaces (oaddr) */ + if (strcmp(addr, oaddr) != 0) + return false; + + return true; +} + +void sip_addr_from_str(const char *saddr, npf_addr_t *addr, uint8_t *alen); + +/* Convert an address to an allocated string; caller must free it */ +char *sip_addr_to_str(npf_addr_t *a, uint8_t alen); + +/* Convert a port to an (allocated) string */ +char *sip_port_to_str(in_port_t n); + +int sip_alg_verify(struct sip_alg_request *sr); + +int sip_alg_manage_sip(npf_session_t *se, npf_cache_t *npc, + struct sip_alg_request *sr, + struct sip_alg_request *tsr, + npf_nat_t *nat, bool *consumed); + +#endif /* _SIP_H_ */ diff --git a/src/npf/alg/sip/sip_osip.h b/src/npf/alg/sip/sip_osip.h new file mode 100644 index 00000000..c933420b --- /dev/null +++ b/src/npf/alg/sip/sip_osip.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2020, AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ + +/* + * OSIP library header files. + */ + +#ifndef _SIP_OSIP_H_ +#define _SIP_OSIP_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#endif /* _SIP_OSIP_H_ */ diff --git a/src/npf/alg/sip/sip_parse.c b/src/npf/alg/sip/sip_parse.c new file mode 100644 index 00000000..f703aea2 --- /dev/null +++ b/src/npf/alg/sip/sip_parse.c @@ -0,0 +1,669 @@ +/* + * Copyright (c) 2020, AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ + +/* + * SIP parse. + * + * sip_alg_parse will parse a SIP packet (Invite or Response) looking for an + * SDP message contained in it. If found, a sip_alg_request is allocated and + * returned. 
+ * + * sip_alg_manage_media will parse the SDP "c=" and "m=" strings, and (if not + * in 'inspect' path) translate the "c=" address. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "vrf.h" +#include "util.h" +#include "vplane_log.h" + +#include "npf/npf.h" +#include "npf/alg/alg.h" +#include "npf/npf_cache.h" +#include "npf/npf_nat.h" +#include "npf/npf_session.h" + + +/* + * Max media connections per INVITE. + */ +#define SDP_MAX_MEDIA 8 + + +/* + * sip_alg_dnat_rtcp_media() - Init/finalize media addr/ports for DNAT + */ +static void sip_alg_dnat_rtcp_media(struct sip_alg_media *m, npf_nat_t *nat) +{ + in_port_t tmp; + bool do_rtcp = false; + + if (!IN6_IS_ADDR_UNSPECIFIED(&m->m_rtcp_addr) && + memcmp(&m->m_rtp_addr, &m->m_rtcp_addr, m->m_rtp_alen) == 0) + do_rtcp = true; + + /* Reset rtp addr to the NAT original address (port is discarded) */ + npf_nat_get_orig(nat, &m->m_rtp_addr, &tmp); + + /* + * No rtcp attribute: default from rtp (port + 1). Else use it. + */ + if (IN6_IS_ADDR_UNSPECIFIED(&m->m_rtcp_addr)) { + m->m_rtcp_addr = m->m_rtp_addr; + m->m_rtcp_port = m->m_rtp_port + 1; + m->m_rtcp_alen = m->m_rtp_alen; + m->m_trtcp_addr = m->m_trtp_addr; + m->m_trtcp_port = m->m_rtcp_port; + m->m_trtcp_alen = m->m_rtcp_alen; + } else { + m->m_trtcp_addr = m->m_rtcp_addr; + m->m_trtcp_port = m->m_rtcp_port; + m->m_trtcp_alen = m->m_rtcp_alen; + if (do_rtcp) + m->m_rtcp_addr = m->m_rtp_addr; + } +} + +/* + * sip_alg_set_rtcp_media() - Init/finalize media addr/ports + */ +static void sip_alg_set_rtcp_media(struct sip_alg_media *m) +{ + /* + * No rtcp attribute: default from rtp (port + 1). Else use it. 
+ */ + if (IN6_IS_ADDR_UNSPECIFIED(&m->m_rtcp_addr)) { + m->m_rtcp_addr = m->m_rtp_addr; + m->m_rtcp_port = m->m_rtp_port + 1; + m->m_rtcp_alen = m->m_rtp_alen; + } + + m->m_trtcp_addr = m->m_rtcp_addr; + m->m_trtcp_port = m->m_rtcp_port; + m->m_trtcp_alen = m->m_rtcp_alen; +} + +/* + * Parse the sdp session (global) media address + */ +static int +sip_alg_parse_session_media_addr(struct sip_alg_request *sr, + 
npf_addr_t *addr, uint8_t *alen) +{ + sdp_connection_t *c; + int rc = -1; + + c = sdp_message_connection_get(sr->sr_sdp, -1, 0); + if (!c) + return 0; + + sip_addr_from_str(c->c_addr, addr, alen); + if (*alen) + rc = 0; + + return rc; +} + +/* + * sip_alg_sdp_get_media_proto() - Classify the media proto string at 'pos' + */ +static int sip_alg_sdp_get_media_proto(struct sip_alg_request *sr, int pos) +{ + char *proto = sdp_message_m_proto_get(sr->sr_sdp, pos); + + if (!proto) + return -1; + + if (strstr(proto, "UDP")) + return sdp_proto_udp; + if (strstr(proto, "RTP")) + return sdp_proto_rtp; + return sdp_proto_unknown; +} + +/* + * sip_alg_sdp_update_origin() - Update the "o=" field. + */ +static int sip_alg_sdp_update_origin(struct sip_alg_request *sr) +{ + char *nettype = sdp_message_o_nettype_get(sr->sr_sdp); + char *addrtype = sdp_message_o_addrtype_get(sr->sr_sdp); + + if (!nettype || strcmp(nettype, "IN") != 0) + return -EINVAL; + + if (addrtype && !strcmp(addrtype, "IP6")) + return 0; /* Ignore IPv6 */ + + if (!addrtype || strcmp(addrtype, "IP4") != 0) + return -EINVAL; /* Unknown/unsupported */ + + if (!sip_translate_addr_reqd(sr->sr_sdp->o_addr, sip_oaddr(sr))) + return 0; /* Nothing to do */ + + /* No osip setter API for o_addr; replace it directly */ + osip_free(sr->sr_sdp->o_addr); + sr->sr_sdp->o_addr = osip_strdup(sip_taddr(sr)); + if (!sr->sr_sdp->o_addr) + return -ENOMEM; + return 0; +} + +/* + * sip_alg_get_sdp_attribute() - Find a media attribute by exact field name + */ +static sdp_attribute_t *sip_alg_get_sdp_attribute(struct sip_alg_request *sr, + int pos, const char *name) +{ + int i = 0; + sdp_attribute_t *a; + + while ((a = sdp_message_attribute_get(sr->sr_sdp, pos, i)) != NULL) { + if (a->a_att_field && !strcmp(a->a_att_field, name)) + return a; + i++; + } + return NULL; +} + +/* + * sip_parse_rtcp() - Get port and (optional) addr from an rtcp attribute. 
+ * + * Example RTCP SDP attributes: + * a=rtcp:53020 + * a=rtcp:53020 IN IP4 126.16.64.4 + * a=rtcp:53020 IN IP6 2001:2345:6789:ABCD:EF01:2345:6789:ABCD + * + * The "rtcp" name is held in the attribute field (a_att_field), and the + * attribute value passed in here is of the form "53020 IN IP4 126.16.64.4". + * Note that only the port number is mandatory. + */ +static int sip_parse_rtcp(const char *value, + in_port_t *port, npf_addr_t *addr, uint8_t *alen) +{ + char *cport = NULL; + char *cnettype = NULL; + char *caddrtype = NULL; + char *caddr = NULL; + int i; + int rc = 0; + + i = sscanf(value, "%5ms %2ms %3ms %46ms", + &cport, &cnettype, &caddrtype, &caddr); + if (i > 0) { + *port = npf_port_from_str(cport); + /* Address is optional, verify if present */ + if (*port && (i == 4)) { + sip_addr_from_str(caddr, addr, alen); + if (!*alen) + rc = -EINVAL; + } + } + free(cport); + free(cnettype); + free(caddrtype); + free(caddr); + + return rc; +} + +/* + * sip_alg_sdp_get_rtcp_attribute() - Get the rtcp attribute params if present. + */ +static int sip_alg_sdp_get_rtcp_attribute(struct sip_alg_request *sr, + struct sip_alg_media *m, int pos) +{ + sdp_attribute_t *a; + int rc = 0; + + a = sip_alg_get_sdp_attribute(sr, pos, "rtcp"); + if (a && a->a_att_value) { + rc = sip_parse_rtcp(a->a_att_value, &m->m_rtcp_port, + &m->m_rtcp_addr, &m->m_rtcp_alen); + if (rc) + return rc; + } + + /* + * Now default for addr if not set. 
+ */ + if (m->m_rtcp_port && IN6_IS_ADDR_UNSPECIFIED(&m->m_rtcp_addr)) { + m->m_rtcp_addr = m->m_rtp_addr; + m->m_rtcp_alen = m->m_rtp_alen; + } + + return rc; +} + +/* + * sip_alg_set_rtcp_attribute() - Update "rtcp" attribute if present + */ +int sip_alg_sdp_set_rtcp_attribute(struct sip_alg_request *sr, + int pos, npf_addr_t *taddr, uint8_t alen, + in_port_t tport) +{ + sdp_attribute_t *a; + int rc = 0; /* Not an error if ENOENT */ + + /* Only if the rtcp port exists */ + if (!tport) + return 0; + + a = sip_alg_get_sdp_attribute(sr, pos, "rtcp"); + if (a) { + char *cport = NULL; + char *cnettype = NULL; + char *caddrtype = NULL; + char *caddr = NULL; + int i; + char value[70]; + char *naddr; + + naddr = sip_addr_to_str(taddr, alen); + if (!naddr) + return -ENOMEM; + + i = sscanf(a->a_att_value, "%5ms %2ms %3ms %46ms", + &cport, &cnettype, &caddrtype, &caddr); + + if (i <= 0) { + osip_free(naddr); + return -EINVAL; + } + + if (i == 1) + rc = snprintf(value, sizeof(value), "%hu", tport); + else if (i == 4) + rc = snprintf(value, sizeof(value), "%hu %s %s %s", + tport, cnettype, caddrtype, naddr); + else + rc = -EINVAL; + + free(cport); + free(cnettype); + free(caddrtype); + free(caddr); + osip_free(naddr); + + if (rc > 0 && (uint)rc < sizeof(value)) { + osip_free(a->a_att_value); + a->a_att_value = osip_strdup(value); + rc = 0; + } else { + rc = -ENOMEM; + } + } + return rc; +} + +/* + * sip_alg_parse_media_addr() + */ +static int sip_alg_parse_media_addr(struct sip_alg_media *m, + struct sip_alg_request *sr, int pos) +{ + sdp_connection_t *c; + int rc = -1; + + c = sdp_message_connection_get(sr->sr_sdp, pos, 0); + if (c) { + sip_addr_from_str(c->c_addr, &m->m_rtp_addr, &m->m_rtp_alen); + if (m->m_rtp_alen) + rc = 0; + } + return rc; +} + +static int sip_alg_parse_media_ports(struct sip_alg_media *m, + struct sip_alg_request *sr, int pos) +{ + char *cport = sdp_message_m_port_get(sr->sr_sdp, pos); + int rc = 0; + + /* Must have a port in the media */ + if (!cport) 
+ return -EINVAL; + + switch (m->m_proto) { + case sdp_proto_udp: + m->m_rtp_port = npf_port_from_str(cport); + if (!m->m_rtp_port) + rc = -EINVAL; + break; + case sdp_proto_rtp: + m->m_rtp_port = npf_port_from_str(cport); + if (!m->m_rtp_port) { + rc = -EINVAL; + break; + } + /* Default rtcp port for inspect */ + if (sip_is_inspect(sr)) + m->m_rtcp_port = m->m_rtp_port+1; + /* Do we have an attribute rtcp port? */ + rc = sip_alg_sdp_get_rtcp_attribute(sr, m, pos); + break; + default: + rc = -EINVAL; + break; + } + + return rc; +} + +/* + * sip_alg_reserve_ports() - Reserve translation ports for rtp (and rtcp) + */ +static int sip_alg_reserve_ports(npf_session_t *se, + struct sip_alg_media *m, npf_nat_t *ns) +{ + int n = 1; + bool start_even = false; + in_port_t port; + int rc; + npf_natpolicy_t *np = npf_nat_get_policy(ns); + uint32_t nat_flags = NPF_NAT_MAP_PORT; + npf_rule_t *rl = npf_nat_get_rule(ns); + vrfid_t vrfid = npf_session_get_vrfid(se); + + /* + * If we do not have an rtcp attribute, then we need + * to allocate 2 consecutive ports, starting on an even + * boundary. Otherwise, one port will do. + */ + if (m->m_proto == sdp_proto_rtp && + IN6_IS_ADDR_UNSPECIFIED(&m->m_rtcp_addr)) { + start_even = true; + n = 2; + } + + port = htons(m->m_trtp_port); + rc = npf_alg_reserve_translations(se, n, start_even, m->m_rtp_alen, + &m->m_trtp_addr, &port); + if (rc) + return rc; + m->m_trtp_port = ntohs(port); + m->m_ip_prot = npf_session_get_proto(se); + m->m_rtp_reserved = true; + + /* If the proto is not rtp, we are done. 
*/ + if (m->m_proto != sdp_proto_rtp) + return rc; + + /* + * If we didn't have an rtcp attribute, then + * default the rtcp members to use the next allocated port + * + * Otherwise, we have an rtcp attribute with an addr, + * (which may be the same as the rtp addr) + * and we need to get a distinct mapping for that + * addr/port pair + */ + if (IN6_IS_ADDR_UNSPECIFIED(&m->m_rtcp_addr)) { + m->m_rtcp_addr = m->m_rtp_addr; + m->m_rtcp_port = m->m_rtp_port+1; + m->m_rtcp_alen = m->m_rtp_alen; + + m->m_trtcp_port = ntohs(port) + 1; + m->m_trtcp_addr = m->m_trtp_addr; + m->m_trtcp_alen = m->m_trtp_alen; + m->m_rtcp_reserved = true; + } else { + /* + * If the rtcp addr is the same as the rtp addr, + * then we need to allocate a port. Otherwise, + * this is a remote host. + */ + if (!memcmp(&m->m_rtcp_addr, &m->m_rtp_addr, m->m_rtp_alen)) { + m->m_trtcp_addr = m->m_trtp_addr; + port = htons(m->m_rtcp_port); + rc = npf_nat_alloc_map(np, rl, nat_flags, m->m_ip_prot, + vrfid, &m->m_trtcp_addr, &port, 1); + if (rc) + return rc; + m->m_trtcp_port = ntohs(port); + m->m_trtcp_alen = m->m_rtcp_alen; + m->m_rtcp_reserved = true; + } else { + m->m_trtcp_addr = m->m_rtcp_addr; + m->m_trtcp_port = m->m_rtcp_port; + m->m_trtcp_alen = m->m_rtcp_alen; + } + } + + return rc; +} + +/* + * sip_media_translations() - Get media translations if needed. + */ +static int sip_media_translations(npf_session_t *se, + struct sip_alg_media *m, struct sip_alg_request *sr, + npf_nat_t *nat) +{ + int rc = 0; + + /* Set defaults for translation addrs/ports. */ + m->m_trtp_port = m->m_rtp_port; + m->m_trtp_addr = m->m_rtp_addr; + m->m_trtp_alen = m->m_rtp_alen; + + /* IPv6 or inspect: keep the defaults, just fill in rtcp */ + if (m->m_rtp_alen > 4 || sip_is_inspect(sr)) { + sip_alg_set_rtcp_media(m); + return rc; + } + + /* + * Handle both INVITE and OK's for both SNAT and DNAT. + * Only SNAT must reserve ports. 
+ */ + switch (sip_nat_type(sr)) { + case sip_nat_snat: + if (sip_forw(sr)) + rc = sip_alg_reserve_ports(se, m, nat); + else + sip_alg_set_rtcp_media(m); + break; + case sip_nat_dnat: + if (sip_forw(sr)) + sip_alg_set_rtcp_media(m); + else + sip_alg_dnat_rtcp_media(m, nat); + break; + default: + return -EINVAL; + } + return rc; +} + +static bool sip_do_translate(const struct sip_alg_request *sr) +{ + switch (sip_nat_type(sr)) { + case sip_nat_snat: + if (sip_forw(sr)) + return true; + break; + case sip_nat_dnat: + if (!sip_forw(sr)) + return true; + break; + default: /* Hush up gcc */ + break; + } + + return false; +} + +/* + * Parse the SDP "c=" and "m=" strings, and (if not in 'inspect' path) + * translate the "c=" address. + */ +int sip_alg_manage_media(npf_session_t *se, npf_nat_t *nat, + struct sip_alg_request *sr) +{ + int rc; + int pos; + struct sip_alg_media *m; + enum sdp_proto m_prot; + + /* + * Update session origin ("o=") address, SNAT only + */ + if (sip_is_snat(sr)) { + rc = sip_alg_sdp_update_origin(sr); + if (rc) + return rc; + } + + npf_addr_t s_rtp_addr; + uint8_t s_rtp_alen = 0; + + sip_alg_parse_session_media_addr(sr, &s_rtp_addr, &s_rtp_alen); + + for (pos = 0; !sdp_message_endof_media(sr->sr_sdp, pos) && + pos < SDP_MAX_MEDIA; pos++) { + + m_prot = sip_alg_sdp_get_media_proto(sr, pos); + + rc = -ENOMEM; + m = sip_media_alloc(se, sr, m_prot); + if (!m) + goto bad; + + rc = sip_alg_parse_media_addr(m, sr, pos); + if (rc) { + /* No media addr. 
Use SDP session addr */ + if (!s_rtp_alen) + goto bad; + m->m_rtp_addr = s_rtp_addr; + m->m_rtp_alen = s_rtp_alen; + } + + rc = sip_alg_parse_media_ports(m, sr, pos); + if (rc) + goto bad; + + rc = sip_media_translations(se, m, sr, nat); + if (rc) + goto bad; + + if (sip_do_translate(sr)) { + rc = sip_alg_translate_media(sr, m, pos); + if (rc) + goto bad; + } + + cds_list_add_tail(&m->m_node, &sr->sr_media_list_head); + } + + /* + * Translate the session connection address + */ + if (s_rtp_alen && sip_do_translate(sr)) + sip_alg_update_session_media(sr); + + return 0; +bad: + sip_media_free(m); + return rc; +} + +/* + * sip_alg_body_is_sdp() - Do we have an SDP message? + */ +static bool sip_alg_body_is_sdp(struct sip_alg_request *sr) +{ + osip_content_type_t *ct; + + ct = osip_message_get_content_type(sr->sr_sip); + if (!ct) + return false; + + if (ct->type && !strstr(ct->type, "application")) + return false; + + if (ct->subtype && !strstr(ct->subtype, "sdp")) + return false; + + return true; +} + +/* + * sip_alg_get_sdp() + */ +static int sip_alg_get_sdp(struct sip_alg_request *sr) +{ + osip_body_t *sdp_body; + sdp_message_t *sdp; + int rc; + + if (!sip_alg_body_is_sdp(sr)) + return 0; + + rc = osip_message_get_body(sr->sr_sip, 0, &sdp_body); + if (rc >= 0) { + rc = sdp_message_init(&sdp); + if (rc < 0) + return rc; + rc = sdp_message_parse(sdp, sdp_body->body); + if (!rc) + sr->sr_sdp = sdp; + else + sdp_message_free(sdp); + } + + return rc; +} + +/* + * Parse a sip packet using the osip library. We are only interested in + * packets containing an SDP message. Returns a sip_alg_request structure is + * successful. 
+ */ +struct sip_alg_request *sip_alg_parse(const struct npf_alg *sip, + npf_cache_t *npc, uint32_t if_idx, + struct rte_mbuf *nbuf) +{ + struct sip_alg_request *sr = NULL; + uint16_t plen; + char payload[SIP_MESSAGE_MAX_LENGTH + 1]; + int rc; + + plen = npf_payload_fetch(npc, nbuf, payload, + SIP_MSG_MIN_LENGTH, SIP_MESSAGE_MAX_LENGTH); + if (!plen) + return NULL; + + /* Make the payload a string */ + payload[plen] = '\0'; + + sr = sip_alg_request_alloc(true, if_idx); + if (!sr) + return NULL; + + rc = osip_message_parse(sr->sr_sip, payload, plen); + if (rc != 0) + goto bad; + + /* Get the sdp portion if present */ + rc = sip_alg_get_sdp(sr); + if (rc) + goto bad; + + return sr; + +bad: + sip_alg_request_free(sip, sr); + return NULL; +} + diff --git a/src/npf/alg/sip/sip_parse.h b/src/npf/alg/sip/sip_parse.h new file mode 100644 index 00000000..145d8d10 --- /dev/null +++ b/src/npf/alg/sip/sip_parse.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2020, AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ + +#ifndef _SIP_PARSE_H_ +#define _SIP_PARSE_H_ + +#include "npf/npf.h" +#include "npf/alg/alg.h" +#include "npf/npf_cache.h" +#include "npf/npf_nat.h" +#include "npf/npf_session.h" + +struct sip_alg_request; +struct rte_mbuf; + +int sip_alg_sdp_set_rtcp_attribute(struct sip_alg_request *sr, + int pos, npf_addr_t *taddr, uint8_t alen, + in_port_t tport); + +/* + * Parse the SDP "c=" and "m=" strings, and (if not in 'inspect' path) + * translate the "c=" address. + */ +int sip_alg_manage_media(npf_session_t *se, npf_nat_t *nat, + struct sip_alg_request *sr); + +/* + * Parse a sip packet using the osip library. We are only interested in + * packets containing an SDP message. Returns a sip_alg_request structure is + * successful. 
+ */ +struct sip_alg_request *sip_alg_parse(const struct npf_alg *sip, + npf_cache_t *npc, uint32_t if_idx, + struct rte_mbuf *nbuf); + +#endif diff --git a/src/npf/alg/sip/sip_request.c b/src/npf/alg/sip/sip_request.c new file mode 100644 index 00000000..efe035da --- /dev/null +++ b/src/npf/alg/sip/sip_request.c @@ -0,0 +1,784 @@ +/* + * Copyright (c) 2020, AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ + +/* + * SIP request hash table and media list functions; SIP request handling. + * + * SIP request structures (struct sip_alg_request) are stored in a hash table + * in the alg private data structure (struct sip_alg_private). The lookup key + * is the SIP request Call-ID number. + * + * SIP media structures are stored in a list in the SIP request structures. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "dp_event.h" +#include "vrf.h" +#include "util.h" +#include "vplane_log.h" + +#include "npf/alg/alg.h" +#include "npf/alg/sip/sip.h" +#include "npf/alg/sip/sip_osip.h" + +/* Hash table config */ +#define SIP_HT_INIT 32 +#define SIP_HT_MIN 32 +#define SIP_HT_MAX 1024 + +/* + * Struct for matching hash table requests + */ +struct sip_request_match { + osip_call_id_t *sm_call_id; + uint32_t sm_if_idx; +}; + +/* + * Default lifetime for a request in the hash table. + */ +#define SIP_DEFAULT_REQUEST_TIMEOUT 32 + + +/* Forward reference */ +static int sip_alg_add_invite(const struct npf_alg *sip, + struct sip_alg_request *sr); + + +/*************************************************************** + * Media List + **************************************************************/ + +/* + * sip_alg_release_translation() - Release preallocated translation data. 
+ */ +static void sip_alg_release_translation(struct sip_alg_media *m, + npf_addr_t taddr, in_port_t port) +{ + + if (m->m_np) + npf_nat_free_map(m->m_np, m->m_rl, m->m_nat_flags, m->m_ip_prot, + m->m_vrfid, taddr, htons(port)); +} + +/* + * sip_media_alloc() - Allocate a media struct + */ +struct sip_alg_media *sip_media_alloc(npf_session_t *se, + struct sip_alg_request *sr, int m_proto) +{ + struct sip_alg_media *m; + npf_nat_t *nat = npf_session_get_nat(se); + + m = calloc(1, sizeof(struct sip_alg_media)); + if (!m) + return NULL; + + CDS_INIT_LIST_HEAD(&m->m_node); + m->m_np = npf_nat_get_policy(nat); + if (m->m_np) { + m->m_rl = npf_nat_get_rule(nat); + m->m_nat_flags = npf_nat_get_map_flags(nat); + m->m_vrfid = npf_session_get_vrfid(se); + } + + m->m_proto = m_proto; + m->m_type = sip_nat_type(sr); + return m; +} + +/* + * Free a ports struct, if the ports were + * allocated from a nat pool, return them. + */ +void sip_media_free(void *_m) +{ + struct sip_alg_media *m = _m; + + if (!m) + return; + if (m->m_type != sip_nat_inspect) { + if (m->m_rtp_reserved) + sip_alg_release_translation(m, + m->m_trtp_addr, m->m_trtp_port); + if (m->m_rtcp_reserved) + sip_alg_release_translation(m, + m->m_trtcp_addr, m->m_trtcp_port); + } + free(m); +} + +/* Free dead media structs from the instance */ +static void sip_free_dead_media(struct sip_private *sp) +{ + struct sip_alg_media *m; + struct sip_alg_media *tmp; + + if (!sp) + return; + + rte_spinlock_lock(&sp->sp_media_lock); + cds_list_for_each_entry_safe(m, tmp, &sp->sp_dead_media, m_node) { + cds_list_del(&m->m_node); + sip_media_free(m); + } + rte_spinlock_unlock(&sp->sp_media_lock); +} + +/* + * sip_media_count() + */ +int sip_media_count(struct cds_list_head *h) +{ + struct cds_list_head *p; + int i = 0; + + cds_list_for_each(p, h) + i++; + + return i; +} + + +/*************************************************************** + * SIP Request + **************************************************************/ + +/* + * 
sip_alg_request_alloc() + */ +struct sip_alg_request *sip_alg_request_alloc(bool init_sip, + uint32_t if_idx) +{ + struct sip_alg_request *sr; + + sr = calloc(1, sizeof(struct sip_alg_request)); + if (!sr) + return NULL; + + CDS_INIT_LIST_HEAD(&sr->sr_media_list_head); + sr->sr_if_idx = if_idx; + + if (init_sip && osip_message_init(&sr->sr_sip)) { + free(sr); + sr = NULL; + } + + return sr; +} + +static void sip_request_free_rcu(struct rcu_head *head) +{ + struct sip_alg_request *sr = caa_container_of(head, + struct sip_alg_request, sr_rcu_head); + struct sip_private *sp = sr->sr_sip_alg->na_private; + + /* + * Move medias to the instance for deletion + * via the sip GC + */ + if (sp) { + rte_spinlock_lock(&sp->sp_media_lock); + cds_list_splice(&sr->sr_media_list_head, &sp->sp_dead_media); + rte_spinlock_unlock(&sp->sp_media_lock); + } + + if (sr->sr_sip) + osip_message_free(sr->sr_sip); + if (sr->sr_sdp) + sdp_message_free(sr->sr_sdp); + free(sr); +} + +/* + * Free a sip msg, always via RCU. + */ +void +sip_alg_request_free(const struct npf_alg *sip, struct sip_alg_request *sr) +{ + if (sr) { + sr->sr_sip_alg = sip; + call_rcu(&sr->sr_rcu_head, sip_request_free_rcu); + } +} + +/* + * Synchronously free a sip msg. + * Used when destroying the sip instance. + */ +static void sip_alg_request_free_sync(const struct npf_alg *sip, + struct sip_alg_request *sr) +{ + if (sr) { + sr->sr_sip_alg = sip; + /* Call the rcu free variant synchronously */ + sip_request_free_rcu(&sr->sr_rcu_head); + } +} + +/* + * sip_parse_reply_path() - Parse the first VIA for reply path parameters. + */ +static int sip_parse_reply_path(struct sip_alg_request *sr, npf_session_t *se) +{ + struct sip_alg_session *ss = npf_alg_session_get_private(se); + int rc; + + /* + * The SIP RFC states that responses are always routed to the + * VIA path. In the case of newer Cicso phones, a high number + * sport is used with replies expected on the SIP default port + * (5060). 
See the inspect and nat routines for more details. + * + * This means we may need to translate all replies after + * receiving the first msg. So grab the needed addr/port here and + * save it in the session handle. + */ + + if (!ss) + return -ENOENT; + + rc = 0; + if (!ss->ss_via_port) { + osip_via_t *v = NULL; + + ss->ss_ifx = npf_session_get_if_index(se); + osip_message_get_via(sr->sr_sip, 0, &v); + if (v) { + /* + * This may fail if a port number is not specified in + * VIA string. This is ok. When this occurs the + * default SIP port is used. + */ + ss->ss_via_port = htons(npf_port_from_str( + osip_via_get_port(v))); + + /* + * Note, this my fail if the VIA address is a FQDN, in + * which case ss_via_alen will be left at 0. + */ + ss->ss_via_alen = 0; + sip_addr_from_str(osip_via_get_host(v), + &ss->ss_via_addr, &ss->ss_via_alen); + rc = 0; + + } else + rc = -EINVAL; + } + return rc; +} + +/* + * Create and add a tuple from a session, but with a wildcard source port. + * This tuple is subsequently expired when the SIP Request is expired. + */ +static int sip_alg_add_cntl_tuple(npf_session_t *se, npf_cache_t *npc) +{ + struct npf_alg *sip = npf_alg_session_get_alg(se); + struct apt_match_key m = { 0 }; + struct apt_tuple *at; + npf_addr_t dstip; + uint16_t dport; + + + npf_nat_get_trans(npf_session_get_nat(se), &dstip, + &dport); + + m.m_proto = IPPROTO_UDP; + m.m_match = APT_MATCH_ANY_SPORT; + m.m_ifx = npf_session_get_if_index(se); + m.m_alen = 4; + m.m_sport = 0; + m.m_dport = dport; + m.m_srcip = npf_cache_dstip(npc); + m.m_dstip = &dstip; + + at = apt_tuple_create_and_insert(sip->na_ai->ai_apt, &m, + npf_alg_get(sip), + SIP_ALG_ALT_CNTL_FLOW, + NPF_ALG_SIP_NAME, false, true); + + if (!at) { + npf_alg_put(sip); + return -EINVAL; + } + + apt_tuple_set_session(at, se); + apt_tuple_set_multimatch(at, true); + npf_alg_session_set_flag(se, SIP_ALG_ALT_TUPLE_SET); + + return 0; +} + +/* + * Add a control tuple if we are using SNAT. 
Cisco SIP Gateways send SIP + * response messages with a (per-call?) random source port. This sets up a + * tuple in the reverse direction (c/w with REQUEST) that matches on any + * source port. + */ +static int sip_alg_manage_cntl(npf_session_t *se, npf_cache_t *npc, + struct sip_alg_request *sr) +{ + struct sip_alg_session *ss; + uint32_t flags = npf_alg_session_get_flags(se); + + /* Already added? */ + if (flags & SIP_ALG_ALT_TUPLE_SET) + return 0; + + /* Only if this is a UDP connection. */ + if (npf_session_get_proto(se) != IPPROTO_UDP) + return 0; + + /* Only add from a CNTL session. */ + if (!(flags & SIP_ALG_CNTL_FLOW)) + return 0; + + ss = npf_alg_session_get_private(se); + if (!ss) + return 0; + + /* Only in forward direction */ + if (!sip_forw(sr)) + return 0; + + /* Currently only supports SNAT */ + if (!sip_is_snat(sr)) + return 0; + + return sip_alg_add_cntl_tuple(se, npc); +} + +/* + * Add the call id on the session handle private data. + * We will expire these then the session handle is expired. + * + * Note this is non-fatal if we cannot add it. All it means is + * that the INVITES will timeout/expire. + */ +static void sip_alg_add_session_call_id(npf_session_t *se, + struct sip_alg_request *sr) +{ + osip_call_id_t *cid; + struct sip_alg_session *ss; + int i; + size_t sz; + + /* Only CNTL sessions have private data */ + ss = npf_alg_session_get_private(se); + if (!ss) + return; + + cid = osip_message_get_call_id(sr->sr_sip); + if (!cid) + return; + + /* Only add unique, ignore re-transmissions... 
*/ + for (i = 0; i < ss->ss_call_id_count; i++) { + if (osip_call_id_match(cid, ss->ss_call_ids[i]) + == OSIP_SUCCESS) + return; + } + + sz = sizeof(osip_call_id_t *) * (ss->ss_call_id_count + 1); + ss->ss_call_ids = realloc(ss->ss_call_ids, sz); + if (!ss->ss_call_ids) + return; + if (osip_call_id_clone(cid, &ss->ss_call_ids[ss->ss_call_id_count]) != + OSIP_SUCCESS) + return; + + ss->ss_call_id_count++; +} + +/* Expire all SIP requests on this session handle */ +void sip_expire_session_request(npf_session_t *se) +{ + struct sip_alg_session *ss = npf_alg_session_get_private(se); + struct npf_alg *sip = npf_alg_session_get_alg(se); + struct sip_alg_request *sr; + uint32_t if_idx = npf_session_get_if_index(se); + int i; + + if (!ss) + return; + + + for (i = 0 ; i < ss->ss_call_id_count; i++) { + sr = sip_request_lookup_by_call_id(sip, if_idx, + ss->ss_call_ids[i]); + if (sr) + sip_request_expire(sr); + /* free this call id */ + osip_call_id_free(ss->ss_call_ids[i]); + } + + /* reset so expire/destroy doesn't repeat */ + ss->ss_call_id_count = 0; + free(ss->ss_call_ids); + ss->ss_call_ids = NULL; +} + +/* + * Manage all SIP requests. If appropriate, add + * an INVITE to the sip hash table. + * + * Note that all call-ids for INVITEs are added to the + * CNTL session handle, and are expired (if they exist) + * when the session handle is expired. + * + * We will also add the ALT CNTL tuple in here, but only + * once. + */ +int sip_manage_request(npf_session_t *se, npf_cache_t *npc, + struct sip_alg_request *sr, + struct sip_alg_request *tsr, + npf_nat_t *nat, bool *consumed) +{ + struct npf_alg *sip = npf_alg_session_get_alg(se); + int rc; + + /* Set per-packet info */ + npc->npc_alg_flags = SIP_NPC_REQUEST; + + if (MSG_IS_CANCEL(tsr->sr_sip) || MSG_IS_BYE(tsr->sr_sip)) { + sip_request_lookup_and_expire(sip, tsr); + return 0; + } + + /* + * Get the reply path from the VIA header, must be present. 
Note that + * this call returns an error for Requests in the reverse direction, + * hence we handle the CANCEL and BYE Requests above. + */ + rc = sip_parse_reply_path(sr, se); + if (rc) + return rc; + + /* This will only add the alt cntl tuple once */ + rc = sip_alg_manage_cntl(se, npc, tsr); + if (rc) + return rc; + + /* Either parse and add the INVITE, or handle a BYE/etc */ + if (MSG_IS_INVITE(tsr->sr_sip)) { + if (!sr->sr_sdp) + return -EINVAL; + + rc = sip_alg_manage_media(se, nat, tsr); + if (rc) + return rc; + + rc = sip_alg_add_invite(sip, tsr); + if (!rc) { + sip_alg_add_session_call_id(se, tsr); + *consumed = true; + } + + } + + return rc; +} + + +/*************************************************************** + * SIP Request Hash Table + **************************************************************/ + +/* + * sip_alg_hash() - Create a hash out of the Call-ID number. This is unique. + * + * The jhash reads in 4 byte words, so make sure that it doesn't read off + * the end of allocated mem. + */ +static unsigned long sip_alg_hash(struct sip_request_match *sm) +{ + char *tmp; + unsigned long hash = 0; + + if (!sm->sm_call_id) + return hash; + + tmp = osip_call_id_get_number(sm->sm_call_id); + if (tmp) { + char __tmp[RTE_ALIGN(strlen(tmp), 4)] + __rte_aligned(sizeof(uint32_t)); + + memcpy(__tmp, tmp, strlen(tmp)); + hash = rte_jhash(__tmp, strlen(tmp), hash); + } + + tmp = osip_call_id_get_host(sm->sm_call_id); + if (tmp) { + char __tmp[RTE_ALIGN(strlen(tmp), 4)] + __rte_aligned(sizeof(uint32_t)); + + memcpy(__tmp, tmp, strlen(tmp)); + hash = rte_jhash(__tmp, strlen(tmp), hash); + } + + return hash ? 
rte_jhash_1word(sm->sm_if_idx, hash) : 0; +} + +/* + * sip_ht_match() - Match function for hash table + */ +static int sip_ht_match(struct cds_lfht_node *node, const void *key) +{ + const struct sip_alg_request *sr = caa_container_of(node, + struct sip_alg_request, sr_node); + const struct sip_request_match *sm = key; + + if (sr->sr_flags & SIP_REQUEST_EXPIRED) + return 0; + + if (sm->sm_if_idx != sr->sr_if_idx) + return 0; + + return !osip_call_id_match(osip_message_get_call_id(sr->sr_sip), + sm->sm_call_id); +} + +/* + * sip_request_lookup_by_call_id() - Lookup by call id. + */ +struct sip_alg_request * +sip_request_lookup_by_call_id(const struct npf_alg *sip, uint32_t if_idx, + osip_call_id_t *call_id) +{ + struct cds_lfht_iter iter; + struct cds_lfht_node *node; + unsigned long hash; + struct sip_alg_request *sr; + struct sip_private *sp; + struct sip_request_match sm = { + .sm_call_id = call_id, + .sm_if_idx = if_idx + }; + + if (!sip) + return NULL; + + sp = sip->na_private; + if (!sp) + return NULL; + + hash = sip_alg_hash(&sm); + if (!hash) + return NULL; + + cds_lfht_lookup(sp->sp_ht, hash, sip_ht_match, &sm, &iter); + node = cds_lfht_iter_get_node(&iter); + if (node) + sr = caa_container_of(node, struct sip_alg_request, sr_node); + else + sr = NULL; + + return sr; +} + +/* + * sip_request_lookup() - Lookup a request + */ +struct sip_alg_request *sip_request_lookup(const struct npf_alg *sip, + struct sip_alg_request *incoming) +{ + osip_call_id_t *call_id; + + call_id = osip_message_get_call_id(incoming->sr_sip); + return sip_request_lookup_by_call_id(sip, incoming->sr_if_idx, call_id); +} + +/* + * sip_request_expire() - Expire an invite from the hash table. + */ +void sip_request_expire(struct sip_alg_request *sr) +{ + if (!(sr->sr_flags & SIP_REQUEST_EXPIRED)) + sr->sr_flags |= SIP_REQUEST_EXPIRED; +} + +/* + * sip_request_lookup_and_expire() - Expire an invite from the hash table. 
+ */ +void sip_request_lookup_and_expire(const struct npf_alg *sip, + struct sip_alg_request *incoming) +{ + struct sip_alg_request *sr; + + if (incoming) { + sr = sip_request_lookup(sip, incoming); + if (sr) + sip_request_expire(sr); + } +} + +/* + * sip_alg_expires() - Get an expiration time for this request + */ +static uint64_t sip_alg_expires(struct sip_alg_request *sr) +{ + osip_header_t *expires; + unsigned long timeout = 0; + char *end; + int rc; + + /* Does the request have an expires field? */ + rc = osip_message_get_expires(sr->sr_sip, 0, &expires); + if (rc >= 0 && expires->hvalue) { + timeout = strtoul(expires->hvalue, &end, 10); + if (*end) + timeout = 0; + } + + /* + * If unset or bogus, or greater than 24h, set a default. + */ + if (!timeout || timeout > 84600) + timeout = SIP_DEFAULT_REQUEST_TIMEOUT; + + return (uint64_t) timeout; +} + +/* + * sip_alg_add_invite() - Add an invite to the hash table. + */ +static int sip_alg_add_invite(const struct npf_alg *sip, + struct sip_alg_request *sr) +{ + unsigned long hash; + struct cds_lfht_node *node; + struct sip_request_match sm; + struct sip_private *sp = sip->na_private; + + if (!MSG_IS_INVITE(sr->sr_sip)) + return -EINVAL; + + if (!sp) + return -EINVAL; + + sm.sm_call_id = osip_message_get_call_id(sr->sr_sip); + sm.sm_if_idx = sr->sr_if_idx; + hash = sip_alg_hash(&sm); + if (!hash) + return -EINVAL; + + cds_lfht_node_init(&sr->sr_node); + sr->sr_timeout = sip_alg_expires(sr); + sr->sr_timeout *= rte_get_timer_hz(); /* to cycles */ + sr->sr_timeout += rte_get_timer_cycles(); /* add current time */ + + node = cds_lfht_add_unique(sp->sp_ht, hash, sip_ht_match, &sm, + &sr->sr_node); + if (node != &sr->sr_node) + return -EEXIST; + + return 0; +} + +/* + * sip_delete_request() - Delete an invite from the hash table. 
+ */ +static void sip_delete_request(struct npf_alg *sip, + struct sip_alg_request *sr) +{ + struct sip_private *sp = sip->na_private; + + if (sr && sp && !cds_lfht_del(sp->sp_ht, &sr->sr_node)) + sip_alg_request_free(sip, sr); +} + +void sip_destroy_ht(struct npf_alg *sip) +{ + struct cds_lfht_iter iter; + struct sip_alg_request *sr; + struct sip_private *sp = sip->na_private; + int rc; + + if (!sp) + return; + + /* + * Free each request synchronously - ensures we + * sync return APM mappings prior tp APM instance destroy + */ + cds_lfht_for_each_entry(sp->sp_ht, &iter, sr, sr_node) { + if (!cds_lfht_del(sp->sp_ht, &sr->sr_node)) + sip_alg_request_free_sync(sip, sr); + } + + dp_rcu_read_unlock(); + rc = cds_lfht_destroy(sp->sp_ht, NULL); + dp_rcu_read_lock(); + if (rc) + RTE_LOG(ERR, FIREWALL, "ALG: SIP cds_lfht_destroy\n"); + + /* Destroy any dead media added during ht destroy */ + sip_free_dead_media(sp); +} + +/* + * Create SIP alg hash table. + * + * We manage Invites and responses by using a hash table. New invites are + * added to the table, and corresponding responses pull them from the hash + * table. + */ +int sip_ht_create(struct sip_private *sp) +{ + sp->sp_ht = cds_lfht_new(SIP_HT_INIT, SIP_HT_MIN, SIP_HT_MAX, + CDS_LFHT_AUTO_RESIZE | CDS_LFHT_ACCOUNTING, NULL); + if (!sp->sp_ht) + return -EINVAL; + + return 0; +} + +static bool sip_ht_expired(uint64_t curr, struct sip_alg_request *sr) +{ + if (sr->sr_flags & SIP_REQUEST_EXPIRED) + return true; + if (sr->sr_timeout < curr) { + sip_request_expire(sr); + return true; + } + return false; +} + +/* + * sip_ht_gc() - Clean stale entries from the hash table. 
+ */ +void sip_ht_gc(struct npf_alg *sip) +{ + struct cds_lfht_iter iter; + struct sip_alg_request *sr; + uint64_t current = rte_get_timer_cycles(); + struct sip_private *sp = sip->na_private; + + if (!sp) + return; + + /* Always free any medias first */ + sip_free_dead_media(sp); + + cds_lfht_for_each_entry(sp->sp_ht, &iter, sr, sr_node) { + if (!sip_ht_expired(current, sr)) + continue; + + if (sr->sr_flags & SIP_REQUEST_REMOVING) + sip_delete_request(sip, sr); + else + sr->sr_flags |= SIP_REQUEST_REMOVING; + } +} + diff --git a/src/npf/alg/sip/sip_request.h b/src/npf/alg/sip/sip_request.h new file mode 100644 index 00000000..1eede6f1 --- /dev/null +++ b/src/npf/alg/sip/sip_request.h @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2020, AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ + +#ifndef _SIP_REQUEST_H_ +#define _SIP_REQUEST_H_ + +#include "npf/npf.h" +#include "npf/alg/alg.h" +#include "npf/npf_cache.h" +#include "npf/npf_nat.h" +#include "npf/npf_session.h" + +struct sip_alg_request; +struct sip_alg_media; +struct sip_private; +struct rte_mbuf; +struct npf_alg; + +/* + * SIP media list + */ +struct sip_alg_media *sip_media_alloc(npf_session_t *se, + struct sip_alg_request *sr, + int m_proto); +void sip_media_free(void *_m); +int sip_media_count(struct cds_list_head *h); + +/* + * SIP request alloc and free + */ +struct sip_alg_request *sip_alg_request_alloc(bool init_sip, + uint32_t if_idx); +void sip_alg_request_free(const struct npf_alg *sip, + struct sip_alg_request *sr); + +void sip_expire_session_request(npf_session_t *se); + +/* + * Manage SIP request + */ +int sip_manage_request(npf_session_t *se, npf_cache_t *npc, + struct sip_alg_request *sr, + struct sip_alg_request *tsr, + npf_nat_t *nat, bool *consumed); + +/* + * SIP request hash table + */ +struct sip_alg_request *sip_request_lookup_by_call_id(const struct npf_alg *sip, + uint32_t if_idx, + osip_call_id_t *call_id); +struct sip_alg_request 
*sip_request_lookup(const struct npf_alg *sip, + struct sip_alg_request *incoming); +void sip_request_lookup_and_expire(const struct npf_alg *sip, + struct sip_alg_request *incoming); +void sip_request_expire(struct sip_alg_request *sr); +void sip_destroy_ht(struct npf_alg *sip); +int sip_ht_create(struct sip_private *sp); +void sip_ht_gc(struct npf_alg *sip); + + +#endif diff --git a/src/npf/alg/sip/sip_response.c b/src/npf/alg/sip/sip_response.c new file mode 100644 index 00000000..86c6f95d --- /dev/null +++ b/src/npf/alg/sip/sip_response.c @@ -0,0 +1,548 @@ +/* + * Copyright (c) 2020, AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ + +/* + * SIP response handilng. + * + * Responses are matched to Invite Requests previously save to a hash table. + * The media contained in the Invite and Response are use to create secondary + * sessions. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "dp_event.h" +#include "vrf.h" +#include "util.h" +#include "vplane_log.h" + +#include "npf/alg/alg.h" +#include "npf/alg/sip/sip.h" +#include "npf/alg/sip/sip_osip.h" + +/* + * sip_tuple_data_alloc() - Alloc a tuple data struct + */ +static struct sip_tuple_data *sip_tuple_data_alloc(const struct npf_alg *sip, + struct sip_alg_request *sr, struct sip_alg_media *mi, + struct sip_alg_media *mr) +{ + struct sip_tuple_data *td = calloc(1, sizeof(struct sip_tuple_data)); + + if (td) { + memcpy(&td->td_nat, &sr->sr_nat, sizeof(struct sip_nat)); + td->td_sip = sip; + td->td_mi = mi; + td->td_mr = mr; + rte_atomic32_set(&td->td_refcnt, 0); + td->td_is_reverse = false; + } + return td; +} + +/* + * Free tuple data after last reference has been removed + */ +static void sip_tuple_data_free(struct sip_tuple_data *td) +{ + if (!td) + return; + + assert(rte_atomic32_read(&td->td_refcnt) == 0); + + if (td->td_mi) { + sip_media_free(td->td_mi); + td->td_mi = NULL; + } + if (td->td_mr) { + 
sip_media_free(td->td_mr); + td->td_mr = NULL; + } + free(td); +} + +/* + * sip_tuple_data_get() + */ +static struct sip_tuple_data *sip_tuple_data_get(struct sip_tuple_data *td) +{ + if (td) + rte_atomic32_inc(&td->td_refcnt); + return td; +} + +/* + * sip_tuple_data_put() + */ +static void sip_tuple_data_put(struct sip_tuple_data *td) +{ + if (td && rte_atomic32_dec_and_test(&td->td_refcnt)) + sip_tuple_data_free(td); +} + +/* + * Attach a sip tuple data structure to a tuple, and take a reference on the + * tuple data. + */ +static void +sip_tuple_data_attach(struct apt_tuple *nt, struct sip_tuple_data *td) +{ + sip_tuple_data_get(td); + apt_tuple_set_client_data(nt, td); +} + +/* Called via ops tuple_delete callback */ +void sip_tuple_data_detach(struct apt_tuple *nt) +{ + void *data; + + data = apt_tuple_get_client_data(nt); + if (data) { + apt_tuple_set_client_data(nt, NULL); + sip_tuple_data_put((struct sip_tuple_data *)data); + } +} + +/* + * sip_alg_handle_error_response() + */ +static bool sip_alg_handle_error_response(const struct npf_alg *sip, + struct sip_alg_request *sr) +{ + /* + * These responses imply a failure and/or a future re-submit of the + * invite request, so delete the one we currently have and let + * the protocol try again. + */ + if (MSG_IS_STATUS_3XX(sr->sr_sip) || + MSG_IS_STATUS_4XX(sr->sr_sip) || + MSG_IS_STATUS_5XX(sr->sr_sip) || + MSG_IS_STATUS_6XX(sr->sr_sip)) { + sip_request_lookup_and_expire(sip, sr); + return true; + } + return false; +} + +static void +sip_alg_tuple_init(struct apt_match_key *m, npf_session_t *se, uint8_t alen) +{ + m->m_ifx = npf_session_get_if_index(se); + m->m_match = APT_MATCH_ALL; + m->m_proto = IPPROTO_UDP; + m->m_alen = alen; +} + +/* + * sip_alg_create_rtp_tuples() - Create the RTP or UDP tuples. Note that this + * traffic is di-directional, so we need to create + * one for each possible direction. 
 */
static int
sip_alg_create_rtp_tuples(npf_session_t *se, struct npf_alg *sip,
		struct sip_alg_request *sr, struct sip_alg_media *mi,
		struct sip_alg_media *mr)
{
	struct sip_tuple_data *td = NULL;
	struct npf_alg_instance *ai = sip->na_ai;
	struct apt_tuple *forward = NULL;
	struct apt_tuple *reverse = NULL;
	struct apt_match_key fwd_m = { 0 }, rev_m = { 0 };
	uint32_t fwd_alg_flags = 0;
	uint32_t rev_alg_flags = 0;


	/*
	 * Set a private data field for the rtp/udp tuples. This flow
	 * will create tuples for the rtcp flow if needed.
	 *
	 * allocated ports may be reclaimed when the tuples are deleted.
	 */

	td = sip_tuple_data_alloc(sip, sr, mi, mr);
	if (!td) {
		/* The medias are consumed even on failure */
		sip_media_free(mi);
		sip_media_free(mr);
		return -ENOMEM;
	}

	/* Common init */
	sip_alg_tuple_init(&fwd_m, se, mi->m_rtp_alen);
	sip_alg_tuple_init(&rev_m, se, mi->m_rtp_alen);

	fwd_alg_flags = SIP_ALG_RTP_FLOW;
	rev_alg_flags = SIP_ALG_RTP_FLOW;

	/* Set ports/addrs/flags */
	switch (td_nat_type(td)) {
	case sip_nat_snat:
		fwd_alg_flags |= SIP_ALG_NAT;
		fwd_m.m_sport = htons(mi->m_rtp_port);
		fwd_m.m_srcip = &mi->m_rtp_addr;
		fwd_m.m_dport = htons(mr->m_rtp_port);
		fwd_m.m_dstip = &mr->m_rtp_addr;

		rev_alg_flags |= SIP_ALG_NAT;
		rev_m.m_sport = htons(mr->m_trtp_port);
		rev_m.m_srcip = &mr->m_trtp_addr;
		rev_m.m_dport = htons(mi->m_trtp_port);
		rev_m.m_dstip = &mi->m_trtp_addr;
		break;
	case sip_nat_dnat:
		fwd_alg_flags |= SIP_ALG_NAT;
		fwd_m.m_sport = htons(mi->m_rtp_port);
		fwd_m.m_srcip = &mi->m_rtp_addr;
		fwd_m.m_dport = htons(mr->m_rtp_port);
		fwd_m.m_dstip = &mr->m_rtp_addr;

		rev_alg_flags |= SIP_ALG_NAT;
		rev_m.m_sport = htons(mr->m_trtp_port);
		rev_m.m_srcip = &mr->m_trtp_addr;
		rev_m.m_dport = htons(mi->m_rtp_port);
		rev_m.m_dstip = &mi->m_rtp_addr;
		break;
	case sip_nat_inspect:
		fwd_m.m_sport = htons(mi->m_rtp_port);
		fwd_m.m_srcip = &mi->m_rtp_addr;
		fwd_m.m_dport = htons(mr->m_rtp_port);
		fwd_m.m_dstip = &mr->m_rtp_addr;

		rev_m.m_sport = htons(mr->m_rtp_port);
		rev_m.m_srcip = &mr->m_rtp_addr;
		rev_m.m_dport = htons(mi->m_rtp_port);
		rev_m.m_dstip = &mi->m_rtp_addr;
		break;
	default:
		/* Frees the tuple data together with both medias */
		sip_tuple_data_free(td);
		return -EINVAL;
	}

	forward = apt_tuple_create_and_insert(ai->ai_apt, &fwd_m,
			npf_alg_get(sip),
			fwd_alg_flags, NPF_ALG_SIP_NAME,
			true, false);
	if (!forward) {
		/* Drop the alg reference taken for the tuple */
		npf_alg_put(sip);
		sip_tuple_data_free(td);
		return -ENOMEM;
	}

	apt_tuple_set_session(forward, se);

	/* Attach tuple data to forwards tuple */
	sip_tuple_data_attach(forward, td);

	/* Now deal with the reverse tuple */
	rev_alg_flags |= SIP_ALG_REVERSE;

	reverse = apt_tuple_create_and_insert(ai->ai_apt, &rev_m,
			npf_alg_get(sip),
			rev_alg_flags, NPF_ALG_SIP_NAME,
			true, false);
	if (!reverse) {
		npf_alg_put(sip);
		/*
		 * td is now attached to the forward tuple, so it is not
		 * freed directly here; the forward tuple is expired instead.
		 */
		alg_apt_tuple_expire(forward);
		return -ENOMEM;
	}

	apt_tuple_set_session(reverse, se);

	/* Attach tuple data to reverse tuple */
	sip_tuple_data_attach(reverse, td);

	alg_apt_tuple_pair(forward, reverse);

	return 0;
}

/*
 * sip_alg_create_rtcp_tuples() - Create the forward and reverse RTCP tuples
 * from the medias held in the tuple data.
 *
 * Nothing is created when either media has a zero RTCP port, when the
 * invite media's SDP protocol is sdp_proto_udp, or when the NAT type is not
 * one of snat/dnat/inspect.  Failures are silent: the function returns void.
 */
void sip_alg_create_rtcp_tuples(npf_session_t *se, npf_cache_t *npc,
		struct sip_tuple_data *td)
{
	struct sip_alg_media *mi = td->td_mi;
	struct sip_alg_media *mr = td->td_mr;
	struct npf_alg *sip = npf_alg_session_get_alg(se);
	struct npf_alg_instance *ai = sip->na_ai;
	struct apt_tuple *forward = NULL;
	struct apt_tuple *reverse = NULL;
	struct apt_match_key fwd_m = { 0 }, rev_m = { 0 };
	uint32_t fwd_alg_flags = 0;
	uint32_t rev_alg_flags = 0;

	/*
	 * If the rtcp ports are zero, we have nothing to do.
	 */
	if (!mi->m_rtcp_port || !mr->m_rtcp_port)
		return;

	/*
	 * If this is a UDP SDP proto, then we are done.
	 */
	if (mi->m_proto == sdp_proto_udp)
		return;

	/* Common init */
	sip_alg_tuple_init(&fwd_m, se, npc->npc_alen);
	sip_alg_tuple_init(&rev_m, se, npc->npc_alen);

	fwd_alg_flags = SIP_ALG_RTCP_FLOW;
	rev_alg_flags = SIP_ALG_RTCP_FLOW;

	/* Set ports/addrs/flags */
	switch (td_nat_type(td)) {
	case sip_nat_snat:
		/*
		 * NOTE(review): the RTP variant uses mr's untranslated
		 * address/port for the forward destination and mr's
		 * translated ones for the reverse source; here it is the
		 * other way round.  Confirm the asymmetry is intended.
		 */
		fwd_alg_flags |= SIP_ALG_NAT;
		fwd_m.m_srcip = &mi->m_rtcp_addr;
		fwd_m.m_sport = htons(mi->m_rtcp_port);
		fwd_m.m_dstip = &mr->m_trtcp_addr;
		fwd_m.m_dport = htons(mr->m_trtcp_port);

		rev_alg_flags |= SIP_ALG_NAT;
		rev_m.m_srcip = &mr->m_rtcp_addr;
		rev_m.m_sport = htons(mr->m_rtcp_port);
		rev_m.m_dstip = &mi->m_trtcp_addr;
		rev_m.m_dport = htons(mi->m_trtcp_port);
		break;
	case sip_nat_dnat:
		fwd_alg_flags |= SIP_ALG_NAT;
		fwd_m.m_srcip = &mi->m_rtcp_addr;
		fwd_m.m_sport = htons(mi->m_rtcp_port);
		fwd_m.m_dstip = &mr->m_rtcp_addr;
		fwd_m.m_dport = htons(mr->m_rtcp_port);

		rev_alg_flags |= SIP_ALG_NAT;
		rev_m.m_srcip = &mr->m_trtcp_addr;
		rev_m.m_sport = htons(mr->m_trtcp_port);
		rev_m.m_dstip = &mi->m_rtcp_addr;
		rev_m.m_dport = htons(mi->m_rtcp_port);
		break;
	case sip_nat_inspect:
		fwd_m.m_srcip = &mi->m_rtcp_addr;
		fwd_m.m_sport = htons(mi->m_rtcp_port);
		fwd_m.m_dstip = &mr->m_rtcp_addr;
		fwd_m.m_dport = htons(mr->m_rtcp_port);

		rev_m.m_srcip = &mr->m_rtcp_addr;
		rev_m.m_sport = htons(mr->m_rtcp_port);
		rev_m.m_dstip = &mi->m_rtcp_addr;
		rev_m.m_dport = htons(mi->m_rtcp_port);
		break;
	default:
		return;
	}

	forward = apt_tuple_create_and_insert(ai->ai_apt, &fwd_m,
			npf_alg_get(sip),
			fwd_alg_flags, NPF_ALG_SIP_NAME,
			true, false);
	if (!forward) {
		/* Drop the alg reference taken for the tuple */
		npf_alg_put(sip);
		return;
	}

	apt_tuple_set_session(forward, se);

	/* Attach tuple data to forwards tuple */
	sip_tuple_data_attach(forward, td);

	/* Now deal with the reverse tuple */
	rev_alg_flags |= SIP_ALG_REVERSE;

	reverse = apt_tuple_create_and_insert(ai->ai_apt, &rev_m,
			npf_alg_get(sip),
			rev_alg_flags, NPF_ALG_SIP_NAME,
			true, false);
	if (!reverse) {
		npf_alg_put(sip);
		alg_apt_tuple_expire(forward);
		return;
	}

	apt_tuple_set_session(reverse, se);

	/* Attach tuple data to reverse tuple */
	sip_tuple_data_attach(reverse, td);

	alg_apt_tuple_pair(forward, reverse);

}

/*
 * sip_alg_resolve_media() - sync up invite/response and create tuples.
 *
 * Returns -1 when the invite and the response carry a different number of
 * medias, otherwise 0 or the first error from sip_alg_create_rtp_tuples().
 */
static int sip_alg_resolve_media(npf_session_t *se,
		struct sip_alg_request *invite,
		struct sip_alg_request *response)
{
	int rc = 0;
	int pos;
	struct npf_alg *sip = npf_alg_session_get_alg(se);
	int size = sip_media_count(&invite->sr_media_list_head);

	/*
	 * If the invite and response port lists are different sizes,
	 * then we had a bad SDP packet in either - They must be the
	 * same size.
	 */
	if (size != sip_media_count(&response->sr_media_list_head))
		return -1;

	/*
	 * Prepare for creating tuples out of each media definition
	 * from the invite and response.
	 */
	for (pos = 0; pos < size; pos++) {
		struct sip_alg_media *i;
		struct sip_alg_media *r;

		/* Pop the head of each list so the medias pair up in order */
		i = cds_list_first_entry(&invite->sr_media_list_head,
				struct sip_alg_media, m_node);
		cds_list_del(&i->m_node);

		r = cds_list_first_entry(&response->sr_media_list_head,
				struct sip_alg_media, m_node);
		cds_list_del(&r->m_node);

		/* This consumes the medias */
		rc = sip_alg_create_rtp_tuples(se, sip, invite, i, r);
		if (rc)
			break;
	}

	return rc;
}

/*
 * Verify that the call ID in 'sr' matches the given session.
+ */ +static bool sip_alg_verify_session_call_id(npf_session_t *se, + struct sip_alg_request *sr) +{ + osip_call_id_t *cid; + struct sip_alg_session *ss; + int i; + + /* Only CNTL sessions have private data */ + ss = npf_alg_session_get_private(se); + if (!ss) + return false; + + cid = osip_message_get_call_id(sr->sr_sip); + if (!cid) + return false; + + for (i = 0; i < ss->ss_call_id_count; i++) { + if (osip_call_id_match(cid, ss->ss_call_ids[i]) + == OSIP_SUCCESS) + return true; + } + return false; +} + +/* + * Manage all sip responses. + * + * Here we associate the previously received INVITE (if applicable) with this + * 200 or 183 response, and eventually create the rtp tuples for the media + * flows. + * + * A '183 Session Progress' may be used if early media (RTP traffic before the + * call is answered) is present. This response (like the '200 OK') includes + * an SDP message part containing media information. + * + * The first response received containing SDP media information is used to + * create the RTP/RTCP tuples. After that, we can expire the SIP Request + * message that we have been holding onto. + * + * However if the original Invite is expired upon receipt of a 183 message + * then we still need to ensure that we translate the SDP media fields in the + * '200 OK' message. + * + * (Early media typically includes dial tones and/or recorded messages.) + * + * Note we also do some sanity checking on this response and we can return an + * error to drop the packet. + */ +int sip_manage_response(npf_session_t *se, npf_cache_t *npc, + struct sip_alg_request *sr, + struct sip_alg_request *tsr, npf_nat_t *nat) + +{ + struct sip_alg_request *osr; + struct npf_alg *sip = npf_alg_session_get_alg(se); + int rc; + + /* Set per-packet info */ + npc->npc_alg_flags = SIP_NPC_RESPONSE; + + /* + * Handle all error responses now, no need to continue + * if this is an error response. 
+ */ + if (sip_alg_handle_error_response(sip, tsr)) + return 0; + + /* + * If this is a '200 Ok', or a '183 Session Progress', then we + * may need to resolve the media flows for this sip call. + */ + rc = 0; + if (MSG_IS_RESPONSE_FOR(tsr->sr_sip, "INVITE") && + (MSG_TEST_CODE(tsr->sr_sip, 200) || + MSG_TEST_CODE(tsr->sr_sip, 183))) { + + /* + * We are only interested in backwards Responses, + * eg: reply to a forward request + */ + if (sip_forw(tsr)) + return 0; + + /* ignore non-sdp 200 responses */ + if (!sr->sr_sdp && MSG_TEST_CODE(tsr->sr_sip, 200)) + return 0; + + /* But not 183's... */ + if (!sr->sr_sdp && MSG_TEST_CODE(tsr->sr_sip, 183)) + return -EINVAL; + + osr = sip_request_lookup(sip, sr); + if (osr) { + /* Translate SDP media fields */ + rc = sip_alg_manage_media(se, nat, tsr); + if (!rc) + rc = sip_alg_resolve_media(se, osr, tsr); + + /* Always expire the INVITE, UA can resend */ + sip_request_expire(osr); + } else { + /* + * The original INVITE may have been resolved by a 183 + * Response. If so, we still need to translate the + * SDP media in the '200 OK' message. Verify the + * call-ID matches the session before doing so. + */ + if (!MSG_TEST_CODE(tsr->sr_sip, 200)) + return 0; + + if (!sip_alg_verify_session_call_id(se, sr)) + return 0; + + rc = sip_alg_manage_media(se, nat, tsr); + } + } + + return rc; +} + diff --git a/src/npf/alg/sip/sip_response.h b/src/npf/alg/sip/sip_response.h new file mode 100644 index 00000000..5bf87c0f --- /dev/null +++ b/src/npf/alg/sip/sip_response.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2020, AT&T Intellectual Property. All rights reserved. 
+ * + * SPDX-License-Identifier: LGPL-2.1-only + */ + +#ifndef _SIP_RESPONSE_H_ +#define _SIP_RESPONSE_H_ + +#include "npf/npf.h" +#include "npf/alg/alg.h" +#include "npf/npf_cache.h" +#include "npf/npf_nat.h" +#include "npf/npf_session.h" + +struct apt_tuple; +struct sip_tuple_data; +struct sip_alg_request; + +/* + * Detach tuple private data from alg tuple context, and release reference. + */ +void sip_tuple_data_detach(struct apt_tuple *nt); + +/* + * Create RTCP tuples. Called when an RTP tuple is matched and an RTP session + * created. + */ +void sip_alg_create_rtcp_tuples(npf_session_t *se, npf_cache_t *npc, + struct sip_tuple_data *td); + +int sip_manage_response(npf_session_t *se, npf_cache_t *npc, + struct sip_alg_request *sr, + struct sip_alg_request *tsr, npf_nat_t *nat); + +#endif diff --git a/src/npf/alg/sip/sip_translate.c b/src/npf/alg/sip/sip_translate.c new file mode 100644 index 00000000..42956a2c --- /dev/null +++ b/src/npf/alg/sip/sip_translate.c @@ -0,0 +1,940 @@ +/* + * Copyright (c) 2020, AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ + +/* + * SIP alg translate + * + * sip_alg_translate_packet is called from the ao_nat api function to + * translate a SIP packet. + * + * sip_alg_translate_media translates the media in the SDP "m=" strings. 
+ * + * sip_alg_update_session_media translates the media address in the SDP "c=" + * string + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "vrf.h" +#include "util.h" +#include "vplane_log.h" + +#include "npf/npf.h" +#include "npf/alg/alg.h" +#include "npf/npf_cache.h" +#include "npf/npf_nat.h" +#include "npf/npf_session.h" + +/* + * Only translate a port if it is present in the url, and is different from + * tport + */ +static inline bool sip_translate_port_reqd(const char *port, const char *tport) +{ + if (!port || !tport) + return false; + + if (strcmp(port, tport) == 0) + return false; + + return true; +} + +static int sip_alg_translate_media_connect(sdp_connection_t *c, + char *addr) +{ + /* + * Performs basic sanity as well. + * + * If we do not have a translation address, we are not + * translating this packet. Do nothing. + * + * Do not translate IPv6 + */ + if (!addr) + return 0; + + if (strcmp(c->c_nettype, "IN") != 0) + return -EINVAL; + + if (!strcmp(c->c_addrtype, "IP6")) + return 0; + + if (strcmp(c->c_addrtype, "IP4") != 0) + return -EINVAL; + + osip_free(c->c_addr); + c->c_addr = addr; + return 0; +} + +static int sip_alg_translate_media_port(struct sip_alg_request *sr, + int pos, in_port_t port) +{ + char *cport; + + cport = sip_port_to_str(port); + if (!cport) + return -ENOMEM; + + if (sdp_message_m_port_set(sr->sr_sdp, pos, cport)) { + osip_free(cport); + return -EINVAL; + } + return 0; +} + +static int sip_alg_update_media(struct sip_alg_request *sr, + int pos, npf_addr_t *taddr, uint8_t alen, in_port_t port) +{ + sdp_connection_t *c; + int rc = 0; + char *addr; + + /* + * Update the connection ("c=") and media ("m=") with + * the translation address and port. 
+ */ + c = sdp_message_connection_get(sr->sr_sdp, pos, 0); + + if (c) { + if (!sip_translate_addr_reqd(c->c_addr, sip_oaddr(sr))) + return 0; /* Nothing to do */ + + addr = sip_addr_to_str(taddr, alen); + if (!addr) + return -EINVAL; + + rc = sip_alg_translate_media_connect(c, addr); + if (rc) { + osip_free(addr); + return rc; + } + } else { + /* + * There is no media connection address. Only translate media + * port if session connection address matches the NAT target + * address. + */ + c = sdp_message_connection_get(sr->sr_sdp, -1, 0); + + if (!c || !sip_translate_addr_reqd(c->c_addr, sip_oaddr(sr))) + return 0; /* Nothing to do */ + } + + return sip_alg_translate_media_port(sr, pos, port); +} + +/* + * Translates the media in the SDP "m=" strings + */ +int sip_alg_translate_media(struct sip_alg_request *sr, + struct sip_alg_media *m, int pos) +{ + int rc; + + /* + * N.B.: If this is dnat, then this 'm' is generated off + * a response from the server. This means that we received the + * dnat translation port. IOW, we need to do a 'reverse' translation + * on this msg. + * + * So make sure we re-write the packet with the correct port. 
+	 */
+	if (sip_is_snat(sr))
+		rc = sip_alg_update_media(sr, pos, &m->m_trtp_addr,
+					  m->m_trtp_alen, m->m_trtp_port);
+	else
+		rc = sip_alg_update_media(sr, pos, &m->m_rtp_addr,
+					  m->m_rtp_alen, m->m_rtp_port);
+	if (!rc) {
+		if (sip_is_snat(sr))
+			rc = sip_alg_sdp_set_rtcp_attribute(sr, pos,
+					&m->m_trtcp_addr, m->m_trtcp_alen,
+					m->m_trtcp_port);
+		else
+			rc = sip_alg_sdp_set_rtcp_attribute(sr, pos,
+					&m->m_rtcp_addr, m->m_rtcp_alen,
+					m->m_rtcp_port);
+	}
+	return rc;
+}
+
+/*
+ * Translates the media address in the SDP "c=" string
+ *
+ * Only the session-level connection (media position -1) is looked at, and
+ * only when its address matches the original NAT address.  Failures are not
+ * reported to the caller; the connection is simply left untouched.
+ */
+void sip_alg_update_session_media(struct sip_alg_request *sr)
+{
+	sdp_connection_t *c;
+	char *addr;
+
+	c = sdp_message_connection_get(sr->sr_sdp, -1, 0);
+	if (!c)
+		return;
+
+	if (!sip_translate_addr_reqd(c->c_addr, sip_oaddr(sr)))
+		return;
+
+	addr = osip_strdup(sip_taddr(sr));
+	if (!addr)
+		return;
+
+	/*
+	 * When the connection is rejected the string has not been stored in
+	 * it, so it must be released here to avoid leaking it.
+	 */
+	if (sip_alg_translate_media_connect(c, addr)) {
+		osip_free(addr);
+	}
+}
+
+/*
+ * sip_alg_translate_url()
+ *
+ * Replace the host of 'u' with 'taddr' when the host matches 'oaddr'.  The
+ * port is replaced with 'port' only if the url carries a port that differs
+ * from it.
+ *
+ * The replacement strings are duplicated before the old ones are released,
+ * so the url is left unchanged if an allocation fails.
+ * NOTE(review): assumes 'taddr' is non-NULL whenever the host matches
+ * 'oaddr'; a NULL 'taddr' is reported as -ENOMEM - confirm against callers.
+ *
+ * Returns 0 on success or when there is nothing to do, -ENOMEM on
+ * allocation failure.
+ */
+static int sip_alg_translate_url(osip_uri_t *u, const char *oaddr,
+				 const char *taddr, const char *port)
+{
+	char *new_host;
+	char *new_port;
+
+	if (!u)
+		return 0;
+
+	if (!sip_translate_addr_reqd(u->host, oaddr))
+		return 0;
+
+	new_host = osip_strdup(taddr);
+	if (!new_host)
+		return -ENOMEM;
+
+	/* translate the port if present */
+	if (sip_translate_port_reqd(u->port, port)) {
+		new_port = osip_strdup(port);
+		if (!new_port) {
+			osip_free(new_host);
+			return -ENOMEM;
+		}
+		osip_free(u->port);
+		u->port = new_port;
+	}
+
+	osip_free(u->host);
+	u->host = new_host;
+	return 0;
+}
+
+/*
+ * sip_alg_translate_from - Translate a From header
+ *
+ * Returns -1 if the message has no From url, else the result of translating
+ * that url.
+ */
+static int sip_alg_translate_from(struct sip_alg_request *tsr,
+				  const char *taddr, const char *tport)
+{
+	osip_uri_t *from;
+	int rc = -1;
+
+	from = osip_from_get_url(osip_message_get_from(tsr->sr_sip));
+	if (from)
+		rc = sip_alg_translate_url(from, sip_oaddr(tsr), taddr, tport);
+	return rc;
+}
+
+/*
+ * sip_alg_translate_to - Translate a To header
+ */
+static int sip_alg_translate_to(struct sip_alg_request *tsr,
+				const char *taddr, const char *tport)
+{
+	osip_uri_t *to;
+	int rc = -1;
+
+	to = osip_from_get_url(osip_message_get_to(tsr->sr_sip));
+	if (to)
+		rc =
sip_alg_translate_url(to, sip_oaddr(tsr), taddr, tport);
+	return rc;
+}
+
+/*
+ * sip_alg_translate_call_id - Translate a Call-Id header
+ *
+ * Replaces the host part of the Call-Id with 'addr' when it matches the
+ * original NAT address.  The new host is duplicated before the old one is
+ * released, so the header is left unchanged if the allocation fails.
+ *
+ * Returns 0 on success or when there is nothing to do, -ENOMEM on
+ * allocation failure.
+ */
+static int sip_alg_translate_call_id(struct sip_alg_request *tsr,
+				     const char *addr)
+{
+	osip_call_id_t *cid = osip_message_get_call_id(tsr->sr_sip);
+	char *p;
+	char *new_host;
+
+	if (!cid)
+		return 0;
+
+	p = osip_call_id_get_host(cid);
+	if (!sip_translate_addr_reqd(p, sip_oaddr(tsr)))
+		return 0;
+
+	new_host = osip_strdup(addr);
+	if (!new_host)
+		return -ENOMEM;
+
+	osip_free(p);
+	osip_call_id_set_host(cid, new_host);
+	return 0;
+}
+
+/*
+ * sip_alg_translate_user_agent() - Translate User-Agent header
+ *
+ * The header value can contain anything.  Only the first occurrence of the
+ * original address, if present, is replaced with 'taddr'.
+ *
+ * Returns 0 on success or when there is nothing to do, -ENOMEM on
+ * allocation failure.
+ */
+static int sip_alg_translate_user_agent(struct sip_alg_request *tsr,
+					const char *taddr)
+{
+	/* Start from NULL so a failed lookup cannot leave 'h' undefined */
+	osip_header_t *h = NULL;
+	const char *oaddr;
+	size_t prefix_len;
+	char *p;
+
+	osip_message_get_user_agent(tsr->sr_sip, 0, &h);
+
+	/*
+	 * NOTE(review): libosip2 appears to store a NULL hvalue for a header
+	 * with an empty value - guard before handing it to strstr().
+	 */
+	if (!h || !h->hvalue)
+		return 0;
+
+	oaddr = sip_oaddr(tsr);
+	p = strstr(h->hvalue, oaddr);
+	if (!p)
+		return 0; /* Nothing to do */
+
+	/* Ensure enough space; the +1 also avoids a 0 bounds VLA */
+	char buf[strlen(h->hvalue) + strlen(taddr) + 1];
+
+	/* part before oaddr, then taddr, then the part after oaddr */
+	prefix_len = p - h->hvalue;
+	memset(buf, '\0', sizeof(buf));
+	memcpy(buf, h->hvalue, prefix_len);
+	strcat(buf, taddr);
+	strcat(buf, p + strlen(oaddr));
+
+	p = osip_strdup(buf);
+	if (!p)
+		return -ENOMEM;
+	osip_free(h->hvalue);
+	h->hvalue = p;
+	return 0;
+}
+
+/*
+ * sip_alg_translate_via_addr - Translate a Via header address and/or port
+ *
+ * The host is replaced with 'taddr' when it matches 'oaddr'; the port is
+ * replaced with 'tport' only if the Via carries a port that differs from
+ * it.  Replacement strings are duplicated before the old ones are released,
+ * so the Via is left unchanged if an allocation fails.
+ *
+ * Returns 0 on success or when there is nothing to do, -ENOMEM on
+ * allocation failure.
+ */
+static int sip_alg_translate_via_addr(osip_via_t *v, const char *oaddr,
+				      const char *taddr, const char *tport)
+{
+	char *host;
+	char *port;
+	char *new_host;
+	char *new_port;
+
+	host = osip_via_get_host(v);
+	if (!sip_translate_addr_reqd(host, oaddr))
+		return 0;
+
+	new_host = osip_strdup(taddr);
+	if (!new_host)
+		return -ENOMEM;
+
+	port = osip_via_get_port(v);
+	if (sip_translate_port_reqd(port, tport)) {
+		new_port = osip_strdup(tport);
+		if (!new_port) {
+			osip_free(new_host);
+			return -ENOMEM;
+		}
+		osip_free(port);
+		osip_via_set_port(v, new_port);
+	}
+
+	osip_free(host);
+	osip_via_set_host(v, new_host);
+	return 0;
+}
+
+/*
+ * sip_alg_translate_via - Translate Via header(s)
+ */
+static int
sip_alg_translate_via(struct sip_alg_request *tsr, + const char *taddr, const char *tport) +{ + osip_via_t *v = NULL; + int i = 0; + int rc = 0; + + while (osip_message_get_via(tsr->sr_sip, i, &v) >= 0) { + rc = sip_alg_translate_via_addr(v, sip_oaddr(tsr), + taddr, tport); + if (rc) + return rc; + i++; + } + return rc; +} + +/* + * sip_alg_translate_contact - Translate a Contact header + */ +static int sip_alg_translate_contact(struct sip_alg_request *tsr, + const char *taddr, const char *tport) +{ + osip_contact_t *c = NULL; + int i = 0; + int rc = 0; + + while (osip_message_get_contact(tsr->sr_sip, i, &c) >= 0) { + rc = sip_alg_translate_url(osip_contact_get_url(c), + sip_oaddr(tsr), taddr, tport); + if (rc) + return rc; + i++; + } + return rc; +} + +/* + * sip_alg_translate_record_route - Translate Record-Route header(s) + */ +static int sip_alg_translate_record_route(struct sip_alg_request *tsr, + const char *taddr, const char *tport) +{ + osip_record_route_t *rr; + int i = 0; + int rc = 0; + + while (osip_message_get_record_route(tsr->sr_sip, i, &rr) >= 0) { + rc = sip_alg_translate_url(osip_record_route_get_url(rr), + sip_oaddr(tsr), taddr, tport); + if (rc) + return rc; + i++; + } + return rc; +} + +/* + * sip_alg_translate_route - Translate a Route header + * + * The osip library parses either of the following two forms: + * + * "Route: ,\r\n" + * + * or + * + * "Route: \r\n" + * "Route: \r\n" + * + * It always generates the second form on output (translating the url as where + * relevant). 
+ */ +static int sip_alg_translate_route(struct sip_alg_request *tsr, + const char *taddr, const char *tport) +{ + osip_route_t *r = NULL; + int i = 0; + int rc = 0; + + while (osip_message_get_route(tsr->sr_sip, i, &r) >= 0) { + rc = sip_alg_translate_url(osip_route_get_url(r), + sip_oaddr(tsr), taddr, tport); + if (rc) + return rc; + i++; + } + return rc; +} + +/* + * sip_alg_translate_request_uri - Translate a Request-Uri header + */ +static int sip_alg_translate_request_uri(struct sip_alg_request *tsr, + const char *taddr, const char *tport) +{ + osip_uri_t *r = tsr->sr_sip->req_uri; + + return sip_alg_translate_url(r, sip_oaddr(tsr), taddr, tport); +} + +/* + * Translate a generic SIP header that has not been parsed by the osip + * library. + * + * Replace NAT target address with the translation address. Also replaces the + * port if 1. a port is present in the url, and 2. the header address matched + * the target address. + */ +static int sip_alg_translate_header(osip_header_t *h, const char *oaddr, + const char *taddr, const char *tport) +{ + const char *p; + + if (!h) + return 0; + + /* + * If the header does not contain NAT target address then there is + * nothing to be done + */ + p = strstr(h->hvalue, oaddr); + if (!p) + return 0; + + size_t oaddr_len = strlen(oaddr); + size_t taddr_len = strlen(taddr); + size_t tport_len = tport ? 
strlen(tport) : 0; + size_t hval_len = strlen(h->hvalue); + + /* Ensure more than enough space */ + char buf[hval_len + taddr_len + tport_len + 1]; + char *insert_point = buf; + + /* copy part before oaddr */ + memcpy(insert_point, h->hvalue, p - h->hvalue); + insert_point += p - h->hvalue; + *insert_point = '\0'; + + /* insert taddr */ + strncat(insert_point, taddr, taddr_len); + insert_point += taddr_len; + + /* set p to point to just after oaddr */ + p += oaddr_len; + + /* + * replace port if tport specified by the caller and if a port is + * present in the header + */ + if (tport && *p == ':') { + uint hport; + const char *pp = p + 1; + + /* Look for a number at a point after the colon */ + if (sscanf(pp, "%5u", &hport) > 0 && hport <= 65535) { + char hport_str[6]; + char *hportp; + + /* + * convert number to string, and locate in the + * original header string + */ + snprintf(hport_str, sizeof(hport_str), "%u", hport); + hportp = strstr(pp, hport_str); + + /* + * Check that port string is immediately after the + * colon. Only replace if header port is different + * than tport. + */ + if (hportp == pp && (strcmp(tport, hport_str) != 0)) { + /* insert colon and tport */ + strcat(insert_point, ":"); + insert_point += 1; + + strncat(insert_point, tport, tport_len); + insert_point += tport_len; + + /* + * set p to point just after the port in the + * original header string + */ + p = hportp + strlen(hport_str); + } + } + } + + /* + * copy part after oaddr (or after port, if present), and NULL + * terminate + */ + strcat(insert_point, p); + + /* replace hvalue */ + char *new = osip_strdup(buf); + if (!new) + return -ENOMEM; + + osip_free(h->hvalue); + h->hvalue = new; + + return 0; +} + +/* + * Translate all headers of the given name *if* the url contains oaddr. 
+ */ +static int sip_alg_translate_header_byname(struct sip_alg_request *tsr, + const char *name, + const char *taddr, + const char *tport) +{ + osip_header_t *h; + const char *oaddr = sip_oaddr(tsr); + int i = 0; + int rc; + + while ((i = osip_message_header_get_byname(tsr->sr_sip, + name, + i, &h)) >= 0) { + rc = sip_alg_translate_header(h, oaddr, taddr, tport); + + if (rc < 0) + return rc; + i++; + } + return 0; +} + +/* + * Translate all P-asserted-identity headers + */ +static int sip_alg_translate_p_asserted_id(struct sip_alg_request *tsr, + const char *taddr, + const char *tport) +{ + return sip_alg_translate_header_byname(tsr, + "P-asserted-identity", + taddr, tport); +} + +/* + * Translate all P-preferred-identity headers + */ +static int sip_alg_translate_p_preferred_id(struct sip_alg_request *tsr, + const char *taddr, + const char *tport) +{ + return sip_alg_translate_header_byname(tsr, + "P-preferred-identity", + taddr, tport); +} + +/* + * sip_alg_translate_snat() - Translate for SNAT. + */ +static int sip_alg_translate_snat(struct sip_alg_request *tsr, bool forw, + const char *taddr, const char *tport) +{ + /* + * Translation fields depend upon both stream + * direction and msg type. 
+ */ + if (MSG_IS_REQUEST(tsr->sr_sip) && forw) { + if (sip_alg_translate_from(tsr, taddr, tport)) + return -1; + if (sip_alg_translate_user_agent(tsr, taddr)) + return -1; + if (sip_alg_translate_call_id(tsr, taddr)) + return -1; + if (sip_alg_translate_via(tsr, taddr, tport)) + return -1; + if (sip_alg_translate_contact(tsr, taddr, tport)) + return -1; + if (sip_alg_translate_record_route(tsr, taddr, tport)) + return -1; + if (sip_alg_translate_route(tsr, taddr, tport)) + return -1; + if (sip_alg_translate_p_asserted_id(tsr, taddr, tport)) + return -1; + if (sip_alg_translate_p_preferred_id(tsr, taddr, tport)) + return -1; + } else if (MSG_IS_REQUEST(tsr->sr_sip) && !forw) { + if (sip_alg_translate_request_uri(tsr, taddr, tport)) + return -1; + if (sip_alg_translate_to(tsr, taddr, tport)) + return -1; + if (sip_alg_translate_call_id(tsr, taddr)) + return -1; + } else if (MSG_IS_RESPONSE(tsr->sr_sip) && forw) { + if (sip_alg_translate_to(tsr, taddr, tport)) + return -1; + if (sip_alg_translate_contact(tsr, taddr, tport)) + return -1; + if (sip_alg_translate_record_route(tsr, taddr, tport)) + return -1; + if (sip_alg_translate_from(tsr, taddr, tport)) + return -1; + if (sip_alg_translate_call_id(tsr, taddr)) + return -1; + if (sip_alg_translate_via(tsr, taddr, tport)) + return -1; + } else if (MSG_IS_RESPONSE(tsr->sr_sip) && !forw) { + if (sip_alg_translate_from(tsr, taddr, tport)) + return -1; + if (sip_alg_translate_call_id(tsr, taddr)) + return -1; + if (sip_alg_translate_via(tsr, taddr, tport)) + return -1; + if (sip_alg_translate_record_route(tsr, taddr, tport)) + return -1; + if (sip_alg_translate_route(tsr, taddr, tport)) + return -1; + } + return 0; +} + +/* + * sip_alg_translate_dnat() - Translate for DNAT. + */ +static int sip_alg_translate_dnat(struct sip_alg_request *tsr, bool forw, + const char *taddr, const char *tport) +{ + /* + * Translation fields depend upon both stream + * direction and msg type. 
+ */ + if (MSG_IS_REQUEST(tsr->sr_sip) && forw) { + if (sip_alg_translate_request_uri(tsr, taddr, tport)) + return -1; + if (sip_alg_translate_to(tsr, taddr, tport)) + return -1; + } else if (MSG_IS_REQUEST(tsr->sr_sip) && !forw) { + if (sip_alg_translate_request_uri(tsr, taddr, tport)) + return -1; + if (sip_alg_translate_contact(tsr, taddr, tport)) + return -1; + if (sip_alg_translate_to(tsr, taddr, tport)) + return -1; + if (sip_alg_translate_p_asserted_id(tsr, taddr, tport)) + return -1; + if (sip_alg_translate_p_preferred_id(tsr, taddr, tport)) + return -1; + } else if (MSG_IS_RESPONSE(tsr->sr_sip) && forw) { + if (sip_alg_translate_to(tsr, taddr, tport)) + return -1; + if (sip_alg_translate_record_route(tsr, taddr, tport)) + return -1; + if (sip_alg_translate_route(tsr, taddr, tport)) + return -1; + } else if (MSG_IS_RESPONSE(tsr->sr_sip) && !forw) { + if (sip_alg_translate_request_uri(tsr, taddr, tport)) + return -1; + if (sip_alg_translate_to(tsr, taddr, tport)) + return -1; + if (sip_alg_translate_contact(tsr, taddr, tport)) + return -1; + if (sip_alg_translate_record_route(tsr, taddr, tport)) + return -1; + if (sip_alg_translate_route(tsr, taddr, tport)) + return -1; + } + return 0; +} + +/* + * sip_alg_translate_message() - Translate a sip msg. + */ +static int sip_alg_translate_message(const struct npf_alg *sip, + struct sip_alg_request *sr, struct sip_alg_request **_tsr) +{ + int rc; + struct sip_alg_request *tsr; + + *_tsr = NULL; + + tsr = sip_alg_request_alloc(false, sr->sr_if_idx); + if (!tsr) + return -ENOMEM; + + /* + * Clone the SIP and SDP messages. 
+ */ + rc = osip_message_clone(sr->sr_sip, &tsr->sr_sip); + if (rc) + goto bad; + + memcpy(&tsr->sr_nat, &sr->sr_nat, sizeof(struct sip_nat)); + + if (sr->sr_sdp) { + rc = sdp_message_clone(sr->sr_sdp, &tsr->sr_sdp); + if (rc) + goto bad; + } + + if (sip_is_snat(tsr)) { + rc = sip_alg_translate_snat(tsr, sip_forw(tsr), + sip_taddr(tsr), sip_tport(tsr)); + } else if (sip_is_dnat(tsr)) { + rc = sip_alg_translate_dnat(tsr, sip_forw(tsr), + sip_taddr(tsr), sip_tport(tsr)); + } + if (rc) + goto bad; + + *_tsr = tsr; + + return 0; +bad: + sip_alg_request_free(sip, tsr); + return rc; +} + +/* + * sip_alg_update_payload() - Update a packet payload + */ +static int sip_alg_update_payload(npf_session_t *se, npf_cache_t *npc, + const int di, struct rte_mbuf *nbuf, + struct sip_alg_request *tsr) +{ + char *payload; + char *sdp; + osip_body_t *body; + uint16_t new_plen; + char ebuf[64]; + size_t sz; + int rc; + + /* + * If we have an SDP, get the string and replace the body in + * the SIP. + */ + if (tsr->sr_sdp) { + rc = sdp_message_to_str(tsr->sr_sdp, &sdp); + if (rc) { + rc = -ENOMEM; + goto done; + } + + /* Replace this body at pos = 0 */ + rc = osip_message_get_body(tsr->sr_sip, 0, &body); + if (rc < 0) { + rc = -ENOENT; + osip_free(sdp); + goto done; + } + osip_free(body->body); + body->body = sdp; + body->length = strlen(sdp); + } + + osip_message_force_update(tsr->sr_sip); + rc = osip_message_to_str(tsr->sr_sip, &payload, &sz); + if (rc) { + rc = -ENOMEM; + goto done; + } + + new_plen = (uint16_t) sz; + + rc = npf_payload_update(se, npc, nbuf, payload, di, new_plen); + osip_free(payload); + +done: + if (rc) { + if (net_ratelimit()) + RTE_LOG(ERR, FIREWALL, + "NPF ALG: SIP payload update: %s\n", + strerror_r(-rc, ebuf, sizeof(ebuf))); + } + + return rc; +} + +/* + * sip_init_nat() - Init the 'nat' params for this request + */ +void sip_init_nat(struct sip_alg_request *sr, bool forw, + const npf_addr_t *taddr, const npf_addr_t *oaddr, + uint8_t alen, in_port_t tport, 
const int di) +{ + struct sip_nat *sn = &sr->sr_nat; + int rc; + + /* Port and addr from nat struct for CNTL session */ + rc = snprintf(sn->sn_tport, 8, "%hu", ntohs(tport)); + if (rc < 0 || rc >= 8) + return; + + if (taddr) { + inet_ntop(AF_INET, taddr, sn->sn_taddr, sizeof(sn->sn_taddr)); + sn->sn_alen = alen; + } + if (oaddr) + inet_ntop(AF_INET, oaddr, sn->sn_oaddr, sizeof(sn->sn_oaddr)); + sn->sn_di = di; + sn->sn_forw = forw; + + if (taddr) { + if (di == PFIL_IN && forw) + sn->sn_type = sip_nat_dnat; + else if (di == PFIL_OUT && forw) + sn->sn_type = sip_nat_snat; + else if (di == PFIL_IN && !forw) + sn->sn_type = sip_nat_snat; + else if (di == PFIL_OUT && !forw) + sn->sn_type = sip_nat_dnat; + } else { + sn->sn_type = sip_nat_inspect; + } +} + +/* + * sip_alg_manage_packet() - manage and translate SIP packets + */ +static int sip_alg_manage_packet(npf_session_t *se, struct sip_alg_request *sr, + npf_cache_t *npc, struct rte_mbuf *nbuf, npf_nat_t *nat) +{ + struct sip_alg_request *tsr = NULL; + struct npf_alg *sip = npf_alg_session_get_alg(se); + int rc; + bool consumed = false; + + rc = sip_alg_translate_message(sip, sr, &tsr); + if (rc) + goto done; + + rc = sip_alg_manage_sip(se, npc, sr, tsr, nat, &consumed); + if (rc) + goto done; + + rc = sip_alg_update_payload(se, npc, sip_di(tsr), nbuf, tsr); + if (rc) { + if (consumed) { + sip_request_lookup_and_expire(sip, tsr); + consumed = false; + } + } + +done: + if (!consumed) + sip_alg_request_free(sip, tsr); + + sip_alg_request_free(sip, sr); + + return rc; +} + +/* + * sip_alg_translate_packet() + */ +int sip_alg_translate_packet(npf_session_t *se, npf_cache_t *npc, + npf_nat_t *ns, struct rte_mbuf *nbuf, const int di) +{ + npf_addr_t taddr; + struct npf_alg *sip = npf_alg_session_get_alg(se); + in_port_t tport; + npf_addr_t oaddr; + in_port_t oport; + bool forw; + struct sip_alg_request *sr; + + /* Don't manipulate (TCP) packets w/o data */ + if (!npf_payload_len(npc)) + return 0; + + /* + * Parsed msg may 
have been placed into session provate data by tuple + * inspect + */ + sr = sip_alg_parse(sip, npc, npf_session_get_if_index(se), nbuf); + if (!sr) + return -EINVAL; + + if (sip_alg_verify(sr)) { + sip_alg_request_free(sip, sr); + return -EINVAL; + } + + (void) npf_session_retnat(se, di, &forw); + + /* + * We need both sets of addrs, in opposite order + */ + if (forw) { + npf_nat_get_trans(ns, &taddr, &tport); + npf_nat_get_orig(ns, &oaddr, &oport); + } else { + npf_nat_get_orig(ns, &taddr, &tport); + npf_nat_get_trans(ns, &oaddr, &oport); + } + + /* + * For the SIP alt cntl session, 'forw' is true since the session was + * created in this direction. However from the SIP translation POV, + * we want to use the parent session to get the 'forw' setting, since + * the SIP packet flow is relative to it. This is used to set sn_forw + * and sn_type in the sip_nat struct that hangs of the SIP request + * struct. + */ + if (npf_session_get_parent(se) && + npf_alg_session_test_flag(se, SIP_ALG_REVERSE)) + forw = !forw; + + sip_init_nat(sr, forw, &taddr, &oaddr, npc->npc_alen, tport, di); + + return sip_alg_manage_packet(se, sr, npc, nbuf, ns); +} diff --git a/src/npf/alg/sip/sip_translate.h b/src/npf/alg/sip/sip_translate.h new file mode 100644 index 00000000..4feb1021 --- /dev/null +++ b/src/npf/alg/sip/sip_translate.h @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2020, AT&T Intellectual Property. All rights reserved. 
+ * + * SPDX-License-Identifier: LGPL-2.1-only + */ + +#ifndef _SIP_TRANSLATE_H_ +#define _SIP_TRANSLATE_H_ + +#include "npf/npf.h" +#include "npf/alg/alg.h" +#include "npf/npf_cache.h" +#include "npf/npf_nat.h" +#include "npf/npf_session.h" + +struct sip_alg_request; +struct sip_alg_media; +struct rte_mbuf; + +/* + * Translates the media in the SDP "m=" strings + */ +int sip_alg_translate_media(struct sip_alg_request *sr, + struct sip_alg_media *m, int pos); + +/* + * Translates the media address in the SDP "c=" string + */ +void sip_alg_update_session_media(struct sip_alg_request *sr); + +void sip_init_nat(struct sip_alg_request *sr, bool forw, + const npf_addr_t *taddr, const npf_addr_t *oaddr, + uint8_t alen, in_port_t tport, const int di); + +/* + * Translate a SIP packet + */ +int sip_alg_translate_packet(npf_session_t *se, npf_cache_t *npc, + npf_nat_t *ns, struct rte_mbuf *nbuf, + const int di); + +#endif /* _SIP_TRANSLATE_H_ */ diff --git a/src/npf/apm/apm.c b/src/npf/apm/apm.c index 975348de..7b70e826 100644 --- a/src/npf/apm/apm.c +++ b/src/npf/apm/apm.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2019-2021, AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only */ @@ -37,8 +37,9 @@ #include "npf/nat/nat_pool.h" #include "npf/apm/apm.h" -#include "npf/cgnat/cgn_errno.h" +#include "npf/cgnat/cgn_rc.h" #include "npf/cgnat/cgn_log.h" +#include "npf/cgnat/cgn_map.h" /* npf_rule_gen.c */ @@ -52,11 +53,14 @@ int npf_parse_ip_addr(char *value, sa_family_t *fam, npf_addr_t *addr, #define APM_GC_COUNT 2 #define ONE_THOUSAND (1<<10) + +/* + * Start with 128 buckets, and allow to grow to any size (size will be limited + * by number of addresses in the CGNAT address pools). 
+ */ #define APM_HT_INIT 128 #define APM_HT_MIN (4 * ONE_THOUSAND) -#define APM_HT_MAX (32 * ONE_THOUSAND) - -#define APMS_MAX APM_HT_MAX +#define APM_HT_MAX 0 /* * port block @@ -64,8 +68,9 @@ int npf_parse_ip_addr(char *value, sa_family_t *fam, npf_addr_t *addr, struct apm_port_block { struct cds_list_head pb_list_node; /* source list node */ struct apm *pb_apm; /* back ptr */ + struct cgn_source *pb_src; /* ptr to source */ struct rcu_head pb_rcu_head; - uint64_t pb_start_time; + uint64_t pb_start_time; /* unix epoch us */ uint16_t pb_port_start; /* first port in block */ uint16_t pb_port_end; /* last port in block */ uint16_t pb_nports; /* end - start + 1 */ @@ -101,11 +106,8 @@ struct apm_match { vrfid_t vrfid; }; -/* APM table used and max */ +/* APM table used count */ static rte_atomic32_t apms_used; -static int32_t apms_max = APMS_MAX; -static bool apm_table_full; - /* * Find first clear bit in a 64-bit word, starting at LSB, bit 1. Returns 0 @@ -155,6 +157,21 @@ struct cds_list_head *apm_block_get_list_node(struct apm_port_block *pb) return &pb->pb_list_node; } +/* + * pb_src is non-NULL *only* while a port-block is in a sources port-block + * list. When pb_src is set then that sources lock must be held when changing + * the port-block. + */ +void apm_block_set_source(struct apm_port_block *pb, struct cgn_source *src) +{ + pb->pb_src = src; +} + +struct cgn_source *apm_block_get_source(struct apm_port_block *pb) +{ + return pb->pb_src; +} + /* * Allocate a port from a port block. Returns 0 if it fails to find an * available port. @@ -251,6 +268,44 @@ apm_block_alloc_random_port(struct apm_port_block *pb, uint8_t proto) return port; } +/* + * Allocate a specific port from a port-block. Used by PCP. 
+ */ +uint16_t +apm_block_alloc_specific_port(struct apm_port_block *pb, uint8_t proto, + uint16_t port) +{ + uint16_t bm; + uint16_t bm_start, bit; + uint64_t mask; + + if (pb->pb_ports_used[proto] == pb->pb_nports) + return 0; + + assert(port >= pb->pb_port_start); + assert(port <= pb->pb_port_end); + + /* Which bitmap in the block? */ + bm = (port - pb->pb_port_start) / PORTS_PER_BITMAP; + + /* Which bit in the bitmap? */ + bm_start = (bm * PORTS_PER_BITMAP) + pb->pb_port_start; + bit = port - bm_start; + + mask = UINT64_C(1) << bit; + + if ((pb->pb_map[proto][bm] & mask) == UINT64_C(0)) { + pb->pb_ports_used[proto]++; + + /* Set bit */ + pb->pb_map[proto][bm] |= mask; + return port; + } + + /* Fail if port is not free */ + return 0; +} + /* * Release a port in a port-block */ @@ -319,6 +374,45 @@ uint16_t apm_block_list_first_free_port(struct cds_list_head *list, return 0; } +/* + * Called from apm_block_create when apm_blocks_used >= apm_nblocks + */ +static inline void apm_pb_full(struct apm *apm) +{ + if (!apm->apm_pb_full) { + /* Mark this address as full */ + apm->apm_pb_full = true; + + /* Log address is full */ + cgn_log_resource_public_pb(CGN_RESOURCE_FULL, apm->apm_addr, + apm->apm_blocks_used, + apm->apm_nblocks); + + /* Increment and check address pool threshold */ + np_threshold_get(apm->apm_np); + + } +} + +/* + * Called from apm_block_destroy when a block is freed. + */ +static inline void apm_pb_available(struct apm *apm) +{ + if (apm->apm_pb_full) { + /* Mark this address as available */ + apm->apm_pb_full = false; + + /* Log address is available */ + cgn_log_resource_public_pb(CGN_RESOURCE_AVAILABLE, + apm->apm_addr, apm->apm_blocks_used, + apm->apm_nblocks); + + /* Decrement and check address pool threshold */ + np_threshold_put(apm->apm_np); + } +} + /* * Create a port-block and add it to apm block array. 
* @@ -331,6 +425,8 @@ apm_block_create(struct apm *apm, uint16_t block) uint16_t nmaps; size_t sz; + assert(rte_spinlock_is_locked(&apm->apm_lock)); + /* How many 64-bit bitmaps do we need? */ nmaps = apm->apm_port_block_sz / PORTS_PER_BITMAP; sz = sizeof(struct apm_port_block) + @@ -352,8 +448,8 @@ apm_block_create(struct apm *apm, uint16_t block) /* Back pointer to apm */ pb->pb_apm = apm; - /* start time in millisecs */ - pb->pb_start_time = soft_ticks; + /* start time in unix epoch microsecs */ + pb->pb_start_time = unix_epoch_us; /* Determine first and last ports in this block */ pb->pb_port_start = (block * apm->apm_port_block_sz) + @@ -372,6 +468,9 @@ apm_block_create(struct apm *apm, uint16_t block) apm->apm_blocks[block] = pb; apm->apm_blocks_used++; + if (apm->apm_blocks_used >= apm->apm_nblocks) + apm_pb_full(apm); + return pb; } @@ -405,64 +504,57 @@ void apm_block_destroy(struct apm_port_block *pb) apm->apm_blocks[pb->pb_block] = NULL; apm->apm_blocks_used--; - if (apm->apm_pb_full) { - cgn_log_public_pb_avail(apm->apm_addr, - apm->apm_blocks_used, - apm->apm_nblocks); - apm->apm_pb_full = false; - } - - if (apm->apm_np && apm->apm_np->np_full) { - RTE_LOG(NOTICE, CGNAT, "NP_AVAILABLE name=%s\n", - apm->apm_np->np_name); - apm->apm_np->np_full = false; - } + apm_pb_available(apm); + cgn_alloc_pool_available(apm->apm_np, apm); call_rcu(&pb->pb_rcu_head, apm_block_rcu_free); } -void apm_log_block_alloc(struct apm_port_block *pb, uint32_t src_addr) +void apm_log_block_alloc(struct apm_port_block *pb, uint32_t src_addr, + const char *policy_name, const char *pool_name) { cgn_log_pb_alloc(src_addr, pb->pb_apm->apm_addr, pb->pb_port_start, pb->pb_port_end, - pb->pb_start_time); + pb->pb_start_time, + policy_name, pool_name); } -void apm_log_block_release(struct apm_port_block *pb, uint32_t src_addr) +void apm_log_block_release(struct apm_port_block *pb, uint32_t src_addr, + const char *policy_name, const char *pool_name) { cgn_log_pb_release(src_addr, - 
pb->pb_apm->apm_addr, - pb->pb_port_start, pb->pb_port_end, - pb->pb_start_time, soft_ticks); + pb->pb_apm->apm_addr, + pb->pb_port_start, pb->pb_port_end, + pb->pb_start_time, unix_epoch_us, + policy_name, pool_name); +} + +/* Deprecated */ +void +apm_table_threshold_set(int32_t threshold __unused, uint32_t interval __unused) +{ } /* - * Is there space in the apm table? + * Note that there is no max value for the apm table. We allow the hash table + * to grow as much as it needs to. The apms_used count is only used to + * provide show state. */ -static bool apm_slot_get(void) +static inline void apm_slot_get(void) { - if (rte_atomic32_add_return(&apms_used, 1) <= apms_max) - return true; + rte_atomic32_inc(&apms_used); +} +static inline void apm_slot_put(void) +{ rte_atomic32_dec(&apms_used); - - if (!apm_table_full) - RTE_LOG(ERR, CGNAT, "APM_TABLE_FULL count=%u/%u\n", - rte_atomic32_read(&apms_used), apms_max); - - /* - * Mark apm table as full. This is reset in the gc when the apm count - * reduces. 
- */ - apm_table_full = true; - - return false; } -static void apm_slot_put(void) +/* Get apm table used count */ +int32_t apm_get_used(void) { - rte_atomic32_dec(&apms_used); + return rte_atomic32_read(&apms_used); } /* @@ -481,10 +573,8 @@ apm_create(uint32_t addr, vrfid_t vrfid, struct nat_pool *np, struct apm *apm; size_t sz; - if (!apm_slot_get()) { - *error = -CGN_APM_ENOSPC; - return NULL; - } + /* Increment apms_used count */ + apm_slot_get(); sz = sizeof(*apm) + nblocks * sizeof(struct apm_port_block *); @@ -536,7 +626,7 @@ static void apm_destroy(struct apm *apm) /* Remove from hash table */ cds_lfht_del(apm_ht, &apm->apm_node); - /* Release slot */ + /* Decrement apms_used count */ apm_slot_put(); /* Release nat pool */ @@ -646,17 +736,6 @@ apm_create_and_insert(uint32_t addr, vrfid_t vrfid, struct nat_pool *np, return apm; } -/* Get apm table used count */ -int32_t apm_get_used(void) -{ - return rte_atomic32_read(&apms_used); -} - -int32_t apm_get_max(void) -{ - return apms_max; -} - /* * Return json list of active public addresses in uint format, host-byte order */ @@ -1083,7 +1162,7 @@ static void apm_gc_inspect(struct apm *apm) rte_spinlock_unlock(&apm->apm_lock); } -static void apm_gc_walk(void) +static void apm_gc(struct rte_timer *timer, void *arg __unused) { struct cds_lfht_iter iter; struct apm *apm; @@ -1091,44 +1170,44 @@ static void apm_gc_walk(void) if (!apm_ht) return; + /* Walk the apm table */ cds_lfht_for_each_entry(apm_ht, &iter, apm, apm_node) apm_gc_inspect(apm); - /* Is table still full? 
*/ - if (apm_table_full && - rte_atomic32_read(&apms_used) < apms_max) { - - RTE_LOG(ERR, CGNAT, "APM_TABLE_AVAILABLE count=%u/%u\n", - rte_atomic32_read(&apms_used), apms_max); - - apm_table_full = false; - } -} - -static void apm_gc(struct rte_timer *timer __unused, void *arg __unused) -{ - /* Walk the apm table */ - apm_gc_walk(); - /* Restart timer if dataplane still running */ - if (running) - rte_timer_reset(&apm_timer, + if (running && timer) + rte_timer_reset(timer, APM_GC_INTERVAL * rte_get_timer_hz(), SINGLE, rte_get_master_lcore(), apm_gc, NULL); } /* - * Unit-test only. + * Called from unit-test and from apm_uninit. */ void apm_cleanup(void) +{ + uint i; + + rte_timer_stop(&apm_timer); + + for (i = 0; i <= APM_GC_COUNT; i++) + /* Do not restart gc timer */ + apm_gc(NULL, NULL); +} + +/* + * Called via hidden vplsh command. Used by unit-test and by dev testers. + */ +void apm_gc_pass(void) { rte_timer_stop(&apm_timer); - apm_gc_walk(); - apm_gc_walk(); /* APM_EXPIRED */ - apm_gc_walk(); /* APM_DEAD */ + apm_gc(&apm_timer, NULL); } +/* + * Called from DP_EVT_INIT event handler + */ void apm_init(void) { if (apm_ht) @@ -1136,7 +1215,7 @@ void apm_init(void) apm_ht = cds_lfht_new( APM_HT_INIT, APM_HT_MIN, APM_HT_MAX, - CDS_LFHT_AUTO_RESIZE | CDS_LFHT_ACCOUNTING, + CDS_LFHT_AUTO_RESIZE, NULL); rte_timer_init(&apm_timer); @@ -1146,18 +1225,18 @@ void apm_init(void) NULL); } - +/* + * Called from DP_EVT_UNINIT event handler + */ void apm_uninit(void) { - uint i; - if (!apm_ht) return; - rte_timer_stop(&apm_timer); + /* Do three passes of the garbage collector */ + apm_cleanup(); - for (i = 0; i <= APM_GC_COUNT; i++) - apm_gc_walk(); + assert(apm_get_used() == 0); dp_ht_destroy_deferred(apm_ht); apm_ht = NULL; diff --git a/src/npf/apm/apm.h b/src/npf/apm/apm.h index 7533c7b0..27d6b9c3 100644 --- a/src/npf/apm/apm.h +++ b/src/npf/apm/apm.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, AT&T Intellectual Property. All rights reserved. 
+ * Copyright (c) 2019-2021, AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only */ @@ -12,16 +12,31 @@ #ifndef _APM_H_ #define _APM_H_ +#include +#include #include +#include "if_var.h" +#include "urcu.h" + #include "npf/nat/nat_proto.h" struct apm; struct nat_pool; +struct cgn_source; struct apm_port_block; #define PORTS_PER_BITMAP 64 +/* + * The APMS_LIMIT define and apms_limit variable are *not* enforced, and the apm + * table is allowed to grow as big as required. The limiting factor will be + * the number of addresses in the relevant address pools. apms_limit is only + * used to provide some indication (via logs and show output) of how full the + * apm table is. + */ +#define APMS_LIMIT 32768 + /* * public address (apm) table entry. Each entry is defined by: public source * address and vrfid. @@ -76,6 +91,9 @@ struct apm { struct apm_port_block *apm_blocks[]; }; +static_assert(offsetof(struct apm, apm_np) == 64, + "first cache line exceeded"); + /* apm entry removal bits. 
*/ #define APM_EXPIRED 0x01 #define APM_DEAD 0x02 @@ -148,12 +166,23 @@ uint16_t apm_block_get_nports(struct apm_port_block *pb); /* Get pointer to list node */ struct cds_list_head *apm_block_get_list_node(struct apm_port_block *pb); +/* Set src ptr in port-block */ +void apm_block_set_source(struct apm_port_block *pb, struct cgn_source *src); + +/* Get the src ptr if this port-block is in a source list */ +struct cgn_source *apm_block_get_source(struct apm_port_block *pb); + /* Get port and blocks used counts from a list of port blocks */ void apm_source_block_list_get_counts(struct cds_list_head *list, uint *nports, uint *ports_used); -void apm_log_block_alloc(struct apm_port_block *pb, uint32_t src_addr); -void apm_log_block_release(struct apm_port_block *pb, uint32_t src_addr); +void apm_log_block_alloc(struct apm_port_block *pb, uint32_t src_addr, + const char *policy_name, const char *pool_name); +void apm_log_block_release(struct apm_port_block *pb, uint32_t src_addr, + const char *policy_name, const char *pool_name); + +/* Threshold */ +void apm_table_threshold_set(int32_t threshold, uint32_t interval); /* jsonw port-blocks from a source list */ void apm_source_port_block_list_jsonw(json_writer_t *json, @@ -173,6 +202,13 @@ uint16_t apm_block_alloc_first_free_port(struct apm_port_block *pb, uint16_t apm_block_alloc_random_port(struct apm_port_block *pb, uint8_t proto); +/* + * Allocate a specified port. Used by PCP. 
+ */ +uint16_t +apm_block_alloc_specific_port(struct apm_port_block *pb, uint8_t proto, + uint16_t port); + /* Release a port in a port-block */ bool apm_block_release_port(struct apm_port_block *pb, uint8_t proto, uint16_t port); @@ -196,13 +232,13 @@ void apm_block_destroy(struct apm_port_block *pb); void apm_public_list(FILE *f, int argc, char **argv); -/* Get apm table used and max counts */ +/* Get apm table used count */ int32_t apm_get_used(void); -int32_t apm_get_max(void); void apm_show(FILE *f, int argc, char **argv); void apm_cleanup(void); +void apm_gc_pass(void); void apm_init(void); void apm_uninit(void); diff --git a/src/npf/app_group/app_group.c b/src/npf/app_group/app_group.c new file mode 100644 index 00000000..7c8f3a6a --- /dev/null +++ b/src/npf/app_group/app_group.c @@ -0,0 +1,643 @@ +/* + * Copyright (c) 2020-2021, AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ + +#include "util.h" +#include "app_group.h" +#include "npf/dpi/dpi_internal.h" + +#define AG_APP_HT_SIZE 4 +#define AG_APP_HT_MIN 4 +#define AG_APP_HT_MAX 1024 +#define AG_APP_HT_FLAGS (CDS_LFHT_AUTO_RESIZE | CDS_LFHT_ACCOUNTING) + +#define AG_TYPE_HT_SIZE 4 +#define AG_TYPE_HT_MIN 4 +#define AG_TYPE_HT_MAX 1024 +#define AG_TYPE_HT_FLAGS (CDS_LFHT_AUTO_RESIZE | CDS_LFHT_ACCOUNTING) + +#define AG_PROTO_HT_SIZE 4 +#define AG_PROTO_HT_MIN 4 +#define AG_PROTO_HT_MAX 1024 +#define AG_PROTO_HT_FLAGS (CDS_LFHT_AUTO_RESIZE | CDS_LFHT_ACCOUNTING) + +/* App group garbage collection list. */ +static CDS_LIST_HEAD(app_group_gc_list); + +/* Application ID hash table entry. */ +struct ag_app_entry { + uint32_t app; /* Application ID */ + struct cds_lfht_node ht_node; /* Hash table node */ + struct rcu_head rcu; +}; + +/* Application type hash table entry. + * Application type is unique per engine, + * so both type and engine ID are stored. 
+ */
+struct ag_type_entry {
+	uint32_t type;			/* Application type */
+	uint8_t engine;			/* Application engine ID */
+	struct cds_lfht_node ht_node;	/* Hash table node */
+	struct rcu_head rcu;
+};
+
+/* Application protocol hash table entry. */
+struct ag_proto_entry {
+	uint32_t proto;			/* Protocol ID */
+	struct cds_lfht_node ht_node;	/* Hash table node */
+	struct rcu_head rcu;
+};
+
+/**
+ * Take one reference on a DPI engine on behalf of the given group.
+ *
+ * The engine's slot in group->engine_refcount[] is incremented.
+ *
+ * Return true if the reference was taken.
+ * Return false if dpi_engine_id_to_idx() reports -1 for engine_id, or if
+ * the counter already sits at UINT32_MAX and would wrap.
+ */
+static inline bool
+push_engine(struct app_group *group, uint8_t engine_id)
+{
+	const int32_t slot = dpi_engine_id_to_idx(engine_id);
+
+	/* Unknown engine, or no room left in the counter: refuse. */
+	if (slot == -1 || group->engine_refcount[slot] == UINT32_MAX)
+		return false;
+
+	group->engine_refcount[slot] += 1;
+	return true;
+}
+
+/**
+ * Remove an engine from the given resource group
+ * if no application or protocol in the group still requires it.
+ *
+ * Return true on success (engine_id was popped).
+ * Return false on failure (engine_id wasn't popped); + */ +static inline bool +pop_engine(struct app_group *group, uint8_t engine_id) +{ + int32_t idx = dpi_engine_id_to_idx(engine_id); + + if (idx == -1) + /* Engine_id wasn't recognised */ + return false; + + if (group->engine_refcount[idx] == 0) + /* recount mismatch */ + return false; + + group->engine_refcount[idx]--; + return true; +} + +struct app_group * +app_group_init(void) +{ + struct app_group *group = zmalloc_aligned(sizeof(struct app_group)); + if (!group) + return NULL; + + group->ag_app_ht = cds_lfht_new(AG_APP_HT_SIZE, + AG_APP_HT_MIN, + AG_APP_HT_MAX, + AG_APP_HT_FLAGS, + NULL); + + if (!group->ag_app_ht) { + free(group); + return NULL; + } + + group->ag_type_ht = cds_lfht_new(AG_TYPE_HT_SIZE, + AG_TYPE_HT_MIN, + AG_TYPE_HT_MAX, + AG_TYPE_HT_FLAGS, + NULL); + + if (!group->ag_type_ht) { + cds_lfht_destroy(group->ag_app_ht, NULL); + free(group); + return NULL; + } + + group->ag_proto_ht = cds_lfht_new(AG_PROTO_HT_SIZE, + AG_PROTO_HT_MIN, + AG_PROTO_HT_MAX, + AG_PROTO_HT_FLAGS, + NULL); + + if (!group->ag_type_ht) { + cds_lfht_destroy(group->ag_app_ht, NULL); + cds_lfht_destroy(group->ag_type_ht, NULL); + free(group); + return NULL; + } + + return group; +} + +/* + * Delete the given application group. + */ +void +app_group_destroy(struct app_group *group) +{ + if (group->ag_app_ht) { + app_group_del_all_apps(group); + cds_lfht_destroy(group->ag_app_ht, NULL); + } + + if (group->ag_type_ht) { + app_group_del_all_types(group); + cds_lfht_destroy(group->ag_type_ht, NULL); + } + + if (group->ag_proto_ht) { + app_group_del_all_protos(group); + cds_lfht_destroy(group->ag_proto_ht, NULL); + } + + /* Be safe. */ + group->ag_app_ht = NULL; + group->ag_type_ht = NULL; + group->ag_proto_ht = NULL; + group->engine_refcount[0] = 0; + group->engine_refcount[1] = 0; + + free(group); +} + +/* + * Add app group to the GC list for later deletion. 
+ */ +void +app_group_rm_group(struct app_group *group) +{ + if (group) + cds_list_add(&group->deadlist, &app_group_gc_list); +} + +/* + * Periodic garbage collection. + */ +void +app_group_gc(void) +{ + struct app_group *ag, *tmp; + + cds_list_for_each_entry_safe(ag, tmp, &app_group_gc_list, deadlist) { + if (ag->is_dead) { + cds_list_del(&ag->deadlist); + app_group_destroy(ag); + } else { + ag->is_dead = true; + } + } +} + +static int +app_group_cmp_app(struct cds_lfht_node *node, const void *key) +{ + struct ag_app_entry *entry; + entry = caa_container_of(node, struct ag_app_entry, ht_node); + + uint32_t app = *(uint32_t *)key; + + return (entry->app == app); +} + +int +app_group_add_app(struct app_group *group, uint32_t app) +{ + if (!group) + return -EINVAL; + + /* Create the hash table entry. */ + struct ag_app_entry * + entry = zmalloc_aligned(sizeof(*entry)); + if (!entry) + return -ENOMEM; + + /* Store the app. */ + entry->app = app; + + /* Add to hash table. */ + cds_lfht_node_init(&entry->ht_node); + unsigned long name_hash = app; + + struct cds_lfht_node *node; + node = cds_lfht_add_unique(group->ag_app_ht, name_hash, + app_group_cmp_app, &app, + &entry->ht_node); + + if (node != &entry->ht_node) { + free(entry); + return -EEXIST; + } + + /* Save engine ID. */ + uint8_t engine = (uint8_t) (app >> DPI_ENGINE_SHIFT); + if (!push_engine(group, engine)) { + /* Failed to push engine. + * Remove app from hash table. + */ + app_group_del_app(group, app, false); + return -EINVAL; + } + + return 0; +} + +static int +app_group_cmp_type(struct cds_lfht_node *node, const void *key) +{ + struct ag_type_entry *entry; + entry = caa_container_of(node, struct ag_type_entry, ht_node); + + uint32_t type = *(unsigned long *)key; + + return (entry->type == type); +} + +int +app_group_add_type(struct app_group *group, uint32_t type, uint8_t engine) +{ + if (!group) + return -EINVAL; + + /* Create the hash table entry. 
*/
+	struct ag_type_entry *entry;
+	entry = zmalloc_aligned(sizeof(struct ag_type_entry));
+	if (!entry)
+		return -ENOMEM;
+
+	/* Store the type and engine ID,
+	 * because types are unique per engine.
+	 */
+	entry->type = type;
+	entry->engine = engine;
+
+	/* Add to hash table. */
+	cds_lfht_node_init(&entry->ht_node);
+	/* Key/hash: engine ID above bit 32, type ID in the low 32 bits.
+	 * NOTE(review): assumes unsigned long is 64 bits wide - confirm
+	 * for any 32-bit build.
+	 */
+	unsigned long hash = ((ulong)engine << 32) | type;
+
+	struct cds_lfht_node *node;
+	node = cds_lfht_add_unique(group->ag_type_ht, hash,
+				   app_group_cmp_type, &hash, &entry->ht_node);
+
+	/* Any node other than ours means the type was already present. */
+	if (node != &entry->ht_node) {
+		free(entry);
+		return -EEXIST;
+	}
+
+	/* Save engine ID. */
+	if (!push_engine(group, engine)) {
+		/* Failed to push engine.
+		 * Remove type from hash table.
+		 */
+		app_group_del_type(group, type, engine, false);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * Uniqueness callback for the protocol table: non-zero when the entry
+ * holds the protocol ID that key points to.
+ */
+static int
+app_group_cmp_proto(struct cds_lfht_node *node, const void *key)
+{
+	struct ag_proto_entry *entry;
+	entry = caa_container_of(node, struct ag_proto_entry, ht_node);
+
+	uint32_t proto = *(uint32_t *)key;
+
+	return (entry->proto == proto);
+}
+
+/*
+ * Add a protocol ID to the group and take a reference on its engine.
+ *
+ * Returns 0 on success, -EINVAL if group is NULL or the engine reference
+ * cannot be taken, -ENOMEM if the entry cannot be allocated and -EEXIST
+ * if the protocol is already in the group.
+ */
+int
+app_group_add_proto(struct app_group *group, uint32_t proto)
+{
+	if (!group)
+		return -EINVAL;
+
+	/* Create the hash table entry. */
+	struct ag_proto_entry *entry;
+	entry = zmalloc_aligned(sizeof(struct ag_proto_entry));
+	if (!entry)
+		return -ENOMEM;
+
+	/* Store the protocol. */
+	entry->proto = proto;
+
+	/* Add to hash table. */
+	cds_lfht_node_init(&entry->ht_node);
+	/* The protocol ID itself serves as the hash. */
+	unsigned long hash = proto;
+
+	struct cds_lfht_node *node;
+	node = cds_lfht_add_unique(group->ag_proto_ht, hash,
+				   app_group_cmp_proto, &proto,
+				   &entry->ht_node);
+
+	/* Any node other than ours means the protocol was already present. */
+	if (node != &entry->ht_node) {
+		free(entry);
+		return -EEXIST;
+	}
+
+	/* Save engine ID. */
+	uint8_t engine = (uint8_t) (proto >> DPI_ENGINE_SHIFT);
+	if (!push_engine(group, engine)) {
+		/* Failed to push engine.
+		 * Remove proto from hash table.
+ */ + app_group_del_proto(group, proto, false); + return -EINVAL; + } + + return 0; +} + +static int +ag_app_match(struct cds_lfht_node *node, const void *data) +{ + uint32_t wanted = *(uint32_t *)data; + struct ag_app_entry *entry; + + entry = caa_container_of(node, struct ag_app_entry, ht_node); + + return wanted == entry->app; +} + +static int +ag_type_match(struct cds_lfht_node *node, const void *data) +{ + uint64_t wanted = *(uint64_t *)data; + struct ag_type_entry *entry; + + entry = caa_container_of(node, struct ag_type_entry, ht_node); + + return wanted == entry->type; +} + +static int +ag_proto_match(struct cds_lfht_node *node, const void *data) +{ + uint32_t wanted = *(uint32_t *)data; + struct ag_proto_entry *entry; + + entry = caa_container_of(node, struct ag_proto_entry, ht_node); + + return wanted == entry->proto; +} + +static void +ag_app_free(struct rcu_head *head) +{ + struct ag_app_entry *entry; + entry = caa_container_of(head, struct ag_app_entry, rcu); + + free(entry); +} + +static int +_app_group_del_app(struct cds_lfht *ht, struct cds_lfht_node *node, + struct app_group *group, + bool remove_engine) +{ + struct ag_app_entry *entry; + entry = caa_container_of(node, struct ag_app_entry, ht_node); + + cds_lfht_del(ht, node); + call_rcu(&entry->rcu, ag_app_free); + + if (!remove_engine) + /* No need to remove the engine, + * so declare success. + */ + return 0; + + uint8_t engine = (uint8_t) (entry->app >> DPI_ENGINE_SHIFT); + + return pop_engine(group, engine) ? 0 : -EINVAL; +} + +int +app_group_del_app(struct app_group *group, uint32_t app, bool remove_engine) +{ + if (!group) + return -EINVAL; + + /* Search for node. 
*/ + struct cds_lfht_node *ht_node; + + ht_node = app_group_find_app(group, app); + if (!ht_node) + return -EINVAL; + + return _app_group_del_app(group->ag_app_ht, ht_node, group, + remove_engine); +} + +void +app_group_del_all_apps(struct app_group *group) +{ + struct cds_lfht_iter iter; + struct cds_lfht_node *node; + + struct cds_lfht *ht = group->ag_app_ht; + if (!ht) + return; + + cds_lfht_first(ht, &iter); + + while ((node = cds_lfht_iter_get_node(&iter)) != NULL) { + + _app_group_del_app(ht, node, group, true); + cds_lfht_next(ht, &iter); + } +} + +static void +ag_type_free(struct rcu_head *head) +{ + struct ag_type_entry *entry; + entry = caa_container_of(head, struct ag_type_entry, rcu); + + free(entry); +} + +static int +_app_group_del_type(struct cds_lfht *ht, struct cds_lfht_node *node, + struct app_group *group, + bool remove_engine, uint8_t engine) +{ + struct ag_type_entry *entry; + entry = caa_container_of(node, struct ag_type_entry, ht_node); + + cds_lfht_del(ht, node); + call_rcu(&entry->rcu, ag_type_free); + + if (!remove_engine) + /* No need to remove the engine, + * so declare success. + */ + return 0; + + return pop_engine(group, engine) ? 0 : -EINVAL; +} + +int +app_group_del_type(struct app_group *group, uint32_t type, uint8_t engine, + bool remove_engine) +{ + if (!group) + return -EINVAL; + + /* Search for node. 
*/ + struct cds_lfht_node *ht_node; + + ht_node = app_group_find_type(group, type, engine); + if (!ht_node) + return -EINVAL; + + return _app_group_del_type(group->ag_type_ht, ht_node, group, + remove_engine, engine); +} + +void +app_group_del_all_types(struct app_group *group) +{ + struct cds_lfht_iter iter; + struct cds_lfht_node *node; + + struct cds_lfht *ht = group->ag_type_ht; + if (!ht) + return; + + cds_lfht_first(ht, &iter); + + while ((node = cds_lfht_iter_get_node(&iter)) != NULL) { + struct ag_type_entry *entry; + entry = caa_container_of(node, struct ag_type_entry, ht_node); + + _app_group_del_type(ht, node, group, true, entry->engine); + + cds_lfht_next(ht, &iter); + } +} + +static void +ag_proto_free(struct rcu_head *head) +{ + struct ag_proto_entry *entry; + entry = caa_container_of(head, struct ag_proto_entry, rcu); + + free(entry); +} + +static int +_app_group_del_proto(struct cds_lfht *ht, struct cds_lfht_node *node, + struct app_group *group, + bool remove_engine) +{ + struct ag_proto_entry *entry; + entry = caa_container_of(node, struct ag_proto_entry, ht_node); + + cds_lfht_del(ht, node); + call_rcu(&entry->rcu, ag_proto_free); + + if (!remove_engine) + /* No need to remove the engine, + * so declare success. + */ + return 0; + + uint8_t engine = (uint8_t) (entry->proto >> DPI_ENGINE_SHIFT); + + return pop_engine(group, engine) ? 0 : -EINVAL; +} + +int +app_group_del_proto(struct app_group *group, uint32_t proto, bool remove_engine) +{ + if (!group) + return -EINVAL; + + /* Search for node. */ + struct cds_lfht_node *ht_node; + + ht_node = app_group_find_proto(group, proto); + if (!ht_node) + return -EINVAL; + + return _app_group_del_proto(group->ag_proto_ht, ht_node, group, + remove_engine); +} + +/* Remove all the protocols from an application group. 
*/ +void +app_group_del_all_protos(struct app_group *group) +{ + struct cds_lfht_iter iter; + struct cds_lfht_node *node; + + struct cds_lfht *ht = group->ag_proto_ht; + if (!ht) + return; + + cds_lfht_first(ht, &iter); + + while ((node = cds_lfht_iter_get_node(&iter)) != NULL) { + + _app_group_del_proto(ht, node, group, true); + cds_lfht_next(ht, &iter); + } +} + +struct cds_lfht_node* +app_group_find_app(struct app_group *group, uint32_t app) +{ + struct cds_lfht_iter iter; + unsigned long hash = app; + + if (!group->ag_app_ht) + return NULL; + + cds_lfht_lookup(group->ag_app_ht, hash, ag_app_match, &app, &iter); + struct cds_lfht_node *ht_node = cds_lfht_iter_get_node(&iter); + + return ht_node; +} + +struct cds_lfht_node* +app_group_find_type(struct app_group *group, uint32_t type, uint8_t engine) +{ + struct cds_lfht_iter iter; + unsigned long etype = ((unsigned long)engine << 32) | type; + unsigned long hash = etype; + + cds_lfht_lookup(group->ag_type_ht, hash, ag_type_match, &etype, &iter); + struct cds_lfht_node *ht_node = cds_lfht_iter_get_node(&iter); + + return ht_node; +} + +struct cds_lfht_node* +app_group_find_proto(struct app_group *group, uint32_t proto) +{ + struct cds_lfht_iter iter; + unsigned long hash = proto; + + cds_lfht_lookup(group->ag_proto_ht, hash, ag_proto_match, + &proto, &iter); + struct cds_lfht_node *ht_node = cds_lfht_iter_get_node(&iter); + + return ht_node; +} diff --git a/src/npf/app_group/app_group.h b/src/npf/app_group/app_group.h new file mode 100644 index 00000000..22773628 --- /dev/null +++ b/src/npf/app_group/app_group.h @@ -0,0 +1,186 @@ +/* + * Copyright (c) 2020-2021, AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ + +#ifndef APP_GROUP_H +#define APP_GROUP_H + +#include +#include + +/** + * Application resource group. 
+ */
+struct app_group {
+	struct cds_lfht *ag_app_ht;	// App-group "application" hash table
+	struct cds_lfht *ag_type_ht;	// App-group "type" hash table
+	struct cds_lfht *ag_proto_ht;	// App-group "protocol" hash table
+	uint32_t engine_refcount[2];	// Per-engine reference counts, indexed
+					// by dpi_engine_id_to_idx()
+	struct cds_list_head deadlist;	// Linkage on the GC list once the
+					// group is handed to app_group_rm_group()
+	bool is_dead;			// Set by the first app_group_gc() pass
+					// that sees the group; a later pass
+					// destroys it
+};
+
+/**
+ * Create a new, empty application resource group.
+ *
+ * @return new application resource group or NULL on allocation failure.
+ */
+struct app_group *
+app_group_init(void);
+
+/**
+ * Destroy the given application resource group.
+ *
+ * @param group Group to destroy, can be NULL.
+ * @return void.
+ */
+void
+app_group_destroy(struct app_group *group);
+
+/**
+ * Add the given application resource group to a GC list for later deletion.
+ *
+ * @param group Group to destroy, can be NULL.
+ * @return void.
+ */
+void
+app_group_rm_group(struct app_group *group);
+
+/**
+ * Periodic garbage collection.
+ *
+ * A group on the GC list is marked dead by the first call that sees it
+ * and is destroyed by a later call.
+ */
+void
+app_group_gc(void);
+
+/**
+ * Add an application to the given application resource group.
+ *
+ * @param group group to add to.
+ * @param app application ID.
+ * @return 0 on success, -EINVAL if group is NULL or the application's
+ * engine cannot be recorded, -ENOMEM if not enough memory to add ID, or
+ * -EEXIST if the application is already in the group.
+ */
+int
+app_group_add_app(struct app_group *group, uint32_t app);
+
+/**
+ * Add a type to the given application resource group.
+ *
+ * @param group group to add to.
+ * @param type type ID.
+ * @param engine_id Engine ID associated with the type.
+ * @return 0 on success, -EINVAL if group is NULL or the engine cannot be
+ * recorded, -ENOMEM if not enough memory to add ID, or -EEXIST if the
+ * type is already in the group.
+ */
+int
+app_group_add_type(struct app_group *group, uint32_t type, uint8_t engine_id);
+
+/**
+ * Add a protocol to the given application resource group.
+ *
+ * @param group group to add to.
+ * @param proto protocol ID.
+ * @return 0 on success, -EINVAL if group is NULL or the protocol's engine
+ * cannot be recorded, -ENOMEM if not enough memory to add ID, or
+ * -EEXIST if the protocol is already in the group.
+ */
+int
+app_group_add_proto(struct app_group *group, uint32_t proto);
+
+/**
+ * Remove an application from the given application resource group.
+ *
+ * @param group group to remove from.
+ * @param app application ID.
+ * @param remove_engine pop the engine if true.
+ * @return 0 on success; -EINVAL if group is NULL, if no matching ID is
+ * found, or if remove_engine is true and the engine could not be popped.
+ */
+int
+app_group_del_app(struct app_group *group, uint32_t app, bool remove_engine);
+
+/**
+ * Remove all applications from the given application resource group.
+ *
+ * @param group group to remove from.
+ */
+void
+app_group_del_all_apps(struct app_group *group);
+
+/**
+ * Remove a type from the given application resource group.
+ *
+ * @param group group to remove from.
+ * @param type the type ID to be removed.
+ * @param engine_id the engine ID associated with the type.
+ * @param remove_engine pop the engine if true.
+ * @return 0 on success; -EINVAL if group is NULL, if no matching ID is
+ * found, or if remove_engine is true and the engine could not be popped.
+ */
+int
+app_group_del_type(struct app_group *group, uint32_t type, uint8_t engine_id,
+		   bool remove_engine);
+
+/**
+ * Remove all types from the given application resource group.
+ *
+ * @param group group to remove from.
+ */
+void
+app_group_del_all_types(struct app_group *group);
+
+/**
+ * Remove a proto from the given application resource group.
+ *
+ * @param group group to remove from.
+ * @param proto proto ID.
+ * @param remove_engine pop the engine if true.
+ * @return 0 on success; -EINVAL if group is NULL, if no matching ID is
+ * found, or if remove_engine is true and the engine could not be popped.
+ */
+int
+app_group_del_proto(struct app_group *group, uint32_t proto,
+		    bool remove_engine);
+
+/**
+ * Remove all protos from the given application resource group.
+ *
+ * @param group group to remove from.
+ * @return void.
+ */
+void
+app_group_del_all_protos(struct app_group *group);
+
+/**
+ * Determine if the given application ID is in the given application
+ * resource group.
+ *
+ * @param group group to check.
+ * @param app ID to look for.
+ * @return pointer to node if found; NULL if not found. + */ +struct cds_lfht_node* +app_group_find_app(struct app_group *group, uint32_t app); + +/** + * Determine if the given type ID is in the given application + * resource group. + * + * @param group group to check. + * @param type ID to look for. + * @param engine_id engine ID associated with the type. + * @return pointer to node if found; NULL if not found. + */ +struct cds_lfht_node* +app_group_find_type(struct app_group *group, uint32_t type, uint8_t engine_id); + +/** + * Determine if the given protocol ID is in the given application + * resource group. + * + * @param group group to check. + * @param proto ID to look for. + * @return pointer to node if found; NULL if not found. + */ +struct cds_lfht_node* +app_group_find_proto(struct app_group *group, uint32_t proto); + +#endif /* APP_GROUP_H */ diff --git a/src/npf/app_group/app_group_cmd.c b/src/npf/app_group/app_group_cmd.c new file mode 100644 index 00000000..47945b3b --- /dev/null +++ b/src/npf/app_group/app_group_cmd.c @@ -0,0 +1,281 @@ +/* + * Copyright (c) 2020-2021, AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ + +#include +#include +#include "app_group.h" +#include "app_group_db.h" +#include "app_group_cmd.h" +#include "npf/dpi/dpi_internal.h" + +/** + * Split the given "engine:name" string. + * + * Split data at the first ':'. + * Replace ':' with '\0' and set *second to the * char after. + */ +static bool +split_data(char *data, char **second) +{ + char *split = strchr(data, ':'); + + /* Can only happen if 'end-app-res-grp' did not create arguments */ + if (unlikely(!split)) + return false; + + *split = '\0'; + split++; + + *second = split; + + return true; +} + +/* Parse a single "engine:application" string. + * Add it to, or remove it from, the specified application group. 
+ */ +static int +parse_app(char *data, struct app_group *group, bool del) +{ + char *app_name; + if (!split_data(data, &app_name)) + return -EINVAL; + + uint8_t engine_id = dpi_engine_name_to_id(data); + if (engine_id == IANA_RESERVED) + return -EINVAL; + + int ret = dpi_init(engine_id); + if (ret != 0) + return -ENOMEM; + + uint32_t app_id = dpi_app_name_to_id(engine_id, app_name); + if (app_id == DPI_APP_ERROR) + return -EINVAL; + + if (del) + return app_group_del_app(group, app_id, true); + + return app_group_add_app(group, app_id); +} + +/* Parse a single "engine:type" string. + * Add it to, or remove it from, the specified application group. + */ +static int +parse_type(char *data, struct app_group *group, bool del) +{ + char *type_name; + if (!split_data(data, &type_name)) + return -EINVAL; + + uint8_t engine_id = dpi_engine_name_to_id(data); + if (engine_id == IANA_RESERVED) + return -EINVAL; + + int ret = dpi_init(engine_id); + if (ret != 0) + return -ENOMEM; + + uint32_t type_id = dpi_app_type_name_to_id(engine_id, type_name); + if (type_id == DPI_APP_ERROR) + return -EINVAL; + + if (del) + return app_group_del_type(group, type_id, engine_id, true); + + return app_group_add_type(group, type_id, engine_id); +} + +/* Parse a single "engine:protocol" string. + * Add it to, or remove it from, the specified application group. + */ +static int +parse_proto(char *data, struct app_group *group, bool del) +{ + char *proto_name; + if (!split_data(data, &proto_name)) + return -EINVAL; + + uint8_t engine_id = dpi_engine_name_to_id(data); + if (engine_id == IANA_RESERVED) + return -EINVAL; + + int ret = dpi_init(engine_id); + if (ret != 0) + return -ENOMEM; + + uint32_t proto_id = dpi_app_name_to_id(engine_id, proto_name); + if (proto_id == DPI_APP_ERROR) + return -EINVAL; + + if (del) + return app_group_del_proto(group, proto_id, true); + + return app_group_add_proto(group, proto_id); +} + +/** + * Find each entry in args, NULL-terminate it and pass it to parse_arg. 
+ *
+ * Expected structure:
+ *	arg  := engine:name
+ *	args := arg | arg,...,arg
+ *
+ * Parsing stops at, and returns, the first non-zero parse_arg result.
+ *
+ * @param group Application resource group to modify.
+ * @param args arguments to parse; the commas are overwritten in place.
+ * @param del True if should delete parsed entries from given group.
+ * @param parse_arg function to parse each entry.
+ */
+static int
+parse_argument(struct app_group *group, char *args, bool del,
+	       int (*parse_arg)(char *, struct app_group*, bool del))
+{
+	char *next;
+	int ret;
+
+	while ((next = strchr(args, ',')) != NULL) {
+		*next = '\0';
+		next++;
+		ret = parse_arg(args, group, del);
+		if (ret != 0)
+			return ret;
+
+		args = next;
+	}
+
+	/* Handle both single arg and final arg */
+	return parse_arg(args, group, del);
+}
+
+int
+app_group_add(char *name, char *args)
+{
+	/* Name and args are both required: args is scanned with strchr()
+	 * below, so it must not be NULL.
+	 */
+	if (!name || !args)
+		return -EINVAL;
+
+	/* Ensure database is initialised */
+	if (!app_group_db_init())
+		return -ENOMEM;
+
+	/* Split args into apps, protos, types. */
+	char *apps = args;
+
+	char *protos = strchr(apps, ';');
+	if (!protos)
+		return -EINVAL;
+
+	*protos = '\0';
+	protos++;
+
+	char *types = strchr(protos, ';');
+	if (!types)
+		return -EINVAL;
+
+	*types = '\0';
+	types++;
+
+	/* Create a new, empty, group. */
+	struct app_group *new_group = app_group_init();
+	if (!new_group)
+		return -ENOMEM;
+
+	/* Add any new applications, types and protocols (in that order),
+	 * stopping at the first failure.
+	 */
+	int ret = 0;
+	if (*apps != '\0')
+		ret = parse_argument(new_group, apps, false, parse_app);
+	if (ret == 0 && *types != '\0')
+		ret = parse_argument(new_group, types, false, parse_type);
+	if (ret == 0 && *protos != '\0')
+		ret = parse_argument(new_group, protos, false, parse_proto);
+
+	if (ret != 0) {
+		/* Release the partially built group rather than leak it. */
+		app_group_destroy(new_group);
+		return ret;
+	}
+
+	/* Either find an existing entry with the same name
+	 * - we will update that entry.
+	 *
+	 * Or create a new entry.
+	 */
+	struct agdb_entry *entry = app_group_db_find_name(name);
+	if (!entry) {
+		/* There's no existing entry, so we need to make a new one. */
+
+		entry = app_group_db_find_or_alloc(name);
+		if (!entry) {
+			/* We weren't able to make the new entry. */
+			app_group_destroy(new_group);
+			return -ENOMEM;
+		}
+	}
+
+	/* If we're modifying an existing struct agdb_entry:
+	 *
+	 * Application firewalls cache struct agdb_entry in struct appfw_rule,
+	 * so we can't just free the old agdb_entry and create a new one
+	 * since appFWs would be left holding a stale pointer.
+	 *
+	 * So we'd need to walk all the appFWs and swap in the new agdb_entry.
+	 * Then we could free the old one.
+	 *
+	 * But agdb_entry is hashed by name, so at least temporarily
+	 * we'd have two entries with the same name. To prevent a
+	 * parallel config process from picking up the old agdb_entry
+	 * while we're walking all the appFWs, we'd need to remove the old
+	 * agdb_entry->name and wait for an RCU interval before
+	 * walking all the appFWs to swap in the new one.
+	 *
+	 * More simply, just leave the appFWs with their cached agdb_entry,
+	 * and just swap the new struct app_group which we've just made
+	 * into the existing agdb_entry.
+	 *
+	 * So there's no need to wait for an RCU period
+	 * nor to walk the appFWs.
+	 */
+	struct app_group *old_group = entry->group;
+	if (!old_group) {
+		/* The entry has no group. */
+		app_group_destroy(new_group);
+		return -EINVAL;
+	}
+
+	/* Now swap the contents of new_group with the old_group. */
+
+	new_group->ag_app_ht =
+		rcu_xchg_pointer(&old_group->ag_app_ht, new_group->ag_app_ht);
+	new_group->ag_type_ht =
+		rcu_xchg_pointer(&old_group->ag_type_ht, new_group->ag_type_ht);
+	new_group->ag_proto_ht =
+		rcu_xchg_pointer(&old_group->ag_proto_ht, new_group->ag_proto_ht);
+	old_group->engine_refcount[0] = new_group->engine_refcount[0];
+	old_group->engine_refcount[1] = new_group->engine_refcount[1];
+
+	/* Delete the old stuff that's been swapped into new_group. */
+	app_group_rm_group(new_group);
+
+	return 0;
+}
+
+bool
+app_group_del(char *name)
+{
+	if (!name)
+		return false;
+
+	struct agdb_entry *entry = app_group_db_find_name(name);
+
+	return app_group_db_rm_entry(entry);
+}
diff --git a/src/npf/app_group/app_group_cmd.h b/src/npf/app_group/app_group_cmd.h
new file mode 100644
index 00000000..e325131e
--- /dev/null
+++ b/src/npf/app_group/app_group_cmd.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2020, AT&T Intellectual Property. All rights reserved.
+ *
+ * SPDX-License-Identifier: LGPL-2.1-only
+ */
+
+#ifndef APP_GROUP_CMD_H
+#define APP_GROUP_CMD_H
+
+/**
+ * Add a new application resource group from the given params
+ *
+ * @param name Application resource group to modify.
+ * @param args arguments to parse.
+ * @return -ENOMEM if cannot allocate space,
+ *	   -EINVAL if params doesn't match the required structure,
+ *	   or 0 on success.
+ *
+ * Arg structure:
+ *
+ *	args := apps;protos;types
+ *
+ *	apps := app | app,...,app
+ *	app := engine:app_name
+ *
+ *	protos := proto | proto,...,proto
+ *	proto := engine:proto_name
+ *
+ *	types := type | type,...,type
+ *	type := engine:type_name
+ *
+ * e.g. ndpi:facebook,user:chat_app;ndpi:web;ndpi:chat,user:chat
+ *
+ */
+int app_group_add(char *name, char *args);
+
+/**
+ * Delete the application resource group with the given name.
+ *
+ * @param name name of group
+ * @return true on success, false otherwise (i.e. no group found).
+ */
+bool app_group_del(char *name);
+
+#endif /* APP_GROUP_CMD_H */
diff --git a/src/npf/app_group/app_group_db.c b/src/npf/app_group/app_group_db.c
new file mode 100644
index 00000000..1ceae727
--- /dev/null
+++ b/src/npf/app_group/app_group_db.c
@@ -0,0 +1,192 @@
+/*
+ * Copyright (c) 2020-2021, AT&T Intellectual Property. All rights reserved.
+ *
+ * SPDX-License-Identifier: LGPL-2.1-only
+ */
+
+/*
+ * Application resource group database.
+ */
+
+#include
+#include "npf/config/npf_config.h"
+#include "npf/dpi/dpi_internal.h"
+#include "app_group_db.h"
+#include "app_group.h"
+
+#define APP_GRP_NAME_HT_SIZE 32
+#define APP_GRP_NAME_HT_MIN 32
+#define APP_GRP_NAME_HT_MAX 8192
+#define APP_GRP_NAME_HT_FLAGS (CDS_LFHT_AUTO_RESIZE | CDS_LFHT_ACCOUNTING)
+
+/* App group DB garbage collector. */
+static CDS_LIST_HEAD(app_group_db_gc_list);
+static struct rte_timer ag_gc_timer;
+#define AG_GC_INTERVAL 30
+
+/* Application resource group database hash table. */
+static struct cds_lfht *app_grp_ht;
+
+static uint32_t hash_seed;
+
+/* Forward */
+static void
+app_group_db_gc(struct rte_timer *t __rte_unused, void *arg __rte_unused);
+
+bool
+app_group_db_init(void)
+{
+	if (app_grp_ht)
+		return true;
+
+	app_grp_ht = cds_lfht_new(APP_GRP_NAME_HT_SIZE,
+				  APP_GRP_NAME_HT_MIN,
+				  APP_GRP_NAME_HT_MAX,
+				  APP_GRP_NAME_HT_FLAGS,
+				  NULL);
+
+	if (!app_grp_ht)
+		return false;
+
+	hash_seed = random();
+
+	/* Start the GC timer. */
+	rte_timer_init(&ag_gc_timer);
+	rte_timer_reset(&ag_gc_timer,
+			(AG_GC_INTERVAL * rte_get_timer_hz()), PERIODICAL,
+			rte_get_master_lcore(), app_group_db_gc, NULL);
+
+	return true;
+}
+
+bool
+app_group_db_rm_entry(struct agdb_entry *entry)
+{
+	if (!entry)
+		return false;
+
+	/* Queue for GC only if this call actually unlinked the node. */
+	if (cds_lfht_del(app_grp_ht, &entry->ht_node) != 0)
+		return false;
+
+	cds_list_add(&entry->deadlist, &app_group_db_gc_list);
+
+	return true;
+}
+
+/*
+ * Periodic GC: entries are marked on one pass and freed on the next.
+ */
+static void
+app_group_db_gc(struct rte_timer *t __rte_unused, void *arg __rte_unused)
+{
+	struct agdb_entry *entry, *tmp;
+
+	cds_list_for_each_entry_safe(entry, tmp, &app_group_db_gc_list,
+				     deadlist) {
+		if (entry->is_dead) {
+			cds_list_del(&entry->deadlist);
+			app_group_destroy(entry->group);
+			/* The name was strdup()ed when the entry was made. */
+			free(entry->name);
+			free(entry);
+		} else {
+			entry->is_dead = true;
+		}
+	}
+
+	/* Finally, remove any old app groups. */
+	app_group_gc();
+}
+
+/*
+ * Match function for the app_grp name hash table.
+ * Returns zero for a non-match, and non-zero for a match.
+ */
+static int
+app_group_db_match(struct cds_lfht_node *ht_node, const void *data)
+{
+	struct agdb_entry *entry;
+	entry = caa_container_of(ht_node, struct agdb_entry, ht_node);
+
+	return !strcmp(data, entry->name);
+}
+
+struct agdb_entry *
+app_group_db_find_name(const char *name)
+{
+	struct cds_lfht_iter iter;
+	unsigned long hash;
+
+	/* Check name before hashing: strlen() must not see NULL. */
+	if (!name || !app_grp_ht)
+		return NULL;
+
+	hash = rte_jhash(name, strlen(name), hash_seed);
+	cds_lfht_lookup(app_grp_ht, hash, app_group_db_match,
+			name, &iter);
+
+	struct cds_lfht_node *ht_node;
+	ht_node = cds_lfht_iter_get_node(&iter);
+
+	if (ht_node)
+		return caa_container_of(ht_node, struct agdb_entry, ht_node);
+
+	return NULL;
+}
+
+struct agdb_entry *
+app_group_db_find_or_alloc(const char *name)
+{
+	/* No name? No entry. */
+	if (!name || !*name)
+		return NULL;
+
+	/* First, search for an existing entry. */
+	struct agdb_entry *entry = app_group_db_find_name(name);
+	if (entry) {
+		/* Already exists, so return it */
+		return entry;
+	}
+
+	/* Not found, so create a new DB entry. */
+	entry = zmalloc_aligned(sizeof(struct agdb_entry));
+	if (!entry)
+		return NULL;
+
+	entry->group = app_group_init();
+	if (!entry->group) {
+		free(entry);
+		return NULL;
+	}
+
+	entry->name = strdup(name);
+	if (!entry->name) {
+		app_group_destroy(entry->group);
+		free(entry);
+		return NULL;
+	}
+
+	/* Add to app_grp hash table.
+	 * Entries are hashed by name.
+	 */
+	cds_lfht_node_init(&entry->ht_node);
+	unsigned long hash = rte_jhash(name, strlen(name), hash_seed);
+
+	struct cds_lfht_node *node;
+	/* add_unique takes a match function: non-zero means "same key". */
+	node = cds_lfht_add_unique(app_grp_ht, hash, app_group_db_match, name,
+				   &entry->ht_node);
+
+	if (node != &entry->ht_node) {
+		/* There's an existing entry that we didn't find earlier.
+		 * So delete the new node and return the existing one.
+		 */
+		app_group_destroy(entry->group);
+		free(entry->name);
+		free(entry);
+		return caa_container_of(node, struct agdb_entry, ht_node);
+	}
+
+	return entry;
+}
diff --git a/src/npf/app_group/app_group_db.h b/src/npf/app_group/app_group_db.h
new file mode 100644
index 00000000..68e87c5c
--- /dev/null
+++ b/src/npf/app_group/app_group_db.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2020-2021, AT&T Intellectual Property. All rights reserved.
+ *
+ * SPDX-License-Identifier: LGPL-2.1-only
+ */
+
+#ifndef APP_GROUP_DB_H
+#define APP_GROUP_DB_H
+
+#include
+#include
+#include "json_writer.h"
+#include "app_group.h"
+#include "urcu.h"
+
+/* Application group database entry. */
+struct agdb_entry {
+	char *name;			/* Name string */
+	struct app_group *group;	/* Application group */
+	uint32_t refcount;		/* Refcount */
+	struct cds_lfht_node ht_node;	/* Group hash table */
+	struct cds_list_head deadlist;	/* Memento mori */
+	bool is_dead;
+};
+
+typedef int (*app_grp_walker_t)(void *ctx, struct agdb_entry *entry);
+
+/**
+ * Initialise the application resource group database.
+ *
+ * @return true on success; false on failure.
+ */
+bool app_group_db_init(void);
+
+/**
+ * Remove the given entry from the appDB hash table and queue it
+ * for deferred freeing by the periodic garbage collector.
+ *
+ * @param entry pointer to the appDB entry to be removed.
+ * @return true on success; false on failure.
+ */
+bool app_group_db_rm_entry(struct agdb_entry *entry);
+
+/**
+ * Lookup the given application resource group name
+ * in the application resource group database.
+ *
+ * @param name entry name to be looked up.
+ * @return pointer to the matching appDB entry, or NULL if not found.
+ */
+struct agdb_entry *app_group_db_find_name(const char *name);
+
+/**
+ * Find an existing appDB entry with the given name.
+ * If not found, then create a new entry holding an empty group.
+ *
+ * @param name entry name to be looked up or created.
+ * @return pointer to the existing or new appDB entry, or NULL on failure.
+ */ +struct agdb_entry *app_group_db_find_or_alloc(const char *name); + +#endif /* APP_GROUP_DB_H */ diff --git a/src/npf/cgnat/cgn.c b/src/npf/cgnat/cgn.c index d6938f33..31ea9c0c 100644 --- a/src/npf/cgnat/cgn.c +++ b/src/npf/cgnat/cgn.c @@ -1,95 +1,85 @@ /* - * Copyright (c) 2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2019-2021, AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only */ /** - * @file cgn.c - CGNAT module init and uninit and other global functions. + * @file cgn.c - CGNAT global variables and event handlers. */ #include -#include #include #include +#include #include "compiler.h" #include "if_var.h" #include "util.h" -#include "soft_ticks.h" #include "dp_event.h" +#include "vplane_log.h" #include "npf/cgnat/cgn.h" +#include "npf/cgnat/cgn_rc.h" #include "npf/apm/apm.h" -#include "npf/cgnat/cgn_errno.h" +#include "npf/cgnat/cgn_cmd_cfg.h" +#include "npf/cgnat/cgn_if.h" #include "npf/cgnat/cgn_policy.h" #include "npf/cgnat/cgn_session.h" #include "npf/cgnat/cgn_source.h" +#include "npf/cgnat/cgn_log.h" #include "npf/nat/nat_pool_event.h" #include "npf/nat/nat_pool_public.h" -/* - * cgnat globals - */ +/************************************************************************** + * CGNAT Global Variables + **************************************************************************/ /* Hairpinning config enable/disable */ bool cgn_hairpinning_gbl = true; -/* Time in millisecs since Epoch relative to soft_ticks==0 */ -static uint64_t cgn_epoch_ms; - -static void cgn_init_time(void) -{ - struct timeval tod; - uint64_t ms; - - gettimeofday(&tod, NULL); - - ms = (tod.tv_sec * 1000) + (tod.tv_usec / 1000); - cgn_epoch_ms = ms - soft_ticks; -} +/* snat-alg-bypass config enable/disable */ +bool cgn_snat_alg_bypass_gbl; /* - * Convert soft_ticks in millisecs to Epoch timestamp in microseconds. + * Simple global counts for the number of dest addr (sess2) hash tables + * created and destroyed. 
These URCU hash tables are fairly resource + * intensive, so we want to get some idea of how often they are required. */ -uint64_t cgn_ticks2timestamp(uint64_t ticks) -{ - return (cgn_epoch_ms + ticks) * 1000; -} +rte_atomic64_t cgn_sess2_ht_created; +rte_atomic64_t cgn_sess2_ht_destroyed; -/* - * Convert start time in soft_ticks into duration in microseconds. - */ -uint64_t cgn_start2duration(uint64_t start_time) -{ - return (soft_ticks - start_time) * 1000; -} +/* max 3-tuple sessions, and sessions used */ +int32_t cgn_sessions_max = CGN_SESSIONS_MAX; /* - * Extract an integer from a string + * Count of all 3-tuple sessions. Incremented and compared against + * cgn_sessions_max before a 3-tuple session is created. If it exceeds + * cgn_sessions_max then cgn_session_table_full is set true. It is + * decremented by the GC routine a time after the session has expired. */ -int cgn_arg_to_int(const char *arg) -{ - char *p; - unsigned long val = strtoul(arg, &p, 10); +rte_atomic32_t cgn_sessions_used; - if (p == arg || val > INT_MAX) - return -1; +/* max 2-tuple sessions per 3-tuple session*/ +int16_t cgn_dest_sessions_max = CGN_DEST_SESSIONS_INIT; - return (uint32_t) val; -} +/* Size of 2-tuple hash table that may be added per 3-tuple session */ +int16_t cgn_dest_ht_max = CGN_DEST_SESSIONS_INIT; -/* - * Format an IPv4 host-byte ordered address - */ -char *cgn_addrstr(uint32_t addr, char *str, size_t slen) -{ - snprintf(str, slen, "%u.%u.%u.%u", - (addr >> 24) & 0xFF, (addr >> 16) & 0xFF, - (addr >> 8) & 0xFF, addr & 0xFF); - return str; -} +/* Global count of all 5-tuple sessions */ +rte_atomic32_t cgn_sess2_used; + +/* Set true when table is full. Re-evaluated after GC. */ +bool cgn_session_table_full; + +/* Is CGNAT helper core enabled? 
*/ +uint8_t cgn_helper_thread_enabled; + + +/************************************************************************** + * CGNAT Event Handlers + **************************************************************************/ /* * NAT pool has been de-activated. Clear all sessions and mappings that @@ -118,12 +108,12 @@ static void cgn_nat_pool_event_init(void) */ static void cgn_init(void) { + cgn_rc_init(); cgn_nat_pool_event_init(); cgn_policy_init(); cgn_session_init(); cgn_source_init(); apm_init(); - cgn_init_time(); } /* @@ -135,6 +125,25 @@ static void cgn_uninit(void) apm_uninit(); cgn_source_uninit(); cgn_policy_uninit(); + cgn_log_disable_all_handlers(); + cgn_rc_uninit(); +} + +/* + * Callback for dataplane DP_EVT_IF_INDEX_UNSET event. + */ +static void +cgn_event_if_index_unset(struct ifnet *ifp, uint32_t ifindex __unused) +{ + /* + * For each policy on interface: + * 1. Clear sessions, + * 2. Remove policy from cgn_if list + * 3. Remove policy from hash table + * 4. Release reference on policy + * Free cgn_if + */ + cgn_if_disable(ifp); } /* @@ -143,18 +152,8 @@ static void cgn_uninit(void) static const struct dp_event_ops cgn_event_ops = { .init = cgn_init, .uninit = cgn_uninit, - .if_index_set = cgn_event_if_index_set, .if_index_unset = cgn_event_if_index_unset, }; /* Register event handler */ DP_STARTUP_EVENT_REGISTER(cgn_event_ops); - - -/* Called from unit-tests */ -void dp_test_npf_clear_cgnat(void) -{ - cgn_session_cleanup(); - apm_cleanup(); - cgn_source_cleanup(); -} diff --git a/src/npf/cgnat/cgn.h b/src/npf/cgnat/cgn.h index 63966ba0..0b829221 100644 --- a/src/npf/cgnat/cgn.h +++ b/src/npf/cgnat/cgn.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2019-2021, AT&T Intellectual Property. All rights reserved. 
* * SPDX-License-Identifier: LGPL-2.1-only */ @@ -11,56 +11,65 @@ #include #include -#include -#include "vplane_log.h" +#include /* - * Packet direction relative to interface with cgnat policy. + * Packet direction relative to interface with cgnat policy. Note that this + * is 1 bit in 'struct cgn_sess2'. */ enum cgn_dir { - CGN_DIR_IN, - CGN_DIR_OUT -}; -#define CGN_DIR_SZ 2 + CGN_DIR_IN = 0, + CGN_DIR_OUT = 1 +} __attribute__ ((__packed__)); -/* Sometimes it makes more sense to refer to forw and back */ -enum cgn_flow { - CGN_DIR_FORW = CGN_DIR_OUT, - CGN_DIR_BACK = CGN_DIR_IN -}; +#define CGN_DIR_SZ 2 static inline enum cgn_dir cgn_reverse_dir(enum cgn_dir dir) { return (dir == CGN_DIR_OUT) ? CGN_DIR_IN : CGN_DIR_OUT; } +/************************************************************************** + * CGNAT Global Variables + **************************************************************************/ + +/* Hairpinning config enable/disable */ extern bool cgn_hairpinning_gbl; -extern rte_atomic64_t cgn_hairpinned_pkts; + +/* snat-alg-bypass config enable/disable */ +extern bool cgn_snat_alg_bypass_gbl; + +/* Configurable max number of 3-tuple sessions */ +extern int32_t cgn_sessions_max; /* - * CGNAT Dataplane Event Handlers + * Count of all 3-tuple sessions. Incremented and compared against + * cgn_sessions_max before a 3-tuple session is created. If it exceeds + * cgn_sessions_max then cgn_session_table_full is set true. */ -void cgn_event_if_index_set(struct ifnet *ifp, uint32_t ifindex); -void cgn_event_if_index_unset(struct ifnet *ifp, uint32_t ifindex); +extern rte_atomic32_t cgn_sessions_used; -struct rte_mbuf *cgn_copy_or_clone_and_undo(struct rte_mbuf *mbuf, - const struct ifnet *in_ifp, - const struct ifnet *out_if, - bool copy); +/* Is session table full? 
*/ +extern bool cgn_session_table_full; -/* Convert a soft_ticks value in milliseconds to an Epoch time in microsecs */ -uint64_t cgn_ticks2timestamp(uint64_t ticks); +/* + * Simple global counts for the number of dest addr (sess2) hash tables + * created and destroyed. These URCU hash tables are fairly resource + * intensive, so we want to get some idea of how often they are required. + */ +extern rte_atomic64_t cgn_sess2_ht_created; +extern rte_atomic64_t cgn_sess2_ht_destroyed; -/* Convert start time in soft_ticks into duration in microseconds */ -uint64_t cgn_start2duration(uint64_t start_time); +/* max 2-tuple sessions per 3-tuple session*/ +extern int16_t cgn_dest_sessions_max; -/* Extract int from string */ -int cgn_arg_to_int(const char *arg); +/* Size of 2-tuple hash table that may be added per 3-tuple session */ +extern int16_t cgn_dest_ht_max; -/* Format host byte order address to string */ -char *cgn_addrstr(uint32_t addr, char *str, size_t slen); +/* Global count of all 5-tuple sessions */ +extern rte_atomic32_t cgn_sess2_used; -/* For unit-tests */ -void dp_test_npf_clear_cgnat(void); +/* Is CGNAT helper core enabled? */ +extern uint8_t cgn_helper_thread_enabled; #endif diff --git a/src/npf/cgnat/cgn_cmd_cfg.c b/src/npf/cgnat/cgn_cmd_cfg.c index 8f1b945b..87a54eb9 100644 --- a/src/npf/cgnat/cgn_cmd_cfg.c +++ b/src/npf/cgnat/cgn_cmd_cfg.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2019-2021, AT&T Intellectual Property. All rights reserved. 
* * SPDX-License-Identifier: LGPL-2.1-only */ @@ -23,10 +23,19 @@ * cgn-cfg policy detach * * ----------------------------------------------- + * Event config + * ----------------------------------------------- + * + * cgn-cfg events rte_log|protobuf enable|disable + * cgn-cfg events protobuf hwm + * cgn-cfg events core [] + * + * ----------------------------------------------- * Other config * ----------------------------------------------- * * cgn-cfg hairpinning {on | off} + * cgn-cfg snat-alg-bypass {on | off} */ #include @@ -35,7 +44,7 @@ #include "commands.h" #include "compiler.h" -#include "config.h" +#include "config_internal.h" #include "if_var.h" #include "util.h" #include "vplane_log.h" @@ -49,188 +58,28 @@ #include "npf/cgnat/cgn_sess_state.h" #include "npf/cgnat/cgn_session.h" #include "npf/cgnat/cgn_cmd_cfg.h" +#include "npf/cgnat/cgn_log.h" +#include "npf/cgnat/cgn_log_protobuf_zmq.h" -/* - * Interface list to handle interface config replay. Entries are identified - * by name - */ -struct cgn_cfg_if_list { - struct cds_list_head if_list; - int if_list_count; -}; - -struct cgn_cfg_if_list_entry { - struct cds_list_head le_node; - char le_ifname[IFNAMSIZ]; - char *le_buf; - char **le_argv; - int le_argc; -}; - -static struct cgn_cfg_if_list *cgn_cfg_list; - -/* Create list */ -static struct cgn_cfg_if_list *cgn_cfg_if_list_create(void) -{ - struct cgn_cfg_if_list *if_list; - - if_list = zmalloc_aligned(sizeof(*if_list)); - if (!if_list) - return NULL; - - CDS_INIT_LIST_HEAD(&if_list->if_list); - if_list->if_list_count = 0; - - return if_list; -} - -/* Add a command to the list */ -static int -cgn_cfg_if_list_add(struct cgn_cfg_if_list *if_list, const char *ifname, - int argc, char *argv[]) -{ - struct cgn_cfg_if_list_entry *le; - int i, size; - - if (strlen(ifname) + 1 > IFNAMSIZ) - return -EINVAL; - - le = zmalloc_aligned(sizeof(*le)); - if (!le) - return -ENOMEM; - - memcpy(le->le_ifname, ifname, strlen(ifname) + 1); - - /* Determine space required 
for arg strings */ - for (size = 0, i = 0; i < argc; i++) - size += (strlen(argv[i]) + 1); - - if (!size) { - free(le); - return -EINVAL; - } - - le->le_buf = malloc(size); - le->le_argv = malloc(argc * sizeof(void *)); - le->le_argc = argc; - - if (!le->le_buf || !le->le_argv) { - free(le->le_buf); - free(le->le_argv); - free(le); - return -ENOMEM; - } - - char *ptr = le->le_buf; - - for (i = 0; i < argc; i++) { - size = strlen(argv[i]) + 1; - memcpy(ptr, argv[i], size); - le->le_argv[i] = ptr; - ptr += size; - } - - cds_list_add_tail(&le->le_node, &if_list->if_list); - if_list->if_list_count++; - - return 0; -} - -/* Remove entry from list, and free it */ -static int -cgn_cfg_if_list_del(struct cgn_cfg_if_list *if_list, - struct cgn_cfg_if_list_entry *le) -{ - if (!if_list || if_list->if_list_count == 0) - return -ENOENT; - - cds_list_del(&le->le_node); - if_list->if_list_count--; - - if (le->le_buf) - free(le->le_buf); - if (le->le_argv) - free(le->le_argv); - free(le); - - return 0; -} - -/* Destroy list */ -static int cgn_cfg_if_list_destroy(struct cgn_cfg_if_list **if_list) -{ - if (!*if_list || (*if_list)->if_list_count) - return -EINVAL; - - free(*if_list); - *if_list = NULL; - return 0; -} - -/* - * Interface has been created. Replay any relevant commands on the interface - * list. 
- */ -void -cgn_event_if_index_set(struct ifnet *ifp, uint32_t ifindex __unused) -{ - struct cgn_cfg_if_list_entry *le, *tmp; - - if (!cgn_cfg_list) - return; - - cds_list_for_each_entry_safe(le, tmp, &cgn_cfg_list->if_list, - le_node) { - - if (strcmp(ifp->if_name, le->le_ifname) != 0) - continue; - - /* Replay command */ - cmd_cgn(NULL, le->le_argc, le->le_argv); - - /* Remove from list and free */ - cgn_cfg_if_list_del(cgn_cfg_list, le); - } +#include "npf/apm/apm.h" +#include "npf/nat/nat_pool.h" +#include "npf/cgnat/cgn_session.h" +#include "npf/cgnat/cgn_source.h" - if (!cgn_cfg_list->if_list_count) - cgn_cfg_if_list_destroy(&cgn_cfg_list); -} /* - * Interface has been deleted. Discard any saved commands. + * Extract an integer from a string */ -void -cgn_event_if_index_unset(struct ifnet *ifp, uint32_t ifindex __unused) +int cgn_arg_to_int(const char *arg) { - struct cgn_cfg_if_list_entry *le, *tmp; - - if (!cgn_cfg_list) - return; - - cds_list_for_each_entry_safe(le, tmp, &cgn_cfg_list->if_list, - le_node) { - - if (strcmp(ifp->if_name, le->le_ifname) != 0) - continue; - - cgn_cfg_if_list_del(cgn_cfg_list, le); - } - - if (!cgn_cfg_list->if_list_count) - cgn_cfg_if_list_destroy(&cgn_cfg_list); -} + char *p; + unsigned long val = strtoul(arg, &p, 10); -/* Initialize command replay list */ -static int cgn_cfg_replay_init(void) -{ - if (!cgn_cfg_list) { - cgn_cfg_list = cgn_cfg_if_list_create(); - if (!cgn_cfg_list) - return -ENOMEM; + if (p == arg || val > INT_MAX) + return -1; - } - return 0; + return (uint32_t) val; } /* @@ -269,7 +118,7 @@ cgn_cfg_ifname_from_arg(char *if_name, int sz, int argc, char **argv) value = c + 1; strncpy(if_name, value, sz-1); - if_name[sz] = '\0'; + if_name[sz - 1] = '\0'; free(item); return if_name; @@ -299,21 +148,11 @@ static int cgn_policy_cfg_attach(FILE *f, int argc, char **argv) goto usage; /* Does interface exist? */ - ifp = ifnet_byifname(ifname); + ifp = dp_ifnet_byifname(ifname); if (!ifp) { - /* No. 
Store command for later replay */
-		if (cgn_cfg_replay_init() != 0) {
-			RTE_LOG(ERR, CGNAT,
-				"Could not set up cgn replay cache\n");
-			goto err_out;
-		}
-
-		cgn_cfg_if_list_add(cgn_cfg_list, ifname, argc, argv);
-
-		RTE_LOG(ERR, CGNAT,
-			"Caching cgn command for interface %s\n", ifname);
-		return 0;
+		RTE_LOG(ERR, CGNAT, "Interface %s not found\n", ifname);
+		goto err_out;
 	}
 
 	/*
@@ -396,7 +235,7 @@ static int cgn_policy_cfg_detach(FILE *f, int argc, char **argv)
 		goto usage;
 
 	/* Does interface exist? */
-	ifp = ifnet_byifname(ifname);
+	ifp = dp_ifnet_byifname(ifname);
 	if (!ifp)
 		return -EEXIST;
 
@@ -413,9 +252,6 @@ static int cgn_policy_cfg_detach(FILE *f, int argc, char **argv)
 	 */
 	cgn_if_del_policy(ifp, cp);
 
-	/* If policy list is now empty, then free cgn intf */
-	cgn_if_gc_intf(ifp, false);
-
 	return 0;
 
 usage:
@@ -460,6 +296,205 @@ static int cgn_policy_cfg(FILE *f, int argc, char **argv)
 	return -1;
 }
 
+/*
+ * cgn-cfg events rte_log <event-type> enable|disable
+ *
+ * <event-type> is one of session, port-block-allocation, subscriber,
+ * or resource-constraint
+ */
+static int cgn_events_cfg_rte_log(FILE *f, int argc, char **argv)
+{
+	const char *ltype_str;
+	enum cgn_log_type ltype;
+	int rc;
+
+	if (argc < 5) {
+		if (f)
+			fprintf(f, "%s: need at least 5 fields", __func__);
+		return -1;
+	}
+
+	ltype_str = argv[3];
+
+	rc = cgn_get_log_type(ltype_str, &ltype);
+	if (rc < 0) {
+		if (f)
+			fprintf(f, "%s: unknown event type %s", __func__,
+				ltype_str);
+		return -1;
+	}
+
+	if (strcmp(argv[4], "enable") == 0) {
+		rc = cgn_log_enable_handler(ltype, "rte_log");
+		if (rc < 0 && rc != -EEXIST) {
+			if (f)
+				fprintf(f, "%s: cgn_log_enable_handler failed "
+					"for type %s", __func__, ltype_str);
+			return -1;
+		}
+	} else if (strcmp(argv[4], "disable") == 0) {
+		rc = cgn_log_disable_handler(ltype, "rte_log");
+		if (rc < 0 && rc != -ENOENT) {
+			if (f)
+				fprintf(f, "%s: cgn_log_disable_handler failed "
+					"for type %s", __func__, ltype_str);
+			return -1;
+		}
+	} else {
+		if (f)
+			fprintf(f, "%s: unexpected value %s for type %s",
+				__func__, argv[4], ltype_str);
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * cgn-cfg events protobuf <event-type> enable|disable|hwm [<hwm>]
+ *
+ * <event-type> is one of session, port-block-allocation, subscriber,
+ * or resource-constraint
+ */
+static int cgn_events_cfg_protobuf(FILE *f, int argc, char **argv)
+{
+	const char *ltype_str;
+	enum cgn_log_type ltype;
+	int rc;
+
+	if (argc < 5) {
+		if (f)
+			fprintf(f, "%s: need at least 5 fields", __func__);
+		return -1;
+	}
+
+	ltype_str = argv[3];
+
+	rc = cgn_get_log_type(ltype_str, &ltype);
+	if (rc < 0) {
+		if (f)
+			fprintf(f, "%s: unknown event type %s", __func__,
+				ltype_str);
+		return -1;
+	}
+
+	if (strcmp(argv[4], "enable") == 0) {
+		rc = cgn_log_enable_handler(ltype, "protobuf");
+		if (rc < 0 && rc != -EEXIST) {
+			if (f)
+				fprintf(f, "%s: cgn_log_enable_handler failed "
+					"for type %s", __func__, ltype_str);
+			return -1;
+		}
+	} else if (strcmp(argv[4], "disable") == 0) {
+		rc = cgn_log_disable_handler(ltype, "protobuf");
+		if (rc < 0 && rc != -ENOENT) {
+			if (f)
+				fprintf(f, "%s: cgn_log_disable_handler failed "
+					"for type %s", __func__, ltype_str);
+			return -1;
+		}
+	} else if (strcmp(argv[4], "hwm") == 0) {
+		int32_t hwm;
+
+		if (argc >= 6)
+			hwm = atoi(argv[5]);
+		else
+			hwm = 0; /* default of unlimited */
+
+		rc = cl_zmq_set_hwm(ltype, hwm);
+		if (rc < 0) {
+			if (f)
+				fprintf(f, "%s: cl_zmq_set_hwm failed "
+					"for type %s, hwm \"%s\"",
+					__func__, ltype_str,
+					argc >= 6 ? argv[5] : "default");
+			return -1;
+		}
+	} else {
+		if (f)
+			fprintf(f, "%s: unexpected value %s for type %s",
+				__func__, argv[4], ltype_str);
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * cgn-cfg events core [<core-num>]
+ *
+ * <core-num> is the number of the core requested to handle session log/export
+ * events as a thread on the dedicated core
+ */
+static int cgn_events_cfg_core(FILE *f, int argc, char **argv)
+{
+	int rc;
+
+	if (argc >= 4) {
+		int core_num = cgn_arg_to_int(argv[3]);
+
+		if (core_num < 0) {
+			if (f)
+				fprintf(f, "%s: core number cannot be negative",
+					__func__);
+			return -1;
+		}
+
+		rc = cgn_set_helper_thread((unsigned int) core_num);
+		if (rc < 0 && f)
+			fprintf(f, "%s: cgn_set_helper_thread failed for "
+				"core %s", __func__, argv[3]);
+		/*
+		 * NB: do not fail, to prevent dataplane from continually
+		 * restarting if the configuration is replayed, as
+		 * unsupported CPU numbers could be configured.
+		 */
+	} else {
+		rc = cgn_disable_helper_thread();
+		if (rc < 0) {
+			if (f)
+				fprintf(f, "%s: cgn_disable_helper_thread "
+					"failed", __func__);
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * cgn-cfg events rte_log|protobuf|core ...
+ */
+static int cgn_events_cfg(FILE *f, int argc, char **argv)
+{
+	int rc = 0;
+
+	if (argc < 3)
+		goto usage;
+
+	if (strcmp(argv[2], "rte_log") == 0)
+		rc = cgn_events_cfg_rte_log(f, argc, argv);
+
+	else if (strcmp(argv[2], "protobuf") == 0)
+		rc = cgn_events_cfg_protobuf(f, argc, argv);
+
+	else if (strcmp(argv[2], "core") == 0)
+		rc = cgn_events_cfg_core(f, argc, argv);
+
+	else
+		goto usage;
+
+	return rc;
+
+usage:
+	if (f)
+		fprintf(f, "%s: cgn-cfg events {rte_log|protobuf|core} ... ",
+			__func__);
+
+	return -1;
+}
+
 /*
  * cgn-cfg hairpinning [on|off]
  */
@@ -483,6 +517,29 @@ static int cgn_hairpinning_cfg(FILE *f, int argc, char **argv)
 	return -1;
 }
 
+/*
+ * cgn-cfg snat-alg-bypass [on|off]
+ */
+static int cgn_snat_alg_bypass_cfg(FILE *f, int argc, char **argv)
+{
+	if (argc < 3)
+		goto usage;
+
+	/* "on" enables the bypass; any other value disables it */
+	if (strcmp(argv[2], "on") == 0)
+		cgn_snat_alg_bypass_gbl = true;
+	else
+		cgn_snat_alg_bypass_gbl = false;
+
+	return 0;
+usage:
+	if (f)
+		fprintf(f, "%s: cgn-cfg snat-alg-bypass {on|off}",
+			__func__);
+
+	return -1;
+}
+
 /*
  * cgn-cfg max-sessions
  */
@@ -499,7 +556,7 @@ static int cgn_max_sessions_cfg(FILE *f, int argc, char **argv)
 	if (tmp == 0)
 		tmp = CGN_SESSIONS_MAX;
 
-	cgn_sessions_max = tmp;
+	cgn_session_set_max(tmp);
 
 	return 0;
 usage:
@@ -510,6 +567,41 @@ static int cgn_max_sessions_cfg(FILE *f, int argc, char **argv)
 	return -1;
 }
 
+static int
+cgn_max_apms_cfg(FILE *f __unused, int argc __unused, char **argv __unused)
+{
+	/* Deprecated */
+	return 0;
+}
+
+/*
+ * cgn-cfg max-subscribers <num>
+ */
+static int cgn_max_subscribers_cfg(FILE *f, int argc, char **argv)
+{
+	int tmp;
+
+	if (argc < 3)
+		goto usage;
+
+	tmp = cgn_arg_to_int(argv[2]);
+	if (tmp < 0 || tmp > CGN_SRC_TABLE_MAX)
+		return -1;
+
+	if (tmp == 0)
+		tmp = CGN_SRC_TABLE_MAX;
+
+	cgn_source_set_max(tmp);
+
+	return 0;
+usage:
+	if (f)
+		fprintf(f, "%s: cgn-cfg max-subscribers ",
+			__func__);
+
+	return -1;
+}
+
 /*
  * cgn-cfg max-dest-per-session
  *
@@ -518,20 +610,38 @@ static int cgn_max_sessions_cfg(FILE *f, int argc, char **argv)
  */
 static int cgn_max_dest_sessions_cfg(FILE *f, int argc, char **argv)
 {
-	int tmp;
+	int val;
+	uint16_t tmp;
 
 	if (argc < 3)
 		goto usage;
 
-	assert(CGN_DEST_SESSIONS_MAX < USHRT_MAX);
-
-	tmp = cgn_arg_to_int(argv[2]);
-	if (tmp < 0 || tmp > CGN_DEST_SESSIONS_MAX || tmp > (USHRT_MAX - 1))
+	/* Range-check as int: narrowing first would let large values wrap */
+	val = cgn_arg_to_int(argv[2]);
+	if (val < 0 || val > CGN_DEST_SESSIONS_MAX)
 		return -1;
 
+	tmp = (uint16_t)val;
 	if (tmp == 0)
-		tmp = CGN_DEST_SESSIONS_MAX;
-	cgn_dest_sessions_max = (int16_t)tmp;
+		tmp = CGN_DEST_SESSIONS_INIT;
+
+	/*
+	 * cgn_dest_sessions_max is used to limit the number of entries to the
+	 * dest session hash table.
+	 */
+	cgn_dest_sessions_max = tmp;
+
+	/*
+	 * cgn_dest_ht_max is used to initialise the dest session hash table.
+	 * This must be a power of two, so we round up tmp accordingly.
+	 */
+	for (uint16_t po2 = CGN_DEST_SESSIONS_MAX >> 1; po2 > 0; po2 >>= 1) {
+		if (tmp > po2) {
+			tmp = po2 << 1;
+			break;
+		}
+	}
+	cgn_dest_ht_max = tmp;
 
 	return 0;
 usage:
@@ -542,14 +649,97 @@ static int cgn_max_dest_sessions_cfg(FILE *f, int argc, char **argv)
 	return -1;
 }
 
+/*
+ * Remaining command is one of:
+ *
+ * tcp-estab <timeout>
+ * tcp-estab port <port> timeout <timeout>
+ *
+ * Returns the number of arguments consumed, or 0 if there is an error.
+ */
+static int cgn_sess_timeout_tcp_estbd(int argc, char **argv)
+{
+	int port, timeout;
+
+	if (!strcmp(argv[1], "port")) {
+		if (argc < 5)
+			return 0;
+
+		port = cgn_arg_to_int(argv[2]);
+		if (port < 0 || port > USHRT_MAX)
+			return 0;
+
+		if (strcmp(argv[3], "timeout") != 0)
+			return 0;
+
+		timeout = cgn_arg_to_int(argv[4]);
+		if (timeout < 0)
+			return 0;
+
+		cgn_cgn_port_tcp_etime_set(port, timeout);
+
+		/* Five args consumed */
+		return 5;
+	}
+
+	timeout = cgn_arg_to_int(argv[1]);
+	if (timeout < 0)
+		return 0;
+
+	cgn_sess_tcp_etime[CGN_ETIME_TCP_ESTBD] = timeout;
+
+	/* Two args consumed */
+	return 2;
+}
+
+/*
+ * Remaining command is one of:
+ *
+ * udp-estab <timeout>
+ * udp-estab port <port> timeout <timeout>
+ *
+ * Returns the number of arguments consumed, or 0 if there is an error.
+ */ +static int cgn_sess_timeout_udp_estbd(int argc, char **argv) +{ + int port, timeout; + + if (!strcmp(argv[1], "port")) { + if (argc < 5) + return 0; + + port = cgn_arg_to_int(argv[2]); + if (port < 0 || port > USHRT_MAX) + return 0; + + if (strcmp(argv[3], "timeout") != 0) + return 0; + + timeout = cgn_arg_to_int(argv[4]); + if (timeout < 0) + return 0; + + cgn_cgn_port_udp_etime_set(port, timeout); + + /* Five args consumed */ + return 5; + } + + timeout = cgn_arg_to_int(argv[1]); + if (timeout < 0) + return 0; + + cgn_sess_udp_etime[CGN_ETIME_ESTBD] = timeout; + + /* Two args consumed */ + return 2; +} + /* * Session timeouts */ static int cgn_session_timeouts_cfg(FILE *f, int argc, char **argv) { - char *c, *item, *value; - int i, tmp; - /* Move past "cgn-cfg session-timeouts" */ argc -= 2; argv += 2; @@ -560,42 +750,86 @@ static int cgn_session_timeouts_cfg(FILE *f, int argc, char **argv) /* * Parse item/value pairs. We ignore any we do not understand. */ - for (i = 0; i < argc; i++) { - c = strchr(argv[i], '='); - if (!c) - continue; + while (argc > 0) { + char *item; + int tmp; - item = argv[i]; - *c = '\0'; - value = c + 1; + item = argv[0]; - tmp = cgn_arg_to_int(value); - if (tmp < 0) - goto usage; + if (!strcmp(item, "other-opening") && argc >= 2) { + tmp = cgn_arg_to_int(argv[1]); + if (tmp < 0) + goto invalid_value; - if (!strcmp(item, "other-opening")) cgn_sess_other_etime[CGN_ETIME_OPENING] = tmp; - else if (!strcmp(item, "other-estab")) + argc -= 2; + argv += 2; + + } else if (!strcmp(item, "other-estab") && argc >= 2) { + tmp = cgn_arg_to_int(argv[1]); + if (tmp < 0) + goto invalid_value; + cgn_sess_other_etime[CGN_ETIME_ESTBD] = tmp; - else if (!strcmp(item, "udp-opening")) + argc -= 2; + argv += 2; + + } else if (!strcmp(item, "udp-opening") && argc >= 2) { + tmp = cgn_arg_to_int(argv[1]); + if (tmp < 0) + goto invalid_value; + cgn_sess_udp_etime[CGN_ETIME_OPENING] = tmp; - else if (!strcmp(item, "udp-estab")) - 
cgn_sess_udp_etime[CGN_ETIME_ESTBD] = tmp; + argc -= 2; + argv += 2; + + } else if (!strcmp(item, "udp-estab") && argc >= 2) { + tmp = cgn_sess_timeout_udp_estbd(argc, argv); + + if (tmp == 0) + goto invalid_value; + + argc -= tmp; + argv += tmp; + + } else if (!strcmp(item, "tcp-opening") && argc >= 2) { + tmp = cgn_arg_to_int(argv[1]); + if (tmp < 0) + goto invalid_value; - else if (!strcmp(item, "tcp-opening")) cgn_sess_tcp_etime[CGN_ETIME_TCP_OPENING] = tmp; - else if (!strcmp(item, "tcp-estab")) - cgn_sess_tcp_etime[CGN_ETIME_TCP_ESTBD] = tmp; + argc -= 2; + argv += 2; + + } else if (!strcmp(item, "tcp-estab") && argc >= 2) { + tmp = cgn_sess_timeout_tcp_estbd(argc, argv); + + if (tmp == 0) + goto invalid_value; + + argc -= tmp; + argv += tmp; + + } else if (!strcmp(item, "tcp-closing") && argc >= 2) { + tmp = cgn_arg_to_int(argv[1]); + if (tmp < 0) + goto invalid_value; - else if (!strcmp(item, "tcp-closing")) cgn_sess_tcp_etime[CGN_ETIME_TCP_CLOSING] = tmp; + + argc -= 2; + argv += 2; + + } else + goto usage; } return 0; + usage: if (f) fprintf(f, "%s: cgn-cfg " @@ -603,11 +837,93 @@ static int cgn_session_timeouts_cfg(FILE *f, int argc, char **argv) __func__); return -1; + +invalid_value: + if (f) + fprintf(f, "%s: cgn-cfg " + "session-timeouts invalid value %s", + __func__, argv[1]); + + return -1; } +/* Warning threshold setter functions */ +typedef void (*threshold_func_t)(int32_t threshold, uint32_t interval); + +struct threshold_fn_t { + const char *name; + threshold_func_t fn; +}; + +static const struct threshold_fn_t threshold_fns[] = { + { "mapping-table", apm_table_threshold_set }, + { "session-table", session_table_threshold_set }, + { "subscriber-table", subscriber_table_threshold_set }, + { "public-addresses", np_threshold_set_all }, +}; + +/* Configure CGN threshold warning levels */ +static int cgn_threshold_cfg(FILE *f, int argc, char **argv) +{ + bool add; + + if (argc < 3) + goto usage; + + if (strcmp(argv[2], "add") == 0) + add = true; + + 
else if (strcmp(argv[2], "del") == 0) + add = false; + + else + goto usage; + + /* + * Expecting "cgn-config warning del NNNN" + * or "cgn-config warning add NNNN threshold TTT" + * or "cgn-config warning add NNNN threshold TTT interval III" + */ + if (argc < 4) + return -EINVAL; + + char *name = argv[3]; + int32_t interval = 0; + int32_t threshold = 0; + + if (add) { + if (argc < 6) + return -EINVAL; + + assert(!strcmp("threshold", argv[4])); + threshold = atoi(argv[5]); + + if (argc >= 8) { + assert(!strcmp("interval", argv[6])); + interval = atoi(argv[7]); + } + } + + for (uint32_t i = 0; i < ARRAY_SIZE(threshold_fns); i++) { + if (!strcmp(name, threshold_fns[i].name)) { + threshold_fns[i].fn(threshold, interval); + return 0; + } + } + + /* No function found */ + return -1; + +usage: + if (f) + fprintf(f, "%s: cgn-cfg warning {add|delete} ... ", + __func__); + + return -1; +} /* - * cgn-cfg [policy | hairpinning] ... + * cgn-cfg [] ... * cgn-ut ... */ int cmd_cgn(FILE *f, int argc, char **argv) @@ -620,27 +936,47 @@ int cmd_cgn(FILE *f, int argc, char **argv) if (strcmp(argv[1], "policy") == 0) rc = cgn_policy_cfg(f, argc, argv); + else if (strcmp(argv[1], "events") == 0) + rc = cgn_events_cfg(f, argc, argv); + else if (strcmp(argv[1], "hairpinning") == 0) rc = cgn_hairpinning_cfg(f, argc, argv); + else if (strcmp(argv[1], "snat-alg-bypass") == 0) + rc = cgn_snat_alg_bypass_cfg(f, argc, argv); + else if (strcmp(argv[1], "max-sessions") == 0) rc = cgn_max_sessions_cfg(f, argc, argv); + else if (strcmp(argv[1], "max-apms") == 0) + rc = cgn_max_apms_cfg(f, argc, argv); + + else if (strcmp(argv[1], "max-subscribers") == 0) + rc = cgn_max_subscribers_cfg(f, argc, argv); + else if (strcmp(argv[1], "max-dest-per-session") == 0) rc = cgn_max_dest_sessions_cfg(f, argc, argv); else if (strcmp(argv[1], "session-timeouts") == 0) rc = cgn_session_timeouts_cfg(f, argc, argv); + else if (strcmp(argv[1], "warning") == 0) + rc = cgn_threshold_cfg(f, argc, argv); + else goto 
usage; return rc; usage: - if (f) - fprintf(f, "%s: cgn-cfg {policy} {add|delete} ... ", - __func__); + if (f) { + if (argc < 2) + fprintf(f, "%s: cgn-cfg with no type parameter", + __func__); + else + fprintf(f, "%s: cgn-cfg with unknown type: %s", + __func__, argv[1]); + } return -1; } diff --git a/src/npf/cgnat/cgn_cmd_cfg.h b/src/npf/cgnat/cgn_cmd_cfg.h index d29dcc4c..b3b96fb8 100644 --- a/src/npf/cgnat/cgn_cmd_cfg.h +++ b/src/npf/cgnat/cgn_cmd_cfg.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2019-2021, AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only */ @@ -9,6 +9,9 @@ #include "npf/npf_addr.h" +/* Extract int from string */ +int cgn_arg_to_int(const char *arg); + /* npf_rule_gen.c */ int npf_parse_ip_addr(char *value, sa_family_t *fam, npf_addr_t *addr, npf_netmask_t *masklen, bool *negate); diff --git a/src/npf/cgnat/cgn_cmd_op.c b/src/npf/cgnat/cgn_cmd_op.c index e50f950b..9afe9cc1 100644 --- a/src/npf/cgnat/cgn_cmd_op.c +++ b/src/npf/cgnat/cgn_cmd_op.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2019-2021, AT&T Intellectual Property. All rights reserved. 
* * SPDX-License-Identifier: LGPL-2.1-only */ @@ -14,7 +14,7 @@ #include "commands.h" #include "compiler.h" -#include "config.h" +#include "config_internal.h" #include "if_var.h" #include "util.h" #include "vplane_log.h" @@ -26,11 +26,12 @@ #include "npf/cgnat/cgn.h" #include "npf/apm/apm.h" -#include "npf/cgnat/cgn_errno.h" +#include "npf/cgnat/cgn_rc.h" #include "npf/cgnat/cgn_if.h" #include "npf/cgnat/cgn_policy.h" #include "npf/cgnat/cgn_session.h" #include "npf/cgnat/cgn_source.h" +#include "npf/cgnat/cgn_log_protobuf_zmq.h" static void cgn_show_summary(FILE *f, int argc __unused, char **argv __unused) @@ -57,10 +58,17 @@ static void cgn_show_summary(FILE *f, int argc __unused, char **argv __unused) jsonw_uint_field(json, "subs_table_max", cgn_source_get_max()); jsonw_uint_field(json, "apm_table_used", apm_get_used()); - jsonw_uint_field(json, "apm_table_max", apm_get_max()); + jsonw_uint_field(json, "apm_table_max", 0); /* deprecated */ jsonw_uint_field(json, "pkts_hairpinned", - rte_atomic64_read(&cgn_hairpinned_pkts)); + cgn_rc_read(CGN_DIR_OUT, CGN_HAIRPINNED)); + + if (rte_atomic64_read(&cgn_sess2_ht_created) > 0) { + jsonw_uint_field(json, "sess_ht_created", + rte_atomic64_read(&cgn_sess2_ht_created)); + jsonw_uint_field(json, "sess_ht_destroyed", + rte_atomic64_read(&cgn_sess2_ht_destroyed)); + } /* * Also summarize select error counts. 
Mosts counts will only ever @@ -69,51 +77,63 @@ static void cgn_show_summary(FILE *f, int argc __unused, char **argv __unused) */ uint64_t count; - count = rte_atomic64_read(&cgn_errors[CGN_DIR_OUT][CGN_PCY_ENOENT]); + count = cgn_rc_read(CGN_DIR_OUT, CGN_PCY_ENOENT); jsonw_uint_field(json, "nopolicy", count); - count = rte_atomic64_read(&cgn_errors[CGN_DIR_IN][CGN_SESS_ENOENT]); + count = cgn_rc_read(CGN_DIR_IN, CGN_SESS_ENOENT); jsonw_uint_field(json, "nosess", count); - count = rte_atomic64_read(&cgn_errors[CGN_DIR_OUT][CGN_BUF_PROTO]); - count += rte_atomic64_read(&cgn_errors[CGN_DIR_OUT][CGN_BUF_ICMP]); + count = cgn_rc_read(CGN_DIR_OUT, CGN_PCY_BYPASS); + jsonw_uint_field(json, "bypass", count); + + count = cgn_rc_read(CGN_DIR_IN, CGN_POOL_ENOENT); + jsonw_uint_field(json, "nopool", count); + + count = cgn_rc_read(CGN_DIR_OUT, CGN_BUF_PROTO); + count += cgn_rc_read(CGN_DIR_OUT, CGN_BUF_ICMP); jsonw_uint_field(json, "etrans", count); count = 0; - count += rte_atomic64_read(&cgn_errors[CGN_DIR_OUT][CGN_S1_ENOMEM]); - count += rte_atomic64_read(&cgn_errors[CGN_DIR_OUT][CGN_S2_ENOMEM]); - count += rte_atomic64_read(&cgn_errors[CGN_DIR_OUT][CGN_PB_ENOMEM]); - count += rte_atomic64_read(&cgn_errors[CGN_DIR_OUT][CGN_SRC_ENOMEM]); + count += cgn_rc_read(CGN_DIR_OUT, CGN_S1_ENOMEM); + count += cgn_rc_read(CGN_DIR_OUT, CGN_S2_ENOMEM); + count += cgn_rc_read(CGN_DIR_OUT, CGN_PB_ENOMEM); + count += cgn_rc_read(CGN_DIR_OUT, CGN_APM_ENOMEM); + count += cgn_rc_read(CGN_DIR_OUT, CGN_SRC_ENOMEM); jsonw_uint_field(json, "enomem", count); count = 0; - count += rte_atomic64_read(&cgn_errors[CGN_DIR_OUT][CGN_MBU_ENOSPC]); - count += rte_atomic64_read(&cgn_errors[CGN_DIR_OUT][CGN_SRC_ENOSPC]); - count += rte_atomic64_read(&cgn_errors[CGN_DIR_OUT][CGN_BLK_ENOSPC]); - count += rte_atomic64_read(&cgn_errors[CGN_DIR_OUT][CGN_APM_ENOSPC]); - count += rte_atomic64_read(&cgn_errors[CGN_DIR_OUT][CGN_POOL_ENOSPC]); - count += rte_atomic64_read(&cgn_errors[CGN_DIR_OUT][CGN_S1_ENOSPC]); - 
count += rte_atomic64_read(&cgn_errors[CGN_DIR_OUT][CGN_S2_ENOSPC]); + count += cgn_rc_read(CGN_DIR_OUT, CGN_MBU_ENOSPC); + count += cgn_rc_read(CGN_DIR_OUT, CGN_SRC_ENOSPC); + count += cgn_rc_read(CGN_DIR_OUT, CGN_BLK_ENOSPC); + count += cgn_rc_read(CGN_DIR_OUT, CGN_APM_ENOSPC); + count += cgn_rc_read(CGN_DIR_OUT, CGN_POOL_ENOSPC); + count += cgn_rc_read(CGN_DIR_OUT, CGN_S1_ENOSPC); + count += cgn_rc_read(CGN_DIR_OUT, CGN_S2_ENOSPC); jsonw_uint_field(json, "enospc", count); count = 0; - count += rte_atomic64_read(&cgn_errors[CGN_DIR_OUT][CGN_S1_EEXIST]); - count += rte_atomic64_read(&cgn_errors[CGN_DIR_OUT][CGN_S2_EEXIST]); - count += rte_atomic64_read(&cgn_errors[CGN_DIR_OUT][CGN_APM_ENOENT]); - count += rte_atomic64_read(&cgn_errors[CGN_DIR_OUT][CGN_SRC_ENOENT]); + count += cgn_rc_read(CGN_DIR_OUT, CGN_S1_EEXIST); + count += cgn_rc_read(CGN_DIR_OUT, CGN_S2_EEXIST); + count += cgn_rc_read(CGN_DIR_OUT, CGN_SRC_ENOENT); jsonw_uint_field(json, "ethread", count); count = 0; - count += rte_atomic64_read(&cgn_errors[CGN_DIR_OUT][CGN_BUF_ENOL3]); - count += rte_atomic64_read(&cgn_errors[CGN_DIR_OUT][CGN_BUF_ENOL4]); - count += rte_atomic64_read(&cgn_errors[CGN_DIR_OUT][CGN_BUF_ENOMEM]); - count += rte_atomic64_read(&cgn_errors[CGN_DIR_OUT][CGN_BUF_ENOSPC]); - count += rte_atomic64_read(&cgn_errors[CGN_DIR_IN][CGN_BUF_ENOL3]); - count += rte_atomic64_read(&cgn_errors[CGN_DIR_IN][CGN_BUF_ENOL4]); - count += rte_atomic64_read(&cgn_errors[CGN_DIR_IN][CGN_BUF_ENOMEM]); - count += rte_atomic64_read(&cgn_errors[CGN_DIR_IN][CGN_BUF_ENOSPC]); + count += cgn_rc_read(CGN_DIR_OUT, CGN_BUF_ENOL3); + count += cgn_rc_read(CGN_DIR_OUT, CGN_BUF_ENOL4); + count += cgn_rc_read(CGN_DIR_OUT, CGN_BUF_ENOMEM); + count += cgn_rc_read(CGN_DIR_IN, CGN_BUF_ENOL3); + count += cgn_rc_read(CGN_DIR_IN, CGN_BUF_ENOL4); + count += cgn_rc_read(CGN_DIR_IN, CGN_BUF_ENOMEM); jsonw_uint_field(json, "embuf", count); + jsonw_uint_field(json, "icmp_echoreq", + cgn_rc_read(CGN_DIR_IN, CGN_ICMP_ECHOREQ)); + + 
jsonw_uint_field(json, "pcp_ok", + cgn_rc_read(CGN_DIR_OUT, CGN_PCP_OK)); + jsonw_uint_field(json, "pcp_err", + cgn_rc_read(CGN_DIR_OUT, CGN_PCP_ERR)); + jsonw_end_object(json); jsonw_destroy(&json); } @@ -121,7 +141,8 @@ static void cgn_show_summary(FILE *f, int argc __unused, char **argv __unused) /* * Write json for errors in one direction */ -static void cgn_show_errors_dir(json_writer_t *json, int dir, const char *name) +static void cgn_show_errors_dir(json_writer_t *json, enum cgn_dir dir, + const char *name) { uint64_t count; int err; @@ -129,12 +150,12 @@ static void cgn_show_errors_dir(json_writer_t *json, int dir, const char *name) jsonw_name(json, name); jsonw_start_array(json); - for (err = 1; err <= CGN_ERRNO_LAST; err++) { + for (err = 1; err <= CGN_RC_LAST; err++) { jsonw_start_object(json); - count = rte_atomic64_read(&cgn_errors[dir][err]); - jsonw_string_field(json, "name", cgn_errno_str(err)); - jsonw_string_field(json, "desc", cgn_errno_detail_str(err)); + count = cgn_rc_read(dir, err); + jsonw_string_field(json, "name", cgn_rc_str(err)); + jsonw_string_field(json, "desc", cgn_rc_detail_str(err)); jsonw_uint_field(json, "errno", err); jsonw_uint_field(json, "count", count); @@ -165,6 +186,16 @@ static void cgn_show_errors(FILE *f, int argc __unused, char **argv __unused) jsonw_destroy(&json); } +static void cgn_clear_errors(int argc __unused, char **argv __unused) +{ + uint err; + + for (err = 1; err <= CGN_RC_LAST; err++) { + cgn_rc_clear(CGN_DIR_OUT, err); + cgn_rc_clear(CGN_DIR_IN, err); + } +} + /* * Unit-test specific op commands */ @@ -173,8 +204,20 @@ static int cgn_op_ut(FILE *f __unused, int argc, char **argv) if (argc < 3) return 0; - if (!strcmp(argv[2], "gc")) - cgn_session_gc_pass(); + if (!strcmp(argv[2], "gc")) { + if (argc < 4) { + cgn_session_gc_pass(); + cgn_source_gc_pass(); + apm_gc_pass(); + } else { + if (!strcmp(argv[3], "session")) + cgn_session_gc_pass(); + else if (!strcmp(argv[3], "subs")) + cgn_source_gc_pass(); + 
else if (!strcmp(argv[3], "pub")) + apm_gc_pass(); + } + } return 0; } @@ -191,9 +234,31 @@ int cmd_cgn_op(FILE *f, int argc, char **argv) * Clear ... */ if (!strcmp(argv[1], "clear")) { - if (!strcmp(argv[2], "session")) + if (!strcmp(argv[2], "policy")) + cgn_policy_clear(argc, argv); + + else if (!strcmp(argv[2], "subscriber")) + cgn_source_clear_or_update(argc, argv, true); + + else if (!strcmp(argv[2], "session")) cgn_session_clear(f, argc, argv); + else if (!strcmp(argv[2], "errors")) + cgn_clear_errors(argc, argv); + + return 0; + } + + /* + * Update ... + */ + if (!strcmp(argv[1], "update")) { + if (!strcmp(argv[2], "subscriber")) + cgn_source_clear_or_update(argc, argv, false); + + else if (!strcmp(argv[2], "session")) + cgn_session_update(f, argc, argv); + return 0; } @@ -219,6 +284,12 @@ int cmd_cgn_op(FILE *f, int argc, char **argv) else if (!strcmp(argv[2], "summary")) cgn_show_summary(f, argc, argv); + else if (!strcmp(argv[2], "zmq")) + cgn_show_zmq(f); + + else if (!strcmp(argv[2], "interface")) + cgn_show_interface(f, argc, argv); + return 0; } diff --git a/src/npf/cgnat/cgn_hash_key.h b/src/npf/cgnat/cgn_hash_key.h new file mode 100644 index 00000000..d0010372 --- /dev/null +++ b/src/npf/cgnat/cgn_hash_key.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2019-2021, AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ + +#ifndef _CGN_HASH_KEY_H_ +#define _CGN_HASH_KEY_H_ + +#include + +/* + * Key for CGNAT 3-tuple hash table + * + * The expired flag is included in the hash key since we do *not* want to + * match on expired sessions. + * + * k_ifindex should be set from cgn_if_key_index() when the key is used to + * create or lookup sessions. This may be different than ifp->if_index. + * + * Note that any op-mode commands that use a key structure to filter sessions + * should set k_ifindex to ifp->if_index since it will be compared with + * cs_ifindex in the sessions. 
+ */ +struct cgn_3tuple_key { + uint32_t k_addr; /* Address (net order) */ + uint32_t k_ifindex; /* Interface or intf group index */ + uint16_t k_port; /* port or id (net order) */ + uint8_t k_ipproto; /* not cgn_proto */ + bool k_expired; /* Expired session */ +} __attribute__((__packed__)); + +/* + * Key for CGNAT 2-tuple hash table + * + * The expired flag is included in the hash key since we do *not* want to + * match on expired sessions. + */ +struct cgn_2tuple_key { + uint32_t k_addr; /* Address (net order) */ + uint16_t k_port; /* port or id (net order) */ + bool k_expired; /* Expired session */ + enum cgn_dir k_dir; +} __attribute__((__packed__)); + +#endif /* _CGN_HASH_KEY_H_ */ diff --git a/src/npf/cgnat/cgn_if.c b/src/npf/cgnat/cgn_if.c index 01c025c7..938b8c10 100644 --- a/src/npf/cgnat/cgn_if.c +++ b/src/npf/cgnat/cgn_if.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2019-2020, AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only */ @@ -12,21 +12,153 @@ #include #include #include +#include #include "compiler.h" #include "if_var.h" +#include "urcu.h" #include "util.h" #include "pl_node.h" #include "pipeline/nodes/pl_nodes_common.h" #include "if_feat.h" -#include "npf/npf_if.h" +#include "npf/npf_addrgrp.h" #include "npf/cgnat/cgn_if.h" #include "npf/cgnat/cgn_policy.h" +/* + * Per-interface CGNAT data + * + * ci_feat_enabled is used to indicate whether this interface has enabled or + * disabled the cgnat pipeline feature. + * + * ci_refcnt is incremented when a policy is added to the policy list or when + * ci_feat_enabled changes to true. + */ +struct cgn_intf { + struct ifnet *ci_ifp; + struct cds_list_head ci_policy_list; + uint ci_policy_count; + uint32_t ci_index; /* Session key index */ + bool ci_feat_enabled; + rte_atomic16_t ci_refcnt; + struct rcu_head ci_rcu_head; +}; + +/* + * Kernel ifindex is a signed int with range -1 - 0x7FFFFFFF. 
The dataplane + * ifindex is a uint32_t. CGN_IF_INDEX_BASE is simply set to a large + * round-ish number greater than 0x80000001 such that is does not look like a + * random number if the show output. + * + * ci_index is either set to ifp->if_index or (CGN_IF_INDEX_BASE + vrf ID) + */ +#define CGN_IF_INDEX_BASE 2220000000 + + +/* + * Get index value to be used in session key + */ +uint32_t cgn_if_key_index(const struct ifnet *ifp) +{ + assert(ifp->if_cgn); + if (likely(ifp->if_cgn)) + return ifp->if_cgn->ci_index; + return ifp->if_index; +} + +static struct cgn_intf *cgn_if_get(struct ifnet *ifp) +{ + if (ifp != NULL) + return ifp->if_cgn; + return NULL; +} + +static void cgn_if_set(struct ifnet *ifp, struct cgn_intf *ci) +{ + if (ifp != NULL) + rcu_assign_pointer(ifp->if_cgn, ci); +} + +static void cgn_if_clear(struct ifnet *ifp) +{ + if (ifp != NULL) + rcu_assign_pointer(ifp->if_cgn, NULL); +} + +struct ifnet *cgn_if_get_ifp(struct cgn_intf *ci) +{ + if (likely(ci != NULL)) + return ci->ci_ifp; + return NULL; +} + +/* Take reference on ci */ +static struct cgn_intf *cgn_ci_get(struct cgn_intf *ci) +{ + if (ci) + rte_atomic16_inc(&ci->ci_refcnt); + return ci; +} + +static void cgn_ci_rcu_free(struct rcu_head *head) +{ + struct cgn_intf *ci = caa_container_of(head, struct cgn_intf, + ci_rcu_head); + free(ci); +} + +/* Release reference on ci */ +static void cgn_ci_put(struct cgn_intf *ci) +{ + if (ci && rte_atomic16_dec_and_test(&ci->ci_refcnt)) { + /* Clear cgnat handle on interface */ + cgn_if_clear(ci->ci_ifp); + + rcu_assign_pointer(ci->ci_ifp, NULL); + + /* Schedule rcu free */ + call_rcu(&ci->ci_rcu_head, cgn_ci_rcu_free); + } +} + +struct cds_list_head *cgn_if_get_policy_list(struct ifnet *ifp) +{ + struct cgn_intf *ci = cgn_if_get(ifp); + + if (likely(ci != NULL)) + return &ci->ci_policy_list; + return NULL; +} + +/* + * Called when a CGNAT policy is attached to an interface + */ +static struct cgn_intf *cgn_if_create(struct ifnet *ifp) +{ + struct cgn_intf 
*ci; + + ci = zmalloc_aligned(sizeof(*ci)); + if (!ci) + return NULL; + + CDS_INIT_LIST_HEAD(&ci->ci_policy_list); + rte_atomic16_set(&ci->ci_refcnt, 0); + + rcu_assign_pointer(ci->ci_ifp, ifp); + + /* ci_index defaults to base + vrf ID */ + ci->ci_index = CGN_IF_INDEX_BASE + ifp->if_vrfid; + + /* Set cgnat handle on interface */ + cgn_if_set(ifp, ci); + + return ci; +} + /* * Enable or disable CGNAT pipeline feature node on an interface */ @@ -46,18 +178,18 @@ void cgn_if_feat_enable(struct ifnet *ifp, bool enable) /* * Insert a policy into list in order or priority, lowest value first. */ -static void -cgn_if_list_insert(struct cgn_policy *cp, struct cds_list_head *head) +static void cgn_if_list_insert(struct cgn_policy *cp, struct cgn_intf *ci) { struct cds_list_head *pos, *newp = &cp->cp_list_node; + struct cds_list_head *policy_list = &ci->ci_policy_list; struct cgn_policy *p; - if (cds_list_empty(head)) { - cds_list_add_rcu(newp, head); - return; + if (cds_list_empty(policy_list)) { + cds_list_add_rcu(newp, policy_list); + goto end; } - for (pos = head->next; pos != head; pos = pos->next) { + for (pos = policy_list->next; pos != policy_list; pos = pos->next) { p = caa_container_of(pos, struct cgn_policy, cp_list_node); @@ -70,18 +202,61 @@ cgn_if_list_insert(struct cgn_policy *cp, struct cds_list_head *head) rcu_assign_pointer(pos->prev->next, newp); rcu_assign_pointer(pos->prev, newp); - return; + goto end; } } - if (pos == head) - cds_list_add_tail_rcu(newp, head); + if (pos == policy_list) + cds_list_add_tail_rcu(newp, policy_list); + +end: + /* Store back pointer to cgn intf struct */ + cp->cp_ci = ci; + + ci->ci_policy_count++; + + /* Take reference on ci while this policy is in the list */ + (void)cgn_ci_get(ci); } /* - * Called via config. May be called via command replay if interface was not - * available at config time. - * - * Note that its possible the npf niif may not be available at this time. 
+ * Remove a policy from an interface list + */ +static void cgn_if_list_remove(struct cgn_policy *cp, struct cgn_intf *ci) +{ + /* Remove policy from the policy list */ + cds_list_del_rcu(&cp->cp_list_node); + + /* Disassociate the node from the list */ + CDS_INIT_LIST_HEAD(&cp->cp_list_node); + + /* Clear back pointer */ + cp->cp_ci = NULL; + + ci->ci_policy_count--; + + /* Assert that list state and list count agree */ + assert(cds_list_empty(&ci->ci_policy_list) == + (ci->ci_policy_count == 0)); + + /* If cgn_intf is empty then disable feature node. */ + if (cds_list_empty(&ci->ci_policy_list)) { + + if (ci->ci_feat_enabled) { + /* Decrement cgnat feature count in interface */ + cgn_if_feat_enable(ci->ci_ifp, false); + ci->ci_feat_enabled = false; + + /* Release reference on ci held by ci_feat_enabled */ + cgn_ci_put(ci); + } + } + + /* Release reference on ci that was held while policy was in list */ + cgn_ci_put(ci); +} + +/* + * Called via config. */ int cgn_if_add_policy(struct ifnet *ifp, struct cgn_policy *cp) { @@ -91,61 +266,32 @@ int cgn_if_add_policy(struct ifnet *ifp, struct cgn_policy *cp) if (cgn_if_find_policy_by_name(ifp, cp->cp_name)) return 0; - ci = npf_if_get_cgn(ifp); + ci = cgn_if_get(ifp); if (!ci) { - ci = zmalloc_aligned(sizeof(*ci)); + ci = cgn_if_create(ifp); if (!ci) - return -1; - - ci->ci_ifp = ifp; - CDS_INIT_LIST_HEAD(&ci->ci_policy_list); + return -ENOMEM; + } - if (npf_if_set_cgn(ifp, ci) < 0) { - free(ci); - return -1; - } + /* Has this interface incremented the feature count? */ + if (!ci->ci_feat_enabled) { + /* Increment cgnat feature count in interface */ cgn_if_feat_enable(ifp, true); + ci->ci_feat_enabled = true; + + /* Take reference on ci */ + (void)cgn_ci_get(ci); } /* Add policy to cgn_intf list */ - cgn_if_list_insert(cp, &ci->ci_policy_list); - cp->cp_ci = ci; + cgn_if_list_insert(cp, ci); + /* Take reference on policy */ cgn_policy_get(cp); return 0; } -/* - * Garbage collect the cgn interface structure. 
- * - * This is called after one or more policies are removed from the interface - * policy list. This can be either when a policy is unconfigured, or when an - * interface is deleted. - */ -void cgn_if_gc_intf(struct ifnet *ifp, bool if_unset) -{ - struct cgn_intf *ci = npf_if_get_cgn(ifp); - - if (!ci) - return; - - /* - * If cgn_intf is empty ... clear ptr in niif struct, free cgn_intf, - * and disable cgnat pipeline node. - */ - if (cds_list_empty(&ci->ci_policy_list)) { - /* - * Do not lock npf niif if called from npf_if_disable_with_name - * since niif is already locked. - */ - npf_if_clear_cgn(ifp, !if_unset); - free(ci); - - cgn_if_feat_enable(ifp, false); - } -} - int cgn_if_del_policy(struct ifnet *ifp, struct cgn_policy *cp) { struct cgn_intf *ci; @@ -153,13 +299,14 @@ int cgn_if_del_policy(struct ifnet *ifp, struct cgn_policy *cp) if (!cgn_if_find_policy_by_name(ifp, cp->cp_name)) return 0; - ci = npf_if_get_cgn(ifp); + ci = cgn_if_get(ifp); if (!ci) - return -1; + return -ENOENT; - /* Remove cp from cgn_intf list */ - cds_list_del_rcu(&cp->cp_list_node); - cp->cp_ci = NULL; + /* Remove policy from list. 
*/ + cgn_if_list_remove(cp, ci); + + /* Release reference on policy */ cgn_policy_put(cp); return 0; @@ -171,14 +318,15 @@ int cgn_if_del_policy(struct ifnet *ifp, struct cgn_policy *cp) struct cgn_policy * cgn_if_find_policy_by_name(struct ifnet *ifp, const char *name) { - struct cgn_intf *ci; + struct cds_list_head *policy_list; struct cgn_policy *cp; - ci = npf_if_get_cgn(ifp); - if (!ci) + /* Get cgnat policy list from interface */ + policy_list = cgn_if_get_policy_list(ifp); + if (!policy_list) return NULL; - cds_list_for_each_entry_rcu(cp, &ci->ci_policy_list, cp_list_node) { + cds_list_for_each_entry_rcu(cp, policy_list, cp_list_node) { if (!strcmp(cp->cp_name, name)) return cp; } @@ -195,49 +343,86 @@ cgn_if_find_policy_by_name(struct ifnet *ifp, const char *name) struct cgn_policy * cgn_if_find_policy_by_addr(struct ifnet *ifp, uint32_t addr) { - struct cgn_intf *ci; + struct cds_list_head *policy_list; struct cgn_policy *cp; - /* Get cgnat interface structure. */ - ci = npf_if_get_cgn(ifp); - if (!ci) + /* Get cgnat policy list from interface */ + policy_list = cgn_if_get_policy_list(ifp); + if (!policy_list) return NULL; - cds_list_for_each_entry_rcu(cp, &ci->ci_policy_list, cp_list_node) { - if (cp->cp_prefix == (addr & cp->cp_mask)) + cds_list_for_each_entry_rcu(cp, policy_list, cp_list_node) { + /* + * Is subscriber address in the match address-group? + */ + if (npf_addrgrp_lookup_v4_by_handle(cp->cp_match_ag, + addr) == 0) return cp; } - return NULL; } /* - * Called from npf callbacks for DP_EVT_IF_INDEX_SET and DP_EVT_IF_INDEX_UNSET - * events. 
+ * Show one cgnat interface */ -void cgn_nif_index_set(struct ifnet *ifp __unused) +static void cgn_if_show_intf_walk(struct ifnet *ifp, void *arg) { - /* Nothing to do */ + json_writer_t *json = arg; + struct cgn_intf *ci = ifp->if_cgn; + + if (!ci) + return; + + jsonw_start_object(json); + + jsonw_string_field(json, "name", ifp->if_name); + jsonw_uint_field(json, "vrf_id", + dp_vrf_get_external_id(ifp->if_vrfid)); + jsonw_uint_field(json, "ifindex", ifp->if_index); + + jsonw_uint_field(json, "session_index", ci->ci_index); + jsonw_uint_field(json, "policy_count", ci->ci_policy_count); + jsonw_bool_field(json, "feat_enabled", ci->ci_feat_enabled); + jsonw_uint_field(json, "refcnt", rte_atomic16_read(&ci->ci_refcnt)); + + jsonw_end_object(json); } /* - * Called via npf index unset. + * Show all cgnat interfaces */ -void cgn_nif_index_unset(struct ifnet *ifp) +static void cgn_if_show_intf(FILE *f) { - struct cgn_policy *cp, *tmp; - struct cgn_intf *ci; + json_writer_t *json; - ci = npf_if_get_cgn(ifp); - if (!ci) + json = jsonw_new(f); + if (!json) return; + jsonw_pretty(json, true); - /* Delete CGNAT Policies */ - cds_list_for_each_entry_safe(cp, tmp, &ci->ci_policy_list, - cp_list_node) - cgn_policy_if_index_unset(ifp, cp); + jsonw_name(json, "cgnat_interfaces"); + jsonw_start_array(json); + + dp_ifnet_walk(cgn_if_show_intf_walk, json); + + jsonw_end_array(json); + jsonw_destroy(&json); +} - /* If policy list is now empty, then free cgn intf */ - cgn_if_gc_intf(ifp, true); +/* cgn-op show interface */ +void cgn_show_interface(FILE *f, int argc __unused, char **argv __unused) +{ + cgn_if_show_intf(f); +} + +/* + * Callback for dataplane DP_EVT_IF_INDEX_UNSET event. 
+ */ +void cgn_if_disable(struct ifnet *ifp) +{ + if (!cgn_if_get(ifp)) + return; + /* Delete CGNAT Policies */ + cgn_policy_if_disable(ifp); } diff --git a/src/npf/cgnat/cgn_if.h b/src/npf/cgnat/cgn_if.h index 66953e34..b472e738 100644 --- a/src/npf/cgnat/cgn_if.h +++ b/src/npf/cgnat/cgn_if.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2019-2020, AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only */ @@ -9,35 +9,24 @@ struct ifnet; struct cgn_policy; +struct cgn_intf; -struct cgn_intf { - struct ifnet *ci_ifp; - struct cds_list_head ci_policy_list; -}; - +uint32_t cgn_if_key_index(const struct ifnet *ifp); +struct ifnet *cgn_if_get_ifp(struct cgn_intf *ci); +struct cds_list_head *cgn_if_get_policy_list(struct ifnet *ifp); void cgn_if_feat_enable(struct ifnet *ifp, bool enable); int cgn_if_add_policy(struct ifnet *ifp, struct cgn_policy *cp); int cgn_if_del_policy(struct ifnet *ifp, struct cgn_policy *cp); -/* Garbage collect the cgn interface structure */ -void cgn_if_gc_intf(struct ifnet *ifp, bool if_unset); +void cgn_show_interface(FILE *f, int argc, char **argv); -/* - * Called from npf callbacks for DP_EVT_IF_INDEX_SET and DP_EVT_IF_INDEX_UNSET - * events. 
- */ -void cgn_nif_index_set(struct ifnet *ifp); -void cgn_nif_index_unset(struct ifnet *ifp); +/* Called from DP_EVT_IF_INDEX_UNSET event */ +void cgn_if_disable(struct ifnet *ifp); struct cgn_policy *cgn_if_find_policy_by_name(struct ifnet *ifp, const char *name); struct cgn_policy *cgn_if_find_policy_by_addr(struct ifnet *ifp, uint32_t addr); -/* npf/npf_if.c */ -struct cgn_intf *npf_if_get_cgn(struct ifnet *ifp); -int npf_if_set_cgn(struct ifnet *ifp, struct cgn_intf *cgn); -int npf_if_clear_cgn(struct ifnet *ifp, bool lock); - -#endif +#endif /* _CGN_IF_H_ */ diff --git a/src/npf/cgnat/cgn_limits.h b/src/npf/cgnat/cgn_limits.h index 59a3baec..4fb6153a 100644 --- a/src/npf/cgnat/cgn_limits.h +++ b/src/npf/cgnat/cgn_limits.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2019-2020, AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only */ @@ -49,16 +49,17 @@ #define CGN_SESSIONS_MAX CGN_SESSION_HT_MAX /************************************************************************** - * CGNAT Nested Session Table (public src addr and port) + * CGNAT Nested Session Table (destination addr and port) **************************************************************************/ -#define CGN_SESS2_HT_INIT 4 -#define CGN_SESS2_HT_MIN 4 -#define CGN_SESS2_HT_MAX 64 -#define CGN_SESS2_HT_FLAGS (CDS_LFHT_AUTO_RESIZE | CDS_LFHT_ACCOUNTING) - /* MUST be less than USHRT_MAX */ -#define CGN_DEST_SESSIONS_MAX CGN_SESS2_HT_MAX +#define CGN_DEST_SESSIONS_INIT 64 +#define CGN_DEST_SESSIONS_MAX 128 + +static_assert(CGN_DEST_SESSIONS_INIT <= CGN_DEST_SESSIONS_MAX, + "cgn dest sessions init too big"); +static_assert(CGN_DEST_SESSIONS_MAX < USHRT_MAX, + "cgn dest session max too big"); /************************************************************************** * CGNAT Source (private address, vrfid) Table diff --git a/src/npf/cgnat/cgn_log.c b/src/npf/cgnat/cgn_log.c index e14e33a4..2b1a2a40 
100644 --- a/src/npf/cgnat/cgn_log.c +++ b/src/npf/cgnat/cgn_log.c @@ -1,142 +1,243 @@ /* - * Copyright (c) 2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2019-2020, AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only */ -/** - * @file cgn_log.c - cgnat logging - */ - -#include -#include -#include - -#include "compiler.h" -#include "if_var.h" -#include "util.h" -#include "soft_ticks.h" -#include "vplane_log.h" +#include "urcu.h" -#include "npf/cgnat/cgn.h" +#include "npf/cgnat/cgn_session.h" #include "npf/cgnat/cgn_log.h" -#include "npf/cgnat/cgn_source.h" -#include "npf/cgnat/cgn.h" -#define ADDR_CHARS 16 +static const struct cgn_log_type_info { + const char *name; +} cgn_log_type_info[CGN_LOG_TYPE_COUNT] = { + [CGN_LOG_TYPE_SESSION] = { + .name = "session", + }, + [CGN_LOG_TYPE_PORT_BLOCK_ALLOCATION] = { + .name = "port-block-allocation", + }, + [CGN_LOG_TYPE_SUBSCRIBER] = { + .name = "subscriber", + }, + [CGN_LOG_TYPE_RES_CONSTRAINT] = { + .name = "resource-constraint", + }, +}; + +const char *cgn_get_log_type_name(enum cgn_log_type type) +{ + if (type >= CGN_LOG_TYPE_COUNT) + return NULL; + return cgn_log_type_info[type].name; +} -/* - * Log subscriber session start - */ -void cgn_log_subscriber_start(uint32_t addr) +int cgn_get_log_type(const char *name, enum cgn_log_type *type) { - char str1[ADDR_CHARS]; + enum cgn_log_type t; + + for (t = 0; t < CGN_LOG_TYPE_COUNT; t++) { + if (strcmp(name, cgn_log_type_info[t].name) == 0) { + *type = t; + return 0; + } + } - RTE_LOG(NOTICE, CGNAT, - "SUBSCRIBER_START subs-addr=%s start-time=%lu\n", - cgn_addrstr(addr, str1, ADDR_CHARS), - cgn_ticks2timestamp(soft_ticks)); + return -ENOENT; } -/* - * Log subscriber session end - */ -void cgn_log_subscriber_end(uint32_t addr, uint64_t start_time, - uint64_t end_time, +extern const struct cgn_log_fns cgn_rte_log_fns, cgn_protobuf_fns; + +static const struct cgn_log_fns *cgn_log_fns[] = { + &cgn_rte_log_fns, + 
&cgn_protobuf_fns, +}; + +struct cgn_log_active_fns { + const struct cgn_log_fns *cla_fns; + enum cgn_log_type cla_ltype; + struct cgn_log_active_fns *cla_next; + struct rcu_head rcu; +}; + +static struct cgn_log_active_fns *cgn_log_active_fns[CGN_LOG_TYPE_COUNT]; + +#define CGN_LOG_FN_BODY(ltype, ltype_name, fn, ...) \ + { \ + const struct cgn_log_active_fns *fns; \ +\ + for (fns = rcu_dereference(cgn_log_active_fns[ltype]); \ + fns != NULL; \ + fns = rcu_dereference(fns->cla_next)) \ + if (fns->cla_fns->logfn[ltype].ltype_name->cl_ ## fn) \ + fns->cla_fns->logfn[ltype].ltype_name-> \ + cl_ ## fn(__VA_ARGS__); \ + } + +void cgn_log_subscriber_start(uint32_t addr) + CGN_LOG_FN_BODY(CGN_LOG_TYPE_SUBSCRIBER, subscriber, + subscriber_start, addr) + +void cgn_log_subscriber_end(uint32_t addr, + uint64_t start_time, uint64_t end_time, uint64_t pkts_out, uint64_t bytes_out, uint64_t pkts_in, uint64_t bytes_in, uint64_t sessions) -{ - char str1[ADDR_CHARS]; - - RTE_LOG(NOTICE, CGNAT, - "SUBSCRIBER_END subs-addr=%s start-time=%lu " - "end-time=%lu sessions=%lu forw=%lu/%lu back=%lu/%lu\n", - cgn_addrstr(addr, str1, ADDR_CHARS), - cgn_ticks2timestamp(start_time), cgn_ticks2timestamp(end_time), - sessions, pkts_out, bytes_out, pkts_in, bytes_in); -} + CGN_LOG_FN_BODY(CGN_LOG_TYPE_SUBSCRIBER, subscriber, subscriber_end, + addr, start_time, end_time, pkts_out, bytes_out, + pkts_in, bytes_in, sessions) + +void cgn_log_resource_subscriber_mbpu(enum cgn_resource_type type, + uint32_t addr, uint8_t ipproto, + uint16_t count, uint16_t max_count) + CGN_LOG_FN_BODY(CGN_LOG_TYPE_RES_CONSTRAINT, res_constraint, + resource_subscriber_mbpu, type, addr, ipproto, + count, max_count) + +void cgn_log_resource_public_pb(enum cgn_resource_type type, + uint32_t addr, uint16_t blocks_used, + uint16_t nblocks) + CGN_LOG_FN_BODY(CGN_LOG_TYPE_RES_CONSTRAINT, res_constraint, + resource_public_pb, type, addr, blocks_used, nblocks) -/* - * Log subscriber reaching max-blocks-per-user limit. 
- * - * Logged when CGN_MBU_ENOSPC occurs. Controlled by csp->srp_pb_full. - */ -void cgn_log_subscriber_mbpu_full(uint32_t addr, uint16_t block_count, - uint16_t mbpu) +void cgn_log_pb_alloc(uint32_t pvt_addr, uint32_t pub_addr, + uint16_t port_start, uint16_t port_end, + uint64_t start_time, const char *policy_name, + const char *pool_name) + CGN_LOG_FN_BODY(CGN_LOG_TYPE_PORT_BLOCK_ALLOCATION, port_block_alloc, + pb_alloc, pvt_addr, pub_addr, port_start, port_end, + start_time, policy_name, pool_name) + +void cgn_log_pb_release(uint32_t pvt_addr, uint32_t pub_addr, + uint16_t port_start, uint16_t port_end, + uint64_t start_time, uint64_t end_time, + const char *policy_name, const char *pool_name) + CGN_LOG_FN_BODY(CGN_LOG_TYPE_PORT_BLOCK_ALLOCATION, port_block_alloc, + pb_release, pvt_addr, pub_addr, port_start, port_end, + start_time, end_time, policy_name, pool_name) + +void cgn_log_sess_start(struct cgn_sess2 *s2) + CGN_LOG_FN_BODY(CGN_LOG_TYPE_SESSION, session, sess_start, s2) + +void cgn_log_sess_active(struct cgn_sess2 *s2) + CGN_LOG_FN_BODY(CGN_LOG_TYPE_SESSION, session, sess_active, s2) + +void cgn_log_sess_end(struct cgn_sess2 *s2, uint64_t end_time) + CGN_LOG_FN_BODY(CGN_LOG_TYPE_SESSION, session, sess_end, s2, end_time) + +void cgn_log_sess_clear(const char *desc, uint count, uint64_t clear_time) + CGN_LOG_FN_BODY(CGN_LOG_TYPE_RES_CONSTRAINT, res_constraint, + sess_clear, desc, count, clear_time) + +void cgn_log_resource_subscriber_table(enum cgn_resource_type type, + int32_t count, int32_t max_count) + CGN_LOG_FN_BODY(CGN_LOG_TYPE_RES_CONSTRAINT, res_constraint, + resource_subscriber_table, type, count, max_count) + +void cgn_log_resource_session_table(enum cgn_resource_type type, + int32_t count, int32_t max_count) + CGN_LOG_FN_BODY(CGN_LOG_TYPE_RES_CONSTRAINT, res_constraint, + resource_session_table, type, count, max_count) + +void cgn_log_resource_dest_session_table(enum cgn_resource_type type, + struct cgn_session *cse, + int16_t count, int16_t 
max_count) + CGN_LOG_FN_BODY(CGN_LOG_TYPE_RES_CONSTRAINT, res_constraint, + resource_dest_session_table, type, cse, count, + max_count) + +void cgn_log_resource_pool(enum cgn_resource_type type, struct nat_pool *np, + int32_t count, int32_t max_count) + CGN_LOG_FN_BODY(CGN_LOG_TYPE_RES_CONSTRAINT, res_constraint, + resource_pool, type, np, count, max_count) + +int cgn_log_enable_handler(enum cgn_log_type ltype, const char *name) { - char str1[ADDR_CHARS]; + unsigned int i; + struct cgn_log_active_fns *afns, *new; - RTE_LOG(NOTICE, CGNAT, - "MBPU_FULL subs-addr=%s blocks=%u mbpu=%u\n", - cgn_addrstr(addr, str1, ADDR_CHARS), block_count, mbpu); -} + if (ltype >= CGN_LOG_TYPE_COUNT) + return -EINVAL; -void cgn_log_subscriber_mbpu_avail(uint32_t addr, uint16_t block_count, - uint16_t mbpu) -{ - char str1[ADDR_CHARS]; + for (i = 0; i < ARRAY_SIZE(cgn_log_fns); i++) + if (strcmp(cgn_log_fns[i]->cl_name, name) == 0) + break; - RTE_LOG(NOTICE, CGNAT, - "MBPU_AVAILABLE subs-addr=%s blocks=%u mbpu=%u\n", - cgn_addrstr(addr, str1, ADDR_CHARS), block_count, mbpu); -} + if (i == ARRAY_SIZE(cgn_log_fns)) + return -ENOENT; -/* - * Log no free blocks on a public address - * - * Logged when CGN_BLK_ENOSPC occurs. 
Controlled by apm->apm_pb_full - */ -void cgn_log_public_pb_full(uint32_t addr, uint16_t blocks_used, - uint16_t nblocks) -{ - char str1[ADDR_CHARS]; + for (afns = cgn_log_active_fns[ltype]; afns != NULL; + afns = afns->cla_next) { + if (strcmp(afns->cla_fns->cl_name, name) == 0) + return -EEXIST; + } + + new = malloc(sizeof(*new)); + if (new == NULL) + return -ENOMEM; - RTE_LOG(NOTICE, CGNAT, - "PB_FULL pub-addr=%s blocks=%u/%u\n", - cgn_addrstr(addr, str1, ADDR_CHARS), blocks_used, nblocks); + if (cgn_log_fns[i]->cl_init) { + int ret = cgn_log_fns[i]->cl_init(ltype, cgn_log_fns[i]); + + if (ret != 0) { + free(new); + return ret; + } + } + + new->cla_fns = cgn_log_fns[i]; + new->cla_ltype = ltype; + new->cla_next = cgn_log_active_fns[ltype]; + + rcu_assign_pointer(cgn_log_active_fns[ltype], new); + + return 0; } -void cgn_log_public_pb_avail(uint32_t addr, uint16_t blocks_used, - uint16_t nblocks) +static void cgn_log_handler_reclaim(struct rcu_head *rp) { - char str1[ADDR_CHARS]; + struct cgn_log_active_fns *afns = container_of( + rp, struct cgn_log_active_fns, rcu); - RTE_LOG(NOTICE, CGNAT, - "PB_AVAILABLE pub-addr=%s blocks=%u/%u\n", - cgn_addrstr(addr, str1, ADDR_CHARS), blocks_used, nblocks); + free(afns); } -/* - * Log port block allocation and release - */ -void cgn_log_pb_alloc(uint32_t pvt_addr, uint32_t pub_addr, - uint16_t port_start, uint16_t port_end, - uint64_t start_time) +int cgn_log_disable_handler(enum cgn_log_type ltype, const char *name) { - char str1[ADDR_CHARS]; - char str2[ADDR_CHARS]; - - RTE_LOG(NOTICE, CGNAT, - "PB_ALLOCATED subs-addr=%s pub-addr=%s " - "port=%u-%u start-time=%lu\n", - cgn_addrstr(pvt_addr, str1, ADDR_CHARS), - cgn_addrstr(pub_addr, str2, ADDR_CHARS), - port_start, port_end, cgn_ticks2timestamp(start_time)); + struct cgn_log_active_fns **afnsp; + + if (ltype >= CGN_LOG_TYPE_COUNT) + return -EINVAL; + + for (afnsp = &cgn_log_active_fns[ltype]; *afnsp != NULL; + afnsp = &((*afnsp)->cla_next)) { + if 
(strcmp((*afnsp)->cla_fns->cl_name, name) == 0) { + struct cgn_log_active_fns *old = *afnsp; + rcu_assign_pointer(*afnsp, (*afnsp)->cla_next); + if (old->cla_fns->cl_fini) + old->cla_fns->cl_fini(old->cla_ltype, + old->cla_fns); + + call_rcu(&old->rcu, cgn_log_handler_reclaim); + return 0; + } + } + + return -ENOENT; } -void cgn_log_pb_release(uint32_t pvt_addr, uint32_t pub_addr, - uint16_t port_start, uint16_t port_end, - uint64_t start_time, uint64_t end_time) +/* + * Disable every active handler of every log type. + * + * Each entry is unlinked from its per-type list, the handler's cl_fini + * hook (when it has one) is run in the same way as in + * cgn_log_disable_handler(), and the entry itself is freed after an RCU + * grace period by cgn_log_handler_reclaim(). + */ +void cgn_log_disable_all_handlers(void) { - char str1[ADDR_CHARS]; - char str2[ADDR_CHARS]; - - RTE_LOG(NOTICE, CGNAT, - "PB_RELEASED subs-addr=%s pub-addr=%s port=%u-%u " - "start-time=%lu end-time=%lu\n", - cgn_addrstr(pvt_addr, str1, ADDR_CHARS), - cgn_addrstr(pub_addr, str2, ADDR_CHARS), - port_start, port_end, cgn_ticks2timestamp(start_time), - cgn_ticks2timestamp(end_time)); + enum cgn_log_type ltype; + struct cgn_log_active_fns **afnsp; + + for (ltype = 0; ltype < CGN_LOG_TYPE_COUNT; ltype++) { + afnsp = &cgn_log_active_fns[ltype]; + while (*afnsp != NULL) { + struct cgn_log_active_fns *old = *afnsp; + rcu_assign_pointer(*afnsp, old->cla_next); + /* Release the handler's own state, not only the list entry */ + if (old->cla_fns->cl_fini) + old->cla_fns->cl_fini(old->cla_ltype, + old->cla_fns); + call_rcu(&old->rcu, cgn_log_handler_reclaim); + } + } } diff --git a/src/npf/cgnat/cgn_log.h b/src/npf/cgnat/cgn_log.h index f76d550c..1534f595 100644 --- a/src/npf/cgnat/cgn_log.h +++ b/src/npf/cgnat/cgn_log.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2019-2021, AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only */ @@ -7,43 +7,177 @@ #ifndef _CGN_LOG_H_ #define _CGN_LOG_H_ +struct nat_pool; +struct cgn_sess2; + +enum cgn_resource_type { + CGN_RESOURCE_FULL, + CGN_RESOURCE_AVAILABLE, + CGN_RESOURCE_THRESHOLD +}; + /* subscriber session start */ void cgn_log_subscriber_start(uint32_t addr); -/* subscriber session end */ +/* + * subscriber session end. start_time and end_time are unix epoch + * microseconds.
+ */ void cgn_log_subscriber_end(uint32_t addr, uint64_t start_time, uint64_t end_time, uint64_t pkts_out, uint64_t bytes_out, uint64_t pkts_in, uint64_t bytes_in, - uint64_t sessions); + uint64_t sessions); /* - * Log subscriber reaching max-blocks-per-user limit. - * - * Logged when CGN_MBU_ENOSPC occurs. Controlled by csp->srp_mbpu_full. + * Logs for subscriber resource limit */ -void cgn_log_subscriber_mbpu_full(uint32_t addr, uint16_t block_count, - uint16_t mbpu); -void cgn_log_subscriber_mbpu_avail(uint32_t addr, uint16_t block_count, - uint16_t mbpu); +void cgn_log_resource_subscriber_mbpu(enum cgn_resource_type type, + uint32_t addr, uint8_t ipproto, + uint16_t count, uint16_t max_count); /* - * Log no free blocks on a public address - * - * Logged when CGN_BLK_ENOSPC occurs. Controlled by apm->apm_pb_full + * Logs for public address blocks resource limits */ -void cgn_log_public_pb_full(uint32_t addr, uint16_t blocks_used, - uint16_t nblocks); -void cgn_log_public_pb_avail(uint32_t addr, uint16_t blocks_used, - uint16_t nblocks); +void cgn_log_resource_public_pb(enum cgn_resource_type type, + uint32_t addr, uint16_t blocks_used, + uint16_t nblocks); /* Port block allocation and release */ void cgn_log_pb_alloc(uint32_t pvt_addr, uint32_t pub_addr, uint16_t port_start, uint16_t port_end, - uint64_t start_time); + uint64_t start_time, + const char *policy_name, const char *pool_name); void cgn_log_pb_release(uint32_t pvt_addr, uint32_t pub_addr, uint16_t port_start, uint16_t port_end, - uint64_t start_time, uint64_t end_time); + uint64_t start_time, uint64_t end_time, + const char *policy_name, const char *pool_name); + +/* Session logging */ +void cgn_log_sess_start(struct cgn_sess2 *s2); +void cgn_log_sess_active(struct cgn_sess2 *s2); +void cgn_log_sess_end(struct cgn_sess2 *s2, uint64_t end_time); +void cgn_log_sess_clear(const char *desc, uint count, uint64_t clear_time); + +/* Resource constraint logging */ +void cgn_log_resource_subscriber_table(enum 
cgn_resource_type type, + int32_t count, int32_t max_count); +void cgn_log_resource_session_table(enum cgn_resource_type type, + int32_t count, int32_t max_count); +void cgn_log_resource_dest_session_table(enum cgn_resource_type type, + struct cgn_session *cse, + int16_t count, int16_t max_count); +void cgn_log_resource_pool(enum cgn_resource_type type, struct nat_pool *np, + int32_t count, int32_t max_count); + +enum cgn_log_type { + CGN_LOG_TYPE_SESSION, + CGN_LOG_TYPE_PORT_BLOCK_ALLOCATION, + CGN_LOG_TYPE_SUBSCRIBER, + CGN_LOG_TYPE_RES_CONSTRAINT, + + CGN_LOG_TYPE_COUNT /* Must be last */ +}; + +/** + * Get the name associated with the given CGNAT log type + * + * @param type The type of the log to get the name + * @return returns the name of the log type - NULL will be returned + * if an invalid type is passed in. + */ +const char *cgn_get_log_type_name(enum cgn_log_type type); + +/** + * Get the log type associated with a given CGNAT log type name + * + * @param name the name to look up + * @param type a pointer to a type which will be filled in with + * the enum value on success. 
+ * + * @return returns 0 on success and a negative errno on failure + */ +int cgn_get_log_type(const char *name, enum cgn_log_type *type); + +enum cgn_log_format { + CGN_LOG_FORMAT_RTE_LOG, + CGN_LOG_FORMAT_PROTOBUF, + + CGN_LOG_FORMAT_COUNT /* Must be last */ +}; + +/* Enable and disable a named log handler for a given log type */ +int cgn_log_enable_handler(enum cgn_log_type ltype, const char *name); +int cgn_log_disable_handler(enum cgn_log_type ltype, const char *name); + +/* Free resources used by all active handlers */ +void cgn_log_disable_all_handlers(void); + +struct cgn_session_log_fns { + void (*cl_sess_start)(struct cgn_sess2 *s2); + void (*cl_sess_active)(struct cgn_sess2 *s2); + void (*cl_sess_end)(struct cgn_sess2 *s2, uint64_t end_time); +}; + +struct cgn_port_block_alloc_log_fns { + void (*cl_pb_alloc)(uint32_t pvt_addr, uint32_t pub_addr, + uint16_t port_start, uint16_t port_end, + uint64_t start_time, const char *policy_name, + const char *pool_name); + void (*cl_pb_release)(uint32_t pvt_addr, uint32_t pub_addr, + uint16_t port_start, uint16_t port_end, + uint64_t start_time, uint64_t end_time, + const char *policy_name, const char *pool_name); +}; + +struct cgn_subscriber_log_fns { + void (*cl_subscriber_start)(uint32_t addr); + void (*cl_subscriber_end)(uint32_t addr, + uint64_t start_time, uint64_t end_time, + uint64_t pkts_out, uint64_t bytes_out, + uint64_t pkts_in, uint64_t bytes_in, + uint64_t sessions); +}; + +struct cgn_res_constraint_log_fns { + void (*cl_resource_subscriber_mbpu)(enum cgn_resource_type type, + uint32_t addr, uint8_t ipproto, + uint16_t count, + uint16_t max_count); + void (*cl_resource_public_pb)(enum cgn_resource_type type, + uint32_t addr, uint16_t blocks_used, + uint16_t nblocks); + void (*cl_sess_clear)(const char *desc, uint count, + uint64_t clear_time); + void (*cl_resource_subscriber_table)(enum cgn_resource_type type, + int32_t count, int32_t max_count); + void (*cl_resource_session_table)(enum
cgn_resource_type type, + int32_t count, int32_t max_count); + void (*cl_resource_dest_session_table)(enum cgn_resource_type type, + struct cgn_session *cse, + int16_t count, + int16_t max_count); + void (*cl_resource_apm_table)(enum cgn_resource_type type, + int32_t count, int32_t limit_count); + void (*cl_resource_pool)(enum cgn_resource_type type, + struct nat_pool *np, int32_t count, + int32_t max_count); +}; + +union cgn_log_type_fns { + const struct cgn_session_log_fns *session; + const struct cgn_port_block_alloc_log_fns *port_block_alloc; + const struct cgn_subscriber_log_fns *subscriber; + const struct cgn_res_constraint_log_fns *res_constraint; +}; + +struct cgn_log_fns { + const char *cl_name; + int (*cl_init)(enum cgn_log_type ltype, const struct cgn_log_fns *fns); + void (*cl_fini)(enum cgn_log_type ltype, const struct cgn_log_fns *fns); + + union cgn_log_type_fns logfn[CGN_LOG_TYPE_COUNT]; +}; -#endif +#endif /* _CGN_LOG_H_ */ diff --git a/src/npf/cgnat/cgn_log_protobuf_zmq.c b/src/npf/cgnat/cgn_log_protobuf_zmq.c new file mode 100644 index 00000000..88666026 --- /dev/null +++ b/src/npf/cgnat/cgn_log_protobuf_zmq.c @@ -0,0 +1,1007 @@ +/* + * Copyright (c) 2019-2021, AT&T Intellectual Property. All rights reserved. 
+ * + * SPDX-License-Identifier: LGPL-2.1-only + */ + +/** + * @file cgn_log_protobuf_zmq.c - cgnat logging sending protobufs over zmq + */ + +#include +#include +#include +#include + +#include "compiler.h" +#include "if_var.h" +#include "util.h" +#include "soft_ticks.h" +#include "czmq.h" +#include "zmq_dp.h" +#include "vplane_log.h" + +#include "npf/cgnat/cgn.h" +#include "npf/cgnat/cgn_log.h" +#include "npf/cgnat/cgn_source.h" +#include "npf/cgnat/cgn_sess_state.h" +#include "npf/cgnat/cgn_session.h" +#include "npf/cgnat/cgn_sess2.h" +#include "npf/nat/nat_pool.h" +#include "npf/cgnat/cgn_log_protobuf_zmq.h" + +#include "protobuf/CgnatLogging.pb-c.h" + +struct cgn_zmq { + zsock_t *sock; + void *ul_sock; + struct rcu_head rcu; +}; + +struct cgnat_zmq_ctx { + const char *endpoint; + rte_spinlock_t lock; + rte_atomic32_t hwm; + struct cgn_zmq *sender; + rte_atomic64_t msgs_sent; + rte_atomic64_t init_fails; + rte_atomic64_t send_fails; + rte_atomic64_t no_channel; +}; + +struct cgnat_zmq_ctx cgnat_zmq_ctx[CGN_LOG_TYPE_COUNT] = { + [CGN_LOG_TYPE_SESSION] = { + .endpoint = "ipc:///var/run/vyatta/cgnat-event-session", + .lock = RTE_SPINLOCK_INITIALIZER, + }, + [CGN_LOG_TYPE_PORT_BLOCK_ALLOCATION] = { + .endpoint = + "ipc:///var/run/vyatta/cgnat-event-port-block-allocation", + .lock = RTE_SPINLOCK_INITIALIZER, + }, + [CGN_LOG_TYPE_SUBSCRIBER] = { + .endpoint = "ipc:///var/run/vyatta/cgnat-event-subscriber", + .lock = RTE_SPINLOCK_INITIALIZER, + }, + [CGN_LOG_TYPE_RES_CONSTRAINT] = { + .endpoint = + "ipc:///var/run/vyatta/cgnat-event-resource-constraint", + .lock = RTE_SPINLOCK_INITIALIZER, + }, +}; + +void cgn_show_zmq(FILE *f) +{ + enum cgn_log_type ltype; + uint64_t count; + uint32_t count32; + const char *ltype_name; + struct cgn_zmq *sender; + struct cgnat_zmq_ctx *zmqctx; + + json_writer_t *json; + + json = jsonw_new(f); + if (!json) + return; + + jsonw_name(json, "zmq"); + jsonw_start_object(json); + + jsonw_name(json, "statistics"); + jsonw_start_array(json); + 
+ for (ltype = 0; ltype < CGN_LOG_TYPE_COUNT; ltype++) { + + jsonw_start_object(json); + + ltype_name = cgn_get_log_type_name(ltype); + jsonw_string_field(json, "logtype", + ltype_name ? ltype_name : "unknown"); + + count = rte_atomic64_read(&cgnat_zmq_ctx[ltype].msgs_sent); + jsonw_uint_field(json, "msgs_sent", count); + + count = rte_atomic64_read(&cgnat_zmq_ctx[ltype].init_fails); + jsonw_uint_field(json, "init_fails", count); + + count = rte_atomic64_read(&cgnat_zmq_ctx[ltype].send_fails); + jsonw_uint_field(json, "send_fails", count); + + count = rte_atomic64_read(&cgnat_zmq_ctx[ltype].no_channel); + jsonw_uint_field(json, "no_channel", count); + + jsonw_end_object(json); + } + + jsonw_end_array(json); + + jsonw_name(json, "config"); + jsonw_start_array(json); + + for (ltype = 0; ltype < CGN_LOG_TYPE_COUNT; ltype++) { + + jsonw_start_object(json); + + ltype_name = cgn_get_log_type_name(ltype); + jsonw_string_field(json, "logtype", + ltype_name ? ltype_name : "unknown"); + + zmqctx = &cgnat_zmq_ctx[ltype]; + + count32 = rte_atomic32_read(&zmqctx->hwm); + jsonw_uint_field(json, "configured_hwm", count32); + + sender = rcu_dereference(zmqctx->sender); + if (sender != NULL && sender->sock != NULL) { + int act_snd_hwm = zsock_sndhwm(sender->sock); + int act_rcv_hwm = zsock_rcvhwm(sender->sock); + + jsonw_uint_field(json, "actual_snd_hwm", act_snd_hwm); + jsonw_uint_field(json, "actual_rcv_hwm", act_rcv_hwm); + } + + jsonw_end_object(json); + } + + jsonw_end_array(json); + + jsonw_end_object(json); + jsonw_destroy(&json); +} + +static void cl_reclaim_zmqctx(struct rcu_head *rp) +{ + struct cgn_zmq *cgn_zmq = container_of(rp, struct cgn_zmq, rcu); + + free(cgn_zmq); +} + +/* + * Function called when zmq protobuf logging is enabled for a log type + */ +static int cl_zmq_init(enum cgn_log_type ltype, + const struct cgn_log_fns *fns __unused) +{ + struct cgnat_zmq_ctx *zmqctx; + struct cgn_zmq *sender; + int ret; + + if (ltype >= CGN_LOG_TYPE_COUNT) + return -EINVAL; + 
+ zmqctx = &cgnat_zmq_ctx[ltype]; + + sender = rcu_dereference(zmqctx->sender); + if (sender != NULL) + return -EEXIST; + + sender = calloc(sizeof(*sender), 1); + + if (sender == NULL) + return -ENOMEM; + + rte_spinlock_lock(&zmqctx->lock); + + sender->sock = zsock_new(ZMQ_PUSH); + if (sender->sock == NULL) { + RTE_LOG(ERR, CGNAT, "%s: zsock_new failed (%s)\n", + __func__, strerror(errno)); + free(sender); + rte_spinlock_unlock(&zmqctx->lock); + return -ECONNREFUSED; + } + + /* NB: HWMs need to be set before zsock_bind() */ + zsock_set_sndhwm(sender->sock, rte_atomic32_read(&zmqctx->hwm)); + zsock_set_rcvhwm(sender->sock, rte_atomic32_read(&zmqctx->hwm)); + + ret = zsock_bind(sender->sock, "%s", zmqctx->endpoint); + + if (ret < 0) { + RTE_LOG(ERR, CGNAT, "%s: zsock_bind(%s) failed (%s)\n", + __func__, zmqctx->endpoint, strerror(errno)); + zsock_destroy(&(sender->sock)); + free(sender); + rte_spinlock_unlock(&zmqctx->lock); + return -ECONNREFUSED; + } + + sender->ul_sock = zsock_resolve(sender->sock); + + if (sender->ul_sock == NULL) { + RTE_LOG(ERR, CGNAT, "%s: zsock_resolve failed for %s (%s)\n", + __func__, zmqctx->endpoint, strerror(errno)); + zsock_destroy(&(sender->sock)); + free(sender); + rte_spinlock_unlock(&zmqctx->lock); + return -ENOTSOCK; + } + + rcu_assign_pointer(zmqctx->sender, sender); + + rte_spinlock_unlock(&zmqctx->lock); + return 0; +} + +/* + * Function called when zmq protobuf logging is disabled for a log type + */ +static void cl_zmq_fini(enum cgn_log_type ltype, + const struct cgn_log_fns *fns __unused) +{ + struct cgnat_zmq_ctx *zmqctx; + struct cgn_zmq *old_sender; + + if (ltype >= CGN_LOG_TYPE_COUNT) + return; + + zmqctx = &cgnat_zmq_ctx[ltype]; + + rte_spinlock_lock(&zmqctx->lock); + + old_sender = zmqctx->sender; + rcu_assign_pointer(zmqctx->sender, NULL); + + if (old_sender != NULL) { + zsock_destroy(&(old_sender->sock)); + call_rcu(&old_sender->rcu, cl_reclaim_zmqctx); + } + + rte_spinlock_unlock(&zmqctx->lock); +} + +int
cl_zmq_set_hwm(enum cgn_log_type ltype, int32_t hwm) +{ + struct cgnat_zmq_ctx *zmqctx; + + if (ltype >= CGN_LOG_TYPE_COUNT) + return -EINVAL; + + zmqctx = &cgnat_zmq_ctx[ltype]; + + rte_atomic32_set(&(zmqctx->hwm), hwm); + + return 0; +} + +/* + * Function back-called by czmq library to free the allocated buffer + * that has just been sent out. + */ +static void cl_protobuf_msg_free(void *data, void *hint __unused) +{ + free(data); +} + +/* + * Send a serialised protobuf message down the ZMQ channel associated with + * the log type. + * + * Note: on return the buffer passed in will be freed, even if there is + * an error. + */ +static int cl_protobuf_zmq_send(enum cgn_log_type ltype, void *buf, + unsigned int buflen) +{ + int rv; + zmq_msg_t zpb; + struct cgnat_zmq_ctx *zmqctx = &cgnat_zmq_ctx[ltype]; + struct cgn_zmq *sender = rcu_dereference(zmqctx->sender); + + if (sender == NULL) { /* using protobufs not currently enabled */ + rte_atomic64_inc(&zmqctx->no_channel); + cl_protobuf_msg_free(buf, NULL); + if (net_ratelimit()) + RTE_LOG(DEBUG, CGNAT, "%s: channel no set-up", + __func__); + return 0; + } + + rte_spinlock_lock(&zmqctx->lock); + + /* send the protobuf (without copying) */ + + rv = zmq_msg_init_data(&zpb, buf, buflen, cl_protobuf_msg_free, NULL); + if (unlikely(rv < 0)) { + rte_atomic64_inc(&zmqctx->init_fails); + cl_protobuf_msg_free(buf, NULL); + if (net_ratelimit()) + RTE_LOG(DEBUG, CGNAT, "%s: zmq_msg_init_data failure " + "(%s)\n", __func__, strerror(errno)); + rte_spinlock_unlock(&zmqctx->lock); + return -errno; + } + + rv = zmq_msg_send(&zpb, sender->ul_sock, ZMQ_DONTWAIT); + if (unlikely(rv < 0)) { + rte_atomic64_inc(&zmqctx->send_fails); + zmq_msg_close(&zpb); + if (net_ratelimit()) + RTE_LOG(DEBUG, CGNAT, "%s: zmq_send failure (%s)\n", + __func__, strerror(errno)); + rte_spinlock_unlock(&zmqctx->lock); + return -errno; + } + + rte_atomic64_inc(&zmqctx->msgs_sent); + zmq_msg_close(&zpb); + + rte_spinlock_unlock(&zmqctx->lock); + return 0; +} + 
+static inline void microsecs_to_timestamp(uint64_t micro_secs, Timestamp *ts) +{ + ts->has_seconds = 1; + ts->seconds = micro_secs / 1000000; + + ts->has_nanos = 1; + ts->nanos = (micro_secs - (ts->seconds * 1000000)) * 1000; +} + +/* + * Send a protobuf structure down the subscriber ZMQ channel + */ +static int cl_protobuf_log_send_subscriber(SubscriberLog *msg) +{ + unsigned int buflen = subscriber_log__get_packed_size(msg); + void *buf = malloc(buflen); + + if (unlikely(buf == NULL)) { + if (net_ratelimit()) + RTE_LOG(ERR, CGNAT, "%s: buffer allocation\n", + __func__); + return -ENOMEM; + } + + subscriber_log__pack(msg, buf); + + return cl_protobuf_zmq_send(CGN_LOG_TYPE_SUBSCRIBER, buf, buflen); +} + +/* + * Log subscriber session start - SUBSCRIBER_EVENT_START + */ +static void cl_protobuf_subscriber_start(uint32_t addr) +{ + SubscriberLog msg = SUBSCRIBER_LOG__INIT; + Timestamp start_ts = TIMESTAMP__INIT; + + msg.has_eventtype = 1; + msg.eventtype = SUBSCRIBER_EVENT_TYPE__SUBSCRIBER_EVENT_START; + + msg.has_subscriberaddress = 1; + msg.subscriberaddress = addr; + + microsecs_to_timestamp(unix_epoch_us, &start_ts); + msg.starttimestamp = &start_ts; + + cl_protobuf_log_send_subscriber(&msg); +} + +/* + * Log subscriber session end - SUBSCRIBER_EVENT_END + */ +static void cl_protobuf_subscriber_end(uint32_t addr, uint64_t start_time, + uint64_t end_time, uint64_t pkts_out, + uint64_t bytes_out, uint64_t pkts_in, + uint64_t bytes_in, uint64_t sessions) +{ + SubscriberLog msg = SUBSCRIBER_LOG__INIT; + Timestamp start_ts = TIMESTAMP__INIT; + Timestamp end_ts = TIMESTAMP__INIT; + + msg.has_eventtype = 1; + msg.eventtype = SUBSCRIBER_EVENT_TYPE__SUBSCRIBER_EVENT_END; + + msg.has_subscriberaddress = 1; + msg.subscriberaddress = addr; + + msg.has_sessioncount = 1; + msg.sessioncount = sessions; + + msg.has_inbytes = 1; + msg.inbytes = bytes_in; + msg.has_outbytes = 1; + msg.outbytes = bytes_out; + msg.has_inpackets = 1; + msg.inpackets = pkts_in; + msg.has_outpackets = 
1; + msg.outpackets = pkts_out; + + microsecs_to_timestamp(start_time, &start_ts); + msg.starttimestamp = &start_ts; + microsecs_to_timestamp(end_time, &end_ts); + msg.endtimestamp = &end_ts; + + cl_protobuf_log_send_subscriber(&msg); +} + +/* + * Send a protobuf structure down the port-block-allocation ZMQ channel + */ +static int cl_protobuf_log_send_pba(PortAllocationLog *msg) +{ + unsigned int buflen = port_allocation_log__get_packed_size(msg); + void *buf = malloc(buflen); + + if (unlikely(buf == NULL)) { + if (net_ratelimit()) + RTE_LOG(ERR, CGNAT, "%s: buffer allocation\n", + __func__); + return -ENOMEM; + } + + port_allocation_log__pack(msg, buf); + + return cl_protobuf_zmq_send(CGN_LOG_TYPE_PORT_BLOCK_ALLOCATION, buf, + buflen); +} + +/* + * Log port block allocation - PB_EVENT_ALLOCATED + */ +static void cl_protobuf_pb_alloc(uint32_t pvt_addr, uint32_t pub_addr, + uint16_t port_start, uint16_t port_end, + uint64_t start_time, const char *policy_name, + const char *pool_name) +{ + PortAllocationLog msg = PORT_ALLOCATION_LOG__INIT; + Timestamp start_ts = TIMESTAMP__INIT; + + msg.has_eventtype = 1; + msg.eventtype = PORT_ALLOCATION_EVENT_TYPE__PB_EVENT_ALLOCATED; + + msg.has_subscriberaddress = 1; + msg.subscriberaddress = pvt_addr; + + if (policy_name) + msg.policyname = (char *)policy_name; + + msg.has_natallocatedaddress = 1; + msg.natallocatedaddress = pub_addr; + + if (pool_name) + msg.poolname = (char *)pool_name; + + msg.has_startportnumber = 1; + msg.startportnumber = port_start; + + msg.has_endportnumber = 1; + msg.endportnumber = port_end; + + microsecs_to_timestamp(start_time, &start_ts); + msg.starttimestamp = &start_ts; + + cl_protobuf_log_send_pba(&msg); +} + +/* + * Log port block release - PB_EVENT_RELEASED + */ +static void cl_protobuf_pb_release(uint32_t pvt_addr, uint32_t pub_addr, + uint16_t port_start, uint16_t port_end, + uint64_t start_time, uint64_t end_time, + const char *policy_name, + const char *pool_name) +{ + PortAllocationLog 
msg = PORT_ALLOCATION_LOG__INIT; + Timestamp start_ts = TIMESTAMP__INIT; + Timestamp end_ts = TIMESTAMP__INIT; + + msg.has_eventtype = 1; + msg.eventtype = PORT_ALLOCATION_EVENT_TYPE__PB_EVENT_RELEASED; + + msg.has_subscriberaddress = 1; + msg.subscriberaddress = pvt_addr; + + if (policy_name) + msg.policyname = (char *)policy_name; + + msg.has_natallocatedaddress = 1; + msg.natallocatedaddress = pub_addr; + + if (pool_name) + msg.poolname = (char *)pool_name; + + msg.has_startportnumber = 1; + msg.startportnumber = port_start; + + msg.has_endportnumber = 1; + msg.endportnumber = port_end; + + microsecs_to_timestamp(start_time, &start_ts); + msg.starttimestamp = &start_ts; + microsecs_to_timestamp(end_time, &end_ts); + msg.endtimestamp = &end_ts; + + cl_protobuf_log_send_pba(&msg); +} + +static SessionState sess_state_to_pb(uint8_t state) +{ + switch (state) { + case CGN_TCP_STATE_NONE: + return SESSION_STATE__SESSION_NONE; + case CGN_TCP_STATE_CLOSED: + return SESSION_STATE__SESSION_CLOSED; + case CGN_TCP_STATE_INIT: + return SESSION_STATE__SESSION_OPENING; + case CGN_TCP_STATE_ESTABLISHED: + return SESSION_STATE__SESSION_ESTABLISHED; + case CGN_TCP_STATE_TRANS: + return SESSION_STATE__SESSION_TRANSITORY; + case CGN_TCP_STATE_C_FIN_RCV: + return SESSION_STATE__SESSION_C_FIN_RCV; + case CGN_TCP_STATE_S_FIN_RCV: + return SESSION_STATE__SESSION_S_FIN_RCV; + case CGN_TCP_STATE_CS_FIN_RCV: + return SESSION_STATE__SESSION_CS_FIN_RCV; + }; + + return SESSION_STATE__SESSION_OTHER; +} + +static void cl_protobuf_sess_common(struct cgn_sess2 *s2, SessionLog *msg) +{ + struct cgn_session *cse = cgn_sess2_session(s2); + struct ifnet *ifp = dp_ifnet_byifindex(cgn_session_ifindex(cse)); + struct cgn_state *state = cgn_sess2_state(s2); + uint16_t port; + + msg->has_sessionid = 1; + msg->sessionid = cgn_session_id(cse); + + msg->has_subsessionid = 1; + msg->subsessionid = cgn_sess2_id(s2); + + if (ifp) + msg->ifname = ifp->if_name; + + msg->has_protocol = 1; + msg->protocol = 
cgn_sess2_ipproto(s2); + + msg->has_direction = 1; + if (cgn_sess2_dir(s2) == CGN_DIR_IN) + msg->direction = DIRECTION__DIRECTION_IN; + else + msg->direction = DIRECTION__DIRECTION_OUT; + + msg->has_subscriberaddress = 1; + msg->subscriberaddress = ntohl(cgn_session_forw_addr(cse)); + + msg->has_subscriberport = 1; + port = cgn_session_forw_id(cse); + msg->subscriberport = ntohs(port); + + msg->has_natallocatedaddress = 1; + msg->natallocatedaddress = ntohl(cgn_session_back_addr(cse)); + + msg->has_natallocatedport = 1; + port = cgn_session_back_id(cse); + msg->natallocatedport = ntohs(port); + + msg->has_destinationaddress = 1; + msg->destinationaddress = ntohl(cgn_sess2_addr(s2)); + + msg->has_destinationport = 1; + port = cgn_sess2_port(s2); + msg->destinationport = ntohs(port); + + msg->has_state = 1; + msg->state = sess_state_to_pb(state->st_state); + + if (state->st_proto == NAT_PROTO_TCP) { + msg->has_statehistory = 1; + msg->statehistory = state->st_hist; + } + + /* + * Note that the session start time is stored in microseconds, + * rather than milliseconds, as used in rtt calculations. 
+ */ + microsecs_to_timestamp(cgn_sess2_start_time(s2), msg->starttimestamp); +} + +/* + * Send a protobuf structure down the session ZMQ channel + */ +static int cl_protobuf_log_send_session(SessionLog *msg) +{ + unsigned int buflen = session_log__get_packed_size(msg); + void *buf = malloc(buflen); + + if (unlikely(buf == NULL)) { + if (net_ratelimit()) + RTE_LOG(ERR, CGNAT, "%s: buffer allocation\n", + __func__); + return -ENOMEM; + } + + session_log__pack(msg, buf); + + return cl_protobuf_zmq_send(CGN_LOG_TYPE_SESSION, buf, buflen); +} + +/* + * Log session creation - SESSION_EVENT_CREATE + */ +static void cl_protobuf_sess_start(struct cgn_sess2 *s2) +{ + SessionLog msg = SESSION_LOG__INIT; + Timestamp start_ts = TIMESTAMP__INIT; + + msg.starttimestamp = &start_ts; + + cl_protobuf_sess_common(s2, &msg); + + msg.has_eventtype = 1; + msg.eventtype = SESSION_EVENT_TYPE__SESSION_EVENT_CREATE; + + cl_protobuf_log_send_session(&msg); +} + +static void cl_protobuf_sess_active_and_end(struct cgn_sess2 *s2, + SessionEventType eventtype, + uint64_t time2) +{ + SessionLog msg = SESSION_LOG__INIT; + Timestamp start_ts = TIMESTAMP__INIT; + Timestamp cur_ts = TIMESTAMP__INIT; + struct cgn_state *state = cgn_sess2_state(s2); + + msg.starttimestamp = &start_ts; + + cl_protobuf_sess_common(s2, &msg); + + msg.has_eventtype = 1; + msg.eventtype = eventtype; + + msg.has_inbytes = 1; + msg.inbytes = cgn_sess2_bytes_in_tot(s2); + msg.has_outbytes = 1; + msg.outbytes = cgn_sess2_bytes_out_tot(s2); + msg.has_inpackets = 1; + msg.inpackets = cgn_sess2_pkts_in_tot(s2); + msg.has_outpackets = 1; + msg.outpackets = cgn_sess2_pkts_out_tot(s2); + + if (state->st_proto == NAT_PROTO_TCP) { + msg.has_networkroundtriptime = 1; + msg.networkroundtriptime = state->st_int_rtt; + msg.has_internetroundtriptime = 1; + msg.internetroundtriptime = state->st_ext_rtt; + } + + microsecs_to_timestamp(time2, &cur_ts); + msg.currenttimestamp = &cur_ts; + + cl_protobuf_log_send_session(&msg); +} + +/* + * 
Periodic logging - SESSION_EVENT_ACTIVE + */ +static void cl_protobuf_sess_active(struct cgn_sess2 *s2) +{ + cl_protobuf_sess_active_and_end(s2, + SESSION_EVENT_TYPE__SESSION_EVENT_ACTIVE, unix_epoch_us); +} + +/* + * Log 5-tuple session end - SESSION_EVENT_END + */ +static void cl_protobuf_sess_end(struct cgn_sess2 *s2, uint64_t end_time) +{ + cl_protobuf_sess_active_and_end(s2, + SESSION_EVENT_TYPE__SESSION_EVENT_END, end_time); +} + +static ConstraintLimit constraint_limit_to_pb(enum cgn_resource_type type) +{ + switch (type) { + case CGN_RESOURCE_FULL: + return CONSTRAINT_LIMIT__CONSTRAINT_LIMIT_FULL; + case CGN_RESOURCE_AVAILABLE: + return CONSTRAINT_LIMIT__CONSTRAINT_LIMIT_AVAILABLE; + case CGN_RESOURCE_THRESHOLD: + return CONSTRAINT_LIMIT__CONSTRAINT_LIMIT_THRESHOLD; + }; + + return CONSTRAINT_LIMIT__CONSTRAINT_LIMIT_UNKNOWN; +} + +static void cl_protobuf_resource_common(enum cgn_resource_type type, + ConstraintEventType eventtype, + ConstraintLog *msg) +{ + msg->has_eventtype = 1; + msg->eventtype = eventtype; + + msg->has_constraintlimit = 1; + msg->constraintlimit = constraint_limit_to_pb(type); + + microsecs_to_timestamp(unix_epoch_us, msg->timestamp); +} + +/* + * Send a protobuf structure down the resource constraint ZMQ channel + */ +static int cl_protobuf_log_send_res_constraint(ConstraintLog *msg) +{ + unsigned int buflen = constraint_log__get_packed_size(msg); + void *buf = malloc(buflen); + + if (unlikely(buf == NULL)) { + if (net_ratelimit()) + RTE_LOG(ERR, CGNAT, "%s: buffer allocation\n", + __func__); + return -ENOMEM; + } + + constraint_log__pack(msg, buf); + + return cl_protobuf_zmq_send(CGN_LOG_TYPE_RES_CONSTRAINT, buf, buflen); +} + +static void cl_protobuf_resource_common_count_and_max( + enum cgn_resource_type resource_type, ConstraintEventType eventtype, + int32_t count, int32_t max_count) +{ + ConstraintLog msg = CONSTRAINT_LOG__INIT; + Timestamp cur_ts = TIMESTAMP__INIT; + + msg.timestamp = &cur_ts; + + 
cl_protobuf_resource_common(resource_type, eventtype, &msg); + + msg.has_count = 1; + msg.count = count; + + msg.has_maxcount = 1; + msg.maxcount = max_count; + + cl_protobuf_log_send_res_constraint(&msg); +} + +/* + * Log CONSTRAINT_EVENT_SUBSCRIBER_TABLE + */ +static void cl_protobuf_resource_subscriber_table(enum cgn_resource_type type, + int32_t count, + int32_t max_count) +{ + cl_protobuf_resource_common_count_and_max(type, + CONSTRAINT_EVENT_TYPE__CONSTRAINT_EVENT_SUBSCRIBER_TABLE, + count, max_count); +} + +/* + * Log CONSTRAINT_EVENT_SESSION_TABLE + */ +static void cl_protobuf_resource_session_table(enum cgn_resource_type type, + int32_t count, int32_t max_count) +{ + cl_protobuf_resource_common_count_and_max(type, + CONSTRAINT_EVENT_TYPE__CONSTRAINT_EVENT_SESSION_TABLE, + count, max_count); +} + +/* + * Logs CONSTRAINT_EVENT_MAPPING_TABLE + */ +static void cl_protobuf_resource_apm_table(enum cgn_resource_type type, + int32_t count, int32_t limit_count) +{ + cl_protobuf_resource_common_count_and_max(type, + CONSTRAINT_EVENT_TYPE__CONSTRAINT_EVENT_MAPPING_TABLE, + count, limit_count); +} + +/* + * Log CONSTRAINT_EVENT_DEST_SESSIONS + */ +static void cl_protobuf_resource_dest_session_table(enum cgn_resource_type type, + struct cgn_session *cse, + int16_t count, + int16_t max_count) +{ + ConstraintLog msg = CONSTRAINT_LOG__INIT; + Timestamp cur_ts = TIMESTAMP__INIT; + struct ifnet *ifp = dp_ifnet_byifindex(cgn_session_ifindex(cse)); + uint16_t port; + + msg.timestamp = &cur_ts; + + cl_protobuf_resource_common(type, + CONSTRAINT_EVENT_TYPE__CONSTRAINT_EVENT_DEST_SESSIONS, &msg); + + msg.has_count = 1; + msg.count = count; + + msg.has_maxcount = 1; + msg.maxcount = max_count; + + msg.has_sessionid = 1; + msg.sessionid = cgn_session_id(cse); + + if (ifp) + msg.ifname = ifp->if_name; + + msg.has_protocol = 1; + msg.protocol = cgn_session_ipproto(cse); + + msg.has_subscriberaddress = 1; + msg.subscriberaddress = ntohl(cgn_session_forw_addr(cse)); + + 
msg.has_subscriberport = 1; + port = cgn_session_forw_id(cse); + msg.subscriberport = ntohs(port); + + msg.has_natallocatedaddress = 1; + msg.natallocatedaddress = ntohl(cgn_session_back_addr(cse)); + + msg.has_natallocatedport = 1; + port = cgn_session_back_id(cse); + msg.natallocatedport = ntohs(port); + + cl_protobuf_log_send_res_constraint(&msg); +} + +/* + * Logs for subscriber resource limits - CONSTRAINT_EVENT_BLOCKS_PER_SUBSCRIBER + */ +static void +cl_protobuf_resource_subscriber_mbpu(enum cgn_resource_type type, uint32_t addr, + uint8_t ipproto, uint16_t count, + uint16_t max_count) +{ + ConstraintLog msg = CONSTRAINT_LOG__INIT; + Timestamp cur_ts = TIMESTAMP__INIT; + + msg.timestamp = &cur_ts; + + cl_protobuf_resource_common(type, + CONSTRAINT_EVENT_TYPE__CONSTRAINT_EVENT_BLOCKS_PER_SUBSCRIBER, + &msg); + + msg.has_count = 1; + msg.count = count; + + msg.has_maxcount = 1; + msg.maxcount = max_count; + + msg.has_subscriberaddress = 1; + msg.subscriberaddress = addr; + + /* ipproto will be 0 for 'other' (i.e. 
non-TCP and non-UDP) */ + msg.has_protocol = 1; + msg.protocol = ipproto; + + cl_protobuf_log_send_res_constraint(&msg); +} + +/* + * Logs for public address blocks resource limits - + * CONSTRAINT_EVENT_BLOCKS_FOR_NAT_ALLOC_ADDR + */ +static void cl_protobuf_resource_public_pb(enum cgn_resource_type type, + uint32_t addr, uint16_t blocks_used, + uint16_t nblocks) +{ + ConstraintLog msg = CONSTRAINT_LOG__INIT; + Timestamp cur_ts = TIMESTAMP__INIT; + + msg.timestamp = &cur_ts; + + cl_protobuf_resource_common(type, + CONSTRAINT_EVENT_TYPE__CONSTRAINT_EVENT_BLOCKS_FOR_NAT_ALLOC_ADDR, + &msg); + + msg.has_count = 1; + msg.count = blocks_used; + + msg.has_maxcount = 1; + msg.maxcount = nblocks; + + msg.has_natallocatedaddress = 1; + msg.natallocatedaddress = addr; + + cl_protobuf_log_send_res_constraint(&msg); +} + +/* + * Logs CONSTRAINT_EVENT_NAT_POOL + */ +static void cl_protobuf_resource_pool(enum cgn_resource_type type, + struct nat_pool *np, int32_t count, + int32_t max_count) +{ + ConstraintLog msg = CONSTRAINT_LOG__INIT; + Timestamp cur_ts = TIMESTAMP__INIT; + + msg.timestamp = &cur_ts; + + cl_protobuf_resource_common(type, + CONSTRAINT_EVENT_TYPE__CONSTRAINT_EVENT_NAT_POOL, &msg); + + msg.has_count = 1; + msg.count = count; + + msg.has_maxcount = 1; + msg.maxcount = max_count; + + msg.poolname = np->np_name; + + cl_protobuf_log_send_res_constraint(&msg); +} + +/* + * Log a session clear event (CONSTRAINT_EVENT_SESSION_CLEAR). This is done + * when one or more 2-tuple sessions are cleared manually, either from a clear + * command or a change in config (e.g. nat pool block size changes). This + * log message replaces the multiple SESSION_EVENT_END log messages in order + * to avoid scale issues. 
+ */ +static void +cl_protobuf_sess_clear(const char *desc, uint count, uint64_t clear_time) +{ + ConstraintLog msg = CONSTRAINT_LOG__INIT; + Timestamp cur_ts = TIMESTAMP__INIT; + + msg.has_eventtype = 1; + msg.eventtype = CONSTRAINT_EVENT_TYPE__CONSTRAINT_EVENT_SESSION_CLEAR; + + msg.timestamp = &cur_ts; + microsecs_to_timestamp(clear_time, msg.timestamp); + + msg.has_count = 1; + msg.count = count; + + msg.desc = (char *)desc; + + cl_protobuf_log_send_res_constraint(&msg); +} + +const struct cgn_session_log_fns cgn_session_protobuf_fns = { + .cl_sess_start = cl_protobuf_sess_start, + .cl_sess_active = cl_protobuf_sess_active, + .cl_sess_end = cl_protobuf_sess_end, +}; + +const struct cgn_port_block_alloc_log_fns cgn_port_block_alloc_protobuf_fns = { + .cl_pb_alloc = cl_protobuf_pb_alloc, + .cl_pb_release = cl_protobuf_pb_release, +}; + +const struct cgn_subscriber_log_fns cgn_subscriber_protobuf_fns = { + .cl_subscriber_start = cl_protobuf_subscriber_start, + .cl_subscriber_end = cl_protobuf_subscriber_end, +}; + +const struct cgn_res_constraint_log_fns cgn_res_constraint_protobuf_fns = { + .cl_resource_subscriber_mbpu = cl_protobuf_resource_subscriber_mbpu, + .cl_resource_public_pb = cl_protobuf_resource_public_pb, + .cl_sess_clear = cl_protobuf_sess_clear, + .cl_resource_subscriber_table = cl_protobuf_resource_subscriber_table, + .cl_resource_session_table = cl_protobuf_resource_session_table, + .cl_resource_dest_session_table = + cl_protobuf_resource_dest_session_table, + .cl_resource_apm_table = cl_protobuf_resource_apm_table, + .cl_resource_pool = cl_protobuf_resource_pool, +}; + +const struct cgn_log_fns cgn_protobuf_fns = { + .cl_name = "protobuf", + .cl_init = cl_zmq_init, + .cl_fini = cl_zmq_fini, + .logfn[CGN_LOG_TYPE_SESSION].session = + &cgn_session_protobuf_fns, + .logfn[CGN_LOG_TYPE_PORT_BLOCK_ALLOCATION].port_block_alloc = + &cgn_port_block_alloc_protobuf_fns, + .logfn[CGN_LOG_TYPE_SUBSCRIBER].subscriber = + &cgn_subscriber_protobuf_fns, + 
.logfn[CGN_LOG_TYPE_RES_CONSTRAINT].res_constraint = + &cgn_res_constraint_protobuf_fns, +}; diff --git a/src/npf/cgnat/cgn_log_protobuf_zmq.h b/src/npf/cgnat/cgn_log_protobuf_zmq.h new file mode 100644 index 00000000..f36fb1cd --- /dev/null +++ b/src/npf/cgnat/cgn_log_protobuf_zmq.h @@ -0,0 +1,15 @@ +/* + * Copyright (c) 2019, AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ + +#ifndef _CGN_LOG_PROTOBUF_ZMQ_H_ +#define _CGN_LOG_PROTOBUF_ZMQ_H_ + +#include "npf/cgnat/cgn_log.h" + +int cl_zmq_set_hwm(enum cgn_log_type ltype, int32_t hwm); +void cgn_show_zmq(FILE *f); + +#endif /* _CGN_LOG_PROTOBUF_ZMQ_H_ */ diff --git a/src/npf/cgnat/cgn_log_rte.c b/src/npf/cgnat/cgn_log_rte.c new file mode 100644 index 00000000..28b79327 --- /dev/null +++ b/src/npf/cgnat/cgn_log_rte.c @@ -0,0 +1,570 @@ +/* + * Copyright (c) 2019-2021, AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ + +/** + * @file cgn_log_rte.c - cgnat logging using rte_log() + */ + +#include +#include +#include + +#include "compiler.h" +#include "if_var.h" +#include "util.h" +#include "soft_ticks.h" +#include "vplane_log.h" + +#include "npf/cgnat/cgn.h" +#include "npf/cgnat/cgn_log.h" +#include "npf/cgnat/cgn_source.h" +#include "npf/cgnat/cgn_sess_state.h" +#include "npf/cgnat/cgn_session.h" +#include "npf/cgnat/cgn_sess2.h" +#include "npf/nat/nat_pool.h" + +#define ADDR_CHARS 16 + +#define CGNAT_RTE_LOG(level, ...) 
\ + rte_log(level, RTE_LOGTYPE_CGNAT, "CGNAT: " __VA_ARGS__) + +/* + * Format an IPv4 host-byte ordered address + */ +static char *cgn_addrstr(uint32_t addr, char *str, size_t slen) +{ + snprintf(str, slen, "%u.%u.%u.%u", + (addr >> 24) & 0xFF, (addr >> 16) & 0xFF, + (addr >> 8) & 0xFF, addr & 0xFF); + return str; +} + +/* + * Log subscriber session start - SUBSCRIBER_START + */ +static void cl_rte_log_subscriber_start(uint32_t addr) +{ + char str1[ADDR_CHARS]; + + RTE_LOG(NOTICE, CGNAT, + "SUBSCRIBER_START subs-addr=%s start-time=%lu\n", + cgn_addrstr(addr, str1, ADDR_CHARS), unix_epoch_us); +} + +/* + * Log subscriber session end - SUBSCRIBER_END + */ +static void cl_rte_log_subscriber_end(uint32_t addr, uint64_t start_time, + uint64_t end_time, uint64_t pkts_out, + uint64_t bytes_out, uint64_t pkts_in, + uint64_t bytes_in, uint64_t sessions) +{ + char str1[ADDR_CHARS]; + + RTE_LOG(NOTICE, CGNAT, + "SUBSCRIBER_END subs-addr=%s start-time=%lu " + "end-time=%lu sessions=%lu forw=%lu/%lu back=%lu/%lu\n", + cgn_addrstr(addr, str1, ADDR_CHARS), start_time, end_time, + sessions, pkts_out, bytes_out, pkts_in, bytes_in); +} + +static const char *cgn_log_name_or_unknown(const char *name) +{ + return name ? 
name : "(unknown)"; +} + +/* + * Log port block allocation - PB_ALLOCATED + */ +static void cl_rte_log_pb_alloc(uint32_t pvt_addr, uint32_t pub_addr, + uint16_t port_start, uint16_t port_end, + uint64_t start_time, const char *policy_name, + const char *pool_name) +{ + char str1[ADDR_CHARS]; + char str2[ADDR_CHARS]; + + RTE_LOG(NOTICE, CGNAT, + "PB_ALLOCATED subs-addr=%s policy=%s pub-addr=%s pool=%s " + "port=%u-%u start-time=%lu\n", + cgn_addrstr(pvt_addr, str1, ADDR_CHARS), + cgn_log_name_or_unknown(policy_name), + cgn_addrstr(pub_addr, str2, ADDR_CHARS), + cgn_log_name_or_unknown(pool_name), + port_start, port_end, start_time); +} + +/* + * Log port block release - PB_RELEASED + */ +static void cl_rte_log_pb_release(uint32_t pvt_addr, uint32_t pub_addr, + uint16_t port_start, uint16_t port_end, + uint64_t start_time, uint64_t end_time, + const char *policy_name, + const char *pool_name) +{ + char str1[ADDR_CHARS]; + char str2[ADDR_CHARS]; + + RTE_LOG(NOTICE, CGNAT, + "PB_RELEASED subs-addr=%s policy=%s pub-addr=%s pool=%s " + "port=%u-%u start-time=%lu end-time=%lu\n", + cgn_addrstr(pvt_addr, str1, ADDR_CHARS), + cgn_log_name_or_unknown(policy_name), + cgn_addrstr(pub_addr, str2, ADDR_CHARS), + cgn_log_name_or_unknown(pool_name), + port_start, port_end, start_time, end_time); +} + +/* + * Log 5-tuple session + */ +static uint +cl_rte_log_sess_common(struct cgn_sess2 *s2, char *log_str, uint log_str_sz) +{ +#define ADDR_CHARS 16 + char str1[ADDR_CHARS]; + char str2[ADDR_CHARS]; + char str3[ADDR_CHARS]; + char state_str[12]; + struct ifnet *ifp; + struct cgn_session *cse = cgn_sess2_session(s2); + uint32_t pid = cgn_session_id(cse); + uint32_t int_src = cgn_session_forw_addr(cse); + uint16_t int_port = cgn_session_forw_id(cse); + uint32_t ext_src = cgn_session_back_addr(cse); + uint16_t ext_port = cgn_session_back_id(cse); + struct cgn_state *state = cgn_sess2_state(s2); + uint len; + + ifp = dp_ifnet_byifindex(cgn_session_ifindex(cse)); + + if (state->st_proto 
== NAT_PROTO_TCP) + snprintf(state_str, sizeof(state_str), "%s[%u/0x%02X]", + cgn_sess_state_str_short(state), + state->st_state, state->st_hist); + else + snprintf(state_str, sizeof(state_str), "%s[%u]", + cgn_sess_state_str_short(state), + state->st_state); + + len = snprintf(log_str, log_str_sz, + "ifname=%s session-id=%u.%u proto=%u dir=%s " + "addr=%s->%s port=%u->%u cgn-addr=%s cgn-port=%u " + "state=%s start-time=%lu", + ifp ? ifp->if_name : "-", pid, + cgn_sess2_id(s2), cgn_sess2_ipproto(s2), + cgn_sess2_dir(s2) == CGN_DIR_IN ? "in" : "out", + cgn_addrstr(ntohl(int_src), str1, ADDR_CHARS), + cgn_addrstr(ntohl(cgn_sess2_addr(s2)), str2, ADDR_CHARS), + ntohs(int_port), ntohs(cgn_sess2_port(s2)), + cgn_addrstr(ntohl(ext_src), str3, ADDR_CHARS), + ntohs(ext_port), state_str, + cgn_sess2_start_time(s2)); + + return len; +} + +/* + * Log SESSION_CREATE + */ +static void cl_rte_log_sess_start(struct cgn_sess2 *s2) +{ +#define LOG_STR_SZ 400 + char log_str[LOG_STR_SZ]; + + cl_rte_log_sess_common(s2, log_str, sizeof(log_str)); + RTE_LOG(NOTICE, CGNAT, "SESSION_CREATE %s\n", log_str); +} + +/* + * Periodic logging - SESSION_ACTIVE + */ +static void cl_rte_log_sess_active(struct cgn_sess2 *s2) +{ +#define LOG_STR_SZ 400 + char log_str[LOG_STR_SZ]; + uint len; + struct cgn_state *state = cgn_sess2_state(s2); + + len = cl_rte_log_sess_common(s2, log_str, sizeof(log_str)); + + len += snprintf(log_str + len, sizeof(log_str) - len, + " cur-time=%lu", unix_epoch_us); + + len += snprintf(log_str + len, sizeof(log_str) - len, + " out=%u/%lu in=%lu/%lu", + cgn_sess2_pkts_out_tot(s2), cgn_sess2_bytes_out_tot(s2), + cgn_sess2_pkts_in_tot(s2), cgn_sess2_bytes_in_tot(s2)); + + if (state->st_proto == NAT_PROTO_TCP) + /* TCP round-trip time in microsecs */ + snprintf(log_str + len, sizeof(log_str) - len, + " int-rtt=%lu ext-rtt=%lu", + state->st_int_rtt, state->st_ext_rtt); + + RTE_LOG(NOTICE, CGNAT, "SESSION_ACTIVE %s\n", log_str); +} + +/* + * Log 5-tuple session end - 
SESSION_DELETE + */ +static void cl_rte_log_sess_end(struct cgn_sess2 *s2, uint64_t end_time) +{ +#define LOG_STR_SZ 400 + char log_str[LOG_STR_SZ]; + uint len; + struct cgn_state *state = cgn_sess2_state(s2); + + len = cl_rte_log_sess_common(s2, log_str, sizeof(log_str)); + + len += snprintf(log_str + len, sizeof(log_str) - len, + " end-time=%lu", end_time); + + len += snprintf(log_str + len, sizeof(log_str) - len, + " out=%u/%lu in=%lu/%lu", + cgn_sess2_pkts_out_tot(s2), cgn_sess2_bytes_out_tot(s2), + cgn_sess2_pkts_in_tot(s2), cgn_sess2_bytes_in_tot(s2)); + + if (state->st_proto == NAT_PROTO_TCP) + /* TCP round-trip time in microsecs */ + snprintf(log_str + len, sizeof(log_str) - len, + " int-rtt=%lu ext-rtt=%lu", + state->st_int_rtt, state->st_ext_rtt); + + RTE_LOG(NOTICE, CGNAT, "SESSION_DELETE %s\n", log_str); +} + +/* + * Log SUBSCRIBER_TABLE_FULL, SUBSCRIBER_TABLE_AVAILABLE, and + * SUBSCRIBER_TABLE_THRESHOLD + */ +static void cl_rte_log_resource_subscriber_table(enum cgn_resource_type type, + int32_t count, + int32_t max_count) +{ + const char *event_name; + uint32_t level; + + switch(type) { + case CGN_RESOURCE_FULL: + event_name = "SUBSCRIBER_TABLE_FULL"; + level = RTE_LOG_ERR; + break; + case CGN_RESOURCE_AVAILABLE: + event_name = "SUBSCRIBER_TABLE_AVAILABLE"; + level = RTE_LOG_ERR; + break; + case CGN_RESOURCE_THRESHOLD: + event_name = "SUBSCRIBER_TABLE_THRESHOLD"; + level = RTE_LOG_WARNING; + break; + default: + return; + } + + CGNAT_RTE_LOG(level, "%s count=%d/%d\n", event_name, count, max_count); +} + +/* + * Log SESSION_TABLE_FULL, SESSION_TABLE_AVAILABLE, and + * SESSION_TABLE_THRESHOLD + */ +static void cl_rte_log_resource_session_table(enum cgn_resource_type type, + int32_t count, int32_t max_count) +{ + const char *event_name; + uint32_t level; + + switch(type) { + case CGN_RESOURCE_FULL: + event_name = "SESSION_TABLE_FULL"; + level = RTE_LOG_ERR; + break; + case CGN_RESOURCE_AVAILABLE: + event_name = "SESSION_TABLE_AVAILABLE"; + level = 
RTE_LOG_ERR; + break; + case CGN_RESOURCE_THRESHOLD: + event_name = "SESSION_TABLE_THRESHOLD"; + level = RTE_LOG_WARNING; + break; + default: + return; + } + + CGNAT_RTE_LOG(level, "%s count=%d/%d\n", event_name, count, max_count); +} + +/* + * Logs APM_TABLE_ABOVE_LIMIT, APM_TABLE_BELOW_LIMIT, and APM_TABLE_THRESHOLD + * + * The apm table has no maximum size. However the user may specify a limit + * for which warnings will be logged when the table size goes above/below that + * limit. A threshold may also be specified as a percentage of the limit. + */ +static void cl_rte_log_resource_apm_table(enum cgn_resource_type type, + int32_t count, int32_t limit_count) +{ + const char *event_name; + uint32_t level; + + switch(type) { + case CGN_RESOURCE_FULL: + event_name = "APM_TABLE_ABOVE_LIMIT"; + level = RTE_LOG_WARNING; + break; + case CGN_RESOURCE_AVAILABLE: + event_name = "APM_TABLE_BELOW_LIMIT"; + level = RTE_LOG_WARNING; + break; + case CGN_RESOURCE_THRESHOLD: + event_name = "APM_TABLE_THRESHOLD"; + level = RTE_LOG_WARNING; + break; + default: + return; + } + + CGNAT_RTE_LOG(level, "%s count=%d/%d\n", + event_name, count, limit_count); +} + +/* + * Basic log string for a 3-tuple session + */ +static int cgn_session_log_str(struct cgn_session *cse, bool incl_trans, + char *log_str, uint log_str_sz) +{ +#define ADDR_CHARS 16 + char str1[ADDR_CHARS]; + struct ifnet *ifp; + uint32_t pid = cgn_session_id(cse); + uint32_t int_src = cgn_session_forw_addr(cse); + uint16_t int_port = cgn_session_forw_id(cse); + uint len; + + ifp = dp_ifnet_byifindex(cgn_session_ifindex(cse)); + + len = snprintf(log_str, log_str_sz, + "ifname=%s session-id=%u proto=%u " + "addr=%s port=%u", + ifp ? 
ifp->if_name : "-", pid, + cgn_session_ipproto(cse), + cgn_addrstr(ntohl(int_src), str1, ADDR_CHARS), + ntohs(int_port)); + + if (incl_trans) { + uint32_t ext_src = cgn_session_back_addr(cse); + uint16_t ext_port = cgn_session_back_id(cse); + + len += snprintf(log_str + len, log_str_sz - len, + " cgn-addr=%s cgn-port=%u", + cgn_addrstr(ntohl(ext_src), str1, ADDR_CHARS), + ntohs(ext_port)); + } + + return len; +} + +/* + * Log DEST_SESSIONS_FULL, DEST_SESSIONS_AVAILABLE, and + * DEST_SESSIONS_THRESHOLD +*/ +static void cl_rte_log_resource_dest_session_table(enum cgn_resource_type type, + struct cgn_session *cse, + int16_t count, + int16_t max_count) +{ + const char *event_name; + uint32_t level; + char log_str[140]; + + switch(type) { + case CGN_RESOURCE_FULL: + event_name = "DEST_SESSIONS_FULL"; + level = RTE_LOG_ERR; + break; + case CGN_RESOURCE_AVAILABLE: + event_name = "DEST_SESSIONS_AVAILABLE"; + level = RTE_LOG_ERR; + break; + case CGN_RESOURCE_THRESHOLD: + event_name = "DEST_SESSIONS_THRESHOLD"; + level = RTE_LOG_WARNING; + break; + default: + return; + } + + cgn_session_log_str(cse, true, log_str, sizeof(log_str)); + + CGNAT_RTE_LOG(level, "%s count=%d/%d %s\n", + event_name, count, max_count, log_str); +} + +/* + * Logs for subscriber resource limits - MBPU_FULL, MBPU_AVAILABLE, + * and MBPU_THRESHOLD + */ +static void +cl_rte_log_resource_subscriber_mbpu(enum cgn_resource_type type, + uint32_t addr, uint8_t ipproto, + uint16_t count, uint16_t max_count) +{ + char str1[ADDR_CHARS]; + const char *event_name; + uint32_t level; + + switch(type) { + case CGN_RESOURCE_FULL: + event_name = "MBPU_FULL"; + level = RTE_LOG_ERR; + break; + case CGN_RESOURCE_AVAILABLE: + event_name = "MBPU_AVAILABLE"; + level = RTE_LOG_ERR; + break; + case CGN_RESOURCE_THRESHOLD: + event_name = "MBPU_THRESHOLD"; + level = RTE_LOG_WARNING; + break; + default: + return; + } + + /* ipproto will be 0 for 'other' (i.e. 
non-TCP and non-UDP) */ + CGNAT_RTE_LOG(level, "%s proto=%u subs-addr=%s blocks=%u/%u\n", + event_name, ipproto, + cgn_addrstr(addr, str1, ADDR_CHARS), + count, max_count); +} + +/* + * Logs for public address blocks resource limits - PB_FULL, + * PB_AVAILABLE, and PB_THRESHOLD + */ +static void cl_rte_log_resource_public_pb(enum cgn_resource_type type, + uint32_t addr, uint16_t blocks_used, + uint16_t nblocks) +{ + char str1[ADDR_CHARS]; + const char *event_name; + uint32_t level; + + switch(type) { + case CGN_RESOURCE_FULL: + event_name = "PB_FULL"; + level = RTE_LOG_ERR; + break; + case CGN_RESOURCE_AVAILABLE: + event_name = "PB_AVAILABLE"; + level = RTE_LOG_ERR; + break; + case CGN_RESOURCE_THRESHOLD: + event_name = "PB_THRESHOLD"; + level = RTE_LOG_WARNING; + break; + default: + return; + } + + CGNAT_RTE_LOG(level, "%s pub-addr=%s blocks=%u/%u\n", event_name, + cgn_addrstr(addr, str1, ADDR_CHARS), blocks_used, + nblocks); +} + +/* + * Logs NP_FULL, NP_AVAILABLE, and NP_THRESHOLD + */ +static void cl_rte_log_resource_pool(enum cgn_resource_type type, + struct nat_pool *np, + int32_t count, int32_t max_count) +{ + const char *pool_name = nat_pool_name(np); + const char *event_name; + uint32_t level; + + switch(type) { + case CGN_RESOURCE_FULL: + event_name = "NP_FULL"; + level = RTE_LOG_ERR; + break; + case CGN_RESOURCE_AVAILABLE: + event_name = "NP_AVAILABLE"; + level = RTE_LOG_ERR; + break; + case CGN_RESOURCE_THRESHOLD: + event_name = "NP_THRESHOLD"; + level = RTE_LOG_WARNING; + break; + default: + return; + } + + CGNAT_RTE_LOG(level, "%s pool=%s count=%d/%d\n", + event_name, cgn_log_name_or_unknown(pool_name), + count, max_count); +} + +/* + * Log a session clear event (SESSION_CLEAR). This is done when one or more + * 2-tuple sessions are cleared manually, either from a clear command or a + * change in config (e.g. nat pool block size changes). This log message + * replaces the multiple SESSION_END log messages in order to avoid scale + * issues. 
+ */ +static void +cl_rte_log_sess_clear(const char *desc, uint count, uint64_t clear_time) +{ +#define LOG_STR_CL_SZ 300 + char log_str[LOG_STR_CL_SZ]; + + snprintf(log_str, sizeof(log_str), + "desc=\"%s\" count=%u time=%lu", desc, count, + clear_time); + + RTE_LOG(NOTICE, CGNAT, "SESSION_CLEAR %s\n", log_str); +} + +const struct cgn_session_log_fns cgn_session_rte_log_fns = { + .cl_sess_start = cl_rte_log_sess_start, + .cl_sess_active = cl_rte_log_sess_active, + .cl_sess_end = cl_rte_log_sess_end, +}; + +const struct cgn_port_block_alloc_log_fns cgn_port_block_alloc_rte_log_fns = { + .cl_pb_alloc = cl_rte_log_pb_alloc, + .cl_pb_release = cl_rte_log_pb_release, +}; + +const struct cgn_subscriber_log_fns cgn_subscriber_rte_log_fns = { + .cl_subscriber_start = cl_rte_log_subscriber_start, + .cl_subscriber_end = cl_rte_log_subscriber_end, +}; + +const struct cgn_res_constraint_log_fns cgn_res_constraint_rte_log_fns = { + .cl_resource_subscriber_mbpu = cl_rte_log_resource_subscriber_mbpu, + .cl_resource_public_pb = cl_rte_log_resource_public_pb, + .cl_sess_clear = cl_rte_log_sess_clear, + .cl_resource_subscriber_table = cl_rte_log_resource_subscriber_table, + .cl_resource_session_table = cl_rte_log_resource_session_table, + .cl_resource_dest_session_table = + cl_rte_log_resource_dest_session_table, + .cl_resource_apm_table = cl_rte_log_resource_apm_table, + .cl_resource_pool = cl_rte_log_resource_pool, +}; + +const struct cgn_log_fns cgn_rte_log_fns = { + .cl_name = "rte_log", + .logfn[CGN_LOG_TYPE_SESSION].session = &cgn_session_rte_log_fns, + .logfn[CGN_LOG_TYPE_PORT_BLOCK_ALLOCATION].port_block_alloc = + &cgn_port_block_alloc_rte_log_fns, + .logfn[CGN_LOG_TYPE_SUBSCRIBER].subscriber = + &cgn_subscriber_rte_log_fns, + .logfn[CGN_LOG_TYPE_RES_CONSTRAINT].res_constraint = + &cgn_res_constraint_rte_log_fns, +}; diff --git a/src/npf/cgnat/cgn_map.c b/src/npf/cgnat/cgn_map.c index 65dfc7b2..ff6e3e5d 100644 --- a/src/npf/cgnat/cgn_map.c +++ b/src/npf/cgnat/cgn_map.c @@ 
-1,5 +1,5 @@ /* - * Copyright (c) 2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2019-2021, AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only */ @@ -25,7 +25,7 @@ #include "npf/cgnat/cgn.h" #include "npf/apm/apm.h" -#include "npf/cgnat/cgn_errno.h" +#include "npf/cgnat/cgn_rc.h" #include "npf/cgnat/cgn_limits.h" #include "npf/cgnat/cgn_log.h" #include "npf/cgnat/cgn_map.h" @@ -34,50 +34,112 @@ #include "npf/cgnat/cgn_source.h" - -/* CGN_BLK_ENOSPC */ -static inline void cgn_alloc_log_pb_full(struct apm *apm) +/* + * Called from cgn_alloc_addr_rrobin when cgnat mapping fails to find an + * available public address (CGN_POOL_ENOSPC). + */ +static inline void cgn_alloc_pool_full(struct nat_pool *np) { - if (!apm->apm_pb_full) - cgn_log_public_pb_full(apm->apm_addr, - apm->apm_blocks_used, - apm->apm_nblocks); + if (!np->np_full) + cgn_log_resource_pool(CGN_RESOURCE_FULL, np, + rte_atomic32_read(&np->np_ranges->nr_used), + np->np_ranges->nr_naddrs); - apm->apm_pb_full = true; + np->np_full = true; } -/* CGN_POOL_ENOSPC */ -static inline void cgn_alloc_pool_full(struct nat_pool *np) +/* + * Called from apm_block_destroy when a port-block on an apm is freed. + * + * Refer to cgn_alloc_addr_rrobin to see how we decide if an address is + * available or not. 
+ */ +void cgn_alloc_pool_available(struct nat_pool *np, struct apm *apm) { - if (!np->np_full) - RTE_LOG(NOTICE, CGNAT, "NP_FULL name=%s\n", - np->np_name); + if (!np || !np->np_full) + return; - np->np_full = true; + /* The apm should be locked */ + assert(rte_spinlock_is_locked(&apm->apm_lock)); + + struct nat_pool_range *pr = NULL; + int range; + + range = nat_pool_addr_range(np, apm->apm_addr); + if (range >= 0) + pr = &np->np_ranges->nr_range[range]; + + if (apm->apm_blocks_used == 0 || (pr && pr->pr_shared)) { + cgn_log_resource_pool( + CGN_RESOURCE_AVAILABLE, np, + rte_atomic32_read(&np->np_ranges->nr_used), + np->np_ranges->nr_naddrs); + np->np_full = false; + } } /* * Round-robin address allocation. * - * Very simple. Each invocation uses the address after the one allocated by - * the previous invocation. After each allocation, we store the address - * in the pools np_addr_hint object. + * Each invocation uses the address after the one allocated by the previous + * invocation. After each allocation, we store the address in the pool's + * np_addr_hint object. * - * Note that the apm is *not* locked anytime in this function. + * Before we consider an address to use, we check if it has unallocated port + * blocks. Addresses with no port blocks in-use are preferred over an address + * with some port blocks already in-use. + * + * On first iteration of the pool addresses we try and identify the "least + * used" address. This may be used if a totally unused address is not found. + * + * We also make a "last gasp" second iteration of the pool addresses if the + * above yields no result. This simply grabs the first address with any free + * port-blocks. + * + * If addr_hint is set then pr may also be set. If so, then this is a pointer to + * the address range that addr_hint is in. + * + * If successful, the returned apm will be LOCKED.
*/ static struct apm * -cgn_alloc_addr_rrobin(struct nat_pool *np, uint8_t proto, uint32_t addr_hint, +cgn_alloc_addr_rrobin(struct nat_pool *np, enum nat_proto proto, + uint32_t addr_hint, struct nat_pool_range *pr, vrfid_t vrfid, int *error) { uint32_t addr, start_addr; - struct apm *apm; + struct apm *apm, *lu_apm = NULL; + bool pass2 = false; + + /* Do not iterate over pool addresses if we know none are available */ +repeat: + if (np->np_full) { + *error = -CGN_POOL_ENOSPC; + return NULL; + } addr = start_addr = addr_hint; + if (!pr) { + int range = nat_pool_addr_range(np, addr); + + if (range >= 0) + pr = &np->np_ranges->nr_range[range]; + } + /* Iterate over all addresses in all address ranges */ do { - /* Ignore blacklisted addresses */ - if (nat_pool_is_blacklist_addr(np, htonl(addr))) + int lock_result; + + /* + * This should almost never happen. It might only occur if an + * address pool has been reconfigured before the stored + * address hint has been updated or cleared. + */ + if (unlikely(!pr)) + goto next_addr; + + /* Ignore blocked addresses */ + if (nat_pool_is_blocked_addr(np, htonl(addr))) goto next_addr; apm = apm_lookup(addr, vrfid); @@ -90,20 +152,114 @@ cgn_alloc_addr_rrobin(struct nat_pool *np, uint8_t proto, uint32_t addr_hint, return NULL; } - if (apm->apm_blocks_used < apm->apm_nblocks) { - nat_pool_hint_set(np, addr, proto); - return apm; + /* LOCK apm before checking if there are free blocks */ + lock_result = rte_spinlock_trylock(&apm->apm_lock); + + /* Lock was unsuccessful, try next address */ + if (lock_result == 0) + goto next_addr; + + /* Was the apm destroyed between table lookup and lock? */ + if (unlikely((apm->apm_flags & APM_DEAD) != 0)) { + rte_spinlock_unlock(&apm->apm_lock); + goto next_addr; + } + + /* Always pick an unused public address first */ + if (apm->apm_blocks_used == 0) + goto addr_found; + + /* + * Is the address shareable, and does it have some free + * port-blocks? 
+ */ + if (pr->pr_shared && + (apm->apm_blocks_used < apm->apm_nblocks)) { + /* + * On second iteration through the NAT pool addresses, + * we simply use the first shareable address with any + * free port-blocks. + */ + if (unlikely(pass2)) + goto addr_found; + + /* + * On first iteration through the NAT pool addresses, + * we try and identify a "least used" address. + */ + if (!lu_apm || + (apm->apm_blocks_used < lu_apm->apm_blocks_used)) + lu_apm = apm; } + rte_spinlock_unlock(&apm->apm_lock); + + /* Try the next address in the pool */ next_addr: - addr = nat_pool_next_addr(np, addr); + addr = nat_pool_next_addr(np, addr, &pr); } while (addr != start_addr); - /* All pool addresses are in-use and with no free blocks */ + /* + * No unused addresses were found in first iteration of the pool. Did + * we find a "least used" address? + */ + if (lu_apm) { + int lock_result; + + /* + * If we cannot lock the candidate apm, or it is no longer + * suitable, then simple fall through to do the second + * iteration. + */ + apm = lu_apm; + lock_result = rte_spinlock_trylock(&apm->apm_lock); + + if (lock_result != 0) { + /* Lock successful. Can we still use this addr? */ + if ((apm->apm_flags & APM_DEAD) == 0 && + apm->apm_blocks_used < apm->apm_nblocks) { + /* Use this address */ + addr = apm->apm_addr; + goto addr_found; + } + + /* + * No longer suitable. Unlock and do second + * iteration. + */ + rte_spinlock_unlock(&apm->apm_lock); + } + } + + /* + * No unused addresses found, and either we failed to find a "least + * used" address or we lost the race to use the "least used" address. + * Make a last gasp attempt to just grab the first address with any + * free port-blocks. + */ + if (!pass2) { + pass2 = true; + pr = NULL; + lu_apm = NULL; + goto repeat; + } + + /* + * We only get here if both: 1. no unshareble addresses are unused, + * and 2. no shareable addresses have any free port-blocks. 
+ */ *error = -CGN_POOL_ENOSPC; cgn_alloc_pool_full(np); return NULL; + +addr_found: + /* + * If a suitable address is found then leave the apm LOCKED, set the + * address hint in the pool, and return. + */ + nat_pool_hint_set(np, addr, proto); + return apm; } /* @@ -119,21 +275,11 @@ cgn_alloc_block(struct nat_pool *np, struct apm *apm, uint16_t block_hint, struct apm_port_block *pb; uint16_t block, i; - /* - * Lock the apm so we can allocate a port block - */ - rte_spinlock_lock(&apm->apm_lock); - - /* Was the apm destroyed while we waited for the lock? */ - if ((apm->apm_flags & APM_DEAD) != 0) { - *error = -CGN_APM_ENOENT; - goto error; - } + assert(rte_spinlock_is_locked(&apm->apm_lock)); - /* Were apm blocks used-up while waiting for the lock? */ - if (apm->apm_blocks_used >= apm->apm_nblocks) { + /* This should have already been checked, but check again */ + if (unlikely(apm->apm_blocks_used >= apm->apm_nblocks)) { *error = -CGN_BLK_ENOSPC; - cgn_alloc_log_pb_full(apm); goto error; } @@ -157,8 +303,6 @@ cgn_alloc_block(struct nat_pool *np, struct apm *apm, uint16_t block_hint, } /* Success */ - rte_spinlock_unlock(&apm->apm_lock); - nat_pool_incr_block_allocs(np); nat_pool_incr_block_active(np); @@ -170,11 +314,8 @@ cgn_alloc_block(struct nat_pool *np, struct apm *apm, uint16_t block_hint, * above loop. */ *error = -CGN_BLK_ENOSPC; - cgn_alloc_log_pb_full(apm); error: - rte_spinlock_unlock(&apm->apm_lock); - nat_pool_incr_block_fails(np); return NULL; } @@ -185,7 +326,7 @@ cgn_alloc_block(struct nat_pool *np, struct apm *apm, uint16_t block_hint, */ static uint16_t cgn_source_find_port(struct apm_port_block **pbp, struct cgn_source *src, - uint8_t proto) + enum nat_proto proto) { return apm_block_list_first_free_port(&src->sr_block_list, proto, src->sr_active_block[proto], @@ -193,25 +334,53 @@ cgn_source_find_port(struct apm_port_block **pbp, struct cgn_source *src, } /* - * Allocate an address and port. + * Allocate an address and port from the apm module. 
+ * + * Inputs: + * vrfid + * cp - cgnat policy + * cmi->cmi_proto - 'Condensed' nat proto + * cmi->cmi_oaddr - Subscribers source addr * - * Writes to *taddr and *tport (in network byte order), and to **srcp. - * Returns 'enum cgn_errno'. + * Outputs (if successful): + * cmi->cmi_src - subscriber struct) + * cmi->cmi_taddr - Allocated public addr + * cmi->cmi_tid - Allocated public port + * cmi->cmi_reserved = true + * return 'enum cgn_errno' + * + * There are two locks that may be used here - one in the source address + * structure (struct cgn_source) and one is in the public address structure + * (struct apm). + * + * If we allocate from a port-block already assigned to the source, then only + * the source structure needs to be locked. (So if the port-block size is + * 512, then for 511 new sessions we only need to lock the source structure.) + * + * If we need to get a new port block then we need to lock *both* the source + * and apm structures while we assign the port block to the source. After + * that the apm lock can be released (and source lock kept), while we allocate + * the port from the port-block. */ int -cgn_map_get(struct cgn_policy *cp, vrfid_t vrfid, uint8_t proto, - uint32_t oaddr, uint32_t *taddr, uint16_t *tport, - struct cgn_source **srcp) +cgn_map_get(struct cgn_map *cmi, struct cgn_policy *cp, vrfid_t vrfid) { struct apm_port_block *pb; + enum nat_proto proto = cmi->cmi_proto; struct cgn_source *src; struct nat_pool *np; - struct apm *apm; + struct apm *apm = NULL; uint16_t port; int error; assert(proto <= NAT_PROTO_LAST); + assert(cmi->cmi_oaddr); + assert(cp); + if (unlikely(!cp || cmi->cmi_oaddr == 0)) + return -CGN_RC_UNKWN; + + /* Get public address pool from the policy */ np = cgn_policy_get_pool(cp); if (!np) /* No pool attached to policy, or pool is not active */ @@ -220,21 +389,23 @@ cgn_map_get(struct cgn_policy *cp, vrfid_t vrfid, uint8_t proto, /* Count of mapping requests ever. 
Only ever increments */ nat_pool_incr_map_reqs(np); - /* Find (or create) and lock a src entry */ - src = cgn_source_find_and_lock(cp, ntohl(oaddr), vrfid, &error); + /* + * Find (or create) and LOCK a subscriber address structure. The + * source struct remains locked until the end of cgn_map_get. + */ + src = cgn_source_find_and_lock(cp, ntohl(cmi->cmi_oaddr), vrfid, + &error); if (unlikely(!src)) { nat_pool_incr_map_fails(np); return error; } - /* Return the subscriber structure pointer to the caller */ - *srcp = src; - /* src is locked from here on */ + /* src is LOCKED from here on */ src->sr_map_reqs++; - /* Get active port-block for this proto */ + /* Get active port-block for this source and protocol */ pb = src->sr_active_block[proto]; /* @@ -250,15 +421,15 @@ cgn_map_get(struct cgn_policy *cp, vrfid_t vrfid, uint8_t proto, * paired address for this subscriber. Else get the next * address in the nat pool after the last allocated address. */ + struct nat_pool_range *pr = NULL; uint32_t addr_hint; - int rc; /* Does subscriber already have a paired address? */ if (src->sr_paired_addr) { /* Check paired address is still valid */ - rc = nat_pool_addr_range(np, src->sr_paired_addr); - if (rc < 0) + if (!nat_pool_is_pool_addr(np, + htonl(src->sr_paired_addr))) src->sr_paired_addr = 0; } @@ -269,14 +440,16 @@ cgn_map_get(struct cgn_policy *cp, vrfid_t vrfid, uint8_t proto, * from pool. */ addr_hint = nat_pool_hint(np, proto); - addr_hint = nat_pool_next_addr(np, addr_hint); + addr_hint = nat_pool_next_addr(np, addr_hint, &pr); } /* * Starting at addr_hint, iterate through addresses in the nat * pool until we find one with a free port-block. + * + * If successful, the returned apm will be LOCKED. 
*/ - apm = cgn_alloc_addr_rrobin(np, proto, addr_hint, + apm = cgn_alloc_addr_rrobin(np, proto, addr_hint, pr, vrfid, &error); /* @@ -286,11 +459,21 @@ cgn_map_get(struct cgn_policy *cp, vrfid_t vrfid, uint8_t proto, if (!apm) goto error; + assert(rte_spinlock_is_locked(&apm->apm_lock)); + pb = cgn_alloc_block(np, apm, 0, &error); - if (!pb) + if (!pb) { + rte_spinlock_unlock(&apm->apm_lock); goto error; + } + /* + * Add port-block to source list. port-block is now under + * control of source lock so we can release the apm lock. + */ cgn_source_add_block(src, proto, pb, np); + rte_spinlock_unlock(&apm->apm_lock); + } else { apm = apm_block_get_apm(pb); @@ -338,15 +521,23 @@ cgn_map_get(struct cgn_policy *cp, vrfid_t vrfid, uint8_t proto, nat_pool_incr_block_limit(np); error = -CGN_MBU_ENOSPC; - if (!src->sr_mbpu_full && net_ratelimit()) - cgn_log_subscriber_mbpu_full(src->sr_addr, - src->sr_block_count, - nat_pool_get_mbpu(np)); + if (!src->sr_mbpu_full[proto]) { + cgn_log_resource_subscriber_mbpu( + CGN_RESOURCE_FULL, + src->sr_addr, nat_ipproto_from_proto(proto), + src->sr_block_count, + nat_pool_get_mbpu(np)); + + src->sr_mbpu_full[proto] = true; + } - src->sr_mbpu_full = true; goto error; } + /* LOCK apm */ + assert(!rte_spinlock_is_locked(&apm->apm_lock)); + rte_spinlock_lock(&apm->apm_lock); + /* * Are there any available port-blocks on this public address? */ @@ -355,30 +546,42 @@ cgn_map_get(struct cgn_policy *cp, vrfid_t vrfid, uint8_t proto, * No free port-blocks. Alloc a new public address if * address-pool pairing is not enabled. 
*/ + rte_spinlock_unlock(&apm->apm_lock); + if (nat_pool_is_ap_paired(np)) { error = -CGN_BLK_ENOSPC; - cgn_alloc_log_pb_full(apm); goto error; } else { /* alloc a new public address */ + struct nat_pool_range *pr = NULL; uint32_t addr_hint; addr_hint = nat_pool_hint(np, proto); - addr_hint = nat_pool_next_addr(np, addr_hint); + addr_hint = nat_pool_next_addr(np, addr_hint, &pr); - apm = cgn_alloc_addr_rrobin(np, proto, addr_hint, + /* If successful, apm will be LOCKED */ + apm = cgn_alloc_addr_rrobin(np, proto, addr_hint, pr, vrfid, &error); if (!apm) goto error; } } + assert(rte_spinlock_is_locked(&apm->apm_lock)); + pb = cgn_alloc_block(np, apm, apm_block_get_block(pb) + 1, &error); - if (!pb) + if (!pb) { + rte_spinlock_unlock(&apm->apm_lock); goto error; + } - /* Add block to source's block list, and set as active block */ + /* + * Add port-block to source's block list, and set as active block. + * port-block is now under control of source lock so we can release + * the apm lock. + */ cgn_source_add_block(src, proto, pb, np); + rte_spinlock_unlock(&apm->apm_lock); /* Alloc port from new block */ if (nat_pool_is_pa_sequential(np)) @@ -387,10 +590,17 @@ cgn_map_get(struct cgn_policy *cp, vrfid_t vrfid, uint8_t proto, port = apm_block_alloc_random_port(pb, proto); port_found: - *taddr = htonl(apm->apm_addr); - *tport = htons(port); + /* Successful! 
*/ + cmi->cmi_src = src; + cmi->cmi_taddr = htonl(apm->apm_addr); + cmi->cmi_tid = htons(port); + cmi->cmi_reserved = true; + rte_atomic32_inc(&src->sr_map_active); + assert(!rte_spinlock_is_locked(&apm->apm_lock)); + assert(rte_spinlock_is_locked(&src->sr_lock)); + rte_spinlock_unlock(&src->sr_lock); /* @@ -402,6 +612,9 @@ cgn_map_get(struct cgn_policy *cp, vrfid_t vrfid, uint8_t proto, return 0; error: + assert(!apm || !rte_spinlock_is_locked(&apm->apm_lock)); + assert(rte_spinlock_is_locked(&src->sr_lock)); + src->sr_map_fails++; rte_spinlock_unlock(&src->sr_lock); @@ -411,79 +624,298 @@ cgn_map_get(struct cgn_policy *cp, vrfid_t vrfid, uint8_t proto, } /* - * Return a mapped address and port. + * cgn_map_get2 + * + * Obtain mapping specified by taddr *and* tport. taddr and tport are in + * network byte order. Used by PCP. */ -int cgn_map_put(struct nat_pool *np, vrfid_t vrfid, int dir __unused, - uint8_t proto, uint32_t oaddr, uint32_t taddr, uint16_t tport) +int cgn_map_get2(struct cgn_map *cmi, struct cgn_policy *cp, vrfid_t vrfid) { + struct apm_port_block *pb; + enum nat_proto proto = cmi->cmi_proto; struct cgn_source *src; - struct apm *apm; - int rc; + struct nat_pool *np; + struct apm *apm = NULL; + uint16_t port, block; + int error = 0; assert(proto <= NAT_PROTO_LAST); - assert(np); + assert(cmi->cmi_oaddr); + assert(cp); - apm = apm_lookup(ntohl(taddr), vrfid); - if (unlikely(!apm)) - return 0; + if (cmi->cmi_taddr == 0 || cmi->cmi_tid == 0) + return -CGN_PCP_EINVAL; - src = cgn_source_lookup(ntohl(oaddr), vrfid); - if (unlikely(!src)) - return 0; + /* Get public address pool */ + np = cgn_policy_get_pool(cp); + if (!np) + /* No pool attached to policy, or pool is not active */ + return -CGN_POOL_ENOSPC; - rte_spinlock_lock(&apm->apm_lock); + /* Count of mapping requests ever. 
Only ever increments */ + nat_pool_incr_map_reqs(np); + + /* Find (or create) and LOCK a source address structure */ + src = cgn_source_find_and_lock(cp, ntohl(cmi->cmi_oaddr), + vrfid, &error); + + if (unlikely(!src)) { + nat_pool_incr_map_fails(np); + return error; + } + + /* src is LOCKED from here on */ + + src->sr_map_reqs++; /* - * Was apm destroyed while we waited for lock? This should never - * happen in normal operation as only the master thread destroys - * sessions, and hence calls cgn_map_put. + * Is the requested public address in the NAT pool for the policy that + * is being used by this subscriber? */ + if (!nat_pool_is_pool_addr(np, cmi->cmi_taddr)) { + error = -CGN_POOL_ENOSPC; + goto error; + } + + /* + * Is the requested public address blocked? + */ + if (nat_pool_is_blocked_addr(np, cmi->cmi_taddr)) { + error = -CGN_POOL_ENOSPC; + goto error; + } + + /* Lookup public address in apm table */ + apm = apm_lookup(cmi->cmi_taddr, vrfid); + if (!apm) { + apm = apm_create_and_insert(ntohl(cmi->cmi_taddr), vrfid, + np, &error); + + /* Either out of memory, or apm table is full */ + if (unlikely(!apm)) + goto error; + } + + /* Lock apm */ + rte_spinlock_lock(&apm->apm_lock); + + /* Was the apm destroyed while we waited for the lock? */ if (unlikely((apm->apm_flags & APM_DEAD) != 0)) { + error = -CGN_POOL_ENOSPC; + goto error; + } + + /* Find the port-block for the given port */ + port = ntohs(cmi->cmi_tid); + block = apm_block(port, apm->apm_port_start, apm->apm_port_block_sz); + pb = apm->apm_blocks[block]; + + if (!pb) { + /* + * Before allocating a new port-block, check + * max-blocks-per-user limit. 
+ */ + if (src->sr_block_count >= nat_pool_get_mbpu(np)) { + + nat_pool_incr_block_limit(np); + error = -CGN_MBU_ENOSPC; + + if (!src->sr_mbpu_full[proto]) { + cgn_log_resource_subscriber_mbpu( + CGN_RESOURCE_FULL, + src->sr_addr, + nat_ipproto_from_proto(proto), + src->sr_block_count, + nat_pool_get_mbpu(np)); + + src->sr_mbpu_full[proto] = true; + } + + goto error; + } + + /* Allocate new port-block */ + pb = cgn_alloc_block(np, apm, block, &error); + if (!pb) + goto error; + + /* + * Check the block number is the one we requested. + * + * This should never fail since we checked + * apm->apm_blocks[block] above, but check anyway in case + * cgn_alloc_block changes in the future. + */ + if (unlikely(apm_block_get_block(pb) != block)) { + error = -CGN_BLK_ENOSPC; + goto error; + } + + /* Add port-block to source list */ + cgn_source_add_block(src, proto, pb, np); + + } else { + /* + * Port-block already exists. Ensure it is being used by the + * same subscriber. (we cannot use the port-block if a + * different subscriber is already using it). + */ + if (apm_block_get_source(pb) != src) { + error = -CGN_BLK_ENOSPC; + goto error; + } + } + + /* + * At this point we have a port-block (either new or existing) that + * has been assigned to the given subscriber. Since the port-block is + * now under control of the subscriber, and the subscriber is locked, + * can release the apm lock. + */ + rte_spinlock_unlock(&apm->apm_lock); + apm = NULL; + + /* + * Try and allocate the specified port. + */ + port = apm_block_alloc_specific_port(pb, proto, port); + if (port == 0) { + error = -CGN_PCP_ENOSPC; + goto error; + } + + /* Success. Increments stats and unlock the subscriber. 
*/ + cmi->cmi_src = src; + cmi->cmi_reserved = true; + + rte_atomic32_inc(&src->sr_map_active); + + assert(rte_spinlock_is_locked(&src->sr_lock)); + rte_spinlock_unlock(&src->sr_lock); + + /* + * Increment count of current active mappings, and take reference on + * pool + */ + nat_pool_incr_map_active(np); + + return 0; + +error: + assert(!apm || !rte_spinlock_is_locked(&apm->apm_lock)); + assert(rte_spinlock_is_locked(&src->sr_lock)); + + if (apm) rte_spinlock_unlock(&apm->apm_lock); + + src->sr_map_fails++; + rte_spinlock_unlock(&src->sr_lock); + + nat_pool_incr_map_fails(np); + + return error; +} + +/* + * Return a mapped address and port. + * + * This is called: + * + * 1. If a new flow obtained a mapping but failed to create a session, or + * 2. When a session is destroyed + * + * A session may be destroyed: + * 1. We fail to activate a new session + * 2. We fail to translate a packet for which a new session was created + * 3. session is reaped by garbage collector + * 4. session clear command + * + * Inputs: + * vrfid + * cmi->cmi_reserved + * cmi->cmi_proto + * cmi->cmi_src + * cmi->cmi_taddr + * cmi->cmi_tid + */ +int cgn_map_put(struct cgn_map *cmi, vrfid_t vrfid) +{ + struct cgn_source *src; + struct nat_pool *np; + struct apm *apm; + + assert(cmi->cmi_src); + assert(cmi->cmi_taddr); + assert(cmi->cmi_tid); + assert(cmi->cmi_reserved); + + if (!cmi->cmi_reserved) return 0; - } + + if (unlikely(!cmi->cmi_src || cmi->cmi_taddr == 0 || + cmi->cmi_tid == 0)) + return -CGN_RC_UNKWN; + + src = cmi->cmi_src; + + /* Lock the source */ + rte_spinlock_lock(&src->sr_lock); + + /* Get pool from subscriber (not policy) */ + np = cgn_source_get_pool(src); + if (unlikely(!np)) + goto unlock_end; + + /* Lookup public address in apm table */ + apm = apm_lookup(ntohl(cmi->cmi_taddr), vrfid); + if (unlikely(!apm)) + goto unlock_end; uint16_t port, block; struct apm_port_block *pb; - port = ntohs(tport); + /* Find the port-block for the given port */ + port = 
ntohs(cmi->cmi_tid); block = apm_block(port, apm->apm_port_start, apm->apm_port_block_sz); pb = apm->apm_blocks[block]; - assert(pb); /* Should never happen */ - if (unlikely(!pb)) { - rte_spinlock_unlock(&apm->apm_lock); - return 0; - } + if (unlikely(!pb)) + goto unlock_end; + + assert(apm_block_get_source(pb) && + apm_block_get_source(pb) == src); /* Clear bit in port-block bitmap */ - apm_block_release_port(pb, proto, port); + apm_block_release_port(pb, cmi->cmi_proto, port); /* * Can we free the port block? */ if (apm_block_get_ports_used(pb) == 0) { /* - * Delete block from source list. This spinlocks source - * structure and releases reference on source, which may cause - * the source to be destroyed. + * Lock the apm before releasing the port-block + */ + rte_spinlock_lock(&apm->apm_lock); + + /* + * Delete block from source list. This releases reference on + * source, which may cause the source to be later destroyed in + * GC. */ - rc = cgn_source_del_block(src, pb, np); - if (rc < 0) - src = NULL; + cgn_source_del_block(src, pb, np); - /* Remove block from apm's block list and rcu-free */ + /* Remove block from apm's block list, and rcu-free it */ apm_block_destroy(pb); nat_pool_incr_block_freed(np); nat_pool_decr_block_active(np); - } - if (src) - rte_atomic32_dec(&src->sr_map_active); + /* Unlock apm */ + rte_spinlock_unlock(&apm->apm_lock); + } - rte_spinlock_unlock(&apm->apm_lock); + rte_atomic32_dec(&src->sr_map_active); /* * Decrement count of current active mappings, and release reference @@ -491,5 +923,12 @@ int cgn_map_put(struct nat_pool *np, vrfid_t vrfid, int dir __unused, */ nat_pool_decr_map_active(np); + /* Reservation has been released */ + cmi->cmi_reserved = false; + +unlock_end: + /* Unlock source */ + rte_spinlock_unlock(&src->sr_lock); + return 0; } diff --git a/src/npf/cgnat/cgn_map.h b/src/npf/cgnat/cgn_map.h index 97535810..31e93cd9 100644 --- a/src/npf/cgnat/cgn_map.h +++ b/src/npf/cgnat/cgn_map.h @@ -1,5 +1,5 @@ /* - * Copyright 
(c) 2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2019-2021, AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only */ @@ -7,21 +7,42 @@ #ifndef _CGN_MAP_H_ #define _CGN_MAP_H_ -struct cgn_session; -struct cgn_packet; +#include "npf/nat/nat_proto.h" + struct cgn_policy; struct cgn_source; struct nat_pool; +struct apm; + +/* + * Context that is required to create or release a CGNAT mapping. + * + * The policy and subscriber pointers are only valid if there is a current + * mapping, as the mapping itself will have taken a reference on the policy + * and subscriber. + */ +struct cgn_map { + uint8_t cmi_reserved:1; /* Contains a mapping? */ + enum nat_proto cmi_proto; /* Proto mapping space */ + uint16_t cmi_oid; /* Orig source port */ + uint16_t cmi_tid; /* Translation port */ + uint32_t cmi_oaddr; /* Orig source address */ + uint32_t cmi_taddr; /* Translation port */ + struct cgn_source *cmi_src; /* Subscriber struct */ +}; /* * Addresses are in network byte-order. proto is of type enum npf_proto_idx. */ -int -cgn_map_get(struct cgn_policy *cp, vrfid_t vrfid, uint8_t proto, - uint32_t oaddr, uint32_t *taddr, uint16_t *tport, - struct cgn_source **srcp); +int cgn_map_get(struct cgn_map *cmi, struct cgn_policy *cp, vrfid_t vrfid); + +/* + * Use the mapping given in taddr and tport. Used by PCP. + */ +int cgn_map_get2(struct cgn_map *cmi, struct cgn_policy *cp, vrfid_t vrfid); + +int cgn_map_put(struct cgn_map *cmi, vrfid_t vrfid); -int cgn_map_put(struct nat_pool *np, vrfid_t vrfid, int dir, uint8_t proto, - uint32_t oaddr, uint32_t taddr, uint16_t tport); +void cgn_alloc_pool_available(struct nat_pool *np, struct apm *apm); #endif diff --git a/src/npf/cgnat/cgn_mbuf.c b/src/npf/cgnat/cgn_mbuf.c index eacaa92b..72f796c8 100644 --- a/src/npf/cgnat/cgn_mbuf.c +++ b/src/npf/cgnat/cgn_mbuf.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, AT&T Intellectual Property. All rights reserved. 
+ * Copyright (c) 2019-2021, AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only */ @@ -11,14 +11,15 @@ #include #include "in_cksum.h" -#include "pktmbuf.h" +#include "pktmbuf_internal.h" #include "if_var.h" #include "npf/npf_mbuf.h" #include "npf/nat/nat_proto.h" #include "npf/cgnat/cgn.h" -#include "npf/cgnat/cgn_errno.h" +#include "npf/cgnat/cgn_rc.h" +#include "npf/cgnat/cgn_if.h" #include "npf/cgnat/cgn_mbuf.h" /* @@ -50,7 +51,7 @@ static int cgn_decode_icmp(struct cgn_packet *cpk, void *l4) cpk->cpk_l4ports = false; cpk->cpk_info |= CPK_ICMP; cpk->cpk_cksum = ic->icmp_cksum; - cpk->cpk_hlen += sizeof(struct icmp); + cpk->cpk_l4_len = sizeof(struct icmp); switch (ic->icmp_type) { case ICMP_ECHO: @@ -136,7 +137,7 @@ cgn_parse_l4(struct rte_mbuf *m, uint l4_offset, uint8_t ipproto, cpk->cpk_l4ports = true; if (unlikely(cpk->cpk_info & CPK_ICMP_EMBD_SHORT)) { - cpk->cpk_hlen += ICMP_ERROR_MIN_L4_SIZE; + cpk->cpk_l4_len = ICMP_ERROR_MIN_L4_SIZE; break; } @@ -147,7 +148,7 @@ cgn_parse_l4(struct rte_mbuf *m, uint l4_offset, uint8_t ipproto, cpk->cpk_keepalive = false; cpk->cpk_cksum = tcp->check; - cpk->cpk_hlen += sizeof(struct tcphdr); + cpk->cpk_l4_len = sizeof(struct tcphdr); break; } case IPPROTO_DCCP: @@ -161,7 +162,7 @@ cgn_parse_l4(struct rte_mbuf *m, uint l4_offset, uint8_t ipproto, cpk->cpk_cksum = dh->dc_checksum; if (unlikely(cpk->cpk_info & CPK_ICMP_EMBD_SHORT)) { - cpk->cpk_hlen += ICMP_ERROR_MIN_L4_SIZE; + cpk->cpk_l4_len = ICMP_ERROR_MIN_L4_SIZE; break; } uint8_t type = (dh->dc_res_type_x >> 1) & 0x0f; @@ -169,7 +170,7 @@ cgn_parse_l4(struct rte_mbuf *m, uint l4_offset, uint8_t ipproto, if (type == DCCP_RESP || type == DCCP_RST) cpk->cpk_keepalive = false; - cpk->cpk_hlen += sizeof(struct cgn_dccp); + cpk->cpk_l4_len = sizeof(struct cgn_dccp); break; } case IPPROTO_UDP: @@ -182,7 +183,7 @@ cgn_parse_l4(struct rte_mbuf *m, uint l4_offset, uint8_t ipproto, cpk->cpk_did = udp->dest; cpk->cpk_l4ports = true; 
cpk->cpk_cksum = udp->check; - cpk->cpk_hlen += sizeof(struct udphdr); + cpk->cpk_l4_len = sizeof(struct udphdr); break; } case IPPROTO_ICMP: @@ -201,7 +202,7 @@ cgn_parse_l4(struct rte_mbuf *m, uint l4_offset, uint8_t ipproto, * Extract the fields we need from the mbuf */ int cgn_cache_all(struct rte_mbuf *m, uint l3_offset, struct ifnet *ifp, - int dir, struct cgn_packet *cpk, bool icmp_err) + enum cgn_dir dir, struct cgn_packet *cpk, bool icmp_err) { unsigned char buf[sizeof(struct iphdr)]; struct iphdr *ip; @@ -218,18 +219,21 @@ int cgn_cache_all(struct rte_mbuf *m, uint l3_offset, struct ifnet *ifp, return -CGN_BUF_ENOL3; cpk->cpk_info = 0; - cpk->cpk_ifindex = ifp->if_index; cpk->cpk_ipproto = ip->protocol; cpk->cpk_proto = nat_proto_from_ipproto(ip->protocol); cpk->cpk_vrfid = pktmbuf_get_vrf(m); - cpk->cpk_len = rte_pktmbuf_pkt_len(m) - pktmbuf_l2_len(m); + cpk->cpk_len = rte_pktmbuf_pkt_len(m) - dp_pktmbuf_l2_len(m); cpk->cpk_l3_len = ip->ihl << 2; - cpk->cpk_hlen = cpk->cpk_l3_len; + cpk->cpk_l4_len = 0; cpk->cpk_keepalive = true; + cpk->cpk_pkt_instd = true; cpk->cpk_sid = 0; cpk->cpk_did = 0; cpk->cpk_l4ports = false; cpk->cpk_cksum = 0; + cpk->cpk_ifindex = ifp->if_index; + cpk->cpk_key.k_ifindex = cgn_if_key_index(ifp); + cpk->cpk_key.k_expired = false; if (dir == CGN_DIR_IN || icmp_err) cpk->cpk_keepalive = false; @@ -242,6 +246,9 @@ int cgn_cache_all(struct rte_mbuf *m, uint l3_offset, struct ifnet *ifp, cpk->cpk_saddr = ip->saddr; cpk->cpk_daddr = ip->daddr; + /* Setup direction dependent part of hash key */ + cgn_pkt_key_init(cpk, dir); + return 0; } diff --git a/src/npf/cgnat/cgn_mbuf.h b/src/npf/cgnat/cgn_mbuf.h index 8a0b939b..fb1fbf86 100644 --- a/src/npf/cgnat/cgn_mbuf.h +++ b/src/npf/cgnat/cgn_mbuf.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2019-2021, AT&T Intellectual Property. All rights reserved. 
* * SPDX-License-Identifier: LGPL-2.1-only */ @@ -16,7 +16,9 @@ #include #include -#include "pktmbuf.h" +#include "pktmbuf_internal.h" +#include "npf/cgnat/cgn_hash_key.h" +#include "npf/nat/nat_proto.h" /* * cgn_packet - decomposition of a packet @@ -32,24 +34,51 @@ * initialised by cgn_cache_all. */ struct cgn_packet { + struct cgn_3tuple_key cpk_key; /* hash lookup key */ + + uint32_t cpk_ifindex; uint32_t cpk_info; + vrfid_t cpk_vrfid; /* VRF id */ - uint32_t cpk_ifindex; /* Interface index */ - bool cpk_keepalive; /* Can we clear idle flag? */ + uint8_t cpk_keepalive:1; /* Can we clear idle flag? */ + uint8_t cpk_pkt_instd:1; uint8_t cpk_tcp_flags; - uint8_t cpk_ipproto; /* ip protocol */ - uint8_t cpk_proto; /* tcp, udp, other enum */ + enum nat_proto cpk_proto; /* tcp, udp, other enum */ uint8_t cpk_l4ports; /* true if there are l4ports*/ + uint16_t cpk_cksum; /* l4 checksum */ - uint16_t cpk_sid; - uint16_t cpk_did; - uint32_t cpk_saddr; - uint32_t cpk_daddr; - uint32_t cpk_l3_len; /* IP header length */ - uint32_t cpk_hlen; /* l3 + l4 */ - size_t cpk_len; /* l3 + l4 + data */ + uint16_t cpk_sid; /* source port or id */ + uint16_t cpk_did; /* dest port or id */ + uint8_t cpk_pad1[2]; + + uint32_t cpk_saddr; /* source address */ + uint32_t cpk_daddr; /* destination address */ + + uint16_t cpk_l3_len; /* IP header length */ + uint16_t cpk_l4_len; /* L4 header length */ + uint32_t cpk_len; /* l3 + l4 + data */ }; +#define cpk_ipproto cpk_key.k_ipproto + +/* + * Init the direction dependent part of the hash key in the packet cache + * structure. 
+ */ +static inline void cgn_pkt_key_init(struct cgn_packet *cpk, enum cgn_dir dir) +{ + if (dir == CGN_DIR_OUT) { + /* Hash key is source address and port */ + cpk->cpk_key.k_addr = cpk->cpk_saddr; + cpk->cpk_key.k_port = cpk->cpk_sid; + } else { + /* Hash key is destination address and port */ + cpk->cpk_key.k_addr = cpk->cpk_daddr; + cpk->cpk_key.k_port = cpk->cpk_did; + } + assert(cpk->cpk_key.k_addr != 0); +} + #define ICMP_ERROR_MIN_L4_SIZE 8 #define CPK_ICMP 0x0001 @@ -147,9 +176,9 @@ static inline void cgn_rwricmpid(char *l4_ptr, uint16_t new_id) } int cgn_cache_all(struct rte_mbuf *m, uint l3_offset, struct ifnet *ifp, - int dir, struct cgn_packet *cpk, bool icmp_err); + enum cgn_dir dir, struct cgn_packet *cpk, bool icmp_err); -void cgn_rwrcksums(struct cgn_packet *sp, void *n_ptr, +void cgn_rwrcksums(struct cgn_packet *cpk, void *n_ptr, uint16_t l3_chk_delta, uint16_t l4_chk_delta); -#endif +#endif /* _CGN_MBUF_H_ */ diff --git a/src/npf/cgnat/cgn_policy.c b/src/npf/cgnat/cgn_policy.c index ed094869..d344a795 100644 --- a/src/npf/cgnat/cgn_policy.c +++ b/src/npf/cgnat/cgn_policy.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2019-2021, AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only */ @@ -26,6 +26,7 @@ #include "npf/cgnat/cgn_limits.h" #include "npf/cgnat/cgn_policy.h" #include "npf/cgnat/cgn_session.h" +#include "npf/cgnat/cgn_source.h" /* @@ -45,32 +46,28 @@ struct match { const char *name; }; +static void cgn_policy_destroy(struct cgn_policy *cp, bool rcu_free); +static void cgn_policy_free_sess_rate(struct cgn_policy *cp); + /* * Record destination? i.e. create nested 2-tuple session. + * + * This is determined from either a per-policy configuration + * (cp_log_sess_all), of from an address-group of subscriber addresses and/or + * prefixes (cp_log_sess_ag). 
*/ -bool cgn_policy_record_dest(struct cgn_policy *cp, uint32_t addr, int dir) +bool cgn_policy_record_dest(struct cgn_policy *cp, uint32_t addr) { - if (dir != CGN_DIR_OUT) - return false; - if (cp->cp_log_sess_all) return true; if (cp->cp_log_sess_ag) - return npf_addrgrp_lookup_v4(cp->cp_log_sess_ag, addr) == 0; + return npf_addrgrp_lookup_v4_by_handle( + cp->cp_log_sess_ag, addr) == 0; return false; } -/* - * Compare two policies. Returns -1, 0, or 1 is p1 is less than, equal, or - * greater than p2. - */ -int cgn_policy_cmp(struct cgn_policy *p1, struct cgn_policy *p2) -{ - return strcmp(p1->cp_name, p2->cp_name); -} - static ulong cgn_policy_hash(const char *name) { return rte_jhash(name, strlen(name), 0); @@ -131,7 +128,7 @@ static int cgn_policy_insert(struct cgn_policy *cp) &cp->cp_table_node); /* - * This should never happen as entries are only added by master thread + * This should never happen as entries are only added by main thread */ if (node != &cp->cp_table_node) return -EEXIST; @@ -168,6 +165,28 @@ struct nat_pool *cgn_policy_get_pool(struct cgn_policy *cp) return NULL; } +const char *cgn_policy_get_name(struct cgn_policy *cp) +{ + if (cp) + return cp->cp_name; + return NULL; +} + +/* + * Get the number of addresses that a cgnat policy might match, i.e. the + * number of subscribers covered by this policy. 
+ */ +static uint32_t cgn_policy_naddrs(struct cgn_policy *cp) +{ + uint32_t naddrs = 0; + + if (cp->cp_match_ag) + naddrs += npf_addrgrp_naddrs_by_handle(AG_IPv4, + cp->cp_match_ag, true); + + return naddrs; +} + /* * Attach policy to nat pool */ @@ -175,17 +194,13 @@ static int cgn_policy_attach_pool(struct cgn_policy *cp, const char *pool_name) { struct nat_pool *np; - uint32_t naddrs; np = nat_pool_lookup(pool_name); if (!np) return -ENOENT; - naddrs = npf_prefix_to_useable_naddrs4(cp->cp_prefix_len); - /* Take reference on pool */ cp->cp_pool = nat_pool_get(np); - nat_pool_incr_nusers(np, naddrs); return 0; } @@ -193,13 +208,9 @@ cgn_policy_attach_pool(struct cgn_policy *cp, const char *pool_name) static void cgn_policy_detach_pool(struct cgn_policy *cp) { - uint32_t naddrs; - if (!cp->cp_pool) return; - naddrs = npf_prefix_to_useable_naddrs4(cp->cp_prefix_len); - nat_pool_decr_nusers(cp->cp_pool, naddrs); nat_pool_put(cp->cp_pool); cp->cp_pool = NULL; } @@ -213,9 +224,9 @@ static struct cgn_policy *cgn_policy_create(struct cgn_policy_cfg *cpc) int rc; /* - * name source prefix prefix must be configured. + * Policy name and match address-group must be configured. 
*/ - if (!cpc->cp_name || cpc->cp_prefix == 0) + if (strlen(cpc->cp_name) == 0 || !cpc->cp_match_ag_name) return NULL; sz = sizeof(struct cgn_policy); @@ -224,12 +235,9 @@ static struct cgn_policy *cgn_policy_create(struct cgn_policy_cfg *cpc) if (!cp) return NULL; - cp->cp_name = strdup(cpc->cp_name); - + strncpy(cp->cp_name, cpc->cp_name, sizeof(cp->cp_name)); rte_atomic32_set(&cp->cp_refcnt, 0); - - cp->cp_prefix = cpc->cp_prefix; - cp->cp_prefix_len = cpc->cp_prefix_len; + cp->cp_match_ag = NULL; CDS_INIT_LIST_HEAD(&cp->cp_list_node); cp->cp_priority = cpc->cp_priority; @@ -241,56 +249,100 @@ static struct cgn_policy *cgn_policy_create(struct cgn_policy_cfg *cpc) cp->cp_log_sess_end = cpc->cp_log_sess_end; cp->cp_log_sess_periodic = cpc->cp_log_sess_periodic; cp->cp_log_subs = cpc->cp_log_subs; + cp->cp_log_sess_ag = NULL; + + CDS_INIT_LIST_HEAD(&cp->cp_sess_rate_list); + cp->cp_sess_rate_count = 0; + /* Is a log address-group specified? */ if (cpc->cp_log_sess_name) { + /* We store a pointer the address group */ cp->cp_log_sess_ag = npf_addrgrp_lookup_name(cpc->cp_log_sess_name); + + if (!cp->cp_log_sess_ag) + goto error; + + /* Take reference on ag since we are storing ptr */ + npf_addrgrp_get(cp->cp_log_sess_ag); } if (cp->cp_log_sess_all || cp->cp_map_type == CGN_MAP_EDM || cp->cp_fltr_type == CGN_FLTR_EDF) cp->cp_sess2_enabled = true; - unsigned long mask; - - mask = (0xFFFFFFFF << (32 - cp->cp_prefix_len)) & 0xFFFFFFFF; - cp->cp_mask = mask; - cp->cp_mask = htonl(cp->cp_mask); + /* Match address-group */ + if (cpc->cp_match_ag_name) { + /* We store a pointer the address group */ + cp->cp_match_ag = + npf_addrgrp_lookup_name(cpc->cp_match_ag_name); + + if (!cp->cp_match_ag) + /* Should never happen */ + goto error; + + /* + * We take reference on the match address-group *only* because + * we are storing a pointer to the address-group instead of a + * table ID. + */ + npf_addrgrp_get(cp->cp_match_ag); + } /* * Find cgnat pool. 
Takes a reference on the cgnat pool if found. */ rc = cgn_policy_attach_pool(cp, cpc->cp_pool_name); - if (rc < 0) { - free(cp); - return NULL; - } + if (rc < 0) + goto error; return cp; + +error: + cgn_policy_destroy(cp, false); + return NULL; +} + +static void cgn_policy_free(struct cgn_policy *cp) +{ + cgn_policy_free_sess_rate(cp); + free(cp); } static void cgn_policy_rcu_free(struct rcu_head *head) { struct cgn_policy *cp = caa_container_of(head, struct cgn_policy, cp_rcu_head); - free(cp->cp_name); - cp->cp_name = NULL; - - free(cp); + cgn_policy_free(cp); } /* * cgn_policy_destroy */ -static void cgn_policy_destroy(struct cgn_policy *cp) +static void cgn_policy_destroy(struct cgn_policy *cp, bool rcu_free) { + struct npf_addrgrp *ag; + /* * Only detach from pool when all references on the policy have been * removed. */ cgn_policy_detach_pool(cp); - call_rcu(&cp->cp_rcu_head, cgn_policy_rcu_free); + /* Release reference on match address-group */ + ag = rcu_xchg_pointer(&cp->cp_match_ag, NULL); + if (ag) + npf_addrgrp_put(ag); + + /* Release reference on session lof address-group */ + ag = rcu_xchg_pointer(&cp->cp_log_sess_ag, NULL); + if (ag) + npf_addrgrp_put(ag); + + if (rcu_free) + call_rcu(&cp->cp_rcu_head, cgn_policy_rcu_free); + else + cgn_policy_free(cp); } /* @@ -308,7 +360,7 @@ struct cgn_policy *cgn_policy_get(struct cgn_policy *cp) void cgn_policy_put(struct cgn_policy *cp) { if (cp && rte_atomic32_dec_and_test(&cp->cp_refcnt)) - cgn_policy_destroy(cp); + cgn_policy_destroy(cp, true); } void cgn_policy_inc_source_count(struct cgn_policy *cp) @@ -330,7 +382,9 @@ void cgn_policy_dec_source_count(struct cgn_policy *cp) void cgn_policy_update_stats(struct cgn_policy *cp, uint64_t pkts_out, uint64_t bytes_out, uint64_t pkts_in, uint64_t bytes_in, - uint64_t sess_created, uint64_t sess_destroyed) + uint64_t unk_pkts_in, + uint64_t sess_created, uint64_t sess_destroyed, + uint64_t sess2_created, uint64_t sess2_destroyed) { if (!cp) return; @@ -339,16 
+393,174 @@ void cgn_policy_update_stats(struct cgn_policy *cp, cp->cp_bytes[CGN_DIR_OUT] += bytes_out; cp->cp_pkts[CGN_DIR_IN] += pkts_in; cp->cp_bytes[CGN_DIR_IN] += bytes_in; + cp->cp_unk_pkts_in += unk_pkts_in; cp->cp_sess_created += sess_created; cp->cp_sess_destroyed += sess_destroyed; + cp->cp_sess2_created += sess2_created; + cp->cp_sess2_destroyed += sess2_destroyed; +} + +/* + * Create a new subscriber max session rate entry + */ +static struct cgn_policy_sess_rate * +cgn_policy_sess_rate_create(uint32_t subs_addr, uint32_t sess_rate_max, + uint64_t sess_rate_max_time) +{ + struct cgn_policy_sess_rate *new; + struct cds_list_head *node; + + new = malloc(sizeof(*new)); + if (!new) + return NULL; + + node = &new->ps_list_node; + CDS_INIT_LIST_HEAD(node); + new->ps_subs_addr = subs_addr; + new->ps_sess_rate_max = sess_rate_max; + new->ps_sess_rate_max_time = sess_rate_max_time; + + return new; +} + +/* + * Update the list of subscribers with the highest 1 minute average session + * rates + */ +void cgn_policy_update_sess_rate(struct cgn_policy *cp, + uint32_t subs_addr, + uint32_t sess_rate_max, + uint64_t sess_rate_max_time) +{ + struct cgn_policy_sess_rate *cur, *tail, *new = NULL; + + if (!cp) + return; + + /* + * If the list is full *and* sess_rate_max is less than the last value + * in the list then there is nothing to do. 
+ */ + if (cp->cp_sess_rate_count >= CGN_POLICY_SESS_RATE_MAX) { + tail = caa_container_of(cp->cp_sess_rate_list.prev, + struct cgn_policy_sess_rate, + ps_list_node); + if (sess_rate_max < tail->ps_sess_rate_max) + return; + } + + struct cds_list_head *node, *next, *new_node; + + /* + * Iterate through list looking for correct place to insert + */ + cds_list_for_each_safe(node, next, &cp->cp_sess_rate_list) { + cur = caa_container_of(node, struct cgn_policy_sess_rate, + ps_list_node); + + /* + * Insert before 'cur' if rates are greater or equal + */ + if (!new && sess_rate_max >= cur->ps_sess_rate_max) { + + /* Are we updating the same subscriber? */ + if (subs_addr == cur->ps_subs_addr) { + cur->ps_sess_rate_max = sess_rate_max; + cur->ps_sess_rate_max_time = sess_rate_max_time; + return; + } + + /* Insert a new node before current node */ + new = cgn_policy_sess_rate_create(subs_addr, + sess_rate_max, + sess_rate_max_time); + if (!new) + return; + + new_node = &new->ps_list_node; + new_node->next = node; + new_node->prev = node->prev; + node->prev = new_node; + new_node->prev->next = new_node; + cp->cp_sess_rate_count++; + + /* + * Calling 'continue' here means the next 'cur' will + * be the list node *after* the one we have just + * inserted (since we are using the 'safe' form of the + * loop). + */ + continue; + } + + /* + * If we have already added a new node, then check if there + * already is an entry for this subscriber lower down in the + * list. 
+ */ + if (new && new->ps_subs_addr == cur->ps_subs_addr) { + cds_list_del(&cur->ps_list_node); + free(cur); + cp->cp_sess_rate_count--; + return; + } + } + + /* + * If a new node was added, and we have exceeded the max then delete + * the last node in list + */ + if (new && cp->cp_sess_rate_count > CGN_POLICY_SESS_RATE_MAX) { + node = cp->cp_sess_rate_list.prev; + tail = caa_container_of(node, struct cgn_policy_sess_rate, + ps_list_node); + + cds_list_del(node); + free(tail); + cp->cp_sess_rate_count--; + } + + /* + * If a new node was *not* added, and there is space at the end of the + * list, then create and add a new node. + */ + if (!new && cp->cp_sess_rate_count < CGN_POLICY_SESS_RATE_MAX) { + /* Insert new entry at tail */ + new = cgn_policy_sess_rate_create(subs_addr, + sess_rate_max, + sess_rate_max_time); + if (!new) + return; + + cds_list_add_tail(&new->ps_list_node, &cp->cp_sess_rate_list); + cp->cp_sess_rate_count++; + } +} + +/* + * Free session rate list + */ +static void cgn_policy_free_sess_rate(struct cgn_policy *cp) +{ + struct cgn_policy_sess_rate *node, *next; + + cds_list_for_each_entry_safe(node, next, &cp->cp_sess_rate_list, + ps_list_node) { + cds_list_del(&node->ps_list_node); + cp->cp_sess_rate_count--; + free(node); + } } struct cgn_policy_stats { uint64_t ps_sess_created; uint64_t ps_sess_destroyed; + uint64_t ps_sess2_created; + uint64_t ps_sess2_destroyed; uint64_t ps_pkts[CGN_DIR_SZ]; uint64_t ps_bytes[CGN_DIR_SZ]; + uint64_t ps_unk_pkts_in; }; /* @@ -357,20 +569,23 @@ struct cgn_policy_stats { static void cgn_policy_jsonw_summary_cb(struct ifnet *ifp, void *arg) { struct cgn_policy_stats *ps = arg; + struct cds_list_head *policy_list; struct cgn_policy *cp; - struct cgn_intf *ci; - ci = npf_if_get_cgn(ifp); - if (!ci) + policy_list = cgn_if_get_policy_list(ifp); + if (!policy_list) return; - cds_list_for_each_entry(cp, &ci->ci_policy_list, cp_list_node) { + cds_list_for_each_entry(cp, policy_list, cp_list_node) { ps->ps_sess_created 
+= cp->cp_sess_created; ps->ps_sess_destroyed += cp->cp_sess_destroyed; + ps->ps_sess2_created += cp->cp_sess2_created; + ps->ps_sess2_destroyed += cp->cp_sess2_destroyed; ps->ps_pkts[CGN_DIR_OUT] += cp->cp_pkts[CGN_DIR_OUT]; ps->ps_bytes[CGN_DIR_OUT] += cp->cp_bytes[CGN_DIR_OUT]; ps->ps_pkts[CGN_DIR_IN] += cp->cp_pkts[CGN_DIR_IN]; ps->ps_bytes[CGN_DIR_IN] += cp->cp_bytes[CGN_DIR_IN]; + ps->ps_unk_pkts_in += cp->cp_unk_pkts_in; } } @@ -382,14 +597,17 @@ void cgn_policy_jsonw_summary(json_writer_t *json) struct cgn_policy_stats ps = {0}; /* For each interface */ - ifnet_walk(cgn_policy_jsonw_summary_cb, &ps); + dp_ifnet_walk(cgn_policy_jsonw_summary_cb, &ps); jsonw_uint_field(json, "sess_created", ps.ps_sess_created); jsonw_uint_field(json, "sess_destroyed", ps.ps_sess_destroyed); + jsonw_uint_field(json, "sess2_created", ps.ps_sess2_created); + jsonw_uint_field(json, "sess2_destroyed", ps.ps_sess2_destroyed); jsonw_uint_field(json, "pkts_out", ps.ps_pkts[CGN_DIR_OUT]); jsonw_uint_field(json, "bytes_out", ps.ps_bytes[CGN_DIR_OUT]); jsonw_uint_field(json, "pkts_in", ps.ps_pkts[CGN_DIR_IN]); jsonw_uint_field(json, "bytes_in", ps.ps_bytes[CGN_DIR_IN]); + jsonw_uint_field(json, "unk_pkts_in", ps.ps_unk_pkts_in); } @@ -403,26 +621,28 @@ struct cgn_policy_show_ctx { static void cgn_policy_jsonw_one(json_writer_t *json, struct cgn_policy *cp) { - char ad_str[16], pfx_str[24]; - uint32_t naddrs; + char ad_str[16]; + const char *name; + struct ifnet *ifp; - inet_ntop(AF_INET, &cp->cp_prefix, - ad_str, sizeof(ad_str)); - snprintf(pfx_str, 24, "%s/%u", ad_str, cp->cp_prefix_len); - naddrs = npf_prefix_to_useable_naddrs4(cp->cp_prefix_len); + ifp = cgn_if_get_ifp(cp->cp_ci); jsonw_start_object(json); jsonw_string_field(json, "name", cp->cp_name); - jsonw_string_field(json, "prefix", pfx_str); - if (cp->cp_ci && cp->cp_ci->ci_ifp) - jsonw_string_field(json, "interface", - cp->cp_ci->ci_ifp->if_name); + + name = npf_addrgrp_handle2name(cp->cp_match_ag); + jsonw_string_field(json, 
"match_group", + name ? name : "(unknown)"); + + if (ifp) + jsonw_string_field(json, "interface", ifp->if_name); else jsonw_string_field(json, "interface", ""); + jsonw_uint_field(json, "priority", cp->cp_priority); - jsonw_uint_field(json, "naddrs", naddrs); + jsonw_uint_field(json, "naddrs", cgn_policy_naddrs(cp)); if (cp->cp_pool) jsonw_string_field(json, "pool", nat_pool_name(cp->cp_pool)); else @@ -435,6 +655,8 @@ cgn_policy_jsonw_one(json_writer_t *json, struct cgn_policy *cp) jsonw_uint_field(json, "sess_created", cp->cp_sess_created); jsonw_uint_field(json, "sess_destroyed", cp->cp_sess_destroyed); + jsonw_uint_field(json, "sess2_created", cp->cp_sess2_created); + jsonw_uint_field(json, "sess2_destroyed", cp->cp_sess2_destroyed); jsonw_uint_field(json, "out_pkts", cp->cp_pkts[CGN_DIR_OUT]); jsonw_uint_field(json, "out_bytes", cp->cp_bytes[CGN_DIR_OUT]); @@ -442,17 +664,52 @@ cgn_policy_jsonw_one(json_writer_t *json, struct cgn_policy *cp) jsonw_uint_field(json, "in_pkts", cp->cp_pkts[CGN_DIR_IN]); jsonw_uint_field(json, "in_bytes", cp->cp_bytes[CGN_DIR_IN]); + jsonw_uint_field(json, "unk_pkts_in", cp->cp_unk_pkts_in); + + jsonw_bool_field(json, "snat_alg_bypass", cgn_snat_alg_bypass_gbl); + jsonw_bool_field(json, "record_dest", cp->cp_sess2_enabled); jsonw_bool_field(json, "log_sess_all", cp->cp_log_sess_all); - char *lg_name = npf_addrgrp_handle2name(cp->cp_log_sess_ag); - if (lg_name) - jsonw_string_field(json, "log_sess_group", lg_name); + name = npf_addrgrp_handle2name(cp->cp_log_sess_ag); + if (name) + jsonw_string_field(json, "log_sess_group", name); jsonw_bool_field(json, "log_sess_start", cp->cp_log_sess_start); jsonw_bool_field(json, "log_sess_end", cp->cp_log_sess_end); jsonw_uint_field(json, "log_sess_periodic", cp->cp_log_sess_periodic); + /* List of subscribers with highest 1 minute session rates */ + jsonw_name(json, "subs_sess_rates"); + jsonw_start_array(json); + + struct cgn_policy_sess_rate *node; + uint i = 0; + + 
cds_list_for_each_entry(node, &cp->cp_sess_rate_list, ps_list_node) { + jsonw_start_object(json); + + uint32_t addr = htonl(node->ps_subs_addr); + inet_ntop(AF_INET, &addr, ad_str, sizeof(ad_str)); + jsonw_string_field(json, "subscriber", ad_str); + jsonw_uint_field(json, "max_sess_rate", node->ps_sess_rate_max); + jsonw_uint_field( + json, "time", node->ps_sess_rate_max_time); + + jsonw_end_object(json); + i++; + } + + /* Fill empty slots something */ + for (; i < CGN_POLICY_SESS_RATE_MAX; i++) { + jsonw_start_object(json); + jsonw_string_field(json, "subscriber", "None"); + jsonw_uint_field(json, "max_sess_rate", 0); + jsonw_uint_field(json, "time", 0); + jsonw_end_object(json); + } + jsonw_end_array(json); /* subs_sess_rates array */ + jsonw_end_object(json); } @@ -462,18 +719,35 @@ cgn_policy_jsonw_one(json_writer_t *json, struct cgn_policy *cp) static void cgn_policy_jsonw_intf(struct ifnet *ifp, void *arg) { struct cgn_policy_show_ctx *ctx = arg; + struct cds_list_head *policy_list; struct cgn_policy *cp; - struct cgn_intf *ci; - ci = npf_if_get_cgn(ifp); - if (!ci) + policy_list = cgn_if_get_policy_list(ifp); + if (!policy_list) return; - cds_list_for_each_entry(cp, &ci->ci_policy_list, cp_list_node) { + cds_list_for_each_entry(cp, policy_list, cp_list_node) { cgn_policy_jsonw_one(ctx->json, cp); } } +/* + * Show policies that are not attached to an interface + */ +static void cgn_policy_jsonw_unattached(json_writer_t *json) +{ + struct cds_lfht_iter iter; + struct cgn_policy *cp; + + if (!cgn_policy_ht) + return; + + cds_lfht_for_each_entry(cgn_policy_ht, &iter, cp, cp_table_node) { + if (cp->cp_ci == NULL) + cgn_policy_jsonw_one(json, cp); + } +} + /* * cgn_policy_jsonw */ @@ -496,8 +770,11 @@ cgn_policy_jsonw(FILE *f, char *name) if (cp) cgn_policy_jsonw_one(ctx.json, cp); } else { - /* For each interface */ - ifnet_walk(cgn_policy_jsonw_intf, &ctx); + /* Show policies attached to interfaces first */ + dp_ifnet_walk(cgn_policy_jsonw_intf, &ctx); + + /* 
Show unattached policies */ + cgn_policy_jsonw_unattached(ctx.json); } jsonw_end_array(ctx.json); @@ -521,36 +798,52 @@ void cgn_policy_show(FILE *f, int argc __unused, char **argv __unused) cgn_policy_jsonw(f, name); } -static int -cgn_policy_cfg_parse_src(char *value, struct cgn_policy_cfg *cgn) +/* + * cgn-op clear policy statistics + */ +void cgn_policy_clear(int argc, char **argv) { - npf_netmask_t prefix_len = 0; - npf_addr_t src_addr; - sa_family_t fam; - bool negate; + struct cgn_policy *cp; - int rc = npf_parse_ip_addr(value, &fam, &src_addr, - &prefix_len, &negate); - if (rc) - return -1; + /* Remove "cgn-op clear policy" */ + argc -= 3; + argv += 3; + + if (argc < 2) + return; - if (prefix_len == NPF_NO_NETMASK) - prefix_len = 32; + cp = cgn_policy_lookup(argv[0]); + if (!cp) + return; - memcpy(&cgn->cp_prefix, &src_addr, 4); - cgn->cp_prefix_len = prefix_len; + if (!strcmp(argv[1], "statistics")) { + cp->cp_pkts[CGN_DIR_OUT] = 0UL; + cp->cp_bytes[CGN_DIR_OUT] = 0UL; + cp->cp_pkts[CGN_DIR_IN] = 0UL; + cp->cp_bytes[CGN_DIR_IN] = 0UL; + cp->cp_unk_pkts_in = 0UL; - return 0; + cgn_policy_free_sess_rate(cp); + } } static int -cgn_policy_cfg_parse_pool(char *value, struct cgn_policy_cfg *cgn) +cgn_policy_cfg_parse_pool(const char *value, struct cgn_policy_cfg *cgn) { cgn->cp_pool_name = value; return 0; } +/* Match address-group name */ +static int +cgn_policy_cfg_parse_match(const char *value, struct cgn_policy_cfg *cgn) +{ + cgn->cp_match_ag_name = value; + + return 0; +} + /* * map-type=eim * map-type=edm @@ -652,7 +945,7 @@ cgn_policy_cfg_parse_priority(char *value, struct cgn_policy_cfg *cfg) * * cgn policy add POLICY1 pri=10 src-addr=100.64.0.0/12 pool=POOL1 */ -int cgn_policy_cfg_add(FILE *f, int argc, char **argv) +int cgn_policy_cfg_add(FILE * f __unused, int argc, char **argv) { struct cgn_policy *cp; const char *name; @@ -672,28 +965,37 @@ int cgn_policy_cfg_add(FILE *f, int argc, char **argv) struct cgn_policy_cfg cfg; if (cp) { - cfg.cp_name = 
cp->cp_name; + /* Copy name string from existing policy */ + strcpy(cfg.cp_name, cp->cp_name); + cfg.cp_priority = cp->cp_priority; - cfg.cp_prefix = cp->cp_prefix; - cfg.cp_prefix_len = cp->cp_prefix_len; + + cfg.cp_match_ag_name = + npf_addrgrp_handle2name(cp->cp_match_ag); + cfg.cp_pool_name = nat_pool_name(cp->cp_pool); cfg.cp_map_type = cp->cp_map_type; cfg.cp_fltr_type = cp->cp_fltr_type; cfg.cp_trans_type = cp->cp_trans_type; cfg.cp_log_sess_all = cp->cp_log_sess_all; - cfg.cp_log_sess_name = NULL; - if (cp->cp_log_sess_ag) - cfg.cp_log_sess_name = - npf_addrgrp_handle2name(cp->cp_log_sess_ag); + + cfg.cp_log_sess_name = + npf_addrgrp_handle2name(cp->cp_log_sess_ag); + cfg.cp_log_sess_start = cp->cp_log_sess_start; cfg.cp_log_sess_end = cp->cp_log_sess_end; cfg.cp_log_sess_periodic = cp->cp_log_sess_periodic; cfg.cp_log_subs = cp->cp_log_subs; } else { - cfg.cp_name = name; + /* + * We are copying name string from argv, so ensure it is NULL + * terminated + */ + strncpy(cfg.cp_name, name, sizeof(cfg.cp_name)); + cfg.cp_name[NAT_POLICY_NAME_MAX - 1] = '\0'; + cfg.cp_priority = 0; - cfg.cp_prefix = 0; - cfg.cp_prefix_len = 0; + cfg.cp_match_ag_name = NULL; cfg.cp_pool_name = NULL; cfg.cp_map_type = CGN_MAP_EIM; cfg.cp_fltr_type = CGN_FLTR_EIF; @@ -719,14 +1021,14 @@ int cgn_policy_cfg_add(FILE *f, int argc, char **argv) value = c + 1; rc = 0; - /* Source prefix */ - if (!strcmp(item, "src-addr")) { - rc = cgn_policy_cfg_parse_src(value, &cfg); - /* Pool name */ - } else if (!strcmp(item, "pool")) { + if (!strcmp(item, "pool")) { rc = cgn_policy_cfg_parse_pool(value, &cfg); + /* Match address-group */ + } else if (!strcmp(item, "match-ag")) { + rc = cgn_policy_cfg_parse_match(value, &cfg); + /* Priority */ } else if (!strcmp(item, "priority")) { rc = cgn_policy_cfg_parse_priority(value, &cfg); @@ -747,14 +1049,15 @@ int cgn_policy_cfg_add(FILE *f, int argc, char **argv) } else if (!strcmp(item, "trans-type")) { rc = cgn_policy_cfg_parse_trans(value, &cfg); + } 
if (rc < 0) - goto usage; + goto err_out; } if (cfg.cp_priority < 1 || cfg.cp_priority > 9999) - goto usage; + goto err_out; if (!cp) { cp = cgn_policy_create(&cfg); @@ -765,6 +1068,9 @@ int cgn_policy_cfg_add(FILE *f, int argc, char **argv) rc = cgn_policy_insert(cp); if (rc < 0) goto err_out; + + /* Inform source database that a new policy has been added */ + cgn_source_policy_added(cp); } else { /* Update existing policy */ @@ -778,15 +1084,16 @@ int cgn_policy_cfg_add(FILE *f, int argc, char **argv) cgn_policy_detach_pool(cp); cgn_policy_attach_pool(cp, cfg.cp_pool_name); } - uint32_t mask; - mask = (0xFFFFFFFF << (32 - cfg.cp_prefix_len)) & 0xFFFFFFFF; - mask = htonl(mask); + /* + * Has the match address-group changed? + */ + name = npf_addrgrp_handle2name(cp->cp_match_ag); + + npf_addrgrp_update_handle(name, cfg.cp_match_ag_name, + &cp->cp_match_ag); cp->cp_priority = cfg.cp_priority; - cp->cp_prefix = cfg.cp_prefix; - cp->cp_prefix_len = cfg.cp_prefix_len; - cp->cp_mask = mask; cp->cp_map_type = cfg.cp_map_type; cp->cp_fltr_type = cfg.cp_fltr_type; @@ -797,11 +1104,13 @@ int cgn_policy_cfg_add(FILE *f, int argc, char **argv) cp->cp_log_sess_periodic = cfg.cp_log_sess_periodic; cp->cp_log_subs = cfg.cp_log_subs; - if (cfg.cp_log_sess_name) - cp->cp_log_sess_ag = - npf_addrgrp_lookup_name(cfg.cp_log_sess_name); - else - cp->cp_log_sess_ag = NULL; + /* + * Has the session log address-group changed? + */ + name = npf_addrgrp_handle2name(cp->cp_log_sess_ag); + + npf_addrgrp_update_handle(name, cfg.cp_log_sess_name, + &cp->cp_log_sess_ag); if (cp->cp_log_sess_all || cp->cp_map_type == CGN_MAP_EDM || @@ -811,10 +1120,6 @@ int cgn_policy_cfg_add(FILE *f, int argc, char **argv) return 0; -usage: - if (f) - fprintf(f, "%s: policy add pri= " - "src-addr= pool=", __func__); err_out: return -1; } @@ -849,18 +1154,54 @@ int cgn_policy_cfg_delete(FILE *f __unused, int argc, char **argv) /* * The interface that this policy is attached to is going away. 
*/ -void cgn_policy_if_index_unset(struct ifnet *ifp, struct cgn_policy *cp) +void cgn_policy_if_disable(struct ifnet *ifp) { - /* Clear sessions related to this policy */ - cgn_session_expire_policy(true, cp); + struct cds_list_head *policy_list; + struct cgn_policy *cp, *tmp; - /* Remove policy from cgn interface list */ - cgn_if_del_policy(ifp, cp); + /* Get cgnat policy list from interface */ + policy_list = cgn_if_get_policy_list(ifp); + if (!policy_list) + return; - /* Remove from table and release reference. */ - cgn_policy_delete(cp); + cds_list_for_each_entry_safe(cp, tmp, policy_list, cp_list_node) { + /* Clear sessions related to this policy */ + cgn_session_expire_policy(true, cp); + + /* Remove policy from cgn interface list */ + cgn_if_del_policy(ifp, cp); + + /* Remove from hash table and release reference. */ + cgn_policy_delete(cp); + } } +/* + * Return the number of CGNAT policies and subscriber addresses using this NAT + * pool. + */ +static void cgn_np_client_counts(struct nat_pool *np, uint32_t *nusers, + uint64_t *naddrs) +{ + struct cds_lfht_iter iter; + struct cgn_policy *cp; + + if (!cgn_policy_ht) + return; + + cds_lfht_for_each_entry(cgn_policy_ht, &iter, cp, cp_table_node) { + if (cp->cp_pool == np) { + *nusers += 1; + *naddrs += cgn_policy_naddrs(cp); + } + } +} + +/* NAT pool client api handlers */ +static const struct np_client_ops cgn_np_client_ops = { + .np_client_counts = cgn_np_client_counts, +}; + /* * One-time initialization. Called from cgn_init. 
*/ @@ -872,6 +1213,8 @@ void cgn_policy_init(void) cgn_policy_ht = cds_lfht_new(CP_HT_INIT, CP_HT_MIN, CP_HT_MAX, CDS_LFHT_AUTO_RESIZE | CDS_LFHT_ACCOUNTING, NULL); + + nat_pool_client_register(&cgn_np_client_ops); } /* @@ -882,5 +1225,7 @@ void cgn_policy_uninit(void) if (cgn_policy_ht) { dp_ht_destroy_deferred(cgn_policy_ht); cgn_policy_ht = NULL; + + nat_pool_client_unregister(&cgn_np_client_ops); } } diff --git a/src/npf/cgnat/cgn_policy.h b/src/npf/cgnat/cgn_policy.h index bb26b799..8f060644 100644 --- a/src/npf/cgnat/cgn_policy.h +++ b/src/npf/cgnat/cgn_policy.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2019-2021, AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only */ @@ -13,6 +13,21 @@ struct cgn_policy; struct cgn_session; struct nat_pool; +/* Max length of names, enforced by config, is 42 */ +#define NAT_POLICY_NAME_MAX 43 + +/* + * Subscriber session rates + */ +struct cgn_policy_sess_rate { + struct cds_list_head ps_list_node; + uint32_t ps_subs_addr; + uint32_t ps_sess_rate_max; + uint64_t ps_sess_rate_max_time; +}; + +#define CGN_POLICY_SESS_RATE_MAX 5 + /* cgm mapping type */ enum cgn_map_type { CGN_MAP_EIM, /* Endpoint independent mapping */ @@ -33,12 +48,11 @@ enum cgn_trans_type { struct cgn_policy_cfg { /* Identity */ - const char *cp_name; + char cp_name[NAT_POLICY_NAME_MAX]; uint cp_priority; /* Match config */ - uint32_t cp_prefix; - uint8_t cp_prefix_len; + const char *cp_match_ag_name; /* addr grp name */ /* Translation config */ const char *cp_pool_name; @@ -48,7 +62,7 @@ struct cgn_policy_cfg { /* Config to log 5-tuple sessions. true or false. 
*/ uint8_t cp_log_sess_all; - char *cp_log_sess_name; /* addr grp name */ + const char *cp_log_sess_name; /* addr grp name */ uint8_t cp_log_sess_start; uint8_t cp_log_sess_end; uint16_t cp_log_sess_periodic; @@ -63,61 +77,77 @@ struct cgn_policy_cfg { * Multiple cgnat policies may reference the same cgnat pool. */ struct cgn_policy { - struct cds_lfht_node cp_table_node; - struct cds_list_head cp_list_node; + struct cds_list_head cp_list_node; /* Intf list node */ + struct nat_pool *cp_pool; /* Public address pool */ - /* Policy identity */ - char *cp_name; - uint cp_priority; + struct npf_addrgrp *cp_match_ag; /* Match addess-group */ - /* Match config */ - uint32_t cp_prefix; - uint32_t cp_mask; - uint8_t cp_prefix_len; + rte_atomic32_t cp_refcnt; + rte_atomic32_t cp_source_count; - /* Translation config */ - struct nat_pool *cp_pool; /* public address pool */ enum cgn_map_type cp_map_type; /* EIM or EDM */ enum cgn_fltr_type cp_fltr_type; /* EIF or EDF */ enum cgn_trans_type cp_trans_type; /* dynamic or deterministic */ - struct cgn_intf *cp_ci; /* Back ptr to interface */ - - /* Config to log 5-tuple sessions. true or false. */ - uint8_t cp_log_sess_all; - struct npf_addrgrp *cp_log_sess_ag; + uint8_t cp_log_subs; /* Log subs start/end */ uint8_t cp_log_sess_start; uint8_t cp_log_sess_end; - uint16_t cp_log_sess_periodic; - /* Log subscriber start and end */ - uint8_t cp_log_subs; - - /* Control for nested 2-tuple sessions. 
*/ - uint8_t cp_sess2_enabled; + uint16_t cp_log_sess_periodic; + uint8_t cp_log_sess_all; + uint8_t cp_pad2[5]; - struct rcu_head cp_rcu_head; - rte_atomic32_t cp_refcnt; - rte_atomic32_t cp_source_count; + /* --- cacheline 1 boundary (64 bytes) --- */ uint64_t cp_sess_created; uint64_t cp_sess_destroyed; + uint64_t cp_sess2_created; + uint64_t cp_sess2_destroyed; uint64_t cp_pkts[CGN_DIR_SZ]; uint64_t cp_bytes[CGN_DIR_SZ]; + + /* --- cacheline 2 boundary (128 bytes) --- */ + + /* List of subscribers with highest 1 minute session rates */ + struct cds_list_head cp_sess_rate_list; + uint cp_sess_rate_count; + + uint cp_priority; + struct cds_lfht_node cp_table_node; + struct rcu_head cp_rcu_head; + struct cgn_intf *cp_ci; + + /* --- cacheline 3 boundary (192 bytes) --- */ + + char cp_name[NAT_POLICY_NAME_MAX]; + struct npf_addrgrp *cp_log_sess_ag; + uint8_t cp_sess2_enabled; + uint8_t cs_pad5[4]; + uint64_t cp_unk_pkts_in; + uint64_t cp_unk_pkts_in_tot; + }; -bool cgn_policy_record_dest(struct cgn_policy *cp, uint32_t addr, int dir); +static_assert(offsetof(struct cgn_policy, cp_sess_created) == 64, + "first cache line exceeded"); +static_assert(offsetof(struct cgn_policy, cp_sess_rate_list) == 128, + "second cache line exceeded"); +static_assert(offsetof(struct cgn_policy, cp_name) == 192, + "third cache line exceeded"); + +bool cgn_policy_record_dest(struct cgn_policy *cp, uint32_t addr); void cgn_policy_update_stats(struct cgn_policy *cp, uint64_t pkts_out, uint64_t bytes_out, uint64_t pkts_in, uint64_t bytes_in, - uint64_t sess_created, uint64_t sess_destroyed); + uint64_t unk_pkts_in, + uint64_t sess_created, uint64_t sess_destroyed, + uint64_t sess2_created, uint64_t sess2_destroyed); -/* - * Compare two policies. Returns -1, 0, or 1 is p1 is less than, equal, or - * greater than p2. 
- */ -int cgn_policy_cmp(struct cgn_policy *p1, struct cgn_policy *p2); +void cgn_policy_update_sess_rate(struct cgn_policy *cp, + uint32_t subs_addr, + uint32_t sess_rate_max, + uint64_t sess_rate_max_time); struct cgn_policy *cgn_policy_lookup(const char *name); struct cgn_policy *cgn_policy_get(struct cgn_policy *cp); @@ -127,16 +157,18 @@ void cgn_policy_inc_source_count(struct cgn_policy *cp); void cgn_policy_dec_source_count(struct cgn_policy *cp); struct nat_pool *cgn_policy_get_pool(struct cgn_policy *cp); +const char *cgn_policy_get_name(struct cgn_policy *cp); void cgn_policy_stats_sess_created(struct cgn_policy *cp); void cgn_policy_stats_sess_destroyed(struct cgn_policy *cp); int cgn_policy_cfg_add(FILE *f, int argc, char **argv); int cgn_policy_cfg_delete(FILE *f, int argc, char **argv); -void cgn_policy_if_index_unset(struct ifnet *ifp, struct cgn_policy *cp); +void cgn_policy_if_disable(struct ifnet *ifp); void cgn_policy_jsonw_summary(json_writer_t *json); void cgn_policy_show(FILE *f, int argc, char **argv); +void cgn_policy_clear(int argc, char **argv); void cgn_policy_init(void); void cgn_policy_uninit(void); diff --git a/src/npf/cgnat/cgn_public.h b/src/npf/cgnat/cgn_public.h new file mode 100644 index 00000000..9ec7196b --- /dev/null +++ b/src/npf/cgnat/cgn_public.h @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2021, AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ + +#ifndef _CGN_PUBLIC_H_ +#define _CGN_PUBLIC_H_ + +#include +#include +#include + +struct ifnet; +struct rte_mbuf; + +/* + * May be called by icmp_do_error if pkt meta data indicates a CGNAT packet. 
+ */ +struct rte_mbuf *cgn_copy_or_clone_and_undo(struct rte_mbuf *mbuf, + const struct ifnet *in_ifp, + const struct ifnet *out_if, + bool copy); + +#endif diff --git a/src/npf/cgnat/cgn_rc.c b/src/npf/cgnat/cgn_rc.c new file mode 100644 index 00000000..6051e697 --- /dev/null +++ b/src/npf/cgnat/cgn_rc.c @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2021, AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ + +#include +#include + +#include "compiler.h" +#include "util.h" + +#include "npf/cgnat/cgn_rc.h" + +/* Return code and error counters. */ +struct cgn_rc_t *cgn_rc; + +uint64_t cgn_rc_read(enum cgn_dir dir, enum cgn_rc_en rc) +{ + uint64_t sum; + uint i; + + if (rc >= CGN_RC_SZ || dir >= CGN_DIR_SZ || !cgn_rc) + return 0UL; + + sum = 0UL; + FOREACH_DP_LCORE(i) + sum += cgn_rc[i].dir[dir].count[rc]; + + return sum; +} + +void cgn_rc_clear(enum cgn_dir dir, enum cgn_rc_en rc) +{ + uint i; + + if (rc >= CGN_RC_SZ || dir >= CGN_DIR_SZ || !cgn_rc) + return; + + FOREACH_DP_LCORE(i) + cgn_rc[i].dir[dir].count[rc] = 0UL; +} + +/* + * Init cgnat global per-core return code counters + */ +void cgn_rc_init(void) +{ + if (cgn_rc) + return; + + cgn_rc = zmalloc_aligned((get_lcore_max() + 1) * sizeof(*cgn_rc)); +} + +void cgn_rc_uninit(void) +{ + free(cgn_rc); + cgn_rc = NULL; +} diff --git a/src/npf/cgnat/cgn_errno.h b/src/npf/cgnat/cgn_rc.h similarity index 61% rename from src/npf/cgnat/cgn_errno.h rename to src/npf/cgnat/cgn_rc.h index 4965963a..de7e286b 100644 --- a/src/npf/cgnat/cgn_errno.h +++ b/src/npf/cgnat/cgn_rc.h @@ -1,17 +1,18 @@ /* - * Copyright (c) 2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2019-2021, AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only */ /* - * CGNAT error numbers. We try to account for every packet disposition in + * CGNAT return codes. We try to account for every packet disposition in * both directions. 
*/ -#ifndef _CGN_ERRNO_H_ -#define _CGN_ERRNO_H_ +#ifndef _CGN_RC_H_ +#define _CGN_RC_H_ +#include "npf/cgnat/cgn.h" /* * cgnat error numbers @@ -23,14 +24,16 @@ * Any error or exception starting "CGN_S2_" do not prevent translation. They * just mean a nested session was not created and activated. */ -enum cgn_errno { - CGN_OK = 0, +enum cgn_rc_en { + CGN_RC_OK = 0, /* * Operational or config decisions */ CGN_PCY_ENOENT, /* Src did not match policy */ CGN_SESS_ENOENT, /* Inbound pkt did not match a session */ + CGN_POOL_ENOENT, /* Inbound pkt not addressed to pool addr */ + CGN_PCY_BYPASS, /* SNAT-alg pkts bypassed CGNAT */ /* * Packet buffer exceptions @@ -63,7 +66,6 @@ enum cgn_errno { */ CGN_S1_EEXIST, /* Lost race to insert sentry in table */ CGN_S2_EEXIST, /* Lost race to insert sentry in table */ - CGN_APM_ENOENT, /* apm destroyed while waiting for lock */ CGN_SRC_ENOENT, /* src destroyed while waiting for lock */ /* @@ -72,31 +74,76 @@ enum cgn_errno { CGN_BUF_ENOL3, /* IP header not available */ CGN_BUF_ENOL4, /* L4 header not available */ CGN_BUF_ENOMEM, /* Prep for hdr change failed */ - CGN_BUF_ENOSPC, /* Cannot advance beyond end of buffer */ /* * Other + * + * Note that CGN_RC_UNKWN must be the first in the 'Other' section as + * the op commands use this to mark the start of this section. */ - CGN_ERR_UNKWN, /* Unknown error */ + CGN_RC_UNKWN, /* Unknown return code */ + CGN_HAIRPINNED, /* Packets hairpinned */ + CGN_ICMP_ECHOREQ, /* Echo req to CGNAT pool addr */ + + /* + * PCP. All PCP requests will increment either CGN_PCP_OK or + * CGN_PCP_ERR. If CGN_PCP_ERR is incremented then the specific + * reason for this will also be incremented. That specific reason may + * be one of the specific to PCP errors or it may be a generic CGNAT + * error. 
+ */ + CGN_PCP_OK, /* PCP request succeeded */ + CGN_PCP_ERR, /* PCP request failed */ + CGN_PCP_EINVAL, /* PCP Invalid argument */ + CGN_PCP_ENOSPC, /* PCP one or both of reqd src or trans addrs inuse */ +}; + +#define CGN_RC_LAST CGN_PCP_ENOSPC +#define CGN_RC_SZ (CGN_RC_LAST + 1) + +struct cgn_rc_dir { + uint64_t count[CGN_RC_SZ]; +}; + +struct cgn_rc_t { + struct cgn_rc_dir dir[CGN_DIR_SZ]; }; -#define CGN_ERRNO_LAST CGN_ERR_UNKWN -#define CGN_ERRNO_SZ (CGN_ERRNO_LAST + 1) +extern struct cgn_rc_t *cgn_rc; -extern rte_atomic64_t cgn_errors[][CGN_ERRNO_SZ]; +static ALWAYS_INLINE void cgn_rc_inc(enum cgn_dir dir, int error) +{ + assert(dir < CGN_DIR_SZ); + assert(cgn_rc); + + if (error < 0) + error = -error; + if (unlikely(error > CGN_RC_LAST)) + error = CGN_RC_UNKWN; -static inline const char *cgn_errno_str(int error) + if (likely(cgn_rc != NULL)) + cgn_rc[dp_lcore_id()].dir[dir].count[error]++; +} + +uint64_t cgn_rc_read(enum cgn_dir dir, enum cgn_rc_en rc); +void cgn_rc_clear(enum cgn_dir dir, enum cgn_rc_en rc); +void cgn_rc_init(void); +void cgn_rc_uninit(void); + +static inline const char *cgn_rc_str(int error) { if (error < 0) error = -error; - switch (error) { - case CGN_OK: + switch ((enum cgn_rc_en)error) { + case CGN_RC_OK: return "ok"; case CGN_SRC_ENOMEM: return "SRC_ENOMEM"; case CGN_SRC_ENOENT: return "SRC_ENOENT"; + case CGN_POOL_ENOENT: + return "POOL_ENOENT"; case CGN_MBU_ENOSPC: return "MBU_ENOSPC"; case CGN_SRC_ENOSPC: @@ -107,8 +154,6 @@ static inline const char *cgn_errno_str(int error) return "APM_ENOSPC"; case CGN_BLK_ENOSPC: return "BLK_ENOSPC"; - case CGN_APM_ENOENT: - return "APM_ENOENT"; case CGN_PB_ENOMEM: return "PB_ENOMEM"; case CGN_S1_ENOSPC: @@ -125,14 +170,14 @@ static inline const char *cgn_errno_str(int error) return "BUF_ENOL4"; case CGN_BUF_ENOMEM: return "BUF_ENOMEM"; - case CGN_BUF_ENOSPC: - return "BUF_ENOSPC"; case CGN_BUF_ICMP: return "BUF_ICMP"; case CGN_BUF_PROTO: return "BUF_PROTO"; case CGN_PCY_ENOENT: return 
"PCY_ENOENT"; + case CGN_PCY_BYPASS: + return "PCY_BYPASS"; case CGN_SESS_ENOENT: return "SESS_ENOENT"; case CGN_POOL_ENOSPC: @@ -141,19 +186,31 @@ static inline const char *cgn_errno_str(int error) return "S2_EEXIST"; case CGN_S2_ENOMEM: return "S2_ENOMEM"; - case CGN_ERR_UNKWN: - return "ERR_UNKWN"; + case CGN_HAIRPINNED: + return "CGN_HAIRPINNED"; + case CGN_ICMP_ECHOREQ: + return "ICMP_ECHOREQ"; + case CGN_PCP_OK: + return "PCP_OK"; + case CGN_PCP_ERR: + return "PCP_ERR"; + case CGN_PCP_EINVAL: + return "PCP_EINVAL"; + case CGN_PCP_ENOSPC: + return "PCP_ENOSPC"; + case CGN_RC_UNKWN: + break; } - return "ERR_UNKWN2"; + return "ERR_UNKWN"; } -static inline const char *cgn_errno_detail_str(int error) +static inline const char *cgn_rc_detail_str(int error) { if (error < 0) error = -error; - switch (error) { - case CGN_OK: + switch ((enum cgn_rc_en)error) { + case CGN_RC_OK: return "ok"; /* @@ -163,6 +220,10 @@ static inline const char *cgn_errno_detail_str(int error) return "Subscriber address did not match a CGNAT policy"; case CGN_SESS_ENOENT: return "Packet did not match a CGNAT session"; + case CGN_POOL_ENOENT: + return "Destination address did not match CGNAT pool"; + case CGN_PCY_BYPASS: + return "CGNAT bypassed by SNAT-ALG packets"; /* * Packet buffer exceptions @@ -211,9 +272,6 @@ static inline const char *cgn_errno_detail_str(int error) return "Lost race to insert session into table"; case CGN_S2_EEXIST: return "Lost race to insert destination session into table"; - case CGN_APM_ENOENT: - return "Public address destroyed while waiting " - "for lock"; case CGN_SRC_ENOENT: return "Subscriber address destroyed while " "waiting for lock"; @@ -227,18 +285,26 @@ static inline const char *cgn_errno_detail_str(int error) return "L4 header not available in message buffer"; case CGN_BUF_ENOMEM: return "Prepare message buffer for header change failed"; - case CGN_BUF_ENOSPC: - return "Cannot advance beyond end of message buffer"; /* * Other */ - case CGN_ERR_UNKWN: - 
return "Unknown error"; + case CGN_HAIRPINNED: + return "Packets hairpinned"; + case CGN_ICMP_ECHOREQ: + return "ICMP Echo Request for CGNAT public address"; + case CGN_PCP_OK: + return "Successful PCP requests"; + case CGN_PCP_ERR: + return "Failed PCP requests"; + case CGN_PCP_EINVAL: + return "PCP invalid or missing argument"; + case CGN_PCP_ENOSPC: + return "PCP public address and port not available"; + case CGN_RC_UNKWN: + break; } return "Unknown"; } -void cgn_error_inc(int error, int dir); - #endif diff --git a/src/npf/cgnat/cgn_sess2.c b/src/npf/cgnat/cgn_sess2.c index d0a8fc6b..2f4aed2b 100644 --- a/src/npf/cgnat/cgn_sess2.c +++ b/src/npf/cgnat/cgn_sess2.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2019-2021, AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only */ @@ -14,6 +14,7 @@ * ("sess2") whenre each entry contains destination IP and destination port. */ +#include #include #include #include @@ -24,377 +25,571 @@ #include "npf/nat/nat_proto.h" #include "npf/cgnat/cgn.h" -#include "npf/cgnat/cgn_errno.h" +#include "npf/cgnat/cgn_rc.h" #include "npf/cgnat/cgn_limits.h" #include "npf/cgnat/cgn_log.h" #include "npf/cgnat/cgn_mbuf.h" #include "npf/cgnat/cgn_sess2.h" #include "npf/cgnat/cgn_sess_state.h" #include "npf/cgnat/cgn_session.h" +#include "npf/cgnat/cgn_source.h" +/* + * s2 session table entry (aka 'sentry') + */ +struct cgn_s2entry { + struct cds_lfht_node s2e_node; /* hash tbl node */ + struct cgn_2tuple_key s2e_key; /* Hash key */ +}; + /* * Forward and backwards stats are split over two cachelines. * * idle flag is in s2_state. 
*/ struct cgn_sess2 { - struct cds_lfht_node s2_node; /* session tbl node */ - struct cgn_session *s2_cse; /* back pointer */ + struct cgn_s2entry s2_sentry[CGN_DIR_SZ]; + + uint64_t s2_start_time; /* unix epoch microsecs */ + rte_atomic32_t s2_pkts_out; /* pkts out in last interval */ + rte_atomic32_t s2_bytes_out; /* bytes out in last interval */ + /* --- cacheline 1 boundary (64 bytes) --- */ + + struct cgn_state s2_state; /* 32 bytes */ rte_atomic32_t s2_pkts_in; /* pkts in in last interval */ rte_atomic32_t s2_bytes_in; /* bytes in in last interval */ - uint64_t s2_bytes_in_tot;/* bytes in total */ - uint64_t s2_pkts_in_tot; /* pkts in total */ - uint32_t s2_addr; /* Address (net order) */ + + /* + * The following are not accessed regularly in the forwarding path + */ uint32_t s2_etime; /* expiry time */ uint32_t s2_id; - uint16_t s2_port; /* port or id (net order) */ - uint8_t s2_dir; - uint8_t s2_ipproto; - /* --- cacheline 1 boundary (64 bytes) --- */ - - rte_atomic32_t s2_pkts_out; /* pkts out in last interval */ - rte_atomic32_t s2_bytes_out; /* bytes out in last interval */ - uint32_t s2_pkts_out_tot; /* pkts out total */ - uint16_t s2_log_countdown; + struct cgn_sess_s2 *s2_cs2; /* back pointer */ + uint8_t s2_dir:1; + uint8_t s2_log_start:1; + uint8_t s2_log_end:1; + uint8_t s2_log_active:1; uint8_t s2_gc_pass; - uint8_t s2_expired; + uint16_t s2_log_countdown; + uint32_t s2_pkts_out_tot; /* pkts out total */ + /* --- cacheline 2 boundary (128 bytes) --- */ + uint64_t s2_bytes_out_tot; /* bytes out total */ + uint64_t s2_pkts_in_tot; /* pkts in total */ + uint64_t s2_bytes_in_tot; /* bytes in total */ struct rcu_head s2_rcu_head; - uint64_t s2_start_time; - - struct cgn_state s2_state; /* 32 bytes */ - /* --- cacheline 2 boundary (128 bytes) --- */ }; +static_assert(offsetof(struct cgn_sess2, s2_state) == 64, + "cgn_sess2 structure: first cache line size exceeded"); +static_assert(offsetof(struct cgn_sess2, s2_bytes_out_tot) == 128, + "cgn_sess2 
structure: second cache line size exceeded"); + + +static inline struct cgn_sess2 * +sentry2sess2(const struct cgn_s2entry *s2e, enum cgn_dir dir) +{ + return caa_container_of(s2e, struct cgn_sess2, s2_sentry[dir]); +} + +/* Forward references */ +static struct cds_lfht *cgn_sess2_ht_create(ulong max); +static void cgn_sess2_ht_destroy(struct cds_lfht **htp); +static int cgn_sess2_add(struct cgn_sess_s2 *cs2, struct cgn_sess2 *s2); +static bool s2_expired(struct cgn_sess2 *s2); + +/* + * API with cgn_session.c + */ /* - * get current monotonic time in approximate seconds + * Disable recording of dest addr and port for a given 3-tuple session. */ -static inline uint32_t cgn_get_time_uptime(void) +void cgn_sess_s2_disable(struct cgn_sess_s2 *cs2) { - /* divide millisecond soft_ticks by 1024 */ - return (uint32_t)(soft_ticks >> 10); + cs2->cs2_enbld = false; + + /* Destroy s2 hash table if present */ + assert(cgn_sess_s2_count(cs2) == 0); + + if (cs2->cs2_ht) + cgn_sess2_ht_destroy(&cs2->cs2_ht); } -/* Is t0 after t1? */ -static inline int time_after(uint32_t t0, uint32_t t1) +/* Get number of s2 sessions in this record */ +int16_t cgn_sess_s2_count(struct cgn_sess_s2 *cs2) { - return (int)(t0 - t1) >= 0; + return rte_atomic16_read(&cs2->cs2_used); } -/* Count hash table nodes */ -ulong cgn_sess2_count(struct cds_lfht *ht) +static inline void cgn_sess_s2_set_full(struct cgn_sess_s2 *cs2) { - unsigned long count; - long dummy; + struct cgn_session *cse = cgn_sess_from_cs2(cs2); - if (!ht) - return 0; + cgn_log_resource_dest_session_table( + CGN_RESOURCE_FULL, cse, + rte_atomic16_read(&cs2->cs2_used), + cs2->cs2_max); + + /* + * Mark nested session table as full. This is reset in the gc when + * the session count reduces. 
+ */ + cs2->cs2_full = true; - cds_lfht_count_nodes(ht, &dummy, &count, &dummy); - return count; } -static ALWAYS_INLINE int -cgn_sess2_match(const struct cgn_sess2 *s2, uint16_t id, uint32_t addr) +static void cgn_sess_s2_set_available(struct cgn_sess_s2 *cs2) { - if (s2->s2_port != id) - return 0; + struct cgn_session *cse = cgn_sess_from_cs2(cs2); - if (s2->s2_addr != addr) - return 0; + cgn_log_resource_dest_session_table( + CGN_RESOURCE_AVAILABLE, cse, + rte_atomic16_read(&cs2->cs2_used), + cs2->cs2_max); - return 1; + cs2->cs2_full = false; } /* - * lfht match function, key is a pointer to a 'struct s2_lookup_key' object + * Is there space in the nested session table? + * + * We reserve a slot *before* creating the session. If the session + * subsequently fails to be activated for any reason then we must call + * cgn_sess_s2_slot_put to return the reserved slot. */ -static int -cgn_sess2_lkey_match(struct cds_lfht_node *node, const void *key) +static bool cgn_sess_s2_slot_get(struct cgn_sess_s2 *cs2) { - const struct s2_lookup_key *lkey = key; - const struct cgn_sess2 *s2; - int rc; + if (rte_atomic16_add_return(&cs2->cs2_used, 1) <= cs2->cs2_max) { + /* Success */ + rte_atomic32_inc(&cgn_sess2_used); + return true; + } + + /* + * No slots available. Decrement cs2_used again. 
+ */ + rte_atomic16_dec(&cs2->cs2_used); - s2 = caa_container_of(node, struct cgn_sess2, s2_node); + if (!cs2->cs2_full) + cgn_sess_s2_set_full(cs2); - if (s2->s2_expired) - return 0; + return false; +} - rc = cgn_sess2_match(s2, lkey->s2k_id, lkey->s2k_addr); +static void cgn_sess_s2_slot_put(struct cgn_sess_s2 *cs2) +{ + /* Decrement count on parent session */ + rte_atomic16_dec(&cs2->cs2_used); - return rc; + /* Decrement global count */ + rte_atomic32_dec(&cgn_sess2_used); } -static int cgn_sess2_node_match(struct cds_lfht_node *node, const void *key) +/* + * Activate an s2 session + */ +int cgn_sess_s2_activate(struct cgn_sess_s2 *cs2, struct cgn_sess2 *s2) { - const struct cgn_sess2 *s2a, *s2b; int rc; - s2a = caa_container_of(node, struct cgn_sess2, s2_node); + rc = cgn_sess2_add(cs2, s2); - if (s2a->s2_expired) - return 0; - - s2b = key; + if (unlikely(rc < 0)) { + /* + * Failed to s2. Return reserved slot and free s2. + */ + cgn_sess_s2_slot_put(cs2); + free(s2); + return rc; + } - rc = cgn_sess2_match(s2a, s2b->s2_port, s2b->s2_addr); + s2->s2_log_start = cs2->cs2_log_start; + s2->s2_log_end = cs2->cs2_log_end; - return rc; + return 0; } /* - * Create a nested session + * Accessor functions */ -struct cgn_sess2 * -cgn_sess2_establish(struct cgn_session *cse, struct cgn_packet *cpk, - rte_atomic32_t *id_rsc, int dir) +struct cgn_session *cgn_sess2_session(struct cgn_sess2 *s2) { - struct cgn_sess2 *s2; - - s2 = zmalloc_aligned(sizeof(struct cgn_sess2)); - if (!s2) - return NULL; + return cgn_sess_from_cs2(s2->s2_cs2); +} - if (dir == CGN_DIR_OUT) { - s2->s2_addr = cpk->cpk_daddr; - s2->s2_port = cpk->cpk_did; - rte_atomic32_inc(&s2->s2_pkts_out); - rte_atomic32_add(&s2->s2_bytes_out, cpk->cpk_len); - } else { - s2->s2_addr = cpk->cpk_saddr; - s2->s2_port = cpk->cpk_sid; - rte_atomic32_inc(&s2->s2_pkts_in); - rte_atomic32_add(&s2->s2_bytes_in, cpk->cpk_len); - } +struct cgn_state *cgn_sess2_state(struct cgn_sess2 *s2) +{ + return &(s2->s2_state); +} - 
s2->s2_dir = dir; - s2->s2_expired = false; - s2->s2_cse = cse; - s2->s2_ipproto = cpk->cpk_ipproto; - s2->s2_start_time = soft_ticks; - s2->s2_id = rte_atomic32_add_return(id_rsc, 1); - s2->s2_log_countdown = cgn_session_log_periodic(cse); +uint32_t cgn_sess2_id(struct cgn_sess2 *s2) +{ + return s2->s2_id; +} - cgn_sess_state_init(&s2->s2_state, - nat_proto_from_ipproto(cpk->cpk_ipproto)); - cgn_sess_state_inspect(&s2->s2_state, cpk, dir, s2->s2_start_time); +uint32_t cgn_sess2_ipproto(struct cgn_sess2 *s2) +{ + struct cgn_session *cse; - return s2; + /* Get ipproto from parent 3-tuple session */ + cse = cgn_sess_from_cs2(s2->s2_cs2); + return cgn_session_ipproto(cse); } /* - * Log 5-tuple session + * The outbound address and port are the 'reference' values. We use these in + * show output, logging etc. The 'in' values will be identical except for + * some ALG sessions. */ -static uint -cgn_log_sess_common(struct cgn_sess2 *s2, char *log_str, uint log_str_sz) -{ -#define ADDR_CHARS 16 - char str1[ADDR_CHARS]; - char str2[ADDR_CHARS]; - char str3[ADDR_CHARS]; - char state_str[12]; - struct ifnet *ifp; - struct cgn_session *cse = s2->s2_cse; - uint32_t pid = cgn_session_id(cse); - uint32_t int_src = cgn_session_forw_addr(cse); - uint16_t int_port = cgn_session_forw_id(cse); - uint32_t ext_src = cgn_session_back_addr(cse); - uint16_t ext_port = cgn_session_back_id(cse); - uint len; - - ifp = ifnet_byifindex(cgn_session_ifindex(cse)); - - if (s2->s2_state.st_proto == NAT_PROTO_TCP) - snprintf(state_str, sizeof(state_str), "%s[%u/0x%02X]", - cgn_sess_state_str_short(&s2->s2_state), - s2->s2_state.st_state, s2->s2_state.st_hist); - else - snprintf(state_str, sizeof(state_str), "%s[%u]", - cgn_sess_state_str_short(&s2->s2_state), - s2->s2_state.st_state); - - len = snprintf(log_str, log_str_sz, - "ifname=%s session-id=%u.%u proto=%u " - "addr=%s->%s port=%u->%u cgn-addr=%s cgn-port=%u " - "state=%s start-time=%lu", - ifp ? 
ifp->if_name : "-", pid, - s2->s2_id, s2->s2_ipproto, - cgn_addrstr(ntohl(int_src), str1, ADDR_CHARS), - cgn_addrstr(ntohl(s2->s2_addr), str2, ADDR_CHARS), - ntohs(int_port), ntohs(s2->s2_port), - cgn_addrstr(ntohl(ext_src), str3, ADDR_CHARS), - ntohs(ext_port), state_str, - cgn_ticks2timestamp(s2->s2_start_time)); - - return len; +uint32_t cgn_sess2_addr(struct cgn_sess2 *s2) +{ + return s2->s2_sentry[CGN_DIR_OUT].s2e_key.k_addr; } -/* - * SESSION_CREATE - */ -static void cgn_log_sess_start(struct cgn_sess2 *s2) +uint16_t cgn_sess2_port(struct cgn_sess2 *s2) +{ + return s2->s2_sentry[CGN_DIR_OUT].s2e_key.k_port; +} + +uint64_t cgn_sess2_start_time(struct cgn_sess2 *s2) +{ + return s2->s2_start_time; +} + +uint32_t cgn_sess2_pkts_out_tot(struct cgn_sess2 *s2) +{ + return s2->s2_pkts_out_tot; +} + +uint64_t cgn_sess2_bytes_out_tot(struct cgn_sess2 *s2) +{ + return s2->s2_bytes_out_tot; +} + +uint64_t cgn_sess2_pkts_in_tot(struct cgn_sess2 *s2) +{ + return s2->s2_pkts_in_tot; +} + +uint64_t cgn_sess2_bytes_in_tot(struct cgn_sess2 *s2) +{ + return s2->s2_bytes_in_tot; +} + +uint8_t cgn_sess2_dir(struct cgn_sess2 *s2) +{ + return s2->s2_dir; +} + +/* Is t0 after t1? */ +static inline int time_after(uint32_t t0, uint32_t t1) { -#define LOG_STR_SZ 400 - char log_str[LOG_STR_SZ]; + return (int)(t0 - t1) >= 0; +} + +/* Hash function */ +static ALWAYS_INLINE ulong cgn_sess2_hash(const struct cgn_2tuple_key *key) +{ + static_assert(sizeof(*key) == 8, + "cgn sess2 key is wrong size"); + + /* + * A special optimized version of jhash that handles 1 or more of + * uint32_ts. + */ + return rte_jhash_32b((const uint32_t *)key, + sizeof(*key) / sizeof(uint32_t), 0); +} + +/* Compare two keys. 
Returns -1, 0, or 1, similar to memcmp */ +static inline int +keycmp(const struct cgn_2tuple_key *k1, const struct cgn_2tuple_key *k2) +{ + return memcmp(k1, k2, sizeof(*k1)); +} - cgn_log_sess_common(s2, log_str, sizeof(log_str)); - RTE_LOG(NOTICE, CGNAT, "SESSION_CREATE %s\n", log_str); +/* Compare the keys of two sub-sessions */ +static inline int +s2_keycmp(const struct cgn_sess2 *s2a, const struct cgn_sess2 *s2b, + enum cgn_dir dir) +{ + return keycmp(&s2a->s2_sentry[dir].s2e_key, + &s2b->s2_sentry[dir].s2e_key); } /* - * SESSION_ACTIVE - Periodic logging + * Hash table match function. + * + * key - Either a pointer to the key of the entry we are inserting, or + * a key we are looking up. (type 'struct cgn_2tuple_key') + * node - Pointer to an existing table node. + * + * Return 1 for a match. */ -static void cgn_log_sess_active(struct cgn_sess2 *s2) +static int cgn_sess2_match(struct cds_lfht_node *node, const void *key) { -#define LOG_STR_SZ 400 - char log_str[LOG_STR_SZ]; - uint len; + const struct cgn_s2entry *s2e; - len = cgn_log_sess_common(s2, log_str, sizeof(log_str)); + s2e = caa_container_of(node, struct cgn_s2entry, s2e_node); - len += snprintf(log_str + len, sizeof(log_str) - len, - " cur-time=%lu", cgn_ticks2timestamp(soft_ticks)); + return !keycmp(&s2e->s2e_key, key); +} - len += snprintf(log_str + len, sizeof(log_str) - len, - " out=%u/%lu in=%lu/%lu", - s2->s2_pkts_out_tot, s2->s2_bytes_out_tot, - s2->s2_pkts_in_tot, s2->s2_bytes_in_tot); +/* + * Sub-sessions require a 1ms precision timestamp for TCP RTT calculations. + */ +uint64_t cgn_sess2_timestamp(void) +{ + struct timespec ts; - if (s2->s2_state.st_proto == NAT_PROTO_TCP) - /* TCP round-trip time in microsecs */ - snprintf(log_str + len, sizeof(log_str) - len, - " int-rtt=%u ext-rtt=%u", - s2->s2_state.st_int_rtt * 1000, - s2->s2_state.st_ext_rtt * 1000); + /* Get unix epoch time. Precision of 1ms. 
*/ + clock_gettime(CLOCK_REALTIME_COARSE, &ts); - RTE_LOG(NOTICE, CGNAT, "SESSION_ACTIVE %s\n", log_str); + return (ts.tv_sec * USEC_PER_SEC) + (ts.tv_nsec / NSEC_PER_USEC); } /* - * Log 5-tuple session end + * Create an s2 session. Sessions are only ever created in the 'out' context. */ -static void cgn_log_sess_end(struct cgn_sess2 *s2, uint64_t end_time) +struct cgn_sess2 * +cgn_sess_s2_establish(struct cgn_sess_s2 *cs2, struct cgn_packet *cpk, + int *error) { -#define LOG_STR_SZ 400 - char log_str[LOG_STR_SZ]; - uint len; + struct cgn_sess2 *s2; + + /* + * Reserve a slot from the counters. The slot should be returned if + * an error occurs at any point before the session is activated. + */ + if (unlikely(!cgn_sess_s2_slot_get(cs2))) { + *error = -CGN_S2_ENOSPC; + return NULL; + } + + s2 = zmalloc_aligned(sizeof(struct cgn_sess2)); + if (!s2) { + /* Return reserved slot */ + cgn_sess_s2_slot_put(cs2); + *error = -CGN_S2_ENOMEM; + return NULL; + } + + /* + * Populate forw sentry. Matched with outbound destination address + * and port. + */ + s2->s2_sentry[CGN_DIR_OUT].s2e_key.k_addr = cpk->cpk_daddr; + s2->s2_sentry[CGN_DIR_OUT].s2e_key.k_port = cpk->cpk_did; + s2->s2_sentry[CGN_DIR_OUT].s2e_key.k_dir = CGN_DIR_OUT; + + /* + * Populate back sentry. Matched with inbound source address and + * port. 
+ */ + s2->s2_sentry[CGN_DIR_IN].s2e_key.k_addr = cpk->cpk_daddr; + s2->s2_sentry[CGN_DIR_IN].s2e_key.k_port = cpk->cpk_did; + s2->s2_sentry[CGN_DIR_IN].s2e_key.k_dir = CGN_DIR_IN; - len = cgn_log_sess_common(s2, log_str, sizeof(log_str)); + rte_atomic32_inc(&s2->s2_pkts_out); + rte_atomic32_add(&s2->s2_bytes_out, cpk->cpk_len); - len += snprintf(log_str + len, sizeof(log_str) - len, - " end-time=%lu", cgn_ticks2timestamp(end_time)); + s2->s2_cs2 = cs2; + s2->s2_dir = CGN_DIR_OUT; + s2->s2_start_time = cgn_sess2_timestamp(); + s2->s2_id = rte_atomic32_add_return(&cs2->cs2_id, 1); - len += snprintf(log_str + len, sizeof(log_str) - len, - " out=%u/%lu in=%lu/%lu", - s2->s2_pkts_out_tot, s2->s2_bytes_out_tot, - s2->s2_pkts_in_tot, s2->s2_bytes_in_tot); + /* Randomise the initial logging interval */ + if (cs2->cs2_log_periodic > 0) + s2->s2_log_countdown = (random() % cs2->cs2_log_periodic) + 1; - if (s2->s2_state.st_proto == NAT_PROTO_TCP) - /* TCP round-trip time in microsecs */ - snprintf(log_str + len, sizeof(log_str) - len, - " int-rtt=%u ext-rtt=%u", - s2->s2_state.st_int_rtt * 1000, - s2->s2_state.st_ext_rtt * 1000); + cgn_sess_state_init(&s2->s2_state, + nat_proto_from_ipproto(cpk->cpk_ipproto), + ntohs(cgn_sess2_port(s2))); + cgn_sess_state_inspect(&s2->s2_state, cpk, CGN_DIR_OUT, + s2->s2_start_time); - RTE_LOG(NOTICE, CGNAT, "SESSION_DELETE %s\n", log_str); + return s2; } /* - * Activate a nested session + * Insert forwards or backwards sentry into table */ -int -cgn_sess2_activate(struct cds_lfht *ht, struct cgn_sess2 *s2) +static int +cgn_sess2_sentry_insert(struct cds_lfht *ht, struct cgn_s2entry *s2e) { - /* Insert into table */ struct cds_lfht_node *node; - ulong hash; - - hash = rte_jhash_1word(s2->s2_addr, s2->s2_port); - node = cds_lfht_add_unique(ht, hash, cgn_sess2_node_match, - s2, &s2->s2_node); + node = cds_lfht_add_unique(ht, cgn_sess2_hash(&s2e->s2e_key), + cgn_sess2_match, &s2e->s2e_key, + &s2e->s2e_node); - /* Did we loose the race to insert 
s2? */ - if (node != &s2->s2_node) + /* Did we lose the race to insert the sentry? */ + if (node != &s2e->s2e_node) return -CGN_S2_EEXIST; - if (cgn_session_log_start(s2->s2_cse)) - cgn_log_sess_start(s2); - return 0; } static void -cgn_sess2_deactivate(struct cds_lfht *ht, struct cgn_sess2 *s2) +cgn_sess2_sentry_delete(struct cds_lfht *ht, struct cgn_s2entry *s2e) { - /* Remove from table */ - (void)cds_lfht_del(ht, &s2->s2_node); - - /* Release the slot */ - cgn_sess2_slot_put(s2->s2_cse); + if (ht) + (void)cds_lfht_del(ht, &s2e->s2e_node); } -struct cgn_sess2 * -cgn_sess2_lookup(struct cds_lfht *ht, struct cgn_packet *cpk, int dir) +/* + * Add a 2-tuple sub session to the 3-tuple main session + */ +static int cgn_sess2_add(struct cgn_sess_s2 *cs2, struct cgn_sess2 *s2) { - struct s2_lookup_key lkey; + int rc; - if (dir == CGN_DIR_OUT) { - lkey.s2k_addr = cpk->cpk_daddr; - lkey.s2k_id = cpk->cpk_did; - } else { - lkey.s2k_addr = cpk->cpk_saddr; - lkey.s2k_id = cpk->cpk_sid; + /* + * First try and add s2 as the cached, or embedded, session + */ + if (!rcu_dereference(cs2->cs2_s2)) { + struct cgn_sess2 *old; + + old = rcu_cmpxchg_pointer(&cs2->cs2_s2, NULL, s2); + + if (old == NULL) + /* Success! */ + return 0; + + /* + * Lost race to add s2 as the cached session. If it is + * identical then return an error ... + */ + if (!s2_keycmp(s2, old, CGN_DIR_OUT)) + return -CGN_S2_EEXIST; + + /* ... Else fall thru to add s2 to the hash table */ } - struct cds_lfht_iter iter; - struct cds_lfht_node *node; - ulong hash; + struct cds_lfht *ht = rcu_dereference(cs2->cs2_ht); + + /* Create a hash table if one does not exist */ + if (!ht) { + struct cds_lfht *old; - hash = rte_jhash_1word(lkey.s2k_addr, lkey.s2k_id); + /* + * cgn_dest_sessions_max and cgn_dest_ht_max may have changed + * since the 3-tuple session was created, so reset cs2_max at + * the same time the hash table is created since we must + * ensure cs2->cs2_max <= cgn_dest_ht_max at this point. 
+ */ + cs2->cs2_max = cgn_dest_sessions_max; + + ht = cgn_sess2_ht_create(cgn_dest_ht_max); + if (!ht) + return -CGN_S2_ENOMEM; + + old = rcu_cmpxchg_pointer(&cs2->cs2_ht, NULL, ht); + if (old != NULL) { + /* + * Lost race to add hash table to main session. Thats + * ok. Destroy the one we just created here, and + * continue with the other table. + */ + cgn_sess2_ht_destroy(&ht); + ht = old; + } + } - cds_lfht_lookup(ht, hash, cgn_sess2_lkey_match, &lkey, &iter); + /* Insert forwards/out sentry */ + rc = cgn_sess2_sentry_insert(ht, &s2->s2_sentry[CGN_DIR_OUT]); - node = cds_lfht_iter_get_node(&iter); - if (node) - return caa_container_of(node, struct cgn_sess2, s2_node); + if (unlikely(rc < 0)) + return rc; + + /* Insert backwards/in sentry */ + rc = cgn_sess2_sentry_insert(ht, &s2->s2_sentry[CGN_DIR_IN]); + + if (unlikely(rc < 0)) { + cgn_sess2_sentry_delete(ht, &s2->s2_sentry[CGN_DIR_OUT]); + return rc; + } + + return 0; +} + +static void cgn_sess2_del(struct cgn_sess_s2 *cs2, struct cgn_sess2 *s2) +{ + /* Increment 2-tuple sessions destroyed in subscriber */ + struct cgn_source *src = cgn_src_from_cs2(cs2); + cgn_source_stats_sess2_destroyed(src); + + /* Is this the embedded session? 
*/ + if (s2 == cs2->cs2_s2) { + cs2->cs2_s2 = NULL; + return; + } + + /* Remove forwards and backwards sentries from the table */ + cgn_sess2_sentry_delete(cs2->cs2_ht, &s2->s2_sentry[CGN_DIR_IN]); + cgn_sess2_sentry_delete(cs2->cs2_ht, &s2->s2_sentry[CGN_DIR_OUT]); +} + +/* Populate lookup key from packet cache */ +static inline void +cgn_sess2_lookup_key_from_cpk(struct cgn_2tuple_key *key, + struct cgn_packet *cpk, enum cgn_dir dir) +{ + key->k_expired = false; + key->k_dir = dir; - return NULL; + if (dir == CGN_DIR_OUT) { + key->k_addr = cpk->cpk_daddr; + key->k_port = cpk->cpk_did; + } else { + key->k_addr = cpk->cpk_saddr; + key->k_port = cpk->cpk_sid; + } } static struct cgn_sess2 * -cgn_sess2_lookup_by_key(struct cds_lfht *ht, struct s2_lookup_key *key) +cgn_sess2_lookup(struct cgn_sess_s2 *cs2, struct cgn_2tuple_key *key, + enum cgn_dir dir) { + /* Does key match embedded session? */ + if (likely(cs2->cs2_s2 && + cgn_sess2_match(&cs2->cs2_s2->s2_sentry[dir].s2e_node, key))) + return cs2->cs2_s2; + + if (unlikely(!cs2->cs2_ht)) + return NULL; + struct cds_lfht_iter iter; + struct cgn_s2entry *s2e; struct cds_lfht_node *node; - struct cgn_sess2 *s2; - ulong hash; - - hash = rte_jhash_1word(key->s2k_addr, key->s2k_id); - cds_lfht_lookup(ht, hash, cgn_sess2_lkey_match, key, &iter); + cds_lfht_lookup(cs2->cs2_ht, cgn_sess2_hash(key), cgn_sess2_match, + key, &iter); node = cds_lfht_iter_get_node(&iter); - if (node) { - s2 = caa_container_of(node, struct cgn_sess2, s2_node); - return s2; - } + if (!node) + return NULL; + + s2e = caa_container_of(node, struct cgn_s2entry, s2e_node); - return NULL; + return sentry2sess2(s2e, dir); } /* - * Inspect + * Does the cached packet match a destination session? 
*/ struct cgn_sess2 * -cgn_sess2_inspect(struct cds_lfht *ht, struct cgn_packet *cpk, int dir) +cgn_sess_s2_inspect(struct cgn_sess_s2 *cs2, struct cgn_packet *cpk, + enum cgn_dir dir) { + struct cgn_2tuple_key key; struct cgn_sess2 *s2; - s2 = cgn_sess2_lookup(ht, cpk, dir); + cgn_sess2_lookup_key_from_cpk(&key, cpk, dir); + + s2 = cgn_sess2_lookup(cs2, &key, dir); if (!s2) return NULL; @@ -417,54 +612,92 @@ cgn_sess2_inspect(struct cds_lfht *ht, struct cgn_packet *cpk, int dir) /* * Periodic stats update. */ -static void cgn_sess2_stats_periodic(struct cgn_sess2 *s2, bool expired) +static void cgn_sess2_stats_periodic_inline(struct cgn_sess2 *s2) { - uint32_t pkts_out, pkts_in, bytes_out, bytes_in; + uint32_t pkts_out, pkts_in, bytes_out = 0, bytes_in = 0; pkts_out = rte_atomic32_exchange( (volatile uint32_t *)&s2->s2_pkts_out.cnt, 0); - bytes_out = rte_atomic32_exchange( - (volatile uint32_t *)&s2->s2_bytes_out.cnt, 0); + if (pkts_out) { + /* There can't be bytes without packets */ + bytes_out = rte_atomic32_exchange( + (volatile uint32_t *)&s2->s2_bytes_out.cnt, 0); + + s2->s2_pkts_out_tot += pkts_out; + s2->s2_bytes_out_tot += bytes_out; + } pkts_in = rte_atomic32_exchange( (volatile uint32_t *)&s2->s2_pkts_in.cnt, 0); - bytes_in = rte_atomic32_exchange( - (volatile uint32_t *)&s2->s2_bytes_in.cnt, 0); + if (pkts_in) { + bytes_in = rte_atomic32_exchange( + (volatile uint32_t *)&s2->s2_bytes_in.cnt, 0); - s2->s2_pkts_out_tot += pkts_out; - s2->s2_bytes_out_tot += bytes_out; - s2->s2_pkts_in_tot += pkts_in; - s2->s2_bytes_in_tot += bytes_in; + s2->s2_pkts_in_tot += pkts_in; + s2->s2_bytes_in_tot += bytes_in; + } /* Add stats to 3-tuple session totals */ - cgn_session_update_stats(s2->s2_cse, pkts_out, bytes_out, - pkts_in, bytes_in, expired); + if (pkts_out || pkts_in) + cgn_session_update_stats(cgn_sess_from_cs2(s2->s2_cs2), + pkts_out, bytes_out, + pkts_in, bytes_in); +} + +static void cgn_sess2_stats_periodic(struct cgn_sess2 *s2) +{ + 
cgn_sess2_stats_periodic_inline(s2); } /* - * Get state-dependent expiry time for a 2-tuple session + * Get expiry time for a 2-tuple session */ -static inline uint32_t cgn_sess2_state_expiry_time(struct cgn_state *st) +static inline uint32_t cgn_sess2_expiry_time(struct cgn_sess2 *s2) { - return cgn_sess_state_expiry_time(st->st_proto, st->st_state); + struct cgn_state *st = &s2->s2_state; + struct cgn_sess_s2 *cs2 = s2->s2_cs2; + uint32_t etime; + + /* PCP timeout (if set) takes precedence */ + if (cs2->cs2_map_timeout) + etime = cs2->cs2_map_timeout; + else + /* Get state-dependent expiry time */ + etime = cgn_sess_state_expiry_time(st->st_proto, + st->st_dst_port, + st->st_state); + + return etime; } /* * Count of unexpired sessions */ -uint32_t cgn_sess2_unexpired(struct cds_lfht *ht) +uint32_t cgn_sess_s2_unexpired(struct cgn_sess_s2 *cs2) { - struct cds_lfht_iter iter; - struct cgn_sess2 *s2; uint32_t count = 0; - if (!ht) - return 0; + /* Check embedded session */ + if (cs2->cs2_s2 && !s2_expired(cs2->cs2_s2)) + count++; + + if (!cs2->cs2_ht) + return count; + + struct cds_lfht_iter iter; + struct cgn_s2entry *s2e; + + /* + * There are two ht nodes per sub-session ('in' and 'out'), so to + * iterate over each sub-session we only need to look at one of them. 
+ */ + cds_lfht_for_each_entry(cs2->cs2_ht, &iter, s2e, s2e_node) { + if (s2e->s2e_key.k_dir != CGN_DIR_OUT) + continue; - cds_lfht_for_each_entry(ht, &iter, s2, s2_node) { - if (!s2->s2_expired) + if (!s2e->s2e_key.k_expired) count++; } return count; @@ -487,13 +720,21 @@ static void cgn_sess2_set_expired(struct cgn_sess2 *s2, bool close, bool log) if (close) cgn_sess_state_close(&s2->s2_state); - s2->s2_expired = true; + s2->s2_sentry[CGN_DIR_OUT].s2e_key.k_expired = true; + s2->s2_sentry[CGN_DIR_IN].s2e_key.k_expired = true; /* Add stats to 3-tuple session totals */ - cgn_sess2_stats_periodic(s2, true); + cgn_sess2_stats_periodic(s2); + + if (!log) + s2->s2_log_end = false; +} - if (log && cgn_session_log_end(s2->s2_cse)) - cgn_log_sess_end(s2, soft_ticks); +/* Already expired? */ +static bool s2_expired(struct cgn_sess2 *s2) +{ + /* Only need to check one sentry as both are set at same time */ + return s2->s2_sentry[CGN_DIR_OUT].s2e_key.k_expired; } /* @@ -504,17 +745,17 @@ static inline bool cgn_sess2_expired(struct cgn_sess2 *s2) uint32_t etime; /* Already expired? */ - if (unlikely(s2->s2_expired)) + if (unlikely(s2_expired(s2))) return true; if (rte_atomic16_test_and_set(&s2->s2_state.st_idle)) { /* Session changed to idle */ - /* Get state-dependent expiry time */ - etime = cgn_sess2_state_expiry_time(&s2->s2_state); + /* Get expiry time */ + etime = cgn_sess2_expiry_time(s2); /* Set expiry time */ - s2->s2_etime = cgn_get_time_uptime() + etime; + s2->s2_etime = get_dp_uptime() + etime; return false; } @@ -522,27 +763,24 @@ static inline bool cgn_sess2_expired(struct cgn_sess2 *s2) /* * Session was already idle. Has it timed-out? */ - if (time_after(cgn_get_time_uptime(), s2->s2_etime)) { + if (time_after(get_dp_uptime(), s2->s2_etime)) { /* yes, session has timed-out */ /* * Crank state-machine with timeout event, and get the timeout * value for the new state. 
*/ - etime = cgn_sess_state_timeout(&s2->s2_state); + bool closed = cgn_sess_state_timeout(&s2->s2_state); /* Did timeout cause session to close? */ - if (etime == 0) { + if (closed) { /* yes. Mark session as expired */ cgn_sess2_set_expired(s2, false, true); return true; } - /* Else reset timer */ - etime = cgn_sess_state_timeout(&s2->s2_state); - - /* Set expiry time */ - s2->s2_etime = cgn_get_time_uptime() + etime; + /* Else reset expiry timer */ + s2->s2_etime = get_dp_uptime() + cgn_sess2_expiry_time(s2); } return false; @@ -561,74 +799,301 @@ cgn_sess2_destroy(struct cgn_sess2 *s2) call_rcu(&s2->s2_rcu_head, cgn_sess2_rcu_free); } -void cgn_sess2_gc_walk(struct cds_lfht *ht, uint *unexpd, uint *expd) +static inline unsigned int +cgn_sess2_log_start_and_active(struct cgn_sess2 *s2) { - struct cds_lfht_iter iter; - struct cgn_sess2 *s2; + unsigned int count = 0; + + if (unlikely(s2->s2_log_start)) { + cgn_log_sess_start(s2); + s2->s2_log_start = false; + count++; + } - cds_lfht_for_each_entry(ht, &iter, s2, s2_node) { + if (unlikely(s2->s2_log_active)) { + cgn_log_sess_active(s2); + s2->s2_log_active = false; + count++; + } - cgn_sess2_stats_periodic(s2, false); + return count; +} - if (s2->s2_log_countdown) { - s2->s2_log_countdown -= 1; +/* + * We log the end of the sub-session using the dataplane timestamp. This has + * less precision than the mechanism we used for setting s2_start_time, but + * thats ok. 10ms is ok for logging, whereas the s2_start_time required 1ms + * precision since it is also used for TCP RTT calculations. 
+ */ +static inline unsigned int +cgn_sess2_log_end(struct cgn_sess2 *s2) +{ + if (unlikely(s2_expired(s2) && s2->s2_log_end)) { + cgn_log_sess_end(s2, unix_epoch_us); + s2->s2_log_end = false; + return 1; + } - if (s2->s2_log_countdown == 0) { - s2->s2_log_countdown = - cgn_session_log_periodic(s2->s2_cse); - cgn_log_sess_active(s2); - } - } + return 0; +} - if (!cgn_sess2_expired(s2)) { - (*unexpd)++; - continue; +static void +cgn_sess2_gc_inspect_inline(struct cgn_sess2 *s2, uint *unexpd, uint *expd, + struct cgn_sess_s2 *cs2) +{ + cgn_sess2_stats_periodic_inline(s2); + + if (s2->s2_log_countdown) { + s2->s2_log_countdown -= 1; + + if (unlikely(s2->s2_log_countdown == 0)) { + s2->s2_log_countdown = cs2->cs2_log_periodic; + s2->s2_log_active = true; } + } - if (!s2->s2_gc_pass) { - s2->s2_gc_pass = true; - (*expd)++; - continue; + if (!cgn_helper_thread_enabled) + cgn_sess2_log_start_and_active(s2); + + if (likely(!cgn_sess2_expired(s2))) { + (*unexpd)++; + return; + } + + if (!cgn_helper_thread_enabled) + cgn_sess2_log_end(s2); + + if (s2->s2_log_start || s2->s2_log_end) { + /* + * Ensure that the session is not freed until the + * logging has been performed. 
+ */ + (*expd)++; + return; + } + + if (!s2->s2_gc_pass) { + s2->s2_gc_pass = true; + (*expd)++; + return; + } + + /* Remove from hash table */ + cgn_sess2_del(cs2, s2); + + /* Release the slot */ + cgn_sess_s2_slot_put(cs2); + + /* Schedule rcu free */ + cgn_sess2_destroy(s2); +} + +static void +cgn_sess2_gc_inspect(struct cgn_sess2 *s2, uint *unexpd, uint *expd, + struct cgn_sess_s2 *cs2) +{ + cgn_sess2_gc_inspect_inline(s2, unexpd, expd, cs2); +} + +void cgn_sess_s2_gc_walk(struct cgn_sess_s2 *cs2, uint *unexpd, uint *expd) +{ + /* Check embedded session */ + if (likely(cs2->cs2_s2)) + cgn_sess2_gc_inspect_inline(cs2->cs2_s2, unexpd, expd, cs2); + + if (unlikely(cs2->cs2_ht)) { + struct cds_lfht_iter iter; + struct cgn_sess2 *s2; + struct cgn_s2entry *s2e; + + /* + * There are two ht nodes per sub-session ('in' and 'out'), so + * to iterate over each sub-session we only need to look at + * one of them. + */ + cds_lfht_for_each_entry(cs2->cs2_ht, &iter, s2e, s2e_node) { + if (s2e->s2e_key.k_dir != CGN_DIR_OUT) + continue; + + s2 = caa_container_of(s2e, struct cgn_sess2, + s2_sentry[CGN_DIR_OUT]); + + cgn_sess2_gc_inspect(s2, unexpd, expd, cs2); } + } - /* Remove from hash table */ - cgn_sess2_deactivate(ht, s2); + /* + * If the dest session table was full, check if it is still + * full. 
+ */ + if (unlikely(cs2->cs2_full)) { + if (rte_atomic16_read(&cs2->cs2_used) < cs2->cs2_max) + cgn_sess_s2_set_available(cs2); + } +} - /* Schedule rcu free */ - cgn_sess2_destroy(s2); +static inline uint cgn_sess_s2_expire_one(struct cgn_sess2 *s2) +{ + if (!s2_expired(s2)) { + cgn_sess2_set_expired(s2, true, false); + return 1; } + return 0; } -uint cgn_sess2_expire_all(struct cds_lfht *ht) +uint cgn_sess_s2_expire_all(struct cgn_sess_s2 *cs2) { + uint count = 0; + + if (cs2->cs2_s2) + count += cgn_sess_s2_expire_one(cs2->cs2_s2); + + if (!cs2->cs2_ht) + return count; + struct cds_lfht_iter iter; struct cgn_sess2 *s2; - uint count = 0; + struct cgn_s2entry *s2e; - cds_lfht_for_each_entry(ht, &iter, s2, s2_node) { - if (!s2->s2_expired) { - cgn_sess2_set_expired(s2, true, false); - count++; - } + /* + * There are two ht nodes per sub-session ('in' and 'out'), so to + * iterate over each sub-session we only need to look at one of them. + */ + cds_lfht_for_each_entry(cs2->cs2_ht, &iter, s2e, s2e_node) { + if (s2e->s2e_key.k_dir != CGN_DIR_OUT) + continue; + + s2 = caa_container_of(s2e, struct cgn_sess2, + s2_sentry[CGN_DIR_OUT]); + + count += cgn_sess_s2_expire_one(s2); } + return count; } /* * Expire session by ID */ -uint cgn_sess2_expire_id(struct cds_lfht *ht, uint32_t s2_id) +static inline uint +cgn_sess_s2_expire_id_one(struct cgn_sess2 *s2, uint32_t s2_id) +{ + if (!s2_expired(s2) && (s2_id == 0 || s2_id == s2->s2_id)) { + cgn_sess2_set_expired(s2, true, false); + return 1; + } + return 0; +} + +uint cgn_sess_s2_expire_id(struct cgn_sess_s2 *cs2, uint32_t s2_id) { + uint count = 0; + + if (cs2->cs2_s2) + count += cgn_sess_s2_expire_id_one(cs2->cs2_s2, s2_id); + + if (!cs2->cs2_ht) + return count; + struct cds_lfht_iter iter; struct cgn_sess2 *s2; - uint count = 0; + struct cgn_s2entry *s2e; - cds_lfht_for_each_entry(ht, &iter, s2, s2_node) { - if (!s2->s2_expired && (s2_id == 0 || s2_id == s2->s2_id)) { - cgn_sess2_set_expired(s2, true, false); - count++; + 
/* + * There are two ht nodes per sub-session ('in' and 'out'), so to + * iterate over each sub-session we only need to look at one of them. + */ + cds_lfht_for_each_entry(cs2->cs2_ht, &iter, s2e, s2e_node) { + if (s2e->s2e_key.k_dir != CGN_DIR_OUT) + continue; + + s2 = caa_container_of(s2e, struct cgn_sess2, + s2_sentry[CGN_DIR_OUT]); + + count += cgn_sess_s2_expire_id_one(s2, s2_id); + } + + return count; +} + +static void cgn_sess2_clear_or_update_stats_one(struct cgn_sess2 *s2, + bool clear) +{ + if (s2_expired(s2)) + return; + + cgn_sess2_stats_periodic(s2); + + if (clear) { + s2->s2_pkts_out_tot = 0; + s2->s2_bytes_out_tot = 0UL; + s2->s2_pkts_in_tot = 0UL; + s2->s2_bytes_in_tot = 0UL; + } +} + +void cgn_sess2_clear_or_update_stats(struct cgn_sess_s2 *cs2, bool clear) +{ + if (cs2->cs2_s2) + cgn_sess2_clear_or_update_stats_one(cs2->cs2_s2, clear); + + if (!cs2->cs2_ht) + return; + + struct cds_lfht_iter iter; + struct cgn_sess2 *s2; + struct cgn_s2entry *s2e; + + /* + * There are two ht nodes per sub-session ('in' and 'out'), so to + * iterate over each sub-session we only need to look at one of them. + */ + cds_lfht_for_each_entry(cs2->cs2_ht, &iter, s2e, s2e_node) { + if (s2e->s2e_key.k_dir != CGN_DIR_OUT) + continue; + + s2 = caa_container_of(s2e, struct cgn_sess2, + s2_sentry[CGN_DIR_OUT]); + + cgn_sess2_clear_or_update_stats_one(s2, clear); + } +} + +static inline unsigned int +cgn_sess2_log_inspect(struct cgn_sess2 *s2) +{ + return cgn_sess2_log_start_and_active(s2) + cgn_sess2_log_end(s2); +} + +int cgn_sess_s2_log_walk(struct cgn_sess_s2 *cs2) +{ + unsigned int count = 0; + + /* Check embedded session */ + if (likely(cs2->cs2_s2)) + count += cgn_sess2_log_inspect(cs2->cs2_s2); + + if (unlikely(cs2->cs2_ht)) { + struct cds_lfht_iter iter; + struct cgn_sess2 *s2; + struct cgn_s2entry *s2e; + + /* + * There are two ht nodes per sub-session ('in' and 'out'), so + * to iterate over each sub-session we only need to look at + * one of them. 
+ */ + cds_lfht_for_each_entry(cs2->cs2_ht, &iter, s2e, s2e_node) { + if (s2e->s2e_key.k_dir != CGN_DIR_OUT) + continue; + + s2 = caa_container_of(s2e, struct cgn_sess2, + s2_sentry[CGN_DIR_OUT]); + + count += cgn_sess2_log_inspect(s2); } } + return count; } @@ -636,25 +1101,24 @@ static void cgn_sess2_jsonw_one(json_writer_t *json, struct cgn_sess2 *s2) { char dst_str[16]; - uint32_t uptime = cgn_get_time_uptime(); - uint32_t max_timeout = cgn_sess2_state_expiry_time(&s2->s2_state); + uint32_t uptime = get_dp_uptime(); + uint32_t max_timeout = cgn_sess2_expiry_time(s2); + uint32_t addr = cgn_sess2_addr(s2); - inet_ntop(AF_INET, &s2->s2_addr, dst_str, sizeof(dst_str)); + inet_ntop(AF_INET, &addr, dst_str, sizeof(dst_str)); jsonw_start_object(json); jsonw_string_field(json, "dst_addr", dst_str); - jsonw_uint_field(json, "dst_port", htons(s2->s2_port)); + jsonw_uint_field(json, "dst_port", htons(cgn_sess2_port(s2))); jsonw_uint_field(json, "id", s2->s2_id); cgn_sess_state_jsonw(json, &s2->s2_state); - jsonw_uint_field(json, "start_time", - cgn_ticks2timestamp(s2->s2_start_time)); - jsonw_uint_field(json, "duration", - cgn_start2duration(s2->s2_start_time)); + jsonw_uint_field(json, "start_time", s2->s2_start_time); + jsonw_uint_field(json, "duration", unix_epoch_us - s2->s2_start_time); - jsonw_bool_field(json, "exprd", s2->s2_expired); + jsonw_bool_field(json, "exprd", s2_expired(s2)); if (rte_atomic16_read(&s2->s2_state.st_idle)) jsonw_uint_field(json, "cur_to", @@ -681,10 +1145,11 @@ cgn_sess2_show_fltr(struct cgn_sess2 *s2, struct cgn_sess_fltr *fltr) { /* Filter on destination address and port */ if (fltr->cf_dst_mask && - fltr->cf_dst.s2k_addr != (s2->s2_addr & fltr->cf_dst_mask)) + fltr->cf_dst.k_addr != + (cgn_sess2_addr(s2) & fltr->cf_dst_mask)) return false; - if (fltr->cf_dst.s2k_id && fltr->cf_dst.s2k_id != s2->s2_port) + if (fltr->cf_dst.k_port && fltr->cf_dst.k_port != cgn_sess2_port(s2)) return false; /* Filter on session ID */ @@ -695,39 +1160,57 @@ 
cgn_sess2_show_fltr(struct cgn_sess2 *s2, struct cgn_sess_fltr *fltr) } /* - * Determine how many sessions a filter might match in cgn_sess2_show. + * How many s2 sessions match a filter? */ -uint cgn_sess2_show_count(struct cds_lfht *ht, struct cgn_sess_fltr *fltr) +uint cgn_sess_s2_fltr_count(struct cgn_sess_s2 *cs2, + struct cgn_sess_fltr *fltr) { - struct cds_lfht_iter iter; struct cgn_sess2 *s2; - uint32_t count = 0; - - if (!ht) - return 0; /* - * Are there enough filter params to do a hash lookup? + * Are there enough filter params to match embedded session, or a hash + * table session? */ if (fltr->cf_dst_mask == 0xffffffff && cgn_s2_key_valid(&fltr->cf_dst)) { - s2 = cgn_sess2_lookup_by_key(ht, &fltr->cf_dst); + s2 = cgn_sess2_lookup(cs2, &fltr->cf_dst, fltr->cf_dir); if (s2 && cgn_sess2_show_fltr(s2, fltr)) return 1; } - cds_lfht_for_each_entry(ht, &iter, s2, s2_node) { + uint32_t count = 0; + + if (cs2->cs2_s2) + if (cgn_sess2_show_fltr(cs2->cs2_s2, fltr)) + count++; + + if (!cs2->cs2_ht) + return count; + + struct cds_lfht_iter iter; + struct cgn_s2entry *s2e; + + /* + * There are two ht nodes per sub-session ('in' and 'out'), so to + * iterate over each sub-session we only need to look at one of them. 
+ */ + cds_lfht_for_each_entry(cs2->cs2_ht, &iter, s2e, s2e_node) { + if (s2e->s2e_key.k_dir != CGN_DIR_OUT) + continue; + + s2 = caa_container_of(s2e, struct cgn_sess2, + s2_sentry[CGN_DIR_OUT]); + if (cgn_sess2_show_fltr(s2, fltr)) count++; } return count; } -uint cgn_sess2_show(json_writer_t *json, struct cds_lfht *ht, - struct cgn_sess_fltr *fltr) +uint cgn_sess_s2_show(json_writer_t *json, struct cgn_sess_s2 *cs2, + struct cgn_sess_fltr *fltr) { - struct cds_lfht_iter iter; struct cgn_sess2 *s2; uint count = 0; @@ -743,7 +1226,7 @@ uint cgn_sess2_show(json_writer_t *json, struct cds_lfht *ht, if (fltr->cf_dst_mask == 0xffffffff && cgn_s2_key_valid(&fltr->cf_dst)) { - s2 = cgn_sess2_lookup_by_key(ht, &fltr->cf_dst); + s2 = cgn_sess2_lookup(cs2, &fltr->cf_dst, fltr->cf_dir); if (s2 && cgn_sess2_show_fltr(s2, fltr)) { cgn_sess2_jsonw_one(json, s2); @@ -752,7 +1235,27 @@ uint cgn_sess2_show(json_writer_t *json, struct cds_lfht *ht, goto end; } - cds_lfht_for_each_entry(ht, &iter, s2, s2_node) { + if (cs2->cs2_s2 && cgn_sess2_show_fltr(cs2->cs2_s2, fltr)) { + cgn_sess2_jsonw_one(json, cs2->cs2_s2); + count++; + } + + if (!cs2->cs2_ht) + goto end; + + struct cds_lfht_iter iter; + struct cgn_s2entry *s2e; + + /* + * There are two ht nodes per sub-session ('in' and 'out'), so to + * iterate over each sub-session we only need to look at one of them. 
+ */ + cds_lfht_for_each_entry(cs2->cs2_ht, &iter, s2e, s2e_node) { + if (s2e->s2e_key.k_dir != CGN_DIR_OUT) + continue; + + s2 = caa_container_of(s2e, struct cgn_sess2, + s2_sentry[CGN_DIR_OUT]); if (cgn_sess2_show_fltr(s2, fltr)) { cgn_sess2_jsonw_one(json, s2); @@ -767,25 +1270,48 @@ uint cgn_sess2_show(json_writer_t *json, struct cds_lfht *ht, return count; } -struct cds_lfht *cgn_sess2_ht_create(void) +#define CGN_SESS2_MIN_BUCKETS 32 + +/* + * Create hash table + */ +static struct cds_lfht *cgn_sess2_ht_create(ulong max) { struct cds_lfht *ht; - ht = cds_lfht_new(CGN_SESS2_HT_INIT, CGN_SESS2_HT_MIN, - CGN_SESS2_HT_MAX, CGN_SESS2_HT_FLAGS, NULL); + /* Number of hash buckets must be a power of two */ + max = rte_align32pow2(max); + + /* + * Table is used for both forwards and backwards sentries, so double + * the max session value. + */ + max <<= 1; + + ht = cds_lfht_new(CGN_SESS2_MIN_BUCKETS, CGN_SESS2_MIN_BUCKETS, max, + CDS_LFHT_AUTO_RESIZE, NULL); + + if (likely(ht)) + rte_atomic64_inc(&cgn_sess2_ht_created); + return ht; } -void cgn_sess2_ht_destroy(struct cds_lfht **htp) +static void cgn_sess2_ht_destroy(struct cds_lfht **htp) { struct cds_lfht *ht = *htp; if (ht) { - assert(cgn_sess2_count(ht) == 0); - /* Destroy sess2 hash table */ dp_ht_destroy_deferred(ht); *htp = NULL; + + rte_atomic64_inc(&cgn_sess2_ht_destroyed); } } +/* Used by unit-tests only */ +size_t cgn_sess2_size(void) +{ + return sizeof(struct cgn_sess2); +} diff --git a/src/npf/cgnat/cgn_sess2.h b/src/npf/cgnat/cgn_sess2.h index d8335012..4041268a 100644 --- a/src/npf/cgnat/cgn_sess2.h +++ b/src/npf/cgnat/cgn_sess2.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2019-2021, AT&T Intellectual Property. All rights reserved. 
* * SPDX-License-Identifier: LGPL-2.1-only */ @@ -13,36 +13,68 @@ #include "urcu.h" #include "json_writer.h" +#include "npf/cgnat/cgn_hash_key.h" struct cgn_session; struct cgn_packet; struct cgn_sess2; struct cgn_state; +struct cds_lfht; -/* 3-tuple session lookup key */ -struct sess_lookup_key { - uint32_t sk_ifindex; - uint32_t sk_addr; - uint16_t sk_id; - uint8_t sk_ipproto; -}; - -/* 2-tuple session lookup key */ -struct s2_lookup_key { - uint32_t s2k_addr; - uint16_t s2k_id; +/* + * The s2 dest info container in each 3-tuple cse session. + * + * cs2_ht - Hash table. Used when there is more than one session. + * cs2_s2 - Embedded session. Used for the first session. + * cs2_id - Resource to allocate session IDs from. Always increases. + * cs2_dst_port - The dest port from the pkt that created the 3-tuple session. + * May be used to determine session expiry time. Net order. + * cs2_used - Atomic count of sessions. + * cs2_max - Maximum number of sessions. + * cs2_full - Set true when session count exceeds max. + * cs2_enbld - True when dest info is being kept for this 3-tuple session. + * + * Only the very first session is added to cs2_s2. If further sessions are + * added then a hash table is created. We keep using cs2_s2, provided it has + * not expired. + * + * Therefore in every lookup or iteration over all sessions we must check both + * cs2_ht and cs2_s2, as either or both may be NULL. + */ +struct cgn_sess_s2 { + struct cds_lfht *cs2_ht; + struct cgn_sess2 *cs2_s2; + rte_atomic32_t cs2_id; + uint16_t cs2_dst_port; + rte_atomic16_t cs2_used; + int16_t cs2_max; + + /* + * Timeout for a map instantiated session. May be used for any + * 2-tuple sessions created on a PCP 3-tuple session. 
+ */ + uint16_t cs2_map_timeout; + + /* s2 session logging parameters */ + uint16_t cs2_log_periodic; /* Units of gc intervals */ + uint8_t cs2_full:1; + uint8_t cs2_enbld:1; + uint8_t cs2_log_start:1; + uint8_t cs2_log_end:1; + + uint8_t cs2_pad[1]; /* Pad to 8 byte boundary */ }; -static inline bool cgn_sess_key_valid(struct sess_lookup_key *sk) +static inline bool cgn_sess_key_valid(struct cgn_3tuple_key *key) { - if (sk->sk_ifindex && sk->sk_addr && sk->sk_id && sk->sk_ipproto) + if (key->k_ifindex && key->k_addr && key->k_port && key->k_ipproto) return true; return false; } -static inline bool cgn_s2_key_valid(struct s2_lookup_key *sk) +static inline bool cgn_s2_key_valid(struct cgn_2tuple_key *key) { - return (sk->s2k_addr && sk->s2k_id); + return (key->k_addr && key->k_port && !key->k_expired); } enum cgn_show_dir { @@ -56,15 +88,16 @@ enum cgn_show_dir { * * Addresses, masks and ports are it network-byte order. */ -#define CGN_SESS_FLTR_DESC_SZ 100 +#define CGN_SESS_FLTR_DESC_SZ 200 struct cgn_sess_fltr { char cf_desc[CGN_SESS_FLTR_DESC_SZ]; bool cf_all; bool cf_all_sess2; bool cf_no_sess2; - struct sess_lookup_key cf_subs; - struct sess_lookup_key cf_pub; - struct s2_lookup_key cf_dst; + uint32_t cf_ifindex; + struct cgn_3tuple_key cf_subs; + struct cgn_3tuple_key cf_pub; + struct cgn_2tuple_key cf_dst; uint32_t cf_subs_mask; uint32_t cf_pub_mask; uint32_t cf_dst_mask; @@ -74,7 +107,7 @@ struct cgn_sess_fltr { struct nat_pool *cf_np; /* Target session for batch request */ - struct sess_lookup_key cf_tgt; + struct cgn_3tuple_key cf_tgt; enum cgn_show_dir cf_dir; /* Show related */ @@ -82,32 +115,50 @@ struct cgn_sess_fltr { uint32_t cf_count; /* Op-mode map command */ - uint cf_timeout; + uint16_t cf_timeout; + bool cf_clear_stats; }; -struct cgn_sess2 *cgn_sess2_establish(struct cgn_session *cse, - struct cgn_packet *cpk, - rte_atomic32_t *id_rsc, int dir); - -int cgn_sess2_activate(struct cds_lfht *ht, struct cgn_sess2 *s2); - -struct cgn_sess2 
*cgn_sess2_inspect(struct cds_lfht *ht, - struct cgn_packet *cpk, int dir); -uint32_t cgn_sess2_unexpired(struct cds_lfht *ht); - -struct cgn_sess2 *cgn_sess2_lookup(struct cds_lfht *ht, - struct cgn_packet *cpk, int dir); - -ulong cgn_sess2_count(struct cds_lfht *ht); -void cgn_sess2_gc_walk(struct cds_lfht *ht, uint *unexpd, uint *expd); -uint cgn_sess2_expire_all(struct cds_lfht *ht); -uint cgn_sess2_expire_id(struct cds_lfht *ht, uint32_t s2_id); - -struct cds_lfht *cgn_sess2_ht_create(void); -void cgn_sess2_ht_destroy(struct cds_lfht **htp); +/* + * API with 3-tuple parent session in cgn_session.c + * + * 'struct cgn_sess_s2' is the 2-tuple table and state embedded in each + * 3-tuple session. + */ +void cgn_sess_s2_disable(struct cgn_sess_s2 *cs2); +int16_t cgn_sess_s2_count(struct cgn_sess_s2 *cs2); +uint64_t cgn_sess2_timestamp(void); +struct cgn_sess2 *cgn_sess_s2_establish(struct cgn_sess_s2 *cs2, + struct cgn_packet *cpk, + int *error); +int cgn_sess_s2_activate(struct cgn_sess_s2 *cs2, struct cgn_sess2 *s2); +struct cgn_sess2 *cgn_sess_s2_inspect(struct cgn_sess_s2 *cs2, + struct cgn_packet *cpk, enum cgn_dir dir); +uint cgn_sess_s2_fltr_count(struct cgn_sess_s2 *cs2, + struct cgn_sess_fltr *fltr); +uint32_t cgn_sess_s2_unexpired(struct cgn_sess_s2 *cs2); +uint cgn_sess_s2_expire_all(struct cgn_sess_s2 *cs2); +uint cgn_sess_s2_expire_id(struct cgn_sess_s2 *cs2, uint32_t s2_id); +void cgn_sess2_clear_or_update_stats(struct cgn_sess_s2 *cs2, bool clear); +uint cgn_sess_s2_show(json_writer_t *json, struct cgn_sess_s2 *cs2, + struct cgn_sess_fltr *fltr); +void cgn_sess_s2_gc_walk(struct cgn_sess_s2 *cs2, uint *unexpd, uint *expd); +int cgn_sess_s2_log_walk(struct cgn_sess_s2 *cs2); -uint cgn_sess2_show_count(struct cds_lfht *ht, struct cgn_sess_fltr *fltr); -uint cgn_sess2_show(json_writer_t *json, struct cds_lfht *ht, - struct cgn_sess_fltr *fltr); +/* + * s2 session accessor functions + */ +struct cgn_session *cgn_sess2_session(struct cgn_sess2 *s2); 
+struct cgn_state *cgn_sess2_state(struct cgn_sess2 *s2); +uint32_t cgn_sess2_id(struct cgn_sess2 *s2); +uint32_t cgn_sess2_ipproto(struct cgn_sess2 *s2); +uint32_t cgn_sess2_addr(struct cgn_sess2 *s2); +uint16_t cgn_sess2_port(struct cgn_sess2 *s2); +uint64_t cgn_sess2_start_time(struct cgn_sess2 *s2); +uint32_t cgn_sess2_pkts_out_tot(struct cgn_sess2 *s2); +uint64_t cgn_sess2_bytes_out_tot(struct cgn_sess2 *s2); +uint64_t cgn_sess2_pkts_in_tot(struct cgn_sess2 *s2); +uint64_t cgn_sess2_bytes_in_tot(struct cgn_sess2 *s2); +uint8_t cgn_sess2_dir(struct cgn_sess2 *s2); #endif diff --git a/src/npf/cgnat/cgn_sess_state.c b/src/npf/cgnat/cgn_sess_state.c index eb4b2c46..72253771 100644 --- a/src/npf/cgnat/cgn_sess_state.c +++ b/src/npf/cgnat/cgn_sess_state.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2019-2021, AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only */ @@ -13,8 +13,8 @@ #include #include "util.h" -#include "soft_ticks.h" +#include "npf/cgnat/cgn.h" #include "npf/cgnat/cgn_limits.h" #include "npf/cgnat/cgn_mbuf.h" #include "npf/cgnat/cgn_sess2.h" @@ -26,11 +26,11 @@ static uint8_t cgn_tcp_fsm[CGN_TCP_STATE_COUNT][CGN_DIR_SZ][CGN_TCP_EVENT_COUNT] = { [CGN_TCP_STATE_NONE] = { - [CGN_DIR_FORW] = { 0 }, - [CGN_DIR_BACK] = { 0 }, + [CGN_DIR_OUT] = { 0 }, + [CGN_DIR_IN] = { 0 }, }, [CGN_TCP_STATE_CLOSED] = { - [CGN_DIR_FORW] = { + [CGN_DIR_OUT] = { [CGN_TCP_EVENT_NONE] = 0, [CGN_TCP_EVENT_SYN] = CGN_TCP_STATE_INIT, [CGN_TCP_EVENT_RST] = 0, @@ -38,7 +38,7 @@ cgn_tcp_fsm[CGN_TCP_STATE_COUNT][CGN_DIR_SZ][CGN_TCP_EVENT_COUNT] = { [CGN_TCP_EVENT_FIN] = 0, [CGN_TCP_EVENT_TO] = 0, }, - [CGN_DIR_BACK] = { + [CGN_DIR_IN] = { [CGN_TCP_EVENT_NONE] = 0, [CGN_TCP_EVENT_SYN] = 0, [CGN_TCP_EVENT_RST] = 0, @@ -48,7 +48,7 @@ cgn_tcp_fsm[CGN_TCP_STATE_COUNT][CGN_DIR_SZ][CGN_TCP_EVENT_COUNT] = { }, }, [CGN_TCP_STATE_INIT] = { - [CGN_DIR_FORW] = { + [CGN_DIR_OUT] = { 
[CGN_TCP_EVENT_NONE] = 0, [CGN_TCP_EVENT_SYN] = 0, [CGN_TCP_EVENT_RST] = 0, @@ -56,7 +56,7 @@ cgn_tcp_fsm[CGN_TCP_STATE_COUNT][CGN_DIR_SZ][CGN_TCP_EVENT_COUNT] = { [CGN_TCP_EVENT_FIN] = 0, [CGN_TCP_EVENT_TO] = CGN_TCP_STATE_CLOSED, }, - [CGN_DIR_BACK] = { + [CGN_DIR_IN] = { [CGN_TCP_EVENT_NONE] = 0, [CGN_TCP_EVENT_SYN] = CGN_TCP_STATE_ESTABLISHED, [CGN_TCP_EVENT_RST] = 0, @@ -66,7 +66,7 @@ cgn_tcp_fsm[CGN_TCP_STATE_COUNT][CGN_DIR_SZ][CGN_TCP_EVENT_COUNT] = { }, }, [CGN_TCP_STATE_ESTABLISHED] = { - [CGN_DIR_FORW] = { + [CGN_DIR_OUT] = { [CGN_TCP_EVENT_NONE] = 0, [CGN_TCP_EVENT_SYN] = 0, [CGN_TCP_EVENT_RST] = CGN_TCP_STATE_TRANS, @@ -74,7 +74,7 @@ cgn_tcp_fsm[CGN_TCP_STATE_COUNT][CGN_DIR_SZ][CGN_TCP_EVENT_COUNT] = { [CGN_TCP_EVENT_FIN] = CGN_TCP_STATE_C_FIN_RCV, [CGN_TCP_EVENT_TO] = CGN_TCP_STATE_TRANS, }, - [CGN_DIR_BACK] = { + [CGN_DIR_IN] = { [CGN_TCP_EVENT_NONE] = 0, [CGN_TCP_EVENT_SYN] = 0, [CGN_TCP_EVENT_RST] = CGN_TCP_STATE_TRANS, @@ -84,7 +84,7 @@ cgn_tcp_fsm[CGN_TCP_STATE_COUNT][CGN_DIR_SZ][CGN_TCP_EVENT_COUNT] = { }, }, [CGN_TCP_STATE_TRANS] = { - [CGN_DIR_FORW] = { + [CGN_DIR_OUT] = { [CGN_TCP_EVENT_NONE] = 0, [CGN_TCP_EVENT_SYN] = 0, [CGN_TCP_EVENT_RST] = 0, @@ -92,17 +92,17 @@ cgn_tcp_fsm[CGN_TCP_STATE_COUNT][CGN_DIR_SZ][CGN_TCP_EVENT_COUNT] = { [CGN_TCP_EVENT_FIN] = 0, [CGN_TCP_EVENT_TO] = CGN_TCP_STATE_CLOSED, }, - [CGN_DIR_BACK] = { + [CGN_DIR_IN] = { [CGN_TCP_EVENT_NONE] = 0, [CGN_TCP_EVENT_SYN] = 0, [CGN_TCP_EVENT_RST] = 0, - [CGN_TCP_EVENT_ACK] = CGN_TCP_STATE_ESTABLISHED, + [CGN_TCP_EVENT_ACK] = 0, [CGN_TCP_EVENT_FIN] = 0, [CGN_TCP_EVENT_TO] = CGN_TCP_STATE_CLOSED, }, }, [CGN_TCP_STATE_C_FIN_RCV] = { - [CGN_DIR_FORW] = { + [CGN_DIR_OUT] = { [CGN_TCP_EVENT_NONE] = 0, [CGN_TCP_EVENT_SYN] = 0, [CGN_TCP_EVENT_RST] = 0, @@ -110,7 +110,7 @@ cgn_tcp_fsm[CGN_TCP_STATE_COUNT][CGN_DIR_SZ][CGN_TCP_EVENT_COUNT] = { [CGN_TCP_EVENT_FIN] = 0, [CGN_TCP_EVENT_TO] = CGN_TCP_STATE_CLOSED, }, - [CGN_DIR_BACK] = { + [CGN_DIR_IN] = { [CGN_TCP_EVENT_NONE] = 0, 
[CGN_TCP_EVENT_SYN] = 0, [CGN_TCP_EVENT_RST] = 0, @@ -120,7 +120,7 @@ cgn_tcp_fsm[CGN_TCP_STATE_COUNT][CGN_DIR_SZ][CGN_TCP_EVENT_COUNT] = { }, }, [CGN_TCP_STATE_S_FIN_RCV] = { - [CGN_DIR_FORW] = { + [CGN_DIR_OUT] = { [CGN_TCP_EVENT_NONE] = 0, [CGN_TCP_EVENT_SYN] = 0, [CGN_TCP_EVENT_RST] = 0, @@ -128,7 +128,7 @@ cgn_tcp_fsm[CGN_TCP_STATE_COUNT][CGN_DIR_SZ][CGN_TCP_EVENT_COUNT] = { [CGN_TCP_EVENT_FIN] = CGN_TCP_STATE_CS_FIN_RCV, [CGN_TCP_EVENT_TO] = CGN_TCP_STATE_CLOSED, }, - [CGN_DIR_BACK] = { + [CGN_DIR_IN] = { [CGN_TCP_EVENT_NONE] = 0, [CGN_TCP_EVENT_SYN] = 0, [CGN_TCP_EVENT_RST] = 0, @@ -138,7 +138,7 @@ cgn_tcp_fsm[CGN_TCP_STATE_COUNT][CGN_DIR_SZ][CGN_TCP_EVENT_COUNT] = { }, }, [CGN_TCP_STATE_CS_FIN_RCV] = { - [CGN_DIR_FORW] = { + [CGN_DIR_OUT] = { [CGN_TCP_EVENT_NONE] = 0, [CGN_TCP_EVENT_SYN] = 0, [CGN_TCP_EVENT_RST] = 0, @@ -146,7 +146,7 @@ cgn_tcp_fsm[CGN_TCP_STATE_COUNT][CGN_DIR_SZ][CGN_TCP_EVENT_COUNT] = { [CGN_TCP_EVENT_FIN] = 0, [CGN_TCP_EVENT_TO] = CGN_TCP_STATE_CLOSED, }, - [CGN_DIR_BACK] = { + [CGN_DIR_IN] = { [CGN_TCP_EVENT_NONE] = 0, [CGN_TCP_EVENT_SYN] = 0, [CGN_TCP_EVENT_RST] = 0, @@ -163,36 +163,36 @@ cgn_tcp_fsm[CGN_TCP_STATE_COUNT][CGN_DIR_SZ][CGN_TCP_EVENT_COUNT] = { static uint8_t cgn_sess_fsm[CGN_SESS_STATE_COUNT][CGN_DIR_SZ][CGN_SESS_EVENT_COUNT] = { [CGN_SESS_STATE_NONE] = { - [CGN_DIR_FORW] = { 0 }, - [CGN_DIR_BACK] = { 0 }, + [CGN_DIR_OUT] = { 0 }, + [CGN_DIR_IN] = { 0 }, }, [CGN_SESS_STATE_CLOSED] = { - [CGN_DIR_FORW] = { + [CGN_DIR_OUT] = { [CGN_SESS_EVENT_NONE] = 0, [CGN_SESS_EVENT_PKT] = CGN_SESS_STATE_INIT, [CGN_SESS_EVENT_TO] = 0, }, - [CGN_DIR_BACK] = { 0 }, + [CGN_DIR_IN] = { 0 }, }, [CGN_SESS_STATE_INIT] = { - [CGN_DIR_FORW] = { + [CGN_DIR_OUT] = { [CGN_SESS_EVENT_NONE] = 0, [CGN_SESS_EVENT_PKT] = 0, [CGN_SESS_EVENT_TO] = CGN_SESS_STATE_CLOSED, }, - [CGN_DIR_BACK] = { + [CGN_DIR_IN] = { [CGN_SESS_EVENT_NONE] = 0, [CGN_SESS_EVENT_PKT] = CGN_SESS_STATE_ESTABLISHED, [CGN_SESS_EVENT_TO] = CGN_SESS_STATE_CLOSED, }, }, 
[CGN_SESS_STATE_ESTABLISHED] = { - [CGN_DIR_FORW] = { + [CGN_DIR_OUT] = { [CGN_SESS_EVENT_NONE] = 0, [CGN_SESS_EVENT_PKT] = 0, [CGN_SESS_EVENT_TO] = CGN_SESS_STATE_CLOSED, }, - [CGN_DIR_BACK] = { + [CGN_DIR_IN] = { [CGN_SESS_EVENT_NONE] = 0, [CGN_SESS_EVENT_PKT] = 0, [CGN_SESS_EVENT_TO] = CGN_SESS_STATE_CLOSED, @@ -202,20 +202,23 @@ cgn_sess_fsm[CGN_SESS_STATE_COUNT][CGN_DIR_SZ][CGN_SESS_EVENT_COUNT] = { /* - * Other session expiry times + * Default non-TCP or non-UDP session expiry times */ uint32_t cgn_sess_other_etime[CGN_ETIME_COUNT] = { [CGN_ETIME_OPENING] = CGN_DEF_ETIME_OTHER_OPENING, [CGN_ETIME_ESTBD] = CGN_DEF_ETIME_OTHER_ESTBD, }; +/* + * Default UDP session expiry times + */ uint32_t cgn_sess_udp_etime[CGN_ETIME_COUNT] = { [CGN_ETIME_OPENING] = CGN_DEF_ETIME_UDP_OPENING, [CGN_ETIME_ESTBD] = CGN_DEF_ETIME_UDP_ESTBD, }; /* - * Non-TCP session expiry times + * Default TCP session expiry times */ uint32_t cgn_sess_tcp_etime[CGN_ETIME_TCP_COUNT] = { [CGN_ETIME_TCP_OPENING] = CGN_DEF_ETIME_TCP_OPENING, @@ -223,29 +226,59 @@ uint32_t cgn_sess_tcp_etime[CGN_ETIME_TCP_COUNT] = { [CGN_ETIME_TCP_CLOSING] = CGN_DEF_ETIME_TCP_CLOSING, }; +/* + * Port-dependent session expiry times in seconds. + * + * Per-port expiry times for TCP and UDP will override the Established state + * default, if specified and if the original outbound dest port is known. + * + * The dest port will always be known for 2-tuple sessions. It should also be + * known for 3-tuple sessions, but will be reset to 0 if multiple flows are + * using that 3-tuple session. + */ +uint32_t cgn_port_tcp_etime[USHRT_MAX + 1]; +uint32_t cgn_port_udp_etime[USHRT_MAX + 1]; + +void cgn_cgn_port_tcp_etime_set(uint16_t port, uint32_t timeout) +{ + cgn_port_tcp_etime[port] = timeout; +} + + +void cgn_cgn_port_udp_etime_set(uint16_t port, uint32_t timeout) +{ + cgn_port_udp_etime[port] = timeout; +} +/* + * port is the outbound dest port or inbound source port. It is in host byte + * order. 
+ */ void -cgn_sess_state_init(struct cgn_state *st, uint8_t proto) +cgn_sess_state_init(struct cgn_state *st, enum nat_proto proto, uint16_t port) { st->st_state = CGN_SESS_STATE_CLOSED; st->st_proto = proto; + st->st_dst_port = port; rte_atomic16_clear(&st->st_idle); rte_spinlock_init(&st->st_lock); } /* * Evaluate session state + * + * start_time Session start time, unix epoch microseconds */ void -cgn_sess_state_inspect(struct cgn_state *st, struct cgn_packet *cpk, int dir, - uint64_t start_time) +cgn_sess_state_inspect(struct cgn_state *st, struct cgn_packet *cpk, + enum cgn_dir dir, uint64_t start_time) { uint8_t new; rte_spinlock_lock(&st->st_lock); if (st->st_proto == NAT_PROTO_TCP) { - bool forw = (dir == CGN_DIR_FORW); + bool forw = (dir == CGN_DIR_OUT); enum cgn_tcp_event event; uint64_t rtt; @@ -255,12 +288,18 @@ cgn_sess_state_inspect(struct cgn_state *st, struct cgn_packet *cpk, int dir, */ /* hist_bit relies on these asserts */ - assert(CGN_SESS_HIST_FFIN == 0x02); - assert(CGN_SESS_HIST_BFIN == 0x04); - assert(CGN_SESS_HIST_FRST == 0x08); - assert(CGN_SESS_HIST_BRST == 0x10); - assert(CGN_SESS_HIST_FACK == 0x20); - assert(CGN_SESS_HIST_BACK == 0x40); + static_assert(CGN_SESS_HIST_FFIN == 0x02, + "cgn sess hist flag is wrong"); + static_assert(CGN_SESS_HIST_BFIN == 0x04, + "cgn sess hist flag is wrong"); + static_assert(CGN_SESS_HIST_FRST == 0x08, + "cgn sess hist flag is wrong"); + static_assert(CGN_SESS_HIST_BRST == 0x10, + "cgn sess hist flag is wrong"); + static_assert(CGN_SESS_HIST_FACK == 0x20, + "cgn sess hist flag is wrong"); + static_assert(CGN_SESS_HIST_BACK == 0x40, + "cgn sess hist flag is wrong"); uint8_t hist_bit = 0x02; if (!forw) @@ -281,8 +320,8 @@ cgn_sess_state_inspect(struct cgn_state *st, struct cgn_packet *cpk, int dir, /* External rtt. Look for incoming SYN-ACK. 
*/ if (!forw && (cpk->cpk_tcp_flags & TH_ACK)) { - rtt = soft_ticks - start_time; - st->st_ext_rtt = MIN(rtt, USHRT_MAX); + rtt = cgn_sess2_timestamp() - start_time; + st->st_ext_rtt = rtt; } } else if (cpk->cpk_tcp_flags & TH_FIN) { @@ -303,9 +342,10 @@ cgn_sess_state_inspect(struct cgn_state *st, struct cgn_packet *cpk, int dir, /* Int rtt. Look for first forw ACK */ if (forw) { - rtt = soft_ticks - start_time - + rtt = cgn_sess2_timestamp() - + start_time - st->st_ext_rtt; - st->st_int_rtt = MIN(rtt, USHRT_MAX); + st->st_int_rtt = rtt; } } @@ -337,8 +377,17 @@ cgn_sess_state_inspect(struct cgn_state *st, struct cgn_packet *cpk, int dir, /* * Get state-dependent expiry time + * + * For 3-tuple sessions, port is the original dest port in the first outbound + * packet. If the session has been used for more than one flow, then it will + * have been reset to 0. + * + * For 2-tuple sessions port is the outbound dest port. + * + * port is in host byte order. */ -uint32_t cgn_sess_state_expiry_time(uint8_t proto, uint8_t state) +uint32_t cgn_sess_state_expiry_time(enum nat_proto proto, uint16_t port, + uint8_t state) { uint32_t etime; @@ -346,16 +395,22 @@ uint32_t cgn_sess_state_expiry_time(uint8_t proto, uint8_t state) return 0; if (proto == NAT_PROTO_TCP) { - if (state == CGN_TCP_STATE_ESTABLISHED) - etime = cgn_sess_tcp_etime[CGN_ETIME_TCP_ESTBD]; - else if (state == CGN_TCP_STATE_INIT) + if (state == CGN_TCP_STATE_ESTABLISHED) { + if (unlikely(cgn_port_tcp_etime[port] > 0)) + etime = cgn_port_tcp_etime[port]; + else + etime = cgn_sess_tcp_etime[CGN_ETIME_TCP_ESTBD]; + } else if (state == CGN_TCP_STATE_INIT) etime = cgn_sess_tcp_etime[CGN_ETIME_TCP_OPENING]; else etime = cgn_sess_tcp_etime[CGN_ETIME_TCP_CLOSING]; } else if (proto == NAT_PROTO_UDP) { - if (state == CGN_SESS_STATE_ESTABLISHED) - etime = cgn_sess_udp_etime[CGN_ETIME_ESTBD]; - else + if (state == CGN_SESS_STATE_ESTABLISHED) { + if (unlikely(cgn_port_udp_etime[port] > 0)) + etime = 
cgn_port_udp_etime[port]; + else + etime = cgn_sess_udp_etime[CGN_ETIME_ESTBD]; + } else etime = cgn_sess_udp_etime[CGN_ETIME_OPENING]; } else { if (state == CGN_SESS_STATE_ESTABLISHED) @@ -367,34 +422,35 @@ uint32_t cgn_sess_state_expiry_time(uint8_t proto, uint8_t state) } /* - * Timeout event for 2-tuple session. Returns timeout value for state - * (regardless of if it changed or not). + * Timeout event for 2-tuple session. Returns true if state is Closed. */ -uint32_t cgn_sess_state_timeout(struct cgn_state *st) +bool cgn_sess_state_timeout(struct cgn_state *st) { uint8_t new; - uint32_t etime; + bool closed; rte_spinlock_lock(&st->st_lock); if (st->st_proto == NAT_PROTO_TCP) { - new = cgn_tcp_fsm[st->st_state][CGN_DIR_FORW][CGN_TCP_EVENT_TO]; + new = cgn_tcp_fsm[st->st_state][CGN_DIR_OUT][CGN_TCP_EVENT_TO]; if (new != CGN_TCP_STATE_NONE && new != st->st_state) st->st_state = new; + + closed = (st->st_state == CGN_TCP_STATE_CLOSED); } else { - new = cgn_sess_fsm[st->st_state][CGN_DIR_FORW] + new = cgn_sess_fsm[st->st_state][CGN_DIR_OUT] [CGN_SESS_EVENT_TO]; if (new != CGN_SESS_STATE_NONE && new != st->st_state) st->st_state = new; - } - etime = cgn_sess_state_expiry_time(st->st_proto, new); + closed = (st->st_state == CGN_SESS_STATE_CLOSED); + } rte_spinlock_unlock(&st->st_lock); - return etime; + return closed; } /* @@ -440,9 +496,8 @@ void cgn_sess_state_jsonw(json_writer_t *json, struct cgn_state *st) if (st->st_proto == NAT_PROTO_TCP) { uint32_t rtt_ext, rtt_int; - /* millisecs to microsecs */ - rtt_ext = st->st_ext_rtt * 1000; - rtt_int = st->st_int_rtt * 1000; + rtt_ext = st->st_ext_rtt; + rtt_int = st->st_int_rtt; jsonw_uint_field(json, "rtt_ext", rtt_ext); jsonw_uint_field(json, "rtt_int", rtt_int); diff --git a/src/npf/cgnat/cgn_sess_state.h b/src/npf/cgnat/cgn_sess_state.h index f159b953..1452a422 100644 --- a/src/npf/cgnat/cgn_sess_state.h +++ b/src/npf/cgnat/cgn_sess_state.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, AT&T Intellectual Property. 
All rights reserved. + * Copyright (c) 2019-2021, AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only */ @@ -34,9 +34,10 @@ enum cgn_state_history { */ struct cgn_state { uint8_t st_state; - uint8_t st_proto; + enum nat_proto st_proto; uint8_t st_hist; - uint8_t st_pad1[3]; + uint8_t st_pad1[1]; + uint16_t st_dst_port; /* Outbound dest port */ rte_atomic16_t st_idle; /* keeps an estbd session alive */ /* @@ -45,12 +46,13 @@ struct cgn_state { * st_int_rtt - round-trip time from cgnat device to subscriber. * (Time from server SYN/ACK to subscriber ACK) * - * milliseconds. + * microseconds. */ - uint16_t st_ext_rtt; - uint16_t st_int_rtt; + uint64_t st_ext_rtt; + uint64_t st_int_rtt; rte_spinlock_t st_lock; + uint8_t st_pad2[4]; /* Pad to 32 bytes */ }; /* @@ -166,6 +168,11 @@ enum cgn_state_etime_tcp { extern uint32_t cgn_sess_tcp_etime[]; +/* + * Get or set TCP or UDP per-port Established expiry times + */ +void cgn_cgn_port_tcp_etime_set(uint16_t port, uint32_t timeout); +void cgn_cgn_port_udp_etime_set(uint16_t port, uint32_t timeout); static inline const char *cgn_tcp_state_str(enum cgn_tcp_state state) { @@ -246,40 +253,42 @@ static inline const char *cgn_sess_event_str(enum cgn_sess_event event) return "???"; } -static inline const char *cgn_dir_str(uint dir) +static inline const char *cgn_dir_str(enum cgn_dir dir) { switch (dir) { - case CGN_DIR_FORW: - return "FORW"; - case CGN_DIR_BACK: - return "BACK"; + case CGN_DIR_OUT: + return "OUT"; + case CGN_DIR_IN: + return "IN"; }; return "???"; } /* Initialize session state */ -void cgn_sess_state_init(struct cgn_state *st, uint8_t proto); +void cgn_sess_state_init(struct cgn_state *st, enum nat_proto proto, + uint16_t port); /* * Evaluate session state for packet * - * statep Pointer to state variable in 3-tuple or 5-tuple session + * st Pointer to state variable in 3-tuple or 5-tuple session * cpk Packet decomposition * dir Forwards or backwards + * start_time Session 
start time, unix epoch microseconds */ -void cgn_sess_state_inspect(struct cgn_state *ss, struct cgn_packet *cpk, - int dir, uint64_t start_time); +void cgn_sess_state_inspect(struct cgn_state *st, struct cgn_packet *cpk, + enum cgn_dir dir, uint64_t start_time); /* * Get state-dependent expiry time */ -uint32_t cgn_sess_state_expiry_time(uint8_t proto, uint8_t state); +uint32_t cgn_sess_state_expiry_time(enum nat_proto proto, uint16_t port, + uint8_t state); /* - * Timeout event. Returns timeout value for state (regardless of if it - * changed or not). + * Timeout event. Returns true if session is closed. */ -uint32_t cgn_sess_state_timeout(struct cgn_state *st); +bool cgn_sess_state_timeout(struct cgn_state *st); /* * Force a session to closed state diff --git a/src/npf/cgnat/cgn_session.c b/src/npf/cgnat/cgn_session.c index 778bf40b..5fa44fea 100644 --- a/src/npf/cgnat/cgn_session.c +++ b/src/npf/cgnat/cgn_session.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2019-2021, AT&T Intellectual Property. All rights reserved. 
* * SPDX-License-Identifier: LGPL-2.1-only */ @@ -32,14 +32,16 @@ #include #include #include -#include #include "compiler.h" -#include "pktmbuf.h" #include "if_var.h" -#include "vplane_log.h" -#include "util.h" #include "in_cksum.h" +#include "lcore_sched.h" +#include "pktmbuf_internal.h" +#include "rcu.h" +#include "util.h" +#include "vplane_debug.h" +#include "vplane_log.h" #include "npf/npf_addrgrp.h" #include "npf/nat/nat_pool_public.h" @@ -47,7 +49,9 @@ #include "npf/cgnat/cgn.h" #include "npf/apm/apm.h" #include "npf/cgnat/cgn_cmd_cfg.h" -#include "npf/cgnat/cgn_errno.h" +#include "npf/cgnat/cgn_rc.h" +#include "npf/cgnat/cgn_if.h" +#include "npf/cgnat/cgn_hash_key.h" #include "npf/cgnat/cgn_limits.h" #include "npf/cgnat/cgn_log.h" #include "npf/cgnat/cgn_map.h" @@ -69,17 +73,21 @@ struct cgn_sentry { rte_atomic64_t ce_bytes; uint64_t ce_pkts_tot; uint64_t ce_bytes_tot; - - uint32_t ce_ifindex; /* Interface index */ - uint32_t ce_addr; /* Address (net order) */ - uint16_t ce_port; /* port or id (net order) */ - uint8_t ce_ipproto; /* not cgn_proto */ + struct cgn_3tuple_key ce_key; /* hash key (12 bytes) */ uint8_t ce_active; /* True if sentry in table */ - uint8_t ce_expired; /* Expired session */ uint8_t ce_established; uint8_t ce_pad2[2]; }; +/* + * ce_ifindex (ce_key.k_ifindex) defaults to a vrf ID based value. + */ +#define ce_addr ce_key.k_addr +#define ce_ifindex ce_key.k_ifindex +#define ce_port ce_key.k_port +#define ce_ipproto ce_key.k_ipproto +#define ce_expired ce_key.k_expired + /* * cgnat session. 
*/ @@ -90,125 +98,122 @@ struct cgn_session { struct cgn_sentry cs_back_entry; /* --- cacheline 2 boundary (128 bytes) --- */ - uint8_t cs_gc_pass; - uint8_t cs_pad0[1]; - rte_atomic16_t cs_refcnt; /* reference count */ - uint32_t cs_id; /* unique identifier */ vrfid_t cs_vrfid; /* VRF id (uint32_t) */ uint32_t cs_etime; /* expiry time */ - struct cds_lfht *cs_sess2_ht; /* Nested hash table */ struct cgn_source *cs_src; /* Back ptr to subscriber */ - uint64_t cs_start_time; - uint64_t cs_end_time; - rte_atomic32_t cs_sess2_id; /* sess2 ID resource */ - rte_atomic16_t cs_sess2_used; /* sess2 count */ - uint8_t cs_sess2_full; /* sess2 full */ - /* Logging parameter to be passed to any nested 2-tuple sessions */ - uint8_t cs_log_start:1; - uint8_t cs_log_end:1; + /* Dest addr and port table and state (32 bytes) */ + struct cgn_sess_s2 cs_s2; /* Session instantiated by map cmd and/or a packet */ uint8_t cs_pkt_instd:1; uint8_t cs_map_instd:1; + uint8_t cs_pad1[1]; uint16_t cs_l3_chk_delta; uint16_t cs_l4_chk_delta; - uint16_t cs_map_flag; /* True if mapping exists */ rte_atomic16_t cs_idle; + + rte_atomic64_t cs_unk_pkts; + /* --- cacheline 3 boundary (192 bytes) --- */ + uint64_t cs_unk_pkts_tot; struct rcu_head cs_rcu_head; /* 16 bytes */ - uint16_t cs_log_periodic; + uint64_t cs_start_time; /* unix epoch us */ + uint64_t cs_end_time; /* unix epoch us */ - /* timeout for a map instantiated session */ - uint32_t cs_map_timeout; + uint32_t cs_id; /* unique identifier */ + uint32_t cs_ifindex; /* Copy of ifp->ifindex */ + rte_atomic16_t cs_refcnt; /* reference count */ + uint16_t cs_map_flag; /* True if mapping exists */ + uint8_t cs_gc_pass; - uint8_t cs_pad3[42]; /* pad to cacheline boundary */ + uint8_t cs_pad3[11]; /* pad to cacheline boundary */ /* --- cacheline 4 boundary (256 bytes) --- */ }; +static_assert(offsetof(struct cgn_session, cs_back_entry) == 64, + "cgn_session structure: first cache line size exceeded"); +static_assert(offsetof(struct cgn_session, 
cs_vrfid) == 128, + "cgn_session structure: second cache line size exceeded"); +static_assert(offsetof(struct cgn_session, cs_unk_pkts_tot) == 192, + "cgn_session structure: third cache line size exceeded"); +static_assert(sizeof(struct cgn_session) == 256, + "cgn_session structure: larger than expected"); /* session hash tables */ -struct cds_lfht *cgn_sess_ht[CGN_DIR_SZ]; +static struct cds_lfht *cgn_sess_ht[CGN_DIR_SZ]; /* GC Timer */ -struct rte_timer cgn_gc_timer; +static struct rte_timer cgn_gc_timer; -/* cs_id resource */ +/* + * Monotonically increasing count. Used to assign a value to a new session + * in the sessions cs_id object. Wraps when it reaches max. + */ static rte_atomic32_t cgn_id_resource; -/* max sessions, and sessions used */ -int32_t cgn_sessions_max = CGN_SESSIONS_MAX; -int16_t cgn_dest_sessions_max = CGN_DEST_SESSIONS_MAX; - -/* Global count of all 3-tuple sessions */ -rte_atomic32_t cgn_sessions_used; - -/* Global count of all 5-tuple sessions */ -rte_atomic32_t cgn_sess2_used; - -/* Set true when table is full. Re-evaluated after GC. */ -bool cgn_session_table_full; /* Forward references */ static void cgn_session_expire_all(bool clear_map, bool restart_timer); +static void session_table_threshold_timer_expiry( + struct rte_timer *timer __unused, + void *arg __unused); + /* Time prototypes and functions. 
*/ static void cgn_session_start_timer(void); static void cgn_session_stop_timer(void); -/* - * get current monotonic time in approximate seconds - */ -static inline uint32_t cgn_get_time_uptime(void) -{ - /* divide millisecond soft_ticks by 1024 */ - return (uint32_t)(soft_ticks >> 10); -} +/* Session table threshold, time, and timer */ +static int32_t session_table_threshold_cfg; /* configured percent */ +static int32_t session_table_threshold; /* threshold value */ +static bool session_table_threshold_been_below = true; +static uint32_t session_table_threshold_time; +static struct rte_timer session_table_threshold_timer; -/* Is t0 after t1? */ -static inline int time_after(uint32_t t0, uint32_t t1) -{ - return (int)(t0 - t1) >= 0; +/* Session logging thread defines and variables */ +#define CGNAT_MAX_HELPER_INTERVAL_US 1000000 /* 1 second in microseconds */ + +#define ASSERT_CGN_HELPER_THREAD() \ +{ \ + if (!is_cgn_helper_thread()) \ + rte_panic("not on cgnat helper thread\n"); \ } -/* - * Basic log string for a 3-tuple session - */ -static int cgn_session_log_str(struct cgn_session *cse, bool incl_trans, - char *log_str, uint log_str_sz) -{ -#define ADDR_CHARS 16 - char str1[ADDR_CHARS]; - struct ifnet *ifp; - uint32_t pid = cgn_session_id(cse); - uint32_t int_src = cgn_session_forw_addr(cse); - uint16_t int_port = cgn_session_forw_id(cse); - uint len; +#define CGN_HELPER_INVALID_CORE_NUM UINT_MAX - ifp = ifnet_byifindex(cgn_session_ifindex(cse)); +/* The core number requested due to configuration */ +static unsigned int cgn_desired_helper_core_num = CGN_HELPER_INVALID_CORE_NUM; - len = snprintf(log_str, log_str_sz, - "ifname=%s session-id=%u proto=%u " - "addr=%s port=%u", - ifp ? 
ifp->if_name : "-", pid, - cse->cs_forw_entry.ce_ipproto, - cgn_addrstr(ntohl(int_src), str1, ADDR_CHARS), - ntohs(int_port)); +/* The core number currently running on */ +static unsigned int cgn_helper_core_num = CGN_HELPER_INVALID_CORE_NUM; - if (incl_trans) { - uint32_t ext_src = cgn_session_back_addr(cse); - uint16_t ext_port = cgn_session_back_id(cse); +static pthread_t cgn_helper_pthread; +static unsigned int cgn_sleep_interval; - len += snprintf(log_str + len, log_str_sz - len, - " cgn-addr=%s cgn-port=%u", - cgn_addrstr(ntohl(ext_src), str1, ADDR_CHARS), - ntohs(ext_port)); - } +/* Structure counting logs sent */ +struct lcore_cgnat { + uint64_t logs; /* CGNAT logs transmitted on this core */ +}; - return len; +/* Same size as ptr so no value in doing alloc when first configured */ +static struct lcore_cgnat cgn_per_lcore[RTE_MAX_LCORE]; + +static struct lcore_cgnat *lcore_conf_get_cgnat(unsigned int lcore_id) +{ + if (lcore_id >= RTE_MAX_LCORE) + return NULL; + + return &cgn_per_lcore[lcore_id]; +} + +/* Is t0 after t1? 
*/ +static inline int time_after(uint32_t t0, uint32_t t1) +{ + return (int)(t0 - t1) >= 0; } /* @@ -217,81 +222,89 @@ static int cgn_session_log_str(struct cgn_session *cse, bool incl_trans, */ void cgn_session_update_stats(struct cgn_session *cse, uint32_t pkts_out, uint32_t bytes_out, - uint32_t pkts_in, uint32_t bytes_in, - bool expired) + uint32_t pkts_in, uint32_t bytes_in) { - if (expired) - cgn_source_stats_sess_destroyed(cse->cs_src); + if (pkts_out) { + rte_atomic64_add(&cse->cs_forw_entry.ce_pkts, pkts_out); + rte_atomic64_add(&cse->cs_forw_entry.ce_bytes, bytes_out); + } - rte_atomic64_add(&cse->cs_forw_entry.ce_pkts, pkts_out); - rte_atomic64_add(&cse->cs_forw_entry.ce_bytes, bytes_out); - rte_atomic64_add(&cse->cs_back_entry.ce_pkts, pkts_in); - rte_atomic64_add(&cse->cs_back_entry.ce_bytes, bytes_in); + if (pkts_in) { + rte_atomic64_add(&cse->cs_back_entry.ce_pkts, pkts_in); + rte_atomic64_add(&cse->cs_back_entry.ce_bytes, bytes_in); + } } /* * Called by session gc. */ -static void -cgn_session_stats_periodic(struct cgn_session *cse) +static inline void +cgn_session_stats_periodic_inline(struct cgn_session *cse) { - uint64_t pkts_out, pkts_in, bytes_out, bytes_in; + uint64_t pkts_out, pkts_in, bytes_out = 0, bytes_in = 0; + uint64_t unk_pkts_in; pkts_out = rte_atomic64_exchange( (volatile uint64_t *)&cse->cs_forw_entry.ce_pkts.cnt, 0UL); - bytes_out = rte_atomic64_exchange( - (volatile uint64_t *)&cse->cs_forw_entry.ce_bytes.cnt, 0UL); + if (pkts_out) { + bytes_out = rte_atomic64_exchange( + (volatile uint64_t *)&cse->cs_forw_entry.ce_bytes.cnt, + 0UL); + + cse->cs_forw_entry.ce_pkts_tot += pkts_out; + cse->cs_forw_entry.ce_bytes_tot += bytes_out; + } + + /* + * unk_pkts are inbound pkts that matched a 3-tuple session but not a + * 2-tuple session (when 2-tuple are enabled). 
+ */ + unk_pkts_in = rte_atomic64_exchange( + (volatile uint64_t *)&cse->cs_unk_pkts.cnt, 0UL); + if (unlikely(unk_pkts_in)) + cse->cs_unk_pkts_tot += unk_pkts_in; pkts_in = rte_atomic64_exchange( (volatile uint64_t *)&cse->cs_back_entry.ce_pkts.cnt, 0UL); - bytes_in = rte_atomic64_exchange( - (volatile uint64_t *)&cse->cs_back_entry.ce_bytes.cnt, 0UL); + if (pkts_in) { + bytes_in = rte_atomic64_exchange( + (volatile uint64_t *)&cse->cs_back_entry.ce_bytes.cnt, + 0UL); - cse->cs_forw_entry.ce_pkts_tot += pkts_out; - cse->cs_forw_entry.ce_bytes_tot += bytes_out; - cse->cs_back_entry.ce_pkts_tot += pkts_in; - cse->cs_back_entry.ce_bytes_tot += bytes_in; + cse->cs_back_entry.ce_pkts_tot += pkts_in; + cse->cs_back_entry.ce_bytes_tot += bytes_in; + } /* Add stats to source totals */ - cgn_source_update_stats(cse->cs_src, pkts_out, bytes_out, - pkts_in, bytes_in); + if (pkts_out || pkts_in || unk_pkts_in) + cgn_source_update_stats(cse->cs_src, pkts_out, bytes_out, + pkts_in, bytes_in, unk_pkts_in); } -/* Count hash table nodes */ -static ulong cgn_session_table_nodes(struct cds_lfht *ht) -{ - unsigned long count; - long dummy; - - if (!ht) - return 0; - - cds_lfht_count_nodes(ht, &dummy, &count, &dummy); - return count; -} - -ulong cgn_session_count(void) +static void +cgn_session_stats_periodic(struct cgn_session *cse) { - return cgn_session_table_nodes(cgn_sess_ht[CGN_DIR_FORW]); + cgn_session_stats_periodic_inline(cse); } static inline struct cgn_session * -sentry2session(const struct cgn_sentry *ce, int dir) +sentry2session(const struct cgn_sentry *ce, enum cgn_dir dir) { - if (dir == CGN_DIR_FORW) + if (dir == CGN_DIR_OUT) return caa_container_of(ce, struct cgn_session, cs_forw_entry); - else - return caa_container_of(ce, struct cgn_session, cs_back_entry); + + return caa_container_of(ce, struct cgn_session, cs_back_entry); } -static inline struct cgn_sentry *dir2sentry(struct cgn_session *cse, int dir) +static inline struct cgn_sentry *dir2sentry(struct cgn_session 
*cse, + enum cgn_dir dir) { - if (dir == CGN_DIR_FORW) + if (dir == CGN_DIR_OUT) return &cse->cs_forw_entry; - else - return &cse->cs_back_entry; + + return &cse->cs_back_entry; } uint32_t cgn_session_forw_addr(struct cgn_session *cse) @@ -304,6 +317,11 @@ uint32_t cgn_session_forw_id(struct cgn_session *cse) return cse->cs_forw_entry.ce_port; } +uint8_t cgn_session_ipproto(struct cgn_session *cse) +{ + return cse->cs_forw_entry.ce_ipproto; +} + uint32_t cgn_session_back_addr(struct cgn_session *cse) { return cse->cs_back_entry.ce_addr; @@ -343,11 +361,6 @@ uint16_t cgn_session_get_l4_delta(const struct cgn_session *cse, bool forw) return forw ? cse->cs_l4_chk_delta : ~cse->cs_l4_chk_delta; } -uint32_t cgn_session_get_ifindex(const struct cgn_session *cse) -{ - return cse->cs_forw_entry.ce_ifindex; -} - /* * cgn_session_create */ @@ -367,8 +380,8 @@ static struct cgn_session *cgn_session_create(int *error) return NULL; } - assert(cse == sentry2session(&cse->cs_forw_entry, CGN_DIR_FORW)); - assert(cse == sentry2session(&cse->cs_back_entry, CGN_DIR_BACK)); + assert(cse == sentry2session(&cse->cs_forw_entry, CGN_DIR_OUT)); + assert(cse == sentry2session(&cse->cs_back_entry, CGN_DIR_IN)); return cse; } @@ -394,31 +407,28 @@ void cgn_session_destroy(struct cgn_session *cse, bool rcu_free) if (!cse) return; - /* Release address and port mapping */ - uint32_t taddr, oaddr; - uint16_t tport, oport; - struct nat_pool *np; - uint8_t proto; + assert(cse->cs_src); - /* todo - store forw/rev flag at session creation time */ - cgn_session_get_back(cse, &taddr, &tport); - cgn_session_get_forw(cse, &oaddr, &oport); - proto = nat_proto_from_ipproto(cse->cs_forw_entry.ce_ipproto); + /* Release mapping if one exists */ + if (rte_atomic16_cmpset(&cse->cs_map_flag, true, false)) { - np = cgn_source_get_pool(cse->cs_src); - assert(np); + /* Release address and port mapping */ + struct cgn_map cmi = {0}; - /* Release mapping if one exists */ - if (rte_atomic16_cmpset(&cse->cs_map_flag, 
true, false)) - cgn_map_put(np, cse->cs_vrfid, CGN_DIR_OUT, - proto, oaddr, taddr, tport); + cgn_session_get_back(cse, &cmi.cmi_taddr, &cmi.cmi_tid); + cmi.cmi_reserved = true; + cmi.cmi_src = cse->cs_src; + cmi.cmi_proto = nat_proto_from_ipproto( + cse->cs_forw_entry.ce_ipproto); + + cgn_map_put(&cmi, cse->cs_vrfid); + } /* Release reference on source */ cgn_source_put(cse->cs_src); - /* Destroy nested hash table? */ - if (cse->cs_sess2_ht) - cgn_sess2_ht_destroy(&cse->cs_sess2_ht); + /* Disable a session from recording dest addr and port */ + cgn_sess_s2_disable(&cse->cs_s2); if (rcu_free) call_rcu(&cse->cs_rcu_head, cgn_session_rcu_free); @@ -427,155 +437,391 @@ void cgn_session_destroy(struct cgn_session *cse, bool rcu_free) } /* - * cgn_session_get: Get a reference to a cgnat session + * Set maximum CGN sessions; + * recalc session table threshold. */ -struct cgn_session *cgn_session_get(struct cgn_session *cse) +void cgn_session_set_max(int32_t val) { - if (cse) - rte_atomic16_inc(&cse->cs_refcnt); - return cse; + if (val > CGN_SESSIONS_MAX) + val = CGN_SESSIONS_MAX; + + cgn_sessions_max = val; + session_table_threshold_set(session_table_threshold_cfg, + session_table_threshold_time); } /* - * cgn_session_put: release a reference, which might allow G/C thread - * to destroy this session. + * Generate session table threshold log + * and restart timer if required. 
*/ -void cgn_session_put(struct cgn_session *cse) +static void session_table_threshold_log(int32_t val, int32_t max) { - if (cse) { - assert(rte_atomic16_read(&cse->cs_refcnt) > 0); - rte_atomic16_dec(&cse->cs_refcnt); + cgn_log_resource_session_table( + CGN_RESOURCE_THRESHOLD, val, max); + + if (session_table_threshold_time) + rte_timer_reset(&session_table_threshold_timer, + session_table_threshold_time * rte_get_timer_hz(), + SINGLE, rte_get_master_lcore(), + session_table_threshold_timer_expiry, + NULL); +} + +/* + * Warn if over the configured session table threshold + */ +static void session_table_threshold_check(int32_t val) +{ + if (session_table_threshold && + session_table_threshold_been_below && + (val >= session_table_threshold) && + (!rte_timer_pending(&session_table_threshold_timer))) { + + session_table_threshold_been_below = false; + session_table_threshold_log(val, cgn_sessions_max); + } +} + +/* + * Set session table threshold + * + * threshold is in percent; interval is in seconds. + */ +void session_table_threshold_set(int32_t threshold, uint32_t interval) +{ + rte_timer_stop(&session_table_threshold_timer); + session_table_threshold_cfg = threshold; + session_table_threshold = + (cgn_sessions_max * threshold + 99) / 100; + session_table_threshold_time = interval; + session_table_threshold_been_below = true; + + /* Warn if over configured threshold */ + int32_t val = rte_atomic32_read(&cgn_sessions_used); + session_table_threshold_check(val); +} + +/* + * Handle session table threshold timer expiry. 
+ */ +static void session_table_threshold_timer_expiry( + struct rte_timer *timer __unused, + void *arg __unused) +{ + int32_t val = rte_atomic32_read(&cgn_sessions_used); + + if (session_table_threshold && + (val >= session_table_threshold)) { + + session_table_threshold_log(val, cgn_sessions_max); } } +/* + * Mark the session table as full + */ +static void cgn_session_set_full(void) +{ + cgn_log_resource_session_table(CGN_RESOURCE_FULL, + rte_atomic32_read(&cgn_sessions_used), + cgn_sessions_max); + + cgn_session_table_full = true; +} + +/* + * Mark the session table as available. Called after the garbage collection + * walk if sessions used in now below max. + */ +static void cgn_session_set_available(void) +{ + cgn_log_resource_session_table(CGN_RESOURCE_AVAILABLE, + rte_atomic32_read(&cgn_sessions_used), + cgn_sessions_max); + + cgn_session_table_full = false; +} + /* * Is there space in the session table? + * + * We reserve a slot *before* creating the session. If the session + * subsequently fails to be activated for any reason then we MUST call + * cgn_session_slot_put to return the reserved slot. */ static bool cgn_session_slot_get(void) { - if (rte_atomic32_add_return(&cgn_sessions_used, 1) <= cgn_sessions_max) + int32_t val = rte_atomic32_add_return(&cgn_sessions_used, 1); + + /* Warn if over configured threshold */ + session_table_threshold_check(val); + + /* Error if table is full */ + + if (val <= cgn_sessions_max) return true; rte_atomic32_dec(&cgn_sessions_used); if (!cgn_session_table_full) - RTE_LOG(ERR, CGNAT, "SESSION_TABLE_FULL count=%u/%u\n", - rte_atomic32_read(&cgn_sessions_used), - cgn_sessions_max); - - /* - * Mark session table as full. This is reset in the gc when the - * session count reduces. 
- */ - cgn_session_table_full = true; + cgn_session_set_full(); return false; } static void cgn_session_slot_put(void) { - rte_atomic32_dec(&cgn_sessions_used); + int32_t val = rte_atomic32_sub_return(&cgn_sessions_used, 1); + + if (val < session_table_threshold) + session_table_threshold_been_below = true; } /* - * cgn_session_establish + * cgn_session_establish. Sessions are only ever created by outbound + * flows/ctx. */ struct cgn_session * -cgn_session_establish(struct cgn_packet *cpk, int dir, - uint32_t taddr, uint16_t tid, int *error, - struct cgn_source *src) +cgn_session_establish(struct cgn_packet *cpk, struct cgn_map *cmi, + int *error) { struct cgn_session *cse; - struct cgn_policy *cp = src->sr_policy; - uint32_t oaddr; - uint16_t oid; - if (dir == CGN_DIR_OUT) { - oaddr = cpk->cpk_saddr; - oid = cpk->cpk_sid; - } else { - oaddr = cpk->cpk_daddr; - oid = cpk->cpk_did; + /* + * Reserve a slot from the counters. The slot MUST be returned if an + * error occurs at any point before the session is activated. + */ + if (unlikely(!cgn_session_slot_get())) { + *error = -CGN_S1_ENOSPC; + return NULL; } cse = cgn_session_create(error); - if (!cse) + if (!cse) { + /* Return reserved slot */ + cgn_session_slot_put(); return NULL; + } /* * Populate forw sentry. Extract source addr and port from cache. + * + * Note that cpk_key.k_ifindex may be different from cpk_ifindex. The + * latter is always ifp->if_index whereas cpk_key.k_ifindex will + * either be ifp->if_index or a cgnat interface group index value. 
*/ - cse->cs_forw_entry.ce_ifindex = cpk->cpk_ifindex; + cse->cs_forw_entry.ce_ifindex = cpk->cpk_key.k_ifindex; cse->cs_forw_entry.ce_ipproto = cpk->cpk_ipproto; - cse->cs_forw_entry.ce_addr = oaddr; - cse->cs_forw_entry.ce_port = oid; + cse->cs_forw_entry.ce_addr = cmi->cmi_oaddr; + cse->cs_forw_entry.ce_port = cmi->cmi_oid; cse->cs_forw_entry.ce_established = false; /* Populate back entry */ - cse->cs_back_entry.ce_ifindex = cpk->cpk_ifindex; + cse->cs_back_entry.ce_ifindex = cpk->cpk_key.k_ifindex; cse->cs_back_entry.ce_ipproto = cpk->cpk_ipproto; - cse->cs_back_entry.ce_addr = taddr; - cse->cs_back_entry.ce_port = tid; + cse->cs_back_entry.ce_addr = cmi->cmi_taddr; + cse->cs_back_entry.ce_port = cmi->cmi_tid; cse->cs_back_entry.ce_established = false; rte_atomic16_set(&cse->cs_refcnt, 0); rte_atomic16_set(&cse->cs_idle, 0); cse->cs_vrfid = cpk->cpk_vrfid; - cse->cs_start_time = soft_ticks; - cse->cs_log_start = cp->cp_log_sess_start ? 1 : 0; - cse->cs_log_end = cp->cp_log_sess_end ? 1 : 0; - cse->cs_log_periodic = cp->cp_log_sess_periodic; + cse->cs_ifindex = cpk->cpk_ifindex; + cse->cs_start_time = unix_epoch_us; + + /* Was the session created by a packet or by map command? */ + cse->cs_pkt_instd = cpk->cpk_pkt_instd; + cse->cs_map_instd = !cpk->cpk_pkt_instd; /* calculate checksum deltas */ - const uint32_t *oip32 = (const uint32_t *)&oaddr; - const uint32_t *nip32 = (const uint32_t *)&taddr; + const uint32_t *oip32 = (const uint32_t *)&cmi->cmi_oaddr; + const uint32_t *nip32 = (const uint32_t *)&cmi->cmi_taddr; cse->cs_l3_chk_delta = ~ip_fixup32_cksum(0, *oip32, *nip32); - cse->cs_l4_chk_delta = ~ip_fixup16_cksum(0, oid, tid); + cse->cs_l4_chk_delta = ~ip_fixup16_cksum(0, cmi->cmi_oid, cmi->cmi_tid); /* - * Does session need nested 2-tuple table? + * Remember the dest port that created this session. This is unknown + * for PCP sessions. 
*/ - if (cgn_policy_record_dest(cp, oaddr, dir)) { - cse->cs_sess2_ht = cgn_sess2_ht_create(); - - if (!cse->cs_sess2_ht) { - *error = -CGN_S1_ENOMEM; - free(cse); - return NULL; - } - } + if (likely(cse->cs_pkt_instd)) + cse->cs_s2.cs2_dst_port = cpk->cpk_did; /* Take reference on source */ - cse->cs_src = cgn_source_get(src); + cse->cs_src = cgn_source_get(cmi->cmi_src); /* We already have a mapping */ cse->cs_map_flag = true; + /* The session now holds the mapping */ + cmi->cmi_reserved = false; + cse->cs_id = rte_atomic32_add_return(&cgn_id_resource, 1); return cse; } -bool cgn_session_log_start(struct cgn_session *cse) +/* + * Create a mapping and session via control plane. Used by unit-test and PCP. + * + * If pub_addr and pub_port are specified (!= 0) then we will try and obtain + * that mapping. pub_addr and pub_port are in network byte order. + */ +struct cgn_session * +cgn_session_map(struct ifnet *ifp, struct cgn_packet *cpk, + uint32_t pub_addr, uint16_t pub_port, int *error) { - return cse->cs_log_start; + struct cgn_session *cse, *in_cse = NULL; + struct cgn_policy *cp; + int rc = 0; + vrfid_t vrfid = cpk->cpk_vrfid; + + /* + * Currently only support both public address and port being + * specified, or neither being specified. + */ + if (!!pub_addr ^ !!pub_port) { + *error = -CGN_PCP_EINVAL; + return NULL; + } + + /* Look for existing forwards, or outbound, session */ + cse = cgn_session_lookup(&cpk->cpk_key, CGN_DIR_OUT); + + /* Look for existing backwards, or inbound, session */ + if (pub_addr && pub_port) { + in_cse = cgn_session_lookup(&cpk->cpk_key, CGN_DIR_IN); + + /* + * If the requested public address and port are currently + * in-use, and either there is no outbound session (cse==NULL) + * or the outbound session is using a different public address + * and port (in_cse!=cse), then we fail. 
+ */ + if (in_cse && in_cse != cse) { + *error = -CGN_PCP_ENOSPC; + return NULL; + } + } + + /* + * Are we refreshing, or trying to create, an existing session? + */ + if (cse) { + /* + * We have found an existing session matching the protocol, + * subscriber address, and subscriber port. + * + * We mark this as 'map instantiated' and return it to the + * user, regardless of any specific public address and port + * they specified. + */ + + /* clear idle flag */ + if (rte_atomic16_read(&cse->cs_idle) != 0) + rte_atomic16_clear(&cse->cs_idle); + + return cse; + } + + /* Mapping info */ + struct cgn_map cmi = { + .cmi_reserved = false, + .cmi_proto = cpk->cpk_proto, + .cmi_oid = cpk->cpk_sid, + .cmi_oaddr = cpk->cpk_saddr, + .cmi_tid = pub_port, + .cmi_taddr = pub_addr, + .cmi_src = NULL, + }; + + /* + * Lookup source address in policy list on the interface + */ + cp = cgn_if_find_policy_by_addr(ifp, cmi.cmi_oaddr); + if (!cp) { + *error = -CGN_PCY_ENOENT; + return NULL; + } + + /* Check if session table is full *before* getting a mapping. */ + if (unlikely(cgn_session_table_full)) { + *error = -CGN_S1_ENOSPC; + return NULL; + } + + /* + * Allocate public address and port. + * + * If a public address or port is not specified then we use the same + * function as packet flows, cgn_map_get. This obtains a mapping + * using the config in the relevant policy. + */ + if (cmi.cmi_taddr == 0 && cmi.cmi_tid == 0) + rc = cgn_map_get(&cmi, cp, vrfid); + else + /* Use specified public address and port */ + rc = cgn_map_get2(&cmi, cp, vrfid); + + if (rc) { + *error = rc; + return NULL; + } + + /* Create a session. 
*/ + cse = cgn_session_establish(cpk, &cmi, error); + if (!cse) + goto error; + + /* Check if we want to record sub-sessions */ + cgn_session_try_enable_sub_sess(cse, cp, cmi.cmi_oaddr); + + /* Add session to hash tables */ + rc = cgn_session_activate(cse, cpk, CGN_DIR_OUT); + + if (rc) { + *error = rc; + cgn_session_destroy(cse, false); + return NULL; + } + return cse; + +error: + if (cmi.cmi_reserved) + /* Release mapping */ + cgn_map_put(&cmi, vrfid); + + return NULL; } -bool cgn_session_log_end(struct cgn_session *cse) +/* + * Get pointer to the 3-tuple session that contains this cs2 structure + */ +struct cgn_session *cgn_sess_from_cs2(struct cgn_sess_s2 *cs2) { - return cse->cs_log_end; + struct cgn_session *cse = NULL; + + if (cs2) + cse = caa_container_of(cs2, struct cgn_session, cs_s2); + return cse; } -/* Units of session gc intervals (i.e. 10 secs) */ -uint16_t cgn_session_log_periodic(struct cgn_session *cse) +/* + * Get pointer to the subscriber of this cs2 structure + */ +struct cgn_source *cgn_src_from_cs2(struct cgn_sess_s2 *cs2) { - return cse->cs_log_periodic; + struct cgn_session *cse = NULL; + + if (cs2) + cse = caa_container_of(cs2, struct cgn_session, cs_s2); + + return cse ? cse->cs_src : NULL; } uint32_t cgn_session_ifindex(struct cgn_session *cse) { - return cse->cs_forw_entry.ce_ifindex; + return cse->cs_ifindex; } /* session ID. (not port) */ @@ -585,80 +831,43 @@ uint32_t cgn_session_id(struct cgn_session *cse) } static int cgn_sentry_insert(struct cgn_sentry *ce, struct cgn_sentry **old, - enum cgn_flow dir); -static void cgn_sentry_delete(struct cgn_sentry *ce, enum cgn_flow dir); + enum cgn_dir dir); +static void cgn_sentry_delete(struct cgn_sentry *ce, enum cgn_dir dir); /* - * Is there space in the nested session table? + * Is recording of destination address and port enabled for this 3-tuple + * session? 
*/ -static bool cgn_sess2_slot_get(struct cgn_session *cse) +static inline bool cgn_sess_s2_is_enabled(struct cgn_session *cse) { - if (rte_atomic16_add_return(&cse->cs_sess2_used, 1) <= - cgn_dest_sessions_max) { - /* Success */ - rte_atomic32_inc(&cgn_sess2_used); - return true; - } - - /* - * No slots available. Decrement cs_sess2_used again. - */ - rte_atomic16_dec(&cse->cs_sess2_used); - - if (net_ratelimit() && !cse->cs_sess2_full) { - char log_str[140]; - - cgn_session_log_str(cse, true, log_str, sizeof(log_str)); - - RTE_LOG(ERR, CGNAT, "DEST_SESSIONS_FULL count=%u %s\n", - rte_atomic16_read(&cse->cs_sess2_used), log_str); - } - - /* - * Mark nested session table as full. This is reset in the gc when - * the session count reduces. - */ - cse->cs_sess2_full = true; - - return false; -} - -void cgn_sess2_slot_put(struct cgn_session *cse) -{ - rte_atomic16_dec(&cse->cs_sess2_used); - rte_atomic32_dec(&cgn_sess2_used); + return cse->cs_s2.cs2_enbld; } /* - * Create a nested session + * Check if we can enable sub-sessions on this 3-tuple session */ -static int -cgn_sess2_establish_and_activate(struct cgn_session *cse, - struct cgn_packet *cpk, int dir) +void cgn_session_try_enable_sub_sess(struct cgn_session *cse, + struct cgn_policy *cp, uint32_t oaddr) { - struct cgn_sess2 *s2; - int rc; + struct cgn_sess_s2 *cs2 = &cse->cs_s2; - /* Reserve a slot from the counters */ - if (unlikely(!cgn_sess2_slot_get(cse))) - return -CGN_S2_ENOSPC; + /* Already enabled? 
*/ + if (cs2->cs2_enbld) + return; - s2 = cgn_sess2_establish(cse, cpk, &cse->cs_sess2_id, dir); - if (unlikely(!s2)) { - cgn_sess2_slot_put(cse); - return -CGN_S2_ENOMEM; - } + if (cgn_policy_record_dest(cp, oaddr)) { + cs2->cs2_enbld = true; - rc = cgn_sess2_activate(cse->cs_sess2_ht, s2); - if (unlikely(rc < 0)) { - /* Lost race to insert sess2 */ - cgn_sess2_slot_put(cse); - free(s2); - return rc; - } + /* + * The max value cannot change after the HT is created, so set + * it here from the user-configurable global. + */ + cs2->cs2_max = cgn_dest_sessions_max; - cgn_source_stats_sess_created(cse->cs_src); - return 0; + cs2->cs2_log_start = cp->cp_log_sess_start ? 1 : 0; + cs2->cs2_log_end = cp->cp_log_sess_end ? 1 : 0; + cs2->cs2_log_periodic = cp->cp_log_sess_periodic; + } } /* @@ -667,7 +876,7 @@ cgn_sess2_establish_and_activate(struct cgn_session *cse, * Activate new 3-tuple session. */ int cgn_session_activate(struct cgn_session *cse, - struct cgn_packet *cpk, int dir) + struct cgn_packet *cpk, enum cgn_dir dir) { struct cgn_sentry *old; int rc = 0; @@ -679,40 +888,53 @@ int cgn_session_activate(struct cgn_session *cse, if (cse->cs_forw_entry.ce_active) return 0; - /* Reserve a slot from the counters */ - if (unlikely(!cgn_session_slot_get())) - return -CGN_S1_ENOSPC; - /* Insert forw sentry into table */ - rc = cgn_sentry_insert(&cse->cs_forw_entry, &old, CGN_DIR_FORW); + rc = cgn_sentry_insert(&cse->cs_forw_entry, &old, CGN_DIR_OUT); if (unlikely(rc < 0)) { cgn_session_slot_put(); goto end; } /* Insert back sentry into table */ - rc = cgn_sentry_insert(&cse->cs_back_entry, &old, CGN_DIR_BACK); + rc = cgn_sentry_insert(&cse->cs_back_entry, &old, CGN_DIR_IN); if (unlikely(rc < 0)) { - cgn_sentry_delete(&cse->cs_forw_entry, CGN_DIR_FORW); + cgn_sentry_delete(&cse->cs_forw_entry, CGN_DIR_OUT); cgn_session_slot_put(); goto end; } - /* Add a nested 2-tuple session? 
*/ - if (cse->cs_sess2_ht && cpk->cpk_keepalive) { - rc = cgn_sess2_establish_and_activate(cse, cpk, dir); + /* Increment 3-tuple sessions created in subscriber */ + cgn_source_stats_sess_created(cse->cs_src); + + /* + * Add a nested 2-tuple session? cpk_keepalive is only ever set for a + * real packet. It is *not* set for PCP ('map') instantiated + * sessions. + */ + if (cgn_sess_s2_is_enabled(cse) && cpk->cpk_keepalive) { + struct cgn_sess2 *s2; + int error = 0; + + assert(dir == CGN_DIR_OUT); + + /* Create an s2 session */ + s2 = cgn_sess_s2_establish(&cse->cs_s2, cpk, &error); + if (s2) + error = cgn_sess_s2_activate(&cse->cs_s2, s2); /* Count the error, then ignore it */ - if (rc < 0) { - cgn_error_inc(rc, dir); - rc = 0; - } + if (error < 0) + cgn_rc_inc(dir, error); + else + cgn_source_stats_sess2_created(cse->cs_src); } else { struct cgn_sentry *ce = dir2sentry(cse, dir); - cgn_source_stats_sess_created(cse->cs_src); - rte_atomic64_inc(&ce->ce_pkts); - rte_atomic64_add(&ce->ce_bytes, cpk->cpk_len); + /* Increment stats if session was created by a packet */ + if (likely(cse->cs_pkt_instd)) { + rte_atomic64_inc(&ce->ce_pkts); + rte_atomic64_add(&ce->ce_bytes, cpk->cpk_len); + } } end: @@ -727,108 +949,57 @@ cgn_session_deactivate(struct cgn_session *cse) { if (cse->cs_forw_entry.ce_active) { /* Remove from sentry table */ - cgn_sentry_delete(&cse->cs_forw_entry, CGN_DIR_FORW); - cgn_sentry_delete(&cse->cs_back_entry, CGN_DIR_BACK); + cgn_sentry_delete(&cse->cs_forw_entry, CGN_DIR_OUT); + cgn_sentry_delete(&cse->cs_back_entry, CGN_DIR_IN); /* Release the slot */ cgn_session_slot_put(); - /* If nested sessions are in-use then we count them */ - if (!cse->cs_sess2_ht) - cgn_source_stats_sess_destroyed(cse->cs_src); + /* Increment 3-tuple sessions destroyed in subscriber */ + cgn_source_stats_sess_destroyed(cse->cs_src); } } -static ALWAYS_INLINE ulong -cgn_hash(uint16_t id, uint32_t addr, uint32_t ifindex, uint8_t ipproto) -{ - return rte_jhash_3words(id, addr, 
ipproto, ifindex); -} - -static ulong -cgn_hash_sentry(const struct cgn_sentry *ce) -{ - ulong hash; - - hash = cgn_hash(ce->ce_port, ce->ce_addr, ce->ce_ifindex, - ce->ce_ipproto); - return hash; -} - -static ALWAYS_INLINE int -cgn_sess_match(const struct cgn_sentry *ce, uint16_t id, uint32_t addr, - uint32_t ifindex, uint8_t proto) -{ - /* Ports are expected to vary most */ - if (ce->ce_port != id) - return 0; - - if (ce->ce_addr != addr) - return 0; - - if (ce->ce_ifindex != ifindex) - return 0; - - if (ce->ce_ipproto != proto) - return 0; - - return 1; -} - -/* - * cgn_sess_node_match. Used when inserting a sentry. - */ -static int -cgn_sess_node_match(struct cds_lfht_node *node, const void *key) +static ALWAYS_INLINE ulong cgn_hash(const struct cgn_3tuple_key *key) { - const struct cgn_sentry *ce1, *ce2; - int rc; + static_assert(sizeof(*key) == 12, + "cgn 3 tuple key is wrong size"); - ce1 = caa_container_of(node, struct cgn_sentry, ce_node); - ce2 = key; - - if (unlikely(ce1->ce_expired)) - return 0; - - rc = cgn_sess_match(ce1, ce2->ce_port, ce2->ce_addr, - ce2->ce_ifindex, ce2->ce_ipproto); - return rc; + /* + * A special optimized version of jhash that handles 1 or more of + * uint32_ts. + */ + return rte_jhash_32b((const uint32_t *)key, + sizeof(*key) / sizeof(uint32_t), 0); } /* - * lfht match function, key is a pointer to a 'struct sess_lookup_key' object + * Hash table match function. + * + * key - Either a pointer to the key of the entry we are inserting, or + * a key we are lookng up. (type 'struct cgn_3tuple_key') + * node - Pointer to an existing table node. + * + * Return 1 for a match. 
*/ static int -cgn_sess_lkey_match(struct cds_lfht_node *node, const void *key) +cgn_sess_match(struct cds_lfht_node *node, const void *key) { - const struct sess_lookup_key *lkey = key; const struct cgn_sentry *ce; - int rc; ce = caa_container_of(node, struct cgn_sentry, ce_node); - if (unlikely(ce->ce_expired)) - return 0; - - rc = cgn_sess_match(ce, lkey->sk_id, lkey->sk_addr, - lkey->sk_ifindex, lkey->sk_ipproto); - - return rc; + return !memcmp(&ce->ce_key, key, sizeof(ce->ce_key)); } /* * Lookup hash table with given key. Return pointer to hash table node. */ -static inline struct cds_lfht_node * -cgn_session_node(struct sess_lookup_key *key, int dir, +static ALWAYS_INLINE struct cds_lfht_node * +cgn_session_node(const struct cgn_3tuple_key *key, enum cgn_dir dir, struct cds_lfht_iter *iter) { - ulong hash; - - hash = cgn_hash(key->sk_id, key->sk_addr, key->sk_ifindex, - key->sk_ipproto); - - cds_lfht_lookup(cgn_sess_ht[dir], hash, cgn_sess_lkey_match, + cds_lfht_lookup(cgn_sess_ht[dir], cgn_hash(key), cgn_sess_match, key, iter); return cds_lfht_iter_get_node(iter); @@ -838,16 +1009,12 @@ cgn_session_node(struct sess_lookup_key *key, int dir, * Lookup hash table with given key and return the next node. */ static inline struct cds_lfht_node * -cgn_session_node_next(struct sess_lookup_key *key, int dir, +cgn_session_node_next(const struct cgn_3tuple_key *key, enum cgn_dir dir, struct cds_lfht_iter *iter) { struct cds_lfht_node *node; - ulong hash; - - hash = cgn_hash(key->sk_id, key->sk_addr, key->sk_ifindex, - key->sk_ipproto); - cds_lfht_lookup(cgn_sess_ht[dir], hash, cgn_sess_lkey_match, + cds_lfht_lookup(cgn_sess_ht[dir], cgn_hash(key), cgn_sess_match, key, iter); node = cds_lfht_iter_get_node(iter); @@ -863,7 +1030,7 @@ cgn_session_node_next(struct sess_lookup_key *key, int dir, * Get the first node in the hash table. 
*/ static inline struct cds_lfht_node * -cgn_session_node_first(int dir, struct cds_lfht_iter *iter) +cgn_session_node_first(enum cgn_dir dir, struct cds_lfht_iter *iter) { cds_lfht_first(cgn_sess_ht[dir], iter); return cds_lfht_iter_get_node(iter); @@ -874,12 +1041,12 @@ cgn_session_node_first(int dir, struct cds_lfht_iter *iter) */ static int cgn_sentry_insert(struct cgn_sentry *ce, struct cgn_sentry **old, - enum cgn_flow dir) + enum cgn_dir dir) { struct cds_lfht_node *node; - node = cds_lfht_add_unique(cgn_sess_ht[dir], cgn_hash_sentry(ce), - cgn_sess_node_match, ce, &ce->ce_node); + node = cds_lfht_add_unique(cgn_sess_ht[dir], cgn_hash(&ce->ce_key), + cgn_sess_match, &ce->ce_key, &ce->ce_node); /* Did we loose the race to create a session? */ if (node != &ce->ce_node) { @@ -894,7 +1061,7 @@ cgn_sentry_insert(struct cgn_sentry *ce, struct cgn_sentry **old, /* * Delete sentry from the hash table */ -static void cgn_sentry_delete(struct cgn_sentry *ce, enum cgn_flow dir) +static void cgn_sentry_delete(struct cgn_sentry *ce, enum cgn_dir dir) { if (cgn_sess_ht[dir]) (void)cds_lfht_del(cgn_sess_ht[dir], &ce->ce_node); @@ -902,11 +1069,13 @@ static void cgn_sentry_delete(struct cgn_sentry *ce, enum cgn_flow dir) } /* - * If dir is CGN_DIR_OUT, then key represents a source address and port. - * If dir is CGN_DIR_IN, then key represents a dest address and port. + * cgn_sentry_lookup + * + * 'dir' - determines which table we lookup - forw (out) table or back (in) + * table. */ static inline struct cgn_sentry * -cgn_sentry_lookup_by_key(struct sess_lookup_key *key, int dir) +cgn_sentry_lookup(const struct cgn_3tuple_key *key, enum cgn_dir dir) { struct cds_lfht_iter iter; struct cds_lfht_node *node; @@ -918,60 +1087,11 @@ cgn_sentry_lookup_by_key(struct sess_lookup_key *key, int dir) return caa_container_of(node, struct cgn_sentry, ce_node); } -/* - * cgn_sentry_lookup - * - * 'dir' - determines which table we lookup - forw (out) table or back (in) - * table. 
- * - * 'lookup_src' - lookup source addr and port (true when 'dir == CGN_DIR_OUT' - * for normal operation, and 'dir == CGN_DIR_IN' for icmp - * errors) - */ -static inline struct cgn_sentry * -cgn_sentry_lookup(struct cgn_packet *cpk, int dir, bool lookup_src) -{ - struct sess_lookup_key lkey; - - /* Populate lookup key */ - lkey.sk_ifindex = cpk->cpk_ifindex; - lkey.sk_ipproto = cpk->cpk_ipproto; - - if (lookup_src) { - lkey.sk_addr = cpk->cpk_saddr; - lkey.sk_id = cpk->cpk_sid; - } else { - lkey.sk_addr = cpk->cpk_daddr; - lkey.sk_id = cpk->cpk_did; - } - - return cgn_sentry_lookup_by_key(&lkey, dir); -} - /* * cgn_session_lookup */ struct cgn_session * -cgn_session_lookup(struct cgn_packet *cpk, int dir) -{ - struct cgn_sentry *ce; - - ce = cgn_sentry_lookup(cpk, dir, (dir == CGN_DIR_OUT)); - if (ce) { - struct cgn_session *cse = sentry2session(ce, dir); - - return cse; - } - - return NULL; -} - -/* - * If dir is CGN_DIR_OUT, then key represents a source address and port. - * If dir is CGN_DIR_IN, then key represents a dest address and port. - */ -static struct cgn_session * -cgn_session_lookup_by_key(struct sess_lookup_key *key, int dir) +cgn_session_lookup(const struct cgn_3tuple_key *key, enum cgn_dir dir) { struct cds_lfht_iter iter; struct cds_lfht_node *node; @@ -989,22 +1109,19 @@ cgn_session_lookup_by_key(struct sess_lookup_key *key, int dir) * Lookup a packet embedded in an ICMP error message */ struct cgn_session * -cgn_session_lookup_icmp_err(struct cgn_packet *cpk, int dir) +cgn_session_lookup_icmp_err(struct cgn_packet *cpk, enum cgn_dir dir) { - struct cgn_sentry *ce; - /* + * Setup direction dependent part of hash key. Note that this is the + * reverse of normal. + * + * For embedded ICMP packets we switch the hash key address and port. * For inbound traffic, lookup the embedded source address. For * outbound traffic, lookup the embedded destination address. 
*/ - ce = cgn_sentry_lookup(cpk, dir, (dir == CGN_DIR_IN)); - if (ce) { - struct cgn_session *cse = sentry2session(ce, dir); - - return cse; - } + cgn_pkt_key_init(cpk, cgn_reverse_dir(dir)); - return NULL; + return cgn_session_lookup(&cpk->cpk_key, dir); } /* Is there a cached CGNAT session handle in the packet? */ @@ -1022,25 +1139,111 @@ struct cgn_session *cgn_session_find_cached(struct rte_mbuf *mbuf) return cse; } +static int +cgn_session_inspect_s2(struct cgn_session *cse, struct cgn_sentry *ce, + struct cgn_packet *cpk, enum cgn_dir dir) +{ + struct cgn_sess2 *s2; + int error = 0; + + /* + * ICMP only has one ID field. We store the 'dest' ID in the + * 2-tuple session for inside-to-outside packets. This is the + * pre-translation (inside) ID. For outside-to-inside, we + * lookup the 'source' ID, which will be the outside ID, and + * hence we will not find the 2-tuple session. + * + * A workaround for this is to copy the 3-tuple session + * forward entry ID to the packet decomposition source ID + * field such that the 2-tuple lookup will now find the + * session. + */ + if (dir == CGN_DIR_IN && cpk->cpk_ipproto == IPPROTO_ICMP) + cpk->cpk_sid = cse->cs_forw_entry.ce_port; + + /* + * If we fail to find an s2 session here, then that means this + * packet is being sent to a different dest addr and/or port. + */ + s2 = cgn_sess_s2_inspect(&cse->cs_s2, cpk, dir); + + /* Add a nested 2-tuple session? */ + if (unlikely(!s2)) { + /* + * cpk_keepalive is only set true for certain pkts in + * the outbound direction. Pkts for which it is *not* + * set also include: TCP RST and all ICMP pkts except + * Echo Requests. + */ + if (cpk->cpk_keepalive) { + assert(dir == CGN_DIR_OUT); + + /* Create an s2 session */ + s2 = cgn_sess_s2_establish(&cse->cs_s2, cpk, &error); + if (s2) + error = cgn_sess_s2_activate(&cse->cs_s2, s2); + + if (error == 0) + cgn_source_stats_sess2_created(cse->cs_src); + else if (error == -CGN_S2_EEXIST) { + /* + * Lost race to add 2-tuple session. 
Count + * the error, then ignore it. + */ + cgn_rc_inc(dir, error); + error = 0; + } + + /* + * If error is still < 0 here then that is returned, + * and the flow will be blocked. If we cannot log a + * 2-tuple session then we do not want to allow the + * flow. + */ + } else { + /* + * Inbound pkt from unknown src addr or port. If dest + * session table is full then drop inbound pkts from + * an unknown source even of we know the dest addr and + * port. + */ + if (cse->cs_s2.cs2_full) + /* Block inbound pkt */ + error = -CGN_S2_ENOSPC; + else { + rte_atomic64_inc(&cse->cs_unk_pkts); + rte_atomic64_inc(&ce->ce_pkts); + rte_atomic64_add(&ce->ce_bytes, cpk->cpk_len); + } + } + } + + return error; +} + /* - * Inspect an already activated 3-tuple session. + * Inspect an already activated 3-tuple session. *Only* called by the packet + * path. */ struct cgn_session * -cgn_session_inspect(struct cgn_packet *cpk, int dir) +cgn_session_inspect(struct cgn_packet *cpk, enum cgn_dir dir, int *error) { struct cgn_sentry *ce; - ce = cgn_sentry_lookup(cpk, dir, (dir == CGN_DIR_OUT)); + ce = cgn_sentry_lookup(&cpk->cpk_key, dir); if (!ce) return NULL; struct cgn_session *cse = sentry2session(ce, dir); /* Simple state mechanism for 3-tuple sessions */ - if (unlikely(dir == CGN_DIR_BACK && !ce->ce_established)) + if (unlikely(dir == CGN_DIR_IN && !ce->ce_established)) ce->ce_established = true; - /* session may have been created by a map cmd */ + /* + * If a map instantiated session subsequently 'sees' a packet then set + * the pkt instantiated flag. + */ if (unlikely(!cse->cs_pkt_instd)) cse->cs_pkt_instd = true; @@ -1048,46 +1251,27 @@ cgn_session_inspect(struct cgn_packet *cpk, int dir) * If we have nested 2-tuple sessions then they take care of sessions * idle monitoring and stats. */ - if (unlikely(cse->cs_sess2_ht)) { - struct cgn_sess2 *s2; - - /* - * ICMP only has one ID field. We store the 'dest' ID in the - * 2-tuple session for inside-to-outside packets. 
This is the - * pre-translation (inside) ID. For outside-to-inside, we - * lookup the 'source' ID, which will be the outside ID, and - * hence we will not find the 2-tuple session. - * - * A workaround for this is to copy the 3-tuple session - * forward entry ID to the packet decomposition source ID - * field such that the 2-tuple lookup will now find the - * session. - */ - if (dir == CGN_DIR_IN && cpk->cpk_ipproto == IPPROTO_ICMP) - cpk->cpk_sid = cse->cs_forw_entry.ce_port; - - /* - * If we fail to find an s2 session here, then that means this - * packet is being sent to a different dest addr and/or port. - */ - s2 = cgn_sess2_inspect(cse->cs_sess2_ht, cpk, dir); + if (unlikely(cgn_sess_s2_is_enabled(cse))) + *error = cgn_session_inspect_s2(cse, ce, cpk, dir); + else { + if (likely(cpk->cpk_keepalive)) { + /* + * Clear idle flag, if packet is eligible. + */ + if (unlikely(rte_atomic16_read(&cse->cs_idle) != 0)) + rte_atomic16_clear(&cse->cs_idle); - /* Add a nested 2-tuple session? */ - if (!s2 && cpk->cpk_keepalive) { - int rc; - rc = cgn_sess2_establish_and_activate(cse, cpk, dir); + /* + * Is dest (or reverse src) port different from that + * used when cse was created? + */ + uint16_t fwd_dst_port = ((dir == CGN_DIR_OUT) ? + cpk->cpk_did : cpk->cpk_sid); - /* Count the error, then ignore it */ - if (rc < 0) - cgn_error_inc(rc, dir); + if (unlikely(cse->cs_s2.cs2_dst_port != 0 && + cse->cs_s2.cs2_dst_port != fwd_dst_port)) + cse->cs_s2.cs2_dst_port = 0; } - } else { - /* - * Clear idle flag, if packet is eligible. 
- */ - if (cpk->cpk_keepalive && - rte_atomic16_read(&cse->cs_idle) != 0) - rte_atomic16_clear(&cse->cs_idle); rte_atomic64_inc(&ce->ce_pkts); rte_atomic64_add(&ce->ce_bytes, cpk->cpk_len); @@ -1106,10 +1290,10 @@ int cgn_session_walk(cgn_sesswalk_cb cb, void *data) struct cgn_sentry *ce; int rc; - if (!cgn_sess_ht[CGN_DIR_FORW]) + if (!cgn_sess_ht[CGN_DIR_OUT]) return -ENOENT; - cds_lfht_for_each_entry(cgn_sess_ht[CGN_DIR_FORW], &iter, ce, ce_node) { + cds_lfht_for_each_entry(cgn_sess_ht[CGN_DIR_OUT], &iter, ce, ce_node) { cse = caa_container_of(ce, struct cgn_session, cs_forw_entry); @@ -1125,7 +1309,9 @@ int cgn_session_walk(cgn_sesswalk_cb cb, void *data) */ static uint32_t cgn_session_expiry_time(struct cgn_session *cse) { - uint8_t proto, state; + enum nat_proto proto; + uint8_t state; + uint32_t etime; if (cse->cs_back_entry.ce_expired) return 0; @@ -1137,7 +1323,15 @@ static uint32_t cgn_session_expiry_time(struct cgn_session *cse) else state = CGN_SESS_STATE_INIT; - return cgn_sess_state_expiry_time(proto, state); + /* PCP timeout (if set) takes precedence */ + if (unlikely(cse->cs_map_instd)) + etime = cse->cs_s2.cs2_map_timeout; + else + /* Get state-dependent expiry time */ + etime = cgn_sess_state_expiry_time( + proto, ntohs(cse->cs_s2.cs2_dst_port), state); + + return etime; } static void __attribute__((format(printf, 2, 3))) cmd_err(FILE *f, @@ -1198,7 +1392,7 @@ static int cgn_session_op_parse(FILE *f, int argc, char **argv, fltr->cf_all = true; /* Save clear/show command to fltr->cf_desc */ - for (i = 0; i < argc; i++) + for (i = 0; i < argc && l < (int)sizeof(fltr->cf_desc); i++) l += snprintf(fltr->cf_desc + l, sizeof(fltr->cf_desc) - l, "%s ", argv[i]); if (l > 0) @@ -1217,8 +1411,8 @@ static int cgn_session_op_parse(FILE *f, int argc, char **argv, cmd_err(f, "invalid protocol: %s\n", argv[1]); return -1; } - fltr->cf_subs.sk_ipproto = tmp; - fltr->cf_pub.sk_ipproto = tmp; + fltr->cf_subs.k_ipproto = tmp; + fltr->cf_pub.k_ipproto = tmp; argc -= 
2; argv += 2; @@ -1233,10 +1427,10 @@ static int cgn_session_op_parse(FILE *f, int argc, char **argv, } pl = MIN(32, pl); - memcpy(&fltr->cf_subs.sk_addr, &npf_addr, 4); + memcpy(&fltr->cf_subs.k_addr, &npf_addr, 4); tmp = (0xFFFFFFFF << (32 - pl)) & 0xFFFFFFFF; fltr->cf_subs_mask = htonl(tmp); - fltr->cf_subs.sk_addr &= fltr->cf_subs_mask; + fltr->cf_subs.k_addr &= fltr->cf_subs_mask; argc -= 2; argv += 2; @@ -1249,7 +1443,7 @@ static int cgn_session_op_parse(FILE *f, int argc, char **argv, cmd_err(f, "invalid subs-port: %s\n", argv[1]); return -1; } - fltr->cf_subs.sk_id = htons(tmp); + fltr->cf_subs.k_port = htons(tmp); argc -= 2; argv += 2; @@ -1264,10 +1458,10 @@ static int cgn_session_op_parse(FILE *f, int argc, char **argv, } pl = MIN(32, pl); - memcpy(&fltr->cf_pub.sk_addr, &npf_addr, 4); + memcpy(&fltr->cf_pub.k_addr, &npf_addr, 4); tmp = (0xFFFFFFFF << (32 - pl)) & 0xFFFFFFFF; fltr->cf_pub_mask = htonl(tmp); - fltr->cf_pub.sk_addr &= fltr->cf_pub_mask; + fltr->cf_pub.k_addr &= fltr->cf_pub_mask; argc -= 2; argv += 2; @@ -1280,7 +1474,7 @@ static int cgn_session_op_parse(FILE *f, int argc, char **argv, cmd_err(f, "invalid pub-port: %s\n", argv[1]); return -1; } - fltr->cf_pub.sk_id = htons(tmp); + fltr->cf_pub.k_port = htons(tmp); argc -= 2; argv += 2; @@ -1295,10 +1489,12 @@ static int cgn_session_op_parse(FILE *f, int argc, char **argv, } pl = MIN(32, pl); - memcpy(&fltr->cf_dst.s2k_addr, &npf_addr, 4); + memcpy(&fltr->cf_dst.k_addr, &npf_addr, 4); tmp = (0xFFFFFFFF << (32 - pl)) & 0xFFFFFFFF; fltr->cf_dst_mask = htonl(tmp); - fltr->cf_dst.s2k_addr &= fltr->cf_dst_mask; + fltr->cf_dst.k_addr &= fltr->cf_dst_mask; + /* iterate, or lookup, the 'out' sentries */ + fltr->cf_dst.k_dir = CGN_DIR_OUT; argc -= 2; argv += 2; @@ -1311,7 +1507,7 @@ static int cgn_session_op_parse(FILE *f, int argc, char **argv, cmd_err(f, "invalid dst-port: %s\n", argv[1]); return -1; } - fltr->cf_dst.s2k_id = htons(tmp); + fltr->cf_dst.k_port = htons(tmp); argc -= 2; argv += 2; @@ 
-1344,14 +1540,15 @@ static int cgn_session_op_parse(FILE *f, int argc, char **argv, fltr->cf_all = false; } else if (!strcmp(argv[0], "intf") && argc >= 2) { - struct ifnet *ifp = ifnet_byifname(argv[1]); + struct ifnet *ifp = dp_ifnet_byifname(argv[1]); if (!ifp) { cmd_err(f, "invalid interface: %s\n", argv[1]); return -1; } - fltr->cf_subs.sk_ifindex = ifp->if_index; - fltr->cf_pub.sk_ifindex = ifp->if_index; + fltr->cf_subs.k_ifindex = cgn_if_key_index(ifp); + fltr->cf_pub.k_ifindex = cgn_if_key_index(ifp); + fltr->cf_ifindex = ifp->if_index; argc -= 2; argv += 2; @@ -1364,7 +1561,7 @@ static int cgn_session_op_parse(FILE *f, int argc, char **argv, cmd_err(f, "invalid tgt-addr: %s\n", argv[1]); return rc; } - memcpy(&fltr->cf_tgt.sk_addr, &npf_addr, 4); + memcpy(&fltr->cf_tgt.k_addr, &npf_addr, 4); argc -= 2; argv += 2; @@ -1376,7 +1573,7 @@ static int cgn_session_op_parse(FILE *f, int argc, char **argv, cmd_err(f, "invalid tgt-port: %s\n", argv[1]); return -1; } - fltr->cf_tgt.sk_id = htons(tmp); + fltr->cf_tgt.k_port = htons(tmp); argc -= 2; argv += 2; @@ -1388,19 +1585,19 @@ static int cgn_session_op_parse(FILE *f, int argc, char **argv, cmd_err(f, "invalid tgt-proto: %s\n", argv[1]); return -1; } - fltr->cf_tgt.sk_ipproto = tmp; + fltr->cf_tgt.k_ipproto = tmp; argc -= 2; argv += 2; } else if (!strcmp(argv[0], "tgt-intf") && argc >= 2) { - struct ifnet *ifp = ifnet_byifname(argv[1]); + struct ifnet *ifp = dp_ifnet_byifname(argv[1]); if (!ifp) { cmd_err(f, "invalid tgt-intf: %s\n", argv[1]); return -1; } - fltr->cf_tgt.sk_ifindex = ifp->if_index; + fltr->cf_tgt.k_ifindex = cgn_if_key_index(ifp); argc -= 2; argv += 2; @@ -1452,14 +1649,21 @@ static int cgn_session_op_parse(FILE *f, int argc, char **argv, } else if (!strcmp(argv[0], "timeout") && argc >= 2) { tmp = cgn_arg_to_int(argv[1]); - if (tmp < 0) + if (tmp < 0 || tmp > USHRT_MAX) cmd_err(f, "invalid timeout: %s\n", argv[1]); - fltr->cf_timeout = tmp; + fltr->cf_timeout = (uint16_t)tmp; argc -= 2; argv 
+= 2; + } else if (!strcmp(argv[0], "statistics")) { + /* Clear statistics */ + fltr->cf_clear_stats = true; + + argc -= 1; + argv += 1; + } else { cmd_err(f, "invalid option: %s\n", argv[0]); return -1; @@ -1473,7 +1677,7 @@ static int cgn_session_op_parse(FILE *f, int argc, char **argv, fltr->cf_all_sess2 = true; - if (fltr->cf_dst.s2k_addr || fltr->cf_dst.s2k_id || fltr->cf_id2) { + if (fltr->cf_dst.k_addr || fltr->cf_dst.k_port || fltr->cf_id2) { fltr->cf_all = false; fltr->cf_all_sess2 = false; } @@ -1500,8 +1704,9 @@ int cgn_op_session_map(FILE *f, int argc, char **argv) struct cgn_session *cse; json_writer_t *json; struct ifnet *ifp = NULL; - uint8_t proto = 0; + uint8_t ipproto = 0; char *sa_arg = NULL; + char *pa_arg = NULL; int rc, i, error = 0; memset(&fltr, 0, sizeof(fltr)); @@ -1509,11 +1714,15 @@ int cgn_op_session_map(FILE *f, int argc, char **argv) /* Result is returned in json */ json = jsonw_new(f); - if (!json) + if (!json) { + cgn_rc_inc(CGN_DIR_OUT, CGN_PCP_ERR); return -1; + } - if (argc < 12) + if (argc < 12) { + error = -CGN_PCP_EINVAL; goto error; + } /* Remove "cgn-op map" */ argc -= 2; @@ -1523,48 +1732,84 @@ int cgn_op_session_map(FILE *f, int argc, char **argv) for (i = 1; i < argc; i++) { if (!strcmp(argv[i-1], "subs-addr")) sa_arg = argv[i]; + if (!strcmp(argv[i-1], "pub-addr")) + pa_arg = argv[i]; } /* Parse options */ rc = cgn_session_op_parse(f, argc, argv, &fltr); - if (rc < 0) + if (rc < 0) { + error = -CGN_PCP_EINVAL; goto error; + } /* check the subscriber (private) address and port */ - if (fltr.cf_subs.sk_addr == 0 || fltr.cf_subs_mask != 0xFFFFFFFF || - fltr.cf_subs.sk_id == 0) + if (fltr.cf_subs.k_addr == 0 || fltr.cf_subs_mask != 0xFFFFFFFF || + fltr.cf_subs.k_port == 0) { + error = -CGN_PCP_EINVAL; goto error; + } /* check the protocol */ - proto = fltr.cf_subs.sk_ipproto; + ipproto = fltr.cf_subs.k_ipproto; - if (proto != IPPROTO_TCP && proto != IPPROTO_UDP && - proto != IPPROTO_UDPLITE && proto != IPPROTO_DCCP) + if 
(ipproto != IPPROTO_TCP && ipproto != IPPROTO_UDP && + ipproto != IPPROTO_UDPLITE && ipproto != IPPROTO_DCCP) { + error = -CGN_PCP_EINVAL; goto error; + } /* check the interface */ - ifp = ifnet_byifindex(fltr.cf_subs.sk_ifindex); - if (!ifp) + ifp = dp_ifnet_byifindex(fltr.cf_ifindex); + if (!ifp) { + error = -CGN_PCP_EINVAL; goto error; + } - /* convert fltr params to cgnat cache params */ - cpk.cpk_saddr = fltr.cf_subs.sk_addr; - cpk.cpk_sid = fltr.cf_subs.sk_id; - cpk.cpk_ipproto = proto; - cpk.cpk_ifindex = fltr.cf_subs.sk_ifindex; + /* + * convert fltr params to cgnat cache params. + * + * cpk_daddr and cpk_did are set to the requested public address and + * port (if specified) in order to lookup the 'in' sentry. + */ + cpk.cpk_saddr = fltr.cf_subs.k_addr; + cpk.cpk_sid = fltr.cf_subs.k_port; + cpk.cpk_daddr = fltr.cf_pub.k_addr; + cpk.cpk_did = fltr.cf_pub.k_port; + cpk.cpk_ipproto = ipproto; + cpk.cpk_ifindex = ifp->if_index; + cpk.cpk_key.k_ifindex = cgn_if_key_index(ifp); cpk.cpk_l4ports = true; cpk.cpk_proto = nat_proto_from_ipproto(cpk.cpk_ipproto); cpk.cpk_vrfid = if_vrfid(ifp); + cpk.cpk_key.k_expired = false; + cpk.cpk_pkt_instd = false; - /* Create and activate a session */ - cse = cgn_session_map(ifp, &cpk, CGN_DIR_OUT, &error); + /* Setup direction dependent part of hash key */ + cgn_pkt_key_init(&cpk, CGN_DIR_OUT); + + /* Get mapping, create a session, and activate session */ + cse = cgn_session_map(ifp, &cpk, fltr.cf_pub.k_addr, + fltr.cf_pub.k_port, &error); if (!cse) goto error; - cse->cs_map_timeout = fltr.cf_timeout; - cse->cs_map_instd = true; +#define CGN_MAP_INSTD_FLAG (1 << 0) +#define CGN_PKT_INSTD_FLAG (1 << 1) + int result = 0; + + if (cse->cs_map_instd) + result |= CGN_MAP_INSTD_FLAG; + + if (cse->cs_pkt_instd) + result |= CGN_PKT_INSTD_FLAG; + + cse->cs_s2.cs2_map_timeout = fltr.cf_timeout; + /* + * Return result in json + */ char subs_addr[16]; char pub_addr[16]; @@ -1577,9 +1822,9 @@ int cgn_op_session_map(FILE *f, int argc, char 
**argv) jsonw_name(json, "map"); jsonw_start_object(json); - jsonw_int_field(json, "result", 0); + jsonw_int_field(json, "result", result); jsonw_string_field(json, "intf", ifp->if_name); - jsonw_uint_field(json, "proto", proto); + jsonw_uint_field(json, "proto", ipproto); jsonw_string_field(json, "subs_addr", subs_addr); jsonw_uint_field(json, "subs_port", ntohs(cse->cs_forw_entry.ce_port)); jsonw_string_field(json, "pub_addr", pub_addr); @@ -1589,25 +1834,30 @@ int cgn_op_session_map(FILE *f, int argc, char **argv) jsonw_end_object(json); jsonw_destroy(&json); + cgn_rc_inc(CGN_DIR_OUT, CGN_PCP_OK); return 0; error: + cgn_rc_inc(CGN_DIR_OUT, CGN_PCP_ERR); + /* + * Count the specific error. This may or may not be a PCP specific + * error. + */ + cgn_rc_inc(CGN_DIR_OUT, error); + jsonw_name(json, "map"); jsonw_start_object(json); - if (error >= 0) - error = -CGN_ERR_UNKWN; - jsonw_int_field(json, "result", error); - jsonw_string_field(json, "error", cgn_errno_str(error)); + jsonw_string_field(json, "error", cgn_rc_str(error)); jsonw_string_field(json, "intf", ifp ? ifp->if_name : "?"); - jsonw_uint_field(json, "proto", proto); + jsonw_uint_field(json, "proto", ipproto); jsonw_string_field(json, "subs_addr", sa_arg ? sa_arg : "0.0.0.0"); - jsonw_uint_field(json, "subs_port", ntohs(fltr.cf_subs.sk_id)); - jsonw_string_field(json, "pub_addr", "0.0.0.0"); - jsonw_uint_field(json, "pub_port", 0); - jsonw_uint_field(json, "timeout", fltr.cf_timeout); + jsonw_uint_field(json, "subs_port", ntohs(fltr.cf_subs.k_port)); + jsonw_string_field(json, "pub_addr", pa_arg ? pa_arg : "0.0.0.0"); + jsonw_uint_field(json, "pub_port", ntohs(fltr.cf_pub.k_port)); + jsonw_uint_field(json, "timeout", 0); jsonw_end_object(json); jsonw_destroy(&json); @@ -1634,8 +1884,10 @@ cgn_session_jsonw_one(json_writer_t *json, struct cgn_sess_fltr *fltr, * filter criteria for those sessions then do not display the outer * 3-tuple session if no 2-tuple sessions match the criteria. 
*/ - if (cse->cs_sess2_ht && !fltr->cf_all_sess2 && !fltr->cf_no_sess2) { - uint s2_count = cgn_sess2_show_count(cse->cs_sess2_ht, fltr); + if (cgn_sess_s2_is_enabled(cse) && cse->cs_pkt_instd && + !fltr->cf_all_sess2 && !fltr->cf_no_sess2) { + + uint s2_count = cgn_sess_s2_fltr_count(&cse->cs_s2, fltr); if (s2_count == 0) return 0; @@ -1645,7 +1897,7 @@ cgn_session_jsonw_one(json_writer_t *json, struct cgn_sess_fltr *fltr, src_str, sizeof(src_str)); inet_ntop(AF_INET, &cse->cs_back_entry.ce_addr, trans_str, sizeof(trans_str)); - ifp = ifnet_byifindex(cse->cs_forw_entry.ce_ifindex); + ifp = dp_ifnet_byifindex(cse->cs_ifindex); jsonw_start_object(json); @@ -1659,6 +1911,20 @@ cgn_session_jsonw_one(json_writer_t *json, struct cgn_sess_fltr *fltr, jsonw_uint_field(json, "proto", cse->cs_forw_entry.ce_ipproto); jsonw_string_field(json, "intf", ifp->if_name); + jsonw_uint_field(json, "index", cse->cs_forw_entry.ce_ifindex); + + if (cse->cs_s2.cs2_dst_port) + jsonw_uint_field(json, "init_dst_port", + htons(cse->cs_s2.cs2_dst_port)); + + /* Has the session seen at least one packet? */ + jsonw_bool_field(json, "pkt_instd", cse->cs_pkt_instd); + + /* Was session created via PCP? 
*/ + jsonw_bool_field(json, "map_instd", cse->cs_map_instd); + if (cse->cs_map_instd) + jsonw_uint_field(json, "map_timeout", + cse->cs_s2.cs2_map_timeout); if (fltr->cf_detail) { struct nat_pool *np; @@ -1687,17 +1953,30 @@ cgn_session_jsonw_one(json_writer_t *json, struct cgn_sess_fltr *fltr, jsonw_uint_field(json, "in_bytes", rte_atomic64_read(&bk->ce_bytes) + bk->ce_bytes_tot); + /* Inbound pkts from unknown source addr or port */ + jsonw_uint_field(json, "unk_pkts_in", + rte_atomic64_read(&cse->cs_unk_pkts) + + cse->cs_unk_pkts_tot); + jsonw_bool_field(json, "exprd", cse->cs_forw_entry.ce_expired); jsonw_uint_field(json, "refcnt", rte_atomic16_read(&cse->cs_refcnt)); - if (cse->cs_sess2_ht) { + /* + * We use the 2-tuple expiry mechanism if 2-tuple session are enabled + * and the session has seen at least one packet. + * + * We use the 3-tuple expiry mechanism if 2-tuple sessions are + * disabled *or* the session was created by PCP (or the map command) + * and has *not* seen a packet. 
+ */ + if (cgn_sess_s2_is_enabled(cse) && cse->cs_pkt_instd) { ulong ht_count; /* count may be less than ht_count if there are filters */ if (!fltr->cf_no_sess2) - count = cgn_sess2_show(json, cse->cs_sess2_ht, fltr); + count = cgn_sess_s2_show(json, &cse->cs_s2, fltr); - ht_count = cgn_sess2_count(cse->cs_sess2_ht); + ht_count = cgn_sess_s2_count(&cse->cs_s2); jsonw_uint_field(json, "nsessions", ht_count); /* @@ -1726,13 +2005,7 @@ cgn_session_jsonw_one(json_writer_t *json, struct cgn_sess_fltr *fltr, CGN_SESS_STATE_ESTABLISHED); else jsonw_uint_field(json, "state", CGN_SESS_STATE_INIT); - - jsonw_uint_field(json, "max_to", 0); - jsonw_uint_field(json, "cur_to", 0); } else { - uint32_t uptime = cgn_get_time_uptime(); - uint32_t max_timeout = cgn_session_expiry_time(cse); - if (cse->cs_forw_entry.ce_expired) jsonw_uint_field(json, "state", CGN_SESS_STATE_CLOSED); else if (cse->cs_back_entry.ce_established) @@ -1741,6 +2014,13 @@ cgn_session_jsonw_one(json_writer_t *json, struct cgn_sess_fltr *fltr, else jsonw_uint_field(json, "state", CGN_SESS_STATE_INIT); + jsonw_uint_field(json, "nsessions", 0); + } + + if (!cgn_sess_s2_is_enabled(cse) || cse->cs_map_instd) { + uint32_t uptime = get_dp_uptime(); + uint32_t max_timeout = cgn_session_expiry_time(cse); + if (rte_atomic16_read(&cse->cs_idle)) jsonw_uint_field(json, "cur_to", (cse->cs_etime > uptime) ? 
@@ -1749,13 +2029,10 @@ cgn_session_jsonw_one(json_writer_t *json, struct cgn_sess_fltr *fltr, jsonw_uint_field(json, "cur_to", max_timeout); jsonw_uint_field(json, "max_to", max_timeout); - jsonw_uint_field(json, "nsessions", 0); } - jsonw_uint_field(json, "start_time", - cgn_ticks2timestamp(cse->cs_start_time)); - jsonw_uint_field(json, "duration", - cgn_start2duration(cse->cs_start_time)); + jsonw_uint_field(json, "start_time", cse->cs_start_time); + jsonw_uint_field(json, "duration", unix_epoch_us - cse->cs_start_time); jsonw_end_object(json); @@ -1775,28 +2052,37 @@ cgn_session_show_fltr(struct cgn_session *cse, struct cgn_sess_fltr *fltr) /* Filter on Subscriber address and port */ if (fltr->cf_subs_mask && - fltr->cf_subs.sk_addr != (fw->ce_addr & fltr->cf_subs_mask)) + fltr->cf_subs.k_addr != (fw->ce_addr & fltr->cf_subs_mask)) return false; - if (fltr->cf_subs.sk_id && fltr->cf_subs.sk_id != fw->ce_port) + if (fltr->cf_subs.k_port && fltr->cf_subs.k_port != fw->ce_port) return false; /* Filter on IP protocol */ - if (fltr->cf_subs.sk_ipproto && - fltr->cf_subs.sk_ipproto != fw->ce_ipproto) + if (fltr->cf_subs.k_ipproto && + fltr->cf_subs.k_ipproto != fw->ce_ipproto) return false; /* Filter on interface */ - if (fltr->cf_subs.sk_ifindex && - fltr->cf_subs.sk_ifindex != fw->ce_ifindex) + if (fltr->cf_ifindex && fltr->cf_ifindex != cse->cs_ifindex) return false; /* Filter on Public address and port */ if (fltr->cf_pub_mask && - fltr->cf_pub.sk_addr != (bk->ce_addr & fltr->cf_pub_mask)) + fltr->cf_pub.k_addr != (bk->ce_addr & fltr->cf_pub_mask)) + return false; + + /* + * Filter on destination port. This is the special case where 2-tuple + * sessions are *not* enabled, and we have only ever seen one dest + * port inuse on the 3-tuple session. 
+ */ + if (fltr->cf_dst.k_port && !cgn_sess_s2_is_enabled(cse) && + cse->cs_s2.cs2_dst_port != 0 && + fltr->cf_dst.k_port != cse->cs_s2.cs2_dst_port) return false; - if (fltr->cf_pub.sk_id && fltr->cf_pub.sk_id != bk->ce_port) + if (fltr->cf_pub.k_port && fltr->cf_pub.k_port != bk->ce_port) return false; /* Filter on session ID */ @@ -1826,6 +2112,10 @@ void cgn_session_show(FILE *f, int argc, char **argv) argc -= 3; argv += 3; + rc = cgn_session_op_parse(f, argc, argv, &fltr); + if (rc < 0) + return; + json = jsonw_new(f); if (!json) return; @@ -1833,11 +2123,7 @@ void cgn_session_show(FILE *f, int argc, char **argv) jsonw_name(json, "sessions"); jsonw_start_array(json); - rc = cgn_session_op_parse(f, argc, argv, &fltr); - if (rc < 0) - goto end; - - if (!cgn_sess_ht[CGN_DIR_FORW]) + if (!cgn_sess_ht[CGN_DIR_OUT]) goto end; /* @@ -1848,7 +2134,7 @@ void cgn_session_show(FILE *f, int argc, char **argv) if (fltr.cf_subs_mask == 0xffffffff && cgn_sess_key_valid(&fltr.cf_subs)) { - cse = cgn_session_lookup_by_key(&fltr.cf_subs, CGN_DIR_OUT); + cse = cgn_session_lookup(&fltr.cf_subs, CGN_DIR_OUT); if (cse && cgn_session_show_fltr(cse, &fltr)) cgn_session_jsonw_one(json, &fltr, cse); @@ -1863,7 +2149,7 @@ void cgn_session_show(FILE *f, int argc, char **argv) if (fltr.cf_pub_mask == 0xffffffff && cgn_sess_key_valid(&fltr.cf_pub)) { - cse = cgn_session_lookup_by_key(&fltr.cf_pub, CGN_DIR_IN); + cse = cgn_session_lookup(&fltr.cf_pub, CGN_DIR_IN); if (cse && cgn_session_show_fltr(cse, &fltr)) cgn_session_jsonw_one(json, &fltr, cse); @@ -1918,10 +2204,10 @@ void cgn_session_id_list(FILE *f, int argc __unused, char **argv __unused) jsonw_name(json, "ids"); jsonw_start_array(json); - if (!cgn_sess_ht[CGN_DIR_FORW]) + if (!cgn_sess_ht[CGN_DIR_OUT]) goto end; - cds_lfht_for_each_entry(cgn_sess_ht[CGN_DIR_FORW], &iter, fw, ce_node) { + cds_lfht_for_each_entry(cgn_sess_ht[CGN_DIR_OUT], &iter, fw, ce_node) { if (fw->ce_expired) continue; @@ -1938,15 +2224,19 @@ void 
cgn_session_id_list(FILE *f, int argc __unused, char **argv __unused) * Mark session as expired */ static void -cgn_session_set_expired(struct cgn_session *cse) +cgn_session_set_expired(struct cgn_session *cse, bool update_stats) { cse->cs_forw_entry.ce_expired = true; cse->cs_back_entry.ce_expired = true; - cse->cs_end_time = soft_ticks; + cse->cs_end_time = unix_epoch_us; cse->cs_etime = 0; - /* Add stats to source totals */ - cgn_session_stats_periodic(cse); + /* + * Add stats to source totals. Do not do if called via gc, as this + * will be updating the stats anyway. + */ + if (unlikely(update_stats)) + cgn_session_stats_periodic(cse); } /* @@ -1954,48 +2244,23 @@ cgn_session_set_expired(struct cgn_session *cse) */ static void cgn_session_clear_mapping(struct cgn_session *cse) { - struct nat_pool *np; - if (cse->cs_src) cse->cs_src->sr_paired_addr = 0; - np = cgn_source_get_pool(cse->cs_src); - assert(np); - /* Release mapping immediately */ if (rte_atomic16_cmpset(&cse->cs_map_flag, true, false)) { - uint32_t taddr, oaddr; - uint16_t tport, oport; - uint8_t proto; + struct cgn_map cmi = {0}; - cgn_session_get_back(cse, &taddr, &tport); - cgn_session_get_forw(cse, &oaddr, &oport); - proto = nat_proto_from_ipproto(cse->cs_forw_entry.ce_ipproto); + cgn_session_get_back(cse, &cmi.cmi_taddr, &cmi.cmi_tid); + cmi.cmi_reserved = true; + cmi.cmi_src = cse->cs_src; + cmi.cmi_proto = nat_proto_from_ipproto( + cse->cs_forw_entry.ce_ipproto); - cgn_map_put(np, cse->cs_vrfid, CGN_DIR_OUT, proto, oaddr, - taddr, tport); + cgn_map_put(&cmi, cse->cs_vrfid); } } -/* - * Log a session clear event. This is done when one or more 2-tuple sessions - * are cleared manually, either from a clear command or a change in config - * (e.g. nat pool block size changes). This log message replaces the - * multiple SESSION_END log messages in order to avoid scale issues. 
- */ -static void -cgn_log_sess_clear(const char *desc, uint count, uint64_t clear_time) -{ -#define LOG_STR_SZ 300 - char log_str[LOG_STR_SZ]; - - snprintf(log_str, sizeof(log_str), - "desc=\"%s\" count=%u time=%lu", desc, count, - cgn_ticks2timestamp(clear_time)); - - RTE_LOG(NOTICE, CGNAT, "SESSION_CLEAR %s\n", log_str); -} - /* * addr must be specified. port=0 means any/all ports. */ @@ -2008,7 +2273,7 @@ cgn_session_clear_fltr(struct cgn_sess_fltr *fltr, bool clear_map, struct cgn_sentry *ce, *bk; uint count = 0; /* count 2-tuple sessions cleared */ - if (!cgn_sess_ht[CGN_DIR_FORW]) + if (!cgn_sess_ht[CGN_DIR_OUT]) return; /* @@ -2017,21 +2282,21 @@ cgn_session_clear_fltr(struct cgn_sess_fltr *fltr, bool clear_map, */ cgn_session_stop_timer(); - cds_lfht_for_each_entry(cgn_sess_ht[CGN_DIR_FORW], &iter, ce, ce_node) { + cds_lfht_for_each_entry(cgn_sess_ht[CGN_DIR_OUT], &iter, ce, ce_node) { if (!clear_map && ce->ce_expired) continue; /* Filter on IP protocol */ - if (fltr->cf_subs.sk_ipproto && - fltr->cf_subs.sk_ipproto != ce->ce_ipproto) + if (fltr->cf_subs.k_ipproto && + fltr->cf_subs.k_ipproto != ce->ce_ipproto) continue; /* Filter on Subscriber address and port */ if (fltr->cf_subs_mask && - fltr->cf_subs.sk_addr != (ce->ce_addr & fltr->cf_subs_mask)) + fltr->cf_subs.k_addr != (ce->ce_addr & fltr->cf_subs_mask)) continue; - if (fltr->cf_subs.sk_id && fltr->cf_subs.sk_id != ce->ce_port) + if (fltr->cf_subs.k_port && fltr->cf_subs.k_port != ce->ce_port) continue; cse = caa_container_of(ce, struct cgn_session, cs_forw_entry); @@ -2039,10 +2304,20 @@ cgn_session_clear_fltr(struct cgn_sess_fltr *fltr, bool clear_map, /* Filter on Public address and port */ if (fltr->cf_pub_mask && - fltr->cf_pub.sk_addr != (bk->ce_addr & fltr->cf_pub_mask)) + fltr->cf_pub.k_addr != (bk->ce_addr & fltr->cf_pub_mask)) + continue; + + if (fltr->cf_pub.k_port && fltr->cf_pub.k_port != bk->ce_port) continue; - if (fltr->cf_pub.sk_id && fltr->cf_pub.sk_id != bk->ce_port) + /* + * 
Filter on destination port. This is the special case where + * 2-tuple sessions are *not* enabled, and we have only ever + * seen one dest port inuse on the 3-tuple session. + */ + if (fltr->cf_dst.k_port && !cgn_sess_s2_is_enabled(cse) && + cse->cs_s2.cs2_dst_port != 0 && + fltr->cf_dst.k_port != cse->cs_s2.cs2_dst_port) continue; /* Filter on session ID */ @@ -2051,19 +2326,19 @@ cgn_session_clear_fltr(struct cgn_sess_fltr *fltr, bool clear_map, continue; /* Expire one or all 2-tuple sessions */ - if (cse->cs_sess2_ht) - count += cgn_sess2_expire_id(cse->cs_sess2_ht, - fltr->cf_id2); + if (cgn_sess_s2_is_enabled(cse)) + count += cgn_sess_s2_expire_id(&cse->cs_s2, + fltr->cf_id2); /* * If no unexpired 2-tuple sessions remain then expire * 3-tuple session and clear mapping. */ - if (!cse->cs_sess2_ht || - cgn_sess2_unexpired(cse->cs_sess2_ht) == 0) { + if (!cgn_sess_s2_is_enabled(cse) || + cgn_sess_s2_unexpired(&cse->cs_s2) == 0) { if (!ce->ce_expired) - cgn_session_set_expired(cse); + cgn_session_set_expired(cse, true); if (clear_map) cgn_session_clear_mapping(cse); @@ -2073,8 +2348,7 @@ cgn_session_clear_fltr(struct cgn_sess_fltr *fltr, bool clear_map, } /* Filter on interface */ - if (fltr->cf_subs.sk_ifindex && - fltr->cf_subs.sk_ifindex != ce->ce_ifindex) + if (fltr->cf_ifindex && fltr->cf_ifindex != cse->cs_ifindex) continue; /* Filter on NAT pool */ @@ -2083,10 +2357,10 @@ cgn_session_clear_fltr(struct cgn_sess_fltr *fltr, bool clear_map, continue; if (!ce->ce_expired) { - if (cse->cs_sess2_ht) - count += cgn_sess2_expire_all(cse->cs_sess2_ht); + if (cgn_sess_s2_is_enabled(cse)) + count += cgn_sess_s2_expire_all(&cse->cs_s2); - cgn_session_set_expired(cse); + cgn_session_set_expired(cse, true); } if (clear_map) @@ -2095,12 +2369,123 @@ cgn_session_clear_fltr(struct cgn_sess_fltr *fltr, bool clear_map, /* Log session clear command instead of every session */ if (count) - cgn_log_sess_clear(fltr->cf_desc, count, soft_ticks); + cgn_log_sess_clear(fltr->cf_desc, 
count, unix_epoch_us); - if (running && restart_timer) + if (restart_timer) cgn_session_start_timer(); } +/* + * Clear or update stats for one session + */ +static void +cgn_session_clear_or_update_stats(struct cgn_session *cse, bool clear) +{ + if (cgn_sess_s2_is_enabled(cse)) + cgn_sess2_clear_or_update_stats(&cse->cs_s2, clear); + + /* Clear the periodic counters, and update subscriber counts */ + cgn_session_stats_periodic(cse); + + /* Clear totals */ + if (clear) { + cse->cs_forw_entry.ce_pkts_tot = 0UL; + cse->cs_forw_entry.ce_bytes_tot = 0UL; + cse->cs_back_entry.ce_pkts_tot = 0UL; + cse->cs_back_entry.ce_bytes_tot = 0UL; + cse->cs_unk_pkts_tot = 0UL; + } +} + +/* + * Clear or update stats for all sessions + */ +static void cgn_session_clear_or_update_stats_all(bool clear) +{ + struct cds_lfht_iter iter; + struct cgn_session *cse; + struct cgn_sentry *ce; + + if (!cgn_sess_ht[CGN_DIR_OUT]) + return; + + cds_lfht_for_each_entry(cgn_sess_ht[CGN_DIR_OUT], &iter, ce, ce_node) { + if (ce->ce_expired) + continue; + + cse = caa_container_of(ce, struct cgn_session, cs_forw_entry); + cgn_session_clear_or_update_stats(cse, clear); + } +} + +/* + * Clear or update stats for specific sessions + */ +static void +cgn_session_clear_or_update_stats_fltr(struct cgn_sess_fltr *fltr, bool clear) +{ + struct cds_lfht_iter iter; + struct cgn_session *cse; + struct cgn_sentry *ce, *bk; + + if (!cgn_sess_ht[CGN_DIR_OUT]) + return; + + cds_lfht_for_each_entry(cgn_sess_ht[CGN_DIR_OUT], &iter, ce, ce_node) { + if (ce->ce_expired) + continue; + + /* Filter on IP protocol */ + if (fltr->cf_subs.k_ipproto && + fltr->cf_subs.k_ipproto != ce->ce_ipproto) + continue; + + /* Filter on Subscriber address and port */ + if (fltr->cf_subs_mask && + fltr->cf_subs.k_addr != (ce->ce_addr & fltr->cf_subs_mask)) + continue; + + if (fltr->cf_subs.k_port && fltr->cf_subs.k_port != ce->ce_port) + continue; + + cse = caa_container_of(ce, struct cgn_session, cs_forw_entry); + bk = &cse->cs_back_entry; + 
+ /* Filter on Public address and port */ + if (fltr->cf_pub_mask && + fltr->cf_pub.k_addr != (bk->ce_addr & fltr->cf_pub_mask)) + continue; + + if (fltr->cf_pub.k_port && fltr->cf_pub.k_port != bk->ce_port) + continue; + + /* + * Filter on destination port. This is the special case where + * 2-tuple sessions are *not* enabled, and we have only ever + * seen one dest port inuse on the 3-tuple session. + */ + if (fltr->cf_dst.k_port && !cgn_sess_s2_is_enabled(cse) && + cse->cs_s2.cs2_dst_port != 0 && + fltr->cf_dst.k_port != cse->cs_s2.cs2_dst_port) + continue; + + /* Filter on session ID */ + if (fltr->cf_id1 && fltr->cf_id1 != cse->cs_id) + continue; + + /* Filter on interface */ + if (fltr->cf_ifindex && fltr->cf_ifindex != cse->cs_ifindex) + continue; + + /* Filter on NAT pool */ + if (fltr->cf_np && + fltr->cf_np != cgn_source_get_pool(cse->cs_src)) + continue; + + cgn_session_clear_or_update_stats(cse, clear); + } +} + /* * cgn-op clear session ... * @@ -2127,12 +2512,41 @@ void cgn_session_clear(FILE *f, int argc, char **argv) if (rc < 0) return; - if (fltr.cf_all) { - cgn_session_expire_all(true, true); - return; + if (fltr.cf_clear_stats) { + /* Clear session stats */ + if (fltr.cf_all) + cgn_session_clear_or_update_stats_all(true); + else + cgn_session_clear_or_update_stats_fltr(&fltr, true); + } else { + /* Clear sessions */ + if (fltr.cf_all) + cgn_session_expire_all(true, true); + else + cgn_session_clear_fltr(&fltr, true, true); } +} + +/* + * Update subscriber with session stats + */ +void cgn_session_update(FILE *f, int argc, char **argv) +{ + struct cgn_sess_fltr fltr; + int rc; - cgn_session_clear_fltr(&fltr, true, true); + /* Remove "cgn-op update session" */ + argc -= 3; + argv += 3; + + rc = cgn_session_op_parse(f, argc, argv, &fltr); + if (rc < 0) + return; + + if (fltr.cf_all) + cgn_session_clear_or_update_stats_all(false); + else + cgn_session_clear_or_update_stats_fltr(&fltr, false); } static void @@ -2143,7 +2557,7 @@ 
cgn_session_expire_all(bool clear_map, bool restart_timer) struct cgn_sentry *ce; uint count = 0; - if (!cgn_sess_ht[CGN_DIR_FORW]) + if (!cgn_sess_ht[CGN_DIR_OUT]) return; /* @@ -2152,17 +2566,17 @@ cgn_session_expire_all(bool clear_map, bool restart_timer) */ cgn_session_stop_timer(); - cds_lfht_for_each_entry(cgn_sess_ht[CGN_DIR_FORW], &iter, ce, ce_node) { + cds_lfht_for_each_entry(cgn_sess_ht[CGN_DIR_OUT], &iter, ce, ce_node) { if (!clear_map && ce->ce_expired) continue; cse = caa_container_of(ce, struct cgn_session, cs_forw_entry); if (!ce->ce_expired) { - if (cse->cs_sess2_ht) - count += cgn_sess2_expire_all(cse->cs_sess2_ht); + if (cgn_sess_s2_is_enabled(cse)) + count += cgn_sess_s2_expire_all(&cse->cs_s2); - cgn_session_set_expired(cse); + cgn_session_set_expired(cse, true); } if (clear_map) @@ -2171,9 +2585,9 @@ cgn_session_expire_all(bool clear_map, bool restart_timer) /* Log session clear command instead of every session */ if (count) - cgn_log_sess_clear("all", count, soft_ticks); + cgn_log_sess_clear("all", count, unix_epoch_us); - if (running && restart_timer) + if (restart_timer) cgn_session_start_timer(); } @@ -2191,7 +2605,7 @@ void cgn_session_expire_pool(bool restart_timer, struct nat_pool *np, struct cgn_sentry *ce; uint count = 0; - if (!cgn_sess_ht[CGN_DIR_FORW]) + if (!cgn_sess_ht[CGN_DIR_OUT]) return; /* @@ -2200,7 +2614,7 @@ void cgn_session_expire_pool(bool restart_timer, struct nat_pool *np, */ cgn_session_stop_timer(); - cds_lfht_for_each_entry(cgn_sess_ht[CGN_DIR_FORW], &iter, ce, ce_node) { + cds_lfht_for_each_entry(cgn_sess_ht[CGN_DIR_OUT], &iter, ce, ce_node) { struct nat_pool *cs_np; cse = caa_container_of(ce, struct cgn_session, cs_forw_entry); @@ -2210,10 +2624,10 @@ void cgn_session_expire_pool(bool restart_timer, struct nat_pool *np, continue; if (!ce->ce_expired) { - if (cse->cs_sess2_ht) - count += cgn_sess2_expire_all(cse->cs_sess2_ht); + if (cgn_sess_s2_is_enabled(cse)) + count += cgn_sess_s2_expire_all(&cse->cs_s2); - 
cgn_session_set_expired(cse); + cgn_session_set_expired(cse, true); } if (clear_mapping) @@ -2227,10 +2641,10 @@ void cgn_session_expire_pool(bool restart_timer, struct nat_pool *np, if (count) { char desc[60]; snprintf(desc, sizeof(desc), "pool %s", nat_pool_name(np)); - cgn_log_sess_clear(desc, count, soft_ticks); + cgn_log_sess_clear(desc, count, unix_epoch_us); } - if (running && restart_timer) + if (restart_timer) cgn_session_start_timer(); } @@ -2244,7 +2658,7 @@ void cgn_session_expire_policy(bool restart_timer, struct cgn_policy *cp) struct cgn_sentry *ce; uint count = 0; - if (!cgn_sess_ht[CGN_DIR_FORW]) + if (!cgn_sess_ht[CGN_DIR_OUT]) return; /* @@ -2253,7 +2667,7 @@ void cgn_session_expire_policy(bool restart_timer, struct cgn_policy *cp) */ cgn_session_stop_timer(); - cds_lfht_for_each_entry(cgn_sess_ht[CGN_DIR_FORW], &iter, ce, ce_node) { + cds_lfht_for_each_entry(cgn_sess_ht[CGN_DIR_OUT], &iter, ce, ce_node) { if (ce->ce_expired) continue; @@ -2262,20 +2676,20 @@ void cgn_session_expire_policy(bool restart_timer, struct cgn_policy *cp) if (cse->cs_src && cse->cs_src->sr_policy != cp) continue; - if (cse->cs_sess2_ht) - count += cgn_sess2_expire_all(cse->cs_sess2_ht); + if (cgn_sess_s2_is_enabled(cse)) + count += cgn_sess_s2_expire_all(&cse->cs_s2); - cgn_session_set_expired(cse); + cgn_session_set_expired(cse, true); } /* Log session clear command instead of every session */ if (count) { char desc[60]; snprintf(desc, sizeof(desc), "policy %s", cp->cp_name); - cgn_log_sess_clear(desc, count, soft_ticks); + cgn_log_sess_clear(desc, count, unix_epoch_us); } - if (running && restart_timer) + if (restart_timer) cgn_session_start_timer(); } @@ -2294,13 +2708,10 @@ static inline bool cgn_session_expired(struct cgn_session *cse) /* Session changed to idle */ /* Get state-dependent expiry time */ - if (likely(!cse->cs_map_instd)) - etime = cgn_session_expiry_time(cse); - else - etime = cse->cs_map_timeout; + etime = cgn_session_expiry_time(cse); /* Set expiry 
time */ - cse->cs_etime = cgn_get_time_uptime() + etime; + cse->cs_etime = get_dp_uptime() + etime; return false; } @@ -2308,11 +2719,11 @@ static inline bool cgn_session_expired(struct cgn_session *cse) /* * Session was already idle. Has it timed-out? */ - if (time_after(cgn_get_time_uptime(), cse->cs_etime)) { + if (time_after(get_dp_uptime(), cse->cs_etime)) { /* yes, session has timed-out */ /* Mark session as expired */ - cgn_session_set_expired(cse); + cgn_session_set_expired(cse, false); return true; } @@ -2340,52 +2751,55 @@ static inline bool cgn_session_expired(struct cgn_session *cse) static inline void cgn_session_gc_inspect(struct cgn_session *cse) { + uint s2_unexpd = 0, s2_expd = 0; - if (cse->cs_sess2_ht) { - uint unexpd = 0, expd = 0; + /* + * We use the 2-tuple expiry mechanism if 2-tuple session are enabled + * and the session has seen at least one packet. + * + * We use the 3-tuple expiry mechanism if 2-tuple sessions are + * disabled *or* the session was created by PCP (or the map command). + */ + if (cgn_sess_s2_is_enabled(cse)) { /* Are there any unexpired 2-tuple sessions? */ - cgn_sess2_gc_walk(cse->cs_sess2_ht, &unexpd, &expd); + cgn_sess_s2_gc_walk(&cse->cs_s2, &s2_unexpd, &s2_expd); /* - * sentry pkt and bytes counts will have been updated by the - * sess2 walk. Call cgn_session_stats_periodic to update - * subscriber entry with the stats. + * Mark the session as expired when there are no unexpired + * nested sessions remaining *and* the session was not created + * by PCP (PCP sessions use the timeout value specified in the + * PCP request). */ - cgn_session_stats_periodic(cse); + if (unlikely(s2_unexpd == 0 && + !cse->cs_forw_entry.ce_expired && + !cse->cs_map_instd)) { + cgn_session_set_expired(cse, false); - /* Is the nested table still full? 
*/ - if (cse->cs_sess2_full && - rte_atomic16_read(&cse->cs_sess2_used) < - cgn_dest_sessions_max) { - char log_str[140]; - - cgn_session_log_str(cse, true, log_str, - sizeof(log_str)); - - RTE_LOG(ERR, CGNAT, - "DEST_SESSIONS_AVAILABLE count=%u %s\n", - rte_atomic16_read(&cse->cs_sess2_used), - log_str); - - cse->cs_sess2_full = false; + /* + * The next GC pass will exit before cs_gc_pass is + * tested and incremented since 's2_expd > 0', so we + * initialise the pass count to 1 here so that the + * 3-tuple session is destroyed on the second pass + * after this one (which is consistent with 3-tuple + * sessions with no 2-tuple sessions). + */ + cse->cs_gc_pass = 1; } + } - /* - * Mark the session as expired when all nested sessions have - * expired - */ - if (unexpd == 0 && !cse->cs_forw_entry.ce_expired) - cgn_session_set_expired(cse); + /* + * Update subscriber entry with the stats. This must be done after + * the s2 walk, if one occurred. + */ + cgn_session_stats_periodic_inline(cse); - /* Only progress with gc when no nested sessions remain */ - if ((unexpd + expd) > 0) - return; - } else - cgn_session_stats_periodic(cse); + /* Only progress with gc when no nested sessions remain */ + if ((s2_unexpd + s2_expd) > 0) + return; /* Is session expired? 
*/ - if (!cgn_session_expired(cse)) + if (likely(!cgn_session_expired(cse))) return; /* Wait until all references on the session have been removed */ @@ -2402,20 +2816,22 @@ cgn_session_gc_inspect(struct cgn_session *cse) cgn_session_destroy(cse, true); } +static inline void start_timer(struct rte_timer *timer); + /* * Session table garbage collect walk */ -static void cgn_session_gc_walk(void) +static void cgn_session_gc(struct rte_timer *timer, void *arg __rte_unused) { struct cds_lfht_iter iter; struct cgn_sentry *ce; struct cgn_session *cse; - if (!cgn_sess_ht[CGN_DIR_FORW]) + if (!cgn_sess_ht[CGN_DIR_OUT]) return; /* Walk the forwards-flow session table */ - cds_lfht_for_each_entry(cgn_sess_ht[CGN_DIR_FORW], &iter, ce, ce_node) { + cds_lfht_for_each_entry(cgn_sess_ht[CGN_DIR_OUT], &iter, ce, ce_node) { cse = caa_container_of(ce, struct cgn_session, cs_forw_entry); cgn_session_gc_inspect(cse); @@ -2423,122 +2839,262 @@ static void cgn_session_gc_walk(void) /* Is table still full? */ if (cgn_session_table_full && - rte_atomic32_read(&cgn_sessions_used) < cgn_sessions_max) { + rte_atomic32_read(&cgn_sessions_used) < cgn_sessions_max) + cgn_session_set_available(); - RTE_LOG(ERR, CGNAT, "SESSION_TABLE_AVAILABLE count=%u/%u\n", - rte_atomic32_read(&cgn_sessions_used), - cgn_sessions_max); + /* Restart timer if dataplane is still running. 
*/ + start_timer(timer); +} - cgn_session_table_full = false; - } +static bool is_cgn_helper_thread(void) +{ + return pthread_equal(pthread_self(), cgn_helper_pthread); } /* - * garbage collector timer callback + * Session table walk to perform logs */ -static void -cgn_session_gc(struct rte_timer *timer __rte_unused, void *arg __rte_unused) +static int cgn_session_log_walk(void) +{ + struct cds_lfht_iter iter; + struct cgn_sentry *ce; + struct cgn_session *cse; + unsigned int count = 0; + + ASSERT_CGN_HELPER_THREAD(); + + if (!cgn_sess_ht[CGN_DIR_OUT]) + return 0; + + /* Walk the forwards-flow session table */ + cds_lfht_for_each_entry(cgn_sess_ht[CGN_DIR_OUT], &iter, ce, ce_node) { + + cse = caa_container_of(ce, struct cgn_session, cs_forw_entry); + + if (cgn_sess_s2_is_enabled(cse)) + count += cgn_sess_s2_log_walk(&cse->cs_s2); + } + + return count; +} + +static int cgn_log_sessions(void) { - /* Walk the session table. */ - cgn_session_gc_walk(); + unsigned int count = cgn_session_log_walk(); - /* Restart timer if dataplane still running. 
*/ - if (running) - cgn_session_start_timer(); + /* Increase the sleep interval up to max if nothing to log */ + if (count == 0) { + if (cgn_sleep_interval < CGNAT_MAX_HELPER_INTERVAL_US) { + cgn_sleep_interval *= 2; + + cgn_sleep_interval = MIN(cgn_sleep_interval, + CGNAT_MAX_HELPER_INTERVAL_US); + } + } else { + unsigned int lcore_id = rte_lcore_id(); + struct lcore_cgnat *lcore_info = lcore_conf_get_cgnat(lcore_id); + + lcore_info->logs += count; + + cgn_sleep_interval = 1; + } + + return cgn_sleep_interval; +} + +int cgn_helper_thread_func(unsigned int core_num, void *arg __unused) +{ + RTE_LOG(DEBUG, CGNAT, "Launching CGNAT help thread on core %u\n", + core_num); + + cgn_helper_core_num = core_num; + cgn_helper_pthread = pthread_self(); + cgn_sleep_interval = CGNAT_MAX_HELPER_INTERVAL_US; + + CMM_STORE_SHARED(cgn_helper_thread_enabled, 1); + + dp_rcu_register_thread(); + dp_rcu_thread_offline(); + + while (CMM_LOAD_SHARED(running) && + CMM_LOAD_SHARED(cgn_helper_thread_enabled)) { + dp_rcu_thread_online(); + dp_rcu_read_lock(); + + cgn_sleep_interval = cgn_log_sessions(); + + dp_rcu_read_unlock(); + dp_rcu_thread_offline(); + DP_DEBUG(CGNAT, DEBUG, CGNAT, "On core %u, thread %lu, " + "enabled %d, interval %u\n", core_num, + cgn_helper_pthread, cgn_helper_thread_enabled, + cgn_sleep_interval); + if (cgn_sleep_interval > 1) + usleep(cgn_sleep_interval); + } + + dp_rcu_unregister_thread(); + cgn_helper_core_num = CGN_HELPER_INVALID_CORE_NUM; + cgn_helper_pthread = 0; + CMM_STORE_SHARED(cgn_helper_thread_enabled, 0); + + return 0; +} + +static void cgn_helper_get_tx(unsigned int lcore_id, + uint64_t *pkts) +{ + struct lcore_cgnat *stats; + + stats = lcore_conf_get_cgnat(lcore_id); + if (stats) + *pkts = stats->logs; +} + +struct dp_lcore_feat cgn_feat = { + .name = "cgnat", + .dp_lcore_feat_fn = cgn_helper_thread_func, + .dp_lcore_feat_get_rx = NULL, + .dp_lcore_feat_get_tx = cgn_helper_get_tx, +}; + +static int cgn_stop_helper_thread(void) +{ + unsigned int lcore 
= cgn_helper_core_num; + + RTE_LOG(DEBUG, CGNAT, "Stopping cgn helper on core %u\n", + cgn_helper_core_num); + + /* Request the thread exit */ + CMM_STORE_SHARED(cgn_helper_thread_enabled, 0); + + return dp_unallocate_lcore_from_feature(lcore); +} + +int cgn_set_helper_thread(unsigned int core_num) +{ + int rc; + + if (CMM_LOAD_SHARED(cgn_helper_thread_enabled)) { + if (core_num == cgn_helper_core_num) /* no change */ + return 0; + cgn_stop_helper_thread(); + } + + RTE_LOG(DEBUG, CGNAT, "Setting helper on core %u\n", core_num); + cgn_desired_helper_core_num = core_num; + + rc = dp_allocate_lcore_to_feature(core_num, &cgn_feat); + if (rc) + RTE_LOG(ERR, CGNAT, "Failed to assign core %u\n", core_num); + + return 0; +} + +int cgn_disable_helper_thread(void) +{ + cgn_desired_helper_core_num = CGN_HELPER_INVALID_CORE_NUM; + + return cgn_stop_helper_thread(); } /* - * Unit-test only + * Called from unit-test and from cgn_source_uninit. */ void cgn_session_cleanup(void) { uint i; - /* Stop timer, and expire all entries */ + /* Stop timer, and expire all entries. Do not restart gc timer */ cgn_session_expire_all(false, false); - /* - * 1. set idle flags, start expiry timer of 0 secs - * 2. expiry time elapsed, set idle flags, start pass 2 - * 3. pass 2 done, deactivate and destroy session - */ for (i = 0; i < CGN_SESS_GC_COUNT + 2; i++) - cgn_session_gc_walk(); + /* Do not restart gc timer */ + cgn_session_gc(NULL, NULL); } /* - * Unit-test only. + * Called via hidden vplsh command. Used by unit-test and by dev testers. */ void cgn_session_gc_pass(void) { cgn_session_stop_timer(); - cgn_session_gc_walk(); + cgn_session_gc(&cgn_gc_timer, NULL); +} + +/* Start gc timer */ +static inline void start_timer(struct rte_timer *timer) +{ + /* Restart timer if dataplane is still running. 
*/ + if (running && timer) + rte_timer_reset(timer, + CGN_SESS_GC_INTERVAL * rte_get_timer_hz(), + SINGLE, rte_get_master_lcore(), + cgn_session_gc, NULL); +} + +/* Stop gc timer */ +static inline void stop_timer(struct rte_timer *timer) +{ + if (timer) + rte_timer_stop(timer); } static void cgn_session_start_timer(void) { - rte_timer_reset(&cgn_gc_timer, - CGN_SESS_GC_INTERVAL * rte_get_timer_hz(), - SINGLE, rte_get_master_lcore(), cgn_session_gc, NULL); + start_timer(&cgn_gc_timer); } static void cgn_session_stop_timer(void) { - rte_timer_stop(&cgn_gc_timer); + stop_timer(&cgn_gc_timer); } /* - * cgn_session_init + * Called from DP_EVT_INIT event handler */ void cgn_session_init(void) { - if (cgn_sess_ht[CGN_DIR_FORW]) + if (cgn_sess_ht[CGN_DIR_OUT]) return; - cgn_sess_ht[CGN_DIR_FORW] = + cgn_sess_ht[CGN_DIR_OUT] = cds_lfht_new(CGN_SESSION_HT_INIT, CGN_SESSION_HT_MIN, CGN_SESSION_HT_MAX, CDS_LFHT_AUTO_RESIZE | CDS_LFHT_ACCOUNTING, NULL); - cgn_sess_ht[CGN_DIR_BACK] = + cgn_sess_ht[CGN_DIR_IN] = cds_lfht_new(CGN_SESSION_HT_INIT, CGN_SESSION_HT_MIN, CGN_SESSION_HT_MAX, CDS_LFHT_AUTO_RESIZE | CDS_LFHT_ACCOUNTING, NULL); rte_timer_init(&cgn_gc_timer); - cgn_session_start_timer(); + start_timer(&cgn_gc_timer); } /* - * cgn_session_uninit + * Called from DP_EVT_UNINIT event handler */ void cgn_session_uninit(void) { - uint i; - - if (!cgn_sess_ht[CGN_DIR_FORW]) + if (!cgn_sess_ht[CGN_DIR_OUT]) return; - /* Stop timer and expire all entries */ - cgn_session_expire_all(false, false); - - /* - * 1. set idle flags, start expiry timer of 0 secs - * 2. expiry time elapsed, set idle flags, start pass 2 - * 3. 
pass 2 done, deactivate and destroy session - */ - for (i = 0; i <= CGN_SESS_GC_COUNT; i++) - cgn_session_gc_walk(); - - assert(cgn_session_table_nodes(cgn_sess_ht[CGN_DIR_FORW]) == 0); - assert(cgn_session_table_nodes(cgn_sess_ht[CGN_DIR_BACK]) == 0); + /* Expire all entries and run gc multiple times */ + cgn_session_cleanup(); /* Destroy the session hash tables */ - dp_ht_destroy_deferred(cgn_sess_ht[CGN_DIR_FORW]); - cgn_sess_ht[CGN_DIR_FORW] = NULL; + dp_ht_destroy_deferred(cgn_sess_ht[CGN_DIR_OUT]); + cgn_sess_ht[CGN_DIR_OUT] = NULL; + + dp_ht_destroy_deferred(cgn_sess_ht[CGN_DIR_IN]); + cgn_sess_ht[CGN_DIR_IN] = NULL; +} - dp_ht_destroy_deferred(cgn_sess_ht[CGN_DIR_BACK]); - cgn_sess_ht[CGN_DIR_BACK] = NULL; +/* Used by unit-tests only */ +size_t cgn_session_size(void) +{ + return sizeof(struct cgn_session); } diff --git a/src/npf/cgnat/cgn_session.h b/src/npf/cgnat/cgn_session.h index c4a718ca..99eb6489 100644 --- a/src/npf/cgnat/cgn_session.h +++ b/src/npf/cgnat/cgn_session.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2019-2021, AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only */ @@ -8,43 +8,36 @@ #define _CGN_SESSION_H_ #include "util.h" +#include "npf/cgnat/cgn.h" +struct cgn_3tuple_key; struct cgn_session; +struct cgn_sess_s2; struct cgn_packet; struct cgn_policy; struct cgn_source; +struct cgn_sess2; +struct cgn_map; struct nat_pool; struct ifnet; -extern int32_t cgn_sessions_max; -extern int16_t cgn_dest_sessions_max; - -/* Global count of all 3-tuple sessions */ -extern rte_atomic32_t cgn_sessions_used; - -/* Global count of all 5-tuple sessions */ -extern rte_atomic32_t cgn_sess2_used; - -/* Is session table full? 
*/ -extern bool cgn_session_table_full; - -bool cgn_session_log_start(struct cgn_session *cse); -bool cgn_session_log_end(struct cgn_session *cse); -uint16_t cgn_session_log_periodic(struct cgn_session *cse); uint32_t cgn_session_ifindex(struct cgn_session *cse); uint32_t cgn_session_id(struct cgn_session *cse); +struct cgn_session *cgn_sess_from_cs2(struct cgn_sess_s2 *cs2); +struct cgn_source *cgn_src_from_cs2(struct cgn_sess_s2 *cs2); + /* * Update 3-tuple session stats from a just-expired 2-tuple session. This is - * called via the master thread, so 2-tuple stats total will appear there. + * called via the main thread, so 2-tuple stats total will appear there. */ void cgn_session_update_stats(struct cgn_session *cse, uint32_t pkts_out, uint32_t bytes_out, - uint32_t pkts_in, uint32_t bytes_in, - bool expired); + uint32_t pkts_in, uint32_t bytes_in); uint32_t cgn_session_forw_addr(struct cgn_session *cse); uint32_t cgn_session_forw_id(struct cgn_session *cse); +uint8_t cgn_session_ipproto(struct cgn_session *cse); uint32_t cgn_session_back_addr(struct cgn_session *cse); uint32_t cgn_session_back_id(struct cgn_session *cse); @@ -55,30 +48,36 @@ void cgn_session_get_back(const struct cgn_session *cse, uint16_t cgn_session_get_l3_delta(const struct cgn_session *cse, bool forw); uint16_t cgn_session_get_l4_delta(const struct cgn_session *cse, bool forw); -uint32_t cgn_session_get_ifindex(const struct cgn_session *cse); + +void cgn_session_try_enable_sub_sess(struct cgn_session *cse, + struct cgn_policy *cp, uint32_t oaddr); /* * taddr - translation addr * tid - translation ID * add_dst - Add 2-tuple table */ -struct cgn_session *cgn_session_establish(struct cgn_packet *cpk, int dir, - uint32_t taddr, uint16_t tid, - int *error, struct cgn_source *src); +struct cgn_session *cgn_session_establish(struct cgn_packet *cpk, + struct cgn_map *cmi, int *error); int cgn_session_activate(struct cgn_session *cse, - struct cgn_packet *cpk, int dir); + struct cgn_packet *cpk, 
enum cgn_dir dir); void cgn_session_destroy(struct cgn_session *cse, bool rcu_free); -struct cgn_session *cgn_session_lookup(struct cgn_packet *sp, int dir); -struct cgn_session *cgn_session_inspect(struct cgn_packet *sp, int dir); -struct cgn_session *cgn_session_lookup_icmp_err(struct cgn_packet *sp, int dir); +struct cgn_session *cgn_session_lookup(const struct cgn_3tuple_key *key, + enum cgn_dir dir); +struct cgn_session *cgn_session_inspect(struct cgn_packet *cpk, + enum cgn_dir dir, int *error); +struct cgn_session *cgn_session_lookup_icmp_err(struct cgn_packet *cpk, + enum cgn_dir dir); struct cgn_session *cgn_session_find_cached(struct rte_mbuf *mbuf); -struct cgn_session *cgn_session_get(struct cgn_session *cse); -void cgn_session_put(struct cgn_session *cse); +void cgn_session_set_max(int32_t val); + +/* Threshold */ +void session_table_threshold_set(int32_t threshold, uint32_t interval); void cgn_session_init(void); void cgn_session_uninit(void); @@ -86,17 +85,16 @@ void cgn_session_uninit(void); void cgn_session_id_list(FILE *f, int argc, char **argv); void cgn_session_show(FILE *f, int argc, char **argv); void cgn_session_clear(FILE *f, int argc, char **argv); -ulong cgn_session_count(void); +void cgn_session_update(FILE *f, int argc, char **argv); void cgn_session_expire_policy(bool restart_timer, struct cgn_policy *cp); void cgn_session_expire_pool(bool restart_timer, struct nat_pool *np, bool clear_mapping); -void cgn_sess2_slot_put(struct cgn_session *cse); - int cgn_op_session_map(FILE *f, int argc, char **argv); struct cgn_session *cgn_session_map(struct ifnet *ifp, struct cgn_packet *cpk, - int dir, int *error); + uint32_t pub_addr, uint16_t pub_port, + int *error); /* * Session walk @@ -110,4 +108,9 @@ void cgn_sess_list_show(void); void cgn_session_cleanup(void); -#endif +/* Session Logging thread */ +int cgn_set_helper_thread(unsigned int core_num); +int cgn_disable_helper_thread(void); +int cgn_helper_thread_func(unsigned int core_num, void 
*arg); + +#endif /* _CGN_SESSION_H_ */ diff --git a/src/npf/cgnat/cgn_source.c b/src/npf/cgnat/cgn_source.c index d1cd431b..f24dcc85 100644 --- a/src/npf/cgnat/cgn_source.c +++ b/src/npf/cgnat/cgn_source.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2019-2021, AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only */ @@ -16,20 +16,23 @@ #include #include #include +#include #include "compiler.h" #include "if_var.h" #include "urcu.h" #include "util.h" #include "soft_ticks.h" +#include "vplane_log.h" +#include "npf/npf_addrgrp.h" #include "npf/nat/nat_proto.h" #include "npf/nat/nat_pool.h" #include "npf/cgnat/cgn.h" #include "npf/apm/apm.h" #include "npf/cgnat/cgn_cmd_cfg.h" -#include "npf/cgnat/cgn_errno.h" +#include "npf/cgnat/cgn_rc.h" #include "npf/cgnat/cgn_policy.h" #include "npf/cgnat/cgn_source.h" #include "npf/cgnat/cgn_limits.h" @@ -51,13 +54,25 @@ static rte_atomic32_t cgn_src_used; static int32_t cgn_src_max = CGN_SRC_TABLE_MAX; static bool cgn_src_table_full; +/* Subscriber table threshold, time, and timer */ +static int32_t subscriber_table_threshold_cfg; /* configured percent */ +static int32_t subscriber_table_threshold; /* threshold value */ +static bool subscriber_table_threshold_been_below = true; +static uint32_t subscriber_table_threshold_time; +static struct rte_timer subscriber_table_threshold_timer; + +static void subscriber_table_threshold_timer_expiry( + struct rte_timer *timer __unused, + void *arg __unused); + /* * Update stats in source from a session. Called periodically from the * session gc routine, and when a session is destroyed. 
*/ void cgn_source_update_stats(struct cgn_source *src, uint64_t pkts_out, uint64_t bytes_out, - uint64_t pkts_in, uint64_t bytes_in) + uint64_t pkts_in, uint64_t bytes_in, + uint64_t unk_pkts_in) { assert(src); if (src) { @@ -65,6 +80,7 @@ void cgn_source_update_stats(struct cgn_source *src, src->sr_bytes_out += bytes_out; src->sr_pkts_in += pkts_in; src->sr_bytes_in += bytes_in; + src->sr_unk_pkts_in += unk_pkts_in; } } @@ -80,18 +96,34 @@ void cgn_source_put(struct cgn_source *src) rte_atomic32_dec(&src->sr_refcnt); } +/* Increment 3-tuple sessions created in subscriber */ void cgn_source_stats_sess_created(struct cgn_source *src) { if (src) rte_atomic32_inc(&src->sr_sess_created); } +/* Increment 3-tuple sessions destroyed in subscriber */ void cgn_source_stats_sess_destroyed(struct cgn_source *src) { if (src) rte_atomic32_inc(&src->sr_sess_destroyed); } +/* Increment 2-tuple sessions created in subscriber */ +void cgn_source_stats_sess2_created(struct cgn_source *src) +{ + if (src) + rte_atomic32_inc(&src->sr_sess2_created); +} + +/* Increment 2-tuple sessions destroyed in subscriber */ +void cgn_source_stats_sess2_destroyed(struct cgn_source *src) +{ + if (src) + rte_atomic32_inc(&src->sr_sess2_destroyed); +} + struct nat_pool *cgn_source_get_pool(struct cgn_source *src) { if (src && src->sr_policy) @@ -104,7 +136,7 @@ struct nat_pool *cgn_source_get_pool(struct cgn_source *src) * Add port block to source list */ int -cgn_source_add_block(struct cgn_source *src, uint8_t proto, +cgn_source_add_block(struct cgn_source *src, enum nat_proto proto, struct apm_port_block *pb, struct nat_pool *np) { assert(rte_spinlock_is_locked(&src->sr_lock)); @@ -116,13 +148,16 @@ cgn_source_add_block(struct cgn_source *src, uint8_t proto, /* Take reference on source */ cgn_source_get(src); + /* Set pointer to src in the port-block */ + apm_block_set_source(pb, src); + /* Set active_block for the requested protocol */ src->sr_active_block[proto] = pb; /* * Set active_block for 
other protocols, if they are not already set. */ - uint8_t p; + enum nat_proto p; for (p = NAT_PROTO_FIRST; p < NAT_PROTO_COUNT; p++) if (p != proto && src->sr_active_block[p] == NULL) src->sr_active_block[p] = pb; @@ -138,7 +173,9 @@ cgn_source_add_block(struct cgn_source *src, uint8_t proto, } if (nat_pool_log_pba(np)) - apm_log_block_alloc(pb, src->sr_addr); + apm_log_block_alloc(pb, src->sr_addr, + cgn_policy_get_name(src->sr_policy), + nat_pool_name(np)); return 0; } @@ -146,49 +183,120 @@ cgn_source_add_block(struct cgn_source *src, uint8_t proto, /* * cgn_session_destroy -> cgn_map_put */ -int +void cgn_source_del_block(struct cgn_source *src, struct apm_port_block *pb, struct nat_pool *np) { - assert(!rte_spinlock_is_locked(&src->sr_lock)); - rte_spinlock_lock(&src->sr_lock); - - /* - * Was src destroyed while we waited for lock? This should never - * happen in normal operation as only the master thread destroys - * sessions, and hence calls cgn_map_put and cgn_source_del_block. - */ - if ((src->sr_flags & SF_DEAD) != 0) { - rte_spinlock_unlock(&src->sr_lock); - return -1; - } + assert(rte_spinlock_is_locked(&src->sr_lock)); if (nat_pool_log_pba(np)) - apm_log_block_release(pb, src->sr_addr); + apm_log_block_release(pb, src->sr_addr, + cgn_policy_get_name(src->sr_policy), + nat_pool_name(np)); + /* Remove port-block from source's port-block list */ cds_list_del_rcu(apm_block_get_list_node(pb)); src->sr_block_count--; + /* + * If this source no longer has any port-blocks then the + * paired-address must be cleared. + * + * If it is not cleared, and this was the last port-block in use on + * the public address, then there is a short window where two + * subscribers could end up using the same public address.
+ */ + if (src->sr_block_count == 0) + src->sr_paired_addr = 0; + + /* Clear src ptr in the port-block */ + apm_block_set_source(pb, NULL); + /* Release reference on source */ cgn_source_put(src); - uint8_t p; - for (p = NAT_PROTO_FIRST; p < NAT_PROTO_COUNT; p++) + enum nat_proto p; + for (p = NAT_PROTO_FIRST; p < NAT_PROTO_COUNT; p++) { + /* This block can no longer be the Active block */ if (pb == src->sr_active_block[p]) src->sr_active_block[p] = NULL; - /* Had the mbpu limit been previously reached? */ - if (src->sr_mbpu_full && - src->sr_block_count < nat_pool_get_mbpu(np)) { - cgn_log_subscriber_mbpu_avail(src->sr_addr, - src->sr_block_count, - nat_pool_get_mbpu(np)); - src->sr_mbpu_full = false; + /* Had the mbpu limit been previously reached? */ + if (src->sr_mbpu_full[p]) { + src->sr_mbpu_full[p] = false; + + cgn_log_resource_subscriber_mbpu( + CGN_RESOURCE_AVAILABLE, src->sr_addr, + nat_ipproto_from_proto(p), + src->sr_block_count, nat_pool_get_mbpu(np)); + } + } +} + +/* + * Generate subscriber table threshold log + * and restart timer if required. 
+ */ +static void subscriber_table_threshold_log(int32_t val, int32_t max) +{ + cgn_log_resource_subscriber_table( + CGN_RESOURCE_THRESHOLD, val, max); + + if (subscriber_table_threshold_time) + rte_timer_reset(&subscriber_table_threshold_timer, + subscriber_table_threshold_time * rte_get_timer_hz(), + SINGLE, rte_get_master_lcore(), + subscriber_table_threshold_timer_expiry, + NULL); +} + +/* + * Warn if over the configured subscriber table threshold + */ +static void subscriber_table_threshold_check(int32_t val) +{ + if (subscriber_table_threshold && + subscriber_table_threshold_been_below && + (val >= subscriber_table_threshold) && + (!rte_timer_pending(&subscriber_table_threshold_timer))) { + + subscriber_table_threshold_been_below = false; + subscriber_table_threshold_log(val, cgn_src_max); } +} - rte_spinlock_unlock(&src->sr_lock); +/* + * Set subscriber table threshold + * + * threshold is in percent; interval is in seconds. + */ +void subscriber_table_threshold_set(int32_t threshold, uint32_t interval) +{ + rte_timer_stop(&subscriber_table_threshold_timer); + subscriber_table_threshold = + (cgn_src_max * threshold + 99) / 100; + subscriber_table_threshold_time = interval; + subscriber_table_threshold_been_below = true; + + /* Warn if over configured threshold */ + int32_t val = rte_atomic32_read(&cgn_src_used); + subscriber_table_threshold_check(val); +} - return 0; +/* + * Handle subscriber table threshold timer expiry. 
+ */ +static void subscriber_table_threshold_timer_expiry( + struct rte_timer *timer __unused, + void *arg __unused) +{ + int32_t val = rte_atomic32_read(&cgn_src_used); + + if (subscriber_table_threshold && + (val >= subscriber_table_threshold)) { + + subscriber_table_threshold_log(val, cgn_src_max); + } } /* @@ -196,14 +304,22 @@ cgn_source_del_block(struct cgn_source *src, struct apm_port_block *pb, */ static bool cgn_src_slot_get(void) { - if (rte_atomic32_add_return(&cgn_src_used, 1) <= cgn_src_max) + int32_t val = rte_atomic32_add_return(&cgn_src_used, 1); + + /* Warn if over configured threshold */ + subscriber_table_threshold_check(val); + + /* Error if table is full */ + + if (val <= cgn_src_max) return true; rte_atomic32_dec(&cgn_src_used); if (!cgn_src_table_full) - RTE_LOG(ERR, CGNAT, "SUBSCRIBER_TABLE_FULL count=%u/%u\n", - rte_atomic32_read(&cgn_src_used), cgn_src_max); + cgn_log_resource_subscriber_table( + CGN_RESOURCE_FULL, rte_atomic32_read(&cgn_src_used), + cgn_src_max); /* * Mark src table as full. This is reset in the gc when the src count @@ -216,7 +332,10 @@ static bool cgn_src_slot_get(void) static void cgn_src_slot_put(void) { - rte_atomic32_dec(&cgn_src_used); + int32_t val = rte_atomic32_sub_return(&cgn_src_used, 1); + + if (val < subscriber_table_threshold) + subscriber_table_threshold_been_below = true; } /* Get subscriber hash table used and max counts */ @@ -225,6 +344,20 @@ int32_t cgn_source_get_used(void) return rte_atomic32_read(&cgn_src_used); } +/* + * Set maximum subscriber table entries; + * recalc subscriber table threshold. 
+ */ +void cgn_source_set_max(int32_t val) +{ + if (val > CGN_SRC_TABLE_MAX) + val = CGN_SRC_TABLE_MAX; + + cgn_src_max = val; + subscriber_table_threshold_set(subscriber_table_threshold_cfg, + subscriber_table_threshold_time); +} + int32_t cgn_source_get_max(void) { return cgn_src_max; @@ -235,7 +368,7 @@ cgn_source_create(struct cgn_policy *cp, uint32_t addr, vrfid_t vrfid, int *error) { struct cgn_source *src; - uint8_t proto; + enum nat_proto proto; if (!cgn_src_slot_get()) { *error = -CGN_SRC_ENOSPC; @@ -253,7 +386,7 @@ cgn_source_create(struct cgn_policy *cp, uint32_t addr, vrfid_t vrfid, src->sr_vrfid = vrfid; rte_spinlock_init(&src->sr_lock); rte_atomic32_set(&src->sr_refcnt, 0); - src->sr_start_time = soft_ticks; + src->sr_start_time = unix_epoch_us; /* Take reference on policy */ src->sr_policy = cgn_policy_get(cp); @@ -269,6 +402,57 @@ cgn_source_create(struct cgn_policy *cp, uint32_t addr, vrfid_t vrfid, return src; } +/* + * Notification that a new policy has been added. + * + * Note that this is done after the policy is added to the policy hash table, + * but *before* it is added to the interface list. Hence it is not yet + * findable by packets. + * + * cgnat subscriber structures hold a reference on cgnat policies. It is + * possible for a policy to be unconfigured and reconfigured *before* any + * subscribers pointing to the original policy structure have been garbage + * collected. + * + * For each subscriber check if it is still referencing the old policy. If so + * then we need to release the reference on the old policy and take a + * reference on the new policy. + */ +void cgn_source_policy_added(struct cgn_policy *cp) +{ + struct cds_lfht_iter iter; + struct cgn_source *src; + + if (!cgn_src_ht) + return; + + cds_lfht_for_each_entry(cgn_src_ht, &iter, src, sr_node) { + uint32_t addr = htonl(src->sr_addr); + + /* + * There should be no subscribers pointing to the new policy + * just yet, but check anyway.
+ */ + if (src->sr_policy == cp) + continue; + + /* Is subscriber addr in match address-group? */ + if (npf_addrgrp_lookup_v4_by_handle(cp->cp_match_ag, + addr) != 0) + continue; /* No */ + + if (src->sr_policy) { + /* Release reference on the old policy */ + cgn_policy_dec_source_count(src->sr_policy); + cgn_policy_put(src->sr_policy); + } + + /* Take reference on the new policy */ + src->sr_policy = cgn_policy_get(cp); + cgn_policy_inc_source_count(src->sr_policy); + } +} + static void cgn_source_rcu_free(struct rcu_head *head) { struct cgn_source *src = caa_container_of(head, struct cgn_source, @@ -294,9 +478,10 @@ static void cgn_source_destroy(struct cgn_source *src) if (!src->sr_policy || src->sr_policy->cp_log_subs) cgn_log_subscriber_end( - src->sr_addr, src->sr_start_time, soft_ticks, + src->sr_addr, src->sr_start_time, unix_epoch_us, src->sr_pkts_out_tot, src->sr_bytes_out_tot, - src->sr_pkts_in_tot, src->sr_bytes_in_tot, + src->sr_pkts_in_tot, + src->sr_bytes_in_tot, src->sr_sess_created_tot); cgn_policy_dec_source_count(src->sr_policy); @@ -548,6 +733,143 @@ static uint cgn_count2rate(uint count, uint interval) return rate; } +static inline uint cgn_sess_rate_index_next(uint cur) +{ + if (likely(++cur < CGN_SESS_RATE_CNTRS)) + return cur; + return 0; +} + +static inline uint cgn_sess_rate_index_prev(uint cur) +{ + if (likely(cur > 0)) + return cur - 1; + return CGN_SESS_RATE_CNTRS - 1; +} + +/* + * Clear or update stats for one subscriber. + * + * These stats are only ever updated from either the session or subscriber + * garbage collection walks. They are never changed by a forwarding thread. + * + * Total sessions created and total sessions destroyed counts only ever + * increment. We do not zero either of these. 
+ */ +static void +cgn_source_clear_or_update_stats_one(struct cgn_source *src, bool clear) +{ + /* Add periodic stats to totals and update stats in policy */ + cgn_source_stats_periodic(src); + + if (clear) { + uint i; + + /* Clear sessions-created samples */ + for (i = 0; i < CGN_SESS_RATE_5MIN; i++) + src->sr_sess_rate[i] = 0; + + /* Max session rates */ + src->sr_sess_rate_max = 0; + src->sr_sess_rate_max_time = 0UL; + src->sr_sess_rate_1m_max = 0; + src->sr_sess_rate_1m_max_time = 0UL; + + /* Packet and byte counts */ + src->sr_pkts_out_tot = 0UL; + src->sr_bytes_out_tot = 0UL; + src->sr_pkts_in_tot = 0UL; + src->sr_bytes_in_tot = 0UL; + src->sr_unk_pkts_in_tot = 0UL; + } +} + +static void +cgn_source_clear_or_update_stats(struct cgn_source_fltr *fltr, bool clear) +{ + struct cds_lfht_iter iter; + struct cgn_source *src; + + /* + * If a host mask is specified in filter, then just lookup address. + */ + if (fltr->sf_mask == 0xffffffff) { + src = cgn_source_lookup(fltr->sf_addr, VRF_DEFAULT_ID); + if (src) + cgn_source_clear_or_update_stats_one(src, clear); + + /* Nothing more to do */ + return; + } + + cds_lfht_for_each_entry(cgn_src_ht, &iter, src, sr_node) { + if (fltr->sf_mask && + (src->sr_addr & fltr->sf_mask) != fltr->sf_addr) + continue; + + cgn_source_clear_or_update_stats_one(src, clear); + } +} + +/* + * Clear or update subscriber stats + * + * cgn-op {clear| update} subscriber [address 100.64.0.0/30] statistics + */ +void cgn_source_clear_or_update(int argc, char **argv, bool clear) +{ + struct cgn_source_fltr fltr = { 0 }; + bool stats = false; + + fltr.sf_all = true; + + /* Remove "cgn-op {clear| update} subscriber" */ + argc -= 3; + argv += 3; + + while (argc > 0) { + if (!strcmp(argv[0], "address") && argc >= 2) { + npf_addr_t npf_addr; + npf_netmask_t pl; + sa_family_t fam; + uint32_t addr; + bool negate; + ulong tmp; + int rc; + + rc = npf_parse_ip_addr(argv[1], &fam, &npf_addr, + &pl, &negate); + if (rc < 0) + return; + + pl = MIN(32, pl); +
memcpy(&addr, &npf_addr, 4); + fltr.sf_addr = ntohl(addr); + + tmp = (0xFFFFFFFF << (32 - pl)) & 0xFFFFFFFF; + fltr.sf_mask = tmp; + fltr.sf_addr &= fltr.sf_mask; + fltr.sf_all = false; + + argc -= 2; + argv += 2; + + } else if (argc >= 1 && !strcmp(argv[0], "statistics")) { + stats = true; + argc -= 1; + argv += 1; + + } else { + /* Unknown option */ + argc -= 1; + argv += 1; + } + } + + if (stats) + cgn_source_clear_or_update_stats(&fltr, clear); +} + /* * cgn_source_jsonw_one */ @@ -586,10 +908,8 @@ cgn_source_jsonw_one(json_writer_t *json, uint detail __unused, if (detail) cgn_source_jsonw_port_blocks(json, src); - jsonw_uint_field(json, "start_time", - cgn_ticks2timestamp(src->sr_start_time)); - jsonw_uint_field(json, "duration", - cgn_start2duration(src->sr_start_time)); + jsonw_uint_field(json, "start_time", src->sr_start_time); + jsonw_uint_field(json, "duration", unix_epoch_us - src->sr_start_time); jsonw_uint_field(json, "map_reqs", src->sr_map_reqs); jsonw_uint_field(json, "map_fails", src->sr_map_fails); jsonw_uint_field(json, "map_active", @@ -611,62 +931,68 @@ cgn_source_jsonw_one(json_writer_t *json, uint detail __unused, jsonw_uint_field(json, "in_pkts", pkts); jsonw_uint_field(json, "in_bytes", bytes); + jsonw_uint_field(json, "unk_pkts_in", + src->sr_unk_pkts_in + src->sr_unk_pkts_in_tot); + /* Sessions stats */ uint32_t sess_crtd, sess_dstrd; + uint32_t sess2_crtd, sess2_dstrd; sess_crtd = rte_atomic32_read(&src->sr_sess_created); sess_dstrd = rte_atomic32_read(&src->sr_sess_destroyed); + sess2_crtd = rte_atomic32_read(&src->sr_sess2_created); + sess2_dstrd = rte_atomic32_read(&src->sr_sess2_destroyed); jsonw_uint_field(json, "sess_crtd", src->sr_sess_created_tot + sess_crtd); jsonw_uint_field(json, "sess_dstrd", src->sr_sess_destroyed_tot + sess_dstrd); + jsonw_uint_field(json, "sess2_crtd", + src->sr_sess2_created_tot + sess2_crtd); + jsonw_uint_field(json, "sess2_dstrd", + src->sr_sess2_destroyed_tot + sess2_dstrd); + /* * Session rates. 
We start at the last value recorded, and work * backwards from there. */ - uint i = src->sr_sess_rate_cur, n; + uint i, n; uint rate_max = 0, rate_20s, rate_1m = 0, rate_5m = 0; - if (i == 0) - i = CGN_SESS_RATE_CNTRS - 1; - else - i -= 1; + i = src->sr_sess_rate_index; + + for (n = 0; n < CGN_SESS_RATE_5MIN; n++) { + i = cgn_sess_rate_index_prev(i); - rate_20s = src->sr_sess_rate[i]; + if (n == 0) + rate_20s = src->sr_sess_rate[i]; - for (n = 0; n < CGN_SESS_RATE_CNTRS; n++) { - if ((n * CGN_SRC_GC_INTERVAL) < 60) + if (n < CGN_SESS_RATE_1MIN) rate_1m += src->sr_sess_rate[i]; rate_5m += src->sr_sess_rate[i]; - - /* Decrement i */ - if (i == 0) - i = CGN_SESS_RATE_CNTRS - 1; - else - i -= 1; } - /* Convert to sessions per second */ - uint ivals_per_min = 60 / CGN_SRC_GC_INTERVAL; - - rate_max = cgn_count2rate(src->sr_sess_rate_max, CGN_SRC_GC_INTERVAL); - + /* Current session rates in sessions per second */ rate_20s = cgn_count2rate(rate_20s, CGN_SRC_GC_INTERVAL); - rate_1m = cgn_count2rate(rate_1m, - CGN_SRC_GC_INTERVAL * ivals_per_min); - rate_5m = cgn_count2rate(rate_5m, - CGN_SRC_GC_INTERVAL * ivals_per_min * 5); + rate_1m = cgn_count2rate(rate_1m, 60); + rate_5m = cgn_count2rate(rate_5m, 300); jsonw_uint_field(json, "sess_rate_20s", rate_20s); jsonw_uint_field(json, "sess_rate_1m", rate_1m); jsonw_uint_field(json, "sess_rate_5m", rate_5m); + /* 20 sec max */ + rate_max = cgn_count2rate(src->sr_sess_rate_max, CGN_SRC_GC_INTERVAL); jsonw_uint_field(json, "sess_rate_max", rate_max); - jsonw_uint_field(json, "sess_rate_max_tm", - cgn_ticks2timestamp(src->sr_sess_rate_max_time)); + jsonw_uint_field(json, "sess_rate_max_tm", src->sr_sess_rate_max_time); + + /* 1 minute max */ + rate_max = cgn_count2rate(src->sr_sess_rate_1m_max, 60); + jsonw_uint_field(json, "sess_rate_1m_max", rate_max); + jsonw_uint_field(json, "sess_rate_1m_max_tm", + src->sr_sess_rate_1m_max_time); jsonw_end_object(json); } @@ -889,35 +1215,71 @@ void cgn_source_list(FILE *f, int argc, char 
**argv) } /* - * Called from master thread in garbage collection interval, and from + * Called from main thread in garbage collection interval, and from * cgn_source_destroy. */ static void cgn_source_stats_periodic(struct cgn_source *src) { + uint n, i; + assert(rte_spinlock_is_locked(&src->sr_lock)); /* * Sessions created and destroyed */ - uint32_t sess_crtd, sess_dstd; + uint32_t sess_crtd, sess_dstd, sess_crtd_1m; + uint32_t sess2_crtd, sess2_dstd; sess_crtd = rte_atomic32_exchange( (volatile uint32_t *)&src->sr_sess_created.cnt, 0); sess_dstd = rte_atomic32_exchange( (volatile uint32_t *)&src->sr_sess_destroyed.cnt, 0); + sess2_crtd = rte_atomic32_exchange( + (volatile uint32_t *)&src->sr_sess2_created.cnt, 0); + sess2_dstd = rte_atomic32_exchange( + (volatile uint32_t *)&src->sr_sess2_destroyed.cnt, 0); src->sr_sess_created_tot += sess_crtd; src->sr_sess_destroyed_tot += sess_dstd; + src->sr_sess2_created_tot += sess2_crtd; + src->sr_sess2_destroyed_tot += sess2_dstd; + + /* Check 1 minute max session rate *before* adding to samples */ + sess_crtd_1m = sess_crtd; + i = src->sr_sess_rate_index; + + for (n = 0; n < CGN_SESS_RATE_1MIN - 1; n++) { + i = cgn_sess_rate_index_prev(i); + sess_crtd_1m += src->sr_sess_rate[i]; + } + + if (sess_crtd_1m > src->sr_sess_rate_1m_max) { + uint rate_max; - src->sr_sess_rate[src->sr_sess_rate_cur] = sess_crtd; + src->sr_sess_rate_1m_max = sess_crtd_1m; + src->sr_sess_rate_1m_max_time = unix_epoch_us; - if (++src->sr_sess_rate_cur >= CGN_SESS_RATE_CNTRS) - src->sr_sess_rate_cur = 0; + /* Convert count to sessions-per-sec rate */ + rate_max = cgn_count2rate(src->sr_sess_rate_1m_max, + 60); + /* Update policy */ + cgn_policy_update_sess_rate(src->sr_policy, + src->sr_addr, + rate_max, + src->sr_sess_rate_1m_max_time); + } + + /* Add sess_crtd to sample array*/ + src->sr_sess_rate[src->sr_sess_rate_index] = sess_crtd; + src->sr_sess_rate_index = + cgn_sess_rate_index_next(src->sr_sess_rate_index); + + /* Check 20 sec max session 
rate */ if (sess_crtd > src->sr_sess_rate_max) { src->sr_sess_rate_max = sess_crtd; - src->sr_sess_rate_max_time = soft_ticks; + src->sr_sess_rate_max_time = unix_epoch_us; } /* @@ -927,17 +1289,20 @@ cgn_source_stats_periodic(struct cgn_source *src) src->sr_bytes_out_tot += src->sr_bytes_out; src->sr_pkts_in_tot += src->sr_pkts_in; src->sr_bytes_in_tot += src->sr_bytes_in; + src->sr_unk_pkts_in_tot += src->sr_unk_pkts_in; /* Update stats in policy */ cgn_policy_update_stats(src->sr_policy, src->sr_pkts_out, src->sr_bytes_out, src->sr_pkts_in, src->sr_bytes_in, - sess_crtd, sess_dstd); + src->sr_unk_pkts_in, + sess_crtd, sess_dstd, sess2_crtd, sess2_dstd); src->sr_pkts_out = 0UL; src->sr_bytes_out = 0UL; src->sr_pkts_in = 0UL; src->sr_bytes_in = 0UL; + src->sr_unk_pkts_in = 0UL; } /* @@ -985,7 +1350,7 @@ static void cgn_source_gc_inspect(struct cgn_source *src) rte_spinlock_unlock(&src->sr_lock); } -static void cgn_source_gc_walk(void) +static void cgn_source_gc(struct rte_timer *timer, void *arg __unused) { struct cds_lfht_iter iter; struct cgn_source *src; @@ -993,6 +1358,7 @@ static void cgn_source_gc_walk(void) if (!cgn_src_ht) return; + /* Walk the source table */ cds_lfht_for_each_entry(cgn_src_ht, &iter, src, sr_node) cgn_source_gc_inspect(src); @@ -1000,37 +1366,47 @@ static void cgn_source_gc_walk(void) if (cgn_src_table_full && rte_atomic32_read(&cgn_src_used) < cgn_src_max) { - RTE_LOG(ERR, CGNAT, "SUBSCRIBER_TABLE_AVAILABLE count=%u/%u\n", + cgn_log_resource_subscriber_table( + CGN_RESOURCE_AVAILABLE, rte_atomic32_read(&cgn_src_used), cgn_src_max); cgn_src_table_full = false; } -} - -static void cgn_source_gc(struct rte_timer *timer __unused, void *arg __unused) -{ - /* Walk the source table */ - cgn_source_gc_walk(); - /* Restart timer if dataplane still running */ - if (running) - rte_timer_reset(&cgn_src_timer, + /* Restart timer if the dataplane is still running */ + if (running && timer) + rte_timer_reset(timer, CGN_SRC_GC_INTERVAL * 
rte_get_timer_hz(), SINGLE, rte_get_master_lcore(), cgn_source_gc, NULL); } /* - * Unit-test only. + * Called from unit-test and from cgn_source_uninit. */ void cgn_source_cleanup(void) { + uint i; + rte_timer_stop(&cgn_src_timer); - cgn_source_gc_walk(); - cgn_source_gc_walk(); /* SF_EXPIRED */ - cgn_source_gc_walk(); /* SF_DEAD */ + + for (i = 0; i <= CGN_SRC_GC_COUNT; i++) + /* Do not restart gc timer */ + cgn_source_gc(NULL, NULL); } +/* + * Called via hidden vplsh command. Used by unit-test and by dev testers. + */ +void cgn_source_gc_pass(void) +{ + rte_timer_stop(&cgn_src_timer); + cgn_source_gc(&cgn_src_timer, NULL); +} + +/* + * Called from DP_EVT_INIT event handler + */ void cgn_source_init(void) { if (cgn_src_ht) @@ -1043,23 +1419,23 @@ void cgn_source_init(void) rte_timer_init(&cgn_src_timer); rte_timer_reset(&cgn_src_timer, - (CGN_SRC_GC_INTERVAL + 5) * rte_get_timer_hz(), + CGN_SRC_GC_INTERVAL * rte_get_timer_hz(), SINGLE, rte_get_master_lcore(), cgn_source_gc, NULL); } - +/* + * Called from DP_EVT_UNINIT event handler + */ void cgn_source_uninit(void) { - uint i; - if (!cgn_src_ht) return; - rte_timer_stop(&cgn_src_timer); + /* Do three passes of the garbage collector */ + cgn_source_cleanup(); - for (i = 0; i <= CGN_SRC_GC_COUNT; i++) - cgn_source_gc_walk(); + assert(rte_atomic32_read(&cgn_src_used) == 0); dp_ht_destroy_deferred(cgn_src_ht); cgn_src_ht = NULL; diff --git a/src/npf/cgnat/cgn_source.h b/src/npf/cgnat/cgn_source.h index f8b534a4..e52ef1c9 100644 --- a/src/npf/cgnat/cgn_source.h +++ b/src/npf/cgnat/cgn_source.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2019-2021, AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only */ @@ -19,9 +19,13 @@ struct cgn_source; struct apm_port_block; /* - * Record sr_sess_created value for every interval of last 5 minutes. 
+ * Per subscriber record of sessions created during each sample period for + * last 5 minutes. Sample period is 20 seconds. */ -#define CGN_SESS_RATE_CNTRS ((60*5)/CGN_SRC_GC_INTERVAL) +#define CGN_SESS_RATE_1MIN (60 / CGN_SRC_GC_INTERVAL) +#define CGN_SESS_RATE_5MIN (300 / CGN_SRC_GC_INTERVAL) +#define CGN_SESS_RATE_CNTRS CGN_SESS_RATE_5MIN + /* * cgnat source/subscriber address table entry. Hash of sr_addr and @@ -33,9 +37,17 @@ struct apm_port_block; * ports in that block have been released for all protocols. * * sr_mbpu_full is set true when a subscriber has reached max-blocks-per-user - * limit. It is used to gate log messages. Note thats its possible (and - * likely) that one protocol will cause max-blocks to be reached, and that - * this should not prevent allocations for other protocols. + * limit and cannot allocate another port-block. It is used to gate log + * messages. Note that it is possible (and likely) that one protocol will + * cause max-blocks to be reached, and that this should not prevent + * allocations for other protocols. + * + * When we allocate a port-block we allocate bits for all protocols, even + * though only one protocol may require a port. As such, the logging is + * somewhat unusual. We log a per-protocol resource constraint msg for the + * protocol that failed to get a mapping because of mbpu. However we only log + * a subsequent resource available message when a port-block is freed for all + * protocols.
*/ struct cgn_source { struct cds_lfht_node sr_node; /* hash table node */ @@ -48,7 +60,7 @@ struct cgn_source { struct apm_port_block *sr_active_block[NAT_PROTO_COUNT]; struct cds_list_head sr_block_list; uint16_t sr_block_count; /* blocks in sr_block_list */ - uint8_t sr_mbpu_full; /* mbpu reached */ + uint8_t sr_mbpu_full[NAT_PROTO_COUNT]; vrfid_t sr_vrfid; rte_spinlock_t sr_lock; @@ -61,25 +73,50 @@ struct cgn_source { uint64_t sr_bytes_in; uint64_t sr_pkts_in_tot; uint64_t sr_bytes_in_tot; + uint64_t sr_unk_pkts_in; + uint64_t sr_unk_pkts_in_tot; struct cgn_policy *sr_policy; /* Back ptr to policy */ - uint64_t sr_start_time; /* millisecs */ + uint64_t sr_start_time; /* unix epoch us */ - /* Sessions created and destroyed in current interval */ + /* 3-tuple sessions created and destroyed in current interval */ rte_atomic32_t sr_sess_created; rte_atomic32_t sr_sess_destroyed; - /* Total sessions created/destroyed since src start */ + /* 2-tuple sessions created and destroyed in current interval */ + rte_atomic32_t sr_sess2_created; + rte_atomic32_t sr_sess2_destroyed; + + /* Total 3-tuple sessions created/destroyed since src start */ uint64_t sr_sess_created_tot; uint64_t sr_sess_destroyed_tot; + /* Total 2-tuple sessions created/destroyed since src start */ + uint64_t sr_sess2_created_tot; + uint64_t sr_sess2_destroyed_tot; + /* - * sr_sess_rate is a record of the number of sessions created during - * last 'n' complete gc intervals. + * sr_sess_rate is a record (circular buffer) of the number of + * sessions created during last 'n' complete gc intervals. + * + * sr_sess_rate_index is the next location to be populated. The last + * recorded value will be: + * cgn_sess_rate_index_prev(src->sr_sess_rate_index) */ - uint8_t sr_sess_rate_cur; + uint8_t sr_sess_rate_index; uint32_t sr_sess_rate[CGN_SESS_RATE_CNTRS]; - uint32_t sr_sess_rate_max; - uint64_t sr_sess_rate_max_time; + + /* + * sr_sess_rate_max is not actually a 'rate'. 
It is the max number of + * sessions created in a 20 sec sample period. Similarly, + * sr_sess_rate_1m_max is the max number of sessions created in any + * consecutive three sample periods. These are converted to + * session-per-second rates when updating the policy or returning + * json. + */ + uint32_t sr_sess_rate_max; /* 20s max */ + uint64_t sr_sess_rate_max_time; /* 20s max time */ + uint32_t sr_sess_rate_1m_max; /* 1m max */ + uint64_t sr_sess_rate_1m_max_time; /* 1m max time */ uint64_t sr_map_reqs; uint64_t sr_map_fails; @@ -90,16 +127,21 @@ struct cgn_source { #define SF_EXPIRED 0x01 #define SF_DEAD 0x02 -int cgn_source_add_block(struct cgn_source *src, uint8_t proto, +int cgn_source_add_block(struct cgn_source *src, enum nat_proto proto, struct apm_port_block *pb, struct nat_pool *np); -int cgn_source_del_block(struct cgn_source *src, struct apm_port_block *pb, - struct nat_pool *np); +void cgn_source_del_block(struct cgn_source *src, struct apm_port_block *pb, + struct nat_pool *np); + +/* Threshold */ +void subscriber_table_threshold_set(int32_t threshold, uint32_t interval); struct cgn_source *cgn_source_get(struct cgn_source *src); void cgn_source_put(struct cgn_source *src); void cgn_source_stats_sess_created(struct cgn_source *src); void cgn_source_stats_sess_destroyed(struct cgn_source *src); +void cgn_source_stats_sess2_created(struct cgn_source *src); +void cgn_source_stats_sess2_destroyed(struct cgn_source *src); struct nat_pool *cgn_source_get_pool(struct cgn_source *src); /* @@ -114,16 +156,24 @@ struct cgn_source *cgn_source_find_and_lock(struct cgn_policy *cp, void cgn_source_update_stats(struct cgn_source *src, uint64_t pkts_out, uint64_t bytes_out, - uint64_t pkts_in, uint64_t bytes_in); + uint64_t pkts_in, uint64_t bytes_in, + uint64_t unk_pkts_in); /* Get subscriber hash table used and max counts */ int32_t cgn_source_get_used(void); +void cgn_source_set_max(int32_t val); int32_t cgn_source_get_max(void); +/* Inform source database that 
a new policy has been added */ +void cgn_source_policy_added(struct cgn_policy *cp); + +void cgn_source_clear_or_update(int argc, char **argv, bool clear); void cgn_source_show(FILE *f, int argc, char **argv); void cgn_source_list(FILE *f, int argc, char **argv); +/* Unit-test only */ void cgn_source_cleanup(void); +void cgn_source_gc_pass(void); void cgn_source_init(void); void cgn_source_uninit(void); diff --git a/src/npf/cgnat/cgn_test.c b/src/npf/cgnat/cgn_test.c new file mode 100644 index 00000000..3103607b --- /dev/null +++ b/src/npf/cgnat/cgn_test.c @@ -0,0 +1,21 @@ +/* + * Copyright (c) 2021, AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ + +#include "npf/apm/apm.h" +#include "npf/cgnat/cgn_session.h" +#include "npf/cgnat/cgn_source.h" +#include "npf/cgnat/cgn_test.h" + +/* + * Used by CGNAT unit-tests + */ + +void dp_test_npf_clear_cgnat(void) +{ + cgn_session_cleanup(); + apm_cleanup(); + cgn_source_cleanup(); +} diff --git a/src/npf/cgnat/cgn_test.h b/src/npf/cgnat/cgn_test.h new file mode 100644 index 00000000..85c8124c --- /dev/null +++ b/src/npf/cgnat/cgn_test.h @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2021, AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ + +#ifndef _CGN_TEST_H_ +#define _CGN_TEST_H_ + +#include +#include +#include + +#include "npf/cgnat/cgn.h" + +/* + * Used by CGNAT unit-tests only + */ + +struct ifnet; +struct rte_mbuf; + +void dp_test_npf_clear_cgnat(void); +bool ipv4_cgnat_test(struct rte_mbuf **mbufp, struct ifnet *ifp, + enum cgn_dir dir, int *error); +size_t cgn_session_size(void); +size_t cgn_sess2_size(void); + +#endif diff --git a/src/npf/config/gpc_acl_cli.c b/src/npf/config/gpc_acl_cli.c new file mode 100644 index 00000000..0727fc8b --- /dev/null +++ b/src/npf/config/gpc_acl_cli.c @@ -0,0 +1,310 @@ +/* + * Copyright (c) 2019-2021, AT&T Intellectual Property. All rights reserved. 
+ * + * SPDX-License-Identifier: LGPL-2.1-only + */ +#include +#include +#include +#include + +#include "compiler.h" +#include "vplane_log.h" +#include "if_var.h" +#include "json_writer.h" + +#include "npf/config/gpc_cntr_query.h" +#include "npf/config/gpc_db_query.h" +#include "npf/config/pmf_att_rlgrp.h" +#include "npf/config/pmf_rule.h" +#include "npf/config/gpc_acl_cli.h" +#include "npf/config/gpc_hw.h" + +/* Op-mode commands : dump internals */ + +void +gpc_acl_dump(FILE *fp) +{ + struct gpc_rlset *gprs; + + /* Rulesets */ + GPC_RLSET_FOREACH(gprs) { + bool rs_in = gpc_rlset_is_ingress(gprs); + struct ifnet *rs_ifp = gpc_rlset_get_ifp(gprs); + bool rs_if_created = gpc_rlset_is_if_created(gprs); + char const *ifname = gpc_rlset_get_ifname(gprs); + uint32_t if_index = rs_ifp ? rs_ifp->if_index : 0; + fprintf(fp, " RLS:%p: %s(%u)/%s%s%s\n", + gprs, ifname, if_index, + rs_in ? "In " : "Out", + rs_ifp ? " IFP" : "", + rs_if_created ? " IfCrt" : "" + ); + /* Groups - i.e. TABLES */ + struct gpc_group *gprg; + GPC_GROUP_FOREACH(gprg, gprs) { + void *attr_rule = NULL; + uint32_t num_rules = 0; + + if (gpc_group_get_feature(gprg) == GPC_FEAT_ACL) { + void *earg = gpc_group_get_owner(gprg); + attr_rule = pmf_arlg_earg_get_attr_rule(earg); + num_rules = pmf_arlg_earg_get_rule_count(earg); + } + + bool rg_published = gpc_group_is_published(gprg); + bool rg_attached = gpc_group_is_attached(gprg); + bool rg_deferred = gpc_group_is_deferred(gprg); + bool rg_attr_rl = !!attr_rule; + bool rg_family = gpc_group_has_family(gprg); + bool rg_v6 = gpc_group_is_v6(gprg); + bool rg_ll_create = gpc_group_is_ll_created(gprg); + bool rg_ll_attach = gpc_group_is_ll_attached(gprg); + fprintf(fp, + " GRP:%p(%lx): %s(%u/%x)%s%s%s%s%s%s%s\n", + gprg, gpc_group_get_objid(gprg), + gpc_group_get_name(gprg), + num_rules, + gpc_group_get_summary(gprg), + rg_published ? " Pub" : "", + rg_ll_create ? " LLcrt" : "", + rg_attached ? " Att" : "", + rg_ll_attach ? " LLatt" : "", + rg_deferred ? 
" Defr" : "", + rg_attr_rl ? " GAttr" : "", + rg_family ? rg_v6 ? " v6" : " v4" : "" + ); + struct gpc_cntg *cntg = gpc_group_get_cntg(gprg); + struct gpc_cntr *cntr; + GPC_CNTR_FOREACH(cntr, cntg) { + bool ct_published = gpc_cntr_is_published(cntr); + if (!ct_published) + continue; + bool ct_ll_create + = gpc_cntr_is_ll_created(cntr); + bool ct_cnt_packet = gpc_cntr_pkt_enabled(cntr); + bool ct_cnt_byte = gpc_cntr_byt_enabled(cntr); + fprintf(fp, " CT:%p(%lx): %s%s%s%s%s\n", + cntr, gpc_cntr_get_objid(cntr), + gpc_cntr_get_name(cntr), + ct_published ? " Pub" : "", + ct_ll_create ? " LLcrt" : "", + ct_cnt_packet ? " Pkt" : "", + ct_cnt_byte ? " Byte" : "" + ); + uint64_t val_pkt = -1; + uint64_t val_byt = -1; + gpc_hw_counter_read(cntr, &val_pkt, &val_byt); + fprintf(fp, " %s(%lu/%lx)) %s(%lu/%lx)\n", + ct_cnt_packet ? "Pkt" : "-", + (unsigned long)val_pkt, + (unsigned long)val_pkt, + ct_cnt_byte ? "Byte" : "-", + (unsigned long)val_byt, + (unsigned long)val_byt + ); + } + /* Rules - i.e. ENTRIES */ + struct gpc_rule *gprl; + GPC_RULE_FOREACH(gprl, gprg) { + bool rl_published = gpc_rule_is_published(gprl); + bool rl_ll_create + = gpc_rule_is_ll_created(gprl); + fprintf(fp, " RL:%p(%lx): %u(%x)%s%s\n", + gprl, gpc_rule_get_objid(gprl), + gpc_rule_get_index(gprl), + gpc_rule_get_rule(gprl)->pp_summary, + rl_published ? " Pub" : "", + rl_ll_create ? " LLcrt" : "" + ); + } + } + } +} + +/* Op-mode commands : show counters */ + +static void +gpc_acl_show_cntr_ruleset(json_writer_t *json, struct gpc_rlset *gprs) +{ + bool rs_in = gpc_rlset_is_ingress(gprs); + + jsonw_string_field(json, "interface", gpc_rlset_get_ifname(gprs)); + jsonw_string_field(json, "direction", rs_in ? 
"in" : "out"); +} + +static void +gpc_acl_show_hw_cntr(json_writer_t *json, struct gpc_cntr *cntr) +{ + if (!gpc_cntr_is_ll_created(cntr)) + return; + + bool ct_cnt_packet = gpc_cntr_pkt_enabled(cntr); + bool ct_cnt_byte = gpc_cntr_byt_enabled(cntr); + + uint64_t val_pkt = -1; + uint64_t val_byt = -1; + bool ok = gpc_hw_counter_read(cntr, &val_pkt, &val_byt); + if (!ok) + return; + + jsonw_name(json, "hw"); + jsonw_start_object(json); + + if (ct_cnt_packet) + jsonw_uint_field(json, "pkts", val_pkt); + if (ct_cnt_byte) + jsonw_uint_field(json, "bytes", val_byt); + + jsonw_end_object(json); +} + +static void +gpc_acl_show_cntr(json_writer_t *json, struct gpc_cntr *cntr) +{ + if (!gpc_cntr_is_published(cntr)) + return; + + bool ct_cnt_packet = gpc_cntr_pkt_enabled(cntr); + bool ct_cnt_byte = gpc_cntr_byt_enabled(cntr); + + jsonw_start_object(json); + + jsonw_string_field(json, "name", gpc_cntr_get_name(cntr)); + jsonw_bool_field(json, "cnt-pkts", ct_cnt_packet); + jsonw_bool_field(json, "cnt-bytes", ct_cnt_byte); + + gpc_acl_show_hw_cntr(json, cntr); + + jsonw_end_object(json); +} + +int +gpc_acl_cmd_show_counters(FILE *fp, char const *ifname, int dir, + char const *rgname) +{ + json_writer_t *json = jsonw_new(fp); + if (!json) { + RTE_LOG(ERR, DATAPLANE, "failed to create json stream\n"); + return -ENOMEM; + } + + /* Enforce filter heirarchy */ + if (!ifname) + dir = 0; + if (!dir) + rgname = NULL; + + jsonw_pretty(json, true); + + /* Rulesets */ + struct gpc_rlset *gprs; + jsonw_name(json, "rulesets"); + jsonw_start_array(json); + GPC_RLSET_FOREACH(gprs) { + /* Skip rulesets w/o an interface */ + if (!gpc_rlset_get_ifp(gprs)) + continue; + /* Filter on interface & direction */ + if (ifname && !streq(ifname, gpc_rlset_get_ifname(gprs))) + continue; + if (dir < 0 && !gpc_rlset_is_ingress(gprs)) + continue; + if (dir > 0 && gpc_rlset_is_ingress(gprs)) + continue; + + jsonw_start_object(json); + gpc_acl_show_cntr_ruleset(json, gprs); + + /* Groups - i.e. 
TABLES */ + struct gpc_group *gprg; + jsonw_name(json, "groups"); + jsonw_start_array(json); + GPC_GROUP_FOREACH(gprg, gprs) { + if (gpc_group_get_feature(gprg) != GPC_FEAT_ACL) + continue; + + /* Filter on group name */ + if (rgname && !streq(rgname, gpc_group_get_name(gprg))) + continue; + + jsonw_start_object(json); + + jsonw_string_field(json, "name", + gpc_group_get_name(gprg)); + + struct gpc_cntg *cntg = gpc_group_get_cntg(gprg); + + struct gpc_cntr *cntr; + jsonw_name(json, "counters"); + jsonw_start_array(json); + GPC_CNTR_FOREACH(cntr, cntg) + gpc_acl_show_cntr(json, cntr); + jsonw_end_array(json); + + jsonw_end_object(json); + } + jsonw_end_array(json); + + jsonw_end_object(json); + } + jsonw_end_array(json); + + jsonw_destroy(&json); + + return 0; +} + +/* Op-mode commands : clear counters */ + +int +gpc_acl_cmd_clear_counters(char const *ifname, int dir, char const *rgname) +{ + int rc = 0; /* Success */ + + /* Enforce filter heirarchy */ + if (!ifname) + dir = 0; + if (!dir) + rgname = NULL; + + /* Rulesets */ + struct gpc_rlset *gprs; + GPC_RLSET_FOREACH(gprs) { + /* Skip rulesets w/o an interface */ + if (!gpc_rlset_get_ifp(gprs)) + continue; + /* Filter on interface & direction */ + if (ifname && !streq(ifname, gpc_rlset_get_ifname(gprs))) + continue; + if (dir < 0 && !gpc_rlset_is_ingress(gprs)) + continue; + if (dir > 0 && gpc_rlset_is_ingress(gprs)) + continue; + + /* Groups - i.e. 
TABLES */ + struct gpc_group *gprg; + GPC_GROUP_FOREACH(gprg, gprs) { + if (gpc_group_get_feature(gprg) != GPC_FEAT_ACL) + continue; + + /* Filter on group name */ + if (rgname && !streq(rgname, gpc_group_get_name(gprg))) + continue; + + struct gpc_cntg *cntg = gpc_group_get_cntg(gprg); + if (!cntg) + continue; + + struct gpc_cntr *cntr; + GPC_CNTR_FOREACH(cntr, cntg) { + if (!gpc_cntr_is_published(cntr)) + continue; + if (!gpc_hw_counter_clear(cntr)) + rc = -EIO; + } + } + } + + return rc; +} diff --git a/src/npf/config/gpc_acl_cli.h b/src/npf/config/gpc_acl_cli.h new file mode 100644 index 00000000..0c64b215 --- /dev/null +++ b/src/npf/config/gpc_acl_cli.h @@ -0,0 +1,18 @@ +/* + * Copyright (c) 2020-2021 AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ + +#ifndef GPC_ACL_CLI_H +#define GPC_ACL_CLI_H + +#include + +void gpc_acl_dump(FILE *fp); +int gpc_acl_cmd_show_counters(FILE *fp, char const *ifname, int dir, + char const *rgname); +int gpc_acl_cmd_clear_counters(char const *ifname, int dir, + char const *rgname); + +#endif /* GPC_ACL_CLI_H */ diff --git a/src/npf/config/gpc_cntr.c b/src/npf/config/gpc_cntr.c new file mode 100644 index 00000000..2e859309 --- /dev/null +++ b/src/npf/config/gpc_cntr.c @@ -0,0 +1,505 @@ +/* + * Copyright (c) 2021, AT&T Intellectual Property. All rights reserved. 
+ * + * SPDX-License-Identifier: LGPL-2.1-only + */ +#include + +#include /* TAILQ macros */ +#include + +#include "compiler.h" +#include "vplane_log.h" + +#include "npf/config/gpc_cntr_query.h" +#include "npf/config/gpc_cntr_control.h" +#include "npf/config/gpc_db_query.h" +#include "npf/config/gpc_hw.h" + +/* -- counter group -- */ + +enum gpc_cntg_flags { + GPC_CNTG_TYPE_NAMED = (1 << 0), + GPC_CNTG_WHAT_PCKT = (1 << 1), + GPC_CNTG_WHAT_L3BY = (1 << 2), + GPC_CNTG_SHR_IF = (1 << 3), +}; + +struct gpc_cntg { + TAILQ_ENTRY(gpc_cntg) cntg_list; + struct gpc_group *cntg_gprg; + TAILQ_HEAD(gpc_cnqh, gpc_cntr) cntg_cntrs; + uint32_t cntg_flags; + uint16_t cntg_refcount; +}; + +/* -- counter -- */ + +#define CNTR_NAME_LEN 8 + +enum gpc_cntr_flags { + GPC_CNTF_CNT_PACKET = (1 << 0), + GPC_CNTF_CNT_BYTE = (1 << 1), + GPC_CNTF_PUBLISHED = (1 << 2), + GPC_CNTF_LL_CREATED = (1 << 3), +}; + +struct gpc_cntr { + TAILQ_ENTRY(gpc_cntr) cntr_list; + struct gpc_cntg *cntr_cntg; + char cntr_name[CNTR_NAME_LEN]; + uintptr_t cntr_objid; /* FAL object */ + uint16_t cntr_flags; + uint16_t cntr_refcount; +}; + +/* -- locals -- */ + +static TAILQ_HEAD(, gpc_cntg) cntr_groups[GPC_FEAT__MAX] = { + [GPC_FEAT_ACL] = TAILQ_HEAD_INITIALIZER(cntr_groups[GPC_FEAT_ACL]), + [GPC_FEAT_QOS] = TAILQ_HEAD_INITIALIZER(cntr_groups[GPC_FEAT_QOS]), +}; + +/* -- counter group accessors -- */ + +enum gpc_cntr_type +gpc_cntg_type(struct gpc_cntg const *cntg) +{ + if (cntg->cntg_flags & GPC_CNTG_TYPE_NAMED) + return GPC_CNTT_NAMED; + else // NOLINT: silence clang-tidy + return GPC_CNTT_NUMBERED; +} + +enum gpc_cntr_what +gpc_cntg_what(struct gpc_cntg const *cntg) +{ + enum gpc_cntr_what what = 0; + + if (cntg->cntg_flags & GPC_CNTG_WHAT_PCKT) + what |= GPC_CNTW_PACKET; + if (cntg->cntg_flags & GPC_CNTG_WHAT_L3BY) + what |= GPC_CNTW_L3BYTE; + + return what; +} + +enum gpc_cntr_share +gpc_cntg_share(struct gpc_cntg const *cntg) +{ + enum gpc_cntr_share share = 0; + + if (cntg->cntg_flags & GPC_CNTG_SHR_IF) + 
share = GPC_CNTS_INTERFACE; + + return share; +} + +struct gpc_group * +gpc_cntg_get_group(struct gpc_cntg const *cntg) +{ + return cntg->cntg_gprg; +} + +/* -- counter group DB refcount -- */ + +static void gpc_cntg_delete(struct gpc_cntg *cntg); + +void gpc_cntg_retain(struct gpc_cntg *cntg) +{ + /* Should be impossible */ + if (cntg->cntg_refcount == UINT16_MAX) + return; + ++cntg->cntg_refcount; +} + +void gpc_cntg_release(struct gpc_cntg *cntg) +{ + /* Should be impossible */ + if (!cntg->cntg_refcount) + return; + + if (!--cntg->cntg_refcount) + gpc_cntg_delete(cntg); +} + +/* -- counter group DB walk -- */ + +struct gpc_cntg * +gpc_cntg_first(enum gpc_feature feat) +{ + if (!gpc_feature_is_valid(feat)) + return NULL; + + return TAILQ_FIRST(&cntr_groups[feat]); +} + +struct gpc_cntg * +gpc_cntg_next(struct gpc_cntg const *cursor) +{ + return TAILQ_NEXT(cursor, cntg_list); +} + +/* -- counter group DB manipulation -- */ + +struct gpc_cntg * +gpc_cntg_create(struct gpc_group *gprg, enum gpc_cntr_type type, + enum gpc_cntr_what what, enum gpc_cntr_share share) +{ + struct gpc_cntg *cntg = calloc(1, sizeof(*cntg)); + if (!cntg) + return NULL; + + cntg->cntg_gprg = gprg; + TAILQ_INIT(&cntg->cntg_cntrs); + cntg->cntg_flags = 0; + cntg->cntg_refcount = 1; + + /* Record the type */ + switch (type) { + case GPC_CNTT_NAMED: + cntg->cntg_flags |= GPC_CNTG_TYPE_NAMED; + break; + case GPC_CNTT_NUMBERED: + break; + default: + goto error; + } + + /* Record the sharing */ + switch (share) { + case GPC_CNTS_INTERFACE: + cntg->cntg_flags |= GPC_CNTG_SHR_IF; + break; + default: + goto error; + } + + /* Record what to count */ + if (!what) + goto error; + if (what & GPC_CNTW_PACKET) { + cntg->cntg_flags |= GPC_CNTG_WHAT_PCKT; + what &= ~GPC_CNTW_PACKET; + } + if (what & GPC_CNTW_L3BYTE) { + cntg->cntg_flags |= GPC_CNTG_WHAT_L3BY; + what &= ~GPC_CNTW_L3BYTE; + } + if (what) + goto error; + + /* Identify the feature list to insert in to */ + enum gpc_feature feat = 
gpc_group_get_feature(gprg); + if (!gpc_feature_is_valid(feat)) + goto error; + + TAILQ_INSERT_TAIL(&cntr_groups[feat], cntg, cntg_list); + + return cntg; + +error: + free(cntg); + return NULL; +} + +static void +gpc_cntg_delete(struct gpc_cntg *cntg) +{ + enum gpc_feature feat = gpc_group_get_feature(cntg->cntg_gprg); + + TAILQ_REMOVE(&cntr_groups[feat], cntg, cntg_list); + free(cntg); +} + +/* + * For a counter group, notify creation or deletion of all counters. + * + * These are used for deferred notifications based upon the + * change in the group status. + */ +void +gpc_cntg_hw_ntfy_cntrs_create(struct gpc_cntg *cntg) +{ + struct gpc_group *gprg = cntg->cntg_gprg; + + if (!gpc_group_is_published(gprg)) + return; + + struct gpc_cntr *cntr; + TAILQ_FOREACH(cntr, &cntg->cntg_cntrs, cntr_list) + gpc_cntr_hw_ntfy_create(cntg, cntr); +} + +void +gpc_cntg_hw_ntfy_cntrs_delete(struct gpc_cntg *cntg) +{ + struct gpc_group *gprg = cntg->cntg_gprg; + + if (!gpc_group_is_published(gprg)) + return; + + struct gpc_cntr *cntr; + TAILQ_FOREACH(cntr, &cntg->cntg_cntrs, cntr_list) + gpc_cntr_hw_ntfy_delete(cntg, cntr); +} + +/* -- counter accessors -- */ + +struct gpc_cntg * +gpc_cntr_get_cntg(struct gpc_cntr const *cntr) +{ + return cntr->cntr_cntg; +} + +char const * +gpc_cntr_get_name(struct gpc_cntr const *cntr) +{ + return cntr->cntr_name; +} + +bool +gpc_cntr_pkt_enabled(struct gpc_cntr const *cntr) +{ + enum gpc_cntr_what what = gpc_cntg_what(cntr->cntr_cntg); + + return (what & GPC_CNTW_PACKET); +} + +bool +gpc_cntr_byt_enabled(struct gpc_cntr const *cntr) +{ + enum gpc_cntr_what what = gpc_cntg_what(cntr->cntr_cntg); + + return (what & GPC_CNTW_L3BYTE); +} + +uintptr_t +gpc_cntr_get_objid(struct gpc_cntr const *cntr) +{ + if (!cntr) + return 0; + + return cntr->cntr_objid; +} + +bool +gpc_cntr_is_published(struct gpc_cntr const *cntr) +{ + return cntr->cntr_flags & GPC_CNTF_PUBLISHED; +} + +bool +gpc_cntr_is_ll_created(struct gpc_cntr const *cntr) +{ + return 
cntr->cntr_flags & GPC_CNTF_LL_CREATED; +} + +/* -- counter manipulators -- */ + +void +gpc_cntr_set_objid(struct gpc_cntr *cntr, uintptr_t objid) +{ + cntr->cntr_objid = objid; +} + +static void +gpc_cntr_set_published(struct gpc_cntr *cntr) +{ + cntr->cntr_flags |= GPC_CNTF_PUBLISHED; +} + +static void +gpc_cntr_clear_published(struct gpc_cntr *cntr) +{ + cntr->cntr_flags &= ~GPC_CNTF_PUBLISHED; +} + +static void +gpc_cntr_set_ll_created(struct gpc_cntr *cntr) +{ + cntr->cntr_flags |= GPC_CNTF_LL_CREATED; +} + +static void +gpc_cntr_clear_ll_created(struct gpc_cntr *cntr) +{ + cntr->cntr_flags &= ~GPC_CNTF_LL_CREATED; +} + +/* -- counter DB walk -- */ + +struct gpc_cntr * +gpc_cntr_first(struct gpc_cntg const *cntg) +{ + return cntg ? TAILQ_FIRST(&cntg->cntg_cntrs) : NULL; +} + +struct gpc_cntr * +gpc_cntr_last(struct gpc_cntg const *cntg) +{ + return cntg ? TAILQ_LAST(&cntg->cntg_cntrs, gpc_cnqh) : NULL; +} + +struct gpc_cntr * +gpc_cntr_next(struct gpc_cntr const *cursor) +{ + return TAILQ_NEXT(cursor, cntr_list); +} + +/* -- counter DB refcount -- */ + +static void gpc_cntr_delete(struct gpc_cntr *cntr); + +void gpc_cntr_retain(struct gpc_cntr *cntr) +{ + /* Should be impossible */ + if (cntr->cntr_refcount == UINT16_MAX) + return; + ++cntr->cntr_refcount; +} + +void gpc_cntr_release(struct gpc_cntr *cntr) +{ + /* Should be impossible */ + if (!cntr->cntr_refcount) + return; + + if (!--cntr->cntr_refcount) + gpc_cntr_delete(cntr); +} + +/* -- counter DB lookup -- */ + +static struct gpc_cntr * +gpc_cntr_find(struct gpc_cntg *cntg, char const *name) +{ + struct gpc_cntr *cursor; + + TAILQ_FOREACH(cursor, &cntg->cntg_cntrs, cntr_list) + if (strcmp(name, cursor->cntr_name) == 0) + return cursor; + + return NULL; +} + +struct gpc_cntr * +gpc_cntr_find_and_retain(struct gpc_cntg *cntg, char const *name) +{ + struct gpc_cntr *cntr = gpc_cntr_find(cntg, name); + + if (cntr) + gpc_cntr_retain(cntr); + + return cntr; +} + +/* -- counter DB manipulation -- */ + +static 
struct gpc_cntr * +gpc_cntr_create(struct gpc_cntg *cntg, char const *name) +{ + struct gpc_cntr *cntr = calloc(1, sizeof(*cntr)); + if (!cntr) + return NULL; + + if (strlen(name) >= sizeof(cntr->cntr_name)) + goto error; + strcpy(cntr->cntr_name, name); + + TAILQ_INSERT_TAIL(&cntg->cntg_cntrs, cntr, cntr_list); + gpc_cntg_retain(cntg); + + cntr->cntr_cntg = cntg; + cntr->cntr_objid = 0; + cntr->cntr_flags = 0; + cntr->cntr_refcount = 1; + + /* Cache these for easy access */ + enum gpc_cntr_what what = gpc_cntg_what(cntg); + if (what & GPC_CNTW_PACKET) + cntr->cntr_flags |= GPC_CNTF_CNT_PACKET; + if (what & GPC_CNTW_L3BYTE) + cntr->cntr_flags |= GPC_CNTF_CNT_BYTE; + + return cntr; + +error: + free(cntr); + return NULL; +} + +static void +gpc_cntr_delete(struct gpc_cntr *cntr) +{ + struct gpc_cntg *cntg = cntr->cntr_cntg; + + gpc_cntr_hw_ntfy_delete(cntg, cntr); + + TAILQ_REMOVE(&cntg->cntg_cntrs, cntr, cntr_list); + gpc_cntg_release(cntg); + cntr->cntr_cntg = NULL; + free(cntr); +} + +struct gpc_cntr * +gpc_cntr_create_named(struct gpc_cntg *cntg, char const *name) +{ + struct gpc_cntr *cntr = gpc_cntr_create(cntg, name); + + return cntr; +} + +struct gpc_cntr * +gpc_cntr_create_numbered(struct gpc_cntg *cntg, uint16_t number) +{ + char cntr_name[CNTR_NAME_LEN]; + + snprintf(cntr_name, sizeof(cntr_name), "%u", number); + + /* This check can probably be removed */ + if (gpc_cntr_find(cntg, cntr_name)) { + RTE_LOG(ERR, FIREWALL, + "Error: Attempt to alloc numbered counter that already exists (%u)\n", + number); + return NULL; + } + + struct gpc_cntr *cntr = gpc_cntr_create(cntg, cntr_name); + + return cntr; +} + +/* -- counter hardware notify -- */ + +void +gpc_cntr_hw_ntfy_create(struct gpc_cntg *cntg, struct gpc_cntr *cntr) +{ + struct gpc_group *gprg = cntg->cntg_gprg; + + if (!gpc_group_is_published(gprg)) + return; + if (gpc_cntr_is_published(cntr)) + return; + + if (gpc_hw_counter_create(cntr)) + gpc_cntr_set_ll_created(cntr); + + gpc_cntr_set_published(cntr); 
+} + +void +gpc_cntr_hw_ntfy_delete(struct gpc_cntg *cntg, struct gpc_cntr *cntr) +{ + struct gpc_group *gprg = cntg->cntg_gprg; + + if (!gpc_group_is_published(gprg)) + return; + if (!gpc_cntr_is_published(cntr)) + return; + + gpc_hw_counter_delete(cntr); + + gpc_cntr_clear_ll_created(cntr); + gpc_cntr_clear_published(cntr); +} diff --git a/src/npf/config/gpc_cntr_control.h b/src/npf/config/gpc_cntr_control.h new file mode 100644 index 00000000..8ce2e018 --- /dev/null +++ b/src/npf/config/gpc_cntr_control.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2021, AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ + +#ifndef GPC_CNTR_CONTROL_H +#define GPC_CNTR_CONTROL_H + +#include +#include + +struct gpc_group; +struct gpc_cntg; +struct gpc_cntr; +enum gpc_cntr_type; +enum gpc_cntr_what; +enum gpc_cntr_share; + +/* -- counter group -- */ + +struct gpc_cntg *gpc_cntg_create(struct gpc_group *gprg, + enum gpc_cntr_type type, + enum gpc_cntr_what what, + enum gpc_cntr_share share); +void gpc_cntg_retain(struct gpc_cntg *cntg); +void gpc_cntg_release(struct gpc_cntg *cntg); + +void gpc_cntg_hw_ntfy_cntrs_create(struct gpc_cntg *cntg); +void gpc_cntg_hw_ntfy_cntrs_delete(struct gpc_cntg *cntg); + +/* -- counter -- */ + +struct gpc_cntr *gpc_cntr_create_named(struct gpc_cntg *cntg, + char const *name); +struct gpc_cntr *gpc_cntr_create_numbered(struct gpc_cntg *cntg, + uint16_t number); +struct gpc_cntr *gpc_cntr_find_and_retain(struct gpc_cntg *cntg, + char const *name); +void gpc_cntr_retain(struct gpc_cntr *cntr); +void gpc_cntr_release(struct gpc_cntr *cntr); + +void gpc_cntr_hw_ntfy_create(struct gpc_cntg *cntg, struct gpc_cntr *cntr); +void gpc_cntr_hw_ntfy_delete(struct gpc_cntg *cntg, struct gpc_cntr *cntr); + +#endif /* GPC_CNTR_CONTROL_H */ diff --git a/src/npf/config/gpc_cntr_query.h b/src/npf/config/gpc_cntr_query.h new file mode 100644 index 00000000..87c18bda --- /dev/null +++ b/src/npf/config/gpc_cntr_query.h @@ -0,0 
+1,81 @@ +/* + * Copyright (c) 2021, AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ + +#ifndef GPC_CNTR_QUERY_H +#define GPC_CNTR_QUERY_H + +#include +#include + +struct gpc_cntg; +struct gpc_cntr; +enum gpc_feature; + +/* + * Type of counters in the counter group, either named or numbered. + * Numbered can be created on demand, named have to be explicitly created. + */ +enum gpc_cntr_type { + GPC_CNTT_NAMED = 1, + GPC_CNTT_NUMBERED, +}; + +/* + * What to count, a bitmask. + * Appropriate bits set for each possibility. + */ +enum gpc_cntr_what { + GPC_CNTW_PACKET = (1 << 0), + GPC_CNTW_L3BYTE = (1 << 1), +}; + +/* + * How counters are shared. + * Only 'per-interface' for the moment. + */ +enum gpc_cntr_share { + GPC_CNTS_INTERFACE = 1, +}; + +/* -- counter group accessors -- */ + +enum gpc_cntr_type gpc_cntg_type(struct gpc_cntg const *cntg); +enum gpc_cntr_what gpc_cntg_what(struct gpc_cntg const *cntg); +enum gpc_cntr_share gpc_cntg_share(struct gpc_cntg const *cntg); + +struct gpc_group *gpc_cntg_get_group(struct gpc_cntg const *cntg); + +struct gpc_cntg *gpc_cntg_first(enum gpc_feature feat); +struct gpc_cntg *gpc_cntg_next(struct gpc_cntg const *cursor); + +#define GPC_CNTR_GROUP_FOREACH(feat, var) \ + for ((var) = gpc_cntg_first((feat)); \ + (var); \ + (var) = gpc_cntg_next((var))) + +/* -- counter accessors -- */ + +struct gpc_cntg *gpc_cntr_get_cntg(struct gpc_cntr const *cntr); +char const *gpc_cntr_get_name(struct gpc_cntr const *cntr); +bool gpc_cntr_pkt_enabled(struct gpc_cntr const *cntr); +bool gpc_cntr_byt_enabled(struct gpc_cntr const *cntr); + +bool gpc_cntr_is_published(struct gpc_cntr const *cntr); +bool gpc_cntr_is_ll_created(struct gpc_cntr const *cntr); + +uintptr_t gpc_cntr_get_objid(struct gpc_cntr const *cntr); +void gpc_cntr_set_objid(struct gpc_cntr *cntr, uintptr_t objid); + +struct gpc_cntr *gpc_cntr_first(struct gpc_cntg const *cntg); +struct gpc_cntr *gpc_cntr_last(struct gpc_cntg 
const *cntg); +struct gpc_cntr *gpc_cntr_next(struct gpc_cntr const *cursor); + +#define GPC_CNTR_FOREACH(var, head) \ + for ((var) = gpc_cntr_first((head)); \ + (var); \ + (var) = gpc_cntr_next((var))) + +#endif /* GPC_CNTR_QUERY_H */ diff --git a/src/npf/config/gpc_db.c b/src/npf/config/gpc_db.c new file mode 100644 index 00000000..92495104 --- /dev/null +++ b/src/npf/config/gpc_db.c @@ -0,0 +1,853 @@ +/* + * Copyright (c) 2020-2021, AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ +#include /* TAILQ macros */ +#include + +#include "compiler.h" +#include "vplane_log.h" +#include "if_var.h" + +#include "npf/config/gpc_cntr_query.h" +#include "npf/config/gpc_db_control.h" +#include "npf/config/gpc_db_query.h" +#include "npf/config/pmf_rule.h" +#include "npf/config/gpc_hw.h" + +/* -- ruleset -- */ + +enum gpc_rs_flags { + GPC_RSF_IN = (1 << 0), + GPC_RSF_IF_CREATED = (1 << 1), +}; + +struct gpc_rlset { + TAILQ_ENTRY(gpc_rlset) gprs_list; + TAILQ_HEAD(, gpc_group) gprs_groups; + void *gprs_owner; /* weak */ + char const *gprs_ifname; /* weak */ + struct ifnet *gprs_ifp; /* weak */ + uint32_t gprs_flags; +}; + + +/* -- group -- */ + +enum gpc_rg_flags { + GPC_RGF_PUBLISHED = (1 << 0), + GPC_RGF_LL_CREATED = (1 << 1), + GPC_RGF_ATTACHED = (1 << 2), + GPC_RGF_LL_ATTACHED = (1 << 3), + GPC_RGF_FAMILY = (1 << 4), + GPC_RGF_V6 = (1 << 5), + GPC_RGF_DEFERRED = (1 << 6), + +}; + +struct gpc_group { + TAILQ_ENTRY(gpc_group) gprg_list; + TAILQ_HEAD(gpc_rlqh, gpc_rule) gprg_rules; + void *gprg_owner; /* weak */ + struct gpc_rlset *gprg_rlset; + struct gpc_cntg *gprg_cntg; /* strong */ + enum gpc_feature gprg_feature; + char const *gprg_rgname; /* weak */ + uintptr_t gprg_objid; /* FAL object */ + uint32_t gprg_summary; + uint32_t gprg_flags; +}; + + +/* -- rule -- */ + +enum gpc_rl_flags { + GPC_RLF_PUBLISHED = (1 << 0), + GPC_RLF_LL_CREATED = (1 << 1), +}; + +struct gpc_rule { + TAILQ_ENTRY(gpc_rule) gprl_list; + void 
*gprl_owner; /* weak */ + struct gpc_group *gprl_group; + struct gpc_cntr *gprl_cntr; /* strong */ + struct pmf_rule *gprl_rule; + uintptr_t gprl_objid; /* FAL object */ + uint16_t gprl_index; + uint16_t gprl_flags; +}; + +/* -- locals -- */ + +static TAILQ_HEAD(, gpc_rlset) att_rlsets + = TAILQ_HEAD_INITIALIZER(att_rlsets); + +/* -- feature utility -- */ + +char const * +gpc_feature_get_name(enum gpc_feature feat) +{ + if (!gpc_feature_is_valid(feat)) + goto error; + + switch (feat) { + case GPC_FEAT_ACL: + return "ACL"; + case GPC_FEAT_QOS: + return "QOS"; + } + +error: + return "Error"; +} + +/* -- ruleset accessors -- */ + +char const * +gpc_rlset_get_ifname(struct gpc_rlset const *gprs) +{ + return gprs->gprs_ifname; +} + +struct ifnet * +gpc_rlset_get_ifp(struct gpc_rlset const *gprs) +{ + return gprs->gprs_ifp; +} + +void * +gpc_rlset_get_owner(struct gpc_rlset const *gprs) +{ + return gprs->gprs_owner; +} + +bool +gpc_rlset_is_ingress(struct gpc_rlset const *gprs) +{ + return gprs->gprs_flags & GPC_RSF_IN; +} + +bool +gpc_rlset_is_if_created(struct gpc_rlset const *gprs) +{ + return gprs->gprs_flags & GPC_RSF_IF_CREATED; +} + +/* -- ruleset manipulators -- */ + +void +gpc_rlset_set_if_created(struct gpc_rlset *gprs) +{ + gprs->gprs_flags |= GPC_RSF_IF_CREATED; +} + +static void +gpc_rlset_clear_if_created(struct gpc_rlset *gprs) +{ + gprs->gprs_flags &= ~GPC_RSF_IF_CREATED; +} + +bool +gpc_rlset_set_ifp(struct gpc_rlset *gprs) +{ + struct ifnet *iface = dp_ifnet_byifname(gprs->gprs_ifname); + if (!iface) + return false; + + gprs->gprs_ifp = iface; + if (iface->fal_l3) + gpc_rlset_set_if_created(gprs); + + return true; +} + +void +gpc_rlset_clear_ifp(struct gpc_rlset *gprs) +{ + gprs->gprs_ifp = NULL; + gpc_rlset_clear_if_created(gprs); +} + +/* -- ruleset DB walk -- */ + +struct gpc_rlset * +gpc_rlset_first(void) +{ + return TAILQ_FIRST(&att_rlsets); +} + +struct gpc_rlset * +gpc_rlset_next(struct gpc_rlset const *cursor) +{ + return TAILQ_NEXT(cursor, 
gprs_list); +} + +/* -- ruleset DB manipulation -- */ + +void +gpc_rlset_delete(struct gpc_rlset *gprs) +{ + TAILQ_REMOVE(&att_rlsets, gprs, gprs_list); + gprs->gprs_owner = NULL; + free(gprs); +} + +struct gpc_rlset * +gpc_rlset_create(bool ingress, char const *if_name, void *owner) +{ + struct gpc_rlset *gprs = calloc(1, sizeof(*gprs)); + if (!gprs) + return NULL; + + gprs->gprs_flags = ingress ? GPC_RSF_IN : 0; + gprs->gprs_ifname = if_name; + gprs->gprs_ifp = NULL; + gprs->gprs_owner = owner; + TAILQ_INIT(&gprs->gprs_groups); + + gpc_rlset_set_ifp(gprs); + + TAILQ_INSERT_TAIL(&att_rlsets, gprs, gprs_list); + + return gprs; +} + +/* -- group accessors -- */ + +char const * +gpc_group_get_name(struct gpc_group const *gprg) +{ + return gprg->gprg_rgname; +} + +struct gpc_rlset * +gpc_group_get_rlset(struct gpc_group const *gprg) +{ + return gprg->gprg_rlset; +} + +void * +gpc_group_get_owner(struct gpc_group const *gprg) +{ + return gprg->gprg_owner; +} + +enum gpc_feature +gpc_group_get_feature(struct gpc_group const *gprg) +{ + return gprg->gprg_feature; +} + +uint32_t +gpc_group_get_summary(struct gpc_group const *gprg) +{ + return gprg->gprg_summary; +} + +bool +gpc_group_has_family(struct gpc_group const *gprg) +{ + return gprg->gprg_flags & GPC_RGF_FAMILY; +} + +bool +gpc_group_is_v6(struct gpc_group const *gprg) +{ + return gprg->gprg_flags & GPC_RGF_V6; +} + +bool +gpc_group_is_ingress(struct gpc_group const *gprg) +{ + return gpc_rlset_is_ingress(gprg->gprg_rlset); +} + +bool +gpc_group_is_published(struct gpc_group const *gprg) +{ + return gprg->gprg_flags & GPC_RGF_PUBLISHED; +} + +bool +gpc_group_is_ll_created(struct gpc_group const *gprg) +{ + return gprg->gprg_flags & GPC_RGF_LL_CREATED; +} + +bool +gpc_group_is_attached(struct gpc_group const *gprg) +{ + return gprg->gprg_flags & GPC_RGF_ATTACHED; +} + +bool +gpc_group_is_ll_attached(struct gpc_group const *gprg) +{ + return gprg->gprg_flags & GPC_RGF_LL_ATTACHED; +} + +bool 
+gpc_group_is_deferred(struct gpc_group const *gprg) +{ + return gprg->gprg_flags & GPC_RGF_DEFERRED; +} + +uintptr_t +gpc_group_get_objid(struct gpc_group const *gprg) +{ + return gprg->gprg_objid; +} + +struct gpc_cntg * +gpc_group_get_cntg(struct gpc_group const *gprg) +{ + return gprg->gprg_cntg; +} + +/* -- group manipulators -- */ + +void +gpc_group_clear_family(struct gpc_group *gprg) +{ + gprg->gprg_flags &= ~(GPC_RGF_FAMILY|GPC_RGF_V6); +} + +void +gpc_group_set_v4(struct gpc_group *gprg) +{ + gprg->gprg_flags |= GPC_RGF_FAMILY; + gprg->gprg_flags &= ~GPC_RGF_V6; +} + +void +gpc_group_set_v6(struct gpc_group *gprg) +{ + gprg->gprg_flags |= (GPC_RGF_FAMILY|GPC_RGF_V6); +} + +static void +gpc_group_set_published(struct gpc_group *gprg) +{ + gprg->gprg_flags |= GPC_RGF_PUBLISHED; +} + +static void +gpc_group_clear_published(struct gpc_group *gprg) +{ + gprg->gprg_flags &= ~GPC_RGF_PUBLISHED; +} + +static void +gpc_group_set_ll_created(struct gpc_group *gprg) +{ + gprg->gprg_flags |= GPC_RGF_LL_CREATED; +} + +static void +gpc_group_clear_ll_created(struct gpc_group *gprg) +{ + gprg->gprg_flags &= ~GPC_RGF_LL_CREATED; +} + +static void +gpc_group_set_attached(struct gpc_group *gprg) +{ + gprg->gprg_flags |= GPC_RGF_ATTACHED; +} + +static void +gpc_group_clear_attached(struct gpc_group *gprg) +{ + gprg->gprg_flags &= ~GPC_RGF_ATTACHED; +} + +static void +gpc_group_set_ll_attached(struct gpc_group *gprg) +{ + gprg->gprg_flags |= GPC_RGF_LL_ATTACHED; +} + +static void +gpc_group_clear_ll_attached(struct gpc_group *gprg) +{ + gprg->gprg_flags &= ~GPC_RGF_LL_ATTACHED; +} + +void +gpc_group_set_deferred(struct gpc_group *gprg) +{ + gprg->gprg_flags |= GPC_RGF_DEFERRED; +} + +void +gpc_group_clear_deferred(struct gpc_group *gprg) +{ + gprg->gprg_flags &= ~GPC_RGF_DEFERRED; +} + +void +gpc_group_set_objid(struct gpc_group *gprg, uintptr_t objid) +{ + gprg->gprg_objid = objid; +} + +void +gpc_group_set_cntg(struct gpc_group *gprg, struct gpc_cntg *cntg) +{ + 
gprg->gprg_cntg = cntg;
+}
+
+/* -- group DB misc -- */
+
+/*
+ * Recompute the summary bits of a group from its rules.
+ *
+ * Use this after a change which may remove bits, generally a rule
+ * deletion or a rule change; gpc_rule_change_rule() itself only ever
+ * ORs new bits into the stored summary.
+ *
+ * rule - optional; when non-NULL its pp_summary, restricted to
+ *        RLATTR_SUMMARY_MASK, is merged into the result.
+ *
+ * Rules queued on the group which have no pmf_rule attached yet
+ * (gprl_rule is NULL between gpc_rule_create() and the first
+ * gpc_rule_change_rule()) contribute nothing and are skipped.
+ *
+ * Returns the computed summary; gprg_summary is not written here.
+ */
+uint32_t
+gpc_group_recalc_summary(struct gpc_group *gprg, struct pmf_rule *rule)
+{
+	uint32_t group_summary = 0;
+
+#define RLATTR_SUMMARY_MASK (PMF_RMS_IP_FAMILY|PMF_RAS_COUNT_DEF| \
+			     PMF_SUMMARY_COUNT_DEF_NAMED_FLAGS)
+	if (rule)
+		group_summary |= rule->pp_summary & RLATTR_SUMMARY_MASK;
+#undef RLATTR_SUMMARY_MASK
+
+	struct gpc_rule *gprl;
+	TAILQ_FOREACH(gprl, &gprg->gprg_rules, gprl_list) {
+		/* A freshly created rule has no pmf_rule attached yet */
+		if (!gprl->gprl_rule)
+			continue;
+		group_summary |= gprl->gprl_rule->pp_summary;
+	}
+
+	return group_summary;
+}
+
+/* -- group DB walk -- */
+
+/* First group queued on the ruleset; TAILQ_FIRST of gprs_groups. */
+struct gpc_group *
+gpc_group_first(struct gpc_rlset const *gprs)
+{
+	return TAILQ_FIRST(&gprs->gprs_groups);
+}
+
+/* Group following cursor on its ruleset's list; TAILQ_NEXT of cursor. */
+struct gpc_group *
+gpc_group_next(struct gpc_group const *cursor)
+{
+	return TAILQ_NEXT(cursor, gprg_list);
+}
+
+/* -- group DB manipulation -- */
+
+/*
+ * Unlink a group from its parent ruleset and free it.
+ *
+ * The gprg_rules list is not walked here, so rules still queued on the
+ * group are not released by this call.
+ */
+void
+gpc_group_delete(struct gpc_group *gprg)
+{
+	struct gpc_rlset *gprs = gprg->gprg_rlset;
+
+	TAILQ_REMOVE(&gprs->gprs_groups, gprg, gprg_list);
+	gprg->gprg_owner = NULL;
+	free(gprg);
+}
+
+/*
+ * Allocate a group and append it to the ruleset's gprs_groups list.
+ *
+ * gprs    - parent ruleset, stored in gprg_rlset
+ * feat    - feature the group belongs to
+ * rg_name - kept by pointer, not copied, so the string has to stay
+ *           valid for as long as the group exists
+ * owner   - opaque pointer stored in gprg_owner
+ *
+ * The group starts with an empty rule list, a zero summary and no
+ * flags set.  Returns NULL if calloc() fails.
+ */
+struct gpc_group *
+gpc_group_create(struct gpc_rlset *gprs, enum gpc_feature feat,
+		 char const *rg_name, void *owner)
+{
+	struct gpc_group *gprg = calloc(1, sizeof(*gprg));
+	if (!gprg)
+		return NULL;
+
+	gprg->gprg_owner = owner;
+	gprg->gprg_rlset = gprs;
+	gprg->gprg_cntg = NULL;
+	gprg->gprg_feature = feat;
+	gprg->gprg_rgname = rg_name;
+	TAILQ_INIT(&gprg->gprg_rules);
+	gprg->gprg_summary = 0;
+	gprg->gprg_flags = 0;
+
+	TAILQ_INSERT_TAIL(&gprs->gprs_groups, gprg, gprg_list);
+
+	return gprg;
+}
+
+/* -- group hardware notify -- */
+
+void
+gpc_group_hw_ntfy_create(struct gpc_group *gprg, struct pmf_rule *rule)
+{
+	if (gpc_group_is_published(gprg))
+		return;
+	if (!gpc_group_has_family(gprg))
+		return;
+	if (gpc_group_is_deferred(gprg))
+		return;
+
+	/* Recalculate summary before publish */
+	uint32_t summary = gpc_group_recalc_summary(gprg, rule);
+	
gprg->gprg_summary = summary;
+
+	/* LL_CREATED only when the HW call returned true ... */
+	if (gpc_hw_group_create(gprg))
+		gpc_group_set_ll_created(gprg);
+
+	/* ... PUBLISHED whatever it returned */
+	gpc_group_set_published(gprg);
+}
+/*
+ * Withdraw a published group from the hardware layer.
+ *
+ * No-op unless the group is published.  Calls gpc_hw_group_delete(),
+ * zeroes the cached summary, then clears LL_CREATED and PUBLISHED.
+ */
+void
+gpc_group_hw_ntfy_delete(struct gpc_group *gprg)
+{
+	if (!gpc_group_is_published(gprg))
+		return;
+
+	gpc_hw_group_delete(gprg);
+
+	/* Rules summary cleared to optimise rule delete */
+	gprg->gprg_summary = 0;
+
+	gpc_group_clear_ll_created(gprg);
+	gpc_group_clear_published(gprg);
+}
+/*
+ * Tell the hardware layer that the group summary changed to 'new'.
+ *
+ * No-op unless the group is published and 'new' differs from the
+ * cached gprg_summary.  gpc_hw_group_mod() runs before the cache is
+ * updated, so it still sees the old value through
+ * gpc_group_get_summary().
+ */
+void
+gpc_group_hw_ntfy_modify(struct gpc_group *gprg, uint32_t new)
+{
+	if (!gpc_group_is_published(gprg))
+		return;
+
+	if (new == gprg->gprg_summary)
+		return;
+
+	gpc_hw_group_mod(gprg, new);
+
+	gprg->gprg_summary = new;
+}
+/*
+ * Attach a group to the interface of its ruleset.
+ *
+ * Requires the group to be published, not deferred and not already
+ * attached, and the ruleset to have both an ifp and its "if created"
+ * state; otherwise nothing happens.
+ *
+ * ATTACHED is set whatever gpc_hw_group_attach() returned; LL_ATTACHED
+ * only when it returned true.
+ */
+void
+gpc_group_hw_ntfy_attach(struct gpc_group *gprg)
+{
+	if (!gpc_group_is_published(gprg))
+		return;
+	if (gpc_group_is_deferred(gprg))
+		return;
+	if (gpc_group_is_attached(gprg))
+		return;
+
+	struct gpc_rlset *gprs = gprg->gprg_rlset;
+
+	struct ifnet *att_ifp = gpc_rlset_get_ifp(gprs);
+	if (!att_ifp || !gpc_rlset_is_if_created(gprs))
+		return;
+
+	if (gpc_hw_group_attach(gprg, att_ifp))
+		gpc_group_set_ll_attached(gprg);
+
+	gpc_group_set_attached(gprg);
+}
+/*
+ * Detach a group from the interface of its ruleset.
+ *
+ * No-op unless the group is published and attached.  The ruleset's
+ * current ifp is handed to gpc_hw_group_detach() without a NULL check
+ * here; LL_ATTACHED and ATTACHED are then both cleared.
+ */
+void
+gpc_group_hw_ntfy_detach(struct gpc_group *gprg)
+{
+	if (!gpc_group_is_published(gprg))
+		return;
+	if (!gpc_group_is_attached(gprg))
+		return;
+
+	struct gpc_rlset *gprs = gprg->gprg_rlset;
+	struct ifnet *att_ifp = gpc_rlset_get_ifp(gprs);
+
+	gpc_hw_group_detach(gprg, att_ifp);
+
+	gpc_group_clear_ll_attached(gprg);
+	gpc_group_clear_attached(gprg);
+}
+
+/* -- group hardware notify of multiple rules -- */
+
+/*
+ * For a group, notify creation or deletion of all rules.
+ *
+ * These are used for deferred notifications based upon the
+ * change in the group status.
+ */
+void
+gpc_group_hw_ntfy_rules_create(struct gpc_group *gprg)
+{
+	if (!gpc_group_is_published(gprg))
+		return;
+
+	struct gpc_rule *gprl;
+	TAILQ_FOREACH(gprl, &gprg->gprg_rules, gprl_list)
+		gpc_rule_hw_ntfy_create(gprg, gprl);
+}
+/*
+ * Counterpart of gpc_group_hw_ntfy_rules_create(): calls
+ * gpc_rule_hw_ntfy_delete() for every rule queued on a published
+ * group.  Nothing is unlinked from gprg_rules by this function itself.
+ */
+void
+gpc_group_hw_ntfy_rules_delete(struct gpc_group *gprg)
+{
+	if (!gpc_group_is_published(gprg))
+		return;
+
+	struct gpc_rule *gprl;
+	TAILQ_FOREACH(gprl, &gprg->gprg_rules, gprl_list)
+		gpc_rule_hw_ntfy_delete(gprg, gprl);
+}
+
+
+/*
+ * -- rule accessors --
+ *
+ * Read-only getters on struct gpc_rule.  The two "is" tests mask
+ * gprl_flags with a single GPC_RLF_* bit.
+ *
+ * NOTE(review): gpc_rule_get_index() returns uint16_t while
+ * gpc_rule_create() and gpc_rule_find() take a uint32_t index;
+ * confirm the width of gprl_index so that large indices are not
+ * truncated on the way out.
+ */
+
+uint16_t
+gpc_rule_get_index(struct gpc_rule const *gprl)
+{
+	return gprl->gprl_index;
+}
+
+struct pmf_rule *
+gpc_rule_get_rule(struct gpc_rule const *gprl)
+{
+	return gprl->gprl_rule;
+}
+
+struct gpc_group *
+gpc_rule_get_group(struct gpc_rule const *gprl)
+{
+	return gprl->gprl_group;
+}
+
+void *
+gpc_rule_get_owner(struct gpc_rule const *gprl)
+{
+	return gprl->gprl_owner;
+}
+
+struct gpc_cntr *
+gpc_rule_get_cntr(struct gpc_rule const *gprl)
+{
+	return gprl->gprl_cntr;
+}
+
+uintptr_t
+gpc_rule_get_objid(struct gpc_rule const *gprl)
+{
+	return gprl->gprl_objid;
+}
+
+bool
+gpc_rule_is_published(struct gpc_rule const *gprl)
+{
+	return gprl->gprl_flags & GPC_RLF_PUBLISHED;
+}
+
+bool
+gpc_rule_is_ll_created(struct gpc_rule const *gprl)
+{
+	return gprl->gprl_flags & GPC_RLF_LL_CREATED;
+}
+
+/*
+ * -- rule manipulators --
+ *
+ * Set or clear bits in gprl_flags, or store a handle (gprl_objid,
+ * gprl_cntr) in the rule.  The static flag helpers are private to
+ * this file.
+ */
+
+static void
+gpc_rule_set_published(struct gpc_rule *gprl)
+{
+	gprl->gprl_flags |= GPC_RLF_PUBLISHED;
+}
+
+static void
+gpc_rule_clear_published(struct gpc_rule *gprl)
+{
+	gprl->gprl_flags &= ~GPC_RLF_PUBLISHED;
+}
+
+static void
+gpc_rule_set_ll_created(struct gpc_rule *gprl)
+{
+	gprl->gprl_flags |= GPC_RLF_LL_CREATED;
+}
+
+static void
+gpc_rule_clear_ll_created(struct gpc_rule *gprl)
+{
+	gprl->gprl_flags &= ~GPC_RLF_LL_CREATED;
+}
+
+void
+gpc_rule_set_objid(struct gpc_rule *gprl, uintptr_t objid)
+{
+	gprl->gprl_objid = objid;
+}
+
+void
+gpc_rule_set_cntr(struct gpc_rule *gprl, struct gpc_cntr *cntr)
+{
+	gprl->gprl_cntr =
cntr;
+}
+
+/* -- rule DB walk -- */
+
+/* First rule queued on the group; TAILQ_FIRST of gprg_rules. */
+struct gpc_rule *
+gpc_rule_first(struct gpc_group const *gprg)
+{
+	return TAILQ_FIRST(&gprg->gprg_rules);
+}
+
+/* Last rule queued on the group; TAILQ_LAST of gprg_rules. */
+struct gpc_rule *
+gpc_rule_last(struct gpc_group const *gprg)
+{
+	return TAILQ_LAST(&gprg->gprg_rules, gpc_rlqh);
+}
+
+/* Rule following cursor on its group's list; TAILQ_NEXT of cursor. */
+struct gpc_rule *
+gpc_rule_next(struct gpc_rule const *cursor)
+{
+	return TAILQ_NEXT(cursor, gprl_list);
+}
+
+/* -- rule DB lookup -- */
+
+/*
+ * Find the first rule whose index is >= rl_idx.
+ *
+ * Relies on gprg_rules being in ascending index order, which is how
+ * gpc_rule_create() inserts.
+ *
+ * insert false - return the rule only on an exact index match
+ * insert true  - return the rule found, i.e. the element a new rule
+ *                with index rl_idx would have to be placed before
+ *
+ * Returns NULL when every rule has a smaller index (or the list is
+ * empty), or when insert is false and there is no exact match.
+ */
+static struct gpc_rule *
+gpc_rule_find_core(struct gpc_group *gprg, uint32_t rl_idx, bool insert)
+{
+	struct gpc_rule *cursor;
+
+	TAILQ_FOREACH(cursor, &gprg->gprg_rules, gprl_list)
+		if (rl_idx <= cursor->gprl_index)
+			break;
+
+	if (!cursor)
+		return NULL;
+
+	if (rl_idx == cursor->gprl_index || insert)
+		return cursor;
+
+	return NULL;
+}
+
+/* Insertion-point lookup: gpc_rule_find_core() with insert == true. */
+static struct gpc_rule *
+gpc_rule_find_insertion(struct gpc_group *gprg, uint32_t index)
+{
+	return gpc_rule_find_core(gprg, index, true);
+}
+
+/* Exact-match lookup: gpc_rule_find_core() with insert == false. */
+struct gpc_rule *
+gpc_rule_find(struct gpc_group *gprg, uint32_t index)
+{
+	return gpc_rule_find_core(gprg, index, false);
+}
+
+/* -- rule DB manipulation -- */
+
+/*
+ * Replace the pmf_rule attached to a GPC rule by a copy of new_rule,
+ * notifying the hardware layer of the group and rule changes.
+ *
+ * gprl     - rule to update; NULL is ignored
+ * new_rule - criteria to copy; NULL is ignored
+ *
+ * The copy is taken before anything is modified.  Should it fail, an
+ * error is logged and the rule keeps its previous criteria; without
+ * this the rule would be left with gprl_rule == NULL, which
+ * gpc_hw_rule_add() dereferences.  pmf_rule_copy() is assumed to
+ * return NULL on failure - TODO confirm against pmf_rule.c.
+ *
+ * The previous pmf_rule (possibly NULL for a fresh rule) is released
+ * with pmf_rule_free() once gpc_rule_hw_ntfy_modify() has seen it.
+ */
+void
+gpc_rule_change_rule(struct gpc_rule *gprl, struct pmf_rule *new_rule)
+{
+	if (!gprl || !new_rule)
+		return;
+
+	struct gpc_group *gprg = gprl->gprl_group;
+
+	struct pmf_rule *rule_copy = pmf_rule_copy(new_rule);
+	if (!rule_copy) {
+		RTE_LOG(ERR, FIREWALL,
+			"Error: Failed to copy GPC rule %s:%u\n",
+			gpc_group_get_name(gprg),
+			(unsigned int)gprl->gprl_index);
+		return;
+	}
+
+	/* If any were published, update and notify */
+	uint32_t old_summary = gpc_group_get_summary(gprg);
+	uint32_t new_summary = old_summary | new_rule->pp_summary;
+	gpc_group_hw_ntfy_modify(gprg, new_summary);
+
+	/* Update the rule criteria now */
+
+	struct pmf_rule *old_rule = gprl->gprl_rule;
+
+	gprl->gprl_rule = rule_copy;
+
+	gpc_rule_hw_ntfy_modify(gprg, gprl, old_rule);
+
+	pmf_rule_free(old_rule);
+}
+
+/*
+ * Unlink a rule from its group, release its pmf_rule with
+ * pmf_rule_free() and free the rule itself.  No hardware notification
+ * is made from here.
+ */
+void
+gpc_rule_delete(struct gpc_rule *gprl)
+{
+	struct gpc_group *gprg = gprl->gprl_group;
+
+	TAILQ_REMOVE(&gprg->gprg_rules, gprl, gprl_list);
+	gprl->gprl_owner = NULL;
+
+	pmf_rule_free(gprl->gprl_rule);
+	free(gprl);
+}
+
+struct gpc_rule *
+gpc_rule_create(struct gpc_group *gprg, uint32_t rl_idx, void *owner)
+{
+	struct
gpc_rule *gprl = calloc(1, sizeof(*gprl));
+	if (!gprl)
+		return NULL;
+
+	gprl->gprl_owner = owner;
+	gprl->gprl_group = gprg;
+	gprl->gprl_rule = NULL;	/* attached later by gpc_rule_change_rule() */
+	gprl->gprl_index = rl_idx;
+	gprl->gprl_flags = 0;
+	/* Fast path: append when list is empty or rl_idx is the largest */
+	struct gpc_rule *cursor = TAILQ_LAST(&gprg->gprg_rules, gpc_rlqh);
+	if (!cursor || cursor->gprl_index < rl_idx) {
+		TAILQ_INSERT_TAIL(&gprg->gprg_rules, gprl, gprl_list);
+	} else {
+		/* Otherwise insert before the first rule with index >= rl_idx */
+		cursor = gpc_rule_find_insertion(gprg, rl_idx);
+
+		/* Never NULL, do not allow duplicates */
+		if (!cursor || rl_idx == cursor->gprl_index) {
+			struct gpc_rlset *gprs = gprg->gprg_rlset;
+			bool dir_in = gpc_rlset_is_ingress(gprs);
+			RTE_LOG(ERR, FIREWALL,
+				"Error: No insertion point for GPC rule"
+				" %s/%s|%s:%u\n",
+				(dir_in) ? " In" : "Out",
+				gpc_rlset_get_ifname(gprs),
+				gpc_group_get_name(gprg), rl_idx);
+			free(gprl);
+			return NULL;
+		}
+
+		TAILQ_INSERT_BEFORE(cursor, gprl, gprl_list);
+	}
+
+	return gprl;
+}
+
+/* -- rule hardware notify -- */
+/*
+ * Publish one rule to the hardware layer.
+ *
+ * No-op unless the group is published and the rule is not.  PUBLISHED
+ * is set whatever gpc_hw_rule_add() returned; LL_CREATED only when it
+ * returned true.
+ */
+void
+gpc_rule_hw_ntfy_create(struct gpc_group *gprg, struct gpc_rule *gprl)
+{
+	if (!gpc_group_is_published(gprg))
+		return;
+	if (gpc_rule_is_published(gprl))
+		return;
+
+	if (gpc_hw_rule_add(gprl))
+		gpc_rule_set_ll_created(gprl);
+
+	gpc_rule_set_published(gprl);
+}
+/*
+ * Notify the hardware layer that a rule's criteria changed.
+ *
+ * No-op unless the group is published.  A rule which is not yet
+ * published is created instead of modified; otherwise old_rule is
+ * passed through to gpc_hw_rule_mod().
+ */
+void
+gpc_rule_hw_ntfy_modify(struct gpc_group *gprg, struct gpc_rule *gprl,
+			struct pmf_rule *old_rule)
+{
+	if (!gpc_group_is_published(gprg))
+		return;
+	if (!gpc_rule_is_published(gprl)) {
+		gpc_rule_hw_ntfy_create(gprg, gprl);
+		return;
+	}
+
+	gpc_hw_rule_mod(gprl, old_rule);
+}
+/*
+ * Withdraw one rule from the hardware layer.
+ *
+ * No-op unless both the group and the rule are published.  Calls
+ * gpc_hw_rule_del(), then clears LL_CREATED and PUBLISHED; the rule
+ * stays queued on its group.
+ */
+void
+gpc_rule_hw_ntfy_delete(struct gpc_group *gprg, struct gpc_rule *gprl)
+{
+	if (!gpc_group_is_published(gprg))
+		return;
+	if (!gpc_rule_is_published(gprl))
+		return;
+
+	gpc_hw_rule_del(gprl);
+
+	gpc_rule_clear_ll_created(gprl);
+	gpc_rule_clear_published(gprl);
+}
+
diff --git a/src/npf/config/gpc_db_control.h b/src/npf/config/gpc_db_control.h
new file mode 100644
index 00000000..901368a6
--- /dev/null
+++ b/src/npf/config/gpc_db_control.h
@@ -0,0 +1,71 @@
+/*
+
* Copyright (c) 2020-2021, AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ + +#ifndef GPC_DB_CONTROL_H +#define GPC_DB_CONTROL_H + +#include <stdbool.h> +#include <stdint.h> + +enum gpc_feature; + +struct gpc_rlset; +struct gpc_group; +struct gpc_cntg; +struct gpc_rule; +struct gpc_cntr; + +struct pmf_rule; + +/* -- ruleset -- */ + +void gpc_rlset_set_if_created(struct gpc_rlset *gprs); +bool gpc_rlset_set_ifp(struct gpc_rlset *gprs); +void gpc_rlset_clear_ifp(struct gpc_rlset *gprs); + +struct gpc_rlset *gpc_rlset_create(bool ingress, char const *if_name, + void *owner); +void gpc_rlset_delete(struct gpc_rlset *gprs); + +/* -- group -- */ + +void gpc_group_clear_family(struct gpc_group *gprg); +void gpc_group_set_v4(struct gpc_group *gprg); +void gpc_group_set_v6(struct gpc_group *gprg); +void gpc_group_set_deferred(struct gpc_group *gprg); +void gpc_group_clear_deferred(struct gpc_group *gprg); + +void gpc_group_set_cntg(struct gpc_group *gprg, struct gpc_cntg *cntg); + +struct gpc_group *gpc_group_create(struct gpc_rlset *gprs, + enum gpc_feature feat, + char const *rg_name, void *owner); +void gpc_group_delete(struct gpc_group *gprg); + +void gpc_group_hw_ntfy_create(struct gpc_group *gprg, struct pmf_rule *rule); +void gpc_group_hw_ntfy_delete(struct gpc_group *gprg); +void gpc_group_hw_ntfy_modify(struct gpc_group *gprg, uint32_t new); +void gpc_group_hw_ntfy_attach(struct gpc_group *gprg); +void gpc_group_hw_ntfy_detach(struct gpc_group *gprg); + +void gpc_group_hw_ntfy_rules_create(struct gpc_group *gprg); +void gpc_group_hw_ntfy_rules_delete(struct gpc_group *gprg); + +/* -- rule -- */ + +struct gpc_rule *gpc_rule_create(struct gpc_group *gprg, uint32_t rl_idx, + void *owner); +void gpc_rule_delete(struct gpc_rule *gprl); +void gpc_rule_change_rule(struct gpc_rule *gprl, struct pmf_rule *new_rule); + +void gpc_rule_set_cntr(struct gpc_rule *gprl, struct gpc_cntr *cntr); + +void gpc_rule_hw_ntfy_create(struct gpc_group *gprg, 
struct gpc_rule *gprl); +void gpc_rule_hw_ntfy_modify(struct gpc_group *gprg, struct gpc_rule *gprl, + struct pmf_rule *old_rule); +void gpc_rule_hw_ntfy_delete(struct gpc_group *gprg, struct gpc_rule *gprl); + +#endif /* GPC_DB_CONTROL_H */ diff --git a/src/npf/config/gpc_db_query.h b/src/npf/config/gpc_db_query.h new file mode 100644 index 00000000..cd05d0b8 --- /dev/null +++ b/src/npf/config/gpc_db_query.h @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2020-2021, AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ + +#ifndef GPC_DB_QUERY_H +#define GPC_DB_QUERY_H + +#include <stdbool.h> +#include <stdint.h> + +struct gpc_rlset; +struct gpc_group; +struct gpc_cntg; +struct gpc_rule; +struct gpc_cntr; + +struct pmf_rule; + +/* + * When adding new features, check existing static arrays of + * GPC_FEAT__MAX elements, initialised based on the feature + * index. Also update the GPC_FEAT__LAST definition. + */ +#define GPC_FEAT__FIRST 1 +enum gpc_feature { + GPC_FEAT_ACL = GPC_FEAT__FIRST, + GPC_FEAT_QOS, +}; +#define GPC_FEAT__LAST (GPC_FEAT_QOS) +#define GPC_FEAT__MAX (GPC_FEAT__LAST + 1) + +static inline bool gpc_feature_is_valid(enum gpc_feature feat) +{ + return (feat >= GPC_FEAT__FIRST && feat <= GPC_FEAT__LAST); +} + +char const *gpc_feature_get_name(enum gpc_feature feat); + +/* -- ruleset accessors -- */ + +char const *gpc_rlset_get_ifname(struct gpc_rlset const *gprs); +struct ifnet *gpc_rlset_get_ifp(struct gpc_rlset const *gprs); +void *gpc_rlset_get_owner(struct gpc_rlset const *gprs); +bool gpc_rlset_is_ingress(struct gpc_rlset const *gprs); +bool gpc_rlset_is_if_created(struct gpc_rlset const *gprs); +struct gpc_rlset *gpc_rlset_first(void); +struct gpc_rlset *gpc_rlset_next(struct gpc_rlset const *cursor); + +#define GPC_RLSET_FOREACH(var) \ + for ((var) = gpc_rlset_first(); \ + (var); \ + (var) = gpc_rlset_next((var))) + +/* -- group accessors -- */ + +char const *gpc_group_get_name(struct gpc_group const *gprg); +struct gpc_rlset 
*gpc_group_get_rlset(struct gpc_group const *gprg); +void *gpc_group_get_owner(struct gpc_group const *gprg); +enum gpc_feature gpc_group_get_feature(struct gpc_group const *gprg); +uint32_t gpc_group_get_summary(struct gpc_group const *gprg); +bool gpc_group_has_family(struct gpc_group const *gprg); +bool gpc_group_is_v6(struct gpc_group const *gprg); +bool gpc_group_is_ingress(struct gpc_group const *gprg); +bool gpc_group_is_published(struct gpc_group const *gprg); +bool gpc_group_is_ll_created(struct gpc_group const *gprg); +bool gpc_group_is_attached(struct gpc_group const *gprg); +bool gpc_group_is_ll_attached(struct gpc_group const *gprg); +bool gpc_group_is_deferred(struct gpc_group const *gprg); +struct gpc_cntg *gpc_group_get_cntg(struct gpc_group const *gprg); +uintptr_t gpc_group_get_objid(struct gpc_group const *gprg); + +void gpc_group_set_objid(struct gpc_group *gprg, uintptr_t objid); +uint32_t gpc_group_recalc_summary(struct gpc_group *gprg, + struct pmf_rule *rule); + +struct gpc_group *gpc_group_first(struct gpc_rlset const *gprs); +struct gpc_group *gpc_group_next(struct gpc_group const *cursor); + +#define GPC_GROUP_FOREACH(var, head) \ + for ((var) = gpc_group_first((head)); \ + (var); \ + (var) = gpc_group_next((var))) + +/* -- rule accessors -- */ + +uint16_t gpc_rule_get_index(struct gpc_rule const *gprl); +struct pmf_rule *gpc_rule_get_rule(struct gpc_rule const *gprl); +struct gpc_group *gpc_rule_get_group(struct gpc_rule const *gprl); +void *gpc_rule_get_owner(struct gpc_rule const *gprl); +struct gpc_cntr *gpc_rule_get_cntr(struct gpc_rule const *gprl); +uintptr_t gpc_rule_get_objid(struct gpc_rule const *gprl); +void gpc_rule_set_objid(struct gpc_rule *gprl, uintptr_t objid); +bool gpc_rule_is_published(struct gpc_rule const *gprl); +bool gpc_rule_is_ll_created(struct gpc_rule const *gprl); + +struct gpc_rule *gpc_rule_find(struct gpc_group *gprg, uint32_t index); + +struct gpc_rule *gpc_rule_first(struct gpc_group const *gprg); 
+struct gpc_rule *gpc_rule_last(struct gpc_group const *gprg); +struct gpc_rule *gpc_rule_next(struct gpc_rule const *cursor); + +#define GPC_RULE_FOREACH(var, head) \ + for ((var) = gpc_rule_first((head)); \ + (var); \ + (var) = gpc_rule_next((var))) + +#endif /* GPC_DB_QUERY_H */ diff --git a/src/npf/config/pmf_hw.c b/src/npf/config/gpc_hw.c similarity index 58% rename from src/npf/config/pmf_hw.c rename to src/npf/config/gpc_hw.c index 2ac8f00c..4fd1ea1d 100644 --- a/src/npf/config/pmf_hw.c +++ b/src/npf/config/gpc_hw.c @@ -1,15 +1,23 @@ +/* + * Copyright (c) 2019-2021, AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ #include #include #include #include /* htons */ +#include #include "compiler.h" #include "util.h" #include "fal.h" #include "if_var.h" -#include "npf/config/pmf_att_rlgrp.h" +#include "netinet6/in6_var.h" +#include "npf/config/gpc_db_query.h" +#include "npf/config/gpc_cntr_query.h" #include "npf/config/pmf_rule.h" -#include "npf/config/pmf_hw.h" +#include "npf/config/gpc_hw.h" #include "vplane_log.h" #include "vplane_debug.h" @@ -17,54 +25,39 @@ rte_log((ok) ? 
RTE_LOG_DEBUG : RTE_LOG_ERR, \ RTE_LOGTYPE_ ## t, # t ": " __VA_ARGS__) -static bool pmf_hw_commit_needed; +static bool gpc_hw_commit_needed; /* ---- */ -static void -pmf_hw_rule_gen_mask(uint8_t *mask, uint8_t plen, uint8_t blen) -{ - /* set bytes */ - for (; blen && plen >= 8; --blen, plen -= 8) - *mask++ = 0xff; - - /* mixed byte */ - if (plen) - *mask++ = (0xff << (8 - plen)); - - /* clear bytes */ - while (blen--) - *mask++ = 0; -} - bool -pmf_hw_rule_add(struct pmf_attrl *earl, struct pmf_rule *rule) +gpc_hw_rule_add(struct gpc_rule *gprl) { - struct pmf_group_ext *earg = pmf_arlg_attrl_get_grp(earl); - struct pmf_rlset_ext *ears = pmf_arlg_grp_get_rls(earg); - char const *ifname = pmf_arlg_rls_get_ifname(ears); - struct pmf_cntr *eark = pmf_arlg_attrl_get_cntr(earl); - uintptr_t ctrobj = pmf_arlg_cntr_get_objid(eark); - uintptr_t grpobj = pmf_arlg_grp_get_objid(earg); + struct gpc_group *gprg = gpc_rule_get_group(gprl); + struct gpc_rlset *gprs = gpc_group_get_rlset(gprg); + char const *ifname = gpc_rlset_get_ifname(gprs); + struct gpc_cntr *gprk = gpc_rule_get_cntr(gprl); + uintptr_t ctrobj = gpc_cntr_get_objid(gprk); + uintptr_t grpobj = gpc_group_get_objid(gprg); bool grp_was_created = (grpobj != FAL_NULL_OBJECT_ID); uintptr_t rlobj = FAL_NULL_OBJECT_ID; - uint16_t index = pmf_arlg_attrl_get_index(earl); - bool ingress = pmf_arlg_grp_is_ingress(earg); - bool is_v6 = pmf_arlg_grp_is_v6(earg); - char const *rgname = pmf_arlg_grp_get_name(earg); + uint16_t index = gpc_rule_get_index(gprl); + bool ingress = gpc_group_is_ingress(gprg); + bool is_v6 = gpc_group_is_v6(gprg); + char const *rgname = gpc_group_get_name(gprg); + struct pmf_rule *rule = gpc_rule_get_rule(gprl); uint32_t summary = rule->pp_summary; bool ok = true; char const *ok_str = "SK"; + enum gpc_feature feat = gpc_group_get_feature(gprg); + char const *feat_str = gpc_feature_get_name(feat); int rc = 0; /* Success */ /* Nothing to do if no FAL object - e.g. 
vrouter */ if (!grp_was_created) goto log_add; -#define FAL_ENTRY_PRIORITY_TOP (16384u) - #define FAL_ENTRY_FIX_FIELDS 3 -#define FAL_ENTRY_VAR_FIELDS (2 + 7 + 5) +#define FAL_ENTRY_VAR_FIELDS (5 + 7 + 5) #define FAL_ENTRY_TOT_FIELDS (FAL_ENTRY_FIX_FIELDS + FAL_ENTRY_VAR_FIELDS) struct fal_attribute_t ent_attrs[FAL_ENTRY_TOT_FIELDS] = { [0] = { @@ -72,8 +65,8 @@ pmf_hw_rule_add(struct pmf_attrl *earl, struct pmf_rule *rule) .value.objid = grpobj, }, [1] = { - .id = FAL_ACL_ENTRY_ATTR_PRIORITY, - .value.u32 = FAL_ENTRY_PRIORITY_TOP - index, + .id = FAL_ACL_ENTRY_ATTR_RULE_NUMBER, + .value.u32 = index, }, [2] = { .id = FAL_ACL_ENTRY_ATTR_ADMIN_STATE, @@ -84,8 +77,12 @@ pmf_hw_rule_add(struct pmf_attrl *earl, struct pmf_rule *rule) /* Actions */ uint32_t num_actions - = !!(summary & (PMF_RAS_DROP|PMF_RAS_PASS)) - + !!(summary & PMF_RAS_COUNT_REF); + = 1 + + !!(summary & (PMF_RAS_DROP|PMF_RAS_PASS)) + + !!(summary & PMF_RAS_COUNT_REF) + + !!(summary & PMF_RAS_QOS_HW_DESIG) + + !!(summary & PMF_RAS_QOS_COLOUR) + + !!(summary & PMF_RAS_QOS_POLICE); struct fal_acl_action_data_t *actions = calloc(1, num_actions * sizeof(*actions)); if (!actions) @@ -122,6 +119,85 @@ pmf_hw_rule_add(struct pmf_attrl *earl, struct pmf_rule *rule) ++num_actions; } + /* Encode a designation (0..7) to set */ + if (summary & PMF_RAS_QOS_HW_DESIG) { + ent_attrs[nattr].id = FAL_ACL_ENTRY_ATTR_ACTION_SET_DESIGNATION; + ent_attrs[nattr].value.aclaction = &actions[num_actions]; + + int32_t set_designation = 8; /* Invalid */ + struct pmf_qos_mark const *qos_mark = rule->pp_action.qos_mark; + + if (qos_mark && qos_mark->paqm_has_desig == PMV_TRUE) + set_designation = qos_mark->paqm_desig; + + actions[num_actions].enable = true; + actions[num_actions].parameter.s32 = set_designation; + + summary &= ~PMF_RAS_QOS_HW_DESIG; + + /* Skip if invalid */ + if (set_designation < 8) { + ++nattr; + ++num_actions; + } + } + + /* Encode a colour (red/green/yellow) to set */ + if (summary & PMF_RAS_QOS_COLOUR) { + 
ent_attrs[nattr].id = FAL_ACL_ENTRY_ATTR_ACTION_SET_COLOUR; + ent_attrs[nattr].value.aclaction = &actions[num_actions]; + + enum pmf_mark_colour cfg_colour = PMMC_UNSET; + struct pmf_qos_mark const *qos_mark = rule->pp_action.qos_mark; + if (qos_mark) + cfg_colour = qos_mark->paqm_colour; + + enum fal_packet_colour set_colour = FAL_NUM_PACKET_COLOURS; + switch (cfg_colour) { + case PMMC_GREEN: + set_colour = FAL_PACKET_COLOUR_GREEN; + break; + case PMMC_YELLOW: + set_colour = FAL_PACKET_COLOUR_YELLOW; + break; + case PMMC_RED: + set_colour = FAL_PACKET_COLOUR_RED; + break; + default: + break; + } + + actions[num_actions].enable = true; + actions[num_actions].parameter.s32 = set_colour; + + summary &= ~PMF_RAS_QOS_COLOUR; + + /* Skip if invalid */ + if (set_colour < FAL_NUM_PACKET_COLOURS) { + ++nattr; + ++num_actions; + } + } + + /* Encode use of a rule policer */ + if (summary & PMF_RAS_QOS_POLICE) { + ent_attrs[nattr].id = FAL_ACL_ENTRY_ATTR_ACTION_POLICER; + ent_attrs[nattr].value.aclaction = &actions[num_actions]; + + fal_object_t policer_obj = rule->pp_action.qos_policer; + + actions[num_actions].enable = true; + actions[num_actions].parameter.objid = policer_obj; + + summary &= ~PMF_RAS_QOS_POLICE; + + /* Skip if invalid */ + if (policer_obj != FAL_NULL_OBJECT_ID) { + ++nattr; + ++num_actions; + } + } + summary &= ~PMF_RAS_COUNT_DEF; @@ -155,21 +231,24 @@ pmf_hw_rule_add(struct pmf_attrl *earl, struct pmf_rule *rule) struct pmf_attr_v6_prefix *v6pfx = rule->pp_match.l3[PMF_L3F_SRC].pm_l3v6; - uint8_t blen = sizeof(v6pfx->pm_bytes); - uint8_t plen = v6pfx->pm_plen; + struct in6_addr mask; - memcpy(curfld->data.ip6, v6pfx->pm_bytes, blen); - pmf_hw_rule_gen_mask(curfld->mask.ip6, plen, blen); + static_assert(sizeof(v6pfx->pm_bytes) == 16, + "unexpected size of IPv6 addr structure"); + memcpy(curfld->data.ip6, v6pfx->pm_bytes, 16); + in6_prefixlen2mask(&mask, v6pfx->pm_plen); + memcpy(curfld->mask.ip6, mask.s6_addr, 16); } else { ent_attrs[nattr].id = 
FAL_ACL_ENTRY_ATTR_FIELD_SRC_IPV4; struct pmf_attr_v4_prefix *v4pfx = rule->pp_match.l3[PMF_L3F_SRC].pm_l3v4; - uint8_t blen = sizeof(v4pfx->pm_bytes); - uint8_t plen = v4pfx->pm_plen; + uint32_t mask = prefixlen_to_mask(v4pfx->pm_plen); - memcpy(curfld->data.ip4, v4pfx->pm_bytes, blen); - pmf_hw_rule_gen_mask(curfld->mask.ip4, plen, blen); + static_assert(sizeof(v4pfx->pm_bytes) == 4, + "unexpected size of IPv4 addr structure"); + memcpy(curfld->data.ip4, v4pfx->pm_bytes, 4); + memcpy(curfld->mask.ip4, (void *)&mask, 4); } ++nattr; @@ -185,21 +264,24 @@ pmf_hw_rule_add(struct pmf_attrl *earl, struct pmf_rule *rule) struct pmf_attr_v6_prefix *v6pfx = rule->pp_match.l3[PMF_L3F_DST].pm_l3v6; - uint8_t blen = sizeof(v6pfx->pm_bytes); - uint8_t plen = v6pfx->pm_plen; + struct in6_addr mask; - memcpy(curfld->data.ip6, v6pfx->pm_bytes, blen); - pmf_hw_rule_gen_mask(curfld->mask.ip6, plen, blen); + static_assert(sizeof(v6pfx->pm_bytes) == 16, + "unexpected size of IPv6 addr structure"); + memcpy(curfld->data.ip6, v6pfx->pm_bytes, 16); + in6_prefixlen2mask(&mask, v6pfx->pm_plen); + memcpy(curfld->mask.ip6, mask.s6_addr, 16); } else { ent_attrs[nattr].id = FAL_ACL_ENTRY_ATTR_FIELD_DST_IPV4; struct pmf_attr_v4_prefix *v4pfx = rule->pp_match.l3[PMF_L3F_DST].pm_l3v4; - uint8_t blen = sizeof(v4pfx->pm_bytes); - uint8_t plen = v4pfx->pm_plen; + uint32_t mask = prefixlen_to_mask(v4pfx->pm_plen); - memcpy(curfld->data.ip4, v4pfx->pm_bytes, blen); - pmf_hw_rule_gen_mask(curfld->mask.ip4, plen, blen); + static_assert(sizeof(v4pfx->pm_bytes) == 4, + "unexpected size of IPv4 addr structure"); + memcpy(curfld->data.ip4, v4pfx->pm_bytes, 4); + memcpy(curfld->mask.ip4, (void *)&mask, 4); } ++nattr; @@ -331,7 +413,7 @@ pmf_hw_rule_add(struct pmf_attrl *earl, struct pmf_rule *rule) struct pmf_attr_l4icmp_vals *icmp = rule->pp_match.l4[PMF_L4F_ICMP_VALS].pm_l4icmp_vals; curfld->enable = true; - curfld->mask.u8 = 0xff; + curfld->mask.u8 = icmp->pm_class ? 
ICMP6_INFOMSG_MASK : 0xff; curfld->data.u8 = icmp->pm_type; ++nattr; @@ -358,7 +440,7 @@ pmf_hw_rule_add(struct pmf_attrl *earl, struct pmf_rule *rule) #undef FAL_ENTRY_VAR_FIELDS #undef FAL_ENTRY_TOT_FIELDS - pmf_hw_commit_needed = true; + gpc_hw_commit_needed = true; /* Call the FAL, and clean up */ rc = fal_acl_create_entry(nattr, ent_attrs, &rlobj); @@ -366,7 +448,7 @@ pmf_hw_rule_add(struct pmf_attrl *earl, struct pmf_rule *rule) free(actions); if (!rc) - pmf_arlg_attrl_set_objid(earl, rlobj); + gpc_rule_set_objid(gprl, rlobj); ok = (!rc || (rc == -EOPNOTSUPP && !grp_was_created)); ok_str = ok ? ((!rc) ? "OK" : "UN") : "NO"; @@ -374,8 +456,8 @@ pmf_hw_rule_add(struct pmf_attrl *earl, struct pmf_rule *rule) log_add: if (!ok || DP_DEBUG_ENABLED(NPF)) { ACL_LOG(ok, ACL_HW, - "HW-ACLv%s RL Add %s/%s|%s:%u [%lx] %x => %s(%d) [%lx]\n", - (is_v6) ? "6" : "4", + "HW-GPC(%s)v%s RL Add %s/%s|%s:%u [%lx] %x => %s(%d) [%lx]\n", + feat_str, (is_v6) ? "6" : "4", (ingress) ? " In" : "Out", ifname, rgname, index, grpobj, summary, ok_str, rc, rlobj); @@ -386,30 +468,32 @@ pmf_hw_rule_add(struct pmf_attrl *earl, struct pmf_rule *rule) } void -pmf_hw_rule_del(struct pmf_attrl *earl) +gpc_hw_rule_del(struct gpc_rule *gprl) { - struct pmf_group_ext *earg = pmf_arlg_attrl_get_grp(earl); - struct pmf_rlset_ext *ears = pmf_arlg_grp_get_rls(earg); - char const *ifname = pmf_arlg_rls_get_ifname(ears); - uintptr_t rlobj = pmf_arlg_attrl_get_objid(earl); + struct gpc_group *gprg = gpc_rule_get_group(gprl); + struct gpc_rlset *gprs = gpc_group_get_rlset(gprg); + char const *ifname = gpc_rlset_get_ifname(gprs); + uintptr_t rlobj = gpc_rule_get_objid(gprl); bool was_created = (rlobj != FAL_NULL_OBJECT_ID); - uint16_t index = pmf_arlg_attrl_get_index(earl); - bool ingress = pmf_arlg_grp_is_ingress(earg); - bool is_v6 = pmf_arlg_grp_is_v6(earg); - char const *rgname = pmf_arlg_grp_get_name(earg); + uint16_t index = gpc_rule_get_index(gprl); + bool ingress = gpc_group_is_ingress(gprg); + bool 
is_v6 = gpc_group_is_v6(gprg); + char const *rgname = gpc_group_get_name(gprg); bool ok = true; char const *ok_str = "SK"; + enum gpc_feature feat = gpc_group_get_feature(gprg); + char const *feat_str = gpc_feature_get_name(feat); int rc = 0; /* Success */ /* Nothing to do if no FAL object - e.g. vrouter */ if (!was_created) goto log_delete; - pmf_hw_commit_needed = true; + gpc_hw_commit_needed = true; rc = fal_acl_delete_entry(rlobj); if (!rc) - pmf_arlg_attrl_set_objid(earl, FAL_NULL_OBJECT_ID); + gpc_rule_set_objid(gprl, FAL_NULL_OBJECT_ID); ok = (!rc || (rc == -EOPNOTSUPP && !was_created)); ok_str = ok ? ((!rc) ? "OK" : "UN") : "NO"; @@ -417,8 +501,8 @@ pmf_hw_rule_del(struct pmf_attrl *earl) log_delete: if (!ok || DP_DEBUG_ENABLED(NPF)) { ACL_LOG(ok, ACL_HW, - "HW-ACLv%s RL Delete %s/%s|%s:%u [%lx] => %s(%d)\n", - (is_v6) ? "6" : "4", + "HW-GPC(%s)v%s RL Delete %s/%s|%s:%u [%lx] => %s(%d)\n", + feat_str, (is_v6) ? "6" : "4", (ingress) ? " In" : "Out", ifname, rgname, index, rlobj, ok_str, rc); } @@ -433,42 +517,46 @@ pmf_hw_rule_del(struct pmf_attrl *earl) * the FAL until such time as we generate proper modifies. 
*/ void -pmf_hw_rule_mod(struct pmf_attrl *earl, struct pmf_rule *rule) +gpc_hw_rule_mod(struct gpc_rule *gprl, struct pmf_rule *old_rule __unused) { - struct pmf_group_ext *earg = pmf_arlg_attrl_get_grp(earl); - struct pmf_rlset_ext *ears = pmf_arlg_grp_get_rls(earg); - char const *ifname = pmf_arlg_rls_get_ifname(ears); - uintptr_t rlobj = pmf_arlg_attrl_get_objid(earl); - uint16_t index = pmf_arlg_attrl_get_index(earl); - bool ingress = pmf_arlg_grp_is_ingress(earg); - bool is_v6 = pmf_arlg_grp_is_v6(earg); - char const *rgname = pmf_arlg_grp_get_name(earg); + struct gpc_group *gprg = gpc_rule_get_group(gprl); + struct gpc_rlset *gprs = gpc_group_get_rlset(gprg); + char const *ifname = gpc_rlset_get_ifname(gprs); + uintptr_t rlobj = gpc_rule_get_objid(gprl); + uint16_t index = gpc_rule_get_index(gprl); + bool ingress = gpc_group_is_ingress(gprg); + bool is_v6 = gpc_group_is_v6(gprg); + char const *rgname = gpc_group_get_name(gprg); + enum gpc_feature feat = gpc_group_get_feature(gprg); + char const *feat_str = gpc_feature_get_name(feat); bool ok = true; if (!ok || DP_DEBUG_ENABLED(NPF)) { ACL_LOG(ok, ACL_HW, - "HW-ACLv%s RL Modify %s/%s|%s:%u [%lx]\n", - (is_v6) ? "6" : "4", + "HW-GPC(%s)v%s RL Modify %s/%s|%s:%u [%lx]\n", + feat_str, (is_v6) ? "6" : "4", (ingress) ? 
" In" : "Out", ifname, rgname, index, rlobj); } - pmf_hw_rule_del(earl); - pmf_hw_rule_add(earl, rule); + gpc_hw_rule_del(gprl); + gpc_hw_rule_add(gprl); } -/* ---- */ +/* -- group FAL notification -- */ bool -pmf_hw_group_create(struct pmf_group_ext *earg) +gpc_hw_group_create(struct gpc_group *gprg) { - struct pmf_rlset_ext *ears = pmf_arlg_grp_get_rls(earg); - char const *ifname = pmf_arlg_rls_get_ifname(ears); + struct gpc_rlset *gprs = gpc_group_get_rlset(gprg); + char const *ifname = gpc_rlset_get_ifname(gprs); uintptr_t grpobj = FAL_NULL_OBJECT_ID; - bool ingress = pmf_arlg_grp_is_ingress(earg); - bool is_v6 = pmf_arlg_grp_is_v6(earg); - char const *rgname = pmf_arlg_grp_get_name(earg); - uint32_t summary = pmf_arlg_grp_get_summary(earg); + bool ingress = gpc_group_is_ingress(gprg); + bool is_v6 = gpc_group_is_v6(gprg); + char const *rgname = gpc_group_get_name(gprg); + uint32_t summary = gpc_group_get_summary(gprg); + enum gpc_feature feat = gpc_group_get_feature(gprg); + char const *feat_str = gpc_feature_get_name(feat); /* Bind point list */ struct fal_object_list_t *bp_list @@ -481,7 +569,10 @@ pmf_hw_group_create(struct pmf_group_ext *earg) /* Action list */ uint32_t num_actions = !!(summary & (PMF_RAS_DROP|PMF_RAS_PASS)) - + !!(summary & PMF_RAS_COUNT_REF); + + !!(summary & PMF_RAS_COUNT_REF) + + !!(summary & PMF_RAS_QOS_HW_DESIG) + + !!(summary & PMF_RAS_QOS_COLOUR) + + !!(summary & PMF_RAS_QOS_POLICE); struct fal_object_list_t *act_list = calloc(1, sizeof(*act_list) + num_actions * sizeof(act_list->list[0])); @@ -490,12 +581,18 @@ pmf_hw_group_create(struct pmf_group_ext *earg) return false; } act_list->count = num_actions; + fal_object_t * const actions = &act_list->list[0]; num_actions = 0; if (summary & (PMF_RAS_DROP|PMF_RAS_PASS)) - act_list->list[num_actions++] - = FAL_ACL_ACTION_TYPE_PACKET_ACTION; + actions[num_actions++] = FAL_ACL_ACTION_TYPE_PACKET_ACTION; if (summary & PMF_RAS_COUNT_REF) - act_list->list[num_actions++] = 
FAL_ACL_ACTION_TYPE_COUNTER; + actions[num_actions++] = FAL_ACL_ACTION_TYPE_COUNTER; + if (summary & PMF_RAS_QOS_HW_DESIG) + actions[num_actions++] = FAL_ACL_ACTION_TYPE_SET_DESIGNATION; + if (summary & PMF_RAS_QOS_COLOUR) + actions[num_actions++] = FAL_ACL_ACTION_TYPE_SET_COLOUR; + if (summary & PMF_RAS_QOS_POLICE) + actions[num_actions++] = FAL_ACL_ACTION_TYPE_POLICER; #define FAL_TABLE_FIX_FIELDS 5 #define FAL_TABLE_VAR_FIELDS (7 + 5) @@ -614,7 +711,7 @@ pmf_hw_group_create(struct pmf_group_ext *earg) #undef FAL_TABLE_VAR_FIELDS #undef FAL_TABLE_TOT_FIELDS - pmf_hw_commit_needed = true; + gpc_hw_commit_needed = true; /* Call the FAL, and clean up */ int rc = fal_acl_create_table(nattr, tbl_attrs, &grpobj); @@ -622,15 +719,15 @@ pmf_hw_group_create(struct pmf_group_ext *earg) free(act_list); if (!rc) - pmf_arlg_grp_set_objid(earg, grpobj); + gpc_group_set_objid(gprg, grpobj); bool const ok = (!rc || rc == -EOPNOTSUPP); char const *ok_str = ok ? ((!rc) ? "OK" : "UN") : "NO"; if (!ok || DP_DEBUG_ENABLED(NPF)) { ACL_LOG(ok, ACL_HW, - "HW-ACLv%s GP Create %s/%s|%s %x => %s(%d) [%lx]\n", - (is_v6) ? "6" : "4", + "HW-GPC(%s)v%s GP Create %s/%s|%s %x => %s(%d) [%lx]\n", + feat_str, (is_v6) ? "6" : "4", (ingress) ? 
" In" : "Out", ifname, rgname, summary, ok_str, rc, grpobj); } @@ -639,28 +736,30 @@ pmf_hw_group_create(struct pmf_group_ext *earg) } void -pmf_hw_group_delete(struct pmf_group_ext *earg) +gpc_hw_group_delete(struct gpc_group *gprg) { - struct pmf_rlset_ext *ears = pmf_arlg_grp_get_rls(earg); - char const *ifname = pmf_arlg_rls_get_ifname(ears); - uintptr_t grpobj = pmf_arlg_grp_get_objid(earg); + struct gpc_rlset *gprs = gpc_group_get_rlset(gprg); + char const *ifname = gpc_rlset_get_ifname(gprs); + uintptr_t grpobj = gpc_group_get_objid(gprg); bool was_created = (grpobj != FAL_NULL_OBJECT_ID); - bool ingress = pmf_arlg_grp_is_ingress(earg); - bool is_v6 = pmf_arlg_grp_is_v6(earg); - char const *rgname = pmf_arlg_grp_get_name(earg); + bool ingress = gpc_group_is_ingress(gprg); + bool is_v6 = gpc_group_is_v6(gprg); + char const *rgname = gpc_group_get_name(gprg); bool ok = true; char const *ok_str = "SK"; + enum gpc_feature feat = gpc_group_get_feature(gprg); + char const *feat_str = gpc_feature_get_name(feat); int rc = 0; /* Success */ /* Nothing to do if no FAL object - e.g. vrouter */ if (!was_created) goto log_delete; - pmf_hw_commit_needed = true; + gpc_hw_commit_needed = true; rc = fal_acl_delete_table(grpobj); if (!rc) - pmf_arlg_grp_set_objid(earg, FAL_NULL_OBJECT_ID); + gpc_group_set_objid(gprg, FAL_NULL_OBJECT_ID); ok = (!rc || (rc == -EOPNOTSUPP && !was_created)); ok_str = ok ? ((!rc) ? "OK" : "UN") : "NO"; @@ -668,8 +767,8 @@ pmf_hw_group_delete(struct pmf_group_ext *earg) log_delete: if (!ok || DP_DEBUG_ENABLED(NPF)) { ACL_LOG(ok, ACL_HW, - "HW-ACLv%s GP Delete %s/%s|%s [%lx] => %s(%d)\n", - (is_v6) ? "6" : "4", + "HW-GPC(%s)v%s GP Delete %s/%s|%s [%lx] => %s(%d)\n", + feat_str, (is_v6) ? "6" : "4", (ingress) ? " In" : "Out", ifname, rgname, grpobj, ok_str, rc); } @@ -684,24 +783,26 @@ pmf_hw_group_delete(struct pmf_group_ext *earg) * fields, treating this as a NO-OP is currently safe. 
*/ void -pmf_hw_group_mod(struct pmf_group_ext *earg, uint32_t new) +gpc_hw_group_mod(struct gpc_group *gprg, uint32_t new) { - struct pmf_rlset_ext *ears = pmf_arlg_grp_get_rls(earg); - char const *ifname = pmf_arlg_rls_get_ifname(ears); - uintptr_t grpobj = pmf_arlg_grp_get_objid(earg); - bool ingress = pmf_arlg_grp_is_ingress(earg); - bool is_v6 = pmf_arlg_grp_is_v6(earg); - char const *rgname = pmf_arlg_grp_get_name(earg); - uint32_t old = pmf_arlg_grp_get_summary(earg); + struct gpc_rlset *gprs = gpc_group_get_rlset(gprg); + char const *ifname = gpc_rlset_get_ifname(gprs); + uintptr_t grpobj = gpc_group_get_objid(gprg); + bool ingress = gpc_group_is_ingress(gprg); + bool is_v6 = gpc_group_is_v6(gprg); + char const *rgname = gpc_group_get_name(gprg); + uint32_t old = gpc_group_get_summary(gprg); uint32_t chg = old ^ new; uint32_t set = chg & new; uint32_t clr = chg & ~new; bool ok = true; + enum gpc_feature feat = gpc_group_get_feature(gprg); + char const *feat_str = gpc_feature_get_name(feat); if (!ok || DP_DEBUG_ENABLED(NPF)) { ACL_LOG(ok, ACL_HW, - "HW-ACLv%s GP Modify %s/%s|%s [%lx] old %x set %x clr %x\n", - (is_v6) ? "6" : "4", + "HW-GPC(%s)v%s GP Modify %s/%s|%s [%lx] old %x set %x clr %x\n", + feat_str, (is_v6) ? "6" : "4", (ingress) ? 
" In" : "Out", ifname, rgname, grpobj, old, set, clr); } @@ -709,17 +810,71 @@ pmf_hw_group_mod(struct pmf_group_ext *earg, uint32_t new) /* ---- */ +static uint32_t +pmf_hw_rtr_intf_attr_acl(bool ingress, bool is_v6) +{ + uint32_t acl_type; + + if (ingress) { + if (is_v6) + acl_type = FAL_ROUTER_INTERFACE_ATTR_V6_INGRESS_ACL; + else + acl_type = FAL_ROUTER_INTERFACE_ATTR_V4_INGRESS_ACL; + } else { + if (is_v6) + acl_type = FAL_ROUTER_INTERFACE_ATTR_V6_EGRESS_ACL; + else + acl_type = FAL_ROUTER_INTERFACE_ATTR_V4_EGRESS_ACL; + } + + return acl_type; +} + +static uint32_t +pmf_hw_rtr_intf_attr_qos(bool is_v6) +{ + uint32_t qos_type; + + if (is_v6) + qos_type = FAL_ROUTER_INTERFACE_ATTR_V6_INGRESS_QOS; + else + qos_type = FAL_ROUTER_INTERFACE_ATTR_V4_INGRESS_QOS; + + return qos_type; +} + bool -pmf_hw_group_attach(struct pmf_group_ext *earg, struct ifnet *ifp) +gpc_hw_group_attach(struct gpc_group *gprg, struct ifnet *ifp) { - uintptr_t grpobj = pmf_arlg_grp_get_objid(earg); - bool ingress = pmf_arlg_grp_is_ingress(earg); - bool is_v6 = pmf_arlg_grp_is_v6(earg); - char const *rgname = pmf_arlg_grp_get_name(earg); + uintptr_t grpobj = gpc_group_get_objid(gprg); + bool is_attached = gpc_group_is_ll_attached(gprg); + bool ingress = gpc_group_is_ingress(gprg); + bool is_v6 = gpc_group_is_v6(gprg); + char const *rgname = gpc_group_get_name(gprg); char const *ifname = ifp->if_name; - bool ok = true; - char const *ok_str = "SK"; - int rc = 0; /* Success */ + bool ok = false; + char const *ok_str = "ER"; + enum gpc_feature feat = gpc_group_get_feature(gprg); + char const *feat_str = gpc_feature_get_name(feat); + int rc = -EINVAL; + + /* Validate group feature, and not already attached */ + switch (feat) { + case GPC_FEAT_ACL: + break; + case GPC_FEAT_QOS: + if (!ingress) + goto log_attach; + break; + default: + goto log_attach; + } + if (is_attached) + goto log_attach; + + ok = true; + ok_str = "SK"; + rc = 0; /* Success */ /* Nothing to do if no FAL object - e.g. 
vrouter */ if (grpobj == FAL_NULL_OBJECT_ID) @@ -729,19 +884,16 @@ pmf_hw_group_attach(struct pmf_group_ext *earg, struct ifnet *ifp) acl.value.objid = grpobj; - if (ingress) { - if (is_v6) - acl.id = FAL_ROUTER_INTERFACE_ATTR_V6_INGRESS_ACL; - else - acl.id = FAL_ROUTER_INTERFACE_ATTR_V4_INGRESS_ACL; - } else { - if (is_v6) - acl.id = FAL_ROUTER_INTERFACE_ATTR_V6_EGRESS_ACL; - else - acl.id = FAL_ROUTER_INTERFACE_ATTR_V4_EGRESS_ACL; + switch (feat) { + case GPC_FEAT_ACL: + acl.id = pmf_hw_rtr_intf_attr_acl(ingress, is_v6); + break; + case GPC_FEAT_QOS: + acl.id = pmf_hw_rtr_intf_attr_qos(is_v6); + break; } - pmf_hw_commit_needed = true; + gpc_hw_commit_needed = true; rc = if_set_l3_intf_attr(ifp, &acl); @@ -751,8 +903,8 @@ pmf_hw_group_attach(struct pmf_group_ext *earg, struct ifnet *ifp) log_attach: if (!ok || DP_DEBUG_ENABLED(NPF)) { ACL_LOG(ok, ACL_HW, - "HW-ACLv%s GP Attach %s/%s|%s [%lx] => %s(%d)\n", - (is_v6) ? "6" : "4", + "HW-GPC(%s)v%s GP Attach %s/%s|%s [%lx] => %s(%d)\n", + feat_str, (is_v6) ? "6" : "4", (ingress) ? 
" In" : "Out", ifname, rgname, grpobj, ok_str, rc); } @@ -761,17 +913,35 @@ pmf_hw_group_attach(struct pmf_group_ext *earg, struct ifnet *ifp) } void -pmf_hw_group_detach(struct pmf_group_ext *earg, struct ifnet *ifp) +gpc_hw_group_detach(struct gpc_group *gprg, struct ifnet *ifp) { - uintptr_t grpobj = pmf_arlg_grp_get_objid(earg); - bool was_attached = pmf_arlg_grp_is_ll_attached(earg); - bool ingress = pmf_arlg_grp_is_ingress(earg); - bool is_v6 = pmf_arlg_grp_is_v6(earg); - char const *rgname = pmf_arlg_grp_get_name(earg); + uintptr_t grpobj = gpc_group_get_objid(gprg); + bool was_attached = gpc_group_is_ll_attached(gprg); + bool ingress = gpc_group_is_ingress(gprg); + bool is_v6 = gpc_group_is_v6(gprg); + char const *rgname = gpc_group_get_name(gprg); char const *ifname = ifp->if_name; - bool ok = true; - char const *ok_str = "SK"; - int rc = 0; /* Success */ + bool ok = false; + char const *ok_str = "ER"; + enum gpc_feature feat = gpc_group_get_feature(gprg); + char const *feat_str = gpc_feature_get_name(feat); + int rc = -EINVAL; + + /* Validate group feature, and not already attached */ + switch (feat) { + case GPC_FEAT_ACL: + break; + case GPC_FEAT_QOS: + if (!ingress) + goto log_detach; + break; + default: + goto log_detach; + } + + ok = true; + ok_str = "SK"; + rc = 0; /* Success */ /* Nothing to do if attach failed or skipped */ if (!was_attached) @@ -781,19 +951,16 @@ pmf_hw_group_detach(struct pmf_group_ext *earg, struct ifnet *ifp) acl.value.objid = FAL_NULL_OBJECT_ID; - if (ingress) { - if (is_v6) - acl.id = FAL_ROUTER_INTERFACE_ATTR_V6_INGRESS_ACL; - else - acl.id = FAL_ROUTER_INTERFACE_ATTR_V4_INGRESS_ACL; - } else { - if (is_v6) - acl.id = FAL_ROUTER_INTERFACE_ATTR_V6_EGRESS_ACL; - else - acl.id = FAL_ROUTER_INTERFACE_ATTR_V4_EGRESS_ACL; + switch (feat) { + case GPC_FEAT_ACL: + acl.id = pmf_hw_rtr_intf_attr_acl(ingress, is_v6); + break; + case GPC_FEAT_QOS: + acl.id = pmf_hw_rtr_intf_attr_qos(is_v6); + break; } - pmf_hw_commit_needed = true; + 
gpc_hw_commit_needed = true; rc = if_set_l3_intf_attr(ifp, &acl); @@ -803,8 +970,8 @@ pmf_hw_group_detach(struct pmf_group_ext *earg, struct ifnet *ifp) log_detach: if (!ok || DP_DEBUG_ENABLED(NPF)) { ACL_LOG(ok, ACL_HW, - "HW-ACLv%s GP Detach %s/%s|%s [%lx] => %s(%d)\n", - (is_v6) ? "6" : "4", + "HW-GPC(%s)v%s GP Detach %s/%s|%s [%lx] => %s(%d)\n", + feat_str, (is_v6) ? "6" : "4", (ingress) ? " In" : "Out", ifname, rgname, grpobj, ok_str, rc); } @@ -813,22 +980,25 @@ pmf_hw_group_detach(struct pmf_group_ext *earg, struct ifnet *ifp) /* ---- */ bool -pmf_hw_counter_create(struct pmf_cntr *eark) +gpc_hw_counter_create(struct gpc_cntr *gprk) { - struct pmf_group_ext *earg = pmf_arlg_cntr_get_grp(eark); - struct pmf_rlset_ext *ears = pmf_arlg_grp_get_rls(earg); - char const *ifname = pmf_arlg_rls_get_ifname(ears); - uintptr_t grpobj = pmf_arlg_grp_get_objid(earg); + struct gpc_cntg *cntg = gpc_cntr_get_cntg(gprk); + struct gpc_group *gprg = gpc_cntg_get_group(cntg); + struct gpc_rlset *gprs = gpc_group_get_rlset(gprg); + char const *ifname = gpc_rlset_get_ifname(gprs); + uintptr_t grpobj = gpc_group_get_objid(gprg); bool grp_was_created = (grpobj != FAL_NULL_OBJECT_ID); uintptr_t ctrobj = FAL_NULL_OBJECT_ID; - char const *ctname = pmf_arlg_cntr_get_name(eark); - bool ingress = pmf_arlg_grp_is_ingress(earg); - bool is_v6 = pmf_arlg_grp_is_v6(earg); - char const *rgname = pmf_arlg_grp_get_name(earg); - bool cnt_pkt = pmf_arlg_cntr_pkt_enabled(eark); - bool cnt_byt = pmf_arlg_cntr_byt_enabled(eark); + char const *ctname = gpc_cntr_get_name(gprk); + bool ingress = gpc_group_is_ingress(gprg); + bool is_v6 = gpc_group_is_v6(gprg); + char const *rgname = gpc_group_get_name(gprg); + bool cnt_pkt = gpc_cntr_pkt_enabled(gprk); + bool cnt_byt = gpc_cntr_byt_enabled(gprk); bool ok = true; char const *ok_str = "SK"; + enum gpc_feature feat = gpc_group_get_feature(gprg); + char const *feat_str = gpc_feature_get_name(feat); int rc = 0; /* Success */ /* Do not allocate a useless 
counter */ @@ -870,13 +1040,13 @@ pmf_hw_counter_create(struct pmf_cntr *eark) #undef FAL_COUNTER_VAR_FIELDS #undef FAL_COUNTER_TOT_FIELDS - pmf_hw_commit_needed = true; + gpc_hw_commit_needed = true; /* Call the FAL, and clean up */ rc = fal_acl_create_counter(nattr, cnt_attrs, &ctrobj); if (!rc) - pmf_arlg_cntr_set_objid(eark, ctrobj); + gpc_cntr_set_objid(gprk, ctrobj); ok = (!rc || (rc == -EOPNOTSUPP && !grp_was_created)); ok_str = ok ? ((!rc) ? "OK" : "UN") : "NO"; @@ -884,8 +1054,8 @@ pmf_hw_counter_create(struct pmf_cntr *eark) log_create: if (!ok || DP_DEBUG_ENABLED(NPF)) { ACL_LOG(ok, ACL_HW, - "HW-ACLv%s CT Add %s/%s|%s:%s [%lx]%s%s => %s(%d) [%lx]\n", - (is_v6) ? "6" : "4", + "HW-GPC(%s)v%s CT Add %s/%s|%s:%s [%lx]%s%s => %s(%d) [%lx]\n", + feat_str, (is_v6) ? "6" : "4", (ingress) ? " In" : "Out", ifname, rgname, ctname, grpobj, (cnt_pkt) ? " Pkt" : "", @@ -898,30 +1068,33 @@ pmf_hw_counter_create(struct pmf_cntr *eark) } void -pmf_hw_counter_delete(struct pmf_cntr *eark) +gpc_hw_counter_delete(struct gpc_cntr *gprk) { - struct pmf_group_ext *earg = pmf_arlg_cntr_get_grp(eark); - struct pmf_rlset_ext *ears = pmf_arlg_grp_get_rls(earg); - char const *ifname = pmf_arlg_rls_get_ifname(ears); - uintptr_t ctrobj = pmf_arlg_cntr_get_objid(eark); + struct gpc_cntg *cntg = gpc_cntr_get_cntg(gprk); + struct gpc_group *gprg = gpc_cntg_get_group(cntg); + struct gpc_rlset *gprs = gpc_group_get_rlset(gprg); + char const *ifname = gpc_rlset_get_ifname(gprs); + uintptr_t ctrobj = gpc_cntr_get_objid(gprk); bool was_created = (ctrobj != FAL_NULL_OBJECT_ID); - char const *ctname = pmf_arlg_cntr_get_name(eark); - bool ingress = pmf_arlg_grp_is_ingress(earg); - bool is_v6 = pmf_arlg_grp_is_v6(earg); - char const *rgname = pmf_arlg_grp_get_name(earg); + char const *ctname = gpc_cntr_get_name(gprk); + bool ingress = gpc_group_is_ingress(gprg); + bool is_v6 = gpc_group_is_v6(gprg); + char const *rgname = gpc_group_get_name(gprg); bool ok = true; char const *ok_str = "SK"; + 
enum gpc_feature feat = gpc_group_get_feature(gprg); + char const *feat_str = gpc_feature_get_name(feat); int rc = 0; /* Success */ /* Nothing to do if no FAL object - e.g. vrouter */ if (!was_created) goto log_delete; - pmf_hw_commit_needed = true; + gpc_hw_commit_needed = true; rc = fal_acl_delete_counter(ctrobj); if (!rc) - pmf_arlg_cntr_set_objid(eark, FAL_NULL_OBJECT_ID); + gpc_cntr_set_objid(gprk, FAL_NULL_OBJECT_ID); ok = (!rc || (rc == -EOPNOTSUPP && !was_created)); ok_str = ok ? ((!rc) ? "OK" : "UN") : "NO"; @@ -929,8 +1102,8 @@ pmf_hw_counter_delete(struct pmf_cntr *eark) log_delete: if (!ok || DP_DEBUG_ENABLED(NPF)) { ACL_LOG(ok, ACL_HW, - "HW-ACLv%s CT Delete %s/%s|%s:%s [%lx] => %s(%d)\n", - (is_v6) ? "6" : "4", + "HW-GPC(%s)v%s CT Delete %s/%s|%s:%s [%lx] => %s(%d)\n", + feat_str, (is_v6) ? "6" : "4", (ingress) ? " In" : "Out", ifname, rgname, ctname, ctrobj, ok_str, rc); @@ -938,22 +1111,25 @@ pmf_hw_counter_delete(struct pmf_cntr *eark) } bool -pmf_hw_counter_clear(struct pmf_cntr const *eark) +gpc_hw_counter_clear(struct gpc_cntr const *gprk) { - struct pmf_group_ext *earg = pmf_arlg_cntr_get_grp(eark); - struct pmf_rlset_ext *ears = pmf_arlg_grp_get_rls(earg); - char const *ifname = pmf_arlg_rls_get_ifname(ears); - uintptr_t ctrobj = pmf_arlg_cntr_get_objid(eark); + struct gpc_cntg *cntg = gpc_cntr_get_cntg(gprk); + struct gpc_group *gprg = gpc_cntg_get_group(cntg); + struct gpc_rlset *gprs = gpc_group_get_rlset(gprg); + char const *ifname = gpc_rlset_get_ifname(gprs); + uintptr_t ctrobj = gpc_cntr_get_objid(gprk); bool was_created = (ctrobj != FAL_NULL_OBJECT_ID); - char const *ctname = pmf_arlg_cntr_get_name(eark); - bool ingress = pmf_arlg_grp_is_ingress(earg); - bool is_v6 = pmf_arlg_grp_is_v6(earg); - char const *rgname = pmf_arlg_grp_get_name(earg); - bool cnt_pkt = pmf_arlg_cntr_pkt_enabled(eark); - bool cnt_byt = pmf_arlg_cntr_byt_enabled(eark); + char const *ctname = gpc_cntr_get_name(gprk); + bool ingress = gpc_group_is_ingress(gprg); + 
bool is_v6 = gpc_group_is_v6(gprg); + char const *rgname = gpc_group_get_name(gprg); + bool cnt_pkt = gpc_cntr_pkt_enabled(gprk); + bool cnt_byt = gpc_cntr_byt_enabled(gprk); bool ok = true; char const *ok_str_pkt = "SK"; char const *ok_str_byt = "SK"; + enum gpc_feature feat = gpc_group_get_feature(gprg); + char const *feat_str = gpc_feature_get_name(feat); int rc_pkt = 0, rc_byt = 0; /* Success */ /* Nothing to do if no FAL object - e.g. vrouter */ @@ -987,9 +1163,9 @@ pmf_hw_counter_clear(struct pmf_cntr const *eark) log_clear: if (!ok || DP_DEBUG_ENABLED(NPF)) { ACL_LOG(ok, ACL_HW, - "HW-ACLv%s CT Clr %s/%s|%s:%s [%lx]%s%s =>" + "HW-GPC(%s)v%s CT Clr %s/%s|%s:%s [%lx]%s%s =>" " P:%s(%d) B:%s(%d)\n", - (is_v6) ? "6" : "4", + feat_str, (is_v6) ? "6" : "4", (ingress) ? " In" : "Out", ifname, rgname, ctname, ctrobj, (cnt_pkt) ? " Pkt" : "", @@ -1002,22 +1178,25 @@ pmf_hw_counter_clear(struct pmf_cntr const *eark) } bool -pmf_hw_counter_read(struct pmf_cntr const *eark, +gpc_hw_counter_read(struct gpc_cntr const *gprk, uint64_t *pkts, uint64_t *bytes) { - struct pmf_group_ext *earg = pmf_arlg_cntr_get_grp(eark); - struct pmf_rlset_ext *ears = pmf_arlg_grp_get_rls(earg); - char const *ifname = pmf_arlg_rls_get_ifname(ears); - uintptr_t ctrobj = pmf_arlg_cntr_get_objid(eark); + struct gpc_cntg *cntg = gpc_cntr_get_cntg(gprk); + struct gpc_group *gprg = gpc_cntg_get_group(cntg); + struct gpc_rlset *gprs = gpc_group_get_rlset(gprg); + char const *ifname = gpc_rlset_get_ifname(gprs); + uintptr_t ctrobj = gpc_cntr_get_objid(gprk); bool was_created = (ctrobj != FAL_NULL_OBJECT_ID); - char const *ctname = pmf_arlg_cntr_get_name(eark); - bool ingress = pmf_arlg_grp_is_ingress(earg); - bool is_v6 = pmf_arlg_grp_is_v6(earg); - char const *rgname = pmf_arlg_grp_get_name(earg); - bool cnt_pkt = pmf_arlg_cntr_pkt_enabled(eark); - bool cnt_byt = pmf_arlg_cntr_byt_enabled(eark); + char const *ctname = gpc_cntr_get_name(gprk); + bool ingress = gpc_group_is_ingress(gprg); + bool 
is_v6 = gpc_group_is_v6(gprg); + char const *rgname = gpc_group_get_name(gprg); + bool cnt_pkt = gpc_cntr_pkt_enabled(gprk); + bool cnt_byt = gpc_cntr_byt_enabled(gprk); bool ok = true; char const *ok_str = "SK"; + enum gpc_feature feat = gpc_group_get_feature(gprg); + char const *feat_str = gpc_feature_get_name(feat); int rc = 0; /* Success */ /* Nothing to do if no FAL object - e.g. vrouter */ @@ -1056,8 +1235,8 @@ pmf_hw_counter_read(struct pmf_cntr const *eark, log_read: if (!ok || DP_DEBUG_ENABLED(NPF)) { ACL_LOG(ok, ACL_HW, - "HW-ACLv%s CT Get %s/%s|%s:%s [%lx]%s%s => %s(%d)\n", - (is_v6) ? "6" : "4", + "HW-GPC(%s)v%s CT Get %s/%s|%s:%s [%lx]%s%s => %s(%d)\n", + feat_str, (is_v6) ? "6" : "4", (ingress) ? " In" : "Out", ifname, rgname, ctname, ctrobj, (cnt_pkt) ? " Pkt" : "", @@ -1074,13 +1253,13 @@ pmf_hw_counter_read(struct pmf_cntr const *eark, /* ---- */ void -pmf_hw_commit(void) +gpc_hw_commit(void) { bool ok = true; int rc = 0; char const *ok_str = "SK"; - if (!pmf_hw_commit_needed) + if (!gpc_hw_commit_needed) goto log_commit; static uint32_t commit_counter; @@ -1104,5 +1283,5 @@ pmf_hw_commit(void) commit_counter, ok_str, rc); } - pmf_hw_commit_needed = false; + gpc_hw_commit_needed = false; } diff --git a/src/npf/config/gpc_hw.h b/src/npf/config/gpc_hw.h new file mode 100644 index 00000000..f7c654d6 --- /dev/null +++ b/src/npf/config/gpc_hw.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2019-2021, AT&T Intellectual Property. All rights reserved. 
+ * + * SPDX-License-Identifier: LGPL-2.1-only + */ + +#ifndef GPC_HW_H +#define GPC_HW_H + +#include +#include + +struct gpc_group; +struct gpc_rule; +struct gpc_cntr; +struct pmf_rule; +struct ifnet; + +bool gpc_hw_rule_add(struct gpc_rule *gprl); +void gpc_hw_rule_mod(struct gpc_rule *gprl, struct pmf_rule *old_rule); +void gpc_hw_rule_del(struct gpc_rule *gprl); + +bool gpc_hw_group_attach(struct gpc_group *gprg, struct ifnet *ifp); +void gpc_hw_group_detach(struct gpc_group *gprg, struct ifnet *ifp); +bool gpc_hw_group_create(struct gpc_group *gprg); +void gpc_hw_group_mod(struct gpc_group *gprg, uint32_t new); +void gpc_hw_group_delete(struct gpc_group *gprg); + +bool gpc_hw_counter_create(struct gpc_cntr *gprk); +void gpc_hw_counter_delete(struct gpc_cntr *gprk); +bool gpc_hw_counter_clear(struct gpc_cntr const *gprk); +bool gpc_hw_counter_read(struct gpc_cntr const *gprk, + uint64_t *pkts, uint64_t *bytes); +void gpc_hw_commit(void); + +#endif /* GPC_HW_H */ diff --git a/src/npf/config/npf_attach_point.c b/src/npf/config/npf_attach_point.c index bcb135f9..6cb30ce0 100644 --- a/src/npf/config/npf_attach_point.c +++ b/src/npf/config/npf_attach_point.c @@ -363,9 +363,9 @@ attach_point_find(enum npf_attach_type attach_type, const char *attach_point) } const struct npf_attpt_key * -npf_attpt_item_key(const struct npf_attpt_item *ap) +npf_attpt_item_key(const struct npf_attpt_item *handle) { - return &ap->ap_key; + return &handle->ap_key; } static size_t @@ -760,12 +760,6 @@ npf_attpt_item_up_fn_context(const struct npf_attpt_item *ap) return ap->ap_fn_context; } -size_t -npf_attpt_item_rls_count(const struct npf_attpt_item *ap) -{ - return attach_point_rls_count(ap); -} - int npf_attpt_item_set_up(enum npf_attach_type attach_type, const char *attach_point, @@ -1009,36 +1003,16 @@ void npf_attpt_walk_rlsets(struct npf_attpt_item *ap, } } -int npf_attpt_group_find(struct npf_attpt_rlset *ars, - enum npf_rule_class group_class, const char *group) -{ - if (!ars || 
!group_valid(group_class, group)) - return -EINVAL; - - /* Acquire the attached group */ - struct npf_rlgrp_key rg_key = { - .rgk_class = group_class, - .rgk_name = group, - }; - struct ag_handle agh; - struct npf_attpt_group *agr - = attached_group_find(ars, &rg_key, &agh); - if (!agr) - return -ENOENT; - - return 0; -} - struct npf_attpt_rlset * -npf_attpt_group_rlset(const struct npf_attpt_group *rsg) +npf_attpt_group_rlset(const struct npf_attpt_group *handle) { - return rsg->ag_ars; + return handle->ag_ars; } const struct npf_rlgrp_key * -npf_attpt_group_key(const struct npf_attpt_group *rsg) +npf_attpt_group_key(const struct npf_attpt_group *handle) { - return &rsg->ag_key; + return &handle->ag_key; } bool @@ -1053,15 +1027,15 @@ npf_attpt_group_set_extend(struct npf_attpt_group *rsg, void *extend) } void * -npf_attpt_group_get_extend(const struct npf_attpt_group *rsg) +npf_attpt_group_get_extend(const struct npf_attpt_group *handle) { - return rsg->ag_extend; + return handle->ag_extend; } uint32_t -npf_attpt_group_dir_mask(const struct npf_attpt_group *rsg) +npf_attpt_group_dir_mask(const struct npf_attpt_group *handle) { - return rsg->ag_dir_mask; + return handle->ag_dir_mask; } void @@ -1117,6 +1091,7 @@ static const char *npf_attach_type_names[NPF_ATTACH_TYPE_COUNT] = { [NPF_ATTACH_TYPE_GLOBAL] = "global", [NPF_ATTACH_TYPE_QOS] = "qos", [NPF_ATTACH_TYPE_VRF] = "vrf", + [NPF_ATTACH_TYPE_ZONE] = "zone", }; const char *npf_get_attach_type_name(enum npf_attach_type attach_type) diff --git a/src/npf/config/npf_attach_point.h b/src/npf/config/npf_attach_point.h index 14d5303c..2bc981d1 100644 --- a/src/npf/config/npf_attach_point.h +++ b/src/npf/config/npf_attach_point.h @@ -55,6 +55,11 @@ * This is for attaching rules to a VRF id, and the name should be * the id associated wth the VRF. * + * NPF_ATTACH_TYPE_ZONE + * + * This is for attaching rules to a zone pairing, and the name should be of + * the form "FROM_ZONE>TO_ZONE". 
+ * * NPF_ATTACH_TYPE_ALL * * This is used to indicate that there is no associated attach type. It @@ -73,6 +78,7 @@ enum npf_attach_type { NPF_ATTACH_TYPE_GLOBAL, NPF_ATTACH_TYPE_QOS, NPF_ATTACH_TYPE_VRF, + NPF_ATTACH_TYPE_ZONE, NPF_ATTACH_TYPE_COUNT /** This must be the last value */ }; @@ -210,8 +216,8 @@ enum npf_attpt_ev_type { typedef void (npf_attpt_ev_cb)( enum npf_attpt_ev_type event, struct npf_attpt_item *ap, void *data); -int npf_attpt_ev_listen(enum npf_attach_type type, uint32_t notes, - npf_attpt_ev_cb *cb); +int npf_attpt_ev_listen(enum npf_attach_type type, uint32_t events, + npf_attpt_ev_cb *fn); void npf_attpt_ev_notify(enum npf_attpt_ev_type event, struct npf_attpt_item *ap, void *data); @@ -263,15 +269,6 @@ void *npf_attpt_item_up_data_context(const struct npf_attpt_item *ap); npf_attpt_item_fn_ctx *npf_attpt_item_up_fn_context( const struct npf_attpt_item *ap); -/** - * Give the count of the number of rulesets on an attach point - * - * @param handle A pointer to the attach point item - * @return the number of rules - */ -size_t -npf_attpt_item_rls_count(const struct npf_attpt_item *handle); - /** * Set the state of an attach point to up. It can now have configuration * applied to it. @@ -405,18 +402,6 @@ typedef bool (npf_attpt_walk_rlsets_cb)(struct npf_attpt_rlset *ars, void *ctx); void npf_attpt_walk_rlsets( struct npf_attpt_item *ap, npf_attpt_walk_rlsets_cb *fn, void *ctx); -/** - * Lookup a group associated with an attachment point - * - * @param ars This ruleset attached to a point - * @param group_class The class of the rule-group that is being looked up - * @param group The name of the rule-group being looked up. - * @return Returns 0 on successfully finding an entry matching the group, or - * a negative errno on failure. 
- */ -int npf_attpt_group_find(struct npf_attpt_rlset *ars, - enum npf_rule_class group_class, const char *group); - /** * Return the ruleset a group is attached to * @@ -504,6 +489,6 @@ void *npf_attpt_rlset_get_extend(const struct npf_attpt_rlset *ars); * Set and Get the extension pointer assocated with an attached group */ bool npf_attpt_group_set_extend(struct npf_attpt_group *rsg, void *extend); -void *npf_attpt_group_get_extend(const struct npf_attpt_group *rsg); +void *npf_attpt_group_get_extend(const struct npf_attpt_group *handle); #endif /* NPF_ATTACH_POINT_H */ diff --git a/src/npf/config/npf_auto_attach.c b/src/npf/config/npf_auto_attach.c index 7f5164be..286e210a 100644 --- a/src/npf/config/npf_auto_attach.c +++ b/src/npf/config/npf_auto_attach.c @@ -103,8 +103,8 @@ static int get_auto_attach_point(enum npf_rule_class group_class, fn = npf_auto_attach_fns[group_class]; if (fn) return fn(group_class, group, attach_info); - else - return -ENOENT; + + return -ENOENT; } int npf_cfg_auto_attach_rule_add(enum npf_rule_class group_class, diff --git a/src/npf/config/npf_config.c b/src/npf/config/npf_config.c index 54fb4bce..8586d358 100644 --- a/src/npf/config/npf_config.c +++ b/src/npf/config/npf_config.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2016-2017 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -32,32 +32,53 @@ enum npf_commit_type { NPF_COMMIT_DELETE }; +/* + * Free the npf config attach point. + * + * npf_conf - Pointer to structure registered earlier with + * npf_attpt_item_set_up(), which is used to point to rulesets when they are + * in use. This is associated with the attach point. 
+ */ +static void npf_config_release(struct npf_config *npf_conf) +{ + if (!npf_conf) + return; + + npf_conf->nc_attached = false; + + const char *ap = rcu_xchg_pointer(&npf_conf->nc_attach_point, NULL); + if (ap) + free((void *)ap); +} + static void npf_config_free_rcu(struct rcu_head *head) { struct npf_config *npf_conf = caa_container_of(head, struct npf_config, nc_rcu); + npf_config_release(npf_conf); free(npf_conf); } -static int npf_config_default_alloc_free(struct npf_config **npf_confp, - bool alloc) +static int npf_config_default_alloc(struct npf_config **npf_confp) { - struct npf_config *npf_conf; + struct npf_config *npf_conf = calloc(sizeof(*npf_conf), 1); - if (alloc) { - npf_conf = calloc(sizeof(*npf_conf), 1); + if (npf_conf == NULL) + return -ENOMEM; - if (npf_conf == NULL) - return -ENOMEM; + rcu_assign_pointer(*npf_confp, npf_conf); + return 0; +} - rcu_assign_pointer(*npf_confp, npf_conf); - } else { - npf_conf = *npf_confp; +static int npf_config_default_free(struct npf_config **npf_confp) +{ + struct npf_config *npf_conf; + + npf_conf = rcu_xchg_pointer(npf_confp, NULL); - rcu_assign_pointer(*npf_confp, NULL); + if (npf_conf) call_rcu(&npf_conf->nc_rcu, npf_config_free_rcu); - } return 0; } @@ -69,8 +90,15 @@ static int npf_config_alloc(struct npf_config **npf_confp, char *attach_point; int rc; - if (*npf_confp != NULL && (*npf_confp)->nc_attach_point != NULL) - return 0; /* already allocated and associated */ + if (*npf_confp != NULL) { + if ((*npf_confp)->nc_attached) + /* already allocated and associated */ + return 0; + + /* This should not happen. 
But handle it anyway */ + if ((*npf_confp)->nc_attach_point) + npf_config_release(*npf_confp); + } attach_point = strdup(apk->apk_point); if (attach_point == NULL) @@ -82,7 +110,7 @@ static int npf_config_alloc(struct npf_config **npf_confp, if (npf_attpt_item_fn) rc = npf_attpt_item_fn(npf_confp, true); else - rc = npf_config_default_alloc_free(npf_confp, true); + rc = npf_config_default_alloc(npf_confp); if (rc) { free(attach_point); @@ -91,6 +119,7 @@ static int npf_config_alloc(struct npf_config **npf_confp, (*npf_confp)->nc_attach_type = apk->apk_type; (*npf_confp)->nc_attach_point = attach_point; + (*npf_confp)->nc_attached = true; return 0; } @@ -203,10 +232,7 @@ static void npf_cfg_commit(struct npf_attpt_item *ap, enum npf_commit_type type) /* Mark all the rulesets as clean. */ memset(npf_conf->nc_dirty_rulesets, 0, NPF_RS_TYPE_COUNT); - if (npf_conf->nc_active_flags == 0 && npf_conf->nc_attach_point) { - - free((void *)npf_conf->nc_attach_point); - npf_conf->nc_attach_point = NULL; + if (npf_conf->nc_active_flags == 0 && npf_conf->nc_attached) { npf_attpt_item_fn_ctx *npf_attpt_item_fn = npf_attpt_item_up_fn_context(ap); @@ -214,7 +240,10 @@ static void npf_cfg_commit(struct npf_attpt_item *ap, enum npf_commit_type type) if (npf_attpt_item_fn) npf_attpt_item_fn(npf_conf_p, false); else - npf_config_default_alloc_free(npf_conf_p, false); + npf_config_default_free(npf_conf_p); + + /* Clear nc_attached, and free nc_attach_point if necessary */ + npf_config_release(*npf_conf_p); } } @@ -464,13 +493,28 @@ void npf_show_attach_point_rulesets(json_writer_t *json, if (((rulesets & ruleset_type_bit) != 0) && npf_active(npf_conf, ruleset_type_bit)) { + /* Get the ruleset early; + * show nothing unless there's a ruleset. + */ + const npf_ruleset_t *ruleset = + npf_get_ruleset(npf_conf, ruleset_type); + + if (!ruleset) + continue; + if (!attach_point_json_printed) { + /* Get the attach point early; + * show nothing if it has already been deleted. 
+ */ + const char *ap = rcu_dereference( + npf_conf->nc_attach_point); + if (!ap) + break; jsonw_start_object(json); jsonw_string_field(json, "attach_type", npf_get_attach_type_name( npf_conf->nc_attach_type)); - jsonw_string_field(json, "attach_point", - npf_conf->nc_attach_point); + jsonw_string_field(json, "attach_point", ap); jsonw_name(json, "rulesets"); jsonw_start_array(json); @@ -481,8 +525,7 @@ void npf_show_attach_point_rulesets(json_writer_t *json, npf_get_ruleset_type_name( ruleset_type)); - npf_json_ruleset(npf_get_ruleset(npf_conf, - ruleset_type), json); + npf_json_ruleset(ruleset, json); jsonw_end_object(json); } @@ -565,10 +608,14 @@ static void npf_clear_attach_point_rulesets(struct npf_attpt_item *ap, if (((sel->rulesets & ruleset_type_bit) != 0) && npf_active(npf_conf, ruleset_type_bit)) { - npf_clear_stats( - npf_get_ruleset(npf_conf, ruleset_type), - sel->group_class, sel->group_name, - sel->rule_no); + /* Only clear stats when there's a ruleset. */ + const npf_ruleset_t *ruleset = + npf_get_ruleset(npf_conf, ruleset_type); + + if (ruleset) + npf_clear_stats(ruleset, + sel->group_class, sel->group_name, + sel->rule_no); } } } diff --git a/src/npf/config/npf_config.h b/src/npf/config/npf_config.h index 8a382f4d..a137373a 100644 --- a/src/npf/config/npf_config.h +++ b/src/npf/config/npf_config.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2016-2017 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -35,6 +35,7 @@ struct npf_config { unsigned long nc_stateful; bool nc_dirty_rulesets[NPF_RS_TYPE_COUNT]; + bool nc_attached; enum npf_attach_type nc_attach_type; const char *nc_attach_point; struct rcu_head nc_rcu; @@ -54,7 +55,9 @@ enum npf_active_bits { NPF_FW_OUT = BIT(NPF_RS_FW_OUT), NPF_DNAT = BIT(NPF_RS_DNAT), NPF_SNAT = BIT(NPF_RS_SNAT), + NPF_ZONE = BIT(NPF_RS_ZONE), NPF_LOCAL = BIT(NPF_RS_LOCAL), + NPF_ORIGINATE = BIT(NPF_RS_ORIGINATE), NPF_BRIDGE = BIT(NPF_RS_BRIDGE), NPF_IPSEC = BIT(NPF_RS_IPSEC), NPF_PBR = BIT(NPF_RS_PBR), @@ -75,16 +78,15 @@ enum npf_active_bits { NPF_IF_SESSION = BIT(NPF_RS_TYPE_COUNT+3), /* All causes for calling npf_hook_track() */ - NPF_V4_TRACK_IN = NPF_FW_STATE_OUT | - NPF_FW_IN | NPF_DNAT | NPF_NAT46, + NPF_V4_TRACK_IN = NPF_FW_STATE_OUT | NPF_FW_IN | NPF_DNAT, NPF_V4_TRACK_OUT = NPF_FW_STATE_IN | - NPF_FW_OUT | NPF_SNAT, - NPF_V6_TRACK_IN = NPF_FW_STATE_OUT | - NPF_FW_IN | NPF_NAT64, - NPF_V6_TRACK_OUT = NPF_FW_STATE_IN | NPF_FW_OUT, + NPF_FW_OUT | NPF_ZONE | NPF_SNAT, + NPF_V6_TRACK_IN = NPF_FW_STATE_OUT | NPF_FW_IN, + NPF_V6_TRACK_OUT = NPF_FW_STATE_IN | + NPF_FW_OUT | NPF_ZONE, }; -#define NAT64_OR_NAT46(_eth_type) ((_eth_type == htons(ETHER_TYPE_IPv4)) ? \ +#define NAT64_OR_NAT46(_eth_type) ((_eth_type == htons(RTE_ETHER_TYPE_IPV4)) ? \ NPF_NAT46 : NPF_NAT64) /** diff --git a/src/npf/config/npf_config_state.c b/src/npf/config/npf_config_state.c index abe51d06..0cbf507d 100644 --- a/src/npf/config/npf_config_state.c +++ b/src/npf/config/npf_config_state.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2018-2020, AT&T Intellectual Property. All rights reserved. 
* * SPDX-License-Identifier: LGPL-2.1-only */ @@ -30,10 +30,12 @@ * gettree /service/nat json | python -mjson.tool * gettree /interfaces/dataplane/dp0p1s1/firewall json | python -mjson.tool * gettree /interfaces/dataplane/dp0p1s1/policy json | python -mjson.tool + * gettree /security/zone-policy json | python -mjson.tool * * As well as invoking the script directly: * * /opt/vyatta/sbin/npf-get-state-pbr + * /opt/vyatta/sbin/npf-get-state-zones * /opt/vyatta/sbin/npf-get-state-fw * /opt/vyatta/sbin/npf-get-state-nat * @@ -42,6 +44,7 @@ * npf-op state all: fw-in fw-out bridge local * npf-op state all: nat64 snat dnat * npf-op state all: pbr + * npf-op state all: zone */ @@ -61,6 +64,10 @@ static const struct npf_rs_state_subtree { .subtree = {"firewall", "state", "local"}, .subtree_count = 3, }, + [NPF_RS_ORIGINATE] = { + .subtree = {"firewall", "state", "originate"}, + .subtree_count = 3, + }, [NPF_RS_BRIDGE] = { .subtree = {"firewall", "state", "l2"}, .subtree_count = 3, @@ -81,6 +88,9 @@ static const struct npf_rs_state_subtree { .subtree = {"ipv6-to-ipv4", NULL, NULL}, .subtree_count = 1, }, + [NPF_RS_ZONE] = { + .subtree_count = 0, + }, [NPF_RS_IPSEC] = { .subtree_count = 0, }, @@ -119,6 +129,8 @@ struct npf_ruleset_state_ctx { struct ruleset_select *sel; /* Set false after first ruleset has been added to json */ bool first; + /* Zones use this to store current inout zone name */ + char *name; enum npf_ruleset_type rs_type; }; @@ -177,8 +189,8 @@ static bool npf_show_state_rule_cb(npf_rule_t *rl, void *ctx) /* * Callback for each ruleset of a given type on an attach-point. Used for - * interface types (fw-in, fw-out, local, bridge and pbr). (nat uses a - * different callback function) + * interface types (fw-in, fw-out, local, bridge and pbr) and zones. 
(nat + * uses a different callback function) */ static bool npf_show_state_ruleset_cb(npf_rule_group_t *rg, void *ctx) { @@ -262,6 +274,17 @@ static void npf_show_state_intf_rs(json_writer_t *json, if (!npf_active(npf_conf, rs_type_bit)) return; + /* Get the ruleset early; + * show nothing unless there's a ruleset. + */ + const npf_ruleset_t *ruleset = npf_get_ruleset(npf_conf, rs_type); + + if (!ruleset) + return; + + if (!npf_conf->nc_attached) + return; + /* * Only start the outer json array once we know there is at least one * ruleset. @@ -316,8 +339,6 @@ static void npf_show_state_intf_rs(json_writer_t *json, npf_show_state_subtree_start(json, rs_type); - const npf_ruleset_t *ruleset = npf_get_ruleset(npf_conf, rs_type); - jsonw_name(json, "name"); jsonw_start_array(json); @@ -359,7 +380,7 @@ npf_show_state_intf(json_writer_t *json, enum npf_ruleset_type rs_type; ulong rulesets = info->sel->rulesets; - /* fw-in, fw-out, local, bridge and/or pbr */ + /* fw-in, fw-out, local, bridge, originate and/or pbr */ for (rs_type = 0; rs_type < NPF_RS_TYPE_COUNT; rs_type++) if ((rulesets & BIT(rs_type)) != 0) npf_show_state_intf_rs(json, npf_conf, rs_type, @@ -391,6 +412,7 @@ npf_show_ruleset_state_intf(json_writer_t *json, struct ruleset_select *sel) .json = json, .sel = sel, .first = true, + .name = NULL, }; jsonw_pretty(json, true); @@ -481,8 +503,12 @@ npf_show_state_nat_rs(json_writer_t *json __unused, if (!npf_active(npf_conf, rs_type_bit)) return; + /* Show nothing unless there's a ruleset. 
*/ const npf_ruleset_t *ruleset = npf_get_ruleset(npf_conf, rs_type); + if (!ruleset) + return; + npf_ruleset_group_walk(ruleset, info->sel, npf_show_state_nat_ruleset_cb, info); } @@ -526,6 +552,7 @@ npf_show_ruleset_state_nat(json_writer_t *json, struct ruleset_select *sel) .json = json, .sel = sel, .first = true, + .name = NULL, }; jsonw_pretty(json, true); @@ -562,14 +589,227 @@ npf_show_ruleset_state_nat(json_writer_t *json, struct ruleset_select *sel) return 0; } +/************************************************************************ + * Zones + * + * Zone attach point names are a mashup of "FROM_ZONE>TO_ZONE", e.g. + * ZONE1>ZONE2, ZONE1>ZONE3, ZONE2>ZONE1 etc. + * + * This is formatted differently so we end up with this format: + * + * { + * "zone": [ + * { + * "input-zone-name": "ZONE1", + * "to": [ + * { + * "output-zone-name": "ZONE2", + * "name": { + * + * ] + * }, + * { + * "output-zone-name": "ZONE3", + * "name": { + * + * ] + * } + * ] + * }, + * { + * "input-zone-name": "ZONE2", + * "to": [ + * ] + * } + * ] + * } + */ + +/* + * Start input zone object + */ +static void +npf_zone_state_input_zone_start(json_writer_t *json, + struct npf_ruleset_state_ctx *info, + char *input_zone) +{ + /* + * Store the input zone name in the ctx structure until we end the + * input zone + */ + info->name = input_zone; + + /* start input zone */ + jsonw_start_object(json); + jsonw_string_field(json, "input-zone-name", input_zone); + + /* start 'to' array */ + jsonw_name(json, "to"); + jsonw_start_array(json); +} + +/* + * End input zone object + */ +static void +npf_zone_state_input_zone_end(json_writer_t *json, + struct npf_ruleset_state_ctx *info) +{ + /* end 'to' array */ + jsonw_end_array(json); + + /* end input zone */ + jsonw_end_object(json); + + /* We are finished with the input zone name */ + free(info->name); + info->name = NULL; +} + +static void npf_show_state_zone_rs(json_writer_t *json, + struct npf_config *npf_conf, + struct npf_ruleset_state_ctx 
*info) +{ + enum npf_ruleset_type rs_type = NPF_RS_ZONE; + unsigned long rs_type_bit = BIT(rs_type); + char *p, *input_zone, *output_zone; + bool free_input_zone = false; + + if (!npf_active(npf_conf, rs_type_bit)) + return; + + /* Get the ruleset early; + * show nothing unless there's a ruleset. + */ + const npf_ruleset_t *ruleset = npf_get_ruleset(npf_conf, rs_type); + + if (!ruleset) + return; + + if (!npf_conf->nc_attached) + return; + + if (info->first) { + jsonw_name(json, "zone"); + jsonw_start_array(json); + info->first = false; + } + + /* + * Split attach point string to get 'from' and 'to' zone names + */ + input_zone = strdup(npf_conf->nc_attach_point); + if (!input_zone) + return; + + /* Attach point name is of the form "PRIVATE>PUBLIC" */ + p = strstr(input_zone, ">"); + if (!p) { + free(input_zone); + return; + } + *p = '\0'; + output_zone = p+1; + + /* End previous input zone object? */ + if (info->name && strcmp(input_zone, info->name) != 0) + npf_zone_state_input_zone_end(json, info); + + /* New input zone object? */ + if (!info->name) + /* Yes. 'input_zone' is stored in 'info' structure */ + npf_zone_state_input_zone_start(json, info, input_zone); + else + /* No. 
Remember to free 'input_zone' later on */ + free_input_zone = true; + + jsonw_start_object(json); + jsonw_string_field(json, "output-zone-name", output_zone); + + jsonw_name(json, "name"); + jsonw_start_array(json); + + npf_ruleset_group_walk(ruleset, info->sel, + npf_show_state_ruleset_cb, info); + + jsonw_end_array(json); + + jsonw_end_object(json); /* output-zone-name */ + + if (free_input_zone) + free(input_zone); +} + +static void +npf_show_state_zone(json_writer_t *json, struct npf_attpt_item *ap, + struct npf_ruleset_state_ctx *info) +{ + struct npf_config **npf_conf_p = npf_attpt_item_up_data_context(ap); + if (!npf_conf_p) + return; + + struct npf_config *npf_conf = *npf_conf_p; + if (!npf_conf) + return; + + npf_show_state_zone_rs(json, npf_conf, info); +} + +static bool +npf_show_state_zone_cb(struct npf_attpt_item *ap, void *ctx) +{ + struct npf_ruleset_state_ctx *info = ctx; + + npf_show_state_zone(info->json, ap, info); + return true; +} + +/* + * npf_show_ruleset_state_zone + */ +static int +npf_show_ruleset_state_zone(json_writer_t *json, struct ruleset_select *sel) +{ + struct npf_ruleset_state_ctx info = { + .json = json, + .sel = sel, + .first = true, + .name = NULL, /* Used for input_zone_name */ + }; + + jsonw_pretty(json, true); + + if (sel->attach_type == NPF_ATTACH_TYPE_ALL) { + npf_attpt_item_walk_up(npf_show_state_zone_cb, &info); + } else { + struct npf_attpt_item *ap; + if (npf_attpt_item_find_up(sel->attach_type, + sel->attach_point, &ap) >= 0) { + npf_show_state_zone(json, ap, &info); + } + } + + /* Was an inout zone started? */ + if (info.name) + npf_zone_state_input_zone_end(json, &info); + + /* Was zone array started? */ + if (!info.first) + jsonw_end_array(json); + + return 0; +} + /* * npf_show_ruleset_state * * Returned state is dependent on ruleset type, which reflects where the * corresponding config exists in the tree. 
*/ -#define RULESET_INTF (NPF_FW_IN | NPF_FW_OUT | NPF_BRIDGE | NPF_LOCAL | NPF_PBR) +#define RULESET_INTF (NPF_FW_IN | NPF_FW_OUT | NPF_BRIDGE | NPF_LOCAL \ + | NPF_PBR | NPF_ORIGINATE) #define RULESET_NAT (NPF_SNAT | NPF_DNAT | NPF_NAT64 | NPF_NAT46) +#define RULESET_ZONE (NPF_ZONE) int npf_show_ruleset_state(FILE *fp, struct ruleset_select *sel) @@ -591,6 +831,11 @@ npf_show_ruleset_state(FILE *fp, struct ruleset_select *sel) goto done; } + if ((sel->rulesets & RULESET_ZONE) != 0) { + npf_show_ruleset_state_zone(json, sel); + goto done; + } + done: jsonw_destroy(&json); return 0; diff --git a/src/npf/config/npf_config_state.h b/src/npf/config/npf_config_state.h index 7a49bba0..b09a47cb 100644 --- a/src/npf/config/npf_config_state.h +++ b/src/npf/config/npf_config_state.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2018-2019, AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only */ diff --git a/src/npf/config/npf_dump.h b/src/npf/config/npf_dump.h index 8c7adda6..2a36a502 100644 --- a/src/npf/config/npf_dump.h +++ b/src/npf/config/npf_dump.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2016 by Brocade Communications Systems, Inc. * All rights reserved. 
* diff --git a/src/npf/config/npf_gen_ruleset.c b/src/npf/config/npf_gen_ruleset.c index d77b2491..68206514 100644 --- a/src/npf/config/npf_gen_ruleset.c +++ b/src/npf/config/npf_gen_ruleset.c @@ -23,6 +23,7 @@ #include "npf/npf_ruleset.h" #include "urcu.h" #include "vplane_log.h" +#include "npf_grouper.h" struct npf_attpt_group; struct npf_attpt_rlset; @@ -30,16 +31,32 @@ struct npf_attpt_rlset; void npf_replace_ruleset(npf_ruleset_t **dp_ruleset, npf_ruleset_t *new_dp_ruleset) { - npf_ruleset_t *old_dp_ruleset; + npf_ruleset_t *old_dp_ruleset = *dp_ruleset; - old_dp_ruleset = rcu_xchg_pointer(dp_ruleset, new_dp_ruleset); + if (new_dp_ruleset && old_dp_ruleset) + npf_ref_stats(old_dp_ruleset, new_dp_ruleset); + + rcu_xchg_pointer(dp_ruleset, new_dp_ruleset); /* Perform cleanup on the old ruleset */ - if (old_dp_ruleset) { - if (new_dp_ruleset) - npf_copy_stats(old_dp_ruleset, new_dp_ruleset); + if (old_dp_ruleset) npf_ruleset_free(old_dp_ruleset); +} + +static bool +npf_cfg_group_acl_rule_cb(void *param, struct npf_cfg_rule_walk_state *state) +{ + npf_rule_group_t *rg = param; + int ret; + + /* ACLs use this rule for group attributes */ + if (state->index == UINT32_MAX) { + ret = npf_parse_group_acl_rule(rg, state->rule); + if (ret) + return false; } + + return true; } struct create_ruleset_info { @@ -63,7 +80,8 @@ npf_cfg_create_ruleset_group_rule_cb(void *param, info->rgk->rgk_class == NPF_RULE_CLASS_ACL) return true; - ret = npf_make_rule(info->dp_rule_group, state->index, state->rule); + ret = npf_make_rule(info->dp_rule_group, state->index, state->rule, + info->ruleset_type_flags); if (ret) { info->error = ret; return false; @@ -98,7 +116,20 @@ npf_cfg_create_ruleset_group_cb(const struct npf_attpt_group *rsg, void *ctx) return false; } - npf_grouper_init(rg); + /* + * Look for any rules that contain rule group info, e.g. ACL rule 0. + * This must happen before the grouper is setup. 
+ */ + if (rgk->rgk_class == NPF_RULE_CLASS_ACL) + npf_cfg_rule_group_walk(rgk->rgk_class, rgk->rgk_name, rg, + npf_cfg_group_acl_rule_cb); + + uint32_t count = npf_cfg_rule_count(rgk->rgk_class, rgk->rgk_name); + + /* Initialize the grouper before creating the rules */ + info->error = npf_match_setup(rg, count); + if (info->error) + return false; info->dp_rule_group = rg; info->rgk = rgk; @@ -112,7 +143,7 @@ npf_cfg_create_ruleset_group_cb(const struct npf_attpt_group *rsg, void *ctx) /* no rules in the group or does no exist, so discard it */ npf_free_group(rg); else - npf_grouper_optimize(rg); + npf_match_optimize(rg); info->num_rules_in_group = 0; info->dp_rule_group = NULL; diff --git a/src/npf/config/npf_gen_ruleset.h b/src/npf/config/npf_gen_ruleset.h index 8093c6db..28ef4e74 100644 --- a/src/npf/config/npf_gen_ruleset.h +++ b/src/npf/config/npf_gen_ruleset.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2018-2019, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2016-2017 by Brocade Communications Systems, Inc. * All rights reserved. 
* diff --git a/src/npf/config/npf_ruleset_type.c b/src/npf/config/npf_ruleset_type.c index 1a72a9d6..7f3b4039 100644 --- a/src/npf/config/npf_ruleset_type.c +++ b/src/npf/config/npf_ruleset_type.c @@ -50,7 +50,8 @@ static const struct npf_ruleset_features { .name = "fw-out", .flags = NPF_RS_FLAG_DIR_OUT | NPF_RS_FLAG_APP_FW | NPF_RS_FLAG_FEAT_INTF, - .feat_flags = IF_FEAT_FLAG_DEFRAG | IF_FEAT_FLAG_FW, + .feat_flags = IF_FEAT_FLAG_DEFRAG | IF_FEAT_FLAG_DEFRAG_SPATH | + IF_FEAT_FLAG_FW, .log_level = RTE_LOGTYPE_FIREWALL, .log_name = "fw", }, @@ -64,10 +65,20 @@ static const struct npf_ruleset_features { [NPF_RS_SNAT] = { .name = "snat", .flags = NPF_RS_FLAG_DIR_OUT | NPF_RS_FLAG_FEAT_INTF, - .feat_flags = IF_FEAT_FLAG_DEFRAG | IF_FEAT_FLAG_FW, + .feat_flags = IF_FEAT_FLAG_DEFRAG | IF_FEAT_FLAG_DEFRAG_SPATH | + IF_FEAT_FLAG_FW, .log_level = RTE_LOGTYPE_FIREWALL, .log_name = "snat", }, + [NPF_RS_ZONE] = { + .name = "zone", + .flags = NPF_RS_FLAG_DIR_OUT | NPF_RS_FLAG_APP_FW + | NPF_RS_FLAG_FEAT_INTF_ALL, + .feat_flags = IF_FEAT_FLAG_DEFRAG | IF_FEAT_FLAG_DEFRAG_SPATH | + IF_FEAT_FLAG_FW, + .log_level = RTE_LOGTYPE_FIREWALL, + .log_name = "fw", + }, [NPF_RS_LOCAL] = { .name = "local", .flags = NPF_RS_FLAG_NOTRACK | NPF_RS_FLAG_DIR_IN, @@ -75,6 +86,14 @@ static const struct npf_ruleset_features { .log_level = RTE_LOGTYPE_FIREWALL, .log_name = "local", }, + [NPF_RS_ORIGINATE] = { + .name = "originate", + .flags = NPF_RS_FLAG_NOTRACK | NPF_RS_FLAG_DIR_OUT + | NPF_RS_FLAG_FEAT_INTF | NPF_RS_FLAG_FEAT_GBL, + .feat_flags = IF_FEAT_FLAG_DEFRAG_SPATH | IF_FEAT_FLAG_FW_ORIG, + .log_level = RTE_LOGTYPE_FIREWALL, + .log_name = "originate", + }, [NPF_RS_BRIDGE] = { .name = "bridge", .flags = NPF_RS_FLAG_NOTRACK | NPF_RS_FLAG_DIR_IN, @@ -85,7 +104,8 @@ static const struct npf_ruleset_features { [NPF_RS_IPSEC] = { .name = "ipsec", .flags = NPF_RS_FLAG_DIR_OUT | NPF_RS_FLAG_DIR_IN - | NPF_RS_FLAG_NOTRACK | NPF_RS_FLAG_NOTABLES, + | NPF_RS_FLAG_NOTRACK | NPF_RS_FLAG_NOTABLES + | 
NPF_RS_FLAG_NO_STATS | NPF_RS_FLAG_HASH_TBL, .feat_flags = 0, .log_level = RTE_LOGTYPE_DATAPLANE, .log_name = "IPsec", @@ -108,16 +128,16 @@ static const struct npf_ruleset_features { [NPF_RS_NAT64] = { .name = "nat64", .flags = NPF_RS_FLAG_NOTABLES | NPF_RS_FLAG_DIR_IN - | NPF_RS_FLAG_FEAT_GBL, - .feat_flags = IF_FEAT_FLAG_DEFRAG | IF_FEAT_FLAG_FW, + | NPF_RS_FLAG_FEAT_INTF_ALL, + .feat_flags = IF_FEAT_FLAG_DEFRAG | IF_FEAT_FLAG_NAT64, .log_level = RTE_LOGTYPE_FIREWALL, .log_name = "nat64", }, [NPF_RS_NAT46] = { .name = "nat46", .flags = NPF_RS_FLAG_NOTABLES | NPF_RS_FLAG_DIR_IN - | NPF_RS_FLAG_FEAT_GBL, - .feat_flags = IF_FEAT_FLAG_DEFRAG | IF_FEAT_FLAG_FW, + | NPF_RS_FLAG_FEAT_INTF_ALL, + .feat_flags = IF_FEAT_FLAG_DEFRAG | IF_FEAT_FLAG_NAT64, .log_level = RTE_LOGTYPE_FIREWALL, .log_name = "nat46", }, diff --git a/src/npf/config/npf_ruleset_type.h b/src/npf/config/npf_ruleset_type.h index f82ec3f9..d4d9e49c 100644 --- a/src/npf/config/npf_ruleset_type.h +++ b/src/npf/config/npf_ruleset_type.h @@ -18,6 +18,8 @@ enum npf_ruleset_type { NPF_RS_FW_OUT, NPF_RS_DNAT, NPF_RS_SNAT, + NPF_RS_ZONE, + NPF_RS_ORIGINATE, NPF_RS_LOCAL, NPF_RS_BRIDGE, NPF_RS_IPSEC, @@ -39,6 +41,10 @@ enum npf_ruleset_type { /** * The following are flags associated with the different rule types to * give the capabilities of the rules. 
+ * + * NPF_RS_FLAG_FEAT_INTF - Attached to one intf, enabled on one intf + * NPF_RS_FLAG_FEAT_INTF_ALL - Attached to one intf, enabled on all intfs + * NPF_RS_FLAG_FEAT_GBL - Attached to global, enabled on all intfs */ enum npf_rs_flag { NPF_RS_FLAG_NOTRACK = 1 << 0, /* not tracking state */ @@ -47,7 +53,13 @@ enum npf_rs_flag { NPF_RS_FLAG_DIR_OUT = 1 << 3, /* rules are for OUT */ NPF_RS_FLAG_APP_FW = 1 << 4, /* rules may use app firewall */ NPF_RS_FLAG_FEAT_INTF = 1 << 5, /* feats enabled per intf */ - NPF_RS_FLAG_FEAT_GBL = 1 << 6, /* feats enabled on all intfs */ + NPF_RS_FLAG_FEAT_INTF_ALL = 1 << 6, /* feats enabled on all intfs */ + NPF_RS_FLAG_FEAT_GBL = 1 << 7, /* feats enabled on all intfs */ + NPF_RS_FLAG_NO_STATS = 1 << 8, /* no stats allocated per rule */ + NPF_RS_FLAG_HASH_TBL = 1 << 9, /* hash table linkage for rules in + * addition to list to enable + * faster lookup + */ }; /** diff --git a/src/npf/config/pmf_att_rlgrp.c b/src/npf/config/pmf_att_rlgrp.c index 44a8cd15..ec2ac868 100644 --- a/src/npf/config/pmf_att_rlgrp.c +++ b/src/npf/config/pmf_att_rlgrp.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2019-2021, AT&T Intellectual Property. All rights reserved. 
* * SPDX-License-Identifier: LGPL-2.1-only */ @@ -14,548 +14,336 @@ #include "vplane_log.h" #include "if_var.h" +#include "npf/config/gpc_cntr_query.h" +#include "npf/config/gpc_cntr_control.h" +#include "npf/config/gpc_db_control.h" +#include "npf/config/gpc_db_query.h" #include "npf/config/pmf_rule.h" #include "npf/config/pmf_att_rlgrp.h" #include "npf/config/npf_attach_point.h" #include "npf/config/npf_rule_group.h" -#include "npf/config/pmf_hw.h" +#include "npf/config/gpc_hw.h" #include "dp_event.h" #define CNTR_NAME_LEN 8 -enum pmf_eark_flags { - PMF_EARKF_PUBLISHED = (1 << 0), - PMF_EARKF_LL_CREATED = (1 << 1), - PMF_EARKF_CNT_PACKET = (1 << 2), - PMF_EARKF_CNT_BYTE = (1 << 3), -}; - -struct pmf_cntr { - struct pmf_group_ext *eark_group; - char eark_name[CNTR_NAME_LEN]; - uintptr_t eark_objid; /* FAL object */ - uint16_t eark_flags; -}; - -enum pmf_earl_flags { - PMF_EARLF_PUBLISHED = (1 << 0), - PMF_EARLF_LL_CREATED = (1 << 1), -}; - -struct pmf_attrl { - TAILQ_ENTRY(pmf_attrl) earl_list; - struct pmf_group_ext *earl_group; - struct pmf_rule *earl_rule; - struct pmf_cntr earl_cntr; - uintptr_t earl_objid; /* FAL object */ - uint16_t earl_index; - uint16_t earl_flags; -}; - enum pmf_earg_flags { - PMF_EARGF_PUBLISHED = (1 << 0), - PMF_EARGF_ATTACHED = (1 << 1), - PMF_EARGF_DEFERRED = (1 << 3), - PMF_EARGF_RULE_ATTR = (1 << 4), - PMF_EARGF_FAMILY = (1 << 5), - PMF_EARGF_V6 = (1 << 6), - PMF_EARGF_LL_CREATED = (1 << 7), - PMF_EARGF_LL_ATTACHED = (1 << 8), + PMF_EARGF_RULE_ATTR = (1 << 0), }; struct pmf_group_ext { - TAILQ_ENTRY(pmf_group_ext) earg_list; - TAILQ_HEAD(pmf_rlqh, pmf_attrl) earg_rules; + struct gpc_group *earg_gprg; /* strong */ struct npf_attpt_group *earg_base; - struct pmf_rlset_ext *earg_rlset; - struct pmf_attrl *earg_rlattr; - char const *earg_rgname; /* weak */ - uintptr_t earg_objid; /* FAL object */ - uint32_t earg_summary; + struct pmf_rule *earg_attr_rule; uint32_t earg_num_rules; uint32_t earg_flags; }; -enum pmf_ears_flags { - 
PMF_EARSF_IN = (1 << 0), - PMF_EARSF_IFP = (1 << 1), - PMF_EARSF_IF_CREATED = (1 << 2), -}; - -struct pmf_rlset_ext { - TAILQ_ENTRY(pmf_rlset_ext) ears_list; - TAILQ_HEAD(, pmf_group_ext) ears_groups; - struct npf_attpt_rlset *ears_base; - char const *ears_ifname; /* weak */ - struct ifnet *ears_ifp; - uint32_t ears_flags; -}; - /* ---- */ -static TAILQ_HEAD(, pmf_rlset_ext) att_rlsets - = TAILQ_HEAD_INITIALIZER(att_rlsets); - static bool deferrals; -/* ---- */ - -uint16_t -pmf_arlg_attrl_get_index(struct pmf_attrl const *earl) -{ - return earl->earl_index; -} +static bool commit_pending; -struct pmf_rule const * -pmf_arlg_attrl_get_rule(struct pmf_attrl const *earl) -{ - return earl->earl_rule; -} - -struct pmf_group_ext * -pmf_arlg_attrl_get_grp(struct pmf_attrl const *earl) -{ - return earl->earl_group; -} +/* ---- */ -struct pmf_cntr * -pmf_arlg_attrl_get_cntr(struct pmf_attrl *earl) +void * +pmf_arlg_earg_get_attr_rule(void *earg_ptr) { - struct pmf_cntr *eark = &earl->earl_cntr; - - if (!(eark->eark_flags & PMF_EARKF_PUBLISHED)) + struct pmf_group_ext *earg = earg_ptr; + if (!earg) return NULL; - return eark; -} - -uintptr_t -pmf_arlg_attrl_get_objid(struct pmf_attrl const *earl) -{ - return earl->earl_objid; -} - -void -pmf_arlg_attrl_set_objid(struct pmf_attrl *earl, uintptr_t objid) -{ - earl->earl_objid = objid; -} - -struct pmf_group_ext * -pmf_arlg_cntr_get_grp(struct pmf_cntr const *eark) -{ - return eark->eark_group; -} - -uintptr_t -pmf_arlg_cntr_get_objid(struct pmf_cntr const *eark) -{ - if (!eark) - return 0; - - return eark->eark_objid; -} - -void -pmf_arlg_cntr_set_objid(struct pmf_cntr *eark, uintptr_t objid) -{ - eark->eark_objid = objid; -} - -char const * -pmf_arlg_cntr_get_name(struct pmf_cntr const *eark) -{ - return eark->eark_name; -} - -bool -pmf_arlg_cntr_pkt_enabled(struct pmf_cntr const *eark) -{ - return (eark->eark_flags & PMF_EARKF_CNT_PACKET); -} - -bool -pmf_arlg_cntr_byt_enabled(struct pmf_cntr const *eark) -{ - return 
(eark->eark_flags & PMF_EARKF_CNT_BYTE); -} - - -char const * -pmf_arlg_grp_get_name(struct pmf_group_ext const *earg) -{ - return earg->earg_rgname; -} + uint32_t rg_flags = earg->earg_flags; + if (!(rg_flags & PMF_EARGF_RULE_ATTR)) + return NULL; -struct pmf_rlset_ext * -pmf_arlg_grp_get_rls(struct pmf_group_ext const *earg) -{ - return earg->earg_rlset; + return earg->earg_attr_rule; } uint32_t -pmf_arlg_grp_get_summary(struct pmf_group_ext const *earg) +pmf_arlg_earg_get_rule_count(void *earg_ptr) { - return earg->earg_summary; -} - -bool -pmf_arlg_grp_is_v6(struct pmf_group_ext const *earg) -{ - bool is_v6 = (earg->earg_flags & PMF_EARGF_V6); + struct pmf_group_ext *earg = earg_ptr; + if (!earg) + return 0; - return is_v6; + return earg->earg_num_rules; } -bool -pmf_arlg_grp_is_ingress(struct pmf_group_ext const *earg) +static bool +pmf_arlg_rule_needs_cntr(struct gpc_cntg const *cntg, + struct pmf_rule const *rule) { - struct pmf_rlset_ext *ears = earg->earg_rlset; + enum gpc_cntr_type type = gpc_cntg_type(cntg); - bool is_ingress = (ears->ears_flags & PMF_EARSF_IN); - - return is_ingress; -} - -bool -pmf_arlg_grp_is_ll_attached(struct pmf_group_ext const *earg) -{ - bool ll_attached = (earg->earg_flags & PMF_EARGF_LL_ATTACHED); + switch (type) { + case GPC_CNTT_NUMBERED: + return true; + case GPC_CNTT_NAMED: + break; + default: + return false; + } - return ll_attached; -} + if (!(rule->pp_summary & PMF_RAS_COUNT_REF)) + return false; -uintptr_t -pmf_arlg_grp_get_objid(struct pmf_group_ext const *earg) -{ - return earg->earg_objid; + return true; } -void -pmf_arlg_grp_set_objid(struct pmf_group_ext *earg, uintptr_t objid) +static struct gpc_cntr * +pmf_arlg_rule_get_cntr(struct gpc_cntg *cntg, + struct pmf_rule const *rule, + uint32_t rl_number) { - earg->earg_objid = objid; -} + enum gpc_cntr_type type = gpc_cntg_type(cntg); + struct gpc_cntr *cntr = NULL; + + if (type == GPC_CNTT_NUMBERED) + cntr = gpc_cntr_create_numbered(cntg, rl_number); + else if (type 
== GPC_CNTT_NAMED) { + /* This needs to be done better */ + if (rule->pp_summary & PMF_RAS_PASS) + cntr = gpc_cntr_find_and_retain(cntg, "accept"); + else if (rule->pp_summary & PMF_RAS_DROP) + cntr = gpc_cntr_find_and_retain(cntg, "drop"); + } -char const * -pmf_arlg_rls_get_ifname(struct pmf_rlset_ext const *ears) -{ - return ears->ears_ifname; + return cntr; } -/* ---- */ - /* - * Recalculate this after a difficult change, generally - * a rule deletion, or rule change. + * The logic in here should really be based upon the names extracted + * as part of the rproc. */ -static uint32_t -pmf_arlg_recalc_summary(struct pmf_group_ext *earg, struct pmf_rule *rule) -{ - uint32_t group_summary = 0; - -#define RLATTR_SUMMARY_MASK (PMF_RMS_IP_FAMILY|PMF_RAS_COUNT_DEF) - if (rule) - group_summary |= rule->pp_summary & RLATTR_SUMMARY_MASK; - - struct pmf_attrl *earl; - TAILQ_FOREACH(earl, &earg->earg_rules, earl_list) - group_summary |= earl->earl_rule->pp_summary; - - return group_summary; -} - -/* ---- */ - -static void -pmf_alrg_hw_ntfy_grp_create(struct pmf_group_ext *earg, struct pmf_rule *rule) -{ - if (earg->earg_flags & PMF_EARGF_PUBLISHED) - return; - if (!(earg->earg_flags & PMF_EARGF_FAMILY)) - return; - if (earg->earg_flags & PMF_EARGF_DEFERRED) - return; - - /* Recalculate summary before publish */ - uint32_t summary = pmf_arlg_recalc_summary(earg, rule); - earg->earg_summary = summary; - - if (pmf_hw_group_create(earg)) - earg->earg_flags |= PMF_EARGF_LL_CREATED; - - earg->earg_flags |= PMF_EARGF_PUBLISHED; -} - -static void -pmf_alrg_hw_ntfy_grp_delete(struct pmf_group_ext *earg) -{ - if (!(earg->earg_flags & PMF_EARGF_PUBLISHED)) - return; - - pmf_hw_group_delete(earg); - - /* Rules summary cleared to optimise rule delete */ - earg->earg_summary = 0; - - earg->earg_flags &= ~(PMF_EARGF_PUBLISHED|PMF_EARGF_LL_CREATED); -} - static void -pmf_alrg_hw_ntfy_grp_summary_mod(struct pmf_group_ext *earg, uint32_t new) +pmf_arlg_rule_create_cntg_rules(struct gpc_group 
*gprg, + struct gpc_cntg *cntg, + struct pmf_rule const *attr_rule) { - if (!(earg->earg_flags & PMF_EARGF_PUBLISHED)) - return; - - if (new == earg->earg_summary) - return; - - pmf_hw_group_mod(earg, new); + struct gpc_cntr *cntr = NULL; + char const *cntr_name; + + /* What do we need? */ + bool const need_accept = attr_rule->pp_summary & PMF_RAS_COUNT_DEF_PASS; + bool const need_drop = attr_rule->pp_summary & PMF_RAS_COUNT_DEF_DROP; + + /* Have we got "accept"? */ + bool got_accept = false; + cntr = gpc_cntr_find_and_retain(cntg, "accept"); + got_accept = !!cntr; + if (cntr) + gpc_cntr_release(cntr); + + /* Have we got "drop"? */ + bool got_drop = false; + cntr = gpc_cntr_find_and_retain(cntg, "drop"); + got_drop = !!cntr; + if (cntr) + gpc_cntr_release(cntr); + + /* Make "accept" if needed and not present */ + if (need_accept && !got_accept) { + cntr_name = "accept"; + cntr = gpc_cntr_create_named(cntg, cntr_name); + if (!cntr) { +cntr_error: + ;/* semi-colon for goto target */ + struct gpc_rlset *gprs = gpc_group_get_rlset(gprg); + bool dir_in = gpc_rlset_is_ingress(gprs); + RTE_LOG(ERR, FIREWALL, + "Error: OOM for ACL attached group cntr=%s" + " %s/%s|%s\n", + cntr_name, + (dir_in) ? 
" In" : "Out", + gpc_rlset_get_ifname(gprs), + gpc_group_get_name(gprg)); + return; + } + gpc_cntr_hw_ntfy_create(cntg, cntr); + } - earg->earg_summary = new; + /* Make "drop" if needed and not present */ + if (need_drop && !got_drop) { + cntr_name = "drop"; + cntr = gpc_cntr_create_named(cntg, cntr_name); + if (!cntr) + goto cntr_error; + gpc_cntr_hw_ntfy_create(cntg, cntr); + } } static void -pmf_alrg_hw_ntfy_grp_attach(struct pmf_group_ext *earg) +pmf_arlg_rule_create_cntg(struct gpc_group *gprg, + struct pmf_rule const *attr_rule) { - if (!(earg->earg_flags & PMF_EARGF_PUBLISHED)) - return; - if (earg->earg_flags & PMF_EARGF_DEFERRED) - return; - if (earg->earg_flags & PMF_EARGF_ATTACHED) - return; + struct gpc_cntg *cntg; - struct pmf_rlset_ext *ears = earg->earg_rlset; - if (!(ears->ears_flags & PMF_EARSF_IFP)) + if (!(attr_rule->pp_summary & PMF_RAS_COUNT_DEF)) return; - if (!(ears->ears_flags & PMF_EARSF_IF_CREATED)) - return; - - if (pmf_hw_group_attach(earg, ears->ears_ifp)) - earg->earg_flags |= PMF_EARGF_LL_ATTACHED; - earg->earg_flags |= PMF_EARGF_ATTACHED; -} - -static void -pmf_alrg_hw_ntfy_grp_detach(struct pmf_group_ext *earg) -{ - if (!(earg->earg_flags & PMF_EARGF_PUBLISHED)) - return; - if (!(earg->earg_flags & PMF_EARGF_ATTACHED)) + /* + * This should be changed to depend upon information extracted + * from the rproc, specifically the 'type=' key/value pair. + */ + enum gpc_cntr_type type = GPC_CNTT_NUMBERED; + if (attr_rule->pp_summary & PMF_SUMMARY_COUNT_DEF_NAMED_FLAGS) + type = GPC_CNTT_NAMED; + + cntg = gpc_cntg_create(gprg, type, + GPC_CNTW_PACKET, GPC_CNTS_INTERFACE); + if (!cntg) { + struct gpc_rlset *gprs = gpc_group_get_rlset(gprg); + bool dir_in = gpc_rlset_is_ingress(gprs); + RTE_LOG(ERR, FIREWALL, + "Error: OOM for ACL attached group cntg" + " %s/%s|%s\n", + (dir_in) ? 
" In" : "Out", gpc_rlset_get_ifname(gprs), + gpc_group_get_name(gprg)); return; + } - struct pmf_rlset_ext *ears = earg->earg_rlset; - - pmf_hw_group_detach(earg, ears->ears_ifp); - - earg->earg_flags &= ~(PMF_EARGF_ATTACHED|PMF_EARGF_LL_ATTACHED); -} + gpc_group_set_cntg(gprg, cntg); -static void -pmf_arlg_hw_ntfy_cntr_add(struct pmf_group_ext *earg, struct pmf_attrl *earl) -{ - if (!(earg->earg_flags & PMF_EARGF_PUBLISHED)) + if (type != GPC_CNTT_NAMED) return; - if (!(earl->earl_rule->pp_summary & PMF_RAS_COUNT_REF)) - return; - - struct pmf_cntr *eark = &earl->earl_cntr; - - eark->eark_group = earg; - snprintf(eark->eark_name, sizeof(eark->eark_name), - "%u", earl->earl_index); - eark->eark_objid = 0; - eark->eark_flags = PMF_EARKF_CNT_PACKET; - - if (pmf_hw_counter_create(eark)) - eark->eark_flags |= PMF_EARKF_LL_CREATED; - - eark->eark_flags |= PMF_EARKF_PUBLISHED; + pmf_arlg_rule_create_cntg_rules(gprg, cntg, attr_rule); } static void -pmf_arlg_hw_ntfy_cntr_del(struct pmf_group_ext *earg, struct pmf_attrl *earl) +pmf_arlg_rule_delete_cntg(struct gpc_cntg *cntg) { - if (!(earg->earg_flags & PMF_EARGF_PUBLISHED)) - return; - - struct pmf_cntr *eark = &earl->earl_cntr; - - if (!(eark->eark_flags & PMF_EARKF_PUBLISHED)) - return; - - pmf_hw_counter_delete(eark); - - eark->eark_flags &= ~(PMF_EARKF_PUBLISHED|PMF_EARKF_LL_CREATED); + if (gpc_cntg_type(cntg) == GPC_CNTT_NAMED) { + struct gpc_cntr *cntr; + GPC_CNTR_FOREACH(cntr, cntg) { + gpc_cntr_release(cntr); + } + } - memset(eark, 0, sizeof(*eark)); + gpc_cntg_release(cntg); } static void -pmf_alrg_hw_ntfy_rule_add(struct pmf_group_ext *earg, struct pmf_attrl *earl) -{ - if (!(earg->earg_flags & PMF_EARGF_PUBLISHED)) - return; - if (earl->earl_flags & PMF_EARLF_PUBLISHED) - return; - - pmf_arlg_hw_ntfy_cntr_add(earg, earl); - - if (pmf_hw_rule_add(earl, earl->earl_rule)) - earl->earl_flags |= PMF_EARLF_LL_CREATED; - - earl->earl_flags |= PMF_EARLF_PUBLISHED; -} +pmf_arlg_rl_attr_check(struct pmf_group_ext *earg, 
struct pmf_rule *attr_rule); static void -pmf_alrg_hw_ntfy_rule_chg(struct pmf_group_ext *earg, struct pmf_attrl *earl, - struct pmf_rule *new_rule) +pmf_arlg_rule_change_cntg(struct pmf_group_ext *earg, + struct gpc_group *gprg, + struct pmf_rule *attr_rule) { - if (!(earg->earg_flags & PMF_EARGF_PUBLISHED)) - return; - if (!(earl->earl_flags & PMF_EARLF_PUBLISHED)) { - pmf_alrg_hw_ntfy_rule_add(earg, earl); + struct gpc_cntg *cntg = gpc_group_get_cntg(gprg); + if (!cntg) { + pmf_arlg_rule_create_cntg(gprg, attr_rule); + pmf_arlg_rl_attr_check(earg, attr_rule); return; } - pmf_hw_rule_mod(earl, new_rule); -} - -static void -pmf_alrg_hw_ntfy_rule_del(struct pmf_group_ext *earg, struct pmf_attrl *earl) -{ - if (!(earg->earg_flags & PMF_EARGF_PUBLISHED)) + if (!(attr_rule->pp_summary & PMF_RAS_COUNT_DEF)) { + pmf_arlg_rule_delete_cntg(cntg); + gpc_group_set_cntg(gprg, NULL); return; - if (!(earl->earl_flags & PMF_EARLF_PUBLISHED)) - return; - - pmf_hw_rule_del(earl); - - earl->earl_flags &= ~(PMF_EARLF_PUBLISHED|PMF_EARLF_LL_CREATED); - - pmf_arlg_hw_ntfy_cntr_del(earg, earl); -} - -/* ---- */ + } -/* - * For a group, notify creation or deletion of all rules. - * - * These are used for deferred notifications based upon the - * change in the group status. 
- */ -static void -pmf_alrg_hw_ntfy_rules_add(struct pmf_group_ext *earg) -{ - if (!(earg->earg_flags & PMF_EARGF_PUBLISHED)) - return; + /* Check if the counter type has changed */ + enum gpc_cntr_type type = GPC_CNTT_NUMBERED; + if (attr_rule->pp_summary & PMF_SUMMARY_COUNT_DEF_NAMED_FLAGS) + type = GPC_CNTT_NAMED; - struct pmf_attrl *earl; + if (type != gpc_cntg_type(cntg)) { + pmf_arlg_rl_attr_check(earg, NULL); - TAILQ_FOREACH(earl, &earg->earg_rules, earl_list) - pmf_alrg_hw_ntfy_rule_add(earg, earl); -} + pmf_arlg_rule_delete_cntg(cntg); + gpc_group_set_cntg(gprg, NULL); + pmf_arlg_rule_create_cntg(gprg, attr_rule); -static void -pmf_alrg_hw_ntfy_rules_del(struct pmf_group_ext *earg) -{ - if (!(earg->earg_flags & PMF_EARGF_PUBLISHED)) + pmf_arlg_rl_attr_check(earg, attr_rule); return; + } - struct pmf_attrl *earl; - - TAILQ_FOREACH(earl, &earg->earg_rules, earl_list) - pmf_alrg_hw_ntfy_rule_del(earg, earl); -} - - -/* ---- */ - -static void -pmf_arlg_rl_free(struct pmf_attrl *earl) -{ - if (!earl) + /* Same type of counters, nothing to do for numbered */ + if (type == GPC_CNTT_NUMBERED) return; - pmf_rule_free(earl->earl_rule); - free(earl); -} - -static void -pmf_arlg_rl_change(struct pmf_attrl *earl, struct pmf_rule *new_rule) -{ - if (!earl) + /* We could have changed the specific named counters */ + bool const need_accept = attr_rule->pp_summary & PMF_RAS_COUNT_DEF_PASS; + bool const need_drop = attr_rule->pp_summary & PMF_RAS_COUNT_DEF_DROP; + + bool got_accept = false; + struct gpc_cntr *cntr_accept + = gpc_cntr_find_and_retain(cntg, "accept"); + got_accept = !!cntr_accept; + + bool got_drop = false; + struct gpc_cntr *cntr_drop = gpc_cntr_find_and_retain(cntg, "drop"); + got_drop = !!cntr_drop; + + /* If we have what we need, nothing to do */ + if ((got_accept == need_accept) && (got_drop == need_drop)) { + if (cntr_accept) + gpc_cntr_release(cntr_accept); + if (cntr_drop) + gpc_cntr_release(cntr_drop); return; - - struct pmf_rule *old_rule = 
earl->earl_rule; - - earl->earl_rule = pmf_rule_copy(new_rule); - pmf_rule_free(old_rule); -} - -static struct pmf_attrl * -pmf_arlg_rl_alloc(struct pmf_rule *rule, uint32_t idx) -{ - struct pmf_attrl *earl = calloc(1, sizeof(*earl)); - - if (earl) { - earl->earl_index = idx; - earl->earl_rule = pmf_rule_copy(rule); } - return earl; -} + /* Force all rules to be unpublished (inefficient, but simple) */ + pmf_arlg_rl_attr_check(earg, NULL); -static struct pmf_attrl * -pmf_arlg_rl_find(struct pmf_group_ext *earg, uint32_t idx, bool insert) -{ - if (idx == UINT32_MAX) - return earg->earg_rlattr; + /* Create any missing counters */ + if ((need_accept && !got_accept) || (need_drop && !got_drop)) + pmf_arlg_rule_create_cntg_rules(gprg, cntg, attr_rule); - struct pmf_attrl *cursor; + /* Release unneeded counters */ - TAILQ_FOREACH(cursor, &earg->earg_rules, earl_list) - if (idx <= cursor->earl_index) - break; + if (got_accept && !need_accept) + gpc_cntr_release(cntr_accept); - if (!cursor) - return NULL; + if (got_drop && !need_drop) + gpc_cntr_release(cntr_drop); - if (idx == cursor->earl_index || insert) - return cursor; + /* Force all to be republished */ + pmf_arlg_rl_attr_check(earg, attr_rule); - return NULL; + /* Release references from lookup */ + if (cntr_accept) + gpc_cntr_release(cntr_accept); + if (cntr_drop) + gpc_cntr_release(cntr_drop); } + /* ---- */ /* * Check for a change in publication status due to the group attribute rule. 
*/ static void -pmf_arlg_rl_attr_check(struct pmf_group_ext *earg, struct pmf_rule *rule) +pmf_arlg_rl_attr_check(struct pmf_group_ext *earg, struct pmf_rule *attr_rule) { + struct gpc_group *gprg = earg->earg_gprg; + struct gpc_cntg *cntg = gpc_group_get_cntg(gprg); struct pmf_attr_ip_family *ipfam = NULL; /* The group attribute rule has been removed */ - if (!rule) { + if (!attr_rule) { if (!(earg->earg_flags & PMF_EARGF_RULE_ATTR)) return; unpublish_group: /* A group is only visible if it has attr rule, and a family */ - if (earg->earg_flags & PMF_EARGF_PUBLISHED) { - pmf_alrg_hw_ntfy_grp_detach(earg); - pmf_alrg_hw_ntfy_rules_del(earg); - /* eventually delete counters */ - pmf_alrg_hw_ntfy_grp_delete(earg); + if (gpc_group_is_published(gprg)) { + gpc_group_hw_ntfy_detach(gprg); + gpc_group_hw_ntfy_rules_delete(gprg); + if (cntg) + gpc_cntg_hw_ntfy_cntrs_delete(cntg); + gpc_group_hw_ntfy_delete(gprg); /* Enable deferred republish */ - earg->earg_flags |= PMF_EARGF_DEFERRED; + gpc_group_set_deferred(gprg); deferrals = true; } - earg->earg_flags &= - ~(PMF_EARGF_RULE_ATTR|PMF_EARGF_FAMILY|PMF_EARGF_V6); + earg->earg_flags &= ~PMF_EARGF_RULE_ATTR; + gpc_group_clear_family(gprg); return; } @@ -563,22 +351,26 @@ pmf_arlg_rl_attr_check(struct pmf_group_ext *earg, struct pmf_rule *rule) if (!(earg->earg_flags & PMF_EARGF_RULE_ATTR)) { earg->earg_flags |= PMF_EARGF_RULE_ATTR; - ipfam = (rule) ? - rule->pp_match.l2[PMF_L2F_IP_FAMILY].pm_ipfam : NULL; + ipfam = (attr_rule) + ? attr_rule->pp_match.l2[PMF_L2F_IP_FAMILY].pm_ipfam + : NULL; if (!ipfam) return; publish_group: /* semi-colon for goto target */; bool is_v6 = ipfam->pm_v6; - earg->earg_flags |= PMF_EARGF_FAMILY; - earg->earg_flags |= (is_v6) ? 
PMF_EARGF_V6 : 0; + if (is_v6) + gpc_group_set_v6(gprg); + else + gpc_group_set_v4(gprg); /* Now publish everything referencing the group */ - pmf_alrg_hw_ntfy_grp_create(earg, rule); - /* eventually create counters */ - pmf_alrg_hw_ntfy_rules_add(earg); - pmf_alrg_hw_ntfy_grp_attach(earg); + gpc_group_hw_ntfy_create(gprg, attr_rule); + if (cntg) + gpc_cntg_hw_ntfy_cntrs_create(cntg); + gpc_group_hw_ntfy_rules_create(gprg); + gpc_group_hw_ntfy_attach(gprg); return; } @@ -588,33 +380,34 @@ pmf_arlg_rl_attr_check(struct pmf_group_ext *earg, struct pmf_rule *rule) /* Eventually check for counters change here */ /* Deleting the family acts like a group removal */ - ipfam = (rule) ? - rule->pp_match.l2[PMF_L2F_IP_FAMILY].pm_ipfam : NULL; + ipfam = (attr_rule) ? + attr_rule->pp_match.l2[PMF_L2F_IP_FAMILY].pm_ipfam : NULL; if (!ipfam) { - if (earg->earg_flags & PMF_EARGF_FAMILY) + if (gpc_group_has_family(gprg)) goto unpublish_group; return; } /* Just acquired a family, so acts like group creation, publish all */ - if (!(earg->earg_flags & PMF_EARGF_FAMILY)) + if (!gpc_group_has_family(gprg)) goto publish_group; /* Ensure the address family is the same */ bool is_v6 = ipfam->pm_v6; - if (!(earg->earg_flags & PMF_EARGF_V6) == !is_v6) + if (gpc_group_is_v6(gprg) == is_v6) return; /* The AF is different, so delete and re-add everything */ - if (earg->earg_flags & PMF_EARGF_PUBLISHED) { - pmf_alrg_hw_ntfy_grp_detach(earg); - pmf_alrg_hw_ntfy_rules_del(earg); - /* eventually delete counters */ - pmf_alrg_hw_ntfy_grp_delete(earg); + if (gpc_group_is_published(gprg)) { + gpc_group_hw_ntfy_detach(gprg); + gpc_group_hw_ntfy_rules_delete(gprg); + if (cntg) + gpc_cntg_hw_ntfy_cntrs_delete(cntg); + gpc_group_hw_ntfy_delete(gprg); } - earg->earg_flags &= - ~(PMF_EARGF_RULE_ATTR|PMF_EARGF_FAMILY|PMF_EARGF_V6); + earg->earg_flags &= ~PMF_EARGF_RULE_ATTR; + gpc_group_clear_family(gprg); /* Now add it all back again, with new AF */ goto publish_group; @@ -625,42 +418,56 @@ 
pmf_arlg_rl_attr_check(struct pmf_group_ext *earg, struct pmf_rule *rule) static bool pmf_arlg_rl_del(struct pmf_group_ext *earg, uint32_t rl_idx) { - struct pmf_rlset_ext *ears = earg->earg_rlset; - bool dir_in = (ears->ears_flags & PMF_EARSF_IN); + struct gpc_group *gprg = earg->earg_gprg; + struct gpc_rlset *gprs = gpc_group_get_rlset(gprg); + bool dir_in = gpc_rlset_is_ingress(gprs); + + /* This rule is for group attributes */ + if (rl_idx == UINT32_MAX) { + struct pmf_rule *attr_rule = earg->earg_attr_rule; + if (!attr_rule) + goto rule_del_error; + pmf_arlg_rl_attr_check(earg, NULL); + earg->earg_attr_rule = NULL; + pmf_rule_free(attr_rule); - struct pmf_attrl *earl = pmf_arlg_rl_find(earg, rl_idx, false); - if (!earl) { + struct gpc_cntg *cntg = gpc_group_get_cntg(gprg); + if (cntg) { + pmf_arlg_rule_delete_cntg(cntg); + gpc_group_set_cntg(gprg, NULL); + } + return true; + } + + struct gpc_rule *gprl = gpc_rule_find(gprg, rl_idx); + if (!gprl) { +rule_del_error: RTE_LOG(ERR, FIREWALL, "Error: No rule to delete for ACL attached group" " %s/%s|%s:%u\n", - (dir_in) ? " In" : "Out", ears->ears_ifname, - earg->earg_rgname, rl_idx); + (dir_in) ? " In" : "Out", gpc_rlset_get_ifname(gprs), + gpc_group_get_name(gprg), rl_idx); return false; } - /* This rule is for group attributes */ - if (rl_idx == UINT32_MAX) { - pmf_arlg_rl_attr_check(earg, NULL); - earg->earg_rlattr = NULL; - pmf_arlg_rl_free(earl); - return true; - } + uint32_t old_summary = gpc_group_get_summary(gprg); --earg->earg_num_rules; - pmf_alrg_hw_ntfy_rule_del(earg, earl); + gpc_rule_hw_ntfy_delete(gprg, gprl); + + struct gpc_cntr *cntr = gpc_rule_get_cntr(gprl); + + gpc_rule_delete(gprl); - TAILQ_REMOVE(&earg->earg_rules, earl, earl_list); - pmf_arlg_rl_free(earl); + if (cntr) + gpc_cntr_release(cntr); /* If any were published, recalculate and notify */ - if (earg->earg_summary) { - struct pmf_rule *attr_rule - = (earg->earg_rlattr) - ? 
earg->earg_rlattr->earl_rule - : NULL; - uint32_t new_summary = pmf_arlg_recalc_summary(earg, attr_rule); - pmf_alrg_hw_ntfy_grp_summary_mod(earg, new_summary); + if (old_summary) { + struct pmf_rule *attr_rule = earg->earg_attr_rule; + uint32_t summary = gpc_group_recalc_summary(gprg, attr_rule); + gpc_group_hw_ntfy_modify(gprg, summary); } return true; @@ -670,43 +477,93 @@ static bool pmf_arlg_rl_chg(struct pmf_group_ext *earg, struct pmf_rule *new_rule, uint32_t rl_idx) { - struct pmf_rlset_ext *ears = earg->earg_rlset; - bool dir_in = (ears->ears_flags & PMF_EARSF_IN); + struct gpc_group *gprg = earg->earg_gprg; + struct gpc_rlset *gprs = gpc_group_get_rlset(gprg); + bool dir_in = gpc_rlset_is_ingress(gprs); + + if (rl_idx == UINT32_MAX) { + struct pmf_rule *old_attr_rule = earg->earg_attr_rule; + if (!old_attr_rule) + goto rule_chg_error; + pmf_arlg_rule_change_cntg(earg, gprg, new_rule); + + earg->earg_attr_rule = pmf_rule_copy(new_rule); + pmf_rule_free(old_attr_rule); + return true; + } - struct pmf_attrl *earl = pmf_arlg_rl_find(earg, rl_idx, false); - if (!earl) { + struct gpc_rule *gprl = gpc_rule_find(gprg, rl_idx); + if (!gprl) { +rule_chg_error: RTE_LOG(ERR, FIREWALL, "Error: No rule to change for ACL attached group" " %s/%s|%s:%u\n", - (dir_in) ? " In" : "Out", ears->ears_ifname, - earg->earg_rgname, rl_idx); + (dir_in) ? 
" In" : "Out", gpc_rlset_get_ifname(gprs), + gpc_group_get_name(gprg), rl_idx); return false; } - if (rl_idx == UINT32_MAX) { - pmf_arlg_rl_attr_check(earg, new_rule); - pmf_arlg_rl_change(earl, new_rule); - return true; + /* Adjust a counter if necessary */ + struct gpc_cntg *cntg = gpc_group_get_cntg(gprg); + struct gpc_cntr *rel_cntr = NULL; + + /* If the group has counters configured */ + if (cntg) { + struct gpc_cntr *cntr = gpc_rule_get_cntr(gprl); + bool need_counter = pmf_arlg_rule_needs_cntr(cntg, new_rule); + if (!need_counter) { + /* This rule should release its counter (if any) */ + rel_cntr = cntr; + } else if (!cntr) { + /* Need a counter, but don't have one - acquire one */ + cntr = pmf_arlg_rule_get_cntr(cntg, new_rule, rl_idx); + gpc_rule_set_cntr(gprl, cntr); + gpc_cntr_hw_ntfy_create(cntg, cntr); + } else { + /* Counter needed, and/or rule match have changed */ + if (gpc_cntg_type(cntg) == GPC_CNTT_NAMED) { + struct gpc_cntr *new_cntr + = pmf_arlg_rule_get_cntr(cntg, + new_rule, 0); + if (new_cntr == cntr) { + gpc_cntr_release(new_cntr); + /* Do we need to clear the counter? */ + } else { + gpc_rule_set_cntr(gprl, new_cntr); + gpc_cntr_hw_ntfy_create(cntg, new_cntr); + rel_cntr = cntr; + } + } + /* + * The below call to gpc_rule_change_rule() will + * eventually publish the rule if unpublished, + * or delete it and add a new one (which we desire + * here) if already published. + * + * This is necessary as at the FAL layer, a rule + * references a counter, so changing the counter + * requires changing the rule; and we don't have + * support for in-place modify. 
+ */ + } } /* If any were published, update and notify */ - uint32_t old_summary = earg->earg_summary; - uint32_t new_summary = old_summary | new_rule->pp_summary; - pmf_alrg_hw_ntfy_grp_summary_mod(earg, new_summary); + uint32_t old_summary = gpc_group_get_summary(gprg); - pmf_alrg_hw_ntfy_rule_chg(earg, earl, new_rule); - pmf_arlg_rl_change(earl, new_rule); + gpc_rule_change_rule(gprl, new_rule); /* We turned on new stuff above, turn off old stuff now */ if (old_summary) { - struct pmf_rule *attr_rule - = (earg->earg_rlattr) - ? earg->earg_rlattr->earl_rule - : NULL; - new_summary = pmf_arlg_recalc_summary(earg, attr_rule); - pmf_alrg_hw_ntfy_grp_summary_mod(earg, new_summary); + struct pmf_rule *attr_rule = earg->earg_attr_rule; + uint32_t summary = gpc_group_recalc_summary(gprg, attr_rule); + gpc_group_hw_ntfy_modify(gprg, summary); } + /* Release a counter, possibly freeing it */ + if (rel_cntr) + gpc_cntr_release(rel_cntr); + return true; } @@ -714,66 +571,57 @@ static bool pmf_arlg_rl_add(struct pmf_group_ext *earg, struct pmf_rule *rule, uint32_t rl_idx) { - struct pmf_rlset_ext *ears = earg->earg_rlset; - bool dir_in = (ears->ears_flags & PMF_EARSF_IN); - - struct pmf_attrl *earl = pmf_arlg_rl_alloc(rule, rl_idx); - if (!earl) { - RTE_LOG(ERR, FIREWALL, - "Error: OOM for ACL attached group rule" - " %s/%s|%s:%u\n", - (dir_in) ? " In" : "Out", ears->ears_ifname, - earg->earg_rgname, rl_idx); - return false; - } - - earl->earl_group = earg; + struct gpc_group *gprg = earg->earg_gprg; + struct gpc_rlset *gprs = gpc_group_get_rlset(gprg); + bool dir_in = gpc_rlset_is_ingress(gprs); /* This rule is for group attributes */ if (rl_idx == UINT32_MAX) { - if (earg->earg_rlattr) { + if (earg->earg_attr_rule) { RTE_LOG(ERR, FIREWALL, "Error: Dup rule 0 for ACL attached group rule" " %s/%s|%s\n", - (dir_in) ? " In" : "Out", ears->ears_ifname, - earg->earg_rgname); - pmf_arlg_rl_free(earl); + (dir_in) ? 
" In" : "Out", + gpc_rlset_get_ifname(gprs), + gpc_group_get_name(gprg)); return false; } - earg->earg_rlattr = earl; + rule = pmf_rule_copy(rule); + pmf_arlg_rule_create_cntg(gprg, rule); pmf_arlg_rl_attr_check(earg, rule); + earg->earg_attr_rule = rule; return true; } ++earg->earg_num_rules; - /* If any were published, update and notify */ - uint32_t new_summary = earg->earg_summary | rule->pp_summary; - pmf_alrg_hw_ntfy_grp_summary_mod(earg, new_summary); - - pmf_alrg_hw_ntfy_rule_add(earg, earl); - - struct pmf_attrl *cursor = TAILQ_LAST(&earg->earg_rules, pmf_rlqh); - if (!cursor || cursor->earl_index < rl_idx) { - TAILQ_INSERT_TAIL(&earg->earg_rules, earl, earl_list); - return true; - } + /* Find a counter if necessary */ + struct gpc_cntr *cntr = NULL; + struct gpc_cntg *cntg = gpc_group_get_cntg(gprg); + if (cntg && pmf_arlg_rule_needs_cntr(cntg, rule)) + cntr = pmf_arlg_rule_get_cntr(cntg, rule, rl_idx); - /* Find the element to insert in front of */ - cursor = pmf_arlg_rl_find(earg, rl_idx, true); - if (!cursor) { + /* Create the GPC rule, or fail and clean up */ + struct gpc_rule *gprl = gpc_rule_create(gprg, rl_idx, NULL); + if (!gprl) { RTE_LOG(ERR, FIREWALL, - "Error: No insertion point for ACL attached group" + "Error: OOM for ACL attached group rule" " %s/%s|%s:%u\n", - (dir_in) ? " In" : "Out", ears->ears_ifname, - earg->earg_rgname, rl_idx); - pmf_arlg_rl_free(earl); + (dir_in) ? 
" In" : "Out", gpc_rlset_get_ifname(gprs), + gpc_group_get_name(gprg), rl_idx); + if (cntr) + gpc_cntr_release(cntr); return false; } - TAILQ_INSERT_BEFORE(cursor, earl, earl_list); + gpc_rule_set_cntr(gprl, cntr); + + if (cntr) + gpc_cntr_hw_ntfy_create(cntg, cntr); + + gpc_rule_change_rule(gprl, rule); return true; } @@ -820,6 +668,9 @@ pmf_arlg_group_modify(void *vctx, struct npf_cfg_rule_group_event *ev) default: return; } + + /* This came from config, expect a commit */ + commit_pending = true; } /* @@ -864,7 +715,6 @@ pmf_arlg_attpt_grp_ev_handler(enum npf_attpt_ev_type event, bool const dir_in = (rls_type == NPF_RS_ACL_IN); struct pmf_group_ext *earg; - struct pmf_rlset_ext *ears; int ev_rc = -1; if (!enabled) @@ -882,12 +732,24 @@ pmf_arlg_attpt_grp_ev_handler(enum npf_attpt_ev_type event, return; } - ears = npf_attpt_rlset_get_extend(ars); earg->earg_base = agr; - earg->earg_rlset = ears; - earg->earg_rgname = rg_name; - earg->earg_flags |= PMF_EARGF_DEFERRED; - TAILQ_INIT(&earg->earg_rules); + + struct gpc_rlset *gprs = npf_attpt_rlset_get_extend(ars); + struct gpc_group *gprg + = gpc_group_create(gprs, GPC_FEAT_ACL, rg_name, earg); + if (!gprg) { + RTE_LOG(ERR, FIREWALL, + "Error: Failed to create GPC group" + " (%s/%s/%s/%s)\n", + "ACL", (dir_in) ? " In" : "Out", + if_name, rg_name); + + free(earg); + return; + } + gpc_group_set_deferred(gprg); + earg->earg_gprg = gprg; + bool ok = npf_attpt_group_set_extend(agr, earg); if (!ok) { RTE_LOG(ERR, FIREWALL, @@ -896,6 +758,8 @@ pmf_arlg_attpt_grp_ev_handler(enum npf_attpt_ev_type event, "ACL", (dir_in) ? 
" In" : "Out", if_name, rg_name); + earg->earg_gprg = NULL; + gpc_group_delete(gprg); free(earg); return; } @@ -911,12 +775,11 @@ pmf_arlg_attpt_grp_ev_handler(enum npf_attpt_ev_type event, if_name, rg_name, ev_rc); npf_attpt_group_set_extend(agr, NULL); + earg->earg_gprg = NULL; + gpc_group_delete(gprg); free(earg); return; } - - /* Append it to the list */ - TAILQ_INSERT_TAIL(&ears->ears_groups, earg, earg_list); } if (enabled) { @@ -930,8 +793,10 @@ pmf_arlg_attpt_grp_ev_handler(enum npf_attpt_ev_type event, /* Detached a group from an interface, so maybe unpublish, destroy */ if (!enabled && earg) { + struct gpc_group *gprg = earg->earg_gprg; + /* Notify clients */ - pmf_alrg_hw_ntfy_grp_detach(earg); + gpc_group_hw_ntfy_detach(gprg); ev_rc = npf_cfg_rule_group_dereg_user(NPF_RULE_CLASS_ACL, rg_name, earg); @@ -944,33 +809,54 @@ pmf_arlg_attpt_grp_ev_handler(enum npf_attpt_ev_type event, } /* Notify clients */ - pmf_alrg_hw_ntfy_rules_del(earg); + gpc_group_hw_ntfy_rules_delete(gprg); + + struct gpc_cntg *cntg = gpc_group_get_cntg(gprg); + if (cntg) + gpc_cntg_hw_ntfy_cntrs_delete(cntg); /* Deallocate all of the rules */ - struct pmf_attrl *cursor; - while (!!(cursor = TAILQ_LAST(&earg->earg_rules, pmf_rlqh))) { + struct gpc_rule *cursor; + while (!!(cursor = gpc_rule_last(gprg))) { --earg->earg_num_rules; - TAILQ_REMOVE(&earg->earg_rules, cursor, earl_list); - pmf_arlg_rl_free(cursor); + struct gpc_cntr *cntr = gpc_rule_get_cntr(cursor); + /* gpc_rule_hw_ntfy_delete(gprg, cursor); is a NO-OP */ + gpc_rule_delete(cursor); + if (cntr) + gpc_cntr_release(cntr); + } + + /* Deallocate remaining counters */ + if (cntg) { + if (gpc_cntg_type(cntg) == GPC_CNTT_NAMED) { + struct gpc_cntr *cntr; + while (!!(cntr = gpc_cntr_last(cntg))) + gpc_cntr_release(cntr); + } + gpc_cntg_release(cntg); + gpc_group_set_cntg(gprg, NULL); } /* Sanity before freeing */ earg->earg_num_rules = 0; - if (earg->earg_rlattr) { - pmf_arlg_rl_free(earg->earg_rlattr); - earg->earg_rlattr = 
NULL; + if (earg->earg_attr_rule) { + pmf_rule_free(earg->earg_attr_rule); + earg->earg_attr_rule = NULL; } /* Notify clients */ - pmf_alrg_hw_ntfy_grp_delete(earg); + gpc_group_hw_ntfy_delete(gprg); npf_attpt_group_set_extend(agr, NULL); - ears = earg->earg_rlset; - TAILQ_REMOVE(&ears->ears_groups, earg, earg_list); + earg->earg_gprg = NULL; + gpc_group_delete(gprg); free(earg); } + + /* This came from config, expect a commit */ + commit_pending = true; } /* @@ -988,9 +874,9 @@ pmf_arlg_attpt_grp_updn_handler(const struct npf_attpt_group *rsg, void *ctx) return true; if (is_up) - pmf_alrg_hw_ntfy_grp_attach(earg); + gpc_group_hw_ntfy_attach(earg->earg_gprg); else - pmf_alrg_hw_ntfy_grp_detach(earg); + gpc_group_hw_ntfy_detach(earg->earg_gprg); return true; } @@ -1006,44 +892,33 @@ pmf_arlg_attpt_grp_updn_handler(const struct npf_attpt_group *rsg, void *ctx) static void pmf_arlg_attpt_rls_updn(struct npf_attpt_rlset *ars, bool is_up) { - struct pmf_rlset_ext *ears = npf_attpt_rlset_get_extend(ars); - if (!ears) + struct gpc_rlset *gprs = npf_attpt_rlset_get_extend(ars); + if (!gprs) return; - struct ifnet *iface = ifnet_byifname(ears->ears_ifname); - if (is_up) { - if (!iface) - return; - /* Fill in the index */ - ears->ears_ifp = iface; - ears->ears_flags |= PMF_EARSF_IFP; - if (iface->if_created) - ears->ears_flags |= PMF_EARSF_IF_CREATED; - } + if (is_up && !gpc_rlset_set_ifp(gprs)) + return; npf_attpt_walk_rlset_grps(ars, pmf_arlg_attpt_grp_updn_handler, &is_up); - if (!is_up) { - /* Clear the index */ - ears->ears_ifp = NULL; - ears->ears_flags &= ~PMF_EARSF_IFP; - } + if (!is_up) + gpc_rlset_clear_ifp(gprs); } static void pmf_arlg_attpt_rls_if_created(struct npf_attpt_rlset *ars) { - struct pmf_rlset_ext *ears = npf_attpt_rlset_get_extend(ars); - if (!ears) + struct gpc_rlset *gprs = npf_attpt_rlset_get_extend(ars); + if (!gprs) return; - if (ears->ears_flags & PMF_EARSF_IF_CREATED) + if (gpc_rlset_is_if_created(gprs)) return; /* Mark as created */ - 
ears->ears_flags |= PMF_EARSF_IF_CREATED; + gpc_rlset_set_if_created(gprs); - if (!(ears->ears_flags & PMF_EARSF_IFP)) + if (!gpc_rlset_get_ifp(gprs)) return; /* Claim it came up */ @@ -1071,52 +946,33 @@ pmf_arlg_attpt_rls_ev_handler(enum npf_attpt_ev_type event, bool const dir_in = (rls_type == NPF_RS_ACL_IN); - uint32_t ears_flags - = 0 - | (dir_in) ? PMF_EARSF_IN : 0; - struct pmf_rlset_ext *ears; + struct gpc_rlset *gprs; if (!enabled) { - ears = npf_attpt_rlset_get_extend(ars); + gprs = npf_attpt_rlset_get_extend(ars); npf_attpt_rlset_set_extend(ars, NULL); - TAILQ_REMOVE(&att_rlsets, ears, ears_list); - free(ears); + gpc_rlset_delete(gprs); } else { - ears = calloc(1, sizeof(*ears)); - if (!ears) { + gprs = gpc_rlset_create(dir_in, if_name, ars); + if (!gprs) { RTE_LOG(ERR, FIREWALL, - "Error: OOM for attached ruleset extension" + "Error: Failed to create GPC ruleset" " (%s/%s/%s)\n", "ACL", (dir_in) ? " In" : "Out", if_name); return; } - ears->ears_base = ars; - ears->ears_flags = ears_flags; - ears->ears_ifname = if_name; - TAILQ_INIT(&ears->ears_groups); - bool ok = npf_attpt_rlset_set_extend(ars, ears); + + bool ok = npf_attpt_rlset_set_extend(ars, gprs); if (!ok) { RTE_LOG(ERR, FIREWALL, "Error: Failed to attach ruleset extension" " (%s/%s/%s)\n", "ACL", (dir_in) ? 
" In" : "Out", if_name); - free(ears); + gpc_rlset_delete(gprs); return; } - - /* Fill in the index */ - struct ifnet *iface = ifnet_byifname(if_name); - if (iface) { - ears->ears_ifp = iface; - ears->ears_flags |= PMF_EARSF_IFP; - if (iface->if_created) - ears->ears_flags |= PMF_EARSF_IF_CREATED; - } - - /* Append it to the list */ - TAILQ_INSERT_TAIL(&att_rlsets, ears, ears_list); } } @@ -1128,60 +984,89 @@ pmf_arlg_attpt_ap_ev_handler(enum npf_attpt_ev_type event, struct npf_attpt_rlset *ars; bool is_up = (event == NPF_ATTPT_EV_UP); - if (npf_attpt_rlset_find(ap, NPF_RS_ACL_IN, &ars) == 0) + bool any_sets = false; + if (npf_attpt_rlset_find(ap, NPF_RS_ACL_IN, &ars) == 0) { pmf_arlg_attpt_rls_updn(ars, is_up); - if (npf_attpt_rlset_find(ap, NPF_RS_ACL_OUT, &ars) == 0) + any_sets = true; + } + if (npf_attpt_rlset_find(ap, NPF_RS_ACL_OUT, &ars) == 0) { pmf_arlg_attpt_rls_updn(ars, is_up); + any_sets = true; + } + + /* If this occurs outside of config, force a commit */ + if (any_sets && !commit_pending) + gpc_hw_commit(); } static void -pmf_arlg_if_create_finished(struct ifnet *ifp) +pmf_arlg_if_feat_mode_change(struct ifnet *ifp, + enum if_feat_mode_event event) { struct npf_attpt_item *ap; + + if (event != IF_FEAT_MODE_EVENT_L3_FAL_ENABLED) + return; + if (npf_attpt_item_find_any(NPF_ATTACH_TYPE_INTERFACE, ifp->if_name, &ap) != 0) return; struct npf_attpt_rlset *ars; - if (npf_attpt_rlset_find(ap, NPF_RS_ACL_IN, &ars) == 0) + bool any_sets = false; + if (npf_attpt_rlset_find(ap, NPF_RS_ACL_IN, &ars) == 0) { pmf_arlg_attpt_rls_if_created(ars); - if (npf_attpt_rlset_find(ap, NPF_RS_ACL_OUT, &ars) == 0) + any_sets = true; + } + if (npf_attpt_rlset_find(ap, NPF_RS_ACL_OUT, &ars) == 0) { pmf_arlg_attpt_rls_if_created(ars); + any_sets = true; + } + + /* If this occurs outside of config, force a commit */ + if (any_sets && !commit_pending) + gpc_hw_commit(); } static const struct dp_event_ops pmf_arlg_events = { - .if_create_finished = pmf_arlg_if_create_finished, + 
.if_feat_mode_change = pmf_arlg_if_feat_mode_change, }; static void pmf_arlg_commit_deferrals(void) { - struct pmf_rlset_ext *ears; - TAILQ_FOREACH(ears, &att_rlsets, ears_list) { - struct pmf_group_ext *earg; - TAILQ_FOREACH(earg, &ears->ears_groups, earg_list) { - uint32_t rg_flags = earg->earg_flags; - bool rg_deferred = (rg_flags & PMF_EARGF_DEFERRED); - if (!rg_deferred) + struct gpc_rlset *gprs; + GPC_RLSET_FOREACH(gprs) { + struct gpc_group *gprg; + GPC_GROUP_FOREACH(gprg, gprs) { + if (gpc_group_get_feature(gprg) != GPC_FEAT_ACL) + continue; + + struct pmf_group_ext *earg + = gpc_group_get_owner(gprg); + + if (!gpc_group_is_deferred(gprg)) continue; /* Process a deferred group notification */ - earg->earg_flags &= ~PMF_EARGF_DEFERRED; + gpc_group_clear_deferred(gprg); /* Could be blocked by lack of address family */ - struct pmf_rule *attr_rule - = (earg->earg_rlattr) - ? earg->earg_rlattr->earl_rule - : NULL; - pmf_alrg_hw_ntfy_grp_create(earg, attr_rule); + struct pmf_rule *attr_rule = earg->earg_attr_rule; + gpc_group_hw_ntfy_create(gprg, attr_rule); + + /* Notify about all counters */ + struct gpc_cntg *cntg = gpc_group_get_cntg(gprg); + if (cntg) + gpc_cntg_hw_ntfy_cntrs_create(cntg); /* Notify about all rules */ - pmf_alrg_hw_ntfy_rules_add(earg); + gpc_group_hw_ntfy_rules_create(gprg); /* If the interface exists, we will attach */ - pmf_alrg_hw_ntfy_grp_attach(earg); + gpc_group_hw_ntfy_attach(gprg); } } } @@ -1192,8 +1077,9 @@ pmf_arlg_commit(void) if (deferrals) pmf_arlg_commit_deferrals(); - pmf_hw_commit(); + gpc_hw_commit(); deferrals = false; + commit_pending = false; } void pmf_arlg_init(void) @@ -1220,293 +1106,3 @@ void pmf_arlg_init(void) pmf_arlg_attpt_grp_ev_handler) < 0) rte_panic("PMF FAL top cannot listen to attpt grp events\n"); } - -/* Op-mode commands : dump internals */ - -void -pmf_arlg_dump(FILE *fp) -{ - struct pmf_rlset_ext *ears; - - /* Rulesets */ - TAILQ_FOREACH(ears, &att_rlsets, ears_list) { - uint32_t rs_flags = 
ears->ears_flags; - bool rs_in = (rs_flags & PMF_EARSF_IN); - bool rs_ifp = (rs_flags & PMF_EARSF_IFP); - bool rs_if_created = (rs_flags & PMF_EARSF_IF_CREATED); - uint32_t if_index = rs_ifp ? ears->ears_ifp->if_index : 0; - fprintf(fp, " RLS:%p: %s(%u)/%s%s%s\n", - ears, ears->ears_ifname, if_index, - rs_in ? "In " : "Out", - rs_ifp ? " IFP" : "", - rs_if_created ? " IfCrt" : "" - ); - /* Groups - i.e. TABLES */ - struct pmf_group_ext *earg; - TAILQ_FOREACH(earg, &ears->ears_groups, earg_list) { - uint32_t rg_flags = earg->earg_flags; - bool rg_published = (rg_flags & PMF_EARGF_PUBLISHED); - bool rg_attached = (rg_flags & PMF_EARGF_ATTACHED); - bool rg_deferred = (rg_flags & PMF_EARGF_DEFERRED); - bool rg_attr_rl = (rg_flags & PMF_EARGF_RULE_ATTR); - bool rg_family = (rg_flags & PMF_EARGF_FAMILY); - bool rg_v6 = (rg_flags & PMF_EARGF_V6); - bool rg_ll_create = (rg_flags & PMF_EARGF_LL_CREATED); - bool rg_ll_attach = (rg_flags & PMF_EARGF_LL_ATTACHED); - fprintf(fp, - " GRP:%p(%lx): %s(%u/%x)%s%s%s%s%s%s%s\n", - earg, earg->earg_objid, - earg->earg_rgname, earg->earg_num_rules, - earg->earg_summary, - rg_published ? " Pub" : "", - rg_ll_create ? " LLcrt" : "", - rg_attached ? " Att" : "", - rg_ll_attach ? " LLatt" : "", - rg_deferred ? " Defr" : "", - rg_attr_rl ? " GAttr" : "", - rg_family ? rg_v6 ? " v6" : " v4" : "" - ); - /* Stats - i.e. COUNTERS - cheat for now */ - struct pmf_attrl *earl; - TAILQ_FOREACH(earl, &earg->earg_rules, earl_list) { - struct pmf_cntr *eark = &earl->earl_cntr; - uint32_t ct_flags = eark->eark_flags; - bool ct_published - = (ct_flags & PMF_EARKF_PUBLISHED); - if (!ct_published) - continue; - bool ct_ll_create - = (ct_flags & PMF_EARKF_LL_CREATED); - bool ct_cnt_packet - = (ct_flags & PMF_EARKF_CNT_PACKET); - bool ct_cnt_byte - = (ct_flags & PMF_EARKF_CNT_BYTE); - fprintf(fp, " CT:%p(%lx): %s%s%s%s%s\n", - eark, eark->eark_objid, - eark->eark_name, - ct_published ? " Pub" : "", - ct_ll_create ? " LLcrt" : "", - ct_cnt_packet ? 
" Pkt" : "", - ct_cnt_byte ? " Byte" : "" - ); - uint64_t val_pkt = -1; - uint64_t val_byt = -1; - pmf_hw_counter_read(eark, &val_pkt, &val_byt); - fprintf(fp, " %s(%lu/%lx)) %s(%lu/%lx)\n", - ct_cnt_packet ? "Pkt" : "-", - (unsigned long)val_pkt, - (unsigned long)val_pkt, - ct_cnt_byte ? "Byte" : "-", - (unsigned long)val_byt, - (unsigned long)val_byt - ); - } - /* Rules - i.e. ENTRIES */ - TAILQ_FOREACH(earl, &earg->earg_rules, earl_list) { - uint32_t rl_flags = earl->earl_flags; - bool rl_published - = (rl_flags & PMF_EARLF_PUBLISHED); - bool rl_ll_create - = (rl_flags & PMF_EARLF_LL_CREATED); - fprintf(fp, " RL:%p(%lx): %u(%x)%s%s\n", - earl, earl->earl_objid, - earl->earl_index, - earl->earl_rule->pp_summary, - rl_published ? " Pub" : "", - rl_ll_create ? " LLcrt" : "" - ); - } - } - } -} - -/* Op-mode commands : show counters */ - -static void -pmf_arlg_show_cntr_ruleset(json_writer_t *json, struct pmf_rlset_ext *ears) -{ - uint32_t rs_flags = ears->ears_flags; - bool rs_in = (rs_flags & PMF_EARSF_IN); - - jsonw_string_field(json, "interface", ears->ears_ifname); - jsonw_string_field(json, "direction", rs_in ? 
"in" : "out"); -} - -static void -pmf_arlg_show_hw_cntr(json_writer_t *json, struct pmf_cntr *eark) -{ - uint32_t ct_flags = eark->eark_flags; - - bool ct_ll_create = (ct_flags & PMF_EARKF_LL_CREATED); - if (!ct_ll_create) - return; - - bool ct_cnt_packet = (ct_flags & PMF_EARKF_CNT_PACKET); - bool ct_cnt_byte = (ct_flags & PMF_EARKF_CNT_BYTE); - - uint64_t val_pkt = -1; - uint64_t val_byt = -1; - bool ok = pmf_hw_counter_read(eark, &val_pkt, &val_byt); - if (!ok) - return; - - jsonw_name(json, "hw"); - jsonw_start_object(json); - - if (ct_cnt_packet) - jsonw_uint_field(json, "pkts", val_pkt); - if (ct_cnt_byte) - jsonw_uint_field(json, "bytes", val_byt); - - jsonw_end_object(json); -} - -static void -pmf_arlg_show_cntr(json_writer_t *json, struct pmf_cntr *eark) -{ - uint32_t ct_flags = eark->eark_flags; - - bool ct_published = (ct_flags & PMF_EARKF_PUBLISHED); - if (!ct_published) - return; - - bool ct_cnt_packet = (ct_flags & PMF_EARKF_CNT_PACKET); - bool ct_cnt_byte = (ct_flags & PMF_EARKF_CNT_BYTE); - - jsonw_start_object(json); - - jsonw_string_field(json, "name", eark->eark_name); - jsonw_bool_field(json, "cnt-pkts", ct_cnt_packet); - jsonw_bool_field(json, "cnt-bytes", ct_cnt_byte); - - pmf_arlg_show_hw_cntr(json, eark); - - jsonw_end_object(json); -} - -int -pmf_arlg_cmd_show_counters(FILE *fp, char const *ifname, int dir, - char const *rgname) -{ - json_writer_t *json = jsonw_new(fp); - if (!json) { - RTE_LOG(ERR, DATAPLANE, "failed to create json stream\n"); - return -ENOMEM; - } - - /* Enforce filter heirarchy */ - if (!ifname) - dir = 0; - if (!dir) - rgname = NULL; - - jsonw_pretty(json, true); - - /* Rulesets */ - struct pmf_rlset_ext *ears; - jsonw_name(json, "rulesets"); - jsonw_start_array(json); - TAILQ_FOREACH(ears, &att_rlsets, ears_list) { - uint32_t rs_flags = ears->ears_flags; - /* Skip rulesets w/o an interface */ - if (!(ears->ears_flags & PMF_EARSF_IFP)) - continue; - /* Filter on interface & direction */ - if (ifname && strcmp(ifname, 
ears->ears_ifname) != 0) - continue; - if (dir < 0 && !(rs_flags & PMF_EARSF_IN)) - continue; - if (dir > 0 && (rs_flags & PMF_EARSF_IN)) - continue; - - jsonw_start_object(json); - pmf_arlg_show_cntr_ruleset(json, ears); - - /* Groups - i.e. TABLES */ - struct pmf_group_ext *earg; - jsonw_name(json, "groups"); - jsonw_start_array(json); - TAILQ_FOREACH(earg, &ears->ears_groups, earg_list) { - /* Filter on group name */ - if (rgname && strcmp(rgname, earg->earg_rgname) != 0) - continue; - - jsonw_start_object(json); - - jsonw_string_field(json, "name", earg->earg_rgname); - - /* Stats - i.e. COUNTERS - cheat for now */ - struct pmf_attrl *earl; - jsonw_name(json, "counters"); - jsonw_start_array(json); - TAILQ_FOREACH(earl, &earg->earg_rules, earl_list) { - struct pmf_cntr *eark = &earl->earl_cntr; - - pmf_arlg_show_cntr(json, eark); - } - jsonw_end_array(json); - - jsonw_end_object(json); - } - jsonw_end_array(json); - - jsonw_end_object(json); - } - jsonw_end_array(json); - - jsonw_destroy(&json); - - return 0; -} - -/* Op-mode commands : clear counters */ - -int -pmf_arlg_cmd_clear_counters(char const *ifname, int dir, char const *rgname) -{ - int rc = 0; /* Success */ - - /* Enforce filter heirarchy */ - if (!ifname) - dir = 0; - if (!dir) - rgname = NULL; - - /* Rulesets */ - struct pmf_rlset_ext *ears; - TAILQ_FOREACH(ears, &att_rlsets, ears_list) { - uint32_t rs_flags = ears->ears_flags; - /* Skip rulesets w/o an interface */ - if (!(ears->ears_flags & PMF_EARSF_IFP)) - continue; - /* Filter on interface & direction */ - if (ifname && strcmp(ifname, ears->ears_ifname) != 0) - continue; - if (dir < 0 && !(rs_flags & PMF_EARSF_IN)) - continue; - if (dir > 0 && (rs_flags & PMF_EARSF_IN)) - continue; - - /* Groups - i.e. TABLES */ - struct pmf_group_ext *earg; - TAILQ_FOREACH(earg, &ears->ears_groups, earg_list) { - /* Filter on group name */ - if (rgname && strcmp(rgname, earg->earg_rgname) != 0) - continue; - - /* Stats - i.e. 
COUNTERS - cheat for now */ - struct pmf_attrl *earl; - TAILQ_FOREACH(earl, &earg->earg_rules, earl_list) { - struct pmf_cntr *eark = &earl->earl_cntr; - - uint32_t ct_flags = eark->eark_flags; - if (!(ct_flags & PMF_EARKF_PUBLISHED)) - continue; - if (!pmf_hw_counter_clear(eark)) - rc = -EIO; - } - } - } - - return rc; -} diff --git a/src/npf/config/pmf_att_rlgrp.h b/src/npf/config/pmf_att_rlgrp.h index 295e3585..d8b72e34 100644 --- a/src/npf/config/pmf_att_rlgrp.h +++ b/src/npf/config/pmf_att_rlgrp.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2019,2021, AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only */ @@ -7,46 +7,12 @@ #ifndef PMF_ATT_RLGRP_H #define PMF_ATT_RLGRP_H -#include #include -struct pmf_rlset_ext; -struct pmf_group_ext; -struct pmf_cntr; -struct pmf_attrl; -struct pmf_rule; - void pmf_arlg_init(void); void pmf_arlg_commit(void); -void pmf_arlg_dump(FILE *fp); -int pmf_arlg_cmd_show_counters(FILE *fp, char const *ifname, int dir, - char const *rgname); -int pmf_arlg_cmd_clear_counters(char const *ifname, int dir, - char const *rgname); - -uint16_t pmf_arlg_attrl_get_index(struct pmf_attrl const *earl); -struct pmf_rule const *pmf_arlg_attrl_get_rule(struct pmf_attrl const *earl); -struct pmf_group_ext *pmf_arlg_attrl_get_grp(struct pmf_attrl const *earl); -struct pmf_cntr *pmf_arlg_attrl_get_cntr(struct pmf_attrl *earl); -uintptr_t pmf_arlg_attrl_get_objid(struct pmf_attrl const *earl); -void pmf_arlg_attrl_set_objid(struct pmf_attrl *earl, uintptr_t objid); - -struct pmf_group_ext *pmf_arlg_cntr_get_grp(struct pmf_cntr const *eark); -uintptr_t pmf_arlg_cntr_get_objid(struct pmf_cntr const *eark); -void pmf_arlg_cntr_set_objid(struct pmf_cntr *eark, uintptr_t objid); -char const *pmf_arlg_cntr_get_name(struct pmf_cntr const *eark); -bool pmf_arlg_cntr_pkt_enabled(struct pmf_cntr const *eark); -bool pmf_arlg_cntr_byt_enabled(struct pmf_cntr 
const *eark); - -char const *pmf_arlg_grp_get_name(struct pmf_group_ext const *earg); -struct pmf_rlset_ext *pmf_arlg_grp_get_rls(struct pmf_group_ext const *earg); -uint32_t pmf_arlg_grp_get_summary(struct pmf_group_ext const *earg); -bool pmf_arlg_grp_is_v6(struct pmf_group_ext const *earg); -bool pmf_arlg_grp_is_ingress(struct pmf_group_ext const *earg); -bool pmf_arlg_grp_is_ll_attached(struct pmf_group_ext const *earg); -uintptr_t pmf_arlg_grp_get_objid(struct pmf_group_ext const *earg); -void pmf_arlg_grp_set_objid(struct pmf_group_ext *earg, uintptr_t objid); -char const *pmf_arlg_rls_get_ifname(struct pmf_rlset_ext const *ears); +void *pmf_arlg_earg_get_attr_rule(void *earg); +uint32_t pmf_arlg_earg_get_rule_count(void *earg); #endif /* PMF_ATT_RLGRP_H */ diff --git a/src/npf/config/pmf_dump.c b/src/npf/config/pmf_dump.c index 7f49f999..8ff86bf6 100644 --- a/src/npf/config/pmf_dump.c +++ b/src/npf/config/pmf_dump.c @@ -1,3 +1,9 @@ +/* + * Copyright (c) 2019-2021, AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ + #include #include @@ -355,6 +361,7 @@ pmf_dump_icmp_attr_json(json_writer_t *json, struct pmf_attr_l4icmp_vals *icmp) jsonw_uint_field(json, "code", icmp->pm_code); jsonw_bool_field(json, "any_code", icmp->pm_any_code); + jsonw_bool_field(json, "class", icmp->pm_class); jsonw_bool_field(json, "named", icmp->pm_named); jsonw_end_object(json); @@ -558,6 +565,11 @@ pmf_dump_rule_nat_json(struct pmf_nat *nat, json_writer_t *json) if (masquerade != PMV_UNSET) jsonw_bool_field(json, "masquerade", (masquerade == PMV_TRUE)); + enum pmf_nat_pa pa = nat->pan_port_alloc; + if (pa != PMPA_UNSET) /* string valued: sequential or random */ + jsonw_string_field(json, "trans-port-alloc", + (pa == PMPA_SEQ) ?
"sequential" : "random"); + if (nat->pan_taddr.any) pmf_dump_attr_json(json, "taddr", nat->pan_taddr.any); diff --git a/src/npf/config/pmf_dump.h b/src/npf/config/pmf_dump.h index a5fbb9e0..6021d055 100644 --- a/src/npf/config/pmf_dump.h +++ b/src/npf/config/pmf_dump.h @@ -1,3 +1,9 @@ +/* + * Copyright (c) 2019-2020, AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ + #ifndef _PMF_DUMP_H_ #define _PMF_DUMP_H_ diff --git a/src/npf/config/pmf_hw.h b/src/npf/config/pmf_hw.h deleted file mode 100644 index 7114271f..00000000 --- a/src/npf/config/pmf_hw.h +++ /dev/null @@ -1,23 +0,0 @@ -#include -#include - -struct pmf_group_ext; -struct pmf_cntr; -struct pmf_attrl; -struct pmf_rule; -struct ifnet; - -bool pmf_hw_rule_add(struct pmf_attrl *earl, struct pmf_rule *rule); -void pmf_hw_rule_mod(struct pmf_attrl *earl, struct pmf_rule *rule); -void pmf_hw_rule_del(struct pmf_attrl *earl); -bool pmf_hw_group_attach(struct pmf_group_ext *earg, struct ifnet *ifp); -void pmf_hw_group_detach(struct pmf_group_ext *earg, struct ifnet *ifp); -bool pmf_hw_group_create(struct pmf_group_ext *earg); -void pmf_hw_group_mod(struct pmf_group_ext *earg, uint32_t new); -void pmf_hw_group_delete(struct pmf_group_ext *earg); -bool pmf_hw_counter_create(struct pmf_cntr *eark); -void pmf_hw_counter_delete(struct pmf_cntr *eark); -bool pmf_hw_counter_clear(struct pmf_cntr const *eark); -bool pmf_hw_counter_read(struct pmf_cntr const *eark, - uint64_t *pkts, uint64_t *bytes); -void pmf_hw_commit(void); diff --git a/src/npf/config/pmf_parse.c b/src/npf/config/pmf_parse.c index b54d753d..a2c81a29 100644 --- a/src/npf/config/pmf_parse.c +++ b/src/npf/config/pmf_parse.c @@ -1,3 +1,9 @@ +/* + * Copyright (c) 2019-2021, AT&T Intellectual Property. All rights reserved. 
+ * + * SPDX-License-Identifier: LGPL-2.1-only + */ + #include #include #include @@ -122,31 +128,39 @@ static pkp_key_parser pkp_l4port; static pkp_key_parser pkp_port_grp; static pkp_key_parser pkp_tcpflgs; static pkp_key_parser pkp_icmp; +static pkp_key_parser pkp_icmp_class; static pkp_key_parser pkp_icmp_grp; /* The action parsers */ static pkp_key_parser pkp_fate; static pkp_key_parser pkp_stateful; +static pkp_key_parser pkp_qos_colour; +static pkp_key_parser pkp_qos_desig; static pkp_key_parser pkp_nat_type; static pkp_key_parser pkp_nat_pinhole; static pkp_key_parser pkp_nat_exclude; static pkp_key_parser pkp_nat_masq; static pkp_key_parser pkp_nat_port; +static pkp_key_parser pkp_nat_port_alloc; static pkp_key_parser pkp_nat_addr_grp; static pkp_key_parser pkp_nat_arange; /* The rproc parser */ static pkp_key_parser pkp_rproc; +static pkp_key_parser pkp_ctrdef_named; /* Types of action and nat keys */ enum pkp_act_field { PKP_ACT_FATE = 1, PKP_ACT_STATEFUL, + PKP_ACT_QOS_COLOUR, + PKP_ACT_QOS_DESIG, PKP_ACT_NAT_TYPE, PKP_ACT_NAT_PINHOLE, PKP_ACT_NAT_EXCLUDE, PKP_ACT_NAT_MASQ, PKP_ACT_NAT_PORT, + PKP_ACT_NAT_PORT_ALLOC, PKP_ACT_NAT_ADDR, PKP_ACT_NAT_ADDR_GROUP, PKP_ACT__LEN @@ -160,6 +174,15 @@ enum pkp_rp_field { PKP_RP__LEN }; +/* Types of rprocs' ctrdef */ +enum pkp_rp_ctrdef_field { + PKP_RP_CTRDEF_PACKETS = 1, + PKP_RP_CTRDEF_SHARING, + PKP_RP_CTRDEF_TYPE, + PKP_RP_CTRDEF_NAMED, + PKP_RP_CTRDEF__LEN +}; + /* * Keys appear in the input string in the form "key=value", where valid values * depends upon the type of the key. Value may not contain an equals sign. 
@@ -201,25 +224,35 @@ static const struct pkp_key match_keys[] = { {"icmpv4", PMF_L4F_ICMP_VALS, ML4, pkp_icmp}, {"icmpv4-group", PMF_L4F_ICMP_VALS, ML4, pkp_icmp_grp}, {"icmpv6", PMF_L4F_ICMP_VALS, ML4, pkp_icmp}, + {"icmpv6-class", PMF_L4F_ICMP_VALS, ML4, pkp_icmp_class}, {"icmpv6-group", PMF_L4F_ICMP_VALS, ML4, pkp_icmp_grp}, }; static struct pkp_key action_keys[] = { /* Actions */ {"action", PKP_ACT_FATE, 0, pkp_fate}, + {"qos-colour", PKP_ACT_QOS_COLOUR, 0, pkp_qos_colour}, + {"qos-desig", PKP_ACT_QOS_DESIG, 0, pkp_qos_desig}, {"stateful", PKP_ACT_STATEFUL, 0, pkp_stateful}, - {"nat-type", PKP_ACT_NAT_TYPE, 0, pkp_nat_type}, - {"nat-pinhole", PKP_ACT_NAT_PINHOLE, 0, pkp_nat_pinhole}, {"nat-exclude", PKP_ACT_NAT_EXCLUDE, 0, pkp_nat_exclude}, + {"nat-pinhole", PKP_ACT_NAT_PINHOLE, 0, pkp_nat_pinhole}, + {"nat-type", PKP_ACT_NAT_TYPE, 0, pkp_nat_type}, + {"trans-addr", PKP_ACT_NAT_ADDR, 0, pkp_nat_arange}, + {"trans-addr-group", PKP_ACT_NAT_ADDR_GROUP, 0, pkp_nat_addr_grp}, {"trans-addr-masquerade", PKP_ACT_NAT_MASQ, 0, pkp_nat_masq}, {"trans-port", PKP_ACT_NAT_PORT, 0, pkp_nat_port}, - {"trans-addr-group", PKP_ACT_NAT_ADDR_GROUP, 0, pkp_nat_addr_grp}, - {"trans-addr", PKP_ACT_NAT_ADDR, 0, pkp_nat_arange}, + {"trans-port-alloc", PKP_ACT_NAT_PORT_ALLOC, 0, pkp_nat_port_alloc}, }; static const struct pkp_key rproc_keys[] = { {"match", PKP_RP_MATCH, 0, pkp_rproc}, {"rproc", PKP_RP_ACTION, 0, pkp_rproc}, {"handle", PKP_RP_HANDLE, 0, pkp_rproc}, }; +static const struct pkp_key rproc_ctrdef_keys[] = { + {"packets", PKP_RP_CTRDEF_PACKETS, 0, NULL}, + {"sharing", PKP_RP_CTRDEF_SHARING, 0, NULL}, + {"type", PKP_RP_CTRDEF_TYPE, 0, NULL}, + {"named", PKP_RP_CTRDEF_NAMED, 0, pkp_ctrdef_named}, +}; /* Summary bits for the rule */ static uint32_t l2_summary[PMF_L2F__LEN] = { @@ -246,6 +279,114 @@ static uint32_t l4_summary[PMF_L4F__LEN] = { [PMF_L4F_ICMP_VALS] = PMF_RMS_L4_ICMP_TYPE, }; +/* Auxiliary functions: */ + +/* + * Create and return an initial 'struct pmf_unused' 
based upon a single + * string of 'delimiter' separated fields, the caller is eventually expected to + * free() the returned struct. + * It has 'num_pairs' and 'num_unused' set to number of fields, each + * 'pair' within has only its 'key' field set; that to a new (writeable) + * string identical to the equivalent supplied field. + */ +static int +pkp_split_parts(char const *rule_line, struct pkp_unused **remaining, + char delimiter) +{ + if (!rule_line || !remaining) + return -EINVAL; + + unsigned int slen = 0; + unsigned int nparts = 0; + + /* Find number of delimiter separated parts, and the string length */ + for (char const *p = rule_line; *p; ++p, ++slen) { + if (*p == delimiter) + continue; + ++nparts; + while (p[1] && p[1] != delimiter) { + ++slen; ++p; + } + } + + /* Allocate the part storage; header, pairs array, then string copy */ + struct pkp_unused *parts = + calloc(1, 1 + slen + + sizeof(*parts) + nparts * sizeof(parts->pairs[0])); + if (!parts) { + RTE_LOG(ERR, FIREWALL, + "Error: parsed rule parts alloc failed\n"); + return -ENOMEM; + } + + /* Copy the data to just after the last pair, including the NUL */ + char * const new_rule = (char *)&parts->pairs[nparts]; + parts->num_pairs = nparts; + parts->num_unused = nparts; + memcpy(new_rule, rule_line, slen + 1); + + /* Split in to parts; delimiter bounded, each NUL terminated */ + nparts = 0; + for (char *p = new_rule; *p; ++p) { + if (*p == delimiter) + continue; + parts->pairs[nparts++].key = p; + while (p[1] && p[1] != delimiter) + ++p; + if (p[1]) { + p[1] = '\0'; + ++p; + } + } + + *remaining = parts; + + return 0; +} + +/* + * Split the array of parts in to their key/value pairs. + * + * Each part is passed in as a 'key=value' string pointed to by the + * 'key' field in its pair struct. Both 'num_pairs' and 'num_unused' + * should be initialised to the number of elements in the 'pairs' field. + * + * On exit the 'key' field now points to the key alone (the '=' being + * replaced with a '\0'), and the 'value' field points to the value alone. + * + * We verify that both 'key' and 'value' are not zero length.
+ */ +static int +pkp_split_pairs(struct pkp_unused *parts) +{ + /* Split the parts in to key/value; equals bounded */ + for (unsigned int nparts = 0; nparts < parts->num_pairs; ++nparts) { + char *p = parts->pairs[nparts].key; + while (*p && *p != '=') + ++p; + if (*p) { + *p++ = '\0'; + parts->pairs[nparts].value = p; + } else { + parts->pairs[nparts].value = (char *)empty_str; + } + } + + /* Sanity check that we had a set of "key=value" entries */ + for (unsigned int nparts = 0; nparts < parts->num_pairs; ++nparts) { + char const *key = parts->pairs[nparts].key; + char const *value = parts->pairs[nparts].value; + + if (!key || !value || !*key || !*value) { + RTE_LOG(ERR, FIREWALL, + "Error: rule not in key=value form\n"); + return -ENOTDIR; + } + } + + return 0; +} + /* The parsers for match keys */ static bool @@ -1027,6 +1168,42 @@ pkp_icmp(struct pmf_rule *rule, struct pkp_key const *key, char *value) return true; } +static bool +pkp_icmp_class(struct pmf_rule *rule, struct pkp_key const *key, char *value) +{ + struct pmf_attr_l4icmp_vals l4icmp = { 0 }; + + bool is_v6 = (strcmp(key->pt_name, "icmpv6-class") == 0); + l4icmp.pm_tag = (is_v6) ? PMAT_L4_ICMP_V6_VALS : PMAT_L4_ICMP_V4_VALS; + l4icmp.pm_named = false; + l4icmp.pm_any_code = true; + l4icmp.pm_class = true; + + /* + * Only IPv6 supported for the moment, and 'info' class has a match + * and mask of 0x80 due to the way the ICMPv6 messages are designed. + */ + if (strcmp(value, "info") == 0) + l4icmp.pm_type = ICMP6_INFOMSG_MASK; + else if (strcmp(value, "error") != 0) { + RTE_LOG(ERR, FIREWALL, + "NPF: bad value in rule: %s=%s\n", key->pt_name, value); + return false; + } + + struct pmf_attr_l4icmp_vals *vp = pmf_leaf_attr_copy(&l4icmp); + if (!vp) { + RTE_LOG(ERR, FIREWALL, + "Error: No memory for parsed icmp%s values\n", + (is_v6) ? 
"v6" : "v4"); + return false; + } + + rule->pp_match.l4[PMF_L4F_ICMP_VALS].pm_l4icmp_vals = vp; + + return true; +} + static bool pkp_icmp_grp(struct pmf_rule *rule, struct pkp_key const *key, char *value) { @@ -1102,6 +1279,76 @@ pkp_stateful(struct pmf_rule *rule, struct pkp_key const *key, char *value) return true; } +static struct pmf_qos_mark * +pkp_qos_mark_attach(struct pmf_rule *rule) +{ + struct pmf_qos_mark *qos_mark = rule->pp_action.qos_mark; + + if (qos_mark) + return qos_mark; + + qos_mark = pmf_qos_mark_create(); + if (!qos_mark) { + RTE_LOG(ERR, FIREWALL, + "Error: No memory for parsed qos mark type\n"); + return NULL; + } + + rule->pp_action.qos_mark = qos_mark; + + return qos_mark; +} + +static bool +pkp_qos_colour(struct pmf_rule *rule, struct pkp_key const *key, char *value) +{ + enum pmf_mark_colour mark_colour; + + if (strcmp(value, "red") == 0) + mark_colour = PMMC_RED; + else if (strcmp(value, "yellow") == 0) + mark_colour = PMMC_YELLOW; + else if (strcmp(value, "green") == 0) + mark_colour = PMMC_GREEN; + else { + RTE_LOG(ERR, FIREWALL, + "NPF: unexpected value in rule: %s=%s\n", + key->pt_name, value); + return false; + } + + struct pmf_qos_mark *mark = pkp_qos_mark_attach(rule); + if (!mark) + return false; + + mark->paqm_colour = mark_colour; + rule->pp_summary |= PMF_RAS_QOS_COLOUR; + + return true; +} + +static bool +pkp_qos_desig(struct pmf_rule *rule, struct pkp_key const *key, char *value) +{ + char *endp = NULL; + unsigned long designation = strtoul(value, &endp, 10); + if (endp == value || *endp || designation > 7) { + RTE_LOG(ERR, FIREWALL, + "NPF: bad value in rule: %s=%s\n", key->pt_name, value); + return false; + } + + struct pmf_qos_mark *mark = pkp_qos_mark_attach(rule); + if (!mark) + return false; + + mark->paqm_desig = designation; + mark->paqm_has_desig = PMV_TRUE; + rule->pp_summary |= PMF_RAS_QOS_HW_DESIG; + + return true; +} + static struct pmf_nat * pkp_nat_attach(struct pmf_rule *rule) { @@ -1117,6 +1364,7 @@ 
pkp_nat_attach(struct pmf_rule *rule) return NULL; } + nat->pan_port_alloc = PMPA_RAND; rule->pp_action.nat = nat; return nat; @@ -1241,6 +1489,33 @@ pkp_nat_port(struct pmf_rule *rule, struct pkp_key const *key, char *value) return true; } +/* Port allocation - sequential or random */ +static bool +pkp_nat_port_alloc(struct pmf_rule *rule, struct pkp_key const *key, + char *value) +{ + enum pmf_nat_pa pa; + + if (strcmp(value, "sequential") == 0) + pa = PMPA_SEQ; + else if (strcmp(value, "random") == 0) + pa = PMPA_RAND; + else { + RTE_LOG(ERR, FIREWALL, + "NPF: unexpected value in rule: %s=%s\n", + key->pt_name, value); + return false; + } + + struct pmf_nat *nat = pkp_nat_attach(rule); + if (!nat) + return false; + + nat->pan_port_alloc = pa; + + return true; +} + static bool pkp_nat_addr_grp(struct pmf_rule *rule, struct pkp_key const *key __unused, char *value) @@ -1308,6 +1583,93 @@ pkp_find_action_key(const char *key) /* The parsers for rproc (match/action/handle) keys */ +static bool +pkp_ctrdef_named(struct pmf_rule *rule, struct pkp_key const *key, char *value) +{ + if (strcmp(value, "accept") == 0) { + rule->pp_summary |= PMF_RAS_COUNT_DEF_PASS; + return true; + } + if (strcmp(value, "drop") == 0) { + rule->pp_summary |= PMF_RAS_COUNT_DEF_DROP; + return true; + } + + RTE_LOG(ERR, FIREWALL, "NPF: bad value in rule: %s=%s\n", key->pt_name, + value); + + return false; +} + +static struct pkp_key const * +pkp_find_rproc_ctrdef_key(const char *key) +{ + for (unsigned int idx = 0; idx < ARRAY_SIZE(rproc_ctrdef_keys); ++idx) + if (strcmp(key, rproc_ctrdef_keys[idx].pt_name) == 0) + return &rproc_ctrdef_keys[idx]; + + return NULL; +} + +static int +pkp_parse_rproc_ctrdef_pairs(struct pkp_unused *parts, struct pmf_rule *rule) +{ + /* Handle the parts we recognise */ + for (unsigned int part = 0; part < parts->num_pairs; ++part) { + char const *str_key = parts->pairs[part].key; + char *str_value = parts->pairs[part].value; + + /* A prior parser may have consumed 
some */ + if (!str_key) + continue; + + /* Do we know this key? */ + struct pkp_key const *rkey = pkp_find_rproc_ctrdef_key(str_key); + if (!rkey) + continue; + + /* Does the key have a parser? */ + if (!rkey->pt_fn) + continue; + + /* Parse the key/value */ + if (!rkey->pt_fn(rule, rkey, str_value)) + return -EINVAL; + + parts->pairs[part].key = NULL; + --parts->num_unused; + } + + return 0; +} + +static bool +pkp_parse_rproc_ctrdef(struct pmf_rule *rule, struct pmf_proc_raw *praw) +{ + struct pkp_unused *parts = NULL; + char *pm_name = praw->pm_name; + char *str = &pm_name[praw->pm_argoff]; + int rval = pkp_split_parts(str, &parts, ','); + if (rval) { +exit_error: + if (parts) + free(parts); + return false; + } + + /* Split the parts in to their pairs */ + rval = pkp_split_pairs(parts); + if (rval) + goto exit_error; + + rval = pkp_parse_rproc_ctrdef_pairs(parts, rule); + if (rval) + goto exit_error; /* a recognised key had a bad value */ + + free(parts); + return true; +} + static bool pkp_rproc(struct pmf_rule *rule, struct pkp_key const *key, char *value) { @@ -1442,6 +1804,9 @@ pkp_rproc(struct pmf_rule *rule, struct pkp_key const *key, char *value) switch (rp_id) { case NPF_RPROC_ID_CTR_DEF: summary |= PMF_RAS_COUNT_DEF; + + if (!pkp_parse_rproc_ctrdef(rule, praw)) + return false; break; case NPF_RPROC_ID_CTR_REF: summary |= PMF_RAS_COUNT_REF; @@ -1621,111 +1986,6 @@ pkp_parse_rule_pairs(struct pkp_unused *parts, struct pmf_rule *rule) return 0; } -/* - * Split the array of parts in to their key/value pairs. - * - * Each part is passed in as a 'key=value' string pointed to by the - * 'key' field in its pair struct. Both 'num_pairs' and 'num_unused' - * should be initialised to the number of elements in the 'pairs' field. - * - * On exit the 'key' field now points to the key alone (the '=' being - * replaced with a '\0'), and the 'value' field points to the value alone. - * - * We verify that both 'key' and 'value' are not zero length.
- */ -static int -pkp_split_pairs(struct pkp_unused *parts) -{ - /* Split the parts in to key/value; equals bounded */ - for (unsigned int nparts = 0; nparts < parts->num_pairs; ++nparts) { - char *p = parts->pairs[nparts].key; - while (*p && *p != '=') - ++p; - if (*p) { - *p++ = '\0'; - parts->pairs[nparts].value = p; - } else { - parts->pairs[nparts].value = (char *)empty_str; - } - } - - /* Sanity check that we had a set of "key=value" entries */ - for (unsigned int nparts = 0; nparts < parts->num_pairs; ++nparts) { - char const *key = parts->pairs[nparts].key; - char const *value = parts->pairs[nparts].value; - - if (!key || !value || !*key || !*value) { - RTE_LOG(ERR, FIREWALL, - "Error: rule not in key=value form\n"); - return -ENOTDIR; - } - } - - return 0; -} - -/* - * Create and return an initial 'struct pmf_unused' based upon a single - * string of space separated fields, the caller is eventually expect to - * free() the returned struct. - * It has 'num_pairs' and 'num_unused' set to number of fields, each - * 'pair' within has only its 'key' field set; that to a new (writeable) - * string identical to the equivalent supplied field. 
- */ -static int -pkp_split_parts(char const *rule_line, struct pkp_unused **remaining) -{ - if (!rule_line || !remaining) - return -EINVAL; - - unsigned int slen = 0; - unsigned int nparts = 0; - - /* Find number of space separated parts */ - for (char const *p = rule_line; *p; ++p, ++slen) { - if (*p == ' ') - continue; - ++nparts; - while (p[1] && p[1] != ' ') { - ++slen; ++p; - } - } - - /* Allocate the part storage */ - struct pkp_unused *parts = - calloc(1, 1 + slen + - sizeof(*parts) + nparts * sizeof(parts->pairs[0])); - if (!parts) { - RTE_LOG(ERR, FIREWALL, - "Error: parsed rule parts alloc failed\n"); - return -ENOMEM; - } - - /* Copy the data */ - char * const new_rule = (char *)&parts->pairs[nparts]; - parts->num_pairs = nparts; - parts->num_unused = nparts; - memcpy(new_rule, rule_line, slen + 1); - - /* Split in to parts; space bounded */ - nparts = 0; - for (char *p = new_rule; *p; ++p) { - if (*p == ' ') - continue; - parts->pairs[nparts++].key = p; - while (p[1] && p[1] != ' ') - ++p; - if (p[1]) { - p[1] = '\0'; - ++p; - } - } - - *remaining = parts; - - return 0; -} - static int pkp_parse_core_line(char const *rule_line, struct pmf_rule **prule, struct pkp_unused **remaining, bool full_rule) @@ -1743,7 +2003,7 @@ pkp_parse_core_line(char const *rule_line, struct pmf_rule **prule, /* Split the line in to a set of key/value fields */ struct pkp_unused *parts = NULL; - int rval = pkp_split_parts(rule_line, &parts); + int rval = pkp_split_parts(rule_line, &parts, ' '); if (rval) { exit_error: pmf_rule_free(rule); @@ -1775,10 +2035,10 @@ pkp_parse_core_line(char const *rule_line, struct pmf_rule **prule, * Parse rprocs out of a single string; Used for action-groups. 
*/ int -pkp_parse_rproc_line(char const *rule_line, struct pmf_rule **prule, +pkp_parse_rproc_line(char const *rproc_line, struct pmf_rule **prule, struct pkp_unused **remaining) { - return pkp_parse_core_line(rule_line, prule, remaining, false); + return pkp_parse_core_line(rproc_line, prule, remaining, false); } /* @@ -1790,104 +2050,3 @@ pkp_parse_rule_line(char const *rule_line, struct pmf_rule **prule, { return pkp_parse_core_line(rule_line, prule, remaining, true); } - -/* - * Create and return an initial 'struct pmf_unused' based upon an array - * of argv style strings, the caller is eventually expect to free() the - * returned struct. - * It has 'num_pairs' and 'num_unused' set to parameter 'nparts', each - * 'pair' within has only its 'key' field set; that to a new (writeable) - * string identical to the equivalent supplied argv[] element. - */ -static int -pkp_collect_parts(unsigned int nparts, char **av, struct pkp_unused **remaining) -{ - if (!nparts || !av || !remaining) - return -EINVAL; - - unsigned int slen = 0; - - /* Find required part string storage */ - for (unsigned int idx = 0; idx < nparts; ++idx) - slen += 1 + strlen(av[idx]); - - /* Allocate the part storage */ - struct pkp_unused *parts = - calloc(1, 1 + slen + - sizeof(*parts) + nparts * sizeof(parts->pairs[0])); - if (!parts) { - RTE_LOG(ERR, FIREWALL, - "Error: parsed rule parts alloc failed\n"); - return -ENOMEM; - } - - char *p = (char *)&parts->pairs[nparts]; - parts->num_pairs = nparts; - parts->num_unused = nparts; - - /* Collect the parts */ - for (unsigned int idx = 0; idx < nparts; ++idx) { - unsigned int plen = 1 + strlen(av[idx]); - memcpy(p, av[idx], plen); - - parts->pairs[idx].key = p; - p += plen; - } - - *remaining = parts; - - return 0; -} - -/* - * Parse rule out of an argv style array of strings. 
- * - * In the returned 'struct rmf_unused', for recognised keys in a pair, the 'key' - * field is NULLed, and the 'num_unused' element indicates if any of the keys in - * a 'pair' were no recognised, in which its key and value fields are still - * present. - * - * Hence the caller can attempt to further parse such elements. It is intended - * that this could be used for parsing rprocs. - */ -int -pkp_parse_args(unsigned int ac, char **av, struct pmf_rule **prule, - struct pkp_unused **remaining) -{ - if (!ac || !av || !prule || !remaining) - return -EINVAL; - - struct pmf_rule *rule = pmf_rule_alloc(); - if (!rule) { - RTE_LOG(ERR, FIREWALL, - "Error: parsed rule alloc failed\n"); - return -ENOMEM; - } - - /* Copy and collect the arguments in to a parts array */ - struct pkp_unused *parts = NULL; - - int rval = pkp_collect_parts(ac, av, &parts); - if (rval) { -exit_error: - pmf_rule_free(rule); - if (parts) - free(parts); - return rval; - } - - /* Split the parts in to their pairs */ - rval = pkp_split_pairs(parts); - if (rval) - goto exit_error; - - /* Now parse the pairs */ - rval = pkp_parse_rule_pairs(parts, rule); - if (rval) - goto exit_error; - - *prule = rule; - *remaining = parts; - - return 0; -} diff --git a/src/npf/config/pmf_parse.h b/src/npf/config/pmf_parse.h index abb5dc44..e20f75b5 100644 --- a/src/npf/config/pmf_parse.h +++ b/src/npf/config/pmf_parse.h @@ -1,3 +1,9 @@ +/* + * Copyright (c) 2019-2020, AT&T Intellectual Property. All rights reserved. 
+ * + * SPDX-License-Identifier: LGPL-2.1-only + */ + #ifndef _PMF_PARSE_H_ #define _PMF_PARSE_H_ @@ -24,7 +30,5 @@ int pkp_parse_rule_line(char const *rule_line, struct pmf_rule **prule, struct pkp_unused **remaining); int pkp_parse_rproc_line(char const *rproc_line, struct pmf_rule **prule, struct pkp_unused **remaining); -int pkp_parse_args(unsigned int ac, char **av, struct pmf_rule **prule, - struct pkp_unused **remaining); #endif /* _PMF_PARSE_H_ */ diff --git a/src/npf/config/pmf_rule.c b/src/npf/config/pmf_rule.c index 3a5550af..d10abca7 100644 --- a/src/npf/config/pmf_rule.c +++ b/src/npf/config/pmf_rule.c @@ -1,3 +1,9 @@ +/* + * Copyright (c) 2019-2021, AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ + #include #include #include @@ -74,6 +80,14 @@ pmf_rule_dealloc(struct pmf_rule *rule) rule->pp_action.nat = NULL; } + /* Free any qos mark elements */ + if (rule->pp_action.qos_mark) { + struct pmf_qos_mark *qos_mark = rule->pp_action.qos_mark; + + free(qos_mark); + rule->pp_action.qos_mark = NULL; + } + free(rule); } @@ -296,6 +310,18 @@ pmf_rule_slow_copy(struct pmf_rule *old_rule) } } + /* Copy any qos mark elements */ + if (old_rule->pp_action.qos_mark) { + struct pmf_qos_mark *old_mark = old_rule->pp_action.qos_mark; + + struct pmf_qos_mark *new_mark = malloc(sizeof(*new_mark)); + if (!new_mark) + goto error_exit; + + memcpy(new_mark, old_mark, sizeof(*new_mark)); + new_rule->pp_action.qos_mark = new_mark; + } + /* Copy values */ new_rule->pp_action.fate = old_rule->pp_action.fate; new_rule->pp_action.stateful = old_rule->pp_action.stateful; @@ -441,10 +467,25 @@ pmf_nat_create(void) nat->pan_masquerade = PMV_UNSET; nat->pan_taddr.any = NULL; nat->pan_tports = NULL; + nat->pan_port_alloc = PMPA_UNSET; return nat; } +struct pmf_qos_mark * +pmf_qos_mark_create(void) +{ + struct pmf_qos_mark *qos_mark = malloc(sizeof(*qos_mark)); + if (!qos_mark) + return NULL; + + qos_mark->paqm_has_desig = PMV_UNSET; 
+ qos_mark->paqm_desig = 0; + qos_mark->paqm_colour = PMMC_UNSET; + + return qos_mark; +} + static struct pmf_pext_list * pmf_rproc_list_create(uint32_t num, uint8_t tag) { diff --git a/src/npf/config/pmf_rule.h b/src/npf/config/pmf_rule.h index 46a76e95..9470e4af 100644 --- a/src/npf/config/pmf_rule.h +++ b/src/npf/config/pmf_rule.h @@ -1,3 +1,9 @@ +/* + * Copyright (c) 2019-2021, AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ + #ifndef _PMF_RULE_H_ #define _PMF_RULE_H_ @@ -85,6 +91,20 @@ enum pmf_nat_type { PMN_DNAT, }; +/* NAT port allocation */ +enum pmf_nat_pa { + PMPA_UNSET, + PMPA_RAND, + PMPA_SEQ, +}; + +enum pmf_mark_colour { + PMMC_UNSET, + PMMC_RED, + PMMC_YELLOW, + PMMC_GREEN, +}; + /* * NB: Each attribute is malloc'ed individually, and so may be free'ed * or easily duplicated. They are either fixed size, or contain @@ -196,6 +216,7 @@ struct pmf_attr_l4icmp_vals { uint8_t pm_type; uint8_t pm_code; bool pm_any_code : 1; /* if true, ignore code */ + bool pm_class : 1; /* if true, mask type */ bool pm_named : 1; /* came from a string name */ }; @@ -270,6 +291,7 @@ struct pmf_nat { enum pmf_value pan_pinhole : 2; enum pmf_value pan_exclude : 2; enum pmf_value pan_masquerade : 2; + enum pmf_nat_pa pan_port_alloc : 2; /* Following only valid for "nat-src" / "nat-dst" */ union { @@ -303,10 +325,28 @@ enum pmf_summary { PMF_RMS_L4_ICMP_CODE = (1 << 20), /* L4 Spare */ /* Actions follow */ + PMF_RAS_QOS_HW_DESIG = (1 << 21), + PMF_RAS_QOS_COLOUR = (1 << 22), + PMF_RAS_QOS_POLICE = (1 << 23), PMF_RAS_DROP = (1 << 24), PMF_RAS_PASS = (1 << 25), PMF_RAS_COUNT_DEF = (1 << 26), PMF_RAS_COUNT_REF = (1 << 27), + /* Action counters (auto-per-action) */ + PMF_RAS_COUNT_DEF_PASS = (1 << 28), + PMF_RAS_COUNT_DEF_DROP = (1 << 29), +#define PMF_SUMMARY_COUNT_DEF_NAMED_FLAGS \ + (PMF_RAS_COUNT_DEF_PASS|PMF_RAS_COUNT_DEF_DROP) +}; + +/* + * The parsed result of a QoS mark. 
+ */ +struct pmf_qos_mark { + enum pmf_value paqm_has_desig: 2; + + uint8_t paqm_desig : 3; + enum pmf_mark_colour paqm_colour : 3; }; struct pmf_rule { @@ -322,6 +362,8 @@ struct pmf_rule { struct pmf_nat *nat; struct pmf_pext_list *handle; /* "handle" rprocs */ struct pmf_pext_list *extend; /* action rprocs */ + struct pmf_qos_mark *qos_mark; + uintptr_t qos_policer; /* FAL object id */ } pp_action; uint32_t pp_summary; uint32_t pp_refcnt; @@ -353,6 +395,7 @@ struct pmf_pext_list *pmf_rproc_hlist_create(uint32_t num); struct pmf_proc_raw *pmf_rproc_raw_create(uint32_t data_len, void *data); struct pmf_nat *pmf_nat_create(void); +struct pmf_qos_mark *pmf_qos_mark_create(void); struct pmf_rule *pmf_rule_alloc(void); #endif /* _PMF_RULE_H_ */ diff --git a/src/npf/dpi/app_cmds.c b/src/npf/dpi/app_cmds.c index 2b73bd5b..111e1b4e 100644 --- a/src/npf/dpi/app_cmds.c +++ b/src/npf/dpi/app_cmds.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2018-2020, AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only */ @@ -20,11 +20,11 @@ #include "compiler.h" #include "json_writer.h" #include "npf_shim.h" -#include "app_cmds.h" #include "urcu.h" #include "util.h" #include "vplane_log.h" -#include "npf/dpi/dpi.h" +#include "npf/dpi/app_cmds.h" +#include "npf/dpi/npf_appdb.h" static zhash_t *cmd_op_hash; diff --git a/src/npf/dpi/app_cmds.h b/src/npf/dpi/app_cmds.h index 652f12e5..16a45496 100644 --- a/src/npf/dpi/app_cmds.h +++ b/src/npf/dpi/app_cmds.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2018-2019, AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only */ diff --git a/src/npf/dpi/dpi.c b/src/npf/dpi/dpi.c index 52583b05..4952ba1b 100644 --- a/src/npf/dpi/dpi.c +++ b/src/npf/dpi/dpi.c @@ -1,5 +1,8 @@ /* - * Copyright (c) 2017-2019, AT&T Intellectual Property. 
All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. + * + * Copyright (c) 2021 Centre for Development of Telematics. All rights reserved. + * * Copyright (c) 2016-2017 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -7,203 +10,100 @@ */ #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include /* For Q_PROTO_BASE and Q_PROTO_MAX */ - -#include "npf/dpi/dpi.h" -#include "npf/dpi/dpi_private.h" -#include "npf/npf.h" /* For get_time_uptime() */ -#include "npf/npf_cache.h" -#include "npf/npf_nat.h" +#include +#include #include "npf/npf_session.h" -#include "npf/npf_rule_gen.h" -#include "npf_shim.h" -#include "npf/config/npf_config.h" -#include "npf/npf_ruleset.h" -#include "pktmbuf.h" -#include "qmdpi_const.h" -#include "qmdpi_struct.h" -#include "util.h" +#include "dpi.h" +#include "npf/dpi/dpi_internal.h" +#include "pktmbuf_internal.h" #include "vplane_log.h" -#include "json_writer.h" - -/* - * Application IDs begin with Q_PROTO_BASE which is 3 (see protodef.h). - * - * We use app ID 0 to indicate "no app" - * We use app ID 1 to indicate an error, ie DPI processing failed. - * We don't yet use app ID 2. - */ -_Static_assert(Q_PROTO_BASE >= DPI_APP_BASE, "Q_PROTO_BASE is too low"); - -/* - * APP_ID_Q is the base for internally assigned application IDs. - * Qosmos app IDs must not have APP_ID_Q set. - */ -_Static_assert(Q_PROTO_MAX < APP_ID_Q, "Q_PROTO_MAX is too high"); - -#define DPI_INI_STR_LEN 200 - -/* Index within the qosmos 'path', e.g. base.ip.tcp.http */ -#define DPI_L5_INDEX 3 - -static uint64_t dpi_app_id_to_type_bitfield(uint32_t app_id); +#include "util.h" +#include "npf/npf_rule_gen.h" -/* Local variables. 
*/ -static struct qmdpi_engine *dpi_engine; -static struct qmdpi_bundle *dpi_bundle; -static struct qmdpi_worker *dpi_worker[RTE_MAX_LCORE]; -static rte_spinlock_t dpi_worker_lock[RTE_MAX_LCORE]; +struct flow_procs_tup { + struct dpi_engine_flow *flow; + struct dpi_engine_procs *procs; +}; +struct dpi_flow { + struct dpi_engine_flow ef; // Must be first. + struct flow_procs_tup *flows; + size_t flows_len; +}; -/* Get flow key tuple elements */ -static void dpi_flow_get_params(npf_session_t *se, npf_cache_t *npc, - npf_addr_t *saddr, uint16_t *sport, - npf_addr_t *daddr, uint16_t *dport) -{ - npf_nat_t *nt = npf_session_get_nat(se); - struct npf_ports *ports = &npc->npc_l4.ports; - - if (nt) { - npf_nat_t *nt = npf_session_get_nat(se); - npf_natpolicy_t *np = npf_nat_get_policy(nt); - - switch (npf_natpolicy_get_type(np)) { - case NPF_NATOUT: - npf_nat_get_orig(nt, saddr, sport); - *daddr = *npf_cache_dstip(npc); - *dport = ports->d_port; - break; - case NPF_NATIN: - npf_nat_get_orig(nt, daddr, dport); - *saddr = *npf_cache_srcip(npc); - *sport = ports->s_port; - break; - default: /* Hush up gcc */ - memset(saddr, 0, sizeof(npf_addr_t)); - memset(daddr, 0, sizeof(npf_addr_t)); - *dport = 0; - *sport = 0; - } - } else { - *saddr = *npf_cache_srcip(npc); - *sport = ports->s_port; - *daddr = *npf_cache_dstip(npc); - *dport = ports->d_port; - } -} +#define DPI_FLOW_ENGINE_ID ef.engine_id +#define DPI_FLOW_STATS ef.stats +#define DPI_FLOW_UPDATE_STATS ef.update_stats -/* - * DPI engine initialisation. - * - * Create the DPI engine and bundle instances, - * then activate the bundle and all the signatures. +/** + * Entry in the engine name to ID mapping. 
*/ -bool -dpi_init(void) -{ - char sys_ini_str[DPI_INI_STR_LEN]; - int ret; - unsigned int lcore; - static bool initialised; - static bool run_already; - - /* Run only once, thereafter repeat the same status */ - if (run_already) - return initialised; - run_already = true; - - /* - * Appened the user init string (if any) to the system init string. - * The last value is taken for each parameter. - */ - snprintf(sys_ini_str, DPI_INI_STR_LEN, - "injection_mode=stream;nb_workers=%d;nb_flows=1", - rte_lcore_count()); - - /* Create DPI engine instance. */ - dpi_engine = qmdpi_engine_create(sys_ini_str); - - if (dpi_engine == NULL) { - RTE_LOG(ERR, DATAPLANE, "Failed to instantiate DPI engine\n"); - goto error; - } - - /* Create DPI bundle instance. */ - dpi_bundle = qmdpi_bundle_create_from_file(dpi_engine, NULL); - - if (dpi_bundle == NULL) { - RTE_LOG(ERR, DATAPLANE, "Failed to instantiate DPI bundle\n"); - goto error_engine; - } - - /* Activate DPI bundle. */ - ret = qmdpi_bundle_activate(dpi_bundle); - - if (ret < 0) { - RTE_LOG(ERR, DATAPLANE, "Failed to activate DPI bundle\n"); - goto error_bundle; - } - - /* Enable all signatures in DPI bundle. */ - ret = qmdpi_bundle_signature_enable_all(dpi_bundle); - - if (ret < 0) { - RTE_LOG(ERR, DATAPLANE, "Failed to enable DPI signatures\n"); - goto error_bundle; - } - - /* Start a DPI worker for each core. 
*/ - RTE_LCORE_FOREACH(lcore) { - struct qmdpi_worker *worker = qmdpi_worker_create(dpi_engine); - if (!worker) { - RTE_LOG(ERR, DATAPLANE, - "Failed to instantiate DPI worker %d\n", lcore); - goto error_bundle; - } - dpi_worker[lcore] = worker; - rte_spinlock_init(&dpi_worker_lock[lcore]); - } - - RTE_LOG(INFO, DATAPLANE, "Initialised DPI (%d workers)\n", - rte_lcore_count()); - - initialised = true; +struct id_entry { + const char *name; + uint8_t id; +}; + +/* DPI engine dpi_engine_procs instances */ +extern struct dpi_engine_procs ndpi_engine_procs; +extern struct dpi_engine_procs user_engine_procs; + +/* Array of known DPI engine dpi_engine_procs */ +static struct dpi_engine_procs *engine_procs[] = { + &user_engine_procs, + &ndpi_engine_procs, +}; + +struct id_entry engine_name_id_map[] = { + { "ndpi", IANA_NDPI }, + { "user", IANA_USER }, +}; + +#define NULL_ENGINE (NULL) +static uint8_t global_engine = IANA_NDPI; + +static unsigned int engine_procs_len = ARRAY_SIZE(engine_procs); +static unsigned int engine_names_len = ARRAY_SIZE(engine_name_id_map); + +/* Find the first dpi_engine_proc which is: + * - not NULL + * - has the same ID as the given id + * - had the function func + * If one is found, set engine to it. + */ +#define ENGINE_PROC_FIND(engine, ID, func) { \ + for (unsigned int i = 0; i < engine_procs_len; i++) { \ + if (engine_procs[i] && engine_procs[i]->id == (ID) && \ + engine_procs[i]->func) { \ + (engine) = engine_procs[i]; \ + break; \ + } \ + } \ +} - return initialised; +#define CALL_IF_EXIST(_func, _procs, ...) \ + (_procs->_func ? _procs->_func(__VA_ARGS__) : false) -error_bundle: - qmdpi_bundle_destroy(dpi_bundle); -error_engine: - qmdpi_engine_destroy(dpi_engine); -error: - return false; +/* Run the specified function for all engines. 
+ */ +#define ENGINE_PROC_EXEC_ALL(func) {\ + for (unsigned int i = 0; i < engine_procs_len; i++) {\ + if (engine_procs[i] && engine_procs[i]->func) {\ + engine_procs[i]->func();\ + } \ + } \ } -/* - * Do all the DPI processing. +/** + * Get the length of the given packet without the L3 and L4 headers. + * Currently, the only supported L4 protocols are TCP and UDP. + * + * @return length of packet without headers */ -static bool -dpi_process(struct qmdpi_worker *worker, npf_cache_t *npc, - struct rte_mbuf *mbuf, bool forw, - uint32_t ifindex, struct dpi_flow *dpi_flow) +static inline uint32_t +dpi_get_data_len(struct npf_cache *npc, struct rte_mbuf *mbuf) { - /* This should be impossible */ - if (unlikely(!worker)) - return false; + uint32_t offset = dp_pktmbuf_l2_len(mbuf) + dp_pktmbuf_l3_len(mbuf); + uint32_t data_len = rte_pktmbuf_data_len(mbuf) - offset; /* * Find the start of the transport payload. @@ -212,12 +112,9 @@ dpi_process(struct qmdpi_worker *worker, npf_cache_t *npc, * are actually UDP, and handle them here with the appropriate * adjustment. 
*/ - uint16_t data_offset = pktmbuf_l2_len(mbuf) + pktmbuf_l3_len(mbuf); - uint16_t data_len = rte_pktmbuf_data_len(mbuf) - data_offset; switch (npf_cache_ipproto(npc)) { case IPPROTO_TCP: { uint16_t l4_offset = npc->npc_l4.tcp.doff << 2; - data_offset += l4_offset; data_len -= l4_offset; break; } @@ -226,9 +123,9 @@ dpi_process(struct qmdpi_worker *worker, npf_cache_t *npc, /* Ignore UDP with invalid (out of spec) length */ if (l4_len > data_len || l4_len < sizeof(struct udphdr)) - return true; + return 0; + /* Use the UDP header length */ - data_offset += sizeof(struct udphdr); data_len = l4_len - sizeof(struct udphdr); break; } @@ -236,428 +133,494 @@ dpi_process(struct qmdpi_worker *worker, npf_cache_t *npc, break; } - char *data_ptr = rte_pktmbuf_mtod(mbuf, char *) + data_offset; - - /* We need some payload to process */ - if (data_len == 0) - return true; + return data_len; +} - /* Update stats and possibly offload */ - if (dpi_flow->update_stats) { - unsigned int index = !forw; - struct dpi_flow_stats *fsp = &dpi_flow->stats[index]; - uint32_t new_val = fsp->bytes + data_len; +/** + * Update the stats for the given flow in the given direction with the given + * data length. + */ +static inline void +dpi_update_stats(struct dpi_engine_flow *flow, uint32_t data_len, bool forw) +{ + unsigned int index = !forw; + struct dpi_flow_stats *stats = &flow->stats[index]; + uint32_t new_val = stats->bytes + data_len; - if (new_val <= UINT16_MAX) { - fsp->pkts++; - fsp->bytes = new_val; - } - if (fsp->pkts == UINT16_MAX || fsp->bytes == UINT16_MAX) - dpi_flow->update_stats = false; + if (new_val <= UINT16_MAX) { + stats->pkts++; + stats->bytes = new_val; } - /* NB: Don't use gettimeofday() in the forwarding path */ - struct timeval tv; - tv.tv_usec = 0; - tv.tv_sec = get_time_uptime(); + if (stats->pkts == UINT16_MAX || stats->bytes == UINT16_MAX) + flow->update_stats = false; +} + +/** + * Run DPI processing on the given packet. 
+ * + * @return false if any DPI engine returns false, true otherwise. + */ +static bool +dpi_process_pkt(struct npf_session *se, struct npf_cache *npc, + struct rte_mbuf *mbuf, int dir) +{ + if (pktmbuf_mdata_exists(mbuf, PKT_MDATA_DPI_SEEN)) + return true; + + uint32_t data_len = dpi_get_data_len(npc, mbuf); + struct dpi_flow *dpi_flow = npf_session_get_dpi(se); + bool forw = npf_session_forward_dir(se, dir); + bool ret = true; + bool offloaded = true; - dpi_status sts; + for (unsigned int i = 0; i < dpi_flow->flows_len; i++) { + struct dpi_engine_procs *procs = dpi_flow->flows[i].procs; + struct dpi_engine_flow *engine_flow = dpi_flow->flows[i].flow; - /* Set PDU information to be processed by the worker. */ - const int dir = forw ? QMDPI_DIR_CTS : QMDPI_DIR_STC; - sts = qmdpi_worker_pdu_set(worker, data_ptr, data_len, &tv, 0, - dir, ifindex); - if (unlikely(sts != DPI_SUCCESS)) - return false; + if (!engine_flow || + CALL_IF_EXIST(is_error, procs, engine_flow) || + CALL_IF_EXIST(is_offloaded, procs, engine_flow) || + !procs->process_pkt) + continue; - /* Process packet with worker */ - struct qmdpi_result *result; - sts = qmdpi_worker_process(worker, dpi_flow->key, &result); + if (engine_flow->update_stats) + dpi_update_stats(engine_flow, data_len, forw); - if (unlikely(sts < 0)) { - /* An error occurred while processing the packet. 
*/ - if (net_ratelimit()) - RTE_LOG(ERR, DATAPLANE, "DPI worker: %s (%d)\n", - qmdpi_error_get_string(dpi_bundle, sts), sts); - return false; - } + if (!procs->process_pkt(engine_flow, mbuf, dir)) { + ret = false; + RTE_LOG(ERR, DATAPLANE, + "engine [%d] failed to process packet\n", + procs->id); + break; + } - /* Extract the L5 and L7 identifiers */ - struct qmdpi_path *path = qmdpi_result_path_get(result); - if (path && path->qp_len >= (DPI_L5_INDEX + 1)) { - dpi_flow->app_proto = - DPI_ENGINE_QOSMOS | path->qp_value[DPI_L5_INDEX]; - dpi_flow->app_name = - DPI_ENGINE_QOSMOS | path->qp_value[path->qp_len-1]; - dpi_flow->app_type = - dpi_app_id_to_type_bitfield(dpi_flow->app_name); + /* Offloaded if all flows offloaded */ + offloaded = CALL_IF_EXIST(is_offloaded, procs, engine_flow) + && offloaded; } - /* Does the engine suggest that we should offload now? */ - if (qmdpi_flow_is_offloaded(dpi_flow->key)) - dpi_flow->offloaded = true; + if (offloaded) + npf_session_set_pkt_hook(se, NULL); - return true; + pktmbuf_mdata_set(mbuf, PKT_MDATA_DPI_SEEN); + return ret; } -/* - * Clean up any per session flow information. 
- */ -static void -dpi_flow_key_destroy(struct qmdpi_flow *flow_key, uint8_t wrkr_id) +uint8_t +dpi_global_engine(void) { - rte_spinlock_t *worker_lock = &dpi_worker_lock[wrkr_id]; - struct qmdpi_worker *worker = dpi_worker[wrkr_id]; - struct qmdpi_result *result; - int err; - - rte_spinlock_lock(worker_lock); - err = qmdpi_flow_offload(worker, flow_key, &result); - if (!err) - err = qmdpi_flow_destroy(worker, flow_key, &result); - rte_spinlock_unlock(worker_lock); - - if (err && net_ratelimit()) - RTE_LOG(ERR, DATAPLANE, "DPI: flow destruction failed (%d)\n", - err); + return global_engine; } -void -dpi_session_flow_destroy(struct dpi_flow *dpi_flow) +uint8_t +dpi_engine_name_to_id(const char *name) { - if (!dpi_flow) - return; + if (!name) + return IANA_RESERVED; - struct qmdpi_flow *flow_key = dpi_flow->key; - if (flow_key) - dpi_flow_key_destroy(flow_key, dpi_flow->wrkr_id); + for (unsigned int i = 0; i < engine_names_len; i++) { + struct id_entry *entry = &engine_name_id_map[i]; + if (entry->name && strcmp(entry->name, name) == 0) + return entry->id; + } - free(dpi_flow); + return IANA_RESERVED; } -/* - * This processes each packet within a session, updating the - * information cached upon the session. - * - * This hook is only enabled for a session if there is a - * fully initialised flow structure attached. - * - * Returns true to continue procssing, or false to drop. 
- */ -static bool -dpi_session_pkt(npf_session_t *se, npf_cache_t *npc, - struct rte_mbuf *mbuf, int dir) +int32_t +dpi_engine_id_to_idx(uint8_t id) { - if (pktmbuf_mdata_exists(mbuf, PKT_MDATA_DPI_SEEN)) - return true; - - struct dpi_flow *dpi_flow = npf_session_get_dpi(se); + for (unsigned int i = 0; i < engine_names_len; i++) { + struct id_entry *entry = &engine_name_id_map[i]; + if (entry->id == id) + return i; + } - /* Optimise for subsequent packets */ - if (likely(dpi_flow->offloaded)) - return true; - /* This should be impossible */ - if (unlikely(!dpi_flow->key)) - return false; + return -1; +} - bool forw = npf_session_forward_dir(se, dir); - uint32_t ifindex = npf_session_get_if_index(se); +int +dpi_init(uint8_t engine_id) +{ + if (engine_id == IANA_RESERVED) { + /* Start all engines. */ + ENGINE_PROC_EXEC_ALL(init); + return 0; /* success */ + } - /* Access the correct worker, with exclusion */ - unsigned int wrkr_id = dpi_flow->wrkr_id; - rte_spinlock_t *worker_lock = &dpi_worker_lock[wrkr_id]; - struct qmdpi_worker *worker = dpi_worker[wrkr_id]; + /* Try to start only the specified engine. */ + struct dpi_engine_procs *engine = NULL_ENGINE; + ENGINE_PROC_FIND(engine, engine_id, init); + return engine ? engine->init() : -ENOENT; +} - /* - * Process the packet. In the event of an engine error we stop - * processing this flow, leaving it in an error state. - */ - rte_spinlock_lock(worker_lock); - if (!dpi_process(worker, npc, mbuf, forw, ifindex, dpi_flow)) { - dpi_flow->app_name = DPI_APP_ERROR; - dpi_flow->app_proto = DPI_APP_ERROR; - dpi_flow->app_type = DPI_APP_TYPE_NONE; - dpi_flow->offloaded = true; - dpi_flow->error = true; - dpi_flow->update_stats = false; +bool +dpi_terminate(uint8_t engine_id) +{ + if (engine_id == IANA_RESERVED) { + /* Stop all engines. 
*/ + ENGINE_PROC_EXEC_ALL(terminate); + return true; } - rte_spinlock_unlock(worker_lock); - /* Unhook the handler when flow is offloaded */ - if (dpi_flow->offloaded) - npf_session_set_pkt_hook(se, NULL); + /* Try to stop only the specified engine. */ + struct dpi_engine_procs *engine = NULL_ENGINE; + ENGINE_PROC_FIND(engine, engine_id, terminate); + return engine ? engine->terminate() : false; +} - pktmbuf_mdata_set(mbuf, PKT_MDATA_DPI_SEEN); +void +dpi_session_flow_destroy(struct dpi_flow *flow) +{ + if (flow) { + if (flow->flows) { + for (unsigned int i = 0; i < flow->flows_len; i++) { + struct flow_procs_tup *tup = &flow->flows[i]; + if (tup->flow && tup->procs->destructor) + tup->procs->destructor(tup->flow); + } - return true; + free(flow->flows); + } + + free(flow); + } } -/* - * Associate a session with our flow state, and feed the first - * packet of the flow to the DPI engine. Ensure that subsequent - * packets will be fed to the engine by looking the above handler - * on to the session. 
- */ int -dpi_session_first_packet(npf_session_t *se, npf_cache_t *npc, - struct rte_mbuf *mbuf, int dir) +dpi_session_first_packet(struct npf_session *se, struct npf_cache *npc, + struct rte_mbuf *mbuf, int dir, size_t engines_len, + const uint8_t *engines) { - /* Sanity - We only create sessions for IP packets */ - if (!npf_iscached(npc, NPC_IP46)) + unsigned int i; + int ret = 0; + + /* Only create session for IP packets */ + if (unlikely(!npf_iscached(npc, NPC_IP46))) return -EINVAL; /* Impossible */ - /* We currently only support TCP or UDP */ - const uint8_t ip_proto = npf_cache_ipproto(npc); - if (ip_proto != IPPROTO_TCP && ip_proto != IPPROTO_UDP) + /* Only create session for TCP/UDP packets */ + const uint8_t protocol = npf_cache_ipproto(npc); + if (protocol != IPPROTO_TCP && protocol != IPPROTO_UDP) return -EINVAL; - /* Create our DPI structure */ - struct dpi_flow *dpi_flow = zmalloc_aligned(sizeof(*dpi_flow)); - if (!dpi_flow) + struct dpi_flow *flow = zmalloc_aligned(sizeof(struct dpi_flow)); + if (!flow) + return -ENOMEM; + + flow->flows = zmalloc_aligned(engines_len + * sizeof(struct flow_procs_tup)); + if (!flow->flows) { + free(flow); return -ENOMEM; + } - dpi_flow->key = NULL; - dpi_flow->app_proto = DPI_APP_UNDETERMINED; - dpi_flow->app_name = DPI_APP_UNDETERMINED; - dpi_flow->app_type = DPI_APP_TYPE_NONE; - dpi_flow->wrkr_id = dp_lcore_id(); - dpi_flow->offloaded = false; - dpi_flow->error = false; - dpi_flow->update_stats = true; + flow->flows_len = engines_len; /* Add it or lose the race */ - if (!npf_session_set_dpi(se, dpi_flow)) { - free(dpi_flow); + if (!npf_session_set_dpi(se, flow)) { + free(flow->flows); + free(flow); return -EEXIST; } - /* If user-defined applications exist, then evaluate them first. 
*/ - if (npf_active(npf_global_config, NPF_APPLICATION)) { - const npf_ruleset_t *npf_rs = - npf_get_ruleset(npf_global_config, NPF_RS_APPLICATION); - if (npf_rs) { - npf_rule_t *rl = - npf_ruleset_inspect(npc, mbuf, npf_rs, - NULL, NULL, dir); - if (rl) { - /* Rule matched, so run the action. */ - npf_rproc_result_t rproc_result = { - .decision = NPF_DECISION_UNKNOWN, - }; - - npf_rproc_action(NULL, NULL, dir, rl, - se, &rproc_result); - return 0; - } - } - } + uint32_t data_len = dpi_get_data_len(npc, mbuf); - /* Fall back to Qosmos DPI. */ + for (i = 0; i < engines_len; i++) { + struct dpi_engine_procs *engine = NULL_ENGINE; + uint8_t engine_id = engines[i]; + struct flow_procs_tup *tup = &flow->flows[i]; - /* Extract the L3 + L4 protocol */ - int l3proto = npf_iscached(npc, NPC_IP4) ? Q_PROTO_IP : Q_PROTO_IP6; - int l4proto = (ip_proto == IPPROTO_TCP) ? Q_PROTO_TCP : Q_PROTO_UDP; + ENGINE_PROC_FIND(engine, engine_id, first_packet); + tup->procs = engine; - /* Create the flow key */ - npf_addr_t srcip; - npf_addr_t dstip; - uint16_t sport, dport; + if (tup->procs == NULL_ENGINE) { + RTE_LOG(ERR, DATAPLANE, "engine [%d] not found\n", + engine_id); + ret = -EINVAL; + goto free_flows; + } - dpi_flow_get_params(se, npc, &srcip, &sport, &dstip, &dport); + ret = tup->procs->first_packet(se, npc, mbuf, dir, + data_len, &tup->flow); - struct qmdpi_flow *flow_key = - qmdpi_flow_create(dpi_worker[dpi_flow->wrkr_id], l3proto, - l4proto, &srcip, &sport, &dstip, &dport); - if (!flow_key) { - dpi_flow->app_proto = DPI_APP_ERROR; - dpi_flow->app_name = DPI_APP_ERROR; - dpi_flow->app_type = DPI_APP_TYPE_NONE; - dpi_flow->offloaded = true; - dpi_flow->error = true; - dpi_flow->update_stats = false; + if (tup->flow) { + tup->flow->update_stats = true; + dpi_update_stats(tup->flow, data_len, + npf_session_forward_dir(se, dir)); + } - if (net_ratelimit()) - RTE_LOG(ERR, DATAPLANE, "DPI: flow creation failed\n"); - return -ENOMEM; + if (ret != 0) { + RTE_LOG(ERR, DATAPLANE, + 
"engine [%d] failed first packet\n", + engine_procs[i]->id); + goto free_flows; + } } - dpi_flow->key = flow_key; - npf_session_set_pkt_hook(se, dpi_session_pkt); - bool good = dpi_session_pkt(se, npc, mbuf, dir); - if (!good) - return -EINVAL; + npf_session_set_pkt_hook(se, dpi_process_pkt); + + return ret; + +free_flows: + for (unsigned int j = 0; j < i; j++) { + struct flow_procs_tup *tup = &flow->flows[i]; + if (!tup) + continue; + if (!tup->procs) + continue; + if (!tup->procs->destructor) + continue; + if (!tup->flow) + continue; + if (tup->procs->destructor && tup->flow) + tup->procs->destructor(tup->flow); + } - return 0; + free(flow->flows); + flow->flows = NULL; + flow->flows_len = 0; + + return ret; +} + +void +dpi_flow_for_each_engine(struct dpi_flow *flow, + int (*call)(uint8_t engine, uint32_t app, uint32_t proto, + uint32_t type, void *data), + void *data) +{ + if (!flow) + return; + + for (unsigned int i = 0; i < flow->flows_len; i++) { + struct flow_procs_tup *tup = &flow->flows[i]; + if (tup && tup->flow + && tup->procs->flow_get_id + && tup->procs->flow_get_proto + && tup->procs->flow_get_type) { + uint32_t app; + uint32_t proto; + uint32_t type; + app = tup->procs->flow_get_id(tup->flow); + proto = tup->procs->flow_get_proto(tup->flow); + type = tup->procs->flow_get_type(tup->flow); + + if (call(tup->flow->engine_id, + app, proto, type, data) != 0) + break; + } + } } -/* Extract the APP 'protocol', i.e. L5 information */ +/** + * Get the protocol ID the given flow is detected to be according to the given + * flow's engine. + * Returns DPI_APP_ERROR if there is no engine with the given ID, or the flow + * is in an error state, otherwise returns the protocol ID, which can be + * undetermined. 
+ */ uint32_t -dpi_flow_get_app_proto(struct dpi_flow *flow) +dpi_flow_get_app_proto(uint8_t engine_id, struct dpi_flow *flow) { - return flow->app_proto; + if (!flow) + return DPI_APP_ERROR; + + for (unsigned int i = 0; i < flow->flows_len; i++) { + struct flow_procs_tup *tup = &flow->flows[i]; + if (tup->flow && tup->flow->engine_id == engine_id + && tup->procs->flow_get_proto) + return tup->procs->flow_get_proto(tup->flow); + } + + return DPI_APP_ERROR; } -/* Extract the APP 'name', i.e. L7 information */ uint32_t -dpi_flow_get_app_name(struct dpi_flow *flow) +dpi_flow_get_app_id(uint8_t engine_id, struct dpi_flow *flow) { - return flow->app_name; + if (!flow) + return DPI_APP_ERROR; + + for (unsigned int i = 0; i < flow->flows_len; i++) { + struct flow_procs_tup *tup = &flow->flows[i]; + if (tup->flow && tup->flow->engine_id == engine_id + && tup->procs->flow_get_id) + return tup->procs->flow_get_id(tup->flow); + } + + return DPI_APP_ERROR; } -/* Extract the APP 'type' for the 'name', i.e L7 information */ -uint64_t -dpi_flow_get_app_type(struct dpi_flow *flow) +uint32_t +dpi_flow_get_app_type(uint8_t engine_id, struct dpi_flow *flow) { - return flow->app_type; + if (!flow) + return DPI_APP_ERROR; + + for (unsigned int i = 0; i < flow->flows_len; i++) { + struct flow_procs_tup *tup = &flow->flows[i]; + if (tup->flow && tup->flow->engine_id == engine_id + && tup->procs->flow_get_type) + return tup->procs->flow_get_type(tup->flow); + } + + return DPI_APP_ERROR; } -/* Has the DPI engine ceased to process this stream? */ bool dpi_flow_get_offloaded(struct dpi_flow *flow) { - return flow->offloaded; + if (!flow) + /* Flow is invalid so offload to stop all further processing */ + return true; + + for (unsigned int i = 0; i < flow->flows_len; i++) { + struct flow_procs_tup *tup = &flow->flows[i]; + if (tup->procs->is_offloaded && + !tup->procs->is_offloaded(tup->flow)) + return false; + } + + return true; } -/* Is this flow in an error state? 
*/ bool dpi_flow_get_error(struct dpi_flow *flow) { - return flow->error; + if (!flow || !flow->flows) + return true; + + for (unsigned int i = 0; i < flow->flows_len; i++) { + struct flow_procs_tup *tup = &flow->flows[i]; + if (tup->procs->is_error && tup->flow && + !tup->procs->is_error(tup->flow)) + return false; + } + + return true; } -/* - * Return a pointer to the per direction packet stats. - * NB: These are clamped. - */ -const struct dpi_flow_stats * -dpi_flow_get_stats(struct dpi_flow *flow, bool forw) +const struct dpi_flow_stats *dpi_flow_get_stats( + const struct dpi_engine_flow *flow, bool forw) { unsigned int index = !forw; - struct dpi_flow_stats *fsp = &flow->stats[index]; - - return fsp; + return &flow->stats[index]; } -/* Return the application ID for the given Qosmos application name. */ uint32_t -dpi_app_name_to_id_qosmos(const char *app_name) +dpi_app_name_to_id(uint8_t engine_id, const char *app_name) { - struct qmdpi_signature *signature = - qmdpi_worker_signature_get_byname(dpi_worker[dp_lcore_id()], - dpi_bundle, app_name); - - if (signature) - return DPI_ENGINE_QOSMOS | qmdpi_signature_id_get(signature); - - /* No such name. */ - return DPI_APP_NA; + struct dpi_engine_procs *engine = NULL_ENGINE; + ENGINE_PROC_FIND(engine, engine_id, name_to_id); + return engine ? engine->name_to_id(app_name) : DPI_APP_ERROR; } -/* Return the application ID for the given application name. */ uint32_t -dpi_app_name_to_id(const char *app_name) +dpi_app_type_name_to_id(uint8_t engine_id, const char *type_name) { - /* No name? Then no ID. */ - if ((!app_name) || (!*app_name)) - return DPI_APP_NA; + struct dpi_engine_procs *engine = NULL_ENGINE; + ENGINE_PROC_FIND(engine, engine_id, type_to_id); + return engine ? engine->type_to_id(type_name) : DPI_APP_ERROR; +} - /* - * Assuming that Qosmos names will be used more often, - * We first lookup the name in Qosmos. - * The order isn't important. 
- */ - uint32_t app_id = dpi_app_name_to_id_qosmos(app_name); +void +dpi_info_json(struct dpi_flow *dpi_flow, json_writer_t *json) +{ + if (!dpi_flow) + return; - if (app_id == DPI_APP_NA) - /* Name not found in Qosmos, so lookup in the app DB. */ - app_id = appdb_name_to_id(app_name); + jsonw_name(json, "dpi"); + jsonw_start_object(json); - return app_id; -} + jsonw_name(json, "engines"); + jsonw_start_array(json); -/* Return the name associated with the given application ID. */ -const char * -dpi_app_id_to_name(uint32_t app_id) -{ - if (APP_ID_QOSMOS(app_id)) { - struct qmdpi_signature *signature = - qmdpi_worker_signature_get_byid( - dpi_worker[dp_lcore_id()], - dpi_bundle, - app_id & DPI_APP_MASK); - - return qmdpi_signature_name_get(signature); - } else - return appdb_id_to_name(app_id); -} + uint32_t num_engines = 0; -/* - * Return the type ID for the given application type name. - * Currently only Qosmos types are supported. - */ -uint32_t -dpi_app_type_name_to_id(const char *type_name) -{ - int type_id = qmdpi_tag_id_get_byname(dpi_bundle, type_name); + for (unsigned int i = 0; i < dpi_flow->flows_len; i++) { + struct dpi_engine_procs *procs = dpi_flow->flows[i].procs; + struct dpi_engine_flow *engine_flow = dpi_flow->flows[i].flow; - return type_id > 0 ? type_id : 0; + if (!engine_flow || !procs->info_json) + continue; + + if (procs->info_json(engine_flow, json)) + num_engines++; + } + + jsonw_end_array(json); + jsonw_uint_field(json, "num-engines", num_engines); + jsonw_end_object(json); } -/* - * Return the type bitfield for all the app types - * associated with the given application ID. - * - * We can only convert Qosmos app IDs to types, since each Qosmos app ID - * has a unique set of types - whereas user-defined app IDs can be associated - * with different types in different rules. 
- */ -static uint64_t -dpi_app_id_to_type_bitfield(uint32_t app_id) +void +dpi_info_log(struct dpi_flow *dpi_flow, char *buf, size_t buf_len) { - assert(APP_ID_QOSMOS(app_id)); + if (!dpi_flow || !buf) + return; + + size_t used_buf_len = 0; + + for (unsigned int i = 0; i < dpi_flow->flows_len; i++) { + struct dpi_engine_procs *engine = dpi_flow->flows[i].procs; + struct dpi_engine_flow *engine_flow = dpi_flow->flows[i].flow; + + if (!engine->info_log) + continue; - struct qmdpi_signature *signature = - qmdpi_worker_signature_get_byid(dpi_worker[dp_lcore_id()], - dpi_bundle, - app_id & DPI_APP_MASK); + used_buf_len += engine->info_log(engine_flow, + buf + used_buf_len, buf_len - used_buf_len); - return qmdpi_signature_tags_get(signature); + if (used_buf_len) + break; + } + + if (!used_buf_len) + buf_app_printf(buf, &used_buf_len, buf_len, + "engine=None app-name=None " + "proto-name=None type=None"); } -/* Return the type name associated with the given application type. */ -const char * -dpi_app_type_to_name(uint32_t app_type) +struct dpi_engine_flow * +dpi_get_engine_flow(struct dpi_flow *flow, uint8_t engine_id) { - return qmdpi_tag_name_get_byid(dpi_bundle, app_type); + if (!flow) + return NULL; + + for (unsigned int i = 0; i < flow->flows_len; i++) { + if (flow->flows[i].flow->engine_id == engine_id) + return flow->flows[i].flow; + } + + return NULL; } -/* - * Converts an application ID into a string, writing it to the buffer at - * "used_buf_len", ensuring it does not go off the end of the buffer. - * - * This also handles ids DPI_APP_NA, ERROR and UNDETERMINED. 
- */ -static void -dpi_app_name_to_str(char *buf, size_t *used_buf_len, const size_t total_buf_len, - uint32_t id) +void +dpi_app_id_to_buf(char *buf, size_t *used_buf_len, const size_t total_buf_len, + uint32_t id, const char *(*id_to_name)(uint32_t)) { - const char *str = dpi_app_id_to_name(id); + const char *str; switch (id & DPI_APP_MASK) { case DPI_APP_NA: - buf_app_printf(buf, used_buf_len, total_buf_len, ""); + buf_app_printf(buf, used_buf_len, total_buf_len, + "(Unavailable)"); break; case DPI_APP_ERROR: - buf_app_printf(buf, used_buf_len, total_buf_len, ""); + buf_app_printf(buf, used_buf_len, total_buf_len, + "(Error)"); break; case DPI_APP_UNDETERMINED: buf_app_printf(buf, used_buf_len, total_buf_len, - ""); + "(Undetermined)"); break; default: + str = id_to_name(id); if (str) { buf_app_printf(buf, used_buf_len, total_buf_len, "%s", str); @@ -668,92 +631,80 @@ dpi_app_name_to_str(char *buf, size_t *used_buf_len, const size_t total_buf_len, } } -#define MAX_JSON_DPI_NAME_SIZE 128 - -/* - * Outputs as a JSON string field called "field_name" which contains - * the application ID converted into a name. - */ -static void -dpi_app_name_json(json_writer_t *json, const char *field_name, uint32_t id) +void +dpi_app_type_to_buf(char *buf, size_t *used_buf_len, const size_t total_buf_len, + uint32_t type, const char *(*id_to_type)(uint32_t)) { - char str[MAX_JSON_DPI_NAME_SIZE]; - size_t used_buf_len = 0; - - dpi_app_name_to_str(str, &used_buf_len, MAX_JSON_DPI_NAME_SIZE, id); - jsonw_string_field(json, field_name, str); -} + const char *str; -/* - * go through the type bits and create an array of types by name. 
- */ -static void -dpi_types_json(json_writer_t *json, const char *field_name, uint64_t type_bits) -{ - jsonw_name(json, field_name); - jsonw_start_array(json); + switch (type) { + case DPI_APP_TYPE_NONE: + buf_app_printf(buf, used_buf_len, total_buf_len, + "(None)"); + break; - while (type_bits) { - uint32_t next_psn = __builtin_ffsl(type_bits); - const char *str = dpi_app_type_to_name(next_psn); + default: + str = id_to_type(type); if (str) { - jsonw_string(json, str); + buf_app_printf(buf, used_buf_len, total_buf_len, + "%s", str); } else { - char buf[40]; - snprintf(buf, sizeof(buf), "%u", next_psn); - jsonw_string(json, buf); + buf_app_printf(buf, used_buf_len, total_buf_len, + "%u", type); } - /* unset the bit just processed */ - type_bits &= (type_bits - 1); } - jsonw_end_array(json); } -/* - * This exports using JSON the DPI information associated with the flow - */ -void -dpi_info_json(struct dpi_flow *dpi_flow, json_writer_t *json) +bool no_app_id(uint32_t app_id) { - jsonw_name(json, "dpi"); - jsonw_start_object(json); - - dpi_app_name_json(json, "app-name", dpi_flow_get_app_name(dpi_flow)); - dpi_app_name_json(json, "proto-name", dpi_flow_get_app_proto(dpi_flow)); - - jsonw_uint_field(json, "type-bits", dpi_flow_get_app_type(dpi_flow)); - dpi_types_json(json, "types", dpi_flow_get_app_type(dpi_flow)); + return ((app_id & DPI_APP_MASK) <= DPI_APP_UNDETERMINED); +} - jsonw_bool_field(json, "offloaded", dpi_flow_get_offloaded(dpi_flow)); - jsonw_bool_field(json, "error", dpi_flow_get_error(dpi_flow)); +bool no_app_type(uint32_t app_type) +{ + return (app_type == DPI_APP_TYPE_NONE); +} - const struct dpi_flow_stats *stats = dpi_flow_get_stats(dpi_flow, true); - jsonw_uint_field(json, "forward-pkts", stats->pkts); - jsonw_uint_field(json, "forward-bytes", stats->bytes); +void dpi_refcount_inc(uint8_t engine_id) +{ + struct dpi_engine_procs *engine = NULL_ENGINE; + ENGINE_PROC_FIND(engine, engine_id, refcount_inc); + if (engine) + engine->refcount_inc(); +} - 
stats = dpi_flow_get_stats(dpi_flow, false); - jsonw_uint_field(json, "backward-pkts", stats->pkts); - jsonw_uint_field(json, "backward-bytes", stats->bytes); +uint32_t dpi_refcount_dec(uint8_t engine_id) +{ + struct dpi_engine_procs *engine = NULL_ENGINE; + ENGINE_PROC_FIND(engine, engine_id, refcount_dec); + if (engine) + return engine->refcount_dec(); - jsonw_end_object(json); + return 0; } -/* - * This logs into a string the DPI information associated with the flow. - */ -void -dpi_info_log(struct dpi_flow *dpi_flow, char *buf, size_t buf_len) +bool dpi_flow_pkt_count_maxed(struct dpi_flow *dpi_flow, uint32_t max) { - size_t used_buf_len = 0; - const uint32_t app_name = dpi_flow_get_app_name(dpi_flow); - const uint32_t app_proto = dpi_flow_get_app_proto(dpi_flow); + if (!dpi_flow) + return false; - buf_app_printf(buf, &used_buf_len, buf_len, "app-name="); - dpi_app_name_to_str(buf, &used_buf_len, buf_len, app_name); - if (app_proto != app_name) { - buf_app_printf(buf, &used_buf_len, buf_len, - " proto-name="); - dpi_app_name_to_str(buf, &used_buf_len, buf_len, - app_proto); + for (unsigned int i = 0; i < dpi_flow->flows_len; i++) { + struct dpi_engine_flow *engine_flow = dpi_flow->flows[i].flow; + + if (!engine_flow) + continue; + + uint32_t cnt; + const struct dpi_flow_stats *ds; + ds = dpi_flow_get_stats(engine_flow, true); + cnt = ds->pkts; + + ds = dpi_flow_get_stats(engine_flow, false); + cnt += ds->pkts; + + if (cnt >= max) + return true; } + + return false; } diff --git a/src/npf/dpi/dpi.h b/src/npf/dpi/dpi.h deleted file mode 100644 index b04192a4..00000000 --- a/src/npf/dpi/dpi.h +++ /dev/null @@ -1,152 +0,0 @@ -/* - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. - * Copyright (c) 2016-2017 by Brocade Communications Systems, Inc. - * All rights reserved. 
- * - * SPDX-License-Identifier: LGPL-2.1-only - */ - -#ifndef DPI_H -#define DPI_H - -#include -#include -#include "json_writer.h" - -/* - * Everything declared here MUST be defined in BOTH dpi.c and dpi_stubs.c - * for builds with, and without, DPI. - */ - -/* - * From - * https://www.iana.org/assignments/ipfix/ipfix.xhtml#classification-engine-ids - */ -#define IANA_RESERVED 0 -#define IANA_USER 6 -#define IANA_QOSMOS 21 - -#define DPI_ENGINE_RESERVED (IANA_RESERVED << DPI_ENGINE_SHIFT) -#define DPI_ENGINE_QOSMOS (IANA_QOSMOS << DPI_ENGINE_SHIFT) -#define DPI_ENGINE_USER (IANA_USER << DPI_ENGINE_SHIFT) - -/* Error codes */ -#define _DPI_APP_NA 0 /* Not available, e.g. not in image */ -#define _DPI_APP_ERROR 1 /* Error occurred during processing */ -#define _DPI_APP_UNDETERMINED 2 /* Determination not yet available */ -#define DPI_APP_BASE 3 /* First app ID; equals Q_PROTO_BASE */ - -/* Generic error codes */ -#define DPI_APP_NA (DPI_ENGINE_RESERVED | _DPI_APP_NA) -#define DPI_APP_ERROR (DPI_ENGINE_RESERVED | _DPI_APP_ERROR) -#define DPI_APP_UNDETERMINED (DPI_ENGINE_RESERVED | _DPI_APP_UNDETERMINED) - -/* - * User engine error codes. - * Set the engine bits to indicate that the determination was made by - * the user engine (app DB). - */ -#define DPI_APP_USER_NA (DPI_ENGINE_USER | _DPI_APP_NA) - -#define DPI_APP_TYPE_NONE 0 - -#define DPI_SUCCESS QMDPI_SUCCESS -#define DPI_FAILURE QMDPI_EPERM - -/* - * Application ID format: - * - * 33222222 22221111 11111100 00000000 - * 10987654 32109876 54321098 76543210 - * +--------+--------+--------+--------+ - * | Engine |Q Application ID | - * +--------+--------+--------+--------+ - * - * Engine: IANA_QOSMOS or IANA_USER - * - * Q: For user-defined applications: - * 0 = Qosmos compatible ID; 1 = internally allocated ID. - * Q is the topmost bit in the application ID. - * - * Application ID: unique identifier per application. - * - * - * There are three application classes: - * - * 1. 
Qosmos: - * Engine = IANA_QOSMOS; Q doesn't apply. AppID is assigned by Qosmos. - * No entry is made in the app DB. - * - * 2a. User-defined, Qosmos compatible (ie, shared app name): - * Engine = IANA_USER; Q = 0. AppID is assigned by Qosmos. - * An entry (or refcount) is made in the app DB. - * - * 2b. User-defined, Qosmos incompatible (ie, unique user app name): - * Engine = IANA_USER; Q = 1. AppID is assigned internally by vRouter. - * An entry (or refcount) is made in the app DB. - */ - -/* DPI engine is in the topmost bits */ -#define DPI_ENGINE_SHIFT 24 - -/* - * The Q bit is the topmost appID bit. - * It indicates an internally-assigned application ID. - */ -#define APP_ID_Q (1 << 23) - -/* Mask out the DPI engine bits, leaving just Q + the app ID. */ -#define DPI_APP_MASK 0x00ffffff - -/* Whether the given appID is a Qosmos ID or Qosmos compatible. */ -#define APP_ID_QOSMOS(app_id) \ - ((app_id >> DPI_ENGINE_SHIFT == IANA_QOSMOS) || \ - ((app_id >> DPI_ENGINE_SHIFT == IANA_USER) && !(app_id & APP_ID_Q))) - -/* dpi_status should be an enum provided by Qosmos in qmdpi_const.h. 
*/ -typedef int dpi_status; - -/* App DB walker callback function type */ -typedef int (*app_walker_t)(json_writer_t *json, void *data); - -struct dpi_flow; -/* forward declare some structures */ -struct npf_cache; -struct npf_session; -struct rte_mbuf; - -struct dpi_flow_stats { - uint16_t pkts; - uint16_t bytes; -}; - -bool dpi_init(void); -void dpi_session_flow_destroy(struct dpi_flow *flow); -int dpi_session_first_packet(struct npf_session *se, struct npf_cache *npc, - struct rte_mbuf *mbuf, int dir); -uint32_t dpi_flow_get_app_proto(struct dpi_flow *flow); -uint32_t dpi_flow_get_app_name(struct dpi_flow *flow); -uint64_t dpi_flow_get_app_type(struct dpi_flow *flow); -bool dpi_flow_get_offloaded(struct dpi_flow *flow); -bool dpi_flow_get_error(struct dpi_flow *flow); -const struct dpi_flow_stats *dpi_flow_get_stats(struct dpi_flow *flow, - bool forw); -uint32_t dpi_app_name_to_id(const char *app_name); -uint32_t dpi_app_name_to_id_qosmos(const char *app_name); -const char *dpi_app_id_to_name(uint32_t app_id); -uint32_t dpi_app_type_name_to_id(const char *type_name); -const char *dpi_app_type_to_name(uint32_t app_type); -void dpi_info_json(struct dpi_flow *dpi_flow, json_writer_t *json); -int appdb_name_walk(json_writer_t *json, app_walker_t callback); -int appdb_id_walk(json_writer_t *json, app_walker_t callback); -int appdb_name_entry_to_json(json_writer_t *json, void *data); -int appdb_id_entry_to_json(json_writer_t *json, void *data); - -/* The recommended minimum size to pass as buf_len to dpi_info_log() */ -#define MAX_DPI_LOG_SIZE 256 -void dpi_info_log(struct dpi_flow *dpi_flow, char *buf, size_t buf_len); - -uint32_t appdb_name_to_id(const char *name); -char *appdb_id_to_name(uint32_t app_id); - -#endif /* DPI_H */ diff --git a/src/npf/dpi/dpi_internal.h b/src/npf/dpi/dpi_internal.h new file mode 100644 index 00000000..e86be4fe --- /dev/null +++ b/src/npf/dpi/dpi_internal.h @@ -0,0 +1,423 @@ +/* + * Copyright (c) 2017-2018,2020, AT&T Intellectual 
Property. + * All rights reserved. + * + * Copyright (c) 2021 Centre for Development of Telematics. All rights reserved. + * + * Copyright (c) 2016-2017 by Brocade Communications Systems, Inc. + * All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ + +#ifndef DPI_H +#define DPI_H + +#include +#include +#include +#include "json_writer.h" +#include "npf/npf_cache.h" + +/* + * From + * https://www.iana.org/assignments/ipfix/ipfix.xhtml#classification-engine-ids + */ +#define IANA_RESERVED 0 +#define IANA_USER 6 +#define IANA_NDPI 22 + +#define DPI_ENGINE_RESERVED (IANA_RESERVED << DPI_ENGINE_SHIFT) +#define DPI_ENGINE_USER (IANA_USER << DPI_ENGINE_SHIFT) +#define DPI_ENGINE_NDPI (IANA_NDPI << DPI_ENGINE_SHIFT) + +/* Error codes */ +#define _DPI_APP_NA 0 /* Not available, e.g. not in image */ +#define _DPI_APP_ERROR 1 /* Error occurred during processing */ +#define _DPI_APP_UNDETERMINED 2 /* Determination not yet available */ +#define DPI_APP_BASE 3 /* First app ID */ + +/* Generic error codes */ +#define DPI_APP_NA (DPI_ENGINE_RESERVED | _DPI_APP_NA) +#define DPI_APP_ERROR (DPI_ENGINE_RESERVED | _DPI_APP_ERROR) +#define DPI_APP_UNDETERMINED (DPI_ENGINE_RESERVED | _DPI_APP_UNDETERMINED) + +/* + * User engine error codes. + * Set the engine bits to indicate that the determination was made by + * the user engine (app DB). + */ +#define DPI_APP_USER_NA (DPI_ENGINE_USER | _DPI_APP_NA) +#define DPI_APP_USER_ERROR (DPI_ENGINE_USER | _DPI_APP_ERROR) +#define DPI_APP_USER_UNDETERMINED (DPI_ENGINE_USER | _DPI_APP_UNDETERMINED) + +/* ID for the first user-defined application. */ +#define DPI_APP_USER_BASE (DPI_ENGINE_USER | DPI_APP_BASE) + +/* Application type codes. 
*/ +#define DPI_APP_TYPE_NONE 0 + +/* + * Application ID format: + * + * 33222222 22221111 11111100 00000000 + * 10987654 32109876 54321098 76543210 + * +--------+--------+--------+--------+ + * | Engine | Application ID | + * +--------+--------+--------+--------+ + * + * Engine: IANA defined engine ID, such as IANA_NDPI or IANA_USER. + * + * Application ID: unique identifier per application, not necessarily unique + * across engines. + */ + +/* DPI engine is in the topmost bits */ +#define DPI_ENGINE_SHIFT 24 + +/* Mask out the DPI engine bits, leaving just the app ID. */ +#define DPI_APP_MASK 0x00ffffff + +/* DPI direction */ +enum dpi_dir { + DPI_DIR_FORW, + DPI_DIR_BACK +}; + +/* Forward declare some structures */ +struct npf_cache; +struct npf_session; +struct rte_mbuf; +struct dpi_flow; + +struct dpi_flow_stats { + uint16_t pkts; + uint16_t bytes; +}; + +/** + * "Super type" for all DPI flows. + * All dpi flow subtypes must include this struct as their first member. + */ +struct dpi_engine_flow { + uint8_t engine_id; + struct dpi_flow_stats stats[2]; + bool update_stats; +}; + + +/** + * Return the global DPI engine. + * This is a temporary solution until netflow can provide engine IDs. + */ +uint8_t dpi_global_engine(void); + +/** + * Attempt to find the ID of the engine with the given name. + * Returns IANA_RESERVED if name is NULL or no engine is found, otherwise + * returns the ID of the engine. + */ +uint8_t dpi_engine_name_to_id(const char *name); + +/** + * Attempt to find the index of the engine with the given ID. + * Returns -1 if no engine has the given ID, + * otherwise returns the index of the engine. + */ +int32_t dpi_engine_id_to_idx(uint8_t id); + +/** + * Initialise the engine with the given ID, or all installed engines if the + * given ID is IANA_RESERVED.
+ * + * Returns: + * + * - if ID is IANA_RESERVED: + * errno if any engine's initialisation function fails + * + * - if ID is not IANA_RESERVED: + * errno if there is no engine with the given ID, + * or the engine's initialisation function fails + * + * - otherwise returns zero indicating success. + */ +int dpi_init(uint8_t engine_id); + +/** + * Terminate the engine with the given ID, or all installed engines if the + * given ID is IANA_RESERVED. + * + * Returns: + * - if ID is IANA_RESERVED, false if any engine's termination function + * returns false + * - if ID is not IANA_RESERVED, false if there is no engine with the given + * ID, or the engine's termination function returns false + * - otherwise returns true + */ +bool dpi_terminate(uint8_t engine_id); + +/** + * Destroy the given flow using the given engine's flow destructor. + */ +void dpi_session_flow_destroy(struct dpi_flow *flow); + +/** + * Attach DPI to the given session, using mbuf as the first packet of the + * session. + * + * Returns 0 on success, otherwise return the first non-zero return value + * from the underlying DPI engines. + */ +int dpi_session_first_packet(struct npf_session *se, struct npf_cache *npc, + struct rte_mbuf *mbuf, int dir, + size_t engines_len, const uint8_t *engines); + +/** + * Invoke the given callback for each DPI engine + * associated with the given flow. + * + * The callback receives the engine, app, proto, type, and data. + */ +void dpi_flow_for_each_engine(struct dpi_flow *flow, + int (*call)(uint8_t engine, uint32_t app, uint32_t proto, + uint32_t type, void *data), + void *data); + +/** + * Get the protocol ID the given flow is detected to be according to the given + * engine. + * Returns DPI_APP_ERROR if there is no engine with the given ID, or the flow + * is in an error state, otherwise returns the protocol ID, which can be + * undetermined. 
+ */ +uint32_t dpi_flow_get_app_proto(uint8_t engine_id, struct dpi_flow *flow); + +/** + * Get the application ID the given flow is detected to be according to the + * given engine. + * Returns DPI_APP_ERROR if there is no engine with the given ID, or the flow + * is in an error state, otherwise returns the application ID, which can be + * undetermined. + */ +uint32_t dpi_flow_get_app_id(uint8_t engine_id, struct dpi_flow *flow); + +/** + * Get the application type ID the given flow is detected to be according to + * the given engine. + * Returns DPI_APP_ERROR if there is no engine with the given ID, or the flow + * is in an error state, otherwise returns the application type ID, which can + * be undetermined. + */ +uint32_t dpi_flow_get_app_type(uint8_t engine_id, struct dpi_flow *flow); + +/** + * Check if all DPI engines running on the given flow deem the given flow is + * offloaded - ie, they no longer need to see packets for this flow. + * Returns false if any DPI engine is not finished with the given flow, + * otherwise returns true. + */ +bool dpi_flow_get_offloaded(struct dpi_flow *flow); + +/** + * Check if all DPI engines running on the given flow deem the given flow to + * be in an error state. + * Returns true if all DPI engines deem the flow to be in an error state, + * otherwise returns false. + */ +bool dpi_flow_get_error(struct dpi_flow *flow); + +/** + * Get the packet and byte statistics for the given flow in the given direction. + * Returns a pointer to the flow stats in the given direction. + */ +const struct dpi_flow_stats *dpi_flow_get_stats( + const struct dpi_engine_flow *flow, bool forw); + +/** + * Get the ID corresponding to the given application name, according to the + * given engine. + * Returns DPI_APP_ERROR if there is no engine with the given ID, otherwise + * returns the ID corresponding to the given application name.
+ */ +uint32_t dpi_app_name_to_id(uint8_t engine_id, const char *app_name); + +/** + * Get the ID corresponding to the given application type, according to the + * given engine. + * Returns DPI_APP_ERROR if there is no engine with the given ID, otherwise + * returns the ID corresponding to the given application type. + */ +uint32_t dpi_app_type_name_to_id(uint8_t engine_id, const char *type_name); + +/** + * Export the given flow to JSON, with the given writer. + */ +void dpi_info_json(struct dpi_flow *dpi_flow, + json_writer_t *json); + +/* The recommended maximum size to pass as buf_len to dpi_info_log() */ +#define MAX_DPI_LOG_SIZE 256 + +/** + * Log the given flow to the given buffer. + */ +void dpi_info_log(struct dpi_flow *dpi_flow, char *buf, + size_t buf_len); + +/** + * Get the engine flow from the given flow corresponding to the given + * engine ID. Packets with no data are not included in the stats + * (i.e TCP SYN/ACK). + * Returns NULL if the given flow is NULL or there is no flow for the given + * engine ID, otherwise returns the engine flow. + */ +struct dpi_engine_flow *dpi_get_engine_flow(struct dpi_flow *flow, + uint8_t engine_id); + +/* + * Converts an application ID into a string, writing it to the buffer at + * "used_buf_len", ensuring it does not go off the end of the buffer. + * + * This also handles ids DPI_APP_NA, ERROR and UNDETERMINED. + */ +void dpi_app_id_to_buf(char *buf, size_t *used_buf_len, + const size_t total_buf_len, uint32_t id, + const char *(*id_to_name)(uint32_t)); + +/* + * Converts an application type into a string, writing it to the buffer at + * "used_buf_len", ensuring it does not go off the end of the buffer. + * + * This also handles DPI_APP_TYPE_NONE. 
+ */ +void dpi_app_type_to_buf(char *buf, size_t *used_buf_len, + const size_t total_buf_len, uint32_t type, + const char *(*id_to_type)(uint32_t)); + +struct dpi_engine_procs { + /** + * ID of the engine + */ + uint8_t id; + + /** + * Engine initialisation function + * Returns zero if the engine successfully initialised + * or has already been initialised; errno otherwise. + */ + int (*init)(void); + + /** + * Engine termination function + * Returns true if the engine successfully terminated + * or has already been terminated, false otherwise. + */ + bool (*terminate)(void); + + + /** + * Refcount. + */ + void (*refcount_inc)(void); + uint32_t (*refcount_dec)(void); + + /** + * Flow destructor. + */ + void (*destructor)(struct dpi_engine_flow *flow); + + /** + * Initialise a new flow, setting *flow to the pointer to the new flow, + * and running the engine on the first packet. The data_len argument is + * the size of the packet without L3 and L4 headers. + * The first packet may have no contents (i.e TCP SYN), data_len will + * be 0 in this case. + * Non-zero return values are propagated up. + */ + int (*first_packet)(struct npf_session *se, struct npf_cache *npc, + struct rte_mbuf *mbuf, int dir, uint32_t data_len, + struct dpi_engine_flow **flow); + + /** + * Process the given packet. + * This is called for each non-first, non-empty packet for the given + * flow, unless the flow is in an error state or offloaded, as defined + * by 'is_error', and 'is_offloaded' respectively. + * Return true on success, false otherwise. + */ + bool (*process_pkt)(struct dpi_engine_flow *flow, + struct rte_mbuf *mbuf, + int dir); + + /** + * If a flow is in an error state, no further processing will be + * carried out. + * Return true if the given flow is in an error state, false otherwise. + */ + bool (*is_error)(struct dpi_engine_flow *flow); + + /** + * A flow should be deemed as offloaded when there should be no more + * DPI processing carried out on the flow. 
+ * + * For example: + * - the flow's application cannot be determined with any further + * processing + * - The flow's application has been determined + * + * Return true if the given flow is offloaded, false otherwise. + */ + bool (*is_offloaded)(struct dpi_engine_flow *flow); + + /** + * Get the protocol ID of the given flow. + */ + uint32_t (*flow_get_proto)(struct dpi_engine_flow *flow); + + /** + * Get the application ID of the given flow. + */ + uint32_t (*flow_get_id)(struct dpi_engine_flow *flow); + + /** + * Get the application type ID of the given flow. + */ + uint32_t (*flow_get_type)(struct dpi_engine_flow *flow); + + /** + * Get the ID for the given name. + */ + uint32_t (*name_to_id)(const char *name); + + /** + * Get the ID for the given type. + */ + uint32_t (*type_to_id)(const char *type); + + /** + * Write the JSON representation of the given flow to the given + * writer. Return false if nothing was written, else true. + */ + bool (*info_json)(struct dpi_engine_flow *flow, json_writer_t *json); + + /** + * Log the given flow to the given buf. + * Return amount of buf used to log the flow. + */ + size_t (*info_log)(struct dpi_engine_flow *flow, char *buf, + size_t buf_len); +}; + + +bool no_app_id(uint32_t app_id); +bool no_app_type(uint32_t app_type); + +void dpi_refcount_inc(uint8_t engine_id); +uint32_t dpi_refcount_dec(uint8_t engine_id); + +/* Return true if the sum of the forward and backward packet counts + * for the given dpi_flow is greater than or equal to the specified maximum. + */ +bool dpi_flow_pkt_count_maxed(struct dpi_flow *dpi_flow, uint32_t max); + +#endif /* DPI_H */ diff --git a/src/npf/dpi/dpi_private.h b/src/npf/dpi/dpi_private.h deleted file mode 100644 index 2413c7c6..00000000 --- a/src/npf/dpi/dpi_private.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright (c) 2018, AT&T Intellectual Property. All rights reserved.
- * - * SPDX-License-Identifier: LGPL-2.1-only - */ - -#ifndef DPI_PRIVATE_H -#define DPI_PRIVATE_H - -/* Per session DPI information */ -struct dpi_flow { - struct qmdpi_flow *key; - uint32_t app_proto; /* L5 */ - uint32_t app_name; /* L7 */ - uint64_t app_type; /* Type bitfield */ - struct dpi_flow_stats stats[2]; - uint8_t wrkr_id; - uint8_t offloaded: 1; - uint8_t error: 1; - uint8_t update_stats: 1; -}; - -#endif /* DPI_PRIVATE_H */ diff --git a/src/npf/dpi/dpi_public.c b/src/npf/dpi/dpi_public.c index 89756d5b..e9876ee1 100644 --- a/src/npf/dpi/dpi_public.c +++ b/src/npf/dpi/dpi_public.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2017 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -17,8 +17,8 @@ #include "compiler.h" #include "pl_node.h" #include "pipeline/nodes/pl_nodes_common.h" -#include "dpi_public.h" -#include "npf/dpi/dpi.h" +#include "dpi.h" +#include "npf/dpi/dpi_internal.h" #include "npf/npf_session.h" #include "npf/npf_if.h" #include "if_feat.h" @@ -50,14 +50,16 @@ static void dpi_if_feature_disable(struct ifnet *ifp) /* Enable DPI on the given interface. */ int -dpi_enable(struct ifnet *ifp) +dp_dpi_enable(struct ifnet *ifp) { - if (!dpi_init()) - return -ENOMEM; - if (!ifp) return -EINVAL; + /* Ensure all DPI engines are enabled */ + int ret = dpi_init(IANA_RESERVED); + if (ret != 0) + return ret; + dpi_if_feature_enable(ifp); dpi_enabled_count++; @@ -66,7 +68,7 @@ dpi_enable(struct ifnet *ifp) /* Disable DPI on the given interface. */ int -dpi_disable(struct ifnet *ifp) +dp_dpi_disable(struct ifnet *ifp) { if (!ifp) return -EINVAL; @@ -81,14 +83,14 @@ dpi_disable(struct ifnet *ifp) /* Indicate whether DPI is enabled. */ bool -dpi_is_enabled(void) +dp_dpi_is_enabled(void) { return (dpi_enabled_count != 0); } /* Return the L7 DPI application ID. 
*/ uint32_t -dpi_get_app_id(struct rte_mbuf *mbuf) +dp_dpi_get_app_id(struct rte_mbuf *mbuf) { /* First find the session - this should already be present */ npf_session_t *se = npf_session_find_cached(mbuf); @@ -103,5 +105,6 @@ dpi_get_app_id(struct rte_mbuf *mbuf) if (dpi_flow_get_error(dpi_flow)) return DPI_APP_ERROR; - return dpi_flow_get_app_name(dpi_flow); + /* Temporary solution until netflow can provide engine IDs */ + return dpi_flow_get_app_id(dpi_global_engine(), dpi_flow); } diff --git a/src/npf/dpi/dpi_stubs.c b/src/npf/dpi/dpi_stubs.c deleted file mode 100644 index 3239722d..00000000 --- a/src/npf/dpi/dpi_stubs.c +++ /dev/null @@ -1,176 +0,0 @@ -/* - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. - * Copyright (c) 2016-2017 by Brocade Communications Systems, Inc. - * All rights reserved. - * - * SPDX-License-Identifier: LGPL-2.1-only - */ - -/* - * dpi_stubs.c - * - * Stubs for non-DPI builds. - * - * See dpi.c for the equivalent full functions. 
- */ - -#include -#include -#include - -#include "compiler.h" -#include "dpi_public.h" -#include "npf/dpi/dpi.h" -#include "npf/npf_cache.h" -#include "npf/rproc/npf_rproc.h" -#include "app_cmds.h" - -bool -dpi_init(void) -{ - return true; -} - -void -dpi_session_flow_destroy(struct dpi_flow *flow __unused) -{ -} - -int -dpi_session_first_packet(struct npf_session *se __unused, - npf_cache_t *npc __unused, - struct rte_mbuf *mbuf __unused, - int dir __unused) -{ - return -ENOMEM; -} - -uint32_t -dpi_flow_get_app_proto(struct dpi_flow *flow __unused) -{ - return DPI_APP_NA; -} - -uint32_t -dpi_flow_get_app_name(struct dpi_flow *flow __unused) -{ - return DPI_APP_NA; -} - -uint64_t -dpi_flow_get_app_type(struct dpi_flow *flow __unused) -{ - return 0; -} - -bool -dpi_flow_get_offloaded(struct dpi_flow *flow __unused) -{ - return true; -} - -bool -dpi_flow_get_error(struct dpi_flow *flow __unused) -{ - return false; -} - -const struct dpi_flow_stats * -dpi_flow_get_stats(struct dpi_flow *flow __unused, bool forw __unused) -{ - return NULL; -} - -uint32_t -dpi_app_name_to_id(const char *app_name __unused) -{ - return DPI_APP_NA; -} - -const char * -dpi_app_id_to_name(uint32_t app_id __unused) -{ - return NULL; -} - -uint32_t -dpi_app_type_name_to_id(const char *type_name __unused) -{ - return DPI_APP_NA; -} - -const char * -dpi_app_type_to_name(uint32_t app_type __unused) -{ - return NULL; -} - -const npf_rproc_ops_t npf_dpi_ops = { - .ro_name = "dpi", - .ro_type = NPF_RPROC_TYPE_MATCH, - .ro_id = NPF_RPROC_ID_DPI, - .ro_bidir = false, - .ro_ctor = NULL, - .ro_dtor = NULL, - .ro_action = NULL, - .ro_match = NULL, -}; - -int dpi_enable(struct ifnet *ifp __unused) -{ - return 0; -} - -int dpi_disable(struct ifnet *ifp __unused) -{ - return 0; -} - -bool dpi_is_enabled(void) -{ - return false; -} - -uint32_t dpi_get_app_id(struct rte_mbuf *mbuf __unused) -{ - return DPI_APP_NA; -} - -void -dpi_info_json(struct dpi_flow *dpi_flow __unused, json_writer_t *json __unused) -{ 
-} - -void -dpi_info_log(struct dpi_flow *dpi_flow __unused, char *buf __unused, - size_t buf_len __unused) -{ -} - -const npf_rproc_ops_t npf_appfw_ops = { - .ro_name = "app-firewall", - .ro_type = NPF_RPROC_TYPE_ACTION, - .ro_id = NPF_RPROC_ID_APPFW, - .ro_bidir = false, - .ro_ctor = NULL, - .ro_dtor = NULL, - .ro_action = NULL, - .ro_match = NULL, -}; - -const npf_rproc_ops_t npf_app_ops = { - .ro_name = "app", - .ro_type = NPF_RPROC_TYPE_ACTION, - .ro_id = NPF_RPROC_ID_APP, - .ro_bidir = false, - .ro_ctor = NULL, - .ro_dtor = NULL, - .ro_action = NULL, - .ro_match = NULL, -}; - -int -cmd_app_op(FILE *f __unused, int argc __unused, char **argv __unused) -{ - return 0; -} diff --git a/src/npf/dpi/dpi_user.c b/src/npf/dpi/dpi_user.c new file mode 100644 index 00000000..8fde1f0f --- /dev/null +++ b/src/npf/dpi/dpi_user.c @@ -0,0 +1,320 @@ +/* + * Copyright (c) 2020 AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ + +/* + * APIs for user-defined applications. + */ + +#include +#include +#include +#include + +#include "ip_funcs.h" +#include "ip6_funcs.h" +#include "pktmbuf.h" +#include "npf/npf_ruleset.h" +#include "npf/npf_rule_gen.h" +#include "npf/rproc/npf_rproc.h" +#include "npf/npf_cache.h" +#include "npf/config/npf_config.h" +#include "npf_shim.h" +#include "npf/dpi/dpi_internal.h" +#include "npf/dpi/npf_appdb.h" +#include "npf/dpi/npf_typedb.h" +#include "npf/dpi/dpi_user.h" + +/* Count of all uses. */ +static uint32_t user_refcount; +static bool init; + +/** + * Initialise hash tables. + * Returns zero on success; errno if failed to create name or ID hash tables. + */ +static int +dpi_user_init(void) +{ + int ret; + + if (init) + return 0; + + ret = appdb_init(); + if (ret != 0) + return ret; + + ret = typedb_init(); + if (ret != 0) { + appdb_destroy(); + return ret; + } + + init = true; + return 0; +} + +/** + * Increment the refcount. 
+ */ +static void +dpi_user_refcount_inc(void) +{ + if (++user_refcount == 0) + /* Overflowed */ + --user_refcount; +} + +/** + * Decrement the refcount. + */ +static uint32_t +dpi_user_refcount_dec(void) +{ + if (user_refcount) + user_refcount--; + + return user_refcount; +} + +/** + * Destroy the given flow. + */ +static void +dpi_user_flow_destroy(struct dpi_engine_flow *flow) +{ + if (flow) + free(flow); +} + +/** + * Process the given packet. + * Since the user engine always determines on the first packet, this is not + * added to the engine_procs. + * Returns false if there is no user-defined ruleset, true otherwise. + */ +static bool +dpi_user_process_pkt(struct npf_session *se, npf_cache_t *npc, + struct rte_mbuf *mbuf, int dir) +{ + const npf_ruleset_t *npf_rs = + npf_get_ruleset(npf_global_config, NPF_RS_APPLICATION); + + if (!npf_rs) + return false; + + npf_rule_t *rl = + npf_ruleset_inspect(npc, mbuf, npf_rs, + NULL, NULL, dir); + if (rl) { + /* Rule matched, so run the action. */ + npf_rproc_result_t rproc_result = { + .decision = NPF_DECISION_UNKNOWN, + }; + + npf_rproc_action(NULL, NULL, dir, rl, + se, &rproc_result); + } + + return true; +} + +/** + * Initialise a new flow with the given packet. 
+ * Returns: + * - 0 on success + * - -EINVAL if the given packet is not an IP, TCP or UDP packet + * - -ENOMEM if cannot allocate memory for flows + * - -EEXIST if a flow already exists for the given session + * - 0, with no flow created, if user-defined applications are not active in + * the global NPF config + * - -EINVAL if the packet could not be processed (no rulesets defined) + */ +static int +dpi_user_first_packet(struct npf_session *se, struct npf_cache *npc, + struct rte_mbuf *mbuf, int dir, uint32_t data_len __unused, + struct dpi_engine_flow **engine_flow) +{ + /* Only process if user-defined applications are enabled */ + if (!npf_active(npf_global_config, NPF_APPLICATION)) + return 0; // Keep going + + struct user_flow *flow = zmalloc_aligned(sizeof(struct user_flow)); + if (!flow) + return -ENOMEM; + + flow->USER_FLOW_ENGINE_ID = IANA_USER; + flow->application = DPI_APP_USER_UNDETERMINED; + flow->protocol = DPI_APP_USER_UNDETERMINED; + flow->type = DPI_APP_TYPE_NONE; + *engine_flow = (struct dpi_engine_flow *)flow; + + return dpi_user_process_pkt(se, npc, mbuf, dir) ? 0 : -EINVAL; +} + +/** + * Returns if the given flow is offloaded. + * Since the user engine only inspects headers, it only needs to check a + * single packet, so is always "offloaded".
+ */ +static bool +dpi_user_is_offload(struct dpi_engine_flow *flow __unused) +{ + return true; +} + +static bool +dpi_user_is_error(struct dpi_engine_flow *flow __unused) +{ + return false; +} + +static uint32_t +dpi_user_get_proto(struct dpi_engine_flow *dpi_engine_flow) +{ + return ((struct user_flow *)dpi_engine_flow)->protocol; +} + +static uint32_t +dpi_user_get_id(struct dpi_engine_flow *dpi_engine_flow) +{ + return ((struct user_flow *)dpi_engine_flow)->application; +} + +static uint32_t +dpi_user_get_type(struct dpi_engine_flow *dpi_engine_flow) +{ + return ((struct user_flow *)dpi_engine_flow)->type; +} + +static uint32_t +dpi_user_name_to_id(const char *name) +{ + return appdb_name_to_id(name); +} + +static uint32_t +dpi_user_type_to_id(const char *type) +{ + return typedb_name_to_id(type); +} + +static const char * +dpi_user_id_to_name(uint32_t id) +{ + return appdb_id_to_name(id); +} + +static const char * +dpi_user_type_to_name(uint32_t type) +{ + return typedb_id_to_name(type); +} + +static bool +dpi_user_flow_json(struct dpi_engine_flow *dpi_engine_flow, json_writer_t *json) +{ + if (!user_refcount) + /* The user engine is not in use */ + return false; + + struct user_flow *flow = (struct user_flow *)dpi_engine_flow; + if (!flow) + return false; + + jsonw_start_object(json); + + const char *name = appdb_id_to_name(flow->application); + const char *proto = appdb_id_to_name(flow->protocol); + const char *type = typedb_id_to_name(flow->type); + + const struct dpi_flow_stats *stats = + dpi_flow_get_stats(dpi_engine_flow, true); + + jsonw_uint_field(json, "forward-pkts", stats->pkts); + jsonw_uint_field(json, "forward-bytes", stats->bytes); + + stats = dpi_flow_get_stats(dpi_engine_flow, false); + jsonw_uint_field(json, "backward-pkts", stats->pkts); + jsonw_uint_field(json, "backward-bytes", stats->bytes); + + jsonw_string_field(json, "engine", "user"); + + if (name) + jsonw_string_field(json, "app-name", name); + + if (proto) + jsonw_string_field(json, 
"proto-name", proto); + + if (type) + jsonw_string_field(json, "type", type); + + jsonw_bool_field(json, "offloaded", true); + + jsonw_end_object(json); + + return true; +} + +static size_t +dpi_user_flow_log(struct dpi_engine_flow *flow, char *buf, size_t buf_len) +{ + if (!user_refcount) + /* The user engine is not in use */ + return 0; + + if (!buf) + return 0; + + if (!flow) + return 0; + + size_t used_buf_len = 0; + const uint32_t app_id = dpi_user_get_id(flow); + const uint32_t app_proto = dpi_user_get_proto(flow); + const uint32_t app_type = dpi_user_get_type(flow); + + /* Say nothing, if we've nothing useful to say. */ + if (no_app_id(app_id) && no_app_id(app_proto) && no_app_type(app_type)) + return 0; + + buf_app_printf(buf, &used_buf_len, buf_len, "engine=user "); + + buf_app_printf(buf, &used_buf_len, buf_len, "app-name="); + dpi_app_id_to_buf(buf, &used_buf_len, buf_len, app_id, + dpi_user_id_to_name); + + buf_app_printf(buf, &used_buf_len, buf_len, " proto-name="); + dpi_app_id_to_buf(buf, &used_buf_len, buf_len, app_proto, + dpi_user_id_to_name); + + buf_app_printf(buf, &used_buf_len, buf_len, " type="); + dpi_app_type_to_buf(buf, &used_buf_len, buf_len, app_type, + dpi_user_type_to_name); + + return used_buf_len; +} + + +struct dpi_engine_procs user_engine_procs = { + .id = IANA_USER, + .init = dpi_user_init, + .terminate = NULL, + .refcount_inc = dpi_user_refcount_inc, + .refcount_dec = dpi_user_refcount_dec, + .destructor = dpi_user_flow_destroy, + .first_packet = dpi_user_first_packet, + .process_pkt = NULL, // Engine always determines on the first packet + .is_offloaded = dpi_user_is_offload, + .is_error = dpi_user_is_error, + .flow_get_proto = dpi_user_get_proto, + .flow_get_id = dpi_user_get_id, + .flow_get_type = dpi_user_get_type, + .name_to_id = dpi_user_name_to_id, + .type_to_id = dpi_user_type_to_id, + .info_json = dpi_user_flow_json, + .info_log = dpi_user_flow_log, +}; diff --git a/src/npf/dpi/dpi_user.h b/src/npf/dpi/dpi_user.h new 
file mode 100644 index 00000000..ca158bf2 --- /dev/null +++ b/src/npf/dpi/dpi_user.h @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2020 AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ + +#ifndef DPI_USER_H +#define DPI_USER_H + +#include +#include + +struct user_flow { + struct dpi_engine_flow ef; // Must be first. + uint32_t application; + uint32_t protocol; + uint32_t type; +}; + +#define USER_FLOW_ENGINE_ID ef.engine_id +#define USER_FLOW_STATS ef.stats +#define USER_FLOW_UPDATE_STATS ef.update_stats + +#endif /* DPI_USER_H */ diff --git a/src/npf/dpi/ndpi.c b/src/npf/dpi/ndpi.c new file mode 100644 index 00000000..698154df --- /dev/null +++ b/src/npf/dpi/ndpi.c @@ -0,0 +1,597 @@ +/* + * Copyright (c) 2021 AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2021 Centre for Development of Telematics. All rights reserved. + * + * Copyright (c) 2021 Centre for Development of Telematics. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ + +/* + * ndpi.c + * + * nDPI implementation. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "compiler.h" +#include "npf/npf.h" +#include "npf/npf_session.h" +#include "npf/npf_rule_gen.h" +#include "npf/npf_ruleset.h" +#include "npf/npf_cache.h" +#include "npf/rproc/npf_rproc.h" +#include "npf/config/npf_config.h" +#include "npf_shim.h" +#include "npf/dpi/app_cmds.h" +#include "npf/dpi/dpi_internal.h" +#include "ndpi_main.h" +#include "vplane_log.h" +#include "util.h" +#include "vplane_debug.h" + +#define NDPI_PROTOCOLS_PATH "/opt/vyatta/etc/dpi/protocols.cfg" +#define NDPI_CATEGORIES_PATH "/opt/vyatta/etc/dpi/categories.cfg" + +#define NDPI_FLOW_PKT_MAX 10 + +#define DPI_INTERNAL_UNKNOWN (DPI_ENGINE_NDPI | NDPI_PROTOCOL_UNKNOWN) + +/* Count of all nDPI uses. */ +static uint32_t ndpi_refcount; + +/* Flag to enable/ disable nDPI protocol guessing. 
+ * 1 = enabled, 0 = disabled + */ +static uint8_t enable_protocol_guess = 1; + +static const char *dpi_ndpi_app_id_to_name(uint32_t app_id); + +struct ndpi_flow { + struct dpi_engine_flow ef; // Must be first. + struct ndpi_flow_struct *key; + bool error; + bool offloaded; + uint32_t application; + uint32_t protocol; + uint32_t type; + struct ndpi_id_struct *src_id; + struct ndpi_id_struct *dest_id; + rte_spinlock_t fl_lock; + struct rcu_head n_rcu_head; +}; + +#define NDPI_FLOW_ENGINE_ID ef.engine_id +#define NDPI_FLOW_STATS ef.stats +#define NDPI_FLOW_UPDATE_STATS ef.update_stats + +static struct ndpi_detection_module_struct *detection_modules[RTE_MAX_LCORE]; + +static inline uint16_t +dpi_to_ndpi_proto(uint32_t id) +{ + return (uint16_t) id & DPI_APP_MASK; +} + +static inline uint32_t +dpi_from_ndpi_proto(uint16_t id) +{ + return DPI_ENGINE_NDPI | id; +} + +/* Return true if the sum of the forward and backward packet counts + * for the given ndpi_flow is greater than or equal to the specified maximum. + */ +static bool +dpi_ndpi_flow_pkt_count_maxed(const struct ndpi_flow *flow, uint32_t max) +{ + if (!flow) + return false; + + const struct dpi_engine_flow *engine_flow = + (const struct dpi_engine_flow *)flow; + uint32_t cnt; + const struct dpi_flow_stats *ds; + + ds = dpi_flow_get_stats(engine_flow, true); + cnt = ds->pkts; + + ds = dpi_flow_get_stats(engine_flow, false); + cnt += ds->pkts; + + if (cnt >= max) + return true; + + return false; +} + +/** + * Process the given packet with nDPI. + * + * Finds and pass the start of the L3 header to nDPI, and set the flow to + * offloaded if the protocol is successfully determined. + * + * @return false if the given detection module is NULL, true otherwise. 
 */
static bool
dpi_ndpi_process(struct ndpi_detection_module_struct *detect,
		 struct rte_mbuf *mbuf, struct ndpi_flow *flow)
{
	if (unlikely(!detect))
		return false;

	/* Skip the L2 header: nDPI is handed the packet from L3 onwards.
	 * NOTE(review): data_len is computed in uint16_t, so it would wrap
	 * if the L2 length ever exceeded the segment's data length --
	 * confirm callers guarantee that cannot happen.
	 */
	uint16_t offset = dp_pktmbuf_l2_len(mbuf);
	uint16_t data_len = rte_pktmbuf_data_len(mbuf) - offset;

	const unsigned char *data =
		rte_pktmbuf_mtod(mbuf, const unsigned char *) + offset;

	ndpi_protocol proto = ndpi_detection_process_packet(detect, flow->key,
			data, data_len, (uint64_t) get_time_uptime(),
			flow->src_id, flow->dest_id);

	/* Offload the given ndpi_flow if the protocol is known,
	 * or if the sum of its forward and backward packet counts
	 * is greater than or equal to NDPI_FLOW_PKT_MAX.
	 */
	flow->offloaded =
		proto.master_protocol != NDPI_PROTOCOL_UNKNOWN ||
		proto.app_protocol != NDPI_PROTOCOL_UNKNOWN ||
		dpi_ndpi_flow_pkt_count_maxed(flow, NDPI_FLOW_PKT_MAX);

	if (flow->offloaded) {
		/* Give up protocol detection by nDPI. Update detected
		 * protocols in ndpi_protocol structure using protocols
		 * guessed by nDPI if enable_protocol_guess is set to 1.
		 */
		uint8_t proto_guessed = 0;
		proto = ndpi_detection_giveup(detect, flow->key,
				enable_protocol_guess, &proto_guessed);
	}

	/* Sometimes nDPI sets "app_protocol" without setting "master_protocol",
	 * so we see app 'TLS' over protocol 'Unknown' which doesn't make sense.
	 * In this case we swap the app and protocol to get 'Unknown over TLS'.
	 */
	if ((proto.master_protocol == NDPI_PROTOCOL_UNKNOWN) &&
	    (proto.app_protocol != NDPI_PROTOCOL_UNKNOWN)) {
		/* Swap */
		flow->protocol = dpi_from_ndpi_proto(proto.app_protocol);
		flow->application = dpi_from_ndpi_proto(NDPI_PROTOCOL_UNKNOWN);
	} else {
		/* Regular */
		flow->protocol = dpi_from_ndpi_proto(proto.master_protocol);
		flow->application = dpi_from_ndpi_proto(proto.app_protocol);
	}

	flow->type = ndpi_get_proto_category(detect, proto);

	/* NOTE(review): the debug line prints proto.category, whereas
	 * flow->type above is taken from ndpi_get_proto_category(); the two
	 * may differ -- confirm which one the log is meant to show.
	 */
	if (unlikely(dp_debug & DP_DBG_DPI)) {
		RTE_LOG(DEBUG, DATAPLANE, "ndpi: P='%s' A='%s' C='%s'\n",
			ndpi_get_proto_name(detection_modules[dp_lcore_id()],
				proto.master_protocol),
			ndpi_get_proto_name(detection_modules[dp_lcore_id()],
				proto.app_protocol),
			ndpi_category_get_name(detection_modules[dp_lcore_id()],
				proto.category));
	}

	return true;
}

/**
 * Process the given packet with nDPI
 *
 * The flow attached to the given session will be placed in an error state if
 * the DPI engine is invalid, i.e. if the current lcore has no detection
 * module.  The flow's spinlock is held while the packet is processed.
 *
 * @param engine_flow  The engine flow; cast to the enclosing struct ndpi_flow.
 * @param mbuf         The packet to inspect.
 * @param dir          Unused.
 *
 * @return false if the flow attached to the given session has an invalid key,
 * true otherwise.
 */
static bool
dpi_ndpi_process_pkt(struct dpi_engine_flow *engine_flow,
		     struct rte_mbuf *mbuf, int dir __unused)
{
	struct ndpi_flow *flow = (struct ndpi_flow *) engine_flow;

	if (unlikely(!flow->key))
		return false;

	rte_spinlock_lock(&flow->fl_lock);
	if (!dpi_ndpi_process(detection_modules[dp_lcore_id()],
				mbuf, flow)) {
		/* No detection module: mark the flow as errored and
		 * offloaded.
		 */
		flow->protocol = DPI_APP_ERROR;
		flow->offloaded = true;
		flow->error = true;
	}

	rte_spinlock_unlock(&flow->fl_lock);
	return true;
}

/* Set once dpi_ndpi_init() has built the detection modules. */
static bool initialised;

static bool dpi_ndpi_terminate(void);

/**
 * Initialise nDPI's detection modules.
 *
 * @return zero on success; a negative errno value (-ENOMEM) if a detection
 * module couldn't be initialised.
+ */ +static int +dpi_ndpi_init(void) +{ + unsigned int lcore; + NDPI_PROTOCOL_BITMASK all; + FILE *file; + + if (initialised) + return 0; + + set_ndpi_malloc(zmalloc_aligned); + NDPI_BITMASK_SET_ALL(all); + + FOREACH_DP_LCORE(lcore) { + struct ndpi_detection_module_struct *detect + = ndpi_init_detection_module(ndpi_no_prefs); + if (!detect) { + RTE_LOG(ERR, DATAPLANE, + "Failed to initialise detection module: %d\n", + lcore); + dpi_ndpi_terminate(); + return -ENOMEM; + } + ndpi_set_protocol_detection_bitmask2(detect, &all); + + if ((file = fopen(NDPI_PROTOCOLS_PATH, "r")) != NULL) { + ndpi_load_protocols_file(detect, NDPI_PROTOCOLS_PATH); + fclose(file); + } + + if ((file = fopen(NDPI_CATEGORIES_PATH, "r")) != NULL) { + ndpi_load_categories_file(detect, NDPI_CATEGORIES_PATH); + fclose(file); + } + + ndpi_finalize_initalization(detect); + + detection_modules[lcore] = detect; + } + + initialised = true; + return 0; +} + +/** + * Terminate nDPI's detection modules. + * + * @return true on success, false if couldn't initialise detection module. + */ +static bool +dpi_ndpi_terminate(void) +{ + unsigned int lcore; + RTE_LCORE_FOREACH(lcore) { + if (detection_modules[lcore]) { + ndpi_exit_detection_module(detection_modules[lcore]); + detection_modules[lcore] = NULL; + } + } + + initialised = false; + return true; +} + +/** + * Increment the refcount. + */ +static void +dpi_ndpi_refcount_inc(void) +{ + if (++ndpi_refcount == 0) + /* Overflowed */ + --ndpi_refcount; +} + +/** + * Decrement the refcount. + */ +static uint32_t +dpi_ndpi_refcount_dec(void) +{ + if (ndpi_refcount) + ndpi_refcount--; + + return ndpi_refcount; +} + +/** + * Free the dpi flow. Called from RCU callback. + */ +static void +dpi_ndpi_free(struct rcu_head *head) +{ + struct ndpi_flow *flow = caa_container_of(head, struct ndpi_flow, + n_rcu_head); + + ndpi_free_flow(flow->key); + ndpi_free(flow->src_id); + ndpi_free(flow->dest_id); + free(flow); +} + +/* + * Destroy the given flow, which can be NULL. 
+ */ +static void +dpi_ndpi_session_flow_destroy(struct dpi_engine_flow *dpi_flow) +{ + if (!dpi_flow) + return; + + struct ndpi_flow *flow = (struct ndpi_flow *) dpi_flow; + call_rcu(&flow->n_rcu_head, dpi_ndpi_free); +} + +/* + * Initialise the flow with the first packet of the given session, and attempt + * to determine the protocol of the flow with the packet. + */ +static int +dpi_ndpi_session_first_packet(struct npf_session *se __unused, + struct npf_cache *npc __unused, struct rte_mbuf *mbuf, + int dir, uint32_t data_len, struct dpi_engine_flow **dpi_flow) +{ + struct ndpi_flow *flow = zmalloc_aligned(sizeof(struct ndpi_flow)); + if (!flow) + return -ENOMEM; + + flow->NDPI_FLOW_ENGINE_ID = IANA_NDPI; + flow->key = NULL; + flow->application = DPI_APP_UNDETERMINED; + flow->protocol = DPI_APP_UNDETERMINED; + flow->type = DPI_APP_TYPE_NONE; + flow->error = false; + flow->offloaded = false; + rte_spinlock_init(&flow->fl_lock); + + flow->key = ndpi_flow_malloc(SIZEOF_FLOW_STRUCT); + if (!flow->key) + goto key_error; + + flow->src_id = ndpi_malloc(SIZEOF_ID_STRUCT); + if (!flow->src_id) + goto src_id_error; + + flow->dest_id = ndpi_malloc(SIZEOF_ID_STRUCT); + if (!flow->dest_id) + goto dest_id_error; + + if (data_len != 0 && !dpi_ndpi_process_pkt( + (struct dpi_engine_flow *)flow, mbuf, dir)) + return -EINVAL; + + *dpi_flow = (struct dpi_engine_flow *)flow; + return 0; + +dest_id_error: + ndpi_free(flow->src_id); + flow->src_id = NULL; + +src_id_error: + ndpi_free(flow->key); + flow->key = NULL; + flow->error = true; + flow->offloaded = true; + flow->protocol = DPI_APP_ERROR; + +key_error: + free(flow); + return -ENOMEM; +} + +static uint32_t +dpi_ndpi_flow_get_app_proto(struct dpi_engine_flow *dpi_flow) +{ + struct ndpi_flow *flow = (struct ndpi_flow *) dpi_flow; + return flow->protocol; +} + +static uint32_t +dpi_ndpi_flow_get_app_id(struct dpi_engine_flow *dpi_flow) +{ + struct ndpi_flow *flow = (struct ndpi_flow *) dpi_flow; + return flow->application; +} + 
+static uint32_t +dpi_ndpi_flow_get_app_type(struct dpi_engine_flow *dpi_flow) +{ + struct ndpi_flow *flow = (struct ndpi_flow *) dpi_flow; + return flow->type; +} + +static bool +dpi_ndpi_flow_get_offloaded(struct dpi_engine_flow *dpi_flow) +{ + struct ndpi_flow *flow = (struct ndpi_flow *) dpi_flow; + return flow->offloaded; +} + +static bool +dpi_ndpi_flow_get_error(struct dpi_engine_flow *dpi_flow) +{ + struct ndpi_flow *flow = (struct ndpi_flow *) dpi_flow; + return flow->error; +} + +static uint32_t +dpi_ndpi_app_name_to_id(const char *app_name) +{ + struct ndpi_detection_module_struct *ndpi_str = + detection_modules[dp_lcore_id()]; + + uint16_t id; + if (!ndpi_str) + id = NDPI_PROTOCOL_UNKNOWN; + else { + id = ndpi_get_protocol_id(ndpi_str, (char *)app_name); + + /* Work around that NDPI returns -1 for UNKNOWN. */ + if (id == (uint16_t) -1) + id = NDPI_PROTOCOL_UNKNOWN; + } + + return dpi_from_ndpi_proto(id); +} + +static const char * +dpi_ndpi_app_id_to_name(uint32_t app_id) +{ + struct ndpi_detection_module_struct *ndpi_str = + detection_modules[dp_lcore_id()]; + + if (!ndpi_str) + return "UNKNOWN"; + + return ndpi_get_proto_name(ndpi_str, dpi_to_ndpi_proto(app_id)); +} + +static uint32_t +dpi_ndpi_app_type_name_to_id(const char *type_name) +{ + struct ndpi_detection_module_struct *ndpi_str = + detection_modules[dp_lcore_id()]; + + if (!ndpi_str) + return NDPI_PROTOCOL_CATEGORY_UNSPECIFIED; + + /* Work around that NDPI returns 30 for empty names. */ + int id = *type_name + ? ndpi_get_category_id(ndpi_str, (char *)type_name) + : NDPI_PROTOCOL_CATEGORY_UNSPECIFIED; + + return (uint32_t) id; +} + +static const char * +dpi_ndpi_app_type_to_name(uint32_t app_type) +{ + struct ndpi_detection_module_struct *ndpi_str = + detection_modules[dp_lcore_id()]; + + if (!ndpi_str) + return "UNKNOWN"; + + return ndpi_category_get_name(ndpi_str, + (ndpi_protocol_category_t) app_type); +} + +/* + * This uses JSON to export the DPI information associated with the flow. 
+ */ +static bool +dpi_ndpi_info_json(struct dpi_engine_flow *dpi_engine_flow, json_writer_t *json) +{ + if (!ndpi_refcount) + /* The nDPI engine is not in use */ + return false; + + if (!dpi_engine_flow || !json) + return false; + + struct ndpi_flow *flow = (struct ndpi_flow *) dpi_engine_flow; + jsonw_start_object(json); + + const struct dpi_flow_stats *stats = + dpi_flow_get_stats(dpi_engine_flow, true); + + jsonw_uint_field(json, "forward-pkts", stats->pkts); + jsonw_uint_field(json, "forward-bytes", stats->bytes); + + stats = dpi_flow_get_stats(dpi_engine_flow, false); + jsonw_uint_field(json, "backward-pkts", stats->pkts); + jsonw_uint_field(json, "backward-bytes", stats->bytes); + + jsonw_string_field(json, "engine", "ndpi"); + + jsonw_string_field(json, "app-name", + dpi_ndpi_app_id_to_name(flow->application)); + jsonw_string_field(json, "proto-name", + dpi_ndpi_app_id_to_name(flow->protocol)); + jsonw_string_field(json, "type", + dpi_ndpi_app_type_to_name(flow->type)); + + jsonw_bool_field(json, "offloaded", flow->offloaded); + jsonw_bool_field(json, "error", flow->error); + + jsonw_end_object(json); + + return true; +} + +static size_t +dpi_ndpi_info_log(struct dpi_engine_flow *dpi_flow, char *buf, size_t buf_len) +{ + if (!ndpi_refcount) + /* The nDPI engine is not in use */ + return 0; + + if (!buf) + return 0; + + if (!dpi_flow) + return 0; + + size_t used_buf_len = 0; + const uint32_t app_id = dpi_ndpi_flow_get_app_id(dpi_flow); + const uint32_t app_proto = dpi_ndpi_flow_get_app_proto(dpi_flow); + const uint32_t app_type = dpi_ndpi_flow_get_app_type(dpi_flow); + + /* Say nothing, if we've nothing useful to say. 
*/ + if (no_app_id(app_id) && no_app_id(app_proto) && no_app_type(app_type)) + return 0; + + buf_app_printf(buf, &used_buf_len, buf_len, "engine=ndpi "); + + buf_app_printf(buf, &used_buf_len, buf_len, "app-name="); + dpi_app_id_to_buf(buf, &used_buf_len, buf_len, app_id, + dpi_ndpi_app_id_to_name); + + buf_app_printf(buf, &used_buf_len, buf_len, " proto-name="); + dpi_app_id_to_buf(buf, &used_buf_len, buf_len, app_proto, + dpi_ndpi_app_id_to_name); + + buf_app_printf(buf, &used_buf_len, buf_len, " type="); + dpi_app_type_to_buf(buf, &used_buf_len, buf_len, app_type, + dpi_ndpi_app_type_to_name); + + return used_buf_len; +} + +struct dpi_engine_procs ndpi_engine_procs = { + .id = IANA_NDPI, + .init = dpi_ndpi_init, + .terminate = dpi_ndpi_terminate, + .refcount_inc = dpi_ndpi_refcount_inc, + .refcount_dec = dpi_ndpi_refcount_dec, + .destructor = dpi_ndpi_session_flow_destroy, + .first_packet = dpi_ndpi_session_first_packet, + .process_pkt = dpi_ndpi_process_pkt, + .is_offloaded = dpi_ndpi_flow_get_offloaded, + .is_error = dpi_ndpi_flow_get_error, + .flow_get_proto = dpi_ndpi_flow_get_app_proto, + .flow_get_id = dpi_ndpi_flow_get_app_id, + .flow_get_type = dpi_ndpi_flow_get_app_type, + .name_to_id = dpi_ndpi_app_name_to_id, + .type_to_id = dpi_ndpi_app_type_name_to_id, + .info_json = dpi_ndpi_info_json, + .info_log = dpi_ndpi_info_log, +}; diff --git a/src/npf/dpi/npf_appdb.c b/src/npf/dpi/npf_appdb.c new file mode 100644 index 00000000..7fc69385 --- /dev/null +++ b/src/npf/dpi/npf_appdb.c @@ -0,0 +1,382 @@ +/* + * Copyright (c) 2020-2021, AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ + +/* + * Application name database. 
+ */ + +#include +#include "npf/config/npf_config.h" +#include "npf/dpi/dpi_internal.h" +#include "dpi/npf_appdb.h" + +#define APP_NAME_HT_SIZE 32 +#define APP_NAME_HT_MIN 32 +#define APP_NAME_HT_MAX 8192 +#define APP_NAME_HT_FLAGS (CDS_LFHT_AUTO_RESIZE | CDS_LFHT_ACCOUNTING) + +#define APP_ID_HT_SIZE 32 +#define APP_ID_HT_MIN 32 +#define APP_ID_HT_MAX 8192 +#define APP_ID_HT_FLAGS (CDS_LFHT_AUTO_RESIZE | CDS_LFHT_ACCOUNTING) + +/* Application database entry. */ +struct adb_entry { + char *ae_name; /* Name string */ + uint32_t ae_id; /* Application ID */ + uint32_t ae_refcount; /* Refcount */ + struct cds_lfht_node ae_name_ht_node; /* App name hash table */ + struct cds_lfht_node ae_id_ht_node; /* App ID hash table */ + struct rcu_head e_rcu_head; +}; + +/* Application database hash tables. Appls are hashed by name and by ID. */ +static struct cds_lfht *app_name_ht; /* Hash table of application names */ +static struct cds_lfht *app_id_ht; /* Hash table of application IDs */ + +static uint32_t name_hash_seed; + +static struct adb_entry *appdb_add(const char *name, uint32_t id); + +/* + * Match function for the app name hash table. + * Returns zero for a non-match, and non-zero for a match. + */ +static int +appdb_name_match(struct cds_lfht_node *ht_node, const void *data) +{ + struct adb_entry *entry = caa_container_of(ht_node, struct adb_entry, + ae_name_ht_node); + + return !strcmp(data, entry->ae_name); +} + +/* + * Lookup the given application name in the application DB. + * Return a pointer to the entry, or NULL if not found. 
+ */ +static struct adb_entry * +appdb_find_name(const char *name) +{ + struct cds_lfht_iter iter; + unsigned long hash = rte_jhash(name, strlen(name), + name_hash_seed); + + if (!app_name_ht) + return NULL; + + cds_lfht_lookup(app_name_ht, hash, appdb_name_match, name, &iter); + + struct cds_lfht_node *ht_node = cds_lfht_iter_get_node(&iter); + + if (ht_node) + return caa_container_of(ht_node, struct adb_entry, + ae_name_ht_node); + return NULL; +} + +/* + * Match function for the app id hash table. + * Returns zero for a non-match, and non-zero for a match. + */ +static int +appdb_id_match(struct cds_lfht_node *ht_node, const void *data) +{ + struct adb_entry *entry = caa_container_of(ht_node, struct adb_entry, + ae_id_ht_node); + const uint32_t *id = data; + + return *id == entry->ae_id; +} + +/* + * Lookup the given application ID in the application DB. + * Return a pointer to the entry, or NULL if not found. + */ +static struct adb_entry * +appdb_find_id(uint32_t app_id) +{ + struct cds_lfht_iter iter; + unsigned long hash = app_id; + + if (!app_id_ht) + return NULL; + + cds_lfht_lookup(app_id_ht, hash, appdb_id_match, &app_id, &iter); + + struct cds_lfht_node *ht_node = cds_lfht_iter_get_node(&iter); + + if (ht_node) + return caa_container_of(ht_node, struct adb_entry, + ae_id_ht_node); + return NULL; +} + +int +appdb_init(void) +{ + if (app_name_ht && app_id_ht) + /* Already init'd. */ + return 0; + + app_name_ht = cds_lfht_new(APP_NAME_HT_SIZE, + APP_NAME_HT_MIN, + APP_NAME_HT_MAX, + APP_NAME_HT_FLAGS, + NULL); + + if (!app_name_ht) + return -ENOMEM; + + app_id_ht = cds_lfht_new(APP_ID_HT_SIZE, + APP_ID_HT_MIN, + APP_ID_HT_MAX, + APP_ID_HT_FLAGS, + NULL); + + if (!app_id_ht) { + cds_lfht_destroy(app_name_ht, NULL); + app_name_ht = NULL; + return -ENOMEM; + } + + /* Add default entries. 
*/ + appdb_add("Unavailable", DPI_APP_USER_NA); + appdb_add("Error", DPI_APP_USER_ERROR); + appdb_add("Unknown", DPI_APP_USER_UNDETERMINED); + + name_hash_seed = random(); + return 0; +} + +void +appdb_destroy(void) +{ + if (app_name_ht) + cds_lfht_destroy(app_name_ht, NULL); + + if (app_id_ht) + cds_lfht_destroy(app_id_ht, NULL); + + app_name_ht = NULL; + app_id_ht = NULL; +} + +/* + * Return the application ID from the given adb_entry. + */ +uint32_t +appdb_entry_get_id(struct adb_entry *e) +{ + if (e) + return e->ae_id; + + return DPI_APP_USER_NA; +} + +/* + * Convert the given app DB name entry to JSON. + * This is a callback from appdb_name_walk. + */ +int +appdb_name_entry_to_json(json_writer_t *json, struct adb_entry *entry) +{ + char buf[11]; /* "id" is u32. "0x" + 8 digits + null = 11. */ + + jsonw_name(json, entry->ae_name); + jsonw_start_object(json); + snprintf(buf, sizeof(buf), "%#x", entry->ae_id); + jsonw_string_field(json, "id", buf); + jsonw_uint_field(json, "refcount", entry->ae_refcount); + jsonw_end_object(json); + + /* Tell the walker to continue. */ + return 0; +} + +/* + * Walk the app name hash. + */ +int +appdb_name_walk(json_writer_t *json, app_walker_t *callback) +{ + struct cds_lfht_iter iter; + struct adb_entry *entry; + int rc = 0; + + if (!app_name_ht) + return rc; + + cds_lfht_for_each_entry(app_name_ht, &iter, entry, ae_name_ht_node) { + rc = callback(json, entry); + if (rc) + break; + } + + return rc; +} + +/* + * Lookup the given application name in the application DB. + * Return the application ID, or DPI_APP_NA if not found. + */ +uint32_t +appdb_name_to_id(const char *name) +{ + struct adb_entry *entry = appdb_find_name(name); + + return entry ? entry->ae_id : DPI_APP_NA; +} + +/* Convert the given app DB ID entry to JSON. + * This is a callback from appdb_id_walk. + */ +int +appdb_id_entry_to_json(json_writer_t *json, struct adb_entry *entry) +{ + char buf[11]; /* "id" is u32. "0x" + 8 digits + null = 11. 
*/ + + snprintf(buf, sizeof(buf), "%#x", entry->ae_id); + jsonw_name(json, buf); + jsonw_start_object(json); + jsonw_string_field(json, "name", entry->ae_name); + jsonw_uint_field(json, "refcount", entry->ae_refcount); + jsonw_end_object(json); + + /* Tell the walker to continue. */ + return 0; +} + +/* Walk the app ID hash. */ +int +appdb_id_walk(json_writer_t *json, app_walker_t *callback) +{ + struct cds_lfht_iter iter; + struct adb_entry *entry; + int rc = 0; + + if (!app_id_ht) + return rc; + + cds_lfht_for_each_entry(app_id_ht, &iter, entry, ae_id_ht_node) { + rc = callback(json, entry); + if (rc) + break; + } + + return rc; +} + +/* + * Lookup the given application ID in the application DB. + * Return the application name, or NULL if not found. + */ +const char * +appdb_id_to_name(uint32_t app_id) +{ + struct adb_entry *entry = appdb_find_id(app_id); + + return entry ? entry->ae_name : NULL; +} + +static struct adb_entry * +appdb_add(const char *name, uint32_t id) +{ + struct adb_entry *entry = zmalloc_aligned(sizeof(struct adb_entry)); + if (!entry) + return NULL; + + entry->ae_name = strdup(name); + if (!entry->ae_name) { + free(entry); + return NULL; + } + + entry->ae_id = id; + entry->ae_refcount = 1; + + /* Add to app name hash table. */ + cds_lfht_node_init(&entry->ae_name_ht_node); + + /* Make an aligned copy of 'name' that we can hash on. */ + char __name[RTE_ALIGN(strlen(name), 4)] + __rte_aligned(sizeof(uint32_t)); + + memcpy(__name, name, strlen(name)); + unsigned long name_hash = rte_jhash(__name, strlen(name), + name_hash_seed); + cds_lfht_add(app_name_ht, name_hash, &entry->ae_name_ht_node); + + /* Add to app ID hash table. */ + cds_lfht_node_init(&entry->ae_id_ht_node); + cds_lfht_add(app_id_ht, entry->ae_id, &entry->ae_id_ht_node); + + return entry; +} + +/* + * Find an existing app DB entry with the given name and increment its refcount. + * If not found, then create a new entry. 
+ */ +struct adb_entry * +appdb_find_or_alloc(const char *name) +{ + static uint32_t user_app_id = DPI_APP_USER_BASE; + + /* No name? No entry. */ + if (!name || strlen(name) < 1) + return NULL; + + /* First, search for an existing entry. */ + struct adb_entry *entry = appdb_find_name(name); + if (entry) { + /* We only need to bump the refcount + * for an existing entry. + */ + entry->ae_refcount++; + return entry; + } + + /* Not found, so we need to create a new app DB entry. */ + + if (user_app_id == 0) + /* All the IDs have been consumed. */ + return NULL; + + return appdb_add(name, user_app_id++); +} + +/* + * Free the DB entry. Called from RCU callback. + */ +static void +appdb_entry_free(struct rcu_head *head) +{ + struct adb_entry *entry = caa_container_of(head, struct adb_entry, + e_rcu_head); + + free(entry->ae_name); + free(entry); +} + +/* + * Decrement the given appDB entry's refcount. + * If zero then remove the entry from the appDB. + */ +bool +appdb_dealloc(struct adb_entry *entry) +{ + if (!entry) + return false; + + if (--entry->ae_refcount == 0) { + cds_lfht_del(app_name_ht, &entry->ae_name_ht_node); + cds_lfht_del(app_id_ht, &entry->ae_id_ht_node); + call_rcu(&entry->e_rcu_head, appdb_entry_free); + } + + return true; +} diff --git a/src/npf/dpi/npf_appdb.h b/src/npf/dpi/npf_appdb.h new file mode 100644 index 00000000..d9d727ca --- /dev/null +++ b/src/npf/dpi/npf_appdb.h @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2020 AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ + +#ifndef NPF_APPDB_H +#define NPF_APPDB_H + +#include +#include +#include "json_writer.h" + +/** + * Walker prototype. + */ +struct adb_entry; +typedef int (app_walker_t)(json_writer_t *json, struct adb_entry *entry); + +/** + * Initialise the application database. + * Returns 0 on success; errno on failure. + */ +int appdb_init(void); + +/** + * Destroy the application database. 
+ */ +void appdb_destroy(void); + +/** + * Write the JSON representation of the application database name entry, given + * with the data pointer. + * Intended for use with appdb_name_walk. Therefore, returns 0 on success. + */ +int appdb_name_entry_to_json(json_writer_t *json, struct adb_entry *entry); + +/** + * Walk the application database name entries. + */ +int appdb_name_walk(json_writer_t *json, app_walker_t *callback); + +/* + * Lookup the given application name in the application DB. + * Return the application ID, or DPI_APP_NA if not found. + */ +uint32_t appdb_name_to_id(const char *name); + +/** + * Write the JSON representation of the application database ID entry, given + * with the data pointer. + * Intended for use with appdb_id_walk. Therefore, returns 0 on success. + */ +int appdb_id_entry_to_json(json_writer_t *json, struct adb_entry *entry); + +/** + * Walk the application database ID entries. + */ +int appdb_id_walk(json_writer_t *json, app_walker_t *callback); + +/* + * Lookup the given application ID in the application DB. + * Return the application name, or NULL if not found. + */ +const char *appdb_id_to_name(uint32_t app_id); + +/* + * Find an existing app DB entry with the given name and increment its + * refcount. If not found, then create a new entry. + */ +struct adb_entry *appdb_find_or_alloc(const char *name); + +/* + * Decrement the given appDB entry's refcount. + * If zero then remove the entry from the appDB. + */ +bool appdb_dealloc(struct adb_entry *entry); + +/** + * Return the application ID from the given adb_entry, + * or return DPI_APP_USER_NA if the given entry doesn't exist. + */ +uint32_t appdb_entry_get_id(struct adb_entry *e); + +#endif /* NPF_APPDB_H */ diff --git a/src/npf/dpi/npf_typedb.c b/src/npf/dpi/npf_typedb.c new file mode 100644 index 00000000..244ca4c7 --- /dev/null +++ b/src/npf/dpi/npf_typedb.c @@ -0,0 +1,283 @@ +/* + * Copyright (c) 2020-2021, AT&T Intellectual Property. All rights reserved. 
+ * + * SPDX-License-Identifier: LGPL-2.1-only + */ + +/* + * Application type database. + */ + +#include +#include "npf/config/npf_config.h" +#include "npf/dpi/dpi_internal.h" +#include "npf/dpi/npf_typedb.h" + +#define TYPE_NAME_HT_SIZE 32 +#define TYPE_NAME_HT_MIN 32 +#define TYPE_NAME_HT_MAX 8192 +#define TYPE_NAME_HT_FLAGS (CDS_LFHT_AUTO_RESIZE | CDS_LFHT_ACCOUNTING) + +#define TYPE_ID_HT_SIZE 32 +#define TYPE_ID_HT_MIN 32 +#define TYPE_ID_HT_MAX 8192 +#define TYPE_ID_HT_FLAGS (CDS_LFHT_AUTO_RESIZE | CDS_LFHT_ACCOUNTING) + +/* Type database entry. */ +struct tdb_entry { + char *te_name; /* Name string */ + uint32_t te_id; /* Type ID */ + uint32_t te_refcount; /* Refcount */ + struct cds_lfht_node te_name_ht_node; /* Type name hash table */ + struct cds_lfht_node te_id_ht_node; /* Type ID hash table */ + struct rcu_head t_rcu_head; +}; + +/* Application database hash tables. Appls are hashed by name and by ID. */ +static struct cds_lfht *type_name_ht; /* Hash table of type names */ +static struct cds_lfht *type_id_ht; /* Hash table of type IDs */ + +static uint32_t name_hash_seed; + +/* + * Match function for the type name hash table. + * Returns zero for a non-match, and non-zero for a match. + */ +static int +typedb_name_match(struct cds_lfht_node *ht_node, const void *data) +{ + struct tdb_entry *entry = caa_container_of(ht_node, struct tdb_entry, + te_name_ht_node); + + return !strcmp(data, entry->te_name); +} + +/* + * Lookup the given type name in the type DB. + * Return a pointer to the entry, or NULL if not found. 
+ */ +static struct tdb_entry * +typedb_find_name(const char *name) +{ + struct cds_lfht_iter iter; + unsigned long hash = rte_jhash(name, strlen(name), + name_hash_seed); + + if (!type_name_ht) + return NULL; + + cds_lfht_lookup(type_name_ht, hash, typedb_name_match, name, &iter); + + struct cds_lfht_node *ht_node = cds_lfht_iter_get_node(&iter); + + if (ht_node) + return caa_container_of(ht_node, struct tdb_entry, + te_name_ht_node); + return NULL; +} + +/* + * Match function for the type id hash table. + * Returns zero for a non-match, and non-zero for a match. + */ +static int +typedb_id_match(struct cds_lfht_node *ht_node, const void *data) +{ + struct tdb_entry *entry = caa_container_of(ht_node, struct tdb_entry, + te_id_ht_node); + const uint32_t *id = data; + + return *id == entry->te_id; +} + +/* + * Lookup the given type ID in the type DB. + * Return a pointer to the entry, or NULL if not found. + */ +static struct tdb_entry * +typedb_find_id(uint32_t type_id) +{ + struct cds_lfht_iter iter; + unsigned long hash = type_id; + + if (!type_id_ht) + return NULL; + + cds_lfht_lookup(type_id_ht, hash, typedb_id_match, &type_id, &iter); + + struct cds_lfht_node *ht_node = cds_lfht_iter_get_node(&iter); + + if (ht_node) + return caa_container_of(ht_node, struct tdb_entry, + te_id_ht_node); + return NULL; +} + +int +typedb_init(void) +{ + if (type_name_ht && type_id_ht) + /* Already init'd. 
*/ + return 0; + + type_name_ht = cds_lfht_new(TYPE_NAME_HT_SIZE, + TYPE_NAME_HT_MIN, + TYPE_NAME_HT_MAX, + TYPE_NAME_HT_FLAGS, + NULL); + + if (!type_name_ht) + return -ENOMEM; + + type_id_ht = cds_lfht_new(TYPE_ID_HT_SIZE, + TYPE_ID_HT_MIN, + TYPE_ID_HT_MAX, + TYPE_ID_HT_FLAGS, + NULL); + + if (!type_id_ht) { + cds_lfht_destroy(type_name_ht, NULL); + type_name_ht = NULL; + return -ENOMEM; + } + + name_hash_seed = random(); + return 0; +} + +/* + * Return the type ID from the given tdb_entry + */ +uint32_t +typedb_entry_get_id(struct tdb_entry *e) +{ + if (e) + return e->te_id; + + return DPI_APP_TYPE_NONE; +} + +/* + * Lookup the given type name in the type DB. + * Return the type ID, or DPI_APP_TYPE_NONE if not found. + */ +uint32_t +typedb_name_to_id(const char *name) +{ + struct tdb_entry *entry = typedb_find_name(name); + + return entry ? entry->te_id : DPI_APP_TYPE_NONE; +} + +/* + * Lookup the given type ID in the type DB. + * Return the type name, or NULL if not found. + */ +const char * +typedb_id_to_name(uint32_t type_id) +{ + if (type_id == DPI_APP_TYPE_NONE) + return (char *)"None"; + + struct tdb_entry *entry = typedb_find_id(type_id); + + return entry ? entry->te_name : NULL; +} + +static struct tdb_entry * +typedb_add(const char *name, uint32_t id) +{ + struct tdb_entry *entry = zmalloc_aligned(sizeof(struct tdb_entry)); + if (!entry) + return NULL; + + entry->te_name = strdup(name); + if (!entry->te_name) { + free(entry); + return NULL; + } + + entry->te_id = id; + entry->te_refcount = 1; + + /* Add to type name hash table. */ + cds_lfht_node_init(&entry->te_name_ht_node); + + /* Make an aligned copy of 'name' that we can hash on. */ + char __name[RTE_ALIGN(strlen(name), 4)] + __rte_aligned(sizeof(uint32_t)); + + memcpy(__name, name, strlen(name)); + unsigned long name_hash = rte_jhash(__name, strlen(name), + name_hash_seed); + cds_lfht_add(type_name_ht, name_hash, &entry->te_name_ht_node); + + /* Add to type ID hash table. 
*/ + cds_lfht_node_init(&entry->te_id_ht_node); + cds_lfht_add(type_id_ht, entry->te_id, &entry->te_id_ht_node); + + return entry; +} + +/* + * Find an existing type DB entry with the given name and increment its + * refcount. If not found, then create a new entry. + */ +struct tdb_entry * +typedb_find_or_alloc(const char *name) +{ + static uint32_t user_type_id = DPI_APP_BASE; + + /* No name? No entry. */ + if (!name || strlen(name) < 1) + return NULL; + + /* First, search for an existing entry. */ + struct tdb_entry *entry = typedb_find_name(name); + if (entry) { + /* We only need to bump the refcount + * for an existing entry. + */ + entry->te_refcount++; + return entry; + } + + /* Not found, so we need to create a new type DB entry. */ + + if (user_type_id == 0) + /* All the IDs have been consumed. */ + return NULL; + + return typedb_add(name, user_type_id++); +} + +/* + * Free the DB entry. Called from RCU callback. + */ +static void typedb_entry_free(struct rcu_head *head) +{ + struct tdb_entry *entry = caa_container_of(head, struct tdb_entry, + t_rcu_head); + + free(entry->te_name); + free(entry); +} + +/* + * Decrement the given type DB entry's refcount. + * If zero then remove the entry from the typeDB. + */ +bool +typedb_dealloc(struct tdb_entry *entry) +{ + if (!entry) + return false; + + if (--entry->te_refcount == 0) { + cds_lfht_del(type_name_ht, &entry->te_name_ht_node); + cds_lfht_del(type_id_ht, &entry->te_id_ht_node); + call_rcu(&entry->t_rcu_head, typedb_entry_free); + } + + return true; +} diff --git a/src/npf/dpi/npf_typedb.h b/src/npf/dpi/npf_typedb.h new file mode 100644 index 00000000..6f2b6921 --- /dev/null +++ b/src/npf/dpi/npf_typedb.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2020 AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ + +#ifndef NPF_TYPEDB_H +#define NPF_TYPEDB_H + +#include +#include +#include "json_writer.h" + +/** + * Walker prototype. 
+ */ +struct tdb_entry; +typedef int (type_walker_t)(json_writer_t *json, struct tdb_entry *entry); + +/** + * Initialise the type database. + * Returns zero on success; negative errno on failure. + */ +int typedb_init(void); + +/* + * Lookup the given type name in the type DB. + * Return the type ID, or DPI_APP_TYPE_NONE if not found. + */ +uint32_t typedb_name_to_id(const char *name); + +/* + * Lookup the given type ID in the type DB. + * Return the type name, or NULL if not found. + */ +const char *typedb_id_to_name(uint32_t type_id); + +/* + * Find an existing type DB entry with the given name and increment its + * refcount. If not found, then create a new entry. + */ +struct tdb_entry *typedb_find_or_alloc(const char *name); + +/* + * Decrement the given typeDB entry's refcount. + * If zero then remove the entry from the typeDB. + */ +bool typedb_dealloc(struct tdb_entry *entry); + +/** + * Return the type ID from the given tdb_entry, + * or return DPI_APP_TYPE_NONE if the given entry doesn't exist. + */ +uint32_t typedb_entry_get_id(struct tdb_entry *e); + +#endif /* NPF_TYPEDB_H */ diff --git a/src/npf/fragment/ipv4_frag_tbl.c b/src/npf/fragment/ipv4_frag_tbl.c index f7672520..0c5d0052 100644 --- a/src/npf/fragment/ipv4_frag_tbl.c +++ b/src/npf/fragment/ipv4_frag_tbl.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2011-2016 by Brocade Communications Systems, Inc. * All rights reserved. */ @@ -56,7 +56,7 @@ #include "snmp_mib.h" #include "util.h" #include "vplane_log.h" -#include "vrf.h" +#include "vrf_internal.h" struct cds_lfht; diff --git a/src/npf/fragment/ipv4_frag_tbl.h b/src/npf/fragment/ipv4_frag_tbl.h index b6056027..f1dcc9e1 100644 --- a/src/npf/fragment/ipv4_frag_tbl.h +++ b/src/npf/fragment/ipv4_frag_tbl.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved.
+ * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2011-2016 by Brocade Communications Systems, Inc. * All rights reserved. */ @@ -50,7 +50,7 @@ #include #include "urcu.h" -#include "vrf.h" +#include "vrf_internal.h" struct cds_lfht; struct vrf; diff --git a/src/npf/fragment/ipv4_rsmbl.c b/src/npf/fragment/ipv4_rsmbl.c index ed85a078..75452d8d 100644 --- a/src/npf/fragment/ipv4_rsmbl.c +++ b/src/npf/fragment/ipv4_rsmbl.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2011-2016 by Brocade Communications Systems, Inc. * All rights reserved. */ @@ -54,10 +54,12 @@ #include "ip_funcs.h" #include "ipv4_frag_tbl.h" #include "ipv4_rsmbl.h" -#include "pktmbuf.h" +#include "pipeline.h" +#include "pl_fused_gen.h" +#include "pktmbuf_internal.h" #include "snmp_mib.h" #include "util.h" -#include "vrf.h" +#include "vrf_internal.h" struct cds_lfht; @@ -175,33 +177,72 @@ ipv4_frag_process(struct cds_lfht *frag_tables, struct ipv4_frag_pkt *fp, { uint32_t idx = 0; vrfid_t vrf_id = pktmbuf_get_vrf(mb); + struct pl_packet pkt = { + .mbuf = mb, + .l2_pkt_type = pkt_mbuf_get_l2_traffic_type(mb), + }; + unsigned int i; /* Lock the frag pkt */ rte_spinlock_lock(&fp->pkt_lock); - fp->frag_size += len; - if (ofs == 0) { - /* this is the first fragment. */ - idx = (fp->frags[FIRST_FRAG_IDX].mb == NULL) ? - FIRST_FRAG_IDX : UINT32_MAX; + /* is this a repeat of the first fragment? */ + if (fp->frags[FIRST_FRAG_IDX].mb == NULL) { + idx = FIRST_FRAG_IDX; + } else { + rte_pktmbuf_free(mb); + mb = NULL; + goto done; + } } else if (more_frags == 0) { /* this is the last fragment. */ fp->total_size = ofs + len; idx = (fp->frags[LAST_FRAG_IDX].mb == NULL) ? LAST_FRAG_IDX : UINT32_MAX; } else { - /* this is the intermediate fragment. */ + /* this is an intermediate fragment. 
*/ idx = fp->last_idx; + /* + * Check if it's a duplicate intermediate fragment + * by checking the offset of all previous fragments + */ + for (i = 0; i < fp->last_idx; i++) { + if (fp->frags[i].ofs == ofs) { + rte_pktmbuf_free(mb); + mb = NULL; + goto done; + } + + if (fp->frags[i].ofs < ofs && + (fp->frags[i].ofs + fp->frags[i].len) > ofs) { + /* + * We already have a fragment that includes + * the start byte of this one. + */ + rte_pktmbuf_free(mb); + mb = NULL; + goto done; + } + + if (fp->frags[i].ofs < (ofs + len) && + (fp->frags[i].ofs + fp->frags[i].len) > + (ofs + len)) { + /* + * We already have a fragment that includes + * the end byte of this one. + */ + rte_pktmbuf_free(mb); + mb = NULL; + goto done; + } + } + if (idx < ARRAY_SIZE(fp->frags)) fp->last_idx++; } - /* - * errorneous packet: either exceeed max allowed number of fragments, - * or duplicate first/last fragment encountered. - * TODO: Could issue ICMP Packet Too Big. Probably not necessary - */ + /* erroneous packet: too many fragments or a duplicate last fragment */ if (idx >= ARRAY_SIZE(fp->frags)) { ipv4_frag_free(frag_tables, fp); IPSTAT_INC(vrf_id, IPSTATS_MIB_REASMFAILS); @@ -210,15 +251,22 @@ ipv4_frag_process(struct cds_lfht *frag_tables, struct ipv4_frag_pkt *fp, goto done; } + if (unlikely(!pipeline_fused_l2_consume(&pkt))) { + mb = NULL; + goto done; + } + IPSTAT_INC(vrf_id, IPSTATS_MIB_REASMREQDS); /* Remove session if we enqueue or reassemble */ pktmbuf_mdata_clear(mb, PKT_MDATA_SESSION_SENTRY); + fp->frag_size += len; fp->frags[idx].ofs = ofs; fp->frags[idx].len = len; fp->frags[idx].mb = mb; + mb = NULL; /* not all fragments are collected yet. */ @@ -314,7 +362,7 @@ struct rte_mbuf *ipv4_handle_fragment(struct rte_mbuf *m) /* prepare mbuf: setup l2_len/l3_len.
*/ ip = iphdr(m); hlen = ip->ihl << 2; - m->l2_len = ETHER_HDR_LEN; + m->l2_len = RTE_ETHER_HDR_LEN; m->l3_len = hlen; mo = ipv4_frag_mbuf(m); diff --git a/src/npf/fragment/ipv4_rsmbl.h b/src/npf/fragment/ipv4_rsmbl.h index 8e5c4628..1cf46157 100644 --- a/src/npf/fragment/ipv4_rsmbl.h +++ b/src/npf/fragment/ipv4_rsmbl.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2011-2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -9,7 +9,7 @@ #ifndef IPV4_RSMBL_H #define IPV4_RSMBL_H -#include "vrf.h" +#include "vrf_internal.h" struct vrf; diff --git a/src/npf/fragment/ipv6_rsmbl.c b/src/npf/fragment/ipv6_rsmbl.c index 52a9666f..4b5a2896 100644 --- a/src/npf/fragment/ipv6_rsmbl.c +++ b/src/npf/fragment/ipv6_rsmbl.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2011-2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -25,10 +25,10 @@ #include "npf/fragment/ipv6_rsmbl_tbl.h" #include "npf/npf_cache.h" #include "npf/npf_session.h" -#include "pktmbuf.h" +#include "pktmbuf_internal.h" #include "snmp_mib.h" /* IPv6 stats */ #include "util.h" -#include "vrf.h" +#include "vrf_internal.h" struct cds_lfht; @@ -228,6 +228,11 @@ ipv6_frag_process(struct cds_lfht *frag_table, struct ipv6_frag_pkt *fp, struct ip6_hdr *ip6; uint32_t idx = 0; vrfid_t vrf_id = pktmbuf_get_vrf(m); + struct pl_packet pkt = { + .mbuf = m, + .l2_pkt_type = pkt_mbuf_get_l2_traffic_type(m), + }; + unsigned int i; /* * Payload length (everything after the initial IPv6 hdr) @@ -265,8 +270,13 @@ ipv6_frag_process(struct cds_lfht *frag_table, struct ipv6_frag_pkt *fp, /* * First fragment */ - idx = (fp->frags[FIRST_FRAG_IDX].mb == NULL) ? 
- FIRST_FRAG_IDX : UINT32_MAX; + if (fp->frags[FIRST_FRAG_IDX].mb == NULL) { + idx = FIRST_FRAG_IDX; + } else { + rte_pktmbuf_free(m); + m = NULL; + goto done; + } /* * 'fp->frag_size' is the accumulated number of bytes * that will comprise the reassembled packet. @@ -280,7 +290,7 @@ ipv6_frag_process(struct cds_lfht *frag_table, struct ipv6_frag_pkt *fp, fp->last_unfrg_hlen = npc->last_unfrg_hlen; fp->last_unfrg_hofs = npc->last_unfrg_hofs; - fp->first_frg_proto = npc->npc_next_proto; + fp->first_frg_proto = npf_cache_ipproto(npc); } else if (!npc->fh_more) { /* * Last fragment @@ -301,6 +311,43 @@ ipv6_frag_process(struct cds_lfht *frag_table, struct ipv6_frag_pkt *fp, * Intermediate fragment */ idx = fp->last_idx; + /* + * Check if it's a duplicate intermediate fragment + * by checking the offset of all previous fragments + */ + for (i = 0; i < fp->last_idx; i++) { + if (fp->frags[i].ofs == npc->fh_offset) { + rte_pktmbuf_free(m); + m = NULL; + goto done; + } + + if (fp->frags[i].ofs < npc->fh_offset && + (fp->frags[i].ofs + fp->frags[i].len) > + npc->fh_offset) { + /* + * We already have a fragment that includes + * the start byte of this one. + */ + rte_pktmbuf_free(m); + m = NULL; + goto done; + } + + if (fp->frags[i].ofs < + (npc->fh_offset + (plen - extra_hlen)) && + (fp->frags[i].ofs + fp->frags[i].len) > + (npc->fh_offset + (plen - extra_hlen))) { + /* + * We already have a fragment that includes + * the end byte of this one. + */ + rte_pktmbuf_free(m); + m = NULL; + goto done; + } + + } if (idx < IPV6_MAX_FRAGS_PER_SET) fp->last_idx++; @@ -315,8 +362,8 @@ ipv6_frag_process(struct cds_lfht *frag_table, struct ipv6_frag_pkt *fp, fp->mtu = plen + sizeof(struct ip6_hdr); /* - * errorneous packet: either exceeed max allowed number of - * fragments, or duplicate first/last fragment encountered. + * erroneous packet: exceeded max allowed number of + * fragments.
*/ if (idx >= ARRAY_SIZE(fp->frags) || fp->frags[idx].mb != NULL) { @@ -327,12 +374,18 @@ ipv6_frag_process(struct cds_lfht *frag_table, struct ipv6_frag_pkt *fp, goto done; } + if (unlikely(!pipeline_fused_l2_consume(&pkt))) { + m = NULL; + goto done; + } + fp->frags[idx].ofs = npc->fh_offset; /* Payload bytes in this fragment */ fp->frags[idx].len = plen - extra_hlen; fp->frags[idx].mb = m; pktmbuf_mdata_clear(m, PKT_MDATA_SESSION_SENTRY); + m = NULL; if (likely(fp->frag_size < fp->total_size)) { @@ -361,11 +414,11 @@ ipv6_frag_process(struct cds_lfht *frag_table, struct ipv6_frag_pkt *fp, npc->last_unfrg_hofs = fp->last_unfrg_hofs; /* - * Update the caches next proto field, since - * the next proto is no longer a fragmentation + * Update the caches final proto field, since + * the final proto is no longer a fragmentation * header. */ - npc->npc_next_proto = fp->first_frg_proto; + npc->npc_proto_final = fp->first_frg_proto; /* * Retrieve the senders mtu value that we @@ -411,8 +464,9 @@ ipv6_frag_mbuf(struct rte_mbuf *m, npf_cache_t *npc, /* * Key is 9 words - src, dst and fragmentation identifier */ - assert(sizeof(key.src_dst) == sizeof(ip6->ip6_src) + - sizeof(ip6->ip6_dst)); + static_assert(sizeof(key.src_dst) == sizeof(ip6->ip6_src) + + sizeof(ip6->ip6_dst), + "ipv6 frag key is wrong size"); memcpy(key.src_dst, &ip6->ip6_src, sizeof(ip6->ip6_src)); memcpy(&key.src_dst[IPV6_FRAG_KEY_WORDS/2], &ip6->ip6_dst, sizeof(ip6->ip6_dst)); @@ -456,7 +510,7 @@ struct rte_mbuf * ipv6_handle_fragment(struct rte_mbuf *m, uint16_t *npf_flag) { npf_cache_t *npc; - uint16_t gleaned_mtu = ETHER_MTU; + uint16_t gleaned_mtu = RTE_ETHER_MTU; if (!m || (*npf_flag & NPF_FLAG_CACHE_EMPTY) != 0) return m; diff --git a/src/npf/fragment/ipv6_rsmbl.h b/src/npf/fragment/ipv6_rsmbl.h index 99759707..e05bec17 100644 --- a/src/npf/fragment/ipv6_rsmbl.h +++ b/src/npf/fragment/ipv6_rsmbl.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. 
+ * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2011-2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -12,7 +12,7 @@ #include #include "ipv4_rsmbl.h" /* LAST_FRAG_IDX etc. */ -#include "vrf.h" +#include "vrf_internal.h" struct rte_mbuf; diff --git a/src/npf/fragment/ipv6_rsmbl_tbl.c b/src/npf/fragment/ipv6_rsmbl_tbl.c index 5a14606e..a2a6fe64 100644 --- a/src/npf/fragment/ipv6_rsmbl_tbl.c +++ b/src/npf/fragment/ipv6_rsmbl_tbl.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2011-2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -21,7 +21,7 @@ #include "npf/fragment/ipv6_rsmbl_tbl.h" #include "util.h" #include "vplane_log.h" -#include "vrf.h" +#include "vrf_internal.h" struct cds_lfht; diff --git a/src/npf/fragment/ipv6_rsmbl_tbl.h b/src/npf/fragment/ipv6_rsmbl_tbl.h index a97e1b3d..87f5a18b 100644 --- a/src/npf/fragment/ipv6_rsmbl_tbl.h +++ b/src/npf/fragment/ipv6_rsmbl_tbl.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2011-2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -14,7 +14,7 @@ #include "npf/fragment/ipv6_rsmbl.h" #include "urcu.h" -#include "vrf.h" +#include "vrf_internal.h" struct cds_lfht; struct vrf; diff --git a/src/npf/grouper2.c b/src/npf/grouper2.c index bb3f5c6b..6c3fe05d 100644 --- a/src/npf/grouper2.c +++ b/src/npf/grouper2.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2011-2016 by Brocade Communications Systems, Inc. * All rights reserved. 
* diff --git a/src/npf/grouper2.h b/src/npf/grouper2.h index 25a06c6e..f20accd6 100644 --- a/src/npf/grouper2.h +++ b/src/npf/grouper2.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2011-2016 by Brocade Communications Systems, Inc. * All rights reserved. * diff --git a/src/npf/nat/nat_cmd_cfg.c b/src/npf/nat/nat_cmd_cfg.c index fde23fbe..a8a4cf52 100644 --- a/src/npf/nat/nat_cmd_cfg.c +++ b/src/npf/nat/nat_cmd_cfg.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2019-2020, AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only */ @@ -38,7 +38,7 @@ #include "commands.h" #include "compiler.h" -#include "config.h" +#include "config_internal.h" #include "util.h" #include "vplane_log.h" diff --git a/src/npf/nat/nat_cmd_op.c b/src/npf/nat/nat_cmd_op.c index 77370928..ee937995 100644 --- a/src/npf/nat/nat_cmd_op.c +++ b/src/npf/nat/nat_cmd_op.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2019-2020, AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only */ @@ -14,7 +14,7 @@ #include "commands.h" #include "compiler.h" -#include "config.h" +#include "config_internal.h" #include "util.h" #include "vplane_log.h" diff --git a/src/npf/nat/nat_pool.c b/src/npf/nat/nat_pool.c index 1704ff72..64ce59fe 100644 --- a/src/npf/nat/nat_pool.c +++ b/src/npf/nat/nat_pool.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2019-2021, AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only */ @@ -27,7 +27,7 @@ #include "npf/nat/nat_cmd_cfg.h" #include "npf/nat/nat_pool_event.h" #include "npf/nat/nat_pool.h" - +#include "npf/cgnat/cgn_log.h" /* * NAT pool. 
Each pool contains: @@ -40,7 +40,7 @@ * nat_pool.c handles: * * 1. nat pool configuration - * 2. nat pool table management (name based lookup; master thread only) + * 2. nat pool table management (name based lookup; main thread only) * * nat pool entries are stored in a hash table for lookup during * config. @@ -76,7 +76,7 @@ static struct cds_lfht *nat_pool_ht; * NAT pool configuration */ struct nat_pool_cfg { - char *np_name; + char np_name[NAT_POOL_NAME_MAX]; enum nat_pool_type np_type; /* Config for address allocation */ @@ -99,13 +99,25 @@ struct nat_pool_cfg { struct nat_pool_range np_range[NAT_POOL_MAX_RANGES]; /* Address group name */ - char *np_blacklist_name; + const char *np_blocklist_name; }; struct match { const char *name; }; +/* NAT pool threshold, time, and timer */ +static int32_t np_threshold_cfg; /* configured percent */ +static uint32_t np_threshold_time; + +/* Forward references */ +static void np_threshold_timer_expiry( + struct rte_timer *timer __unused, + void *arg); +static void nat_pool_client_counts(struct nat_pool *np, uint32_t *nusers, + uint64_t *naddrs); +static void nat_pool_destroy(struct nat_pool *np, bool rcu_free); + /* Get pool name */ char *nat_pool_name(struct nat_pool *np) { @@ -125,32 +137,27 @@ bool nat_pool_log_pba(struct nat_pool *np) return !np || np->np_log_pba; } -/* - * Increment number of users of this pool. Typically this is a count of the - * number of subscribers addresses in policies or rules that use this pool. - */ -void nat_pool_incr_nusers(struct nat_pool *np, uint32_t naddrs) +/* Is this a blocked address? */ +bool +nat_pool_is_blocked_addr(struct nat_pool *np, uint32_t addr) { - if (np) { - np->np_nusers++; - np->np_nuser_addrs += naddrs; - } + return np->np_blocklist && + npf_addrgrp_lookup_v4_by_handle(np->np_blocklist, addr) == 0; } -void nat_pool_decr_nusers(struct nat_pool *np, uint32_t naddrs) +/* + * Check if an address in in a NAT pool. 'addr' is in network-byte order. 
+ */ +bool nat_pool_is_pool_addr(const struct nat_pool *np, uint32_t addr) { - if (np) { - np->np_nusers--; - np->np_nuser_addrs -= naddrs; - } -} + if (!np || !np->np_ranges || !np->np_ranges->nr_ag) + return false; -/* Is this a blacklisted address? */ -bool -nat_pool_is_blacklist_addr(struct nat_pool *np, uint32_t addr) -{ - return np->np_blacklist != NULL && - npf_addrgrp_lookup_v4(np->np_blacklist, addr) == 0; + /* Is addr in one of this pools address ranges? */ + if (npf_addrgrp_lookup_v4_by_handle(np->np_ranges->nr_ag, addr) == 0) + return true; + + return false; } /* Is NAT pool active? */ @@ -197,9 +204,9 @@ void nat_pool_clear_active(struct nat_pool *np) static ulong nat_pool_hash(const char *name) { int len = strlen(name); - char __name[len] __rte_aligned(sizeof(uint32_t)); + char __name[RTE_ALIGN(len, 4)] __rte_aligned(sizeof(uint32_t)); - memcpy(__name, name, len + 1); + memcpy(__name, name, len); return rte_jhash(__name, len, 0); } @@ -247,15 +254,124 @@ struct nat_pool *nat_pool_lookup(const char *name) void nat_pool_clear_addr_hints(struct nat_pool *np) { struct nat_pool_ranges *nr = np->np_ranges; - uint i; + enum nat_proto proto; if (!nr) return; - for (i = NAT_PROTO_FIRST; i <= NAT_PROTO_LAST; i++) - rte_atomic32_set(&nr->nr_addr_hint[i], 0); + for (proto = NAT_PROTO_FIRST; proto <= NAT_PROTO_LAST; proto++) + rte_atomic32_set(&nr->nr_addr_hint[proto], 0); +} + +/* + * Create an address-group from a NAT pool range set. + */ +static struct npf_addrgrp * +nat_pool_ranges_to_addrgrp(struct nat_pool_ranges *nr, const char *np_name) +{ + char ag_name[NAT_POOL_NAME_MAX + 5]; + struct npf_addrgrp *ag; + uint32_t addr1, addr2; + uint i; + int rc; + + /* + * The data model prevents address-group names from starting with an + * underscore, so we are guaranteed that the name used for the + * address-group is unique. 
+ */ + snprintf(ag_name, sizeof(ag_name), "_%s_AG", np_name); + + /* Create address group and add to table set */ + ag = npf_addrgrp_cfg_add(ag_name); + if (!ag) + return NULL; + + for (i = 0; i < nr->nr_nranges; i++) { + struct nat_pool_range *pr = &nr->nr_range[i]; + + /* Pool addresses are in host order */ + addr1 = htonl(pr->pr_addr_start); + addr2 = htonl(pr->pr_addr_stop); + + if (addr1 == addr2) + rc = npf_addrgrp_prefix_insert(ag_name, + (npf_addr_t *)&addr1, + 4, 32); + else + rc = npf_addrgrp_range_insert(ag_name, + (npf_addr_t *)&addr1, + (npf_addr_t *)&addr2, + 4); + + if (rc < 0) { + npf_addrgrp_cfg_delete(ag_name); + return NULL; + } + } + + /* + * Take a reference on the address-group and then remove the addr-grp + * from the table set. + * + * After this the address-group is hidden in that it cannot be found + * in the table set. The only thing keeping it in existence is the + * reference held by the NAT pool ranges structure. + */ + npf_addrgrp_get(ag); + npf_addrgrp_cfg_delete(ag_name); + + return ag; +} + +/* + * Convert prefix or subnet to address range . Given a NAT pool address range + * in prefix format, determine the first and last addresses in the range. + * + * Users will typically use a prefix address range to assign a small number of + * addresses out of a larger /24 subnet. Several CGNAT policies may be + * configured, each with adjoining address prefixes. In these cases we do + * *not* want to lose the first and last address in the prefix range. + * Exceptions are when the first address is .0 or the last address is .255. + * + * If the user has specified a subnet address range then we do not use the + * first and last addresses. 
+ */ +static void +nat_pool_prefix_setup_addr_start_stop(struct nat_pool_range *pr) +{ + if (pr->pr_mask == 32) { + pr->pr_addr_start = pr->pr_prefix; + pr->pr_addr_stop = pr->pr_prefix; + return; + } + + uint32_t first, last, mask; + + first = pr->pr_prefix; + mask = 0xFFFFFFFFUL << (32 - pr->pr_mask); + last = (first | ~mask); + first = (first & mask); + + if (pr->pr_mask < 31) { + /* + * Only use the first or last address if configured to do so, + * and they do not result in the last byte being 0 or 255. + */ + if (pr->pr_type == NPA_SUBNET || (first & 0xFF) == 0) + first += 1; + + if (pr->pr_type == NPA_SUBNET || (last & 0xFF) == 255) + last -= 1; + } + + pr->pr_addr_start = first; + pr->pr_addr_stop = last; } +/* + * Create a NAT address pool range structure from configuration + */ static struct nat_pool_ranges * nat_pool_create_ranges(struct nat_pool_cfg *cfg, int *error) { @@ -278,35 +394,71 @@ nat_pool_create_ranges(struct nat_pool_cfg *cfg, int *error) /* Copy address ranges */ for (i = 0; i < cfg->np_nranges; i++) { - memcpy(&nr->nr_range[i], &cfg->np_range[i], - sizeof(struct nat_pool_range)); + struct nat_pool_range *pr = &nr->nr_range[i]; - nr->nr_naddrs += (nr->nr_range[i].pr_addr_stop - - nr->nr_range[i].pr_addr_start + 1); - nr->nr_range[i].pr_range = i; + memcpy(pr, &cfg->np_range[i], sizeof(*pr)); - if (cfg->np_range[i].pr_name) - nr->nr_range[i].pr_name = - strdup(cfg->np_range[i].pr_name); + /* Convert prefix or subnet to address range */ + if (pr->pr_type == NPA_PREFIX || pr->pr_type == NPA_SUBNET) + nat_pool_prefix_setup_addr_start_stop(pr); + + pr->pr_naddrs = pr->pr_addr_stop - pr->pr_addr_start + 1; + nr->nr_naddrs += pr->pr_naddrs; + pr->pr_range = i; + pr->pr_shared = cfg->np_range[i].pr_shared; + + /* Copy range name */ + strcpy(pr->pr_name, cfg->np_range[i].pr_name); } - for (i = NAT_PROTO_FIRST; i <= NAT_PROTO_LAST; i++) - rte_atomic32_set(&nr->nr_addr_hint[i], 0); + enum nat_proto p; + for (p = NAT_PROTO_FIRST; p <= NAT_PROTO_LAST; 
p++) + rte_atomic32_set(&nr->nr_addr_hint[p], 0); + + /* + * Create a 'hidden' address-group from the set of address ranges. + * This is used to quickly test if an address in in a NAT pool, for + * example when ICMP Echo Requests are received on the outside + * interface. + */ + nr->nr_ag = nat_pool_ranges_to_addrgrp(nr, cfg->np_name); + assert(nr->nr_ag); return nr; } -static void nat_pool_free_ranges(struct nat_pool_ranges *ranges) +static void nat_pool_rcu_free_ranges(struct rcu_head *head) { - uint i; + struct nat_pool_ranges *nr; + + nr = caa_container_of(head, struct nat_pool_ranges, nr_rcu_head); + free(nr); +} - if (!ranges) +/* + * If the parent pool structure is being freed then this function will be + * called from an rcu_callback, in which case we free 'nr' immediately. + * + * If the ranges of an existing pool are just being re-configured, then we + * want to rcu-free 'nr'. (This will occur after we rcu_xchg_pointer the + * np_ranges pointer in the NAT pool.) + */ +static void nat_pool_free_ranges(struct nat_pool_ranges *nr, bool rcu_free) +{ + struct npf_addrgrp *ag; + + if (!nr) return; - for (i = 0; i < ranges->nr_nranges; i++) - free(ranges->nr_range[i].pr_name); + /* Release reference on ranges address-group */ + ag = rcu_xchg_pointer(&nr->nr_ag, NULL); + if (ag) + npf_addrgrp_put(ag); - free(ranges); + if (rcu_free) + call_rcu(&nr->nr_rcu_head, nat_pool_rcu_free_ranges); + else + free(nr); } /* @@ -340,7 +492,7 @@ nat_pool_update_ranges(struct nat_pool *np, struct nat_pool_ranges *new) /* Loop through 'new' ranges to find matching range name */ for (j = 0; j < new->nr_nranges; j++) { if (!strcmp(new->nr_range[j].pr_name, - old->nr_range[i].pr_name)) { + old->nr_range[i].pr_name)) { /* Found match. Has it changed? 
*/ if (new->nr_range[j].pr_type != old->nr_range[i].pr_type || @@ -364,7 +516,9 @@ nat_pool_update_ranges(struct nat_pool *np, struct nat_pool_ranges *new) nat_pool_clear_active(np); old = rcu_xchg_pointer(&np->np_ranges, new); - nat_pool_free_ranges(old); + + /* rcu-free old np_ranges */ + nat_pool_free_ranges(old, true); return 0; } @@ -386,11 +540,11 @@ static struct nat_pool *nat_pool_create(struct nat_pool_cfg *cfg, int *error) np->np_ranges = nat_pool_create_ranges(cfg, error); if (!np->np_ranges) { - free(np); - return NULL; + *error = -ENOMEM; + goto error; } - np->np_name = strdup(cfg->np_name); + strcpy(np->np_name, cfg->np_name); /* Copy items from config */ np->np_type = cfg->np_type; @@ -402,10 +556,17 @@ static struct nat_pool *nat_pool_create(struct nat_pool_cfg *cfg, int *error) np->np_port_end = cfg->np_port_end; np->np_pa = cfg->np_pa; np->np_log_pba = cfg->np_log_pba; + np->np_blocklist = NULL; - if (cfg->np_blacklist_name) - np->np_blacklist = - npf_addrgrp_lookup_name(cfg->np_blacklist_name); + if (cfg->np_blocklist_name) { + /* We store a pointer the address group */ + np->np_blocklist = + npf_addrgrp_lookup_name(cfg->np_blocklist_name); + + /* Take reference on ag since we are storing ptr */ + if (np->np_blocklist) + npf_addrgrp_get(np->np_blocklist); + } /* Initialize non-config items */ rte_atomic32_init(&np->np_refcnt); @@ -422,15 +583,15 @@ static struct nat_pool *nat_pool_create(struct nat_pool_cfg *cfg, int *error) np->np_nports = np->np_port_end - np->np_port_start + 1; return np; +error: + nat_pool_destroy(np, false); + return NULL; } /* Free a nat pool */ static void nat_pool_free(struct nat_pool *np) { - if (np->np_name) - free(np->np_name); - - nat_pool_free_ranges(np->np_ranges); + nat_pool_free_ranges(np->np_ranges, false); np->np_ranges = NULL; free(np); @@ -461,7 +622,7 @@ static int nat_pool_insert(struct nat_pool *np) &np->np_node); /* - * This should never happen as entries are only added by master thread + * This should 
never happen as entries are only added by main thread */ if (node != &np->np_node) return -EEXIST; @@ -498,9 +659,21 @@ static int nat_pool_delete(struct nat_pool *np) return 0; } -static void nat_pool_destroy(struct nat_pool *np) +static void nat_pool_destroy(struct nat_pool *np, bool rcu_free) { - call_rcu(&np->np_rcu_head, nat_pool_rcu_free); + struct npf_addrgrp *ag; + + assert(rte_atomic32_read(&np->np_refcnt) == 0); + + /* Release reference on blocked address-group */ + ag = rcu_xchg_pointer(&np->np_blocklist, NULL); + if (ag) + npf_addrgrp_put(ag); + + if (rcu_free) + call_rcu(&np->np_rcu_head, nat_pool_rcu_free); + else + nat_pool_free(np); } /* @@ -519,7 +692,7 @@ void nat_pool_put(struct nat_pool *np) { assert(np); if (np && rte_atomic32_dec_and_test(&np->np_refcnt)) - nat_pool_destroy(np); + nat_pool_destroy(np, true); } /* @@ -544,7 +717,7 @@ int nat_pool_walk(nat_poolwalk_cb cb, void *data) /* * Get the range index that an address is in. Returns -1 if address is not in - * any range. + * any range. 'addr' is in host-byte order. */ int nat_pool_addr_range(struct nat_pool *np, uint32_t addr) { @@ -560,27 +733,35 @@ int nat_pool_addr_range(struct nat_pool *np, uint32_t addr) } /* - * Get next address in pool after the given address + * Get next address in pool after the given address. If the given addr is 0 + * then the first address in the first range is returned. 
*/ -uint32_t nat_pool_next_addr(struct nat_pool *np, uint32_t addr) +uint32_t nat_pool_next_addr(struct nat_pool *np, uint32_t addr, + struct nat_pool_range **prp) { struct nat_pool_ranges *nr = np->np_ranges; uint range; int rc; - if (addr == 0) - /* Return first address */ + if (addr == 0) { + /* Return first address in first range */ + if (likely(prp != NULL)) + *prp = &nr->nr_range[0]; return nr->nr_range[0].pr_addr_start; + } /* Find the range that addr is in */ rc = nat_pool_addr_range(np, addr); /* * If the ranges in the pool have changed, then return the - * first address + * first address in the first range */ - if (rc < 0) + if (rc < 0) { + if (likely(prp != NULL)) + *prp = &nr->nr_range[0]; return nr->nr_range[0].pr_addr_start; + } range = (uint)rc; @@ -590,34 +771,56 @@ uint32_t nat_pool_next_addr(struct nat_pool *np, uint32_t addr) range = 0; addr = nr->nr_range[range].pr_addr_start; } + if (likely(prp != NULL)) + *prp = &nr->nr_range[range]; return addr; } +static const char *nat_pool_range_type_str(enum nat_pool_range_type type) +{ + switch (type) { + case NPA_PREFIX: + return "prefix"; + case NPA_RANGE: + return "range"; + case NPA_SUBNET: + return "subnet"; + }; + return "unknown"; +} + /* * NAT pool range string */ static char *nat_pool_range_str(struct nat_pool_range *pr) { - static char str[40]; + static char str[70]; uint32_t addr; + char str1[20], str2[20]; - if (true || pr->pr_type == NPA_RANGE) { - char str1[20], str2[20]; + addr = htonl(pr->pr_addr_start); + inet_ntop(AF_INET, &addr, str1, sizeof(str1)); - addr = htonl(pr->pr_addr_start); - inet_ntop(AF_INET, &addr, str1, sizeof(str1)); + addr = htonl(pr->pr_addr_stop); + inet_ntop(AF_INET, &addr, str2, sizeof(str2)); - addr = htonl(pr->pr_addr_stop); - inet_ntop(AF_INET, &addr, str2, sizeof(str2)); + snprintf(str, sizeof(str), "%s - %s", str1, str2); - snprintf(str, sizeof(str), "%s-%s", str1, str2); - } else { - char str1[20]; + return str; +} - addr = htonl(pr->pr_prefix); - 
inet_ntop(AF_INET, &addr, str1, sizeof(str1)); - snprintf(str, sizeof(str), "%s/%u", str1, pr->pr_mask); - } +/* + * NAT pool prefix or subnet string + */ +static char *nat_pool_prefix_str(struct nat_pool_range *pr) +{ + static char str[30]; + char str1[20]; + uint32_t addr; + + addr = htonl(pr->pr_prefix); + inet_ntop(AF_INET, &addr, str1, sizeof(str1)); + snprintf(str, sizeof(str), "%s/%u", str1, pr->pr_mask); return str; } @@ -627,23 +830,56 @@ static char *nat_pool_range_str(struct nat_pool_range *pr) static void nat_pool_jsonw_ranges(json_writer_t *json, struct nat_pool *np) { - struct nat_pool_ranges *nr = np->np_ranges; + struct nat_pool_ranges *nr = rcu_dereference(np->np_ranges); uint i; jsonw_uint_field(json, "naddrs", nr->nr_naddrs); + jsonw_uint_field(json, "used", rte_atomic32_read(&nr->nr_used)); jsonw_name(json, "address_ranges"); jsonw_start_array(json); for (i = 0; i < nr->nr_nranges; i++) { + struct nat_pool_range *pr = &nr->nr_range[i]; + jsonw_start_object(json); - jsonw_string_field(json, "name", nr->nr_range[i].pr_name); - jsonw_string_field(json, "range", - nat_pool_range_str(&nr->nr_range[i])); - jsonw_uint_field(json, "naddrs", nr->nr_range[i].pr_naddrs); + jsonw_string_field(json, "name", pr->pr_name); + jsonw_string_field(json, "type", + nat_pool_range_type_str(pr->pr_type)); + + /* Display the address range for all types */ + jsonw_string_field(json, "range", nat_pool_range_str(pr)); + + jsonw_uint_field(json, "naddrs", pr->pr_naddrs); + + if (pr->pr_type == NPA_PREFIX) + jsonw_string_field(json, "prefix", + nat_pool_prefix_str(pr)); + else if (pr->pr_type == NPA_SUBNET) + jsonw_string_field(json, "subnet", + nat_pool_prefix_str(pr)); + jsonw_end_object(json); } jsonw_end_array(json); + + /* + * Add json for hidden NAT pool address-group. + * + * We use the generic address group code to format the json for this + * hidden group. The per address-group json is normally an object + * within an array. 
We don't have the array here, so we need to name the + * json object. + */ + struct npf_addrgrp *ag = rcu_dereference(nr->nr_ag); + if (ag) { + struct npf_show_ag_ctl ctl = { 0 }; + ctl.af[AG_IPv4] = true; + ctl.detail = true; + + jsonw_name(json, "address-group"); + npf_addrgrp_jsonw_one(json, ag, &ctl); + } } /* @@ -729,7 +965,7 @@ static const char *nat_pool_port_allcn_str(enum nat_port_allcn pa) static void nat_pool_jsonw_one(json_writer_t *json, struct nat_pool *np) { - int i; + const char *name; jsonw_start_object(json); @@ -754,34 +990,41 @@ nat_pool_jsonw_one(json_writer_t *json, struct nat_pool *np) jsonw_uint_field(json, "mbpu", np->np_mbpu); /* Number of users (eg cgnat policies) sharing this pool */ - jsonw_uint_field(json, "nusers", np->np_nusers); + uint32_t nusers = 0; + uint64_t nuser_addrs = 0UL; + + nat_pool_client_counts(np, &nusers, &nuser_addrs); + + jsonw_uint_field(json, "nusers", nusers); /* Total private addresses sharing this pool */ - jsonw_uint_field(json, "nuser_addrs", np->np_nuser_addrs); + jsonw_uint_field(json, "nuser_addrs", nuser_addrs); nat_pool_jsonw_mappings(json, np); nat_pool_jsonw_pba(json, np); - if (np->np_blacklist) { - char *name = npf_addrgrp_handle2name(np->np_blacklist); - jsonw_string_field(json, "blacklist", - name ? name : "(unknown)"); - } + name = npf_addrgrp_handle2name(np->np_blocklist); + if (name) + jsonw_string_field(json, "blacklist", name); + + jsonw_bool_field(json, "log_pba", np->np_log_pba); /* deprecated */ + jsonw_bool_field(json, "log_all", false); /* deprecated */ - jsonw_bool_field(json, "log_pba", np->np_log_pba); - jsonw_bool_field(json, "log_all", np->np_full); + /* Are all nat pool addrs in-use?
*/ + jsonw_bool_field(json, "full", np->np_full); jsonw_name(json, "current"); jsonw_start_object(json); - for (i = NAT_PROTO_FIRST; i <= NAT_PROTO_LAST; i++) { + enum nat_proto p; + for (p = NAT_PROTO_FIRST; p <= NAT_PROTO_LAST; p++) { static char str[16]; uint32_t addr; - addr = nat_pool_hint(np, i); + addr = nat_pool_hint(np, p); addr = htonl(addr); inet_ntop(AF_INET, &addr, str, sizeof(str)); - jsonw_string_field(json, nat_proto_lc_str(i), str); + jsonw_string_field(json, nat_proto_lc_str(p), str); } jsonw_end_object(json); jsonw_end_object(json); @@ -822,7 +1065,65 @@ void nat_pool_show(FILE *f, int argc __unused, char **argv __unused) } /* - * Parse address and mask. 'item' is either "prefix" or "address-mask". + * Parse comma separated list of address range options: "a=yes,b=no" etc. + */ +static int +nat_pool_cfg_parse_range_opts(char *opts, struct nat_pool_range *pr) +{ + char *sep; + char *item, *value; + + /* Address range option defaults */ + pr->pr_shared = true; + + if (!opts) + return 0; + + /* First item */ + item = opts; + + /* NULL terminate the first "item=value" if necessary */ + sep = strstr(opts, ","); + if (sep) { + *sep = '\0'; + opts = sep + 1; + } else + opts = NULL; + + while (true) { + /* item=value */ + sep = strstr(item, "="); + if (!sep) + return -EINVAL; + + *sep = '\0'; + value = sep + 1; + + /* Sharing of addresses in this range */ + if (!strcmp(item, "shared")) { + if (!strcmp(value, "no")) + pr->pr_shared = false; + } + + if (!opts) + break; + + /* Next option ... */ + item = opts; + + sep = strstr(opts, ","); + if (sep) { + *sep = '\0'; + opts = sep + 1; + } else + opts = NULL; + } + + return 0; +} + +/* + * Parse address and mask. 'item' is either "prefix" or "subnet". 
* * Value is of format "RANGE2/10.1.1.0/24" */ @@ -832,58 +1133,57 @@ nat_pool_cfg_parse_prefix(char *item, char *value, struct nat_pool_range *pr) npf_netmask_t mask; sa_family_t fam; npf_addr_t addr; - char *sep; + char *sep, *prefix, *opts = NULL; bool negate; int rc; if (!strcmp(item, "prefix")) pr->pr_type = NPA_PREFIX; + else if (!strcmp(item, "subnet")) + pr->pr_type = NPA_SUBNET; else return -EINVAL; sep = strstr(value, "/"); if (sep) { *sep = '\0'; - pr->pr_name = value; - value = sep+1; + + /* Copy range name */ + strncpy(pr->pr_name, value, sizeof(pr->pr_name)); + pr->pr_name[NAT_POOL_NAME_MAX - 1] = '\0'; + + /* Pointer to prefix string */ + prefix = sep + 1; } else return -EINVAL; - rc = npf_parse_ip_addr(value, &fam, &addr, &mask, &negate); + /* Remember where the options start. These will be parsed later */ + sep = strstr(prefix, ","); + if (sep) { + *sep = '\0'; + opts = sep + 1; + } + + /* Parse prefix string */ + rc = npf_parse_ip_addr(prefix, &fam, &addr, &mask, &negate); if (rc < 0) return -EINVAL; pr->pr_prefix = NPF_ADDR_TO_UINT32(&addr); pr->pr_mask = MIN(mask, 32); - /* Convert prefix to address range */ - if (pr->pr_mask == 32) { - pr->pr_addr_start = pr->pr_prefix; - pr->pr_addr_stop = pr->pr_prefix; - } else { - uint32_t first, last, mask; - - first = pr->pr_prefix; - mask = 0xFFFFFFFFUL << (32 - pr->pr_mask); - last = (first | ~mask); - first = (first & mask); - - if (pr->pr_mask < 31) { - /* Do not use first or last address */ - first += 1; - last -= 1; - } + rc = nat_pool_cfg_parse_range_opts(opts, pr); - pr->pr_addr_start = first; - pr->pr_addr_stop = last; - } - return 0; + return rc; } /* * Parse address range. 
* * Value is of format "RANGE1/1.1.1.1-1.1.1.4" + * + * optional per-range config is as follows: + * "RANGE1/1.1.1.1-1.1.1.4,shared=yes" */ static int nat_pool_cfg_parse_addr_range(char *item __unused, char *value, @@ -893,7 +1193,7 @@ nat_pool_cfg_parse_addr_range(char *item __unused, char *value, sa_family_t fam; npf_addr_t addr; bool negate; - char *sep; + char *sep, *range, *opts = NULL; int rc; pr->pr_type = NPA_RANGE; @@ -901,17 +1201,30 @@ nat_pool_cfg_parse_addr_range(char *item __unused, char *value, sep = strstr(value, "/"); if (sep) { *sep = '\0'; - pr->pr_name = value; - value = sep+1; + + /* Copy range name */ + strncpy(pr->pr_name, value, sizeof(pr->pr_name)); + pr->pr_name[NAT_POOL_NAME_MAX - 1] = '\0'; + + /* Pointer to address range string */ + range = sep + 1; } else return -EINVAL; - sep = strstr(value, "-"); + /* Remember where the options start. These will be parsed later */ + sep = strstr(range, ","); + if (sep) { + *sep = '\0'; + opts = sep + 1; + } + + /* Parse address range string */ + sep = strstr(range, "-"); if (!sep) return -EINVAL; *sep = '\0'; - char *first = value, *last = sep+1; + char *first = range, *last = sep + 1; rc = npf_parse_ip_addr(first, &fam, &addr, &mask, &negate); if (rc < 0) @@ -925,9 +1238,10 @@ nat_pool_cfg_parse_addr_range(char *item __unused, char *value, return -EINVAL; pr->pr_addr_stop = NPF_ADDR_TO_UINT32(&addr); - pr->pr_naddrs = pr->pr_addr_stop - pr->pr_addr_start + 1; - return 0; + rc = nat_pool_cfg_parse_range_opts(opts, pr); + + return rc; } /* @@ -1067,16 +1381,16 @@ nat_pool_cfg_parse_aa(char *item __unused, char *value, } /* - * Parse blacklist name. blacklist= + * Parse blocklist name. 
blacklist= */ static int -nat_pool_cfg_parse_blacklist(char *item __unused, char *value, +nat_pool_cfg_parse_blocklist(char *item __unused, char *value, struct nat_pool_cfg *cfg) { if (!npf_addrgrp_lookup_name(value)) return -EINVAL; - cfg->np_blacklist_name = value; + cfg->np_blocklist_name = value; return 0; } @@ -1125,7 +1439,7 @@ static int nat_pool_cfg_parse(FILE *f __unused, int argc, char **argv, item = argv[0]; value = sep+1; - if (!strcmp(item, "prefix")) { + if (!strcmp(item, "prefix") || !strcmp(item, "subnet")) { if (cfg->np_nranges >= NAT_POOL_MAX_RANGES) { rc = -EINVAL; @@ -1177,7 +1491,7 @@ static int nat_pool_cfg_parse(FILE *f __unused, int argc, char **argv, rc = nat_pool_cfg_parse_aa(item, value, cfg); else if (!strcmp(item, "blacklist")) - rc = nat_pool_cfg_parse_blacklist(item, value, cfg); + rc = nat_pool_cfg_parse_blocklist(item, value, cfg); if (rc) goto error; @@ -1193,10 +1507,15 @@ static int nat_pool_cfg_parse(FILE *f __unused, int argc, char **argv, /* * Create or update a nat pool + * + * nat-ut pool add POOL1 type=cgnat + * address-range=RANGE1/1.1.1.11-1.1.1.20 + * prefix=RANGE2/1.1.1.192/26 + * block-size=4096 max-blocks=32 log-pba=yes */ int nat_pool_cfg_add(FILE *f, int argc, char **argv) { - struct nat_pool *cur; + struct nat_pool *np; int rc = 0; /* Items are parsed and stored in cfg */ @@ -1207,25 +1526,28 @@ int nat_pool_cfg_add(FILE *f, int argc, char **argv) if (argc < 4) return -EINVAL; - cfg.np_name = argv[3]; + strncpy(cfg.np_name, argv[3], sizeof(cfg.np_name)); + cfg.np_name[NAT_POOL_NAME_MAX - 1] = '\0'; argc -= 4; argv += 4; - cur = nat_pool_lookup(cfg.np_name); + np = nat_pool_lookup(cfg.np_name); /* Setup defaults */ - if (cur) { - cfg.np_type = cur->np_type; - cfg.np_ap = cur->np_ap; - cfg.np_aa = cur->np_aa; - cfg.np_block_sz = cur->np_block_sz; - cfg.np_mbpu = cur->np_mbpu; - cfg.np_port_start = cur->np_port_start; - cfg.np_port_end = cur->np_port_end; - cfg.np_pa = cur->np_pa; - cfg.np_log_pba = cur->np_log_pba; + 
if (np) { + cfg.np_type = np->np_type; + cfg.np_ap = np->np_ap; + cfg.np_aa = np->np_aa; + cfg.np_block_sz = np->np_block_sz; + cfg.np_mbpu = np->np_mbpu; + cfg.np_port_start = np->np_port_start; + cfg.np_port_end = np->np_port_end; + cfg.np_pa = np->np_pa; + cfg.np_log_pba = np->np_log_pba; cfg.np_nranges = 0; - cfg.np_blacklist_name = NULL; + + cfg.np_blocklist_name = + npf_addrgrp_handle2name(np->np_blocklist); } else { cfg.np_type = NPT_CGNAT; @@ -1238,28 +1560,29 @@ int nat_pool_cfg_add(FILE *f, int argc, char **argv) cfg.np_pa = NAT_PA_SEQUENTIAL; cfg.np_log_pba = true; cfg.np_nranges = 0; + cfg.np_blocklist_name = NULL; } rc = nat_pool_cfg_parse(f, argc, argv, &cfg); if (rc < 0) goto error; - if (cur) { + if (np) { /* Update existing nat pool */ bool destructive_change = false; /* Some changes are disallowed on an active pool */ - if (cur->np_type != cfg.np_type || - cur->np_block_sz != cfg.np_block_sz || - cur->np_ap != cfg.np_ap || - cur->np_aa != cfg.np_aa || - cur->np_port_start != cfg.np_port_start || - cur->np_port_end != cfg.np_port_end) + if (np->np_type != cfg.np_type || + np->np_block_sz != cfg.np_block_sz || + np->np_ap != cfg.np_ap || + np->np_aa != cfg.np_aa || + np->np_port_start != cfg.np_port_start || + np->np_port_end != cfg.np_port_end) destructive_change = true; /* Mark NAT pool as inactive (clears all mappings.) */ if (destructive_change) - nat_pool_clear_active(cur); + nat_pool_clear_active(np); /* * Update the address ranges of an existing nat pool. 
If @@ -1275,36 +1598,36 @@ int nat_pool_cfg_add(FILE *f, int argc, char **argv) goto error; /* This will free existing range struct if successful */ - rc = nat_pool_update_ranges(cur, nr); + rc = nat_pool_update_ranges(np, nr); if (rc < 0) { - nat_pool_free_ranges(nr); + nat_pool_free_ranges(nr, false); goto error; } } - cur->np_ap = cfg.np_ap; - cur->np_aa = cfg.np_aa; - cur->np_block_sz = cfg.np_block_sz; - cur->np_mbpu = cfg.np_mbpu; - cur->np_port_start = cfg.np_port_start; - cur->np_port_end = cfg.np_port_end; - cur->np_pa = cfg.np_pa; - cur->np_log_pba = cfg.np_log_pba; - - if (cfg.np_blacklist_name) - cur->np_blacklist = - npf_addrgrp_lookup_name(cfg.np_blacklist_name); - else - cur->np_blacklist = NULL; + np->np_ap = cfg.np_ap; + np->np_aa = cfg.np_aa; + np->np_block_sz = cfg.np_block_sz; + np->np_mbpu = cfg.np_mbpu; + np->np_port_start = cfg.np_port_start; + np->np_port_end = cfg.np_port_end; + np->np_pa = cfg.np_pa; + np->np_log_pba = cfg.np_log_pba; + + /* Has blocklist address-group changed? */ + const char *name = npf_addrgrp_handle2name(np->np_blocklist); + + npf_addrgrp_update_handle(name, cfg.np_blocklist_name, + &np->np_blocklist); /* State derived from config */ - cur->np_nports = cur->np_port_end - cur->np_port_start + 1; + np->np_nports = np->np_port_end - np->np_port_start + 1; /* Mark pool as active. This is a noop if already active. 
*/ - nat_pool_set_active(cur); + nat_pool_set_active(np); } else { /* Create new nat pool and copy addresses from addr_array */ - struct nat_pool *np = nat_pool_create(&cfg, &rc); + np = nat_pool_create(&cfg, &rc); if (!np) { rc = -ENOMEM; @@ -1314,7 +1637,7 @@ int nat_pool_cfg_add(FILE *f, int argc, char **argv) /* Insert pool into hash table and take reference */ rc = nat_pool_insert(np); if (rc < 0) { - nat_pool_free(np); + nat_pool_destroy(np, false); goto error; } @@ -1323,6 +1646,9 @@ int nat_pool_cfg_add(FILE *f, int argc, char **argv) } rc = 0; + /* Set pool's warning threshold */ + np_threshold_set(np, NULL); + error: return rc; } @@ -1371,3 +1697,166 @@ void nat_pool_uninit(void) dp_ht_destroy_deferred(nat_pool_ht); nat_pool_ht = NULL; } + +/* + * Generate NAT pool threshold log + * and restart timer if required. + */ +static void np_threshold_log( + struct nat_pool *np, + int32_t val, int32_t max) +{ + cgn_log_resource_pool( + CGN_RESOURCE_THRESHOLD, np, val, max); + + if (np_threshold_time) + rte_timer_reset(&np->np_threshold_timer, + np_threshold_time * rte_get_timer_hz(), + SINGLE, rte_get_master_lcore(), + np_threshold_timer_expiry, np); +} + +/* + * Warn if over the configured nat pool threshold + */ +static void np_threshold_check(struct nat_pool *np, int32_t val) +{ + if (np->np_threshold && + np->np_threshold_been_below && + (val >= np->np_threshold) && + (!rte_timer_pending(&np->np_threshold_timer))) { + + np->np_threshold_been_below = false; + np_threshold_log(np, val, np->np_ranges->nr_naddrs); + } +} + +/* + * Set NAT pool threshold for one NAT pool + * + * threshold is in percent; interval is in seconds. 
+ */ +int np_threshold_set(struct nat_pool *np, void *arg __unused) +{ + rte_timer_stop(&np->np_threshold_timer); + np->np_threshold = + (np->np_ranges->nr_naddrs * np_threshold_cfg + 99) / 100; + np->np_threshold_been_below = true; + + /* Warn if over configured threshold */ + int32_t val = rte_atomic32_read(&np->np_ranges->nr_used); + np_threshold_check(np, val); + + return 0; +} + +/* + * Set NAT pool threshold for all NAT pools + * + * threshold is in percent; interval is in seconds. + */ +void np_threshold_set_all(int32_t threshold, uint32_t interval) +{ + np_threshold_cfg = threshold; + np_threshold_time = interval; + nat_pool_walk(np_threshold_set, NULL); +} + +/* + * Handle NAT pool threshold timer expiry. + */ +static void np_threshold_timer_expiry( + struct rte_timer *timer __unused, + void *arg) +{ + struct nat_pool *np = arg; + + int32_t val = rte_atomic32_read(&np->np_ranges->nr_used); + + if (np->np_threshold && + (val >= np->np_threshold)) { + + np_threshold_log(np, val, np->np_ranges->nr_naddrs); + } +} + +/* + * Increment the threshold counter when taking a NAT pool entry. + * Generate a log if over the threshold. + */ +void np_threshold_get(struct nat_pool *np) +{ + int32_t val = rte_atomic32_add_return(&np->np_ranges->nr_used, 1); + + /* Warn if over configured threshold */ + np_threshold_check(np, val); +} + +/* + * Decrement the threshold counter when returning a NAT pool entry + */ +void np_threshold_put(struct nat_pool *np) +{ + int32_t val = rte_atomic32_sub_return(&np->np_ranges->nr_used, 1); + + if (val < np->np_threshold) + np->np_threshold_been_below = true; +} + +/************************************************************************** + * NAT Pool to Client API + **************************************************************************/ + +/* + * Fixed size array for holding client operations pointers. 
+ */ +static struct np_client_ops *np_client_ops[NP_CLIENT_MAX_OPS]; + +/* Register client ops */ +bool nat_pool_client_register(const struct np_client_ops *ops) +{ + uint32_t i; + + /* Add client to first free space */ + for (i = 0; i < ARRAY_SIZE(np_client_ops); i++) { + if (!rcu_cmpxchg_pointer(&np_client_ops[i], NULL, + (struct np_client_ops *)ops)) + return true; + } + return false; +} + +/* Unregister client ops */ +void nat_pool_client_unregister(const struct np_client_ops *op) +{ + struct np_client_ops *ops = (struct np_client_ops *) op; + uint32_t i; + + for (i = 0; i < ARRAY_SIZE(np_client_ops); i++) { + if (rcu_cmpxchg_pointer(&np_client_ops[i], ops, NULL) == ops) + return; + } +} + +/* + * Get the number of users and addresses using this NAT pool. + * + * For example, if two CGNAT policies were using this pool then nusers would + * be two. + * + * naddrs is a count of all the possible source addresses for all the users + * that may use this pool. naddrs can be compared with the number of addresses + * in the pool to give a private-public address contention ratio. + */ +static void nat_pool_client_counts(struct nat_pool *np, uint32_t *nusers, + uint64_t *naddrs) +{ + struct np_client_ops *ops; + uint32_t i; + + for (i = 0; i < ARRAY_SIZE(np_client_ops); i++) { + ops = rcu_dereference(np_client_ops[i]); + if (ops) + ops->np_client_counts(np, nusers, naddrs); + } +} diff --git a/src/npf/nat/nat_pool.h b/src/npf/nat/nat_pool.h index 2d4b6226..843ad8df 100644 --- a/src/npf/nat/nat_pool.h +++ b/src/npf/nat/nat_pool.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2019-2021, AT&T Intellectual Property. All rights reserved. 
* * SPDX-License-Identifier: LGPL-2.1-only */ @@ -10,6 +10,7 @@ #include #include #include +#include #include #include "urcu.h" @@ -17,6 +18,9 @@ #include "npf/nat/nat_pool_public.h" +/* Max length of names, enforced by config, is 42 */ +#define NAT_POOL_NAME_MAX 43 + enum nat_pool_type { NPT_CGNAT, }; @@ -30,12 +34,15 @@ enum nat_pool_type { * NAT pool address ranges may be configured as a range or a prefix and * length. All types are converted to a useable range. * - * NPA_PREFIX prefix and mask. First and last address are *not* useable + * NPA_PREFIX prefix and mask. First and last address *are* useable + * (except .0 and .255) + * NPA_SUBNET prefix and mask. First and last address are *not* useable * NPA_RANGE address range */ enum nat_pool_range_type { NPA_PREFIX, NPA_RANGE, + NPA_SUBNET, }; /* @@ -81,14 +88,19 @@ enum nat_port_allcn { * the type configured, all nat_pool_range entries are converted to a useable * range and stored in na_addr_start and naddr_stop. Prefixes and addresses * are in host byte order. + * + * When an address range is created from a prefix and mask less than 31 then + * the first and last addresses are omitted from the range. + * + * pr_shared - Allow multiple subscribers to use each address in this range. */ struct nat_pool_range { - char *pr_name; + char pr_name[NAT_POOL_NAME_MAX]; uint32_t pr_prefix; uint8_t pr_mask; enum nat_pool_range_type pr_type; uint8_t pr_range; /* Range number */ - uint8_t pr_pad1[1]; + bool pr_shared; /* address range */ uint32_t pr_addr_start; @@ -96,15 +108,29 @@ struct nat_pool_range { uint32_t pr_naddrs; }; +/* + * Set of address ranges. + * + * The addresses are all added to a 'hidden' address-group pointed to by + * nr_ag. This is hidden in that it is removed from the address-group + * tableset after creation, and so will not be found by the config. + * + * nr_used is a count of the number of pool addresses with *no* free port + * blocks. 
+ */ struct nat_pool_ranges { uint8_t nr_nranges; /* number of addr ranges */ uint32_t nr_naddrs; /* total address count */ + rte_atomic32_t nr_used; /* addresses in use */ + struct npf_addrgrp *nr_ag; /* addr-grp of pool addrs */ struct nat_pool_range nr_range[NAT_POOL_MAX_RANGES]; /* * Record of last allocated address per differentiated protocol. */ rte_atomic32_t nr_addr_hint[NAT_PROTO_COUNT]; + + struct rcu_head nr_rcu_head; }; /* @@ -117,7 +143,7 @@ struct nat_pool { uint16_t np_flags; /* Pool identity */ - char *np_name; + char np_name[NAT_POOL_NAME_MAX]; enum nat_pool_type np_type; /* cgnat or ? */ /* Config for address allocation */ @@ -140,13 +166,6 @@ struct nat_pool { /* Number of ports per addr. derived from port start/end */ uint16_t np_nports; - /* - * Total # of private addrs in rules or policies attached to this - * pool. - */ - uint32_t np_nusers; - uint64_t np_nuser_addrs; - /* * Mapping stats. */ @@ -163,22 +182,21 @@ struct nat_pool { /* address ranges */ struct nat_pool_ranges *np_ranges; - /* - * We rely on the yang data model to keep the addr group in existence - * while a nat pool is using it. - */ - struct npf_addrgrp *np_blacklist; + struct npf_addrgrp *np_blocklist; /* address group */ + + /* NAT pool threshold and timer */ + int32_t np_threshold; + bool np_threshold_been_below; + struct rte_timer np_threshold_timer; }; /* * Get next address in an address pool */ -uint32_t nat_pool_next_addr(struct nat_pool *np, uint32_t addr); +uint32_t nat_pool_next_addr(struct nat_pool *np, uint32_t addr, + struct nat_pool_range **prp); -/* - * Get the range index that an address is in. Returns -1 if address is not in - * any range. - */ +/* Which address range is an address in? */ int nat_pool_addr_range(struct nat_pool *np, uint32_t addr); /* Return true if address-pool paired is enabled */ @@ -206,13 +224,13 @@ static inline uint16_t nat_pool_get_mbpu(const struct nat_pool *np) * address after this one when doing the next allocation. 
*/ static inline void -nat_pool_hint_set(struct nat_pool *np, uint32_t addr, uint8_t proto) +nat_pool_hint_set(struct nat_pool *np, uint32_t addr, enum nat_proto proto) { rte_atomic32_set(&np->np_ranges->nr_addr_hint[proto], addr); } static inline uint32_t -nat_pool_hint(struct nat_pool *np, uint8_t proto) +nat_pool_hint(struct nat_pool *np, enum nat_proto proto) { return rte_atomic32_read(&np->np_ranges->nr_addr_hint[proto]); } @@ -277,4 +295,9 @@ nat_pool_incr_block_limit(struct nat_pool *np) rte_atomic64_inc(&np->np_pb_limit); } +int np_threshold_set(struct nat_pool *np, void *arg); +void np_threshold_set_all(int32_t threshold, uint32_t interval); +void np_threshold_get(struct nat_pool *np); +void np_threshold_put(struct nat_pool *np); + #endif diff --git a/src/npf/nat/nat_pool_event.c b/src/npf/nat/nat_pool_event.c index aeea168a..843d2f37 100644 --- a/src/npf/nat/nat_pool_event.c +++ b/src/npf/nat/nat_pool_event.c @@ -70,15 +70,3 @@ bool nat_pool_event_register(const struct np_event_ops *ops) } return false; } - -/* Unregister event ops */ -void nat_pool_event_unregister(const struct np_event_ops *op) -{ - struct np_event_ops *ops = (struct np_event_ops *) op; - uint32_t i; - - for (i = 0; i < ARRAY_SIZE(np_ops); i++) { - if (rcu_cmpxchg_pointer(&np_ops[i], ops, NULL) == ops) - return; - } -} diff --git a/src/npf/nat/nat_pool_event.h b/src/npf/nat/nat_pool_event.h index 22709fee..2278ba7d 100644 --- a/src/npf/nat/nat_pool_event.h +++ b/src/npf/nat/nat_pool_event.h @@ -53,7 +53,4 @@ void nat_pool_event(enum np_evt evt, struct nat_pool *np); /* Register event ops */ bool nat_pool_event_register(const struct np_event_ops *ops); -/* Unregister event ops */ -void nat_pool_event_unregister(const struct np_event_ops *op); - #endif /* _NAT_POOL_EVENT_H_ */ diff --git a/src/npf/nat/nat_pool_public.h b/src/npf/nat/nat_pool_public.h index 9546c66c..030d71af 100644 --- a/src/npf/nat/nat_pool_public.h +++ b/src/npf/nat/nat_pool_public.h @@ -35,12 +35,15 @@ void 
nat_pool_clear_active(struct nat_pool *np); /* Log port-block alloc and release? */ bool nat_pool_log_pba(struct nat_pool *np); -/* Incr/decr number of users of a pool */ -void nat_pool_incr_nusers(struct nat_pool *np, uint32_t naddrs); -void nat_pool_decr_nusers(struct nat_pool *np, uint32_t naddrs); +/* Is this a blocked address? */ +bool nat_pool_is_blocked_addr(struct nat_pool *np, uint32_t addr); -/* Is this a blacklisted address? */ -bool nat_pool_is_blacklist_addr(struct nat_pool *np, uint32_t addr); +/* + * Check if an address is in a NAT pool. 'addr' is in network-byte order. + * This should be reasonably efficient as it looks up the address-group + * representation of the NAT pool (i.e. a Patricia Tree lookup). + */ +bool nat_pool_is_pool_addr(const struct nat_pool *np, uint32_t addr); /* lookup nat pool in hash table */ struct nat_pool *nat_pool_lookup(const char *name); @@ -61,4 +64,26 @@ void nat_pool_show(FILE *f, int argc, char **argv); void nat_pool_init(void); void nat_pool_uninit(void); +/************************************************************************** + * NAT Pool to Client API + **************************************************************************/ + +/* + * Allow space for 2 clients + */ +#define NP_CLIENT_MAX_OPS 2 + +/* Per-client functions */ +struct np_client_ops { + /* Get the number of users and addresses using this NAT pool */ + void (*np_client_counts)(struct nat_pool *np, uint32_t *nusers, + uint64_t *naddrs); +}; + +/* Register client ops */ +bool nat_pool_client_register(const struct np_client_ops *ops); + +/* Unregister client ops */ +void nat_pool_client_unregister(const struct np_client_ops *ops); + #endif diff --git a/src/npf/nat/nat_proto.h b/src/npf/nat/nat_proto.h index 60265efe..a2498827 100644 --- a/src/npf/nat/nat_proto.h +++ b/src/npf/nat/nat_proto.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2019-2021, AT&T Intellectual Property.
All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only */ @@ -7,15 +7,21 @@ #ifndef _NAT_PROTO_H_ #define _NAT_PROTO_H_ +#include /* * Protocol classification + * + * Note that if ICMP is given a unique pool to allocate IDs from, + * then the NAT64 code needs to be checked to ensure that it works as + * expected, as NAT64 maps between ICMPv4 (protocol 1) and ICMPv6 + * (protocol 58). */ enum nat_proto { NAT_PROTO_TCP, NAT_PROTO_UDP, NAT_PROTO_OTHER, -}; +} __attribute__ ((__packed__)); #define NAT_PROTO_FIRST NAT_PROTO_TCP #define NAT_PROTO_LAST NAT_PROTO_OTHER @@ -23,7 +29,7 @@ enum nat_proto { #define NAT_PROTO_NONE NAT_PROTO_COUNT /* Get the nat_proto enum from the protocol number */ -static inline uint8_t nat_proto_from_ipproto(uint8_t ipproto) +static inline enum nat_proto nat_proto_from_ipproto(uint8_t ipproto) { switch (ipproto) { case IPPROTO_TCP: @@ -34,24 +40,44 @@ static inline uint8_t nat_proto_from_ipproto(uint8_t ipproto) return NAT_PROTO_OTHER; } -static inline const char *nat_proto_str(uint8_t proto) +/* + * Only works for TCP and UDP. Used for logging. + */ +static inline uint8_t nat_ipproto_from_proto(enum nat_proto proto) +{ + switch (proto) { + case NAT_PROTO_TCP: + return IPPROTO_TCP; + case NAT_PROTO_UDP: + return IPPROTO_UDP; + case NAT_PROTO_OTHER: + break; + } + return 0; +} + +static inline const char *nat_proto_str(enum nat_proto proto) { switch (proto) { case NAT_PROTO_TCP: return "TCP"; case NAT_PROTO_UDP: return "UDP"; + case NAT_PROTO_OTHER: + break; } return "Other"; } -static inline const char *nat_proto_lc_str(uint8_t proto) +static inline const char *nat_proto_lc_str(enum nat_proto proto) { switch (proto) { case NAT_PROTO_TCP: return "tcp"; case NAT_PROTO_UDP: return "udp"; + case NAT_PROTO_OTHER: + break; } return "other"; } diff --git a/src/npf/npf.h b/src/npf/npf.h index 2bdb3517..dd1cf335 100644 --- a/src/npf/npf.h +++ b/src/npf/npf.h @@ -53,7 +53,6 @@ /* * Return values from npf_hook_track and npf_hook_notrack.
* - * NPF_ACTION_TO_V4 and NPF_ACTION_TO_V6 are never returned by npf_hook_notrack. * A tag is never returned by npf_hook_track. * * The flags returned are from the same set as those passed in. @@ -68,8 +67,6 @@ typedef enum { typedef enum { NPF_ACTION_NORMAL, - NPF_ACTION_TO_V4, - NPF_ACTION_TO_V6, NPF_ACTION_TO_LOCAL } npf_action_t; @@ -97,6 +94,7 @@ typedef struct { */ #define NPF_FLAG_IN_SESSION 0x0001 /* Pkt matched a session */ #define NPF_FLAG_CACHE_EMPTY 0x0002 /* Cache is empty */ +#define NPF_FLAG_FROM_ZONE 0x0004 /* Came from a zone iface */ #define NPF_FLAG_FROM_US 0x0008 /* router originated packet */ #define NPF_FLAG_FROM_LOCAL 0x0010 /* from kernel, with local addr */ #define NPF_FLAG_FROM_IPV6 0x0020 /* Nat64, converted IPv6 pkt */ diff --git a/src/npf/npf_addrgrp.c b/src/npf/npf_addrgrp.c index 6f8db668..4602204d 100644 --- a/src/npf/npf_addrgrp.c +++ b/src/npf/npf_addrgrp.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only */ @@ -71,6 +71,8 @@ struct ptree_table; * entries or with prefixes. * * Changes to an address-groups ptree are protected by a read-write lock. + * There is one 'writer' (main thread) and multiple 'readers' (forwarding + * threads). The readers are only blocked when the writer holds the lock. * * * g_addrgrp_table[] @@ -200,10 +202,14 @@ struct npf_addrgrp_entry { * added to the ptrees. * * Each list entry may be either a prefix or range. + * + * ag_lock is an RTE Read-Write lock. The lock is used to protect data that + * allows multiple readers in parallel, but only one writer. All readers are + * blocked until the writer is finished writing.
*/ struct npf_addrgrp { char *ag_name; - int ag_tid; /* Index of ag in tableset */ + uint32_t ag_tid; /* Index of ag in tableset */ rte_rwlock_t ag_lock; bool ag_any[AG_MAX]; /* 0.0.0.0/0 or ::/0 */ zlist_t *ag_list[AG_MAX]; @@ -217,6 +223,9 @@ struct npf_addrgrp { #define AG_AF2ALEN(_af) ((_af) == AG_IPv4 ? AG_KLEN_IPv4 : AG_KLEN_IPv6) #define AG_AF2INET(_af) ((_af) == AG_IPv4 ? AF_INET : AF_INET6) +/* Forward reference */ +static void npf_tbl_entry_free_cb(void *data); + /* * We store NPF_NO_NETMASK (255) in the prefix list to allow for the user to * add, for example, both 10.0.0.1 and 10.0.0.1/32. Only one from the list is @@ -259,7 +268,7 @@ static inline uint8_t *ap_prefix(struct npf_addrgrp_entry *ae) /* * Get the address-group for an address family and table ID */ -struct npf_addrgrp *npf_addrgrp_tid_lookup(int tid) +static struct npf_addrgrp *npf_addrgrp_tid_lookup(int tid) { struct npf_tbl *table; @@ -278,11 +287,15 @@ struct npf_addrgrp *npf_addrgrp_tid_lookup(int tid) * Lookup an address in an address-group. Called from forwarding thread. */ int -npf_addrgrp_lookup(enum npf_addrgrp_af af, struct npf_addrgrp *ag, - npf_addr_t *addr) +npf_addrgrp_lookup(enum npf_addrgrp_af af, uint32_t tid, npf_addr_t *addr) { + struct npf_addrgrp *ag; struct ptree_node *pn; + if (unlikely(!npf_tbl_id_is_valid(tid))) + return -EINVAL; + + ag = npf_addrgrp_tid_lookup(tid); if (unlikely(!ag)) return -EINVAL; @@ -301,7 +314,10 @@ npf_addrgrp_lookup(enum npf_addrgrp_af af, struct npf_addrgrp *ag, return (pn != NULL) ? 0 : -ENOENT; } -int npf_addrgrp_lookup_v4(struct npf_addrgrp *ag, uint32_t addr) +/* + * Base IPv4 lookup function + */ +static ALWAYS_INLINE int ag_lookup_v4(struct npf_addrgrp *ag, uint32_t addr) { struct ptree_node *pn; @@ -321,14 +337,40 @@ int npf_addrgrp_lookup_v4(struct npf_addrgrp *ag, uint32_t addr) return (pn != NULL) ? 
0 : -ENOENT; } -int npf_addrgrp_lookup_v6(struct npf_addrgrp *ag, uint8_t *addr) +/* + * Lookup an IPv4 address-group by table ID + */ +int npf_addrgrp_lookup_v4(uint32_t tid, uint32_t addr) +{ + struct npf_addrgrp *ag; + + if (unlikely(!npf_tbl_id_is_valid(tid))) + return -EINVAL; + + ag = npf_addrgrp_tid_lookup(tid); + + return ag_lookup_v4(ag, addr); +} + +/* + * Lookup an IPv4 address-group by handle + */ +int npf_addrgrp_lookup_v4_by_handle(struct npf_addrgrp *ag, uint32_t addr) +{ + return ag_lookup_v4(ag, addr); +} + +/* + * Base IPv6 lookup function + */ +static ALWAYS_INLINE int ag_lookup_v6(struct npf_addrgrp *ag, uint8_t *addr) { struct ptree_node *pn; if (unlikely(!ag)) return -EINVAL; - /* If 0.0.0.0/0 then we always match */ + /* If 0::0/0 then we always match */ if (ag->ag_any[AG_IPv6]) return 0; @@ -341,6 +383,29 @@ int npf_addrgrp_lookup_v6(struct npf_addrgrp *ag, uint8_t *addr) return (pn != NULL) ? 0 : -ENOENT; } +/* + * Lookup an IPv6 address-group by table ID + */ +int npf_addrgrp_lookup_v6(uint32_t tid, uint8_t *addr) +{ + struct npf_addrgrp *ag; + + if (unlikely(!npf_tbl_id_is_valid(tid))) + return -EINVAL; + + ag = npf_addrgrp_tid_lookup(tid); + + return ag_lookup_v6(ag, addr); +} + +/* + * Lookup an IPv6 address-group by handle + */ +int npf_addrgrp_lookup_v6_by_handle(struct npf_addrgrp *ag, uint8_t *addr) +{ + return ag_lookup_v6(ag, addr); +} + /* * Create an address-group tableset */ @@ -357,27 +422,13 @@ npf_addrgrp_tbl_create(void) if (!table) return -1; + npf_tbl_set_entry_freefn(table, npf_tbl_entry_free_cb); + rcu_assign_pointer(g_addrgrp_table, table); } return 0; } -static int _npf_addrgrp_destroy(struct npf_addrgrp *ag); -static void npf_addrgrp_data_destroy(struct npf_addrgrp *ag); - -/* - * Callback for each address-group in the tableset - */ -static int -npf_addrgrp_destroy_cb(const char *name __unused, uint id __unused, void *data, - void *ctx __unused) -{ - struct npf_addrgrp *ag = data; - - /* Destroy address group */ - 
return _npf_addrgrp_destroy(ag); -} - /* * Destroy address-group tableset */ @@ -389,15 +440,10 @@ npf_addrgrp_tbl_destroy(void) if (!g_addrgrp_table) return -EINVAL; - if (npf_tbl_size(g_addrgrp_table) > 0) - npf_tbl_walk(g_addrgrp_table, npf_addrgrp_destroy_cb, NULL); - - if (npf_tbl_size(g_addrgrp_table) != 0) - return -EEXIST; - table = g_addrgrp_table; g_addrgrp_table = NULL; + /* Remove each entry from table and free memory */ npf_tbl_destroy(table); return 0; @@ -471,44 +517,85 @@ npf_addrgrp_tid2name(uint32_t tid) int npf_addrgrp_name2tid(const char *name, uint32_t *tid) { struct npf_tbl *table; - int id; + uint32_t id; + + /* Always set tid in case the return value is not checked */ + *tid = NPF_TBLID_NONE; table = rcu_dereference(g_addrgrp_table); if (!table) return -1; id = npf_tbl_name2id(table, name); - if (id < 0) - return id; + if (id == NPF_TBLID_NONE) + return -ENOENT; - *tid = (uint32_t)id; + *tid = id; return 0; } /* - * Get an address-groups table ID + * Get an address-group handle from a table ID. If this is to be stored by a + * client then the client should take a reference on the address-group by + * calling npf_addrgrp_get. */ -int npf_addrgrp_get_tid(struct npf_addrgrp *ag) +struct npf_addrgrp *npf_addrgrp_tid2handle(uint32_t tid) { - if (ag) - return ag->ag_tid; - return -ENOENT; + if (unlikely(!npf_tbl_id_is_valid(tid))) + return NULL; + + return npf_addrgrp_tid_lookup(tid); } -char *npf_addrgrp_handle2name(struct npf_addrgrp *ag) +/* + * Get an address-group name from a handle. + */ +const char *npf_addrgrp_handle2name(struct npf_addrgrp *ag) { return ag ? ag->ag_name : NULL; } +/* + * Update a client address group handle. + * + * When a client stores a pointer to an address-group (as opposed to a table + * ID) then it must hold a reference on that address-group while the pointer + * is valid. 
+ */ +void npf_addrgrp_update_handle(const char *old_name, const char *new_name, + struct npf_addrgrp **agp) +{ + struct npf_addrgrp *old_ag, *new_ag = NULL; + + if ((!old_name && !new_name) || + (old_name && new_name && !strcmp(old_name, new_name))) + /* Nothing to do */ + return; + + if (new_name) + new_ag = npf_addrgrp_lookup_name(new_name); + + if (new_ag) + /* Take reference on new address-group */ + npf_addrgrp_get(new_ag); + + /* Update client handle */ + old_ag = rcu_xchg_pointer(agp, new_ag); + + if (old_ag) + /* Release reference on old address-group */ + npf_addrgrp_put(old_ag); +} + /* * Create an address-group, and insert it into address-group tableset */ -struct npf_addrgrp *npf_addrgrp_create(const char *name) +struct npf_addrgrp *npf_addrgrp_cfg_add(const char *name) { struct npf_addrgrp *ag; int rc; - /* Create address-group tableset */ + /* Create global address-group tableset */ rc = npf_addrgrp_tbl_create(); if (rc < 0) return NULL; @@ -522,6 +609,8 @@ struct npf_addrgrp *npf_addrgrp_create(const char *name) if (!ag) return NULL; + ag->ag_tid = NPF_TBLID_NONE; + /* Initialize address-group data */ rte_rwlock_init(&ag->ag_lock); @@ -541,18 +630,17 @@ struct npf_addrgrp *npf_addrgrp_create(const char *name) ag->ag_name = strdup(name); - /* Add entry to tableset */ - ag->ag_tid = npf_tbl_entry_insert(g_addrgrp_table, ag); - - if (ag->ag_tid < 0) + /* + * Adding entry to tableset must be last. Note that this takes a + * reference on the address-groups container. 
+ */ rc = npf_tbl_entry_insert(g_addrgrp_table, ag, &ag->ag_tid); + if (rc < 0) goto error; return ag; error: - /* free address group lists and trees */ - npf_addrgrp_data_destroy(ag); - /* free (uninserted) tableset entry */ npf_tbl_entry_destroy(ag); @@ -562,12 +650,10 @@ struct npf_addrgrp *npf_addrgrp_create(const char *name) /* * Destroy the address-group specific data of an address-group * - * zlist_destroy takes care of freeing each list entry through either the - * callback function, npf_addrgrp_entry_free, or free (in no callback - * specified). + * Called via callback from the tableset when the address-groups containing + * structure is freed. */ -static void -npf_addrgrp_data_destroy(struct npf_addrgrp *ag) +static void npf_addrgrp_destroy(struct npf_addrgrp *ag) { /* * The zlist free function callbacks will take care of removing @@ -593,30 +679,60 @@ npf_addrgrp_data_destroy(struct npf_addrgrp *ag) rte_rwlock_write_unlock(&ag->ag_lock); - if (ag->ag_name) + if (ag->ag_name) { free(ag->ag_name); + ag->ag_name = NULL; + } } -static int -_npf_addrgrp_destroy(struct npf_addrgrp *ag) +/* + * Callback from tableset to destroy an entry. + * + * This is called as a result of npf_addrgrp_cfg_delete calling + * npf_tbl_entry_remove. It is called after the entry is removed from the + * zhash table *and* after an RCU quiescent period has elapsed. + */ +static void npf_tbl_entry_free_cb(void *data) { + struct npf_addrgrp *ag = data; + if (!ag) - return -EINVAL; + return; + npf_addrgrp_destroy(ag); +} - /* free lists and trees */ - npf_addrgrp_data_destroy(ag); +/* + * Take reference on address-group container. + * + * The first reference is taken via npf_tbl_entry_insert when the + * address-group is inserted into the g_addrgrp_table tableset. + * + * When the last reference is removed, the address-group is removed from + * g_addrgrp_table and freed. 
This usually occurs when the address-group is + * unconfigured, but may be at a later time if something else holds a reference + * on the group. + */ +struct npf_addrgrp *npf_addrgrp_get(struct npf_addrgrp *ag) +{ + return npf_tbl_entry_get(ag); +} - /* Remove addr table from tableset and destroy it */ - return npf_tbl_entry_remove(g_addrgrp_table, ag); +/* + * Release reference on address-group container. Address-group is destroyed + * when last reference is removed. + */ +void npf_addrgrp_put(struct npf_addrgrp *ag) +{ + npf_tbl_entry_put(ag); } /* * Remove an address-group from tableset, and destroy it. */ -int -npf_addrgrp_destroy(const char *name) +int npf_addrgrp_cfg_delete(const char *name) { struct npf_addrgrp *ag; + int rc; if (!g_addrgrp_table) return -EINVAL; @@ -626,7 +742,27 @@ npf_addrgrp_destroy(const char *name) if (!ag) return -ENOENT; - return _npf_addrgrp_destroy(ag); + assert(ag->ag_tid != NPF_TBLID_NONE); + + /* + * Remove address-group from tableset and free the memory. The + * address-group function npf_tbl_entry_free_cb is called after an rcu + * period. 
The sequence is: + * + * npf_addrgrp_cfg_delete + * npf_tbl_entry_remove + * zhash_delete + * npf_tbl_zhash_delete_cb + * _npf_tbl_entry_put + * npf_tbl_entry_free_rcu + * _npf_tbl_entry_destroy + * npf_tbl_entry_free_cb + * npf_addrgrp_destroy + * free container + */ + rc = npf_tbl_entry_remove(g_addrgrp_table, ag); + + return rc; } /* @@ -774,7 +910,7 @@ npf_addrgrp_prefix_mask_remove(struct npf_addrgrp_entry *ae, uint8_t mask) * Compare two addresses */ static int -npf_addrgrp_addr_cmp(uint8_t *addr1, uint8_t *addr2, uint8_t alen) +npf_addrgrp_addr_cmp(const uint8_t *addr1, const uint8_t *addr2, uint8_t alen) { int i; @@ -785,7 +921,7 @@ npf_addrgrp_addr_cmp(uint8_t *addr1, uint8_t *addr2, uint8_t alen) for (i = 0; i < alen; i++) { if (addr1[i] > addr2[i]) return 1; - else if (addr1[i] < addr2[i]) + if (addr1[i] < addr2[i]) return -1; } return 0; @@ -856,6 +992,12 @@ static void set_host_bits(uint8_t *a, int alen, int mask) { int i, b; + /* + * If mask is NPF_NO_NETMASK then change it to 32 or 128 for host bits + * calculation. 
+ */ mask = MIN(mask, alen * 8); + /* Start at least significant byte */ for (i = alen - 1, b = alen*8 - mask; i >= 0 && b > 7; i--, b -= 8) a[i] = 0xff; @@ -1099,7 +1241,7 @@ npf_addrgrp_prefix_insert_list(zlist_t *list, zlist_free_fn free_fn, } /* - * Return true if any hosts bits are set + * Return true if addr is a prefix and mask, and any host bits are set */ static bool host_bits_set(uint8_t *addr, uint8_t alen, uint8_t mask) @@ -1107,7 +1249,9 @@ host_bits_set(uint8_t *addr, uint8_t alen, uint8_t mask) int i, b; uint8_t *a = addr; - if (mask == alen*8) + /* Host addresses will have a mask of either 32, 128, or 255 */ + if (mask >= alen*8) + /* Host address */ return false; /* @@ -1128,7 +1272,7 @@ host_bits_set(uint8_t *addr, uint8_t alen, uint8_t mask) * Return true if address is zero */ static bool -is_addr_zero(uint8_t *addr, uint8_t alen) +is_addr_zero(const uint8_t *addr, uint8_t alen) { uint i; @@ -1170,7 +1314,6 @@ int npf_addrgrp_prefix_insert(const char *name, npf_addr_t *addr, struct npf_addrgrp_entry *ae; struct npf_addrgrp *ag; enum npf_addrgrp_af af; - bool new = false; int rc; if (alen != AG_KLEN_IPv4 && alen != AG_KLEN_IPv6) @@ -1184,27 +1327,16 @@ int npf_addrgrp_prefix_insert(const char *name, npf_addr_t *addr, if (!is_addr_zero(addr->s6_addr, alen)) return -EINVAL; } else { - /* - * If mask is NPF_NO_NETMASK then change to 32 or 128 for host - * bits check - */ - uint8_t mm = MIN(mask, alen * 8); - - /* - * check no host bits are set - */ - if (host_bits_set(addr->s6_addr, alen, mm)) + /* If not a host address, check no host bits are set */ + if (host_bits_set(addr->s6_addr, alen, mask)) return -EINVAL; } - /* Create an address-group if one doesn't already exist */ + /* An address-group should already exist */ ag = npf_addrgrp_lookup_name(name); - if (!ag) { - ag = npf_addrgrp_create(name); - if (!ag) - return -EINVAL; - new = true; - } + if (!ag) + return -ENOENT; + af = AG_ALEN2AF(alen); /* Only one 0.0.0.0/0 (or ::/0) allowed */ @@ 
-1217,8 +1349,6 @@ int npf_addrgrp_prefix_insert(const char *name, npf_addr_t *addr, */ ae = npf_addrgrp_list_prefix_lookup(ag, addr->s6_addr, mask, alen); if (ae) { - assert(!new); - if (ae->ae_type == NPF_ADDRGRP_TYPE_RANGE) return -EEXIST; @@ -1236,11 +1366,8 @@ int npf_addrgrp_prefix_insert(const char *name, npf_addr_t *addr, ae = npf_addrgrp_prefix_insert_list(list, npf_addrgrp_entry_free, addr->s6_addr, alen, mask, ag); - if (!ae) { - if (new) - _npf_addrgrp_destroy(ag); + if (!ae) return -ENOMEM; - } /* * Special case of 0.0.0.0/0 (or ::/0). We just set a boolean, and do @@ -1265,9 +1392,6 @@ int npf_addrgrp_prefix_insert(const char *name, npf_addr_t *addr, assert(rc == 0); - if (rc < 0 && new) - _npf_addrgrp_destroy(ag); - return rc; } @@ -1275,7 +1399,7 @@ int npf_addrgrp_prefix_insert(const char *name, npf_addr_t *addr, * reverse address. The CIDR utils use host-byte order, and address-groups * use network byte order. */ -static inline void reverse_addr(uint8_t *dst, uint8_t *src, int len) +static inline void reverse_addr(uint8_t *dst, const uint8_t *src, int len) { int i; @@ -1513,7 +1637,6 @@ int npf_addrgrp_range_insert(const char *name, npf_addr_t *start, { struct npf_addrgrp_entry *ae, *cur_ae = NULL; struct npf_addrgrp *ag; - bool new = false; if (alen != AG_KLEN_IPv4 && alen != AG_KLEN_IPv6) return -EINVAL; @@ -1522,14 +1645,10 @@ int npf_addrgrp_range_insert(const char *name, npf_addr_t *start, if (npf_addrgrp_addr_cmp(start->s6_addr, end->s6_addr, alen) >= 0) return -EINVAL; - /* Create an address-group if one doesn't already exist */ + /* An address-group should already exist */ ag = npf_addrgrp_lookup_name(name); - if (!ag) { - ag = npf_addrgrp_create(name); - if (!ag) - return -EINVAL; - new = true; - } + if (!ag) + return -ENOENT; /* * Does the new range overlap with an existing prefix entry or range @@ -1568,11 +1687,8 @@ int npf_addrgrp_range_insert(const char *name, npf_addr_t *start, ae = npf_addrgrp_range_insert_list(list, 
start->s6_addr, end->s6_addr, alen, ag); - if (!ae) { - if (new) - _npf_addrgrp_destroy(ag); + if (!ae) return -ENOMEM; - } /* * Convert range to minimal set of CIDR notation blocks, and add to @@ -1631,16 +1747,8 @@ int npf_addrgrp_prefix_remove(const char *name, npf_addr_t *addr, if (!is_addr_zero(addr->s6_addr, alen)) return -EINVAL; } else { - /* - * If mask is NPF_NO_NETMASK then change to 32 or 128 for host - * bits check - */ - uint8_t tmp = MIN(mask, alen * 8); - - /* - * check no host bits are set - */ - if (host_bits_set(addr->s6_addr, alen, tmp)) + /* If not a host address, check no host bits are set */ + if (host_bits_set(addr->s6_addr, alen, mask)) return -EINVAL; } @@ -1720,6 +1828,16 @@ int npf_addrgrp_range_remove(const char *name, npf_addr_t *start, /* * Walk address-group tree */ +static int +_npf_addrgrp_tree_walk(enum npf_addrgrp_af af, struct npf_addrgrp *ag, + pt_walk_cb *cb, void *ctx) +{ + if (ptree_get_table_leaf_count(ag->ag_tree[af]) == 0) + return 0; + + return ptree_walk(ag->ag_tree[af], PT_UP, cb, ctx); +} + int npf_addrgrp_tree_walk(enum npf_addrgrp_af af, int tid, pt_walk_cb *cb, void *ctx) @@ -1733,10 +1851,7 @@ npf_addrgrp_tree_walk(enum npf_addrgrp_af af, int tid, if (af != AG_IPv4 && af != AG_IPv6) return -EINVAL; - if (ptree_get_table_leaf_count(ag->ag_tree[af]) == 0) - return 0; - - return ptree_walk(ag->ag_tree[af], PT_UP, cb, ctx); + return _npf_addrgrp_tree_walk(af, ag, cb, ctx); } /* @@ -1813,19 +1928,24 @@ npf_addrgrp_ipv4_range_walk(int tid, ag_ipv4_range_cb *cb, void *ctx) /* * Determine how many addresses are included in a table + * + * The user may want to specify 'count_all' to count the all-zero and all-ones + * addresses of prefixes. For example, if the address-group is used for + * address matching then they probably want to set this to true. However if + * the address-group is used as an address pool (e.g. 
SNAT NAT policy) then + * they should set this to false since the all-zero and all-ones addresses are + * not used. */ uint64_t -npf_addrgrp_naddrs(enum npf_addrgrp_af af, int tid) +npf_addrgrp_naddrs_by_handle(enum npf_addrgrp_af af, struct npf_addrgrp *ag, + bool count_all) { struct npf_addrgrp_entry *ae; struct npf_addrgrp_entry *ap; - struct npf_addrgrp *ag; zlist_t *list; uint64_t naddrs = 0; uint8_t alen = AG_AF2ALEN(af); - ag = npf_addrgrp_tid_lookup(tid); - assert(ag != NULL); if (!ag) return 0; @@ -1837,7 +1957,7 @@ npf_addrgrp_naddrs(enum npf_addrgrp_af af, int tid) for (ae = zlist_first(list); ae != NULL; ae = zlist_next(list)) { if (ae->ae_type == NPF_ADDRGRP_TYPE_PREFIX) naddrs += npf_addrgrp_useable_addrs(ae->ap_mask[0], - alen, false); + alen, count_all); else { for (ap = zlist_first(ae->ar_list); ap != NULL; ap = zlist_next(ae->ar_list)) { @@ -1850,13 +1970,24 @@ npf_addrgrp_naddrs(enum npf_addrgrp_af af, int tid) return naddrs; } +uint64_t +npf_addrgrp_naddrs(enum npf_addrgrp_af af, int tid, bool count_all) +{ + struct npf_addrgrp *ag; + + ag = npf_addrgrp_tid_lookup(tid); + if (!ag) + return 0; + + return npf_addrgrp_naddrs_by_handle(af, ag, count_all); +} + /******************************************************************** * Address group show *******************************************************************/ static void npf_addrgrp_jsonw_list(json_writer_t *json, zlist_t *list, - const char *name, struct npf_show_ag_ctl *ctl); /* @@ -1915,9 +2046,9 @@ npf_addrgrp_jsonw_list_entry(json_writer_t *json, struct npf_addrgrp_entry *ae, jsonw_string_field(json, "start", str1); jsonw_string_field(json, "end", str2); - if (ctl->range_pfxs) - npf_addrgrp_jsonw_list(json, ae->ar_list, - "range-prefixes", ctl); + /* Show the prefixes derived from the address range */ + if (ctl->detail) + npf_addrgrp_jsonw_list(json, ae->ar_list, ctl); jsonw_end_object(json); } @@ -1927,12 +2058,12 @@ npf_addrgrp_jsonw_list_entry(json_writer_t *json, struct 
npf_addrgrp_entry *ae, * Write json array for an address-group list. */ static void -npf_addrgrp_jsonw_list(json_writer_t *json, zlist_t *list, const char *name, +npf_addrgrp_jsonw_list(json_writer_t *json, zlist_t *list, struct npf_show_ag_ctl *ctl) { struct npf_addrgrp_entry *ae; - jsonw_name(json, name); + jsonw_name(json, "entries"); jsonw_start_array(json); for (ae = zlist_first(list); ae != NULL; @@ -1964,7 +2095,7 @@ static void npf_addrgrp_jsonw_tree(json_writer_t *json, struct npf_addrgrp *ag, enum npf_addrgrp_af af) { - jsonw_name(json, "tree"); + jsonw_name(json, "entries"); jsonw_start_array(json); /* @@ -1977,29 +2108,51 @@ npf_addrgrp_jsonw_tree(json_writer_t *json, struct npf_addrgrp *ag, npf_addrgrp_jsonw_prefix(json, af, (uint8_t *)addr, 0); } - npf_addrgrp_tree_walk(af, ag->ag_tid, - npf_addrgrp_jsonw_tree_cb, json); + _npf_addrgrp_tree_walk(af, ag, npf_addrgrp_jsonw_tree_cb, json); jsonw_end_array(json); } +static void +npf_addrgrp_jsonw_optimal(json_writer_t *json, struct npf_addrgrp *ag, + int af); + /* * Write json for an address-group */ -static void -npf_addrgrp_jsonw(json_writer_t *json, struct npf_addrgrp *ag, - struct npf_show_ag_ctl *ctl) +void +npf_addrgrp_jsonw_one(json_writer_t *json, struct npf_addrgrp *ag, + struct npf_show_ag_ctl *ctl) { - assert(AG_IPv6 > AG_IPv4); - int af; + int nentries = 0; + + rte_rwlock_read_lock(&ag->ag_lock); + + /* + * Only add an address-group to the json if it contains entries for + * the address-family asked for. 
+ */ + for (af = AG_IPv4; af <= AG_IPv6; af++) + if (ctl->af[af]) + nentries += zlist_size(ag->ag_list[af]); + + if (!nentries) { + rte_rwlock_read_unlock(&ag->ag_lock); + return; + } - jsonw_name(json, "address-group"); jsonw_start_object(json); jsonw_string_field(json, "name", ag->ag_name); jsonw_uint_field(json, "id", ag->ag_tid); + if (ctl->brief) + goto end; + + /* + * An address-group may contain IPv4 and IPv6 entries + */ for (af = AG_IPv4; af <= AG_IPv6; af++) { if (!ctl->af[af]) continue; @@ -2007,44 +2160,35 @@ npf_addrgrp_jsonw(json_writer_t *json, struct npf_addrgrp *ag, jsonw_name(json, af == AG_IPv4 ? "ipv4" : "ipv6"); jsonw_start_object(json); - if (ctl->list) - npf_addrgrp_jsonw_list(json, ag->ag_list[af], - "list-entries", ctl); - - if (ctl->tree) + if (ctl->optimal) + npf_addrgrp_jsonw_optimal(json, ag, af); + else if (ctl->tree) npf_addrgrp_jsonw_tree(json, ag, af); + else + npf_addrgrp_jsonw_list(json, ag->ag_list[af], ctl); jsonw_end_object(json); } +end: jsonw_end_object(json); + + rte_rwlock_read_unlock(&ag->ag_lock); } /* * Callback for each address-group in the global table. */ -static int -npf_addrgrp_show_json_cb(const char *name __unused, uint id __unused, - void *data, void *ctx) +static int npf_addrgrp_show_cb(const char *name __unused, uint id __unused, + void *data, void *ctx) { struct npf_addrgrp *ag = data; struct npf_show_ag_ctl *ctl = ctx; - /* - * Table walk is either looking for the first address-group - * (ctl->tid == 0), or the next address-group equal to or greater - * than ctl->tid. 
- */ - if (ctl->tid > 0 && ag->ag_tid < ctl->tid) - return 0; + assert(ag); + assert(ctl); - rte_rwlock_read_lock(&ag->ag_lock); - - npf_addrgrp_jsonw(ctl->json, ag, ctl); - - rte_rwlock_read_unlock(&ag->ag_lock); - - /* stop the walk when we find a suitable address-group */ - return 1; + npf_addrgrp_jsonw_one(ctl->json, ag, ctl); + return 0; } /* @@ -2052,37 +2196,38 @@ npf_addrgrp_show_json_cb(const char *name __unused, uint id __unused, * * Return an empty address-group object if address-group is not found */ -void -npf_addrgrp_show_json(FILE *fp, struct npf_show_ag_ctl *ctl) +void npf_addrgrp_show(FILE *fp, struct npf_show_ag_ctl *ctl) { - struct npf_addrgrp *ag = NULL; json_writer_t *json; - if (!g_addrgrp_table || npf_tbl_size(g_addrgrp_table) == 0) - return; - - ctl->json = json = jsonw_new(fp); + json = jsonw_new(fp); if (!json) return; + jsonw_pretty(json, true); + jsonw_name(json, "address-groups"); + jsonw_start_array(json); + + if (!g_addrgrp_table) + goto end; + if (ctl->name) { + struct npf_addrgrp *ag; + ag = npf_addrgrp_lookup_name(ctl->name); if (!ag) - goto end_ag; - } - - if (ag) { - rte_rwlock_read_lock(&ag->ag_lock); + goto end; /* Show one address-group */ - npf_addrgrp_jsonw(json, ag, ctl); + npf_addrgrp_jsonw_one(json, ag, ctl); - rte_rwlock_read_unlock(&ag->ag_lock); - } else - /* Show address-group with ID equal or greater than ctl->tid */ - npf_tbl_walk(g_addrgrp_table, npf_addrgrp_show_json_cb, ctl); + } else { + ctl->json = json; + npf_tbl_walk(g_addrgrp_table, npf_addrgrp_show_cb, ctl); + } -end_ag: +end: + jsonw_end_array(json); jsonw_destroy(&json); } @@ -2099,8 +2244,10 @@ npf_addrgrp_get_optimal(zlist_t *list, struct cidr_tree *cidr, int alen) for (ae = zlist_first(list); ae != NULL; ae = zlist_next(list)) { if (ae->ae_type == NPF_ADDRGRP_TYPE_PREFIX) { + uint8_t mask = ag_ptree_mask(ae->ae_af, ae->ap_mask[0]); + reverse_addr(a1, ap_prefix(ae), alen); - npf_cidr_save_prefix(cidr, a1, ae->ap_mask[0]); + npf_cidr_save_prefix(cidr, 
a1, mask); } else { if (zlist_size(ae->ar_list) > 0) npf_addrgrp_get_optimal(ae->ar_list, cidr, @@ -2126,30 +2273,20 @@ npf_addrgrp_show_json_opt_cb(uint8_t *pfx, int alen, int mask, void *ctx) return 0; } -static int -_npf_addrgrp_show_json_opt(int id, void *data, void *ctx) +static void +npf_addrgrp_jsonw_optimal(json_writer_t *json, struct npf_addrgrp *ag, + int af) { - struct npf_addrgrp *ag = data; - struct npf_show_ag_ctl *ctl = ctx; - json_writer_t *json = ctl->json; - - jsonw_name(json, "address-group"); - jsonw_start_object(json); - - jsonw_string_field(json, "name", ag->ag_name); - jsonw_uint_field(json, "id", id); + struct cidr_tree cidr; + int alen = AG_AF2ALEN(af); + zlist_t *list; - /* Can only be IPv4 *or* IPv6 */ - jsonw_name(json, ctl->af[AG_IPv4] ? "ipv4" : "ipv6"); - jsonw_start_object(json); + assert(af == AG_IPv4 || af == AG_IPv6); + list = ag->ag_list[af]; - jsonw_name(json, "tree"); + jsonw_name(json, "entries"); jsonw_start_array(json); - struct cidr_tree cidr; - int alen = ctl->af[AG_IPv4] ? 4 : 16; - zlist_t *list = ag->ag_list[ctl->af[AG_IPv4] ? AG_IPv4 : AG_IPv6]; - npf_cidr_tree_init(&cidr, alen); if (zlist_size(list) > 0) @@ -2160,41 +2297,4 @@ _npf_addrgrp_show_json_opt(int id, void *data, void *ctx) npf_cidr_tree_free(&cidr); jsonw_end_array(json); - jsonw_end_object(json); - jsonw_end_object(json); - - return 0; -} - -/* - * Show list of optimal address-group tree entries, i.e. the minimal set of - * prefixes and masks to provide same coverage as the user has configured. 
- */ -void -npf_addrgrp_show_json_opt(FILE *fp, struct npf_show_ag_ctl *ctl) -{ - struct npf_addrgrp *ag = NULL; - json_writer_t *json; - - if (!g_addrgrp_table || npf_tbl_size(g_addrgrp_table) == 0) - return; - - ctl->json = json = jsonw_new(fp); - if (!json) - return; - - if (ctl->name) - ag = npf_addrgrp_lookup_name(ctl->name); - if (!ag) - goto end_ag; - - rte_rwlock_read_lock(&ag->ag_lock); - - /* Show one address-group */ - _npf_addrgrp_show_json_opt(ag->ag_tid, ag, ctl); - - rte_rwlock_read_unlock(&ag->ag_lock); - -end_ag: - jsonw_destroy(&json); } diff --git a/src/npf/npf_addrgrp.h b/src/npf/npf_addrgrp.h index cfb3fed2..ec9e8eb3 100644 --- a/src/npf/npf_addrgrp.h +++ b/src/npf/npf_addrgrp.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only */ @@ -51,14 +51,13 @@ enum npf_addrgrp_af { * with read-write lock. * * @param af Address-group address family. AG_IPv4 or AG_IPv6. - * @param ag Address-group handle + * @param tid Address-group table ID * @param addr Address to lookup * @return 0 if address found else a negative error code if not found: - * -EINVAL if address-group not found + * -EINVAL if tid is invalid or address group is not found * -ENOENT if entry in address-group not found */ -int npf_addrgrp_lookup(enum npf_addrgrp_af af, struct npf_addrgrp *ag, - npf_addr_t *addr); +int npf_addrgrp_lookup(enum npf_addrgrp_af af, uint32_t tid, npf_addr_t *addr); /** * @brief Lookup an IPv4 address in an address-group. @@ -66,13 +65,27 @@ int npf_addrgrp_lookup(enum npf_addrgrp_af af, struct npf_addrgrp *ag, * Called from forwarding thread, so access to underlying ptree is protected * with read-write lock. 
* + * @param tid Address-group table ID + * @param addr IPv4 address to lookup + * @return 0 if address found else a negative error code if not found: + * -EINVAL if tid is invalid or address group is not found + * -ENOENT if entry in address-group not found + */ +int npf_addrgrp_lookup_v4(uint32_t tid, uint32_t addr); + +/** + * @brief Lookup an IPv4 address in an address-group by handle + * + * Called from forwarding thread, so access to underlying ptree is protected + * with read-write lock. + * * @param ag Address-group handle * @param addr IPv4 address to lookup * @return 0 if address found else a negative error code if not found: * -EINVAL if ag is NULL * -ENOENT if entry in address-group not found */ -int npf_addrgrp_lookup_v4(struct npf_addrgrp *ag, uint32_t addr); +int npf_addrgrp_lookup_v4_by_handle(struct npf_addrgrp *ag, uint32_t addr); /** * @brief Lookup an IPv6 address in an address-group. @@ -80,18 +93,27 @@ int npf_addrgrp_lookup_v4(struct npf_addrgrp *ag, uint32_t addr); * Called from forwarding thread, so access to underlying ptree is protected * with read-write lock. * - * @param ag Address-group handle + * @param tid Address-group table ID * @param addr IPv6 address to lookup * @return 0 if address found else a negative error code if not found: - * -EINVAL if ag is NULL + * -EINVAL if tid is invalid or address group is not found * -ENOENT if entry in address-group not found */ -int npf_addrgrp_lookup_v6(struct npf_addrgrp *ag, uint8_t *addr); +int npf_addrgrp_lookup_v6(uint32_t tid, uint8_t *addr); /** - * @brief Get name from address group handle + * @brief Lookup an IPv6 address in an address-group by handle. + * + * Called from forwarding thread, so access to underlying ptree is protected + * with read-write lock. 
+ * + * @param ag Address-group handle + * @param addr IPv6 address to lookup + * @return 0 if address found else a negative error code if not found: + * -EINVAL if ag is NULL + * -ENOENT if entry in address-group not found */ -char *npf_addrgrp_handle2name(struct npf_addrgrp *ag); +int npf_addrgrp_lookup_v6_by_handle(struct npf_addrgrp *ag, uint8_t *addr); /************************************************************************* @@ -103,11 +125,6 @@ char *npf_addrgrp_handle2name(struct npf_addrgrp *ag); */ uint npf_addrgrp_ntables(void); -/** - * @brief Lookup an address group for a given table ID - */ -struct npf_addrgrp *npf_addrgrp_tid_lookup(int tid); - /** * @brief Is this a valid address-group table ID? * @@ -121,30 +138,44 @@ bool npf_addrgrp_tid_valid(uint32_t tid); */ const char *npf_addrgrp_tid2name(uint32_t tid); +/** + * @brief Get an address-group name from a handle. + */ +const char *npf_addrgrp_handle2name(struct npf_addrgrp *ag); + /** * @brief Address-group name to table ID */ int npf_addrgrp_name2tid(const char *name, uint32_t *tid); /** - * @brief Get an address-groups table ID + * @brief Get an address-group handle from a table ID. + * + * If the handle is to be stored by a client then the client should take a + * reference on the address-group by calling npf_addrgrp_get. 
*/ -int npf_addrgrp_get_tid(struct npf_addrgrp *ag); +struct npf_addrgrp *npf_addrgrp_tid2handle(uint32_t tid); /** * @brief Lookup an address-group by name */ struct npf_addrgrp *npf_addrgrp_lookup_name(const char *name); +/** + * @brief Update a client address group handle + */ +void npf_addrgrp_update_handle(const char *old_name, const char *new_name, + struct npf_addrgrp **agp); + /** * @brief Create an address-group and insert it into tableset */ -struct npf_addrgrp *npf_addrgrp_create(const char *name); +struct npf_addrgrp *npf_addrgrp_cfg_add(const char *name); /** * @brief Remove an address-group from the tableset and destroy it */ -int npf_addrgrp_destroy(const char *name); +int npf_addrgrp_cfg_delete(const char *name); /** * @brief Destroy address-group tableset @@ -159,6 +190,16 @@ int npf_addrgrp_tbl_destroy(void); * Address-group management api *************************************************************************/ +/** + * @brief Take a reference on an address-group + */ +struct npf_addrgrp *npf_addrgrp_get(struct npf_addrgrp *ag); + +/** + * @brief Release reference on address-group + */ +void npf_addrgrp_put(struct npf_addrgrp *ag); + /** * @brief Returns number of entries in an address-group * @@ -264,8 +305,17 @@ int npf_addrgrp_ipv4_range_walk(int tid, ag_ipv4_range_cb *cb, void *ctx); /** * @brief Get number of useable addresses in an address-group + * + * The user may want to specify 'count_all' to count the all-zero and all-ones + * addresses of prefixes. For example, if the address-group is used for + * address matching then they probably want to set this to true. However if + * the address-group is used as an address pool (e.g. SNAT NAT policy) then + * they should set this to false since the all-zero and all-ones addresses are + * not used. 
*/ -uint64_t npf_addrgrp_naddrs(enum npf_addrgrp_af af, int tid); +uint64_t npf_addrgrp_naddrs_by_handle(enum npf_addrgrp_af af, + struct npf_addrgrp *ag, bool count_all); +uint64_t npf_addrgrp_naddrs(enum npf_addrgrp_af af, int tid, bool count_all); /******************************************************************** @@ -278,11 +328,11 @@ uint64_t npf_addrgrp_naddrs(enum npf_addrgrp_af af, int tid); struct npf_show_ag_ctl { json_writer_t *json; char *name; /* show this named address-group or .. */ - int tid; /* .. show address-group with this ID */ - bool list; /* show list entries */ - bool range_pfxs; /* show list entry range prefixes */ + bool detail; /* also show prefixes of ranges */ + bool brief; /* return a list of address groups */ bool tree; /* show tree entries */ bool af[AG_MAX]; /* show IPv4 and/or IPv6 entries */ + bool optimal; /* convert tree to optimal list of prefixes */ }; /** @@ -371,34 +421,12 @@ struct npf_show_ag_ctl { * } * */ -void npf_addrgrp_show_json(FILE *fp, struct npf_show_ag_ctl *ctl); +void npf_addrgrp_show(FILE *fp, struct npf_show_ag_ctl *ctl); /** - * @brief Return json for optimal set of address subblocks - * - * Determine the optimal set of CIDR address subblocks that may be used to - * represent the current prefix and range entries for an address-group, - * and returns the result to the user. 
- * - * Returns json in the following format: - * - * { - * "address-group": { - * { - * "name":"ADDR_GRP1", - * "id":1, - * "ipv4":{ - * "tree":[ - * { - * "type":0, - * "prefix":"4.0.0.0", - * "mask":20 - * }, - * ] - * } - * } - * } + * @brief Return json for an address-group by handle */ -void npf_addrgrp_show_json_opt(FILE *fp, struct npf_show_ag_ctl *ctl); +void npf_addrgrp_jsonw_one(json_writer_t *json, struct npf_addrgrp *ag, + struct npf_show_ag_ctl *ctl); #endif diff --git a/src/npf/npf_apm.c b/src/npf/npf_apm.c index fed6fced..9694f4fa 100644 --- a/src/npf/npf_apm.c +++ b/src/npf/npf_apm.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2021, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015-2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -28,6 +28,9 @@ #include "npf/npf_addrgrp.h" #include "npf/npf_apm.h" #include "npf/npf_nat.h" +#include "npf/npf_pack.h" +#include "npf/npf_rc.h" +#include "npf/nat/nat_proto.h" #include "npf_tblset.h" #include "urcu.h" #include "util.h" @@ -41,8 +44,9 @@ * within the defined range, the original is used. * * For SNAT, we consider the entire translation space (2^32 addrs * 2^16 - * ports) by using an RCU hash table for a per-addr port translation - * bitmap. This hash table is referenced by all snat users. + * ports) by using an RCU hash table for a per-addr/per-protocol (tcp, udp + * others) port translation bitmap. This hash table is referenced by all + * snat users. 
* * The portmap is a sparse bitmap implementation - The entire range * of the bitmap (2^16 bits) is divided into 'sections', which are @@ -142,13 +146,16 @@ rte_atomic64_t pm_mem_used; struct port_section { unsigned long ps_bm[PM_SECTION_WORDS];/* section bitmap */ uint16_t ps_used; /* bits allocated */ - uint16_t pad[3]; /* Pad to cache line */ +}; + +struct port_prot { + struct port_section *pp_sections[PM_SECTION_CNT]; + uint16_t pp_used; /* # allocated ports */ }; struct port_map { - struct port_section *pm_sections[PM_SECTION_CNT]; rte_spinlock_t pm_lock; /* for sync'ing updates */ - uint16_t pm_used; /* # allocated ports */ + struct port_prot pm_nprot[NAT_PROTO_COUNT]; uint8_t pm_flags; /* for removal */ uint32_t pm_addr; /* addr of this port map */ vrfid_t pm_vrfid; @@ -191,6 +198,7 @@ struct apm_table_params { uint32_t ap_addr; uint32_t ap_map_flags; in_port_t ap_port; + enum nat_proto ap_nprot; }; /* Set bits in a section, span words if needed */ @@ -242,31 +250,61 @@ static int test_bits(unsigned long bit, int nr_bits, unsigned long *addr) } static void port_stats_inc(int nr_ports, struct port_map *pm, - struct port_section *ps) + enum nat_proto nprot, struct port_section *ps) { - pm->pm_used += nr_ports; + pm->pm_nprot[nprot].pp_used += nr_ports; ps->ps_used += nr_ports; } -static void port_stats_dec(struct port_map *pm, struct port_section *ps) +static void port_stats_dec(struct port_map *pm, enum nat_proto nprot, + struct port_section *ps) { - pm->pm_used--; + pm->pm_nprot[nprot].pp_used--; ps->ps_used--; } +/* + * Check that not trying to free mappings that still have ports allocated. 
+ */ +static void apm_free_map_sanity(struct port_map *pm) +{ + int nprot; + + for (nprot = NAT_PROTO_FIRST; nprot < NAT_PROTO_COUNT; nprot++) { + struct port_section **pp_sections; + int i; + + pp_sections = pm->pm_nprot[nprot].pp_sections; + + for (i = 0; i < PM_SECTION_CNT; i++) { + assert((pp_sections[i] && pp_sections[i]->ps_used) + == 0); + if (pp_sections[i] && pp_sections[i]->ps_used && + net_ratelimit()) + RTE_LOG(ERR, FIREWALL, + "NPF port map: prot %s: section: %d used: %d\n", + nat_proto_lc_str(nprot), i, + pp_sections[i]->ps_used); + } + } +} + /* Free the entry */ static void map_rcu_free(struct rcu_head *head) { struct port_map *pm = caa_container_of(head, struct port_map, - pm_rcu_head); - int i; - - /* Sanity, can only happen with a bug */ - for (i = 0; i < PM_SECTION_CNT; i++) { - if (pm->pm_sections[i] && pm->pm_sections[i]->ps_used) - rte_panic("NPF port map: section: %d used: %d\n", - i, pm->pm_sections[i]->ps_used); - } + pm_rcu_head); + + /* + * Perform a sanity check if marked as dead, as it means it + * was called from the GC routine. It is not called if released + * for other reasons which happens when forcing releases of + * the resources even if they are in use. This currently occurs + * when the dataplane is shutdown or due to unit tests. 
+ */ + if ((pm->pm_flags & PM_FLAG_DEAD) != 0) + apm_free_map_sanity(pm); + rte_free(pm); } @@ -295,8 +333,19 @@ static void map_gc(struct rte_timer *timer __rte_unused, void *arg __rte_unused) } } else if (pm->pm_flags & PM_FLAG_REMOVABLE) pm->pm_flags |= PM_FLAG_DEAD; - else if (!pm->pm_used) - pm->pm_flags = PM_FLAG_REMOVABLE; + else { + uint8_t pm_flags = PM_FLAG_REMOVABLE; + uint8_t nprot; + + for (nprot = NAT_PROTO_FIRST; nprot < NAT_PROTO_COUNT; + nprot++) { + if (pm->pm_nprot[nprot].pp_used) { + pm_flags = 0; + break; + } + } + pm->pm_flags = pm_flags; + } rte_spinlock_unlock(&pm->pm_lock); } @@ -398,12 +447,14 @@ static struct port_map *map_get(uint32_t addr, vrfid_t vrfid, bool create) /* Get a portmap section, allocate if needed */ static struct port_section *map_get_section(struct port_map *pm, - int n, bool create) + enum nat_proto nprot, int n, bool create) { + struct port_section **pp_sections = pm->pm_nprot[nprot].pp_sections; + size_t sz = sizeof(struct port_section); - if (pm->pm_sections[n] || !create) - return pm->pm_sections[n]; + if (pp_sections[n] || !create) + return pp_sections[n]; /* memory limit. Yes, this is racy */ if (rte_atomic64_add_return(&pm_mem_used, sz) > PM_MEM_LIMIT) { @@ -411,17 +462,17 @@ static struct port_section *map_get_section(struct port_map *pm, return NULL; } - pm->pm_sections[n] = rte_zmalloc("apm", sz, RTE_CACHE_LINE_SIZE); + pp_sections[n] = rte_zmalloc("apm", sz, RTE_CACHE_LINE_SIZE); - return pm->pm_sections[n]; + return pp_sections[n]; } /* Put a port section, free if unused. 
assumes lock held */ -static void map_put_section(struct port_map *pm, - struct port_section *ps, int n) +static void map_put_section(struct port_map *pm, enum nat_proto nprot, + struct port_section *ps, int n) { if (!ps->ps_used) { - pm->pm_sections[n] = NULL; + pm->pm_nprot[nprot].pp_sections[n] = NULL; rte_atomic64_sub(&pm_mem_used, sizeof(struct port_section)); rte_free(ps); } @@ -439,8 +490,9 @@ static bool addr_in_range(const struct npf_apm_range *ar, uint32_t addr) return (addr >= ar->ar_addr_start && addr <= ar->ar_addr_stop); } -static inline int port_alloc_ports(struct port_map *pm, uint32_t map_flags, - int nr_bits, uint16_t *port) +static inline int port_alloc_ports(struct port_map *pm, enum nat_proto nprot, + uint32_t map_flags, int nr_bits, + uint16_t *port) { unsigned long bit = PM_SECTION_BIT(*port); struct port_section *ps; @@ -451,25 +503,25 @@ static inline int port_alloc_ports(struct port_map *pm, uint32_t map_flags, * We will loop for the next port. */ if ((map_flags & NPF_NAT_MAP_EVEN_PORT) && !(bit & 1)) - return -EADDRINUSE; + return -NPF_RC_NAT_EADDRINUSE; /* cannot span a section */ if (unlikely(npf_apm_span_section(*port, nr_bits))) - return -ENOSPC; + return -NPF_RC_NAT_ENOSPC; section = PM_SECTION_OF_PORT(*port); - ps = map_get_section(pm, section, true); + ps = map_get_section(pm, nprot, section, true); if (!ps) - return -ENOMEM; + return -NPF_RC_NAT_ENOMEM; if (ps->ps_used >= PM_SECTION_BITS) - return -ENOSPC; + return -NPF_RC_NAT_ENOSPC; if (test_bits(bit, nr_bits, ps->ps_bm)) - return -EADDRINUSE; + return -NPF_RC_NAT_EADDRINUSE; set_bits(bit, nr_bits, ps->ps_bm); - port_stats_inc(nr_bits, pm, ps); + port_stats_inc(nr_bits, pm, nprot, ps); *port = PM_BIT_TO_PORT(bit) + (section * PM_SECTION_BITS); return 0; @@ -477,55 +529,75 @@ static inline int port_alloc_ports(struct port_map *pm, uint32_t map_flags, /* Get a set of ports if desired */ static int map_allocate_ports(struct npf_apm_range *ar, uint32_t map_flags, - struct port_map 
*pm, int nr_ports, uint16_t *port) + struct port_map *pm, enum nat_proto nprot, + int nr_ports, uint16_t *port) { int rc = 0; uint16_t i; /* - * Do we want more ports than the configured range? + * Do we want more ports than the configured range? This is very + * unlikely to happen. Its only possible if a single port is + * configured in the translation config, and the SIP ALG subsequently + * requests two ports. */ - if (nr_ports > ar->ar_port_range) - return -ERANGE; + if (unlikely(nr_ports > ar->ar_port_range)) + return -NPF_RC_NAT_ERANGE; /* - * If the port(s) are in the range, use it, otherwise - * choose a random start port in the range, but ensure - * we do not exceed the range. + * If the port(s) are in the range, use it, otherwise either choose a + * random start port in the range (but ensure we do not exceed the + * range), or choose the next sequential port that is not in use. */ - if (!ports_in_range(ar, nr_ports, *port)) - *port = ar->ar_port_start + - (random() % (ar->ar_port_range - (nr_ports - 1))); + if (!ports_in_range(ar, nr_ports, *port)) { + if ((map_flags & NPF_NAT_PA_SEQ) == 0) + /* Random port allocation */ + *port = ar->ar_port_start + + (random() % (ar->ar_port_range - + (nr_ports - 1))); + else + /* Sequential port allocation */ + *port = ar->ar_port_start; + } rte_spinlock_lock(&pm->pm_lock); /* Room at the Inn? */ - if ((pm->pm_used + nr_ports) > ar->ar_port_range) { + if ((pm->pm_nprot[nprot].pp_used + nr_ports) > ar->ar_port_range) { rte_spinlock_unlock(&pm->pm_lock); - return -ENOSPC; + /* + * Note that this is the failure path most likely taken when + * we run out of SNAT mappings. + */ + return -NPF_RC_NAT_ENOSPC; } /* * Loop through the range. * - * The pathological case is consecutive ports in a highly - * fragmented map. Otherwise we converge fairly quickly for - * single port allocations. + * The pathological case is consecutive ports in a highly fragmented + * map. 
Otherwise we converge fairly quickly for single port + * allocations. * * Note that we do not span sections for a consecutive port * request. */ for (i = 0; i < ar->ar_port_range; i++) { - rc = port_alloc_ports(pm, map_flags, nr_ports, port); + rc = port_alloc_ports(pm, nprot, map_flags, nr_ports, port); switch (rc) { case 0: - case -ENOMEM: + /* Success! */ goto done; - case -ENOSPC: /* Section is full, skip to next one */ + case -NPF_RC_NAT_ENOMEM: + /* Failed to create new map section */ + goto done; + case -NPF_RC_NAT_ENOSPC: + /* Map section is full, skip to next one */ *port = PM_SECTION_SPAN_NEXT(*port); break; - case -EADDRINUSE: /* Try next port */ + case -NPF_RC_NAT_EADDRINUSE: + /* Port is unavailable, try next one */ (*port)++; break; } @@ -536,14 +608,15 @@ static int map_allocate_ports(struct npf_apm_range *ar, uint32_t map_flags, } /* - * Handle special case. If the port map is highly fragmented, - * its possible for a multi-port request to fail with a -EADDRINUSE - * IOW, the bits are available, but they are not sequential + * Handle special case. If the port map is highly fragmented, its + * possible for a multi-port request to fail with a + * -NPF_RC_NAT_EADDRINUSE return code. In other words, the bits are + * available, but they are not sequential * * So return the correct error, we are out of space. 
*/ - if ((rc == -EADDRINUSE) && (nr_ports > 1)) - rc = -ENOSPC; + if ((rc == -NPF_RC_NAT_EADDRINUSE) && (nr_ports > 1)) + rc = -NPF_RC_NAT_ENOSPC; done: /* Reset the flags, if we allocated */ @@ -574,7 +647,7 @@ static uint32_t map_translate_addr(struct npf_apm_range *ar, uint32_t inaddr) /* Get the snat translation address/ports */ static int map_snat(struct npf_apm_range *ar, int nr_ports, vrfid_t vrfid, uint32_t *addr, in_port_t *port, - uint32_t map_flags) + enum nat_proto nprot, uint32_t map_flags) { int rc; struct port_map *pm; @@ -588,21 +661,29 @@ static int map_snat(struct npf_apm_range *ar, int nr_ports, if (!*port || !nr_ports) return 0; + /* + * This should never happen. The most number of ports ever requested + * at a time is 2 for the SIP ALG. This implies something has gone + * badly wrong somewhere. Possibly memory corruption? + */ if (nr_ports > (int) LONGBITS) - return -EINVAL; - - /* Bad configuration? */ - if (nr_ports > ar->ar_port_range) - return -ERANGE; + return -NPF_RC_INTL; rc = 0; /* Hush up gcc, range is always at least 1 */ for (i = 0; i < ar->ar_addr_range; i++) { + /* + * Get the apm entry for this address. This can fail in two + * ways if a new entry is required - first, malloc fails, and + * second we reach the configured memory limit for all apm + * entries. 
+ */ pm = map_get(*addr, vrfid, true); if (!pm) - return -ENOMEM; + return -NPF_RC_NAT_ENOMEM; - rc = map_allocate_ports(ar, map_flags, pm, nr_ports, port); - if (!rc || rc == -ENOMEM) + rc = map_allocate_ports(ar, map_flags, pm, nprot, + nr_ports, port); + if (!rc || rc == -NPF_RC_NAT_ENOMEM) break; /* try a new addr, wrap */ @@ -618,25 +699,21 @@ static int map_snat(struct npf_apm_range *ar, int nr_ports, static int map_dnat(npf_apm_t *apm, struct npf_apm_range *ar, int nr_ports, uint32_t *addr, in_port_t *port) { - int rc = -ERANGE; + int rc = 0; rte_spinlock_lock(&apm->apm_dnat_lock); *addr = map_translate_addr(ar, *addr); - if (!nr_ports) { - rc = 0; + if (!nr_ports) goto done; - } /* Are requested port(s) in range? */ - if (ports_in_range(ar, nr_ports, *port)) { - rc = 0; + if (ports_in_range(ar, nr_ports, *port)) goto done; - } if (nr_ports > ar->ar_port_range) { - rc = -ERANGE; + rc = -NPF_RC_NAT_ERANGE; goto done; } @@ -647,8 +724,6 @@ static int map_dnat(npf_apm_t *apm, struct npf_apm_range *ar, ar->ar_port_next += nr_ports; - rc = 0; - done: rte_spinlock_unlock(&apm->apm_dnat_lock); return rc; @@ -669,8 +744,8 @@ static int map_release_port(struct port_section *ps, uint16_t port) } /* Return a mapped address & port to the map */ -int npf_apm_put_map(npf_apm_t *apm, uint32_t map_flags, vrfid_t vrfid, - npf_addr_t ipaddr, in_port_t ipport) +int npf_apm_put_map(npf_apm_t *apm, uint32_t map_flags, uint8_t ip_prot, + vrfid_t vrfid, npf_addr_t ipaddr, in_port_t ipport) { uint32_t addr; uint16_t port; @@ -678,6 +753,7 @@ int npf_apm_put_map(npf_apm_t *apm, uint32_t map_flags, vrfid_t vrfid, struct port_section *ps; int n; int rc; + enum nat_proto nprot = nat_proto_from_ipproto(ip_prot); if (!apm || !ipport) return 0; @@ -702,11 +778,11 @@ int npf_apm_put_map(npf_apm_t *apm, uint32_t map_flags, vrfid_t vrfid, n = PM_SECTION_OF_PORT(port); rte_spinlock_lock(&pm->pm_lock); - ps = map_get_section(pm, n, false); + ps = map_get_section(pm, nprot, n, false); rc = 
map_release_port(ps, port); if (!rc) { - port_stats_dec(pm, ps); - map_put_section(pm, ps, n); + port_stats_dec(pm, nprot, ps); + map_put_section(pm, nprot, ps, n); } rte_spinlock_unlock(&pm->pm_lock); @@ -717,16 +793,18 @@ int npf_apm_put_map(npf_apm_t *apm, uint32_t map_flags, vrfid_t vrfid, /* Allocate a mapping given the address range */ static int map_allocate_from_range(struct npf_apm *apm, struct npf_apm_range *ar, int nr_ports, vrfid_t vrfid, - uint32_t *addr, in_port_t *port, uint32_t map_flags) + uint32_t *addr, in_port_t *port, enum nat_proto nprot, + uint32_t map_flags) { - int rc = -EINVAL; + int rc = -NPF_RC_INTL; switch (apm->apm_type) { case NPF_NATIN: rc = map_dnat(apm, ar, nr_ports, addr, port); break; case NPF_NATOUT: - rc = map_snat(ar, nr_ports, vrfid, addr, port, map_flags); + rc = map_snat(ar, nr_ports, vrfid, addr, port, nprot, + map_flags); break; } return rc; @@ -759,10 +837,10 @@ static int apm_table_cb(uint32_t start, uint32_t stop, uint32_t range, ap->ap_rc = map_allocate_from_range(ap->ap_apm, &ar, ap->ap_nr_ports, ap->ap_vrfid, &ap->ap_addr, - &ap->ap_port, ap->ap_map_flags); + &ap->ap_port, ap->ap_nprot, ap->ap_map_flags); /* Only keep going on full portmaps */ - if (ap->ap_rc == -ENOSPC) + if (ap->ap_rc == -NPF_RC_NAT_ENOSPC) return 0; /* stop address-group walk */ @@ -772,7 +850,7 @@ static int apm_table_cb(uint32_t start, uint32_t stop, uint32_t range, /* map_allocate_from_table - Walk a table */ static int map_allocate_from_table(struct npf_apm *apm, int nr_ports, vrfid_t vrfid, uint32_t *addr, in_port_t *port, - uint32_t map_flags) + enum nat_proto nprot, uint32_t map_flags) { struct apm_table_params ap; int rc; @@ -782,6 +860,7 @@ static int map_allocate_from_table(struct npf_apm *apm, int nr_ports, ap.ap_addr = *addr; ap.ap_port = *port; ap.ap_vrfid = vrfid; + ap.ap_nprot = nprot; ap.ap_map_flags = map_flags; ap.ap_rc = 0; @@ -794,7 +873,7 @@ static int map_allocate_from_table(struct npf_apm *apm, int nr_ports, RTE_LOG(ERR, 
FIREWALL, "NPF APM: Bad table walk. Table: %u rc: %d\n", apm->apm_table_id, rc); - return rc; + return -NPF_RC_INTL; } /* Only update if successful */ @@ -808,12 +887,14 @@ static int map_allocate_from_table(struct npf_apm *apm, int nr_ports, } /* Get an address & port from the map */ -int npf_apm_get_map(npf_apm_t *apm, uint32_t map_flags, int nr_ports, - vrfid_t vrfid, npf_addr_t *ipaddr, in_port_t *ipport) +int npf_apm_get_map(npf_apm_t *apm, uint32_t map_flags, uint8_t ip_prot, + int nr_ports, vrfid_t vrfid, npf_addr_t *ipaddr, + in_port_t *ipport) { uint32_t addr = NPF_ADDR_TO_UINT32(ipaddr); uint32_t *ipaddrp = (uint32_t *) ipaddr; in_port_t port = ntohs(*ipport); + enum nat_proto nprot = nat_proto_from_ipproto(ip_prot); int rc; /* @@ -825,10 +906,11 @@ int npf_apm_get_map(npf_apm_t *apm, uint32_t map_flags, int nr_ports, */ if (APM_USES_TABLE(apm)) rc = map_allocate_from_table(apm, nr_ports, vrfid, - &addr, &port, map_flags); + &addr, &port, nprot, map_flags); else rc = map_allocate_from_range(apm, &apm->apm_ar, - nr_ports, vrfid, &addr, &port, map_flags); + nr_ports, vrfid, &addr, &port, nprot, + map_flags); if (!rc) { if (apm->apm_type == NPF_NATIN) rte_atomic64_add(&apm->apm_dnat_used, nr_ports); @@ -940,9 +1022,9 @@ void npf_apm_uninit(void) /* Ensure all is deleted */ apm_delete_all(); - rcu_read_unlock(); + dp_rcu_read_unlock(); cds_lfht_destroy(apm_ht, NULL); - rcu_read_lock(); + dp_rcu_read_lock(); } /* jsonify a section */ @@ -965,6 +1047,7 @@ static void json_pm(json_writer_t *json, struct port_map *pm) struct port_section *ps; uint32_t naddr; char buf[INET6_ADDRSTRLEN]; + enum nat_proto nprot; naddr = htonl(pm->pm_addr); inet_ntop(AF_INET, &naddr, buf, sizeof(buf)); @@ -979,21 +1062,31 @@ static void json_pm(json_writer_t *json, struct port_map *pm) else jsonw_string_field(json, "state", "ACTIVE"); - jsonw_uint_field(json, "used", pm->pm_used); + jsonw_name(json, "protocols"); + jsonw_start_array(json); + + for (nprot = NAT_PROTO_FIRST; nprot < 
NAT_PROTO_COUNT; nprot++) { + + jsonw_start_object(json); + jsonw_string_field(json, "protocol", nat_proto_lc_str(nprot)); + jsonw_uint_field(json, "ports_used", + pm->pm_nprot[nprot].pp_used); + + if (pm->pm_nprot[nprot].pp_used) { + jsonw_name(json, "ports"); + jsonw_start_array(json); + for (i = 0; i < PM_SECTION_CNT; i++) { + ps = pm->pm_nprot[nprot].pp_sections[i]; + if (ps) + json_ps(json, ps, i); + } + jsonw_end_array(json); + } - if (!pm->pm_used) { jsonw_end_object(json); - return; } - jsonw_name(json, "ports"); - jsonw_start_array(json); - for (i = 0; i < PM_SECTION_CNT; i++) { - ps = pm->pm_sections[i]; - if (ps) - json_ps(json, ps, i); - } - jsonw_end_array(json); + jsonw_end_array(json); /* protocols */ jsonw_end_object(json); } @@ -1007,7 +1100,8 @@ void npf_apm_dump(FILE *fp) json_writer_t *json; struct cds_lfht_iter iter; struct port_map *pm; - uint64_t count = 0; + uint64_t count[NAT_PROTO_COUNT] = {0}; + enum nat_proto nprot; json = jsonw_new(fp); jsonw_name(json, "apm"); @@ -1022,12 +1116,26 @@ void npf_apm_dump(FILE *fp) cds_lfht_for_each_entry(apm_ht, &iter, pm, pm_node) { rte_spinlock_lock(&pm->pm_lock); json_pm(json, pm); - count += pm->pm_used; + for (nprot = NAT_PROTO_FIRST; nprot < NAT_PROTO_COUNT; + nprot++) { + count[nprot] += pm->pm_nprot[nprot].pp_used; + } rte_spinlock_unlock(&pm->pm_lock); } jsonw_end_array(json); - jsonw_uint_field(json, "mapping_count", count); + jsonw_name(json, "protocols"); + jsonw_start_array(json); + + for (nprot = NAT_PROTO_FIRST; nprot < NAT_PROTO_COUNT; nprot++) { + + jsonw_start_object(json); + jsonw_string_field(json, "protocol", nat_proto_lc_str(nprot)); + jsonw_uint_field(json, "mapping_count", count[nprot]); + jsonw_end_object(json); + } + jsonw_end_array(json); /* protocols */ + jsonw_end_object(json); jsonw_destroy(&json); } @@ -1045,17 +1153,20 @@ void npf_apm_flush_all(void) /* * Get the allocation status for a particular address and port. 
* + * ip_prot IP protocol to check the port for * ipaddr translation address * port port in host order */ bool -npf_apm_get_allocated(vrfid_t vrfid, npf_addr_t ipaddr, in_port_t port) +npf_apm_get_allocated(uint8_t ip_prot, vrfid_t vrfid, npf_addr_t ipaddr, + in_port_t port) { uint32_t addr; unsigned long bit; struct port_map *pm; struct port_section *ps; int n; + enum nat_proto nprot = nat_proto_from_ipproto(ip_prot); addr = NPF_ADDR_TO_UINT32(&ipaddr); @@ -1064,7 +1175,7 @@ npf_apm_get_allocated(vrfid_t vrfid, npf_addr_t ipaddr, in_port_t port) return false; n = PM_SECTION_OF_PORT(port); - ps = map_get_section(pm, n, false); + ps = map_get_section(pm, nprot, n, false); if (!ps) return false; @@ -1076,4 +1187,3 @@ npf_apm_get_allocated(vrfid_t vrfid, npf_addr_t ipaddr, in_port_t port) return false; } - diff --git a/src/npf/npf_apm.h b/src/npf/npf_apm.h index 0e2edd44..e5589853 100644 --- a/src/npf/npf_apm.h +++ b/src/npf/npf_apm.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015-2016 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -24,7 +24,7 @@ #include "npf/npf.h" #include "util.h" -#include "vrf.h" +#include "vrf_internal.h" typedef struct npf_apm npf_apm_t; @@ -78,16 +78,16 @@ npf_apm_span_section(in_port_t port, uint nr_ports) void npf_apm_init(void); void npf_apm_uninit(void); -int npf_apm_get_map(npf_apm_t *apm, uint32_t map_flags, int nr_ports, - vrfid_t vrfid, npf_addr_t *addr, in_port_t *port); -int npf_apm_put_map(npf_apm_t *apm, uint32_t map_flags, vrfid_t vrfid, - npf_addr_t addr, in_port_t port); -npf_apm_t *npf_apm_create(uint32_t mask, uint32_t table_id, uint8_t type, - npf_addr_t start_addr, npf_addr_t stop_addr, - in_port_t start_port, in_port_t stop_port); -void npf_apm_update(npf_apm_t *apm, uint32_t mask, uint8_t type, - npf_addr_t addr_start, npf_addr_t addr_stop, - in_port_t start_port, in_port_t stop_port); +int npf_apm_get_map(npf_apm_t *apm, uint32_t map_flags, uint8_t ip_prot, + int nr_ports, vrfid_t vrfid, npf_addr_t *addr, in_port_t *port); +int npf_apm_put_map(npf_apm_t *apm, uint32_t map_flags, uint8_t ip_prot, + vrfid_t vrfid, npf_addr_t addr, in_port_t port); +npf_apm_t *npf_apm_create(uint32_t match_mask, uint32_t table_id, uint8_t type, + npf_addr_t a_start, npf_addr_t a_stop, + in_port_t p_start, in_port_t p_stop); +void npf_apm_update(npf_apm_t *apm, uint32_t match_mask, uint8_t type, + npf_addr_t a_start, npf_addr_t a_stop, + in_port_t p_start, in_port_t p_stop); void npf_apm_destroy(npf_apm_t *apm); npf_apm_t *npf_apm_clone(npf_apm_t *apm); void npf_apm_flush_all(void); @@ -96,10 +96,12 @@ void npf_apm_dump(FILE *fp); /* * Get the allocation status for a particular address and port. 
* + * ip_prot IP protocol to check the port for * ipaddr translation address * port port in host order */ bool -npf_apm_get_allocated(vrfid_t ctfid, npf_addr_t ipaddr, in_port_t port); +npf_apm_get_allocated(uint8_t ip_prot, vrfid_t ctfid, npf_addr_t ipaddr, + in_port_t port); #endif /* NPF_APM_H */ diff --git a/src/npf/npf_cache.c b/src/npf/npf_cache.c index a8233e18..1d062ffa 100644 --- a/src/npf/npf_cache.c +++ b/src/npf/npf_cache.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2021, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2016 by Brocade Communications Systems, Inc. * All rights reserved. */ @@ -63,21 +63,25 @@ #include "npf/npf_cache.h" #include "npf/npf_mbuf.h" #include "npf/npf_nat.h" +#include "npf/npf_rc.h" #define ICMP_ERROR_MIN_L4_SIZE 8 #define IPV6_HDR_FO_MASK 0xFFF8 /* fragment hdr mask - in host order */ -static int npf_rw_proto_cksum(npf_cache_t *, struct rte_mbuf *, uint16_t); -static int npf_set_ip_size(npf_cache_t *, struct rte_mbuf *, uint16_t); -static int npf_rw_udp_len(npf_cache_t *, struct rte_mbuf *, uint16_t); +static int npf_rw_proto_cksum(npf_cache_t *npc, + struct rte_mbuf *nbuf, uint16_t sum); +static int npf_set_ip_size(npf_cache_t *npc, + struct rte_mbuf *nbuf, uint16_t sz); +static int npf_rw_udp_len(npf_cache_t *npc, + struct rte_mbuf *nbuf, uint16_t len); /* * Optimized version of npf_fetch_datum. * Assumes header in one mbuf. 
*/ static inline void -__nbuf_fetch_datum(struct rte_mbuf *m __attribute__((unused)), +__nbuf_fetch_datum(struct rte_mbuf *m __unused, void *n_ptr, size_t len, void *buf) { assert((char *)n_ptr + len @@ -128,22 +132,14 @@ npf_tcpsaw(const npf_cache_t *npc, tcp_seq *seq, tcp_seq *ack, uint32_t *win) if (npf_iscached(npc, NPC_IP4)) { const struct ip *ip = &npc->npc_ip.v4; return ntohs(ip->ip_len) - npf_cache_hlen(npc) - thlen; - } else if (npf_iscached(npc, NPC_IP6)) { + } + if (npf_iscached(npc, NPC_IP6)) { const struct ip6_hdr *ip6 = &npc->npc_ip.v6; return ntohs(ip6->ip6_plen) - thlen; } return 0; } -bool -npf_fetch_grouper(npf_cache_t *npc, char **ptr) -{ - if (!npf_iscached(npc, NPC_GROUPER)) - return false; - *ptr = npc->npc_grouper; - return true; -} - static inline void npf_update_grouper(npf_cache_t *npc, void *from, uint offset, uint length) { @@ -158,7 +154,7 @@ bool npf_fetch_tcpopts(const npf_cache_t *npc, struct rte_mbuf *nbuf, uint16_t *mss, uint8_t *wscale) { - void *n_ptr = pktmbuf_mtol4(nbuf, void *); + void *n_ptr = dp_pktmbuf_mtol4(nbuf, void *); const struct tcphdr *th = &npc->npc_l4.tcp; int topts_len, step; @@ -249,62 +245,6 @@ void npf_store_tcp_options(npf_cache_t *npc, struct rte_mbuf *nbuf, void *buf) nbuf_advstore(&nbuf, &ptr, offset, len, buf); } -/* - * Re-write the MSS value in an existing TCP option, if present - */ - -bool npf_store_tcp_mss(const npf_cache_t *npc, struct rte_mbuf *nbuf, - uint16_t *mss) -{ - void *n_ptr = pktmbuf_mtol4(nbuf, void *); - const struct tcphdr *th = &npc->npc_l4.tcp; - uint topts_len, step; - - /* Determine if there are any TCP options, get their length. */ - topts_len = (th->doff << 2) - sizeof(struct tcphdr); - if (topts_len <= 0) - /* No options. */ - return false; - - /* First step: advance over TCP header up to options. 
*/ - step = sizeof(struct tcphdr); - - while (topts_len > 0) { - uint8_t opt, len; - - if (__nbuf_advfetch(&nbuf, &n_ptr, step, sizeof(opt), &opt)) - return false; - - switch (opt) { - case TCPOPT_EOL: - /* Done. */ - return false; - case TCPOPT_NOP: - topts_len--; - step = 1; - break; - case TCPOPT_MAXSEG: - if (nbuf_advstore(&nbuf, &n_ptr, 2, - sizeof(*mss), mss)) - return false; - - return true; - default: - if (__nbuf_advfetch(&nbuf, &n_ptr, 1, sizeof(len), - &len)) - return false; - - if (len < 2 || len > topts_len) - return false; - - topts_len -= len; - step = len - 1; - break; - } - } - return false; -} - /* * Cache IPv6 fragmentation header. * @@ -342,7 +282,7 @@ npf_cache_ipv6_routing_hdr(npf_cache_t *npc, struct rte_mbuf *nbuf, void *n_ptr) * Limited validation checks for IPv4 packet. * These are redundant when routing, but necessary for bridging. */ -static bool +static int npf_ipv4_valid(const struct rte_mbuf *m, const void *n_ptr) { const struct ip *ip = n_ptr; @@ -352,26 +292,29 @@ npf_ipv4_valid(const struct rte_mbuf *m, const void *n_ptr) eod = rte_pktmbuf_mtod(m, char *) + rte_pktmbuf_data_len(m); if (unlikely((const char *) n_ptr + sizeof(struct ip) > eod)) - return false; + return -NPF_RC_L3_SHORT; if (unlikely(ip->ip_v != IPVERSION)) - return false; + return -NPF_RC_L3_HDR_VER; hlen = ip->ip_hl << 2; - if (unlikely(hlen < sizeof(struct ip) || - (const char *) n_ptr + hlen > eod)) - return false; + if (unlikely(hlen < sizeof(struct ip))) + return -NPF_RC_L3_HDR_LEN; + if (unlikely((const char *) n_ptr + hlen > eod)) + return -NPF_RC_L3_SHORT; - return true; + return 0; } -static bool +static int npf_fetch_ipv4(npf_cache_t *npc, struct rte_mbuf *nbuf, const void *n_ptr) { - struct ip *ip = &npc->npc_ip.v4; + int rc = npf_ipv4_valid(nbuf, n_ptr); - if (unlikely(!npf_ipv4_valid(nbuf, n_ptr))) - return false; + if (unlikely(rc < 0)) + return rc; + + struct ip *ip = &npc->npc_ip.v4; memcpy(ip, n_ptr, sizeof(struct ip)); @@ -383,32 +326,37 @@ 
npf_fetch_ipv4(npf_cache_t *npc, struct rte_mbuf *nbuf, const void *n_ptr) npc->npc_srcdst = (npf_srcdst_t *)&ip->ip_src; npc->npc_info |= NPC_IP4; npc->npc_hlen = ip->ip_hl << 2; - npc->npc_next_proto = npc->npc_ip.v4.ip_p; - return true; + npc->npc_proto_final = npc->npc_ip.v4.ip_p; + return 0; } /* Limited validation checks for IPv6 packet. */ -static bool +static int npf_ipv6_valid(const struct rte_mbuf *m, const void *n_ptr) { const char *eod = rte_pktmbuf_mtod(m, char *) + rte_pktmbuf_data_len(m); if ((const char *) n_ptr + sizeof(struct ip6_hdr) > eod) - return false; + return -NPF_RC_L3_SHORT; const struct ip6_hdr *ip6 = n_ptr; - return (ip6->ip6_vfc & IPV6_VERSION_MASK) == IPV6_VERSION; + if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) + return -NPF_RC_L3_HDR_VER; + + return 0; } -static bool +static int npf_fetch_ipv6(npf_cache_t *npc, struct rte_mbuf *nbuf, void *n_ptr) { - struct ip6_hdr *ip6 = &npc->npc_ip.v6; + int rc = npf_ipv6_valid(nbuf, n_ptr); - if (unlikely(!npf_ipv6_valid(nbuf, n_ptr))) - return false; + if (unlikely(rc < 0)) + return rc; + + struct ip6_hdr *ip6 = &npc->npc_ip.v6; uint32_t hlen = sizeof(struct ip6_hdr), next_hlen; uint16_t last_unfrg_hlen; @@ -416,7 +364,7 @@ npf_fetch_ipv6(npf_cache_t *npc, struct rte_mbuf *nbuf, void *n_ptr) /* Fetch IPv6 header and set initial next-protocol value. */ memcpy(ip6, n_ptr, sizeof(struct ip6_hdr)); - npc->npc_next_proto = ip6->ip6_nxt; + npc->npc_proto_final = ip6->ip6_nxt; npc->npc_hlen = hlen; /* @@ -454,20 +402,20 @@ npf_fetch_ipv6(npf_cache_t *npc, struct rte_mbuf *nbuf, void *n_ptr) * Advance by the length of the current header and * prefetch the extension header. 
*/ - while (npc->npc_next_proto != IPPROTO_NONE) { + while (npc->npc_proto_final != IPPROTO_NONE) { struct ip6_ext ip6e; if (unlikely(__nbuf_advfetch(&nbuf, &n_ptr, hlen, sizeof(struct ip6_ext), &ip6e) != 0)) { /* Failed to fetch header */ - npc->npc_next_proto = IPPROTO_NONE; + npc->npc_proto_final = IPPROTO_NONE; break; } /* * Determine whether we are going to continue. */ - switch (npc->npc_next_proto) { + switch (npc->npc_proto_final) { case IPPROTO_HOPOPTS: case IPPROTO_DSTOPTS: /* @@ -519,18 +467,18 @@ npf_fetch_ipv6(npf_cache_t *npc, struct rte_mbuf *nbuf, void *n_ptr) if (next_hlen == 0) break; - if (npc->npc_next_proto != IPPROTO_FRAGMENT) { + if (npc->npc_proto_final != IPPROTO_FRAGMENT) { last_unfrg_hlen = next_hlen; last_unfrg_hofs += hlen; } - npc->npc_next_proto = ip6e.ip6e_nxt; + npc->npc_proto_final = ip6e.ip6e_nxt; npc->npc_hlen += next_hlen; hlen = next_hlen; } /* Store the l3_len, if not calculated earlier. */ - if (pktmbuf_l3_len(nbuf) == 0) - pktmbuf_l3_len(nbuf) = npc->npc_hlen; + if (dp_pktmbuf_l3_len(nbuf) == 0) + dp_pktmbuf_l3_len(nbuf) = npc->npc_hlen; npc->last_unfrg_hlen = last_unfrg_hlen; npc->last_unfrg_hofs = last_unfrg_hofs; @@ -540,25 +488,25 @@ npf_fetch_ipv6(npf_cache_t *npc, struct rte_mbuf *nbuf, void *n_ptr) npc->npc_srcdst = (npf_srcdst_t *)&ip6->ip6_src; npc->npc_info |= NPC_IP6; - return true; + return 0; } /* * npf_fetch_ip: fetch, check and cache IP header. 
*/ -static bool npf_fetch_ip(npf_cache_t *npc, struct rte_mbuf *nbuf, void *n_ptr, +static int npf_fetch_ip(npf_cache_t *npc, struct rte_mbuf *nbuf, void *n_ptr, uint16_t eth_proto) { switch (ntohs(eth_proto)) { - case ETHER_TYPE_IPv4: + case RTE_ETHER_TYPE_IPV4: return npf_fetch_ipv4(npc, nbuf, n_ptr); - case ETHER_TYPE_IPv6: + case RTE_ETHER_TYPE_IPV6: return npf_fetch_ipv6(npc, nbuf, n_ptr); default: - return false; + return -NPF_RC_NON_IP; } } @@ -567,7 +515,7 @@ void npf_recache_ip_ttl(npf_cache_t *npc, struct rte_mbuf *nbuf) { if (!npf_iscached(npc, NPC_IP46)) return; - char *n_ptr = pktmbuf_mtol3(nbuf, char *); + char *n_ptr = dp_pktmbuf_mtol3(nbuf, char *); /* This reads TTL/PROTO/CHECKSUM */ if (npf_iscached(npc, NPC_IP4)) { @@ -590,8 +538,8 @@ void npf_recache_ip_ttl(npf_cache_t *npc, struct rte_mbuf *nbuf) * "icmp_err" indicates that the cache is being populated from an IP * packet within an ICMP error packet, and so may be truncated. */ -static inline bool npf_fetch_tcp(npf_cache_t *npc, struct rte_mbuf *nbuf, - void *n_ptr, u_int hlen, bool icmp_err) +static inline int npf_fetch_tcp(npf_cache_t *npc, struct rte_mbuf *nbuf, + void *n_ptr, u_int hlen, bool icmp_err) { struct tcphdr *th = &npc->npc_l4.tcp; @@ -600,32 +548,32 @@ static inline bool npf_fetch_tcp(npf_cache_t *npc, struct rte_mbuf *nbuf, if (icmp_err) { if (__nbuf_advfetch(&nbuf, &n_ptr, hlen, ICMP_ERROR_MIN_L4_SIZE, th)) - return false; + return -NPF_RC_L4_SHORT; npc->npc_info |= NPC_SHORT_ICMP_ERR; } else - return false; + return -NPF_RC_L4_SHORT; } npc->npc_info |= NPC_L4PORTS; - return true; + return 0; } /* * npf_fetch_udp: fetch, check and cache UDP/UDP-Lite header. */ -static inline bool npf_fetch_udp(npf_cache_t *npc, struct rte_mbuf *nbuf, - void *n_ptr, u_int hlen) +static inline int npf_fetch_udp(npf_cache_t *npc, struct rte_mbuf *nbuf, + void *n_ptr, u_int hlen) { struct udphdr *uh = &npc->npc_l4.udp; /* Fetch UDP/UDP-Lite header. 
*/ if (__nbuf_advfetch(&nbuf, &n_ptr, hlen, sizeof(struct udphdr), uh)) - return false; + return -NPF_RC_L4_SHORT; npc->npc_info |= NPC_L4PORTS; - return true; + return 0; } /* @@ -634,21 +582,22 @@ static inline bool npf_fetch_udp(npf_cache_t *npc, struct rte_mbuf *nbuf, * This only fetches the basic 'common header', it does not fetch any * of the various chunks. */ -static inline bool npf_fetch_sctp(npf_cache_t *npc, struct rte_mbuf *nbuf, - void *n_ptr, u_int hlen) +static inline int npf_fetch_sctp(npf_cache_t *npc, struct rte_mbuf *nbuf, + void *n_ptr, u_int hlen) { struct npf_sctp *sh = &npc->npc_l4.sctp; /* Ensure it is small enough in case it is within an ICMP error */ - assert(sizeof(struct npf_sctp) <= ICMP_ERROR_MIN_L4_SIZE); + static_assert(sizeof(struct npf_sctp) <= ICMP_ERROR_MIN_L4_SIZE, + "npf sctp structure is too big"); /* Fetch SCTP common header. */ if (__nbuf_advfetch(&nbuf, &n_ptr, hlen, sizeof(*sh), sh)) - return false; + return -NPF_RC_L4_SHORT; npc->npc_info |= NPC_L4PORTS; - return true; + return 0; } /* @@ -662,8 +611,8 @@ static inline bool npf_fetch_sctp(npf_cache_t *npc, struct rte_mbuf *nbuf, * "icmp_err" indicates that the cache is being populated from an IP * packet within an ICMP error packet, and so may be truncated. 
*/ -static inline bool npf_fetch_dccp(npf_cache_t *npc, struct rte_mbuf *nbuf, - void *n_ptr, u_int hlen, bool icmp_err) +static inline int npf_fetch_dccp(npf_cache_t *npc, struct rte_mbuf *nbuf, + void *n_ptr, u_int hlen, bool icmp_err) { struct npf_dccp *dh = &npc->npc_l4.dccp; @@ -672,15 +621,15 @@ static inline bool npf_fetch_dccp(npf_cache_t *npc, struct rte_mbuf *nbuf, if (icmp_err) { if (__nbuf_advfetch(&nbuf, &n_ptr, hlen, ICMP_ERROR_MIN_L4_SIZE, dh)) - return false; + return -NPF_RC_L4_SHORT; npc->npc_info |= NPC_SHORT_ICMP_ERR; } else - return false; + return -NPF_RC_L4_SHORT; } npc->npc_info |= NPC_L4PORTS; - return true; + return 0; } /* @@ -746,17 +695,17 @@ static void npf_decode_icmp6(npf_cache_t *npc) /* * npf_fetch_icmp: fetch ICMP code, type and possible query ID. */ -static inline bool npf_fetch_icmp(npf_cache_t *npc, struct rte_mbuf *nbuf, - void *n_ptr, u_int hlen) +static inline int npf_fetch_icmp(npf_cache_t *npc, struct rte_mbuf *nbuf, + void *n_ptr, u_int hlen) { /* Ensure the ICMP protocol and IP protocol are compatible */ if (npf_iscached(npc, NPC_IP4) ^ (npf_cache_ipproto(npc) == IPPROTO_ICMP)) - return false; + return -NPF_RC_L3_PROTO; /* Fetch basic ICMP header and possibly id/seq */ if (__nbuf_advfetch(&nbuf, &n_ptr, hlen, ICMP_MINLEN, &npc->npc_l4)) - return false; + return -NPF_RC_L4_SHORT; if (npf_cache_ipproto(npc) == IPPROTO_ICMP) npf_decode_icmp4(npc); @@ -765,70 +714,64 @@ static inline bool npf_fetch_icmp(npf_cache_t *npc, struct rte_mbuf *nbuf, /* Cache: layer 4 - ICMP. */ npc->npc_info |= NPC_ICMP; - return true; + return 0; } -/* - * npf_cache_all: general routine to cache all relevant IP (v4 or v6) - * and TCP, UDP or ICMP headers. Only called once at top level - * of NPF processing. - * - * returns true if packet is OK. 
- */ -bool npf_cache_all(npf_cache_t *npc, struct rte_mbuf *nbuf, uint16_t eth_proto) +static int _npf_cache_all_at(npf_cache_t *npc, struct rte_mbuf *nbuf, + void *n_ptr, uint16_t eth_proto, bool icmp_err, + bool update_grouper) { - return npf_cache_all_at(npc, nbuf, npf_iphdr(nbuf), eth_proto, false); -} + int rc = npf_fetch_ip(npc, nbuf, n_ptr, eth_proto); -bool npf_cache_all_at(npf_cache_t *npc, struct rte_mbuf *nbuf, void *n_ptr, - uint16_t eth_proto, bool icmp_err) -{ - if (!npf_fetch_ip(npc, nbuf, n_ptr, eth_proto)) - return true; /* true as this might be a non-ip packet */ + if (unlikely(rc < 0)) + return rc; u_int hlen = npf_cache_hlen(npc); if (unlikely(npf_iscached(npc, NPC_IPFRAG))) - return true; - - bool ok = true; + return 0; switch (npf_cache_ipproto(npc)) { case IPPROTO_TCP: - ok = npf_fetch_tcp(npc, nbuf, n_ptr, hlen, icmp_err); + rc = npf_fetch_tcp(npc, nbuf, n_ptr, hlen, icmp_err); break; case IPPROTO_UDP: case IPPROTO_UDPLITE: - ok = npf_fetch_udp(npc, nbuf, n_ptr, hlen); + rc = npf_fetch_udp(npc, nbuf, n_ptr, hlen); break; case IPPROTO_SCTP: - ok = npf_fetch_sctp(npc, nbuf, n_ptr, hlen); + rc = npf_fetch_sctp(npc, nbuf, n_ptr, hlen); break; case IPPROTO_DCCP: - ok = npf_fetch_dccp(npc, nbuf, n_ptr, hlen, icmp_err); + rc = npf_fetch_dccp(npc, nbuf, n_ptr, hlen, icmp_err); break; case IPPROTO_ICMP: case IPPROTO_ICMPV6: - ok = npf_fetch_icmp(npc, nbuf, n_ptr, hlen); + rc = npf_fetch_icmp(npc, nbuf, n_ptr, hlen); break; default: break; } - if (unlikely(!ok)) - return false; + if (unlikely(rc < 0)) + return rc; /* * If we have an IPv6 routing header then we only want to match in the * bytecode since we may need to match on route type. 
*/ if (unlikely(npf_iscached(npc, NPC_IPV6_ROUTING))) - return true; + return 0; + + if (unlikely(!update_grouper)) { + npc->npc_info &= ~NPC_GROUPER; + return 0; + } /* * Update grouper protocol, source address, and destination address */ if (likely(npc->npc_info & NPC_IP4)) { - npf_update_grouper(npc, &npc->npc_next_proto, + npf_update_grouper(npc, &npc->npc_proto_final, NPC_GPR_PROTO_OFF_v4, NPC_GPR_PROTO_LEN_v4); @@ -843,7 +786,7 @@ bool npf_cache_all_at(npf_cache_t *npc, struct rte_mbuf *nbuf, void *n_ptr, /* * Must be IPv6 else call to npf_fetch_ip would have failed */ - npf_update_grouper(npc, &npc->npc_next_proto, + npf_update_grouper(npc, &npc->npc_proto_final, NPC_GPR_PROTO_OFF_v6, NPC_GPR_PROTO_LEN_v6); @@ -909,7 +852,34 @@ bool npf_cache_all_at(npf_cache_t *npc, struct rte_mbuf *nbuf, void *n_ptr, /* Mark the cache grouper as populated */ npc->npc_info |= NPC_GROUPER; - return true; + return 0; +} + +/* + * npf_cache_all: general routine to cache all relevant IP (v4 or v6) + * and TCP, UDP or ICMP headers. Only called once at top level + * of NPF processing. + * + * returns 0 if packet is OK. 
+ */ +int npf_cache_all(npf_cache_t *npc, struct rte_mbuf *nbuf, uint16_t eth_proto) +{ + return _npf_cache_all_at(npc, nbuf, npf_iphdr(nbuf), eth_proto, + false, true); +} + +bool npf_cache_all_at(npf_cache_t *npc, struct rte_mbuf *nbuf, void *n_ptr, + uint16_t eth_proto) +{ + return _npf_cache_all_at(npc, nbuf, n_ptr, eth_proto, true, true) == 0; +} + +/* Cache packet without updating the cache grouper */ +bool npf_cache_all_nogpr(npf_cache_t *npc, struct rte_mbuf *nbuf, + uint16_t eth_proto) +{ + return _npf_cache_all_at(npc, nbuf, npf_iphdr(nbuf), eth_proto, + false, false) == 0; } /* @@ -1147,15 +1117,15 @@ void npf_udp_cksum(npf_cache_t *npc, struct rte_mbuf *nbuf) uint16_t cksum; void *l3hdr; - l3hdr = pktmbuf_mtol3(nbuf, void *); + l3hdr = dp_pktmbuf_mtol3(nbuf, void *); udp = (struct udphdr *)(rte_pktmbuf_mtod(nbuf, char *) + nbuf->l2_len + npf_cache_hlen(npc)); udp->check = 0; if (npf_iscached(npc, NPC_IP4)) - cksum = in4_cksum_mbuf(nbuf, l3hdr, udp); + cksum = dp_in4_cksum_mbuf(nbuf, l3hdr, udp); else if (npf_iscached(npc, NPC_IP6)) - cksum = in6_cksum_mbuf(nbuf, l3hdr, udp); + cksum = dp_in6_cksum_mbuf(nbuf, l3hdr, udp); else return; @@ -1175,15 +1145,15 @@ void npf_tcp_cksum(npf_cache_t *npc, struct rte_mbuf *nbuf) uint16_t cksum; void *l3hdr; - l3hdr = pktmbuf_mtol3(nbuf, void *); + l3hdr = dp_pktmbuf_mtol3(nbuf, void *); tcp = (struct tcphdr *)(rte_pktmbuf_mtod(nbuf, char *) + nbuf->l2_len + npf_cache_hlen(npc)); tcp->check = 0; if (npf_iscached(npc, NPC_IP4)) - cksum = in4_cksum_mbuf(nbuf, l3hdr, tcp); + cksum = dp_in4_cksum_mbuf(nbuf, l3hdr, tcp); else if (npf_iscached(npc, NPC_IP6)) - cksum = in6_cksum_mbuf(nbuf, l3hdr, tcp); + cksum = dp_in6_cksum_mbuf(nbuf, l3hdr, tcp); else return; @@ -1206,25 +1176,25 @@ npf_ipv4_cksum(struct rte_mbuf *nbuf, int proto, char *l4hdr) struct icmp *icmp; struct iphdr *l3hdr; - l3hdr = pktmbuf_mtol3(nbuf, struct iphdr *); + l3hdr = dp_pktmbuf_mtol3(nbuf, struct iphdr *); switch (proto) { case IPPROTO_TCP: tcp = 
(struct tcphdr *)l4hdr; tcp->check = 0; - tcp->check = in4_cksum_mbuf(nbuf, l3hdr, tcp); + tcp->check = dp_in4_cksum_mbuf(nbuf, l3hdr, tcp); break; case IPPROTO_UDP: udp = (struct udphdr *)l4hdr; udp->check = 0; - udp->check = in4_cksum_mbuf(nbuf, l3hdr, udp); + udp->check = dp_in4_cksum_mbuf(nbuf, l3hdr, udp); /* Do not encode the 'no checksum' value */ udp->check = (udp->check == 0) ? 0xffff : udp->check; break; case IPPROTO_ICMP: icmp = (struct icmp *)l4hdr; icmp->icmp_cksum = 0; - icmp->icmp_cksum = in4_cksum_mbuf(nbuf, NULL, icmp); + icmp->icmp_cksum = dp_in4_cksum_mbuf(nbuf, NULL, icmp); break; case IPPROTO_SCTP: /* CRC without pseudo-header */ default: @@ -1246,25 +1216,25 @@ void npf_ipv6_cksum(struct rte_mbuf *nbuf, int proto, char *l4hdr) struct icmp6_hdr *icmp6; struct ip6_hdr *l3hdr; - l3hdr = pktmbuf_mtol3(nbuf, struct ip6_hdr *); + l3hdr = dp_pktmbuf_mtol3(nbuf, struct ip6_hdr *); switch (proto) { case IPPROTO_TCP: tcp = (struct tcphdr *)l4hdr; tcp->check = 0; - tcp->check = in6_cksum_mbuf(nbuf, l3hdr, tcp); + tcp->check = dp_in6_cksum_mbuf(nbuf, l3hdr, tcp); break; case IPPROTO_UDP: udp = (struct udphdr *)l4hdr; udp->check = 0; - udp->check = in6_cksum_mbuf(nbuf, l3hdr, udp); + udp->check = dp_in6_cksum_mbuf(nbuf, l3hdr, udp); /* Do not encode the 'no checksum' value */ udp->check = (udp->check == 0) ? 0xffff : udp->check; break; case IPPROTO_ICMPV6: icmp6 = (struct icmp6_hdr *)l4hdr; icmp6->icmp6_cksum = 0; - icmp6->icmp6_cksum = in6_cksum_mbuf(nbuf, l3hdr, icmp6); + icmp6->icmp6_cksum = dp_in6_cksum_mbuf(nbuf, l3hdr, icmp6); break; case IPPROTO_SCTP: /* CRC without pseudo-header */ default: @@ -1309,8 +1279,8 @@ int npf_update_tcp_cksum(npf_cache_t *npc, struct rte_mbuf *nbuf, /* * npf_rwrip: rewrite required IP address, update the cache. 
*/ -bool npf_rwrip(npf_cache_t *npc, struct rte_mbuf *nbuf, void *n_ptr, - const int di, const npf_addr_t *addr) +int npf_rwrip(npf_cache_t *npc, struct rte_mbuf *nbuf, void *n_ptr, + const int di, const npf_addr_t *addr) { npf_addr_t *oaddr; u_int offby; @@ -1327,7 +1297,7 @@ bool npf_rwrip(npf_cache_t *npc, struct rte_mbuf *nbuf, void *n_ptr, /* Advance to the address and rewrite it. */ if (nbuf_advstore(&nbuf, &n_ptr, offby, npc->npc_alen, addr)) - return false; + return -NPF_RC_L3_SHORT; /* Cache: IP address. */ memcpy(oaddr, addr, npc->npc_alen); @@ -1340,7 +1310,7 @@ bool npf_rwrip(npf_cache_t *npc, struct rte_mbuf *nbuf, void *n_ptr, npf_update_grouper(npc, npf_cache_v4dst(npc), NPC_GPR_DADDR_OFF_v4, NPC_GPR_DADDR_LEN_v4); - return true; + return 0; } /* @@ -1383,7 +1353,7 @@ bool npf_rwrip6(npf_cache_t *npc, struct rte_mbuf *nbuf, void *n_ptr, /* * npf_rwrport: rewrite required TCP/UDP port, update the cache. */ -bool npf_rwrport(npf_cache_t *npc, struct rte_mbuf *nbuf, void *n_ptr, +int npf_rwrport(npf_cache_t *npc, struct rte_mbuf *nbuf, void *n_ptr, const int di, in_port_t port) { u_int offby = npf_cache_hlen(npc); @@ -1403,7 +1373,7 @@ bool npf_rwrport(npf_cache_t *npc, struct rte_mbuf *nbuf, void *n_ptr, /* Advance and rewrite the port. */ if (nbuf_advstore(&nbuf, &n_ptr, offby, sizeof(in_port_t), &port)) - return false; + return -NPF_RC_L4_SHORT; /* Cache: TCP/UDP port. */ if (oport) @@ -1417,14 +1387,14 @@ bool npf_rwrport(npf_cache_t *npc, struct rte_mbuf *nbuf, void *n_ptr, npf_update_grouper(npc, &port, NPC_GPR_DPORT_OFF_v4, NPC_GPR_DPORT_LEN_v4); - return true; + return 0; } /* * Rewrite required ICMP query ID, update the cache. 
*/ -bool npf_rwricmpid(npf_cache_t *npc, struct rte_mbuf *nbuf, void *n_ptr, - uint16_t new_id) +int npf_rwricmpid(npf_cache_t *npc, struct rte_mbuf *nbuf, void *n_ptr, + uint16_t new_id) { struct icmp *ic = &npc->npc_l4.icmp; uint16_t *old_id = &ic->icmp_id; @@ -1434,19 +1404,19 @@ bool npf_rwricmpid(npf_cache_t *npc, struct rte_mbuf *nbuf, void *n_ptr, /* Advance and rewrite the ICMP id. */ if (nbuf_advstore(&nbuf, &n_ptr, offby, sizeof(new_id), &new_id)) - return false; + return -NPF_RC_L4_SHORT; /* Cache: ICMP id */ *old_id = new_id; - return true; + return 0; } /* * Rewrite IPv4 and/or transports checksums based upon provided checksum deltas, * also update the fields in the packet cache. */ -bool +int npf_v4_rwrcksums(npf_cache_t *npc, struct rte_mbuf *nbuf, void *n_ptr, uint16_t l3_chk_delta, uint16_t l4_chk_delta) { @@ -1464,7 +1434,7 @@ npf_v4_rwrcksums(npf_cache_t *npc, struct rte_mbuf *nbuf, void *n_ptr, /* Advance to the IPv4 checksum and rewrite it. */ offby = offsetof(struct ip, ip_sum); if (nbuf_advstore(&nbuf, &n_ptr, offby, sizeof(ipsum), &ipsum)) - return false; + return -NPF_RC_L3_SHORT; ip->ip_sum = ipsum; offby = npf_cache_hlen(npc) - offby; @@ -1485,7 +1455,7 @@ npf_v4_rwrcksums(npf_cache_t *npc, struct rte_mbuf *nbuf, void *n_ptr, cksum = &uh->check; if (*cksum == 0) { /* No need to update. 
*/ - return true; + return 0; } offby += offsetof(struct udphdr, check); break; @@ -1498,12 +1468,12 @@ npf_v4_rwrcksums(npf_cache_t *npc, struct rte_mbuf *nbuf, void *n_ptr, break; } case IPPROTO_SCTP: { - return true; + return 0; } case IPPROTO_ICMP: { /* This should never occur (due to having no session) */ if (unlikely(!npf_iscached(npc, NPC_ICMP_ECHO))) - return true; + return 0; struct icmp *ic = &npc->npc_l4.icmp; cksum = &ic->icmp_cksum; offby += offsetof(struct icmp, icmp_cksum); @@ -1514,8 +1484,8 @@ npf_v4_rwrcksums(npf_cache_t *npc, struct rte_mbuf *nbuf, void *n_ptr, default: /* In case we ever add another L4 port based protocol */ if (npf_iscached(npc, NPC_L4PORTS)) - return false; - return true; + return -NPF_RC_INTL; + return 0; } /* Update the checksum in the cache */ @@ -1523,8 +1493,8 @@ npf_v4_rwrcksums(npf_cache_t *npc, struct rte_mbuf *nbuf, void *n_ptr, /* Update the checksum in the mbuf */ if (nbuf_advstore(&nbuf, &n_ptr, offby, sizeof(uint16_t), cksum)) - return false; - return true; + return -NPF_RC_L4_SHORT; + return 0; } /* Convert a string port to a port */ @@ -1580,7 +1550,9 @@ npf_ipv6_is_fragment(struct rte_mbuf *m, uint16_t *npf_flag) * packet will be fully cached, and the packet will not be * cached again. 
*/ - npf_cache_t *n = npf_get_cache(npf_flag, m, htons(ETHER_TYPE_IPv6)); + int rc; + npf_cache_t *n = npf_get_cache(npf_flag, m, + htons(RTE_ETHER_TYPE_IPV6), &rc); if (n && npf_iscached(n, NPC_IPFRAG) && !npf_ip6_has_non_frag_ext_hdrs(n)) @@ -1601,15 +1573,15 @@ static const char *npf_protocol_name[NPF_PROTO_IDX_COUNT + 1] = { }; const char * -npf_get_protocol_name_from_idx(uint8_t proto_idx) +npf_get_protocol_name_from_idx(enum npf_proto_idx proto_idx) { if (proto_idx <= NPF_PROTO_IDX_COUNT) return npf_protocol_name[proto_idx]; - else - return "none"; + + return "none"; } -uint8_t npf_proto_idx_from_str(const char *proto) +enum npf_proto_idx npf_proto_idx_from_str(const char *proto) { uint8_t idx; @@ -1622,7 +1594,7 @@ uint8_t npf_proto_idx_from_str(const char *proto) int npf_prepare_for_l4_header_change(struct rte_mbuf **m, npf_cache_t *npc) { - uint header_len = pktmbuf_l2_len(*m) + npf_cache_hlen(npc); + uint header_len = dp_pktmbuf_l2_len(*m) + npf_cache_hlen(npc); /* Include minimum L4 header for handled L4 protocols. */ switch (npf_cache_ipproto(npc)) { diff --git a/src/npf/npf_cache.h b/src/npf/npf_cache.h index 94159ebf..02f963dd 100644 --- a/src/npf/npf_cache.h +++ b/src/npf/npf_cache.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2016 by Brocade Communications Systems, Inc. * All rights reserved. 
*/ @@ -62,20 +62,20 @@ #include "in_cksum.h" #include "npf/npf.h" #include "npf/npf_mbuf.h" -#include "pktmbuf.h" +#include "pktmbuf_internal.h" typedef uint32_t tcp_seq; struct rte_mbuf; -#define npf_iphdr(m) (pktmbuf_mtol3(m, struct iphdr *)) +#define npf_iphdr(m) (dp_pktmbuf_mtol3(m, struct iphdr *)) enum npf_proto_idx { NPF_PROTO_IDX_TCP, NPF_PROTO_IDX_UDP, NPF_PROTO_IDX_ICMP, NPF_PROTO_IDX_OTHER, -}; +} __attribute__ ((__packed__)); #define NPF_PROTO_IDX_FIRST NPF_PROTO_IDX_TCP #define NPF_PROTO_IDX_LAST NPF_PROTO_IDX_OTHER @@ -97,8 +97,8 @@ static inline uint8_t npf_proto_idx_from_proto(uint8_t proto) return NPF_PROTO_IDX_OTHER; } -const char *npf_get_protocol_name_from_idx(uint8_t proto_idx); -uint8_t npf_proto_idx_from_str(const char *proto); +const char *npf_get_protocol_name_from_idx(enum npf_proto_idx proto_idx); +enum npf_proto_idx npf_proto_idx_from_str(const char *proto); /* The SCTP common header - which is all we read */ struct npf_sctp { @@ -206,8 +206,8 @@ typedef struct npf_cache { uint32_t npc_info; /* Information flags */ uint16_t npc_hlen; uint8_t npc_alen; /* Size (v4/6) of addrs */ - uint8_t npc_next_proto; - uint8_t npc_proto_idx; + uint8_t npc_proto_final;/* Last header in chain */ + enum npf_proto_idx npc_proto_idx; uint8_t npc_ipv6_routing_type; uint8_t npc_alg_flags; /* Per-packet alg flags */ @@ -280,17 +280,65 @@ typedef struct npf_cache { int npf_tcpsaw(const npf_cache_t *npc, tcp_seq *seq, tcp_seq *ack, uint32_t *win); -bool npf_fetch_grouper(npf_cache_t *npc, char **ptr); bool npf_fetch_tcpopts(const npf_cache_t *npc, struct rte_mbuf *nbuf, uint16_t *mss, uint8_t *wscale); void *npf_get_tcp_options(npf_cache_t *npc, struct rte_mbuf *nbuf, void *buf); void npf_store_tcp_options(npf_cache_t *npc, struct rte_mbuf *nbuf, void *buf); -bool npf_store_tcp_mss(const npf_cache_t *npc, struct rte_mbuf *nbuf, - uint16_t *mss); void npf_recache_ip_ttl(npf_cache_t *npc, struct rte_mbuf *nbuf); -bool npf_cache_all(npf_cache_t *npc, struct 
rte_mbuf *nbuf, uint16_t eth_proto); + + +/** + * General routine to cache all relevant IP (v4 or v6) and TCP, UDP or ICMP + * headers. Only called once at top level of NPF processing. + * + * @param npc + * The npf packet cache. + * + * @param nbuf + * The packet. + * + * @param eth_proto + * The ethernet header type field in network byte order. + */ +int npf_cache_all(npf_cache_t *npc, struct rte_mbuf *nbuf, uint16_t eth_proto); + +/** + * Cache all relevant IP (v4 or v6) and TCP, UDP or ICMP headers from a given + * point in a packet. Used to cache packets embedded within ICMP error + * messages, in which case the l4 header may be truncated. + * + * @param npc + * The npf packet cache. + * + * @param n_ptr + * Pointer to the l3 header in the packet. + * + * @param nbuf + * The packet. + * + * @param eth_proto + * The ethernet header type field in network byte order. + */ bool npf_cache_all_at(npf_cache_t *npc, struct rte_mbuf *nbuf, void *n_ptr, - uint16_t eth_proto, bool icmp_err); + uint16_t eth_proto); + +/** + * Cache all relevant IP (v4 or v6) and TCP, UDP or ICMP headers in a packet + * without updating the cache grouper data. Not to be used for packets + * embedded within ICMP error messages. + * + * @param npc + * The npf packet cache. + * + * @param nbuf + * The packet. + * + * @param eth_proto + * The ethernet header type field in network byte order. 
+ */ +bool npf_cache_all_nogpr(npf_cache_t *npc, struct rte_mbuf *nbuf, + uint16_t eth_proto); + uint16_t npf_hdrlen(npf_cache_t *npc); uint16_t npf_payload_len(npf_cache_t *npc); uint16_t npf_payload_fetch(npf_cache_t *npc, struct rte_mbuf *nbuf, @@ -305,16 +353,16 @@ void npf_ipv4_cksum(struct rte_mbuf *nbuf, int proto, char *l4hdr); void npf_ipv6_cksum(struct rte_mbuf *nbuf, int proto, char *l4hdr); int npf_update_tcp_cksum(npf_cache_t *npc, struct rte_mbuf *nbuf, uint32_t old_val, uint32_t new_val); -bool npf_rwrip(npf_cache_t *npc, struct rte_mbuf *nbuf, void *n_ptr, - const int di, const npf_addr_t *addr); +int npf_rwrip(npf_cache_t *npc, struct rte_mbuf *nbuf, void *n_ptr, + const int di, const npf_addr_t *addr); bool npf_rwrip6(npf_cache_t *npc, struct rte_mbuf *nbuf, void *n_ptr, const int di, const npf_addr_t *addr); -bool npf_rwrport(npf_cache_t *npc, struct rte_mbuf *nbuf, void *n_ptr, - const int di, in_port_t port); -bool npf_rwricmpid(npf_cache_t *npc, struct rte_mbuf *nbuf, void *n_ptr, - uint16_t new_id); -bool npf_v4_rwrcksums(npf_cache_t *npc, struct rte_mbuf *nbuf, void *n_ptr, - uint16_t l3_chk_delta, uint16_t l4_chk_delta); +int npf_rwrport(npf_cache_t *npc, struct rte_mbuf *nbuf, void *n_ptr, + const int di, in_port_t port); +int npf_rwricmpid(npf_cache_t *npc, struct rte_mbuf *nbuf, void *n_ptr, + uint16_t new_id); +int npf_v4_rwrcksums(npf_cache_t *npc, struct rte_mbuf *nbuf, void *n_ptr, + uint16_t l3_chk_delta, uint16_t l4_chk_delta); in_port_t npf_port_from_str(const char *p); npf_cache_t *npf_cache(void); uint16_t npf_cache_mtu(void); @@ -343,7 +391,8 @@ static inline void npf_cache_reset(npf_cache_t *npc) RTE_DECLARE_PER_LCORE(npf_cache_t, npf_cache); static inline npf_cache_t * -npf_get_cache(uint16_t *npf_flag, struct rte_mbuf *m, uint16_t eth_type) +npf_get_cache(uint16_t *npf_flag, struct rte_mbuf *m, uint16_t eth_type, + int *error) { npf_cache_t *n = &RTE_PER_LCORE(npf_cache); @@ -353,8 +402,11 @@ npf_get_cache(uint16_t 
*npf_flag, struct rte_mbuf *m, uint16_t eth_type) npf_cache_init(n); /* Cache everything. Drop if junk. */ - if (unlikely(!npf_cache_all(n, m, eth_type))) + int rc = npf_cache_all(n, m, eth_type); + if (unlikely(rc < 0)) { + *error = rc; return NULL; + } *npf_flag ^= NPF_FLAG_CACHE_EMPTY; } else { @@ -385,10 +437,10 @@ npf_iscached(const npf_cache_t *npc, const int inf) static inline uint8_t npf_cache_ipproto(const npf_cache_t *npc) { - return npc->npc_next_proto; + return npc->npc_proto_final; } -static inline uint8_t +static inline enum npf_proto_idx npf_cache_proto_idx(const npf_cache_t *npc) { return npc->npc_proto_idx; diff --git a/src/npf/npf_cidr_util.c b/src/npf/npf_cidr_util.c index a6e740f6..12916e67 100644 --- a/src/npf/npf_cidr_util.c +++ b/src/npf/npf_cidr_util.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only */ @@ -48,7 +48,7 @@ #define MAX_BIT_IPV4 31 #define MAX_BIT_IPv6 127 -#define MAX_BIT(_alen) (_alen == 4 ? MAX_BIT_IPV4 : MAX_BIT_IPv6) +#define MAX_BIT(_alen) ((_alen) == 4 ? MAX_BIT_IPV4 : MAX_BIT_IPv6) struct cidr_node { @@ -87,7 +87,7 @@ static inline void clear_bit(uint8_t *a, int bit) * Is bit set in an address? Address is in host byte order. Least * significant bit is bit 0. */ -static inline bool test_bit(uint8_t *a, int bit) +static inline bool test_bit(const uint8_t *a, int bit) { return (a[bit/8] >> bit%8) & 1; } @@ -111,7 +111,7 @@ static void clear_host_bits(uint8_t *a, int alen, int mask) /* * Compare two addresses. Return -1 if a1 < a2, +1 id a1 > a2, 0 id a1 == a2. 
*/ -static int addr_cmp(uint8_t *a1, uint8_t *a2, int alen) +static int addr_cmp(const uint8_t *a1, const uint8_t *a2, int alen) { int i; @@ -119,7 +119,7 @@ static int addr_cmp(uint8_t *a1, uint8_t *a2, int alen) for (i = alen - 1; i >= 0; i--) { if (a1[i] < a2[i]) return -1; - else if (a1[i] > a2[i]) + if (a1[i] > a2[i]) return 1; } return 0; @@ -152,7 +152,7 @@ static void addr_sr(uint8_t *addr, int alen) * Add a2 to a1 and store the result in r. Returns 0 if successful, else -1. * r may point to the same memory as either a1 or a2. */ -static int addr_add(uint8_t *r, uint8_t *a1, uint8_t *a2, int alen) +static int addr_add(uint8_t *r, const uint8_t *a1, const uint8_t *a2, int alen) { int i; uint x, co = 0; @@ -200,7 +200,7 @@ static int addr_incr(uint8_t *addr, int alen) * 0x0A000008 gives 0x00000007 * 0x0A001000 gives 0x00000FFF */ -static void host_mask(uint8_t *addr, uint8_t *mask, int alen) +static void host_mask(const uint8_t *addr, uint8_t *mask, int alen) { int i; @@ -225,7 +225,7 @@ static void host_mask(uint8_t *addr, uint8_t *mask, int alen) /* * Count the number of leading zeros in an address */ -static int addr_clz(uint8_t *addr, int alen) +static int addr_clz(const uint8_t *addr, int alen) { assert(alen >= 4 && (alen & 0x3) == 0); diff --git a/src/npf/npf_cidr_util.h b/src/npf/npf_cidr_util.h index fe8661b8..6a4ecd1e 100644 --- a/src/npf/npf_cidr_util.h +++ b/src/npf/npf_cidr_util.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only */ diff --git a/src/npf/npf_cmd.c b/src/npf/npf_cmd.c index d91d1c4a..a17fa254 100644 --- a/src/npf/npf_cmd.c +++ b/src/npf/npf_cmd.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. 
* Copyright (c) 2011-2016 by Brocade Communications Systems, Inc. * All rights reserved. * diff --git a/src/npf/npf_cmd.h b/src/npf/npf_cmd.h index 147da3cd..55a110ee 100644 --- a/src/npf/npf_cmd.h +++ b/src/npf/npf_cmd.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2016 by Brocade Communications Systems, Inc. * All rights reserved. * diff --git a/src/npf/npf_cmd_cfg.c b/src/npf/npf_cmd_cfg.c index 16ac4f41..87abb652 100644 --- a/src/npf/npf_cmd_cfg.c +++ b/src/npf/npf_cmd_cfg.c @@ -1,5 +1,6 @@ /* - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. + * * Copyright (c) 2011-2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -24,9 +25,9 @@ #include "commands.h" #include "compiler.h" -#include "config.h" +#include "config_internal.h" #include "npf/npf.h" -#include "npf/alg/npf_alg_public.h" +#include "npf/alg/alg_npf.h" #include "npf/config/npf_attach_point.h" #include "npf/config/npf_auto_attach.h" #include "npf/config/npf_config.h" @@ -40,9 +41,13 @@ #include "npf/npf_session.h" #include "npf/npf_state.h" #include "npf/npf_timeouts.h" +#include "npf/npf_vrf.h" #include "npf/rproc/npf_ext_session_limit.h" +#include "npf/zones/npf_zone_public.h" +#include "npf/app_group/app_group_cmd.h" #include "util.h" #include "vplane_log.h" +#include "qos_public.h" #define NPF_MAX_CMDLINE 1024 @@ -109,8 +114,10 @@ cmd_npf_addrgrp_create(FILE *f, int argc, char **argv) return -EEXIST; } - /* Create table and insert into the address-group tableset */ - t = npf_addrgrp_create(name); + /* + * Create an address-group and insert into the address-group tableset + */ + t = npf_addrgrp_cfg_add(name); if (t == NULL) { RTE_LOG(ERR, DATAPLANE, "Could not create npf address-group \"%s\"\n", @@ -144,10 +151,10 @@ 
cmd_npf_addrgrp_delete(FILE *f, int argc, char **argv) } /* - * Remove the table from the hash table immediately. Remove from - * tableset and free after RCU grace period. + * Remove from tableset immediately. Only free memory when ref count + * is zero. */ - npf_addrgrp_destroy(name); + npf_addrgrp_cfg_delete(name); return 0; } @@ -217,16 +224,9 @@ cmd_npf_addrgrp_entry_add(FILE *f, int argc, char **argv) /* Does this address-group exist? */ if (npf_addrgrp_lookup_name(name) == NULL) { - struct npf_addrgrp *t; - - /* Create table and insert into the address-group tableset */ - t = npf_addrgrp_create(name); - if (t == NULL) { - RTE_LOG(ERR, DATAPLANE, - "Could not create npf address-group \"%s\"\n", - name); - return -ENOSPC; - } + RTE_LOG(ERR, DATAPLANE, "address-group \"%s\" does not exist\n", + name); + return -ENOENT; } /* masklen will be set to NPF_NO_NETMASK if no mask is present */ @@ -346,24 +346,37 @@ static int cmd_npf_global_timeout(FILE *f, int argc, char **argv) { vrfid_t vrfid; - uint8_t s; + struct vrf *vrf = NULL; char *p; uint32_t tout; - uint8_t proto_index; + enum npf_proto_idx proto_idx; enum npf_timeout_action action; + struct npf_timeout *to; + int rc = -1; if (argc < 5) { npf_cmd_err(f, "%s", npf_cmd_str_missing_arg); - return -1; + goto end; } /* Parse vrf id */ vrfid = strtoul(argv[0], NULL, 10); if ((vrfid == VRF_INVALID_ID) || (vrfid >= VRF_ID_MAX)) { npf_cmd_err(f, "%s", "invalid global timeout VRF"); - return -1; + goto end; } + /* + * We can race with VRF creation, so manage VRF reference counts + * to maintain state. Take a temp reference on the vrf. 
+ */ + vrf = vrf_find_or_create(vrfid); + if (!vrf) + goto end; + + to = vrf_get_npf_timeout_rcu(vrfid); + if (!to) + goto end; /* Parse action */ if (!strcmp(argv[1], "update")) @@ -372,30 +385,72 @@ cmd_npf_global_timeout(FILE *f, int argc, char **argv) action = TIMEOUT_DEL; else { npf_cmd_err(f, "%s", "invalid global timeout action"); - return -1; + goto end; } /* Parse protocol */ - proto_index = npf_proto_idx_from_str(argv[2]); - if (proto_index == NPF_PROTO_IDX_NONE) { + proto_idx = npf_proto_idx_from_str(argv[2]); + if (proto_idx == NPF_PROTO_IDX_NONE) { npf_cmd_err(f, "%s", "invalid global timeout protocol"); - return -1; + goto end; } - if (proto_index == NPF_PROTO_IDX_TCP) - s = npf_map_str_to_tcp_state(argv[3]); - else - s = npf_map_str_to_generic_state(argv[3]); - /* Parse timeout */ tout = strtoul(argv[4], &p, 10); if (*p != '\0') { npf_cmd_err(f, "%s", "invalid global timeout value"); - return -1; + goto end; } + if (proto_idx == NPF_PROTO_IDX_TCP) { + enum tcp_session_state state; - return npf_timeout_set(vrfid, action, proto_index, s, tout); + state = npf_map_str_to_tcp_state(argv[3]); + + if (state == NPF_TCPS_NONE) { + npf_cmd_err(f, "%s", "invalid state name"); + goto end; + } + + /* Set the TCP timeout */ + rc = npf_tcp_timeout_set(to, state, tout); + if (rc < 0) + goto end; + } else { + enum dp_session_state state; + + state = dp_session_name2state(argv[3]); + + if (state == SESSION_STATE_NONE) { + npf_cmd_err(f, "%s", "invalid state name"); + goto end; + } + + /* Set the non-TCP timeout */ + rc = npf_gen_timeout_set(to, proto_idx, state, tout); + if (rc < 0) + goto end; + } + + /* Take/release permanent reference on the vrf */ + switch (action) { + case TIMEOUT_SET: + vrf_find_or_create(vrfid); /* Inc on set */ + to->to_set_count++; + break; + case TIMEOUT_DEL: + vrf_delete_by_ptr(vrf); /* Dec on reset */ + to->to_set_count--; + break; + }; + rc = 0; + +end: + /* Always release temp vrf reference */ + if (vrf) + vrf_delete_by_ptr(vrf); + + 
return rc; } /* @@ -449,7 +504,8 @@ cmd_add_rule(FILE *f, int argc, char **argv) if (strcmp(argv[1], "0") != 0) { npf_cmd_err(f, "invalid index: %s", argv[1]); return -1; - } else if (group_class == NPF_RULE_CLASS_ACL) + } + if (group_class == NPF_RULE_CLASS_ACL) index = UINT32_MAX; } @@ -497,6 +553,9 @@ cmd_add_rule(FILE *f, int argc, char **argv) if (group_class == NPF_RULE_CLASS_APP_FW) npf_dirty_app_fw_users(); + if (group_class == NPF_RULE_CLASS_DSCP_GROUP) + qos_sched_res_grp_update(group); + return 0; } @@ -533,7 +592,7 @@ cmd_delete_rule(FILE *f, int argc, char **argv) index = (uint32_t)strtoul(argv[1], NULL, 10); if (index == 0) { - if (strcmp(argv[1], "0")) { + if (strcmp(argv[1], "0") != 0) { npf_cmd_err(f, "invalid index: %s", argv[1]); return -1; } @@ -657,6 +716,70 @@ cmd_commit(FILE *f, int argc, char **argv __unused) return 0; } +static int +cmd_npf_zone_add(FILE *f, int argc, char **argv) +{ + return npf_zone_cfg_add(f, argc, argv); +} + +static int +cmd_npf_zone_remove(FILE *f, int argc, char **argv) +{ + return npf_zone_cfg_remove(f, argc, argv); +} + +static int +cmd_npf_zone_local(FILE *f, int argc, char **argv) +{ + return npf_zone_cfg_local(f, argc, argv); +} + +static int +cmd_npf_zone_policy_add(FILE *f, int argc, char **argv) +{ + return npf_zone_cfg_policy_add(f, argc, argv); +} + +static int +cmd_npf_zone_policy_remove(FILE *f, int argc, char **argv) +{ + return npf_zone_cfg_policy_remove(f, argc, argv); +} + +static int +cmd_npf_zone_intf_add(FILE *f, int argc, char **argv) +{ + return npf_zone_cfg_intf_add(f, argc, argv); +} + +static int +cmd_npf_zone_intf_remove(FILE *f, int argc, char **argv) +{ + return npf_zone_cfg_intf_remove(f, argc, argv); +} + +static int +cmd_app_group_add(FILE *f, int argc, char **argv) +{ + if (argc != 2) { + npf_cmd_err(f, "%s", "invalid argument count: should be 2"); + return -1; + } + + return app_group_add(argv[0], argv[1]); +} + +static int +cmd_app_group_del(FILE *f, int argc, char **argv) +{ + if 
(argc != 1) { + npf_cmd_err(f, "%s", "invalid argument count: should be 1"); + return -1; + } + + return app_group_del(argv[0]); +} + enum { FW_ALG, FW_TABLE_CREATE, @@ -672,6 +795,15 @@ enum { FW_GLOBAL_TCPSTRICT_ENABLE, FW_GLOBAL_TCPSTRICT_DISABLE, FW_GLOBAL_TIMEOUT, + FW_ZONE_ADD, + FW_ZONE_REMOVE, + FW_ZONE_LOCAL, + FW_ZONE_INTF_ADD, + FW_ZONE_INTF_REMOVE, + FW_ZONE_POLICY_ADD, + FW_ZONE_POLICY_REMOVE, + FW_APP_GROUP_ADD, + FW_APP_GROUP_DEL, ADD_RULE, DELETE_RULE, ATTACH_GROUP, @@ -749,6 +881,42 @@ static const struct npf_command npf_cmd_cfg[] = { .tokens = "fw global timeout", .handler = cmd_npf_global_timeout, }, + [FW_ZONE_ADD] = { + .tokens = "zone add", + .handler = cmd_npf_zone_add, + }, + [FW_ZONE_REMOVE] = { + .tokens = "zone remove", + .handler = cmd_npf_zone_remove, + }, + [FW_ZONE_LOCAL] = { + .tokens = "zone local", + .handler = cmd_npf_zone_local, + }, + [FW_ZONE_POLICY_ADD] = { + .tokens = "zone policy add", + .handler = cmd_npf_zone_policy_add, + }, + [FW_ZONE_POLICY_REMOVE] = { + .tokens = "zone policy remove", + .handler = cmd_npf_zone_policy_remove, + }, + [FW_ZONE_INTF_ADD] = { + .tokens = "zone intf add", + .handler = cmd_npf_zone_intf_add, + }, + [FW_ZONE_INTF_REMOVE] = { + .tokens = "zone intf remove", + .handler = cmd_npf_zone_intf_remove, + }, + [FW_APP_GROUP_ADD] = { + .tokens = "app-grp add", + .handler = cmd_app_group_add, + }, + [FW_APP_GROUP_DEL] = { + .tokens = "app-grp del", + .handler = cmd_app_group_del, + }, [ADD_RULE] = { .tokens = "add", .handler = cmd_add_rule, diff --git a/src/npf/npf_cmd_op.c b/src/npf/npf_cmd_op.c index 767adf87..0451b678 100644 --- a/src/npf/npf_cmd_op.c +++ b/src/npf/npf_cmd_op.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2021, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2011-2016 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -21,24 +21,26 @@ #include "commands.h" #include "control.h" #include "compiler.h" -#include "npf/alg/npf_alg_public.h" +#include "npf/alg/alg_npf.h" #include "npf/config/npf_attach_point.h" #include "npf/config/npf_config.h" #include "npf/config/npf_config_state.h" #include "npf/config/npf_dump.h" #include "npf/config/npf_rule_group.h" #include "npf/config/npf_ruleset_type.h" -#include "npf/config/pmf_att_rlgrp.h" +#include "npf/config/gpc_acl_cli.h" #include "npf/npf_addrgrp.h" #include "npf/npf_apm.h" #include "npf/npf_cmd.h" #include "npf/npf_session.h" #include "npf/npf_state.h" #include "npf/npf_timeouts.h" +#include "npf/npf_rc.h" +#include "npf/zones/npf_zone_public.h" #include "npf/rproc/npf_ext_session_limit.h" #include "npf/rproc/npf_ext_nptv6.h" #include "npf_shim.h" -#include "vrf.h" +#include "vrf_internal.h" static zhash_t *g_npf_op_cmds; @@ -71,7 +73,7 @@ cmd_acl_show_counters(FILE *f, int argc, char **argv) if (argc > 2) rgname = argv[2]; - return pmf_arlg_cmd_show_counters(f, ifname, dir, rgname); + return gpc_acl_cmd_show_counters(f, ifname, dir, rgname); } static int @@ -103,7 +105,7 @@ cmd_acl_clear_counters(FILE *f, int argc, char **argv) if (argc > 2) rgname = argv[2]; - return pmf_arlg_cmd_clear_counters(ifname, dir, rgname); + return gpc_acl_cmd_clear_counters(ifname, dir, rgname); } static int @@ -137,7 +139,7 @@ cmd_dump_groups(FILE *f, int argc __unused, char **argv __unused) static int cmd_dump_acls(FILE *f, int argc __unused, char **argv __unused) { - pmf_arlg_dump(f); + gpc_acl_dump(f); return 0; } @@ -148,6 +150,15 @@ cmd_dump_attach_points(FILE *f, int argc __unused, char **argv __unused) return 0; } +/* + * show one or more zones + */ +static int +cmd_show_zones(FILE *f, int argc, char **argv) +{ + return npf_zone_show(f, argc, argv); +} + static int get_ruleset_selection(FILE *f, struct ruleset_select *sel, int argc, char **argv) { @@ -313,32 +324,26 @@ cmd_flush_rulesets(FILE *f, int argc __unused, char **argv __unused) } /* 
- * npf fw show address-group - * af={all|ipv4|ipv6} - * list={all|none|list-only} - * tree={all|none} - * id= or name= - * - * Default is: - * npf fw show address-group af=all list=all tree=none id=0 - * - * If an address-group name is specified then just that address-group is - * returned. - * - * If the user want to fetch *all* groups, then multiple commands are - * required. Table ID should initially set to 0, and then set it to the last - * fetched ID plus 1 for subsequent calls. So for example, the initial call - * with id 0 might return 2 so the next call should use id 3. + * Show address groups */ - -/* Parse address-group args */ static int cmd_npf_show_addrgrp_args(int argc, char **argv, struct npf_show_ag_ctl *ctl) { - char *endp, *p, *a; + /* Set defaults */ + ctl->af[AG_IPv4] = true; + ctl->af[AG_IPv6] = true; + ctl->detail = false; + ctl->brief = false; + ctl->tree = false; + ctl->optimal = false; + ctl->name = NULL; while (argc > 0) { - /* = */ + char *a, *p; + + /* + * Separate parameter and argument + */ p = strdupa(argv[0]); if (!p) break; @@ -350,40 +355,24 @@ cmd_npf_show_addrgrp_args(int argc, char **argv, struct npf_show_ag_ctl *ctl) a += 1; if (!strcmp(p, "af")) { - if (!strcmp(a, "ipv4")) { - ctl->af[AG_IPv4] = true; + if (!strcmp(a, "ipv4")) ctl->af[AG_IPv6] = false; - } else if (!strcmp(a, "ipv6")) { + else if (!strcmp(a, "ipv6")) ctl->af[AG_IPv4] = false; - ctl->af[AG_IPv6] = true; - } else if (!strcmp(a, "all")) { - ctl->af[AG_IPv6] = true; - ctl->af[AG_IPv4] = true; - } - } else if (!strcmp(p, "list")) { - if (!strcmp(a, "none")) - ctl->list = false; - else if (!strcmp(a, "list-only")) { - ctl->list = true; - ctl->range_pfxs = false; - } else if (!strcmp(a, "all")) { - ctl->list = true; - ctl->range_pfxs = true; - } - } else if (!strcmp(p, "tree")) { - if (!strcmp(a, "all")) - ctl->tree = true; - else if (!strcmp(a, "none")) - ctl->tree = false; - } else if (!strcmp(p, "id")) { - ctl->tid = strtoul(a, &endp, 10); - if (*endp) - /* 
Invalid number. */ - return -1; + } else if (!strcmp(p, "name")) { ctl->name = strdup(a); - } else - break; + + } else if (!strcmp(p, "option")) { + if (!strcmp(a, "detail")) + ctl->detail = true; + else if (!strcmp(a, "brief")) + ctl->brief = true; + else if (!strcmp(a, "tree")) + ctl->tree = true; + else if (!strcmp(a, "optimal")) + ctl->optimal = true; + } argc--; argv++; @@ -397,45 +386,10 @@ cmd_npf_show_addrgrp(FILE *f, int argc, char **argv) { struct npf_show_ag_ctl ctl = {0}; - /* Default to show everything */ - ctl.af[AG_IPv4] = true; - ctl.af[AG_IPv6] = true; - ctl.list = true; - ctl.range_pfxs = false; - ctl.tree = false; - ctl.tid = 0; - + /* Parse args */ cmd_npf_show_addrgrp_args(argc, argv, &ctl); - npf_addrgrp_show_json(f, &ctl); - if (ctl.name) - free(ctl.name); - - return 0; -} - -/* - * npf fw show address-group optimal af={ipv4|ipv6} {} - * - * Only handles one address family and address-group per call. - */ -static int -cmd_npf_show_addrgrp_opt(FILE *f, int argc, char **argv) -{ - struct npf_show_ag_ctl ctl = {0}; - - cmd_npf_show_addrgrp_args(argc, argv, &ctl); - - if (ctl.name == NULL) { - npf_cmd_err(f, "No name specified"); - return -1; - } - if (ctl.af[AG_IPv4] == ctl.af[AG_IPv6]) { - npf_cmd_err(f, "IPv4 or IPv6 should be specified"); - return -1; - } - - npf_addrgrp_show_json_opt(f, &ctl); + npf_addrgrp_show(f, &ctl); if (ctl.name) free(ctl.name); @@ -513,14 +467,16 @@ enum { FW_SHOW_SESSION_LIMIT, FW_CLEAR_SESSION_LIMIT, FW_SHOW_ADDRGRP, - FW_SHOW_ADDRGRP_OPT, PORTMAP_CLEAR, PORTMAP_DUMP, DUMPALG, DUMP_GROUPS, DUMP_ACLS, DUMP_ATTACH_POINTS, + SHOW_ZONES, SHOW_STATE, + RC_SHOW_COUNTERS, + RC_CLEAR_COUNTERS, SHOW, CLEAR, FLUSH, @@ -561,10 +517,6 @@ static const struct npf_command npf_cmd_op[] = { .tokens = "fw show address-group", .handler = cmd_npf_show_addrgrp, }, - [FW_SHOW_ADDRGRP_OPT] = { - .tokens = "fw show address-group optimal", - .handler = cmd_npf_show_addrgrp_opt, - }, [PORTMAP_CLEAR] = { .tokens = "fw portmap clear", .handler 
= cmd_npf_clear_portmap, @@ -589,6 +541,18 @@ static const struct npf_command npf_cmd_op[] = { .tokens = "dump attach-points", .handler = cmd_dump_attach_points, }, + [SHOW_ZONES] = { + .tokens = "show zones", + .handler = cmd_show_zones, + }, + [RC_SHOW_COUNTERS] = { + .tokens = "rc show counters", + .handler = npf_show_rc_counts, + }, + [RC_CLEAR_COUNTERS] = { + .tokens = "rc clear counters", + .handler = npf_clear_rc_counts, + }, [SHOW_STATE] = { .tokens = "state", .handler = cmd_show_ruleset_state, diff --git a/src/npf/npf_dataplane_session.c b/src/npf/npf_dataplane_session.c index da0124f2..7379e7b5 100644 --- a/src/npf/npf_dataplane_session.c +++ b/src/npf/npf_dataplane_session.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -15,9 +15,11 @@ #include "npf/npf_if.h" #include "npf/npf_cache.h" +#include "npf/npf_rc.h" #include "npf/npf_session.h" #include "npf/npf_nat.h" #include "npf/npf_nat64.h" +#include "npf/npf_state.h" #include "npf/npf_dataplane_session.h" /* Initial session creation timeout - can be virtually anything */ @@ -61,7 +63,13 @@ static int nat_session_establish(npf_cache_t *npc, struct rte_mbuf *nbuf, return rc; /* Mark this session as containing NAT */ - session_set_nat(*ss); + if (npf_nat_type(nt) == NPF_NATOUT) { + session_set_snat(*ss); + session_set_out(*ss); + } else { + session_set_dnat(*ss); + session_set_in(*ss); + } return 0; } @@ -99,20 +107,28 @@ static void dps_feature_log(enum session_log_event event, struct session *s, npf_session_feature_log(event, s, sf); } +static int dps_feature_nat_info(void *data, uint32_t *taddr, uint16_t *tport) +{ + npf_session_t *se = data; + return npf_session_feature_nat_info(se, taddr, tport); +} + /* Callbacks for the npf_session_t */ static const struct session_feature_ops ops = { 
.expired = dps_feature_expire, .destroy = dps_feature_destroy, .json = dps_feature_json, .log = dps_feature_log, + .nat_info = dps_feature_nat_info, }; /* - * Create a dataplane session and add the - * npf session as a feature. + * Create a dataplane session and add the npf session as a feature. Returns 0 + * for success or -NPF_RC_DP_SESS_ESTB for failure. */ int npf_dataplane_session_establish(npf_session_t *se, npf_cache_t *npc, - struct rte_mbuf *nbuf, const struct ifnet *ifp) + struct rte_mbuf *nbuf, + const struct ifnet *ifp, bool out) { npf_nat_t *nt = npf_session_get_nat(se); @@ -135,14 +151,19 @@ int npf_dataplane_session_establish(npf_session_t *se, npf_cache_t *npc, if (rc) { npf_session_destroy(se); - return rc; + return -NPF_RC_DP_SESS_ESTB; } + /* Get a custom session timeout, if configured */ + timeout = npf_state_get_custom_timeout(ifp->if_vrfid, npc, nbuf); + if (timeout) + session_set_custom_timeout(s, timeout); + /* Cache dataplane session on npf session */ npf_session_set_dp_session(se, s); /* Update the dataplane state/timeout */ - npf_session_update_state(se); + npf_session_update_state(se, s); /* Now add the npf session as a feature datum */ rc = session_feature_add(s, ifp->if_index, SESSION_FEATURE_NPF, se); @@ -151,6 +172,19 @@ int npf_dataplane_session_establish(npf_session_t *se, npf_cache_t *npc, goto bad; } + /* Mark direction on dataplane session */ + if (out) + session_set_out(s); + else + session_set_in(s); + + if (npf_session_is_fw(se)) + session_set_fw(s); + + /* If this is a DPI session, then mark it as such. */ + if (npf_session_get_dpi(se)) + session_set_app(npf_session_get_dp_session(se)); + /* * If this is an ALG secondary session, link, * and mark it as such. 
@@ -162,6 +196,8 @@ int npf_dataplane_session_establish(npf_session_t *se, npf_cache_t *npc, if (rc) goto bad; + /* Mark both the parent and child as alg sessions */ + session_set_alg(npf_session_get_dp_session(parent)); session_set_alg(s); } @@ -186,12 +222,12 @@ int npf_dataplane_session_establish(npf_session_t *se, npf_cache_t *npc, else session_set_nat46(s); } - - return rc; + assert(rc == 0); + return 0; bad: if (created) session_expire(s, nbuf); - return rc; + return -NPF_RC_DP_SESS_ESTB; } static void __attribute__((constructor)) npf_dataplane_session_init(void) diff --git a/src/npf/npf_dataplane_session.h b/src/npf/npf_dataplane_session.h index 90bc02de..83366456 100644 --- a/src/npf/npf_dataplane_session.h +++ b/src/npf/npf_dataplane_session.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. * All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only @@ -18,6 +18,7 @@ /* Protos */ int npf_dataplane_session_establish(npf_session_t *se, npf_cache_t *npc, - struct rte_mbuf *nbuf, const struct ifnet *ifp); + struct rte_mbuf *nbuf, + const struct ifnet *ifp, bool out); #endif /* NPF_DATAPLANE_SESSION_H */ diff --git a/src/npf/npf_disassemble.c b/src/npf/npf_disassemble.c index 98559d5d..81a9a47d 100644 --- a/src/npf/npf_disassemble.c +++ b/src/npf/npf_disassemble.c @@ -127,8 +127,14 @@ static const struct npf_instruction { [0] = NPF_OPERAND_REL_ADDRESS, }, }, - [NPF_OPCODE_PROTO] = { - .name = "test proto", + [NPF_OPCODE_PROTO_FINAL] = { + .name = "test proto-final", + .op = { + [0] = NPF_OPERAND_VALUE, + }, + }, + [NPF_OPCODE_PROTO_BASE] = { + .name = "test proto-base", .op = { [0] = NPF_OPERAND_VALUE, }, @@ -240,30 +246,6 @@ static const struct npf_instruction { static uint npf_instruction_size = ARRAY_SIZE(npf_instructions); -/* - * Get the number of operands for a specific opcode - */ -uint -npf_ncode_opcode_noperands(enum 
npf_opcode_type_enum opcode) -{ - const struct npf_instruction *insn; - enum npf_operand_type_enum operand; - uint noperands, i; - - if (opcode > NPF_OPCODE_MAX) - return 0; - - insn = &npf_instructions[opcode]; - - noperands = 0; - for (i = 0; i < ARRAY_SIZE(insn->op); i++) { - operand = insn->op[i]; - if (operand <= NPF_OPERAND_MAX) - noperands += npf_operand_nwords[operand]; - } - return noperands; -} - static void npf_tcpflags2str(char *buf, unsigned int tfl) { @@ -408,6 +390,12 @@ npf_ncode_operand(uint8_t operand, const uint32_t *nc_base, case NPF_OPERAND_ICMP_TYPE_CODE: { const uint32_t op = *nc_pc; + if (op & NC_ICMP_HAS_CLASS) + buf_app_printf(buf, used_buf_len, total_buf_len, + "class=%s", + NC_ICMP_GET_TYPE_FROM_OP(op) ? + "error" : "info"); + if (op & NC_ICMP_HAS_TYPE) buf_app_printf(buf, used_buf_len, total_buf_len, "%u", NC_ICMP_GET_TYPE_FROM_OP(op)); diff --git a/src/npf/npf_disassemble.h b/src/npf/npf_disassemble.h index cf01ed13..29382fd9 100644 --- a/src/npf/npf_disassemble.h +++ b/src/npf/npf_disassemble.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2016 by Brocade Communications Systems, Inc. * All rights reserved. */ @@ -50,7 +50,5 @@ /* Forward Declarations */ typedef struct json_writer json_writer_t; -uint npf_ncode_opcode_noperands(enum npf_opcode_type_enum opcode); - void npf_json_ncode(const void *nc, size_t len, json_writer_t *json); #endif /* NPF_DISASSEMBLE_H */ diff --git a/src/npf/npf_event.h b/src/npf/npf_event.h index d729aa83..46f2f342 100644 --- a/src/npf/npf_event.h +++ b/src/npf/npf_event.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015-2016 by Brocade Communications Systems, Inc. * All rights reserved. 
* diff --git a/src/npf/npf_grouper.c b/src/npf/npf_grouper.c new file mode 100644 index 00000000..1c7b7bbe --- /dev/null +++ b/src/npf/npf_grouper.c @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2020, AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ + +#include "npf_grouper.h" +#include "grouper2.h" +#include "npf_rule_gen.h" + +/* + * Packet matching callback functions which use the grouper2 API + */ + +int npf_grouper_init(int af, g2_config_t **g_ctx) +{ + if (af == AF_INET) + *g_ctx = g2_init(NPC_GPR_SIZE_v4); + else + *g_ctx = g2_init(NPC_GPR_SIZE_v6); + + if (!*g_ctx) + return -ENOMEM; + + return 0; +} + +int npf_grouper_add_rule(int af, g2_config_t *g_ctx, uint32_t rule_no, + uint8_t *match_addr, uint8_t *mask, + void *match_ctx) +{ + if (!g2_create_rule(g_ctx, rule_no, match_ctx)) + return -ENOMEM; + + if (!g2_add(g_ctx, 0, + (af == AF_INET ? NPC_GPR_SIZE_v4 : NPC_GPR_SIZE_v6), + match_addr, mask)) + return -EINVAL; + + return 0; +} + +int npf_grouper_build(g2_config_t **g_ctx) +{ + g2_optimize(g_ctx); + + return 0; +} + +int npf_grouper_match(int af, g2_config_t *g_ctx, npf_cache_t *npc, + void *data, npf_rule_t **rl) +{ + uint8_t *pkt; + + if (unlikely(!npc)) + return 0; + + pkt = (uint8_t *)npc->npc_grouper; + if (af == AF_INET) + *rl = g2_eval4(g_ctx, pkt, data); + else + *rl = g2_eval6(g_ctx, pkt, data); + if (*rl) + return 1; + + return 0; +} + +int npf_grouper_destroy(g2_config_t **g_ctx) +{ + /* Release groupers */ + g2_destroy(g_ctx); + + return 0; +} diff --git a/src/npf/npf_grouper.h b/src/npf/npf_grouper.h new file mode 100644 index 00000000..d18c7be8 --- /dev/null +++ b/src/npf/npf_grouper.h @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2020, AT&T Intellectual Property. All rights reserved. 
+ * + * SPDX-License-Identifier: LGPL-2.1-only + */ + +#ifndef NPF_GROUPER_H +#define NPF_GROUPER_H + +/* Forward declarations */ +typedef struct npf_rule npf_rule_t; + +#include +#include "grouper2.h" + +int npf_grouper_init(int af, g2_config_t **g_ctx); + +int npf_grouper_add_rule(int af, g2_config_t *g_ctx, uint32_t rule_no, + uint8_t *match_add, uint8_t *mask, + void *match_ctx); + +int npf_grouper_build(g2_config_t **g_ctx); + +int npf_grouper_match(int af, g2_config_t *g_ctx, npf_cache_t *npc, + void *data, npf_rule_t **rl); + +int npf_grouper_destroy(g2_config_t **g_ctx); + +#endif diff --git a/src/npf/npf_icmp.c b/src/npf/npf_icmp.c index 93d00d9b..57cd2a29 100644 --- a/src/npf/npf_icmp.c +++ b/src/npf/npf_icmp.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -22,7 +22,8 @@ #include "npf/npf_icmp.h" #include "npf/npf_mbuf.h" #include "npf/npf_nat.h" -#include "pktmbuf.h" +#include "npf/npf_rc.h" +#include "pktmbuf_internal.h" struct ifnet; struct npf_instance; @@ -40,13 +41,13 @@ npf_icmp_err_session_find(int di, struct rte_mbuf *nbuf, npf_cache_t *npc, /* Only valid for IPv4/IPv6 */ if (npf_iscached(npc, NPC_IP4)) - ether_proto = htons(ETHER_TYPE_IPv4); + ether_proto = htons(RTE_ETHER_TYPE_IPV4); else if (npf_iscached(npc, NPC_IP6)) - ether_proto = htons(ETHER_TYPE_IPv6); + ether_proto = htons(RTE_ETHER_TYPE_IPV6); else return NULL; - void *n_ptr = pktmbuf_mtol3(nbuf, char *) + npf_cache_hlen(npc); + void *n_ptr = dp_pktmbuf_mtol3(nbuf, char *) + npf_cache_hlen(npc); /* Find the start of the packet embedded in the ICMP error. */ n_ptr = nbuf_advance(&nbuf, n_ptr, ICMP_MINLEN); @@ -56,9 +57,10 @@ npf_icmp_err_session_find(int di, struct rte_mbuf *nbuf, npf_cache_t *npc, /* Init the embedded npc. 
*/ npf_cache_t enpc; npf_cache_init(&enpc); + enpc.npc_srcdst = NULL; /* Inspect the embedded packet. */ - if (!npf_cache_all_at(&enpc, nbuf, n_ptr, ether_proto, true)) + if (!npf_cache_all_at(&enpc, nbuf, n_ptr, ether_proto)) return NULL; /* @@ -92,38 +94,39 @@ npf_icmpv4_err_nat(npf_cache_t *npc, const int di) { if (!npc || !di || !ifp || !(*mbuf)) - return 1; + goto error; if (pktmbuf_prepare_for_header_change(mbuf, 0) != 0) - return 1; + goto error; struct rte_mbuf *m0 = *mbuf; struct rte_mbuf *m = m0; - void *n_ptr = pktmbuf_mtol3(m, char *) + npf_cache_hlen(npc); + void *n_ptr = dp_pktmbuf_mtol3(m, char *) + npf_cache_hlen(npc); /* Find the start of the packet embedded in the ICMP error. */ n_ptr = nbuf_advance(&m, n_ptr, ICMP_MINLEN); if (!n_ptr) - return 1; + goto error; /* Init the embedded npc. */ npf_cache_t enpc; npf_cache_init(&enpc); /* Inspect the embedded packet. */ - if (!npf_cache_all_at(&enpc, m, n_ptr, htons(ETHER_TYPE_IPv4), true)) - return 1; + if (!npf_cache_all_at(&enpc, m, n_ptr, + htons(RTE_ETHER_TYPE_IPV4))) + goto error; /* Sanity checks - these should never occur */ if (!npf_iscached(&enpc, NPC_IP4)) - return 1; + goto error; if (enpc.npc_info & NPC_ICMP_ERR) - return 1; + goto error; /* Find the session for the embedded packet */ npf_session_t *se = npf_session_find_by_npc(&enpc, di, ifp, true); if (!se) - return 1; + goto error; /* * For payloads which use a pseudo header, the final ICMP header @@ -177,7 +180,7 @@ npf_icmpv4_err_nat(npf_cache_t *npc, int error = npf_nat_untranslate_at(&enpc, m, nt, !forw, di ^ PFIL_ALL, n_ptr); if (error) - return 1; + goto error; /* * With the embedded packet having now been translated, we adjust the @@ -189,16 +192,16 @@ npf_icmpv4_err_nat(npf_cache_t *npc, npf_addr_t outer_addr; memcpy(&outer_addr, dnat ? 
embed_src : embed_dst, sizeof(uint32_t)); - n_ptr = pktmbuf_mtol3(m0, void *); + n_ptr = dp_pktmbuf_mtol3(m0, void *); if (!npf_nat_translate_l3_at(npc, m0, n_ptr, dnat, &outer_addr)) - return 1; + goto error; /* * Cannot use deltas for the ICMP checksum for truncated * ICMP error packets, so calculate it over all the data. */ if (enpc.npc_info & NPC_SHORT_ICMP_ERR) { - char *start_icmp = pktmbuf_mtol4(m, char *); + char *start_icmp = dp_pktmbuf_mtol4(m, char *); npf_ipv4_cksum(m, IPPROTO_ICMP, start_icmp); @@ -230,7 +233,7 @@ npf_icmpv4_err_nat(npf_cache_t *npc, unsigned int offby = npf_cache_hlen(npc); offby += offsetof(struct icmp, icmp_cksum); if (nbuf_advstore(&m0, &n_ptr, offby, sizeof(*cksum), cksum)) - return 1; + goto error; } /* @@ -240,6 +243,8 @@ npf_icmpv4_err_nat(npf_cache_t *npc, npc->npc_info &= ~NPC_ICMP_ERR_NAT; return 0; +error: + return -NPF_RC_ICMP_ERR_NAT; } int __noinline diff --git a/src/npf/npf_icmp.h b/src/npf/npf_icmp.h index b72547b5..6132792d 100644 --- a/src/npf/npf_icmp.h +++ b/src/npf/npf_icmp.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2016 by Brocade Communications Systems, Inc. * All rights reserved. * diff --git a/src/npf/npf_if.c b/src/npf/npf_if.c index d48f2c44..3f9b162f 100644 --- a/src/npf/npf_if.c +++ b/src/npf/npf_if.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015-2016 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -27,9 +27,10 @@ #include "npf/config/npf_ruleset_type.h" #include "npf/npf_if.h" #include "npf/npf_ruleset.h" +#include "npf/zones/npf_zone_public.h" +#include "npf/npf_rc.h" #include "npf/npf_session.h" #include "npf/npf_vrf.h" -#include "npf/cgnat/cgn_if.h" #include "util.h" #include "vplane_log.h" @@ -52,12 +53,13 @@ struct npf_if_internal { uint32_t niif_refcnt; uint32_t niif_flags; - struct cgn_intf *niif_cgn; + struct npf_zone_intf *niif_zif; struct cds_list_head niif_list; /* * Per interface ruleset count. Used for rulesets that attach to an - * interface. This includes: + * interface, and for zone rulesets (for which interfaces are + * assigned). The former includes: * * fw-in, fw-out, dnat, snat, nat64, pbr, nptv6-in, nptv6-out * bridge, session-rproc, portmonitor-in, portmonitor-out @@ -70,6 +72,9 @@ struct npf_if_internal { * so that rulesets can use the variable that way if they so choose. */ uint16_t niif_rs_count[NPF_RS_TYPE_COUNT]; + + /* Per-core return code counters */ + struct npf_rc_counts *niif_rcc; }; /* Forward reference */ @@ -89,6 +94,8 @@ static rte_spinlock_t niif_lock = RTE_SPINLOCK_INITIALIZER; static void npf_if_dealloc(struct npf_if_internal *niif) { + npf_rc_counts_destroy(&niif->niif_rcc); + free(niif); /* call_rcu not required */ } @@ -117,12 +124,13 @@ npf_if_gc(struct rte_timer *t __rte_unused, void *arg __rte_unused) /* * Take a reference on an NPF interface structure. If it does not exist, then - * create it. May be called from either forwarding thread or master loop. + * create it. May be called from either forwarding thread or main loop. * This is called when: * * 1. After DP_EVT_IF_INDEX_SET event if interface has interface attach points - * 2. A session is activated on an interface - * 3. npf config (e.g. nat64) on a different interface requires npf features + * 2. An interface is set into a zone + * 3. A session is activated on an interface + * 4. 
npf config (nat64 or zone) on a different interface requires npf features * to be enabled on all other interfaces * * initial_sess_count should be set to 1 for item #3, otherwise is should be @@ -149,6 +157,12 @@ npf_if_niif_create(struct ifnet *ifp, uint32_t initial_sess_count) CDS_INIT_LIST_HEAD(&niif->niif_list); rcu_assign_pointer(ifp->if_npf, &niif->niif_if); + + /* Return code counters */ + struct npf_rc_counts *rcc; + + rcc = npf_rc_counts_create(); + rcu_assign_pointer(niif->niif_rcc, rcc); } niif->niif_refcnt++; @@ -163,7 +177,7 @@ npf_if_niif_create(struct ifnet *ifp, uint32_t initial_sess_count) /* * Remove reference from NPF interface structure. If it is the last * reference, then NULL ifp->if_npf pointer and place niif onto the garbage - * collection list. May be called from either forwarding thread or master + * collection list. May be called from either forwarding thread or main * loop. */ static void @@ -194,6 +208,23 @@ npf_if_niif_delete(struct ifnet *ifp) ifp->if_name, niif->niif_refcnt); } +/* + * Get return code counter pointer + */ +struct npf_rc_counts *npf_if_get_rcc(struct ifnet *ifp) +{ + struct npf_if_internal *niif; + + if (unlikely(!ifp)) + return NULL; + + niif = (struct npf_if_internal *)ifp->if_npf; + if (!niif) + return NULL; + + return rcu_dereference(niif->niif_rcc); +} + /* * Take reference on interface niif */ @@ -204,11 +235,11 @@ void npf_if_reference_one(struct ifnet *ifp, void *arg __unused) /* * Take reference on niif for all interfaces. Typically this happens when - * nat64 is configured on one interface. + * nat64 or zones are configured on one interface. 
*/ void npf_if_reference_all(void) { - ifnet_walk(npf_if_reference_one, NULL); + dp_ifnet_walk(npf_if_reference_one, NULL); } /* @@ -224,7 +255,7 @@ void npf_if_release_one(struct ifnet *ifp, void *arg __unused) */ void npf_if_release_all(void) { - ifnet_walk(npf_if_release_one, NULL); + dp_ifnet_walk(npf_if_release_one, NULL); } /* @@ -264,10 +295,6 @@ npf_if_sessions_handling_enable(struct ifnet *ifp, bool nif_exists) if (!niif) goto end; - /* Enable defrag and fw features on interface */ - if_feat_refcnt_incr(ifp, IF_FEAT_DEFRAG); - if_feat_refcnt_incr(ifp, IF_FEAT_FW); - end: rte_spinlock_unlock(&niif_lock); } @@ -297,10 +324,6 @@ npf_if_sessions_handling_disable(struct ifnet *ifp, bool lock) if (niif == NULL) goto end; - /* Disable defrag and fw features on interface */ - if_feat_refcnt_decr(ifp, IF_FEAT_DEFRAG); - if_feat_refcnt_decr(ifp, IF_FEAT_FW); - /* Remove reference on npf interface structure. */ npf_if_niif_delete(ifp); @@ -354,7 +377,7 @@ npf_if_alloc_free(struct npf_config **npf_confp, bool alloc) * DP_EVT_IF_RENAME event. */ void -npf_if_enable(struct ifnet *ifp, uint32_t ifindex __unused) +npf_if_enable(struct ifnet *ifp) { int rc; @@ -376,8 +399,8 @@ npf_if_enable(struct ifnet *ifp, uint32_t ifindex __unused) */ npf_vrf_if_index_set(ifp); - /* Is this interface used for cgnat? */ - cgn_nif_index_set(ifp); + /* Is this interface in a zone? */ + npf_zone_if_index_set(ifp); rte_spinlock_unlock(&niif_lock); } @@ -411,12 +434,13 @@ npf_if_disable_with_name(struct ifnet *ifp, const char *if_name) npf_if_sessions_handling_disable(ifp, false); /* - * Decrement per-interface ruleset counts to zero + * Decrement per-interface ruleset counts to zero except for zones, + * which are handled by npf_zone_if_index_unset below. */ - npf_if_rs_count_decr_to_zero(ifp, ~0); + npf_if_rs_count_decr_to_zero(ifp, (~0 & ~NPF_ZONE)); - /* Is this interface used for cgnat? */ - cgn_nif_index_unset(ifp); + /* Is this interface in a zone? 
*/ + npf_zone_if_index_unset(ifp); /* * Are there any feature counts for the vrf this interface? @@ -449,7 +473,7 @@ void npf_if_rename(struct ifnet *ifp, const char *old_ifname) { npf_if_disable_with_name(ifp, old_ifname); - npf_if_enable(ifp, ifp->if_index); + npf_if_enable(ifp); } /* @@ -472,17 +496,16 @@ npf_if_rs_count_incr(struct ifnet *ifp, enum npf_ruleset_type rs_type) return; } + enum npf_rs_flag rfl = npf_get_ruleset_type_flags(rs_type); + /* * Increment interface feature ref counts for this ruleset type when * the ruleset count changes from 0 to 1 if it is a 'per interface' * type. */ if (niif->niif_rs_count[rs_type]++ == 0) { - enum npf_rs_flag rfl; /* Are features enabled per-interface? */ - rfl = npf_get_ruleset_type_flags(rs_type); - if ((rfl & NPF_RS_FLAG_FEAT_INTF) != 0) { enum if_feat_flag ffl; @@ -492,7 +515,8 @@ npf_if_rs_count_incr(struct ifnet *ifp, enum npf_ruleset_type rs_type) } } - npf_gbl_rs_count_incr(rs_type); + if ((rfl & NPF_RS_FLAG_FEAT_INTF_ALL) != 0) + npf_gbl_rs_count_incr(rs_type); } /* @@ -515,17 +539,16 @@ npf_if_rs_count_decr(struct ifnet *ifp, enum npf_ruleset_type rs_type) return; } + enum npf_rs_flag rfl = npf_get_ruleset_type_flags(rs_type); + /* * Decrement interface feature ref counts for this ruleset type when * the ruleset count changes from 1 to 0 if it is a 'per interface' * type. */ if (--niif->niif_rs_count[rs_type] == 0) { - enum npf_rs_flag rfl; /* Are features enabled per-interface? 
*/ - rfl = npf_get_ruleset_type_flags(rs_type); - if ((rfl & NPF_RS_FLAG_FEAT_INTF) != 0) { enum if_feat_flag ffl; @@ -535,7 +558,8 @@ npf_if_rs_count_decr(struct ifnet *ifp, enum npf_ruleset_type rs_type) } } - npf_gbl_rs_count_decr(rs_type); + if ((rfl & NPF_RS_FLAG_FEAT_INTF_ALL) != 0) + npf_gbl_rs_count_decr(rs_type); } /* @@ -569,7 +593,7 @@ npf_if_apev_if_add_rlset(enum npf_attpt_ev_type ev __unused, enum npf_ruleset_type *ruleset_type = (enum npf_ruleset_type *) data; const struct npf_attpt_key *apk = npf_attpt_item_key(ap); - struct ifnet *ifp = ifnet_byifname(apk->apk_point); + struct ifnet *ifp = dp_ifnet_byifname(apk->apk_point); if (!ifp || !ifp->if_index) return; @@ -590,7 +614,7 @@ npf_if_apev_if_del_rlset(enum npf_attpt_ev_type ev __unused, enum npf_ruleset_type *ruleset_type = (enum npf_ruleset_type *) data; const struct npf_attpt_key *apk = npf_attpt_item_key(ap); - struct ifnet *ifp = ifnet_byifname(apk->apk_point); + struct ifnet *ifp = dp_ifnet_byifname(apk->apk_point); if (!ifp || !ifp->if_index) return; @@ -647,82 +671,151 @@ void npf_if_addr_change(enum cont_src_en cont_src, struct ifnet *ifp, return; /* Update if we have an SNAT ruleset */ - npf_ruleset_t *rs = nif->nif_conf.nc_rulesets[NPF_RS_SNAT]; + const npf_ruleset_t *rs = npf_get_ruleset(&nif->nif_conf, NPF_RS_SNAT); + if (rs) npf_ruleset_update_masquerade(ifp, rs); } /* - * Get cgnat interface structure. + * Assign a zone interface to an npf interface. */ -struct cgn_intf *npf_if_get_cgn(struct ifnet *ifp) +int +npf_if_zone_assign(struct ifnet *ifp, struct npf_zone_intf *zif, bool lock) { struct npf_if_internal *niif; + struct npf_zone_intf *cur; + int rc = 0; - if (!ifp) - return NULL; + if (lock) + rte_spinlock_lock(&niif_lock); niif = (struct npf_if_internal *)ifp->if_npf; - if (niif) - return niif->niif_cgn; + cur = niif ? rcu_dereference(niif->niif_zif) : NULL; - return NULL; + /* Already assigned or unassigned? 
*/ + assert(!cur != !zif); + if (!cur == !zif) { + rc = -EINVAL; + goto end; + } + + if (zif) { + /* Take reference on, or create, niif */ + niif = npf_if_niif_create(ifp, 0); + + assert(niif != NULL); + if (!niif) { + rc = -ENOMEM; + goto end; + } + + rcu_assign_pointer(niif->niif_zif, zif); + npf_if_rs_count_incr(ifp, NPF_RS_ZONE); + } else { + npf_if_rs_count_decr(ifp, NPF_RS_ZONE); + rcu_assign_pointer(niif->niif_zif, NULL); + + /* Remove reference from niif */ + npf_if_niif_delete(ifp); + } + +end: + if (lock) + rte_spinlock_unlock(&niif_lock); + + return rc; } -int npf_if_set_cgn(struct ifnet *ifp, struct cgn_intf *cgn) +/* + * npf_if_zone_is_enabled + */ +bool npf_if_zone_is_enabled(const struct npf_if *nif) { - struct npf_if_internal *niif; - int rc = 0; + struct npf_if_internal *niif = (struct npf_if_internal *)nif; - assert(!rte_spinlock_is_locked(&niif_lock)); + return niif && niif->niif_zif; +} - rte_spinlock_lock(&niif_lock); +/* Zone intf from ifp */ +struct npf_zone_intf *npf_if_zone_intf(struct ifnet *ifp) +{ + struct npf_if *nif = rcu_dereference(ifp->if_npf); + struct npf_if_internal *niif = (struct npf_if_internal *)nif; - if (!ifp || !cgn) { - rc = -1; - goto end; - } + if (niif) + return rcu_dereference(niif->niif_zif); + return NULL; +} - /* Take reference on niif. 
Create, if necessary */ - niif = npf_if_niif_create(ifp, 0); - if (!niif) { - rc = -1; - goto end; - } +/* Zone from nif */ +struct npf_zone *npf_nif_zone(const struct npf_if *nif) +{ + struct npf_if_internal *niif = (struct npf_if_internal *)nif; - rc = 0; - rcu_assign_pointer(niif->niif_cgn, cgn); + if (niif) + return npf_zone_zif2zone(rcu_dereference(niif->niif_zif)); + return NULL; +} -end: - rte_spinlock_unlock(&niif_lock); - return rc; +/* Zone from ifp */ +struct npf_zone *npf_if_zone(const struct ifnet *ifp) +{ + struct npf_if *nif = rcu_dereference(ifp->if_npf); + struct npf_if_internal *niif = (struct npf_if_internal *)nif; + + if (niif) + return npf_zone_zif2zone(rcu_dereference(niif->niif_zif)); + return NULL; } -int npf_if_clear_cgn(struct ifnet *ifp, bool lock) +/* + * This is called when a ruleset is attached to "global:" + */ +static void +npf_apev_gbl_add_rlset(enum npf_attpt_ev_type ev __unused, + struct npf_attpt_item *ap __unused, void *data) { - struct npf_if_internal *niif; - struct cgn_intf *cgn; - int rc = -1; + enum npf_ruleset_type rs_type = *(enum npf_ruleset_type *)data; + enum npf_rs_flag rfl = npf_get_ruleset_type_flags(rs_type); - if (lock) { - assert(!rte_spinlock_is_locked(&niif_lock)); + if ((rfl & NPF_RS_FLAG_FEAT_GBL) != 0) { rte_spinlock_lock(&niif_lock); - } else { - assert(rte_spinlock_is_locked(&niif_lock)); - } - niif = (struct npf_if_internal *)ifp->if_npf; - if (niif) { - rc = 0; - cgn = rcu_xchg_pointer(&niif->niif_cgn, NULL); + /* Enable the relevant features on all interfaces */ + npf_gbl_rs_count_incr(rs_type); - /* Release reference on niif.
*/ - if (cgn) - npf_if_niif_delete(ifp); + rte_spinlock_unlock(&niif_lock); } +} + +static void +npf_apev_gbl_del_rlset(enum npf_attpt_ev_type ev __unused, + struct npf_attpt_item *ap __unused, void *data) +{ + enum npf_ruleset_type *rs_type = (enum npf_ruleset_type *) data; + enum npf_rs_flag rfl = npf_get_ruleset_type_flags(*rs_type); + + if ((rfl & NPF_RS_FLAG_FEAT_GBL) != 0) { + rte_spinlock_lock(&niif_lock); + + npf_gbl_rs_count_decr(*rs_type); - if (lock) rte_spinlock_unlock(&niif_lock); + } +} - return rc; +/* + * The global attach point is used when a ruleset is configured in a loopback + * interface. + */ +void npf_gbl_attach_point_init(void) +{ + npf_attpt_ev_listen(NPF_ATTACH_TYPE_GLOBAL, + (1 << NPF_ATTPT_EV_RLSET_ADD_COMMIT), + npf_apev_gbl_add_rlset); + + npf_attpt_ev_listen(NPF_ATTACH_TYPE_GLOBAL, + (1 << NPF_ATTPT_EV_RLSET_DEL_COMMIT), + npf_apev_gbl_del_rlset); } diff --git a/src/npf/npf_if.h b/src/npf/npf_if.h index 8394b976..2cac895c 100644 --- a/src/npf/npf_if.h +++ b/src/npf/npf_if.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2017 by Brocade Communications Systems, Inc. * All rights reserved.
* @@ -21,7 +21,8 @@ struct ifnet; struct npf_config; -struct cgn_intf; +struct npf_zone_intf; +struct npf_zone; struct npf_if { struct npf_config nif_conf; @@ -29,6 +30,7 @@ struct npf_if { struct ifnet *nif_ifp; }; +bool npf_if_zone_is_enabled(const struct npf_if *nif); void npf_if_sessions_handling_enable(struct ifnet *ifp, bool nif_exists); void npf_if_sessions_handling_disable(struct ifnet *ifp, bool lock); @@ -45,6 +47,9 @@ npf_if_active(struct npf_if *nif, uint32_t bitmask) if (unlikely(!nif)) return false; + if ((bitmask & NPF_ZONE) && npf_if_zone_is_enabled(nif)) + return true; + struct npf_config *nif_conf = npf_if_conf(nif); if (npf_active(nif_conf, bitmask)) return true; @@ -55,6 +60,25 @@ npf_if_active(struct npf_if *nif, uint32_t bitmask) return false; } +/* + * Is SNAT configured in this interface? + */ +static inline bool npf_snat_active(struct ifnet *ifp) +{ + struct npf_if *nif; + struct npf_config *nif_conf; + + nif = rcu_dereference(ifp->if_npf); + if (!nif) + return false; + + nif_conf = npf_if_conf(nif); + if (npf_active(nif_conf, NPF_SNAT)) + return true; + + return false; +} + static inline void npf_if_session_inc(struct ifnet *ifp) { struct npf_if *nif = rcu_dereference(ifp->if_npf); @@ -99,7 +123,7 @@ void npf_if_reference_one(struct ifnet *ifp, void *arg); void npf_if_release_all(void); void npf_if_release_one(struct ifnet *ifp, void *arg); -void npf_if_enable(struct ifnet *ifp, uint32_t ifindex); +void npf_if_enable(struct ifnet *ifp); void npf_if_disable(struct ifnet *ifp, uint32_t ifindex); void npf_if_rename(struct ifnet *ifp, const char *old_ifname); @@ -113,4 +137,13 @@ void npf_if_cleanup(void); void npf_if_addr_change(enum cont_src_en cont_src, struct ifnet *ifp, uint32_t if_index, int af, const void *addr); +int npf_if_zone_assign(struct ifnet *ifp, struct npf_zone_intf *zif, + bool lock); +struct npf_zone_intf *npf_if_zone_intf(struct ifnet *ifp); +struct npf_zone *npf_nif_zone(const struct npf_if *nif); +struct npf_zone 
*npf_if_zone(const struct ifnet *ifp); + +struct npf_rc_counts *npf_if_get_rcc(struct ifnet *ifp); +void npf_gbl_attach_point_init(void); + #endif /* NPF_IF_H */ diff --git a/src/npf/npf_if_feat.c b/src/npf/npf_if_feat.c index f52f1c31..82f344b0 100644 --- a/src/npf/npf_if_feat.c +++ b/src/npf/npf_if_feat.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2019-2020, AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only */ @@ -9,9 +9,8 @@ #include #include "util.h" -#include "vrf.h" +#include "vrf_internal.h" #include "if_var.h" -#include "dpi_public.h" #include "pl_node.h" #include "pipeline/nodes/pl_nodes_common.h" #include "vplane_log.h" @@ -39,9 +38,13 @@ static void npf_if_feat_enable_acl_out(struct ifnet *ifp, bool enable) if (enable) { pl_node_add_feature_by_inst(&ipv4_acl_out_feat, ifp); pl_node_add_feature_by_inst(&ipv6_acl_out_feat, ifp); + pl_node_add_feature_by_inst(&ipv4_acl_out_spath_feat, ifp); + pl_node_add_feature_by_inst(&ipv6_acl_out_spath_feat, ifp); } else { pl_node_remove_feature_by_inst(&ipv4_acl_out_feat, ifp); pl_node_remove_feature_by_inst(&ipv6_acl_out_feat, ifp); + pl_node_remove_feature_by_inst(&ipv4_acl_out_spath_feat, ifp); + pl_node_remove_feature_by_inst(&ipv6_acl_out_spath_feat, ifp); } } @@ -63,6 +66,22 @@ static void npf_if_feat_enable_defrag(struct ifnet *ifp, bool enable) } } +/* + * Enable or disable defrag feature for spath (egress) + */ +static void npf_if_feat_enable_defrag_spath(struct ifnet *ifp, bool enable) +{ + if (enable) { + pl_node_add_feature_by_inst(&ipv4_defrag_out_spath_feat, ifp); + pl_node_add_feature_by_inst(&ipv6_defrag_out_spath_feat, ifp); + } else { + pl_node_remove_feature_by_inst(&ipv4_defrag_out_spath_feat, + ifp); + pl_node_remove_feature_by_inst(&ipv6_defrag_out_spath_feat, + ifp); + } +} + /* * Enable or disable fw feature */ @@ -73,11 +92,29 @@ static void npf_if_feat_enable_fw(struct ifnet *ifp, bool 
enable) pl_node_add_feature_by_inst(&ipv6_fw_in_feat, ifp); pl_node_add_feature_by_inst(&ipv4_fw_out_feat, ifp); pl_node_add_feature_by_inst(&ipv6_fw_out_feat, ifp); + pl_node_add_feature_by_inst(&ipv4_fw_out_spath_feat, ifp); + pl_node_add_feature_by_inst(&ipv6_fw_out_spath_feat, ifp); } else { pl_node_remove_feature_by_inst(&ipv4_fw_in_feat, ifp); pl_node_remove_feature_by_inst(&ipv6_fw_in_feat, ifp); pl_node_remove_feature_by_inst(&ipv4_fw_out_feat, ifp); pl_node_remove_feature_by_inst(&ipv6_fw_out_feat, ifp); + pl_node_remove_feature_by_inst(&ipv4_fw_out_spath_feat, ifp); + pl_node_remove_feature_by_inst(&ipv6_fw_out_spath_feat, ifp); + } +} + +/* + * Enable or disable fw originate feature + */ +static void npf_if_feat_enable_fw_orig(struct ifnet *ifp, bool enable) +{ + if (enable) { + pl_node_add_feature_by_inst(&ipv4_fw_orig_feat, ifp); + pl_node_add_feature_by_inst(&ipv6_fw_orig_feat, ifp); + } else { + pl_node_remove_feature_by_inst(&ipv4_fw_orig_feat, ifp); + pl_node_remove_feature_by_inst(&ipv6_fw_orig_feat, ifp); } } @@ -123,14 +160,36 @@ static void npf_if_feat_enable_cgnat(struct ifnet *ifp, bool enable) } } +/* + * Enable or disable nat64 feature + */ +static void npf_if_feat_enable_nat64(struct ifnet *ifp, bool enable) +{ + if (enable) { + pl_node_add_feature_by_inst(&ipv4_nat46_in_feat, ifp); + pl_node_add_feature_by_inst(&ipv6_nat46_out_feat, ifp); + pl_node_add_feature_by_inst(&ipv6_nat64_in_feat, ifp); + pl_node_add_feature_by_inst(&ipv4_nat64_out_feat, ifp); + } else { + pl_node_remove_feature_by_inst(&ipv4_nat46_in_feat, ifp); + pl_node_remove_feature_by_inst(&ipv6_nat46_out_feat, ifp); + pl_node_remove_feature_by_inst(&ipv6_nat64_in_feat, ifp); + pl_node_remove_feature_by_inst(&ipv4_nat64_out_feat, ifp); + } +} + void npf_if_feat_init(void) { if_feat_init(npf_if_feat_enable_acl_in, "acl-in", IF_FEAT_ACL_IN); if_feat_init(npf_if_feat_enable_acl_out, "acl-out", IF_FEAT_ACL_OUT); if_feat_init(npf_if_feat_enable_defrag, "defrag", IF_FEAT_DEFRAG); 
+ if_feat_init(npf_if_feat_enable_defrag_spath, "defrag-spath", + IF_FEAT_DEFRAG_SPATH); if_feat_init(npf_if_feat_enable_fw, "firewall", IF_FEAT_FW); + if_feat_init(npf_if_feat_enable_fw_orig, "fw-orig", IF_FEAT_FW_ORIG); if_feat_init(npf_if_feat_enable_pbr, "pbr", IF_FEAT_PBR); if_feat_init(npf_if_feat_enable_nptv6, "nptv6", IF_FEAT_NPTV6); if_feat_init(npf_if_feat_enable_cgnat, "cgnat", IF_FEAT_CGNAT); + if_feat_init(npf_if_feat_enable_nat64, "nat64", IF_FEAT_NAT64); if_feat_init(NULL, "dpi", IF_FEAT_DPI); } diff --git a/src/npf/npf_instr.c b/src/npf/npf_instr.c index 74352750..e2a0c33d 100644 --- a/src/npf/npf_instr.c +++ b/src/npf/npf_instr.c @@ -58,7 +58,7 @@ #include "npf/npf_ncode.h" #include "npf/npf_ruleset.h" -#define NPF_PORTRANGE_MATCH(r, p) (p >= (r >> 16) && p <= (r & 0xffff)) +#define NPF_PORTRANGE_MATCH(r, p) ((p) >= ((r) >> 16) && (p) <= ((r) & 0xffff)) /* @@ -67,26 +67,48 @@ int npf_match_mac(const struct rte_mbuf *nbuf, uint32_t opts, const char *filt) { - const struct ether_hdr *eh = rte_pktmbuf_mtod(nbuf, struct ether_hdr *); - const struct ether_addr *addr; + const struct rte_ether_hdr *eh = + rte_pktmbuf_mtod(nbuf, struct rte_ether_hdr *); + const struct rte_ether_addr *addr; addr = (opts & NC_MATCH_SRC) ? &eh->s_addr : &eh->d_addr; - return ether_addr_equal(addr, (struct ether_addr *)filt) ? 0 : -1; + return rte_ether_addr_equal(addr, + (struct rte_ether_addr *)filt) ? 0 : -1; } /* - * npf_match_proto: match layer 4 protocol. + * npf_match_proto_final: match the L4 protocol. */ int -npf_match_proto(const npf_cache_t *npc, uint32_t ap) +npf_match_proto_final(const npf_cache_t *npc, uint32_t ap) { - const int proto = ap & 0xff; + const int proto_final = ap & 0xff; if (!npf_iscached(npc, NPC_IP46)) return -1; - return (npf_cache_ipproto(npc) != proto) ? -1 : 0; + return (npf_cache_ipproto(npc) != proto_final) ? 
-1 : 0; +} + +/* + * npf_match_proto_base: match the protocol in IPv4 or IPv6 header + */ +int +npf_match_proto_base(const npf_cache_t *npc, uint32_t ap) +{ + const uint8_t proto_base = ap & 0xff; + + if (likely(npf_iscached(npc, NPC_IP4))) { + const struct ip *ip = &npc->npc_ip.v4; + return (proto_base != ip->ip_p) ? -1 : 0; + } + if (likely(npf_iscached(npc, NPC_IP6))) { + const struct ip6_hdr *ip6 = &npc->npc_ip.v6; + return (proto_base != ip6->ip6_nxt) ? -1 : 0; + } + + return -1; } /* @@ -105,21 +127,16 @@ int npf_match_table(const npf_cache_t *npc, uint32_t opts, const u_int tid) { npf_addr_t *addr; - struct npf_addrgrp *ag; if (opts & NC_MATCH_SRC) addr = npf_cache_srcip(npc); else addr = npf_cache_dstip(npc); - ag = npf_addrgrp_tid_lookup(tid); - if (!ag) - return -EINVAL; - if (npf_iscached(npc, NPC_IP4)) - return npf_addrgrp_lookup(AG_IPv4, ag, addr); - else if (npf_iscached(npc, NPC_IP6)) - return npf_addrgrp_lookup(AG_IPv6, ag, addr); + return npf_addrgrp_lookup(AG_IPv4, tid, addr); + if (npf_iscached(npc, NPC_IP6)) + return npf_addrgrp_lookup(AG_IPv6, tid, addr); return -1; } @@ -129,7 +146,7 @@ npf_match_ip_fam(const npf_cache_t *npc, uint32_t fam) { if (fam == AF_INET && npf_iscached(npc, NPC_IP4)) return 0; - else if (fam == AF_INET6 && npf_iscached(npc, NPC_IP6)) + if (fam == AF_INET6 && npf_iscached(npc, NPC_IP6)) return 0; return -1; } @@ -270,18 +287,24 @@ npf_match_icmp4(const npf_cache_t *npc, uint32_t tc) if (unlikely(npf_cache_ipproto(npc) != IPPROTO_ICMP)) return -1; + /* Match type class, if required. */ + if (tc & NC_ICMP_HAS_CLASS) { + const bool error = NC_ICMP_GET_TYPE_FROM_OP(tc); + if (npf_iscached(npc, NPC_ICMP_ERR) != error) + return -1; + return 0; + } + /* Match code/type, if required. 
*/ - if ((1u << 31) & tc) { - const uint8_t type = (tc >> 8) & 0xff; - if (type != ic->icmp_type) { + if (tc & NC_ICMP_HAS_TYPE) { + const uint8_t type = NC_ICMP_GET_TYPE_FROM_OP(tc); + if (type != ic->icmp_type) return -1; - } } - if ((1u << 30) & tc) { - const uint8_t code = tc & 0xff; - if (code != ic->icmp_code) { + if (tc & NC_ICMP_HAS_CODE) { + const uint8_t code = NC_ICMP_GET_CODE_FROM_OP(tc); + if (code != ic->icmp_code) return -1; - } } return 0; } @@ -310,18 +333,24 @@ npf_match_icmp6(const npf_cache_t *npc, uint32_t tc) if (unlikely(npf_cache_ipproto(npc) != IPPROTO_ICMPV6)) return -1; + /* Match type class, if required. */ + if (tc & NC_ICMP_HAS_CLASS) { + const bool error = NC_ICMP_GET_TYPE_FROM_OP(tc); + if (npf_iscached(npc, NPC_ICMP_ERR) != error) + return -1; + return 0; + } + /* Match code/type, if required. */ - if ((1u << 31) & tc) { - const uint8_t type = (tc >> 8) & 0xff; - if (type != ic6->icmp6_type) { + if (tc & NC_ICMP_HAS_TYPE) { + const uint8_t type = NC_ICMP_GET_TYPE_FROM_OP(tc); + if (type != ic6->icmp6_type) return -1; - } } - if ((1u << 30) & tc) { - const uint8_t code = tc & 0xff; - if (code != ic6->icmp6_code) { + if (tc & NC_ICMP_HAS_CODE) { + const uint8_t code = NC_ICMP_GET_CODE_FROM_OP(tc); + if (code != ic6->icmp6_code) return -1; - } } return 0; } @@ -335,6 +364,9 @@ npf_match_tcpfl(const npf_cache_t *npc, uint32_t fl) const uint8_t tcpfl = (fl >> 8) & 0xff, mask = fl & 0xff; const struct tcphdr *th = &npc->npc_l4.tcp; + if (unlikely(!npf_iscached(npc, NPC_IP46))) + return -1; + /* already attempted at beginning of hook */ if (unlikely(npf_cache_ipproto(npc) != IPPROTO_TCP)) return -1; @@ -367,7 +399,7 @@ npf_match_dscp(const npf_cache_t *npc, const uint64_t set) int npf_match_etype(const struct rte_mbuf *nbuf, uint32_t etype) { - uint16_t ether_type = ethtype(nbuf, ETHER_TYPE_VLAN); + uint16_t ether_type = ethtype(nbuf, RTE_ETHER_TYPE_VLAN); if (ether_type != etype) return -1; diff --git a/src/npf/npf_instr.h 
b/src/npf/npf_instr.h index cf6c99aa..73c8d0fd 100644 --- a/src/npf/npf_instr.h +++ b/src/npf/npf_instr.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2016 by Brocade Communications Systems, Inc. * All rights reserved. */ @@ -44,7 +44,7 @@ #include "npf/npf.h" #include "npf/npf_cache.h" #include "npf/npf_session.h" -#include "pktmbuf.h" +#include "pktmbuf_internal.h" struct rte_mbuf; @@ -54,7 +54,8 @@ typedef struct npf_cache npf_cache_t; typedef struct npf_rule npf_rule_t; int npf_match_mac(const struct rte_mbuf *nbuf, uint32_t opts, const char *filt); -int npf_match_proto(const npf_cache_t *npc, uint32_t ap); +int npf_match_proto_final(const npf_cache_t *npc, uint32_t ap); +int npf_match_proto_base(const npf_cache_t *npc, uint32_t ap); int npf_match_pcp(const struct rte_mbuf *nbuf, uint32_t pcp); int npf_match_table(const npf_cache_t *npc, uint32_t opts, const u_int tid); int npf_match_ip_fam(const npf_cache_t *npc, uint32_t fam); @@ -69,7 +70,7 @@ int npf_match_icmp4(const npf_cache_t *npc, uint32_t tc); int npf_match_ip6_rt(const npf_cache_t *npc, uint32_t type); int npf_match_icmp6(const npf_cache_t *npc, uint32_t tc); int npf_match_tcpfl(const npf_cache_t *npc, uint32_t fl); -int npf_match_dscp(const npf_cache_t *npc, const uint64_t n); +int npf_match_dscp(const npf_cache_t *npc, const uint64_t set); int npf_match_etype(const struct rte_mbuf *nbuf, uint32_t etype); struct ifnet; diff --git a/src/npf/npf_match.c b/src/npf/npf_match.c new file mode 100644 index 00000000..a69866ec --- /dev/null +++ b/src/npf/npf_match.c @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2020, AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ + +/* + * Abstraction for a packet matching utility. 
Used to provide the ability + * to use different packet matching algorithms depending on the ruleset + * type. + * + * If no callback table is registered, the abstraction uses grouper2 by default + */ +#include "npf_match.h" +#include "npf_grouper.h" + +static npf_match_cb_tbl * npf_match_cbs[NPF_RS_TYPE_COUNT]; + +int npf_match_register_cb_tbl(enum npf_ruleset_type rs_type, + npf_match_cb_tbl *tbl) +{ + if (!tbl->npf_match_init_cb || !tbl->npf_match_add_rule_cb || + !tbl->npf_match_build_cb || !tbl->npf_match_classify_cb || + !tbl->npf_match_destroy_cb) + return -EINVAL; + + npf_match_cbs[rs_type] = tbl; + + return 0; +} + +int npf_match_init(enum npf_ruleset_type rs_type, int af, const char *name, + uint32_t max_rules, npf_match_ctx_t **ctx) +{ + npf_match_cb_tbl *tbl; + + tbl = npf_match_cbs[rs_type]; + if (tbl) + return tbl->npf_match_init_cb(af, name, max_rules, ctx); + + return npf_grouper_init(af, (g2_config_t **)ctx); +} + +int npf_match_add_rule(enum npf_ruleset_type rs_type, + int af, npf_match_ctx_t *ctx, + uint32_t rule_no, uint8_t *match_addr, uint8_t *mask, + void *match_ctx) +{ + npf_match_cb_tbl *tbl; + + tbl = npf_match_cbs[rs_type]; + if (tbl) + return tbl->npf_match_add_rule_cb(af, ctx, rule_no, + match_addr, mask, match_ctx); + + return npf_grouper_add_rule(af, (g2_config_t *)ctx, rule_no, + match_addr, mask, match_ctx); +} + +int npf_match_build(enum npf_ruleset_type rs_type, + int af, npf_match_ctx_t **ctx) +{ + npf_match_cb_tbl *tbl; + + tbl = npf_match_cbs[rs_type]; + if (tbl) + return tbl->npf_match_build_cb(af, ctx); + + return npf_grouper_build((g2_config_t **)ctx); +} + +int npf_match_classify(enum npf_ruleset_type rs_type, + int af, npf_match_ctx_t *ctx, + npf_cache_t *npc, struct npf_match_cb_data *data, + npf_rule_t **rl) +{ + npf_match_cb_tbl *tbl; + + tbl = npf_match_cbs[rs_type]; + if (tbl) + return tbl->npf_match_classify_cb(af, ctx, npc, data, rl); + + return npf_grouper_match(af, (g2_config_t *)ctx, npc, data, rl); +} + +int 
npf_match_destroy(enum npf_ruleset_type rs_type, + int af, npf_match_ctx_t **ctx) +{ + npf_match_cb_tbl *tbl; + + tbl = npf_match_cbs[rs_type]; + if (tbl) + return tbl->npf_match_destroy_cb(af, ctx); + + return npf_grouper_destroy((g2_config_t **)ctx); +} diff --git a/src/npf/npf_match.h b/src/npf/npf_match.h new file mode 100644 index 00000000..9461feb9 --- /dev/null +++ b/src/npf/npf_match.h @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2019, AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ + +/* + * Abstraction for a packet matching utility. Used to provide the ability + * to use different packet matching algorithms depending on the ruleset + * type. + */ +#ifndef NPF_MATCH_H +#define NPF_MATCH_H + +#include "npf/config/npf_ruleset_type.h" +#include "npf/npf_ruleset.h" +#include "npf/npf_cache.h" + +typedef struct npf_match_ctx npf_match_ctx_t; + +struct npf_match_cb_data { + npf_cache_t *npc; + struct rte_mbuf *mbuf; + const struct ifnet *ifp; + int dir; + npf_session_t *se; + npf_rule_group_t *rg; +}; + +typedef int (*npf_match_init_cb_t)(int af, const char *name, + uint32_t max_rules, + npf_match_ctx_t **ctx); +typedef int (*npf_match_add_rule_cb_t)(int af, npf_match_ctx_t *ctx, + uint32_t rule_no, + uint8_t *match_addr, uint8_t *mask, + void *match_ctx); +typedef int (*npf_match_build_cb_t)(int af, npf_match_ctx_t **ctx); +typedef int (*npf_match_classify_cb_t)(int af, npf_match_ctx_t *ctx, + npf_cache_t *npc, + struct npf_match_cb_data *data, + npf_rule_t **rl); +typedef int (*npf_match_destroy_cb_t)(int af, npf_match_ctx_t **ctx); + + +typedef struct npf_match_cb_tbl { + npf_match_init_cb_t npf_match_init_cb; + npf_match_add_rule_cb_t npf_match_add_rule_cb; + npf_match_build_cb_t npf_match_build_cb; + npf_match_classify_cb_t npf_match_classify_cb; + npf_match_destroy_cb_t npf_match_destroy_cb; +} npf_match_cb_tbl; + +int npf_match_register_cb_tbl(enum npf_ruleset_type rs_type, + npf_match_cb_tbl *tbl); + +int 
npf_match_init(enum npf_ruleset_type rs_type, + int af, const char *name, + uint32_t max_rules, npf_match_ctx_t **ctx); + +int npf_match_add_rule(enum npf_ruleset_type rs_type, + int af, npf_match_ctx_t *ctx, uint32_t rule_no, + uint8_t *match_addr, uint8_t *mask, + void *match_ctx); + +int npf_match_build(enum npf_ruleset_type rs_type, + int af, npf_match_ctx_t **ctx); + +int npf_match_classify(enum npf_ruleset_type rs_type, + int af, npf_match_ctx_t *ctx, + npf_cache_t *npc, struct npf_match_cb_data *data, + npf_rule_t **rl); + +int npf_match_destroy(enum npf_ruleset_type rs_type, + int af, npf_match_ctx_t **ctx); + +#endif diff --git a/src/npf/npf_mbuf.c b/src/npf/npf_mbuf.c index e2f0a33c..b483781b 100644 --- a/src/npf/npf_mbuf.c +++ b/src/npf/npf_mbuf.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2016 by Brocade Communications Systems, Inc. * All rights reserved. */ diff --git a/src/npf/npf_mbuf.h b/src/npf/npf_mbuf.h index aa8b2d74..beb82fb0 100644 --- a/src/npf/npf_mbuf.h +++ b/src/npf/npf_mbuf.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2016 by Brocade Communications Systems, Inc. * All rights reserved. */ diff --git a/src/npf/npf_nat.c b/src/npf/npf_nat.c index ec072699..3aea2f62 100644 --- a/src/npf/npf_nat.c +++ b/src/npf/npf_nat.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2021, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2016 by Brocade Communications Systems, Inc. * All rights reserved. 
*/ @@ -100,11 +100,12 @@ #include #include "compiler.h" -#include "config.h" +#include "config_internal.h" #include "in_cksum.h" #include "if_var.h" +#include "ip_funcs.h" #include "npf/npf.h" -#include "npf/alg/npf_alg_public.h" +#include "npf/alg/alg_npf.h" #include "npf/config/npf_config.h" #include "npf/config/npf_ruleset_type.h" #include "npf/npf_addrgrp.h" @@ -113,10 +114,13 @@ #include "npf/npf_icmp.h" #include "npf/npf_mbuf.h" #include "npf/npf_nat.h" +#include "npf/npf_rc.h" #include "npf/npf_ruleset.h" #include "npf/rproc/npf_ext_log.h" +#include "npf/npf_pack.h" #include "npf_tblset.h" -#include "pktmbuf.h" +#include "npf_addr.h" +#include "pktmbuf_internal.h" #include "urcu.h" #include "vplane_log.h" @@ -124,6 +128,15 @@ struct npf_session; /* * NAT policy structure. + * + * The two main places where a reference is held on a NAT policy are: + * + * 1. The ruleset (npf_rule_t, r_natp) + * 2. The NAT data for a session (struct npf_nat, nt_natpolicy) + * + * A pointer to a NAT policy is also stored for a short time in each media + * structure in a SIP ALG request structure stored in the request hash table. + * No reference is held on the NAT policy in this instance. */ struct npf_natpolicy { struct rcu_head n_rcu_head; @@ -190,12 +203,12 @@ static inline bool before(uint32_t n1, uint32_t n2) return (((int32_t) (n1 - n2)) < 0 ? true : false); } #define after(n2, n1) before(n1, n2) -#define direction_index(a) (a == PFIL_IN ? 0 : 1) +#define direction_index(a) ((a) == PFIL_IN ? 0 : 1) #define TCP_SACK_PERBLOCK 8 /* - * npf_nat_policy_get() - Get a ref to a nat policy + * npf_nat_policy_get() - Take a reference on a NAT policy */ npf_natpolicy_t *npf_nat_policy_get(npf_natpolicy_t *np) { @@ -206,7 +219,7 @@ npf_natpolicy_t *npf_nat_policy_get(npf_natpolicy_t *np) /* * Free the nat policy. * Called from both a call_rcu thread context as well - * as from the master thread. + * as from the main thread. 
*/ static void npf_nat_policy_free(struct rcu_head *head) { @@ -218,16 +231,19 @@ static void npf_nat_policy_free(struct rcu_head *head) } /* - * npf_nat_policy_put() - Release ref to nat policy + * npf_nat_policy_put() - Release reference on a NAT policy */ void npf_nat_policy_put(npf_natpolicy_t *np) { + assert(np); + assert(rte_atomic32_read(&np->n_refcnt) > 0); + if (rte_atomic32_dec_and_test(&np->n_refcnt)) npf_nat_policy_free(&np->n_rcu_head); } /* - * npf_nat_policy_put_rcu() - Release ref to nat policy + * npf_nat_policy_put_rcu() - Release reference on a NAT policy * * called during a nat masquerade address change. * @@ -236,6 +252,9 @@ void npf_nat_policy_put(npf_natpolicy_t *np) */ static void npf_nat_policy_put_rcu(npf_natpolicy_t *np) { + assert(np); + assert(rte_atomic32_read(&np->n_refcnt) > 0); + if (rte_atomic32_dec_and_test(&np->n_refcnt)) call_rcu(&np->n_rcu_head, npf_nat_policy_free); } @@ -484,7 +503,7 @@ npf_create_natpolicy(npf_rule_t *rl, uint8_t type, uint32_t flags, return -ENOMEM; } - rte_atomic32_set(&np->n_refcnt, 1); + rte_atomic32_init(&np->n_refcnt); np->n_type = type; np->n_flags = flags; @@ -606,17 +625,18 @@ void npf_nat_get_original_tuple(npf_nat_t *nt, npf_cache_t *npc, * npf_nat_create: create a new NAT translation entry. 
*/ static npf_nat_t * -npf_nat_create(npf_rule_t *rl, - npf_cache_t *npc, npf_natpolicy_t *np, vrfid_t vrfid) +npf_nat_create(npf_rule_t *rl, npf_cache_t *npc, npf_natpolicy_t *np, + vrfid_t vrfid, int *rc) { npf_nat_t *nt; int nr_ports = 0; - int rc; /* Create a nat struct */ nt = malloc_aligned(sizeof(npf_nat_t)); - if (nt == NULL) + if (nt == NULL) { + *rc = -NPF_RC_NAT_ENOMEM; return NULL; + } nt->nt_natpolicy = npf_nat_policy_get(np); nt->nt_alg = NULL; @@ -663,9 +683,16 @@ npf_nat_create(npf_rule_t *rl, } else nt->nt_oport = nt->nt_tport = 0; - rc = npf_nat_alloc_map(np, rl, nt->nt_map_flags, vrfid, - (npf_addr_t *) &nt->nt_taddr, &nt->nt_tport, nr_ports); - if (unlikely(rc != 0)) { + if ((np->n_flags & NPF_NAT_PA_SEQ) != 0) + nt->nt_map_flags |= NPF_NAT_PA_SEQ; + + int error; + error = npf_nat_alloc_map(np, rl, nt->nt_map_flags, + npf_cache_ipproto(npc), + vrfid, (npf_addr_t *) &nt->nt_taddr, + &nt->nt_tport, nr_ports); + if (unlikely(error < 0)) { + *rc = error; npf_nat_destroy(nt); return NULL; } @@ -855,6 +882,7 @@ npf_nat_translate_at(npf_cache_t *npc, struct rte_mbuf *nbuf, uint16_t l3_chk_delta = nt->nt_l3_chk; uint16_t l4_chk_delta = nt->nt_l4_chk; bool l4_changed = l4_chk_delta; + int rc = 0; /* * This expression is not ambiguous @@ -873,29 +901,33 @@ npf_nat_translate_at(npf_cache_t *npc, struct rte_mbuf *nbuf, } /* Rewrite IP and possibly the transport checksums */ - if (!npf_v4_rwrcksums(npc, nbuf, n_ptr, l3_chk_delta, l4_chk_delta)) { + rc = npf_v4_rwrcksums(npc, nbuf, n_ptr, l3_chk_delta, l4_chk_delta); + if (rc < 0) { /* * It is okay to fail for packets embedded in short ICMP * error messages, as it just has a partial L4 header. 
*/ if (!(npc->npc_info & NPC_SHORT_ICMP_ERR)) - return -EINVAL; + return rc; } /* Rewrite source or destination address */ - if (!npf_rwrip(npc, nbuf, n_ptr, di, addr)) - return -EINVAL; + rc = npf_rwrip(npc, nbuf, n_ptr, di, addr); + if (rc < 0) + return rc; /* Maybe rewrite some L4 information */ if (l4_changed) { if (likely(npf_iscached(npc, NPC_L4PORTS))) { /* Rewrite source or destination port */ - if (!npf_rwrport(npc, nbuf, n_ptr, di, port)) - return -EINVAL; + rc = npf_rwrport(npc, nbuf, n_ptr, di, port); + if (rc < 0) + return rc; } else if (npf_iscached(npc, NPC_ICMP_ECHO)) { /* Rewrite ICMP query/response ID */ - if (!npf_rwricmpid(npc, nbuf, n_ptr, port)) - return -EINVAL; + rc = npf_rwricmpid(npc, nbuf, n_ptr, port); + if (rc < 0) + return rc; } } @@ -909,11 +941,11 @@ static int npf_nat_translate(npf_cache_t *npc, struct rte_mbuf *nbuf, npf_nat_t *nt, const bool forw, const int di) { - void *n_ptr = pktmbuf_mtol3(nbuf, void *); + void *n_ptr = dp_pktmbuf_mtol3(nbuf, void *); int rc = npf_nat_translate_at(npc, nbuf, nt, forw, di, n_ptr, false); - if (rc) + if (rc < 0) return rc; /* Mark as SNAT / DNAT for the rest of the packet path */ @@ -970,9 +1002,9 @@ npf_nat_translate_l3_at(npf_cache_t *npc, struct rte_mbuf *mbuf, uint16_t l3_delta = ip_fixup32_cksum(0, *old_addr, *new_addr); - if (!npf_v4_rwrcksums(npc, mbuf, n_ptr, ~l3_delta, 0)) + if (npf_v4_rwrcksums(npc, mbuf, n_ptr, ~l3_delta, 0) < 0) return false; - if (!npf_rwrip(npc, mbuf, n_ptr, dnat ? PFIL_IN : PFIL_OUT, addr)) + if (npf_rwrip(npc, mbuf, n_ptr, dnat ? 
PFIL_IN : PFIL_OUT, addr) < 0) return false; /* Set the natted flag */ @@ -1033,8 +1065,7 @@ npf_nat_clone_and_undo(struct rte_mbuf *mbuf, const struct ifnet *in_ifp, void *n_ptr = npf_iphdr(unnat); - if (!npf_cache_all_at( - &npc, unnat, n_ptr, htons(ETHER_TYPE_IPv4), false) || + if (npf_cache_all(&npc, unnat, htons(RTE_ETHER_TYPE_IPV4)) < 0 || !npf_iscached(&npc, NPC_IP4) || (npc.npc_info & NPC_ICMP_ERR)) { rte_pktmbuf_free(unnat); @@ -1106,8 +1137,7 @@ npf_nat_copy_and_undo(struct rte_mbuf *mbuf, const struct ifnet *in_ifp, void *n_ptr = npf_iphdr(unnat); - if (!npf_cache_all_at( - &npc, unnat, n_ptr, htons(ETHER_TYPE_IPv4), false) || + if (npf_cache_all(&npc, unnat, htons(RTE_ETHER_TYPE_IPV4)) < 0 || !npf_iscached(&npc, NPC_IP4) || (npc.npc_info & NPC_ICMP_ERR)) { rte_pktmbuf_free(unnat); @@ -1160,7 +1190,7 @@ nat_do_subsequent(npf_cache_t *npc, struct rte_mbuf **nbuf, unsigned int ip_len = ntohs(ip->tot_len); if (unlikely(ip_len > if_mtu)) { if (obey_df && (ip->frag_off & htons(IP_DF))) - return -E2BIG; + return -NPF_RC_NAT_E2BIG; } /* Log any matched (or session matched) packet immediately */ @@ -1172,7 +1202,7 @@ nat_do_subsequent(npf_cache_t *npc, struct rte_mbuf **nbuf, if (unlikely(!!nt->nt_alg)) { error = pktmbuf_prepare_for_header_change(nbuf, 0); if (error) - return error; + return -NPF_RC_MBUF_ENOMEM; /* Adjust the TCP seq/ack if required */ struct npf_seq_ack *asa = nt->nt_sa; @@ -1181,12 +1211,12 @@ nat_do_subsequent(npf_cache_t *npc, struct rte_mbuf **nbuf, /* Perform the per ALG tasks */ if (npf_alg_nat(se, npc, *nbuf, nt, di)) - return -EINVAL; + return -NPF_RC_ALG_ERR; } error = npf_prepare_for_l4_header_change(nbuf, npc); if (error) - return error; + return -NPF_RC_MBUF_ENOMEM; /* Perform the translation. 
*/ int rc = npf_nat_translate(npc, *nbuf, nt, forw, di); @@ -1298,18 +1328,16 @@ nat_try_initial(const struct npf_config *npf_config, npf_cache_t *npc, unsigned int ip_len = ntohs(ip->tot_len); if (unlikely(ip_len > if_mtu)) { if (obey_df && (ip->frag_off & htons(IP_DF))) { - error = -E2BIG; + error = -NPF_RC_NAT_E2BIG; goto no_nat_work; } } } /* Create the nat struct */ - nt = npf_nat_create(rl, npc, np, pktmbuf_get_vrf(*nbuf)); - if (!nt) { - error = -ENOMEM; + nt = npf_nat_create(rl, npc, np, pktmbuf_get_vrf(*nbuf), &error); + if (!nt) goto no_nat_work; - } nt->nt_mtu = if_mtu; nt->nt_map_flags |= (obey_df) ? NPF_NAT_OBEY_DF : 0; @@ -1319,7 +1347,7 @@ nat_try_initial(const struct npf_config *npf_config, npf_cache_t *npc, nse = npf_session_establish(npc, *nbuf, ifp, di, &error); if (nse == NULL || error) { npf_nat_expire(nt, pktmbuf_get_vrf(*nbuf)); - error = (error) ? error : -ENOMEM; + error = (error) ? error : -NPF_RC_INTL; goto no_nat_work; } *se_ptr = se = nse; @@ -1381,12 +1409,12 @@ void npf_nat_set_orig(npf_nat_t *nt, const npf_addr_t *addr, in_port_t port) /* * npf_nat_setalg: associate an ALG with the NAT entry. 
*/ -void npf_nat_setalg(npf_nat_t *nt, const struct npf_alg *alg) +void npf_nat_setalg(npf_nat_t *nt, struct npf_alg *alg) { if (alg) /* Take reference on alg */ - alg = npf_alg_get((struct npf_alg *)alg); - else + alg = npf_alg_get(alg); + else if (nt->nt_alg) /* Release reference on alg */ npf_alg_put((struct npf_alg *)nt->nt_alg); @@ -1405,7 +1433,7 @@ const struct npf_alg *npf_nat_getalg(npf_nat_t *nt) static uint64_t npf_natpolicy_table_range(const npf_natpolicy_t *np) { - return npf_addrgrp_naddrs(AG_IPv4, np->n_table_id); + return npf_addrgrp_naddrs(AG_IPv4, np->n_table_id, false); } /* get mapping range from nat policy */ @@ -1422,12 +1450,6 @@ uint64_t npf_natpolicy_get_map_range(const npf_natpolicy_t *np) return (uint64_t)addrs * ports; } -/* Return the type of nat (SNAT/DNAT) from the policy */ -uint8_t npf_natpolicy_get_type(npf_natpolicy_t *np) -{ - return np->n_type; -} - /* Get the type of nat (NATIN/NATOUT) */ uint8_t npf_nat_type(npf_nat_t *nt) { @@ -1455,7 +1477,8 @@ void npf_nat_destroy(npf_nat_t *nt) { npf_nat_setalg(nt, NULL); npf_rule_put(nt->nt_rl); - npf_nat_policy_put(nt->nt_natpolicy); + if (nt->nt_natpolicy) + npf_nat_policy_put(nt->nt_natpolicy); free(nt->nt_sa); free(nt); } @@ -1476,7 +1499,8 @@ npf_nat_expire(npf_nat_t *nt, vrfid_t vrfid) else npf_nat_get_trans(nt, &t_addr, &t_port); - npf_nat_free_map(np, nt->nt_rl, nt->nt_map_flags, vrfid, + npf_nat_free_map(np, nt->nt_rl, nt->nt_map_flags, + npf_session_get_proto(nt->nt_session), vrfid, t_addr, t_port); npf_nat_destroy(nt); @@ -1484,101 +1508,163 @@ npf_nat_expire(npf_nat_t *nt, vrfid_t vrfid) /* APM map op failure msg. 
*/ static void npf_nat_log_map_error(const char *which, npf_rule_t *rl, - struct npf_natpolicy *np, const npf_addr_t *addr, - in_port_t port, int nr_ports, int rc) + struct npf_natpolicy *np, uint8_t ip_prot, + const npf_addr_t *addr, in_port_t port, int nr_ports, int rc) { if (net_ratelimit()) { char addrstr[INET6_ADDRSTRLEN]; char buf[ERR_MSG_LEN]; - uint64_t overall, used; + uint64_t overall, used[NAT_PROTO_COUNT]; + enum nat_proto nprot = nat_proto_from_ipproto(ip_prot); - npf_rule_get_overall_used(rl, &used, &overall); + npf_rule_get_overall_used(rl, used, &overall); inet_ntop(AF_INET, addr, addrstr, sizeof(addrstr)); - RTE_LOG(ERR, FIREWALL, "%cNAT: map %s %d (%s:%d) failed: %s, " - "used %"PRIu64"/%"PRIu64"\n", + RTE_LOG(ERR, FIREWALL, "%cNAT: map %s %d (%s:%d prot %u) " + "`failed: %s, used %"PRIu64"/%"PRIu64"\n", np->n_type == NPF_NATIN ? 'D' : 'S', which, - nr_ports, addrstr, ntohs(port), + nr_ports, addrstr, ntohs(port), ip_prot, strerror_r(-rc, buf, ERR_MSG_LEN), - used, overall); + used[nprot], overall); } } /* Allocate one or more mappings from an APM */ int npf_nat_alloc_map(npf_natpolicy_t *np, npf_rule_t *rl, uint32_t map_flags, - vrfid_t vrfid, npf_addr_t *addr, in_port_t *port, int num) + uint8_t ip_prot, vrfid_t vrfid, npf_addr_t *addr, + in_port_t *port, int num) { int rc; - rc = npf_apm_get_map(np->n_apm, map_flags, num, vrfid, addr, port); + rc = npf_apm_get_map(np->n_apm, map_flags, ip_prot, num, vrfid, addr, + port); if (!rc) - npf_rule_update_map_stats(rl, num, map_flags); + npf_rule_update_map_stats(rl, num, map_flags, ip_prot); else - npf_nat_log_map_error("get", rl, np, addr, *port, num, rc); + npf_nat_log_map_error("get", rl, np, ip_prot, addr, *port, num, + rc); return rc; } /* Return a single mapping to an APM */ int npf_nat_free_map(npf_natpolicy_t *np, npf_rule_t *rl, uint32_t map_flags, - vrfid_t vrfid, const npf_addr_t addr, in_port_t port) + uint8_t ip_prot, vrfid_t vrfid, const npf_addr_t addr, + in_port_t port) { int rc; - rc 
= npf_apm_put_map(np->n_apm, map_flags, vrfid, addr, port); + rc = npf_apm_put_map(np->n_apm, map_flags, ip_prot, vrfid, addr, port); if (!rc) - npf_rule_update_map_stats(rl, -1, map_flags); + npf_rule_update_map_stats(rl, -1, map_flags, ip_prot); else - npf_nat_log_map_error("put", rl, np, &addr, port, 1, rc); + npf_nat_log_map_error("put", rl, np, ip_prot, &addr, port, 1, + rc); return rc; } -static void npf_natpolicy_dump(const npf_natpolicy_t *np) +bool +npf_nat_info(npf_nat_t *nat, int *type, npf_addr_t *addr, + in_port_t *port, u_int *masq) { - char start[INET_ADDRSTRLEN], stop[INET_ADDRSTRLEN]; - - inet_ntop(AF_INET, &np->n_taddr, start, sizeof(start)); - inet_ntop(AF_INET, &np->n_taddr_stop, stop, sizeof(stop)); - RTE_LOG(ERR, FIREWALL, " NATP(%p): type %s flags 0x%x refcnt: %u\n", - np, (np->n_type == NPF_NATOUT) ? "NATOUT" : "NATIN", - np->n_flags, rte_atomic32_read(&np->n_refcnt)); + *masq = (nat->nt_natpolicy->n_flags & NPF_NAT_MASQ); + *type = nat->nt_natpolicy->n_type; + npf_nat_get_trans(nat, addr, port); - if (!(np->n_flags & NPF_NAT_TABLE)) { - RTE_LOG(ERR, FIREWALL, - " taddr %s-%s tport %d-%d addr_sz: %hhu\n", - start, stop, np->n_tport, np->n_tport_stop, np->n_addr_sz); - } else { - RTE_LOG(ERR, FIREWALL, - " table %s tport %d-%d addr_sz: %hhu\n", - npf_addrgrp_tid2name(np->n_table_id), np->n_tport, - np->n_tport_stop, np->n_addr_sz); - } + return true; } -void npf_nat_dump(const npf_nat_t *nt) +int npf_nat_npf_pack_pack(npf_nat_t *nt, struct npf_pack_nat *pnt, + struct sentry_packet *sp_back) { - char oaddr[INET_ADDRSTRLEN], taddr[INET_ADDRSTRLEN]; - npf_addr_t t_addr, o_addr; - in_port_t t_port, o_port; + npf_rule_t *rule; - npf_nat_get_orig(nt, &o_addr, &o_port); - npf_nat_get_trans(nt, &t_addr, &t_port); + if (!pnt) + return -EINVAL; + + rule = npf_nat_get_rule(nt); + pnt->pnt_rule_hash = (rule ? 
npf_rule_get_hash(rule) : 0); - inet_ntop(AF_INET, &o_addr, oaddr, sizeof(oaddr)); - inet_ntop(AF_INET, &t_addr, taddr, sizeof(taddr)); + pnt->pnt_l3_chk = nt->nt_l3_chk; + pnt->pnt_l4_chk = nt->nt_l4_chk; + pnt->pnt_map_flags = npf_nat_get_map_flags(nt); + pnt->pnt_taddr = nt->nt_taddr; + pnt->pnt_tport = nt->nt_tport; + pnt->pnt_oaddr = nt->nt_oaddr; + pnt->pnt_oport = nt->nt_oport; - npf_natpolicy_dump(nt->nt_natpolicy); + /* Set translation address in back sentry */ + switch (nt->nt_natpolicy->n_type) { + case NPF_NATIN: + sp_back->sp_addrids[1] = nt->nt_taddr; + break; + case NPF_NATOUT: + sp_back->sp_addrids[2] = nt->nt_taddr; + break; + } + + return 0; } -bool -npf_nat_info(npf_nat_t *nat, int *type, npf_addr_t *addr, - in_port_t *port, u_int *masq) +int npf_nat_npf_pack_restore(struct npf_session *se, + struct npf_pack_nat *pnt, + struct ifnet *ifp) { - *masq = (nat->nt_natpolicy->n_flags & NPF_NAT_MASQ); - *type = nat->nt_natpolicy->n_type; - npf_nat_get_trans(nat, addr, port); + npf_nat_t *nt; + npf_rule_t *rl; + npf_natpolicy_t *np; + int rc = -ENOENT; - return true; + if (!se || !pnt || !ifp) + return -EINVAL; + + /* Create a nat struct */ + nt = zmalloc_aligned(sizeof(npf_nat_t)); + if (!nt) + return -ENOMEM; + + rl = pnt->pnt_rule_hash ? 
+ npf_get_rule_by_hash(pnt->pnt_rule_hash) : NULL; + if (!rl) + goto error; + + nt->nt_rl = npf_rule_get(rl); + + np = npf_rule_get_natpolicy(rl); + if (!np || !np->n_apm) + goto error; + + /* Take a reference on the NAT policy */ + nt->nt_natpolicy = npf_nat_policy_get(np); + + nt->nt_l3_chk = pnt->pnt_l3_chk; + nt->nt_l4_chk = pnt->pnt_l4_chk; + nt->nt_map_flags = pnt->pnt_map_flags; + nt->nt_taddr = pnt->pnt_taddr; + nt->nt_tport = pnt->pnt_tport; + nt->nt_oaddr = pnt->pnt_oaddr; + nt->nt_oport = pnt->pnt_oport; + + vrfid_t vrfid = npf_session_get_vrfid(se); + + rc = npf_nat_alloc_map(nt->nt_natpolicy, rl, nt->nt_map_flags, vrfid, + npf_session_get_proto(se), (npf_addr_t *) &nt->nt_taddr, + &nt->nt_tport, 1); + if (rc) + goto error; + + nt->nt_mtu = ifp->if_mtu; + nt->nt_session = se; + npf_session_setnat(se, nt, + (nt->nt_natpolicy->n_flags & NPF_NAT_PINHOLE)); + + return 0; +error: + if (nt->nt_natpolicy) + npf_nat_policy_put(nt->nt_natpolicy); + npf_rule_put(nt->nt_rl); + free(nt); + return rc; } diff --git a/src/npf/npf_nat.h b/src/npf/npf_nat.h index 05cbe18b..6c9a5380 100644 --- a/src/npf/npf_nat.h +++ b/src/npf/npf_nat.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2021, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2016 by Brocade Communications Systems, Inc. * All rights reserved. 
*/ @@ -46,11 +46,12 @@ typedef struct npf_natpolicy npf_natpolicy_t; #include #include -#include "alg/npf_alg_public.h" +#include "alg/alg_npf.h" #include "npf/npf.h" #include "npf/npf_cache.h" #include "npf/npf_session.h" -#include "pktmbuf.h" +#include "npf/npf_apm.h" +#include "pktmbuf_internal.h" #include "util.h" /* Forward Declarations */ @@ -58,6 +59,7 @@ struct ifnet; struct npf_config; struct npf_session; struct rte_mbuf; +struct npf_pack_nat; typedef struct npf_cache npf_cache_t; typedef struct npf_session npf_session_t; @@ -77,10 +79,15 @@ enum { NPF_NAT_MAP_EVEN_PORT = (1u << 5), NPF_NAT_PINHOLE = (1u << 6), NPF_NAT_OBEY_DF = (1u << 7), /* npf_nat_t only */ + NPF_NAT_PA_SEQ = (1u << 8), /* alloc ports sequentially */ }; +/* Take reference on a NAT policy */ npf_natpolicy_t *npf_nat_policy_get(npf_natpolicy_t *np); + +/* Release reference on a NAT policy */ void npf_nat_policy_put(npf_natpolicy_t *np); + uint32_t npf_nat_get_map_flags(npf_nat_t *nt); void npf_nat_set_seq_ack(npf_session_t *se, npf_cache_t *npc, int16_t diff, int di); @@ -92,7 +99,6 @@ int npf_create_natpolicy(npf_rule_t *rl, uint8_t type, uint32_t flags, npf_addr_t *taddr_stop, uint32_t match_mask, in_port_t tport, in_port_t tport_stop); uint64_t npf_natpolicy_get_map_range(const npf_natpolicy_t *np); -uint8_t npf_natpolicy_get_type(npf_natpolicy_t *np); void npf_natpolicy_update_masq(npf_rule_t *rl, const npf_addr_t *addr); npf_nat_t *npf_nat_custom_nat(npf_nat_t *pnat, uint32_t flags); void npf_nat_finalise(npf_cache_t *npc, npf_session_t *se, int di, @@ -115,18 +121,22 @@ void npf_nat_get_trans(const npf_nat_t *nt, npf_addr_t *addr, in_port_t *tport); void npf_nat_get_orig(const npf_nat_t *nt, npf_addr_t *addr, in_port_t *oport); void npf_nat_set_trans(npf_nat_t *nt, const npf_addr_t *addr, in_port_t tport); void npf_nat_set_orig(npf_nat_t *nt, const npf_addr_t *addr, in_port_t oport); -void npf_nat_setalg(npf_nat_t *nt, const struct npf_alg *alg); +void npf_nat_setalg(npf_nat_t *nt, 
struct npf_alg *alg); const struct npf_alg *npf_nat_getalg(npf_nat_t *nt); + +/* Get the NAT policy from a NAT struct. Does *not* take a reference. */ npf_natpolicy_t *npf_nat_get_policy(const npf_nat_t *nt); + npf_rule_t *npf_nat_get_rule(const npf_nat_t *nt); uint8_t npf_nat_type(npf_nat_t *nt); void npf_nat_destroy(npf_nat_t *nt); void npf_nat_expire(npf_nat_t *nt, vrfid_t vrfid); int npf_nat_alloc_map(npf_natpolicy_t *np, npf_rule_t *rl, uint32_t map_flags, - vrfid_t vrfid, npf_addr_t *addr, in_port_t *port, int num); + uint8_t ip_prot, vrfid_t vrfid, npf_addr_t *addr, + in_port_t *port, int num); int npf_nat_free_map(npf_natpolicy_t *np, npf_rule_t *rl, uint32_t map_flags, - vrfid_t vrfid, const npf_addr_t addr, in_port_t port); -void npf_nat_dump(const npf_nat_t *nt); + uint8_t ip_prot, vrfid_t vrfid, const npf_addr_t addr, + in_port_t port); bool npf_nat_info(npf_nat_t *nat, int *type, npf_addr_t *addr, in_port_t *port, u_int *masq); struct rte_mbuf * @@ -135,4 +145,8 @@ npf_nat_clone_and_undo(struct rte_mbuf *m, const struct ifnet *in_ifp, struct rte_mbuf * npf_nat_copy_and_undo(struct rte_mbuf *m, const struct ifnet *in_ifp, const struct ifnet *out_ifp); +int npf_nat_npf_pack_pack(npf_nat_t *nt, struct npf_pack_nat *pnt, + struct sentry_packet *sp_back); +int npf_nat_npf_pack_restore(struct npf_session *se, struct npf_pack_nat *pnt, + struct ifnet *ifp); #endif /* NPF_NAT_H */ diff --git a/src/npf/npf_nat64.c b/src/npf/npf_nat64.c index 6fea212e..0b2b2e16 100644 --- a/src/npf/npf_nat64.c +++ b/src/npf/npf_nat64.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2019-2021, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2011-2016 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -29,7 +29,7 @@ #include "ip6_funcs.h" #include "ip_funcs.h" #include "netinet6/in6.h" -#include "pktmbuf.h" +#include "pktmbuf_internal.h" #include "vplane_log.h" #include "urcu.h" #include "util.h" @@ -40,9 +40,11 @@ #include "npf/npf_cache.h" #include "npf/npf_nat64.h" #include "npf/npf_session.h" +#include "npf/npf_rc.h" #include "npf/npf_ruleset.h" #include "npf/rproc/npf_rproc.h" #include "npf/rproc/npf_ext_nat64.h" +#include "npf/npf_pack.h" struct ifnet; struct rte_mbuf; @@ -96,6 +98,23 @@ npf_nat64_get_rule(struct npf_nat64 *n64) return n64 ? n64->n64_rule : NULL; } +uint8_t npf_nat64_is_v6(struct npf_nat64 *n64) +{ + return n64->n64_v6 ? 1 : 0; +} + +uint8_t npf_nat64_is_linked(struct npf_nat64 *n64) +{ + return n64->n64_linked ? 1 : 0; +} + +void npf_nat64_get_trans(struct npf_nat64 *n64, + npf_addr_t *addr, in_port_t *port) +{ + memcpy(addr, &n64->n64_t_addr, sizeof(npf_addr_t)); + *port = n64->n64_t_port; +} + /* * Does this nat64 session have a peer session? */ @@ -206,15 +225,20 @@ insert_6052_addr(uint32_t *ip4addr, uint8_t *ip6addr, uint8_t mask) * Get an IPv4 address from nat64 rproc and IPv6 address * * se6 - nat64 IPv6 ingress session + * ip_prot - IP protocol * id - L4 ID, e.g. TCP port * nm - nat64 rproc address mapping configuration and state * v6_addr - IPv6 address source or dest of packet to be translated * v4_addr - New IPv4 address is written to this uint32_t + * + * Note that if ICMP is given a unique pool to allocate IDs from, + * then the NAT64 code needs to be checked to ensure that it works as expected, + * as NAT64 maps between ICMPv4 (protocol 1) and ICMPv6 (protocol 58). 
*/ static int -nat64_get_map_v4(struct npf_nat64 *nat64, npf_rule_t *rl, uint16_t *id, - struct nat64_map *nm, uint32_t *v4_addr, char *v6_addr, - vrfid_t vrfid) +nat64_get_map_v4(struct npf_nat64 *nat64, npf_rule_t *rl, uint8_t ip_prot, + uint16_t *id, struct nat64_map *nm, uint32_t *v4_addr, + char *v6_addr, vrfid_t vrfid) { int rc; @@ -227,7 +251,7 @@ nat64_get_map_v4(struct npf_nat64 *nat64, npf_rule_t *rl, uint16_t *id, */ if (!extract_6052_addr(v4_addr, v6_addr, nm->nm_mask)) - return -EINVAL; + return -NPF_RC_NAT64_6052; break; case NPF_NAT64_ONE2ONE: @@ -243,11 +267,11 @@ nat64_get_map_v4(struct npf_nat64 *nat64, npf_rule_t *rl, uint16_t *id, case NPF_NAT64_OVERLOAD: if (!rl) - return -EINVAL; + return -NPF_RC_INTL; npf_natpolicy_t *np = npf_rule_get_natpolicy(rl); if (!np) - return -EINVAL; + return -NPF_RC_INTL; assert(!nat64->n64_np); @@ -264,13 +288,13 @@ nat64_get_map_v4(struct npf_nat64 *nat64, npf_rule_t *rl, uint16_t *id, * n64_t_addr is initially 0.0.0.0 */ rc = npf_nat_alloc_map(np, rl, - nat64->n64_map_flags, + nat64->n64_map_flags, ip_prot, nat64->n64_vrfid, &nat64->n64_t_addr, &nat64->n64_t_port, 1); if (rc != 0) - return -EINVAL; + return -NPF_RC_NAT64_ENOSPC; *v4_addr = nat64->n64_t_addr.s6_addr32[0]; *id = nat64->n64_t_port; @@ -278,7 +302,7 @@ nat64_get_map_v4(struct npf_nat64 *nat64, npf_rule_t *rl, uint16_t *id, break; case NPF_NAT64_NONE: - return -EINVAL; + return -NPF_RC_INTL; }; return 0; @@ -303,7 +327,7 @@ nat64_get_map_v6(uint16_t *id, struct nat64_map *nm, npf_addr_t *v6_addr, if (!insert_6052_addr(&v4_addr, v6_addr->s6_addr, nm->nm_mask)) - return -EINVAL; + return -NPF_RC_NAT64_6052; break; case NPF_NAT64_ONE2ONE: /* @@ -316,9 +340,9 @@ nat64_get_map_v6(uint16_t *id, struct nat64_map *nm, npf_addr_t *v6_addr, *id = nm->nm_start_port; break; case NPF_NAT64_OVERLOAD: - return -EINVAL; + return -NPF_RC_INTL; case NPF_NAT64_NONE: - return -EINVAL; + return -NPF_RC_INTL; }; return 0; @@ -328,13 +352,13 @@ nat64_get_map_v6(uint16_t 
*id, struct nat64_map *nm, npf_addr_t *v6_addr, * Conversion utility to go from v4 to v6 space. Only supports tcp/udp and * icmp echos. */ -static bool +static int npf_4to6_convert(struct rte_mbuf **m, npf_cache_t *npc, npf_addr_t *src, uint16_t sid, npf_addr_t *dst, uint16_t did) { if (!*m || !npc) - return false; + return -NPF_RC_INTL; struct iphdr *ip = iphdr(*m); uint16_t proto = npf_cache_ipproto(npc); @@ -345,7 +369,7 @@ npf_4to6_convert(struct rte_mbuf **m, npf_cache_t *npc, uint32_t data_len = ntohs(ip->tot_len) - hlen; if (npf_prepare_for_l4_header_change(m, npc) != 0) - return false; + return -NPF_RC_MBUF_ENOMEM; /* * Grow the l3 header space so there is just enough @@ -356,21 +380,21 @@ npf_4to6_convert(struct rte_mbuf **m, npf_cache_t *npc, l2 = rte_pktmbuf_mtod(*m, char *); new_l2 = rte_pktmbuf_prepend(*m, sizeof(struct ip6_hdr) - hlen); if (!new_l2) - return false; + return -NPF_RC_MBUF_ERR; memmove(new_l2, l2, (*m)->l2_len); l2 = new_l2; /* Reset the L3 length */ - pktmbuf_l3_len(*m) = sizeof(struct ip6_hdr); + dp_pktmbuf_l3_len(*m) = sizeof(struct ip6_hdr); struct ip6_hdr *ip6 = ip6hdr(*m); char *l4hdr = (char *)(ip6 + 1); /* fix up ether type */ - if ((*m)->l2_len == ETHER_HDR_LEN) { - struct ether_hdr *eth = (struct ether_hdr *)l2; - eth->ether_type = htons(ETHER_TYPE_IPv6); + if ((*m)->l2_len == RTE_ETHER_HDR_LEN) { + struct rte_ether_hdr *eth = (struct rte_ether_hdr *)l2; + eth->ether_type = htons(RTE_ETHER_TYPE_IPV6); } ip6->ip6_flow = 0; @@ -412,20 +436,20 @@ npf_4to6_convert(struct rte_mbuf **m, npf_cache_t *npc, /* now recompute checksum */ npf_ipv6_cksum(*m, proto, l4hdr); - return true; + return 0; } /* * Conversion utility to go from v6 to v4 space. Only supports tcp/udp and * icmp echos. 
*/ -static bool +static int npf_6to4_convert(struct rte_mbuf **m, npf_cache_t *npc, uint32_t v4_saddr, uint16_t sid, uint32_t v4_daddr, uint16_t did) { if (!*m || !npc) - return false; + return -NPF_RC_INTL; struct ip6_hdr *ip6 = ip6hdr(*m); uint16_t proto = npf_cache_ipproto(npc); @@ -440,7 +464,7 @@ npf_6to4_convert(struct rte_mbuf **m, npf_cache_t *npc, sizeof(struct ip6_hdr); if (npf_prepare_for_l4_header_change(m, npc) != 0) - return false; + return -NPF_RC_MBUF_ENOMEM; /* * Shrink l3 header size such that we are left with space for @@ -451,21 +475,21 @@ npf_6to4_convert(struct rte_mbuf **m, npf_cache_t *npc, l2 = rte_pktmbuf_mtod(*m, char *); new_l2 = rte_pktmbuf_adj(*m, hlen - sizeof(struct iphdr)); if (!new_l2) - return false; + return -NPF_RC_MBUF_ERR; memmove(new_l2, l2, (*m)->l2_len); l2 = new_l2; /* Reset the L3 length */ - pktmbuf_l3_len(*m) = sizeof(struct iphdr); + dp_pktmbuf_l3_len(*m) = sizeof(struct iphdr); struct iphdr *ip = iphdr(*m); char *l4hdr = (char *)(ip + 1); /* fix up ether type */ - if ((*m)->l2_len == ETHER_HDR_LEN) { - struct ether_hdr *eth = (struct ether_hdr *)l2; - eth->ether_type = htons(ETHER_TYPE_IPv4); + if ((*m)->l2_len == RTE_ETHER_HDR_LEN) { + struct rte_ether_hdr *eth = (struct rte_ether_hdr *)l2; + eth->ether_type = htons(RTE_ETHER_TYPE_IPV4); } ip->ihl = sizeof(struct iphdr) >> 2; /* fixed 20 bytes for now */ @@ -514,7 +538,7 @@ npf_6to4_convert(struct rte_mbuf **m, npf_cache_t *npc, /* now fixup proto cksums */ npf_ipv4_cksum(*m, proto, l4hdr); - return true; + return 0; } /* @@ -532,14 +556,12 @@ npf_nat64_session_establish(npf_session_t **sep, npf_cache_t *npc, struct npf_nat64 *nat64; npf_session_t *se = *sep; bool new = false; - int error, rc = 0; + int rc = 0; if (!se) { - se = npf_session_establish(npc, m, ifp, dir, &error); - if (error) - return error; - if (se == NULL) - return -EINVAL; + se = npf_session_establish(npc, m, ifp, dir, &rc); + if (rc || !se) + return rc; new = true; } @@ -549,7 +571,7 @@ 
npf_nat64_session_establish(npf_session_t **sep, npf_cache_t *npc, if (!nat64) { if (new) npf_session_destroy(se); - return -ENOMEM; + return -NPF_RC_NAT64_ENOMEM; } nat64->n64_v6 = npf_iscached(npc, NPC_IP6); nat64->n64_rproc_id = rproc_id; @@ -560,7 +582,7 @@ npf_nat64_session_establish(npf_session_t **sep, npf_cache_t *npc, nat64->n64_stats_in = zmalloc_aligned(NAT64_STATS_SIZE); nat64->n64_stats_out = zmalloc_aligned(NAT64_STATS_SIZE); if (!nat64->n64_stats_in || !nat64->n64_stats_out) { - rc = -ENOMEM; + rc = -NPF_RC_NAT64_ENOMEM; goto error; } npf_session_set_nat64(se, nat64); @@ -624,7 +646,7 @@ npf_nat64_session_link(struct npf_session *se1, struct npf_session *se2) m1 = npf_session_get_nat64(se1); m2 = npf_session_get_nat64(se2); if (!m1 || !m2) - return -EINVAL; + return -NPF_RC_INTL; /* We always use the lock from the v6 session */ lock = m1->n64_v6 ? &m1->n64_lock : &m2->n64_lock; @@ -667,7 +689,7 @@ npf_nat64_session_link(struct npf_session *se1, struct npf_session *se2) npf_session_get_id(se1), npf_session_get_id(se2)); - return -EINVAL; + return -NPF_RC_INTL; } m1->n64_linked = true; m2->n64_linked = true; @@ -683,7 +705,7 @@ npf_nat64_session_link(struct npf_session *se1, struct npf_session *se2) "id(%lu) and id(%lu)\n", npf_session_get_id(se1), npf_session_get_id(se2)); - return -EINVAL; + return -NPF_RC_INTL; } } @@ -735,13 +757,15 @@ npf_nat64_session_destroy(struct npf_session *se) peer->n64_peer = NULL; if (nat64->n64_np) { - npf_nat_free_map(nat64->n64_np, NULL, + npf_nat_free_map(nat64->n64_np, nat64->n64_rule, nat64->n64_map_flags, + npf_session_get_proto(se), nat64->n64_vrfid, nat64->n64_t_addr, nat64->n64_t_port); - npf_nat_policy_put(nat64->n64_np); + if (nat64->n64_np) + npf_nat_policy_put(nat64->n64_np); } npf_rule_put(nat64->n64_rule); @@ -751,6 +775,12 @@ npf_nat64_session_destroy(struct npf_session *se) npf_session_set_nat64(se, NULL); } +/* Get rproc_id */ +int npf_nat64_get_rproc_id(struct npf_nat64 *n64) +{ + return 
n64->n64_rproc_id; +} + /* * Is this a nat64 session? */ @@ -764,18 +794,6 @@ bool npf_nat64_session_is_nat64(npf_session_t *se) return false; } -/* - * Is this a nat46 session? - */ -bool npf_nat64_session_is_nat46(npf_session_t *se) -{ - struct npf_nat64 *nat64; - - nat64 = npf_session_get_nat64(se); - if (nat64) - return nat64->n64_rproc_id == NPF_RPROC_ID_NAT46; - return false; -} /* * Add nat64 session stats @@ -849,8 +867,10 @@ npf_nat64_session_json(json_writer_t *json, npf_session_t *se) jsonw_bool_field(json, "in", npf_session_forward_dir(se, PFIL_IN)); if (n64->n64_rule) { + const char *gr_name = npf_rule_get_name(n64->n64_rule); + jsonw_string_field(json, "ruleset", - npf_rule_get_name(n64->n64_rule)); + gr_name ? gr_name : ""); jsonw_uint_field(json, "rule", npf_rule_get_num(n64->n64_rule)); } @@ -906,12 +926,12 @@ npf_nat64_session_json(json_writer_t *json, npf_session_t *se) * so that nat64 has another opportunity to create an egress session and link * it to the ingress session. 
*/ -static npf_decision_t -npf_nat64_6to4_in(npf_action_t *action, const struct npf_config *npf_config, +nat64_decision_t +npf_nat64_6to4_in(const struct npf_config *npf_config, npf_session_t **sep, struct ifnet *ifp, npf_cache_t *npc, - struct rte_mbuf **m, uint16_t *npf_flag) + struct rte_mbuf **m, uint16_t *npf_flag, int *rcp) { - npf_decision_t decision = NPF_DECISION_PASS; + nat64_decision_t decision = NAT64_DECISION_UNMATCHED; npf_addr_t saddr = NPF_ADDR_ZERO, daddr = NPF_ADDR_ZERO; npf_addr_t *src = &saddr, *dst = &daddr; npf_session_t *se6 = *sep; @@ -920,7 +940,7 @@ npf_nat64_6to4_in(npf_action_t *action, const struct npf_config *npf_config, npf_rule_t *rl = NULL; bool new_flow = false; uint16_t sid, did; - int rc; + int rc = 0; /* * If an ingress session exist and it is a nat64 session, then get the @@ -936,7 +956,7 @@ npf_nat64_6to4_in(npf_action_t *action, const struct npf_config *npf_config, const npf_ruleset_t *rlset; struct nat64 *rproc; struct ip6_hdr *ip6; - int error = 0; + uint8_t ip_prot; /* * Peer egress session not found. @@ -953,7 +973,7 @@ npf_nat64_6to4_in(npf_action_t *action, const struct npf_config *npf_config, PFIL_IN); if (!rl) - return NPF_DECISION_PASS; + return NAT64_DECISION_UNMATCHED; } /* @@ -965,8 +985,11 @@ npf_nat64_6to4_in(npf_action_t *action, const struct npf_config *npf_config, * to translate the first packet of the flow. 
*/ rproc = npf_rule_rproc_handle_from_id(rl, NPF_RPROC_ID_NAT64); - if (!rproc) - return NPF_DECISION_PASS; + if (!rproc) { + /* This should never happen */ + *rcp = -NPF_RC_INTL; + return NAT64_DECISION_DROP; + } /* * Check packet is eligible for v6-to-v4 translation *before* @@ -976,8 +999,10 @@ npf_nat64_6to4_in(npf_action_t *action, const struct npf_config *npf_config, */ if (!npf_iscached(npc, NPC_IP6) || (!npf_iscached(npc, NPC_L4PORTS) && - !npf_iscached(npc, NPC_ICMP_ECHO))) - return NPF_DECISION_BLOCK; + !npf_iscached(npc, NPC_ICMP_ECHO))) { + *rcp = -NPF_RC_L4_PROTO; + return NAT64_DECISION_DROP; + } /* * Create or update v6 ingress session. Add s_nat64 to @@ -989,8 +1014,10 @@ npf_nat64_6to4_in(npf_action_t *action, const struct npf_config *npf_config, rc = npf_nat64_session_establish( &se6, npc, *m, ifp, rl, PFIL_IN, NPF_RPROC_ID_NAT64, (rproc->n6_log & N64_LOG_SESSIONS) != 0); - if (rc < 0) - return NPF_DECISION_BLOCK; + if (rc < 0) { + *rcp = rc; + return NAT64_DECISION_DROP; + } vrfid_t vrfid = npf_session_get_vrfid(se6); @@ -1001,34 +1028,39 @@ npf_nat64_6to4_in(npf_action_t *action, const struct npf_config *npf_config, n64 = npf_session_get_nat64(se6); if (unlikely(!n64)) { /* Should never happen */ - decision = NPF_DECISION_UNMATCHED; + *rcp = -NPF_RC_INTL; + decision = NAT64_DECISION_DROP; goto error; } /* Get mapping for v4 src addr */ - rc = nat64_get_map_v4(n64, rl, &sid, &rproc->n6_src, - saddr.s6_addr32, (char *)&ip6->ip6_src, - vrfid); - if (rc) { - decision = NPF_DECISION_UNMATCHED; + ip_prot = npf_cache_ipproto(npc); + rc = nat64_get_map_v4(n64, rl, ip_prot, &sid, &rproc->n6_src, + saddr.s6_addr32, (char *)&ip6->ip6_src, + vrfid); + if (rc < 0) { + *rcp = rc; + decision = NAT64_DECISION_DROP; goto error; } /* Get mapping for v4 dst addr */ - rc = nat64_get_map_v4(n64, rl, &did, &rproc->n6_dst, + rc = nat64_get_map_v4(n64, rl, ip_prot, &did, &rproc->n6_dst, daddr.s6_addr32, (char *)&ip6->ip6_dst, vrfid); if (rc) { - decision = 
NPF_DECISION_UNMATCHED; + *rcp = -NPF_RC_NAT64_ENOSPC; + decision = NAT64_DECISION_DROP; goto error; } /* * We need to activate the session before the mbuf changes */ - error = npf_session_activate(se6, ifp, npc, *m); - if (unlikely(error)) { - decision = NPF_DECISION_BLOCK; + rc = npf_session_activate(se6, ifp, npc, *m); + if (unlikely(rc < 0)) { + *rcp = rc; + decision = NAT64_DECISION_DROP; goto error; } @@ -1054,26 +1086,27 @@ npf_nat64_6to4_in(npf_action_t *action, const struct npf_config *npf_config, rc = npf_session_sentry_extract(se4, &if_index, &af, &dst, &did, &src, &sid); - if (unlikely(rc || af != AF_INET)) - return NPF_DECISION_BLOCK; + if (unlikely(rc || af != AF_INET)) { + *rcp = rc; + return NAT64_DECISION_DROP; + } } /* * Do the 6-to-4 conversion */ uint64_t bytes = rte_pktmbuf_pkt_len(*m); - bool ok = npf_6to4_convert(m, npc, src->s6_addr32[0], sid, + rc = npf_6to4_convert(m, npc, src->s6_addr32[0], sid, dst->s6_addr32[0], did); - if (likely(ok)) { + if (likely(rc == 0)) { /* * stats. NOTE, this is currently only recording stats if a * session does not exist. */ npf_nat64_add_pkt_in(n64, bytes); - /* Flag to IPv4 input */ - *action = NPF_ACTION_TO_V4; + decision = NAT64_DECISION_TO_V4; /* Flag to output pipeline */ *npf_flag |= NPF_FLAG_FROM_IPV6; @@ -1087,7 +1120,8 @@ npf_nat64_6to4_in(npf_action_t *action, const struct npf_config *npf_config, pktmbuf_mdata_invar_set(*m, PKT_MDATA_INVAR_NAT64); } } else { - decision = NPF_DECISION_BLOCK; + *rcp = rc; + decision = NAT64_DECISION_DROP; goto error; } @@ -1126,16 +1160,16 @@ npf_nat64_6to4_in(npf_action_t *action, const struct npf_config *npf_config, * #3 is the unlikely scenario. It may occur if orthogonal nat64 and nat46 * rules create ingress sessions simultaneously. 
*/ -static npf_decision_t +nat64_decision_t npf_nat64_6to4_out(npf_session_t **sep, struct ifnet *ifp, npf_cache_t *npc, - struct rte_mbuf **m, uint16_t *npf_flag) + struct rte_mbuf **m, const uint16_t *npf_flag, int *rcp) { npf_session_t *se4 = *sep; struct npf_nat64 *n64; - int rc; + int rc = 0; if ((*npf_flag & NPF_FLAG_FROM_IPV6) == 0) - return NPF_DECISION_PASS; + return NAT64_DECISION_UNMATCHED; /* * 6-to-4 packets will contain nat64 metadata as long as ingress and @@ -1153,7 +1187,7 @@ npf_nat64_6to4_out(npf_session_t **sep, struct ifnet *ifp, npf_cache_t *npc, n64 = npf_session_get_nat64(se6); if (unlikely(!se6 || !n64)) /* This should never happen */ - return NPF_DECISION_PASS; + return NAT64_DECISION_DROP; /* * Create an IPv4 session if one does not already exist (#1). Add a @@ -1164,17 +1198,20 @@ npf_nat64_6to4_out(npf_session_t **sep, struct ifnet *ifp, npf_cache_t *npc, rc = npf_nat64_session_establish(&se4, npc, *m, ifp, n64->n64_rule, PFIL_OUT, NPF_RPROC_ID_NAT64, n64->n64_log_sessions); - if (rc < 0 || se4 == NULL) - return NPF_DECISION_BLOCK; + if (rc < 0 || se4 == NULL) { + *rcp = rc; + return NAT64_DECISION_DROP; + } /* * Link v4 and v6 sessions. This handles scenario #3, where we have a * race between two packet flows in different directions. */ rc = npf_nat64_session_link(se6, se4); - - if (rc < 0) - return NPF_DECISION_PASS; + if (rc < 0) { + *rcp = rc; + return NAT64_DECISION_DROP; + } if (!*sep) *sep = se4; @@ -1184,7 +1221,7 @@ npf_nat64_6to4_out(npf_session_t **sep, struct ifnet *ifp, npf_cache_t *npc, npf_nat64_add_pkt_out(npf_session_get_nat64(se4), rte_pktmbuf_pkt_len(*m)); - return NPF_DECISION_PASS; + return NAT64_DECISION_PASS; } /* @@ -1213,12 +1250,12 @@ npf_nat64_6to4_out(npf_session_t **sep, struct ifnet *ifp, npf_cache_t *npc, * so that nat64 has another opportunity to create an egress session and link * it to the ingress session. 
*/ -static npf_decision_t -npf_nat64_4to6_in(npf_action_t *action, const struct npf_config *npf_config, +nat64_decision_t +npf_nat64_4to6_in(const struct npf_config *npf_config, npf_session_t **sep, struct ifnet *ifp, npf_cache_t *npc, - struct rte_mbuf **m, uint16_t *npf_flag) + struct rte_mbuf **m, uint16_t *npf_flag, int *rcp) { - npf_decision_t decision = NPF_DECISION_PASS; + nat64_decision_t decision = NAT64_DECISION_UNMATCHED; npf_addr_t saddr = NPF_ADDR_ZERO, daddr = NPF_ADDR_ZERO; npf_addr_t *src = &saddr, *dst = &daddr; npf_session_t *se4 = *sep; @@ -1227,7 +1264,7 @@ npf_nat64_4to6_in(npf_action_t *action, const struct npf_config *npf_config, npf_rule_t *rl = NULL; bool new_flow = false; uint16_t sid, did; - int rc; + int rc = 0; /* * If an ingress session exist and it is a nat64 session, then get the @@ -1243,7 +1280,6 @@ npf_nat64_4to6_in(npf_action_t *action, const struct npf_config *npf_config, const npf_ruleset_t *rlset; struct nat64 *rproc; struct iphdr *ip; - int error = 0; /* * Peer egress session not found. @@ -1260,7 +1296,7 @@ npf_nat64_4to6_in(npf_action_t *action, const struct npf_config *npf_config, PFIL_IN); if (!rl) - return NPF_DECISION_PASS; + return NAT64_DECISION_UNMATCHED; } /* @@ -1272,8 +1308,11 @@ npf_nat64_4to6_in(npf_action_t *action, const struct npf_config *npf_config, * to translate the first packet of the flow. 
*/ rproc = npf_rule_rproc_handle_from_id(rl, NPF_RPROC_ID_NAT46); - if (!rproc) - return NPF_DECISION_PASS; + if (!rproc) { + /* This should never happen */ + *rcp = -NPF_RC_INTL; + return NAT64_DECISION_DROP; + } /* * Check packet is eligible for v4-to-v6 translation *before* @@ -1283,8 +1322,10 @@ npf_nat64_4to6_in(npf_action_t *action, const struct npf_config *npf_config, */ if (!npf_iscached(npc, NPC_IP4) || (!npf_iscached(npc, NPC_L4PORTS) && - !npf_iscached(npc, NPC_ICMP_ECHO))) - return NPF_DECISION_BLOCK; + !npf_iscached(npc, NPC_ICMP_ECHO))) { + *rcp = -NPF_RC_L4_PROTO; + return NAT64_DECISION_DROP; + } /* * Create or update v4 ingress session. @@ -1296,8 +1337,10 @@ npf_nat64_4to6_in(npf_action_t *action, const struct npf_config *npf_config, &se4, npc, *m, ifp, rl, PFIL_IN, NPF_RPROC_ID_NAT46, (rproc->n6_log & N64_LOG_SESSIONS) != 0); - if (rc < 0) - return NPF_DECISION_BLOCK; + if (rc < 0) { + *rcp = rc; + return NAT64_DECISION_DROP; + } /* Get src and dst ports from cache */ npf_cache_extract_ids(npc, &sid, &did); @@ -1306,14 +1349,16 @@ npf_nat64_4to6_in(npf_action_t *action, const struct npf_config *npf_config, n64 = npf_session_get_nat64(se4); if (unlikely(!n64)) { /* Should never happen */ - decision = NPF_DECISION_UNMATCHED; + *rcp = -NPF_RC_INTL; + decision = NAT64_DECISION_DROP; goto error; } /* Get mapping for v4 src addr */ rc = nat64_get_map_v6(NULL, &rproc->n6_src, src, ip->saddr); - if (rc) { - decision = NPF_DECISION_UNMATCHED; + if (rc < 0) { + *rcp = rc; + decision = NAT64_DECISION_DROP; goto error; } @@ -1321,17 +1366,19 @@ npf_nat64_4to6_in(npf_action_t *action, const struct npf_config *npf_config, * Get v6 dst addr from the rproc and/or pkt */ rc = nat64_get_map_v6(&did, &rproc->n6_dst, dst, ip->daddr); - if (rc) { - decision = NPF_DECISION_UNMATCHED; + if (rc < 0) { + *rcp = rc; + decision = NAT64_DECISION_DROP; goto error; } /* * We need to activate the session now before the mbuf changes */ - error = npf_session_activate(se4, ifp, 
npc, *m); - if (unlikely(error)) { - decision = NPF_DECISION_BLOCK; + rc = npf_session_activate(se4, ifp, npc, *m); + if (unlikely(rc < 0)) { + *rcp = rc; + decision = NAT64_DECISION_DROP; goto error; } @@ -1357,25 +1404,26 @@ npf_nat64_4to6_in(npf_action_t *action, const struct npf_config *npf_config, rc = npf_session_sentry_extract(se6, &if_index, &af, &dst, &did, &src, &sid); - if (unlikely(rc || af != AF_INET6)) - return NPF_DECISION_BLOCK; + if (unlikely(rc || af != AF_INET6)) { + *rcp = rc; + return NAT64_DECISION_DROP; + } } /* * Do the 4-to-6 conversion */ uint64_t bytes = rte_pktmbuf_pkt_len(*m); - bool ok = npf_4to6_convert(m, npc, src, sid, dst, did); + rc = npf_4to6_convert(m, npc, src, sid, dst, did); - if (likely(ok)) { + if (likely(rc == 0)) { /* * stats. NOTE, this is currently only recording stats if a * session does not exist. */ npf_nat64_add_pkt_in(n64, bytes); - /* Flag to IPv4 input */ - *action = NPF_ACTION_TO_V6; + decision = NAT64_DECISION_TO_V6; /* Flag to output pipeline */ *npf_flag |= NPF_FLAG_FROM_IPV4; @@ -1389,7 +1437,8 @@ npf_nat64_4to6_in(npf_action_t *action, const struct npf_config *npf_config, pktmbuf_mdata_invar_set(*m, PKT_MDATA_INVAR_NAT64); } } else { - decision = NPF_DECISION_BLOCK; + *rcp = rc; + decision = NAT64_DECISION_DROP; goto error; } @@ -1428,16 +1477,16 @@ npf_nat64_4to6_in(npf_action_t *action, const struct npf_config *npf_config, * #3 is the unlikely scenario. It may occur if orthogonal nat64 and nat46 * rules create ingress sessions simultaneously. 
*/ -static npf_decision_t +nat64_decision_t npf_nat64_4to6_out(npf_session_t **sep, struct ifnet *ifp, npf_cache_t *npc, - struct rte_mbuf **m, uint16_t *npf_flag) + struct rte_mbuf **m, const uint16_t *npf_flag, int *rcp) { npf_session_t *se6 = *sep; struct npf_nat64 *n64; - int rc; + int rc = 0; if ((*npf_flag & NPF_FLAG_FROM_IPV4) == 0) - return NPF_DECISION_PASS; + return NAT64_DECISION_UNMATCHED; /* * 4-to-6 packets will contain nat64 metadata as long as ingress and @@ -1455,7 +1504,7 @@ npf_nat64_4to6_out(npf_session_t **sep, struct ifnet *ifp, npf_cache_t *npc, n64 = npf_session_get_nat64(se4); if (!se4 || !n64) /* This should never happen */ - return NPF_DECISION_PASS; + return NAT64_DECISION_DROP; /* * Create an IPv6 session if one does not already exist (#1). Add a @@ -1466,8 +1515,10 @@ npf_nat64_4to6_out(npf_session_t **sep, struct ifnet *ifp, npf_cache_t *npc, rc = npf_nat64_session_establish(&se6, npc, *m, ifp, n64->n64_rule, PFIL_OUT, NPF_RPROC_ID_NAT46, n64->n64_log_sessions); - if (rc < 0 || se6 == NULL) - return NPF_DECISION_BLOCK; + if (rc < 0 || se6 == NULL) { + *rcp = rc; + return NAT64_DECISION_DROP; + } /* * Link v6 and v4 sessions. This handles scenario #3, where we have a @@ -1475,8 +1526,10 @@ npf_nat64_4to6_out(npf_session_t **sep, struct ifnet *ifp, npf_cache_t *npc, */ rc = npf_nat64_session_link(se4, se6); - if (rc < 0) - return NPF_DECISION_PASS; + if (rc < 0) { + *rcp = rc; + return NAT64_DECISION_DROP; + } if (!*sep) *sep = se6; @@ -1486,55 +1539,100 @@ npf_nat64_4to6_out(npf_session_t **sep, struct ifnet *ifp, npf_cache_t *npc, npf_nat64_add_pkt_out(npf_session_get_nat64(se6), rte_pktmbuf_pkt_len(*m)); - return NPF_DECISION_PASS; + return NAT64_DECISION_PASS; } -/* - * For NAT 6-to-4 the packet flow sequence is: - * - * request: v6(in) -> v4(in) -> v4(out) - * response: v4(in) -> v6(in) -> v6(out) - * - * Two session are created for the first packet in a data flow - An IPv6 - * session at v6(in) and an IPv4 session at v4(out).
- * - * NAT 4-to-6 for a new packet flow is similar. - * - * nat64_hook is called at input when either of the following are true: - * - * 1. A NAT64 rule exists on the interface, or - * 2. A NAT64 session is found on ingress - * - * nat64_hook is called at output when: - * - * 1. The NPF_FLAG_NAT64 flag is set - * - * The NPF_FLAG_NAT64 flag is set by the NAT64 ingress routine if an egress - * session does not exists (typically only for the first packet only of a - * flow). - */ -npf_decision_t -nat64_hook(npf_action_t *action, const struct npf_config *npf_config, - npf_session_t **sep, struct ifnet *ifp, npf_cache_t *npc, - struct rte_mbuf **m, int dir, uint16_t *npf_flag) +int npf_nat64_npf_pack_pack(struct npf_nat64 *n64, struct npf_pack_nat64 *pn64) { - npf_decision_t decision; + npf_rule_t *rule; - if (npf_iscached(npc, NPC_IP4)) { - if (likely(dir == PFIL_IN)) - decision = npf_nat64_4to6_in(action, npf_config, sep, - ifp, npc, m, npf_flag); - else - decision = npf_nat64_6to4_out(sep, ifp, npc, m, - npf_flag); - } else { - if (likely(dir == PFIL_IN)) - decision = npf_nat64_6to4_in(action, npf_config, sep, - ifp, npc, m, npf_flag); - else - decision = npf_nat64_4to6_out(sep, ifp, npc, m, - npf_flag); + if (!n64 || !pn64) + return -EINVAL; + + rule = npf_nat64_get_rule(n64); + pn64->pn64_rule_hash = (rule ? npf_rule_get_hash(rule) : 0); + pn64->pn64_rproc_id = npf_nat64_get_rproc_id(n64); + pn64->pn64_map_flags = n64->n64_map_flags; + pn64->pn64_v6 = npf_nat64_is_v6(n64); + pn64->pn64_linked = npf_nat64_is_linked(n64); + npf_nat64_get_trans(n64, &pn64->pn64_t_addr, &pn64->pn64_t_port); + + return 0; +} + +/* + * Rebuild the nat64 state of session 'se' from its packed form 'pn64'. + * + * Returns 0 once the new state is attached to 'se'; -EINVAL for a NULL + * argument or a packed session that is not linked; -ENOMEM if an + * allocation fails. On failure nothing is attached to 'se'. 
+ */ +int npf_nat64_npf_pack_restore(struct npf_session *se, + struct npf_pack_nat64 *pn64) +{ + struct npf_nat64 *n64; + npf_rule_t *rl; + int rc = -EINVAL; + + if (!se || !pn64) + return -EINVAL; + + /* Create a nat64 struct */ + n64 = zmalloc(sizeof(struct npf_nat64)); + if (!n64) + return -ENOMEM; + + rl = pn64->pn64_rule_hash ?
+ npf_get_rule_by_hash(pn64->pn64_rule_hash) : NULL; + + if (rl) { + npf_natpolicy_t *np; + + n64->n64_rule = npf_rule_get(rl); + + /* + * A NAT policy only exists in a NAT64 rule with some configs. + * We must take a reference on the NAT policy if we store it + * in n64. + */ + np = npf_rule_get_natpolicy(rl); + if (np) + n64->n64_np = npf_nat_policy_get(np); } - return decision; + n64->n64_rproc_id = pn64->pn64_rproc_id; + n64->n64_map_flags = pn64->pn64_map_flags; + n64->n64_vrfid = npf_session_get_vrfid(se); + + memcpy(&n64->n64_t_addr, &pn64->pn64_t_addr, sizeof(npf_addr_t)); + n64->n64_t_port = pn64->pn64_t_port; + + n64->n64_v6 = pn64->pn64_v6; + if (!pn64->pn64_linked) + goto error; + + rte_spinlock_init(&n64->n64_lock); + n64->n64_stats_in = zmalloc_aligned(NAT64_STATS_SIZE); + n64->n64_stats_out = zmalloc_aligned(NAT64_STATS_SIZE); + if (!n64->n64_stats_in || !n64->n64_stats_out) { + rc = -ENOMEM; + goto error; + } + + npf_session_set_nat64(se, n64); + + return 0; + +error: + /* + * Either stats block may be NULL here; free(NULL) is a no-op. + * NOTE(review): assumes free() pairs with zmalloc_aligned(). + */ + free(n64->n64_stats_in); + free(n64->n64_stats_out); + if (n64->n64_np) + npf_nat_policy_put(n64->n64_np); + npf_rule_put(n64->n64_rule); + free(n64); + return rc; } diff --git a/src/npf/npf_nat64.h b/src/npf/npf_nat64.h index 46e259be..dbc07cda 100644 --- a/src/npf/npf_nat64.h +++ b/src/npf/npf_nat64.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2019-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2016 by Brocade Communications Systems, Inc. * All rights reserved.
* @@ -15,31 +15,94 @@ #include "npf/npf.h" #include "npf/npf_cache.h" #include "npf/npf_session.h" +#include "npf/npf_nat.h" /* Forward Declarations */ struct ifnet; struct npf_config; struct rte_mbuf; struct npf_nat64; +struct npf_pack_nat64; typedef struct npf_rule npf_rule_t; typedef struct npf_cache npf_cache_t; typedef struct npf_session npf_session_t; -npf_decision_t -nat64_hook(npf_action_t *action, const struct npf_config *npf_config, - npf_session_t **sep, struct ifnet *ifp, npf_cache_t *npc, - struct rte_mbuf **m, int dir, uint16_t *npf_flag); +/* + * Input + * UNMATCHED Did not match nat64 rule + * TO_V4 Switch from V6 to V4 + * TO_V6 Switch from V4 to V6 + * PASS n/a + * DROP Pkt ineligible, or error occurred + * + * Output + * UNMATCHED Not switched from other addr family + * TO_V4 n/a + * TO_V6 n/a + * PASS Switched from other af and both sessions exist + * DROP Error occurred + */ +typedef enum { + NAT64_DECISION_UNMATCHED, + NAT64_DECISION_TO_V4, + NAT64_DECISION_TO_V6, + NAT64_DECISION_PASS, + NAT64_DECISION_DROP, +} nat64_decision_t; + +static inline const char *nat64_decision_str(nat64_decision_t decision) +{ + switch (decision) { + case NAT64_DECISION_UNMATCHED: + return "UNMATCHED"; + case NAT64_DECISION_TO_V4: + return "TO_V4"; + case NAT64_DECISION_TO_V6: + return "TO_V6"; + case NAT64_DECISION_PASS: + return "PASS"; + case NAT64_DECISION_DROP: + return "DROP"; + }; + return "Unkn"; +} + +nat64_decision_t +npf_nat64_6to4_in(const struct npf_config *npf_config, + npf_session_t **sep, struct ifnet *ifp, npf_cache_t *npc, + struct rte_mbuf **m, uint16_t *npf_flag, int *rcp); + +nat64_decision_t +npf_nat64_4to6_in(const struct npf_config *npf_config, + npf_session_t **sep, struct ifnet *ifp, npf_cache_t *npc, + struct rte_mbuf **m, uint16_t *npf_flag, int *rcp); + +nat64_decision_t +npf_nat64_6to4_out(npf_session_t **sep, struct ifnet *ifp, npf_cache_t *npc, + struct rte_mbuf **m, const uint16_t *npf_flag, int *rcp); + +nat64_decision_t 
+npf_nat64_4to6_out(npf_session_t **sep, struct ifnet *ifp, npf_cache_t *npc, + struct rte_mbuf **m, const uint16_t *npf_flag, int *rcp); int npf_nat64_session_link(struct npf_session *se1, struct npf_session *se2); void npf_nat64_session_unlink(struct npf_session *se); void npf_nat64_session_destroy(struct npf_session *se); bool npf_nat64_session_is_nat64(npf_session_t *se); -bool npf_nat64_session_is_nat46(npf_session_t *se); void npf_nat64_session_json(json_writer_t *json, npf_session_t *se); npf_rule_t *npf_nat64_get_rule(struct npf_nat64 *n64); +int npf_nat64_get_rproc_id(struct npf_nat64 *n64); +uint8_t npf_nat64_is_v6(struct npf_nat64 *n64); +uint8_t npf_nat64_is_linked(struct npf_nat64 *n64); +void npf_nat64_get_trans(struct npf_nat64 *n64, + npf_addr_t *addr, in_port_t *port); bool npf_nat64_has_peer(struct npf_nat64 *n64); npf_session_t *npf_nat64_get_peer(struct npf_nat64 *n64); bool npf_nat64_session_log_enabled(struct npf_nat64 *n64); +int npf_nat64_npf_pack_pack(struct npf_nat64 *n64, + struct npf_pack_nat64 *pn64); +int npf_nat64_npf_pack_restore(struct npf_session *se, + struct npf_pack_nat64 *pn64); #endif /* _NPF_NAT64_H_ */ diff --git a/src/npf/npf_ncgen.c b/src/npf/npf_ncgen.c index e9431926..1816b2fb 100644 --- a/src/npf/npf_ncgen.c +++ b/src/npf/npf_ncgen.c @@ -333,7 +333,7 @@ npf_gennc_v4cidr(nc_ctx_t *ctx, int opts, const npf_addr_t *netaddr, * npf_gennc_mac_addr: match mac address */ void -npf_gennc_mac_addr(nc_ctx_t *ctx, int opts, struct ether_addr *addr) +npf_gennc_mac_addr(nc_ctx_t *ctx, int opts, struct rte_ether_addr *addr) { uint32_t *nc = npf_ncgen_getptr(ctx, 6 /* words */); @@ -344,7 +344,7 @@ npf_gennc_mac_addr(nc_ctx_t *ctx, int opts, struct ether_addr *addr) * last (unused) bytes are zero. 
*/ nc[1] = 0; - memcpy(nc, addr, ETHER_ADDR_LEN); + memcpy(nc, addr, RTE_ETHER_ADDR_LEN); nc += 2; npf_ncgen_addjmp(ctx, &nc); @@ -430,19 +430,29 @@ npf_gennc_ttl(nc_ctx_t *ctx, uint8_t ttl) } /* - * npf_gennc_icmp: fragment to match (IPv4/IPv6) ICMP type and code. + * Fragment to match (IPv4/IPv6) ICMP type and code. + * + * This can also match on 'class' of ICMP - 'info' or 'error'. + * This by having class=true, and treating 'type' as a boolean + * flag with true meaning 'error'. */ void -npf_gennc_icmp(nc_ctx_t *ctx, int type, int code, bool ipv4) +npf_gennc_icmp(nc_ctx_t *ctx, int type, int code, bool ipv4, bool class) { uint32_t *nc = npf_ncgen_getptr(ctx, 4 /* words */); + uint32_t tc = 0; /* OP, code, type (2 words) */ *nc++ = ipv4 ? NPF_OPCODE_ICMP4 : NPF_OPCODE_ICMP6; - *nc++ = (type == -1 ? 0 : NC_ICMP_HAS_TYPE | - NC_ICMP_SET_TYPE_IN_OP(type)) | - (code == -1 ? 0 : NC_ICMP_HAS_CODE | - NC_ICMP_SET_CODE_IN_OP(code)); + if (class) { + tc |= NC_ICMP_HAS_CLASS | NC_ICMP_SET_TYPE_IN_OP(type); + } else { + if (type != -1) + tc |= NC_ICMP_HAS_TYPE | NC_ICMP_SET_TYPE_IN_OP(type); + if (code != -1) + tc |= NC_ICMP_HAS_CODE | NC_ICMP_SET_CODE_IN_OP(code); + } + *nc++ = tc; /* Comparison block (2 words). */ npf_ncgen_addjmp(ctx, &nc); @@ -511,16 +521,35 @@ npf_gennc_tcpfl(nc_ctx_t *ctx, uint8_t tf, uint8_t tf_mask) } /* - * npf_gennc_proto: fragment to match the protocol. + * npf_gennc_proto_final: match the L4 protocol. + */ +void +npf_gennc_proto_final(nc_ctx_t *ctx, uint8_t proto_final) +{ + uint32_t *nc = npf_ncgen_getptr(ctx, 4 /* words */); + + /* OP, code, type (2 words) */ + *nc++ = NPF_OPCODE_PROTO_FINAL; + *nc++ = proto_final; + + /* Comparison block (2 words). */ + npf_ncgen_addjmp(ctx, &nc); + + /* + 4 words. 
*/ + npf_ncgen_putptr(ctx, nc); +} + +/* + * npf_gennc_proto_base: match the protocol in IPv4 or IPv6 header */ void -npf_gennc_proto(nc_ctx_t *ctx, uint8_t proto) +npf_gennc_proto_base(nc_ctx_t *ctx, uint8_t proto_base) { uint32_t *nc = npf_ncgen_getptr(ctx, 4 /* words */); /* OP, code, type (2 words) */ - *nc++ = NPF_OPCODE_PROTO; - *nc++ = proto; + *nc++ = NPF_OPCODE_PROTO_BASE; + *nc++ = proto_base; /* Comparison block (2 words). */ npf_ncgen_addjmp(ctx, &nc); diff --git a/src/npf/npf_ncgen.h b/src/npf/npf_ncgen.h index 16270300..71c1eb79 100644 --- a/src/npf/npf_ncgen.h +++ b/src/npf/npf_ncgen.h @@ -46,7 +46,7 @@ #include "npf/npf.h" #include "util.h" -struct ether_addr; +struct rte_ether_addr; /* * N-code generation interface. @@ -65,16 +65,17 @@ void npf_gennc_v6cidr(nc_ctx_t *ctx, int opts, const npf_addr_t *netaddr, const npf_netmask_t mask); void npf_gennc_v4cidr(nc_ctx_t *ctx, int opts, const npf_addr_t *netaddr, const npf_netmask_t mask); -void npf_gennc_mac_addr(nc_ctx_t *ctx, int opts, struct ether_addr *addr); +void npf_gennc_mac_addr(nc_ctx_t *ctx, int opts, struct rte_ether_addr *addr); void npf_gennc_addrfamily(nc_ctx_t *ctx, int family); void npf_gennc_ip_frag(nc_ctx_t *ctx); void npf_gennc_ports(nc_ctx_t *ctx, int opts, in_port_t from, in_port_t to); void npf_gennc_ttl(nc_ctx_t *ctx, uint8_t ttl); -void npf_gennc_icmp(nc_ctx_t *ctx, int type, int code, bool ipv4); +void npf_gennc_icmp(nc_ctx_t *ctx, int type, int code, bool ipv4, bool class); void npf_gennc_ip6_rt(nc_ctx_t *ctx, uint8_t type); void npf_gennc_tbl(nc_ctx_t *ctx, int opts, u_int tableid); void npf_gennc_tcpfl(nc_ctx_t *ctx, uint8_t tf, uint8_t tf_mask); -void npf_gennc_proto(nc_ctx_t *ctx, uint8_t proto); +void npf_gennc_proto_final(nc_ctx_t *ctx, uint8_t proto_final); +void npf_gennc_proto_base(nc_ctx_t *ctx, uint8_t proto_base); void npf_ncgen_matchdscp(nc_ctx_t *ctx, uint64_t matchdscpset); void npf_gennc_etherpcp(nc_ctx_t *ctx, uint8_t pcp); void npf_gennc_ethertype(nc_ctx_t 
*ctx, uint16_t etype); diff --git a/src/npf/npf_ncode.h b/src/npf/npf_ncode.h index b12fdb35..34f9963a 100644 --- a/src/npf/npf_ncode.h +++ b/src/npf/npf_ncode.h @@ -57,7 +57,6 @@ struct ifnet; int npf_ncode_process(npf_cache_t *npc, const npf_rule_t *rl, const struct ifnet *ifp, int dir, npf_session_t *se, struct rte_mbuf *nbuf); -int npf_ncode_validate(const void *nc, size_t sz, int *errat); /* Error codes. */ #define NPF_ERR_OPCODE -1 /* Invalid instruction. */ @@ -79,7 +78,8 @@ enum npf_opcode_type_enum { NPF_OPCODE_RET, NPF_OPCODE_BEQ, NPF_OPCODE_BNE, - NPF_OPCODE_PROTO, + NPF_OPCODE_PROTO_FINAL, + NPF_OPCODE_PROTO_BASE, NPF_OPCODE_ETHERADDR, NPF_OPCODE_ETHERPCP, NPF_OPCODE_IP4MASK, @@ -114,6 +114,7 @@ enum npf_opcode_type_enum { #define NC_ICMP_HAS_TYPE (1<<31) #define NC_ICMP_HAS_CODE (1<<30) +#define NC_ICMP_HAS_CLASS (1<<29) #define NC_ICMP_GET_TYPE_FROM_OP(x) ((x >> 8) & 0xFF) #define NC_ICMP_SET_TYPE_IN_OP(x) ((x & 0xFF) << 8) diff --git a/src/npf/npf_pack.c b/src/npf/npf_pack.c new file mode 100644 index 00000000..4c496434 --- /dev/null +++ b/src/npf/npf_pack.c @@ -0,0 +1,456 @@ +/* + * Copyright (c) 2019-2021, AT&T Intellectual Property. All rights reserved. 
+ * + * SPDX-License-Identifier: LGPL-2.1-only + */ + +#include +#include + +#include "dp_session.h" +#include "npf/npf_session.h" +#include "npf/npf_nat.h" +#include "npf/npf_nat64.h" +#include "npf/npf_pack.h" +#include "session/session_feature.h" +#include "vplane_debug.h" +#include "vplane_log.h" + +static_assert(NPF_PACK_MESSAGE_MAX_SIZE == 576, + "NPF_PACK_MESSAGE_MAX_SIZE changed"); + +uint32_t dp_session_buf_size_max(void) +{ + return NPF_PACK_NEW_SESSION_MAX_SIZE; +} + +static int npf_pack_session_pack_update(struct session *s, + struct npf_pack_session_update *csu, + uint32_t *len) +{ + struct npf_pack_sentry_packet *psp; + struct npf_pack_session_state *pst; + struct npf_pack_dp_sess_stats *stats; + struct npf_session *se; + struct ifnet *ifp; + int rc; + + if (!s || !csu) + return -EINVAL; + + csu->se_id = session_get_id(s); + psp = &csu->psp; + rc = session_npf_pack_sentry_pack(s, psp); + if (rc) + return rc; + + *len = sizeof(*csu); + + stats = &csu->stats; + rc = session_npf_pack_stats_pack(s, stats); + if (rc) + return rc; + + csu->se_feature_count = rte_atomic16_read(&s->se_feature_count); + if (!csu->se_feature_count) + return 0; + + ifp = dp_ifnet_byifname(psp->psp_ifname); + if (!ifp) + return -EINVAL; + + se = session_feature_get(s, ifp->if_index, SESSION_FEATURE_NPF); + if (!se) { + csu->se_feature_count = 0; + return 0; + } + + pst = &csu->pst; + + if (psp->psp_forw.sp_protocol == IPPROTO_TCP) + rc = npf_session_pack_state_pack_tcp(se, pst); + else + rc = npf_session_pack_state_pack_gen(se, pst); + + if (rc) + csu->se_feature_count = 0; + + return 0; +} + +static int +npf_pack_get_new_msg_type(struct session *s, enum pack_session_new *msg_type) +{ + if (!s) + return -ENOENT; + + bool is_nat = session_is_nat(s); + bool is_nat64 = session_is_nat64(s) || session_is_nat46(s); + + if (!is_nat && !is_nat64) + *msg_type = NPF_PACK_SESSION_NEW_FW; + + else if (is_nat && !is_nat64) + *msg_type = NPF_PACK_SESSION_NEW_NAT; + + else if (!is_nat && 
is_nat64) + *msg_type = NPF_PACK_SESSION_NEW_NAT64; + + else if (is_nat && is_nat64) + *msg_type = NPF_PACK_SESSION_NEW_NAT_NAT64; + + else + return -EINVAL; + + return 0; +} + +static int npf_pack_pack_session(struct session *s, + struct npf_session *se, + struct npf_pack_dp_session *pds, + struct npf_pack_sentry_packet *psp, + struct npf_pack_npf_session *pns, + struct npf_pack_session_state *pst, + struct npf_pack_dp_sess_stats *stats, + struct npf_pack_nat *pnt, + struct npf_pack_nat64 *pn64) +{ + struct npf_nat *nt; + struct npf_nat64 *n64; + int rc; + + if (!s || !se) + return -EINVAL; + + rc = session_npf_pack_pack(s, pds, psp, stats); + if (rc) { + RTE_LOG(ERR, DATAPLANE, + "npf_pack pack %lu: session pack failed\n", + session_get_id(s)); + return rc; + } + + rc = npf_session_npf_pack_pack(se, pns, pst); + if (rc) { + RTE_LOG(ERR, DATAPLANE, + "npf_pack pack %lu: npf session pack failed\n", + session_get_id(s)); + return rc; + } + + if (pnt) { /* NULL unless the message layout carries NAT state */ + nt = npf_session_get_nat(se); + if (!nt) + return -ENOENT; + + rc = npf_nat_npf_pack_pack(nt, pnt, &psp->psp_back); + if (rc) { + RTE_LOG(ERR, DATAPLANE, + "npf_pack pack %lu: nat session pack failed\n", + session_get_id(s)); + return rc; + } + } + if (pn64) { /* NULL unless the message layout carries NAT64 state */ + n64 = npf_session_get_nat64(se); + if (!n64) + return -ENOENT; + + rc = npf_nat64_npf_pack_pack(n64, pn64); + if (rc) { + RTE_LOG(ERR, DATAPLANE, + "npf_pack pack %lu: nat64 session pack failed\n", + session_get_id(s)); + return rc; + } + } + + return 0; +} + +static int npf_pack_pack_fw_session(struct session *s, + struct npf_session *se, + struct npf_pack_session_fw *cs) +{ + return npf_pack_pack_session(s, se, &cs->pds, &cs->psp, + &cs->pns, &cs->pst, &cs->stats, + NULL, NULL); +} + +static int npf_pack_pack_nat_session(struct session *s, + struct npf_session *se, + struct npf_pack_session_nat *cs) +{ + return npf_pack_pack_session(s, se, &cs->pds, &cs->psp, + &cs->pns, &cs->pst, &cs->stats, + &cs->pnt, NULL); +} + +static int npf_pack_pack_nat64_session(struct
session *s, + struct npf_session *se, + struct npf_pack_session_nat64 *cs) +{ + return npf_pack_pack_session(s, se, &cs->pds, &cs->psp, + &cs->pns, &cs->pst, &cs->stats, + NULL, &cs->pn64); +} + +static int +npf_pack_pack_nat_nat64_session(struct session *s, + struct npf_session *se, + struct npf_pack_session_nat_nat64 *cs) +{ + return npf_pack_pack_session(s, se, &cs->pds, &cs->psp, + &cs->pns, &cs->pst, &cs->stats, + &cs->pnt, &cs->pn64); +} + +static int npf_pack_pack_one_session(struct session *s, + struct npf_session *se, + struct npf_pack_session_new *csn) +{ + struct npf_pack_session_hdr *psh; + enum pack_session_new msg_type = 0; + int rc; + + if (!s || !csn) + return -EINVAL; + + rc = npf_pack_get_new_msg_type(s, &msg_type); + if (rc) + return rc; + + psh = &csn->hdr; + psh->psh_type = msg_type; + + switch (psh->psh_type) { + case NPF_PACK_SESSION_NEW_FW: + psh->psh_len = NPF_PACK_NEW_FW_SESSION_SIZE; + rc = npf_pack_pack_fw_session( + s, se, (struct npf_pack_session_fw *)&csn->cs); + break; + + case NPF_PACK_SESSION_NEW_NAT: + psh->psh_len = NPF_PACK_NEW_NAT_SESSION_SIZE; + rc = npf_pack_pack_nat_session( + s, se, (struct npf_pack_session_nat *)&csn->cs); + break; + + case NPF_PACK_SESSION_NEW_NAT64: + psh->psh_len = NPF_PACK_NEW_NAT64_SESSION_SIZE; + rc = npf_pack_pack_nat64_session( + s, se, (struct npf_pack_session_nat64 *)&csn->cs); + break; + + case NPF_PACK_SESSION_NEW_NAT_NAT64: + psh->psh_len = NPF_PACK_NEW_NAT_NAT64_SESSION_SIZE; + rc = npf_pack_pack_nat_nat64_session( + s, se, + (struct npf_pack_session_nat_nat64 *)&csn->cs); + break; + }; + + return rc; +} + +static int npf_pack_pack_get_peer(struct npf_session *se, + struct session **s_peer, + struct npf_session **se_peer) +{ + struct npf_nat64 *n64; + struct session *sp; + struct npf_session *sep; + int rc = -ENOENT; + + n64 = npf_session_get_nat64(se); + if (!n64) + return rc; + if (!npf_nat64_has_peer(n64) || !npf_nat64_is_linked(n64)) + return rc; + + sep = npf_nat64_get_peer(n64); + if 
(!sep) + return rc; + + sp = npf_session_get_dp_session(sep); + if (!sp) + return rc; + + *s_peer = sp; + *se_peer = sep; + return 0; +} + +static int npf_pack_pack_peer_session(struct session *s, + struct npf_pack_session_new *csn, + struct npf_pack_session_new *csn_peer, + struct session *s_peer, + struct npf_session *se_peer) +{ + struct npf_pack_session_nat64 *cs; + struct npf_pack_session_nat64 *cs_peer; + int rc; + + rc = npf_pack_pack_one_session(s_peer, se_peer, csn_peer); + if (rc) { + RTE_LOG(ERR, DATAPLANE, + "npf_pack nat64 peer pack failed %lu\n", + session_get_id(s_peer)); + return rc; + } + + /* Set parent */ + cs = (struct npf_pack_session_nat64 *)&csn->cs; + cs_peer = (struct npf_pack_session_nat64 *)&csn_peer->cs; + if (session_base_parent(s_peer) == s && session_base_parent(s) == s) { + cs->pds.pds_parent = 1; + } else if (session_base_parent(s) == s_peer && + session_base_parent(s_peer) == s_peer) { + cs_peer->pds.pds_parent = 1; + } else { + RTE_LOG(ERR, DATAPLANE, + "npf_pack nat64 peer pack failed %lu, parent se link error\n", + session_get_id(s_peer)); + return -EINVAL; + } + + return 0; +} + +static int npf_pack_session_pack_new(struct session *s, + struct npf_pack_session_new *csn, + uint32_t *len, + struct session **peer) +{ + struct npf_pack_session_new *csn_peer; + struct session *s_peer = NULL; + struct npf_session *se_peer = NULL; + struct sentry *sen; + npf_session_t *se; + int rc; + + *len = 0; + + if (!s || !csn) + return -EINVAL; + + sen = rcu_dereference(s->se_sen); + if (!sen) + return -ENOENT; + + se = session_feature_get(s, sen->sen_ifindex, + SESSION_FEATURE_NPF); + if (!se) + return -ENOENT; + + rc = npf_pack_pack_one_session(s, se, csn); + if (rc) { + RTE_LOG(ERR, DATAPLANE, + "npf_pack pack %lu: session pack failed\n", + session_get_id(s)); + return rc; + } + *len += csn->hdr.psh_len; + + *peer = NULL; + if (!session_is_nat64(s) && !session_is_nat46(s)) + return 0; + + rc = npf_pack_pack_get_peer(se, &s_peer, &se_peer); + 
if (rc) { + RTE_LOG(ERR, DATAPLANE, + "npf_pack pack %lu: session peer not found for NAT64 session\n", + session_get_id(s)); + return rc; + } + /* Pack peer session */ + csn_peer = (struct npf_pack_session_new *)((char *)csn + + csn->hdr.psh_len); + rc = npf_pack_pack_peer_session(s, csn, csn_peer, s_peer, se_peer); + if (rc) { + RTE_LOG(ERR, DATAPLANE, + "npf_pack pack %lu: session pack failed\n", + session_get_id(s)); + return rc; + } + *len += csn_peer->hdr.psh_len; + *peer = s_peer; + + return 0; +} + + +static int session_pack_full(struct session *s, void *buf, uint32_t size, + uint32_t *packed_size, struct session **s_peer) +{ + struct npf_pack_session_new *ps_new = buf; + + *s_peer = NULL; + + if (size < sizeof(struct npf_pack_session_new)) + return -EINVAL; + + *packed_size = 0; + return npf_pack_session_pack_new(s, ps_new, packed_size, s_peer); +} + +static int session_pack_update(struct session *s, void *buf, uint32_t size, + uint32_t *packed_size) +{ + struct npf_pack_session_update *ps_update = buf; + + if (size < sizeof(struct npf_pack_session_update)) + return -EINVAL; + + *packed_size = 0; + return npf_pack_session_pack_update(s, ps_update, packed_size); +} + +int dp_session_pack(struct session *session, void *buf, uint32_t size, + enum session_pack_type spt, struct session **session_peer) +{ + struct npf_pack_message *msg = buf; + uint32_t dsize = size - sizeof(msg->hdr); /* size is checked below */ + uint32_t dlen = 0; + int ret = -EINVAL; + + *session_peer = NULL; + + if (!session || !buf || (size < sizeof(msg->hdr)) + || (spt != SESSION_PACK_FULL && spt != SESSION_PACK_UPDATE)) + return ret; + + switch (spt) { + case SESSION_PACK_FULL: + ret = session_pack_full(session, &msg->data, dsize, + &dlen, session_peer); + break; + case SESSION_PACK_UPDATE: + ret = session_pack_update(session, &msg->data, dsize, &dlen); + break; + default: + RTE_LOG(ERR, DATAPLANE, "%s: Invalid pack_type %d\n", __func__, + spt); + return ret; + } + + if (ret == 0) { + msg->hdr.pmh_len = dlen +
sizeof(msg->hdr); + msg->hdr.pmh_version = SESSION_PACK_VERSION; + msg->hdr.pmh_type = spt; + return (int)msg->hdr.pmh_len; + } + + if (ret == -EINVAL && dsize < dlen) { + dlen += sizeof(msg->hdr); + RTE_LOG(ERR, DATAPLANE, + "SESSION_PACK: Buffer too small: session %lu " + "needed %u bytes given %u\n", + session_get_id(session), dlen, size); + } else + RTE_LOG(ERR, DATAPLANE, + "SESSION_PACK:session %lu error %d, len %u\n", + session_get_id(session), ret, size); + + return ret; +} diff --git a/src/npf/npf_pack.h b/src/npf/npf_pack.h new file mode 100644 index 00000000..2f4b1f1d --- /dev/null +++ b/src/npf/npf_pack.h @@ -0,0 +1,215 @@ +/* + * Copyright (c) 2019-2020, AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ + +#ifndef NPF_PACK_H +#define NPF_PACK_H + +#include "dp_session.h" +#include "npf/npf_state.h" +#include "session/session.h" + +#define NPF_PACK_NEW_FW_SESSION_SIZE \ + (sizeof(struct npf_pack_session_hdr) + \ + sizeof(struct npf_pack_session_fw)) + +#define NPF_PACK_NEW_NAT_SESSION_SIZE \ + (sizeof(struct npf_pack_session_hdr) + \ + sizeof(struct npf_pack_session_nat)) +#define NPF_PACK_NEW_NAT64_SESSION_SIZE \ + (sizeof(struct npf_pack_session_hdr) + \ + sizeof(struct npf_pack_session_nat64)) + +#define NPF_PACK_NEW_NAT_NAT64_SESSION_SIZE \ + (sizeof(struct npf_pack_session_hdr) + \ + sizeof(struct npf_pack_session_nat_nat64)) + +/* New session includes nat64 peer session */ +#define NPF_PACK_NEW_SESSION_MAX_SIZE (2 * sizeof(struct npf_pack_session_new)) +#define NPF_PACK_UPDATE_SESSION_SIZE (sizeof(struct npf_pack_session_update)) + +#define NPF_PACK_MESSAGE_MAX_SIZE NPF_PACK_NEW_SESSION_MAX_SIZE +#define NPF_PACK_MESSAGE_MIN_SIZE (sizeof(struct npf_pack_message_hdr)) + +#define SESSION_PACK_VERSION (0x0102) + +enum pack_session_new { + NPF_PACK_SESSION_NEW_FW = 1, + NPF_PACK_SESSION_NEW_NAT, + NPF_PACK_SESSION_NEW_NAT64, + NPF_PACK_SESSION_NEW_NAT_NAT64, +} __attribute__
((__packed__)); + +/* + * From 'struct session' (except stats) + */ +struct npf_pack_dp_session { + uint64_t pds_id; /* for logging */ + uint32_t pds_custom_timeout; + uint32_t pds_timeout; + uint16_t pds_flags; + uint8_t pds_protocol; + uint8_t pds_protocol_state; + uint8_t pds_gen_state; + uint8_t pds_fw:1; + uint8_t pds_snat:1; + uint8_t pds_dnat:1; + uint8_t pds_nat64:1; + uint8_t pds_nat46:1; + uint8_t pds_parent:1; + uint8_t pds_alg:1; + uint8_t pds_in:1; + uint8_t pds_out:1; + uint8_t pds_app:1; + uint8_t pds_pad[1]; +} __attribute__ ((__packed__)); + +/* + * Stats from dataplane session, 'struct session'. These are separate from + * 'struct npf_pack_dp_session' since they are periodically updated. + */ +struct npf_pack_dp_sess_stats { + uint64_t pdss_pkts_in; + uint64_t pdss_bytes_in; + uint64_t pdss_pkts_out; + uint64_t pdss_bytes_out; +} __attribute__ ((__packed__)); + +struct npf_pack_sentry_packet { + struct sentry_packet psp_forw; + struct sentry_packet psp_back; + char psp_ifname[IFNAMSIZ]; +} __attribute__ ((__packed__)); + +/* + * From npf_session_t + */ +struct npf_pack_npf_session { + int pns_flags; + uint32_t pns_fw_rule_hash; + uint32_t pns_rproc_rule_hash; +} __attribute__ ((__packed__)); + +/* + * Packed npf_state_t + */ +struct npf_pack_session_state { + struct npf_tcp_window pst_tcp_win[2]; + union { + enum tcp_session_state pst_tcp_state; + enum dp_session_state pst_gen_state; + }; + uint8_t pst_pad[3]; +} __attribute__ ((__packed__)); + +/* + * Packed npf_nat_t + */ +struct npf_pack_nat { + uint16_t pnt_l3_chk; + uint16_t pnt_l4_chk; + uint32_t pnt_map_flags; + uint32_t pnt_rule_hash; + uint32_t pnt_taddr; + uint32_t pnt_oaddr; + uint16_t pnt_tport; + uint16_t pnt_oport; +} __attribute__ ((__packed__)); + +struct npf_pack_nat64 { + uint32_t pn64_rule_hash; + int32_t pn64_rproc_id; + struct in6_addr pn64_t_addr; + uint32_t pn64_map_flags; + in_port_t pn64_t_port; + uint8_t pn64_v6; + uint8_t pn64_linked; +} __attribute__ ((__packed__)); + 
+struct npf_pack_session_fw { + struct npf_pack_dp_session pds; + struct npf_pack_sentry_packet psp; + struct npf_pack_npf_session pns; + struct npf_pack_session_state pst; + struct npf_pack_dp_sess_stats stats; +} __attribute__ ((__packed__)); + +struct npf_pack_session_nat { + struct npf_pack_dp_session pds; + struct npf_pack_sentry_packet psp; + struct npf_pack_npf_session pns; + struct npf_pack_session_state pst; + struct npf_pack_dp_sess_stats stats; + struct npf_pack_nat pnt; +} __attribute__ ((__packed__)); + +struct npf_pack_session_nat64 { + struct npf_pack_dp_session pds; + struct npf_pack_sentry_packet psp; + struct npf_pack_npf_session pns; + struct npf_pack_session_state pst; + struct npf_pack_dp_sess_stats stats; + struct npf_pack_nat64 pn64; +} __attribute__ ((__packed__)); + +struct npf_pack_session_nat_nat64 { + struct npf_pack_dp_session pds; + struct npf_pack_sentry_packet psp; + struct npf_pack_npf_session pns; + struct npf_pack_session_state pst; + struct npf_pack_dp_sess_stats stats; + struct npf_pack_nat pnt; + struct npf_pack_nat64 pn64; +} __attribute__ ((__packed__)); + +struct npf_pack_message_hdr { + uint32_t pmh_len; + uint16_t pmh_version; + uint8_t pmh_flags; + enum session_pack_type pmh_type; +} __attribute__ ((__packed__)); + +static_assert(sizeof(struct npf_pack_message_hdr) == 8, + "sizeof npf_pack_message_hdr"); + +struct npf_pack_session_hdr { + uint32_t psh_len; + enum pack_session_new psh_type; + uint8_t psh_pad[3]; +} __attribute__ ((__packed__)); + +static_assert(sizeof(struct npf_pack_session_hdr) == 8, + "sizeof npf_pack_session_hdr"); + +struct npf_pack_session_new { + struct npf_pack_session_hdr hdr; + char cs[NPF_PACK_NEW_NAT_NAT64_SESSION_SIZE]; +} __attribute__ ((__packed__)); + +struct npf_pack_session_update { + uint64_t se_id; /* for UT */ + struct npf_pack_sentry_packet psp; + struct npf_pack_session_state pst; + struct npf_pack_dp_sess_stats stats; + uint16_t se_feature_count; + uint8_t pad[2]; +} __attribute__ 
((__packed__)); + +struct npf_pack_message { + struct npf_pack_message_hdr hdr; + union { + char cs_new[NPF_PACK_NEW_SESSION_MAX_SIZE]; + struct npf_pack_session_update cs_update; + } data; +} __attribute__ ((__packed__)); + +bool npf_pack_validate_msg(struct npf_pack_message *msg, uint32_t size); +uint8_t npf_pack_get_msg_type(struct npf_pack_message *msg); +uint64_t npf_pack_get_session_id(struct npf_pack_message *msg); + +struct npf_pack_dp_sess_stats * +npf_pack_get_session_stats(struct npf_pack_message *msg); + +#endif /* NPF_PACK_H */ diff --git a/src/npf/npf_processor.c b/src/npf/npf_processor.c index 026729c1..6798c063 100644 --- a/src/npf/npf_processor.c +++ b/src/npf/npf_processor.c @@ -227,9 +227,13 @@ npf_ncode_process(npf_cache_t *npc, const npf_rule_t *rl, i_ptr = nc_fetch_word(i_ptr, &n); cmpval = npf_match_ip6_rt(npc, n); break; - case NPF_OPCODE_PROTO: + case NPF_OPCODE_PROTO_FINAL: i_ptr = nc_fetch_word(i_ptr, &n); - cmpval = npf_match_proto(npc, n); + cmpval = npf_match_proto_final(npc, n); + break; + case NPF_OPCODE_PROTO_BASE: + i_ptr = nc_fetch_word(i_ptr, &n); + cmpval = npf_match_proto_base(npc, n); break; case NPF_OPCODE_ETHERPCP: i_ptr = nc_fetch_word(i_ptr, &n); @@ -273,250 +277,3 @@ npf_ncode_process(npf_cache_t *npc, const npf_rule_t *rl, /* Failure case. */ return -1; } - -/* - * nc_ptr_check: validate that instruction pointer is not out of range. - * If not - advance by number of arguments and fetch specified argument. 
- */ -static int -nc_ptr_check(uintptr_t *iptr, const void *nc, size_t sz, - u_int nargs, uint32_t *val) -{ - const uint32_t *tptr = (const uint32_t *)*iptr; - u_int i; - - if ((uintptr_t)tptr < (uintptr_t)nc) - return NPF_ERR_JUMP; - - if ((uintptr_t)tptr + (nargs * sizeof(uint32_t)) > (uintptr_t)nc + sz) - return NPF_ERR_RANGE; - - for (i = 0; i < nargs; i++) { - if (val) - val[i] = *tptr; - tptr++; - } - *iptr = (uintptr_t)tptr; - return 0; -} - -static int -nc_noperands_check(uint n1, uint n2) -{ - return (n1 != n2) ? NPF_ERR_INVAL : 0; -} - -/* - * nc_insn_check: validate the instruction and its arguments. - */ -static int -nc_insn_check(const uintptr_t optr, const void *nc, size_t sz, - size_t *adv, size_t *jmp, bool *ret) -{ - uintptr_t iptr = optr; - uint32_t opcode; - uint noperands; - uint32_t operand[NPF_NOPERANDS_MAX] = {0}; - int error; - - /* Fetch the opcode */ - error = nc_ptr_check(&iptr, nc, sz, 1, &opcode); - if (error) - return error; - - noperands = npf_ncode_opcode_noperands(opcode); - if (noperands > NPF_NOPERANDS_MAX) - return NPF_ERR_INVAL; - - /* Prefetch the operands */ - error = nc_ptr_check(&iptr, nc, sz, noperands, operand); - - *ret = false; - *jmp = 0; - - /* - * Verify the expected number of operands, and verify operand values - * where possible - */ - switch (opcode) { - /* - * RISC-like instructions. - */ - case NPF_OPCODE_BEQ: - case NPF_OPCODE_BNE: - error = nc_noperands_check(noperands, 1); - if (error) - break; - /* Validate jump address. */ - - /* - * We must check for JMP 0 i.e. to oneself. Pass the jump - * address to the caller, it will validate if it is correct. - */ - if (!error && operand[0] == 0) - error = NPF_ERR_JUMP; - if (!error) - *jmp = operand[0] * sizeof(uint32_t); - break; - - case NPF_OPCODE_RET: - error = nc_noperands_check(noperands, 1); - *ret = true; - break; - /* - * CISC-like instructions. 
- */ - case NPF_OPCODE_IP4MASK: - error = nc_noperands_check(noperands, 3); - if (error) - break; - if (!operand[2] || (operand[2] > 32 && - operand[2] != NPF_NO_NETMASK)) - error = NPF_ERR_INVAL; - break; - case NPF_OPCODE_IP6MASK: - error = nc_noperands_check(noperands, 6); - if (error) - break; - if (!operand[5] || (operand[5] > NPF_MAX_NETMASK && - operand[5] != NPF_NO_NETMASK)) - error = NPF_ERR_INVAL; - break; - case NPF_OPCODE_TABLE: - error = nc_noperands_check(noperands, 2); - if (error) - break; - if (!npf_addrgrp_tid_valid(operand[1])) - error = NPF_ERR_TABLE; - break; - case NPF_OPCODE_PORTS: - error = nc_noperands_check(noperands, 2); - if (error) - break; - uint16_t port_start, port_end; - - port_start = operand[1] >> 16; - port_end = operand[1] & 0xffff; - if (!port_start || !port_end || port_start > port_end) - error = NPF_ERR_PORT; - break; - case NPF_OPCODE_TCP_FLAGS: - error = nc_noperands_check(noperands, 1); - break; - case NPF_OPCODE_ICMP4: - case NPF_OPCODE_ICMP6: - case NPF_OPCODE_IP6_RT: - error = nc_noperands_check(noperands, 1); - break; - case NPF_OPCODE_PROTO: - error = nc_noperands_check(noperands, 1); - if (error) - break; - uint8_t alen; - - alen = (operand[0] >> 8) & 0xff; - - if (alen != 0 && alen != 4 && alen != 16) - error = NPF_ERR_ALEN; - break; - case NPF_OPCODE_ETHERPCP: - error = nc_noperands_check(noperands, 1); - break; - case NPF_OPCODE_ETHERADDR: - error = nc_noperands_check(noperands, 3); - break; - case NPF_OPCODE_FRAGMENT: - error = nc_noperands_check(noperands, 0); - break; - case NPF_OPCODE_ADDRFAM: - error = nc_noperands_check(noperands, 1); - if (operand[0] != AF_INET && operand[0] != AF_INET6) - error = NPF_ERR_AF; - break; - case NPF_OPCODE_MATCHDSCP: - error = nc_noperands_check(noperands, 2); - break; - case NPF_OPCODE_ETHERTYPE: - error = nc_noperands_check(noperands, 1); - break; - case NPF_OPCODE_RPROC: - error = nc_noperands_check(noperands, 1); - break; - case _NPF_OPCODE_LAST: - /* Invalid instruction. 
*/ - error = NPF_ERR_OPCODE; - } - if (error) { - return error; - } - *adv = iptr - optr; - return 0; -} - -/* - * nc_jmp_check: validate that jump address points to the instruction. - * Loop from the beginning of n-code until we hit jump address or error. - */ -static inline int -nc_jmp_check(const void *nc, size_t sz, const uintptr_t jaddr) -{ - uintptr_t iaddr = (uintptr_t)nc; - int error; - - assert(iaddr != jaddr); - do { - size_t _jmp, adv; - bool _ret; - - error = nc_insn_check(iaddr, nc, sz, &adv, &_jmp, &_ret); - if (error) { - break; - } - iaddr += adv; - - } while (iaddr != jaddr); - - return error; -} - -/* - * npf_ncode_validate: validate n-code. - * Performs the following operations: - * - * - Checks that each instruction is valid (i.e. existing opcode). - * - Checks that jumps are within n-code and to the instructions. - * - Checks that n-code returns, and processing is within n-code memory. - */ -int -npf_ncode_validate(const void *nc, size_t sz, int *errat) -{ - const uintptr_t nc_end = (uintptr_t)nc + sz; - uintptr_t iptr = (uintptr_t)nc; - int error; - bool ret; - - do { - size_t jmp, adv; - - /* Validate instruction and its arguments. */ - error = nc_insn_check(iptr, nc, sz, &adv, &jmp, &ret); - if (error) - break; - - /* If jumping, check that address points to the instruction. */ - if (jmp && nc_jmp_check(nc, sz, iptr + jmp)) { - /* Note: the actual error might be different. */ - return NPF_ERR_JUMP; - } - - /* Advance and check for the end of n-code memory block. */ - iptr += adv; - - } while (iptr != nc_end); - - if (!error) { - error = ret ? 
0 : NPF_ERR_RANGE; - } - *errat = (iptr - (uintptr_t)nc) / sizeof(uint32_t); - return error; -} diff --git a/src/npf/npf_ptree.c b/src/npf/npf_ptree.c index 502f41db..cadd5b09 100644 --- a/src/npf/npf_ptree.c +++ b/src/npf/npf_ptree.c @@ -129,6 +129,15 @@ struct ptree_leaf { uint8_t pl_key[0]; /* Must be last */ }; +/* Ensure key starts on a word boundary */ +static_assert((offsetof(struct ptree_leaf, pl_key) & 0x3) == 0, + "ptree_leaf key must start on word boundary"); + +/* type field must be in same place in leaf and branch struct */ +static_assert(offsetof(struct ptree_node, pn_type) == + offsetof(struct ptree_leaf, pl_type), + "ptree type field not in correct place"); + /* * We pass and store pointers to nodes, so casts are required to access leaf * objects @@ -287,16 +296,9 @@ ptree_table_create(uint8_t keylen) (keylen & 0x3) != 0) return NULL; - /* Ensure key starts on a word boundary */ - assert((offsetof(struct ptree_leaf, pl_key) & 0x3) == 0); - if ((offsetof(struct ptree_leaf, pl_key) & 0x3) != 0) return NULL; - /* type field must be in same place in leaf and branch struct */ - assert(offsetof(struct ptree_node, pn_type) == - offsetof(struct ptree_leaf, pl_type)); - pt = zmalloc_aligned(sizeof(*pt)); if (!pt) return NULL; diff --git a/src/npf/npf_ptree.h b/src/npf/npf_ptree.h index 68a68a69..1db3ef1a 100644 --- a/src/npf/npf_ptree.h +++ b/src/npf/npf_ptree.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2017 by Brocade Communications Systems, Inc. * All rights reserved. * diff --git a/src/npf/npf_rc.c b/src/npf/npf_rc.c new file mode 100644 index 00000000..adea8f1c --- /dev/null +++ b/src/npf/npf_rc.c @@ -0,0 +1,888 @@ +/* + * Copyright (c) 2020, AT&T Intellectual Property. All rights reserved. 
+ *
+ * SPDX-License-Identifier: LGPL-2.1-only
+ */
+
+/*
+ * npf return code counters
+ */
+
+#include
+
+#include "util.h"
+#include "pl_node.h"
+#include "pipeline/nodes/pl_nodes_common.h"
+#include "npf_shim.h"
+#include "npf/zones/npf_zone_public.h"
+#include "npf/npf_cmd.h"
+#include "npf/npf_rc.h"
+
+/*
+ * The return codes are categorised into 4 main types
+ */
+enum rc_ctrl_cat {
+	RC_CAT_PASS,
+	RC_CAT_NOMATCH,
+	RC_CAT_BLOCK,
+	RC_CAT_DROP,
+};
+#define RC_CAT_LAST	RC_CAT_DROP
+#define RC_CAT_SZ	(RC_CAT_LAST+1)
+/* Filter value meaning "every category"; not a valid array index */
+#define RC_CAT_ALL	(RC_CAT_LAST+2)
+
+/*
+ * Category name.  Used both as the json object name in the show output and
+ * as the value matched by the "cat" command option.
+ */
+static inline const char *rc_ctrl_cat2str(enum rc_ctrl_cat cat)
+{
+	switch (cat) {
+	case RC_CAT_PASS:
+		return "pass";
+	case RC_CAT_NOMATCH:
+		return "unmatched";
+	case RC_CAT_BLOCK:
+		return "block";
+	case RC_CAT_DROP:
+		return "drop";
+	};
+	return "unkn";
+}
+
+/*
+ * Per-direction, per-return-code control info.  Filled in once by
+ * npf_rc_ctrl_init(); all zeroes until then.
+ */
+struct rc_ctrl {
+	uint	bm;	/* Bitmap of return-code types */
+	uint	cat;	/* Category - pass, unmatched, block or drop */
+};
+static struct rc_ctrl npf_rc_ctrl[NPF_DIR_SZ][NPF_RC_SZ];
+
+/*
+ * Populate npf_rc_ctrl[][].  The first call does the work; later calls
+ * return immediately.  For every direction and return code it records
+ * which return code types use that code (bm) and which category the code
+ * is totalled under (cat).
+ */
+static void npf_rc_ctrl_init(void)
+{
+	static bool initd;
+	enum npf_rc_dir dir;
+	enum npf_rc_en rc;
+
+	/* Only do once */
+	if (initd)
+		return;
+
+	/*
+	 * We use a bitmap to determine which return-codes are used by which
+	 * return code types. For example, fw6 does not use any of the NAT
+	 * return codes.
+	 */
+	for (dir = 0; dir < NPF_DIR_SZ; dir++)
+		for (rc = 0; rc < NPF_RC_SZ; rc++) {
+			/* Init bitmap of rc types */
+			switch (rc) {
+			case NPF_RC_UNMATCHED:
+			case NPF_RC_PASS:
+			case NPF_RC_BLOCK:
+			case NPF_RC_INTL:
+				npf_rc_ctrl[dir][rc].bm = RCT_BIT_ALL;
+				break;
+
+			/* the following may occur from npf cache */
+			case NPF_RC_L3_HDR_VER:
+			case NPF_RC_L3_HDR_LEN:
+			case NPF_RC_NON_IP:
+			case NPF_RC_L3_SHORT:
+			case NPF_RC_L3_PROTO:
+			case NPF_RC_L4_SHORT:
+				npf_rc_ctrl[dir][rc].bm =
+					(RCT_BIT_FW4 | RCT_BIT_FW6 |
+					 RCT_BIT_NAT64 | RCT_BIT_L2 |
+					 RCT_BIT_LOC | RCT_BIT_ACL4 |
+					 RCT_BIT_ACL6);
+				break;
+
+			/* the following may occur via npf_state_inspect */
+			case NPF_RC_ICMP_ECHO:
+			case NPF_RC_TCP_SYN:
+			case NPF_RC_TCP_STATE:
+			case NPF_RC_TCP_WIN:
+				npf_rc_ctrl[dir][rc].bm =
+					(RCT_BIT_FW4 | RCT_BIT_FW6 |
+					 RCT_BIT_NAT64 | RCT_BIT_LOC);
+				break;
+
+			/* the following may occur when creating a session */
+			case NPF_RC_ENOSTR:
+			case NPF_RC_SESS_ENOMEM:
+			case NPF_RC_SESS_LIMIT:
+			case NPF_RC_SESS_HOOK:
+			case NPF_RC_DP_SESS_ESTB:
+				npf_rc_ctrl[dir][rc].bm =
+					(RCT_BIT_FW4 | RCT_BIT_FW6 |
+					 RCT_BIT_NAT64 | RCT_BIT_LOC);
+				break;
+
+			/*
+			 * NAT and NAT64.  Each rc type needs its own bit, so
+			 * OR the bits; OR-ing the enum values before the
+			 * shift would yield the NAT64 bit alone.
+			 */
+			case NPF_RC_MBUF_ENOMEM:
+			case NPF_RC_NAT_ENOSPC:
+			case NPF_RC_NAT_ENOMEM:
+			case NPF_RC_NAT_EADDRINUSE:
+			case NPF_RC_NAT_ERANGE:
+			case NPF_RC_NAT_E2BIG:
+			case NPF_RC_ICMP_ERR_NAT:
+				npf_rc_ctrl[dir][rc].bm |=
+					(RCT_BIT_FW4 | RCT_BIT_NAT64);
+				break;
+
+			/* NAT only */
+			case NPF_RC_ALG_EEXIST:
+			case NPF_RC_ALG_ERR:
+				npf_rc_ctrl[dir][rc].bm |=
+					(RCT2BIT(NPF_RCT_FW4));
+				break;
+
+			/* NAT64 only */
+			case NPF_RC_NAT64_4T6:
+			case NPF_RC_NAT64_6T4:
+			case NPF_RC_NAT64_ENOSPC:
+			case NPF_RC_NAT64_ENOMEM:
+			case NPF_RC_NAT64_6052:
+			case NPF_RC_L4_PROTO:
+			case NPF_RC_MBUF_ERR:
+				npf_rc_ctrl[dir][rc].bm |=
+					RCT2BIT(NPF_RCT_NAT64);
+				break;
+			}
+
+			/* Init category */
+			switch (rc) {
+			case NPF_RC_PASS:
+			case NPF_RC_ENOSTR:
+			case NPF_RC_NAT64_4T6:
+			case NPF_RC_NAT64_6T4:
+				npf_rc_ctrl[dir][rc].cat = RC_CAT_PASS;
+				break;
+			case NPF_RC_UNMATCHED:
+				npf_rc_ctrl[dir][rc].cat = RC_CAT_NOMATCH;
+				break;
+			case NPF_RC_BLOCK:
+				npf_rc_ctrl[dir][rc].cat = RC_CAT_BLOCK;
+				break;
+			case NPF_RC_L3_HDR_VER:
+			case NPF_RC_L3_HDR_LEN:
+			case NPF_RC_NON_IP:
+			case NPF_RC_L3_PROTO:
+			case NPF_RC_L4_PROTO:
+			case NPF_RC_L4_SHORT:
+			case NPF_RC_ICMP_ECHO:
+			case NPF_RC_TCP_SYN:
+			case NPF_RC_TCP_STATE:
+			case NPF_RC_TCP_WIN:
+			case NPF_RC_SESS_ENOMEM:
+			case NPF_RC_SESS_LIMIT:
+			case NPF_RC_SESS_HOOK:
+			case NPF_RC_DP_SESS_ESTB:
+			case NPF_RC_L3_SHORT:
+			case NPF_RC_MBUF_ENOMEM:
+			case NPF_RC_MBUF_ERR:
+			case NPF_RC_NAT_ENOSPC:
+			case NPF_RC_NAT_ENOMEM:
+			case NPF_RC_NAT_EADDRINUSE:
+			case NPF_RC_NAT_ERANGE:
+			case NPF_RC_NAT_E2BIG:
+			case NPF_RC_ICMP_ERR_NAT:
+			case NPF_RC_ALG_EEXIST:
+			case NPF_RC_ALG_ERR:
+			case NPF_RC_NAT64_ENOSPC:
+			case NPF_RC_NAT64_ENOMEM:
+			case NPF_RC_NAT64_6052:
+			case NPF_RC_INTL:
+				npf_rc_ctrl[dir][rc].cat = RC_CAT_DROP;
+				break;
+			}
+		}
+
+
+	initd = true;
+}
+
+/*
+ * Is return code 'rc' used by return code type 'rct' in direction 'dir'?
+ * Any out-of-range argument yields false.
+ */
+static bool
+npf_rc_enabled(enum npf_rc_type rct, enum npf_rc_dir dir, enum npf_rc_en rc)
+{
+	if (rct >= NPF_RCT_SZ || dir >= NPF_DIR_SZ || rc >= NPF_RC_SZ)
+		return false;
+
+	return ((npf_rc_ctrl[dir][rc].bm & RCT2BIT(rct)) != 0);
+}
+
+/*
+ * Create npf counters. A set of counters is created per-interface.
+ */
+struct npf_rc_counts *npf_rc_counts_create(void)
+{
+	struct npf_rc_counts *rcc;
+
+	static_assert(PFIL2RC(PFIL_IN) == NPF_RC_IN,
+		      "npf rc is not correct");
+	static_assert(PFIL2RC(PFIL_OUT) == NPF_RC_OUT,
+		      "npf rc is not correct");
+
+	/* An array of per-lcore counter blocks; may be NULL on failure */
+	rcc = zmalloc_aligned((get_lcore_max() + 1) *
+			      sizeof(struct npf_rc_counts));
+
+	return rcc;
+}
+
+/*
+ * Free a set of counters and clear the caller's pointer.  A NULL 'rcc', or
+ * an 'rcc' that points at NULL, is a no-op.
+ */
+void npf_rc_counts_destroy(struct npf_rc_counts **rcc)
+{
+	if (rcc && *rcc) {
+		free(*rcc);
+		*rcc = NULL;
+	}
+}
+
+/*
+ * return-code short string
+ *
+ * 'rc' may be given negated.  Values beyond NPF_RC_LAST are reported as
+ * "RC_INTL".
+ */
+const char *npf_rc_str(int rc)
+{
+	if (rc < 0)
+		rc = -rc;
+	if (rc > NPF_RC_LAST)
+		rc = NPF_RC_INTL;
+
+	switch ((enum npf_rc_en)rc) {
+	case NPF_RC_UNMATCHED:
+		return "RC_UNMATCHED";
+	case NPF_RC_PASS:
+		return "RC_PASS";
+	case NPF_RC_BLOCK:
+		return "RC_BLOCK";
+	case NPF_RC_L3_HDR_VER:
+		return "RC_L3_HDR_VER";
+	case NPF_RC_L3_HDR_LEN:
+		return "RC_L3_HDR_LEN";
+	case NPF_RC_NON_IP:
+		return "RC_NON_IP";
+	case NPF_RC_L3_SHORT:
+		return "RC_L3_SHORT";
+	case NPF_RC_L4_SHORT:
+		return "RC_L4_SHORT";
+	case NPF_RC_L3_PROTO:
+		return "RC_L3_PROTO";
+	case NPF_RC_L4_PROTO:
+		return "RC_L4_PROTO";
+	case NPF_RC_ICMP_ECHO:
+		return "RC_ICMP_ECHO";
+	case NPF_RC_ENOSTR:
+		return "RC_ENOSTR";
+	case NPF_RC_TCP_SYN:
+		return "RC_TCP_SYN";
+	case NPF_RC_TCP_STATE:
+		return "RC_TCP_STATE";
+	case NPF_RC_TCP_WIN:
+		return "RC_TCP_WIN";
+	case NPF_RC_SESS_ENOMEM:
+		return "RC_SESS_ENOMEM";
+	case NPF_RC_SESS_LIMIT:
+		return "RC_SESS_LIMIT";
+	case NPF_RC_SESS_HOOK:
+		return "RC_SESS_HOOK";
+	case NPF_RC_DP_SESS_ESTB:
+		return "RC_DP_SESS_ESTB";
+	case NPF_RC_MBUF_ENOMEM:
+		return "RC_MBUF_ENOMEM";
+	case NPF_RC_MBUF_ERR:
+		return "RC_MBUF_ERR";
+	case NPF_RC_NAT_ENOSPC:
+		return "RC_NAT_ENOSPC";
+	case NPF_RC_NAT_ENOMEM:
+		return "RC_NAT_ENOMEM";
+	case NPF_RC_NAT_EADDRINUSE:
+		return "RC_NAT_EADDRINUSE";
+	case NPF_RC_NAT_ERANGE:
+		return "RC_NAT_ERANGE";
+	case NPF_RC_NAT_E2BIG:
+		return "RC_NAT_E2BIG";
+	case NPF_RC_ICMP_ERR_NAT:
+		return "RC_ICMP_ERR_NAT";
+	case NPF_RC_ALG_EEXIST:
+		return "RC_ALG_EEXIST";
+	case NPF_RC_ALG_ERR:
+		return "RC_ALG_ERR";
+	case NPF_RC_NAT64_4T6:
+		return "RC_NAT64_4T6";
+	case NPF_RC_NAT64_6T4:
+		return "RC_NAT64_6T4";
+	case NPF_RC_NAT64_ENOSPC:
+		return "RC_NAT64_ENOSPC";
+	case NPF_RC_NAT64_ENOMEM:
+		return "RC_NAT64_ENOMEM";
+	case NPF_RC_NAT64_6052:
+		return "RC_NAT64_6052";
+	case NPF_RC_INTL:
+		break;
+	};
+	return "RC_INTL";
+}
+
+/*
+ * return-code description
+ *
+ * 'rc' may be given negated.  Values beyond NPF_RC_LAST are reported as
+ * "internal error".
+ */
+const char *npf_rc_detail_str(int rc)
+{
+	if (rc < 0)
+		rc = -rc;
+	if (rc > NPF_RC_LAST)
+		rc = NPF_RC_INTL;
+
+	switch ((enum npf_rc_en)rc) {
+	case NPF_RC_UNMATCHED:
+		return "unmatched";
+	case NPF_RC_PASS:
+		return "pass";
+	case NPF_RC_BLOCK:
+		return "block";
+	case NPF_RC_L3_HDR_VER:
+		return "invalid IP header version field";
+	case NPF_RC_L3_HDR_LEN:
+		return "invalid IP header length field";
+	case NPF_RC_NON_IP:
+		return "non-IP packet";
+	case NPF_RC_L3_PROTO:
+		return "protocol mismatch";
+	case NPF_RC_L3_SHORT:
+		return "invalid layer 3 header";
+	case NPF_RC_L4_SHORT:
+		return "invalid layer 4 header";
+	case NPF_RC_L4_PROTO:
+		return "invalid layer 4 protocol";
+	case NPF_RC_ICMP_ECHO:
+		return "unsolicited ICMP echo reply";
+	case NPF_RC_ENOSTR:
+		return "unknown TCP reset";
+	case NPF_RC_TCP_SYN:
+		return "missing TCP SYN";
+	case NPF_RC_TCP_STATE:
+		return "invalid TCP flags";
+	case NPF_RC_TCP_WIN:
+		return "TCP window error";
+	case NPF_RC_SESS_ENOMEM:
+		return "no memory to create session";
+	case NPF_RC_SESS_LIMIT:
+		return "session limiter";
+	case NPF_RC_SESS_HOOK:
+		return "session hook";
+	case NPF_RC_DP_SESS_ESTB:
+		return "failed to create dataplane session";
+	case NPF_RC_MBUF_ENOMEM:
+		return "failed to allocate packet memory";
+	case NPF_RC_MBUF_ERR:
+		return "failed to prepend or adjust packet buffer";
+	case NPF_RC_NAT_ENOSPC:
+		return "failed to get NAT port mapping";
+	case NPF_RC_NAT_ENOMEM:
+		return "no memory to create NAT";
+	case NPF_RC_NAT_EADDRINUSE:
+		return "fragmented NAT port map";
+	case NPF_RC_NAT_ERANGE:
+		return "NAT port range too small";
+	case NPF_RC_NAT_E2BIG:
+		return "unable to fragment packet";
+	case NPF_RC_ICMP_ERR_NAT:
+		return "failed to translate ICMP error embedded pkt";
+	case NPF_RC_ALG_EEXIST:
+		return "ALG race condition";
+	case NPF_RC_ALG_ERR:
+		return "ALG error";
+	case NPF_RC_NAT64_4T6:
+		return "IPv4 to IPv6";
+	case NPF_RC_NAT64_6T4:
+		return "IPv6 to IPv4";
+	case NPF_RC_NAT64_ENOSPC:
+		return "Failed to get NAT64 port mapping";
+	case NPF_RC_NAT64_ENOMEM:
+		return "Failed to allocate NAT64 memory";
+	case NPF_RC_NAT64_6052:
+		return "failed to extract or encode rfc6052 NAT64 addr";
+	case NPF_RC_INTL:
+		break;
+	};
+	return "internal error";
+}
+
+/*
+ * Get count for one return-code in one direction, summed over the lcores
+ * visited by FOREACH_DP_LCORE.  Returns 0 for a NULL 'rcc' or any
+ * out-of-range index.
+ */
+static uint64_t
+npf_rc_read(struct npf_rc_counts *rcc, enum npf_rc_type rct,
+	    enum npf_rc_dir dir, enum npf_rc_en rc)
+{
+	uint64_t sum;
+	uint i;
+
+	if (rc >= NPF_RC_SZ || dir >= NPF_DIR_SZ || rct >= NPF_RCT_SZ || !rcc)
+		return 0UL;
+
+	sum = 0UL;
+	FOREACH_DP_LCORE(i)
+		sum += rcc[i].type[rct].dir[dir].count[rc];
+
+	return sum;
+}
+
+/*
+ * Sum the counters selected by the three filters.  NPF_RCT_ALL,
+ * NPF_DIR_ALL and RC_CAT_ALL each mean "do not filter on this".  The
+ * category filter reads npf_rc_ctrl[][], so npf_rc_ctrl_init() must have
+ * run first.
+ */
+static uint64_t
+npf_rc_total(struct npf_rc_counts *rcc, enum npf_rc_type opt_rct,
+	     enum npf_rc_dir opt_dir, enum rc_ctrl_cat opt_cat)
+{
+	enum npf_rc_type rct;
+	enum npf_rc_dir dir;
+	enum npf_rc_en rc;
+	uint64_t total = 0ul;
+
+	/* For each return code type */
+	for (rct = 0; rct < NPF_RCT_SZ; rct++) {
+		if (opt_rct != NPF_RCT_ALL && opt_rct != rct)
+			continue;
+
+		/* For each direction */
+		for (dir = 0; dir < NPF_DIR_SZ; dir++) {
+			if (opt_dir != NPF_DIR_ALL && opt_dir != dir)
+				continue;
+
+			/* For each count */
+			for (rc = 0; rc <= NPF_RC_LAST; rc++) {
+				if (opt_cat != RC_CAT_ALL &&
+				    opt_cat != npf_rc_ctrl[dir][rc].cat)
+					continue;
+				total += npf_rc_read(rcc, rct, dir, rc);
+			}
+		}
+	}
+
+	return total;
+}
+
+/* Show/clear command context */
+struct rcc_ctx {
+	json_writer_t		*ctx_json;
+	enum npf_rc_type	ctx_rct;
+	enum npf_rc_dir		ctx_dir;
+	enum rc_ctrl_cat	ctx_cat;
+	struct ifnet		*ctx_ifp;
+	bool			ctx_nonzero_only;
+	bool			ctx_detail;
+	bool			ctx_brief;
+	bool			ctx_rpc;
+};
+
+/*
+ * Write detailed json for npf return code counters in one direction
+ */
+static void
+npf_show_rc_dir_detail(json_writer_t *json, struct npf_rc_counts *rcc,
+		       enum npf_rc_type rct, enum npf_rc_dir dir,
+		       enum rc_ctrl_cat cat, struct rcc_ctx *ctx)
+{
+	enum npf_rc_en rc;
+	uint64_t count;
+	bool exception = false;
+
+	/*
+	 * We make an exception for nat64, and always return the ipv4-to-ipv6
+	 * and ipv6-to-ipv4 counts
+	 */
+	if (rct == NPF_RCT_NAT64 && cat == RC_CAT_PASS)
+		exception = true;
+
+	if (!ctx->ctx_detail && !exception)
+		return;
+
+	jsonw_name(json, "detail");
+	jsonw_start_object(json);
+
+	for (rc = 0; rc <= NPF_RC_LAST; rc++) {
+		if (cat != npf_rc_ctrl[dir][rc].cat)
+			continue;
+
+		/* Is this count enabled for this rc-type? */
+		if (!npf_rc_enabled(rct, dir, rc))
+			continue;
+
+		count = npf_rc_read(rcc, rct, dir, rc);
+		jsonw_uint_field(json, npf_rc_str(rc), count);
+	}
+	jsonw_end_object(json); /* detail */
+}
+
+/*
+ * rpc variant of the detail output: emits fixed field names rather than
+ * the npf_rc_str() names.
+ */
+static void
+npf_show_rc_dir_detail_rpc(json_writer_t *json, struct npf_rc_counts *rcc,
+			   enum npf_rc_type rct, enum npf_rc_dir dir,
+			   enum rc_ctrl_cat cat)
+{
+	uint64_t count;
+
+	/* We initially only return two NAT64 'in' 'pass' detailed counts */
+	if (rct == NPF_RCT_NAT64 && cat == RC_CAT_PASS && dir == NPF_RC_IN) {
+
+		jsonw_name(json, "detail");
+		jsonw_start_object(json);
+
+		count = npf_rc_read(rcc, rct, dir, NPF_RC_NAT64_4T6);
+		jsonw_uint_field(json, "ipv4-to-ipv6", count);
+
+		count = npf_rc_read(rcc, rct, dir, NPF_RC_NAT64_6T4);
+		jsonw_uint_field(json, "ipv6-to-ipv4", count);
+
+		jsonw_end_object(json);
+	}
+}
+
+/*
+ * Write json for npf return code counters in one direction
+ */
+static void
+npf_show_rc_counts_dir(json_writer_t *json, struct npf_rc_counts *rcc,
+		       enum npf_rc_type rct, enum npf_rc_dir dir,
+		       const char *name, struct rcc_ctx *ctx)
+{
+	enum rc_ctrl_cat cat;
+	uint64_t count;
+
+	jsonw_name(json, name);
+	jsonw_start_object(json);
+
+	/* For each of pass, unmatched, block and drop */
+	for (cat = 0; cat < RC_CAT_SZ; cat++) {
+		if (ctx->ctx_cat != RC_CAT_ALL && ctx->ctx_cat != cat)
+			continue;
+
+		const char *cat_name = rc_ctrl_cat2str(cat);
+
+		/* Total for this category */
+		count = npf_rc_total(rcc, rct, dir, cat);
+
+		jsonw_name(json, cat_name);
+		jsonw_start_object(json);
+
+		jsonw_uint_field(json, "count", count);
+
+		/* Conditionally show individual counts */
+		if (ctx->ctx_rpc)
+			npf_show_rc_dir_detail_rpc(json, rcc, rct, dir, cat);
+		else
+			npf_show_rc_dir_detail(json, rcc, rct, dir, cat, ctx);
+
+		jsonw_end_object(json); /* cat_name */
+	}
+
+	jsonw_end_object(json);
+}
+
+/*
+ * Is the rc type feature enabled on the given interface?
+ */
+static bool
+npf_rct_is_feature_enabled(enum npf_rc_type rct, struct ifnet *ifp)
+{
+	const struct npf_if *nif = rcu_dereference(ifp->if_npf);
+	const struct npf_config *npf_config = npf_if_conf(nif);
+
+	switch (rct) {
+	case NPF_RCT_FW4:
+		if (pl_node_is_feature_enabled_by_inst(&ipv4_fw_in_feat, ifp) ||
+		    pl_node_is_feature_enabled_by_inst(&ipv4_fw_out_feat, ifp))
+			return true;
+		return false;
+
+	case NPF_RCT_FW6:
+		if (pl_node_is_feature_enabled_by_inst(&ipv6_fw_in_feat, ifp) ||
+		    pl_node_is_feature_enabled_by_inst(&ipv6_fw_out_feat, ifp))
+			return true;
+		return false;
+
+	case NPF_RCT_LOC:
+		if (npf_active(npf_config, NPF_LOCAL) ||
+		    npf_active(npf_global_config, NPF_LOCAL) ||
+		    npf_zone_local_is_set() ||
+		    npf_active(npf_config, NPF_ORIGINATE) ||
+		    npf_active(npf_global_config, NPF_ORIGINATE))
+			return true;
+		return false;
+
+	case NPF_RCT_L2:
+		if (npf_active(npf_config, NPF_BRIDGE))
+			return true;
+		return false;
+
+	case NPF_RCT_ACL4:
+		if (pl_node_is_feature_enabled_by_inst(&ipv4_acl_in_feat,
+						       ifp) ||
+		    pl_node_is_feature_enabled_by_inst(&ipv4_acl_out_feat,
+						       ifp))
+			return true;
+		return false;
+
+	case NPF_RCT_ACL6:
+		if (pl_node_is_feature_enabled_by_inst(&ipv6_acl_in_feat,
+						       ifp) ||
+		    pl_node_is_feature_enabled_by_inst(&ipv6_acl_out_feat,
+						       ifp))
+			return true;
+		return false;
+
+	case NPF_RCT_NAT64:
+		if (pl_node_is_feature_enabled_by_inst(&ipv6_nat64_in_feat,
+						       ifp) ||
+		    pl_node_is_feature_enabled_by_inst(&ipv6_nat46_out_feat,
+						       ifp))
+			return true;
+		return false;
+	}
+	return false;
+}
+
+/*
+ * Write json for npf return code counters for one interface
+ *
+ * Nothing is written for a loopback interface, for an interface with no
+ * counters, or (when 'nonzero' was requested) when every selected count
+ * is zero.
+ */
+static void npf_show_rc_counts_intf(struct ifnet *ifp, void *arg)
+{
+	struct rcc_ctx *ctx = arg;
+	struct npf_rc_counts *rcc;
+	enum npf_rc_type rct;
+	enum npf_rc_dir dir;
+	json_writer_t *json;
+	bool first_rct = true;
+
+	if (is_lo(ifp))
+		return;
+
+	rcc = npf_if_get_rcc(ifp);
+	if (!rcc || !ctx)
+		return;
+
+	uint64_t total;
+
+	total = npf_rc_total(rcc, ctx->ctx_rct, ctx->ctx_dir, ctx->ctx_cat);
+	if (!total && ctx->ctx_nonzero_only)
+		return;
+
+	json = ctx->ctx_json;
+
+	for (rct = 0; rct < NPF_RCT_SZ; rct++) {
+		if (ctx->ctx_rct != NPF_RCT_ALL && ctx->ctx_rct != rct)
+			continue;
+
+		if (!npf_rct_is_feature_enabled(rct, ifp))
+			continue;
+
+		/* Check totals for this rc-type */
+		total = npf_rc_total(rcc, rct, ctx->ctx_dir, ctx->ctx_cat);
+		if (!total && ctx->ctx_nonzero_only)
+			continue;
+
+		/* Open the interface object lazily, on the first rc-type */
+		if (first_rct) {
+			jsonw_start_object(json);
+			jsonw_string_field(json, "name", ifp->if_name);
+			first_rct = false;
+		}
+
+		jsonw_name(json, npf_rct_str(rct));
+		jsonw_start_object(json);
+
+		for (dir = 0; dir < NPF_DIR_SZ; dir++) {
+			if (ctx->ctx_dir != NPF_DIR_ALL &&
+			    ctx->ctx_dir != dir)
+				continue;
+
+			npf_show_rc_counts_dir(json, rcc, rct, dir,
+					       npf_rc_dir_str(dir),
+					       ctx);
+		}
+
+		jsonw_end_object(json); /* rct */
+	}
+
+	if (!first_rct)
+		jsonw_end_object(json); /* if_name */
+}
+
+/*
+ * Parse show/clear command arguments
+ *
+ * Arguments are consumed as name/value pairs; a trailing unpaired word and
+ * unknown names are ignored.  Recognised pairs:
+ *
+ *   interface IFNAME
+ *   type RC-TYPE	(a name from npf_rct_str(); anything else means all)
+ *   dir {in|out}
+ *   cat {pass|unmatched|block|drop}
+ *   nonzero {true|1}
+ *   detail {true|1}
+ *   brief {true|1}
+ *   rpc {true|1}
+ *
+ * Returns 0, or -EINVAL if the named interface does not exist.
+ */
+static int
+npf_rc_counts_parse(FILE *f, int argc, char **argv, struct rcc_ctx *ctx)
+{
+	/* Default context/arguments */
+	ctx->ctx_json = NULL;
+	ctx->ctx_rct = NPF_RCT_ALL;
+	ctx->ctx_dir = NPF_DIR_ALL;
+	ctx->ctx_cat = RC_CAT_ALL;
+	ctx->ctx_ifp = NULL;
+	ctx->ctx_nonzero_only = false;
+	ctx->ctx_detail = false;
+	ctx->ctx_brief = false;
+	ctx->ctx_rpc = false;
+
+	/* All command options are in pairs */
+	while (argc > 1) {
+
+		if (!strcmp(argv[0], "type")) {
+			ctx->ctx_rct = npf_rct_str2enum(argv[1]);
+
+		} else if (!strcmp(argv[0], "interface")) {
+			ctx->ctx_ifp = dp_ifnet_byifname(argv[1]);
+
+			if (!ctx->ctx_ifp) {
+				/*
+				 * The message is the format string itself so
+				 * that the interface name is substituted.
+				 */
+				npf_cmd_err(f, "invalid interface %s",
+					    argv[1]);
+				return -EINVAL;
+			}
+
+		} else if (!strcmp(argv[0], "dir")) {
+			if (!strcasecmp(argv[1], "in"))
+				ctx->ctx_dir = NPF_RC_IN;
+			else if (!strcasecmp(argv[1], "out"))
+				ctx->ctx_dir = NPF_RC_OUT;
+
+		} else if (!strcmp(argv[0], "cat")) {
+			enum rc_ctrl_cat cat;
+
+			for (cat = 0; cat < RC_CAT_SZ; cat++) {
+				if (!strcasecmp(argv[1],
+						rc_ctrl_cat2str(cat))) {
+					ctx->ctx_cat = cat;
+					break;
+				}
+			}
+
+		} else if (!strcmp(argv[0], "nonzero")) {
+			if (!strcasecmp(argv[1], "true") ||
+			    !strcmp(argv[1], "1"))
+				ctx->ctx_nonzero_only = true;
+
+		} else if (!strcmp(argv[0], "detail")) {
+			if (!strcasecmp(argv[1], "true") ||
+			    !strcmp(argv[1], "1"))
+				ctx->ctx_detail = true;
+
+		} else if (!strcmp(argv[0], "brief")) {
+			if (!strcasecmp(argv[1], "true") ||
+			    !strcmp(argv[1], "1"))
+				ctx->ctx_brief = true;
+
+		} else if (!strcmp(argv[0], "rpc")) {
+			if (!strcasecmp(argv[1], "true") ||
+			    !strcmp(argv[1], "1"))
+				ctx->ctx_rpc = true;
+		}
+		/* Silently ignore unknown options */
+
+		argc -= 2;
+		argv += 2;
+	}
+
+	return 0;
+}
+
+/*
+ * Write json for npf return code counters for one or all interfaces
+ *
+ * [npf-op rc show counters] followed by the name/value pairs accepted by
+ * npf_rc_counts_parse().
+ */
+int npf_show_rc_counts(FILE *f, int argc, char **argv)
+{
+	struct rcc_ctx ctx = { 0 };
+	json_writer_t *json;
+	int rc;
+
+	/* The ctrl table drives the type/category filtering; init once */
+	npf_rc_ctrl_init();
+
+	/* Parse the arguments */
+	rc = npf_rc_counts_parse(f, argc, argv, &ctx);
+	if (rc < 0)
+		return rc;
+
+	json = jsonw_new(f);
+	if (!json)
+		return -EINVAL;
+
+	ctx.ctx_json = json;
+	jsonw_pretty(json, true);
+
+	jsonw_name(json, "npf-rc-counts");
+	jsonw_start_object(json);
+
+	jsonw_name(json, "interfaces");
+	jsonw_start_array(json);
+
+	if (ctx.ctx_ifp)
+		npf_show_rc_counts_intf(ctx.ctx_ifp, &ctx);
+	else
+		dp_ifnet_walk(npf_show_rc_counts_intf, &ctx);
+
+	jsonw_end_array(json); /* interfaces */
+	jsonw_end_object(json); /* npf-rc-counts */
+	jsonw_destroy(&json);
+	return 0;
+}
+
+/*
+ * Zero the counters on one interface that match the type, direction and
+ * category filters in the context.  The category filter reads
+ * npf_rc_ctrl[][], so npf_rc_ctrl_init() must have run first.
+ */
+static void npf_clear_rc_counts_intf(struct ifnet *ifp, void *arg)
+{
+	struct rcc_ctx *ctx = arg;
+	struct npf_rc_counts *rcc;
+	enum npf_rc_type rct;
+	enum npf_rc_dir dir;
+	enum npf_rc_en rc;
+	uint i;
+
+	rcc = npf_if_get_rcc(ifp);
+	if (!rcc || !ctx)
+		return;
+
+	/* For each core .. */
+	FOREACH_DP_LCORE(i)
+		/* For each rc type .. */
+		for (rct = 0; rct < NPF_RCT_SZ; rct++) {
+			if (ctx->ctx_rct != NPF_RCT_ALL && ctx->ctx_rct != rct)
+				continue;
+
+			/* For 'inbound' and 'outbound' .. */
+			for (dir = 0; dir < NPF_DIR_SZ; dir++) {
+				if (ctx->ctx_dir != NPF_DIR_ALL &&
+				    ctx->ctx_dir != dir)
+					continue;
+
+				/* For each return code count .. */
+				for (rc = 0; rc < NPF_RC_SZ; rc++) {
+					enum rc_ctrl_cat cat;
+
+					cat = npf_rc_ctrl[dir][rc].cat;
+					if (ctx->ctx_cat != RC_CAT_ALL &&
+					    ctx->ctx_cat != cat)
+						continue;
+
+					rcc[i].type[rct].dir[dir].count[rc] =
+						0UL;
+				}
+			}
+		}
+
+}
+
+/*
+ * Clear return code counters
+ *
+ * [npf-op rc clear counters] followed by the name/value pairs accepted by
+ * npf_rc_counts_parse(); the interface, type, dir and cat filters select
+ * what is cleared.
+ */
+int npf_clear_rc_counts(FILE *f, int argc, char **argv)
+{
+	struct rcc_ctx ctx = { 0 };
+	int rc;
+
+	/*
+	 * The category filter is looked up in the ctrl table, which is all
+	 * zeroes (i.e. every code looks like 'pass') until initialised.  A
+	 * clear may be issued before any show, so init here too.
+	 */
+	npf_rc_ctrl_init();
+
+	/* Parse the arguments */
+	rc = npf_rc_counts_parse(f, argc, argv, &ctx);
+	if (rc < 0)
+		return rc;
+
+	if (ctx.ctx_ifp)
+		npf_clear_rc_counts_intf(ctx.ctx_ifp, &ctx);
+	else
+		dp_ifnet_walk(npf_clear_rc_counts_intf, &ctx);
+
+	return 0;
+}
+
diff --git a/src/npf/npf_rc.h b/src/npf/npf_rc.h
new file mode 100644
index 00000000..2804b90d
--- /dev/null
+++ b/src/npf/npf_rc.h
@@ -0,0 +1,345 @@
+/*
+ * Copyright (c) 2020, AT&T Intellectual Property. All rights reserved.
+ *
+ * SPDX-License-Identifier: LGPL-2.1-only
+ */
+
+#ifndef _NPF_RC_H_
+#define _NPF_RC_H_
+
+#include
+#include
+#include
+#include
+#include
+
+#include "if_var.h"
+#include "npf/npf_if.h"
+#include "npf/npf.h"
+
+/*
+ * npf return codes
+ *
+ * The return code counter is effectively a multi-dimensional array of 64-bit
+ * counters arranged as follows:
+ *
+ * cpu-core[].type[].direction[].counter[]
+ *
+ * 'cpu-core' is at the outer level as it is the only dynamic element.
+ *
+ * 'type' is either IPv4 or IPv6 for npf_hook_track
+ *
+ * 'direction' is inbound or outbound.
+ */
+
+/*
+ * Return code type.
+ *
+ * NPF_RCT_FW4 and NPF_RCT_FW6 are used for npf_hook_track IPv4 and IPv6. We
+ * may want to add further entries here for npf_hook_notrack at some point,
+ * but that is somewhat more complicated.
+ */
+enum npf_rc_type {
+	NPF_RCT_FW4 = 0,
+	NPF_RCT_FW6,
+	NPF_RCT_LOC,
+	NPF_RCT_L2,
+	NPF_RCT_ACL4,
+	NPF_RCT_ACL6,
+	NPF_RCT_NAT64,
+};
+#define NPF_RCT_LAST	NPF_RCT_NAT64
+#define NPF_RCT_SZ	(NPF_RCT_LAST + 1)
+/* One past the last type; also what npf_rct_str2enum() returns on no match */
+#define NPF_RCT_ALL	NPF_RCT_SZ
+
+/*
+ * Bit for ONE return code type.  To build a mask of several types, OR the
+ * resulting bits (as RCT_BIT_ALL does) - do not OR the enum values and
+ * then shift.
+ */
+#define RCT2BIT(_rct)	(1 << (_rct))
+
+#define RCT_BIT_FW4	RCT2BIT(NPF_RCT_FW4)
+#define RCT_BIT_FW6	RCT2BIT(NPF_RCT_FW6)
+#define RCT_BIT_LOC	RCT2BIT(NPF_RCT_LOC)
+#define RCT_BIT_L2	RCT2BIT(NPF_RCT_L2)
+#define RCT_BIT_ACL4	RCT2BIT(NPF_RCT_ACL4)
+#define RCT_BIT_ACL6	RCT2BIT(NPF_RCT_ACL6)
+#define RCT_BIT_NAT64	RCT2BIT(NPF_RCT_NAT64)
+#define RCT_BIT_ALL	(RCT_BIT_FW4 | RCT_BIT_FW6 | RCT_BIT_LOC | \
+			 RCT_BIT_L2 | RCT_BIT_ACL4 | RCT_BIT_ACL6 | \
+			 RCT_BIT_NAT64)
+
+/*
+ * Eth type to rc type. For npf_hook_track only.  Anything that is not the
+ * IPv4 ether type maps to NPF_RCT_FW6.
+ */
+#define ETH2RCT(_et) (((_et) == htons(RTE_ETHER_TYPE_IPV4)) ? \
+		      NPF_RCT_FW4 : NPF_RCT_FW6)
+
+
+/* Name of a return code type; "Unkn" for a value outside the enum */
+static inline const char *npf_rct_str(enum npf_rc_type rct)
+{
+	switch (rct) {
+	case NPF_RCT_FW4:
+		return "ip";
+	case NPF_RCT_FW6:
+		return "ip6";
+	case NPF_RCT_LOC:
+		return "local";
+	case NPF_RCT_L2:
+		return "l2";
+	case NPF_RCT_ACL4:
+		return "ip-packet-filter";
+	case NPF_RCT_ACL6:
+		return "ip6-packet-filter";
+	case NPF_RCT_NAT64:
+		return "nat64";
+	}
+	return "Unkn";
+}
+
+/*
+ * Inverse of npf_rct_str().  The match is exact (case-sensitive); a name
+ * that matches no type yields NPF_RCT_ALL.  rct_str is passed straight to
+ * strcmp(), so it must not be NULL.
+ */
+static inline enum npf_rc_type npf_rct_str2enum(const char *rct_str)
+{
+	enum npf_rc_type rct;
+
+	for (rct = 0; rct < NPF_RCT_SZ; rct++)
+		if (!strcmp(rct_str, npf_rct_str(rct)))
+			return rct;
+
+	return NPF_RCT_ALL;
+}
+
+/*
+ * We keep inbound and outbound rc counts
+ */
+enum npf_rc_dir {
+	NPF_RC_IN = 0,
+	NPF_RC_OUT = 1
+};
+#define NPF_DIR_SZ	2
+#define NPF_DIR_ALL	NPF_DIR_SZ
+
+/* Converts PFIL_IN or PFIL_OUT to 'enum npf_rc_dir' */
+#define PFIL2RC(_dir) ((_dir) >> 1)
+
+/* Direction name; any value other than NPF_RC_IN is reported as "out" */
+static inline const char *npf_rc_dir_str(enum npf_rc_dir dir)
+{
+	return (dir == NPF_RC_IN) ? "in" : "out";
+}
+
+/*
+ * Return codes
+ *
+ * Default return code is NPF_RC_UNMATCHED. This indicates that a node saw
+ * the packet, but the node had no effect on the packet disposition.
+ *
+ * The convention is to negate a return code if it's an error or drop reason.
+ * So for example, a function might set "rc = -NPF_RC_INTL" if an internal
+ * error is detected. NPF_RC_PASS and NPF_RC_BLOCK are not errors or drops,
+ * so they would not be negated (we differentiate 'blocks' from 'drops').
+ *
+ * The order they are listed here is the order that they are displayed in,
+ * subject to the rc type filters.
+ */
+enum npf_rc_en {
+	NPF_RC_UNMATCHED = 0,
+	NPF_RC_PASS,		/* Matched session or pass rule, or no ruleset */
+	NPF_RC_NAT64_6T4,	/* IPv6 to IPv4 */
+	NPF_RC_NAT64_4T6,	/* IPv4 to IPv6 */
+	NPF_RC_BLOCK,		/* Explicit or implicit block */
+
+	/* Not enough L3 hdr present in pkt */
+	NPF_RC_L3_SHORT,
+
+	/* Not enough L4 hdr present in pkt or icmp err embd pkt */
+	NPF_RC_L4_SHORT,
+
+	/* L3 protocol value does not match pkt addr family */
+	NPF_RC_L3_PROTO,
+
+	/* No L4 ports or not ICMP echo */
+	NPF_RC_L4_PROTO,
+
+	/* A packet with a bad IP header version */
+	NPF_RC_L3_HDR_VER,
+
+	/* A packet with an invalid IP header length field */
+	NPF_RC_L3_HDR_LEN,
+
+	/* A Non-IP packet when an IPv4/IPv6 packet is required */
+	NPF_RC_NON_IP,
+
+	/*
+	 * If a ping session does not exist, it can only be created by an ICMP
+	 * echo request. If it exists, the fwd direction will conditionally
+	 * ('strict' enabled) only pass requests and the backward only
+	 * replies. Note, the 'strict' bit needs to be disabled because of MS
+	 * Windows clients. NPF_RC_ICMP_ECHO rc occurs when an ICMP echo
+	 * fails to meet these conditions, and is dropped.
+	 */
+	NPF_RC_ICMP_ECHO,
+
+	/*
+	 * TCP RST pkt not permitted to create a session. This is handled
+	 * differently from other 'error' return codes. A session is not
+	 * created, but the packet is allowed to pass.
+	 */
+	NPF_RC_ENOSTR,
+
+	/* Only a TCP SYN may create a session (strict) */
+	NPF_RC_TCP_SYN,
+
+	/* Invalid state transition (strict) */
+	NPF_RC_TCP_STATE,
+
+	/* TCP window error */
+	NPF_RC_TCP_WIN,
+
+	/* Failed to allocate session */
+	NPF_RC_SESS_ENOMEM,
+
+	/* Session limiter */
+	NPF_RC_SESS_LIMIT,
+
+	/* Unspecified session hook dropped pkt */
+	NPF_RC_SESS_HOOK,
+
+	/* Dataplane session create failed */
+	NPF_RC_DP_SESS_ESTB,
+
+	/* New mbuf failed to be allocated */
+	NPF_RC_MBUF_ENOMEM,
+
+	/* No translation ports available */
+	NPF_RC_NAT_ENOSPC,
+
+	/* Failed to allocate a nat struct or port map */
+	NPF_RC_NAT_ENOMEM,
+
+	/* Fragmented NAT port mappings */
+	NPF_RC_NAT_EADDRINUSE,
+
+	/* NAT port range too small */
+	NPF_RC_NAT_ERANGE,
+
+	/* Pkt exceeds intf MTU and is marked DF */
+	NPF_RC_NAT_E2BIG,
+
+	/* Failed to translate pkt embedded in ICMP error */
+	NPF_RC_ICMP_ERR_NAT,
+
+	/* ALG tuple match race lost */
+	NPF_RC_ALG_EEXIST,
+
+	/* Unspecified ALG error */
+	NPF_RC_ALG_ERR,
+
+	/* No translation ports available */
+	NPF_RC_NAT64_ENOSPC,
+
+	/* nat64 malloc failure */
+	NPF_RC_NAT64_ENOMEM,
+
+	/* Error extracting/inserting v4 addrs from/into v6 addrs */
+	NPF_RC_NAT64_6052,
+
+	/* Failed to prepend or adj pktmbuf */
+	NPF_RC_MBUF_ERR,
+
+	NPF_RC_INTL,		/* Internal error */
+};
+#define NPF_RC_LAST	NPF_RC_INTL
+#define NPF_RC_SZ	(NPF_RC_LAST + 1)
+
+/* The value 0 means two things depending on where it's used */
+#define NPF_RC_OK	NPF_RC_UNMATCHED
+
+
+/*
+ * Map an npf decision to a return code.  Both block decisions map to
+ * NPF_RC_BLOCK; an unknown or unlisted decision maps to NPF_RC_INTL.
+ */
+static inline enum npf_rc_en
+npf_decision2rc(npf_decision_t decision)
+{
+	assert(NPF_RC_OK == 0);
+
+	switch (decision) {
+	case NPF_DECISION_UNMATCHED:
+		return NPF_RC_UNMATCHED;
+	case NPF_DECISION_PASS:
+		return NPF_RC_PASS;
+	case NPF_DECISION_BLOCK:
+	case NPF_DECISION_BLOCK_UNACCOUNTED:
+		return NPF_RC_BLOCK;
+	case NPF_DECISION_UNKNOWN:
+		return NPF_RC_INTL;
+	}
+	return NPF_RC_INTL;
+}
+
+/*
+ * Per-core return code counters
+ */
+struct npf_rc_counts {
+	struct _af {
+		struct _dir {
+			uint64_t count[NPF_RC_SZ];
+		} dir[NPF_DIR_SZ];
+	} type[NPF_RCT_SZ];
+};
+
+/*
+ * Increment a return code counter
+ *
+ * 'rc' may be passed negated (the error/drop convention above) and is
+ * clamped to NPF_RC_INTL if it is beyond NPF_RC_LAST.  'rct' is used as an
+ * array index without a range check, and 'dir' is only checked by assert,
+ * so the caller must pass valid values for both.  Does nothing if the
+ * interface has no counters.
+ */
+static ALWAYS_INLINE void
+npf_rc_inc(struct ifnet *ifp, enum npf_rc_type rct, enum npf_rc_dir dir, int rc,
+	   npf_decision_t decision)
+{
+	assert(dir == NPF_RC_IN || dir == NPF_RC_OUT);
+
+	if (likely(rc < 0))
+		rc = -rc;
+	if (unlikely(rc > NPF_RC_LAST))
+		rc = NPF_RC_INTL;
+
+	/* Change return code if it is not already set */
+	if (rc == NPF_RC_UNMATCHED && decision != NPF_DECISION_UNMATCHED)
+		rc = npf_decision2rc(decision);
+
+	struct npf_rc_counts *rcc = npf_if_get_rcc(ifp);
+	if (unlikely(!rcc))
+		return;
+
+	/* Each lcore increments its own element of the array */
+	rcc[dp_lcore_id()].type[rct].dir[dir].count[rc]++;
+}
+
+/*
+ * NAT64 uses a different decision type
+ *
+ * Same as npf_rc_inc() for type NPF_RCT_NAT64, but 'rc' is counted as
+ * given (after negation and clamping); no decision is folded in.
+ */
+static ALWAYS_INLINE void
+npf_rc_inc_nat64(struct ifnet *ifp, enum npf_rc_dir dir, int rc)
+{
+	assert(dir == NPF_RC_IN || dir == NPF_RC_OUT);
+
+	if (likely(rc < 0))
+		rc = -rc;
+	if (unlikely(rc > NPF_RC_LAST))
+		rc = NPF_RC_INTL;
+
+	struct npf_rc_counts *rcc = npf_if_get_rcc(ifp);
+	if (unlikely(!rcc))
+		return;
+
+	rcc[dp_lcore_id()].type[NPF_RCT_NAT64].dir[dir].count[rc]++;
+}
+
+/*
+ * Create return code counters
+ */
+struct npf_rc_counts *npf_rc_counts_create(void);
+
+void npf_rc_counts_destroy(struct npf_rc_counts **rcc);
+
+/*
+ * Return code short string and description
+ */
+const char *npf_rc_str(int rc);
+const char *npf_rc_detail_str(int rc);
+
+/*
+ * Show/clear return code counters
+ */
+int npf_show_rc_counts(FILE *f, int argc, char **argv);
+int npf_clear_rc_counts(FILE *f, int argc, char **argv);
+
+#endif /* _NPF_RC_H_ */
diff --git a/src/npf/npf_rte_acl.c b/src/npf/npf_rte_acl.c
new file mode 100644
index 00000000..6425d707
--- /dev/null
+++ b/src/npf/npf_rte_acl.c
@@ -0,0 +1,848 @@
+/*
+ * Copyright (c) 2020, AT&T Intellectual Property. All rights reserved.
+ *
+ * SPDX-License-Identifier: LGPL-2.1-only
+ */
+
+#include
+#include
+#include
+#include "vplane_log.h"
+#include "npf_rte_acl.h"
+#include
+#include "../ip_funcs.h"
+#include "../netinet6/ip6_funcs.h"
+
+#define MAX_TRANSACTION_ENTRIES 512	/* cap on rule operations recorded per transaction */
+
+static rte_atomic32_t ctx_id;	/* incremented per context to build a unique ACL name */
+
+struct npf_match_ctx {	/* state for one rte_acl-backed matcher */
+	struct rte_acl_ctx *acl_ctx;	/* handle returned by rte_acl_create() */
+	char *name;	/* strdup()ed "<name>-<id>" string, freed on destroy */
+	uint16_t num_rules;	/* rules currently held; match and build bail out early on 0 */
+	struct trans_entry *tr;	/* transaction record buffer, rte_zmalloc()ed at init */
+	uint32_t tr_num_entries;	/* records used so far; reset to 0 on commit */
+	bool tr_in_progress;	/* set by start_transaction, cleared by commit */
+};
+
+enum rule_op {	/* operation remembered in a transaction record */
+	RULE_OP_ADD,
+	RULE_OP_DELETE
+};
+
+struct trans_entry {	/* one recorded rule operation */
+	enum rule_op rule_op;	/* what was done to the rule */
+	struct npf_match_ctx *trie;	/* context the operation was applied to */
+	struct rte_acl_rule *rule;	/* private copy of the rule, kept for rollback */
+};
+
+/* rte acl stuff */
+/*
+ * Rule and trace formats definitions.
+ */
+enum {	/* indices into ipv4_defs[] and into the rule's field[] array */
+	PROTO_FIELD_IPV4,
+	SRC_FIELD_IPV4,
+	DST_FIELD_IPV4,
+	SRCP_FIELD_IPV4,
+	DSTP_FIELD_IPV4,
+	NUM_FIELDS_IPV4
+};
+
+/*
+ * That effectively defines order of IPV4 classifications:
+ * - PROTO
+ * - SRC IP ADDRESS
+ * - DST IP ADDRESS
+ * - PORTS (SRC and DST)
+ */
+enum {	/* values used for .input_index in ipv4_defs[] */
+	RTE_ACL_IPV4_PROTO,
+	RTE_ACL_IPV4_SRC,
+	RTE_ACL_IPV4_DST,
+	RTE_ACL_IPV4_PORTS,
+	RTE_ACL_IPV4_NUM
+};
+
+/*
+ * rte-acl requires the first field in the rule to be 1 byte long.
+ * That is the reason for starting with the IP protocol number.
+ * The other fields are defined as offsets relative to the protocol
+ * field.
+ */
+static struct rte_acl_field_def ipv4_defs[NUM_FIELDS_IPV4] = {
+	[PROTO_FIELD_IPV4] = {
+		.type = RTE_ACL_FIELD_TYPE_RANGE,
+		.size = sizeof(uint8_t),
+		.field_index = PROTO_FIELD_IPV4,
+		.input_index = RTE_ACL_IPV4_PROTO,
+		.offset = 0,	/* the match buffer starts at the protocol byte */
+	},
+	[SRC_FIELD_IPV4] = {
+		.type = RTE_ACL_FIELD_TYPE_MASK,
+		.size = sizeof(uint32_t),
+		.field_index = SRC_FIELD_IPV4,
+		.input_index = RTE_ACL_IPV4_SRC,
+		.offset = (offsetof(struct rte_ipv4_hdr, src_addr) -
+			   offsetof(struct rte_ipv4_hdr, next_proto_id)),
+	},
+	[DST_FIELD_IPV4] = {
+		.type = RTE_ACL_FIELD_TYPE_MASK,
+		.size = sizeof(uint32_t),
+		.field_index = DST_FIELD_IPV4,
+		.input_index = RTE_ACL_IPV4_DST,
+		.offset = (offsetof(struct rte_ipv4_hdr, dst_addr) -
+			   offsetof(struct rte_ipv4_hdr, next_proto_id)),
+	},
+	[SRCP_FIELD_IPV4] = {	/* NOTE(review): offset is fixed at sizeof(struct rte_ipv4_hdr), i.e. a header without options - confirm */
+		.type = RTE_ACL_FIELD_TYPE_RANGE,
+		.size = sizeof(uint16_t),
+		.field_index = SRCP_FIELD_IPV4,
+		.input_index = RTE_ACL_IPV4_PORTS,	/* same input_index as the destination port below */
+		.offset = (sizeof(struct rte_ipv4_hdr) -
+			   offsetof(struct rte_ipv4_hdr, next_proto_id)),
+	},
+	[DSTP_FIELD_IPV4] = {
+		.type = RTE_ACL_FIELD_TYPE_RANGE,
+		.size = sizeof(uint16_t),
+		.field_index = DSTP_FIELD_IPV4,
+		.input_index = RTE_ACL_IPV4_PORTS,	/* presumably so both 16-bit ports form one 4-byte input group - see the DPDK ACL guide */
+		.offset = (sizeof(struct rte_ipv4_hdr) -
+			   offsetof(struct rte_ipv4_hdr, next_proto_id) +
+			   sizeof(uint16_t)),
+	},
+};
+
+#define IPV6_ADDR_LEN 16	/* bytes in an IPv6 address */
+#define IPV6_ADDR_U16 (IPV6_ADDR_LEN / sizeof(uint16_t))	/* address length in 16-bit words */
+#define IPV6_ADDR_U32 (IPV6_ADDR_LEN / sizeof(uint32_t))	/* address length in 32-bit words */
+
+enum {	/* indices into ipv6_defs[]; each address is split into four 32-bit fields */
+	PROTO_FIELD_IPV6,
+	SRC1_FIELD_IPV6,
+	SRC2_FIELD_IPV6,
+	SRC3_FIELD_IPV6,
+	SRC4_FIELD_IPV6,
+	DST1_FIELD_IPV6,
+	DST2_FIELD_IPV6,
+	DST3_FIELD_IPV6,
+	DST4_FIELD_IPV6,
+	SRCP_FIELD_IPV6,
+	DSTP_FIELD_IPV6,
+	NUM_FIELDS_IPV6
+};
+
+/*
+ * rte-acl requires the first field in the rule to be 1 byte long.
+ * That is the reason for starting with the IP protocol number.
+ * The other fields are defined as offsets relative to the protocol
+ * field.
+ */ +static struct rte_acl_field_def ipv6_defs[NUM_FIELDS_IPV6] = { + { + .type = RTE_ACL_FIELD_TYPE_RANGE, + .size = sizeof(uint8_t), + .field_index = PROTO_FIELD_IPV6, + .input_index = PROTO_FIELD_IPV6, + .offset = 0, + }, + { + .type = RTE_ACL_FIELD_TYPE_MASK, + .size = sizeof(uint32_t), + .field_index = SRC1_FIELD_IPV6, + .input_index = SRC1_FIELD_IPV6, + .offset = (offsetof(struct rte_ipv6_hdr, src_addr) - + offsetof(struct rte_ipv6_hdr, proto)), + }, + { + .type = RTE_ACL_FIELD_TYPE_MASK, + .size = sizeof(uint32_t), + .field_index = SRC2_FIELD_IPV6, + .input_index = SRC2_FIELD_IPV6, + .offset = (offsetof(struct rte_ipv6_hdr, src_addr) - + offsetof(struct rte_ipv6_hdr, proto) + + sizeof(uint32_t)), + }, + { + .type = RTE_ACL_FIELD_TYPE_MASK, + .size = sizeof(uint32_t), + .field_index = SRC3_FIELD_IPV6, + .input_index = SRC3_FIELD_IPV6, + .offset = (offsetof(struct rte_ipv6_hdr, src_addr) - + offsetof(struct rte_ipv6_hdr, proto) + + 2 * sizeof(uint32_t)), + }, + { + .type = RTE_ACL_FIELD_TYPE_MASK, + .size = sizeof(uint32_t), + .field_index = SRC4_FIELD_IPV6, + .input_index = SRC4_FIELD_IPV6, + .offset = (offsetof(struct rte_ipv6_hdr, src_addr) - + offsetof(struct rte_ipv6_hdr, proto) + + 3 * sizeof(uint32_t)), + }, + { + .type = RTE_ACL_FIELD_TYPE_MASK, + .size = sizeof(uint32_t), + .field_index = DST1_FIELD_IPV6, + .input_index = DST1_FIELD_IPV6, + .offset = (offsetof(struct rte_ipv6_hdr, dst_addr) + - offsetof(struct rte_ipv6_hdr, proto)), + }, + { + .type = RTE_ACL_FIELD_TYPE_MASK, + .size = sizeof(uint32_t), + .field_index = DST2_FIELD_IPV6, + .input_index = DST2_FIELD_IPV6, + .offset = (offsetof(struct rte_ipv6_hdr, dst_addr) - + offsetof(struct rte_ipv6_hdr, proto) + + sizeof(uint32_t)), + }, + { + .type = RTE_ACL_FIELD_TYPE_MASK, + .size = sizeof(uint32_t), + .field_index = DST3_FIELD_IPV6, + .input_index = DST3_FIELD_IPV6, + .offset = (offsetof(struct rte_ipv6_hdr, dst_addr) - + offsetof(struct rte_ipv6_hdr, proto) + + 2 * sizeof(uint32_t)), + }, + { 
+ .type = RTE_ACL_FIELD_TYPE_MASK, + .size = sizeof(uint32_t), + .field_index = DST4_FIELD_IPV6, + .input_index = DST4_FIELD_IPV6, + .offset = (offsetof(struct rte_ipv6_hdr, dst_addr) - + offsetof(struct rte_ipv6_hdr, proto) + + 3 * sizeof(uint32_t)), + }, + { + .type = RTE_ACL_FIELD_TYPE_RANGE, + .size = sizeof(uint16_t), + .field_index = SRCP_FIELD_IPV6, + .input_index = SRCP_FIELD_IPV6, + .offset = (sizeof(struct rte_ipv6_hdr) - + offsetof(struct rte_ipv6_hdr, proto)), + }, + { + .type = RTE_ACL_FIELD_TYPE_RANGE, + .size = sizeof(uint16_t), + .field_index = DSTP_FIELD_IPV6, + .input_index = SRCP_FIELD_IPV6, + .offset = (sizeof(struct rte_ipv6_hdr) - + offsetof(struct rte_ipv6_hdr, proto) + + sizeof(uint16_t)), + }, +}; + +enum { + CB_FLD_SRC_ADDR, + CB_FLD_DST_ADDR, + CB_FLD_SRC_PORT_LOW, + CB_FLD_SRC_PORT_DLM, + CB_FLD_SRC_PORT_HIGH, + CB_FLD_DST_PORT_LOW, + CB_FLD_DST_PORT_DLM, + CB_FLD_DST_PORT_HIGH, + CB_FLD_PROTO, + CB_FLD_USERDATA, + CB_FLD_NUM, +}; + +RTE_ACL_RULE_DEF(acl4_rules, RTE_DIM(ipv4_defs)); +RTE_ACL_RULE_DEF(acl6_rules, RTE_DIM(ipv6_defs)); + +static uint32_t +acl_rule_hash(const void *data, uint32_t data_len, uint32_t init_val) +{ + const struct rte_acl_rule *rule = (const struct rte_acl_rule *) data; + + return rte_jhash(&rule->data.userdata, data_len, init_val); +} + +/* + * Packet matching callback functions which use the rte_acl API + */ + +int npf_rte_acl_init(int af, const char *name, uint32_t max_rules, + struct rte_mempool *mempool, struct rte_rcu_qsbr *rcu_v, + npf_match_ctx_t **m_ctx) +{ + size_t key_len = sizeof(((struct rte_acl_rule *) 0)->data.userdata); + struct rte_acl_param acl_param = { + .socket_id = SOCKET_ID_ANY, + .max_rule_num = max_rules, + .flags = ACL_F_USE_HASHTABLE, + .hash_func = acl_rule_hash, + .hash_key_len = key_len, + .rule_pool = mempool, + }; + struct rte_acl_rcu_config rcu_conf = { + .v = rcu_v, + .mode = RTE_ACL_QSBR_MODE_SYNC, + .thread_id = dp_lcore_id() + }; + char acl_name[RTE_ACL_NAMESIZE]; + 
npf_match_ctx_t *tmp_ctx; + size_t tr_sz, rule_size; + int32_t id; + int err; + + if (af == AF_INET) + rule_size = RTE_ACL_RULE_SZ(RTE_DIM(ipv4_defs)); + else + rule_size = RTE_ACL_RULE_SZ(RTE_DIM(ipv6_defs)); + + tmp_ctx = calloc(1, sizeof(npf_match_ctx_t)); + if (!tmp_ctx) { + RTE_LOG(ERR, DATAPLANE, + "Could not allocate match context for %s\n", name); + return -ENOMEM; + } + + /* + * rte_acl_create returns a pointer to an existing context if + * there is one of the same name. The NPF call flow involves + * re-creating an entire ruleset when there are changes to the + * configuration. In order to ensure that a new context is created + * each time, a unique number is suffixed to the name + */ + + id = rte_atomic32_add_return(&ctx_id, 1); + snprintf(acl_name, RTE_ACL_NAMESIZE, "%s-%d", name, id); + acl_param.name = acl_name; + + tmp_ctx->name = strdup(acl_name); + if (!tmp_ctx->name) { + RTE_LOG(ERR, DATAPLANE, + "Could not allocate name %s for ACL ctx\n", + acl_name); + free(tmp_ctx); + return -ENOMEM; + } + + acl_param.rule_size = rule_size; + + tmp_ctx->acl_ctx = rte_acl_create(&acl_param); + if (tmp_ctx->acl_ctx == NULL) { + RTE_LOG(ERR, DATAPLANE, + "Could not allocate ACL context for %s\n", + (af == AF_INET ? 
"ipv4" : "ipv6")); + free(tmp_ctx->name); + free(tmp_ctx); + return -ENOMEM; + } + + err = rte_acl_rcu_qsbr_add(tmp_ctx->acl_ctx, &rcu_conf); + if (err) { + RTE_LOG(ERR, DATAPLANE, "Failed to enable RCU for ACL ctx %s\n", + tmp_ctx->name); + goto error; + + } + + tr_sz = (sizeof(struct trans_entry) + rule_size) + * MAX_TRANSACTION_ENTRIES; + tmp_ctx->tr = rte_zmalloc("trie_transaction_records", tr_sz, + RTE_CACHE_LINE_SIZE); + if (!tmp_ctx->tr) { + RTE_LOG(ERR, DATAPLANE, + "Could not allocate transaction record memory pool for trie %s\n", + tmp_ctx->name); + err = -ENOMEM; + goto error; + } + + *m_ctx = tmp_ctx; + + return 0; + +error: + if (tmp_ctx->acl_ctx) + rte_acl_free(tmp_ctx->acl_ctx); + + if (tmp_ctx->name) + free(tmp_ctx->name); + + if (tmp_ctx) + free(tmp_ctx); + + return err; +} + +/* + * convert big-endian wildcard mask to mask + */ +static inline uint8_t wc_mask_to_mask(const uint8_t *wc_mask, uint8_t len) +{ + uint8_t mask = 0, tmp; + int8_t i, j; + + for (i = len - 1; i >= 0; i--) { + tmp = wc_mask[i]; + + for (j = 0; j < 8; j++) { + if (!(tmp & 0x1)) + break; + mask++; + tmp >>= 1; + } + if (j != 8) + break; + } + return ((len * 8) - mask); +} + +static int npf_rte_acl_record_transaction_entry(npf_match_ctx_t *m_ctx, + enum rule_op rule_op, + const struct rte_acl_rule + *acl_rule, size_t rule_sz) +{ + struct trans_entry *t_entry = NULL; + uintptr_t ptr; + + if (m_ctx->tr_num_entries >= MAX_TRANSACTION_ENTRIES) { + RTE_LOG(ERR, DATAPLANE, + "Number of transaction entries for trie %s exceeded (%u).\n", + m_ctx->name, MAX_TRANSACTION_ENTRIES); + return -ENOMEM; + } + + t_entry = &m_ctx->tr[m_ctx->tr_num_entries++]; + t_entry->rule_op = rule_op; + t_entry->trie = m_ctx; + + ptr = (uintptr_t) t_entry + sizeof(struct trans_entry); + + t_entry->rule = (struct rte_acl_rule *)ptr; + + memcpy(t_entry->rule, acl_rule, rule_sz); + return 0; +} + +/* + * convert IPv4 5 tuple and mask to ACL rule + * The rules are stored in NPF in network byte order. 
+ * However rte_acl expects the rules to be in host byte order.
+ *
+ * Multi-byte values are read with memcpy() because the offsets into the
+ * byte arrays are not known to be suitably aligned, and addresses are
+ * converted with rte_be_to_cpu_32() so the result is correct on hosts of
+ * either endianness.
+ *
+ * NOTE(review): port values and masks are copied without byte-order
+ * conversion, and the mask is used directly as the upper bound of the
+ * RANGE field - confirm this matches how the grouper encodes ports.
+ */
+static void npf_rte_acl_add_v4_rule(const uint8_t *match_addr, uint8_t *mask,
+				    uint32_t rule_no,
+				    struct acl4_rules *v4_rules)
+{
+	uint16_t val, val_mask;
+	uint16_t port;
+	uint32_t addr;
+
+	memset(v4_rules, 0, sizeof(*v4_rules));
+	v4_rules->data.category_mask = 1;
+	v4_rules->data.priority = rule_no;
+	v4_rules->data.userdata = rule_no;
+
+	/*
+	 * Protocol id may either be unspecified or a specific value
+	 */
+	val = match_addr[NPC_GPR_PROTO_OFF_v4];
+	val_mask = mask[NPC_GPR_PROTO_OFF_v4];
+	if (val_mask == 0)
+		val_mask = val;
+
+	v4_rules->field[PROTO_FIELD_IPV4].value.u8 = val;
+	v4_rules->field[PROTO_FIELD_IPV4].mask_range.u8 = val_mask;
+
+	memcpy(&addr, &match_addr[NPC_GPR_SADDR_OFF_v4], sizeof(addr));
+	v4_rules->field[SRC_FIELD_IPV4].value.u32 = rte_be_to_cpu_32(addr);
+	v4_rules->field[SRC_FIELD_IPV4].mask_range.u32 =
+		wc_mask_to_mask(&mask[NPC_GPR_SADDR_OFF_v4], 4);
+
+	memcpy(&addr, &match_addr[NPC_GPR_DADDR_OFF_v4], sizeof(addr));
+	v4_rules->field[DST_FIELD_IPV4].value.u32 = rte_be_to_cpu_32(addr);
+	v4_rules->field[DST_FIELD_IPV4].mask_range.u32 =
+		wc_mask_to_mask(&mask[NPC_GPR_DADDR_OFF_v4], 4);
+
+	memcpy(&port, &match_addr[NPC_GPR_SPORT_OFF_v4], sizeof(port));
+	v4_rules->field[SRCP_FIELD_IPV4].value.u16 = port;
+	memcpy(&port, &mask[NPC_GPR_SPORT_OFF_v4], sizeof(port));
+	v4_rules->field[SRCP_FIELD_IPV4].mask_range.u16 = port;
+
+	memcpy(&port, &match_addr[NPC_GPR_DPORT_OFF_v4], sizeof(port));
+	v4_rules->field[DSTP_FIELD_IPV4].value.u16 = port;
+	memcpy(&port, &mask[NPC_GPR_DPORT_OFF_v4], sizeof(port));
+	v4_rules->field[DSTP_FIELD_IPV4].mask_range.u16 = port;
+}
+
+/*
+ * convert IPv6 5 tuple and mask to ACL rule
+ * The rules are stored in NPF in network byte order.
+ * However rte_acl expects the rules to be in host byte order.
+ */ +static void npf_rte_acl_add_v6_rule(uint8_t *match_addr, uint8_t *mask, + uint32_t rule_no, + struct acl6_rules *v6_rules) +{ + uint16_t val, val_mask; + uint8_t *v6_addr, *v6_mask; + + memset(v6_rules, 0, sizeof(*v6_rules)); + v6_rules->data.category_mask = 1; + v6_rules->data.priority = rule_no; + v6_rules->data.userdata = rule_no; + + /* + * Protocol id may either be unspecified or a specific value + */ + val = match_addr[NPC_GPR_PROTO_OFF_v6]; + val_mask = mask[NPC_GPR_PROTO_OFF_v6]; + if (val_mask == 0) + val_mask = val; + + v6_rules->field[PROTO_FIELD_IPV6].value.u8 = val; + v6_rules->field[PROTO_FIELD_IPV6].mask_range.u8 = val_mask; + + v6_addr = &match_addr[NPC_GPR_SADDR_OFF_v6]; + v6_mask = &mask[NPC_GPR_SADDR_OFF_v6]; + v6_rules->field[SRC1_FIELD_IPV6].value.u32 = + rte_bswap32(*(uint32_t *)v6_addr); + v6_rules->field[SRC1_FIELD_IPV6].mask_range.u32 = + wc_mask_to_mask(v6_mask, 4); + v6_addr += sizeof(uint32_t); + v6_mask += sizeof(uint32_t); + + v6_rules->field[SRC2_FIELD_IPV6].value.u32 = + rte_bswap32(*(uint32_t *)v6_addr); + v6_rules->field[SRC2_FIELD_IPV6].mask_range.u32 = + wc_mask_to_mask(v6_mask, 4); + v6_addr += sizeof(uint32_t); + v6_mask += sizeof(uint32_t); + + v6_rules->field[SRC3_FIELD_IPV6].value.u32 = + rte_bswap32(*(uint32_t *)v6_addr); + v6_rules->field[SRC3_FIELD_IPV6].mask_range.u32 = + wc_mask_to_mask(v6_mask, 4); + v6_addr += sizeof(uint32_t); + v6_mask += sizeof(uint32_t); + + v6_rules->field[SRC4_FIELD_IPV6].value.u32 = + rte_bswap32(*(uint32_t *)v6_addr); + v6_rules->field[SRC4_FIELD_IPV6].mask_range.u32 = + wc_mask_to_mask(v6_mask, 4); + + v6_addr = &match_addr[NPC_GPR_DADDR_OFF_v6]; + v6_mask = &mask[NPC_GPR_DADDR_OFF_v6]; + + v6_rules->field[DST1_FIELD_IPV6].value.u32 = + rte_bswap32(*(uint32_t *)v6_addr); + v6_rules->field[DST1_FIELD_IPV6].mask_range.u32 = + wc_mask_to_mask(v6_mask, 4); + v6_addr += sizeof(uint32_t); + v6_mask += sizeof(uint32_t); + + v6_rules->field[DST2_FIELD_IPV6].value.u32 = + rte_bswap32(*(uint32_t 
*)v6_addr); + v6_rules->field[DST2_FIELD_IPV6].mask_range.u32 = + wc_mask_to_mask(v6_mask, 4); + v6_addr += sizeof(uint32_t); + v6_mask += sizeof(uint32_t); + + v6_rules->field[DST3_FIELD_IPV6].value.u32 = + rte_bswap32(*(uint32_t *)v6_addr); + v6_rules->field[DST3_FIELD_IPV6].mask_range.u32 = + wc_mask_to_mask(v6_mask, 4); + v6_addr += sizeof(uint32_t); + v6_mask += sizeof(uint32_t); + + v6_rules->field[DST4_FIELD_IPV6].value.u32 = + rte_bswap32(*(uint32_t *)v6_addr); + v6_rules->field[DST4_FIELD_IPV6].mask_range.u32 = + wc_mask_to_mask(v6_mask, 4); + + v6_rules->field[SRCP_FIELD_IPV6].value.u16 = + *(uint16_t *)&match_addr[NPC_GPR_SPORT_OFF_v6]; + v6_rules->field[SRCP_FIELD_IPV6].mask_range.u16 = + *(uint16_t *)&mask[NPC_GPR_SPORT_OFF_v6]; + + v6_rules->field[DSTP_FIELD_IPV6].value.u16 = + *(uint16_t *)&match_addr[NPC_GPR_DPORT_OFF_v6]; + v6_rules->field[DSTP_FIELD_IPV6].mask_range.u16 = + *(uint16_t *)&mask[NPC_GPR_DPORT_OFF_v6]; +} + +static int _npf_rte_acl_add_rule(int af, npf_match_ctx_t *m_ctx, + const struct rte_acl_rule *acl_rule) +{ + int err; + + err = rte_acl_add_rules(m_ctx->acl_ctx, acl_rule, 1); + if (err) { + RTE_LOG(ERR, DATAPLANE, "Could not add rule for af %d : %d\n", + af, err); + return err; + } + + return 0; +} + +int npf_rte_acl_add_rule(int af, npf_match_ctx_t *m_ctx, uint32_t rule_no, + uint8_t *match_addr, uint8_t *mask, + void *match_ctx __rte_unused) +{ + struct acl4_rules v4_rules; + struct acl6_rules v6_rules; + const struct rte_acl_rule *acl_rule; + int err = 0; + size_t rule_sz; + + if (!m_ctx->tr_in_progress) { + RTE_LOG(ERR, DATAPLANE, + "Could not add rule %u for trie %s: no transaction in progress\n", + rule_no, m_ctx->name); + return -EINVAL; + } + + if (af == AF_INET) { + npf_rte_acl_add_v4_rule(match_addr, mask, rule_no, &v4_rules); + acl_rule = (const struct rte_acl_rule *)&v4_rules; + rule_sz = sizeof(struct acl4_rules); + } else { + npf_rte_acl_add_v6_rule(match_addr, mask, rule_no, &v6_rules); + acl_rule = (const struct 
rte_acl_rule *)&v6_rules; + rule_sz = sizeof(struct acl6_rules); + } + + err = npf_rte_acl_record_transaction_entry(m_ctx, RULE_OP_ADD, + acl_rule, rule_sz); + if (err) + return err; + + err = _npf_rte_acl_add_rule(af, m_ctx, acl_rule); + if (err < 0) + return err; + + m_ctx->num_rules++; + + return 0; +} + +static int npf_rte_acl_build(int af, npf_match_ctx_t **m_ctx) +{ + struct rte_acl_config cfg = { 0 }; + int err; + npf_match_ctx_t *ctx = *m_ctx; + + if (!ctx) + return -EINVAL; + + if (!ctx->num_rules) + return 0; + + cfg.num_categories = 1; + if (af == AF_INET) { + cfg.num_fields = RTE_DIM(ipv4_defs); + memcpy(cfg.defs, ipv4_defs, sizeof(ipv4_defs)); + } else { + cfg.num_fields = RTE_DIM(ipv6_defs); + memcpy(cfg.defs, ipv6_defs, sizeof(ipv6_defs)); + } + + /* build the runtime structures for added rules, with 2 categories. */ + err = rte_acl_build(ctx->acl_ctx, &cfg); + if (err != 0) { + /* handle error at build runtime structures for ACL context. */ + RTE_LOG(ERR, DATAPLANE, + "Could not build ACL rules for %s : %s\n", + (af == AF_INET ? 
"ipv4" : "ipv6"), strerror(-err)); + return err; + } + + return 0; +} + +static int +_npf_rte_acl_del_rule(int af, struct npf_match_ctx *m_ctx, + const struct rte_acl_rule *acl_rule) +{ + int err = 0; + + err = rte_acl_del_rule(m_ctx->acl_ctx, acl_rule); + if (err && err != -ENOENT) { + RTE_LOG(ERR, DATAPLANE, + "Could not remove rule for af %d : %d\n", af, err); + return err; + } + + /* Only reduce counter if there was a matching delete */ + if (err != -ENOENT) + m_ctx->num_rules--; + + return err; +} + +int npf_rte_acl_del_rule(int af, npf_match_ctx_t *m_ctx, uint32_t rule_no, + uint8_t *match_addr, uint8_t *mask) +{ + struct acl4_rules v4_rules; + struct acl6_rules v6_rules; + const struct rte_acl_rule *acl_rule; + int err = 0; + size_t rule_sz; + + + if (af == AF_INET) { + npf_rte_acl_add_v4_rule(match_addr, mask, rule_no, &v4_rules); + acl_rule = (const struct rte_acl_rule *)&v4_rules; + rule_sz = sizeof(struct acl4_rules); + } else { + npf_rte_acl_add_v6_rule(match_addr, mask, rule_no, &v6_rules); + acl_rule = (const struct rte_acl_rule *)&v6_rules; + rule_sz = sizeof(struct acl6_rules); + } + + if (!m_ctx->tr_in_progress) { + RTE_LOG(ERR, DATAPLANE, + "Could not delete rule %d from trie %s: no transaction in progress\n", + rule_no, m_ctx->name); + return -EINVAL; + } + + err = npf_rte_acl_record_transaction_entry(m_ctx, RULE_OP_DELETE, + acl_rule, rule_sz); + if (err) + return err; + + return _npf_rte_acl_del_rule(af, m_ctx, acl_rule); +} + +int npf_rte_acl_match(int af, npf_match_ctx_t *m_ctx, + npf_cache_t *npc __rte_unused, + struct npf_match_cb_data *data, + uint32_t *rule_no) +{ + int ret; + uint32_t results = 0; + const uint8_t *pkt_data[1]; + struct rte_mbuf *m = data->mbuf; + uint8_t *nlp; + + if (!m_ctx->num_rules) + return -ENOENT; + + if (af == AF_INET) { + nlp = (uint8_t *)iphdr(m); + nlp = RTE_PTR_ADD(nlp, offsetof(struct ip, ip_p)); + } else { + nlp = (uint8_t *)ip6hdr(m); + nlp = RTE_PTR_ADD(nlp, offsetof(struct rte_ipv6_hdr, proto)); + } + 
pkt_data[0] = nlp; + + ret = rte_acl_classify(m_ctx->acl_ctx, pkt_data, &results, 1, 1); + if (ret) + return -EINVAL; + + *rule_no = results; + return results ? 0 : -ENOENT; +} + +int npf_rte_acl_start_transaction(int af __unused, npf_match_ctx_t *m_ctx) +{ + if (m_ctx->tr_in_progress) { + RTE_LOG(ERR, DATAPLANE, + "Transaction already in progress for trie %s\n", + m_ctx->name); + return -EINPROGRESS; + } + + m_ctx->tr_in_progress = true; + return 0; +} + +/* Rollsback all operations of the current transaction. + * In the unexpected case that an individual rollback operation + * failed, this method will continue rolling back all other rules. + * + * Return code is smaller zero if at least one rollback action did + * not succeed. + */ +static int npf_rte_acl_rollback_transaction(int af, npf_match_ctx_t *m_ctx) +{ + uint32_t i; + int rc = 0; + + /* Rollbacks are not yet ready for prime-time. + * + * Transaction failures are considered fatal since then. + */ + rte_panic("Fatal error: NPF RTE ACL transaction failed.\n."); + + for (i = 0; i < m_ctx->tr_num_entries; i++) { + struct trans_entry *te = &m_ctx->tr[i]; + + switch (te->rule_op) { + case RULE_OP_ADD: + if (_npf_rte_acl_del_rule(af, te->trie, te->rule) < 0) + rc = -1; + break; + case RULE_OP_DELETE: + if (_npf_rte_acl_add_rule(af, te->trie, te->rule) < 0) + rc = -1; + break; + default: + RTE_LOG(ERR, DATAPLANE, + "Unexpected transaction rule operation (%d) for trie %s\n", + te->rule_op, m_ctx->name); + rc = -1; + break; + } + + if (rc < 0) { + RTE_LOG(ERR, DATAPLANE, + "Failed to rollback rule on trie %s\n", + m_ctx->name); + } + } + + return rc; +} + +int npf_rte_acl_commit_transaction(int af, npf_match_ctx_t *m_ctx) +{ + int rc = 0; + rc = npf_rte_acl_build(af, &m_ctx); + + /* build failed -> rollback transaction */ + if (rc < 0) { + if (npf_rte_acl_rollback_transaction(af, m_ctx) < 0) { + RTE_LOG(ERR, DATAPLANE, + "FATAL: Transaction rollback of trie failed %s\n", + m_ctx->name); + } + } + + 
m_ctx->tr_num_entries = 0; + m_ctx->tr_in_progress = false; + return rc; +} + +int npf_rte_acl_destroy(int af __rte_unused, npf_match_ctx_t **m_ctx) +{ + npf_match_ctx_t *ctx = *m_ctx; + + if (ctx) { + rte_acl_reset(ctx->acl_ctx); + rte_acl_free(ctx->acl_ctx); + free(ctx->name); + rte_free(ctx->tr); + free(ctx); + *m_ctx = NULL; + } + + return 0; +} + +size_t npf_rte_acl_rule_size(int af) +{ + if (af == AF_INET) + return RTE_ACL_RULE_SZ(RTE_DIM(ipv4_defs)); + + return RTE_ACL_RULE_SZ(RTE_DIM(ipv6_defs)); +} diff --git a/src/npf/npf_rte_acl.h b/src/npf/npf_rte_acl.h new file mode 100644 index 00000000..9ffaeda2 --- /dev/null +++ b/src/npf/npf_rte_acl.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2020, AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ + +#ifndef NPF_RTE_ACL_H +#define NPF_RTE_ACL_H + +#include +#include "npf_cache.h" +#include "npf_match.h" + +int npf_rte_acl_init(int af, const char *name, uint32_t max_rules, + struct rte_mempool *mempool, struct rte_rcu_qsbr *rcu_v, + npf_match_ctx_t **m_ctx); + +int npf_rte_acl_start_transaction(int af, npf_match_ctx_t *m_ctx); + +int npf_rte_acl_commit_transaction(int af, npf_match_ctx_t *m_ctx); + +int npf_rte_acl_add_rule(int af, npf_match_ctx_t *m_ctx, + uint32_t rule_no, + uint8_t *match_add, uint8_t *mask, + void *match_ctx); + +int npf_rte_acl_del_rule(int af, npf_match_ctx_t *m_ctx, uint32_t rule_no, + uint8_t *match_addr, uint8_t *mask); + +int npf_rte_acl_match(int af, npf_match_ctx_t *m_ctx, npf_cache_t *npc, + struct npf_match_cb_data *data, uint32_t *rule_no); + +int npf_rte_acl_destroy(int af, npf_match_ctx_t **m_ctx); + +size_t npf_rte_acl_rule_size(int af); + +#endif diff --git a/src/npf/npf_rule_gen.c b/src/npf/npf_rule_gen.c index 6b89a0c5..1b32667d 100644 --- a/src/npf/npf_rule_gen.c +++ b/src/npf/npf_rule_gen.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. 
+ * Copyright (c) 2017-2021, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -355,9 +355,10 @@ npf_grouper_add_port(struct npf_rule_ctx *ctx, int options, uint16_t match, } static void -npf_grouper_add_icmpv4_type_code(struct npf_rule_ctx *ctx, int type, int code) +npf_grouper_add_icmpv4_type_code(struct npf_rule_ctx *ctx, bool class, + int type, int code) { - if (ctx->grouper_flags & GPR_SET_ICMPV4_TYPE_CODE) { + if (class || (ctx->grouper_flags & GPR_SET_ICMPV4_TYPE_CODE)) { type = -1; code = -1; } @@ -374,9 +375,10 @@ npf_grouper_add_icmpv4_type_code(struct npf_rule_ctx *ctx, int type, int code) } static void -npf_grouper_add_icmpv6_type_code(struct npf_rule_ctx *ctx, int type, int code) +npf_grouper_add_icmpv6_type_code(struct npf_rule_ctx *ctx, bool class, + int type, int code) { - if (ctx->grouper_flags & GPR_SET_ICMPV6_TYPE_CODE) { + if (class || (ctx->grouper_flags & GPR_SET_ICMPV6_TYPE_CODE)) { type = -1; code = -1; } @@ -394,12 +396,12 @@ npf_grouper_add_icmpv6_type_code(struct npf_rule_ctx *ctx, int type, int code) static void npf_grouper_add_icmp_type_code(struct npf_rule_ctx *ctx, int options, - int type, int code) + bool class, int type, int code) { if (options & NC_MATCH_ICMP) - npf_grouper_add_icmpv4_type_code(ctx, type, code); + npf_grouper_add_icmpv4_type_code(ctx, class, type, code); else - npf_grouper_add_icmpv6_type_code(ctx, type, code); + npf_grouper_add_icmpv6_type_code(ctx, class, type, code); } static int @@ -464,7 +466,7 @@ npf_gen_ncode_tcp_flags(nc_ctx_t *nc_ctx, char *tcp_flags) static int npf_gen_ncode_mac_addr(nc_ctx_t *nc_ctx, char *value, int options) { - struct ether_addr ma; + struct rte_ether_addr ma; uint8_t *ab = ma.addr_bytes; if (sscanf(value, "%2hhx:%2hhx:%2hhx:%2hhx:%2hhx:%2hhx", @@ -527,7 +529,7 @@ npf_parse_ip_addr(char *value, sa_family_t *fam, npf_addr_t *addr, return 0; } -static void +void npf_masklen_to_grouper_mask(sa_family_t 
fam, npf_netmask_t masklen, npf_addr_t *addr_mask) { @@ -742,7 +744,7 @@ npf_gen_ncode_port_group(struct npf_rule_ctx *ctx, char *value, .ctx = ctx, .options = options, .rule_count = 0, - .error = 0. + .error = 0, }; /* @@ -812,7 +814,7 @@ struct npf_icmp_name_table icmpv4_name_table[] = { { "timestamp-reply", 14, -1 }, { "address-mask-request", 17, -1 }, { "address-mask-reply", 18, -1 }, - { NULL } + { NULL, 0, 0 } }; struct npf_icmp_name_table icmpv6_name_table[] = { @@ -841,7 +843,7 @@ struct npf_icmp_name_table icmpv6_name_table[] = { { "redirect", 137, -1 }, { "mobile-prefix-solicitation", 146, -1 }, { "mobile-prefix-advertisement", 147, -1 }, - { NULL } + { NULL, 0, 0 } }; static int @@ -855,8 +857,8 @@ npf_gen_ncode_icmp_name(struct npf_rule_ctx *ctx, char *value, for (entry = table; entry->name; entry++) { if (strcmp(value, entry->name) == 0) { npf_gennc_icmp(ctx->nc_ctx, entry->type, entry->code, - (options & NC_MATCH_ICMP) != 0); - npf_grouper_add_icmp_type_code(ctx, options, + (options & NC_MATCH_ICMP) != 0, false); + npf_grouper_add_icmp_type_code(ctx, options, false, entry->type, entry->code); return 0; @@ -899,8 +901,35 @@ npf_gen_ncode_icmp(struct npf_rule_ctx *ctx, char *value, code = -1; npf_gennc_icmp(ctx->nc_ctx, type, code, - (options & NC_MATCH_ICMP) != 0); - npf_grouper_add_icmp_type_code(ctx, options, type, code); + (options & NC_MATCH_ICMP) != 0, false); + npf_grouper_add_icmp_type_code(ctx, options, false, type, code); + + return 0; +} + +static int +npf_gen_ncode_icmp_class(struct npf_rule_ctx *ctx, char *value, int options) +{ + bool error = false; + + if (strcmp(value, "error") == 0) { + error = true; + } else if (strcmp(value, "info") != 0) { + RTE_LOG(ERR, FIREWALL, "NPF: unexpected value in rule: " + "icmpv%s-class=%s\n", + (options & NC_MATCH_ICMP) ? "4" : "6", value); + return -EINVAL; + } + + npf_gennc_icmp(ctx->nc_ctx, error, 0, + (options & NC_MATCH_ICMP) != 0, true); + + /* + * For IPv4, grouper can not help. 
+ * For IPv6, grouper eventually will help. + */ + npf_grouper_add_icmp_type_code(ctx, options, true, + error ? 0 : ICMP6_INFOMSG_MASK, 0); return 0; } @@ -976,7 +1005,7 @@ npf_gen_ncode_icmp_group(struct npf_rule_ctx *ctx, char *value, .ctx = ctx, .options = options, .rule_count = 0, - .error = 0. + .error = 0, }; enum npf_rule_class group_class = (options & NC_MATCH_ICMP) ? NPF_RULE_CLASS_ICMP_GROUP : NPF_RULE_CLASS_ICMPV6_GROUP; @@ -1095,7 +1124,7 @@ static int npf_gen_ncode_dscp_group(struct npf_rule_ctx *ctx, char *value) { int err; - uint64_t dscp_set; + uint64_t dscp_set = 0UL; err = npf_dscp_group_getmask(value, &dscp_set); if (err) @@ -1123,7 +1152,7 @@ npf_gen_ncode_protocol_list(struct npf_rule_ctx *ctx, char *value) return -EINVAL; } npf_grouper_add_proto(ctx, proto, 0); - npf_gennc_proto(ctx->nc_ctx, proto); + npf_gennc_proto_final(ctx->nc_ctx, proto); } return 0; @@ -1157,7 +1186,7 @@ npf_gen_ncode_protocol_group(struct npf_rule_ctx *ctx, char *value) struct group_cb_info info = { .ctx = ctx, .rule_count = 0, - .error = 0. 
+ .error = 0, }; /* @@ -1231,36 +1260,48 @@ npf_gen_ncode(zhashx_t *config_ht, void **ncode, uint32_t *size, ipv6_route = zhashx_lookup(config_ht, "ipv6-route"); /* - * Handle protocol + * Handle final protocol (in extension chain) */ - value = zhashx_lookup(config_ht, "proto-final"); - if (!value) - value = zhashx_lookup(config_ht, "proto-base"); - if (!value) - value = zhashx_lookup(config_ht, "proto"); + char const *proto_key = "proto-final"; + value = zhashx_lookup(config_ht, proto_key); + if (!value) { + proto_key = "proto"; + value = zhashx_lookup(config_ht, proto_key); + } if (value) { char *endp; unsigned long proto = strtoul(value, &endp, 10); if (endp == value || proto > 255) { RTE_LOG(ERR, FIREWALL, "NPF: unexpected value in rule: " - "proto=%s\n", value); + "%s=%s\n", proto_key, value); err = -EINVAL; goto error; } + npf_grouper_add_proto(&ctx, proto, 0); /* - * Don't generate ncode for IPv6-route, as not an L4 header - * and the protocol is just used to allow ipv6-route matching. 
+ * Protocol check is done in the TCP flags ncode */ - if (!ipv6_route) { - npf_grouper_add_proto(&ctx, proto, 0); - /* - * Protocol check is done in the TCP flags ncode - */ - if (!tcp_flags) - npf_gennc_proto(ctx.nc_ctx, proto); + if (!tcp_flags) + npf_gennc_proto_final(ctx.nc_ctx, proto); + } + + /* + * Handle base protocol in IP header + */ + value = zhashx_lookup(config_ht, "proto-base"); + if (value) { + char *endp; + unsigned long proto_base = strtoul(value, &endp, 10); + if (endp == value || proto_base > 255) { + RTE_LOG(ERR, FIREWALL, "NPF: unexpected value in rule: " + "proto-base=%s\n", value); + err = -EINVAL; + goto error; } + npf_gennc_proto_base(ctx.nc_ctx, proto_base); } + value = zhashx_lookup(config_ht, "protocol-group"); if (value) { err = npf_gen_ncode_protocol_group(&ctx, value); @@ -1407,6 +1448,12 @@ npf_gen_ncode(zhashx_t *config_ht, void **ncode, uint32_t *size, if (err) goto error; } + value = zhashx_lookup(config_ht, "icmpv6-class"); + if (value) { + err = npf_gen_ncode_icmp_class(&ctx, value, NC_MATCH_ICMP6); + if (err) + goto error; + } value = zhashx_lookup(config_ht, "icmpv6-group"); if (value) { err = npf_gen_ncode_icmp_group(&ctx, value, @@ -1584,7 +1631,7 @@ npf_process_nat_ip_masq(uint32_t *flags, uint8_t *addr_sz, npf_addr_t *taddr, *flags |= NPF_NAT_MASQ; *addr_sz = 4; - ifp = ifnet_byifname(attach_point); + ifp = dp_ifnet_byifname(attach_point); if (!ifp) { RTE_LOG(ERR, FIREWALL, "masquerade: interface \"%s\" does " "not exist\n", attach_point); @@ -1762,6 +1809,22 @@ npf_process_nat_config(npf_rule_t *rl, zhashx_t *config_ht) } } + /* + * NAT port allocation. Allowed values are "sequential" and "random". + * However only one flag is used, and this denotes when "sequential" + * is configured. 
+ */ + value = zhashx_lookup(config_ht, "trans-port-alloc"); + if (value) { + if (strcmp(value, "sequential") == 0) + flags |= NPF_NAT_PA_SEQ; + else if (strcmp(value, "random") != 0) { + RTE_LOG(ERR, FIREWALL, "NPF: unexpected value in rule: " + "trans-port-alloc=%s\n", value); + return -EINVAL; + } + } + /* * Handle translation ports. */ @@ -1923,14 +1986,21 @@ npf_get_rule_match_string(zhashx_t *config_ht, char *buf, size_t *used_buf_len, buf_app_printf(buf, used_buf_len, total_buf_len, "family %s ", value); - value = zhashx_lookup(config_ht, "proto-final"); - if (!value) - value = zhashx_lookup(config_ht, "proto-base"); - if (!value) - value = zhashx_lookup(config_ht, "proto"); + value = zhashx_lookup(config_ht, "proto-base"); + if (value) { + buf_app_printf(buf, used_buf_len, total_buf_len, + "proto-base %s ", value); + } + + char const *proto_key = "proto-final"; + value = zhashx_lookup(config_ht, proto_key); + if (!value) { + proto_key = "proto"; + value = zhashx_lookup(config_ht, proto_key); + } if (value) buf_app_printf(buf, used_buf_len, total_buf_len, - "proto %s ", value); + "%s %s ", proto_key, value); value = zhashx_lookup(config_ht, "protocol-group"); if (value) @@ -1945,7 +2015,7 @@ npf_get_rule_match_string(zhashx_t *config_ht, char *buf, size_t *used_buf_len, value = zhashx_lookup(config_ht, "ipv6-route"); if (value) buf_app_printf(buf, used_buf_len, total_buf_len, - "type %s ", value); + "RH-type %s ", value); value = zhashx_lookup(config_ht, "icmpv4"); if (!value) diff --git a/src/npf/npf_rule_gen.h b/src/npf/npf_rule_gen.h index 4e73824e..8240accd 100644 --- a/src/npf/npf_rule_gen.h +++ b/src/npf/npf_rule_gen.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2021, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2016 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -17,7 +17,7 @@ #include "npf/npf.h" #include "npf/npf_session.h" -#include "src/npf/npf_cache.h" +#include "npf/npf_cache.h" /* Used for building grouper */ struct npf_rule_grouper_info { @@ -35,6 +35,9 @@ void buf_app_printf(char *buf, size_t *used_buf_len, int npf_parse_ip_addr(char *value, sa_family_t *fam, npf_addr_t *addr, npf_netmask_t *masklen, bool *negate); +void npf_masklen_to_grouper_mask(sa_family_t fam, npf_netmask_t masklen, + npf_addr_t *addr_mask); + int npf_gen_ncode(zhashx_t *config_ht, void **ncode, uint32_t *size, bool any_match_rprocs, struct npf_rule_grouper_info *grouper_info); diff --git a/src/npf/npf_ruleset.c b/src/npf/npf_ruleset.c index 65d05441..c36f9b7c 100644 --- a/src/npf/npf_ruleset.c +++ b/src/npf/npf_ruleset.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2021, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2016 by Brocade Communications Systems, Inc. * All rights reserved. */ @@ -80,16 +80,18 @@ #include "util.h" #include "vplane_debug.h" #include "vplane_log.h" +#include "npf_match.h" +#include "../ether.h" struct npf_attpt_item; -#define SHOW_BUF_LEN 8192 +#define NPF_RULE_HASH_MIN 1024 /* smallest hash table size */ +#define NPF_RULE_HASH_MAX 32768 /* largest hash table size + * Pick a suitably large value to + * allow for large IPsec rulesets + */ -/* - * Size of counter per-cpu array based on highest active lcore. 
- */ -#define NPF_RULE_STATS_SIZE (sizeof(struct npf_rule_stats) * \ - (get_lcore_max() + 1)) +#define SHOW_BUF_LEN 8192 /* For GC of rulesets */ static CDS_LIST_HEAD(ruleset_reap); @@ -124,11 +126,13 @@ struct npf_rule_group { struct cds_list_head rg_entry; /* used in chaining rule groups */ uint8_t rg_dir; /* direction - IN, OUT, or both */ + uint8_t rg_af; /* Addr family (0 for agnostic) */ - g2_config_t *rg_grouper; - g2_config_t *rg_grouper6; + npf_match_ctx_t *match_ctx_v4; + npf_match_ctx_t *match_ctx_v6; struct cds_list_head rg_rules; /* rules in this group */ + struct cds_lfht *rg_rules_ht; /* hash tbl for rules in this group */ /* @@ -143,7 +147,7 @@ struct npf_rule_group { /* Struct containing rule generation and state data. */ struct npf_rule_state { - uint32_t rs_hash; /* used for csync */ + uint32_t rs_hash; npf_rule_group_t *rs_rule_group; char *rs_config_line; zhashx_t *rs_config_ht; /* var=value hash */ @@ -156,6 +160,7 @@ struct npf_rule_state { /* npf_rule definition - read-only data. 
*/ struct npf_rule { struct cds_list_head r_entry; + struct cds_lfht_node r_entry_ht; void *r_ncode; /* pointer to ncode */ npf_natpolicy_t *r_natp; /* nat policy */ struct npf_rule_stats *r_stats; /* rule stats */ @@ -170,14 +175,8 @@ struct npf_rule { uint8_t r_rproc_handle:1; }; -/* Only used for grouper to callback into the processor */ -struct npf_grouper_cb_data { - npf_cache_t *npc; - struct rte_mbuf *mbuf; - const struct ifnet *ifp; - int dir; - npf_session_t *se; -}; +/* Forward reference */ +static void npf_rule_clear_natpolicy(npf_rule_t *rl); npf_ruleset_t * npf_ruleset_create(enum npf_ruleset_type ruleset_type, @@ -201,8 +200,33 @@ npf_ruleset_create(enum npf_ruleset_type ruleset_type, return ruleset; } +static struct npf_rule_stats * +npf_rule_stats_get(struct npf_rule_stats *rl_stats) +{ + rte_atomic64_inc(&(rl_stats[0].refcnt)); + return rl_stats; +} + +static void npf_rule_stats_put(struct npf_rule_stats *rl_stats) +{ + if (rte_atomic64_dec_and_test(&(rl_stats[0].refcnt))) + free(rl_stats); +} + +static struct npf_rule_stats *npf_rule_stats_alloc(void) +{ + /* Allocate stats with highest lcore id as an array indice */ + struct npf_rule_stats *rl_stats = zmalloc_aligned( + sizeof(struct npf_rule_stats) * (get_lcore_max() + 1)); + + if (!rl_stats) + return NULL; + + return npf_rule_stats_get(rl_stats); +} + /* Allocate a rule and its subsystems */ -static npf_rule_t *npf_alloc_rule(void) +static npf_rule_t *npf_alloc_rule(uint32_t ruleset_type_flags) { npf_rule_t *rl; @@ -211,13 +235,15 @@ static npf_rule_t *npf_alloc_rule(void) return NULL; CDS_INIT_LIST_HEAD(&rl->r_entry); + cds_lfht_node_init(&rl->r_entry_ht); rte_atomic32_set(&rl->r_refcnt, 1); - /* Allocate stats w/ highest lcore id as array indice */ - rl->r_stats = zmalloc_aligned(NPF_RULE_STATS_SIZE); - if (!rl->r_stats) - goto bad_stats; + if (!(ruleset_type_flags & NPF_RS_FLAG_NO_STATS)) { + rl->r_stats = npf_rule_stats_alloc(); + if (!rl->r_stats) + goto bad_stats; + } rl->r_state = 
zmalloc_aligned(sizeof(struct npf_rule_state)); if (!rl->r_state) @@ -233,7 +259,8 @@ static npf_rule_t *npf_alloc_rule(void) bad_rproc: free(rl->r_state); bad_state: - free(rl->r_stats); + if (rl->r_stats) + npf_rule_stats_put(rl->r_stats); bad_stats: free(rl); return NULL; @@ -250,14 +277,15 @@ static void rule_free(npf_rule_t *rl) free((char *)rl->r_state->rs_rproc[i].config_arg); } - if (rl->r_natp) - npf_nat_policy_put(rl->r_natp); + /* Clear r_natp and release reference on NAT policy */ + npf_rule_clear_natpolicy(rl); zhashx_destroy(&rl->r_state->rs_config_ht); free(rl->r_state->rs_config_line); free(rl->r_state->rs_rproc); free(rl->r_state); - free(rl->r_stats); + if (rl->r_stats) + npf_rule_stats_put(rl->r_stats); free(rl->r_ncode); free(rl); } @@ -276,11 +304,18 @@ void npf_rule_put(npf_rule_t *rl) } static void -npf_free_rules(struct cds_list_head *rules) +npf_free_rules(npf_rule_group_t *rg) { npf_rule_t *rl, *tmp_rl; + struct cds_lfht_iter iter; - cds_list_for_each_entry_safe(rl, tmp_rl, rules, r_entry) { + if (rg->rg_rules_ht) { + cds_lfht_for_each_entry(rg->rg_rules_ht, &iter, rl, r_entry_ht) + cds_lfht_del(rg->rg_rules_ht, &rl->r_entry_ht); + cds_lfht_destroy(rg->rg_rules_ht, NULL); + } + + cds_list_for_each_entry_safe(rl, tmp_rl, &rg->rg_rules, r_entry) { /* Completely dissociate rule */ rl->r_state->rs_rule_group = NULL; cds_list_del(&rl->r_entry); @@ -292,14 +327,14 @@ void npf_free_group(npf_rule_group_t *rg) { /* Free the rules in this group */ - npf_free_rules(&rg->rg_rules); + npf_free_rules(rg); /* Remove from the list of groups */ cds_list_del_rcu(&rg->rg_entry); /* Release groupers */ - g2_destroy(&rg->rg_grouper); - g2_destroy(&rg->rg_grouper6); + npf_match_destroy(rg->rg_ruleset->rs_type, AF_INET, &rg->match_ctx_v4); + npf_match_destroy(rg->rg_ruleset->rs_type, AF_INET6, &rg->match_ctx_v6); free(rg->rg_name); free(rg); @@ -467,6 +502,9 @@ static void rule_clear_stats(npf_rule_t *rl) { unsigned int i; + if (!rl->r_stats) + return; + 
FOREACH_DP_LCORE(i) { rl->r_stats[i].pkts_ct = 0; rl->r_stats[i].bytes_ct = 0; @@ -478,69 +516,66 @@ static void rule_clear_stats(npf_rule_t *rl) void rule_sum_stats(const npf_rule_t *rl, struct npf_rule_stats *rs) { - unsigned int i; + unsigned int i, nprot; memset(rs, '\0', sizeof(struct npf_rule_stats)); + if (!rl->r_stats) + return; + FOREACH_DP_LCORE(i) { rs->bytes_ct += rl->r_stats[i].bytes_ct; rs->pkts_ct += rl->r_stats[i].pkts_ct; - rs->map_ports += rl->r_stats[i].map_ports; + for (nprot = NAT_PROTO_FIRST; nprot < NAT_PROTO_COUNT; + nprot++) { + rs->map_ports[nprot] += rl->r_stats[i].map_ports[nprot]; + } } } -void npf_rule_get_overall_used(npf_rule_t *rl, uint64_t *used, +void npf_rule_get_overall_used(npf_rule_t *rl, uint64_t used[], uint64_t *overall) { struct npf_rule_stats rs; - uint64_t t = 0; - uint64_t u = 0; + int nprot; - *used = 0; - *overall = 0; + *overall = npf_natpolicy_get_map_range(rl->r_natp); rule_sum_stats(rl, &rs); - if (rl->r_natp) { - t = npf_natpolicy_get_map_range(rl->r_natp); + for (nprot = NAT_PROTO_FIRST; nprot < NAT_PROTO_COUNT; nprot++) { /* - * Calculate what we used maintaining the current - * lie for DNAT usage. + * Note for DNAT ports are not taken from a pool, + * so 'used' is not limited by the total. */ - switch (npf_natpolicy_get_type(rl->r_natp)) { - case NPF_NATIN: - u = (rs.map_ports) > t ? t : rs.map_ports; - break; - case NPF_NATOUT: - u = rs.map_ports; - break; - } - - *overall = t; - *used = u; + used[nprot] = rs.map_ports[nprot]; } - } -void npf_rule_update_map_stats(npf_rule_t *rl, int nr_maps, uint32_t map_flags) +void npf_rule_update_map_stats(npf_rule_t *rl, int nr_maps, uint32_t map_flags, + uint8_t ip_prot) { unsigned int id = dp_lcore_id(); int ports = (map_flags & NPF_NAT_MAP_PORT) ? 
nr_maps : 0; + enum nat_proto nprot = nat_proto_from_ipproto(ip_prot); - if (rl) - rl->r_stats[id].map_ports += ports; + if (rl && rl->r_stats) + rl->r_stats[id].map_ports[nprot] += ports; } -static void rule_copy_stats(npf_rule_t *from, npf_rule_t *to) +static void rule_ref_stats(npf_rule_t *old, npf_rule_t *new) { - unsigned int i; + /* + * Release the statistics block allocated initially for the new + * rule, and instead reference the statistics associated with the + * old rule. + */ + if (new->r_stats) + npf_rule_stats_put(new->r_stats); - FOREACH_DP_LCORE(i) { - to->r_stats[i].pkts_ct += from->r_stats[i].pkts_ct; - to->r_stats[i].bytes_ct += from->r_stats[i].bytes_ct; - to->r_stats[i].map_ports += from->r_stats[i].map_ports; - } + if (old->r_stats) + new->r_stats = npf_rule_stats_get(old->r_stats); } /* @@ -557,50 +592,71 @@ npf_ncode_equal(void *nc1, size_t nc1_size, void *nc2, size_t nc2_size) } /* - * Copy stats from old rule to new rule if the rule is materially unchanged, - * i.e. if the ncode and action are unchanged. + * Reference stats of the old rule by the new rule if the rule is materially + * unchanged, i.e. if the ncode and action are unchanged. */ static void -npf_copy_stats_if_rule_unchanged(npf_rule_t *rl_from, npf_rule_t *rl_to) +npf_ref_stats_if_rule_unchanged(npf_rule_t *rl_old, npf_rule_t *rl_new) { /* Is bytecode different? */ - if (!npf_ncode_equal(rl_from->r_ncode, rl_from->r_nc_size, - rl_to->r_ncode, rl_to->r_nc_size)) + if (!npf_ncode_equal(rl_old->r_ncode, rl_old->r_nc_size, + rl_new->r_ncode, rl_new->r_nc_size)) return; /* Has action changed? 
*/ - if (rl_from->r_pass != rl_to->r_pass) + if (rl_old->r_pass != rl_new->r_pass) return; - /* Rules are deemed unchanged, so copy stats */ - rule_copy_stats(rl_from, rl_to); + /* Rules are deemed unchanged, so reference the stats */ + rule_ref_stats(rl_old, rl_new); +} + +static int npf_rg_rule_match(struct cds_lfht_node *node, const void *key) +{ + const uint32_t *rule_no = key; + npf_rule_t *rl = caa_container_of(node, npf_rule_t, r_entry_ht); + + if (rl->r_state->rs_rule_no == *rule_no) + return 1; + + return 0; } static npf_rule_t * -npf_find_rule(struct cds_list_head *from_rules, npf_rule_t *match) +npf_find_rule(npf_rule_group_t *rg, npf_rule_t *match) { npf_rule_t *rl; + struct cds_lfht_iter iter; + struct cds_lfht_node *node; + + if (rg->rg_rules_ht) { + cds_lfht_lookup(rg->rg_rules_ht, match->r_state->rs_rule_no, + npf_rg_rule_match, &match->r_state->rs_rule_no, + &iter); + node = cds_lfht_iter_get_node(&iter); + return node ? caa_container_of(node, npf_rule_t, r_entry_ht) : + NULL; + } - cds_list_for_each_entry(rl, from_rules, r_entry) { + cds_list_for_each_entry(rl, &rg->rg_rules, r_entry) { if (match->r_state->rs_rule_no == rl->r_state->rs_rule_no) return rl; - else if (match->r_state->rs_rule_no < rl->r_state->rs_rule_no) + if (match->r_state->rs_rule_no < rl->r_state->rs_rule_no) return NULL; } return NULL; } static void -npf_copy_stats_group(npf_rule_group_t *rg_from, - npf_rule_group_t *rg_to) +npf_ref_stats_group(npf_rule_group_t *rg_old, npf_rule_group_t *rg_new) { - npf_rule_t *rl_from, *rl_to; + npf_rule_t *rl_old, *rl_new; - cds_list_for_each_entry(rl_to, &rg_to->rg_rules, r_entry) { - rl_from = npf_find_rule(&rg_from->rg_rules, rl_to); + cds_list_for_each_entry(rl_new, &rg_new->rg_rules, r_entry) { + rl_old = npf_find_rule(rg_old, rl_new); - if (rl_from) - npf_copy_stats_if_rule_unchanged(rl_from, rl_to); + if (rl_old) + npf_ref_stats_if_rule_unchanged(rl_old, rl_new); } } @@ -619,7 +675,7 @@ npf_find_rule_group(struct cds_list_head 
*from_groups, npf_rule_group_t *match) } /* - * Implements a copy of byte/packet statistics on rule + * References the byte/packet/map_ports statistics on rule * change. A rule is considered changed if the rule number changes * and/or the byte code changes. This leaves the following * behavior as a further enhancement: @@ -634,15 +690,20 @@ npf_find_rule_group(struct cds_list_head *from_groups, npf_rule_group_t *match) * has been validated. */ void -npf_copy_stats(npf_ruleset_t *from, npf_ruleset_t *to) +npf_ref_stats(npf_ruleset_t *old, npf_ruleset_t *new) { - npf_rule_group_t *rg_from, *rg_to; + npf_rule_group_t *rg_old, *rg_new; + uint32_t rs_type_flags; - cds_list_for_each_entry(rg_to, &to->rs_groups, rg_entry) { - rg_from = npf_find_rule_group(&from->rs_groups, rg_to); + rs_type_flags = npf_get_ruleset_type_flags(old->rs_type); + if (rs_type_flags & NPF_RS_FLAG_NO_STATS) + return; + + cds_list_for_each_entry(rg_new, &new->rs_groups, rg_entry) { + rg_old = npf_find_rule_group(&old->rs_groups, rg_new); - if (rg_from) - npf_copy_stats_group(rg_from, rg_to); + if (rg_old) + npf_ref_stats_group(rg_old, rg_new); } } @@ -653,6 +714,12 @@ npf_clear_stats(const npf_ruleset_t *ruleset, enum npf_rule_class group_class, npf_rule_group_t *rg; npf_rule_t *rl; + uint32_t rs_type_flags; + + rs_type_flags = npf_get_ruleset_type_flags(ruleset->rs_type); + if (rs_type_flags & NPF_RS_FLAG_NO_STATS) + return; + cds_list_for_each_entry(rg, &ruleset->rs_groups, rg_entry) { if (group_class == NPF_RULE_CLASS_COUNT || (group_class == rg->rg_class && @@ -670,7 +737,7 @@ npf_clear_stats(const npf_ruleset_t *ruleset, enum npf_rule_class group_class, void npf_add_pkt(npf_rule_t *rl, uint64_t bytes) { - if (rl == NULL) + if (rl == NULL || rl->r_stats == NULL) return; unsigned int core = dp_lcore_id(); @@ -761,7 +828,7 @@ npf_rule_get_ifp(const npf_rule_t *rl) attach_type != NPF_ATTACH_TYPE_INTERFACE) return NULL; - return ifnet_byifname(attach_point); + return 
dp_ifnet_byifname(attach_point); } static npf_rule_t * @@ -823,8 +890,8 @@ npf_get_rule_by_hash_cb(struct npf_attpt_item *ap, void *ctx) info->rl = npf_get_rule_by_hash_config(npf_conf, info->hash); if (info->rl) return false; /* cause walker to stop */ - else - return true; + + return true; } npf_rule_t * @@ -1023,13 +1090,26 @@ npf_json_rule(npf_rule_t *rl, bool is_nat, json_writer_t *json) } if (rl->r_natp) { - uint64_t total = 0; - uint64_t used = 0; + uint64_t total; + uint64_t used[NAT_PROTO_COUNT]; + enum nat_proto nprot; - npf_rule_get_overall_used(rl, &used, &total); + npf_rule_get_overall_used(rl, used, &total); jsonw_uint_field(json, "total_ts", total); - jsonw_uint_field(json, "used_ts", used); + + jsonw_name(json, "protocols"); + jsonw_start_array(json); + + for (nprot = NAT_PROTO_FIRST; nprot < NAT_PROTO_COUNT; + nprot++) { + jsonw_start_object(json); + jsonw_string_field(json, "protocol", + nat_proto_lc_str(nprot)); + jsonw_uint_field(json, "used_ts", used[nprot]); + jsonw_end_object(json); + } + jsonw_end_array(json); /* protocols */ buf[0] = '\0'; used_buf_len = 0; @@ -1117,6 +1197,7 @@ npf_rule_group_t * npf_rule_group_create(npf_ruleset_t *ruleset, enum npf_rule_class group_class, const char *group, uint8_t dir) { + uint32_t rs_type_flags; npf_rule_group_t *rg = calloc(1, sizeof(npf_rule_group_t)); if (!rg) @@ -1124,21 +1205,40 @@ npf_rule_group_create(npf_ruleset_t *ruleset, enum npf_rule_class group_class, CDS_INIT_LIST_HEAD(&rg->rg_entry); CDS_INIT_LIST_HEAD(&rg->rg_rules); + + rs_type_flags = npf_get_ruleset_type_flags(ruleset->rs_type); + if (rs_type_flags & NPF_RS_FLAG_HASH_TBL) { + rg->rg_rules_ht = cds_lfht_new(NPF_RULE_HASH_MIN, + NPF_RULE_HASH_MIN, + NPF_RULE_HASH_MAX, + CDS_LFHT_AUTO_RESIZE, + NULL); + if (!rg->rg_rules_ht) { + RTE_LOG(ERR, FIREWALL, + "Error: Could not allocate hash table for rules\n"); + goto err; + } + } rg->rg_ruleset = ruleset; rg->rg_dir = dir; rg->rg_class = group_class; - if (group) + if (group) { rg->rg_name = 
strdup(group); - if (!rg->rg_name) { - free(rg); - return NULL; + if (!rg->rg_name) + goto err; } /* Add group to ruleset, after the groups that are there. */ cds_list_add_tail_rcu(&rg->rg_entry, &ruleset->rs_groups); return rg; + +err: + if (rg->rg_rules_ht) + cds_lfht_destroy(rg->rg_rules_ht, NULL); + free(rg); + return NULL; } static uint32_t @@ -1148,8 +1248,17 @@ npf_rule_hash(npf_rule_t *rl) uint32_t hash = 0; const char *rg_name = rg->rg_name; - if (rg_name) - hash = rte_jhash(rg_name, strlen(rg_name), hash); + if (rg_name) { + /* + * The jhash reads in 4 byte words, so make sure + * that it doesn't read off the end of allocated mem. + */ + char __rg_name[RTE_ALIGN(strlen(rg_name), 4)] + __rte_aligned(sizeof(uint32_t)); + + memcpy(__rg_name, rg_name, strlen(rg_name)); + hash = rte_jhash(__rg_name, strlen(rg_name), hash); + } hash = rte_jhash_3words(rl->r_state->rs_rule_no, rl->r_nc_size, rg->rg_dir, hash); @@ -1272,30 +1381,35 @@ static int npf_add_rule_to_grouper(npf_rule_t *rl) { struct npf_rule_grouper_info *info = &rl->r_state->rs_grouper_info; + enum npf_ruleset_type rs_type = + rl->r_state->rs_rule_group->rg_ruleset->rs_type; + int err; /* * Insert the grouper entries for this rule into the grouper * associated with this group of rules. 
*/ if (info->g_family != AF_INET6) { - if (!g2_create_rule(rl->r_state->rs_rule_group->rg_grouper, - rl->r_state->rs_rule_no, rl)) - return -ENOMEM; - if (!g2_add(rl->r_state->rs_rule_group->rg_grouper, 0, - NPC_GPR_SIZE_v4, info->g_v4_match, info->g_v4_mask)) - return -EINVAL; + err = npf_match_add_rule( + rs_type, AF_INET, + rl->r_state->rs_rule_group->match_ctx_v4, + rl->r_state->rs_rule_no, info->g_v4_match, + info->g_v4_mask, rl); + if (err) + return err; } /* * NAT64 might have a natpolicy, so always add IPv6 rule */ if (info->g_family != AF_INET) { - if (!g2_create_rule(rl->r_state->rs_rule_group->rg_grouper6, - rl->r_state->rs_rule_no, rl)) - return -ENOMEM; - if (!g2_add(rl->r_state->rs_rule_group->rg_grouper6, 0, - NPC_GPR_SIZE_v6, info->g_v6_match, info->g_v6_mask)) - return -EINVAL; + err = npf_match_add_rule( + rs_type, AF_INET6, + rl->r_state->rs_rule_group->match_ctx_v6, + rl->r_state->rs_rule_no, info->g_v6_match, + info->g_v6_mask, rl); + if (err) + return err; } return 0; @@ -1417,13 +1531,64 @@ npf_process_rule_config(npf_rule_t *rl) return 0; } +static zhashx_t *npf_rule_config_ht_init(void) +{ + zhashx_t *config_ht; + + config_ht = zhashx_new(); + if (!config_ht) + return NULL; + + zhashx_set_destructor(config_ht, (zhashx_destructor_fn *)zstr_free); + zhashx_set_duplicator(config_ht, (zhashx_duplicator_fn *)strdup); + + return config_ht; +} + +/* + * ACLs use rule 0 for group attributes. Parse that rule string and store any + * required params in the rule group. 
+ */ +int npf_parse_group_acl_rule(npf_rule_group_t *rg, const char *rule_line) +{ + zhashx_t *tmp_config_ht; + const char *str; + int rc = 0; + + tmp_config_ht = npf_rule_config_ht_init(); + + rc = npf_parse_rule_line(tmp_config_ht, rule_line); + if (rc) + goto end; + + /* family */ + str = zhashx_lookup(tmp_config_ht, "family"); + if (str) { + uint8_t af = 0; + + if (!strcmp(str, "inet")) + af = AF_INET; + else if (!strcmp(str, "inet6")) + af = AF_INET6; + + if (af) + rg->rg_af = af; + } + +end: + zhashx_destroy(&tmp_config_ht); + return rc; +} + int -npf_make_rule(npf_rule_group_t *rg, uint32_t rule_no, const char *rule_line) +npf_make_rule(npf_rule_group_t *rg, uint32_t rule_no, const char *rule_line, + uint32_t ruleset_type_flags) { + struct cds_lfht_node *ret_node = NULL; npf_rule_t *rl; int ret; - rl = npf_alloc_rule(); + rl = npf_alloc_rule(ruleset_type_flags); if (!rl) { RTE_LOG(ERR, FIREWALL, "Error: rule allocation failed\n"); return -ENOMEM; @@ -1436,7 +1601,7 @@ npf_make_rule(npf_rule_group_t *rg, uint32_t rule_no, const char *rule_line) goto error; } - rl->r_state->rs_config_ht = zhashx_new(); + rl->r_state->rs_config_ht = npf_rule_config_ht_init(); if (!rl->r_state->rs_config_ht) { RTE_LOG(ERR, FIREWALL, "Error: rule hash table allocation " "failed\n"); @@ -1444,11 +1609,6 @@ npf_make_rule(npf_rule_group_t *rg, uint32_t rule_no, const char *rule_line) goto error; } - zhashx_set_destructor(rl->r_state->rs_config_ht, - (zhashx_destructor_fn *)zstr_free); - zhashx_set_duplicator(rl->r_state->rs_config_ht, - (zhashx_duplicator_fn *)strdup); - /* * Add a back reference to the group and insert in the rule into * its group. 
@@ -1463,6 +1623,22 @@ npf_make_rule(npf_rule_group_t *rg, uint32_t rule_no, const char *rule_line) */ rl->r_state->rs_rule_no = rule_no; + /* + * Add rule to hash table (if present) to enable faster lookups + */ + if (rg->rg_rules_ht) { + ret_node = cds_lfht_add_unique(rg->rg_rules_ht, + rl->r_state->rs_rule_no, + npf_rg_rule_match, + &rl->r_state->rs_rule_no, + &rl->r_entry_ht); + + if (ret_node != &rl->r_entry_ht) { + ret = -EEXIST; + goto error; + } + } + ret = npf_parse_rule_line(rl->r_state->rs_config_ht, rule_line); if (ret) { RTE_LOG(ERR, FIREWALL, "Error: parsing rule line: %s - %s\n", @@ -1482,6 +1658,8 @@ npf_make_rule(npf_rule_group_t *rg, uint32_t rule_no, const char *rule_line) return 0; error: cds_list_del(&rl->r_entry); + if (rg->rg_rules_ht && ret_node == &rl->r_entry_ht) + cds_lfht_del(rg->rg_rules_ht, &rl->r_entry_ht); npf_rule_put(rl); return ret; @@ -1572,18 +1750,43 @@ npf_rproc_match(npf_cache_t *npc, struct rte_mbuf *m, const npf_rule_t *rl, return true; } -void -npf_grouper_init(npf_rule_group_t *rg) +int +npf_match_setup(npf_rule_group_t *rg, uint32_t max_rules) { - rg->rg_grouper = g2_init(NPC_GPR_SIZE_v4); - rg->rg_grouper6 = g2_init(NPC_GPR_SIZE_v6); + int err; + enum npf_ruleset_type rs_type = rg->rg_ruleset->rs_type; + + DP_DEBUG(NPF, DEBUG, DATAPLANE, "Creating ruleset of size %d\n", + max_rules); + + err = npf_match_init(rs_type, AF_INET, rg->rg_name, + max_rules, &rg->match_ctx_v4); + if (err) + return err; + + err = npf_match_init(rs_type, AF_INET6, rg->rg_name, + max_rules, &rg->match_ctx_v6); + if (err) { + npf_match_destroy(rs_type, AF_INET, &rg->match_ctx_v4); + return err; + } + + return 0; } void -npf_grouper_optimize(npf_rule_group_t *rg) +npf_match_optimize(npf_rule_group_t *rg) { - g2_optimize(&rg->rg_grouper); - g2_optimize(&rg->rg_grouper6); + int err; + enum npf_ruleset_type rs_type = rg->rg_ruleset->rs_type; + + err = npf_match_build(rs_type, AF_INET, &rg->match_ctx_v4); + if (err) + RTE_LOG(ERR, DATAPLANE, "Could not 
rebuild IPv4 grouper\n"); + + err = npf_match_build(rs_type, AF_INET6, &rg->match_ctx_v6); + if (err) + RTE_LOG(ERR, DATAPLANE, "Could not rebuild IPv6 grouper\n"); } static ALWAYS_INLINE @@ -1604,7 +1807,7 @@ bool npf_rule_match(npf_cache_t *npc, struct rte_mbuf *nbuf, bool npf_rule_proc(const void *d, const void *r) { - const struct npf_grouper_cb_data *pd = d; + const struct npf_match_cb_data *pd = d; const npf_rule_t *rl = r; return npf_rule_match(pd->npc, pd->mbuf, pd->ifp, pd->dir, pd->se, rl); @@ -1621,11 +1824,12 @@ npf_ruleset_inspect(npf_cache_t *npc, struct rte_mbuf *nbuf, { npf_rule_group_t *rg = NULL; npf_rule_t *rl; + int match; if (unlikely(ruleset == NULL)) return NULL; - struct npf_grouper_cb_data pd = { + struct npf_match_cb_data pd = { .npc = npc, .mbuf = nbuf, .ifp = ifp, @@ -1634,29 +1838,58 @@ npf_ruleset_inspect(npf_cache_t *npc, struct rte_mbuf *nbuf, }; cds_list_for_each_entry_rcu(rg, &ruleset->rs_groups, rg_entry) { + enum npf_ruleset_type rs_type = rg->rg_ruleset->rs_type; + /* Match the direction. */ if ((rg->rg_dir & dir) == 0) continue; - if (likely(npf_iscached(npc, NPC_GROUPER))) { - uint8_t *pkt = (uint8_t *)npc->npc_grouper; + /* + * update rule group in context. 
The current rule group + * being used is passed in the match context to enable + * easy search for the rule when a match is found + */ + pd.rg = rg; + int af = 0; + void *match_ctx = NULL; + + if (!npc) { + uint16_t et = ethhdr(nbuf)->ether_type; + + if (et == htons(RTE_ETHER_TYPE_IPV4)) { + af = AF_INET; + match_ctx = rg->match_ctx_v4; + } else if (et == htons(RTE_ETHER_TYPE_IPV6)) { + af = AF_INET6; + match_ctx = rg->match_ctx_v6; + } + } else if (likely(npf_iscached(npc, NPC_GROUPER))) { if (likely(npf_iscached(npc, NPC_IP4))) { - if (rg->rg_grouper) { - rl = g2_eval4(rg->rg_grouper, pkt, &pd); - if (rl) - return rl; - continue; - } + af = AF_INET; + match_ctx = rg->match_ctx_v4; } else if (npf_iscached(npc, NPC_IP6)) { - if (rg->rg_grouper6) { - rl = g2_eval6(rg->rg_grouper6, pkt, - &pd); - if (rl) - return rl; - continue; - } + af = AF_INET6; + match_ctx = rg->match_ctx_v6; } + } else { + /* Grouper is not setup for fragments, for example */ + if (likely(npf_iscached(npc, NPC_IP4))) + af = AF_INET; + else if (npf_iscached(npc, NPC_IP6)) + af = AF_INET6; + } + + /* Match the address-family if set. */ + if (rg->rg_af && rg->rg_af != af) + continue; + + if (match_ctx) { + match = npf_match_classify(rs_type, af, match_ctx, + npc, &pd, &rl); + if (match) + return rl; + continue; } /* @@ -1665,6 +1898,8 @@ npf_ruleset_inspect(npf_cache_t *npc, struct rte_mbuf *nbuf, * grouper support - so perform a slow search of the list. */ cds_list_for_each_entry_rcu(rl, &rg->rg_rules, r_entry) { + if (unlikely(!npc)) + break; if (npf_rule_match(npc, nbuf, ifp, dir, se, rl)) return rl; } @@ -1679,8 +1914,8 @@ npf_rule_decision(npf_rule_t *rl) /* Match. Either pass or block */ if (rl->r_pass) return NPF_DECISION_PASS; - else - return NPF_DECISION_BLOCK; + + return NPF_DECISION_BLOCK; } return NPF_DECISION_UNMATCHED; } @@ -1719,8 +1954,27 @@ npf_type_of_ruleset(const npf_ruleset_t *ruleset) return ruleset ? 
ruleset->rs_type : NPF_RS_TYPE_COUNT; } +/* AF_INET, AF_INET6, or 0 if both or unknown */ +uint8_t npf_ruleset_af(npf_rule_group_t *rg) +{ + return rg->rg_af; +} + +/* + * returns true if the ruleset depends on the NPF cache + * having been populated. Currently the only exception to this is + * IPSec. The implementation should eventually move to a flag + * that expresses the dependency on the cache as opposed to + * specific ruleset types + */ +bool npf_ruleset_uses_cache(const npf_ruleset_t *ruleset) +{ + return (ruleset->rs_type != NPF_RS_IPSEC); +} + /* Update (as needed) all rules for a masquerade addr change */ -void npf_ruleset_update_masquerade(const struct ifnet *ifp, npf_ruleset_t *rs) +void npf_ruleset_update_masquerade(const struct ifnet *ifp, + const npf_ruleset_t *rs) { npf_rule_group_t *rg; npf_rule_t *rl; @@ -1757,11 +2011,28 @@ void npf_ruleset_update_masquerade(const struct ifnet *ifp, npf_ruleset_t *rs) } } +/* + * Set NAT policy in rule, and take reference on NAT policy + */ void npf_rule_set_natpolicy(npf_rule_t *rl, npf_natpolicy_t *np) { + /* Take reference on NAT policy */ + np = npf_nat_policy_get(np); + rcu_xchg_pointer(&rl->r_natp, np); } +/* + * Clear NAT policy in rule, and release reference on NAT policy + */ +static void npf_rule_clear_natpolicy(npf_rule_t *rl) +{ + npf_natpolicy_t *np = rcu_xchg_pointer(&rl->r_natp, NULL); + + if (np) + npf_nat_policy_put(np); +} + npf_natpolicy_t *npf_rule_get_natpolicy(const npf_rule_t *rl) { return rcu_dereference(rl->r_natp); @@ -1841,3 +2112,27 @@ npf_rulenc_dump(const npf_rule_t *rl) printf("-> %s\n", rl->r_pass ? "pass" : "block"); } #endif + +npf_rule_t *npf_rule_group_find_rule(npf_rule_group_t *rg, + uint32_t rule_no) +{ + npf_rule_t *rl; + struct cds_lfht_node *node; + struct cds_lfht_iter iter; + + if (rg->rg_rules_ht) { + cds_lfht_lookup(rg->rg_rules_ht, rule_no, npf_rg_rule_match, + &rule_no, &iter); + node = cds_lfht_iter_get_node(&iter); + rl = node ? 
caa_container_of(node, npf_rule_t, r_entry_ht) : + NULL; + return rl; + } + + cds_list_for_each_entry(rl, &rg->rg_rules, r_entry) { + if (rule_no == rl->r_state->rs_rule_no) + return rl; + } + + return NULL; +} diff --git a/src/npf/npf_ruleset.h b/src/npf/npf_ruleset.h index 748393ea..d7554393 100644 --- a/src/npf/npf_ruleset.h +++ b/src/npf/npf_ruleset.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2021, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2016 by Brocade Communications Systems, Inc. * All rights reserved. */ @@ -52,7 +52,8 @@ typedef uint16_t rule_no_t; #include "npf/config/npf_rule_group.h" #include "npf/config/npf_ruleset_type.h" #include "npf/npf.h" -#include "pktmbuf.h" +#include "npf/nat/nat_proto.h" +#include "pktmbuf_internal.h" /* Forward Declarations */ struct ifnet; @@ -71,13 +72,21 @@ typedef struct { uint8_t _unused : 2; } npf_rproc_result_t; +/* + * This structures primary use is in a per-core array, and so it is aligned + * to 64-byte boundary to ensure that different cores access different cache + * lines. 
+ */ struct npf_rule_stats { uint64_t pkts_ct; uint64_t bytes_ct; - uint64_t map_ports; /* NAT mapped ports stats */ - uint64_t pad[5]; + uint64_t map_ports[NAT_PROTO_COUNT]; /* NAT mapped ports stats */ + rte_atomic64_t refcnt; /* only refcnt of index 0 is used */ + uint64_t pad[2]; }; +static_assert(sizeof(struct npf_rule_stats) == 64, "not size of cache line"); + /** * Used to select rulesets on attachment points to perform actions on, * such as showing them, clearing statistics, dumping generation @@ -109,12 +118,13 @@ void npf_ruleset_gc_init(void); npf_ruleset_t *npf_ruleset_create(enum npf_ruleset_type ruleset_type, enum npf_attach_type attach_type, const char *attach_point); -void npf_ruleset_update_masquerade(const struct ifnet *ifp, npf_ruleset_t *rs); +void npf_ruleset_update_masquerade(const struct ifnet *ifp, + const npf_ruleset_t *rs); void npf_rule_set_natpolicy(npf_rule_t *rl, npf_natpolicy_t *np); npf_natpolicy_t *npf_rule_get_natpolicy(const npf_rule_t *rl); void npf_free_group(npf_rule_group_t *rg); -void npf_ruleset_free(npf_ruleset_t *ruleset); -void npf_copy_stats(npf_ruleset_t *from, npf_ruleset_t *to); +void npf_ruleset_free(npf_ruleset_t *rs); +void npf_ref_stats(npf_ruleset_t *old, npf_ruleset_t *new); void npf_clear_stats(const npf_ruleset_t *ruleset, enum npf_rule_class group_class, const char *group_name, rule_no_t rule_no); @@ -122,7 +132,8 @@ npf_rule_t *npf_rule_get(npf_rule_t *rl); void npf_rule_put(npf_rule_t *rl); void npf_add_pkt(npf_rule_t *rl, uint64_t bytes); const void *npf_get_ncode(const npf_rule_t *rl); -void npf_rule_update_map_stats(npf_rule_t *rl, int n, uint32_t flags); +void npf_rule_update_map_stats(npf_rule_t *rl, int n, uint32_t flags, + uint8_t ip_prot); void npf_rule_get_overall_used(npf_rule_t *rl, uint64_t *used, uint64_t *overall); rule_no_t npf_rule_get_num(npf_rule_t *rl); @@ -140,8 +151,9 @@ int npf_json_ruleset(const npf_ruleset_t *ruleset, json_writer_t *json); npf_rule_group_t 
*npf_rule_group_create(npf_ruleset_t *ruleset, enum npf_rule_class group_class, const char *group, uint8_t dir); +int npf_parse_group_acl_rule(npf_rule_group_t *rg, const char *rule_line); int npf_make_rule(npf_rule_group_t *rg, uint32_t rule_no, - const char *rule_line); + const char *rule_line, uint32_t ruleset_type_flags); void *npf_rule_rproc_handle_for_logger(npf_rule_t *rl); bool npf_rule_has_rproc_actions(npf_rule_t *rl); bool npf_rule_has_rproc_logger(npf_rule_t *rl); @@ -150,8 +162,8 @@ bool npf_rproc_action(npf_cache_t *npc, struct rte_mbuf **nbuf, npf_session_t *se, npf_rproc_result_t *result); bool npf_rproc_match(npf_cache_t *npc, struct rte_mbuf *m, const npf_rule_t *rl, const struct ifnet *ifp, int dir, npf_session_t *se); -void npf_grouper_init(npf_rule_group_t *rg); -void npf_grouper_optimize(npf_rule_group_t *rg); +int npf_match_setup(npf_rule_group_t *rg, uint32_t max_rules); +void npf_match_optimize(npf_rule_group_t *rg); bool npf_rule_proc(const void *d, const void *r); npf_rule_t *npf_ruleset_inspect(npf_cache_t *npc, struct rte_mbuf *nbuf, const npf_ruleset_t *ruleset, @@ -163,8 +175,10 @@ void npf_ruleset_set_stateful(npf_rule_group_t *rg, bool value); bool npf_ruleset_is_stateful(const npf_ruleset_t *ruleset); bool npf_rule_stateful(const npf_rule_t *rl); enum npf_ruleset_type npf_type_of_ruleset(const npf_ruleset_t *ruleset); +uint8_t npf_ruleset_af(npf_rule_group_t *rg); const char *npf_ruleset_get_name(npf_rule_group_t *rg); +bool npf_ruleset_uses_cache(const npf_ruleset_t *ruleset); /* * Walk all ruleset groups in a ruleset config @@ -192,4 +206,14 @@ void npf_rulenc_dump(const npf_rule_t *rl); #endif int npf_flush_rulesets(void); +/* + * Find a rule matching the rule number. 
+ * + * Used by clients of rte-acl since there is no + * facility to store and directly return the pointer + * to the rule (as is done with grouper) + */ +npf_rule_t *npf_rule_group_find_rule(npf_rule_group_t *rg, + uint32_t rule_no); + #endif /* NPF_RULESET_H */ diff --git a/src/npf/npf_session.c b/src/npf/npf_session.c index de8f2029..aa9400e5 100644 --- a/src/npf/npf_session.c +++ b/src/npf/npf_session.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2021, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2016 by Brocade Communications Systems, Inc. * All rights reserved. */ @@ -64,10 +64,10 @@ #include "if_var.h" #include "json_writer.h" #include "npf/npf.h" -#include "npf/alg/npf_alg_public.h" +#include "npf/alg/alg_npf.h" #include "npf/config/npf_config.h" #include "npf/config/npf_ruleset_type.h" -#include "npf/dpi/dpi.h" +#include "npf/dpi/dpi_internal.h" #include "npf/rproc/npf_rproc.h" #include "npf/rproc/npf_ext_session_limit.h" #include "npf/npf_dataplane_session.h" @@ -75,6 +75,8 @@ #include "npf/npf_if.h" #include "npf/npf_nat.h" #include "npf/npf_nat64.h" +#include "npf/npf_pack.h" +#include "npf/npf_rc.h" #include "npf/npf_ruleset.h" #include "npf/npf_session.h" #include "npf/npf_state.h" @@ -82,7 +84,8 @@ #include "npf/npf_cache.h" #include "npf/npf_rule_gen.h" #include "npf_shim.h" -#include "pktmbuf.h" +#include "pktmbuf_internal.h" +#include "session/session_watch.h" #include "urcu.h" #include "vplane_log.h" @@ -110,9 +113,14 @@ struct npf_session { /* --- cacheline 2 boundary (128 bytes) --- */ struct npf_session *s_parent; /* NULL if this == parent */ uint8_t s_proto; - uint8_t s_proto_idx; + enum npf_proto_idx s_proto_idx; }; +static_assert(offsetof(struct npf_session, s_nat) == 64, + "first cache line exceeded"); +static_assert(offsetof(struct npf_session, s_parent) == 128, + "second cache line exceeded"); + /* * Session flags: * - PFIL_IN and PFIL_OUT 
values are reserved for direction. @@ -121,6 +129,7 @@ struct npf_session { * - SE_EXPIRE: explicitly expire the session. * - SE_GC_PASS_TWO: in the 2nd pass of the GC process * - SE_SECONDARY: an ALG created secondary flow + * - SE_LOCAL_ZONE_NAT: Indicates NAT session for local traffic * - SE_IF_DISABLED: The interface associated with this session was disabled */ #define SE_ACTIVE 0x004 @@ -128,6 +137,7 @@ struct npf_session { #define SE_EXPIRE 0x010 #define SE_GC_PASS_TWO 0x020 #define SE_SECONDARY 0x040 +#define SE_LOCAL_ZONE_NAT 0x080 #define SE_IF_DISABLED 0x100 #define SE_NAT_PINHOLE 0x200 @@ -136,56 +146,29 @@ struct npf_session { */ static uint64_t npf_log_flag; -#define NPF_SET_SESSION_LOG_FLAG(p, f) (npf_log_flag |= (1ull << ((p<<4) + f))) -#define NPF_CLR_SESSION_LOG_FLAG(p, f) (npf_log_flag &= ~(1ull << ((p<<4) + f))) -#define NPF_TST_SESSION_LOG_FLAG(p, f) (npf_log_flag & (1ull << ((p<<4) + f))) -#define NPF_SESSION_LOG_MASK(p) (0x000000000000ffffull << (p<<4)) - /* Forward reference */ static void sess_clear_nat64_peer(npf_session_t *se); -/* - * Get the dataplane session ID given an npf session. If se or se->s_session - * are NULL then 0 is returned. - */ -uint64_t npf_session_get_id(struct npf_session *se) +static void npf_sess_log_flag_set(enum npf_proto_idx proto_idx, uint8_t state) { - if (se) - return session_get_id(se->s_session); - return 0; + uint8_t shift = (proto_idx << 4) + state; + npf_log_flag |= (1ull << shift); } -/* - * Get session timeout value. - * - * The state-dependent timeout value is overridden with a custom timeout if: - * - * a) the session tuple matched a configured custom timeout at the time - * the session was created, and - * b) the session state is steady (i.e. is in 'established' state). 
- */ -static int -npf_session_get_timeout(const npf_session_t *se) +static void npf_sess_log_flag_clr(enum npf_proto_idx proto_idx, uint8_t state) { - return npf_timeout_get(&se->s_state, se->s_proto_idx, - se->s_session->se_custom_timeout); -} - -static inline bool npf_test_session_log_proto(uint8_t proto_idx) -{ - assert(NPF_PROTO_IDX_TCP == 0); - assert(NPF_PROTO_IDX_UDP == 1); - assert(NPF_PROTO_IDX_ICMP == 2); - assert(NPF_PROTO_IDX_OTHER == 3); - assert(NPF_TCPS_LAST < 16); - - return (npf_log_flag & NPF_SESSION_LOG_MASK(proto_idx)) != 0; + uint8_t shift = (proto_idx << 4) + state; + npf_log_flag &= ~(1ull << shift); } +/* + * Is a specific session state log flag set for the given protocol and state? + */ static inline bool -npf_test_session_log_flag(uint8_t state, uint8_t proto_idx) +npf_sess_log_flag_tst(enum npf_proto_idx proto_idx, uint8_t state) { - return NPF_TST_SESSION_LOG_FLAG(proto_idx, state) != 0; + uint8_t shift = (proto_idx << 4) + state; + return (npf_log_flag & (1ull << shift)) != 0ull; } /* @@ -196,21 +179,27 @@ void npf_reset_session_log(void) npf_log_flag = 0; } -static void __cold_func -npf_session_log(npf_session_t *se, uint8_t state) +/* + * Get the dataplane session ID given an npf session. If se or se->s_session + * are NULL then 0 is returned. + */ +uint64_t npf_session_get_id(struct npf_session *se) { - /* return immediately if the flag is not set */ - if (!npf_test_session_log_flag(state, se->s_proto_idx)) - return; + return se ? 
session_get_id(se->s_session) : 0ull; +} +static void __cold_func +npf_session_log(npf_session_t *se, const char *state_name, uint32_t timeout, + uint8_t proto, const char *proto_name) +{ /* Cannot log unactivated sessions */ - if (!se->s_session) + if (unlikely(!se->s_session)) return; struct sentry *sen = rcu_dereference(se->s_session->se_sen); /* Racing with session expiration */ - if (!sen) + if (unlikely(!sen)) return; const void *saddr; @@ -222,16 +211,9 @@ npf_session_log(npf_session_t *se, uint8_t state) char srcip_str[INET6_ADDRSTRLEN]; char dstip_str[INET6_ADDRSTRLEN]; char dpi_info_str[MAX_DPI_LOG_SIZE]; - uint8_t proto = se->s_proto; - int timeout = npf_session_get_timeout(se); - const char *state_name = - npf_state_get_state_name(state, se->s_proto_idx); - const char *proto_name = - npf_get_protocol_name_from_idx(se->s_proto_idx); session_sentry_extract(sen, &if_index, &af, &saddr, &sid, &daddr, &did); - inet_ntop(af, saddr, srcip_str, sizeof(srcip_str)); inet_ntop(af, daddr, dstip_str, sizeof(dstip_str)); @@ -251,6 +233,44 @@ npf_session_log(npf_session_t *se, uint8_t state) dpi_info_str); } +static inline void +npf_session_tcp_log(npf_session_t *se, enum tcp_session_state state) +{ + uint32_t timeout; + npf_state_t *nst = &se->s_state; + + /* return immediately if the flag is not set */ + if (likely(!npf_sess_log_flag_tst(NPF_PROTO_IDX_TCP, (uint8_t)state))) + return; + + const char *state_name = npf_state_get_tcp_name(state); + + timeout = npf_tcp_timeout_get(nst, state, + se->s_session->se_custom_timeout); + + npf_session_log(se, state_name, timeout, IPPROTO_TCP, "tcp"); +} + +static inline void +npf_session_gen_log(npf_session_t *se, enum dp_session_state state, + uint8_t proto_idx) +{ + uint32_t timeout; + npf_state_t *nst = &se->s_state; + + /* return immediately if the flag is not set */ + if (likely(!npf_sess_log_flag_tst(proto_idx, (uint8_t)state))) + return; + + const char *state_name = dp_session_state_name(state, true); + const char 
*proto_name = npf_get_protocol_name_from_idx(proto_idx); + + timeout = npf_gen_timeout_get(nst, state, proto_idx, + se->s_session->se_custom_timeout); + + npf_session_log(se, state_name, timeout, se->s_proto, proto_name); +} + /* * Log nat64 and nat46 sessions * 1. After egress session is created, or @@ -278,6 +298,7 @@ npf_session_nat64_log(npf_session_t *se, bool created) int l = 0, sz = sizeof(msg); char srcip_str[INET6_ADDRSTRLEN]; char dstip_str[INET6_ADDRSTRLEN]; + struct sentry *peer_sen; const void *saddr; const void *daddr; uint32_t if_index; @@ -289,9 +310,9 @@ npf_session_nat64_log(npf_session_t *se, bool created) created ? "created":"closed"); /* Ingress session */ - sen = peer ? rcu_dereference(peer->s_session->se_sen) : NULL; - if (created && sen) { - session_sentry_extract(sen, &if_index, &af, &saddr, &sid, + peer_sen = peer ? rcu_dereference(peer->s_session->se_sen) : NULL; + if (created && peer_sen) { + session_sentry_extract(peer_sen, &if_index, &af, &saddr, &sid, &daddr, &did); inet_ntop(af, saddr, srcip_str, sizeof(srcip_str)); inet_ntop(af, daddr, dstip_str, sizeof(dstip_str)); @@ -303,40 +324,37 @@ npf_session_nat64_log(npf_session_t *se, bool created) } /* Only (or Egress) session */ - sen = se ? rcu_dereference(se->s_session->se_sen) : NULL; - if (sen) { - session_sentry_extract(sen, &if_index, &af, &saddr, &sid, - &daddr, &did); - inet_ntop(af, saddr, srcip_str, sizeof(srcip_str)); - inet_ntop(af, daddr, dstip_str, sizeof(dstip_str)); + session_sentry_extract(sen, &if_index, &af, &saddr, &sid, + &daddr, &did); + inet_ntop(af, saddr, srcip_str, sizeof(srcip_str)); + inet_ntop(af, daddr, dstip_str, sizeof(dstip_str)); - l += snprintf(msg+l, sz-l, "%s[%lu] %s/%u->%s/%u %s", - (peer && created) ? ", ":" ", - se->s_session->se_id, - srcip_str, ntohs(sid), dstip_str, ntohs(did), - ifnet_indextoname_safe(if_index)); + l += snprintf(msg+l, sz-l, "%s[%lu] %s/%u->%s/%u %s", + (peer && created) ? 
", ":" ", + se->s_session->se_id, + srcip_str, ntohs(sid), dstip_str, ntohs(did), + ifnet_indextoname_safe(if_index)); - const char *proto_name; - const char *ruleset_name; - rule_no_t rule_number = 0; - npf_rule_t *rl; + const char *proto_name; + const char *ruleset_name; + rule_no_t rule_number = 0; + npf_rule_t *rl; - proto_name = npf_get_protocol_name_from_idx(se->s_proto_idx); - rl = npf_nat64_get_rule(se->s_nat64); - ruleset_name = npf_rule_get_name(rl); - if (rl) - rule_number = npf_rule_get_num(rl); + proto_name = npf_get_protocol_name_from_idx(se->s_proto_idx); + rl = npf_nat64_get_rule(se->s_nat64); + ruleset_name = npf_rule_get_name(rl); + if (rl) + rule_number = npf_rule_get_num(rl); - l += snprintf(msg+l, sz-l, ", %s", proto_name); + l += snprintf(msg+l, sz-l, ", %s", proto_name); - if (ruleset_name) - l += snprintf(msg+l, sz-l, " %s/%u", - ruleset_name, rule_number); + if (ruleset_name) + l += snprintf(msg+l, sz-l, " %s/%u", + ruleset_name, rule_number); - if (!created) - snprintf(msg+l, sz-l, " [%lu]", - peer ? peer->s_session->se_id : 0); - } + if (!created) + snprintf(msg+l, sz-l, " [%lu]", + peer ? 
peer->s_session->se_id : 0); if (npf_nat64_session_is_nat64(se)) RTE_LOG(NOTICE, NAT64, "%s\n", msg); @@ -365,6 +383,12 @@ void npf_session_add_fw_rule(npf_session_t *s, npf_rule_t *r) } } +bool npf_session_is_fw(npf_session_t *s) +{ + return s && (s->s_flags & SE_PASS) != 0; +} + + /* Set the expire flag and contact ALG framework */ static void sess_set_expired(npf_session_t *se) { @@ -381,6 +405,17 @@ static void sess_set_expired(npf_session_t *se) } } +void npf_session_set_local_zone_nat(npf_session_t *se) +{ + if (se && !(se->s_flags & SE_LOCAL_ZONE_NAT)) + se->s_flags |= SE_LOCAL_ZONE_NAT; +} + +bool npf_session_is_local_zone_nat(npf_session_t *se) +{ + return se && (se->s_flags & SE_LOCAL_ZONE_NAT); +} + /* Clear parent */ static void sess_clear_parent(npf_session_t *se) { @@ -390,8 +425,13 @@ static void sess_clear_parent(npf_session_t *se) /* Closes a session, which will result in it being marked as expired. */ static void sess_close(npf_session_t *se) { - npf_state_set_closed_state(&se->s_state, - (se->s_flags & SE_ACTIVE), se->s_proto_idx); + if (se->s_proto_idx == NPF_PROTO_IDX_TCP) + npf_state_set_tcp_closed(&se->s_state, se, + (se->s_flags & SE_ACTIVE)); + else + npf_state_set_gen_closed(&se->s_state, se, + (se->s_flags & SE_ACTIVE), + se->s_proto_idx); } void npf_session_set_appfw_decision(npf_session_t *se, npf_decision_t decision) @@ -587,42 +627,99 @@ static void npf_session_add_rproc_rule(npf_session_t *s, npf_rule_t *r) /* * Update initial dataplane state/timeout */ -void npf_session_update_state(npf_session_t *se) +void npf_session_update_state(npf_session_t *se, struct session *s) +{ + if (se->s_proto_idx == NPF_PROTO_IDX_TCP) + npf_state_update_tcp_session(s, &se->s_state); + else + npf_state_update_gen_session(s, se->s_proto_idx, &se->s_state); +} + +/* + * Calls session watch hook if needed + */ +static inline void npf_session_do_watch(npf_session_t *se, + enum dp_session_hook hook) { - npf_state_update_session_state(se->s_session, 
se->s_proto_idx, - &se->s_state); + if (!is_watch_on()) + return; + + if (se->s_session) + session_do_watch(se->s_session, hook); } /* - * Callback from npf_state.c after a session changes state. + * Callback from npf_state.c after a UDP, ICMP etc. session changes state. */ -void -npf_session_state_change(npf_state_t *nst, uint8_t old_state, - uint8_t state, uint8_t proto_idx) +void npf_session_gen_state_change(npf_session_t *se, npf_state_t *nst, + enum dp_session_state old_state, + enum dp_session_state new_state, + enum npf_proto_idx proto_idx) { - npf_session_t *se = caa_container_of(nst, npf_session_t, s_state); - npf_rule_t *rproc_rl; + /* session logging */ + npf_session_gen_log(se, new_state, proto_idx); + + /* Update the dataplane session state/timeout */ + npf_state_update_gen_session(se->s_session, proto_idx, nst); + + /* Call session limit rproc if state has changed */ + if (new_state != old_state) { + npf_rule_t *rproc_rl; + void *handle; + + rproc_rl = npf_session_get_rproc_rule(se); + handle = npf_rule_rproc_handle_from_id(rproc_rl, + NPF_RPROC_ID_SLIMIT); + + if (handle) + npf_sess_limit_state_change(handle, old_state, + new_state); + } + + if (new_state == SESSION_STATE_CLOSED) + sess_set_expired(se); + + npf_session_do_watch(se, SESSION_STATE_CHANGE); +} - /* session logging if enabled */ - if (npf_test_session_log_proto(proto_idx)) - npf_session_log(se, state); +/* + * Callback from npf_state.c after a TCP session changes state. 
+ */ +void npf_session_tcp_state_change(npf_session_t *se, npf_state_t *nst, + enum tcp_session_state old_state, + enum tcp_session_state new_state) +{ + /* session logging */ + npf_session_tcp_log(se, new_state); /* Update the dataplane session state/timeout */ - npf_state_update_session_state(se->s_session, se->s_proto_idx, - &se->s_state); + npf_state_update_tcp_session(se->s_session, nst); + + /* Call session limit rproc if state has changed */ + if (new_state != old_state) { + npf_rule_t *rproc_rl; + void *handle; + + rproc_rl = npf_session_get_rproc_rule(se); + handle = npf_rule_rproc_handle_from_id(rproc_rl, + NPF_RPROC_ID_SLIMIT); - /* Session rproc */ - rproc_rl = npf_session_get_rproc_rule(se); + if (handle) { + enum dp_session_state old_gen_st, new_gen_st; - void *handle = npf_rule_rproc_handle_from_id(rproc_rl, - NPF_RPROC_ID_SLIMIT); - if (handle && state != old_state) - npf_sess_limit_state_change(handle, proto_idx, - old_state, state); + old_gen_st = npf_state_tcp2gen(old_state); + new_gen_st = npf_state_tcp2gen(new_state); - if (npf_state_get_generic_state(proto_idx, state) == - NPF_ANY_SESSION_CLOSED) + if (old_gen_st != new_gen_st) + npf_sess_limit_state_change(handle, old_gen_st, + new_gen_st); + } + } + + if (new_state == NPF_TCPS_CLOSED) sess_set_expired(se); + + npf_session_do_watch(se, SESSION_STATE_CHANGE); } /* @@ -670,6 +767,7 @@ npf_session_inspect(npf_cache_t *npc, struct rte_mbuf *nbuf, return NULL; bool sforw = false; + int rc; /* Try to find an existing session */ se = npf_session_find(nbuf, di, ifp, &sforw, internal_hairpin); @@ -682,16 +780,18 @@ npf_session_inspect(npf_cache_t *npc, struct rte_mbuf *nbuf, return se; /* Update the state of a session based on the supplied packet */ - if (unlikely(!npf_state_inspect(npc, nbuf, &se->s_state, sforw))) { + rc = npf_state_inspect(npc, nbuf, se, &se->s_state, + se->s_proto_idx, sforw); + if (unlikely(rc < 0)) { /* Silently block invalid packets. 
*/ - *error = -ENETUNREACH; + *error = rc; return NULL; } /* Give the session packet hook a chance to see and drop it */ if (se->s_hook) { if (!se->s_hook(se, npc, nbuf, di)) { - *error = -ENETUNREACH; + *error = -NPF_RC_SESS_HOOK; return NULL; } } @@ -705,6 +805,8 @@ npf_session_inspect(npf_cache_t *npc, struct rte_mbuf *nbuf, * If this is not an ICMP error packet, and we fail to find one, * then we look to see if we should create a 'parent' tuple based * session, returning that if we do. + * + * Note that if '*error' is set < 0 then the packet is dropped. */ npf_session_t * npf_session_inspect_or_create(npf_cache_t *npc, struct rte_mbuf *nbuf, @@ -720,7 +822,8 @@ npf_session_inspect_or_create(npf_cache_t *npc, struct rte_mbuf *nbuf, se = npf_session_inspect(npc, nbuf, ifp, di, error, internal_hairpin); if (se) { /* - * Allow through packets matching sessions for: + * ZBF skip processing tries to approximate IBF behaviour. + * So this allows through packets matching sessions for: * 1) Stateful firewall rules * 2) ALG enabled secondary flows * 3) Reverse NAT traffic @@ -735,6 +838,7 @@ npf_session_inspect_or_create(npf_cache_t *npc, struct rte_mbuf *nbuf, /* this will potentially create a tuple based session */ if (!*error) { se = npf_alg_session(npc, nbuf, ifp, di, error); + if (se) { npc->npc_proto_idx = se->s_proto_idx; *npf_flag |= NPF_FLAG_IN_SESSION; @@ -764,7 +868,7 @@ icmp_err_session: __cold_label; * If any of the checks fail then the cache is invalidated, and NULL returned. 
*/ static npf_session_t * -npf_session_find_valid_cached(struct rte_mbuf *mbuf, bool check_if_dir, +npf_session_find_valid_cached(struct rte_mbuf *mbuf, const struct ifnet *ifp, int dir) { npf_session_t *se = NULL; @@ -772,7 +876,7 @@ npf_session_find_valid_cached(struct rte_mbuf *mbuf, bool check_if_dir, if (pktmbuf_mdata_exists(mbuf, PKT_MDATA_SESSION)) { struct pktmbuf_mdata *mdata = pktmbuf_mdata(mbuf); se = mdata->md_session; - if ((se->s_flags & SE_EXPIRE) || (check_if_dir && + if ((se->s_flags & SE_EXPIRE) || (ifp && (npf_session_get_if_index(se) != ifp->if_index || !npf_session_forward_dir(se, dir)))) { pktmbuf_mdata_clear(mbuf, PKT_MDATA_SESSION); @@ -790,7 +894,7 @@ npf_session_find_valid_cached(struct rte_mbuf *mbuf, bool check_if_dir, npf_session_t * npf_session_find_cached(struct rte_mbuf *mbuf) { - return npf_session_find_valid_cached(mbuf, false, NULL, 0); + return npf_session_find_valid_cached(mbuf, NULL, 0); } /* @@ -802,7 +906,7 @@ npf_session_t * npf_session_find_or_create(npf_cache_t *npc, struct rte_mbuf *mbuf, const struct ifnet *ifp, int dir, int *error) { - npf_session_t *se = npf_session_find_valid_cached(mbuf, true, ifp, dir); + npf_session_t *se = npf_session_find_valid_cached(mbuf, ifp, dir); if (se) return se; @@ -819,8 +923,7 @@ npf_session_find_or_create(npf_cache_t *npc, struct rte_mbuf *mbuf, return NULL; /* Create a session for this packet */ if (!se) - se = npf_session_establish(npc, mbuf, ifp, dir, - error); + se = npf_session_establish(npc, mbuf, ifp, dir, error); if (!se || *error) return NULL; } @@ -838,25 +941,6 @@ npf_session_find_or_create(npf_cache_t *npc, struct rte_mbuf *mbuf, return se; } - -/* - * Find a session matching the packet passed in, possibly looking - * with swapped keys (for reverse flows). - * The caller should ensure a session exists before calling here. 
- */ -npf_session_t * -npf_session_lookup(struct rte_mbuf *m, npf_cache_t *npc, - const struct ifnet *ifp, const int di) -{ - /* Can the packet have session tracking state? */ - if (!npf_session_trackable_p(npc)) - return NULL; - - bool sforw = false; - - return npf_session_find(m, di, ifp, &sforw, NULL); -} - /* * Session create rproc. Return 'false' to block session creation. */ @@ -917,14 +1001,14 @@ npf_session_create(npf_cache_t *npc, struct rte_mbuf *nbuf, /* session rproc */ if (!npf_rproc_session_create(npc, nbuf, ifp, di, &rproc_rl)) { - *error = -ECONNREFUSED; + *error = -NPF_RC_SESS_LIMIT; return NULL; } /* Allocate and initialize new state. */ se = zmalloc_aligned(sizeof(npf_session_t)); if (unlikely(se == NULL)) { - *error = -ENOMEM; + *error = -NPF_RC_SESS_ENOMEM; return NULL; } @@ -948,7 +1032,7 @@ npf_session_establish(npf_cache_t *npc, struct rte_mbuf *nbuf, npf_session_t *se = NULL; uint8_t proto; - *error = 0; + assert(*error == 0); /* Can the packet create session tracking state */ if (!npf_session_trackable_p(npc)) @@ -966,7 +1050,10 @@ npf_session_establish(npf_cache_t *npc, struct rte_mbuf *nbuf, se->s_vrfid = pktmbuf_get_vrf(nbuf); /* Initialize protocol state. */ - npf_state_init(se->s_vrfid, npc->npc_proto_idx, &se->s_state); + if (!npf_state_init(se->s_vrfid, npc->npc_proto_idx, &se->s_state)) { + *error = -NPF_RC_INTL; + goto fail; + } se->s_proto = proto; se->s_if_idx = ifp->if_index; @@ -978,8 +1065,10 @@ npf_session_establish(npf_cache_t *npc, struct rte_mbuf *nbuf, * session handle. 
*/ *error = npf_alg_session_init(se, npc, di); - if (*error) + if (*error) { + *error = -NPF_RC_ALG_ERR; goto fail; + } return se; @@ -994,16 +1083,18 @@ bool npf_session_is_active(const npf_session_t *se) } int npf_session_activate(npf_session_t *se, const struct ifnet *ifp, - npf_cache_t *npc, struct rte_mbuf *nbuf) + npf_cache_t *npc, struct rte_mbuf *nbuf) { + npf_state_t *nst = &se->s_state; int rc; if ((se->s_flags & SE_ACTIVE) == 0) { - if (unlikely(!npf_state_inspect(npc, nbuf, - &se->s_state, true))) { + rc = npf_state_inspect(npc, nbuf, se, nst, se->s_proto_idx, + true); + if (unlikely(rc < 0)) { /* Silently block invalid packets. */ npf_session_destroy(se); - return -ENETUNREACH; + return rc; } /* @@ -1011,16 +1102,18 @@ int npf_session_activate(npf_session_t *se, const struct ifnet *ifp, * CLOSED. We want to allow the packet though, but not * activate the session. */ - if (npf_tcp_state_is_closed(&se->s_state, se->s_proto_idx)) { + if (se->s_proto_idx == NPF_PROTO_IDX_TCP && + nst->nst_tcp_state == NPF_TCPS_CLOSED) { npf_session_destroy(se); - return -ENOSTR; + return -NPF_RC_ENOSTR; } /* * Create a dataplane session with the npf session as * a feature. 
*/ - rc = npf_dataplane_session_establish(se, npc, nbuf, ifp); + bool out = (se->s_flags & PFIL_OUT) != 0; + rc = npf_dataplane_session_establish(se, npc, nbuf, ifp, out); if (rc) return rc; @@ -1028,8 +1121,10 @@ int npf_session_activate(npf_session_t *se, const struct ifnet *ifp, se->s_flags |= SE_ACTIVE; - if (npf_nat64_session_log_enabled(se->s_nat64)) + if (unlikely(npf_nat64_session_log_enabled(se->s_nat64))) npf_session_nat64_log(se, true); + + npf_session_do_watch(se, SESSION_ACTIVATE); } return 0; @@ -1073,7 +1168,7 @@ void npf_session_destroy(npf_session_t *se) /* Decrement per-interface count if activated and still valid */ if ((se->s_flags & (SE_IF_DISABLED|SE_ACTIVE)) == SE_ACTIVE) { - struct ifnet *ifp = ifnet_byifindex(se->s_if_idx); + struct ifnet *ifp = dp_ifnet_byifindex(se->s_if_idx); if (ifp) npf_if_session_dec(ifp); @@ -1120,12 +1215,6 @@ npf_nat_t *npf_session_get_nat(const npf_session_t *se) return NULL; } -/* Get natpolicy of session nat */ -npf_natpolicy_t *npf_session_get_natpolicy(npf_session_t *se) -{ - return npf_nat_get_policy(se->s_nat); -} - void npf_session_set_dp_session(npf_session_t *se, struct session *s) { se->s_session = s; @@ -1185,6 +1274,18 @@ bool npf_session_set_dpi(npf_session_t *se, void *data) uint64_t * const ptr = (uint64_t *)&se->s_dpi; uint64_t const new = (uintptr_t)data; uint64_t const expected = 0; + + /* Mark this session as containing DPI. + * + * The dataplane session might not exist yet, + * in which case session_set_app() will be called from + * npf_dataplane_session_establish + * when the session is activated. 
+ */ + struct session *s = npf_session_get_dp_session(se); + if (s) + session_set_app(s); + if (rte_atomic64_cmpset(ptr, expected, new)) return true; @@ -1220,13 +1321,10 @@ void npf_session_expire(npf_session_t *se) sess_close(se); session_link_walk(se->s_session, true, sess_expire, &se->s_if_idx); - } -} -bool -npf_session_is_expired(const npf_session_t *se) -{ - return (se->s_flags & SE_EXPIRE) != 0; + /* Send out expiry only if the watch was acked before */ + npf_session_do_watch(se, SESSION_EXPIRE); + } } /* @@ -1307,91 +1405,144 @@ static void sess_clear_nat64_peer(npf_session_t *se) } int -npf_enable_session_log(const char *proto, const char *state) +npf_enable_session_log(const char *proto_name, const char *state_name) { - uint8_t state_index = 0, proto_idx; + enum npf_proto_idx proto_idx; - if (!proto || !state) + if (!proto_name || !state_name) return -1; - proto_idx = npf_proto_idx_from_str(proto); + proto_idx = npf_proto_idx_from_str(proto_name); if (proto_idx == NPF_PROTO_IDX_NONE) return -1; /* timeout state no longer used so ignore request to enable log */ - if (strcmp(state, "timeout") == 0) + if (strcmp(state_name, "timeout") == 0) return 0; if (proto_idx == NPF_PROTO_IDX_TCP) { - state_index = npf_map_str_to_tcp_state(state); - if (!npf_state_tcp_state_is_valid(state_index)) + enum tcp_session_state tcp_state; + + tcp_state = npf_map_str_to_tcp_state(state_name); + + if (tcp_state == NPF_TCPS_NONE) return -1; + + npf_sess_log_flag_set(NPF_PROTO_IDX_TCP, (uint8_t)tcp_state); } else { - state_index = npf_map_str_to_generic_state(state); - if (!npf_state_generic_state_is_valid(state_index)) + enum dp_session_state gen_state; + + gen_state = dp_session_name2state(state_name); + + if (gen_state == SESSION_STATE_NONE) return -1; + + npf_sess_log_flag_set(proto_idx, (uint8_t)gen_state); } - NPF_SET_SESSION_LOG_FLAG(proto_idx, state_index); return 0; } int -npf_disable_session_log(const char *proto, const char *state) +npf_disable_session_log(const char 
*proto_name, const char *state_name) { - uint8_t state_index = 0, proto_idx; + enum npf_proto_idx proto_idx; - if (!proto || !state) + if (!proto_name || !state_name) return -1; - proto_idx = npf_proto_idx_from_str(proto); + proto_idx = npf_proto_idx_from_str(proto_name); if (proto_idx == NPF_PROTO_IDX_NONE) return -1; /* timeout state no longer used so ignore request to disable log */ - if (strcmp(state, "timeout") == 0) + if (strcmp(state_name, "timeout") == 0) return 0; if (proto_idx == NPF_PROTO_IDX_TCP) { - state_index = npf_map_str_to_tcp_state(state); - if (!npf_state_tcp_state_is_valid(state_index)) + enum tcp_session_state tcp_state; + + tcp_state = npf_map_str_to_tcp_state(state_name); + + if (tcp_state == NPF_TCPS_NONE) return -1; + + npf_sess_log_flag_clr(NPF_PROTO_IDX_TCP, (uint8_t)tcp_state); } else { - state_index = npf_map_str_to_generic_state(state); - if (!npf_state_generic_state_is_valid(state_index)) + enum dp_session_state gen_state; + + gen_state = dp_session_name2state(state_name); + + if (gen_state == SESSION_STATE_NONE) return -1; + + npf_sess_log_flag_clr(proto_idx, (uint8_t)gen_state); } - NPF_CLR_SESSION_LOG_FLAG(proto_idx, state_index); return 0; } -int npf_session_json_nat(json_writer_t *json, npf_session_t *se) +static void npf_session_json_rule(json_writer_t *json, npf_rule_t *rl) +{ + if (!rl) + return; + + const char *name = npf_rule_get_name(rl); + rule_no_t num = npf_rule_get_num(rl); + + jsonw_name(json, "rule"); + jsonw_start_object(json); + + jsonw_string_field(json, "name", name ? 
name : ""); + jsonw_uint_field(json, "number", num); + + jsonw_end_object(json); +} + +static void npf_session_json_nat(json_writer_t *json, npf_session_t *se) { npf_addr_t taddr; uint16_t tport; int type; u_int masq = 0; char buf[INET_ADDRSTRLEN]; + npf_nat_t *nt = se->s_nat; - if (!npf_nat_info(se->s_nat, &type, &taddr, &tport, &masq)) - return -ENOENT; + if (!nt || !npf_nat_info(nt, &type, &taddr, &tport, &masq)) + return; jsonw_name(json, "nat"); jsonw_start_object(json); + jsonw_uint_field(json, "trans_type", type); jsonw_string_field(json, "trans_addr", inet_ntop(AF_INET, &taddr, buf, sizeof(buf))); jsonw_uint_field(json, "trans_port", ntohs(tport)); jsonw_uint_field(json, "masquerade", masq); + + npf_session_json_rule(json, npf_nat_get_rule(nt)); + jsonw_end_object(json); +} - return 0; +static void npf_session_json_fw(json_writer_t *json, npf_session_t *se) +{ + npf_rule_t *rl = npf_session_get_fw_rule(se); + + if (!rl) + return; + + jsonw_name(json, "firewall"); + jsonw_start_object(json); + + npf_session_json_rule(json, rl); + + jsonw_end_object(json); } void npf_session_feature_json(json_writer_t *json, npf_session_t *se) { - struct ifnet *ifp = ifnet_byifindex(se->s_if_idx); + struct ifnet *ifp = dp_ifnet_byifindex(se->s_if_idx); if (ifp) jsonw_string_field(json, "interface", ifp->if_name); @@ -1400,6 +1551,10 @@ void npf_session_feature_json(json_writer_t *json, npf_session_t *se) jsonw_uint_field(json, "flags", se->s_flags); + /* Firewall json */ + if (npf_session_is_fw(se)) + npf_session_json_fw(json, se); + /* NAT json */ if (se->s_nat) npf_session_json_nat(json, se); @@ -1411,6 +1566,28 @@ void npf_session_feature_json(json_writer_t *json, npf_session_t *se) /* DPI json */ if (se->s_dpi) dpi_info_json(se->s_dpi, json); + + /* ALG json */ + if (se->s_alg) + npf_alg_session_json(json, se, se->s_alg); +} + +int npf_session_feature_nat_info(npf_session_t *se, uint32_t *taddr, + uint16_t *tport) +{ + npf_nat_t *nt = se->s_nat; + npf_addr_t npf_taddr; + 
int type; + uint masq = 0; + + if (!nt) + return -EINVAL; + + if (!npf_nat_info(nt, &type, &npf_taddr, tport, &masq)) + return -EINVAL; + + *taddr = npf_taddr.s6_addr32[0]; + return 0; } static inline const char *npf_session_log_event( @@ -1517,6 +1694,26 @@ npf_session_log_parent_id(char *buf, size_t *used_buf_len, " parent-id=%lu", s->se_link->sl_parent->se_id); } +static inline void +npf_session_log_counters(char *buf, size_t *used_buf_len, + const size_t total_buf_len, + struct session *s __unused, + enum session_log_event event) +{ + /* Only emit this for deletion and periodic events, + * and specifically not for creation events. + */ + if (event == SESSION_LOG_DELETION || + event == SESSION_LOG_PERIODIC) { + buf_app_printf(buf, used_buf_len, total_buf_len, + " out=%lu/%lu in=%lu/%lu", + rte_atomic64_read(&s->se_pkts_out), + rte_atomic64_read(&s->se_bytes_out), + rte_atomic64_read(&s->se_pkts_in), + rte_atomic64_read(&s->se_bytes_in)); + } +} + static inline void npf_session_log_rule_info(char *buf, size_t *used_buf_len, const size_t total_buf_len, npf_session_t *se) @@ -1633,10 +1830,11 @@ void npf_session_feature_log(enum session_log_event event, struct session *s, session_sentry_extract(sen, &if_index, &af, &saddr, &sid, &daddr, &did); buf_app_printf(buf, &used_buf_len, sizeof(buf), - " ifname=%s session-id=%lu proto=%s(%u)", + " ifname=%s session-id=%lu proto=%s(%u) dir=%s", ifnet_indextoname_safe(if_index), s->se_id, npf_get_protocol_name_from_num(s->se_protocol), - s->se_protocol); + s->se_protocol, + npf_session_forward_dir(se, PFIL_IN) ? 
"in" : "out"); npf_session_log_addrs(buf, &used_buf_len, sizeof(buf), af, saddr, daddr); @@ -1646,8 +1844,7 @@ void npf_session_feature_log(enum session_log_event event, struct session *s, npf_session_log_parent_id(buf, &used_buf_len, sizeof(buf), s); - /* NB: fn will be created when per-session statistics is added */ - // npf_session_log_counters(buf, &used_buf_len, sizeof(buf), s); + npf_session_log_counters(buf, &used_buf_len, sizeof(buf), s, event); npf_session_log_rule_info(buf, &used_buf_len, sizeof(buf), se); @@ -1664,3 +1861,241 @@ void npf_session_feature_log(enum session_log_event event, struct session *s, RTE_LOG(NOTICE, FIREWALL, "%s\n", buf); } +void npf_save_stats(npf_session_t *se, int dir, uint64_t bytes) +{ + assert(se); + + if (se->s_session) { + se_save_stats(se->s_session, + dir == PFIL_IN ? true : false, + bytes); + npf_session_do_watch(se, SESSION_STATS_UPDATE); + } +} + +/* + * Pack session state for protocols other than TCP + */ +int npf_session_pack_state_pack_gen(struct npf_session *se, + struct npf_pack_session_state *pst) +{ + if (!se || !pst) + return -EINVAL; + + npf_state_pack_gen(&se->s_state, pst); + return 0; +} + +/* + * Pack session state for TCP + */ +int npf_session_pack_state_pack_tcp(struct npf_session *se, + struct npf_pack_session_state *pst) +{ + if (!se || !pst) + return -EINVAL; + + npf_state_pack_tcp(&se->s_state, pst); + return 0; +} + +/* + * Restore session state for protocols other than TCP + */ +static int +npf_session_pack_state_restore_gen(struct npf_session *se, + struct npf_pack_session_state *pst, + vrfid_t vrfid, + enum npf_proto_idx proto_idx) +{ + npf_state_t *nst; + bool state_changed = false; + + nst = &se->s_state; + npf_state_init(vrfid, proto_idx, nst); + + npf_state_pack_update_gen(nst, pst, proto_idx, &state_changed); + return 0; +} + +/* + * Restore session state for TCP + */ +static int +npf_session_pack_state_restore_tcp(struct npf_session *se, + struct npf_pack_session_state *pst, + vrfid_t 
vrfid) +{ + npf_state_t *nst; + bool state_changed = false; + + nst = &se->s_state; + npf_state_init(vrfid, NPF_PROTO_IDX_TCP, nst); + + npf_state_pack_update_tcp(nst, pst, &state_changed); + return 0; +} + +/* + * State update for protocols other than TCP + */ +int npf_session_pack_state_update_gen(struct npf_session *se, + struct npf_pack_session_state *pst) +{ + npf_state_t *nst; + enum dp_session_state old_state; + struct session *s; + enum npf_proto_idx proto_idx; + bool state_changed = false; + + if (!se || !pst) + return -EINVAL; + + nst = &se->s_state; + proto_idx = se->s_proto_idx; + old_state = nst->nst_gen_state; + + npf_state_pack_update_gen(nst, pst, proto_idx, &state_changed); + + if (state_changed) + npf_session_gen_state_change(se, nst, old_state, + pst->pst_gen_state, proto_idx); + + s = se->s_session; + if (s) + s->se_etime = get_dp_uptime() + + session_get_npf_pack_timeout(s); + + return 0; +} + +/* + * State update for TCP + */ +int npf_session_pack_state_update_tcp(struct npf_session *se, + struct npf_pack_session_state *pst) +{ + npf_state_t *nst; + enum tcp_session_state old_state; + struct session *s; + bool state_changed = false; + + if (!se || !pst) + return -EINVAL; + + nst = &se->s_state; + old_state = nst->nst_tcp_state; + + npf_state_pack_update_tcp(nst, pst, &state_changed); + + if (state_changed) + npf_session_tcp_state_change(se, nst, old_state, + pst->pst_tcp_state); + + s = se->s_session; + if (s) + s->se_etime = get_dp_uptime() + + session_get_npf_pack_timeout(s); + + return 0; +} + +int npf_session_npf_pack_pack(npf_session_t *se, + struct npf_pack_npf_session *pns, + struct npf_pack_session_state *pst) +{ + npf_rule_t *rule; + int rc; + + if (!se || !pns) + return -EINVAL; + + + /* + * Do not sync SE_ACTIVE flag. The rcvr will call + * npf_session_npf_pack_activate to set the SE_ACTIVE flag and + * increment the intf session count. 
If the SE_ACTIVE flag is already + * set, then an error in the unpacking routine *before* + * npf_session_npf_pack_activate is called can result in + * npf_if_session_dec decrementing the session count erroneously. + */ + pns->pns_flags = se->s_flags & ~SE_ACTIVE; + + rule = npf_session_get_fw_rule(se); + pns->pns_fw_rule_hash = (rule ? npf_rule_get_hash(rule) : 0); + rule = npf_session_get_rproc_rule(se); + pns->pns_rproc_rule_hash = (rule ? npf_rule_get_hash(rule) : 0); + + if (se->s_proto_idx == NPF_PROTO_IDX_TCP) + rc = npf_session_pack_state_pack_tcp(se, pst); + else + rc = npf_session_pack_state_pack_gen(se, pst); + + return rc; +} + +struct npf_session * +npf_session_npf_pack_restore(struct npf_pack_npf_session *pns, + struct npf_pack_session_state *pst, + vrfid_t vrfid, uint8_t protocol, + uint32_t ifindex) +{ + npf_rule_t *fw_rl; + npf_rule_t *rproc_rl; + npf_session_t *se; + int rc; + + if (!pns || !pst) + return NULL; + + se = zmalloc_aligned(sizeof(*se)); + if (!se) + return NULL; + + fw_rl = pns->pns_fw_rule_hash ? + npf_get_rule_by_hash(pns->pns_fw_rule_hash) : NULL; + if (fw_rl) + npf_session_add_fw_rule(se, fw_rl); + + rproc_rl = pns->pns_rproc_rule_hash ? 
+ npf_get_rule_by_hash(pns->pns_rproc_rule_hash) : NULL; + if (rproc_rl) + npf_session_add_rproc_rule(se, rproc_rl); + + se->s_flags = pns->pns_flags; + se->s_vrfid = vrfid; + se->s_if_idx = ifindex; + se->s_proto = protocol; + se->s_proto_idx = npf_proto_idx_from_proto(protocol); + + if (se->s_proto_idx == NPF_PROTO_IDX_TCP) + rc = npf_session_pack_state_restore_tcp(se, pst, vrfid); + else + rc = npf_session_pack_state_restore_gen(se, pst, vrfid, + se->s_proto_idx); + + if (rc) + goto error; + + rte_spinlock_init(&se->s_state.nst_lock); + + return se; + +error: + if (fw_rl) + npf_rule_put(fw_rl); + if (rproc_rl) + npf_rule_put(rproc_rl); + free(se); + return NULL; +} + +int npf_session_npf_pack_activate(struct npf_session *se, struct ifnet *ifp) +{ + if (!se || !ifp) + return -EINVAL; + + npf_if_session_inc(ifp); + se->s_flags |= SE_ACTIVE; + return 0; +} diff --git a/src/npf/npf_session.h b/src/npf/npf_session.h index d97d7cd2..cec0a81e 100644 --- a/src/npf/npf_session.h +++ b/src/npf/npf_session.h @@ -53,6 +53,8 @@ typedef struct npf_session npf_session_t; struct npf_alg; struct npf_session; struct rte_mbuf; +struct npf_pack_npf_session; +struct npf_pack_session_state; /* Forward Declarations */ typedef struct npf_rule npf_rule_t; @@ -73,6 +75,7 @@ typedef bool session_pkt_hook(npf_session_t *se, npf_cache_t *npc, uint64_t npf_session_get_id(struct npf_session *se); void npf_session_add_fw_rule(npf_session_t *s, npf_rule_t *r); +bool npf_session_is_fw(npf_session_t *s); /* Appfw */ void npf_session_set_appfw_decision(npf_session_t *, npf_decision_t); @@ -97,18 +100,16 @@ npf_session_t *npf_session_inspect_or_create(npf_cache_t *npc, const int di, uint16_t *npf_flag, int *error, bool *internal_hairpin); npf_session_t *npf_session_find(struct rte_mbuf *m, int di, - const struct ifnet *ifp, bool *sforw, bool *internal_hairpin); + const struct ifnet *ifp, bool *sfwd, bool *internal_hairpin); npf_session_t *npf_session_find_or_create(npf_cache_t *npc, struct rte_mbuf 
*mbuf, const struct ifnet *ifp, int dir, int *error); npf_session_t *npf_session_find_by_npc(npf_cache_t *npc, const int di, const struct ifnet *ifp, bool embedded); -npf_session_t *npf_session_lookup(struct rte_mbuf *m, npf_cache_t *npc, - const struct ifnet *ifp, const int di); npf_session_t *npf_session_establish(npf_cache_t *npc, struct rte_mbuf *nbuf, const struct ifnet *ifp, const int di, int *error); -void npf_session_update_state(npf_session_t *se); +void npf_session_update_state(npf_session_t *se, struct session *s); uint8_t npf_session_get_proto(npf_session_t *se); bool npf_session_is_active(const npf_session_t *se); bool npf_session_is_child(const npf_session_t *se); @@ -116,7 +117,6 @@ int npf_session_activate(npf_session_t *se, const struct ifnet *ifp, npf_cache_t *npc, struct rte_mbuf *nbuf); vrfid_t npf_session_get_vrfid(npf_session_t *se); npf_nat_t *npf_session_get_nat(const npf_session_t *se); -npf_natpolicy_t *npf_session_get_natpolicy(npf_session_t *se); void npf_session_setnat(npf_session_t *se, npf_nat_t *nt, bool pinhole); void npf_session_set_dp_session(npf_session_t *se, struct session *s); @@ -126,18 +126,17 @@ int npf_session_sentry_extract(npf_session_t *se, uint32_t *if_index, int *af, npf_addr_t **dst, uint16_t *did); void npf_session_expire(npf_session_t *se); -bool npf_session_is_expired(const npf_session_t *se); void npf_session_destroy(npf_session_t *se); bool npf_session_is_pass(const npf_session_t *se, npf_rule_t **rl); bool npf_session_is_nat_pinhole(const npf_session_t *se, int dir); bool npf_session_forward_dir(npf_session_t *se, int di); npf_nat_t *npf_session_retnat(npf_session_t *se, const int di, bool *forw); -int npf_session_json_nat(json_writer_t *json, npf_session_t *se); - void npf_session_feature_json(json_writer_t *json, npf_session_t *se); void npf_session_feature_log(enum session_log_event event, struct session *s, struct session_feature *sf); +int npf_session_feature_nat_info(npf_session_t *se, uint32_t *taddr, + 
uint16_t *tport); void npf_session_set_nat64(npf_session_t *se, struct npf_nat64 *nat64); struct npf_nat64 *npf_session_get_nat64(npf_session_t *se); @@ -153,5 +152,29 @@ void *npf_session_get_dpi(npf_session_t *se); void npf_session_set_pkt_hook(npf_session_t *se, session_pkt_hook *fn); +void npf_session_set_local_zone_nat(npf_session_t *se); +bool npf_session_is_local_zone_nat(npf_session_t *se); + void npf_session_disassoc_nif(unsigned int if_index); + +void npf_save_stats(npf_session_t *se, int dir, uint64_t bytes); + +int npf_session_pack_state_pack_gen(struct npf_session *se, + struct npf_pack_session_state *pst); +int npf_session_pack_state_pack_tcp(struct npf_session *se, + struct npf_pack_session_state *pst); +int npf_session_pack_state_update_gen(struct npf_session *se, + struct npf_pack_session_state *pst); +int npf_session_pack_state_update_tcp(struct npf_session *se, + struct npf_pack_session_state *pst); +int npf_session_npf_pack_pack(npf_session_t *se, + struct npf_pack_npf_session *pns, + struct npf_pack_session_state *pst); +struct npf_session * +npf_session_npf_pack_restore(struct npf_pack_npf_session *pns, + struct npf_pack_session_state *pst, + vrfid_t vrfid, uint8_t protocol, + uint32_t ifindex); +int npf_session_npf_pack_activate(struct npf_session *se, struct ifnet *ifp); + #endif /* NPF_SESSION_H */ diff --git a/src/npf/npf_state.c b/src/npf/npf_state.c index 9fd7af67..f9d0dca4 100644 --- a/src/npf/npf_state.c +++ b/src/npf/npf_state.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2016 by Brocade Communications Systems, Inc. * All rights reserved. 
*/ @@ -50,7 +50,7 @@ #include #include "json_writer.h" -#include "vrf.h" +#include "vrf_internal.h" #include "npf/npf.h" #include "npf/config/npf_config.h" #include "npf/config/npf_ruleset_type.h" @@ -61,23 +61,8 @@ #include "npf/npf_vrf.h" #include "npf/rproc/npf_rproc.h" #include "npf_shim.h" - -struct rte_mbuf; - -/* - * Generic state name for UDP and ICMP, and other non-TCP protocols. - * - * Logger uses the upper-case form shown here. - * npf commands use the lower-case form. - * json uses use the lower-case form, plus hyphens replaced with underscores. - */ -static const char *npf_state_generic_name[NPF_ANY_SESSION_NSTATES] = { - [NPF_ANY_SESSION_NONE] = "NONE", - [NPF_ANY_SESSION_NEW] = "NEW", - [NPF_ANY_SESSION_ESTABLISHED] = "ESTABLISHED", - [NPF_ANY_SESSION_TERMINATING] = "TERMINATING", - [NPF_ANY_SESSION_CLOSED] = "CLOSED", -}; +#include "npf/npf_pack.h" +#include "npf/npf_rc.h" /* * TCP state name. @@ -103,27 +88,50 @@ static const char *npf_state_tcp_name[NPF_TCP_NSTATES] = { [NPF_TCPS_CLOSED] = "CLOSED", }; -static const uint8_t npf_generic_fsm[NPF_ANY_SESSION_NSTATES][2] = { - [NPF_ANY_SESSION_NONE] = { - [NPF_FLOW_FORW] = NPF_ANY_SESSION_NEW, +static const uint8_t npf_generic_fsm[SESSION_STATE_SIZE][NPF_FLOW_SZ] = { + [SESSION_STATE_NONE] = { + [NPF_FLOW_FORW] = SESSION_STATE_NEW, }, - [NPF_ANY_SESSION_NEW] = { - [NPF_FLOW_FORW] = NPF_ANY_SESSION_NEW, - [NPF_FLOW_BACK] = NPF_ANY_SESSION_ESTABLISHED, + [SESSION_STATE_NEW] = { + [NPF_FLOW_FORW] = SESSION_STATE_NEW, + [NPF_FLOW_BACK] = SESSION_STATE_ESTABLISHED, }, - [NPF_ANY_SESSION_ESTABLISHED] = { - [NPF_FLOW_FORW] = NPF_ANY_SESSION_ESTABLISHED, - [NPF_FLOW_BACK] = NPF_ANY_SESSION_ESTABLISHED, + [SESSION_STATE_ESTABLISHED] = { + [NPF_FLOW_FORW] = SESSION_STATE_ESTABLISHED, + [NPF_FLOW_BACK] = SESSION_STATE_ESTABLISHED, }, }; static struct npf_state_stats *stats; static bool npf_state_icmp_strict; -#define stats_inc_tcp(a) (stats[dp_lcore_id()].ss_tcp_ct[(a)]++) -#define stats_inc(a, b) 
(stats[dp_lcore_id()].ss_ct[(a)][(b)]++) -#define stats_dec(a, b) (stats[dp_lcore_id()].ss_ct[(a)][(b)]--) -#define stats_dec_tcp(a) (stats[dp_lcore_id()].ss_tcp_ct[(a)]--) +static inline void stats_inc_tcp(enum tcp_session_state tcp_state) +{ + if (likely(tcp_state <= NPF_TCPS_LAST)) + stats[dp_lcore_id()].ss_tcp_ct[tcp_state]++; +} + +static inline void stats_dec_tcp(enum tcp_session_state tcp_state) +{ + if (likely(tcp_state <= NPF_TCPS_LAST)) + stats[dp_lcore_id()].ss_tcp_ct[tcp_state]--; +} + +static inline void stats_inc(enum npf_proto_idx proto_idx, + enum dp_session_state state) +{ + if (likely(state <= SESSION_STATE_LAST && + proto_idx <= NPF_PROTO_IDX_LAST)) + stats[dp_lcore_id()].ss_ct[proto_idx][state]++; +} + +static inline void stats_dec(enum npf_proto_idx proto_idx, + enum dp_session_state state) +{ + if (likely(state <= SESSION_STATE_LAST && + proto_idx <= NPF_PROTO_IDX_LAST)) + stats[dp_lcore_id()].ss_ct[proto_idx][state]--; +} /* state stats - create/destroy */ void npf_state_stats_create(void) @@ -150,53 +158,68 @@ void npf_state_set_icmp_strict(bool value) * direction in a case of connection-orientated protocol. Returns true on * success and false otherwise (e.g. if protocol is not supported). 
*/ -void npf_state_init(vrfid_t vrfid, uint8_t proto_idx, npf_state_t *nst) +bool +npf_state_init(vrfid_t vrfid, enum npf_proto_idx proto_idx, npf_state_t *nst) { - assert(NPF_ANY_SESSION_LAST < 255); - assert(NPF_TCPS_LAST < 255); - assert(NPF_TCPS_OK <= 255); - assert(NPF_TCPS_OK > NPF_TCPS_LAST); + static_assert(SESSION_STATE_LAST < 255, + "session state last is too big"); + static_assert(NPF_TCPS_LAST < 255, + "npf tcps last is too big"); rte_spinlock_init(&nst->nst_lock); /* Take reference on vrf npf timeout struct */ - nst->nst_to = npf_timeout_ref_get(vrf_get_npf_timeout_rcu(vrfid)); + struct npf_timeout *to = vrf_get_npf_timeout_rcu(vrfid); + if (!to) + return false; + + npf_timeout_ref_get(to); + rcu_assign_pointer(nst->nst_to, to); if (proto_idx == NPF_PROTO_IDX_TCP) { - nst->nst_state = NPF_TCPS_NONE; + nst->nst_tcp_state = NPF_TCPS_NONE; + nst->nst_gen_state = SESSION_STATE_NONE; stats_inc_tcp(NPF_TCPS_NONE); } else { - nst->nst_state = NPF_ANY_SESSION_NONE; - stats_inc(proto_idx, NPF_ANY_SESSION_NONE); + nst->nst_gen_state = SESSION_STATE_NONE; + stats_inc(proto_idx, SESSION_STATE_NONE); } + + return true; } /* Called from npf_session_destroy */ -void npf_state_destroy(npf_state_t *nst, uint8_t proto_idx) +void npf_state_destroy(npf_state_t *nst, enum npf_proto_idx proto_idx) { + struct npf_timeout *to; + if (proto_idx == NPF_PROTO_IDX_TCP) - stats_dec_tcp(nst->nst_state); + stats_dec_tcp(nst->nst_tcp_state); else - stats_dec(proto_idx, nst->nst_state); + stats_dec(proto_idx, nst->nst_gen_state); + + to = rcu_dereference(nst->nst_to); + to = rcu_cmpxchg_pointer(&nst->nst_to, to, NULL); /* Release reference on vrf npf timeout struct */ - npf_timeout_ref_put(nst->nst_to); + if (to) + npf_timeout_ref_put(to); } /* * Set generic session state. 
*/ static inline void -npf_state_generic_state_set(npf_state_t *nst, uint8_t proto_idx, - uint8_t state, bool *state_changed) +npf_state_set_gen(npf_state_t *nst, enum npf_proto_idx proto_idx, + enum dp_session_state state, bool *state_changed) { - if (unlikely(nst->nst_state != state)) { - uint8_t old_state = nst->nst_state; + if (unlikely(nst->nst_gen_state != state)) { + enum dp_session_state old_state = nst->nst_gen_state; stats_dec(proto_idx, old_state); stats_inc(proto_idx, state); - nst->nst_state = state; + nst->nst_gen_state = state; *state_changed = true; } } @@ -205,142 +228,246 @@ npf_state_generic_state_set(npf_state_t *nst, uint8_t proto_idx, * Set TCP session state. */ static inline void -npf_state_tcp_state_set(npf_state_t *nst, uint8_t state, bool *state_changed) +npf_state_set_tcp(npf_state_t *nst, enum tcp_session_state state, + bool *state_changed) { - if (unlikely(state != NPF_TCPS_OK && nst->nst_state != state)) { - uint8_t old_state = nst->nst_state; + if (unlikely(nst->nst_tcp_state != state)) { + enum tcp_session_state old_state = nst->nst_tcp_state; stats_dec_tcp(old_state); stats_inc_tcp(state); - nst->nst_state = state; + nst->nst_tcp_state = state; + nst->nst_gen_state = npf_state_tcp2gen(state); *state_changed = true; } } /* - * npf_state_inspect: inspect the packet according to the protocol state. - * - * Return true if packet is considered to match the state (e.g. for TCP, - * the packet belongs to the tracked connection) and false otherwise. 
+ * State inspect for sessions other than TCP and ICMP + */ +static inline int +npf_state_inspect_other(npf_session_t *se, npf_state_t *nst, + enum npf_proto_idx proto_idx, + enum npf_flow_dir flow_dir) +{ + bool state_changed = false; + enum dp_session_state old_state, new_state; + + rte_spinlock_lock(&nst->nst_lock); + + old_state = nst->nst_gen_state; + + new_state = npf_generic_fsm[nst->nst_gen_state][flow_dir]; + + npf_state_set_gen(nst, proto_idx, new_state, &state_changed); + + rte_spinlock_unlock(&nst->nst_lock); + + if (state_changed) + npf_session_gen_state_change(se, nst, old_state, new_state, + proto_idx); + + return 0; +} + +/* + * State inspect for TCP sessions */ -bool npf_state_inspect(const npf_cache_t *npc, struct rte_mbuf *nbuf, - npf_state_t *nst, bool forw) +static inline int +npf_state_inspect_tcp(const npf_cache_t *npc, struct rte_mbuf *nbuf, + npf_session_t *se, npf_state_t *nst, + enum npf_flow_dir flow_dir) { - const uint8_t proto_idx = npf_cache_proto_idx(npc); - const int di = forw ? 
NPF_FLOW_FORW : NPF_FLOW_BACK; - bool ret = true; bool state_changed = false; - uint8_t state; - uint8_t old_state; + enum tcp_session_state old_state, new_state; + int rc = 0; rte_spinlock_lock(&nst->nst_lock); - old_state = nst->nst_state; + old_state = nst->nst_tcp_state; + + new_state = npf_state_tcp(npc, nbuf, nst, flow_dir, &rc); + + if (rc == 0) + npf_state_set_tcp(nst, new_state, &state_changed); + + rte_spinlock_unlock(&nst->nst_lock); + + if (state_changed) + npf_session_tcp_state_change(se, nst, old_state, new_state); + + return rc; +} + +/* + * State inspect for ICMP sessions + */ +static inline int +npf_state_inspect_icmp(const npf_cache_t *npc, npf_session_t *se, + npf_state_t *nst, enum npf_flow_dir flow_dir) +{ + bool state_changed = false; + enum dp_session_state old_state, new_state; + int rc = 0; + + rte_spinlock_lock(&nst->nst_lock); + + old_state = nst->nst_gen_state; + + /* + * If a ping session does not exist, it can only be created by an ICMP + * echo request. If it exists, the fwd direction will conditionally + * ('strict' enabled) only pass requests and the backward only + * replies. Note, the 'strict' bit needs to be disabled because of MS + * Windows clients. + */ + if ((npf_state_icmp_strict || old_state == SESSION_STATE_NONE) && + unlikely((flow_dir == NPF_FLOW_FORW) ^ + npf_iscached(npc, NPC_ICMP_ECHO_REQ))) + rc = -NPF_RC_ICMP_ECHO; + + if (rc == 0) { + new_state = npf_generic_fsm[nst->nst_gen_state][flow_dir]; + + npf_state_set_gen(nst, NPF_PROTO_IDX_ICMP, new_state, + &state_changed); + } + + rte_spinlock_unlock(&nst->nst_lock); + + if (state_changed) + npf_session_gen_state_change(se, nst, old_state, new_state, + NPF_PROTO_IDX_ICMP); + + return rc; +} + +/* + * npf_state_inspect: inspect the packet according to the protocol state. + * + * Return 0 if packet is considered to match the state (e.g. for TCP, the + * packet belongs to the tracked connection) and return code (< 0) otherwise. 
+ */ +int npf_state_inspect(const npf_cache_t *npc, struct rte_mbuf *nbuf, + npf_session_t *se, npf_state_t *nst, + enum npf_proto_idx proto_idx, bool forw) +{ + enum npf_flow_dir flow_dir = forw ? NPF_FLOW_FORW : NPF_FLOW_BACK; + int rc = 0; switch (proto_idx) { + case NPF_PROTO_IDX_UDP: + case NPF_PROTO_IDX_OTHER: + rc = npf_state_inspect_other(se, nst, proto_idx, flow_dir); + break; case NPF_PROTO_IDX_TCP: - state = npf_state_tcp(npc, nbuf, nst, di); - if (unlikely(state == NPF_TCPS_ERR)) { - ret = false; - break; - } - npf_state_tcp_state_set(nst, state, &state_changed); + rc = npf_state_inspect_tcp(npc, nbuf, se, nst, flow_dir); break; case NPF_PROTO_IDX_ICMP: - state = nst->nst_state; - if ((npf_state_icmp_strict || state == NPF_ANY_SESSION_NONE) && - unlikely(forw ^ npf_iscached(npc, NPC_ICMP_ECHO_REQ))) { - ret = false; - break; - } - /* fall through */ - default: - state = npf_generic_fsm[nst->nst_state][di]; - - npf_state_generic_state_set(nst, proto_idx, state, - &state_changed); + rc = npf_state_inspect_icmp(npc, se, nst, flow_dir); break; - } - rte_spinlock_unlock(&nst->nst_lock); + }; + + return rc; +} + +/* + * Mark (non-TCP) session state as 'closed' for the period that it is going + * through garbage collection. 
+ */ +void npf_state_set_gen_closed(npf_state_t *nst, npf_session_t *se, bool lock, + enum npf_proto_idx proto_idx) +{ + enum dp_session_state old_state; + bool state_changed = false; + + if (lock) + rte_spinlock_lock(&nst->nst_lock); + + old_state = nst->nst_gen_state; + + npf_state_set_gen(nst, proto_idx, SESSION_STATE_CLOSED, &state_changed); + + if (lock) + rte_spinlock_unlock(&nst->nst_lock); if (state_changed) - npf_session_state_change(nst, old_state, state, proto_idx); - return ret; + npf_session_gen_state_change(se, nst, old_state, + SESSION_STATE_CLOSED, proto_idx); } /* - * Mark session state as 'closed' for the period that it is going through + * Mark TCP session state as 'closed' for the period that it is going through * garbage collection. */ -void npf_state_set_closed_state(npf_state_t *nst, bool lock, uint8_t proto_idx) +void npf_state_set_tcp_closed(npf_state_t *nst, npf_session_t *se, bool lock) { - uint8_t old_state; - uint8_t state; + enum tcp_session_state old_state; bool state_changed = false; if (lock) rte_spinlock_lock(&nst->nst_lock); - old_state = nst->nst_state; + old_state = nst->nst_tcp_state; - if (proto_idx == NPF_PROTO_IDX_TCP) { - state = NPF_TCPS_CLOSED; - npf_state_tcp_state_set(nst, NPF_TCPS_CLOSED, - &state_changed); - } else { - state = NPF_ANY_SESSION_CLOSED; - npf_state_generic_state_set(nst, proto_idx, - NPF_ANY_SESSION_CLOSED, &state_changed); - } + npf_state_set_tcp(nst, NPF_TCPS_CLOSED, &state_changed); if (lock) rte_spinlock_unlock(&nst->nst_lock); if (state_changed) - npf_session_state_change(nst, old_state, state, proto_idx); + npf_session_tcp_state_change(se, nst, old_state, + NPF_TCPS_CLOSED); } /* - * Update the dataplane session (if present) state/timeout with the - * current NPF protocol state. + * Update a dataplane session other than TCP (if present) state/timeout with + * the current NPF protocol state. * * This is called during NPF activation and protocol state changes. 
*/ -void npf_state_update_session_state(struct session *s, uint8_t proto_idx, - const npf_state_t *nst) +void npf_state_update_gen_session(struct session *s, + enum npf_proto_idx proto_idx, + const npf_state_t *nst) { - uint32_t to; + if (unlikely(!s)) + return; - if (s) { - to = npf_timeout_get(nst, proto_idx, s->se_custom_timeout); - session_set_protocol_state_timeout(s, nst->nst_state, to); - } -} + uint32_t timeout; + enum dp_session_state gen_state = nst->nst_gen_state; -static const char *npf_state_get_state_generic_name(uint8_t index) -{ - if (!npf_state_generic_state_is_valid(index)) - return NULL; - return npf_state_generic_name[index]; -} + timeout = npf_gen_timeout_get(nst, gen_state, proto_idx, + s->se_custom_timeout); -static const char *npf_state_get_state_tcp_name(uint8_t index) -{ - if (!npf_state_tcp_state_is_valid(index)) - return NULL; - return npf_state_tcp_name[index]; + /* Protocol state and gen state are the same */ + session_set_protocol_state_timeout(s, gen_state, gen_state, timeout); } /* - * npf_state_get_state_name: return state name for logging purpose + * Update a dataplane TCP session state/timeout with the current NPF protocol + * state. 
*/ -const char *npf_state_get_state_name(uint8_t state, uint8_t proto_idx) +void npf_state_update_tcp_session(struct session *s, const npf_state_t *nst) { - if (proto_idx == NPF_PROTO_IDX_TCP) - return npf_state_get_state_tcp_name(state); - else - return npf_state_get_state_generic_name(state); + if (unlikely(!s)) + return; + + uint32_t timeout; + enum tcp_session_state tcp_state = nst->nst_tcp_state; + enum dp_session_state gen_state = npf_state_tcp2gen(tcp_state); + + timeout = npf_tcp_timeout_get(nst, tcp_state, s->se_custom_timeout); + + /* Protocol state and gen state are different */ + session_set_protocol_state_timeout(s, tcp_state, gen_state, timeout); +} + +const char *npf_state_get_tcp_name(enum tcp_session_state state) +{ + if (state <= NPF_TCPS_LAST) + return npf_state_tcp_name[state]; + return "none"; } /* @@ -374,59 +501,35 @@ static void npf_str_to_log_name(const char *src, char *dst, int len) dst[i] = '\0'; } -static void -npf_state_get_state_name_json(uint8_t state, uint8_t proto_idx, - char *dst, int len) -{ - const char *upper; - - if (proto_idx == NPF_PROTO_IDX_TCP) - upper = npf_state_get_state_tcp_name(state); - else - upper = npf_state_get_state_generic_name(state); - - npf_str_to_json_name(upper, dst, len); -} - -bool npf_state_is_steady(const npf_state_t *nst, const uint8_t proto_idx) -{ - if (proto_idx == NPF_PROTO_IDX_TCP) - return (nst->nst_state == NPF_TCPS_ESTABLISHED ? true : false); - else - return (nst->nst_state == NPF_ANY_SESSION_ESTABLISHED) ? - true : false; -} - /* - * Returns true if protocol is TCP and state is CLOSED + * Generic state name used in summary stats + * + * For UDP, ICMP, and other we are not interested in SESSION_STATE_NONE or + * SESSION_STATE_TERMINATING. + * + * Note that these names are different from those returned by + * dp_session_state_name. 
*/ -bool npf_tcp_state_is_closed(const npf_state_t *nst, const uint8_t proto_idx) -{ - if (proto_idx == NPF_PROTO_IDX_TCP) - return nst->nst_state == NPF_TCPS_CLOSED; - return false; -} - -/* convert CLI generic state to numerical value */ -uint8_t npf_map_str_to_generic_state(const char *name) +static const char *npf_state_name_summary_json(enum dp_session_state state) { - uint8_t state; - char upper[40]; - - npf_str_to_log_name(name, upper, sizeof(upper)); - - for (state = NPF_ANY_SESSION_FIRST; - state <= NPF_ANY_SESSION_LAST; state++) - if (strcmp(upper, npf_state_generic_name[state]) == 0) - return state; - - return NPF_ANY_SESSION_NSTATES; + switch (state) { + case SESSION_STATE_NEW: + return "new"; + case SESSION_STATE_ESTABLISHED: + return "established"; + case SESSION_STATE_CLOSED: + return "closed"; + case SESSION_STATE_TERMINATING: + case SESSION_STATE_NONE: + break; + }; + return "none"; } /* convert CLI TCP state to numerical value */ -uint8_t npf_map_str_to_tcp_state(const char *name) +enum tcp_session_state npf_map_str_to_tcp_state(const char *name) { - uint8_t state; + enum tcp_session_state state; char upper[40]; npf_str_to_log_name(name, upper, sizeof(upper)); @@ -436,7 +539,7 @@ uint8_t npf_map_str_to_tcp_state(const char *name) if (strcmp(upper, npf_state_tcp_name[state]) == 0) return state; - return NPF_TCP_NSTATES; + return NPF_TCPS_NONE; } /* @@ -461,7 +564,7 @@ uint32_t npf_state_get_custom_timeout(vrfid_t vrfid, npf_cache_t *npc, void npf_state_stats_json(json_writer_t *json) { - uint8_t state, proto; + enum npf_proto_idx proto; uint32_t tmp; uint32_t i; char name[40]; @@ -475,19 +578,24 @@ void npf_state_stats_json(json_writer_t *json) FOREACH_DP_LCORE(i) { for (proto = NPF_PROTO_IDX_FIRST; proto <= NPF_PROTO_IDX_LAST; proto++) - stats[i].ss_ct[proto][NPF_ANY_SESSION_CLOSED] += - stats[i].ss_ct[proto][NPF_ANY_SESSION_NONE]; + stats[i].ss_ct[proto][SESSION_STATE_CLOSED] += + stats[i].ss_ct[proto][SESSION_STATE_NONE]; } jsonw_name(json, 
"tcp"); jsonw_start_object(json); - for (state = NPF_TCPS_FIRST; state <= NPF_TCPS_LAST; state++) { - npf_state_get_state_name_json(state, NPF_PROTO_IDX_TCP, name, - sizeof(name)); + enum tcp_session_state tcp_state; + + for (tcp_state = NPF_TCPS_FIRST; tcp_state <= NPF_TCPS_LAST; + tcp_state++) { + /* Copy state name to name[] buffer */ + npf_str_to_json_name(npf_state_get_tcp_name(tcp_state), + name, sizeof(name)); + tmp = 0; FOREACH_DP_LCORE(i) - tmp += stats[i].ss_tcp_ct[state]; + tmp += stats[i].ss_tcp_ct[tcp_state]; jsonw_uint_field(json, name, tmp); } @@ -498,16 +606,21 @@ void npf_state_stats_json(json_writer_t *json) */ for (proto = NPF_PROTO_IDX_FIRST; proto <= NPF_PROTO_IDX_LAST; proto++) { + enum dp_session_state state; + if (proto == NPF_PROTO_IDX_TCP) continue; jsonw_name(json, npf_get_protocol_name_from_idx(proto)); jsonw_start_object(json); - for (state = NPF_ANY_SESSION_FIRST; - state <= NPF_ANY_SESSION_LAST; state++) { - npf_state_get_state_name_json(state, proto, name, - sizeof(name)), + for (state = SESSION_STATE_FIRST; + state <= SESSION_STATE_LAST; state++) { + + /* Copy state name to name[] buffer */ + npf_str_to_json_name(npf_state_name_summary_json(state), + name, sizeof(name)); + tmp = 0; FOREACH_DP_LCORE(i) tmp += stats[i].ss_ct[proto][state]; @@ -522,15 +635,64 @@ void npf_state_stats_json(json_writer_t *json) void npf_state_dump(const npf_state_t *nst __unused) { - const npf_tcpstate_t *fst = &nst->nst_tcpst[0]; - const npf_tcpstate_t *tst = &nst->nst_tcpst[1]; + const struct npf_tcp_window *fst = &nst->nst_tcp_win[NPF_FLOW_FORW]; + const struct npf_tcp_window *tst = &nst->nst_tcp_win[NPF_FLOW_BACK]; printf("\tstate (%p) %d:\n\t\t" "F { end %u maxend %u mwin %u wscale %u }\n\t\t" "T { end %u maxend %u mwin %u wscale %u }\n", - nst, nst->nst_state, + nst, nst->nst_tcp_state, fst->nst_end, fst->nst_maxend, fst->nst_maxwin, fst->nst_wscale, tst->nst_end, tst->nst_maxend, tst->nst_maxwin, tst->nst_wscale ); } #endif + +/* Pack non-TCP 
session state */ +void npf_state_pack_gen(npf_state_t *nst, struct npf_pack_session_state *pst) +{ + pst->pst_gen_state = nst->nst_gen_state; +} + +/* Pack TCP session state */ +void npf_state_pack_tcp(npf_state_t *nst, struct npf_pack_session_state *pst) +{ + enum npf_flow_dir fl; + + for (fl = NPF_FLOW_FIRST; fl <= NPF_FLOW_LAST; fl++) + memcpy(&pst->pst_tcp_win[fl], &nst->nst_tcp_win[fl], + sizeof(*pst->pst_tcp_win)); + + pst->pst_tcp_state = nst->nst_tcp_state; +} + +/* Update non-TCP session state from a connsync restore or update */ +void npf_state_pack_update_gen(npf_state_t *nst, + struct npf_pack_session_state *pst, + enum npf_proto_idx proto_idx, + bool *state_changed) +{ + rte_spinlock_lock(&nst->nst_lock); + + npf_state_set_gen(nst, proto_idx, pst->pst_gen_state, state_changed); + + rte_spinlock_unlock(&nst->nst_lock); +} + +/* Update TCP session state from a connsync restore or update */ +void npf_state_pack_update_tcp(npf_state_t *nst, + struct npf_pack_session_state *pst, + bool *state_changed) +{ + enum npf_flow_dir fl; + + rte_spinlock_lock(&nst->nst_lock); + + for (fl = NPF_FLOW_FIRST; fl <= NPF_FLOW_LAST; fl++) + memcpy(&nst->nst_tcp_win[fl], &pst->pst_tcp_win[fl], + sizeof(*nst->nst_tcp_win)); + + npf_state_set_tcp(nst, pst->pst_tcp_state, state_changed); + + rte_spinlock_unlock(&nst->nst_lock); +} diff --git a/src/npf/npf_state.h b/src/npf/npf_state.h index d3b62dd4..1f7c58f9 100644 --- a/src/npf/npf_state.h +++ b/src/npf/npf_state.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2011-2016 by Brocade Communications Systems, Inc. * All rights reserved. 
*/ @@ -46,37 +46,21 @@ #include "npf/npf_cache.h" #include "npf/npf_ruleset.h" #include "util.h" -#include "vrf.h" +#include "vrf_internal.h" +#include "dp_session.h" struct rte_mbuf; +struct npf_pack_session_state; /* Forward Declarations */ typedef struct npf_cache npf_cache_t; -/* - * Generic session states and timeout table. - * - * Note: used for connnection-less protocols. - * only npf_state.c and npf_shim.c inlcude this header file. - */ -typedef enum { - NPF_ANY_SESSION_NONE = 0, - NPF_ANY_SESSION_NEW, - NPF_ANY_SESSION_ESTABLISHED, - NPF_ANY_SESSION_TERMINATING, - NPF_ANY_SESSION_CLOSED, -} ANY_STATES; - -#define NPF_ANY_SESSION_FIRST NPF_ANY_SESSION_NONE -#define NPF_ANY_SESSION_LAST NPF_ANY_SESSION_CLOSED -#define NPF_ANY_SESSION_NSTATES (NPF_ANY_SESSION_LAST + 1) - /* * NPF TCP states. Note: these states are different from the TCP FSM * states of RFC 793. The packet filter is a man-in-the-middle. */ -typedef enum { - NPF_TCPS_NONE = 0, +enum tcp_session_state { + NPF_TCPS_NONE, NPF_TCPS_SYN_SENT, NPF_TCPS_SIMSYN_SENT, NPF_TCPS_SYN_RECEIVED, @@ -90,87 +74,70 @@ typedef enum { NPF_TCPS_TIME_WAIT, NPF_TCPS_RST_RECEIVED, NPF_TCPS_CLOSED, -} TCP_STATES; +} __attribute__ ((__packed__)); #define NPF_TCPS_FIRST NPF_TCPS_NONE #define NPF_TCPS_LAST NPF_TCPS_CLOSED #define NPF_TCP_NSTATES (NPF_TCPS_LAST + 1) -/* - * NPF_TCPS_OK is used to denote that *no* state change should take place. - * It *must* be greater than NPF_TCPS_LAST. 
- */ -#define NPF_TCPS_OK NPF_TCP_NSTATES -#define NPF_TCPS_ERR (NPF_TCP_NSTATES + 1) - - /* State statistics struct */ struct npf_state_stats { uint32_t ss_tcp_ct[NPF_TCP_NSTATES]; - uint32_t ss_ct[NPF_PROTO_IDX_COUNT][NPF_ANY_SESSION_NSTATES]; + uint32_t ss_ct[NPF_PROTO_IDX_COUNT][SESSION_STATE_SIZE]; uint32_t ss_nat_cnt; /* used only for session_summary */ }; -#define NPF_FLOW_FORW 0 -#define NPF_FLOW_BACK 1 +enum npf_flow_dir { + NPF_FLOW_FORW, + NPF_FLOW_BACK +}; +#define NPF_FLOW_FIRST NPF_FLOW_FORW +#define NPF_FLOW_LAST NPF_FLOW_BACK +#define NPF_FLOW_SZ (NPF_FLOW_LAST + 1) -typedef struct { +/* + * TCP session state for windowing. Two per TCP session. One for each + * direction. + */ +struct npf_tcp_window { uint32_t nst_end; uint32_t nst_maxend; + /* Keep track of maximum window seen */ uint32_t nst_maxwin; + /* Window scaling. From options in syn-ack, if present */ uint8_t nst_wscale; -} npf_tcpstate_t; + uint8_t nst_pad[3]; +}; +static_assert(sizeof(struct npf_tcp_window) == 16, + "struct npf_tcp_window != 16"); + +/* + * npf session state and timeout + */ typedef struct { rte_spinlock_t nst_lock; - uint8_t nst_state; - npf_tcpstate_t nst_tcpst[2]; + enum tcp_session_state nst_tcp_state; + enum dp_session_state nst_gen_state; + uint8_t nst_pad[2]; + struct npf_tcp_window nst_tcp_win[NPF_FLOW_SZ]; struct npf_timeout *nst_to; } npf_state_t; +static_assert(sizeof(npf_state_t) == 48, "npf_state_t != 48"); -static inline bool -npf_state_generic_state_is_valid(uint8_t state) -{ - assert(NPF_ANY_SESSION_FIRST == 0); - return state <= NPF_ANY_SESSION_LAST; -} - -static inline bool -npf_state_tcp_state_is_valid(uint8_t state) -{ - assert(NPF_TCPS_FIRST == 0); - return state <= NPF_TCPS_LAST; -} - -static inline bool -npf_state_is_valid(uint8_t proto_idx, uint8_t state) -{ - if (proto_idx == NPF_PROTO_IDX_TCP) - return npf_state_tcp_state_is_valid(state); - else - return npf_state_generic_state_is_valid(state); -} - -/* - * Get generic state. 
Non-TCP protocols already use generic state. - * Convert TCP state to generic state. - */ -static inline uint8_t -npf_state_get_generic_state(uint8_t proto_idx, uint8_t state) +static inline enum dp_session_state +npf_state_tcp2gen(enum tcp_session_state tcp_state) { - if (proto_idx != NPF_PROTO_IDX_TCP) - return state; - - switch (state) { + switch (tcp_state) { case NPF_TCPS_NONE: - return NPF_ANY_SESSION_NONE; + return SESSION_STATE_NONE; case NPF_TCPS_SYN_SENT: case NPF_TCPS_SIMSYN_SENT: case NPF_TCPS_SYN_RECEIVED: - return NPF_ANY_SESSION_NEW; + return SESSION_STATE_NEW; case NPF_TCPS_ESTABLISHED: - return NPF_ANY_SESSION_ESTABLISHED; + return SESSION_STATE_ESTABLISHED; case NPF_TCPS_FIN_SENT: case NPF_TCPS_FIN_RECEIVED: case NPF_TCPS_CLOSE_WAIT: @@ -179,41 +146,30 @@ npf_state_get_generic_state(uint8_t proto_idx, uint8_t state) case NPF_TCPS_LAST_ACK: case NPF_TCPS_TIME_WAIT: case NPF_TCPS_RST_RECEIVED: - return NPF_ANY_SESSION_TERMINATING; + return SESSION_STATE_TERMINATING; case NPF_TCPS_CLOSED: - return NPF_ANY_SESSION_CLOSED; + return SESSION_STATE_CLOSED; }; - return NPF_ANY_SESSION_CLOSED; -} - -static inline bool npf_state_is_established(uint8_t proto, uint8_t state) -{ - if (proto == IPPROTO_TCP) - return state == NPF_TCPS_ESTABLISHED; - return state == NPF_ANY_SESSION_ESTABLISHED; -} - -static inline bool npf_state_is_closing(uint8_t proto, uint8_t state) -{ - if (proto == IPPROTO_TCP) - return state > NPF_TCPS_ESTABLISHED; - return state > NPF_ANY_SESSION_ESTABLISHED; + return SESSION_STATE_CLOSED; } void npf_state_stats_create(void); void npf_state_stats_destroy(void); -void npf_state_init(vrfid_t vrfid, uint8_t proto_idx, npf_state_t *nst); -void npf_state_destroy(npf_state_t *nst, uint8_t proto_idx); -bool npf_state_inspect(const npf_cache_t *npc, struct rte_mbuf *nbuf, - npf_state_t *nst, bool forw); -void npf_state_update_session_state(struct session *s, uint8_t proto_idx, - const npf_state_t *nst); -void npf_state_set_closed_state(npf_state_t 
*nst, bool lock, uint8_t proto_idx); -const char *npf_state_get_state_name(uint8_t state, uint8_t proto_idx); -bool npf_state_is_steady(const npf_state_t *nst, const uint8_t proto_idx); -bool npf_tcp_state_is_closed(const npf_state_t *nst, const uint8_t proto_idx); -uint8_t npf_map_str_to_generic_state(const char *state); -uint8_t npf_map_str_to_tcp_state(const char *state); +bool npf_state_init(vrfid_t vrfid, enum npf_proto_idx proto_idx, + npf_state_t *nst); +void npf_state_destroy(npf_state_t *nst, enum npf_proto_idx proto_idx); +int npf_state_inspect(const npf_cache_t *npc, struct rte_mbuf *nbuf, + npf_session_t *se, npf_state_t *nst, + enum npf_proto_idx proto_idx, bool forw); +void npf_state_update_gen_session(struct session *s, + enum npf_proto_idx proto_idx, + const npf_state_t *nst); +void npf_state_update_tcp_session(struct session *s, const npf_state_t *nst); +void npf_state_set_gen_closed(npf_state_t *nst, npf_session_t *se, bool lock, + enum npf_proto_idx proto_idx); +void npf_state_set_tcp_closed(npf_state_t *nst, npf_session_t *se, bool lock); +const char *npf_state_get_tcp_name(enum tcp_session_state state); +enum tcp_session_state npf_map_str_to_tcp_state(const char *name); uint32_t npf_state_get_custom_timeout(vrfid_t vrfid, npf_cache_t *npc, struct rte_mbuf *nbuf); void npf_state_stats_json(json_writer_t *json); @@ -221,8 +177,13 @@ void npf_state_stats_json(json_writer_t *json); void npf_state_dump(const npf_state_t *nst); #endif -void npf_session_state_change(npf_state_t *nst, uint8_t old_state, - uint8_t new_state, uint8_t proto_idx); +void npf_session_gen_state_change(npf_session_t *se, npf_state_t *nst, + enum dp_session_state old_state, + enum dp_session_state new_state, + enum npf_proto_idx proto_idx); +void npf_session_tcp_state_change(npf_session_t *se, npf_state_t *nst, + enum tcp_session_state old_state, + enum tcp_session_state new_state); void npf_state_set_icmp_strict(bool value); @@ -231,15 +192,37 @@ void 
npf_state_set_icmp_strict(bool value); void npf_state_tcp_init(void); /* - * npf_state_tcp returns either: + * npf_state_tcp: inspect TCP segment, determine whether it belongs to + * the connection and track its state. + * + * Returns either: * 1. the new TCP state, - * 2. NPF_TCPS_OK, if no state change is required, or - * 3. NPF_TCPS_ERR if the packet should be discarded + * 2. the old state, if no state change is required or if an error occurred. + * + * Any error is set in the '*error' parameter. If one is returned then the + * packet should be discarded */ -uint8_t npf_state_tcp(const npf_cache_t *npc, struct rte_mbuf *nbuf, - npf_state_t *nst, int di); -uint32_t npf_state_get_tcp_seq(int di, npf_state_t *nst); +enum tcp_session_state npf_state_tcp(const npf_cache_t *npc, + struct rte_mbuf *nbuf, npf_state_t *nst, + const enum npf_flow_dir di, int *error); void npf_state_set_tcp_strict(bool value); +/* Pack non-TCP session state */ +void npf_state_pack_gen(npf_state_t *nst, struct npf_pack_session_state *pst); + +/* Pack TCP session state */ +void npf_state_pack_tcp(npf_state_t *nst, struct npf_pack_session_state *pst); + +/* Update non-TCP session state from a connsync restore or update */ +void npf_state_pack_update_gen(npf_state_t *nst, + struct npf_pack_session_state *pst, + enum npf_proto_idx proto_idx, + bool *state_changed); + +/* Update TCP session state from a connsync restore or update */ +void npf_state_pack_update_tcp(npf_state_t *nst, + struct npf_pack_session_state *pst, + bool *state_changed); + #endif /* NPF_STATE_H */ diff --git a/src/npf/npf_state_tcp.c b/src/npf/npf_state_tcp.c index 3f7b30f3..bdd70645 100644 --- a/src/npf/npf_state_tcp.c +++ b/src/npf/npf_state_tcp.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2016 by Brocade Communications Systems, Inc. * All rights reserved. 
*/ @@ -50,6 +50,7 @@ #include #include "npf/npf_cache.h" +#include "npf/npf_rc.h" #include "npf/npf_state.h" struct rte_mbuf; @@ -86,10 +87,10 @@ static inline enum npf_tcpfc npf_tcpfl2case(const uint8_t tcpfl) enum npf_tcpfc c; u_int i; - assert(TH_FIN == 0x01); - assert(TH_SYN == 0x02); - assert(TH_ACK == 0x10); - assert(TH_RST == 0x04); + static_assert(TH_FIN == 0x01, "tcp flag has wrong value"); + static_assert(TH_SYN == 0x02, "tcp flag has wrong value"); + static_assert(TH_ACK == 0x10, "tcp flag has wrong value"); + static_assert(TH_RST == 0x04, "tcp flag has wrong value"); /* * Flags are shifted to use four least significant bits, thus each @@ -128,14 +129,10 @@ static inline enum npf_tcpfc npf_tcpfl2case(const uint8_t tcpfl) #define sS2 NPF_TCPS_SIMSYN_SENT /*TCP_CONNTRACK_SYN_SENT2*/ /* - * sIV and sIG are only used as the values stored in npf_tcp_strict_fsm + * Lookup table for TCP strict. Used to determine if the flagcase is valid + * for a given packet direction and the current session state. */ -#define sIG 0 /* Ignore */ -#define sIV NPF_TCP_NSTATES /* Invalid */ - - - -static uint8_t npf_tcp_strict_fsm[2][TCPFC_COUNT][NPF_TCP_NSTATES]; +static bool npf_tcp_strict_is_valid[NPF_FLOW_SZ][TCPFC_COUNT][NPF_TCP_NSTATES]; /* * NPF transition table of a tracked TCP connection. @@ -146,7 +143,8 @@ static uint8_t npf_tcp_strict_fsm[2][TCPFC_COUNT][NPF_TCP_NSTATES]; * * Note that this state is different from the state in each end (host). */ -static uint8_t npf_tcp_fsm[NPF_TCP_NSTATES][2][TCPFC_COUNT] = { +static enum tcp_session_state +npf_tcp_fsm[NPF_TCP_NSTATES][NPF_FLOW_SZ][TCPFC_COUNT] = { [NPF_TCPS_NONE] = { [NPF_FLOW_FORW] = { /* Handshake (1): initial SYN. 
*/ @@ -154,7 +152,7 @@ static uint8_t npf_tcp_fsm[NPF_TCP_NSTATES][2][TCPFC_COUNT] = { /* We have missed some of all of the the handshake */ [TCPFC_ACK] = NPF_TCPS_ESTABLISHED, [TCPFC_SYNACK] = NPF_TCPS_SYN_RECEIVED, - [TCPFC_INVALID] = NPF_TCPS_ERR, + [TCPFC_INVALID] = NPF_TCPS_NONE, [TCPFC_FIN] = NPF_TCPS_FIN_SENT, [TCPFC_RST] = NPF_TCPS_CLOSED, }, @@ -162,7 +160,7 @@ static uint8_t npf_tcp_fsm[NPF_TCP_NSTATES][2][TCPFC_COUNT] = { [NPF_TCPS_SYN_SENT] = { [NPF_FLOW_FORW] = { /* SYN may be retransmitted. */ - [TCPFC_SYN] = NPF_TCPS_OK, + [TCPFC_SYN] = NPF_TCPS_NONE, [TCPFC_RST] = NPF_TCPS_RST_RECEIVED, }, [NPF_FLOW_BACK] = { @@ -176,14 +174,14 @@ static uint8_t npf_tcp_fsm[NPF_TCP_NSTATES][2][TCPFC_COUNT] = { [NPF_TCPS_SIMSYN_SENT] = { [NPF_FLOW_FORW] = { /* Original SYN re-transmission. */ - [TCPFC_SYN] = NPF_TCPS_OK, + [TCPFC_SYN] = NPF_TCPS_NONE, /* SYN-ACK response to simultaneous SYN. */ [TCPFC_SYNACK] = NPF_TCPS_SYN_RECEIVED, [TCPFC_RST] = NPF_TCPS_RST_RECEIVED, }, [NPF_FLOW_BACK] = { /* Simultaneous SYN re-transmission.*/ - [TCPFC_SYN] = NPF_TCPS_OK, + [TCPFC_SYN] = NPF_TCPS_NONE, /* SYN-ACK response to original SYN. */ [TCPFC_SYNACK] = NPF_TCPS_SYN_RECEIVED, /* FIN may occur early. */ @@ -201,9 +199,9 @@ static uint8_t npf_tcp_fsm[NPF_TCP_NSTATES][2][TCPFC_COUNT] = { }, [NPF_FLOW_BACK] = { /* SYN-ACK may be retransmitted. */ - [TCPFC_SYNACK] = NPF_TCPS_OK, + [TCPFC_SYNACK] = NPF_TCPS_NONE, /* XXX: ACK of late SYN in simultaneous case? */ - [TCPFC_ACK] = NPF_TCPS_OK, + [TCPFC_ACK] = NPF_TCPS_NONE, /* FIN may occur early. */ [TCPFC_FIN] = NPF_TCPS_FIN_RECEIVED, [TCPFC_RST] = NPF_TCPS_RST_RECEIVED, @@ -215,13 +213,13 @@ static uint8_t npf_tcp_fsm[NPF_TCP_NSTATES][2][TCPFC_COUNT] = { * FIN packets may have ACK set. */ [NPF_FLOW_FORW] = { - [TCPFC_ACK] = NPF_TCPS_OK, + [TCPFC_ACK] = NPF_TCPS_NONE, /* FIN by the sender. 
*/ [TCPFC_FIN] = NPF_TCPS_FIN_SENT, [TCPFC_RST] = NPF_TCPS_RST_RECEIVED, }, [NPF_FLOW_BACK] = { - [TCPFC_ACK] = NPF_TCPS_OK, + [TCPFC_ACK] = NPF_TCPS_NONE, /* FIN by the receiver. */ [TCPFC_FIN] = NPF_TCPS_FIN_RECEIVED, [TCPFC_RST] = NPF_TCPS_RST_RECEIVED, @@ -230,8 +228,8 @@ static uint8_t npf_tcp_fsm[NPF_TCP_NSTATES][2][TCPFC_COUNT] = { [NPF_TCPS_FIN_SENT] = { [NPF_FLOW_FORW] = { /* FIN may be re-transmitted. Late ACK as well. */ - [TCPFC_ACK] = NPF_TCPS_OK, - [TCPFC_FIN] = NPF_TCPS_OK, + [TCPFC_ACK] = NPF_TCPS_NONE, + [TCPFC_FIN] = NPF_TCPS_NONE, [TCPFC_RST] = NPF_TCPS_RST_RECEIVED, }, [NPF_FLOW_BACK] = { @@ -252,20 +250,20 @@ static uint8_t npf_tcp_fsm[NPF_TCP_NSTATES][2][TCPFC_COUNT] = { [TCPFC_RST] = NPF_TCPS_RST_RECEIVED, }, [NPF_FLOW_BACK] = { - [TCPFC_ACK] = NPF_TCPS_OK, - [TCPFC_FIN] = NPF_TCPS_OK, + [TCPFC_ACK] = NPF_TCPS_NONE, + [TCPFC_FIN] = NPF_TCPS_NONE, [TCPFC_RST] = NPF_TCPS_RST_RECEIVED, }, }, [NPF_TCPS_CLOSE_WAIT] = { /* Sender has sent the FIN and closed its end. */ [NPF_FLOW_FORW] = { - [TCPFC_ACK] = NPF_TCPS_OK, + [TCPFC_ACK] = NPF_TCPS_NONE, [TCPFC_FIN] = NPF_TCPS_LAST_ACK, [TCPFC_RST] = NPF_TCPS_RST_RECEIVED, }, [NPF_FLOW_BACK] = { - [TCPFC_ACK] = NPF_TCPS_OK, + [TCPFC_ACK] = NPF_TCPS_NONE, [TCPFC_FIN] = NPF_TCPS_LAST_ACK, [TCPFC_RST] = NPF_TCPS_RST_RECEIVED, }, @@ -273,12 +271,12 @@ static uint8_t npf_tcp_fsm[NPF_TCP_NSTATES][2][TCPFC_COUNT] = { [NPF_TCPS_FIN_WAIT] = { /* Receiver has closed its end. 
*/ [NPF_FLOW_FORW] = { - [TCPFC_ACK] = NPF_TCPS_OK, + [TCPFC_ACK] = NPF_TCPS_NONE, [TCPFC_FIN] = NPF_TCPS_LAST_ACK, [TCPFC_RST] = NPF_TCPS_RST_RECEIVED, }, [NPF_FLOW_BACK] = { - [TCPFC_ACK] = NPF_TCPS_OK, + [TCPFC_ACK] = NPF_TCPS_NONE, [TCPFC_FIN] = NPF_TCPS_LAST_ACK, [TCPFC_RST] = NPF_TCPS_RST_RECEIVED, }, @@ -310,27 +308,27 @@ static uint8_t npf_tcp_fsm[NPF_TCP_NSTATES][2][TCPFC_COUNT] = { [NPF_FLOW_FORW] = { [TCPFC_SYN] = NPF_TCPS_SYN_SENT, /* Prevent TIME-WAIT assassination (RFC 1337).*/ - [TCPFC_RST] = NPF_TCPS_OK, + [TCPFC_RST] = NPF_TCPS_NONE, }, [NPF_FLOW_BACK] = { /* Prevent TIME-WAIT assassination (RFC 1337).*/ - [TCPFC_RST] = NPF_TCPS_OK, + [TCPFC_RST] = NPF_TCPS_NONE, }, }, }; /* - * Change the uninitialized state machine values from 0 (NPF_TCPS_NONE) to - * NPF_TCPS_OK, which is effectively a NOP, i.e. no state transition will - * occur. The prevents unexpected flags and state combinations from forcing - * the session to CLOSED state. + * Change the uninitialized state machine values from 0 (NPF_TCPS_NONE) to the + * same state value, i.e. no state transition will occur. This prevents + * unexpected flags and state combinations from forcing the session to CLOSED + * state. 
*/ static void npf_state_tcp_fsm_init(void) { - uint8_t state; + enum tcp_session_state state; uint di, fc; - assert(NPF_TCPS_NONE == 0); + static_assert(NPF_TCPS_NONE == 0, "npf tcps none should be 0"); for (state = NPF_TCPS_FIRST; state <= NPF_TCPS_LAST; state++) { /* Forwards */ @@ -338,80 +336,71 @@ static void npf_state_tcp_fsm_init(void) for (fc = 0; fc < TCPFC_COUNT; fc++) if (npf_tcp_fsm[state][di][fc] == NPF_TCPS_NONE) - npf_tcp_fsm[state][di][fc] = NPF_TCPS_OK; + npf_tcp_fsm[state][di][fc] = state; /* Back */ di = NPF_FLOW_BACK; for (fc = 0; fc < TCPFC_COUNT; fc++) if (npf_tcp_fsm[state][di][fc] == NPF_TCPS_NONE) - npf_tcp_fsm[state][di][fc] = NPF_TCPS_OK; + npf_tcp_fsm[state][di][fc] = state; } } -void -npf_state_tcp_init(void) +void npf_state_tcp_init(void) { - uint8_t state; + enum tcp_session_state state; - /*compared to: nf_conntrack_proto_tcp.c */ - - /* sIG is 0 */ - memset(npf_tcp_strict_fsm, 0, sizeof(npf_tcp_strict_fsm)); - /* for receiving initial tcp syn packet */ + memset(npf_tcp_strict_is_valid, true, sizeof(npf_tcp_strict_is_valid)); /* for receiving initial tcp syn ack packet */ - npf_tcp_strict_fsm[NPF_FLOW_FORW][TCPFC_SYNACK][sNO] = sIV; - npf_tcp_strict_fsm[NPF_FLOW_FORW][TCPFC_SYNACK][sSS] = sIV; - npf_tcp_strict_fsm[NPF_FLOW_FORW][TCPFC_SYNACK][sES] = sIV; - npf_tcp_strict_fsm[NPF_FLOW_FORW][TCPFC_SYNACK][sFW] = sIV; - npf_tcp_strict_fsm[NPF_FLOW_FORW][TCPFC_SYNACK][sCW] = sIV; - npf_tcp_strict_fsm[NPF_FLOW_FORW][TCPFC_SYNACK][sLA] = sIV; - npf_tcp_strict_fsm[NPF_FLOW_FORW][TCPFC_SYNACK][sTW] = sIV; - npf_tcp_strict_fsm[NPF_FLOW_FORW][TCPFC_SYNACK][sCL] = sIV; + npf_tcp_strict_is_valid[NPF_FLOW_FORW][TCPFC_SYNACK][sNO] = false; + npf_tcp_strict_is_valid[NPF_FLOW_FORW][TCPFC_SYNACK][sSS] = false; + npf_tcp_strict_is_valid[NPF_FLOW_FORW][TCPFC_SYNACK][sES] = false; + npf_tcp_strict_is_valid[NPF_FLOW_FORW][TCPFC_SYNACK][sFW] = false; + npf_tcp_strict_is_valid[NPF_FLOW_FORW][TCPFC_SYNACK][sCW] = false; + 
npf_tcp_strict_is_valid[NPF_FLOW_FORW][TCPFC_SYNACK][sLA] = false; + npf_tcp_strict_is_valid[NPF_FLOW_FORW][TCPFC_SYNACK][sTW] = false; + npf_tcp_strict_is_valid[NPF_FLOW_FORW][TCPFC_SYNACK][sCL] = false; /* for receiving initial tcp FIN packet */ - npf_tcp_strict_fsm[NPF_FLOW_FORW][TCPFC_FIN][sNO] = sIV; - npf_tcp_strict_fsm[NPF_FLOW_FORW][TCPFC_FIN][sSS] = sIV; - npf_tcp_strict_fsm[NPF_FLOW_FORW][TCPFC_FIN][sS2] = sIV; + npf_tcp_strict_is_valid[NPF_FLOW_FORW][TCPFC_FIN][sNO] = false; + npf_tcp_strict_is_valid[NPF_FLOW_FORW][TCPFC_FIN][sSS] = false; + npf_tcp_strict_is_valid[NPF_FLOW_FORW][TCPFC_FIN][sS2] = false; /* ack */ - npf_tcp_strict_fsm[NPF_FLOW_FORW][TCPFC_ACK][sNO] = sIV; - npf_tcp_strict_fsm[NPF_FLOW_FORW][TCPFC_ACK][sSS] = sIV; - npf_tcp_strict_fsm[NPF_FLOW_FORW][TCPFC_ACK][sS2] = sIV; + npf_tcp_strict_is_valid[NPF_FLOW_FORW][TCPFC_ACK][sNO] = false; + npf_tcp_strict_is_valid[NPF_FLOW_FORW][TCPFC_ACK][sSS] = false; + npf_tcp_strict_is_valid[NPF_FLOW_FORW][TCPFC_ACK][sS2] = false; - /* rst */ /* invalid flag combinations */ - for (state = NPF_TCPS_FIRST; state <= NPF_TCPS_LAST; state++) - npf_tcp_strict_fsm[NPF_FLOW_FORW][TCPFC_INVALID][state] = sIV; - - /*reply*/ + for (state = NPF_TCPS_FIRST; state <= NPF_TCPS_LAST; state++) { + npf_tcp_strict_is_valid[NPF_FLOW_FORW][TCPFC_INVALID][state] = + false; + npf_tcp_strict_is_valid[NPF_FLOW_BACK][TCPFC_INVALID][state] = + false; + } /*syn*/ - npf_tcp_strict_fsm[NPF_FLOW_BACK][TCPFC_SYN][sNO] = sIV; - npf_tcp_strict_fsm[NPF_FLOW_BACK][TCPFC_SYN][sSR] = sIV; - npf_tcp_strict_fsm[NPF_FLOW_BACK][TCPFC_SYN][sES] = sIV; - npf_tcp_strict_fsm[NPF_FLOW_BACK][TCPFC_SYN][sFW] = sIV; - npf_tcp_strict_fsm[NPF_FLOW_BACK][TCPFC_SYN][sCW] = sIV; - npf_tcp_strict_fsm[NPF_FLOW_BACK][TCPFC_SYN][sLA] = sIV; - npf_tcp_strict_fsm[NPF_FLOW_BACK][TCPFC_SYN][sTW] = sIV; - npf_tcp_strict_fsm[NPF_FLOW_BACK][TCPFC_SYN][sCL] = sIV; + npf_tcp_strict_is_valid[NPF_FLOW_BACK][TCPFC_SYN][sNO] = false; + 
npf_tcp_strict_is_valid[NPF_FLOW_BACK][TCPFC_SYN][sSR] = false; + npf_tcp_strict_is_valid[NPF_FLOW_BACK][TCPFC_SYN][sES] = false; + npf_tcp_strict_is_valid[NPF_FLOW_BACK][TCPFC_SYN][sFW] = false; + npf_tcp_strict_is_valid[NPF_FLOW_BACK][TCPFC_SYN][sCW] = false; + npf_tcp_strict_is_valid[NPF_FLOW_BACK][TCPFC_SYN][sLA] = false; + npf_tcp_strict_is_valid[NPF_FLOW_BACK][TCPFC_SYN][sTW] = false; + npf_tcp_strict_is_valid[NPF_FLOW_BACK][TCPFC_SYN][sCL] = false; /*synack*/ - npf_tcp_strict_fsm[NPF_FLOW_BACK][TCPFC_SYNACK][sNO] = sIV; + npf_tcp_strict_is_valid[NPF_FLOW_BACK][TCPFC_SYNACK][sNO] = false; /*fin*/ - npf_tcp_strict_fsm[NPF_FLOW_BACK][TCPFC_FIN][sNO] = sIV; - npf_tcp_strict_fsm[NPF_FLOW_BACK][TCPFC_FIN][sSS] = sIV; - npf_tcp_strict_fsm[NPF_FLOW_BACK][TCPFC_FIN][sS2] = sIV; + npf_tcp_strict_is_valid[NPF_FLOW_BACK][TCPFC_FIN][sNO] = false; + npf_tcp_strict_is_valid[NPF_FLOW_BACK][TCPFC_FIN][sSS] = false; + npf_tcp_strict_is_valid[NPF_FLOW_BACK][TCPFC_FIN][sS2] = false; /* ack */ - npf_tcp_strict_fsm[NPF_FLOW_BACK][TCPFC_ACK][sNO] = sIV; - - /* rst */ - /* invalid flag combinations */ - for (state = NPF_TCPS_FIRST; state <= NPF_TCPS_LAST; state++) - npf_tcp_strict_fsm[NPF_FLOW_BACK][TCPFC_INVALID][state] = sIV; + npf_tcp_strict_is_valid[NPF_FLOW_BACK][TCPFC_ACK][sNO] = false; npf_state_tcp_fsm_init(); } @@ -422,17 +411,16 @@ npf_state_tcp_init(void) * and thus part of the connection we are tracking. 
*/ static bool -npf_tcp_inwindow(const npf_cache_t *npc, struct rte_mbuf *nbuf, npf_state_t *nst, - const int di) +npf_tcp_inwindow(const npf_cache_t *npc, struct rte_mbuf *nbuf, + npf_state_t *nst, const enum npf_flow_dir di) { const struct tcphdr * const th = &npc->npc_l4.tcp; const uint8_t tcpfl = th->th_flags; - npf_tcpstate_t *fstate, *tstate; + struct npf_tcp_window *fstate, *tstate; int tcpdlen, ackskew; tcp_seq seq, ack, end; uint32_t win; - assert(di == NPF_FLOW_FORW || di == NPF_FLOW_BACK); assert(npf_cache_ipproto(npc) == IPPROTO_TCP); /* @@ -447,7 +435,8 @@ npf_tcp_inwindow(const npf_cache_t *npc, struct rte_mbuf *nbuf, npf_state_t *nst * III) ACK <= MAX { RCV.SEQ + RCV.LEN } * IV) ACK >= MAX { RCV.SEQ + RCV.LEN } - MAXACKWIN * - * Let these members of npf_tcpstate_t be the maximum seen values of: + * Let these members of struct npf_tcp_window be the maximum seen + * values of: * nst_end - SEQ + LEN * nst_maxend - ACK + MAX(WIN, 1) * nst_maxwin - MAX(WIN, 1) @@ -462,8 +451,8 @@ npf_tcp_inwindow(const npf_cache_t *npc, struct rte_mbuf *nbuf, npf_state_t *nst end++; } - fstate = &nst->nst_tcpst[di]; - tstate = &nst->nst_tcpst[!di]; + fstate = &nst->nst_tcp_win[di]; + tstate = &nst->nst_tcp_win[!di]; win = win ? (win << fstate->nst_wscale) : 1; /* @@ -530,7 +519,7 @@ npf_tcp_inwindow(const npf_cache_t *npc, struct rte_mbuf *nbuf, npf_state_t *nst if (unlikely(tcpfl & TH_RST)) { /* RST to the initial SYN may have zero SEQ - fix it up. 
*/ - if (seq == 0 && nst->nst_state == NPF_TCPS_SYN_SENT) { + if (seq == 0 && nst->nst_tcp_state == NPF_TCPS_SYN_SENT) { end = fstate->nst_end; seq = end; } @@ -588,53 +577,62 @@ npf_tcp_inwindow(const npf_cache_t *npc, struct rte_mbuf *nbuf, npf_state_t *nst return true; } -/* - * Return a TCP sequence number to be used for spoofed TCP resets - */ -uint32_t -npf_state_get_tcp_seq(int di, npf_state_t *nst) -{ - return nst->nst_tcpst[di].nst_end; -} - /* * npf_state_tcp: inspect TCP segment, determine whether it belongs to - * the connection and track its state. Returns either: + * the connection and track its state. + * + * Returns either: * 1. the new TCP state, - * 2. NPF_TCPS_OK, if no state change is required, or - * 3. NPF_TCPS_ERR if the packet should be discarded + * 2. the old state, if no state change is required or if an error occurred. + * + * Any error is set in the '*error' parameter. If one is returned then the + * packet should be discarded */ -uint8_t +enum tcp_session_state npf_state_tcp(const npf_cache_t *npc, struct rte_mbuf *nbuf, npf_state_t *nst, - int di) + const enum npf_flow_dir di, int *error) { const struct tcphdr * const th = &npc->npc_l4.tcp; const uint8_t tcpfl = th->th_flags; - const uint8_t state = nst->nst_state; - uint8_t nstate; + const enum tcp_session_state old_state = nst->nst_tcp_state; + enum tcp_session_state new_state; const enum npf_tcpfc flagcase = npf_tcpfl2case(tcpfl); + assert(di <= NPF_FLOW_LAST); + /* Look for a transition to a new state. */ - nstate = npf_tcp_fsm[state][di][flagcase]; + new_state = npf_tcp_fsm[old_state][di][flagcase]; - /* only filter on invalid state transitions */ - /* let npf actually handle the state transitions */ + /* + * Only filter on invalid state transitions. Let npf actually handle + * the state transitions. + */ if (npf_state_tcp_strict) { /* Only a SYN or RST can create a session. 
*/ - if (state == NPF_TCPS_NONE && + if (old_state == NPF_TCPS_NONE && (tcpfl & CORE_TCP_FLAGS) != TH_SYN && - (tcpfl & TH_RST) == 0) - return NPF_TCPS_ERR; + (tcpfl & TH_RST) == 0) { + *error = -NPF_RC_TCP_SYN; + return old_state; + } - if (npf_tcp_strict_fsm[di][flagcase][state] == sIV) - return NPF_TCPS_ERR; + /* + * Is the flagcase valid for the packet direction and current + * state? + */ + if (!npf_tcp_strict_is_valid[di][flagcase][old_state]) { + *error = -NPF_RC_TCP_STATE; + return old_state; + } } /* Determine whether TCP packet really belongs to this connection. */ - if (!npf_tcp_inwindow(npc, nbuf, nst, di)) - return NPF_TCPS_ERR; + if (!npf_tcp_inwindow(npc, nbuf, nst, di)) { + *error = -NPF_RC_TCP_WIN; + return old_state; + } - return nstate; + return new_state; } void npf_state_set_tcp_strict(bool value) diff --git a/src/npf/npf_tblset.c b/src/npf/npf_tblset.c index da5d8197..db6ee4e4 100644 --- a/src/npf/npf_tblset.c +++ b/src/npf/npf_tblset.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only */ @@ -21,7 +21,7 @@ * Managed table service. * * Tables entries are created and deleted (using a name) from config on the - * master thread, and may be looked-up from the dataplane forwarding threads + * main thread, and may be looked-up from the dataplane forwarding threads * using a tableset handle and a table ID. * * Table entries are created via a named reference in config (e.g. firewall @@ -40,7 +40,7 @@ * * The user is responsible for rcu-assigning their "struct npf_tbl" pointer. * - * Changes to the hash table should only take place from the master thread. + * Changes to the hash table should only take place from the main thread. * * A table may be re-sized if it reaches its maximum size and the * TS_TBL_RESIZE flag is set. 
@@ -60,7 +60,9 @@ struct npf_tbl_entry { char *te_name; /* entry name */ struct rcu_head te_rcu; /* rcu for freeing an entry */ struct npf_tbl *te_tbl; /* back pointer to table */ - uint te_id; /* ID/index */ + uint32_t te_id; /* ID/index */ + rte_atomic32_t te_refcnt; + npf_tbl_entry_free_fn *te_free_fn; #ifndef NDEBUG uint32_t te_memguard; /* used to verify te_data ptr */ #endif @@ -79,6 +81,7 @@ struct npf_tbl { struct rcu_head nt_rcu; /* rcu for freeing struct npf_tbl */ uint8_t nt_flags; uint32_t nt_id; /* user table id */ + npf_tbl_entry_free_fn *nt_entry_free_fn; /* Start search for next available table index at nt_hint */ uint nt_hint; /* used to find a free slot */ uint nt_entry_data_sz; /* size of te_data */ @@ -199,6 +202,12 @@ npf_tbl_create(uint32_t id, uint tbl_sz, uint tbl_sz_max, uint data_sz, return nt; } +void npf_tbl_set_entry_freefn(struct npf_tbl *nt, + npf_tbl_entry_free_fn *free_fn) +{ + nt->nt_entry_free_fn = free_fn; +} + static void npf_tbl_destroy_rcu(struct rcu_head *head) { @@ -210,6 +219,26 @@ npf_tbl_destroy_rcu(struct rcu_head *head) free(nt); } +/* + * Destroy all table entries + */ +static int npf_tbl_destroy_entries(struct npf_tbl *nt) +{ + struct npf_tbl_entry *te; + uint i; + int rc = 0; + + for (i = 0; i < nt->nt_sz; i++) { + te = nt->nt_table[i]; + if (te) { + rc = npf_tbl_entry_remove(nt, te->te_data); + if (rc) + return rc; + } + } + return 0; +} + /* * Destroy table. 
*/ @@ -219,6 +248,9 @@ npf_tbl_destroy(struct npf_tbl *nt) if (!nt) return -EINVAL; + /* Delete and free all table entries */ + npf_tbl_destroy_entries(nt); + /* Table must be empty */ if (nt->nt_nentries != 0) return -EEXIST; @@ -240,16 +272,6 @@ npf_tbl_destroy(struct npf_tbl *nt) return 0; } -/* - * Get users table ID - */ -uint32_t npf_tbl_id(struct npf_tbl *nt) -{ - if (nt) - return nt->nt_id; - return 0; -} - /* * Number of entries in the table */ @@ -310,21 +332,26 @@ static int npf_tbl_resize(struct npf_tbl *nt) * created, and is set to the next slot when a slot filled. Except when a * slot is emptied, in which case 'hint' becomes the lower of the current * 'hint' and the newly emptied slot. + * + * Returns 0 for success, or less than 0 for error. */ static int -npf_tbl_entry_id_alloc(struct npf_tbl *nt, uint hint) +npf_tbl_entry_id_alloc(struct npf_tbl *nt, uint32_t hint, uint32_t *id) { - uint i, id = hint; + uint32_t i, tmp = hint; if (nt->nt_nentries >= nt->nt_sz) return -ENOSPC; for (i = 0; i < nt->nt_sz; i++) { - if (!nt->nt_table[id]) - return id; + if (!nt->nt_table[tmp]) { + /* Empty slot found */ + *id = tmp; + return 0; + } - if (++id >= nt->nt_sz) - id = 0; + if (++tmp >= nt->nt_sz) + tmp = 0; } /* should never get here if nt_nentries is accurate */ @@ -347,6 +374,8 @@ npf_tbl_entry_create(struct npf_tbl *nt, const char *name) return NULL; te->te_name = strdup(name); + rte_atomic32_set(&te->te_refcnt, 0); + te->te_free_fn = nt->nt_entry_free_fn; #ifndef NDEBUG te->te_memguard = TS_MEMGUARD; #endif @@ -360,8 +389,8 @@ npf_tbl_entry_create(struct npf_tbl *nt, const char *name) * * Path #1: * - * npf_tbl_entry_remove -> zhash_delete -> npf_tbl_hash_freefn -> call_rcu -> - * npf_tbl_entry_free_rcu -> _npf_tbl_entry_destroy + * npf_tbl_entry_remove -> zhash_delete -> npf_tbl_zhash_delete_cb + * -> call_rcu -> npf_tbl_entry_free_rcu -> _npf_tbl_entry_destroy * * Path #2: * @@ -374,6 +403,10 @@ _npf_tbl_entry_destroy(struct npf_tbl_entry *te) if (!te 
|| te->te_tbl) return -EINVAL; + /* Let client cleanup its data first */ + if (te->te_free_fn) + (*te->te_free_fn)(te->te_data); + if (te->te_name) free(te->te_name); @@ -408,24 +441,65 @@ npf_tbl_entry_free_rcu(struct rcu_head *head) } /* - * Callback via the zhash_delete function. + * Take reference on table entry + */ +static struct npf_tbl_entry *_npf_tbl_entry_get(struct npf_tbl_entry *te) +{ + if (te) + rte_atomic32_inc(&te->te_refcnt); + return te; +} + +void *npf_tbl_entry_get(void *td) +{ + struct npf_tbl_entry *te; + + te = npf_tbl_data2entry(td); + if (!te) + return NULL; + + _npf_tbl_entry_get(te); + return td; +} + +/* + * Release reference on table entry */ -static void npf_tbl_hash_freefn(void *data) +static void _npf_tbl_entry_put(struct npf_tbl_entry *te) +{ + if (te && rte_atomic32_dec_and_test(&te->te_refcnt)) + call_rcu(&te->te_rcu, npf_tbl_entry_free_rcu); +} + +void npf_tbl_entry_put(void *td) +{ + struct npf_tbl_entry *te; + + te = npf_tbl_data2entry(td); + if (!te) + return; + _npf_tbl_entry_put(te); +} + +/* + * Callback from zhash_delete + */ +static void npf_tbl_zhash_delete_cb(void *data) { struct npf_tbl_entry *te = data; - /* Remove from tableset array after RCU grace period */ - call_rcu(&te->te_rcu, npf_tbl_entry_free_rcu); + _npf_tbl_entry_put(te); } /* * Insert an entry into a table */ -int -npf_tbl_entry_insert(struct npf_tbl *nt, void *td) +int npf_tbl_entry_insert(struct npf_tbl *nt, void *td, uint32_t *tid) { struct npf_tbl_entry *te; - int id, rc; + int rc; + + *tid = NPF_TBLID_NONE; if (!nt || (nt->nt_flags & TS_TBL_ACTIVE) == 0) return -EINVAL; @@ -438,36 +512,39 @@ npf_tbl_entry_insert(struct npf_tbl *nt, void *td) return -EEXIST; /* Get a free slot in the table */ - id = npf_tbl_entry_id_alloc(nt, nt->nt_hint); + rc = npf_tbl_entry_id_alloc(nt, nt->nt_hint, tid); /* Try and resize table if it is full */ - if (id == -ENOSPC) { + if (rc == -ENOSPC) { rc = npf_tbl_resize(nt); if (rc < 0) return rc; - id = 
npf_tbl_entry_id_alloc(nt, nt->nt_hint); + rc = npf_tbl_entry_id_alloc(nt, nt->nt_hint, tid); } - if (id < 0) - return id; + if (rc < 0) + return rc; /* Insert into hash table */ if (zhash_insert(nt->nt_hash, te->te_name, te) < 0) return -EEXIST; /* Insert into table array */ - te->te_id = id; + te->te_id = *tid; rcu_assign_pointer(nt->nt_table[te->te_id], te); nt->nt_nentries++; - nt->nt_hint = id + 1; + nt->nt_hint = *tid + 1; /* mark entry as being inserted into table */ te->te_tbl = nt; - /* Set hash table free function. */ - zhash_freefn(nt->nt_hash, te->te_name, npf_tbl_hash_freefn); + /* Set zhash_delete callback function. */ + zhash_freefn(nt->nt_hash, te->te_name, npf_tbl_zhash_delete_cb); + + /* Take reference on table entry */ + _npf_tbl_entry_get(te); - return id; + return 0; } /* @@ -500,7 +577,12 @@ npf_tbl_entry_remove(struct npf_tbl *nt, void *td) /* mark entry as being removed from table */ te->te_tbl = NULL; - /* Schedule the entry destruction via zhash free fn */ + /* + * Schedule the entry destruction via zhash free fn. + * + * This will call _npf_tbl_entry_put to release the reference we took + * when inserted. + */ zhash_delete(nt->nt_hash, te->te_name); return 0; @@ -517,6 +599,9 @@ npf_tbl_walk(struct npf_tbl *nt, npf_tbl_walk_cb *cb, void *ctx) uint i; int rc = 0; + if (!nt) + return -1; + for (i = 0; i < nt->nt_sz; i++) { te = nt->nt_table[i]; if (te) { @@ -531,8 +616,7 @@ npf_tbl_walk(struct npf_tbl *nt, npf_tbl_walk_cb *cb, void *ctx) /* * Lookup entry name is hash table, and return entry ID. 
*/ -int -npf_tbl_name2id(struct npf_tbl *nt, const char *name) +uint32_t npf_tbl_name2id(struct npf_tbl *nt, const char *name) { struct npf_tbl_entry *te; @@ -540,7 +624,7 @@ npf_tbl_name2id(struct npf_tbl *nt, const char *name) if (te) return te->te_id; - return -ENOENT; + return NPF_TBLID_NONE; } /* diff --git a/src/npf/npf_tblset.h b/src/npf/npf_tblset.h index e17b8f37..833f6b02 100644 --- a/src/npf/npf_tblset.h +++ b/src/npf/npf_tblset.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only */ @@ -9,7 +9,7 @@ * @brief API to managed table service * * Tables entries are created and deleted (using a name) from config on the - * master thread, and may be looked-up from the dataplane forwarding threads + * main thread, and may be looked-up from the dataplane forwarding threads * using a tableset handle and a table ID. * * Table entries are created via a named reference in config (e.g. firewall @@ -28,7 +28,7 @@ * * The user is responsible for rcu-assigning their "struct npf_tbl" pointer. * - * Changes to the hash table should only take place from the master thread. + * Changes to the hash table should only take place from the main thread. * * A table may be re-sized if it reaches its maximum size and the * TS_TBL_RESIZE flag is set. @@ -100,20 +100,6 @@ struct npf_tbl *npf_tbl_create(uint32_t id, uint tbl_sz, uint tbl_sz_max, */ int npf_tbl_destroy(struct npf_tbl *nt); -/** - * @brief Get table ID. - * - * Return the table ID value that the user passed to npf_tbl_create - * - * @param nt Table handle - * @return Table ID. 
- * - * Example: - * - * uint32_t tid = npf_tbl_id(foo_tbl); - */ -uint32_t npf_tbl_id(struct npf_tbl *nt); - /** * @brief Get number of entries in a table * @@ -126,6 +112,17 @@ uint32_t npf_tbl_id(struct npf_tbl *nt); */ uint npf_tbl_size(struct npf_tbl *nt); +/** + * @brief Set entry free function + * + * Free or tidy client data + */ +typedef void (npf_tbl_entry_free_fn)(void *data); + +void npf_tbl_set_entry_freefn(struct npf_tbl *nt, + npf_tbl_entry_free_fn *free_fn); + + /** * @brief Create a table entry * @@ -175,10 +172,11 @@ int npf_tbl_entry_destroy(void *td); * * @param nt Table handle * @param td Pointer to entry data object within table entry - * @return Table ID greater or equal to 0 if successful, less than 0 if - * unsuccessful + * @param tid Pointer to the table ID allocated by this function. + * Set to NPF_TBLID_NONE if unsuccessful. + * @return 0 if successful, less than 0 if unsuccessful */ -int npf_tbl_entry_insert(struct npf_tbl *nt, void *td); +int npf_tbl_entry_insert(struct npf_tbl *nt, void *td, uint32_t *tid); /** * @brief Remove entry from table and destroy it @@ -202,6 +200,16 @@ int npf_tbl_entry_insert(struct npf_tbl *nt, void *td); */ int npf_tbl_entry_remove(struct npf_tbl *nt, void *td); +/** + * @brief Take reference on table entry + */ +void *npf_tbl_entry_get(void *td); + +/** + * @brief Release reference on table entry + */ +void npf_tbl_entry_put(void *td); + /** * @brief Table walk callback function * @@ -246,9 +254,9 @@ int npf_tbl_walk(struct npf_tbl *nt, npf_tbl_walk_cb *cb, void *ctx); * * @param nt Table handle * @param name Table entry name - * @return Table ID or -ENOENT if not found + * @return Table ID or NPF_TBLID_NONE if not found */ -int npf_tbl_name2id(struct npf_tbl *nt, const char *name); +uint32_t npf_tbl_name2id(struct npf_tbl *nt, const char *name); /** * @brief Get the table entry name for a given table entry ID @@ -281,4 +289,17 @@ void *npf_tbl_name_lookup(struct npf_tbl *nt, const char *name); */ void 
*npf_tbl_id_lookup(struct npf_tbl *nt, uint id); +/** + * @brief Is this table ID valid? + * + * May be called from a forwarding thread. + * + * @param id Table entry ID + * @return true if valid, else false + */ +static ALWAYS_INLINE bool npf_tbl_id_is_valid(uint id) +{ + return id != NPF_TBLID_NONE; +} + #endif /* NPF_TBLSET_H */ diff --git a/src/npf/npf_timeouts.c b/src/npf/npf_timeouts.c index 49838674..4860c4c3 100644 --- a/src/npf/npf_timeouts.c +++ b/src/npf/npf_timeouts.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -16,13 +16,22 @@ #include "urcu.h" #include "vplane_log.h" +/* Default timeout for new sessions */ +#define NPF_NEW_SESS_TIMEOUT 30 + static void timeout_init(struct npf_timeout *to) { + enum npf_proto_idx proto; + to->to_set_count = 0; + + /* + * TCP session state timeouts + */ to->to_tcp[NPF_TCPS_NONE] = 0; /* Unsynchronised states. */ - to->to_tcp[NPF_TCPS_SYN_SENT] = 30; - to->to_tcp[NPF_TCPS_SIMSYN_SENT] = 30; + to->to_tcp[NPF_TCPS_SYN_SENT] = NPF_NEW_SESS_TIMEOUT; + to->to_tcp[NPF_TCPS_SIMSYN_SENT] = NPF_NEW_SESS_TIMEOUT; to->to_tcp[NPF_TCPS_SYN_RECEIVED] = 60; /* Established: 24 hours. 
*/ to->to_tcp[NPF_TCPS_ESTABLISHED] = 60 * 60 * 24; @@ -39,20 +48,18 @@ static void timeout_init(struct npf_timeout *to) to->to_tcp[NPF_TCPS_RST_RECEIVED] = 10; to->to_tcp[NPF_TCPS_CLOSED] = 0; - to->to[NPF_PROTO_IDX_UDP][NPF_ANY_SESSION_NONE] = 0; - to->to[NPF_PROTO_IDX_UDP][NPF_ANY_SESSION_NEW] = 30; - to->to[NPF_PROTO_IDX_UDP][NPF_ANY_SESSION_ESTABLISHED] = 60; - to->to[NPF_PROTO_IDX_UDP][NPF_ANY_SESSION_CLOSED] = 0; - - to->to[NPF_PROTO_IDX_ICMP][NPF_ANY_SESSION_NONE] = 0; - to->to[NPF_PROTO_IDX_ICMP][NPF_ANY_SESSION_NEW] = 30; - to->to[NPF_PROTO_IDX_ICMP][NPF_ANY_SESSION_ESTABLISHED] = 60; - to->to[NPF_PROTO_IDX_ICMP][NPF_ANY_SESSION_CLOSED] = 0; - - to->to[NPF_PROTO_IDX_OTHER][NPF_ANY_SESSION_NONE] = 0; - to->to[NPF_PROTO_IDX_OTHER][NPF_ANY_SESSION_NEW] = 30; - to->to[NPF_PROTO_IDX_OTHER][NPF_ANY_SESSION_ESTABLISHED] = 60; - to->to[NPF_PROTO_IDX_OTHER][NPF_ANY_SESSION_CLOSED] = 0; + /* + * Non-TCP session state timeouts + */ + for (proto = NPF_PROTO_IDX_FIRST; proto <= NPF_PROTO_IDX_LAST; + proto++) { + if (proto == NPF_PROTO_IDX_TCP) + continue; + to->to[proto][SESSION_STATE_NONE] = 0; + to->to[proto][SESSION_STATE_NEW] = NPF_NEW_SESS_TIMEOUT; + to->to[proto][SESSION_STATE_ESTABLISHED] = 60; + to->to[proto][SESSION_STATE_CLOSED] = 0; + } } /* Take reference on timeout structure */ @@ -70,58 +77,76 @@ void npf_timeout_ref_put(struct npf_timeout *to) free(to); } -/* Set a state timeout */ -int npf_timeout_set(vrfid_t vrfid, enum npf_timeout_action action, - uint8_t proto_idx, uint8_t state, uint32_t tout) +/* + * Set a state timeout for sessions other than TCP + */ +int npf_gen_timeout_set(struct npf_timeout *to, enum npf_proto_idx proto_idx, + enum dp_session_state state, uint32_t tout) { - struct npf_timeout *to; - struct vrf *vrf; + if (!to || state == SESSION_STATE_NONE || state > SESSION_STATE_LAST) + return -1; - /* - * We can race with VRF creation, so manage VRF reference counts - * to maintain state - */ - vrf = vrf_find_or_create(vrfid); - if (!vrf) 
- return -EINVAL; - to = vrf_get_npf_timeout_rcu(vrfid); - if (!to) - return -EINVAL; - - - /* Manage ref count */ - switch (action) { - case TIMEOUT_SET: - vrf_find_or_create(vrfid); /* Inc on set */ - to->to_set_count++; - break; - case TIMEOUT_DEL: - vrf_delete_by_ptr(vrf); /* Dec on reset */ - to->to_set_count--; - break; - }; - - if (proto_idx == NPF_PROTO_IDX_TCP) - to->to_tcp[state] = tout; - else - to->to[proto_idx][state] = tout; - - /* Always release initial reference */ - vrf_delete_by_ptr(vrf); + to->to[proto_idx][state] = tout; + return 0; +} + +/* + * Set a state timeout for TCP sessions + */ +int npf_tcp_timeout_set(struct npf_timeout *to, enum tcp_session_state state, + uint32_t tout) +{ + if (!to || state == NPF_TCPS_NONE || state > NPF_TCPS_LAST) + return -1; + + to->to_tcp[state] = tout; return 0; } -/* Get a state timeout */ -uint32_t npf_timeout_get(const npf_state_t *nst, uint8_t proto_idx, - uint32_t custom) +/* + * Get session timeout value for sessions other than TCP. + * + * The state-dependent timeout value is overridden with a custom timeout if: + * + * a) the session tuple matched a configured custom timeout at the time + * the session was created, and + * b) the session state is steady (i.e. is in 'established' state). 
+ */ +uint32_t npf_gen_timeout_get(const npf_state_t *nst, + enum dp_session_state state, + enum npf_proto_idx proto_idx, uint32_t custom) { - if (npf_state_is_steady(nst, proto_idx) && custom) + /* Custom timeout only applies to Established sessions */ + if (custom && state == SESSION_STATE_ESTABLISHED) return custom; - if (proto_idx == NPF_PROTO_IDX_TCP) - return nst->nst_to->to_tcp[nst->nst_state]; + const struct npf_timeout *to; + + to = rcu_dereference(nst->nst_to); + if (unlikely(!to)) + return NPF_NEW_SESS_TIMEOUT; + + return to->to[proto_idx][state]; +} + +/* + * Get session state timeout for TCP sessions + */ +uint32_t npf_tcp_timeout_get(const npf_state_t *nst, + enum tcp_session_state tcp_state, + uint32_t custom) +{ + /* Custom timeout only applies to Established sessions */ + if (custom && tcp_state == NPF_TCPS_ESTABLISHED) + return custom; + + const struct npf_timeout *to; + + to = rcu_dereference(nst->nst_to); + if (unlikely(!to)) + return NPF_NEW_SESS_TIMEOUT; - return nst->nst_to->to[proto_idx][nst->nst_state]; + return to->to_tcp[tcp_state]; } static void timeout_reset(struct vrf *vrf, struct npf_timeout *to) @@ -154,7 +179,7 @@ struct npf_timeout *npf_timeout_create_instance(void) { struct npf_timeout *to; - to = malloc_aligned(sizeof(struct npf_timeout)); + to = zmalloc_aligned(sizeof(struct npf_timeout)); if (to) { timeout_init(to); rte_atomic32_init(&to->to_refcnt); diff --git a/src/npf/npf_timeouts.h b/src/npf/npf_timeouts.h index 2a7a3d63..72fd9126 100644 --- a/src/npf/npf_timeouts.h +++ b/src/npf/npf_timeouts.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2016 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -27,7 +27,7 @@ struct npf_timeout { rte_atomic32_t to_refcnt; uint32_t to_set_count; uint32_t to_tcp[NPF_TCP_NSTATES]; - uint32_t to[NPF_PROTO_IDX_COUNT][NPF_ANY_SESSION_NSTATES]; + uint32_t to[NPF_PROTO_IDX_COUNT][SESSION_STATE_SIZE]; }; enum npf_timeout_action { @@ -36,10 +36,15 @@ enum npf_timeout_action { }; /* Protos */ -int npf_timeout_set(vrfid_t vrfid, enum npf_timeout_action action, - uint8_t proto_idx, uint8_t state, uint32_t tout); -uint32_t npf_timeout_get(const npf_state_t *nst, uint8_t proto_idx, - uint32_t custom); +int npf_gen_timeout_set(struct npf_timeout *to, enum npf_proto_idx proto_idx, + enum dp_session_state state, uint32_t tout); +int npf_tcp_timeout_set(struct npf_timeout *to, enum tcp_session_state state, + uint32_t tout); +uint32_t npf_gen_timeout_get(const npf_state_t *nst, + enum dp_session_state state, + enum npf_proto_idx proto_idx, uint32_t custom); +uint32_t npf_tcp_timeout_get(const npf_state_t *nst, + enum tcp_session_state tcp_state, uint32_t custom); void npf_timeout_reset(void); struct npf_timeout *npf_timeout_create_instance(void); void npf_timeout_destroy_instance(struct npf_timeout *to); diff --git a/src/npf/npf_unpack.c b/src/npf/npf_unpack.c new file mode 100644 index 00000000..da86c0f2 --- /dev/null +++ b/src/npf/npf_unpack.c @@ -0,0 +1,496 @@ +/* + * Copyright (c) 2019-2021, AT&T Intellectual Property. All rights reserved. 
+ * + * SPDX-License-Identifier: LGPL-2.1-only + */ + +#include +#include +#include +#include + +#include "dp_session.h" +#include "npf/npf_session.h" +#include "npf/npf_nat.h" +#include "npf/npf_nat64.h" +#include "npf/npf_pack.h" +#include "session/session_feature.h" +#include "vplane_debug.h" +#include "vplane_log.h" + + +static int npf_pack_get_session_from_init_sentry(struct sentry_packet *sp, + struct session **cs, + struct npf_session **cse) +{ + struct npf_session *se; + struct session *s; + struct sentry *sen; + bool forw; + int rc; + + if (!sp) + return -EINVAL; + + rc = session_lookup_by_sentry_packet(sp, &s, &forw); + if (rc) + return rc; + + sen = rcu_dereference(s->se_sen); + if (!sen) + return -ENOENT; + + se = session_feature_get(s, sen->sen_ifindex, SESSION_FEATURE_NPF); + if (!se) + return -ENOENT; + + *cse = se; + *cs = s; + + return 0; +} + +static +int npf_pack_session_unpack_update(struct npf_pack_session_update *csu) +{ + struct npf_pack_dp_sess_stats *stats; + struct npf_pack_sentry_packet *psp; + struct npf_session *se; + struct ifnet *ifp; + struct session *s; + int rc; + + if (!csu) + return -EINVAL; + + psp = &csu->psp; + if (!psp) + return -EINVAL; + + rc = session_npf_pack_sentry_restore(psp, &ifp); + if (rc) + return -EINVAL; + + rc = npf_pack_get_session_from_init_sentry(&psp->psp_forw, &s, &se); + if (rc) + goto error; + + if (s && !csu->se_feature_count) { + session_expire(s, NULL); + return 0; + } + + if (s && se) { + if (s->se_protocol == IPPROTO_TCP) + rc = npf_session_pack_state_update_tcp(se, &csu->pst); + else + rc = npf_session_pack_state_update_gen(se, &csu->pst); + if (rc) + goto error; + + stats = &csu->stats; + rc = session_npf_pack_stats_restore(s, stats); + if (rc) + goto error; + } + + return 0; + + error: + return rc; +} + +static +int npf_pack_restore_session(struct npf_pack_dp_session *pds, + struct npf_pack_sentry_packet *psp, + struct npf_pack_npf_session *pns, + struct npf_pack_session_state *pst, + struct 
npf_pack_dp_sess_stats *stats, + struct npf_pack_nat *pnt, + struct npf_pack_nat64 *nat64, + struct npf_session **npf_se) +{ + struct session *s = NULL; + struct npf_session *se = NULL; + struct ifnet *ifp; + int rc = -EINVAL; + + if (!pds || !psp || !pns || !pst || !stats) + return rc; + + ifp = dp_ifnet_byifname(psp->psp_ifname); + if (!ifp) { + RTE_LOG(ERR, DATAPLANE, + "npf_pack session %lu restore: Invalid ifname %s\n", + pds->pds_id, psp->psp_ifname); + goto error; + } + + se = npf_session_npf_pack_restore(pns, pst, ifp->if_vrfid, + pds->pds_protocol, ifp->if_index); + if (!se) { + RTE_LOG(ERR, DATAPLANE, + "npf_pack npf session restore failed %lu\n", + pds->pds_id); + goto error; + } + + if (pnt) { + rc = npf_nat_npf_pack_restore(se, pnt, ifp); + if (rc) { + RTE_LOG(ERR, DATAPLANE, + "npf_pack nat session restore failed %lu %s\n", + pds->pds_id, strerror(-rc)); + goto error; + } + } + + if (nat64) { + rc = npf_nat64_npf_pack_restore(se, nat64); + if (rc) { + RTE_LOG(ERR, DATAPLANE, + "npf_pack nat64 session restore failed %lu %s\n", + pds->pds_id, strerror(-rc)); + goto error; + } + } + + rc = session_npf_pack_restore(pds, psp, stats, &s); + if (rc) { + RTE_LOG(ERR, DATAPLANE, + "npf_pack DP session restore failed %lu, %s\n", + pds->pds_id, strerror(-rc)); + goto error; + } + npf_session_set_dp_session(se, s); + + rc = session_feature_add(s, ifp->if_index, SESSION_FEATURE_NPF, se); + if (rc) { + RTE_LOG(ERR, DATAPLANE, + "npf_pack NPF feature add failed %lu, %s\n", + session_get_id(s), strerror(-rc)); + goto error; + } + + rc = npf_session_npf_pack_activate(se, ifp); + if (rc) { + RTE_LOG(ERR, DATAPLANE, + "npf_pack npf session activate failed %lu\n", + session_get_id(s)); + goto error; + } + *npf_se = se; + + return 0; + + error: + if (se) + npf_session_destroy(se); + if (s) + session_expire(s, NULL); + return rc; +} + +static int npf_pack_unpack_fw_session(struct npf_pack_session_fw *cs, + struct npf_session **se) +{ + return 
npf_pack_restore_session(&cs->pds, &cs->psp, + &cs->pns, &cs->pst, &cs->stats, + NULL, NULL, se); +} + +static int npf_pack_unpack_nat_session(struct npf_pack_session_nat *cs, + struct npf_session **se) +{ + return npf_pack_restore_session(&cs->pds, &cs->psp, + &cs->pns, &cs->pst, &cs->stats, + &cs->pnt, NULL, se); +} + +static int npf_pack_unpack_nat64_session(struct npf_pack_session_nat64 *cs, + struct npf_session **se) +{ + return npf_pack_restore_session(&cs->pds, &cs->psp, + &cs->pns, &cs->pst, &cs->stats, + NULL, &cs->pn64, se); +} + +static int +npf_pack_unpack_nat_nat64_session(struct npf_pack_session_nat_nat64 *cs, + struct npf_session **se) +{ + return npf_pack_restore_session(&cs->pds, &cs->psp, + &cs->pns, &cs->pst, &cs->stats, + &cs->pnt, &cs->pn64, se); +} + +static void npf_pack_delete_old_session(struct npf_pack_dp_session *pds, + struct npf_pack_sentry_packet *psp) +{ + struct session *s = NULL; + struct npf_session *se = NULL; + + if (!pds || !psp) + return; + + if (!npf_pack_get_session_from_init_sentry(&psp->psp_forw, &s, &se)) { + if (s) + session_expire(s, NULL); + } +} + +static int npf_pack_unpack_one_session(struct npf_pack_session_new *csn, + struct npf_session **se) +{ + struct npf_pack_session_hdr *psh; + struct npf_pack_session_fw *cs; + struct npf_pack_sentry_packet *psp; + struct ifnet *ifp; + int rc; + + if (!csn) + return -EINVAL; + + cs = (struct npf_pack_session_fw *)&csn->cs; + + psp = &cs->psp; + if (!psp) + return -EINVAL; + + rc = session_npf_pack_sentry_restore(psp, &ifp); + if (rc) + return -EINVAL; + + npf_pack_delete_old_session(&cs->pds, psp); + psh = &csn->hdr; + + switch (psh->psh_type) { + case NPF_PACK_SESSION_NEW_FW: + if (psh->psh_len < NPF_PACK_NEW_FW_SESSION_SIZE) + return -EINVAL; + + rc = npf_pack_unpack_fw_session( + (struct npf_pack_session_fw *)&csn->cs, se); + break; + + case NPF_PACK_SESSION_NEW_NAT: + if (psh->psh_len < NPF_PACK_NEW_NAT_SESSION_SIZE) + return -EINVAL; + + rc = npf_pack_unpack_nat_session( 
+ (struct npf_pack_session_nat *)&csn->cs, se); + break; + + case NPF_PACK_SESSION_NEW_NAT64: + if (psh->psh_len < NPF_PACK_NEW_NAT64_SESSION_SIZE) + return -EINVAL; + + rc = npf_pack_unpack_nat64_session( + (struct npf_pack_session_nat64 *)&csn->cs, se); + break; + + case NPF_PACK_SESSION_NEW_NAT_NAT64: + if (psh->psh_len < NPF_PACK_NEW_NAT_NAT64_SESSION_SIZE) + return -EINVAL; + + rc = npf_pack_unpack_nat_nat64_session( + (struct npf_pack_session_nat_nat64 *)&csn->cs, se); + break; + }; + + return rc; +} + +static int npf_pack_unpack_peer_session(struct npf_pack_session_new *csn, + struct npf_session *se, + struct npf_session **se_peer) +{ + struct npf_session *sep; + struct npf_pack_session_new *csn_peer; + struct npf_pack_session_nat64 *cs; + struct npf_pack_session_nat64 *cs_peer; + int rc; + + cs = (struct npf_pack_session_nat64 *)&csn->cs; + if (!cs->pds.pds_nat64 && !cs->pds.pds_nat46) + return 0; + + csn_peer = (struct npf_pack_session_new *)((char *)csn + + csn->hdr.psh_len); + + rc = npf_pack_unpack_one_session(csn_peer, &sep); + if (rc || !sep) { + RTE_LOG(ERR, DATAPLANE, + "npf_pack peer session restore failed %lu\n", + cs->pds.pds_id); + return rc; + } + + cs_peer = (struct npf_pack_session_nat64 *)&csn_peer->cs; + if ((cs->pds.pds_parent && cs_peer->pds.pds_parent) || + (!cs->pds.pds_parent && !cs_peer->pds.pds_parent)) + return rc; + + if (cs->pds.pds_parent) + rc = npf_nat64_session_link(se, sep); + else + rc = npf_nat64_session_link(sep, se); + if (rc) + return rc; + + *se_peer = sep; + return 0; +} + +static +int npf_pack_session_unpack_new(struct npf_pack_session_new *csn) +{ + struct npf_session *se = NULL; + struct npf_session *se_peer = NULL; + int rc; + + if (!csn) + return -EINVAL; + + rc = npf_pack_unpack_one_session(csn, &se); + if (rc || !se) + goto error; + + /* Restore peer session */ + rc = npf_pack_unpack_peer_session(csn, se, &se_peer); + if (rc) + goto error; + return 0; + error: + if (se) + npf_session_destroy(se); + if (se_peer) 
+ npf_session_destroy(se_peer); + return rc; +} + +bool npf_pack_validate_msg(struct npf_pack_message *msg, uint32_t size) +{ + struct npf_pack_message_hdr *hdr; + + if (!msg) + return false; + + if (size > NPF_PACK_MESSAGE_MAX_SIZE || + size < NPF_PACK_MESSAGE_MIN_SIZE) + return false; + + hdr = &msg->hdr; + + if (!hdr) + return false; + if (hdr->pmh_len != size) + return false; + if (hdr->pmh_version != SESSION_PACK_VERSION) { + RTE_LOG(ERR, DATAPLANE, + "npf_pack unpack: Invalid version %u\n", + hdr->pmh_version); + return false; + } + if (hdr->pmh_type == SESSION_PACK_FULL) { + if (size > NPF_PACK_NEW_SESSION_MAX_SIZE) + return false; + } else if (hdr->pmh_type == SESSION_PACK_UPDATE) { + if (size < NPF_PACK_UPDATE_SESSION_SIZE) + return false; + } else { + RTE_LOG(ERR, DATAPLANE, + "npf_pack unpack: Invalid message type %u\n", + hdr->pmh_type); + return false; + } + return true; +} + +static int npf_pack_unpack_session(void *data, uint32_t size, + enum session_pack_type *spt) +{ + struct npf_pack_message *msg = data; + struct npf_pack_message_hdr *hdr; + struct npf_pack_session_new *csn; + struct npf_pack_session_update *csu; + int rc = -EINVAL; + + *spt = 0; + if (!npf_pack_validate_msg(msg, size)) + return rc; + + hdr = &msg->hdr; + *spt = hdr->pmh_type; + + if (hdr->pmh_type == SESSION_PACK_FULL) { + csn = (struct npf_pack_session_new *)&msg->data.cs_new; + rc = npf_pack_session_unpack_new(csn); + if (rc) + return rc; + } else if (hdr->pmh_type == SESSION_PACK_UPDATE) { + csu = &msg->data.cs_update; + rc = npf_pack_session_unpack_update(csu); + if (rc) + return rc; + } + return 0; +} + +int dp_session_restore(void *buf, uint32_t size, enum session_pack_type *spt) +{ + return npf_pack_unpack_session(buf, size, spt); +} + +/* For npf_pack UT */ +uint8_t npf_pack_get_msg_type(struct npf_pack_message *msg) +{ + return msg->hdr.pmh_type; +} + +/* For npf_pack UT */ +uint64_t npf_pack_get_session_id(struct npf_pack_message *msg) +{ + struct npf_pack_message_hdr 
*hdr; + struct npf_pack_session_new *csn; + struct npf_pack_session_update *csu; + struct npf_pack_dp_session *pds; + struct npf_pack_session_fw *fw; + + hdr = &msg->hdr; + + if (hdr->pmh_type == SESSION_PACK_FULL) { + csn = (struct npf_pack_session_new *)&msg->data.cs_new; + fw = (struct npf_pack_session_fw *)&csn->cs; + pds = (struct npf_pack_dp_session *)&fw->pds; + return pds->pds_id; + } + if (hdr->pmh_type == SESSION_PACK_UPDATE) { + csu = &msg->data.cs_update; + return csu->se_id; + } + return 0; +} + +/* For npf_pack UT */ +struct npf_pack_dp_sess_stats * +npf_pack_get_session_stats(struct npf_pack_message *msg) +{ + struct npf_pack_message_hdr *hdr; + struct npf_pack_session_new *csn; + struct npf_pack_session_update *csu; + struct npf_pack_session_fw *fw; + + hdr = &msg->hdr; + + if (hdr->pmh_type == SESSION_PACK_FULL) { + csn = (struct npf_pack_session_new *)&msg->data.cs_new; + fw = (struct npf_pack_session_fw *)&csn->cs; + return &fw->stats; + } + if (hdr->pmh_type == SESSION_PACK_UPDATE) { + csu = &msg->data.cs_update; + return &csu->stats; + } + return NULL; +} diff --git a/src/npf/npf_vrf.c b/src/npf/npf_vrf.c index 45769fc1..8c702618 100644 --- a/src/npf/npf_vrf.c +++ b/src/npf/npf_vrf.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2019-2020, AT&T Intellectual Property. All rights reserved. 
* * SPDX-License-Identifier: LGPL-2.1-only */ @@ -11,12 +11,12 @@ #include #include "util.h" -#include "vrf.h" +#include "vrf_internal.h" #include "vplane_log.h" #include "dp_event.h" #include "npf/config/npf_attach_point.h" -#include "npf/alg/npf_alg_public.h" +#include "npf/alg/alg_npf.h" #include "npf/npf_timeouts.h" #include "npf/npf_if.h" #include "npf/npf_vrf.h" @@ -24,10 +24,10 @@ /* * Ideally we would have a per-vrf set of ruleset counters in order to handle - * rulesets such as nat64, where firewall pipeline features are added to all - * interface when just one interface has such a ruleset. + * rulesets such as nat64 and zones, where firewall pipeline features are + * added to all interfaces when just one interface has such a ruleset. * - * However we use a single global set of ruleset counters for nat64 + * However we use a single global set of ruleset counters for nat64, zones * etc. for two reasons: * * 1. npf does not cleanly handle vrfs being deleted (specifically, the @@ -47,11 +47,6 @@ void vrf_set_npf_timeout(struct vrf *vrf, struct npf_timeout *to) rcu_assign_pointer(vrf->v_to, to); } -struct npf_timeout *vrf_get_npf_timeout(struct vrf *vrf) -{ - return vrf ? 
vrf->v_to : NULL; -} - struct npf_timeout *vrf_get_npf_timeout_rcu(vrfid_t vrf_id) { struct vrf *vrf = get_vrf(vrf_id); @@ -87,7 +82,7 @@ void npf_vrf_create(struct vrf *vrf) if (vrf->v_id == VRF_INVALID_ID) return; - ext_vrfid = vrf_get_external_id(vrf->v_id); + ext_vrfid = dp_vrf_get_external_id(vrf->v_id); snprintf(vrfid_str, sizeof(vrfid_str), "%u", ext_vrfid); int rc = npf_attpt_item_set_up(NPF_ATTACH_TYPE_VRF, vrfid_str, @@ -124,7 +119,7 @@ void npf_vrf_delete(struct vrf *vrf) char vrfid_str[32]; snprintf(vrfid_str, sizeof(vrfid_str), "%u", - vrf_get_external_id(vrf->v_id)); + dp_vrf_get_external_id(vrf->v_id)); int rc = npf_attpt_item_set_down(NPF_ATTACH_TYPE_VRF, vrfid_str); if (rc != 0) { RTE_LOG(ERR, FIREWALL, "failed to detach per-vrf " @@ -166,27 +161,15 @@ void npf_gbl_rs_count_incr(enum npf_ruleset_type rs_type) return; } - /* - * Increment interface feature ref counts for this ruleset type for - * all interfaces when the ruleset count changes from 0 to 1 if it is - * a 'global' type. - */ if (npf_rs_count[rs_type]++ == 0) { - enum npf_rs_flag rfl; + enum if_feat_flag ffl; - /* Are features applied for all interfaces? */ - rfl = npf_get_ruleset_type_flags(rs_type); - - if ((rfl & NPF_RS_FLAG_FEAT_GBL) != 0) { - enum if_feat_flag ffl; + /* Add niif reference for all interfaces */ + npf_if_reference_all(); - /* Add niif reference for all interfaces */ - npf_if_reference_all(); - - /* Enable features for all interfaces */ - ffl = npf_get_ruleset_type_feat_flags(rs_type); - if_feat_all_refcnt_incr(ffl); - } + /* Enable features for all interfaces */ + ffl = npf_get_ruleset_type_feat_flags(rs_type); + if_feat_all_refcnt_incr(ffl); } } @@ -206,27 +189,15 @@ void npf_gbl_rs_count_decr(enum npf_ruleset_type rs_type) return; } - /* - * Decrement interface feature ref counts for this ruleset type for - * all interfaces when the ruleset count changes from 1 to 0 if it is - * a 'global' type. 
- */ if (--npf_rs_count[rs_type] == 0) { - enum npf_rs_flag rfl; - - /* Are features applied for all interfaces? */ - rfl = npf_get_ruleset_type_flags(rs_type); + enum if_feat_flag ffl; - if ((rfl & NPF_RS_FLAG_FEAT_GBL) != 0) { - enum if_feat_flag ffl; + /* Disable features for all interfaces */ + ffl = npf_get_ruleset_type_feat_flags(rs_type); + if_feat_all_refcnt_decr(ffl); - /* Disable features for all interfaces */ - ffl = npf_get_ruleset_type_feat_flags(rs_type); - if_feat_all_refcnt_decr(ffl); - - /* Remove niif reference for all interfaces */ - npf_if_release_all(); - } + /* Remove niif reference for all interfaces */ + npf_if_release_all(); } } @@ -236,10 +207,10 @@ void npf_gbl_rs_count_decr(enum npf_ruleset_type rs_type) * Check if any ruleset global counts are greater than zero. If so, enable * those features on the interface and return true. * - * Typically this is used when rulesets such as nat64 require features to be - * enabled for all interfaces and not just the interfaces with the nat64 - * configuration. In these situations the npf_rs_count[] for the relevant - * ruleset will be greater than 0. + * Typically this is used when rulesets such as nat64 or zones require + * features to be enabled for all interfaces and not just the interfaces with + * the nat64/zone configuration. In these situations the npf_rs_count[] for + * the relevant ruleset will be greater than 0. * * Typically this function will be useful for when interfaces (such as vlan * interfaces) are created after bootup. @@ -255,7 +226,7 @@ void npf_vrf_if_index_set(struct ifnet *ifp) /* Is global ruleset count > 0? 
*/ if (npf_rs_count[rs_type] > 0 && - (rfl & NPF_RS_FLAG_FEAT_GBL) != 0) { + (rfl & NPF_RS_FLAG_FEAT_INTF_ALL) != 0) { enum if_feat_flag ffl; /* Enable features on interface */ @@ -288,7 +259,7 @@ void npf_vrf_if_index_unset(struct ifnet *ifp) rfl = npf_get_ruleset_type_flags(rs_type); if (npf_rs_count[rs_type] > 0 && - (rfl & NPF_RS_FLAG_FEAT_GBL) != 0) { + (rfl & NPF_RS_FLAG_FEAT_INTF_ALL) != 0) { enum if_feat_flag ffl; /* Disable features on interface */ diff --git a/src/npf/npf_vrf.h b/src/npf/npf_vrf.h index 14407c0d..f955d5f9 100644 --- a/src/npf/npf_vrf.h +++ b/src/npf/npf_vrf.h @@ -1,12 +1,12 @@ #ifndef _NPF_VRF_H_ #define _NPF_VRF_H_ /* - * Copyright (c) 2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2019-2020, AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only */ -#include "vrf.h" +#include "vrf_internal.h" #include "util.h" #include "npf/config/npf_ruleset_type.h" @@ -14,7 +14,6 @@ struct vrf; struct npf_config; void vrf_set_npf_timeout(struct vrf *vrf, struct npf_timeout *to); -struct npf_timeout *vrf_get_npf_timeout(struct vrf *vrf); struct npf_timeout *vrf_get_npf_timeout_rcu(vrfid_t vrf_id); void vrf_set_npf_alg(struct vrf *vrf, struct npf_alg_instance *ai); diff --git a/src/npf/rldb.c b/src/npf/rldb.c new file mode 100644 index 00000000..f52313e9 --- /dev/null +++ b/src/npf/rldb.c @@ -0,0 +1,944 @@ +/* + * Copyright (c) 2020, AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ + +#include +#include +#include + +#include "npf_rte_acl.h" +#include "npf_rule_gen.h" + +#include "main.h" +#include "urcu.h" +#include "util.h" +#include "vplane_log.h" + +#include "rldb.h" + +#define RLDB_ERR(args...) 
RTE_LOG(ERR, DATAPLANE, args) + +#define RLDB_MAX_RULES (1 << 13) +#define RLDB_MAX_ELEMENTS (2 * RLDB_MAX_RULES) + +#define GLOBAL_MIN_BUCKETS (2 << 6) +#define GLOBAL_MAX_BUCKETS (2 << 10) + +#define RLDB_MIN_BUCKETS (1 << 16) +#define RLDB_MAX_BUCKETS (1 << 17) + +struct rldb_db_handle { + npf_match_ctx_t *match_ctx; + uint32_t flags; + uint16_t af; + struct rte_acl_rule *acl_rules; + struct cds_lfht *ht; + struct cds_lfht_node ht_node; + struct rldb_stats stats; + /* --- cacheline 1 boundary (64 bytes) was 40 bytes ago --- */ + char name[RLDB_NAME_MAX]; +}; + +struct rldb_rule_handle { + uint32_t rule_no; + struct cds_lfht_node ht_node; + struct rldb_rule_spec rule; +}; + +static struct rte_mempool *rldb_acl4_mempool; +static struct rte_mempool *rldb_acl6_mempool; + +static struct rte_mempool *rldb_rh_mempool; +static struct cds_lfht *rldb_global_ht; + +static bool rldb_disabled; + +static rte_atomic32_t rldb_counter; + +/* + * initialize infrastructure for rule database + * + * Returns 0 on success. On any allocation failure, whatever was already + * set up is released through rldb_cleanup() (which also marks the rldb as + * disabled) and -ENOMEM is returned. + */ +int rldb_init(void) +{ + int rc; + rldb_rh_mempool = rte_mempool_create("rldb_rh_pool", RLDB_MAX_ELEMENTS, + sizeof(struct rldb_rule_handle), + 0, 0, NULL, NULL, NULL, NULL, + rte_socket_id(), 0); + + if (!rldb_rh_mempool) { + RLDB_ERR("Could not allocate rldb rule-handle pool\n"); + rc = -ENOMEM; + goto error; + } + + rldb_acl4_mempool = rte_mempool_create("rldb_acl4_pool", + RLDB_MAX_ELEMENTS, + npf_rte_acl_rule_size(AF_INET), + 0, 0, NULL, NULL, NULL, NULL, + rte_socket_id(), 0); + + if (!rldb_acl4_mempool) { + RLDB_ERR("Could not allocate rldb acl pool for IPv4\n"); + rc = -ENOMEM; + goto error; + } + + rldb_acl6_mempool = rte_mempool_create("rldb_acl6_pool", + RLDB_MAX_ELEMENTS, + npf_rte_acl_rule_size(AF_INET6), + 0, 0, NULL, NULL, NULL, NULL, + rte_socket_id(), 0); + + if (!rldb_acl6_mempool) { + RLDB_ERR("Could not allocate rldb acl pool for IPv6\n"); + rc = -ENOMEM; + goto error; + } + + rldb_global_ht = cds_lfht_new(GLOBAL_MIN_BUCKETS, + GLOBAL_MIN_BUCKETS, + GLOBAL_MAX_BUCKETS, + CDS_LFHT_AUTO_RESIZE, NULL); + + 
if (!rldb_global_ht) { + RLDB_ERR("Could not allocate rldb id hashtable\n"); + rc = -ENOMEM; + goto error; + } + + rldb_disabled = false; + + return 0; + +error: + rldb_cleanup(); + return rc; +} + +static int rldb_name_match(struct cds_lfht_node *node, const void *key) +{ + const char *key_name = key; + + struct rldb_db_handle *db = caa_container_of(node, + struct rldb_db_handle, + ht_node); + + if (strcmp(key_name, db->name) == 0) + return 1; + + return 0; +} + +/* + * Release a database handle: its rule hash table, its ACL match context + * and the handle itself. A NULL handle is ignored. + */ +static void rldb_db_handle_destroy(struct rldb_db_handle *db) +{ + if (!db) + return; + + /* ht is still NULL when rldb_create() fails before allocating it */ + if (db->ht) + cds_lfht_destroy(db->ht, NULL); + + if (db->match_ctx) + npf_rte_acl_destroy(db->af, &db->match_ctx); + + free(db); +} + +/* + * Borrowed from nat_pool.c: + * ----8<---- + * rte_jhash reads from memory in 4-byte chunks. If the length of 'name' is + * not a multiple of 4 bytes then it may try and read memory that is not + * mapped. Issue was detected by valgrind. + * ---->8----- + * + * Also spotted by AddressSanitizer: global-buffer-overflow + */ +static uint32_t rldb_name_hash(const char *name, size_t name_len) +{ + char buf[name_len+3]; + + memcpy(buf, name, name_len); + return rte_jhash(buf, name_len, 0); +} + +/* + * create rule database of specified name + */ +int rldb_create(const char *name, uint32_t flags, struct rldb_db_handle **_db) +{ + uint32_t hash; + struct rldb_db_handle *db = NULL; + struct rte_mempool *rule_mempool; + size_t name_len; + struct cds_lfht_node *node; + int id, rc = 0; + + if (!name) + return -EINVAL; + + if (rldb_disabled) { + RLDB_ERR("RLDB is not initialized\n"); + return -ENODEV; + } + + name_len = strnlen(name, RLDB_NAME_MAX); + if (name_len == RLDB_NAME_MAX || name_len == 0) + return -EINVAL; + + db = zmalloc_aligned(sizeof(*db)); + if (!db) { + RLDB_ERR("Could not allocate memory for rldb: \"%s\".\n", name); + rc = -ENOMEM; + goto error; + } + + id = rte_atomic32_add_return(&rldb_counter, 1); + snprintf(db->name, RLDB_NAME_MAX, "%s-%d", name, id); + + if (flags & 
NPFRL_FLAG_V4_PFX) { + db->af = AF_INET; + rule_mempool = rldb_acl4_mempool; + } else if (flags & NPFRL_FLAG_V6_PFX) { + db->af = AF_INET6; + rule_mempool = rldb_acl6_mempool; + } else { + rc = -EAFNOSUPPORT; + goto error; + } + + db->flags = flags; + + db->ht = cds_lfht_new(RLDB_MIN_BUCKETS, + RLDB_MIN_BUCKETS, + RLDB_MAX_BUCKETS, CDS_LFHT_AUTO_RESIZE, NULL); + + if (!db->ht) { + RLDB_ERR("Could not allocate rldb hashtable\n"); + rc = -ENOMEM; + goto error; + } + + cds_lfht_node_init(&db->ht_node); + + hash = rldb_name_hash(name, name_len); + node = cds_lfht_add_unique(rldb_global_ht, hash, rldb_name_match, + &db->name, &db->ht_node); + if (node != &db->ht_node) { + RLDB_ERR("Could not add rldb: database with the name \"%s\" " + "already exists.\n", name); + rc = -EEXIST; + goto error; + } + + rc = npf_rte_acl_init(db->af, db->name, RLDB_MAX_RULES, + rule_mempool, dp_rcu_qsbr_get(), &db->match_ctx); + if (rc < 0) { + RLDB_ERR + ("Could not add rldb (%s): NPF rte_acl could not be " + "initialized\n", name); + goto error; + } + + *_db = db; + + return 0; + +error: + if (db) { + cds_lfht_del(rldb_global_ht, &db->ht_node); + rldb_db_handle_destroy(db); + } + + return rc; +} + +static void rldb_prepare_rule_v4(struct rldb_rule_spec *rule, + uint8_t *match_addr, uint8_t *mask) +{ + uint8_t proto = 0; + uint16_t loport = 0, hiport = 0; + struct rldb_v4_prefix *pfx; + + /* protocol */ + if (rule->rldb_flags & NPFRL_FLAG_PROTO) + proto = rule->rldb_proto.npfrl_proto; + + match_addr[NPC_GPR_PROTO_OFF_v4] = proto; + mask[NPC_GPR_PROTO_OFF_v4] = proto ? 
0 : ~0; + + /* src addr */ + if (rule->rldb_flags & NPFRL_FLAG_SRC_PFX) { + pfx = &rule->rldb_src_addr.v4_pfx; + *(uint32_t *) &match_addr[NPC_GPR_SADDR_OFF_v4] = + *(uint32_t *) &pfx->npfrl_bytes; + *(uint32_t *) &mask[NPC_GPR_SADDR_OFF_v4] = + htonl(npf_prefix_to_host_mask4(pfx->npfrl_plen)); + } + + /* dst addr */ + if (rule->rldb_flags & NPFRL_FLAG_DST_PFX) { + pfx = &rule->rldb_dst_addr.v4_pfx; + *(uint32_t *) &match_addr[NPC_GPR_DADDR_OFF_v4] = + *(uint32_t *) &pfx->npfrl_bytes; + *(uint32_t *) &mask[NPC_GPR_DADDR_OFF_v4] = + htonl(npf_prefix_to_host_mask4(pfx->npfrl_plen)); + } + + /* src port */ + if (rule->rldb_flags & NPFRL_FLAG_SRC_PORT_RANGE) { + loport = rule->rldb_src_port_range.npfrl_loport; + hiport = rule->rldb_src_port_range.npfrl_hiport; + } else { + loport = 0; + hiport = 0xFFFF; + } + + match_addr[NPC_GPR_SPORT_OFF_v4 + 1] = loport >> 8; + match_addr[NPC_GPR_SPORT_OFF_v4] = loport & 0xFF; + + mask[NPC_GPR_SPORT_OFF_v4 + 1] = hiport >> 8; + mask[NPC_GPR_SPORT_OFF_v4] = hiport & 0xFF; + + /* dst port */ + if (rule->rldb_flags & NPFRL_FLAG_DST_PORT_RANGE) { + loport = rule->rldb_dst_port_range.npfrl_loport; + hiport = rule->rldb_dst_port_range.npfrl_hiport; + } else { + loport = 0; + hiport = 0xFFFF; + } + + match_addr[NPC_GPR_DPORT_OFF_v4 + 1] = loport >> 8; + match_addr[NPC_GPR_DPORT_OFF_v4] = loport & 0xFF; + + mask[NPC_GPR_DPORT_OFF_v4 + 1] = hiport >> 8; + mask[NPC_GPR_DPORT_OFF_v4] = hiport & 0xFF; +} + +static void rldb_prepare_rule_v6(struct rldb_rule_spec *rule, + uint8_t *match_addr, uint8_t *mask) +{ + uint8_t proto = 0; + uint16_t loport = 0, hiport = 0; + unsigned int i; + struct in6_addr addr_mask; + uint8_t *addr_mask_ptr; + struct rldb_v6_prefix *pfx; + + /* protocol */ + if (rule->rldb_flags & NPFRL_FLAG_PROTO) + proto = rule->rldb_proto.npfrl_proto; + + match_addr[NPC_GPR_PROTO_OFF_v6] = proto; + mask[NPC_GPR_PROTO_OFF_v6] = proto ? 
0 : ~0; + + /* src addr */ + if (rule->rldb_flags & NPFRL_FLAG_SRC_PFX) { + pfx = &rule->rldb_src_addr.v6_pfx; + npf_masklen_to_grouper_mask(AF_INET6, pfx->npfrl_plen, + &addr_mask); + addr_mask_ptr = (uint8_t *) &addr_mask.s6_addr; + for (i = 0; i < NPC_GPR_SADDR_LEN_v6; i++) { + match_addr[NPC_GPR_SADDR_OFF_v6 + i] = + pfx->npfrl_bytes[i]; + mask[NPC_GPR_SADDR_OFF_v6 + i] = addr_mask_ptr[i]; + } + } + + /* dst addr */ + if (rule->rldb_flags & NPFRL_FLAG_DST_PFX) { + pfx = &rule->rldb_dst_addr.v6_pfx; + npf_masklen_to_grouper_mask(AF_INET6, pfx->npfrl_plen, + &addr_mask); + addr_mask_ptr = (uint8_t *) &addr_mask.s6_addr; + for (i = 0; i < NPC_GPR_DADDR_LEN_v6; i++) { + match_addr[NPC_GPR_DADDR_OFF_v6 + i] = + pfx->npfrl_bytes[i]; + mask[NPC_GPR_DADDR_OFF_v6 + i] = addr_mask_ptr[i]; + } + } + + /* src port */ + if (rule->rldb_flags & NPFRL_FLAG_SRC_PORT_RANGE) { + loport = rule->rldb_src_port_range.npfrl_loport; + hiport = rule->rldb_src_port_range.npfrl_hiport; + } else { + loport = 0; + hiport = 0xFFFF; + } + + match_addr[NPC_GPR_SPORT_OFF_v6] = loport >> 8; + match_addr[NPC_GPR_SPORT_OFF_v6 + 1] = loport & 0xFF; + + mask[NPC_GPR_SPORT_OFF_v6] = hiport >> 8; + mask[NPC_GPR_SPORT_OFF_v6 + 1] = hiport & 0xFF; + + /* dst port */ + if (rule->rldb_flags & NPFRL_FLAG_DST_PORT_RANGE) { + loport = rule->rldb_dst_port_range.npfrl_loport; + hiport = rule->rldb_dst_port_range.npfrl_hiport; + } else { + loport = 0; + hiport = 0xFFFF; + } + + match_addr[NPC_GPR_DPORT_OFF_v6] = loport >> 8; + match_addr[NPC_GPR_DPORT_OFF_v6 + 1] = loport & 0xFF; + + mask[NPC_GPR_DPORT_OFF_v6] = hiport >> 8; + mask[NPC_GPR_DPORT_OFF_v6 + 1] = hiport & 0xFF; +} + +/* + * start a sequence of operations + */ +int rldb_start_transaction(struct rldb_db_handle *db) +{ + if (!db) + return -EINVAL; + + if (rldb_disabled) { + RLDB_ERR("RLDB is not initialized\n"); + return -ENODEV; + } + + return npf_rte_acl_start_transaction(db->af, db->match_ctx); +} + +/* + * commit a sequence of operations + */ +int 
rldb_commit_transaction(struct rldb_db_handle *db) +{ + int rc; + + if (!db) + return -EINVAL; + + if (rldb_disabled) { + RLDB_ERR("RLDB is not initialized\n"); + return -ENODEV; + } + + rc = npf_rte_acl_commit_transaction(db->af, db->match_ctx); + if (rc < 0) + goto error; + + db->stats.rldb_transaction_cnt++; + + return 0; +error: + db->stats.rldb_err.transaction_failed++; + return rc; +} + +static int rldb_rule_match(struct cds_lfht_node *node, const void *key) +{ + const uint32_t *key_rule_no = key; + + struct rldb_rule_handle *rh = caa_container_of(node, + struct rldb_rule_handle, + ht_node); + + return rh->rule_no == *key_rule_no; +} + +static int rldb_rule_handle_create(uint32_t rule_no, + struct rldb_rule_spec const *in_spec, + struct rldb_rule_handle **out_rh) +{ + int rc; + struct rldb_rule_handle *rh; + struct rte_mempool_cache *cache; + + cache = rte_mempool_default_cache(rldb_rh_mempool, rte_lcore_id()); + if (unlikely(rte_mempool_generic_get(rldb_rh_mempool, (void *)&rh, + 1, cache) != 0)) { + RLDB_ERR + ("Could not allocate memory from rldb memory pool for " + "rule %u.\n", rule_no); + rc = -ENOMEM; + goto error; + } + + memset(rh, 0, sizeof(*rh)); + + rh->rule_no = rule_no; + memcpy(&rh->rule, in_spec, sizeof(rh->rule)); + + if (out_rh) + *out_rh = rh; + + return 0; + +error: + return rc; +} + +static void rldb_rule_handle_destroy(struct rldb_rule_handle *rh) +{ + struct rte_mempool_cache *cache; + + cache = rte_mempool_default_cache(rldb_rh_mempool, rte_lcore_id()); + rte_mempool_generic_put(rldb_rh_mempool, (void *)&rh, 1, cache); +} + +/* + * add rule to the specified database + * + * rule_no MUST NOT be 0. 
+ */ +int rldb_add_rule(struct rldb_db_handle *db, uint32_t rule_no, + struct rldb_rule_spec const *in_spec, + struct rldb_rule_handle **out_rh) +{ + int rc; + struct rldb_rule_handle *rh = NULL; + struct cds_lfht_node *node; + uint8_t match_addr[NPC_GPR_SIZE_v6] = { 0 }; + uint8_t mask[NPC_GPR_SIZE_v6] = { 0 }; + + if (!db || !rule_no || !in_spec || !out_rh) + return -EINVAL; + + if (rldb_disabled) + return -ENODEV; + + rc = rldb_rule_handle_create(rule_no, in_spec, &rh); + if (rc < 0) { + RLDB_ERR("Could not create rule handle for rule %u\n", rule_no); + goto error; + } + + switch (db->af) { + case AF_INET: + rldb_prepare_rule_v4(&rh->rule, match_addr, mask); + break; + case AF_INET6: + rldb_prepare_rule_v6(&rh->rule, match_addr, mask); + break; + default: + rc = -EAFNOSUPPORT; + goto error; + } + + /* + * The key must point at the rule number: rldb_rule_match() + * dereferences it as a uint32_t, exactly as rldb_find_rule() passes it. + */ + node = cds_lfht_add_unique(db->ht, rule_no, + rldb_rule_match, &rule_no, &rh->ht_node); + if (node != &rh->ht_node) { + RLDB_ERR("Could not add rule %u to rldb \"%s\".\n", + rule_no, db->name); + rc = -EEXIST; + goto error; + } + + rc = npf_rte_acl_add_rule(db->af, db->match_ctx, rh->rule_no, + match_addr, mask, NULL); + if (rc < 0) { + RLDB_ERR("Failed to add ACL rule: %u\n", rh->rule_no); + goto delete_and_error; + } + + *out_rh = rh; + + db->stats.rldb_rules_added++; + db->stats.rldb_rule_cnt++; + + return 0; + +delete_and_error: + cds_lfht_del(db->ht, &rh->ht_node); + +error: + if (rh) + rldb_rule_handle_destroy(rh); + + db->stats.rldb_err.rule_add_failed++; + return rc; +} + +/* + * delete rule from the specified database + * + * The match/mask buffers are rebuilt from the stored rule spec and are + * zero-initialised, as in rldb_add_rule(), because the prepare helpers only + * write the address bytes when the corresponding prefix flag is set. + */ +int rldb_del_rule(struct rldb_db_handle *db, struct rldb_rule_handle *rh) +{ + int rc; + uint32_t rule_no; + uint8_t match_addr[NPC_GPR_SIZE_v6] = { 0 }; + uint8_t mask[NPC_GPR_SIZE_v6] = { 0 }; + + if (!db || !rh) + return -EINVAL; + + if (rldb_disabled) + return -ENODEV; + + switch (db->af) { + case AF_INET: + rldb_prepare_rule_v4(&rh->rule, match_addr, mask); + break; + case AF_INET6: + rldb_prepare_rule_v6(&rh->rule, match_addr, mask); + break; + default: 
+ rc = -EAFNOSUPPORT; + goto error; + } + + rule_no = rh->rule_no; + + rc = npf_rte_acl_del_rule(db->af, db->match_ctx, rule_no, + match_addr, mask); + if (rc < 0) { + RLDB_ERR("Failed to remove ACL rule %u from ACL trie\n", + rule_no); + goto error; + + } + + cds_lfht_del(db->ht, &rh->ht_node); + rldb_rule_handle_destroy(rh); + + db->stats.rldb_rules_deleted++; + db->stats.rldb_rule_cnt--; + + return 0; + +error: + db->stats.rldb_err.rule_del_failed++; + return rc; +} + +/* + * find rule by rule number + */ +int rldb_find_rule(struct rldb_db_handle *db, uint32_t rule_no, + struct rldb_rule_handle **out_rh) +{ + struct cds_lfht_node *node; + struct cds_lfht_iter iter; + + if (!out_rh || !db || !rule_no) + return -EINVAL; + + if (rldb_disabled) + return -ENODEV; + + cds_lfht_lookup(db->ht, rule_no, rldb_rule_match, &rule_no, &iter); + + node = cds_lfht_iter_get_node(&iter); + + /* no match */ + if (!node) { + *out_rh = NULL; + return -ENOENT; + } + + *out_rh = caa_container_of(node, struct rldb_rule_handle, ht_node); + + return 0; +} + +/* + * match packets against rules in the specified database + */ +int rldb_match(struct rldb_db_handle *db, + /* array of packets to be matched */ + struct rte_mbuf *m[], + /* number of packets */ + uint32_t num_packets, struct rldb_result *result) +{ + uint32_t rule_no = 0; + struct rldb_rule_handle *rh; + struct npf_match_cb_data data = { 0 }; + int rc = 0; + + if (!db || !m || num_packets != 1) + return -EINVAL; + + if (rldb_disabled) + return -ENODEV; + + /* non-npc variant. 
Supports only standard 5-tuple packets */ + data.mbuf = m[0]; + rc = npf_rte_acl_match(db->af, db->match_ctx, NULL, &data, &rule_no); + if (rc == -ENOENT) + goto error; + + if (rc != 0 && rc != -ENOENT) + goto error; + + if (result) { + rc = rldb_find_rule(db, rule_no, &rh); + if (rc < 0) + goto error; + + result->rldb_rule_no = rule_no; + result->rldb_user_data = rh->rule.rldb_user_data; + } + + /* success: do not fall through into the failure accounting below */ + return 0; + +error: + db->stats.rldb_err.rule_match_failed++; + return rc; +} + +/* + * get statistics at database level + * + * Copies the database counters into *stats. Returns -EINVAL if db or stats + * is NULL, -ENODEV if the rldb is disabled, 0 otherwise. + */ +int rldb_get_stats(struct rldb_db_handle *db, struct rldb_stats *stats) +{ + if (!db || !stats) + return -EINVAL; + + if (rldb_disabled) { + RLDB_ERR("RLDB is not initialized\n"); + return -ENODEV; + } + + memcpy(stats, &db->stats, sizeof(*stats)); + + return 0; +} + +/* + * clear statistics at database level + */ +int rldb_clear_stats(struct rldb_db_handle *db) +{ + if (!db) + return -EINVAL; + + if (rldb_disabled) { + RLDB_ERR("RLDB is not initialized\n"); + return -ENODEV; + } + + memset(&db->stats, 0, sizeof(db->stats)); + + return 0; +} + +/* + * walk rule database + */ +void rldb_walk(struct rldb_db_handle *db, rldb_walker_t walker, void *userdata) +{ + struct cds_lfht_iter iter; + struct rldb_rule_handle *rh; + + if (!db || !walker) + return; + + if (rldb_disabled) { + RLDB_ERR("RLDB is not initialized\n"); + return; + } + + cds_lfht_for_each_entry(db->ht, &iter, rh, ht_node) { + if (walker(rh, userdata) < 0) + return; + } +} + +#define PREFIX_STRLEN (INET6_ADDRSTRLEN + sizeof("/128")) + +static const char *rldb_prefix_str(uint16_t family, union rldb_pfx *rldb_pfx, + char *buf, size_t blen) +{ + char addrbuf[INET6_ADDRSTRLEN]; + const char *addrstr; + uint32_t count; + int16_t prefix_len = -1; + + switch (family) { + case AF_INET: + addrstr = + inet_ntop(family, (void *)&rldb_pfx->v4_pfx.npfrl_bytes[0], + addrbuf, sizeof(addrbuf)); + prefix_len = rldb_pfx->v4_pfx.npfrl_plen; + break; + case AF_INET6: + addrstr = + inet_ntop(family, (void 
*)&rldb_pfx->v6_pfx.npfrl_bytes[0], + addrbuf, sizeof(addrbuf)); + prefix_len = rldb_pfx->v6_pfx.npfrl_plen; + break; + default: + addrstr = NULL; + } + + count = snprintf(buf, blen, "%s", addrstr ? : "[bad address]"); + if (prefix_len >= 0) + snprintf(buf + count, blen - count, "/%d", prefix_len); + + return buf; +} + +static const char *rldb_port_range(struct rldb_l4port_range *pr, char *buf, + size_t blen) +{ + int rc; + + if (pr->npfrl_loport == pr->npfrl_hiport) + rc = snprintf(buf, blen, "%u", pr->npfrl_loport); + else + rc = snprintf(buf, blen, "%u-%u", pr->npfrl_loport, + pr->npfrl_hiport); + + if (rc < 0) + snprintf(buf, blen, "[bad port-range]"); + + return buf; +} + +static void rldb_dump_rule_spec(struct rldb_rule_spec *rule, json_writer_t *wr) +{ + char prefix_buf[PREFIX_STRLEN]; + uint16_t af = 0; + + if (rule->rldb_flags & NPFRL_FLAG_V4_PFX) + af = AF_INET; + else if (rule->rldb_flags & NPFRL_FLAG_V6_PFX) + af = AF_INET6; + + jsonw_uint_field(wr, "priority", rule->rldb_priority); + jsonw_uint_field(wr, "flags", rule->rldb_flags); + + jsonw_string_field(wr, "src_addr", + rldb_prefix_str(af, &rule->rldb_src_addr, prefix_buf, + sizeof(prefix_buf))); + + jsonw_string_field(wr, "dst_addr", + rldb_prefix_str(af, &rule->rldb_dst_addr, prefix_buf, + sizeof(prefix_buf))); + + jsonw_uint_field(wr, "proto", rule->rldb_proto.npfrl_proto); + + jsonw_string_field(wr, "sport", + rldb_port_range(&rule->rldb_src_port_range, + prefix_buf, sizeof(prefix_buf))); + jsonw_string_field(wr, "dport", + rldb_port_range(&rule->rldb_dst_port_range, + prefix_buf, sizeof(prefix_buf))); +} + +/* + * dump rule database in json form + */ +void rldb_dump(struct rldb_db_handle *db, json_writer_t *wr) +{ + struct cds_lfht_iter iter; + struct rldb_rule_handle *rh; + struct rldb_stats *stats; + + if (!db || !wr) + return; + + if (rldb_disabled) { + RLDB_ERR("RLDB is not initialized\n"); + return; + } + + jsonw_string_field(wr, "name", db->name); + jsonw_uint_field(wr, "flags", 
db->flags); + + /* stats */ + stats = &db->stats; + + jsonw_name(wr, "stats"); + jsonw_start_object(wr); + jsonw_uint_field(wr, "rules_added", stats->rldb_rules_added); + jsonw_uint_field(wr, "rules_deleted", stats->rldb_rules_deleted); + jsonw_uint_field(wr, "rule_cnt", stats->rldb_rule_cnt); + jsonw_uint_field(wr, "transaction_cnt", stats->rldb_transaction_cnt); + + jsonw_name(wr, "error-counters"); + jsonw_start_object(wr); + jsonw_uint_field(wr, "rule_add_failed", + db->stats.rldb_err.rule_add_failed); + jsonw_uint_field(wr, "rule_del_failed", + db->stats.rldb_err.rule_del_failed); + jsonw_uint_field(wr, "rule_match_failed", + db->stats.rldb_err.rule_match_failed); + jsonw_uint_field(wr, "transaction_failed", + db->stats.rldb_err.transaction_failed); + jsonw_end_object(wr); + + jsonw_end_object(wr); + + /* rules */ + + jsonw_name(wr, "rules"); + + jsonw_start_array(wr); + cds_lfht_for_each_entry(db->ht, &iter, rh, ht_node) { + jsonw_start_object(wr); + jsonw_uint_field(wr, "rule_no", rh->rule_no); + rldb_dump_rule_spec(&rh->rule, wr); + jsonw_end_object(wr); + } + jsonw_end_array(wr); +} + +/* + * destroy specified rule database + */ +int rldb_destroy(struct rldb_db_handle *db) +{ + struct cds_lfht_iter iter; + struct rldb_rule_handle *rh; + + if (!db) + return -EINVAL; + + if (rldb_disabled) { + RLDB_ERR("RLDB is not initialized\n"); + return -ENODEV; + } + + if (db->ht) { + cds_lfht_for_each_entry(db->ht, &iter, rh, ht_node) { + if (!cds_lfht_del(db->ht, &rh->ht_node)) + rldb_rule_handle_destroy(rh); + } + } + + cds_lfht_del(rldb_global_ht, &db->ht_node); + rldb_db_handle_destroy(db); + + return 0; +} + +/* + * clean up infrastructure set up for rule database + */ +int rldb_cleanup(void) +{ + int rc = 0; + struct cds_lfht_iter iter; + struct rldb_db_handle *db; + + if (rldb_global_ht) { + cds_lfht_for_each_entry(rldb_global_ht, &iter, db, ht_node) { + rldb_destroy(db); + } + + cds_lfht_destroy(rldb_global_ht, NULL); + } + + if (rldb_rh_mempool) + 
rte_mempool_free(rldb_rh_mempool); + + if (rldb_acl4_mempool) + rte_mempool_free(rldb_acl4_mempool); + + if (rldb_acl6_mempool) + rte_mempool_free(rldb_acl6_mempool); + + rldb_rh_mempool = NULL; + rldb_acl4_mempool = NULL; + rldb_acl6_mempool = NULL; + rldb_global_ht = NULL; + + rldb_disabled = true; + + return rc; +} diff --git a/src/npf/rldb.h b/src/npf/rldb.h new file mode 100644 index 00000000..4b1a2c2e --- /dev/null +++ b/src/npf/rldb.h @@ -0,0 +1,169 @@ +/* + * Copyright (c) 2020, AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ +#ifndef NPF_RULE_DB_H + +#define NPF_RULE_DB_H + +#include +#include +#include + +#define RLDB_NAME_MAX 64 + +enum rldb_l3_field { + RLDB_L3F_SRC = 1, + RLDB_L3F_DST, + RLDB_L3F_PROTO, + RLDB_L3F__LEN +}; + +struct rldb_v4_prefix { + uint8_t npfrl_plen; + uint8_t npfrl_bytes[4]; +}; + +struct rldb_v6_prefix { + uint8_t npfrl_plen; + uint8_t npfrl_bytes[16]; +}; + +struct rldb_proto { + uint8_t npfrl_proto; + uint8_t npfrl_unknown : 1; +}; + +struct rldb_l4port_range { + uint16_t npfrl_loport; + uint16_t npfrl_hiport; +}; + +enum rldb_npfrl_flags { + NPFRL_FLAG_V4_PFX = 0x00000001, + NPFRL_FLAG_V6_PFX = 0x00000002, + NPFRL_FLAG_SRC_PFX = 0x00000004, + NPFRL_FLAG_DST_PFX = 0x00000008, + NPFRL_FLAG_PROTO = 0x00000010, + NPFRL_FLAG_SRC_PORT_RANGE = 0x00000020, + NPFRL_FLAG_DST_PORT_RANGE = 0x00000040, +}; + +union rldb_pfx { + struct rldb_v4_prefix v4_pfx; + struct rldb_v6_prefix v6_pfx; +}; + +struct rldb_rule_spec { + uintptr_t rldb_user_data; + uint32_t rldb_priority; + uint32_t rldb_flags; /* NPFRL_FLAG_* */ + union rldb_pfx rldb_src_addr; + union rldb_pfx rldb_dst_addr; + struct rldb_proto rldb_proto; + struct rldb_l4port_range rldb_src_port_range; + struct rldb_l4port_range rldb_dst_port_range; +}; + +struct rldb_result { + uint32_t rldb_rule_no; + uintptr_t rldb_user_data; +}; + +struct rldb_stats { + uint64_t rldb_rules_added; + uint64_t rldb_rules_deleted; + uint64_t 
rldb_rule_cnt; + uint64_t rldb_transaction_cnt; + struct err_cntrs { + uint64_t rule_add_failed; + uint64_t rule_del_failed; + uint64_t rule_match_failed; + uint64_t transaction_failed; + } rldb_err; +}; + +struct rldb_db_handle; +struct rldb_rule_handle; + +/* + * initialize infrastructure for rule database + */ +int rldb_init(void); + +/* + * create rule database of specified name + */ +int rldb_create(const char *name, uint32_t flags, struct rldb_db_handle **db); + +/* + * start a sequence of operations + */ +int rldb_start_transaction(struct rldb_db_handle *db); + +/* + * commit a sequence of operations + */ +int rldb_commit_transaction(struct rldb_db_handle *db); + +/* + * add rule to the specified database + */ +int rldb_add_rule(struct rldb_db_handle *db, uint32_t rule_no, + struct rldb_rule_spec const *in_spec, + struct rldb_rule_handle **out_rh); + +/* + * delete a rule from the specified database + */ +int rldb_del_rule(struct rldb_db_handle *db, struct rldb_rule_handle *rh); + +/* + * find rules by rule number + */ +int rldb_find_rule(struct rldb_db_handle *db, uint32_t rule_no, + struct rldb_rule_handle **out_rh); + +/* + * match packets against rules in the specified database + */ +int rldb_match(struct rldb_db_handle *db, struct rte_mbuf *m[], + uint32_t num_packets, struct rldb_result *result); + +/* + * get statistics at database level + */ +int rldb_get_stats(struct rldb_db_handle *db, struct rldb_stats *stats); + +/* + * clear statistics at database level + */ +int rldb_clear_stats(struct rldb_db_handle *db); + +/* + * callback prototype for walker + */ +typedef int (*rldb_walker_t)(const struct rldb_rule_handle *rh, void *userdata); + +/* + * walk rule database + */ +void rldb_walk(struct rldb_db_handle *db, rldb_walker_t walker, void *userdata); + +/* + * dump rule database in json form + */ +void rldb_dump(struct rldb_db_handle *db, json_writer_t *wr); + +/* + * destroy specified rule database + */ +int rldb_destroy(struct rldb_db_handle *db); 
+ +/* + * clean up infrastructure set up for rule database + */ +int rldb_cleanup(void); + +#endif /* NPF_RULE_DB_H */ diff --git a/src/npf/rproc/npf_ext_action_group.c b/src/npf/rproc/npf_ext_action_group.c index c70906b8..c55fcbe5 100644 --- a/src/npf/rproc/npf_ext_action_group.c +++ b/src/npf/rproc/npf_ext_action_group.c @@ -115,7 +115,7 @@ npf_action_group_cb(void *param, struct npf_cfg_rule_walk_state *state) * at least one isn't configured. NB order should always be mark then * police, or mark or police only. */ - if (strncmp(&(state->rule[0]), PREFIX_STR, CMD_POS)) + if (strncmp(&(state->rule[0]), PREFIX_STR, CMD_POS) != 0) goto format_err; if (!strncmp(&(state->rule[CMD_POS]), "markdscp(", 9)) { @@ -398,6 +398,18 @@ npf_action_group_rule_cb(void *param, struct npf_cfg_rule_walk_state *state) return true; } +void npf_action_group_show_policer(struct npf_act_grp *act_grp, + struct qos_show_context *context) +{ + json_writer_t *wr = context->wr; + + do { + if (act_grp->policer_hndl) + policer_show(wr, act_grp->policer_hndl); + act_grp = act_grp->next; + } while (act_grp); +} + static void npf_action_group_json(json_writer_t *json, npf_rule_t *rl __unused, const char *params __unused, void *handle) diff --git a/src/npf/rproc/npf_ext_action_group.h b/src/npf/rproc/npf_ext_action_group.h index 4ed0ecfb..14b3381c 100644 --- a/src/npf/rproc/npf_ext_action_group.h +++ b/src/npf/rproc/npf_ext_action_group.h @@ -8,10 +8,13 @@ #define NPF_EXT_ACTION_GROUP_H #include "npf/npf_ruleset.h" +#include "qos.h" struct npf_act_grp; void npf_action_group_show(json_writer_t *wr, struct npf_act_grp *ptr, const char *name); +void npf_action_group_show_policer(struct npf_act_grp *act_grp, + struct qos_show_context *context); #endif /* NPF_EXT_ACTION_GROUP_H */ diff --git a/src/npf/rproc/npf_ext_app.c b/src/npf/rproc/npf_ext_app.c index d6d1ffac..3b4f1e3d 100644 --- a/src/npf/rproc/npf_ext_app.c +++ b/src/npf/rproc/npf_ext_app.c @@ -1,11 +1,11 @@ /* - * Copyright (c) 2018-2019, AT&T 
Intellectual Property. All rights reserved. + * Copyright (c) 2018-2020, AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only */ /* - * Application rproc and application database. + * User-defined application rproc. */ #include @@ -15,351 +15,21 @@ #include "ip_funcs.h" #include "ip6_funcs.h" -#include "pktmbuf.h" +#include "pktmbuf_internal.h" #include "npf/npf_ruleset.h" #include "npf/rproc/npf_rproc.h" -#include "npf/dpi/dpi.h" -#include "npf/dpi/dpi_private.h" - -#define APP_NAME_HT_SIZE 32 -#define APP_NAME_HT_MIN 32 -#define APP_NAME_HT_MAX 8192 -#define APP_NAME_HT_FLAGS (CDS_LFHT_AUTO_RESIZE | CDS_LFHT_ACCOUNTING) - -#define APP_ID_HT_SIZE 32 -#define APP_ID_HT_MIN 32 -#define APP_ID_HT_MAX 8192 -#define APP_ID_HT_FLAGS (CDS_LFHT_AUTO_RESIZE | CDS_LFHT_ACCOUNTING) - - -/* Application database hash tables. Appls are hashed by name and by ID. */ -static struct cds_lfht *app_name_ht; /* Hash table of application names */ -static struct cds_lfht *app_id_ht; /* Hash table of application IDs */ - -/* Application database entry. */ -struct adb_entry { - char *ae_name; /* Name string */ - uint32_t ae_id; /* Application ID */ - uint32_t ae_refcount; /* Refcount */ - struct cds_lfht_node ae_name_ht_node; /* App name hash table */ - struct cds_lfht_node ae_id_ht_node; /* App ID hash table */ -}; +#include "npf/dpi/dpi_internal.h" +#include "npf/dpi/dpi_user.h" +#include "npf/dpi/npf_appdb.h" +#include "npf/dpi/npf_typedb.h" /* App information to be saved for later. */ struct app_info { struct adb_entry *ai_app_name; struct adb_entry *ai_app_proto; - uint64_t ai_app_type; /* bitfield */ + struct tdb_entry *ai_app_type; }; -static uint32_t name_hash_seed; - -/* - * Match function for the app name hash table. - * Returns zero for a non-match, and non-zero for a match. 
- */ -static int -appdb_name_match(struct cds_lfht_node *ht_node, const void *data) -{ - struct adb_entry *entry = caa_container_of( - ht_node, struct adb_entry, ae_name_ht_node); - - return !strcmp(data, entry->ae_name); -} - -/* - * Lookup the given application name in the application DB. - * Return a pointer to the entry, or NULL if not found. - */ -static struct adb_entry * -appdb_find_name(const char *name) -{ - struct cds_lfht_iter iter; - unsigned long hash = rte_jhash(name, strlen(name), - name_hash_seed); - - if (!app_name_ht) - return NULL; - - cds_lfht_lookup(app_name_ht, hash, appdb_name_match, - name, &iter); - - struct cds_lfht_node *ht_node = - cds_lfht_iter_get_node(&iter); - - if (ht_node) - return caa_container_of(ht_node, - struct adb_entry, - ae_name_ht_node); - else - return NULL; -} - -/* - * Convert the given app DB name entry to JSON. - */ -int -appdb_name_entry_to_json(json_writer_t *json, void *data) -{ - struct adb_entry *entry = data; - char buf[11]; /* "id" is u32. "0x" + 8 digits + null = 11. */ - - jsonw_name(json, entry->ae_name); - jsonw_start_object(json); - snprintf(buf, 11, "%#x", entry->ae_id); - jsonw_string_field(json, "id", buf); - jsonw_uint_field(json, "refcount", entry->ae_refcount); - jsonw_end_object(json); - - return 0; -} - -/* - * Walk the app name hash. - */ -int -appdb_name_walk(json_writer_t *json, app_walker_t callback) -{ - struct cds_lfht_iter iter; - struct adb_entry *entry; - int rc = 0; - - if (!app_name_ht) - return rc; - - cds_lfht_for_each_entry(app_name_ht, &iter, entry, ae_name_ht_node) { - rc = callback(json, entry); - if (rc) - break; - } - - return rc; -} - -/* - * Lookup the given application name in the application DB. - * Return the application ID, or DPI_APP_NA if not found. - */ -uint32_t -appdb_name_to_id(const char *name) -{ - struct adb_entry *entry = appdb_find_name(name); - - return entry ? entry->ae_id : DPI_APP_NA; -} - -/* - * Match function for the app id hash table. 
- * Returns zero for a non-match, and non-zero for a match. - */ -static int -appdb_id_match(struct cds_lfht_node *ht_node, const void *data) -{ - struct adb_entry *entry = caa_container_of( - ht_node, struct adb_entry, ae_id_ht_node); - const uint32_t *id = data; - - return *id == entry->ae_id; -} - -/* - * Lookup the given application ID in the application DB. - * Return a pointer to the entry, or NULL if not found. - */ -static struct adb_entry * -appdb_find_id(uint32_t app_id) -{ - struct cds_lfht_iter iter; - unsigned long hash = app_id; - - if (!app_id_ht) - return NULL; - - cds_lfht_lookup(app_id_ht, hash, appdb_id_match, - &app_id, &iter); - - struct cds_lfht_node *ht_node = - cds_lfht_iter_get_node(&iter); - - if (ht_node) - return caa_container_of(ht_node, - struct adb_entry, - ae_id_ht_node); - else - return NULL; -} - -/* Convert the given app DB ID entry to JSON. */ -int -appdb_id_entry_to_json(json_writer_t *json, void *data) -{ - struct adb_entry *entry = data; - char buf[11]; /* "id" is u32. "0x" + 8 digits + null = 11. */ - - snprintf(buf, 11, "%#x", entry->ae_id); - jsonw_name(json, buf); - jsonw_start_object(json); - jsonw_string_field(json, "name", entry->ae_name); - jsonw_uint_field(json, "refcount", entry->ae_refcount); - jsonw_end_object(json); - - return 0; -} - -/* Walk the app ID hash. */ -int -appdb_id_walk(json_writer_t *json, app_walker_t callback) -{ - struct cds_lfht_iter iter; - struct adb_entry *entry; - int rc = 0; - - if (!app_id_ht) - return rc; - - cds_lfht_for_each_entry(app_id_ht, &iter, entry, ae_id_ht_node) { - rc = callback(json, entry); - if (rc) - break; - } - - return rc; -} - -/* - * Lookup the given application ID in the application DB. - * Return the application name, or NULL if not found. - */ -char * -appdb_id_to_name(uint32_t app_id) -{ - struct adb_entry *entry = appdb_find_id(app_id); - - return entry ? 
entry->ae_name : NULL; -} - -/* - * Find an existing app DB entry with the given name and increment its refcount. - * If not found, then create a new entry. - */ -static struct adb_entry * -adb_find_or_alloc(char *name) -{ - /* No name? No entry. */ - if ((!name) || (!*name)) - return NULL; - - /* First, search for an existing entry. */ - struct adb_entry *entry = appdb_find_name(name); - if (entry) { - /* We only need to bump the refcount. for an existing entry. */ - entry->ae_refcount++; - return entry; - } - - /* Not found, so create a new app DB entry. */ - entry = zmalloc_aligned(sizeof(struct adb_entry)); - if (!entry) - return NULL; - - entry->ae_name = strdup(name); - if (!entry->ae_name) { - free(entry); - return NULL; - } - - /* Internally assigned application IDs all have the Q bit set. */ - static uint32_t user_app_id = APP_ID_Q | DPI_APP_BASE; - - /* - * Search for existing Qosmos app ID. - * No need to search the ADB since appdb_find_name - * already did that above. - */ - entry->ae_id = dpi_app_name_to_id_qosmos(name); - if (entry->ae_id == DPI_APP_NA) - /* No Qosmos ID, so allocate an internal ID. */ - entry->ae_id = DPI_ENGINE_USER | user_app_id++; - else { - /* - * This is a user-defined, Qosmos compatible ID. - * So change the Qosmos engine ID to the "user" engine ID. - */ - entry->ae_id &= DPI_APP_MASK; - entry->ae_id |= DPI_ENGINE_USER; - } - - entry->ae_refcount = 1; - - /* Add to app name hash table. */ - cds_lfht_node_init(&entry->ae_name_ht_node); - unsigned long name_hash = rte_jhash(name, strlen(name), - name_hash_seed); - cds_lfht_add(app_name_ht, name_hash, &entry->ae_name_ht_node); - - /* Add to app ID hash table. */ - cds_lfht_node_init(&entry->ae_id_ht_node); - unsigned long id_hash = entry->ae_id; - cds_lfht_add(app_id_ht, id_hash, &entry->ae_id_ht_node); - - return entry; -} - -/* - * Decrement the given appDB entry's refcount. - * If zero then remove the entry from the appDB. 
- */ -static bool -adb_dealloc(struct adb_entry *entry) -{ - if (!entry) - return false; - - if (--entry->ae_refcount == 0) { - cds_lfht_del(app_name_ht, &entry->ae_name_ht_node); - cds_lfht_del(app_id_ht, &entry->ae_id_ht_node); - free(entry->ae_name); - free(entry); - } - - return true; -} - -/* Initialisation. */ -static bool -app_ht_init(void) -{ - static bool init; - - if (init) - return true; - - app_name_ht = cds_lfht_new(APP_NAME_HT_SIZE, - APP_NAME_HT_MIN, - APP_NAME_HT_MAX, - APP_NAME_HT_FLAGS, - NULL); - - if (!app_name_ht) - return false; - - app_id_ht = cds_lfht_new(APP_ID_HT_SIZE, - APP_ID_HT_MIN, - APP_ID_HT_MAX, - APP_ID_HT_FLAGS, - NULL); - - if (!app_id_ht) { - cds_lfht_destroy(app_name_ht, NULL); - app_name_ht = NULL; - return false; - } - - name_hash_seed = random(); - init = true; - return true; -} - /* * App rproc constructor. * Save application information from the rule for later matching. @@ -367,13 +37,10 @@ app_ht_init(void) static int app_ctor(npf_rule_t *rl __unused, const char *params, void **handle) { - /* Ensure the DPI engine is enabled */ - if (!dpi_init()) - return -ENOMEM; - - /* Ensure hash tables have been init'd */ - if (!app_ht_init()) - return -ENOMEM; + /* Ensure the user DPI engine is enabled */ + int ret = dpi_init(IANA_USER); + if (ret != 0) + return ret; /* * Application name, type, and proto are received from the config layer @@ -383,8 +50,8 @@ app_ctor(npf_rule_t *rl __unused, const char *params, void **handle) */ /* Take a copy of params which we can modify. */ - char *args = strdup(params); - if (!args) + char *name = strdup(params); + if (!name) return -ENOMEM; /* Memory to store the app info. 
*/ @@ -392,7 +59,7 @@ app_ctor(npf_rule_t *rl __unused, const char *params, void **handle) zmalloc_aligned(sizeof(struct app_info)); if (!app_info) { - free(args); + free(name); return -ENOMEM; } @@ -402,36 +69,40 @@ app_ctor(npf_rule_t *rl __unused, const char *params, void **handle) * overwrite it with a '\0' * and get the type string at X+1. */ - char *delim1 = strchr(args, ','); - if (delim1 == NULL) { - free(args); - free(app_info); - return -EINVAL; - } - *delim1 = '\0'; + char *type = strchr(name, ','); + if (type == NULL) + goto err_bad_args; - /* Now "args" contains the null-terminated app name. */ - app_info->ai_app_name = adb_find_or_alloc(args); + *type = '\0'; + type++; - /* - * strtoll reads the type number, - * storing the delimiting comma in 'delim2' - * - * Use delim2 because strtoll(c+1, &c, ...) doesn't work. - */ - char *delim2; - app_info->ai_app_type = (int64_t) strtoll(delim1+1, &delim2, 10); + /* Now "name" contains the null-terminated app name. */ + app_info->ai_app_name = appdb_find_or_alloc(name); + + char *proto = strchr(type, ','); + if (!proto) { + appdb_dealloc(app_info->ai_app_name); + goto err_bad_args; + } + *proto = '\0'; + proto++; + app_info->ai_app_type = typedb_find_or_alloc(type); /* - * "delim2" points to the comma between the type and the proto. - * The proto follows the type at delim2+1. + * "proto" points to the comma between the type and the proto. + * The proto follows the type at proto+1. 
*/ - app_info->ai_app_proto = adb_find_or_alloc(delim2+1); + app_info->ai_app_proto = appdb_find_or_alloc(proto); *handle = app_info; - free(args); + free(name); return 0; + +err_bad_args: + free(name); + free(app_info); + return -EINVAL; } /* @@ -446,9 +117,11 @@ app_dtor(void *handle) struct app_info *app_info = handle; - adb_dealloc(app_info->ai_app_name); - adb_dealloc(app_info->ai_app_proto); + appdb_dealloc(app_info->ai_app_name); + appdb_dealloc(app_info->ai_app_proto); + typedb_dealloc(app_info->ai_app_type); free(handle); + dpi_terminate(IANA_USER); } /* @@ -471,7 +144,8 @@ app_action(npf_cache_t *npc __unused, struct rte_mbuf **nbuf __unused, if (!arg) return true; - struct dpi_flow *dpi_flow = npf_session_get_dpi(se); + struct user_flow *dpi_flow = (struct user_flow *)dpi_get_engine_flow( + npf_session_get_dpi(se), IANA_USER); if (!dpi_flow) return true; @@ -482,18 +156,9 @@ app_action(npf_cache_t *npc __unused, struct rte_mbuf **nbuf __unused, * Use DPI_APP_USER_NA rather than DPI_APP_NA if there's no name / proto * else appfw_decision will exit early. */ - dpi_flow->app_name = - app_info->ai_app_name ? app_info->ai_app_name->ae_id - : DPI_APP_USER_NA; - dpi_flow->app_proto = - app_info->ai_app_proto ? app_info->ai_app_proto->ae_id - : DPI_APP_USER_NA; - dpi_flow->app_type = app_info->ai_app_type; - dpi_flow->key = NULL; - dpi_flow->wrkr_id = 0; /* NB not dp_lcore_id() since no DPI work. */ - dpi_flow->offloaded = true; - dpi_flow->error = false; - dpi_flow->update_stats = true; + dpi_flow->application = appdb_entry_get_id(app_info->ai_app_name); + dpi_flow->protocol = appdb_entry_get_id(app_info->ai_app_proto); + dpi_flow->type = typedb_entry_get_id(app_info->ai_app_type); return true; /* Continue rproc processing. 
*/ } diff --git a/src/npf/rproc/npf_ext_appfw.c b/src/npf/rproc/npf_ext_appfw.c index ed6e8642..f94c4af3 100644 --- a/src/npf/rproc/npf_ext_appfw.c +++ b/src/npf/rproc/npf_ext_appfw.c @@ -1,5 +1,6 @@ /* - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. + * * Copyright (c) 2017 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -20,14 +21,16 @@ #include "compiler.h" #include "npf/config/npf_rule_group.h" -#include "npf/dpi/dpi.h" +#include "npf/dpi/dpi_internal.h" #include "npf/npf.h" #include "npf/rproc/npf_rproc.h" #include "npf/npf_cache.h" #include "npf/npf_session.h" +#include "npf/dpi/dpi_internal.h" +#include "npf/app_group/app_group_db.h" +#include "npf/app_group/app_group.h" #include "util.h" -struct dpi_flow; struct ifnet; struct rte_mbuf; @@ -38,10 +41,12 @@ struct rte_mbuf; struct appfw_rule { struct cds_list_head ar_list; const char *ar_group; /* Name of this group */ + struct agdb_entry *ar_app_grp; /* Application resource group */ uint16_t ar_rule_num; /* Rule number */ - uint32_t ar_protocol; /* Qosmos integer ids */ - uint32_t ar_name; - uint64_t ar_type; + uint32_t ar_protocol; /* Protocol ID */ + uint32_t ar_id; /* Application ID */ + uint32_t ar_type; /* Application type */ + uint8_t ar_engine; /* Engine ID */ npf_decision_t ar_decision; /* accept/drop */ }; @@ -53,6 +58,29 @@ struct appfw_handle { int ah_initial_dir; }; +struct appfw_cb_data { + struct app_group *group; + bool result; +}; + +static int +appfw_group_callback(uint8_t engine, uint32_t app, uint32_t proto, + uint32_t type, void *data) +{ + struct appfw_cb_data *handle = data; + + handle->result = app_group_find_app(handle->group, app) + || app_group_find_proto(handle->group, proto) + || app_group_find_type(handle->group, type, engine); + + if (handle->result) + /* Stop further callbacks */ + return 1; + + /* Continue search */ + return 0; +} + static void 
appfw_free_handle(struct appfw_handle *ah) { struct appfw_rule *ar; @@ -62,6 +90,7 @@ static void appfw_free_handle(struct appfw_handle *ah) cds_list_del(&ar->ar_list); free(ar); } + free(ah); } @@ -91,14 +120,21 @@ static int appfw_parse_rule_elements(struct appfw_handle *ah, *p = '\0'; v = ++p; - /* Convert to Qosmos integers */ + /* Convert engine, protocol, name and type to integers. + * Engine is assumed to have already been initialised before + * the protocol, type and name branches. + */ if (!strcmp(k, "protocol")) - ar->ar_protocol = dpi_app_name_to_id(v) & DPI_APP_MASK; + ar->ar_protocol = dpi_app_name_to_id(ar->ar_engine, v); else if (!strcmp(k, "type")) { - uint32_t tmp = dpi_app_type_name_to_id(v); - ar->ar_type = tmp ? (1L << (tmp - 1)) : 0; + ar->ar_type = dpi_app_type_name_to_id(ar->ar_engine, + v); } else if (!strcmp(k, "name")) - ar->ar_name = dpi_app_name_to_id(v) & DPI_APP_MASK; + ar->ar_id = dpi_app_name_to_id(ar->ar_engine, v); + else if (!strcmp(k, "engine")) + ar->ar_engine = dpi_engine_name_to_id(v); + else if (!strcmp(k, "group")) + ar->ar_app_grp = app_group_db_find_name(v); else if (!strcmp(k, "action")) { if (!strcmp(v, "drop")) ar->ar_decision = NPF_DECISION_BLOCK; @@ -117,7 +153,6 @@ static int appfw_parse_rule_elements(struct appfw_handle *ah, /* * Parse an app-fw rule into its DPI components. - * Note we translate the fields into their Qosmos integer * equivalents.
*/ static bool appfw_rule_parse(void *data, struct npf_cfg_rule_walk_state *state) @@ -129,17 +164,28 @@ static bool appfw_rule_parse(void *data, struct npf_cfg_rule_walk_state *state) if (!ar) goto fail; - ar->ar_name = DPI_APP_NA; + ar->ar_id = DPI_APP_NA; ar->ar_protocol = DPI_APP_NA; ar->ar_type = 0; + ar->ar_engine = IANA_RESERVED; ar->ar_rule_num = state->index; ar->ar_group = state->group; ar->ar_decision = NPF_DECISION_UNKNOWN; + ar->ar_app_grp = NULL; ah->ah_parse_rc = appfw_parse_rule_elements(ah, ar, state->rule); if (ah->ah_parse_rc) goto fail; + /* Add engine to handle */ + if (ar->ar_engine != IANA_RESERVED) { + int ret = dpi_init(ar->ar_engine); + if (ret != 0) { + ah->ah_parse_rc = ret; + goto fail; + } + } + cds_list_add_tail(&ar->ar_list, &ah->ah_rules); return true; fail: @@ -147,65 +193,79 @@ static bool appfw_rule_parse(void *data, struct npf_cfg_rule_walk_state *state) return false; } +/* Match the given application firewall rule (ar) + * against the given protocol, application name, and application type. + * + * Return true on a match; false if no match. + * + * NB fields in the appFW rule (ar) are set to DPI_APP_NA + * if they are not used in the match. + */ static bool appfw_match_rule(struct appfw_rule *ar, uint32_t proto, - uint32_t name, uint64_t app_bits) + uint32_t name, uint32_t type) { - /* - * Discard the engine bits from the app name and proto - * so we match nomatter which engine. 
- */ - name &= DPI_APP_MASK; - proto &= DPI_APP_MASK; - /* Match most-specific to least-specific */ - if (ar->ar_protocol != DPI_APP_NA && ar->ar_name != DPI_APP_NA) { - if ((proto == ar->ar_protocol) && (name == ar->ar_name)) + if (ar->ar_protocol != DPI_APP_NA && ar->ar_id != DPI_APP_NA) { + if ((proto == ar->ar_protocol) && (name == ar->ar_id)) return true; } - if ((ar->ar_name != DPI_APP_NA) && (name == ar->ar_name)) + if ((ar->ar_id != DPI_APP_NA) && (name == ar->ar_id)) return true; if ((ar->ar_protocol != DPI_APP_NA) && (proto == ar->ar_protocol)) return true; - if (ar->ar_type & app_bits) + if ((ar->ar_type != DPI_APP_NA) && (type == ar->ar_type)) return true; - return false; -} - -static uint32_t appfw_pkt_count(struct dpi_flow *df) -{ - uint32_t cnt; - const struct dpi_flow_stats *ds = dpi_flow_get_stats(df, true); - - cnt = ds->pkts; - ds = dpi_flow_get_stats(df, false); - cnt += ds->pkts; - return cnt; + return false; } static npf_decision_t appfw_decision(struct appfw_handle *ah, struct dpi_flow *dpi_flow) { struct appfw_rule *ar; - uint32_t proto = dpi_flow_get_app_proto(dpi_flow); - - /* These are terminal values that will not change */ - if (proto == DPI_APP_NA || proto == DPI_APP_ERROR) - return ah->ah_no_match_action; + struct appfw_cb_data data; + uint32_t name; + uint32_t proto; + uint32_t type; + uint8_t engine_id; /* * If offloaded, or hit pkt limit, then run the app-fw * rules, as we will shall make the decision. 
*/ - uint32_t pkt_count = appfw_pkt_count(dpi_flow); - - if (dpi_flow_get_offloaded(dpi_flow) || (pkt_count >= APPFW_MAX_PKTS)) { - uint32_t name = dpi_flow_get_app_name(dpi_flow); - uint64_t app_bits = dpi_flow_get_app_type(dpi_flow); - + if (dpi_flow_get_offloaded(dpi_flow) || + dpi_flow_pkt_count_maxed(dpi_flow, APPFW_MAX_PKTS)) { cds_list_for_each_entry(ar, &ah->ah_rules, ar_list) { - if (appfw_match_rule(ar, proto, name, app_bits)) - return ar->ar_decision; + engine_id = ar->ar_engine; + + /* Rule either has a valid engine or group */ + + if (engine_id != IANA_RESERVED) { + name = dpi_flow_get_app_id(engine_id, + dpi_flow); + proto = dpi_flow_get_app_proto(engine_id, + dpi_flow); + type = dpi_flow_get_app_type(engine_id, + dpi_flow); + + /* Skip terminal values, they never match */ + if (name == DPI_APP_NA || name == DPI_APP_ERROR + || proto == DPI_APP_NA + || proto == DPI_APP_ERROR) + continue; + + if (appfw_match_rule(ar, proto, name, type)) + return ar->ar_decision; + } else if (ar->ar_app_grp) { + data.result = false; + data.group = ar->ar_app_grp->group; + + dpi_flow_for_each_engine(dpi_flow, + appfw_group_callback, + &data); + if (data.result) + return ar->ar_decision; + } } return ah->ah_no_match_action; } @@ -223,9 +283,6 @@ appfw_ctor(npf_rule_t *rl, const char *params, void **handle) char *token; int rc; - if (!dpi_init()) - return -ENOMEM; - /* create the handle for this rproc instance */ ah = zmalloc_aligned(sizeof(struct appfw_handle)); if (!ah) @@ -247,6 +304,10 @@ appfw_ctor(npf_rule_t *rl, const char *params, void **handle) * Only if set to 'accept' does this come down. 
*/ ah->ah_no_match_action = NPF_DECISION_BLOCK; + rc = dpi_init(IANA_RESERVED); + if (rc != 0) { + goto fail; + } while ((token = strtok_r(str, ",", &tmp)) != NULL) { npf_cfg_rule_group_walk(NPF_RULE_CLASS_APP_FW, token, ah, @@ -259,6 +320,10 @@ appfw_ctor(npf_rule_t *rl, const char *params, void **handle) } ah->ah_initial_dir = npf_rule_get_dir(rl); + /* Ensure that the DPI engine outputs are enabled. */ + dpi_refcount_inc(IANA_USER); + dpi_refcount_inc(IANA_NDPI); + *handle = ah; return 0; @@ -271,6 +336,10 @@ appfw_ctor(npf_rule_t *rl, const char *params, void **handle) static void appfw_dtor(void *handle) { + /* Disable the DPI engine outputs. */ + dpi_refcount_dec(IANA_USER); + dpi_refcount_dec(IANA_NDPI); + appfw_free_handle(handle); } @@ -320,9 +389,11 @@ appfw_action(npf_cache_t *npc, struct rte_mbuf **nbuf, void *arg, */ dpi_flow = npf_session_get_dpi(se); if (!dpi_flow) { + uint8_t engines[] = { IANA_USER, IANA_NDPI }; + rc = dpi_session_first_packet(se, npc, *nbuf, - ah->ah_initial_dir); - if (rc) + ah->ah_initial_dir, 2, engines); + if (rc != 0) goto drop; dpi_flow = npf_session_get_dpi(se); if (!dpi_flow) diff --git a/src/npf/rproc/npf_ext_dpi.c b/src/npf/rproc/npf_ext_dpi.c index d76254e1..dbbae278 100644 --- a/src/npf/rproc/npf_ext_dpi.c +++ b/src/npf/rproc/npf_ext_dpi.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -16,7 +16,7 @@ #include "compiler.h" #include "npf/npf.h" -#include "npf/dpi/dpi.h" +#include "npf/dpi/dpi_internal.h" #include "npf/rproc/npf_rproc.h" #include "npf/npf_cache.h" #include "npf/npf_session.h" @@ -27,8 +27,9 @@ struct rte_mbuf; /* DPI information to be saved for later. 
*/ struct dpi_info { - uint32_t app_name; - uint64_t app_type_bitfield; + uint32_t app_id; + uint32_t app_type; + uint8_t engine_id; }; /* Save DPI information from the rule for later matching. */ @@ -42,50 +43,58 @@ dpi_ctor(npf_rule_t *rl __unused, const char *params, void **handle) * Here we convert the strings to IDs and save them for later matching. */ - /* Ensure the engine is enabled */ - if (!dpi_init()) - return -ENOMEM; - /* * The name and type are comma-separated, * so we find the comma at position X, * overwrite it with a '\0' * and get the type string at X+1. */ - char *args = strdup(params); - if (!args) + char *param_str = strdup(params); + if (!param_str) return -ENOMEM; - char *c = strchr(args, ','); - if (c == NULL) { - free(args); + char *name = strchr(param_str, ','); + if (!name) { + free(param_str); + return -EINVAL; + } + *name = '\0'; + name++; + + char *type = strchr(name, ','); + if (!type) { + free(param_str); return -EINVAL; } - *c = '\0'; + *type = '\0'; + type++; + + uint8_t engine_id = dpi_engine_name_to_id(param_str); + + /* Ensure the engine is enabled */ + int ret = dpi_init(engine_id); + if (ret != 0) { + free(param_str); + return ret; + } /* Memory to store the DPI info. */ struct dpi_info *dpi_info = zmalloc_aligned(sizeof(struct dpi_info)); if (!dpi_info) { - free(args); + free(param_str); return -ENOMEM; } - /* - * If the name-to-id lookups fail, we store ID zero - * which dpi_match characterises as "not applicable". - * - * Discard the engine bits from the app name - * so we match the app nomatter which engine. - */ - dpi_info->app_name = dpi_app_name_to_id(args) & DPI_APP_MASK; - uint32_t app_type = dpi_app_type_name_to_id(c+1); - dpi_info->app_type_bitfield = - app_type ? 
(1L << (app_type - 1)) : 0; + dpi_info->engine_id = engine_id; + dpi_info->app_id = dpi_app_name_to_id(engine_id, name) & DPI_APP_MASK; + dpi_info->app_type = dpi_app_type_name_to_id(engine_id, type); *handle = dpi_info; - free(args); + free(param_str); + + dpi_refcount_inc(engine_id); return 0; } @@ -94,6 +103,15 @@ dpi_ctor(npf_rule_t *rl __unused, const char *params, void **handle) static void dpi_dtor(void *handle) { + if (!handle) + return; + + struct dpi_info *dpi_info = handle; + uint8_t engine_id = dpi_info->engine_id; + + if (dpi_refcount_dec(engine_id) == 0) + dpi_terminate(engine_id); + free(handle); } @@ -126,6 +144,16 @@ static bool dpi_match(npf_cache_t *npc, struct rte_mbuf *mbuf, const struct ifnet *ifp, int dir, npf_session_t *se, void *arg) { + /* Get the DPI info that we stashed away when the rule was created. */ + struct dpi_info *dpi_info = arg; + + /* + * The rule says to match DPI info, but the details are not available. + * "This should never happen", but drop the traffic if it does. + */ + if (!dpi_info) + goto drop; + /* We only have sessions for IP packets */ if (!npf_iscached(npc, NPC_IP46)) return false; @@ -150,7 +178,9 @@ dpi_match(npf_cache_t *npc, struct rte_mbuf *mbuf, const struct ifnet *ifp, /* Find or attach the DPI flow info. Do first packet inspection */ struct dpi_flow *dpi_flow = npf_session_get_dpi(se); if (!dpi_flow) { - int error = dpi_session_first_packet(se, npc, mbuf, dir); + uint8_t engines[] = {IANA_USER, IANA_NDPI}; + int error = dpi_session_first_packet(se, npc, mbuf, + dir, 2, engines); if (error) goto drop; dpi_flow = npf_session_get_dpi(se); @@ -163,29 +193,20 @@ dpi_match(npf_cache_t *npc, struct rte_mbuf *mbuf, const struct ifnet *ifp, goto drop; /* Extract the previously cached result */ - const uint32_t app_name = dpi_flow_get_app_name(dpi_flow); - uint64_t app_type_bitfield = dpi_flow_get_app_type(dpi_flow); - - /* Get the DPI info that we stashed away when the rule was created. 
*/ - struct dpi_info *dpi_info = arg; + const uint32_t app_id = dpi_flow_get_app_id(dpi_info->engine_id, + dpi_flow); + uint32_t app_type = dpi_flow_get_app_type(dpi_info->engine_id, + dpi_flow); /* - * The rule says to match DPI info, but the details are not available. - * "This should never happen", but drop the traffic if it does. + * App ID only applies if set. */ - if (!dpi_info) - goto drop; + bool r = (dpi_info->app_id && + (dpi_info->app_id == (app_id & DPI_APP_MASK))) || + (dpi_info->app_type && + (dpi_info->app_type == app_type)); - /* - * App name only applies if set; - * explicitly checked. - * - * App type only applies if set; - * checked by & op since X && (X & Y) == X & Y. - */ - return (dpi_info->app_name && - (dpi_info->app_name == (app_name & DPI_APP_MASK))) || - (dpi_info->app_type_bitfield & app_type_bitfield); + return r; drop: /* diff --git a/src/npf/rproc/npf_ext_log.c b/src/npf/rproc/npf_ext_log.c index 2e659b69..8b825fc9 100644 --- a/src/npf/rproc/npf_ext_log.c +++ b/src/npf/rproc/npf_ext_log.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. 
*/ /* @@ -67,7 +67,7 @@ #include "npf/npf_cache.h" #include "npf/npf_session.h" #include "npf/rproc/npf_ext_log.h" -#include "pktmbuf.h" +#include "pktmbuf_internal.h" #include "util.h" #define BUF_SIZE 64 @@ -88,12 +88,12 @@ npf_log_mac_fields(const struct rte_mbuf *mbuf, char const *mprefix, char *macs_buf, char const *eprefix, char *etype_buf) { - if (pktmbuf_l2_len(mbuf) != ETHER_HDR_LEN && - pktmbuf_l2_len(mbuf) != VLAN_HDR_LEN) + if (dp_pktmbuf_l2_len(mbuf) != RTE_ETHER_HDR_LEN && + dp_pktmbuf_l2_len(mbuf) != VLAN_HDR_LEN) return; - const struct ether_hdr *eth - = rte_pktmbuf_mtod(mbuf, struct ether_hdr *); + const struct rte_ether_hdr *eth + = rte_pktmbuf_mtod(mbuf, struct rte_ether_hdr *); unsigned int pl; char *bp; @@ -118,7 +118,7 @@ npf_log_mac_fields(const struct rte_mbuf *mbuf, *bp++ = '\0'; /* Now the ethertype */ - uint16_t etype = ntohs(ethtype(mbuf, ETHER_TYPE_VLAN)); + uint16_t etype = ntohs(ethtype(mbuf, RTE_ETHER_TYPE_VLAN)); snprintf(etype_buf, BUF_SIZE, "%s%04X", eprefix, etype); } @@ -557,11 +557,11 @@ npf_log_pkt(npf_cache_t *npc, struct rte_mbuf *mbuf, npf_rule_t *rl, uint16_t ether_proto; if (npf_iscached(npc, NPC_IP4)) - ether_proto = htons(ETHER_TYPE_IPv4); + ether_proto = htons(RTE_ETHER_TYPE_IPV4); else - ether_proto = htons(ETHER_TYPE_IPv6); + ether_proto = htons(RTE_ETHER_TYPE_IPV6); - void *n_ptr = pktmbuf_mtol3(mbuf, char *) + npf_cache_hlen(npc); + void *n_ptr = dp_pktmbuf_mtol3(mbuf, char *) + npf_cache_hlen(npc); /* Find the start of the packet embedded in the ICMP error. */ n_ptr = nbuf_advance(&mbuf, n_ptr, ICMP_MINLEN); @@ -571,9 +571,10 @@ npf_log_pkt(npf_cache_t *npc, struct rte_mbuf *mbuf, npf_rule_t *rl, /* Init the embedded npc. */ npf_cache_t enpc; npf_cache_init(&enpc); + enpc.npc_srcdst = NULL; /* Inspect the embedded packet. 
*/ - if (!npf_cache_all_at(&enpc, mbuf, n_ptr, ether_proto, true)) + if (!npf_cache_all_at(&enpc, mbuf, n_ptr, ether_proto)) goto simple_ip; npf_log_ip_pkt(&enpc, err_buf, sizeof(err_buf), "", diff --git a/src/npf/rproc/npf_ext_mark.c b/src/npf/rproc/npf_ext_mark.c index 7ea0e9c0..6f7593d1 100644 --- a/src/npf/rproc/npf_ext_mark.c +++ b/src/npf/rproc/npf_ext_mark.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2013-2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -23,7 +23,7 @@ #include "npf/npf_mbuf.h" #include "npf/rproc/npf_rproc.h" #include "npf/npf_ruleset.h" -#include "pktmbuf.h" +#include "pktmbuf_internal.h" #include "util.h" #include "vplane_log.h" #include "qos.h" @@ -164,6 +164,9 @@ npf_mark_arg_create(npf_rule_t *rl, const char *params, uint32_t tag, void npf_remark_dscp(npf_cache_t *npc, struct rte_mbuf **m, uint8_t n, npf_rproc_result_t *result) { + if (unlikely(!npf_iscached(npc, NPC_IP46))) + return; + if (unlikely(npf_prepare_for_l4_header_change(m, npc) != 0)) { if (net_ratelimit()) RTE_LOG(ERR, FIREWALL, diff --git a/src/npf/rproc/npf_ext_nat64.c b/src/npf/rproc/npf_ext_nat64.c index 66508fee..0f4fa155 100644 --- a/src/npf/rproc/npf_ext_nat64.c +++ b/src/npf/rproc/npf_ext_nat64.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2019-2020, AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only */ @@ -62,11 +62,23 @@ nat64_create(struct nat64 **n6p, npf_rule_t *rl) uint32_t match_mask = 0; uint8_t addr_sz = 4; + /* + * Either an address group or a start-stop range (from a cfgd + * range or prefix) should exist. 
+ */ if (new->n6_src.nm_addr_table_id != NPF_TBLID_NONE) { table_id = new->n6_src.nm_addr_table_id; flags |= NPF_NAT_TABLE; + } else if (new->n6_src.nm_start_addr.s6_addr32[0] == 0 || + new->n6_src.nm_stop_addr.s6_addr32[0] == 0) { + rc = -EINVAL; + goto error; } + /* Use all ports for each address */ + new->n6_src.nm_start_port = 1; + new->n6_src.nm_stop_port = 65535; + /* * Create an address-port map and set r_natp pointer in rule * to point to it @@ -144,6 +156,42 @@ nat64_process_range(uint8_t *addr_sz, npf_addr_t *taddr, return 0; } +/* + * Get address range from an IPv4 prefix. All addresses params are in network + * order. + */ +static void nat64_prefix_to_range(uint32_t net_prefix, uint8_t plen, + uint32_t *start, uint32_t *stop) +{ + uint32_t prefix = ntohl(net_prefix); + + plen = MIN(plen, 32); + + /* Convert prefix to address range */ + if (plen == 32) { + *start = htonl(prefix); + *stop = htonl(prefix); + return; + } + + uint32_t first, last, mask; + + first = prefix; + mask = 0xFFFFFFFFUL << (32 - plen); + last = (first | ~mask); + first = (first & mask); + + if (plen < 31) { + if ((first & 0xFF) == 0) + first += 1; + if ((last & 0xFF) == 255) + last -= 1; + } + + *start = htonl(first); + *stop = htonl(last); +} + /* * Parse nat64 rproc parameters * @@ -166,6 +214,12 @@ nat64_parse_params(struct nat64 *n6, char *item, char *value) if (rc < 0) return -EINVAL; + + nat64_prefix_to_range(n6->n6_src.nm_addr.s6_addr32[0], + n6->n6_src.nm_mask, + &n6->n6_src.nm_start_addr.s6_addr32[0], + &n6->n6_src.nm_stop_addr.s6_addr32[0]); + } else if (!strcmp(item, "daddr")) { /* * Destination address or prefix @@ -176,6 +230,12 @@ nat64_parse_params(struct nat64 *n6, char *item, char *value) if (rc < 0) return -EINVAL; + + nat64_prefix_to_range(n6->n6_dst.nm_addr.s6_addr32[0], + n6->n6_dst.nm_mask, + &n6->n6_dst.nm_start_addr.s6_addr32[0], + &n6->n6_dst.nm_stop_addr.s6_addr32[0]); + } else if (!strcmp(item, "spl")) { char *endp; ulong pfxlen; @@ -207,10 +267,6 @@ 
nat64_parse_params(struct nat64 *n6, char *item, char *value) n6->n6_src.nm_af = (addr_sz == 4 ? AF_INET : AF_INET6); - /* Use all ports for each address in pool */ - n6->n6_src.nm_start_port = 1; - n6->n6_src.nm_stop_port = 65535; - } else if (!strcmp(item, "sgroup")) { /* * Source address group, for use with NPF_NAT64_OVERLOAD @@ -282,6 +338,15 @@ nat64_validate_mapping(struct nat64_map *nm, bool is_src) /* Only v4 source addr pools are supported */ if (!is_src || nm->nm_af != AF_INET) return -EINVAL; + + /* + * Either an address-group or a start/stop address should be + * set up + */ + if (nm->nm_addr_table_id == NPF_TBLID_NONE && + (nm->nm_start_addr.s6_addr32[0] == 0 || + nm->nm_stop_addr.s6_addr32[0] == 0)) + return -EINVAL; break; case NPF_NAT64_NONE: return -EINVAL; diff --git a/src/npf/rproc/npf_ext_nptv6.c b/src/npf/rproc/npf_ext_nptv6.c index 7e27e2ec..75c4ca7c 100644 --- a/src/npf/rproc/npf_ext_nptv6.c +++ b/src/npf/rproc/npf_ext_nptv6.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2018-2020, AT&T Intellectual Property. All rights reserved.
* * SPDX-License-Identifier: LGPL-2.1-only */ @@ -220,7 +220,7 @@ nptv6_validate_params(struct nptv6 *np) in6_prefix_cpy(&outer, &np->np_out_prefix, np->np_out_prefixlen); - if (in6_prefix_eq(&inner, &outer, MAX(np->np_in_prefixlen, + if (dp_in6_prefix_eq(&inner, &outer, MAX(np->np_in_prefixlen, np->np_out_prefixlen))) return -EINVAL; @@ -662,7 +662,7 @@ nptv6_translate_icmp(const struct nptv6 *np, const struct in6_addr *pfx, uint plen; int rc; - n_ptr = rte_pktmbuf_mtod_offset(mbuf, char *, pktmbuf_l2_len(mbuf)); + n_ptr = rte_pktmbuf_mtod_offset(mbuf, char *, dp_pktmbuf_l2_len(mbuf)); if (dir == PFIL_IN) { /* @@ -670,7 +670,7 @@ nptv6_translate_icmp(const struct nptv6 *np, const struct in6_addr *pfx, * prefix matches the external network prefix */ rc = nbuf_advfetch(&mbuf, &n_ptr, - pktmbuf_l3_len(mbuf) + + dp_pktmbuf_l3_len(mbuf) + sizeof(struct icmp6_hdr) + offsetof(struct ip6_hdr, ip6_src), sizeof(addr), &addr); @@ -685,7 +685,7 @@ nptv6_translate_icmp(const struct nptv6 *np, const struct in6_addr *pfx, * prefix matches the internal network prefix */ rc = nbuf_advfetch(&mbuf, &n_ptr, - pktmbuf_l3_len(mbuf) + + dp_pktmbuf_l3_len(mbuf) + sizeof(struct icmp6_hdr) + offsetof(struct ip6_hdr, ip6_dst), sizeof(addr), &addr); @@ -696,7 +696,7 @@ nptv6_translate_icmp(const struct nptv6 *np, const struct in6_addr *pfx, plen = np->np_in_prefixlen; } - if (!in6_prefix_eq(&addr, match, plen)) + if (!dp_in6_prefix_eq(&addr, match, plen)) return; /* @@ -724,7 +724,7 @@ nptv6_translate(npf_cache_t *npc, struct rte_mbuf **nbuf, void *arg, struct in6_addr trans; int icmp; - uint hdr_len = pktmbuf_l2_len(*nbuf) + pktmbuf_l3_len(*nbuf); + uint hdr_len = dp_pktmbuf_l2_len(*nbuf) + dp_pktmbuf_l3_len(*nbuf); if (unlikely(npf_iscached(npc, NPC_ICMP_ERR))) hdr_len += sizeof(struct icmp6_hdr) + sizeof(struct ip6_hdr); @@ -791,7 +791,7 @@ nptv6_translate(npf_cache_t *npc, struct rte_mbuf **nbuf, void *arg, /* * Write the translated address back to the packet, and update cache */ - void 
*n_ptr = pktmbuf_mtol3(mbuf, void *); + void *n_ptr = dp_pktmbuf_mtol3(mbuf, void *); npf_rwrip6(npc, mbuf, n_ptr, np->np_dir, &trans); diff --git a/src/npf/rproc/npf_ext_policer.c b/src/npf/rproc/npf_ext_policer.c index 0cfc0223..a9b0bce3 100644 --- a/src/npf/rproc/npf_ext_policer.c +++ b/src/npf/rproc/npf_ext_policer.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2013-2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -24,7 +24,7 @@ #include "npf/config/npf_attach_point.h" #include "npf/rproc/npf_rproc.h" #include "npf/npf_ruleset.h" -#include "pktmbuf.h" +#include "pktmbuf_internal.h" #include "qos.h" #include "util.h" #include "vplane_log.h" @@ -171,15 +171,15 @@ npf_policer_create(npf_rule_t *rl, const char *params, void **handle) po->tc = tc; tcs_per_sec = ONE_SECOND / tc; po->rate = rate / tcs_per_sec; - if (po->rate < ETHER_MAX_VLAN_FRAME_LEN) { - tcs_per_sec = rate / ETHER_MAX_VLAN_FRAME_LEN; + if (po->rate < RTE_ETHER_MAX_VLAN_FRAME_LEN) { + tcs_per_sec = rate / RTE_ETHER_MAX_VLAN_FRAME_LEN; if (!tcs_per_sec) { tcs_per_sec = 1; po->tc = ONE_SECOND; } else po->tc = ONE_SECOND / tcs_per_sec; - po->rate = ETHER_MAX_VLAN_FRAME_LEN; + po->rate = RTE_ETHER_MAX_VLAN_FRAME_LEN; } po->burst = burst; @@ -237,10 +237,10 @@ npf_policer_destroy(void *handle) } static inline void -update_tokens(int32_t credit, struct npf_policer *po, const uint64_t ticks) +update_tokens(uint32_t credit, struct npf_policer *po, const uint64_t ticks) { po->time += ticks; - if (credit > (int32_t)(po->rate + po->burst)) + if (credit > (po->rate + po->burst)) credit = po->rate + po->burst; rte_atomic32_set(&po->credit, credit); } @@ -266,7 +266,7 @@ npf_policer(npf_cache_t *npc, struct rte_mbuf **nbuf, void *arg, if (po->type == POLICE_BYTES) { uint64_t lapsed; - int intervals; + unsigned int intervals; 
rte_spinlock_lock(&po->lock); @@ -302,7 +302,7 @@ npf_policer(npf_cache_t *npc, struct rte_mbuf **nbuf, void *arg, * report L3 bytes sent/dropped, for token bucket we include * the L2 overhead if configured. */ - tokens = rte_pktmbuf_pkt_len(*nbuf) - pktmbuf_l2_len(*nbuf); + tokens = rte_pktmbuf_pkt_len(*nbuf) - dp_pktmbuf_l2_len(*nbuf); tok_with_oh = tokens + po->overhead; if (tok_with_oh < 0) tok_with_oh = 1; @@ -340,7 +340,7 @@ npf_policer(npf_cache_t *npc, struct rte_mbuf **nbuf, void *arg, } rte_spinlock_unlock(&po->lock); - tokens = rte_pktmbuf_pkt_len(*nbuf) - pktmbuf_l2_len(*nbuf); + tokens = rte_pktmbuf_pkt_len(*nbuf) - dp_pktmbuf_l2_len(*nbuf); } core = dp_lcore_id(); @@ -370,6 +370,47 @@ npf_policer(npf_cache_t *npc, struct rte_mbuf **nbuf, void *arg, return true; } +static void +policer_get_stats(void *arg, unsigned int *excess, + unsigned int *excess_bytes) +{ + struct npf_policer *po = arg; + unsigned int id; + + *excess = *excess_bytes = 0; + + FOREACH_DP_LCORE(id) { + *excess += po->cntrs[id].excess; + *excess_bytes += po->cntrs[id].bytes_excess; + } +} + +void policer_show(json_writer_t *wr, void *arg) +{ + struct npf_policer *po = arg; + unsigned int excess, excess_b; + uint32_t credit; + + policer_get_stats(po, &excess, &excess_b); + if (!excess) + return; + + credit = rte_atomic32_read(&po->credit); + jsonw_start_object(wr); + jsonw_uint_field(wr, "time", po->time); + jsonw_uint_field(wr, "tc", po->tc); + jsonw_uint_field(wr, "credit", credit); + jsonw_uint_field(wr, "rate", po->rate); + jsonw_uint_field(wr, "burst", po->burst); + jsonw_int_field(wr, "overhead", po->overhead); + jsonw_int_field(wr, "action", po->action); + jsonw_int_field(wr, "mark_val", po->mark_val); + jsonw_int_field(wr, "loc", po->lock.locked); + jsonw_uint_field(wr, "soft_ticks", soft_ticks); + jsonw_uint_field(wr, "lapsed", (soft_ticks - po->time)); + jsonw_end_object(wr); +} + static void npf_policer_clear_stats(void *arg) { diff --git 
a/src/npf/rproc/npf_ext_session_limit.c b/src/npf/rproc/npf_ext_session_limit.c index b70fecea..bb69910e 100644 --- a/src/npf/rproc/npf_ext_session_limit.c +++ b/src/npf/rproc/npf_ext_session_limit.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2017 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -1156,54 +1156,44 @@ npf_sess_limit_update_rates(struct npf_sess_limit_param_t *lp) /* * Called when the session belonging to a limit-enabled rproc rule changes - * state. May be called from both master and forwarding threads. + * state. May be called from both main and forwarding threads. */ -void npf_sess_limit_state_change(void *handle, uint8_t proto_idx, - uint8_t prev_state, uint8_t state) +void npf_sess_limit_state_change(void *handle, enum dp_session_state prev_state, + enum dp_session_state state) { struct npf_sess_limit_param_t *lp = handle; - /* - * We dont care about the various types of TCP half-open state, for - * example, so convert to a generic state - */ - state = npf_state_get_generic_state(proto_idx, state); - prev_state = npf_state_get_generic_state(proto_idx, prev_state); - - if (state == prev_state) - return; - rte_spinlock_lock(&lp->lp_lock); switch (prev_state) { - case NPF_ANY_SESSION_NONE: + case SESSION_STATE_NONE: /* do nothing */ break; - case NPF_ANY_SESSION_NEW: + case SESSION_STATE_NEW: lp->lp_new_ct--; break; - case NPF_ANY_SESSION_ESTABLISHED: + case SESSION_STATE_ESTABLISHED: lp->lp_estab_ct--; break; - case NPF_ANY_SESSION_TERMINATING: + case SESSION_STATE_TERMINATING: /* Only occurs for TCP sessions */ lp->lp_term_ct--; break; - case NPF_ANY_SESSION_CLOSED: + case SESSION_STATE_CLOSED: /* Will not happen */ break; }; switch (state) { - case NPF_ANY_SESSION_NONE: + case SESSION_STATE_NONE: /* do nothing */ break; - case NPF_ANY_SESSION_NEW: + case SESSION_STATE_NEW: 
lp->lp_new_ct++; if (lp->lp_new_ct >= lp->lp_max_new_ct) @@ -1212,14 +1202,14 @@ void npf_sess_limit_state_change(void *handle, uint8_t proto_idx, npf_sess_limit_update_rates(lp); break; - case NPF_ANY_SESSION_ESTABLISHED: + case SESSION_STATE_ESTABLISHED: lp->lp_estab_ct++; if (lp->lp_estab_ct >= lp->lp_max_estab_ct) lp->lp_max_estab_ct = lp->lp_estab_ct; break; - case NPF_ANY_SESSION_TERMINATING: + case SESSION_STATE_TERMINATING: /* Only occurs for TCP sessions */ lp->lp_term_ct++; @@ -1227,7 +1217,7 @@ void npf_sess_limit_state_change(void *handle, uint8_t proto_idx, lp->lp_max_term_ct = lp->lp_term_ct; break; - case NPF_ANY_SESSION_CLOSED: + case SESSION_STATE_CLOSED: break; }; diff --git a/src/npf/rproc/npf_ext_session_limit.h b/src/npf/rproc/npf_ext_session_limit.h index e3787771..e0848d85 100644 --- a/src/npf/rproc/npf_ext_session_limit.h +++ b/src/npf/rproc/npf_ext_session_limit.h @@ -31,10 +31,10 @@ bool npf_sess_limit_check(npf_rule_t *rl); /* * Called when the session belonging to a limit-enabled rproc rule changes - * state. May be called from both master and forwarding threads. + * state. May be called from both main and forwarding threads. */ -void npf_sess_limit_state_change(void *handle, uint8_t proto_idx, - uint8_t prev_state, uint8_t state); +void npf_sess_limit_state_change(void *handle, enum dp_session_state prev_state, + enum dp_session_state state); #endif diff --git a/src/npf/rproc/npf_ext_setvrf.c b/src/npf/rproc/npf_ext_setvrf.c index 037c7f41..f18094da 100644 --- a/src/npf/rproc/npf_ext_setvrf.c +++ b/src/npf/rproc/npf_ext_setvrf.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2017 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -16,7 +16,7 @@ #include "npf/npf.h" #include "npf/rproc/npf_rproc.h" #include "npf/npf_ruleset.h" -#include "pktmbuf.h" +#include "pktmbuf_internal.h" #include "vplane_log.h" struct ifnet; @@ -59,7 +59,7 @@ npf_setvrf(npf_cache_t *npc __unused, struct rte_mbuf **m, void *arg, return true; vrfid = (uintptr_t)arg; - vrf = vrf_get_rcu_from_external(vrfid); + vrf = dp_vrf_get_rcu_from_external(vrfid); pktmbuf_set_vrf(*m, vrf ? vrf->v_id : VRF_INVALID_ID); return true; } diff --git a/src/npf/rproc/npf_ext_tag.c b/src/npf/rproc/npf_ext_tag.c index 2a72fba6..823dc821 100644 --- a/src/npf/rproc/npf_ext_tag.c +++ b/src/npf/rproc/npf_ext_tag.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2018-2020, AT&T Intellectual Property. All rights reserved. * All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only @@ -14,7 +14,7 @@ #include "compiler.h" #include "npf/npf_cache.h" #include "npf/rproc/npf_rproc.h" -#include "pktmbuf.h" +#include "pktmbuf_internal.h" #include "vplane_log.h" struct ifnet; diff --git a/src/npf/rproc/npf_rproc.c b/src/npf/rproc/npf_rproc.c index a64f2ece..b1c17c80 100644 --- a/src/npf/rproc/npf_rproc.c +++ b/src/npf/rproc/npf_rproc.c @@ -79,6 +79,10 @@ static const npf_rproc_ops_t *npf_rproc_handlers[] = { [NPF_RPROC_ID_LAST] = NULL }; +static_assert(ARRAY_SIZE(npf_rproc_handlers) - 1 == NPF_RPROC_ID_LAST, + "npf rproc handlers iswrong size"); + + unsigned int npf_rproc_max_rprocs(void) { return ARRAY_SIZE(npf_rproc_handlers) - 1; @@ -105,12 +109,7 @@ int npf_create_rproc(const npf_rproc_ops_t *ops, npf_rule_t *rl, const char *args, void **handle) { - /* - * assert at unit-test time that npf_rproc_id enum and - * rproc_handlers[] array size match. 
- */ assert(npf_rproc_max_rprocs() == NPF_RPROC_ID_LAST); - assert(ARRAY_SIZE(npf_rproc_handlers) - 1 == NPF_RPROC_ID_LAST); if (!ops->ro_ctor) { *handle = NULL; diff --git a/src/npf/rproc/npf_rproc.h b/src/npf/rproc/npf_rproc.h index 9143199d..1ebda2bd 100644 --- a/src/npf/rproc/npf_rproc.h +++ b/src/npf/rproc/npf_rproc.h @@ -83,6 +83,7 @@ enum npf_rproc_id { NPF_RPROC_ID_CTR_DEF, NPF_RPROC_ID_CTR_REF, NPF_RPROC_ID_COUNTER, + NPF_RPROC_ID_APP_GRP, /* Insert new ID above this comment */ NPF_RPROC_ID_LAST, }; @@ -135,9 +136,9 @@ const npf_rproc_ops_t *npf_find_rproc_by_id(enum npf_rproc_id ro_id); enum npf_rproc_id npf_rproc_get_id(const npf_rproc_ops_t *ops); /* npf_ext_policer.c */ -void police_enable_inner_marking(void *handle); +void police_enable_inner_marking(void *arg); -void police_disable_inner_marking(void *handle); +void police_disable_inner_marking(void *arg); void npf_policer_json(json_writer_t *json, npf_rule_t *rl, @@ -161,4 +162,6 @@ void npf_markpcp_json(json_writer_t *json, npf_rule_t *rl, const char *params, void *handle); +void policer_show(json_writer_t *wr, void *arg); + #endif /* NPF_RPROC_H */ diff --git a/src/npf/zones/npf_zone_private.c b/src/npf/zones/npf_zone_private.c new file mode 100644 index 00000000..0a916fb9 --- /dev/null +++ b/src/npf/zones/npf_zone_private.c @@ -0,0 +1,1088 @@ +/* + * Copyright (c) 2018-2020, AT&T Intellectual Property. All rights reserved. 
+ * + * SPDX-License-Identifier: LGPL-2.1-only + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "json_writer.h" +#include "compiler.h" +#include "if_var.h" +#include "npf/npf.h" +#include "npf/config/npf_attach_point.h" +#include "npf/config/npf_config.h" +#include "npf/config/npf_rule_group.h" +#include "npf/config/npf_ruleset_type.h" +#include "npf/npf_addrgrp.h" +#include "npf/npf_cache.h" +#include "npf/npf_if.h" +#include "npf/npf_nat.h" +#include "npf/npf_ruleset.h" +#include "npf/npf_session.h" +#include "npf/npf_state.h" +#include "npf/npf_timeouts.h" +#include "npf/rproc/npf_ext_session_limit.h" +#include "npf_shim.h" +#include "pktmbuf_internal.h" +#include "urcu.h" +#include "vplane_log.h" +#include "npf/zones/npf_zone_public.h" +#include "npf/zones/npf_zone_private.h" + +/* + * npf_zone policy policy + * +-------+ +-------+ +-------+ + * | ZONEA | policy | ZONEB | | ZONEC | + * | |-------------->| |------->| | + * | | list+hash | | | | + * +-------+ +-------+ +-------+ + * intf | ^ | | + * list | | v v + * | | back npf_config npf_config + * v | ptr + * +-----+ +------+ attach-point attach-point + * | | | | "ZONEA>ZONEB" "ZONEA>ZONEC" + * | |--------->| | + * | | | | + * +-----+ +------+ + * npf if npf_zone_intf + * + * | ^ + * | | + * | | + * v | + * +-----+ +------+ + * | | | | + * | |--------->| | + * | | | | + * +-----+ +------+ + * npf if npf_zone_intf + * + * + * In the output context, we use the receive interface and transmit interface + * to get the 'from' and 'to' zones. + * + * To find the relevant policy (which contains the ruleset), we hash the + * 'to' zone pointer and lookup the 'from' zones policy hash table. 
+ */ + +/* + * zone instance + */ +struct npf_zone_inst { + struct cds_list_head zi_zone_list; /* npf_zone list */ + uint32_t zi_zone_count; /* number of zones on zi_zone_list */ +}; + +/* Single, global, zone instance */ +static struct npf_zone_inst *zone_inst; + +/* + * zone policy + * + * A zone policy is created on a 'from' zone for every 'to' zone reference. + * For example, if rulesets are configured for ZONEA to ZONEB and ZONEA to + * ZONEC then zone policies are created for ZONEB and ZONEC, and these are + * added to the hash table of ZONEA. + * + * Also, when a zone policy makes reference to a zone that currently does not + * exist, then a zone structure will be created for it. This is necessary + * since we use the pointer to a zone structure to generate a hash value and + * for the hash lookup match. + */ +struct npf_zone_policy { + struct cds_list_head zp_list_node; /* nz_policy_list node */ + struct cds_lfht_node zp_lfht_node; /* nz_policy_ht node */ + char *zp_name; /* policy name; also names the 'to' zone */ + /* Locks held by zone policy list and attach point */ + uint32_t zp_refcnt; + struct npf_zone *zp_to_zone; /* 'to' zone; its pointer is the hash key */ + struct npf_config *zp_conf; /* handed to npf_attpt_item_set_up */ +}; + +/* + * zone + * + * Interface and policy lists are only ever updated from the main thread. + * The policy hash table is updated by the main thread, but looked-up by the + * forwarding threads.
+ */ +struct npf_zone { + struct cds_list_head nz_node; /* zi_zone_list node */ + char *nz_name; + bool nz_local; /* set while this zone is the local zone */ + uint32_t nz_hash; /* hash of nz pointer */ + uint32_t nz_refcnt; + struct cds_list_head nz_intf_list; /* npf_zone_intf list */ + uint32_t nz_intf_count; + struct cds_list_head nz_policy_list; /* npf_zone_policy list */ + struct cds_lfht *nz_policy_ht; /* policies, keyed by 'to' zone pointer */ + uint32_t nz_policy_count; +}; + +/* + * zone interface + */ +struct npf_zone_intf { + struct cds_list_head zif_node; /* nz_intf_list node */ + char *zif_ifname; + struct npf_zone *zif_zone; /* back ptr to zone */ + uint32_t zif_refcnt; +}; + +/* The zone currently assigned as the local zone, or NULL if there is none */ +struct npf_zone *local_zone; + + +/* Forward references */ +static uint32_t npf_zone_policy_ht_hash(uintptr_t nz); +static int npf_zone_list_insert(struct npf_zone *nz); +static int npf_zone_list_remove(struct npf_zone *nz); +static void npf_zone_list_remove_all(struct npf_zone_inst *zi); +static void npf_zone_intf_list_remove_all(struct npf_zone **nzp); +static void npf_zone_policy_remove_all(struct npf_zone **nzp); +static struct npf_zone_policy *npf_zone_policy_create(const char *policy_name); +static void npf_zone_policy_destroy(struct npf_zone_policy **zpp); + + +/*************************** instance **********************************/ + +/* Allocate a zone instance with an empty zone list. NULL if allocation fails. */ +static struct npf_zone_inst * +npf_zone_inst_create(void) +{ + struct npf_zone_inst *zi; + + zi = zmalloc_aligned(sizeof(*zi)); + if (!zi) + return NULL; + + CDS_INIT_LIST_HEAD(&zi->zi_zone_list); + + return zi; +} + +/* Return the global zone instance, creating it on first use. May be NULL. */ +static struct npf_zone_inst * +npf_zone_inst_find_or_create(void) +{ + if (!zone_inst) + zone_inst = npf_zone_inst_create(); + + return zone_inst; +} + +/* Return the global zone instance, or NULL if it has not been created */ +static struct npf_zone_inst * +npf_zone_inst_find(void) +{ + return zone_inst; +} + +/* Unlink every zone from the global instance, then free the instance itself */ +void +npf_zone_inst_destroy_private(void) +{ + if (!zone_inst) + return; + + npf_zone_list_remove_all(zone_inst); + free(zone_inst); + zone_inst = NULL; +} + +/* + * Remove zone from instance list and destroy. Should only be called when ref + * count reaches zero.
+ */ +static void +npf_zone_destroy(struct npf_zone **nzp) +{ + struct npf_zone *nz = *nzp; + + if (!nz) + return; + + assert(nz->nz_refcnt == 0); + assert(nz->nz_intf_count == 0); + + npf_zone_list_remove(nz); + + if (nz->nz_name) + free(nz->nz_name); + if (nz->nz_policy_ht) + cds_lfht_destroy(nz->nz_policy_ht, NULL); + + free(nz); + *nzp = NULL; +} + +/* Hash table config */ +#define ZONE_POLICY_HT_INIT 32 +#define ZONE_POLICY_HT_MIN 32 +#define ZONE_POLICY_HT_MAX 1024 + +/* + * Create a zone structure, and insert it into zone instance list. The caller + * should call npf_zone_get to increment the zones ref count. + */ +static struct npf_zone * +npf_zone_create(const char *name) +{ + struct npf_zone *nz; + int rc; + + nz = zmalloc_aligned(sizeof(*nz)); + if (!nz) + return NULL; + + nz->nz_name = strdup(name); + nz->nz_refcnt = 0; + CDS_INIT_LIST_HEAD(&nz->nz_intf_list); + CDS_INIT_LIST_HEAD(&nz->nz_policy_list); + nz->nz_policy_ht = cds_lfht_new(ZONE_POLICY_HT_INIT, + ZONE_POLICY_HT_MIN, + ZONE_POLICY_HT_MAX, + CDS_LFHT_AUTO_RESIZE | + CDS_LFHT_ACCOUNTING, NULL); + if (!nz->nz_policy_ht) { + npf_zone_destroy(&nz); + return NULL; + } + + /* + * Pre-compute the hash for use when looking up the zone policy + * corresponding to this zone in another zones hash table. + */ + nz->nz_hash = npf_zone_policy_ht_hash((uintptr_t)nz); + + rc = npf_zone_list_insert(nz); + if (rc) { + npf_zone_destroy(&nz); + return NULL; + } + + return nz; +} + +/* + * A ref count is held for a zone when either user configures a zone, an + * interface is added to a zone, or when a zone policy references a zone. 
+ */ +static void +npf_zone_get(struct npf_zone *nz) +{ + nz->nz_refcnt++; +} + +static void +npf_zone_put(struct npf_zone **nzp) +{ + struct npf_zone *nz = *nzp; + + assert(nzp); + assert(nz); + + if (nz && --nz->nz_refcnt == 0) + npf_zone_destroy(nzp); +} + +static struct npf_zone * +npf_zone_list_find(const char *name) +{ + struct npf_zone *nz; + struct npf_zone_inst *zi; + + if (!name) + return NULL; + + zi = npf_zone_inst_find(); + if (!zi) + return NULL; + + if (zi->zi_zone_count == 0) + return NULL; + + cds_list_for_each_entry(nz, &zi->zi_zone_list, nz_node) { + if (!strcmp(name, nz->nz_name)) + return nz; + } + return NULL; +} + +static int +npf_zone_list_insert(struct npf_zone *nz) +{ + struct npf_zone_inst *zi; + + zi = npf_zone_inst_find_or_create(); + if (!zi) + return -EINVAL; + + cds_list_add_tail(&nz->nz_node, &zi->zi_zone_list); + zi->zi_zone_count++; + + return 0; +} + +static int +npf_zone_list_remove(struct npf_zone *nz) +{ + struct npf_zone_inst *zi; + + /* + * A zone might not be in the list if npf_zone_create failed to add it + */ + if (cds_list_empty(&nz->nz_node)) + return -1; + + zi = npf_zone_inst_find(); + if (!zi) + return -1; + + cds_list_del(&nz->nz_node); + zi->zi_zone_count--; + + npf_zone_intf_list_remove_all(&nz); + npf_zone_policy_remove_all(&nz); + + return 0; +} + +static void +npf_zone_list_remove_all(struct npf_zone_inst *zi) +{ + struct npf_zone *nz, *tmp; + + if (zi->zi_zone_count == 0) + return; + + cds_list_for_each_entry_safe(nz, tmp, &zi->zi_zone_list, nz_node) { + cds_list_del(&nz->nz_node); + zi->zi_zone_count--; + npf_zone_intf_list_remove_all(&nz); + npf_zone_policy_remove_all(&nz); + npf_zone_put(&nz); + } +} + +static struct npf_zone * +npf_zone_find_or_create(const char *name) +{ + struct npf_zone *nz; + + nz = npf_zone_list_find(name); + if (!nz) + nz = npf_zone_create(name); + + return nz; +} + +struct npf_zone *npf_zone_zif2zone_private(const struct npf_zone_intf *zif) +{ + if (zif) + return zif->zif_zone; + 
return NULL; +} + +struct npf_zone *npf_zone_local(void) +{ + return local_zone; +} + +/************************* zone intf *********************************/ + +static void +npf_zone_intf_destroy(struct npf_zone_intf **zifp) +{ + struct npf_zone_intf *zif = *zifp; + + if (zif) { + if (zif->zif_ifname) + free(zif->zif_ifname); + free(zif); + *zifp = NULL; + } +} + +static struct npf_zone_intf * +npf_zone_intf_create(const char *ifname) +{ + struct npf_zone_intf *zif; + + zif = zmalloc_aligned(sizeof(*zif)); + if (!zif) + return NULL; + + zif->zif_ifname = strdup(ifname); + if (!zif->zif_ifname) { + npf_zone_intf_destroy(&zif); + return NULL; + } + zif->zif_refcnt = 0; + + return zif; +} + +void +npf_zone_intf_get(struct npf_zone_intf *zif) +{ + zif->zif_refcnt++; +} + +void +npf_zone_intf_put(struct npf_zone_intf **zifp) +{ + struct npf_zone_intf *zif = *zifp; + + assert(zif); + assert(zif->zif_refcnt > 0); + + if (zif && --zif->zif_refcnt == 0) + npf_zone_intf_destroy(zifp); +} + +/* + * Lookup interface by interface name in a zones interface list + */ +static struct npf_zone_intf * +npf_zone_intf_list_find(const struct npf_zone *nz, const char *ifname) +{ + struct npf_zone_intf *zif; + + if (nz->nz_intf_count == 0) + return NULL; + + cds_list_for_each_entry(zif, &nz->nz_intf_list, zif_node) { + if (!strcmp(ifname, zif->zif_ifname)) + return zif; + } + return NULL; +} + +static int +npf_zone_intf_list_insert(struct npf_zone *nz, struct npf_zone_intf *zif) +{ + assert(zif->zif_zone == NULL); + + /* Store back pointer to zone in zone intf */ + zif->zif_zone = nz; + npf_zone_get(nz); + + /* Add zone intf to zone list */ + cds_list_add_tail(&zif->zif_node, &nz->nz_intf_list); + nz->nz_intf_count++; + npf_zone_intf_get(zif); + + return 0; +} + +static int +npf_zone_intf_list_remove(struct npf_zone **nzp, struct npf_zone_intf **zifp) +{ + struct npf_zone *nz = *nzp; + struct npf_zone_intf *zif = *zifp; + + if (!nz || !zif) + return 0; + + assert(nz == zif->zif_zone); + 
+ cds_list_del(&zif->zif_node); + nz->nz_intf_count--; + + zif->zif_zone = NULL; + npf_zone_intf_put(zifp); + npf_zone_put(nzp); + + return 0; +} + +static void +npf_zone_intf_list_remove_all(struct npf_zone **nzp) +{ + struct npf_zone_intf *zif, *tmp; + struct npf_zone *nz = *nzp; + + if (!nz || nz->nz_intf_count == 0) + return; + + cds_list_for_each_entry_safe(zif, tmp, &nz->nz_intf_list, zif_node) { + npf_zone_intf_list_remove(nzp, &zif); + } +} + +/* + * Get zones config. Called from forwarding threads. + */ +struct npf_config * +npf_zone_config(const struct npf_zone *fm_zone, + const struct npf_zone *to_zone) +{ + struct npf_zone_policy *zp; + + if (!fm_zone || !to_zone) + return NULL; + + zp = npf_zone_policy_ht_lookup(fm_zone, to_zone); + if (!zp) + return NULL; + + return rcu_dereference(zp->zp_conf); +} + +static void +npf_zone_policy_destroy(struct npf_zone_policy **zpp) +{ + struct npf_zone_policy *zp = *zpp; + + if (zp) { + *zpp = NULL; + if (zp->zp_to_zone) + npf_zone_put(&zp->zp_to_zone); + if (zp->zp_name) + free(zp->zp_name); + free(zp); + } +} + +static struct npf_zone_policy * +npf_zone_policy_create(const char *policy_name) +{ + struct npf_zone_policy *zp; + + zp = zmalloc_aligned(sizeof(*zp)); + if (!zp) + return NULL; + + zp->zp_name = strdup(policy_name); + if (!zp->zp_name) { + npf_zone_policy_destroy(&zp); + return NULL; + } + zp->zp_refcnt = 0; + + /* + * Create the 'to' zone to which this policy refers to. We use the + * 'to' zone pointer to create a hash when storing the zone policy in + * the 'from' zones hash table, and for the hash match function in the + * hash lookup. 
+ */ + zp->zp_to_zone = npf_zone_find_or_create(policy_name); + + if (!zp->zp_to_zone) { + npf_zone_policy_destroy(&zp); + return NULL; + } + npf_zone_get(zp->zp_to_zone); + + return zp; +} + +static void +npf_zone_policy_get(struct npf_zone_policy *zp) +{ + zp->zp_refcnt++; +} + +static void +npf_zone_policy_put(struct npf_zone_policy **zpp) +{ + struct npf_zone_policy *zp = *zpp; + + assert(zpp); + assert(zp); + + if (zp && --zp->zp_refcnt == 0) + npf_zone_policy_destroy(zpp); +} + +static struct npf_zone_policy * +npf_zone_policy_list_find(const struct npf_zone *nz, const char *policy_name) +{ + struct npf_zone_policy *zp; + + if (!policy_name || nz->nz_policy_count == 0) + return NULL; + + cds_list_for_each_entry(zp, &nz->nz_policy_list, zp_list_node) { + if (!strcmp(policy_name, zp->zp_name)) + return zp; + } + return NULL; +} + +static int +npf_zone_policy_list_insert(struct npf_zone *nz, struct npf_zone_policy *zp) +{ + cds_list_add_tail(&zp->zp_list_node, &nz->nz_policy_list); + nz->nz_policy_count++; + + return 0; +} + +static int +npf_zone_policy_list_remove(struct npf_zone *nz, struct npf_zone_policy *zp) +{ + cds_list_del(&zp->zp_list_node); + nz->nz_policy_count--; + + return 0; +} + +static uint32_t +npf_zone_policy_ht_hash(const uintptr_t nz) +{ + return rte_jhash_2words((uint64_t)nz >> 32, (uint32_t)nz, 0); +} + +static int +npf_zone_policy_ht_match(struct cds_lfht_node *ht_node, const void *key) +{ + struct npf_zone *nz = (struct npf_zone *)key; + struct npf_zone_policy *zp = caa_container_of(ht_node, + struct npf_zone_policy, + zp_lfht_node); + + return zp->zp_to_zone == nz; +} + +static int +npf_zone_policy_ht_insert(struct npf_zone *nz, struct npf_zone_policy *zp) +{ + struct cds_lfht_node *node; + + node = cds_lfht_add_unique( + nz->nz_policy_ht, + npf_zone_policy_ht_hash((uintptr_t)zp->zp_to_zone), + npf_zone_policy_ht_match, + zp->zp_to_zone, + &zp->zp_lfht_node); + + if (node != &zp->zp_lfht_node) { + npf_zone_policy_destroy(&zp); + return 
-EEXIST; + } + return 0; +} + +struct npf_zone_policy * +npf_zone_policy_ht_lookup(const struct npf_zone *fm_zone, + const struct npf_zone *to_zone) +{ + struct cds_lfht_iter iter; + struct cds_lfht_node *node; + struct npf_zone_policy *zp = NULL; + + cds_lfht_lookup(fm_zone->nz_policy_ht, to_zone->nz_hash, + npf_zone_policy_ht_match, to_zone, &iter); + + node = cds_lfht_iter_get_node(&iter); + if (node) + zp = caa_container_of(node, struct npf_zone_policy, + zp_lfht_node); + + return zp; +} + +static int +npf_zone_policy_ht_remove(const struct npf_zone *nz, + struct npf_zone_policy *zp) +{ + if (npf_zone_policy_ht_lookup(nz, zp->zp_to_zone)) { + cds_lfht_del(nz->nz_policy_ht, &zp->zp_lfht_node); + return 0; + } + return -EINVAL; +} + +/* + * Remove all zone policies from a zone + */ +static void +npf_zone_policy_remove_all(struct npf_zone **nzp) +{ + struct npf_zone_policy *zp, *tmp; + struct npf_zone *nz = *nzp; + int rc; + + if (!nz || nz->nz_policy_count == 0) + return; + + cds_list_for_each_entry_safe(zp, tmp, &nz->nz_policy_list, + zp_list_node) { + rc = npf_zone_policy_ht_remove(nz, zp); + if (rc) + continue; + + npf_zone_policy_list_remove(nz, zp); + npf_zone_policy_put(&zp); + } +} + + +/*************************** config **********************************/ + +int npf_zone_cfg(const char *name) +{ + struct npf_zone *nz; + + /* Zone may already exist if a policy refers to it */ + nz = npf_zone_find_or_create(name); + if (!nz) + return -ENOMEM; + + npf_zone_get(nz); + + return 0; +} + +int npf_zone_uncfg(const char *name) +{ + struct npf_zone *nz; + + nz = npf_zone_list_find(name); + if (!nz) + return 0; + + if (nz == local_zone) + local_zone = NULL; + + npf_zone_put(&nz); + + return 0; +} + +/* + * Set or clear the assigned local_zone + */ +int npf_zone_local_set(const char *name, bool set) +{ + struct npf_zone *nz; + + nz = npf_zone_list_find(name); + if (!nz) + return -EEXIST; + + if (set) { + if (local_zone) + /* Only one zone can be the local zone */ + 
return -EINVAL; + local_zone = nz; + nz->nz_local = true; + } else { + if (local_zone != nz) + return -EINVAL; + local_zone = NULL; + nz->nz_local = false; + } + + return 0; +} + +/* + * Add a policy to a zone + */ +int +npf_zone_policy_add(const char *zname, const char *policy_name) +{ + struct npf_zone *nz; + struct npf_zone_policy *zp; + char *ap_name; + int rc; + + nz = npf_zone_list_find(zname); + if (!nz) + return -EINVAL; + + if (npf_zone_policy_list_find(nz, policy_name)) + return -EINVAL; + + zp = npf_zone_policy_create(policy_name); + if (!zp) + return 0; + + rc = npf_zone_policy_ht_insert(nz, zp); + if (rc) + return rc; + + ap_name = alloca(strlen(zname) + strlen(policy_name) + 2); + sprintf(ap_name, "%s>%s", zname, policy_name); + + rc = npf_attpt_item_set_up(NPF_ATTACH_TYPE_ZONE, ap_name, &zp->zp_conf, + NULL); + if (rc != 0) { + RTE_LOG(ERR, DATAPLANE, "NPF attpt raise fail: zone/%s\n", + ap_name); + npf_zone_policy_ht_remove(nz, zp); + return rc; + } + + npf_zone_policy_list_insert(nz, zp); + + /* + * Take a single ref count for the policy being in both hash table and + * list + */ + npf_zone_policy_get(zp); + return 0; +} + +int +npf_zone_policy_del(const char *zname, const char *policy_name) +{ + struct npf_zone *nz; + struct npf_zone_policy *zp; + char *ap_name; + int rc; + + nz = npf_zone_list_find(zname); + if (!nz) + return 0; + + zp = npf_zone_policy_list_find(nz, policy_name); + if (!zp) + return 0; + + /* remove from hash table */ + rc = npf_zone_policy_ht_remove(nz, zp); + if (rc) + return rc; + + /* remove from list */ + npf_zone_policy_list_remove(nz, zp); + + ap_name = alloca(strlen(zname) + strlen(policy_name) + 2); + sprintf(ap_name, "%s>%s", zname, policy_name); + rc = npf_attpt_item_set_down(NPF_ATTACH_TYPE_ZONE, ap_name); + if (rc != 0) + RTE_LOG(ERR, DATAPLANE, "NPF attpt down fail: zone/%s\n", + ap_name); + + npf_zone_policy_put(&zp); + return 0; +} + +/* + * Adds zone intf struct to zone list. Interface may not yet exist. 
+ */ +int npf_zone_intf_add(const char *zname, const char *ifname) +{ + struct npf_zone *nz; + struct npf_zone_intf *zif; + int rc; + + nz = npf_zone_list_find(zname); + if (!nz) + return -EINVAL; + + /* The interface must not already be a member of this zone */ + zif = npf_zone_intf_list_find(nz, ifname); + if (zif) + return -EINVAL; + + zif = npf_zone_intf_create(ifname); + if (!zif) + return -ENOMEM; + + /* Takes a reference on both the zone and the zone intf */ + npf_zone_intf_list_insert(nz, zif); + + struct ifnet *ifp = dp_ifnet_byifname(ifname); + + assert(npf_zone_ifname2zif(ifname) == zif); + + /* + * Interface may not exist yet. If this is the case then the + * remainder of this initialization occurs from npf_if_enable when the + * interface is created, and an index assigned to it. + */ + if (!ifp || !ifp->if_index) + return 0; + + /* Set pointer from npf_if_internal to zone intf */ + rc = npf_if_zone_assign(ifp, zif, true); + /* A further zone intf reference is taken only if the assign succeeded */ + if (!rc) + npf_zone_intf_get(zif); + + return rc; +} + +/* + * Removes zone intf struct from zone list. Returns 0 if either the zone or + * the zone intf is not found. + */ +int npf_zone_intf_del(const char *zname, const char *ifname) +{ + struct npf_zone *nz; + struct npf_zone_intf *zif; + int rc; + + nz = npf_zone_list_find(zname); + if (!nz) + return 0; + + zif = npf_zone_intf_list_find(nz, ifname); + if (!zif) + return 0; + + /* remove from list and free */ + npf_zone_intf_list_remove(&nz, &zif); + + /* Interface may have been removed, or may have never existed */ + struct ifnet *ifp = dp_ifnet_byifname(ifname); + + /* + * ifp will be NULL if the interface was deleted before the zone + * config was removed. In this case, npf_zone_if_index_unset will have + * already called npf_if_zone_assign and npf_zone_intf_put. + */ + if (!ifp || !ifp->if_index) + return 0; + + /* Clear pointer from npf_if_internal to zone intf */ + rc = npf_if_zone_assign(ifp, NULL, true); + /* zif is NULL here if the list removal above dropped the last reference */ + if (!rc && zif) + npf_zone_intf_put(&zif); + + return rc; +} + +/* + * npf_zone_ifname2zif is used when an interface is created after it has been + * added to a zone.
+ */ +struct npf_zone_intf * +npf_zone_ifname2zif(const char *ifname) +{ + struct npf_zone *nz; + struct npf_zone_inst *zi; + struct npf_zone_intf *zif; + + if (!ifname) + return NULL; + + zi = npf_zone_inst_find(); + if (!zi) + return NULL; + + if (zi->zi_zone_count == 0) + return NULL; + + cds_list_for_each_entry(nz, &zi->zi_zone_list, nz_node) { + zif = npf_zone_intf_list_find(nz, ifname); + if (zif) + return zif; + } + return NULL; +} + +/********************* Show commands ***********************/ + +static void +npf_zone_show_interface(json_writer_t *json, const struct npf_zone_intf *zif) +{ + jsonw_start_object(json); + + jsonw_string_field(json, "name", zif->zif_ifname); + + jsonw_end_object(json); +} + +static void +npf_zone_show_policy(json_writer_t *json, const struct npf_zone *nz, + const struct npf_zone_policy *zp, uint8_t flags) +{ + char ap_name[100]; + + snprintf(ap_name, sizeof(ap_name), "%s>%s", + nz->nz_name, zp->zp_name); + + jsonw_start_object(json); + + jsonw_string_field(json, "name", zp->zp_name); + + if (flags & NPF_ZONES_SHOW_RSETS) { + struct npf_attpt_item *ap; + + jsonw_name(json, "config"); + jsonw_start_array(json); + + if (npf_attpt_item_find_up(NPF_ATTACH_TYPE_ZONE, + ap_name, &ap) >= 0) + npf_show_attach_point_rulesets(json, ap, NPF_ZONE); + + jsonw_end_array(json); + } + + jsonw_end_object(json); +} + +static void +npf_zone_show_zone(json_writer_t *json, const struct npf_zone *nz, + const char *policy, uint8_t flags) +{ + jsonw_start_object(json); + + jsonw_string_field(json, "name", nz->nz_name); + jsonw_bool_field(json, "local-zone", nz->nz_local && nz == local_zone); + + /* Interface list */ + if (flags & NPF_ZONES_SHOW_INTFS) { + struct npf_zone_intf *zif; + + jsonw_name(json, "interfaces"); + jsonw_start_array(json); + + cds_list_for_each_entry(zif, &nz->nz_intf_list, zif_node) + npf_zone_show_interface(json, zif); + + jsonw_end_array(json); /* interface list */ + } + + /* Policy list */ + if (flags & (NPF_ZONES_SHOW_POLS | 
NPF_ZONES_SHOW_RSETS)) { + struct npf_zone_policy *zp; + + jsonw_name(json, "policies"); + jsonw_start_array(json); + + cds_list_for_each_entry(zp, &nz->nz_policy_list, zp_list_node) + if (!policy || !strcmp(policy, zp->zp_name)) + npf_zone_show_policy(json, nz, zp, flags); + + jsonw_end_array(json); /* policy list */ + } + + jsonw_end_object(json); /* zone object */ +} + +void +npf_zone_show_private(json_writer_t *json, const char *zone, + const char *policy, uint8_t flags) +{ + struct npf_zone_inst *zi; + struct npf_zone *nz; + + zi = npf_zone_inst_find(); + + jsonw_pretty(json, true); + jsonw_name(json, "zones"); + jsonw_start_array(json); + + if (zi) { + cds_list_for_each_entry(nz, &zi->zi_zone_list, nz_node) { + /* Looking for one particular zone? */ + if (zone && strcmp(zone, nz->nz_name) != 0) + continue; + + /* + * Do not show zones that have been instantiated by a + * zones policy reference only. The local-zone never + * has any associated interfaces. + */ + if (nz->nz_intf_count == 0 && !nz->nz_local) + continue; + + npf_zone_show_zone(json, nz, policy, flags); + } + } + + jsonw_end_array(json); +} diff --git a/src/npf/zones/npf_zone_private.h b/src/npf/zones/npf_zone_private.h new file mode 100644 index 00000000..5dae4a3e --- /dev/null +++ b/src/npf/zones/npf_zone_private.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2018-2019, AT&T Intellectual Property. All rights reserved. 
+ * + * SPDX-License-Identifier: LGPL-2.1-only + */ + +#ifndef NPF_ZONE_PRIVATE_H +#define NPF_ZONE_PRIVATE_H + +#include +#include "npf/npf.h" + +struct npf_zone; +struct npf_zone_intf; + +#define NPF_ZONES_SHOW_INTFS 0x01 +#define NPF_ZONES_SHOW_POLS 0x02 +#define NPF_ZONES_SHOW_RSETS 0x04 +#define NPF_ZONES_SHOW_ALL (NPF_ZONES_SHOW_INTFS | NPF_ZONES_SHOW_POLS | \ + NPF_ZONES_SHOW_RSETS) + +/* local zone */ +struct npf_zone *npf_zone_local(void); +extern struct npf_zone *local_zone; + +/* Get interface zone */ +struct npf_zone *npf_zone_zif2zone_private(const struct npf_zone_intf *zif); + +struct npf_zone_policy *npf_zone_policy_ht_lookup( + const struct npf_zone *fm_zone, const struct npf_zone *to_zone); + +int npf_zone_cfg(const char *name); +int npf_zone_uncfg(const char *name); +int npf_zone_local_set(const char *name, bool set); +int npf_zone_policy_add(const char *zname, const char *policy_name); +int npf_zone_policy_del(const char *zname, const char *policy_name); +int npf_zone_intf_add(const char *name, const char *ifname); +int npf_zone_intf_del(const char *name, const char *ifname); +void npf_zone_intf_get(struct npf_zone_intf *zif); +void npf_zone_intf_put(struct npf_zone_intf **zifp); +struct npf_zone_intf *npf_zone_ifname2zif(const char *ifname); + +struct npf_config *npf_zone_config(const struct npf_zone *fm_zone, + const struct npf_zone *to_zone); + +void npf_zone_show_private(json_writer_t *json, const char *zone, + const char *policy, uint8_t flags); + +void npf_zone_inst_destroy_private(void); + +#endif diff --git a/src/npf/zones/npf_zone_public.c b/src/npf/zones/npf_zone_public.c new file mode 100644 index 00000000..2ad688d1 --- /dev/null +++ b/src/npf/zones/npf_zone_public.c @@ -0,0 +1,454 @@ +/* + * Copyright (c) 2018-2020, AT&T Intellectual Property. All rights reserved. 
+ * + * SPDX-License-Identifier: LGPL-2.1-only + */ + + +#include +#include "urcu.h" +#include "if_var.h" + +#include "vplane_log.h" +#include "npf/npf.h" +#include "npf/npf_cmd.h" +#include "npf/npf_nat.h" +#include "npf/npf_cache.h" +#include "npf/npf_if.h" +#include "npf/npf_rc.h" +#include "npf/npf_ruleset.h" +#include "npf/npf_session.h" +#include "npf/rproc/npf_ext_log.h" +#include "npf/config/npf_config.h" +#include "npf/config/npf_ruleset_type.h" + +#include "npf/zones/npf_zone_public.h" +#include "npf/zones/npf_zone_private.h" + +#ifndef NZONEFW + +/* + * Return true if a local zone is assigned + */ +bool npf_zone_local_is_set(void) +{ + return local_zone != NULL; +} + +/* + * Called by npf_hook_track in direction PFIL_OUT if destination interface is + * in a zone. + * + * Gets the zone config and returns true if both source and destination + * interfaces are in a zone and there are rules between the two zones, + * else sets decision to PASS or BLOCK and returns false. + * + * Packets from the router itself are marked with flag NPF_FLAG_FROM_US, and + * are never blocked. + * + * Packets from tunnels, or kernel forwarded packets, will have an unknown + * input interface, and hence no 'from' zone. These will be blocked. + */ +static bool npf_get_zone_config(struct ifnet *in_ifp, + const struct npf_zone *to_zone, + uint16_t npf_flags, npf_decision_t *decision, + struct npf_config **npf_config) +{ + const struct npf_zone *from_zone = NULL; + + if (npf_flags & NPF_FLAG_FROM_US) { + if (npf_flags & NPF_FLAG_FROM_LOCAL) + from_zone = npf_zone_local(); + + if (!from_zone) { + *decision = NPF_DECISION_PASS; + return false; + } + } else if (in_ifp) + from_zone = npf_if_zone(in_ifp); + + if (!from_zone) { + *decision = NPF_DECISION_BLOCK; + return false; + } + + if (from_zone == to_zone) { + *decision = NPF_DECISION_PASS; + return false; + } + + /* + * Make stateful ZBF work like stateful IBF, namely that a block + * rule can not affect the stateful return traffic. 
Otherwise + * stateful return traffic is allowed to pass. + */ + if (npf_flags & NPF_FLAG_IN_SESSION) { + *decision = NPF_DECISION_PASS; + return false; + } + + /* Get the zone configuration */ + *npf_config = npf_zone_config(from_zone, to_zone); + + if (!*npf_config) { + /* no configuration between the two zones. */ + if (npf_flags & NPF_FLAG_FROM_LOCAL) + *decision = NPF_DECISION_PASS; + else + *decision = NPF_DECISION_BLOCK; + return false; + } + + return true; +} + +/* + * Zone firewall output hook + */ +bool +npf_zone_hook(struct ifnet *in_ifp, struct npf_if *nif, uint16_t npf_flags, + struct npf_config **fw_config, npf_decision_t *decision, + enum npf_ruleset_type *rlset_type, bool *reverse_stateful) +{ + struct npf_zone *to_zone = npf_nif_zone(nif); + + if (likely(!to_zone)) { + + /* + * Block zone to non-zone; NB from-us sometimes looks + * like from zone + */ + if (unlikely((npf_flags & NPF_FLAG_FROM_ZONE) && + !(npf_flags & NPF_FLAG_FROM_US))) { + *decision = NPF_DECISION_BLOCK; + return true; + } + } else { + /* Get the zones configuration */ + if (!npf_get_zone_config(in_ifp, to_zone, npf_flags, + decision, fw_config)) + return true; + + *rlset_type = NPF_RS_ZONE; + *reverse_stateful = false; + } + + return false; +} + +/* + * Local zone firewall. For packets delivered *to* the router. Bypass DNAT + * in fw-in node if session has SE_LOCAL flag set + */ +static npf_decision_t +npf_local_zone_fw(struct ifnet *ifp, struct rte_mbuf **m, + npf_cache_t *npc, struct npf_config *zone_config, + npf_session_t *se) +{ + npf_decision_t decision = NPF_DECISION_PASS; + npf_rule_t *rl; + int error = 0; + + const npf_ruleset_t *rlset; + + rlset = npf_get_ruleset(zone_config, NPF_RS_ZONE); + + rl = npf_ruleset_inspect(npc, *m, rlset, se, NULL, PFIL_OUT); + decision = npf_rule_decision(rl); + + /* Log any firewall matched rule now */ + if (unlikely(npf_rule_has_rproc_logger(rl))) + npf_log_pkt(npc, *m, rl, PFIL_IN); + + /* + * Establish a "pass" session, if required. 
Just proceed, if session + * creation fails (e.g. due to unsupported protocol). + */ + if (rl && npf_rule_stateful(rl) && decision == NPF_DECISION_PASS) { + if (!se) { + se = npf_session_establish(npc, *m, ifp, PFIL_IN, + &error); + if (unlikely(error)) { + decision = NPF_DECISION_BLOCK; + goto stats; + } + } + npf_session_add_fw_rule(se, rl); + } + +stats: + npf_add_pkt(rl, rte_pktmbuf_pkt_len(*m)); + + if (se && ifp) { + if (decision != NPF_DECISION_BLOCK) { + /* N.B. se may be consumed */ + error = npf_session_activate(se, ifp, npc, *m); + if (error == 0) { + /* Attach the session to the packet */ + struct pktmbuf_mdata *mdata = pktmbuf_mdata(*m); + mdata->md_session = se; + pktmbuf_mdata_set(*m, PKT_MDATA_SESSION); + } else { + if (error != -NPF_RC_ENOSTR) + decision = NPF_DECISION_BLOCK; + } + } else if (!npf_session_is_active(se)) { + npf_session_destroy(se); + } else if (error) { + pktmbuf_mdata_clear(*m, PKT_MDATA_SESSION); + npf_session_expire(se); + } + } + return decision; +} + +/* + * Local zones hook. Return true to discard. + */ +bool +npf_local_zone_hook(struct ifnet *ifp, struct rte_mbuf **m, + struct npf_cache *npc, struct npf_session *se, + struct npf_if *nif) +{ + struct npf_zone *local_zone = npf_zone_local(); + + if (!local_zone) + return false; + + struct npf_zone *from_zone = npf_nif_zone(nif); + + if (from_zone) { + struct npf_config *zone_config; + npf_decision_t decision; + + zone_config = npf_zone_config(from_zone, local_zone); + if (!zone_config) + return true; /* discard */ + + /* + * Do we need to un-DNAT before zones rulesets? + */ + if (se && pktmbuf_mdata_exists(*m, PKT_MDATA_DNAT)) { + if (npf_local_undnat(m, npc, se)) + return true; /* discard */ + + /* + * Mark session such that subsequent packets + * will bypass DNAT and route lookup, and be + * sent direct to ipv4-local node after + * ipv4-fw-in node. 
+ */ + npf_session_set_local_zone_nat(se); + } + decision = npf_local_zone_fw(ifp, m, npc, zone_config, + se); + + if (decision != NPF_DECISION_PASS) + return true; /* discard */ + } + + return false; +} + +/* + * Zone show command + */ +int npf_zone_show(FILE *fp, int argc, char **argv) +{ + const char *zone = NULL, *policy = NULL; + uint8_t flags = NPF_ZONES_SHOW_ALL; + char *endp; + + if (argc >= 1 && strcmp(argv[0], "all") != 0) + zone = argv[0]; + + if (argc >= 2 && strcmp(argv[1], "all") != 0) + policy = argv[1]; + + if (argc >= 3) { + flags = strtoul(argv[2], &endp, 10); + if (*endp) { + npf_cmd_err(fp, "invalid flags"); + return -1; + } + } + + json_writer_t *json = jsonw_new(fp); + + if (json == NULL) { + RTE_LOG(ERR, DATAPLANE, "failed to create json stream\n"); + return -1; + } + + npf_zone_show_private(json, zone, policy, flags); + jsonw_destroy(&json); + return 0; +} + +int +npf_zone_cfg_add(FILE *f, int argc, char **argv) +{ + if (argc < 1) { + npf_cmd_err(f, "%s", npf_cmd_str_missing); + return -1; + } + + if (npf_zone_cfg(argv[0]) < 0) { + npf_cmd_err(f, "error adding zone %s", argv[0]); + return -1; + } + return 0; +} + +int +npf_zone_cfg_remove(FILE *f, int argc, char **argv) +{ + if (argc < 1) { + npf_cmd_err(f, "%s", npf_cmd_str_missing); + return -1; + } + + if (npf_zone_uncfg(argv[0]) < 0) { + npf_cmd_err(f, "error deleting zone %s", argv[0]); + return -1; + } + return 0; +} + +int +npf_zone_cfg_local(FILE *f, int argc, char **argv) +{ + if (argc < 2) { + npf_cmd_err(f, "%s", npf_cmd_str_missing); + return -1; + } + + bool set = strcmp(argv[1], "set") == 0; + + if (npf_zone_local_set(argv[0], set) < 0) { + npf_cmd_err(f, "error setting %s as local zone", argv[0]); + return -1; + } + return 0; +} + +int +npf_zone_cfg_policy_add(FILE *f, int argc, char **argv) +{ + if (argc < 2) { + npf_cmd_err(f, "%s", npf_cmd_str_missing); + return -1; + } + + if (npf_zone_policy_add(argv[0], argv[1])) { + npf_cmd_err(f, "Failed to add policy %s to zone %s", + 
argv[1], argv[0]); + return -1; + } + return 0; +} + +int +npf_zone_cfg_policy_remove(FILE *f, int argc, char **argv) +{ + if (argc < 2) { + npf_cmd_err(f, "%s", npf_cmd_str_missing); + return -1; + } + + if (npf_zone_policy_del(argv[0], argv[1]) < 0) { + npf_cmd_err(f, "Failed to remove policy %s from zone %s", + argv[1], argv[0]); + return -1; + } + return 0; +} + +int +npf_zone_cfg_intf_add(FILE *f, int argc, char **argv) +{ + if (argc < 2) { + npf_cmd_err(f, "%s", npf_cmd_str_missing); + return -1; + } + + if (npf_zone_intf_add(argv[0], argv[1]) < 0) { + npf_cmd_err(f, "Failed to add interface %s to zone %s", + argv[1], argv[0]); + return -1; + } + return 0; +} + +int +npf_zone_cfg_intf_remove(FILE *f, int argc, char **argv) +{ + if (argc < 2) { + npf_cmd_err(f, "%s", npf_cmd_str_missing); + return -1; + } + + if (npf_zone_intf_del(argv[0], argv[1]) < 0) { + npf_cmd_err(f, "Failed to remove interface %s from zone %s", + argv[1], argv[0]); + return -1; + } + return 0; +} + +void +npf_zone_inst_destroy(void) +{ + npf_zone_inst_destroy_private(); +} + +struct npf_zone *npf_zone_zif2zone(const struct npf_zone_intf *zif) +{ + return npf_zone_zif2zone_private(zif); +} + +/* + * Indirect callback for dataplane DP_EVT_IF_INDEX_SET event. Should be + * called under niif_lock. + */ +int npf_zone_if_index_set(struct ifnet *ifp) +{ + struct npf_zone_intf *zif; + int rc = 0; + + /* + * If this interface is in a zone, then reference the associated + * zone interface structure. + */ + zif = npf_zone_ifname2zif(ifp->if_name); + if (zif) { + rc = npf_if_zone_assign(ifp, zif, false); + if (!rc) + npf_zone_intf_get(zif); + } + return rc; +} + +/* + * Indirect callback for dataplane DP_EVT_IF_INDEX_UNSET event. This is + * necessary for when an interface is deleted before zones config is removed. + * See also npf_zone_intf_del. 
+ */ +int npf_zone_if_index_unset(struct ifnet *ifp) +{ + struct npf_zone_intf *zif = npf_if_zone_intf(ifp); + int rc = 0; + + /* + * If associated with a zone, then disassociate. + */ + if (zif) { + /* Clear pointer from npf_if_internal to zone intf */ + rc = npf_if_zone_assign(ifp, NULL, false); + if (!rc) + npf_zone_intf_put(&zif); + } + return rc; +} + +#endif /* NZONEFW */ diff --git a/src/npf/zones/npf_zone_public.h b/src/npf/zones/npf_zone_public.h new file mode 100644 index 00000000..540df707 --- /dev/null +++ b/src/npf/zones/npf_zone_public.h @@ -0,0 +1,151 @@ +/* + * Copyright (c) 2018-2019, AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ + +#ifndef NPF_ZONE_PUBLIC_H +#define NPF_ZONE_PUBLIC_H + +#include "npf/config/npf_config.h" +#include "npf/config/npf_ruleset_type.h" + +struct ifnet; +struct npf_if; +struct npf_cache; +struct npf_session; +struct npf_zone_intf; + +#ifndef NZONEFW + +bool npf_zone_local_is_set(void); + +bool npf_zone_hook(struct ifnet *in_ifp, struct npf_if *nif, + uint16_t npf_flags, struct npf_config **fw_config, + npf_decision_t *decision, enum npf_ruleset_type *rlset_type, + bool *reverse_stateful); + +bool npf_local_zone_hook(struct ifnet *ifp, struct rte_mbuf **m, + struct npf_cache *npc, struct npf_session *se, + struct npf_if *nif); + +int npf_zone_show(FILE *fp, int argc, char **argv); + +int npf_zone_cfg_add(FILE *f, int argc, char **argv); +int npf_zone_cfg_remove(FILE *f, int argc, char **argv); +int npf_zone_cfg_local(FILE *f, int argc, char **argv); +int npf_zone_cfg_policy_add(FILE *f, int argc, char **argv); +int npf_zone_cfg_policy_remove(FILE *f, int argc, char **argv); +int npf_zone_cfg_intf_add(FILE *f, int argc, char **argv); +int npf_zone_cfg_intf_remove(FILE *f, int argc, char **argv); + +void npf_zone_inst_destroy(void); +struct npf_zone *npf_zone_zif2zone(const struct npf_zone_intf *zif); +int npf_zone_if_index_set(struct ifnet *ifp); +int 
npf_zone_if_index_unset(struct ifnet *ifp); + +#else /* ~NZONEFW */ + +static inline bool npf_zone_local_is_set(void) +{ + return false; +} + +static inline bool +npf_zone_hook(struct ifnet *in_ifp __unused, + struct npf_if *nif __unused, + uint16_t npf_flags __unused, + struct npf_config **fw_config __unused, + npf_decision_t *decision __unused, + enum npf_ruleset_type *rlset_type __unused, + bool *reverse_stateful __unused) +{ + return false; +} + +static inline bool +npf_local_zone_hook(struct ifnet *ifp __unused, + struct rte_mbuf **m __unused, + struct npf_cache *npc __unused, + struct npf_session *se __unused, + struct npf_if *nif __unused) +{ + return false; +} + +static inline int +npf_zone_show(FILE *fp __unused, int argc __unused, char **argv __unused) +{ + return 0; +} + +static inline int +npf_zone_cfg_add(FILE *f __unused, int argc __unused, char **argv __unused) +{ + return 0; +} + +static inline int +npf_zone_cfg_remove(FILE *f __unused, int argc __unused, char **argv __unused) +{ + return 0; +} + +static inline int +npf_zone_cfg_local(FILE *f __unused, int argc __unused, char **argv __unused) +{ + return 0; +} + +static inline int +npf_zone_cfg_policy_add(FILE *f __unused, int argc __unused, + char **argv __unused) +{ + return 0; +} + +static inline int +npf_zone_cfg_policy_remove(FILE *f __unused, int argc __unused, + char **argv __unused) +{ + return 0; +} + +static inline int +npf_zone_cfg_intf_add(FILE *f __unused, int argc __unused, + char **argv __unused) +{ + return 0; +} + +static inline int +npf_zone_cfg_intf_remove(FILE *f __unused, int argc __unused, + char **argv __unused) +{ + return 0; +} + +static inline void +npf_zone_inst_destroy(void) +{ +} + +static inline struct +npf_zone *npf_zone_zif2zone(const struct npf_zone_intf *zif __unused) +{ + return NULL; +} + +static inline int npf_zone_if_index_set(struct ifnet *ifp __unused) +{ + return 0; +} + +static inline int npf_zone_if_index_unset(struct ifnet *ifp __unused) +{ + return 0; +}
+ +#endif /* ~NZONEFW */ + +#endif /* NPF_ZONE_PUBLIC_H */ diff --git a/src/npf_shim.c b/src/npf_shim.c index b880e4ba..3ff0c018 100644 --- a/src/npf_shim.c +++ b/src/npf_shim.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2021, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2011-2017 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -18,7 +18,7 @@ #include "if_var.h" #include "npf/npf.h" #include "npf/npf_apm.h" -#include "npf/alg/npf_alg_public.h" +#include "npf/alg/alg_npf.h" #include "npf/config/npf_attach_point.h" #include "npf/config/npf_config.h" #include "npf/config/npf_rule_group.h" @@ -27,24 +27,27 @@ #include "npf/npf_apm.h" #include "npf/npf_addrgrp.h" #include "npf/npf_cache.h" +#include "npf/npf_rc.h" #include "npf/npf_event.h" #include "npf/npf_if.h" #include "npf/npf_if_feat.h" -#include "npf/npf_nat64.h" #include "npf/npf_nat.h" #include "npf/npf_ruleset.h" #include "npf/npf_session.h" #include "npf/npf_state.h" +#include "npf/npf_vrf.h" #include "npf/npf_timeouts.h" +#include "npf/zones/npf_zone_public.h" #include "npf/rproc/npf_rproc.h" #include "npf/rproc/npf_ext_session_limit.h" #include "npf_shim.h" #include "npf/rproc/npf_ext_log.h" #include "npf/nat/nat_pool_public.h" -#include "pktmbuf.h" +#include "pktmbuf_internal.h" +#include "rldb.h" #include "urcu.h" #include "vplane_log.h" -#include "vrf.h" +#include "vrf_internal.h" #include "ip_icmp.h" struct npf_ruleset; @@ -61,38 +64,50 @@ struct npf_config *npf_global_config __hot_data; npf_result_t npf_hook_notrack(const npf_ruleset_t *rlset, struct rte_mbuf **m, struct ifnet *ifp, int dir, uint16_t npf_flags, - uint16_t eth_type) + uint16_t eth_type, int *rcp) { - npf_cache_t npc, *n; + npf_cache_t npc, *n = NULL; uint32_t tag_val = 0; bool tag_set = false; npf_rule_t *rl; + npf_rproc_result_t rproc_result = { + .decision = NPF_DECISION_UNMATCHED, + }; - /* - * Use the global per-core 
cache if the packet has been - * reassembled, else use a local cache - * - * Note that both branches will clear any cached tag - */ - if (pktmbuf_mdata_exists(*m, PKT_MDATA_DEFRAG)) { - n = npf_get_cache(&npf_flags, *m, eth_type); - if (!n) - goto result; - } else { - n = &npc; - /* Initialize packet information cache. */ - npf_cache_init(n); + if (npf_ruleset_uses_cache(rlset)) { + int rc = 0; - /* Cache everything. drop if junk. */ - if (unlikely(!npf_cache_all(n, *m, eth_type))) - goto result; + /* + * Use the global per-core cache if the packet has been + * reassembled, else use a local cache + * + * Note that both branches will clear any cached tag + */ + if (pktmbuf_mdata_exists(*m, PKT_MDATA_DEFRAG)) { + n = npf_get_cache(&npf_flags, *m, eth_type, &rc); + if (!n) { + if (rcp) + *rcp = rc; + goto result; + } + } else { + n = &npc; + /* Initialize packet information cache. */ + npf_cache_init(n); + + /* Cache everything. drop if junk. */ + rc = npf_cache_all(n, *m, eth_type); + if (unlikely(rc < 0 && rc != -NPF_RC_NON_IP)) { + if (rcp) + *rcp = rc; + goto result; + } + } } rl = npf_ruleset_inspect(n, *m, rlset, NULL, ifp, dir); - npf_rproc_result_t rproc_result = { - .decision = npf_rule_decision(rl), - }; + rproc_result.decision = npf_rule_decision(rl); if (rproc_result.decision != NPF_DECISION_UNMATCHED) { /* Log any matched rule immediately */ @@ -230,8 +245,7 @@ npf_hook_track(struct ifnet *in_ifp, struct rte_mbuf **m, struct npf_config *fw_config = NULL; npf_session_t *se = NULL; npf_rule_t *rl = NULL; - int error = 0; - npf_decision_t decision; + npf_decision_t decision = NPF_DECISION_UNMATCHED; npf_action_t action = NPF_ACTION_NORMAL; enum npf_ruleset_type rlset_type = NPF_RS_TYPE_COUNT; const npf_ruleset_t *rlset; @@ -239,6 +253,7 @@ npf_hook_track(struct ifnet *in_ifp, struct rte_mbuf **m, bool too_big = false; struct npf_config *nif_config = npf_if_conf(nif); struct ifnet *ifp = nif->nif_ifp; + int rc = NPF_RC_UNMATCHED; /* * Parse the packet, note 
this also clears any cached tag. @@ -247,7 +262,7 @@ npf_hook_track(struct ifnet *in_ifp, struct rte_mbuf **m, * however if we get here due to DPI, we may. That is fine as * the subsequent logic should simply pass those fragments. */ - npf_cache_t *npc = npf_get_cache(&npf_flags, *m, eth_type); + npf_cache_t *npc = npf_get_cache(&npf_flags, *m, eth_type, &rc); if (unlikely(!npc)) { decision = NPF_DECISION_BLOCK; @@ -263,32 +278,31 @@ npf_hook_track(struct ifnet *in_ifp, struct rte_mbuf **m, * try to create a 'parent' tuple based session. */ se = npf_session_inspect_or_create(npc, *m, ifp, dir, &npf_flags, - &error, &internal_hairpin); - if (unlikely(error)) { + &rc, &internal_hairpin); + if (unlikely(rc < 0)) { decision = NPF_DECISION_BLOCK; goto result; } /* SNAT forward (OUT), DNAT reply */ - if (dir == PFIL_OUT) { + if (dir == PFIL_OUT && !internal_hairpin) { npf_nat_t *nt = npf_session_get_nat(se); if (nt) { - error = nat_do_subsequent(npc, m, se, nt, dir); + rc = nat_do_subsequent(npc, m, se, nt, dir); snat_result: - if (unlikely(error)) { - if (error == -E2BIG) { + if (unlikely(rc < 0)) { + if (rc == -NPF_RC_NAT_E2BIG) { too_big = true; - error = 0; /* TCP sends probes */ + rc = 0; /* TCP sends probes */ } decision = NPF_DECISION_BLOCK; goto stats; } } else if (unlikely(npf_iscached(npc, NPC_ICMP_ERR))) { - error = nat_do_icmp_err(npc, m, ifp, dir); + rc = nat_do_icmp_err(npc, m, ifp, dir); goto snat_result; } else if (unlikely(npf_active(nif_config, NPF_SNAT))) { - error = nat_try_initial(nif_config, npc, &se, m, ifp, - dir); + rc = nat_try_initial(nif_config, npc, &se, m, ifp, dir); goto snat_result; } } @@ -308,7 +322,8 @@ npf_hook_track(struct ifnet *in_ifp, struct rte_mbuf **m, } /* - * Determine the ruleset type and any reverse ruleset + * Determine the ruleset type and any reverse ruleset, + * allowing for possible stateful ZBF pass session. 
*/ bool reverse_stateful; @@ -328,6 +343,14 @@ npf_hook_track(struct ifnet *in_ifp, struct rte_mbuf **m, rlset_type = NPF_RS_FW_OUT; reverse_stateful = fw_active & NPF_FW_STATE_IN; + + if (unlikely((npf_flags & NPF_FLAG_FROM_ZONE) || + npf_nif_zone(nif))) { + if (npf_zone_hook(in_ifp, nif, npf_flags, &fw_config, + &decision, &rlset_type, + &reverse_stateful)) + goto done; + } } /* Inspect FW ruleset */ @@ -352,9 +375,8 @@ npf_hook_track(struct ifnet *in_ifp, struct rte_mbuf **m, */ if (rl && npf_rule_stateful(rl)) { if (!se) { - se = npf_session_establish(npc, *m, ifp, dir, - &error); - if (unlikely(error)) { + se = npf_session_establish(npc, *m, ifp, dir, &rc); + if (unlikely(rc < 0)) { decision = NPF_DECISION_BLOCK; goto stats; } @@ -368,21 +390,30 @@ npf_hook_track(struct ifnet *in_ifp, struct rte_mbuf **m, npf_log_pkt(npc, *m, rl, dir); /* DNAT forward (IN), SNAT reply */ - if (dir == PFIL_IN) { + if (dir == PFIL_IN && !internal_hairpin) { npf_nat_t *nt = npf_session_get_nat(se); if (nt) { - error = nat_do_subsequent(npc, m, se, nt, dir); + /* + * If destined for local, bypass DNAT. The session is + * only marked as local when the first packet passes + * through npf_local_fw. 
+ */ + if (unlikely(npf_session_is_local_zone_nat(se))) { + action = NPF_ACTION_TO_LOCAL; + goto stats; + } + + rc = nat_do_subsequent(npc, m, se, nt, dir); dnat_result: - if (unlikely(error)) { + if (unlikely(rc < 0)) { decision = NPF_DECISION_BLOCK; goto stats; } } else if (unlikely(npf_iscached(npc, NPC_ICMP_ERR))) { - error = nat_do_icmp_err(npc, m, ifp, dir); + rc = nat_do_icmp_err(npc, m, ifp, dir); goto dnat_result; } else if (unlikely(npf_active(nif_config, NPF_DNAT))) { - error = nat_try_initial(nif_config, npc, &se, m, ifp, - dir); + rc = nat_try_initial(nif_config, npc, &se, m, ifp, dir); goto dnat_result; } } @@ -401,38 +432,26 @@ npf_hook_track(struct ifnet *in_ifp, struct rte_mbuf **m, } done: - if (decision != NPF_DECISION_BLOCK) { - /* ALLOWABLE: UNMATCHED AND PASS */ - if ((dir == PFIL_IN && (npf_active(fw_config, - NAT64_OR_NAT46(eth_type)) || - npf_session_is_nat64(se))) || - (npf_flags & (NPF_FLAG_FROM_IPV6 | NPF_FLAG_FROM_IPV4))) { - decision = nat64_hook(&action, nif_config, &se, ifp, - npc, m, dir, &npf_flags); - - /* - * Note, after this point session may be IPv6 and - * packet IPv4 (or vice-versa) - */ - } - } - if (se) { if (decision != NPF_DECISION_BLOCK) { /* N.B. se may be consumed */ - error = npf_session_activate(se, ifp, npc, *m); - if (error == 0) { + rc = npf_session_activate(se, ifp, npc, *m); + if (rc == 0) { /* Attach the session to the packet */ struct pktmbuf_mdata *mdata = pktmbuf_mdata(*m); mdata->md_session = se; pktmbuf_mdata_set(*m, PKT_MDATA_SESSION); + + /* Save session stats. 
*/ + npf_save_stats(se, dir, + rte_pktmbuf_pkt_len(*m)); } else { - if (error != -ENOSTR) + if (rc != -NPF_RC_ENOSTR) decision = NPF_DECISION_BLOCK; } } else if (!npf_session_is_active(se)) { npf_session_destroy(se); - } else if (error) { + } else if (rc < 0) { pktmbuf_mdata_clear(*m, PKT_MDATA_SESSION); npf_session_expire(se); } @@ -440,17 +459,23 @@ npf_hook_track(struct ifnet *in_ifp, struct rte_mbuf **m, result: /* - * Generate any ICMP or ICMPv6 errors; the original packet is - * always blocked (i.e. decision == NPF_DECISION_BLOCK). - * Only IPv4 ICMP 'Too Big' for the moment. + * Can jump here blocking the packet due to failing to cache the + * packet, or errors returned trying to create or lookup a session. */ if (in_ifp && unlikely(too_big)) { + /* + * Generate any ICMP or ICMPv6 "too big" errors. + * Only IPv4 ICMP 'Too Big' for the moment. + */ IPSTAT_INC_IFP(ifp, IPSTATS_MIB_FRAGFAILS); icmp_error_out(in_ifp, *m, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htons(ifp->if_mtu), ifp); } + /* Increment return code counter */ + npf_rc_inc(ifp, ETH2RCT(eth_type), PFIL2RC(dir), rc, decision); + return (npf_result_t) { .decision = decision, .action = action, @@ -474,12 +499,15 @@ npf_init(void) npf_ruleset_gc_init(); npf_state_stats_create(); nat_pool_init(); + rldb_init(); int rc = npf_attpt_item_set_up(NPF_ATTACH_TYPE_GLOBAL, "", &npf_global_config, NULL); if (rc != 0) RTE_LOG(ERR, DATAPLANE, "failed to register global rulesets " "with NPF\n"); + else + npf_gbl_attach_point_init(); } void npf_cleanup(void) @@ -489,6 +517,7 @@ void npf_cleanup(void) npf_if_cleanup(); npf_state_stats_destroy(); nat_pool_uninit(); + rldb_cleanup(); } static int @@ -512,6 +541,10 @@ npf_local_dnat(struct rte_mbuf **m, npf_cache_t *npc, npf_session_t *se) bool npf_local_fw(struct ifnet *ifp, struct rte_mbuf **m, uint16_t ether_type) { struct npf_if *nif = rcu_dereference(ifp->if_npf); + bool rv = false; + npf_result_t result = { .decision = NPF_DECISION_UNMATCHED }; + int rc = 
NPF_RC_UNMATCHED; + bool rc_inc = false; /* set true to increment rc counts */ /* * If there is no npf config on the input interface then jump straight @@ -523,12 +556,15 @@ bool npf_local_fw(struct ifnet *ifp, struct rte_mbuf **m, uint16_t ether_type) const struct npf_config *npf_config = npf_if_conf(nif); npf_cache_t npc; - void *n_ptr = pktmbuf_mtol3(*m, void *); npf_cache_init(&npc); - if (!npf_cache_all_at(&npc, *m, n_ptr, ether_type, false)) - return true; /* discard */ + rc = npf_cache_all(&npc, *m, ether_type); + if (rc < 0) { + rc_inc = true; + rv = true; /* discard */ + goto end; + } /* Find the session */ npf_session_t *se = npf_session_find_cached(*m); @@ -537,9 +573,34 @@ bool npf_local_fw(struct ifnet *ifp, struct rte_mbuf **m, uint16_t ether_type) if (se && npf_session_get_if_index(se) != ifp->if_index) se = NULL; + /* If "passing" session found - skip the zones ruleset inspection */ npf_rule_t *rl = NULL; - if (se) - (void)npf_session_is_pass(se, &rl); + if (se) { + rc_inc = true; + + if ((npf_session_is_pass(se, &rl) || + npf_session_is_nat_pinhole(se, PFIL_IN) || + npf_session_is_child(se))) { + rc = NPF_RC_PASS; + goto skip_local_zone; + } + } + + /* + * Local zone firewall + */ + if (unlikely(npf_zone_local_is_set() && + !npf_iscached(&npc, NPC_IPFRAG))) { + + rc_inc = true; + if (npf_local_zone_hook(ifp, m, &npc, se, nif)) { + rc = NPF_RC_BLOCK; + rv = true; /* discard */ + goto end; + } + } + +skip_local_zone: /* Log any firewall matched rule now */ if (unlikely(npf_rule_has_rproc_logger(rl))) @@ -549,42 +610,117 @@ bool npf_local_fw(struct ifnet *ifp, struct rte_mbuf **m, uint16_t ether_type) * Do we need to DNAT? Either we bypassed DNAT in npf_hook_track, or * we undid DNAT above. 
*/ - if (se && npf_active(npf_config, NPF_DNAT) && + if (se && (npf_session_is_local_zone_nat(se) || + npf_active(npf_config, NPF_DNAT)) && !pktmbuf_mdata_exists(*m, PKT_MDATA_DNAT)) { - if (npf_local_dnat(m, &npc, se)) - return true; + rc_inc = true; + if (npf_local_dnat(m, &npc, se)) { + rc = NPF_RC_BLOCK; + rv = true; + goto end; + } } /* * Local firewall is done post-DNAT */ if (npf_active(npf_config, NPF_LOCAL)) { - npf_result_t result; + rc_inc = true; result = npf_hook_notrack(npf_get_ruleset(npf_config, NPF_RS_LOCAL), m, ifp, PFIL_IN, 0, - ether_type); - if (result.decision == NPF_DECISION_BLOCK) - return true; /* discard */ - else if (result.decision == NPF_DECISION_PASS) - return false; /* retain */ + ether_type, &rc); + + if (result.decision == NPF_DECISION_BLOCK) { + rv = true; /* discard */ + goto end; + } else if (result.decision == NPF_DECISION_PASS) { + rc = NPF_RC_PASS; + rv = false; /* retain */ + goto end; + } /* No match, so try the global firewall rules. */ } global_fw: if (npf_active(npf_global_config, NPF_LOCAL)) { - npf_result_t result; + rc_inc = true; result = npf_hook_notrack(npf_get_ruleset(npf_global_config, NPF_RS_LOCAL), m, ifp, PFIL_IN, 0, - ether_type); - if (result.decision == NPF_DECISION_BLOCK) - return true; /* discard */ + ether_type, &rc); + + if (result.decision == NPF_DECISION_BLOCK) { + rv = true; /* discard */ + } else if (result.decision == NPF_DECISION_PASS) { + rc = NPF_RC_PASS; + rv = false; /* retain */ + } + } + +end: + /* Increment return code counter? 
*/ + if (rc_inc) + npf_rc_inc(ifp, NPF_RCT_LOC, NPF_RC_IN, rc, result.decision); + + return rv; +} + +bool npf_originate_fw(struct ifnet *ifp, uint16_t npf_flags, + struct rte_mbuf **m, uint16_t ether_type) +{ + struct npf_if *nif = rcu_dereference(ifp->if_npf); + const struct npf_config *npf_config = npf_if_conf(nif); + bool rv = false; + npf_result_t result = { .decision = NPF_DECISION_UNMATCHED }; + int rc = NPF_RC_UNMATCHED; + bool rc_inc = false; + + /* + * Local zone firewall will be done in fw_out processing + */ + + if (npf_active(npf_config, NPF_ORIGINATE)) { + + rc_inc = true; + result = npf_hook_notrack(npf_get_ruleset(npf_config, + NPF_RS_ORIGINATE), m, ifp, PFIL_OUT, npf_flags, + ether_type, &rc); + + if (result.decision == NPF_DECISION_BLOCK) { + rv = true; /* discard */ + goto end; + } else if (result.decision == NPF_DECISION_PASS) { + rc = NPF_RC_PASS; + rv = false; /* retain */ + goto end; + } + } + + /* No match, so try the global firewall rules. */ + if (npf_active(npf_global_config, NPF_ORIGINATE)) { + + rc_inc = true; + result = npf_hook_notrack(npf_get_ruleset(npf_global_config, + NPF_RS_ORIGINATE), m, ifp, PFIL_OUT, npf_flags, + ether_type, &rc); + + if (result.decision == NPF_DECISION_BLOCK) { + rv = true; /* discard */ + } else if (result.decision == NPF_DECISION_PASS) { + rc = NPF_RC_PASS; + rv = false; /* retain */ + } } - return false; +end: + /* Increment return code counter? 
*/ + if (rc_inc) + npf_rc_inc(ifp, NPF_RCT_LOC, NPF_RC_OUT, rc, result.decision); + + return rv; } /* @@ -605,6 +741,7 @@ void npf_reset_config(enum cont_src_en cont_src) npf_sess_limit_inst_destroy(); npf_timeout_reset(); npf_alg_reset(true); + npf_zone_inst_destroy(); } void npf_print_state_stats(json_writer_t *json) @@ -612,29 +749,6 @@ void npf_print_state_stats(json_writer_t *json) npf_state_stats_json(json); } -/* - * Pass-through to write NAT json for a dataplane session - */ -int npf_json_nat_session(json_writer_t *json, void *data) -{ - npf_session_t *se = data; - - return npf_session_json_nat(json, se); -} - -/* Get a custome session timeout if configured */ -uint32_t npf_custom_session_timeout(vrfid_t vrfid, uint16_t eth_type, - struct rte_mbuf *m) -{ - npf_cache_t npc; - - npf_cache_init(&npc); - if (unlikely(!npf_cache_all(&npc, m, eth_type))) - return 0; - - return npf_state_get_custom_timeout(vrfid, &npc, m); -} - /* Shim routine for determining whether this NPF session is natted */ bool npf_feature_is_nat(void *data) { diff --git a/src/npf_shim.h b/src/npf_shim.h index c1684c05..55d7de23 100644 --- a/src/npf_shim.h +++ b/src/npf_shim.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2011-2017 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -18,7 +18,7 @@ #include "npf/npf.h" #include "session/session.h" #include "util.h" -#include "vrf.h" +#include "vrf_internal.h" struct ifnet; /* Forward Declarations */ @@ -36,7 +36,7 @@ npf_result_t npf_hook_track(struct ifnet *in_ifp, struct rte_mbuf **m, uint16_t eth_type); npf_result_t npf_hook_notrack(const npf_ruleset_t *rlset, struct rte_mbuf **m, struct ifnet *ifp, int dir, uint16_t npf_flags, - uint16_t eth_type); + uint16_t eth_type, int *rcp); void npf_vrf_create(struct vrf *vrf); @@ -45,12 +45,10 @@ void npf_vrf_destroy(struct vrf *vrf); struct npf_config *vrf_get_npf_conf_rcu(vrfid_t vrf_id); bool npf_local_fw(struct ifnet *ifp, struct rte_mbuf **m, uint16_t ether_type); +bool npf_originate_fw(struct ifnet *ifp, uint16_t npf_flags, + struct rte_mbuf **m, uint16_t ether_type); void npf_reset_config(enum cont_src_en cont_src); void npf_print_state_stats(json_writer_t *json); -int npf_json_nat_session(json_writer_t *json, void *data); - -uint32_t npf_custom_session_timeout(vrfid_t vrfid, uint16_t eth_type, - struct rte_mbuf *m); bool npf_feature_is_nat(void *data); #endif /* NPF_SHIM */ diff --git a/src/nsh.c b/src/nsh.c index 127ff4ad..a34f04ee 100644 --- a/src/nsh.c +++ b/src/nsh.c @@ -1,7 +1,7 @@ /*- * Copyright (c) 2011-2016 by Brocade Communications Systems, Inc. * All rights reserved. - * Copyright (c) 2017, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017,2019, AT&T Intellectual Property. All rights reserved. 
* * SPDX-License-Identifier: LGPL-2.1-only */ @@ -17,274 +17,6 @@ #include "vplane_debug.h" #include "vplane_log.h" -static void nsh_write_base_hdr(void *nsh_start, enum nsh_np nxtproto, - int md_type, unsigned int mdata_size) -{ - struct nsh *nsh_base = nsh_start; - - nsh_base->bh_u.bh = nsh_base->sph_u.sph = 0; - nsh_base->nsh_ver = NSH_V0; - nsh_base->nsh_oam = 0; - nsh_base->nsh_crit = 1; - nsh_base->nsh_mdtype = md_type; - nsh_base->nsh_len = mdata_size/NSH_LEN_UNIT; - nsh_base->nsh_nxtproto = nxtproto; - nsh_base->nsh_spi = 0; - nsh_base->nsh_si = 1; - nsh_base->bh_u.bh = htonl(nsh_base->bh_u.bh); - nsh_base->sph_u.sph = htonl(nsh_base->sph_u.sph); -} - -/* add hdr with Type 1 metadata */ -int nsh_add_t1_hdr(struct rte_mbuf *pak, enum nsh_np nxtproto, - struct nsh_md_t1 *t1_hdr) -{ - unsigned int nsh_size = sizeof(struct nsh) + sizeof(struct nsh_md_t1); - char *nsh_start; - struct nsh_md_t1 *nsh_md; - - nsh_start = rte_pktmbuf_prepend(pak, nsh_size); - if (nsh_start == NULL) { - DP_DEBUG(NSH, ERR, NSH, - "Insufficient space for NSH Type 1\n"); - return -ENOMEM; - } - - nsh_write_base_hdr(nsh_start, nxtproto, NSH_MD_T1, nsh_size); - nsh_md = (struct nsh_md_t1 *)(nsh_start + sizeof(struct nsh)); - nsh_md->u1.md1_npc = htonl(t1_hdr->u1.md1_npc); - nsh_md->u2.md1_nsc = htonl(t1_hdr->u2.md1_nsc); - nsh_md->u3.md1_spc = htonl(t1_hdr->u3.md1_spc); - nsh_md->u4.md1_ssc = htonl(t1_hdr->u4.md1_ssc); - - return 0; -} - -/* get expected metadata size */ -int nsh_get_metadata_size(struct nsh_tlv *tlv_arr, unsigned int num_tlvs, - unsigned int *nsh_size) -{ - unsigned int i; - unsigned int attr_size = 0; - - for (i = 0; i < num_tlvs; i++) { - if (tlv_arr[i].ntlv_len % NSH_LEN_UNIT) { - DP_DEBUG(NSH, ERR, NSH, - "Invalid length %d specified for attribute %d\n", - tlv_arr[i].ntlv_len, i); - return -EINVAL; - } - attr_size += sizeof(struct nsh_md_t2) + tlv_arr[i].ntlv_len; - } - *nsh_size = sizeof(struct nsh) + attr_size; - - return 0; -} - -/* add hdr with Type 2 metadata */ 
-int nsh_add_t2_hdr(char *buf, unsigned int len, enum nsh_np nxtproto, - struct nsh_tlv *tlv_arr, unsigned int num_tlvs) -{ - struct nsh *nsh_start = (struct nsh *)buf; - unsigned int i; - struct nsh_md_t2 *md2h; - char *cursor; - uint32_t *attr_ptr; - - nsh_write_base_hdr(nsh_start, nxtproto, NSH_MD_T2, len); - cursor = (char *)((uintptr_t)nsh_start + sizeof(struct nsh)); - md2h = (struct nsh_md_t2 *)cursor; - attr_ptr = (uint32_t *)(cursor + sizeof(*md2h)); - for (i = 0; i < num_tlvs; i++) { - - if ((uintptr_t)md2h >= ((uintptr_t)nsh_start + len)) { - DP_DEBUG(NSH, ERR, NSH, - "Insufficient space to add TLV %d\n", i); - return -ENOMEM; - } - - md2h->md2_tlvc = tlv_arr[i].ntlv_class; - md2h->md2_crit = 1; - md2h->md2_rsvd = 0; - md2h->md2_type = tlv_arr[i].ntlv_type; - - if (md2h->md2_tlvc != NSH_MD_CLASS_BROCADE_VROUTER) - return -EINVAL; - - switch (md2h->md2_type) { - case NSH_MD_TYPE_IFINDEX_IN: - case NSH_MD_TYPE_IFINDEX_OUT: - case NSH_MD_TYPE_MWID: - case NSH_MD_TYPE_VRF_ID: - md2h->md2_len = sizeof(uint32_t)/NSH_LEN_UNIT; - *attr_ptr = htonl(*(uint32_t *)(tlv_arr[i].ntlv_val)); - break; - - case NSH_MD_TYPE_ADDR_IPv4_NH: - md2h->md2_len = - sizeof(struct in_addr)/NSH_LEN_UNIT; - *attr_ptr = htonl(*((uint32_t *)tlv_arr[i].ntlv_val)); - break; - case NSH_MD_TYPE_ADDR_IPv6_NH: - { - uint32_t *addr = - (uint32_t *)tlv_arr[i].ntlv_val; - int j; - - md2h->md2_len = - sizeof(struct in6_addr)/NSH_LEN_UNIT; - - for (j = 0; j < NSH_MD_LEN_ADDR_IPv6; j++) - attr_ptr[j] = htonl(addr[j]); - } - break; - default: - return -EINVAL; - } - cursor += sizeof(*md2h) + (md2h->md2_len * NSH_LEN_UNIT); - md2h->md2_hdr = htonl(md2h->md2_hdr); - md2h = (struct nsh_md_t2 *)cursor; - attr_ptr = (uint32_t *)(cursor + sizeof(*md2h)); - } - return 0; -} - - -static int nsh_extract_t1_md(struct nsh *nsh_base, struct nsh_tlv *tlv_arr, - unsigned int max_tlvs, unsigned int *num_tlvs) -{ - int i; - uint32_t *attr_ptr; - - if (nsh_base->nsh_len != NSH_T1_LEN) - return -EINVAL; - - if 
(max_tlvs < NSH_MD1_NUM_ATTRS) - return -ENOMEM; - - attr_ptr = (uint32_t *)((uintptr_t)nsh_base + - sizeof(*nsh_base)); - for (i = 0; i < NSH_MD1_NUM_ATTRS; i++) { - *attr_ptr = ntohl(*attr_ptr); - tlv_arr[i].ntlv_type = NSH_TLVC_UINT32; - tlv_arr[i].ntlv_len = sizeof(uint32_t); - tlv_arr[i].ntlv_val = attr_ptr; - attr_ptr++; - } - *num_tlvs = NSH_MD1_NUM_ATTRS; - - return 0; -} - -static int nsh_extract_t2_md(struct nsh *nsh_base, struct nsh_tlv *tlv_arr, - unsigned int max_tlvs, unsigned int *num_tlvs) -{ - unsigned int i; - uint32_t *attr_ptr; - char *cursor, *attr_end; - struct nsh_md_t2 *md2h; - uint16_t attr_size; - - if (nsh_base->nsh_len < NSH_T2_MIN_LEN) - return -EINVAL; - - cursor = (char *)((uintptr_t)nsh_base + sizeof(*nsh_base)); - attr_end = (char *)((uintptr_t)nsh_base + (nsh_base->nsh_len * - NSH_LEN_UNIT)); - i = 0; - while (cursor < attr_end) { - if (i >= max_tlvs) - return -ENOMEM; - - md2h = (struct nsh_md_t2 *)cursor; - attr_ptr = (uint32_t *)(cursor + sizeof(*md2h)); - md2h->md2_hdr = ntohl(md2h->md2_hdr); - attr_size = md2h->md2_len * NSH_LEN_UNIT; - if (md2h->md2_tlvc != NSH_MD_CLASS_BROCADE_VROUTER) - return -EINVAL; - - switch (md2h->md2_type) { - case NSH_MD_TYPE_IFINDEX_IN: - case NSH_MD_TYPE_IFINDEX_OUT: - case NSH_MD_TYPE_MWID: - case NSH_MD_TYPE_VRF_ID: - if (unlikely(attr_size != sizeof(uint32_t))) - return -EINVAL; - - *attr_ptr = ntohl(*attr_ptr); - break; - - case NSH_MD_TYPE_ADDR_IPv4_NH: - if (attr_size != sizeof(struct in_addr)) - return -EINVAL; - *attr_ptr = ntohl(*attr_ptr); - break; - case NSH_MD_TYPE_ADDR_IPv6_NH: - { - uint32_t *addr = (uint32_t *) attr_ptr; - int j; - - if (attr_size != sizeof(struct in6_addr)) - return -EINVAL; - - for (j = 0; j < NSH_MD_LEN_ADDR_IPv6; j++) - addr[j] = htonl(addr[j]); - } - break; - default: - return -EINVAL; - } - tlv_arr[i].ntlv_class = md2h->md2_tlvc; - tlv_arr[i].ntlv_type = md2h->md2_type; - tlv_arr[i].ntlv_len = - (md2h->md2_len * NSH_LEN_UNIT); - tlv_arr[i].ntlv_val = attr_ptr; 
- cursor += (sizeof(*md2h) + tlv_arr[i].ntlv_len); - i++; - } - *num_tlvs = i; - return 0; -} - - -/* parse hdr and extract fields into tlv array. - * No additional memory is allocated. TLV value pointers point - * into payload of buffer - */ -int nsh_extract(struct rte_mbuf *pak, struct nsh **nsh, struct nsh_tlv *tlv_arr, - unsigned int max_tlvs, unsigned int *num_tlvs) -{ - int err; - struct nsh *nsh_start; - - nsh_start = rte_pktmbuf_mtod(pak, struct nsh *); - - nsh_start->bh_u.bh = ntohl(nsh_start->bh_u.bh); - nsh_start->sph_u.sph = ntohl(nsh_start->sph_u.sph); - - if (nsh_start->nsh_mdtype == NSH_MD_T1) - err = nsh_extract_t1_md(nsh_start, tlv_arr, max_tlvs, num_tlvs); - else if (nsh_start->nsh_mdtype == NSH_MD_T2) - err = nsh_extract_t2_md(nsh_start, tlv_arr, max_tlvs, num_tlvs); - else - err = -EINVAL; - - if (err != 0) - return err; - - DP_DEBUG(NSH, INFO, NSH, - "Rcvd NSH (%d TLVs, Size %ld): BH = 0x%x, SPH = 0x%x\n", - *num_tlvs, (nsh_start->nsh_len * NSH_LEN_UNIT), - nsh_start->bh_u.bh, nsh_start->sph_u.sph); - - *nsh = nsh_start; - if (nsh_start->nsh_nxtproto == NSH_NP_IPv4 || - nsh_start->nsh_nxtproto == NSH_NP_IPv6) - pak->l2_len = 0; - rte_pktmbuf_adj(pak, (nsh_start->nsh_len * NSH_LEN_UNIT)); - return 0; -} - int nsh_get_payload(struct nsh *nsh_start, enum nsh_np *nxtproto, void **nsh_payload) { diff --git a/src/nsh.h b/src/nsh.h index 7a7c2adb..279dd2b4 100644 --- a/src/nsh.h +++ b/src/nsh.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015-2017 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -201,26 +201,6 @@ struct nsh_tlv { void *ntlv_val; }; -/* add hdr with Type 1 metadata */ -int nsh_add_t1_hdr(struct rte_mbuf *pak, enum nsh_np nxtproto, - struct nsh_md_t1 *t1_hdr); - -/* get expected size of metadata */ -int nsh_get_metadata_size(struct nsh_tlv *tlv_arr, unsigned int num_tlvs, - unsigned int *nsh_size); - -/* add hdr with Type 2 metadata */ -int nsh_add_t2_hdr(char *buf, unsigned int len, enum nsh_np nxtproto, - struct nsh_tlv *tlv_arr, unsigned int num_tlvs); - -/* parse hdr and extract fields into tlv array. - * No additional memory is allocated. TLV value pointers point - * into payload of buffer - */ -int nsh_extract(struct rte_mbuf *pak, struct nsh **nsh_start, - struct nsh_tlv *tlv_arr, unsigned int max_tlvs, - unsigned int *num_tlvs); - /* * Parse hdr, return payload proto and pointer to payload */ int nsh_get_payload(struct nsh *nsh_start, enum nsh_np *nxtproto, diff --git a/src/pathmonitor/pathmonitor.h b/src/pathmonitor/pathmonitor.h index 2e8b2647..1cae2126 100644 --- a/src/pathmonitor/pathmonitor.h +++ b/src/pathmonitor/pathmonitor.h @@ -1,7 +1,7 @@ /* * Path monitor dataplane code * - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2017 by Brocade Communications Systems, Inc. * All rights reserved. * diff --git a/src/pathmonitor/pathmonitor_cmds.c b/src/pathmonitor/pathmonitor_cmds.c index f673c2b7..3f96f165 100644 --- a/src/pathmonitor/pathmonitor_cmds.c +++ b/src/pathmonitor/pathmonitor_cmds.c @@ -3,7 +3,7 @@ * * Copyright (c) 2017 by Brocade Communications Systems, Inc. * All rights reserved. - * Copyright (c) 2017, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017,2019, AT&T Intellectual Property. All rights reserved. 
* * SPDX-License-Identifier: LGPL-2.1-only * @@ -24,7 +24,7 @@ * Deletion of an entry mirrors the creation logic: RPROC destructor * call(s) and the receipt of a "delete" command from monitord. * - * All 4 operations take place in the context of the control (master) + * All 4 operations take place in the context of the control (main) * thread, i.e. these commands are serialized. * * The "init" command is used to establish the (user configured) initial @@ -298,7 +298,7 @@ pathmon_show(FILE *f) * * Take care if any new commands are introduced; the "init" and "delete" * commands (as well as the RPROC functions) are serialized through the - * master thread. The other commands ("show" & "update") operate on the + * main thread. The other commands ("show" & "update") operate on the * console thread. */ int diff --git a/src/pd_show.c b/src/pd_show.c index bbf31315..b0f9ce3e 100644 --- a/src/pd_show.c +++ b/src/pd_show.c @@ -11,11 +11,13 @@ #include #include "json_writer.h" +#include "mpls/mpls_label_table.h" #include "pd_show.h" #include "route.h" #include "vplane_log.h" #include "fal.h" #include "ipmc_pd_show.h" +#include "vrf_internal.h" static const char * const pd_obj_state_names[] = { "full", @@ -55,6 +57,10 @@ static const struct pd_show_cmd pd_show_cmd_table[] = { mroute_get_pd_subset_data, "Show route" }, { "mroute6", NULL, mroute6_hw_stats_get, mroute6_get_pd_subset_data, "Show route" }, + { "mpls-route", NULL, mpls_label_table_hw_stats_get, + mpls_label_table_get_pd_subset_data, "Show route" }, + { "vrf", NULL, vrf_table_hw_stats_get, + vrf_table_get_pd_subset_data, "Show VRF" }, { NULL, NULL, NULL, NULL, NULL }, }; @@ -75,7 +81,7 @@ static int pd_show_dataplane(FILE *f, const char *name, show_hw = false; for (cmd = pd_show_cmd_table; cmd->name; ++cmd) { - if (name && strcmp(cmd->name, name)) + if (name && (strcmp(cmd->name, name) != 0)) continue; if (subset != PD_OBJ_STATE_LAST) { @@ -123,7 +129,7 @@ static enum pd_obj_state pd_obj_state_parse(const char *name) 
{ int i; - for (i = PD_OBJ_STATE_PARTIAL; i < PD_OBJ_STATE_LAST; i++) + for (i = PD_OBJ_STATE_FULL; i < PD_OBJ_STATE_LAST; i++) if (strcasecmp(pd_obj_state_names[i], name) == 0) return i; @@ -165,6 +171,8 @@ enum pd_obj_state fal_state_to_pd_state(int fal_state) switch (fal_state) { case 0: return PD_OBJ_STATE_FULL; + case FAL_RC_NOT_REQ: + return PD_OBJ_STATE_NOT_NEEDED; case -ENOSPC: return PD_OBJ_STATE_NO_RESOURCE; case -EOPNOTSUPP: @@ -172,3 +180,14 @@ enum pd_obj_state fal_state_to_pd_state(int fal_state) } return PD_OBJ_STATE_ERROR; } + +bool fal_state_is_obj_present(enum pd_obj_state pd_obj_state) +{ + switch (pd_obj_state) { + case PD_OBJ_STATE_FULL: + case PD_OBJ_STATE_PARTIAL: + return true; + default: + return false; + } +} diff --git a/src/pd_show.h b/src/pd_show.h index 43f10753..4f154dde 100644 --- a/src/pd_show.h +++ b/src/pd_show.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, AT&T Intellectual Property. + * Copyright (c) 2018-2019, AT&T Intellectual Property. * All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only @@ -39,4 +39,7 @@ struct pd_obj_state_and_flags { int cmd_pd(FILE *f, int argc, char **argv); enum pd_obj_state fal_state_to_pd_state(int fal_state); + +bool fal_state_is_obj_present(enum pd_obj_state pd_obj_state); + #endif /* PD_SHOW_H */ diff --git a/src/pipeline/meson.build b/src/pipeline/meson.build new file mode 100644 index 00000000..d976e4d4 --- /dev/null +++ b/src/pipeline/meson.build @@ -0,0 +1,157 @@ +# SPDX-License-Identifier: LGPL-2.1-only +# Copyright (c) 2020, AT&T Intellectual Property. All rights reserved. 
+ +pipeline_sources = files( + 'pl_commands.c', + 'pl_node.c', + 'pl_node_boot.c' +) + +pipeline_node_sources = files( + 'nodes/l2_bridge_in.c', + 'nodes/l2_capture.c', + 'nodes/cross_connect/l2_cross_connect_node.c', + 'nodes/cross_connect/l2_cross_connect_cmd.c', + 'nodes/cross_connect/cross_connect.c', + 'nodes/ipv4_drop.c', + 'nodes/ipv6_drop.c', + 'nodes/l2_consume.c', + 'nodes/l2_ether_in.c', + 'nodes/l2_ether_forward.c', + 'nodes/l2_ether_lookup.c', + 'nodes/l2_hw_hdr.c', + 'nodes/l2_local.c', + 'nodes/l2_output.c', + 'nodes/l2_portmonitor.c', + 'nodes/l2_portmonitor_hw.c', + 'nodes/l2_vlan_mod.c', + 'nodes/pppoe/l2_pppoe_node.c', + 'nodes/pppoe/l2_pppoe_cmd.c', + 'nodes/pppoe/pppoe.c', + 'nodes/l2_sw_vlan.c', + 'nodes/l3_acl.c', + 'nodes/l3_arp.c', + 'nodes/l3_dpi.c', + 'nodes/l3_fw_in.c', + 'nodes/l3_fw_out.c', + 'nodes/l3_nat64.c', + 'nodes/l3_pbr.c', + 'nodes/l3_tcp_mss.c', + 'nodes/l3_v4_cgnat.c', + 'nodes/l3_v4_defrag.c', + 'nodes/l3_v4_encap.c', + 'nodes/l3_v4_gre.c', + 'nodes/l3_v4_ipsec.c', + 'nodes/l3_v4_l2tpv3.c', + 'nodes/l3_v4_l4.c', + 'nodes/l3_v4_no_address.c', + 'nodes/l3_v4_no_forwarding.c', + 'nodes/l3_v4_out.c', + 'nodes/l3_v4_out_spath.c', + 'nodes/l3_v4_post_route_lookup.c', + 'nodes/l3_v4_route_lookup.c', + 'nodes/l3_v4_rpf.c', + 'nodes/l3_v4_udp.c', + 'nodes/l3_v4_val.c', + 'nodes/l3_v6_defrag.c', + 'nodes/l3_v6_encap.c', + 'nodes/l3_v6_ipsec.c', + 'nodes/l3_v6_l4.c', + 'nodes/l3_v6_no_address.c', + 'nodes/l3_v6_no_forwarding.c', + 'nodes/l3_v6_out.c', + 'nodes/l3_v6_out_spath.c', + 'nodes/l3_v6_post_route_lookup.c', + 'nodes/l3_v6_route_lookup.c', + 'nodes/l3_v6_udp.c', + 'nodes/l3_v6_val.c', + 'nodes/l3_v6_nptv6.c', + 'nodes/term.c', + 'nodes/term_drop.c' +) + +pl_gen_fused_opts = [ + '--include', 'pl_fused_gen.h', + '--include', 'nodes/pl_nodes_common.h', + '--include', 'pl_fused.h', + '--entry', 'vyatta:ether-forward', + '--entry', 'vyatta:ether-in', + '--entry', 'vyatta:arp-in-nothot', + '--entry', 'vyatta:ipv4-validate', + 
'--entry', 'vyatta:ipv6-validate', + '--entry', 'vyatta:ipv4-route-lookup', + '--entry', 'vyatta:ipv4-out', + '--entry', 'vyatta:ipv6-out', + '--entry', 'vyatta:ipv4-out-spath', + '--entry', 'vyatta:ipv6-out-spath', + '--entry', 'vyatta:ipv6-defrag-out-spath', + '--entry', 'vyatta:ipv4-encap', + '--entry', 'vyatta:ipv6-encap', + '--entry', 'vyatta:ipv4-encap-only', + '--entry', 'vyatta:ipv6-encap-only', + '--entry', 'vyatta:ipv4-l4', + '--entry', 'vyatta:ipv6-l4', + '--entry', 'vyatta:l2-consume', + '--entry', 'vyatta:l2-local', + '--entry', 'vyatta:l2-output', + '--entry', 'vyatta:term-drop', + '--entry', 'vyatta:ipv4-drop', + '--entry', 'vyatta:ipv6-drop', + '--feature-point', 'vyatta:ether-lookup', + '--feature-point', 'vyatta:ipv4-drop', + '--feature-point', 'vyatta:ipv4-l4', + '--feature-point', 'vyatta:ipv4-validate', + '--feature-point', 'vyatta:ipv4-route-lookup', + '--feature-point', 'vyatta:ipv4-out', + '--feature-point', 'vyatta:ipv4-out-spath', + '--feature-point', 'vyatta:ipv4-encap', + '--feature-point', 'vyatta:ipv4-udp-in', + '--feature-point', 'vyatta:ipv6-drop', + '--feature-point', 'vyatta:ipv6-l4', + '--feature-point', 'vyatta:ipv6-validate', + '--feature-point', 'vyatta:ipv6-route-lookup', + '--feature-point', 'vyatta:ipv6-out', + '--feature-point', 'vyatta:ipv6-out-spath', + '--feature-point', 'vyatta:ipv6-encap', + '--feature-point', 'vyatta:ipv6-udp-in', + '--feature-point', 'vyatta:l2-consume', + '--feature-point', 'vyatta:l2-local', + '--feature-point', 'vyatta:l2-output', + '--feature-point', 'vyatta:term-drop' +] + +pl_gen_fused = files('../../scripts/pl_gen_fused') + +pl_fused_gen_h = custom_target('pl_fused_gen.h', + output: 'pl_fused_gen.h', + input: pipeline_node_sources, + command: [pl_gen_fused, pl_gen_fused_opts, '--header-out', '@OUTPUT@', '@INPUT@'] +) + +pl_fused_gen_c = custom_target('pl_fused_gen.c', + output: 'pl_fused_gen.c', + input: pipeline_node_sources, + command: [pl_gen_fused, pl_gen_fused_opts, '--impl-out', 
'@OUTPUT@', '@INPUT@'], + depends: pl_fused_gen_h +) + +pipeline_inc = include_directories('.') + +# TODO: Consider making a static_library +# pipeline_lib = static_library('pipeline', +# sources: [ +# pl_fused_gen_c, +# pipeline_sources, +# pipeline_node_sources +# ], +# include_directories: [public_include, internal_inc], +# dependencies: [ +# dpdk_dep +# ] +# ) + +pipeline_dep = declare_dependency( + include_directories: pipeline_inc, + sources: [pl_fused_gen_h], + # link_with: pipeline_lib +) diff --git a/src/pipeline/nodes/cross_connect/cross_connect.c b/src/pipeline/nodes/cross_connect/cross_connect.c index af62e1a2..cbde8925 100644 --- a/src/pipeline/nodes/cross_connect/cross_connect.c +++ b/src/pipeline/nodes/cross_connect/cross_connect.c @@ -1,7 +1,7 @@ /* * Cross-Connect * - * Copyright (c) 2018-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2018-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2014-2016 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -21,12 +21,12 @@ #include "if_var.h" #include "main.h" #include "pipeline/nodes/pl_nodes_common.h" -#include "pktmbuf.h" +#include "pktmbuf_internal.h" #include "pl_node.h" #include "urcu.h" #include "util.h" #include "vplane_log.h" -#include "vrf.h" +#include "vrf_internal.h" /* * conn_cfg_list are used to store @@ -109,12 +109,12 @@ cross_connect_unlink(struct ifnet *src_ifp, bool config) static void conn_update(const char *ifname1, const char *ifname2) { - struct ifnet *src_ifp = ifnet_byifname(ifname1); + struct ifnet *src_ifp = dp_ifnet_byifname(ifname1); bool insert = false; struct conn_session *session; if (src_ifp) - cross_connect_link(src_ifp, ifnet_byifname(ifname2), + cross_connect_link(src_ifp, dp_ifnet_byifname(ifname2), true); session = conn_session_byname(ifname1); @@ -150,7 +150,7 @@ int cross_connect_set(const XConnectConfig__CommandType cmd, const char *ifname1, const char *ifname2) { if (cmd == XCONNECT_CONFIG__COMMAND_TYPE__REMOVE) { - struct ifnet *src_ifp = ifnet_byifname(ifname1); + struct ifnet *src_ifp = dp_ifnet_byifname(ifname1); if (src_ifp) cross_connect_unlink(src_ifp, true); @@ -178,21 +178,21 @@ void cross_connect_rename(struct ifnet *ifp, const char *ifname) cds_list_for_each_entry_rcu(conn, &conn_cfg_list, conn_list) { if (strncmp(conn->local_if_name, ifname, IFNAMSIZ) == 0 || strncmp(conn->peer_if_name, ifname, IFNAMSIZ) == 0) { - struct ifnet *src_ifp = ifnet_byifname( + struct ifnet *src_ifp = dp_ifnet_byifname( conn->local_if_name); if (!src_ifp) continue; cross_connect_unlink(src_ifp, false); cross_connect_link(src_ifp, - ifnet_byifname(conn->peer_if_name), + dp_ifnet_byifname( + conn->peer_if_name), false); } } } -static void notify_cross_connect_new_link(struct ifnet *intf, - uint32_t idx __unused) +static void notify_cross_connect_new_link(struct ifnet *intf) { struct conn_session *conn; @@ -200,11 +200,12 @@ static void notify_cross_connect_new_link(struct ifnet *intf, if (strncmp(conn->local_if_name, intf->if_name, 
IFNAMSIZ) == 0) cross_connect_link(intf, - ifnet_byifname(conn->peer_if_name), + dp_ifnet_byifname( + conn->peer_if_name), true); if (strncmp(conn->peer_if_name, intf->if_name, IFNAMSIZ) == 0) { - struct ifnet *src_ifp = ifnet_byifname( + struct ifnet *src_ifp = dp_ifnet_byifname( conn->local_if_name); if (src_ifp) cross_connect_link(src_ifp, intf, false); @@ -223,7 +224,7 @@ static void conn_del_if(struct ifnet *ifp, void *arg) static void notify_cross_connect_del_link(struct ifnet *intf, uint32_t idx __unused) { - ifnet_walk(conn_del_if, intf); + dp_ifnet_walk(conn_del_if, intf); } static const struct dp_event_ops cross_connect_events = { diff --git a/src/pipeline/nodes/cross_connect/l2_cross_connect_cmd.c b/src/pipeline/nodes/cross_connect/l2_cross_connect_cmd.c index fd629f67..c608443a 100644 --- a/src/pipeline/nodes/cross_connect/l2_cross_connect_cmd.c +++ b/src/pipeline/nodes/cross_connect/l2_cross_connect_cmd.c @@ -2,7 +2,7 @@ * l2_cross_connect_cmd.c * * - * Copyright (c) 2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2018-2020, AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only */ @@ -36,13 +36,13 @@ static void conn_show_session(void *s, void *arg) session->local_if_name); jsonw_string_field(wr, "peer_ifname", session->peer_if_name); - ifp = ifnet_byifname(session->local_if_name); + ifp = dp_ifnet_byifname(session->local_if_name); jsonw_uint_field(wr, "local_ifindex", ifp ? ifp->if_index : 0); peer_ifp = ifp ? rcu_dereference(ifp->if_xconnect) : NULL; jsonw_uint_field(wr, "configured_peer_ifindex", peer_ifp ? peer_ifp->if_index : 0); - ifp = ifnet_byifname(session->peer_if_name); + ifp = dp_ifnet_byifname(session->peer_if_name); jsonw_uint_field(wr, "peer_ifindex", ifp ? 
ifp->if_index : 0); @@ -98,7 +98,7 @@ static int cmd_xconnect(struct pl_command *cmd) if (cmd->argc == 1) conn_session_walk(conn_show_session, wr); else { - struct ifnet *ifp = ifnet_byifname(cmd->argv[1]); + struct ifnet *ifp = dp_ifnet_byifname(cmd->argv[1]); if (ifp && ifp->if_softc) { struct l2tp_softc *sc = ifp->if_softc; conn_show_session(sc->sclp_session, wr); @@ -108,7 +108,8 @@ static int cmd_xconnect(struct pl_command *cmd) jsonw_destroy(&wr); return 0; - } else if (strcmp(cmd->argv[0], "clear") == 0) { + } + if (strcmp(cmd->argv[0], "clear") == 0) { if (cmd->argc == 1) l2tp_init_stats(NULL); else diff --git a/src/pipeline/nodes/cross_connect/l2_cross_connect_node.c b/src/pipeline/nodes/cross_connect/l2_cross_connect_node.c index 19f6d579..a1d6eac9 100644 --- a/src/pipeline/nodes/cross_connect/l2_cross_connect_node.c +++ b/src/pipeline/nodes/cross_connect/l2_cross_connect_node.c @@ -1,7 +1,7 @@ /* * l2_cross_connect_node.c * - * Copyright (c) 2018-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2018-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2016, 2017 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -18,7 +18,7 @@ #include "urcu.h" ALWAYS_INLINE unsigned int -cross_connect_process(struct pl_packet *pkt) +cross_connect_process(struct pl_packet *pkt, void *context __unused) { struct ifnet *ifp = pkt->in_ifp; struct rte_mbuf *m = pkt->mbuf; @@ -37,8 +37,7 @@ cross_connect_process(struct pl_packet *pkt) if (out_ifp->if_type == IFT_L2TPETH) { if (ifp->if_parent) pktmbuf_convert_rx_to_tx_vlan(m); - l2tp_output(out_ifp, m, - ifp->if_parent ? ifp->if_vlan : 0); + l2tp_output(out_ifp, m); } else { pkt->l2_proto = ETH_P_TEB; pkt->out_ifp = out_ifp; diff --git a/src/pipeline/nodes/ipv4_drop.c b/src/pipeline/nodes/ipv4_drop.c new file mode 100644 index 00000000..736b41cb --- /dev/null +++ b/src/pipeline/nodes/ipv4_drop.c @@ -0,0 +1,164 @@ +/* + * Copyright (c) 2020, AT&T Intellectual Property. 
All rights reserved. + * SPDX-License-Identifier: LGPL-2.1-only + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "compiler.h" +#include "ether.h" +#include "if_var.h" +#include "if/macvlan.h" +#include "main.h" +#include "pktmbuf_internal.h" +#include "pl_common.h" +#include "pl_fused.h" +#include "pl_node.h" +#include "pl_nodes_common.h" +#include "util.h" + +struct pl_node; + +/* + * Ipv4 drop feature instance is global, so we can store it in a global var. + */ +uint16_t ipv4_drop_features; + +static inline struct pl_node *ipv4_drop_feat_list_to_node(void) +{ + /* our imaginary node */ + return (struct pl_node *)&ipv4_drop_features; +} + +static inline uint16_t * +drop_node_to_ipv4_drop_feat_list(struct pl_node *node) +{ + /* the node is a fiction of our imagination */ + return (uint16_t *)node; +} + +ALWAYS_INLINE unsigned int +ipv4_drop_process_common(struct pl_packet *pkt, void *context __unused, + enum pl_mode mode) +{ + /* + * As this is a feature run once it is decided that the packet is + * to be dropped the features can not change that decision. It will + * still be dropped. The feature return value can not change that + * so don't check it. 
+ */ + switch (mode) { + case PL_MODE_FUSED: + pipeline_fused_ipv4_drop_features( + pkt, ipv4_drop_feat_list_to_node()); + break; + case PL_MODE_FUSED_NO_DYN_FEATS: + pipeline_fused_ipv4_drop_no_dyn_features( + pkt, ipv4_drop_feat_list_to_node()); + break; + case PL_MODE_REGULAR: + pl_node_invoke_enabled_features( + ipv4_drop_node_ptr, + ipv4_drop_feat_list_to_node(), + pkt); + break; + } + + if (pkt->in_ifp) + IPSTAT_INC_IFP(pkt->in_ifp, IPSTATS_MIB_INDISCARDS); + + rte_pktmbuf_free(pkt->mbuf); + pkt->mbuf = NULL; + + return IPV4_DROP_ACCEPT; +} + +ALWAYS_INLINE unsigned int +ipv4_drop_process(struct pl_packet *p, void *context) +{ + return ipv4_drop_process_common(p, context, PL_MODE_REGULAR); +} + +static int +ipv4_drop_feat_change(struct pl_node *node, + struct pl_feature_registration *feat, + enum pl_node_feat_action action) +{ + uint16_t *feature_list = drop_node_to_ipv4_drop_feat_list(node); + + return pl_node_feat_change_u16(feature_list, feat, action); +} + +ALWAYS_INLINE bool +ipv4_drop_feat_iterate(struct pl_node *node, bool first, + unsigned int *feature_id, void **context, + void **storage_ctx __unused) +{ + uint16_t *feature_list = drop_node_to_ipv4_drop_feat_list(node); + + /* No support for instance context at the moment */ + return pl_node_feat_iterate_u16(feature_list, first, + feature_id, context); +} + +static struct pl_node * +ipv4_drop_node_lookup(const char *name) +{ + if (strcmp(name, "all") == 0) + return ipv4_drop_feat_list_to_node(); + + return NULL; +} + +/* Register Node */ +PL_REGISTER_NODE(ipv4_drop_node) = { + .name = "vyatta:ipv4-drop", + .type = PL_PROC, + .handler = ipv4_drop_process, + .feat_change = ipv4_drop_feat_change, + .feat_iterate = ipv4_drop_feat_iterate, + .lookup_by_name = ipv4_drop_node_lookup, + .num_next = IPV4_DROP_NUM, + .next = { + [IPV4_DROP_ACCEPT] = "term-finish", + } +}; + +struct pl_node_registration *const ipv4_drop_node_ptr = + &ipv4_drop_node; + +/* + * show features ipv4_drop + */ +static int 
cmd_pl_show_feat_ipv4_drop(struct pl_command *cmd) +{ + json_writer_t *wr; + + wr = jsonw_new(cmd->fp); + if (!wr) + return 0; + + jsonw_name(wr, "features"); + jsonw_start_object(wr); + + jsonw_name(wr, "global"); + jsonw_start_array(wr); + pl_node_iter_features(ipv4_drop_node_ptr, &ipv4_drop_features, + pl_print_feats, wr); + jsonw_end_array(wr); + + jsonw_end_object(wr); + jsonw_destroy(&wr); + return 0; +} + +PL_REGISTER_OPCMD(pl_show_feat_ipv4_drop) = { + .cmd = "show features ipv4_drop", + .handler = cmd_pl_show_feat_ipv4_drop, +}; diff --git a/src/pipeline/nodes/ipv6_drop.c b/src/pipeline/nodes/ipv6_drop.c new file mode 100644 index 00000000..8b3b4bad --- /dev/null +++ b/src/pipeline/nodes/ipv6_drop.c @@ -0,0 +1,164 @@ +/* + * Copyright (c) 2020, AT&T Intellectual Property. All rights reserved. + * SPDX-License-Identifier: LGPL-2.1-only + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "compiler.h" +#include "ether.h" +#include "if_var.h" +#include "if/macvlan.h" +#include "main.h" +#include "pktmbuf_internal.h" +#include "pl_common.h" +#include "pl_fused.h" +#include "pl_node.h" +#include "pl_nodes_common.h" +#include "util.h" + +struct pl_node; + +/* + * Ipv6 drop feature instance is global, so we can store it in a global var. + */ +uint16_t ipv6_drop_features; + +static inline struct pl_node *ipv6_drop_feat_list_to_node(void) +{ + /* our imaginary node */ + return (struct pl_node *)&ipv6_drop_features; +} + +static inline uint16_t * +drop_node_to_ipv6_drop_feat_list(struct pl_node *node) +{ + /* the node is a fiction of our imagination */ + return (uint16_t *)node; +} + +ALWAYS_INLINE unsigned int +ipv6_drop_process_common(struct pl_packet *pkt, void *context __unused, + enum pl_mode mode) +{ + /* + * As this is a feature run once it is decided that the packet is + * to be dropped the features can not change that decision. It will + * still be dropped. 
The feature return value can not change that + * so don't check it. + */ + switch (mode) { + case PL_MODE_FUSED: + pipeline_fused_ipv6_drop_features( + pkt, ipv6_drop_feat_list_to_node()); + break; + case PL_MODE_FUSED_NO_DYN_FEATS: + pipeline_fused_ipv6_drop_no_dyn_features( + pkt, ipv6_drop_feat_list_to_node()); + break; + case PL_MODE_REGULAR: + pl_node_invoke_enabled_features( + ipv6_drop_node_ptr, + ipv6_drop_feat_list_to_node(), + pkt); + break; + } + + if (pkt->in_ifp) + IP6STAT_INC_IFP(pkt->in_ifp, IPSTATS_MIB_INDISCARDS); + + rte_pktmbuf_free(pkt->mbuf); + pkt->mbuf = NULL; + + return IPV6_DROP_ACCEPT; +} + +ALWAYS_INLINE unsigned int +ipv6_drop_process(struct pl_packet *p, void *context) +{ + return ipv6_drop_process_common(p, context, PL_MODE_REGULAR); +} + +static int +ipv6_drop_feat_change(struct pl_node *node, + struct pl_feature_registration *feat, + enum pl_node_feat_action action) +{ + uint16_t *feature_list = drop_node_to_ipv6_drop_feat_list(node); + + return pl_node_feat_change_u16(feature_list, feat, action); +} + +ALWAYS_INLINE bool +ipv6_drop_feat_iterate(struct pl_node *node, bool first, + unsigned int *feature_id, void **context, + void **storage_ctx __unused) +{ + uint16_t *feature_list = drop_node_to_ipv6_drop_feat_list(node); + + /* No support for instance context at the moment */ + return pl_node_feat_iterate_u16(feature_list, first, + feature_id, context); +} + +static struct pl_node * +ipv6_drop_node_lookup(const char *name) +{ + if (strcmp(name, "all") == 0) + return ipv6_drop_feat_list_to_node(); + + return NULL; +} + +/* Register Node */ +PL_REGISTER_NODE(ipv6_drop_node) = { + .name = "vyatta:ipv6-drop", + .type = PL_PROC, + .handler = ipv6_drop_process, + .feat_change = ipv6_drop_feat_change, + .feat_iterate = ipv6_drop_feat_iterate, + .lookup_by_name = ipv6_drop_node_lookup, + .num_next = IPV6_DROP_NUM, + .next = { + [IPV6_DROP_ACCEPT] = "term-finish", + } +}; + +struct pl_node_registration *const ipv6_drop_node_ptr = + 
&ipv6_drop_node; + +/* + * show features ipv6_drop + */ +static int cmd_pl_show_feat_ipv6_drop(struct pl_command *cmd) +{ + json_writer_t *wr; + + wr = jsonw_new(cmd->fp); + if (!wr) + return 0; + + jsonw_name(wr, "features"); + jsonw_start_object(wr); + + jsonw_name(wr, "global"); + jsonw_start_array(wr); + pl_node_iter_features(ipv6_drop_node_ptr, &ipv6_drop_features, + pl_print_feats, wr); + jsonw_end_array(wr); + + jsonw_end_object(wr); + jsonw_destroy(&wr); + return 0; +} + +PL_REGISTER_OPCMD(pl_show_feat_ipv6_drop) = { + .cmd = "show features ipv6_drop", + .handler = cmd_pl_show_feat_ipv6_drop, +}; diff --git a/src/pipeline/nodes/l2_bridge_in.c b/src/pipeline/nodes/l2_bridge_in.c index 9427f894..df0bbd4a 100644 --- a/src/pipeline/nodes/l2_bridge_in.c +++ b/src/pipeline/nodes/l2_bridge_in.c @@ -1,7 +1,7 @@ /* * l2_bridge_in.c * - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2016, 2017 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -13,23 +13,23 @@ #include #include "compiler.h" -#include "bridge.h" -#include "bridge_port.h" +#include "if/bridge/bridge.h" +#include "if/bridge/bridge_port.h" #include "if_var.h" #include "pl_common.h" #include "pl_fused.h" #include "urcu.h" static inline bool -bridge_has_vlan_filter(struct ifnet *master) +bridge_has_vlan_filter(struct ifnet *bridge) { - struct bridge_softc *sc = master->if_softc; + struct bridge_softc *sc = bridge->if_softc; return sc->scbr_vlan_filter; } ALWAYS_INLINE unsigned int -bridge_in_process(struct pl_packet *pkt) +bridge_in_process(struct pl_packet *pkt, void *context __unused) { struct ifnet *ifp = pkt->in_ifp; struct rte_mbuf *m = pkt->mbuf; diff --git a/src/pipeline/nodes/l2_capture.c b/src/pipeline/nodes/l2_capture.c index cd9c4a1a..d041cf8b 100644 --- a/src/pipeline/nodes/l2_capture.c +++ b/src/pipeline/nodes/l2_capture.c @@ -1,7 +1,7 @@ /* * l2_capture.c * - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2016, 2017 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -13,7 +13,7 @@ #include "pl_fused.h" ALWAYS_INLINE unsigned int -capture_in_process(struct pl_packet *pkt) +capture_in_process(struct pl_packet *pkt, void *context __unused) { struct rte_mbuf *m = pkt->mbuf; @@ -23,6 +23,17 @@ capture_in_process(struct pl_packet *pkt) return CAPTURE_IN_ACCEPT; } +ALWAYS_INLINE unsigned int +capture_out_process(struct pl_packet *pkt, void *context __unused) +{ + struct rte_mbuf *m = pkt->mbuf; + + if (capture_if_use_common_cap_points(pkt->out_ifp)) + capture_burst(pkt->out_ifp, &m, 1); + + return CAPTURE_OUT_ACCEPT; /* index into capture_out_node.next */ +} + /* Register Node */ PL_REGISTER_NODE(capture_in_node) = { .name = "vyatta:capture-in", @@ -41,3 +52,22 @@ PL_REGISTER_FEATURE(capture_ether_in_feat) = { .id = PL_ETHER_LOOKUP_FUSED_FEAT_CAPTURE, .visit_after = "sw-vlan-in", }; + +/* Register Node */ +PL_REGISTER_NODE(capture_out_node) = { + .name = "vyatta:capture-out", + .type = PL_PROC, + .handler = capture_out_process, + .num_next = CAPTURE_OUT_NUM, + .next = { + [CAPTURE_OUT_ACCEPT] = "term-noop", + } +}; + +PL_REGISTER_FEATURE(capture_l2_output_feat) = { + .name = "vyatta:capture-l2-output", + .node_name = "capture-out", + .feature_point = "l2-output", + .id = PL_L2_OUTPUT_FUSED_FEAT_CAPTURE_OUT, + .visit_after = "portmonitor-out", +}; diff --git a/src/pipeline/nodes/l2_consume.c b/src/pipeline/nodes/l2_consume.c new file mode 100644 index 00000000..939d1cc6 --- /dev/null +++ b/src/pipeline/nodes/l2_consume.c @@ -0,0 +1,156 @@ +/* + * Copyright (c) 2020, AT&T Intellectual Property. All rights reserved. 
+ * SPDX-License-Identifier: LGPL-2.1-only + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "compiler.h" +#include "ether.h" +#include "if_var.h" +#include "if/macvlan.h" +#include "main.h" +#include "pktmbuf_internal.h" +#include "pl_common.h" +#include "pl_fused.h" +#include "pl_node.h" +#include "pl_nodes_common.h" +#include "util.h" + +struct pl_node; + +/* + * L2 consume feature instance is global, so we can store it in a global var. + */ +uint16_t l2_consume_features; + +static inline struct pl_node *l2_consume_feat_list_to_node(void) +{ + /* our imaginary node */ + return (struct pl_node *)&l2_consume_features; +} + +static inline uint16_t * +drop_node_to_l2_consume_feat_list(struct pl_node *node) +{ + /* the node is a fiction of our imagination */ + return (uint16_t *)node; +} + +ALWAYS_INLINE unsigned int +l2_consume_process_common(struct pl_packet *pkt, void *context __unused, + enum pl_mode mode) +{ + switch (mode) { + case PL_MODE_FUSED: + if (!pipeline_fused_l2_consume_features( + pkt, l2_consume_feat_list_to_node())) + return L2_CONSUME_FINISH; + break; + case PL_MODE_FUSED_NO_DYN_FEATS: + if (!pipeline_fused_l2_consume_no_dyn_features( + pkt, l2_consume_feat_list_to_node())) + return L2_CONSUME_FINISH; + break; + case PL_MODE_REGULAR: + if (!pl_node_invoke_enabled_features( + l2_consume_node_ptr, + l2_consume_feat_list_to_node(), + pkt)) + return L2_CONSUME_FINISH; + break; + } + + return L2_CONSUME_ACCEPT; +} + +ALWAYS_INLINE unsigned int +l2_consume_process(struct pl_packet *p, void *context) +{ + return l2_consume_process_common(p, context, PL_MODE_REGULAR); +} + +static int +l2_consume_feat_change(struct pl_node *node, + struct pl_feature_registration *feat, + enum pl_node_feat_action action) +{ + uint16_t *feature_list = drop_node_to_l2_consume_feat_list(node); + + return pl_node_feat_change_u16(feature_list, feat, action); +} + +ALWAYS_INLINE bool +l2_consume_feat_iterate(struct pl_node *node, bool 
first, + unsigned int *feature_id, void **context, + void **storage_ctx __unused) +{ + uint16_t *feature_list = drop_node_to_l2_consume_feat_list(node); + + /* No support for instance context at the moment */ + return pl_node_feat_iterate_u16(feature_list, first, + feature_id, context); +} + +static struct pl_node * +l2_consume_node_lookup(const char *name) +{ + if (strcmp(name, "all") == 0) + return l2_consume_feat_list_to_node(); + + return NULL; +} + +/* Register Node */ +PL_REGISTER_NODE(l2_consume_node) = { + .name = "vyatta:l2-consume", + .type = PL_PROC, + .handler = l2_consume_process, + .feat_change = l2_consume_feat_change, + .feat_iterate = l2_consume_feat_iterate, + .lookup_by_name = l2_consume_node_lookup, + .num_next = L2_CONSUME_NUM, + .next = { + [L2_CONSUME_ACCEPT] = "term-noop", + [L2_CONSUME_FINISH] = "term-finish", + } +}; + +struct pl_node_registration *const l2_consume_node_ptr = + &l2_consume_node; + +/* + * show features l2_consume + */ +static int cmd_pl_show_feat_l2_consume(struct pl_command *cmd) +{ + json_writer_t *wr; + + wr = jsonw_new(cmd->fp); + if (!wr) + return 0; + + jsonw_name(wr, "features"); + jsonw_start_object(wr); + + jsonw_name(wr, "global"); + jsonw_start_array(wr); + pl_node_iter_features(l2_consume_node_ptr, &l2_consume_features, + pl_print_feats, wr); + jsonw_end_array(wr); + + jsonw_end_object(wr); + jsonw_destroy(&wr); + return 0; +} + +PL_REGISTER_OPCMD(pl_show_feat_l2_consume) = { + .cmd = "show features l2_consume", + .handler = cmd_pl_show_feat_l2_consume, +}; diff --git a/src/pipeline/nodes/l2_ether_forward.c b/src/pipeline/nodes/l2_ether_forward.c index f17fabaf..2a4f0186 100644 --- a/src/pipeline/nodes/l2_ether_forward.c +++ b/src/pipeline/nodes/l2_ether_forward.c @@ -1,7 +1,7 @@ /* * l2_ether_in.c * - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. 
* Copyright (c) 2016, 2017 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -28,28 +28,30 @@ #include "pl_fused.h" ALWAYS_INLINE unsigned int -ether_forward_process(struct pl_packet *pkt) +ether_forward_process(struct pl_packet *pkt, void *context __unused) { uint16_t et = ethhdr(pkt->mbuf)->ether_type; - if (likely(et == htons(ETHER_TYPE_IPv4))) + if (likely(et == htons(RTE_ETHER_TYPE_IPV4))) return ETHER_FORWARD_V4_ACCEPT; - if (likely(et == htons(ETHER_TYPE_IPv6))) + if (likely(et == htons(RTE_ETHER_TYPE_IPV6))) return ETHER_FORWARD_V6_ACCEPT; - if (et == htons(ETHER_TYPE_ARP)) + if (et == htons(RTE_ETHER_TYPE_ARP)) return ETHER_FORWARD_ARP_ACCEPT; - else if (et == htons(ETH_P_MPLS_UC)) + if (et == htons(ETH_P_MPLS_UC)) mpls_labeled_input(pkt->in_ifp, pkt->mbuf); else if (et == htons(ETH_P_PPP_DISC) || et == htons(ETH_P_PPP_SES)) return ETHER_FORWARD_PPPOE_ACCEPT; + else if (et == htons(ETH_P_SLOW)) + return ETHER_FORWARD_LOCAL; else if (unlikely(et != htons(ETH_P_LLDP))) { /* Assume 802.2 is used for IEEE control protocols */ if (unlikely(ntohs(et) > ETH_P_802_3_MIN)) { /* Drop unknown protocols */ if_incr_unknown(pkt->in_ifp); return ETHER_FORWARD_DROP; - } else - return ETHER_FORWARD_LOCAL; + } + return ETHER_FORWARD_LOCAL; } else { /* always LLDP packets through to kernel*/ return ETHER_FORWARD_LOCAL; @@ -62,9 +64,7 @@ ether_forward_process(struct pl_packet *pkt) PL_REGISTER_NODE(ether_forward_node) = { .name = "vyatta:ether-forward", .type = PL_PROC, - .init = NULL, .handler = ether_forward_process, - .disable = false, .num_next = ETHER_FORWARD_NUM, .next = { [ETHER_FORWARD_V4_ACCEPT] = "ipv4-validate", diff --git a/src/pipeline/nodes/l2_ether_in.c b/src/pipeline/nodes/l2_ether_in.c index 2301d352..66fadb0e 100644 --- a/src/pipeline/nodes/l2_ether_in.c +++ b/src/pipeline/nodes/l2_ether_in.c @@ -1,7 +1,7 @@ /* * l2_ether_in.c * - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. 
+ * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2016, 2017 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -13,17 +13,17 @@ #include #include "compiler.h" -#include "pktmbuf.h" +#include "pktmbuf_internal.h" #include "pl_common.h" #include "pl_fused.h" ALWAYS_INLINE unsigned int -ether_in_process(struct pl_packet *pkt) +ether_in_process(struct pl_packet *pkt, void *context __unused) { struct rte_mbuf *mbuf = pkt->mbuf; mbuf->tx_offload = 0; - pktmbuf_l2_len(mbuf) = ETHER_HDR_LEN; + dp_pktmbuf_l2_len(mbuf) = RTE_ETHER_HDR_LEN; return ETHER_IN_ACCEPT; } @@ -32,9 +32,7 @@ ether_in_process(struct pl_packet *pkt) PL_REGISTER_NODE(ether_in_node) = { .name = "vyatta:ether-in", .type = PL_PROC, - .init = NULL, .handler = ether_in_process, - .disable = false, .num_next = ETHER_IN_NUM, .next = { [ETHER_IN_ACCEPT] = "ether-lookup", diff --git a/src/pipeline/nodes/l2_ether_lookup.c b/src/pipeline/nodes/l2_ether_lookup.c index 3e7ad599..2c5ad766 100644 --- a/src/pipeline/nodes/l2_ether_lookup.c +++ b/src/pipeline/nodes/l2_ether_lookup.c @@ -1,7 +1,7 @@ /* * l2_ether_lookup.c * - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2016, 2017 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -19,9 +19,9 @@ #include "compiler.h" #include "ether.h" #include "if_var.h" -#include "macvlan.h" +#include "if/macvlan.h" #include "main.h" -#include "pktmbuf.h" +#include "pktmbuf_internal.h" #include "pl_common.h" #include "pl_fused.h" #include "pl_node.h" @@ -68,7 +68,7 @@ static struct ifnet *vlan_lookup(struct ifnet *ifp, ifstat = &ifp->if_data[lcore_id]; ++ifstat->ifi_ivlan; - vid = vid_from_pkt(m, ETHER_TYPE_VLAN); + vid = vid_from_pkt(m, RTE_ETHER_TYPE_VLAN); ifp = if_vlan_lookup(ifp, vid); if (!ifp) @@ -78,7 +78,7 @@ static struct ifnet *vlan_lookup(struct ifnet *ifp, goto drop; if_incr_in(ifp, m); - vid_decap(m, ETHER_TYPE_VLAN); + vid_decap(m, RTE_ETHER_TYPE_VLAN); } return ifp; @@ -97,11 +97,12 @@ no_vlan: __cold_label; * dp0portx [vlan ] [macvlan] */ ALWAYS_INLINE unsigned int -ether_lookup_process_common(struct pl_packet *pkt, enum pl_mode mode) +ether_lookup_process_common(struct pl_packet *pkt, void *context __unused, + enum pl_mode mode) { struct rte_mbuf *m = pkt->mbuf; struct ifnet *ifp = pkt->in_ifp; - const struct ether_hdr *eth; + const struct rte_ether_hdr *eth; struct if_data *ifstat; switch (mode) { @@ -125,13 +126,13 @@ ether_lookup_process_common(struct pl_packet *pkt, enum pl_mode mode) } eth = ethhdr(m); - if (unlikely(is_multicast_ether_addr(&eth->d_addr))) { + if (unlikely(rte_is_multicast_ether_addr(&eth->d_addr))) { ifstat = &ifp->if_data[dp_lcore_id()]; ifstat->ifi_imulticast++; macvlan_flood(ifp, m); - if (is_broadcast_ether_addr(&eth->d_addr)) { + if (rte_is_broadcast_ether_addr(&eth->d_addr)) { pkt->l2_pkt_type = L2_PKT_BROADCAST; pkt_mbuf_set_l2_traffic_type(pkt->mbuf, L2_PKT_BROADCAST); @@ -143,7 +144,8 @@ ether_lookup_process_common(struct pl_packet *pkt, enum pl_mode mode) } else { pkt->l2_pkt_type = L2_PKT_UNICAST; - if (unlikely(!ether_addr_equal(&ifp->eth_addr, &eth->d_addr)) && + if (unlikely(!rte_ether_addr_equal(&ifp->eth_addr, + &eth->d_addr)) && (!(m->ol_flags & PKT_RX_VLAN) || !ifp->if_vlantbl)) { struct ifnet *macvlan_ifp; @@ 
-189,9 +191,9 @@ no_address: __cold_label; } ALWAYS_INLINE unsigned int -ether_lookup_process(struct pl_packet *p) +ether_lookup_process(struct pl_packet *p, void *context) { - return ether_lookup_process_common(p, PL_MODE_REGULAR); + return ether_lookup_process_common(p, context, PL_MODE_REGULAR); } static int @@ -204,20 +206,37 @@ ether_lookup_feat_change(struct pl_node *node, return pl_node_feat_change_u16(&ifp->ether_in_features, feat, action); } +static int +ether_lookup_feat_change_all(struct pl_feature_registration *feat, + enum pl_node_feat_action action) +{ + return if_node_instance_feat_change_all(feat, action, + ether_lookup_feat_change); +} + ALWAYS_INLINE bool ether_lookup_feat_iterate(struct pl_node *node, bool first, - unsigned int *feature_id, void **context) + unsigned int *feature_id, void **context, + void **storage_ctx) { + bool ret; struct ifnet *ifp = ether_lookup_node_to_ifp(node); - return pl_node_feat_iterate_u16(&ifp->ether_in_features, first, - feature_id, context); + ret = pl_node_feat_iterate_u16(&ifp->ether_in_features, first, + feature_id, context); + if (ret) + *storage_ctx = if_node_instance_get_storage_internal( + ifp, + PL_FEATURE_POINT_ETHER_LOOKUP_ID, + *feature_id); + + return ret; } static struct pl_node * ether_lookup_node_lookup(const char *name) { - struct ifnet *ifp = ifnet_byifname(name); + struct ifnet *ifp = dp_ifnet_byifname(name); return ifp ? 
ifp_to_ether_lookup_node(ifp) : NULL; } @@ -227,8 +246,13 @@ PL_REGISTER_NODE(ether_lookup_node) = { .type = PL_PROC, .handler = ether_lookup_process, .feat_change = ether_lookup_feat_change, + .feat_change_all = ether_lookup_feat_change_all, .feat_iterate = ether_lookup_feat_iterate, .lookup_by_name = ether_lookup_node_lookup, + .feat_reg_context = if_node_instance_register_storage, + .feat_unreg_context = if_node_instance_unregister_storage, + .feat_get_context = if_node_instance_get_storage, + .feat_setup_cleanup_cb = if_node_instance_set_cleanup_cb, .num_next = ETHER_LOOKUP_NUM, .next = { [ETHER_LOOKUP_ACCEPT] = "ether-forward", @@ -239,3 +263,16 @@ PL_REGISTER_NODE(ether_lookup_node) = { struct pl_node_registration *const ether_lookup_node_ptr = &ether_lookup_node; + +/* + * show features ether_lookup [interface ] + */ +static int cmd_pl_show_feat_ether_lookup(struct pl_command *cmd) +{ + return if_node_instance_feat_print(cmd, ether_lookup_node_ptr); +} + +PL_REGISTER_OPCMD(pl_show_feat_ether_lookup) = { + .cmd = "show features ether_lookup", + .handler = cmd_pl_show_feat_ether_lookup, +}; diff --git a/src/pipeline/nodes/l2_hw_hdr.c b/src/pipeline/nodes/l2_hw_hdr.c index 4c4a13ee..0dbd9398 100644 --- a/src/pipeline/nodes/l2_hw_hdr.c +++ b/src/pipeline/nodes/l2_hw_hdr.c @@ -1,7 +1,7 @@ /* * l2_hw_hdr.c * - * Copyright (c) 2018-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2018-2020, AT&T Intellectual Property. All rights reserved. 
* * SPDX-License-Identifier: LGPL-2.1-only */ @@ -11,6 +11,7 @@ #include "ether.h" #include "fal_plugin.h" +#include "capture.h" #include "compiler.h" #include "main.h" #include "pl_common.h" @@ -31,7 +32,8 @@ static int (*l2_hw_hdr_rx_feat_framer)(struct rte_mbuf *buf, static bool (*l2_hw_hdr_rx_framer)(struct rte_mbuf *buf, uint16_t *dpdk_port); -int __externally_visible +__FOR_EXPORT +int fal_rx_bp_framer_enable(bool enable, uint32_t bp_port, bool shared_channel, uint16_t ether_proto, int (*feat_framer)(struct rte_mbuf *buf, @@ -84,7 +86,8 @@ int default_feat_framer(struct rte_mbuf *mbuf, uint16_t *dpdk_port, /* TODO Deprecate/Remove when plugins start using the new API * fal_rx_bp_framer_enable */ -bool __externally_visible +__FOR_EXPORT +bool l2_hw_hdr_rx_enable(bool enable, uint32_t bp_port, bool shared_channel, uint16_t ether_proto, bool (*framer)(struct rte_mbuf *buf, @@ -127,7 +130,8 @@ l2_hw_hdr_rx_process(struct pl_packet *pkt) rc = l2_hw_hdr_rx_feat_framer(pkt->mbuf, &dpdk_port, &feat_info); - if (rc != FAL_RET_ETHER_INPUT) { + if ((rc != FAL_RET_ETHER_INPUT) && + (rc != FAL_RET_CAPTURE_HW_INPUT)) { feature_info = calloc(1, sizeof(*feature_info)); if (!feature_info) @@ -145,7 +149,8 @@ l2_hw_hdr_rx_process(struct pl_packet *pkt) if (!ifp) goto drop; - pkt->mbuf->port = dpdk_port; + if (!is_team(ifp)) + pkt->mbuf->port = dpdk_port; /* * Packet capture, monitor, and dispatch. 
Due to @@ -171,6 +176,19 @@ l2_hw_hdr_rx_process(struct pl_packet *pkt) pkt->mbuf->port = dpdk_port; pkt->in_ifp = ifp; return HW_HDR_IN_PORTMONITOR; + + case FAL_RET_CAPTURE_HW_INPUT: + ifp = ifnet_byport(dpdk_port); + + if (!ifp) + goto drop; + + buff->port = dpdk_port; + capture_hardware(ifp, buff); + return HW_HDR_IN_CONSUME; + + case FAL_RET_PLUGIN_CONSUMED: + return HW_HDR_IN_CONSUME; } drop: if_incr_dropped(pkt->in_ifp); @@ -178,7 +196,7 @@ l2_hw_hdr_rx_process(struct pl_packet *pkt) } ALWAYS_INLINE unsigned int -l2_hw_hdr_in_check_process(struct pl_packet *pkt) +l2_hw_hdr_in_check_process(struct pl_packet *pkt, void *context __unused) { struct rte_mbuf *m = pkt->mbuf; diff --git a/src/pipeline/nodes/l2_local.c b/src/pipeline/nodes/l2_local.c new file mode 100644 index 00000000..236ca6c0 --- /dev/null +++ b/src/pipeline/nodes/l2_local.c @@ -0,0 +1,157 @@ +/* + * Copyright (c) 2020, AT&T Intellectual Property. All rights reserved. + * SPDX-License-Identifier: LGPL-2.1-only + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "compiler.h" +#include "ether.h" +#include "if_var.h" +#include "if/macvlan.h" +#include "main.h" +#include "pktmbuf_internal.h" +#include "pl_common.h" +#include "pl_fused.h" +#include "pl_node.h" +#include "pl_nodes_common.h" +#include "util.h" + +struct pl_node; + +/* + * L2 local feature instance is global, so we can store it in a global var. 
+ */ +uint16_t l2_local_features; + +static inline struct pl_node *l2_local_feat_list_to_node(void) +{ + /* our imaginary node */ + return (struct pl_node *)&l2_local_features; +} + +static inline uint16_t * +drop_node_to_l2_local_feat_list(struct pl_node *node) +{ + /* the node is a fiction of our imagination */ + return (uint16_t *)node; +} + +ALWAYS_INLINE unsigned int +l2_local_process_common(struct pl_packet *pkt, void *context __unused, + enum pl_mode mode) +{ + switch (mode) { + case PL_MODE_FUSED: + if (!pipeline_fused_l2_local_features( + pkt, l2_local_feat_list_to_node())) + return L2_LOCAL_FINISH; + break; + case PL_MODE_FUSED_NO_DYN_FEATS: + if (!pipeline_fused_l2_local_no_dyn_features( + pkt, l2_local_feat_list_to_node())) + return L2_LOCAL_FINISH; + break; + case PL_MODE_REGULAR: + if (!pl_node_invoke_enabled_features( + l2_local_node_ptr, + l2_local_feat_list_to_node(), + pkt)) + return L2_LOCAL_FINISH; + break; + } + + local_packet_internal(pkt->in_ifp, pkt->mbuf); + return L2_LOCAL_ACCEPT; +} + +ALWAYS_INLINE unsigned int +l2_local_process(struct pl_packet *p, void *context) +{ + return l2_local_process_common(p, context, PL_MODE_REGULAR); +} + +static int +l2_local_feat_change(struct pl_node *node, + struct pl_feature_registration *feat, + enum pl_node_feat_action action) +{ + uint16_t *feature_list = drop_node_to_l2_local_feat_list(node); + + return pl_node_feat_change_u16(feature_list, feat, action); +} + +ALWAYS_INLINE bool +l2_local_feat_iterate(struct pl_node *node, bool first, + unsigned int *feature_id, void **context, + void **storage_ctx __unused) +{ + uint16_t *feature_list = drop_node_to_l2_local_feat_list(node); + + /* No support for instance context at the moment */ + return pl_node_feat_iterate_u16(feature_list, first, + feature_id, context); +} + +static struct pl_node * +l2_local_node_lookup(const char *name) +{ + if (strcmp(name, "all") == 0) + return l2_local_feat_list_to_node(); + + return NULL; +} + +/* Register Node */ 
+PL_REGISTER_NODE(l2_local_node) = { + .name = "vyatta:l2-local", + .type = PL_PROC, + .handler = l2_local_process, + .feat_change = l2_local_feat_change, + .feat_iterate = l2_local_feat_iterate, + .lookup_by_name = l2_local_node_lookup, + .num_next = L2_LOCAL_NUM, + .next = { + [L2_LOCAL_ACCEPT] = "term-finish", + [L2_LOCAL_FINISH] = "term-finish", + } +}; + +struct pl_node_registration *const l2_local_node_ptr = + &l2_local_node; + +/* + * show features l2_local + */ +static int cmd_pl_show_feat_l2_local(struct pl_command *cmd) +{ + json_writer_t *wr; + + wr = jsonw_new(cmd->fp); + if (!wr) + return 0; + + jsonw_name(wr, "features"); + jsonw_start_object(wr); + + jsonw_name(wr, "global"); + jsonw_start_array(wr); + pl_node_iter_features(l2_local_node_ptr, &l2_local_features, + pl_print_feats, wr); + jsonw_end_array(wr); + + jsonw_end_object(wr); + jsonw_destroy(&wr); + return 0; +} + +PL_REGISTER_OPCMD(pl_show_feat_l2_local) = { + .cmd = "show features l2_local", + .handler = cmd_pl_show_feat_l2_local, +}; diff --git a/src/pipeline/nodes/l2_output.c b/src/pipeline/nodes/l2_output.c new file mode 100644 index 00000000..25e53d90 --- /dev/null +++ b/src/pipeline/nodes/l2_output.c @@ -0,0 +1,141 @@ +/* + * l2_output.c + * + * Copyright (c) 2020, AT&T Intellectual Property. All rights reserved. 
+ * SPDX-License-Identifier: LGPL-2.1-only + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "compiler.h" +#include "ether.h" +#include "if_var.h" +#include "if/macvlan.h" +#include "main.h" +#include "pktmbuf_internal.h" +#include "pl_common.h" +#include "pl_fused.h" +#include "pl_node.h" +#include "pl_nodes_common.h" +#include "util.h" + +struct pl_node; + +static inline struct pl_node *ifp_to_l2_output_node(struct ifnet *ifp) +{ + /* our imaginary node */ + return (struct pl_node *)ifp; +} + +static inline struct ifnet *l2_output_node_to_ifp(struct pl_node *node) +{ + /* the node is a fiction of our imagination */ + return (struct ifnet *)node; +} + +ALWAYS_INLINE unsigned int +l2_output_process_common(struct pl_packet *pkt, void *context __unused, + enum pl_mode mode) +{ + struct ifnet *out_ifp = pkt->out_ifp; + + switch (mode) { + case PL_MODE_FUSED: + if (!pipeline_fused_l2_output_features( + pkt, ifp_to_l2_output_node(out_ifp))) + return L2_OUTPUT_DROP; + break; + case PL_MODE_FUSED_NO_DYN_FEATS: + if (!pipeline_fused_l2_output_no_dyn_features( + pkt, ifp_to_l2_output_node(out_ifp))) + return L2_OUTPUT_DROP; + break; + case PL_MODE_REGULAR: + if (!pl_node_invoke_enabled_features( + l2_output_node_ptr, + ifp_to_l2_output_node(out_ifp), + pkt)) + return L2_OUTPUT_DROP; + break; + } + + return L2_OUTPUT_ACCEPT; +} + +ALWAYS_INLINE unsigned int +l2_output_process(struct pl_packet *p, void *context) +{ + return l2_output_process_common(p, context, PL_MODE_REGULAR); +} + +static int +l2_output_feat_change(struct pl_node *node, + struct pl_feature_registration *feat, + enum pl_node_feat_action action) +{ + struct ifnet *ifp = l2_output_node_to_ifp(node); + + return pl_node_feat_change_u16(&ifp->l2_output_features, feat, action); +} + +static int +l2_output_feat_change_all(struct pl_feature_registration *feat, + enum pl_node_feat_action action) +{ + return if_node_instance_feat_change_all(feat, action, + 
l2_output_feat_change); +} + +ALWAYS_INLINE bool +l2_output_feat_iterate(struct pl_node *node, bool first, + unsigned int *feature_id, void **context, + void **storage_ctx) +{ + bool ret; + struct ifnet *ifp = l2_output_node_to_ifp(node); + + ret = pl_node_feat_iterate_u16(&ifp->l2_output_features, first, + feature_id, context); + if (ret) + *storage_ctx = if_node_instance_get_storage_internal( + ifp, + PL_FEATURE_POINT_L2_OUTPUT_ID, + *feature_id); + + return ret; +} + +static struct pl_node * +l2_output_node_lookup(const char *name) +{ + struct ifnet *ifp = dp_ifnet_byifname(name); + return ifp ? ifp_to_l2_output_node(ifp) : NULL; +} + +/* Register Node */ +PL_REGISTER_NODE(l2_output_node) = { + .name = "vyatta:l2-output", + .type = PL_PROC, + .handler = l2_output_process, + .feat_change = l2_output_feat_change, + .feat_change_all = l2_output_feat_change_all, + .feat_iterate = l2_output_feat_iterate, + .lookup_by_name = l2_output_node_lookup, + .feat_reg_context = if_node_instance_register_storage, + .feat_unreg_context = if_node_instance_unregister_storage, + .feat_setup_cleanup_cb = if_node_instance_set_cleanup_cb, + .num_next = L2_OUTPUT_NUM, + .next = { + [L2_OUTPUT_ACCEPT] = "term-noop", + [L2_OUTPUT_DROP] = "term-drop", + } +}; + +struct pl_node_registration *const l2_output_node_ptr = + &l2_output_node; diff --git a/src/pipeline/nodes/l2_portmonitor.c b/src/pipeline/nodes/l2_portmonitor.c index fef3b882..c52720c0 100644 --- a/src/pipeline/nodes/l2_portmonitor.c +++ b/src/pipeline/nodes/l2_portmonitor.c @@ -1,7 +1,7 @@ /* * l2_portmonitor.c * - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2016, 2017 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -10,13 +10,13 @@ #include #include "compiler.h" -#include "pktmbuf.h" +#include "pktmbuf_internal.h" #include "pl_common.h" #include "pl_fused.h" #include "portmonitor/portmonitor.h" ALWAYS_INLINE unsigned int -portmonitor_in_process(struct pl_packet *pkt) +portmonitor_in_process(struct pl_packet *pkt, void *context __unused) { struct rte_mbuf *m = pkt->mbuf; @@ -27,12 +27,27 @@ portmonitor_in_process(struct pl_packet *pkt) if (unlikely(m != pkt->mbuf)) { pkt->mbuf = m; - pkt->l3_hdr = pktmbuf_mtol3(m, void *); + pkt->l3_hdr = dp_pktmbuf_mtol3(m, void *); } return PORTMONITOR_IN_ACCEPT; } +ALWAYS_INLINE unsigned int +portmonitor_out_process(struct pl_packet *pkt, void *context __unused) +{ + struct rte_mbuf *m = pkt->mbuf; + + portmonitor_src_vif_tx_output(pkt->out_ifp, &m); + + if (unlikely(m != pkt->mbuf)) { + pkt->mbuf = m; + pkt->l3_hdr = dp_pktmbuf_mtol3(m, void *); + } + + return PORTMONITOR_OUT_ACCEPT; +} + /* Register Node */ PL_REGISTER_NODE(portmonitor_in_node) = { .name = "vyatta:portmonitor-in", @@ -52,3 +67,22 @@ PL_REGISTER_FEATURE(portmonitor_in_feat) = { .id = PL_ETHER_LOOKUP_FUSED_FEAT_PORTMONITOR, .visit_after = "capture-ether-in", }; + +/* Register Node */ +PL_REGISTER_NODE(portmonitor_out_node) = { + .name = "vyatta:portmonitor-out", + .type = PL_PROC, + .handler = portmonitor_out_process, + .num_next = PORTMONITOR_OUT_NUM, + .next = { + [PORTMONITOR_OUT_ACCEPT] = "term-noop", + } +}; + +PL_REGISTER_FEATURE(portmonitor_out_feat) = { + .name = "vyatta:portmonitor-out", + .node_name = "portmonitor-out", + .feature_point = "l2-output", + .id = PL_L2_OUTPUT_FUSED_FEAT_PORTMONITOR_OUT, + .visit_after = "vlan-modify-out", +}; diff --git a/src/pipeline/nodes/l2_portmonitor_hw.c b/src/pipeline/nodes/l2_portmonitor_hw.c index 90da6856..39357f90 100644 --- a/src/pipeline/nodes/l2_portmonitor_hw.c +++ b/src/pipeline/nodes/l2_portmonitor_hw.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019, AT&T Intellectual Property. All rights reserved. 
+ * Copyright (c) 2018-2020, AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only */ @@ -7,14 +7,14 @@ #include #include "compiler.h" -#include "pktmbuf.h" +#include "pktmbuf_internal.h" #include "pl_node.h" #include "pl_fused.h" #include "fal.h" #include "portmonitor/portmonitor_hw.h" ALWAYS_INLINE unsigned int -portmonitor_hw_in_process(struct pl_packet *pkt) +portmonitor_hw_in_process(struct pl_packet *pkt, void *context __unused) { struct ifnet *ifp; struct rte_mbuf *m = pkt->mbuf; diff --git a/src/pipeline/nodes/l2_sw_vlan.c b/src/pipeline/nodes/l2_sw_vlan.c index 468677c5..d59a064d 100644 --- a/src/pipeline/nodes/l2_sw_vlan.c +++ b/src/pipeline/nodes/l2_sw_vlan.c @@ -2,7 +2,7 @@ * l2_sw_vlan.c * * - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2016, 2017 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -19,7 +19,7 @@ #include "pl_fused.h" ALWAYS_INLINE unsigned int -sw_vlan_in_process(struct pl_packet *pkt) +sw_vlan_in_process(struct pl_packet *pkt, void *context __unused) { struct rte_mbuf *m = pkt->mbuf; struct ifnet *ifp = pkt->in_ifp; diff --git a/src/pipeline/nodes/l2_vlan_mod_ingress.c b/src/pipeline/nodes/l2_vlan_mod.c similarity index 50% rename from src/pipeline/nodes/l2_vlan_mod_ingress.c rename to src/pipeline/nodes/l2_vlan_mod.c index 2a7413fe..91470b7c 100644 --- a/src/pipeline/nodes/l2_vlan_mod_ingress.c +++ b/src/pipeline/nodes/l2_vlan_mod.c @@ -1,11 +1,12 @@ /* - * l2_vlan_mod_ingress.c + * l2_vlan_mod.c * * - * Copyright (c) 2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2019-2021, AT&T Intellectual Property. All rights reserved. * All rights reserved. 
* * SPDX-License-Identifier: LGPL-2.1-only */ +#include "compiler.h" #include "if_var.h" #include "ether.h" #include "util.h" @@ -53,8 +54,8 @@ static struct rte_mbuf *vlan_modify_ingress(struct ifnet *ifp, return ret; } -inline __attribute__((always_inline)) unsigned int -vlan_modify_in_check_process(struct pl_packet *pkt) +ALWAYS_INLINE unsigned int +vlan_modify_in_check_process(struct pl_packet *pkt, void *context __unused) { struct rte_mbuf *ret; @@ -66,6 +67,50 @@ vlan_modify_in_check_process(struct pl_packet *pkt) return VLAN_MOD_IN_DROP; } +static struct rte_mbuf * +vlan_modify_egress(struct ifnet *ifp, struct rte_mbuf *m) +{ + struct rte_mbuf *ret; + struct vlan_mod_ft_cls_action *action; + uint16_t vlan; + + vlan = vlan_mod_get_vlan(m, ifp, VLAN_MOD_DIR_EGRESS); + if (vlan == 0) + return m; + action = vlan_modify_get_action(ifp, vlan, VLAN_MOD_DIR_EGRESS); + if (!action) + return m; + + switch (action->data.vlan.action) { + case VLAN_MOD_FILTER_ACT_VLAN_POP: + ret = vlan_mod_tag_pop(ifp, &m, VLAN_MOD_DIR_EGRESS); + break; + case VLAN_MOD_FILTER_ACT_VLAN_PUSH: + ret = vlan_mod_tag_push(ifp, &m, action, VLAN_MOD_DIR_EGRESS); + break; + case VLAN_MOD_FILTER_ACT_VLAN_MOD: + ret = vlan_mod_tag_modify(ifp, &m, action, VLAN_MOD_DIR_EGRESS); + break; + default: + ret = NULL; + } + + return ret; +} + +ALWAYS_INLINE unsigned int +vlan_modify_out_check_process(struct pl_packet *pkt, void *context __unused) +{ + struct rte_mbuf *ret; + + ret = vlan_modify_egress(pkt->out_ifp, pkt->mbuf); + if (ret) { + pkt->mbuf = ret; + return VLAN_MOD_OUT_ACCEPT; + } + return VLAN_MOD_OUT_DROP; +} + /* Register Node */ PL_REGISTER_NODE(vlan_mod_in_node) = { .name = "vyatta:vlan-modify-in", @@ -85,3 +130,22 @@ PL_REGISTER_FEATURE(vlan_mod_in_feat) = { .id = PL_ETHER_LOOKUP_FUSED_FEAT_VLAN_MOD_INGRESS, .visit_after = "portmonitor-in", }; + +/* Register Node */ +PL_REGISTER_NODE(vlan_mod_out_node) = { + .name = "vyatta:vlan-modify-out", + .type = PL_PROC, + .handler = 
vlan_modify_out_check_process, + .num_next = VLAN_MOD_OUT_NUM, + .next = { + [VLAN_MOD_OUT_ACCEPT] = "term-noop", + [VLAN_MOD_OUT_DROP] = "term-drop", + } +}; + +PL_REGISTER_FEATURE(vlan_mod_out_feat) = { + .name = "vyatta:vlan-modify-out", + .node_name = "vlan-modify-out", + .feature_point = "l2-output", + .id = PL_L2_OUTPUT_FUSED_FEAT_VLAN_MOD_EGRESS, +}; diff --git a/src/pipeline/nodes/l3_acl.c b/src/pipeline/nodes/l3_acl.c index 9ef8d125..b4c739dd 100644 --- a/src/pipeline/nodes/l3_acl.c +++ b/src/pipeline/nodes/l3_acl.c @@ -1,7 +1,7 @@ /* * l3_acl.c * - * Copyright (c) 2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2019-2020, AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only */ @@ -19,10 +19,11 @@ #include "npf/npf.h" #include "npf/npf_if.h" #include "npf/npf_cache.h" +#include "npf/npf_rc.h" #include "npf/rproc/npf_ext_log.h" #include "npf/config/npf_config.h" #include "npf/config/npf_ruleset_type.h" -#include "pktmbuf.h" +#include "pktmbuf_internal.h" #include "pl_common.h" #include "pl_fused.h" #include "util.h" @@ -40,6 +41,8 @@ ip_acl_process_common(struct pl_packet *pkt, bool v4, int dir) struct ifnet *ifp; unsigned long bitmask; enum npf_ruleset_type rs_type; + npf_decision_t decision = NPF_DECISION_UNMATCHED; + int rc = NPF_RC_UNMATCHED; if (dir == PFIL_IN) { bitmask = NPF_ACL_IN; @@ -76,20 +79,25 @@ ip_acl_process_common(struct pl_packet *pkt, bool v4, int dir) struct rte_mbuf *m = pkt->mbuf; uint16_t const ethertype = - v4 ? htons(ETHER_TYPE_IPv4) : htons(ETHER_TYPE_IPv6); + v4 ? 
htons(RTE_ETHER_TYPE_IPV4) : htons(RTE_ETHER_TYPE_IPV6); npf_cache_init(&npc); - if (unlikely(!npf_cache_all(&npc, m, ethertype))) + rc = npf_cache_all(&npc, m, ethertype); + if (unlikely(rc < 0)) goto drop; /* Run the ruleset, get the decision */ npf_rule_t *rl = npf_ruleset_inspect(&npc, m, npf_ruleset, NULL, ifp, dir); - npf_decision_t decision = npf_rule_decision(rl); + decision = npf_rule_decision(rl); /* Optimise for specific drops, and implicit accept */ if (likely(decision == NPF_DECISION_UNMATCHED)) { accept: + /* Increment return code counter */ + npf_rc_inc(ifp, v4 ? NPF_RCT_ACL4 : NPF_RCT_ACL6, + PFIL2RC(dir), rc, decision); + if (dir == PFIL_IN) return v4 ? IPV4_ACL_IN_ACCEPT : IPV6_ACL_IN_ACCEPT; @@ -109,40 +117,57 @@ ip_acl_process_common(struct pl_packet *pkt, bool v4, int dir) if (unlikely(m != pkt->mbuf)) { pkt->mbuf = m; - pkt->l3_hdr = pktmbuf_mtol3(m, void *); + pkt->l3_hdr = dp_pktmbuf_mtol3(m, void *); } if (decision == NPF_DECISION_PASS) goto accept; + rc = NPF_RC_BLOCK; + drop: + /* Increment return code counter */ + npf_rc_inc(ifp, v4 ? NPF_RCT_ACL4 : NPF_RCT_ACL6, + PFIL2RC(dir), rc, NPF_DECISION_BLOCK); + if (dir == PFIL_IN) return v4 ? IPV4_ACL_IN_DROP : IPV6_ACL_IN_DROP; return v4 ? 
IPV4_ACL_OUT_DROP : IPV6_ACL_OUT_DROP; } - ALWAYS_INLINE unsigned int -ipv4_acl_process_in(struct pl_packet *pkt) +ipv4_acl_process_in(struct pl_packet *pkt, void *context __unused) { return ip_acl_process_common(pkt, V4_PKT, PFIL_IN); } ALWAYS_INLINE unsigned int -ipv6_acl_process_in(struct pl_packet *pkt) +ipv6_acl_process_in(struct pl_packet *pkt, void *context __unused) { return ip_acl_process_common(pkt, V6_PKT, PFIL_IN); } ALWAYS_INLINE unsigned int -ipv4_acl_process_out(struct pl_packet *pkt) +ipv4_acl_process_out(struct pl_packet *pkt, void *context __unused) +{ + return ip_acl_process_common(pkt, V4_PKT, PFIL_OUT); +} + +ALWAYS_INLINE unsigned int +ipv6_acl_process_out(struct pl_packet *pkt, void *context __unused) +{ + return ip_acl_process_common(pkt, V6_PKT, PFIL_OUT); +} + +ALWAYS_INLINE unsigned int +ipv4_acl_process_out_spath(struct pl_packet *pkt, void *context __unused) { return ip_acl_process_common(pkt, V4_PKT, PFIL_OUT); } ALWAYS_INLINE unsigned int -ipv6_acl_process_out(struct pl_packet *pkt) +ipv6_acl_process_out_spath(struct pl_packet *pkt, void *context __unused) { return ip_acl_process_common(pkt, V6_PKT, PFIL_OUT); } @@ -193,6 +218,28 @@ PL_REGISTER_NODE(ipv6_acl_out_node) = { } }; +PL_REGISTER_NODE(ipv4_acl_out_spath_node) = { + .name = "vyatta:ipv4-acl-out-spath", + .type = PL_PROC, + .handler = ipv4_acl_process_out_spath, + .num_next = IPV4_ACL_OUT_SPATH_NUM, + .next = { + [IPV4_ACL_OUT_SPATH_ACCEPT] = "term-noop", + [IPV4_ACL_OUT_SPATH_DROP] = "term-drop", + } +}; + +PL_REGISTER_NODE(ipv6_acl_out_spath_node) = { + .name = "vyatta:ipv6-acl-out-spath", + .type = PL_PROC, + .handler = ipv6_acl_process_out_spath, + .num_next = IPV6_ACL_OUT_SPATH_NUM, + .next = { + [IPV6_ACL_OUT_SPATH_ACCEPT] = "term-noop", + [IPV6_ACL_OUT_SPATH_DROP] = "term-drop", + } +}; + /* Register Features */ PL_REGISTER_FEATURE(ipv4_acl_in_feat) = { .name = "vyatta:ipv4-acl-in", @@ -221,3 +268,17 @@ PL_REGISTER_FEATURE(ipv6_acl_out_feat) = { .feature_point = 
"ipv6-encap", .id = PL_L3_V6_ENCAP_FUSED_FEAT_ACL, }; + +PL_REGISTER_FEATURE(ipv4_acl_out_spath_feat) = { + .name = "vyatta:ipv4-acl-out-spath", + .node_name = "ipv4-acl-out-spath", + .feature_point = "ipv4-out-spath", + .id = PL_L3_V4_OUT_SPATH_FUSED_FEAT_ACL, +}; + +PL_REGISTER_FEATURE(ipv6_acl_out_spath_feat) = { + .name = "vyatta:ipv6-acl-out-spath", + .node_name = "ipv6-acl-out-spath", + .feature_point = "ipv6-out-spath", + .id = PL_L3_V6_OUT_SPATH_FUSED_FEAT_ACL, +}; diff --git a/src/pipeline/nodes/l3_arp.c b/src/pipeline/nodes/l3_arp.c index 36110d5a..9cea5aef 100644 --- a/src/pipeline/nodes/l3_arp.c +++ b/src/pipeline/nodes/l3_arp.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2018-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2018-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 1982, 1986, 1988, 1993 * The Regents of the University of California. All rights reserved. * @@ -49,26 +49,26 @@ #include "arp.h" #include "compat.h" -#include "config.h" +#include "config_internal.h" #include "ether.h" -#include "gre.h" +#include "if/gre.h" +#include "if/macvlan.h" #include "if_ether.h" #include "if_llatbl.h" #include "if_var.h" #include "ip_addr.h" -#include "macvlan.h" #include "main.h" -#include "nh.h" +#include "nh_common.h" #include "pl_common.h" #include "pl_fused.h" -#include "pktmbuf.h" +#include "pktmbuf_internal.h" #include "route.h" #include "route_flags.h" #include "urcu.h" #include "util.h" #include "vplane_debug.h" #include "vplane_log.h" -#include "vrf.h" +#include "vrf_internal.h" /* * Since Dataplane only supports Ethernet, use a simplified form of ARP @@ -84,9 +84,9 @@ */ struct ether_arp { struct arphdr ea_hdr; /* fixed-size header */ - u_int8_t arp_sha[ETHER_ADDR_LEN];/* sender hardware address */ + u_int8_t arp_sha[RTE_ETHER_ADDR_LEN];/* sender hardware address */ u_int8_t arp_spa[4]; /* sender protocol address */ - u_int8_t arp_tha[ETHER_ADDR_LEN];/* target hardware address */ + u_int8_t 
arp_tha[RTE_ETHER_ADDR_LEN];/* target hardware address */ u_int8_t arp_tpa[4]; /* target protocol address */ }; #define arp_hrd ea_hdr.ar_hrd @@ -101,34 +101,35 @@ struct ether_arp { /* Turn a request into a reply and send it */ static int arp_reply(struct ifnet *ifp, struct rte_mbuf *m, - const struct ether_addr *ea, in_addr_t taddr) + const struct rte_ether_addr *ea, in_addr_t taddr) { - struct ether_hdr *eh = rte_pktmbuf_mtod(m, struct ether_hdr *); + struct rte_ether_hdr *eh = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); struct ether_arp *ah = (struct ether_arp *) (eh + 1); in_addr_t dst_ip; ah->arp_op = htons(ARPOP_REPLY); - memcpy(ah->arp_tha, ah->arp_sha, ETHER_ADDR_LEN); - memcpy(ah->arp_sha, ea, ETHER_ADDR_LEN); + memcpy(ah->arp_tha, ah->arp_sha, RTE_ETHER_ADDR_LEN); + memcpy(ah->arp_sha, ea, RTE_ETHER_ADDR_LEN); memcpy(ah->arp_tpa, ah->arp_spa, sizeof(struct in_addr)); memcpy(ah->arp_spa, &taddr, sizeof(struct in_addr)); - memcpy(&eh->d_addr, ah->arp_tha, ETHER_ADDR_LEN); - memcpy(&eh->s_addr, ah->arp_sha, ETHER_ADDR_LEN); + memcpy(&eh->d_addr, ah->arp_tha, RTE_ETHER_ADDR_LEN); + memcpy(&eh->s_addr, ah->arp_sha, RTE_ETHER_ADDR_LEN); char b1[INET_ADDRSTRLEN], b2[ETH_ADDR_STR_LEN]; ARP_DEBUG("send reply for %s (%s) on %s\n", inet_ntop(AF_INET, &taddr, b1, sizeof(b1)), - ether_ntoa_r((const struct ether_addr *)(ah->arp_sha), b2), + ether_ntoa_r((const struct rte_ether_addr *)(ah->arp_sha), + b2), ifp->if_name); ARPSTAT_INC(if_vrfid(ifp), txreplies); if (is_gre(ifp) && !(ifp->if_flags & IFF_NOARP)) { memcpy(&dst_ip, ah->arp_tpa, sizeof(ah->arp_tpa)); - if (!gre_tunnel_encap(ifp, ifp, &dst_ip, m, ETHER_TYPE_ARP)) + if (!gre_tunnel_encap(ifp, ifp, &dst_ip, m, RTE_ETHER_TYPE_ARP)) return ARP_IN_NOTHOT_FINISH; } return ARP_IN_NOTHOT_L2_OUT; @@ -156,13 +157,13 @@ static bool arp_proxy(struct ifnet *ifp, in_addr_t addr, struct rte_mbuf *m, /* Is there a route to this address */ pktmbuf_set_vrf(m, if_vrfid(ifp)); - struct next_hop *nxt = rt_lookup(addr, 
RT_TABLE_MAIN, m); + struct next_hop *nxt = dp_rt_lookup(addr, RT_TABLE_MAIN, m); if (nxt == NULL || (nxt->flags & (RTF_REJECT|RTF_BLACKHOLE|RTF_BROADCAST))) return false; /* Don't send proxy if on same interface */ - if (nh4_get_ifp(nxt) == ifp) + if (dp_nh_get_ifp(nxt) == ifp) return false; /* Respond with own address */ @@ -174,7 +175,7 @@ static bool arp_proxy(struct ifnet *ifp, in_addr_t addr, struct rte_mbuf *m, * only if the target IP address is configured on the incoming interface. * (Equivalent to arp_ignore=1 in Linux) */ -static int arp_ignore(struct ifnet *ifp, const struct ether_addr *enaddr, +static int arp_ignore(struct ifnet *ifp, const struct rte_ether_addr *enaddr, in_addr_t src, in_addr_t target) { struct if_addr *ifa; @@ -208,11 +209,11 @@ static int arp_ignore(struct ifnet *ifp, const struct ether_addr *enaddr, } ALWAYS_INLINE unsigned int -arp_in_nothot_process(struct pl_packet *pkt) +arp_in_nothot_process(struct pl_packet *pkt, void *context __unused) { struct ifnet *ifp = pkt->in_ifp; struct rte_mbuf *m = pkt->mbuf; - struct ether_hdr *eh; + struct rte_ether_hdr *eh; struct ether_arp *ah; struct llentry *la; in_addr_t itaddr, isaddr; @@ -223,8 +224,9 @@ arp_in_nothot_process(struct pl_packet *pkt) struct ifnet *vrrp_ifp; int resp; - eh = rte_pktmbuf_mtod(m, struct ether_hdr *); - vrrp_ifp = macvlan_get_vrrp_if(ifp, (struct ether_addr *)&eh->d_addr); + eh = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); + vrrp_ifp = macvlan_get_vrrp_if(ifp, + (struct rte_ether_addr *)&eh->d_addr); if (vrrp_ifp) pkt->in_ifp = ifp = vrrp_ifp; @@ -238,7 +240,13 @@ arp_in_nothot_process(struct pl_packet *pkt) memcpy(&isaddr, ah->arp_spa, sizeof(isaddr)); memcpy(&itaddr, ah->arp_tpa, sizeof(itaddr)); - if (unlikely(is_multicast_ether_addr(&eh->d_addr))) { + if (unlikely(rte_is_multicast_ether_addr(&eh->d_addr))) { + struct sockaddr sock_storage; + struct sockaddr_in *ip_storage = + (struct sockaddr_in *) &sock_storage; + + ip_storage->sin_family = AF_INET; + 
ip_storage->sin_addr.s_addr = itaddr; /* Lookup based on the target IP address * * Note that this causes GARPs to only be processed in @@ -251,7 +259,7 @@ arp_in_nothot_process(struct pl_packet *pkt) * replies, but ARP replies should never be * multicasted anyway. */ - vrrp_ifp = macvlan_get_vrrp_ip_if(ifp, itaddr); + vrrp_ifp = macvlan_get_vrrp_ip_if(ifp, &sock_storage); /* overriding the interface at this point does bypass * the own-MAC check in arp_input_validate, but that's * fine as we know at this point the destination @@ -265,7 +273,7 @@ arp_in_nothot_process(struct pl_packet *pkt) if (op == ARPOP_REPLY) ARPSTAT_INC(if_vrfid(ifp), rxreplies); - rc = arp_ignore(ifp, (struct ether_addr *) ah->arp_sha, + rc = arp_ignore(ifp, (struct rte_ether_addr *) ah->arp_sha, isaddr, itaddr); if (rc != 0) { if (rc == -ENOENT && op == ARPOP_REQUEST && @@ -276,7 +284,7 @@ arp_in_nothot_process(struct pl_packet *pkt) ifp->if_name); ARPSTAT_INC(if_vrfid(ifp), proxy); pkt->in_ifp = NULL; - pkt->l2_proto = ETHER_TYPE_ARP; + pkt->l2_proto = RTE_ETHER_TYPE_ARP; pkt->out_ifp = ifp; return resp; } @@ -324,7 +332,7 @@ arp_in_nothot_process(struct pl_packet *pkt) la = in_lltable_lookup(ifp, garp ? 0 : LLE_CREATE, isaddr); if (la) { lladdr_update(ifp, la, - (struct ether_addr *) ah->arp_sha, 0); + (struct rte_ether_addr *) ah->arp_sha, 0); /* Allow packet to bleed back to keep local tables in sync. */ if ((op == ARPOP_REPLY) || garp) { @@ -351,7 +359,7 @@ arp_in_nothot_process(struct pl_packet *pkt) * Shortcut.. the receiving interface is the target. 
*/ pkt->in_ifp = NULL; - pkt->l2_proto = ETHER_TYPE_ARP; + pkt->l2_proto = RTE_ETHER_TYPE_ARP; pkt->out_ifp = ifp; return arp_reply(ifp, m, &ifp->eth_addr, itaddr); } @@ -389,7 +397,7 @@ PL_REGISTER_NODE(arp_in_nothot_node) = { }; ALWAYS_INLINE unsigned int -arp_in_process(struct pl_packet *pkt) +arp_in_process(struct pl_packet *pkt, void *context __unused) { arp_input(pkt->in_ifp, pkt->mbuf); return ARP_IN_FINISH; diff --git a/src/pipeline/nodes/l3_dpi.c b/src/pipeline/nodes/l3_dpi.c index 36452d03..1de631ce 100644 --- a/src/pipeline/nodes/l3_dpi.c +++ b/src/pipeline/nodes/l3_dpi.c @@ -1,7 +1,5 @@ /* - * l3_dpi.c - * - * Copyright (c) 2018-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2018-2021, AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only */ @@ -19,8 +17,8 @@ #include "npf/npf.h" #include "npf/npf_cache.h" #include "npf/npf_session.h" -#include "npf/dpi/dpi.h" -#include "pktmbuf.h" +#include "npf/dpi/dpi_internal.h" +#include "pktmbuf_internal.h" #include "pl_common.h" #include "pl_fused.h" #include "util.h" @@ -38,6 +36,7 @@ ip_dpi_process_common(struct pl_packet *pkt, bool v4, int dir) struct rte_mbuf *m = pkt->mbuf; npf_session_t *se = npf_session_find_cached(m); + int error = 0; /* If already present, we have nothing to do; e.g. f/w did it */ if (se && npf_session_get_dpi(se)) @@ -45,7 +44,7 @@ ip_dpi_process_common(struct pl_packet *pkt, bool v4, int dir) /* Ensure we have a cached packet */ uint16_t const ethertype = - v4 ? htons(ETHER_TYPE_IPv4) : htons(ETHER_TYPE_IPv6); + v4 ? htons(RTE_ETHER_TYPE_IPV4) : htons(RTE_ETHER_TYPE_IPV6); /* * Avoid passing &pkt->npf_flags into npf_get_cache as doing * so prevents an optimisation in fused mode whereby the @@ -53,7 +52,7 @@ ip_dpi_process_common(struct pl_packet *pkt, bool v4, int dir) * instead using registers. 
*/ uint16_t npf_flags = pkt->npf_flags; - npf_cache_t *npc = npf_get_cache(&npf_flags, m, ethertype); + npf_cache_t *npc = npf_get_cache(&npf_flags, m, ethertype, &error); if (!npc) goto done; pkt->npf_flags = npf_flags; @@ -69,14 +68,15 @@ ip_dpi_process_common(struct pl_packet *pkt, bool v4, int dir) goto done; struct ifnet *ifp = pkt->in_ifp; - int error = 0; + se = npf_session_find_or_create(npc, m, ifp, dir, &error); if (!se || error) goto done; } /* Attach the DPI flow info, do first packet inspection */ - (void)dpi_session_first_packet(se, npc, m, dir); + uint8_t engines[] = {IANA_USER, IANA_NDPI}; + (void)dpi_session_first_packet(se, npc, m, dir, 2, engines); done: if (dir == PFIL_IN) @@ -87,25 +87,25 @@ ip_dpi_process_common(struct pl_packet *pkt, bool v4, int dir) ALWAYS_INLINE unsigned int -ipv4_dpi_process_in(struct pl_packet *pkt) +ipv4_dpi_process_in(struct pl_packet *pkt, void *context __unused) { return ip_dpi_process_common(pkt, V4_PKT, PFIL_IN); } ALWAYS_INLINE unsigned int -ipv6_dpi_process_in(struct pl_packet *pkt) +ipv6_dpi_process_in(struct pl_packet *pkt, void *context __unused) { return ip_dpi_process_common(pkt, V6_PKT, PFIL_IN); } ALWAYS_INLINE unsigned int -ipv4_dpi_process_out(struct pl_packet *pkt) +ipv4_dpi_process_out(struct pl_packet *pkt, void *context __unused) { return ip_dpi_process_common(pkt, V4_PKT, PFIL_OUT); } ALWAYS_INLINE unsigned int -ipv6_dpi_process_out(struct pl_packet *pkt) +ipv6_dpi_process_out(struct pl_packet *pkt, void *context __unused) { return ip_dpi_process_common(pkt, V6_PKT, PFIL_OUT); } diff --git a/src/pipeline/nodes/l3_fw_in.c b/src/pipeline/nodes/l3_fw_in.c index ff779173..8b27f9e5 100644 --- a/src/pipeline/nodes/l3_fw_in.c +++ b/src/pipeline/nodes/l3_fw_in.c @@ -2,7 +2,7 @@ * l3_fw_in.c * * - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. 
* Copyright (c) 2016, 2017 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -20,7 +20,7 @@ #include "npf/npf_cmd.h" #include "npf/npf_if.h" #include "npf_shim.h" -#include "pktmbuf.h" +#include "pktmbuf_internal.h" #include "pl_common.h" #include "pl_fused.h" #include "urcu.h" @@ -43,6 +43,9 @@ ip_fw_in_process_common(struct pl_packet *pkt, bool v4) struct npf_if *nif = rcu_dereference(ifp->if_npf); + if (npf_if_zone_is_enabled(nif)) + pkt->npf_flags |= NPF_FLAG_FROM_ZONE; + /* what is the best way to define app specific data? */ if (npf_if_active(nif, bitmask)) { struct rte_mbuf *m = pkt->mbuf; @@ -50,12 +53,12 @@ ip_fw_in_process_common(struct pl_packet *pkt, bool v4) npf_result_t result = npf_hook_track(ifp, &m, nif, PFIL_IN, pkt->npf_flags, - v4 ? htons(ETHER_TYPE_IPv4) : - htons(ETHER_TYPE_IPv6)); + v4 ? htons(RTE_ETHER_TYPE_IPV4) : + htons(RTE_ETHER_TYPE_IPV6)); if (unlikely(m != pkt->mbuf)) { pkt->mbuf = m; - pkt->l3_hdr = pktmbuf_mtol3(m, void *); + pkt->l3_hdr = dp_pktmbuf_mtol3(m, void *); } if (unlikely(result.decision != NPF_DECISION_PASS)) @@ -63,24 +66,20 @@ ip_fw_in_process_common(struct pl_packet *pkt, bool v4) pkt->npf_flags = result.flags; - if (unlikely(result.action == NPF_ACTION_TO_V6)) - return IPV4_FW_IN_TO_V6; - else if (unlikely(result.action == NPF_ACTION_TO_V4)) - return IPV6_FW_IN_TO_V4; - else if (unlikely(result.action == NPF_ACTION_TO_LOCAL)) + if (unlikely(result.action == NPF_ACTION_TO_LOCAL)) return IPV4_FW_IN_TO_LOCAL; } return v4 ? 
IPV4_FW_IN_ACCEPT : IPV6_FW_IN_ACCEPT; } ALWAYS_INLINE unsigned int -ipv4_fw_in_process(struct pl_packet *pkt) +ipv4_fw_in_process(struct pl_packet *pkt, void *context __unused) { return ip_fw_in_process_common(pkt, V4_PKT); } ALWAYS_INLINE unsigned int -ipv6_fw_in_process(struct pl_packet *pkt) +ipv6_fw_in_process(struct pl_packet *pkt, void *context __unused) { return ip_fw_in_process_common(pkt, V6_PKT); } @@ -93,7 +92,6 @@ PL_REGISTER_NODE(ipv4_fw_in_node) = { .num_next = IPV4_FW_NUM, .next = { [IPV4_FW_IN_ACCEPT] = "term-noop", - [IPV4_FW_IN_TO_V6] = "term-v4-to-v6", [IPV4_FW_IN_TO_LOCAL] = "ipv4-local", [IPV4_FW_IN_DROP] = "term-drop", } @@ -106,7 +104,6 @@ PL_REGISTER_NODE(ipv6_fw_in_node) = { .num_next = IPV6_FW_NUM, .next = { [IPV6_FW_IN_ACCEPT] = "term-noop", - [IPV6_FW_IN_TO_V4] = "term-v6-to-v4", [IPV6_FW_IN_DROP] = "ipv6-drop" } }; diff --git a/src/pipeline/nodes/l3_fw_out.c b/src/pipeline/nodes/l3_fw_out.c index 3b21f459..61a387c8 100644 --- a/src/pipeline/nodes/l3_fw_out.c +++ b/src/pipeline/nodes/l3_fw_out.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. 
* * SPDX-License-Identifier: LGPL-2.1-only * @@ -15,10 +15,12 @@ #include "npf/npf.h" #include "npf/npf_if.h" #include "npf_shim.h" -#include "pktmbuf.h" +#include "pktmbuf_internal.h" #include "pl_common.h" #include "pl_fused.h" #include "urcu.h" +#include "ip_funcs.h" +#include "ip6_funcs.h" enum { V4_PKT = true, @@ -38,41 +40,81 @@ ip_fw_out_process_common(struct pl_packet *pkt, bool v4) struct npf_if *nif = rcu_dereference(ifp->if_npf); - /* Output NPF Firewall and NAT */ - if (npf_if_active(nif, bitmask) || - (nif && - (pkt->npf_flags & (NPF_FLAG_FROM_IPV6 | NPF_FLAG_FROM_IPV4)))) { + /* Output NPF Firewall and NAT or Zone and NAT */ + if (npf_if_active(nif, bitmask)) { npf_result_t result; struct rte_mbuf *m = pkt->mbuf; result = npf_hook_track(pkt->in_ifp, &m, nif, PFIL_OUT, pkt->npf_flags, - v4 ? htons(ETHER_TYPE_IPv4) : - htons(ETHER_TYPE_IPv6)); + v4 ? htons(RTE_ETHER_TYPE_IPV4) : + htons(RTE_ETHER_TYPE_IPV6)); if (unlikely(m != pkt->mbuf)) { pkt->mbuf = m; - pkt->l3_hdr = pktmbuf_mtol3(m, void *); + pkt->l3_hdr = dp_pktmbuf_mtol3(m, void *); } if (unlikely(result.decision != NPF_DECISION_PASS)) return v4 ? IPV4_FW_OUT_DROP : IPV6_FW_OUT_DROP; - /* Discard result.flags as no change can happen */ - } + + /* Update npf_flags for nat64 */ + pkt->npf_flags = result.flags; + + } else if ((pkt->npf_flags & NPF_FLAG_FROM_ZONE) && + !(pkt->npf_flags & NPF_FLAG_FROM_US)) + /* Zone to non-zone (no fw) -> drop */ + return v4 ? IPV4_FW_OUT_DROP : IPV6_FW_OUT_DROP; return v4 ? 
IPV4_FW_OUT_ACCEPT : IPV6_FW_OUT_ACCEPT; } ALWAYS_INLINE unsigned int -ipv4_fw_out_process(struct pl_packet *pkt) +ipv4_fw_out_process(struct pl_packet *pkt, void *context __unused) +{ + return ip_fw_out_process_common(pkt, V4_PKT); +} + +ALWAYS_INLINE unsigned int +ipv6_fw_out_process(struct pl_packet *pkt, void *context __unused) +{ + return ip_fw_out_process_common(pkt, V6_PKT); +} + +ALWAYS_INLINE unsigned int +ipv4_fw_out_spath_process(struct pl_packet *pkt, void *context __unused) { return ip_fw_out_process_common(pkt, V4_PKT); } ALWAYS_INLINE unsigned int -ipv6_fw_out_process(struct pl_packet *pkt) +ipv6_fw_out_spath_process(struct pl_packet *pkt, void *context __unused) { return ip_fw_out_process_common(pkt, V6_PKT); } +ALWAYS_INLINE unsigned int +ipv4_fw_orig_process(struct pl_packet *pkt, void *context __unused) +{ + if (pkt->npf_flags & NPF_FLAG_FROM_US) { + if (ipv4_originate_filter_flags(pkt->out_ifp, pkt->mbuf, + pkt->npf_flags)) + return IPV4_FW_ORIG_DROP; + } + + return IPV4_FW_ORIG_ACCEPT; +} + +ALWAYS_INLINE unsigned int +ipv6_fw_orig_process(struct pl_packet *pkt, void *context __unused) +{ + if (pkt->npf_flags & NPF_FLAG_FROM_US) { + if (ipv6_originate_filter_flags(pkt->out_ifp, pkt->mbuf, + pkt->npf_flags)) + return IPV6_FW_ORIG_DROP; + } + + return IPV6_FW_ORIG_ACCEPT; +} + /* Register Node */ PL_REGISTER_NODE(ipv4_fw_out_node) = { .name = "vyatta:ipv4-fw-out", @@ -96,6 +138,50 @@ PL_REGISTER_NODE(ipv6_fw_out_node) = { } }; +PL_REGISTER_NODE(ipv4_fw_orig_node) = { + .name = "vyatta:ipv4-fw-orig", + .type = PL_PROC, + .handler = ipv4_fw_orig_process, + .num_next = IPV4_FW_ORIG_NUM, + .next = { + [IPV4_FW_ORIG_ACCEPT] = "term-noop", + [IPV4_FW_ORIG_DROP] = "term-drop", + } +}; + +PL_REGISTER_NODE(ipv6_fw_orig_node) = { + .name = "vyatta:ipv6-fw-orig", + .type = PL_PROC, + .handler = ipv6_fw_orig_process, + .num_next = IPV6_FW_ORIG_NUM, + .next = { + [IPV6_FW_ORIG_ACCEPT] = "ipv6-fw-out", + [IPV6_FW_ORIG_DROP] = "ipv6-drop", + } +}; + 
+PL_REGISTER_NODE(ipv4_fw_out_spath_node) = { + .name = "vyatta:ipv4-fw-out-spath", + .type = PL_PROC, + .handler = ipv4_fw_out_spath_process, + .num_next = IPV4_FW_OUT_SPATH_NUM, + .next = { + [IPV4_FW_OUT_SPATH_ACCEPT] = "term-noop", + [IPV4_FW_OUT_SPATH_DROP] = "term-drop", + } +}; + +PL_REGISTER_NODE(ipv6_fw_out_spath_node) = { + .name = "vyatta:ipv6-fw-out-spath", + .type = PL_PROC, + .handler = ipv6_fw_out_spath_process, + .num_next = IPV6_FW_OUT_SPATH_NUM, + .next = { + [IPV6_FW_OUT_SPATH_ACCEPT] = "term-noop", + [IPV6_FW_OUT_SPATH_DROP] = "term-drop", + } +}; + /* Register Features */ PL_REGISTER_FEATURE(ipv4_fw_out_feat) = { .name = "vyatta:ipv4-fw-out", @@ -112,3 +198,31 @@ PL_REGISTER_FEATURE(ipv6_fw_out_feat) = { .id = PL_L3_V6_OUT_FUSED_FEAT_FW, .visit_after = "vyatta:ipv6-defrag-out", }; + +PL_REGISTER_FEATURE(ipv4_fw_orig_feat) = { + .name = "vyatta:ipv4-fw-orig", + .node_name = "ipv4-fw-orig", + .feature_point = "ipv4-out-spath", + .id = PL_L3_V4_OUT_FUSED_FEAT_FW_ORIG, +}; + +PL_REGISTER_FEATURE(ipv6_fw_orig_feat) = { + .name = "vyatta:ipv6-fw-orig", + .node_name = "ipv6-fw-orig", + .feature_point = "ipv6-out-spath", + .id = PL_L3_V6_OUT_FUSED_FEAT_FW_ORIG, +}; + +PL_REGISTER_FEATURE(ipv4_fw_out_spath_feat) = { + .name = "vyatta:ipv4-fw-out-spath", + .node_name = "ipv4-fw-out-spath", + .feature_point = "ipv4-out-spath", + .id = PL_L3_V4_OUT_SPATH_FUSED_FEAT_FW, +}; + +PL_REGISTER_FEATURE(ipv6_fw_out_spath_feat) = { + .name = "vyatta:ipv6-fw-out-spath", + .node_name = "ipv6-fw-out-spath", + .feature_point = "ipv6-out-spath", + .id = PL_L3_V6_OUT_SPATH_FUSED_FEAT_FW, +}; diff --git a/src/pipeline/nodes/l3_nat64.c b/src/pipeline/nodes/l3_nat64.c new file mode 100644 index 00000000..2c66217d --- /dev/null +++ b/src/pipeline/nodes/l3_nat64.c @@ -0,0 +1,463 @@ +/* + * l3_nat64.c + * + * Copyright (c) 2019-2020, AT&T Intellectual Property. All rights reserved. 
+ * + * SPDX-License-Identifier: LGPL-2.1-only + */ +#include +#include +#include +#include + +#include "compiler.h" +#include "if_var.h" +#include "pktmbuf_internal.h" +#include "pl_common.h" +#include "pl_fused.h" +#include "urcu.h" + +#include "npf/config/npf_config.h" +#include "npf/npf.h" +#include "npf/npf_cmd.h" +#include "npf/npf_if.h" +#include "npf/npf_rc.h" +#include "npf_shim.h" +#include "npf/npf_nat64.h" + +/* + * For NAT 6-to-4 the packet flow sequence is: + * + * request: v6(in) -> v4(in) -> v4(out) + * response: v4(in) -> v6(in) -> v6(out) + * + * Two session are created for the first packet in a data flow - An IPv6 + * session at v6-in and an IPv4 session at v4-out. + * + * The input code will add PKT_MDATA_INVAR_NAT64 metadata to the first packet. + * The output code will detect that metadata, create a session, and link it to + * the input session. + * + * For subsequent times through the output session, the only work done by the + * output code is to increment stats. + * + * NAT 4-to-6 for a new packet flow is similar. + * + * npf_nat64_4to6_in or npf_nat64_6to4_in are called at input when either of + * the following are true: + * + * 1. A NAT64 rule exists on the interface, or + * 2. A NAT64 session is found on ingress + * + * npf_nat64_4to6_out or npf_nat64_6to4_out are called at output when the + * packet npf_flags say that the packet has been switched from the other + * address family path. + */ +enum { + V4_PKT = true, + V6_PKT = false +}; + +/* + * First look for a session cached in the packet, else lookup session table. 
+ */ +static npf_session_t * +nat64_session_find(npf_cache_t *npc, struct rte_mbuf *m, struct ifnet *ifp, + int dir, int *error) +{ + npf_session_t *se; + + se = npf_session_find_cached(m); + if (se) + return se; + + se = npf_session_inspect(npc, m, ifp, dir, error, NULL); + + if (*error || !se) + return NULL; + + /* Attach the session to the packet */ + struct pktmbuf_mdata *mdata = pktmbuf_mdata(m); + mdata->md_session = se; + pktmbuf_mdata_set(m, PKT_MDATA_SESSION); + + return se; +} + +/* + * NAT64 Common Input Process + */ +static ALWAYS_INLINE unsigned int +nat64_in_process_common(struct pl_packet *pkt, struct npf_if *nif, bool v4, + uint16_t eth_type) +{ + struct ifnet *ifp = pkt->in_ifp; + struct npf_config *nif_config; + nat64_decision_t decision = NAT64_DECISION_UNMATCHED; + struct rte_mbuf *m; + uint16_t npf_flags; + npf_session_t *se; + npf_cache_t *npc; + int rc = NPF_RC_UNMATCHED; + int rv = v4 ? IPV4_NAT46_IN_ACCEPT : IPV6_NAT64_IN_ACCEPT; + + npf_flags = pkt->npf_flags; + m = pkt->mbuf; + nif_config = npf_if_conf(nif); + + npc = npf_get_cache(&npf_flags, m, eth_type, &rc); + if (unlikely(!npc)) + goto end; + + se = nat64_session_find(npc, m, ifp, PFIL_IN, &rc); + + if (unlikely(rc < 0)) + goto end; + + if (!npf_active(nif_config, v4 ? NPF_NAT46 : NPF_NAT64) && + !npf_session_is_nat64(se)) + /* + * We don't want to increment the rc counter when there is no + * nat64 config or session on an interface. + */ + return rv; + + /* + * Either we found a nat64 session, or there is nat64 config on the + * interface. 
+ */ + /* Hook */ + if (v4) + decision = npf_nat64_4to6_in(nif_config, &se, ifp, npc, + &m, &npf_flags, &rc); + else + decision = npf_nat64_6to4_in(nif_config, &se, ifp, npc, + &m, &npf_flags, &rc); + + if (se) { + if (decision != NAT64_DECISION_DROP) { + rc = npf_session_activate(se, ifp, npc, m); + if (rc == NPF_RC_OK) { + /* Attach the session to the packet */ + struct pktmbuf_mdata *mdata = pktmbuf_mdata(m); + mdata->md_session = se; + pktmbuf_mdata_set(m, PKT_MDATA_SESSION); + + /* Save session stats. */ + npf_save_stats(se, PFIL_IN, + rte_pktmbuf_pkt_len(m)); + } else { + if (rc != -NPF_RC_ENOSTR) + decision = NAT64_DECISION_DROP; + } + } else if (!npf_session_is_active(se)) { + npf_session_destroy(se); + } else if (rc < 0) { + pktmbuf_mdata_clear(m, PKT_MDATA_SESSION); + npf_session_expire(se); + } + } + + if (unlikely(m != pkt->mbuf)) { + pkt->mbuf = m; + pkt->l3_hdr = dp_pktmbuf_mtol3(m, void *); + } + +end: + switch (decision) { + case NAT64_DECISION_UNMATCHED: + rv = v4 ? IPV4_NAT46_IN_ACCEPT : IPV6_NAT64_IN_ACCEPT; + rc = NPF_RC_UNMATCHED; + break; + case NAT64_DECISION_TO_V4: + pkt->npf_flags = npf_flags; + rv = IPV6_NAT64_IN_TO_V4; + rc = NPF_RC_NAT64_6T4; + break; + case NAT64_DECISION_TO_V6: + pkt->npf_flags = npf_flags; + rv = IPV4_NAT46_IN_TO_V6; + rc = NPF_RC_NAT64_4T6; + break; + case NAT64_DECISION_PASS: + pkt->npf_flags = npf_flags; + rv = v4 ? IPV4_NAT46_IN_ACCEPT : IPV6_NAT64_IN_ACCEPT; + rc = NPF_RC_PASS; + break; + case NAT64_DECISION_DROP: + rv = v4 ? IPV4_NAT46_IN_DROP : IPV6_NAT64_IN_DROP; + break; + }; + + /* Increment return code counter */ + npf_rc_inc_nat64(ifp, NPF_RC_IN, rc); + + return rv; +} + + +/* + * NAT64 Common Output Process + * + * This function will *only* be called for packets that have switched paths. 
+ */ +static unsigned int +nat64_out_process_common(struct pl_packet *pkt, bool v4, uint16_t eth_type) +{ + struct ifnet *ifp = pkt->out_ifp; + nat64_decision_t decision = NAT64_DECISION_UNMATCHED; + struct rte_mbuf *m; + uint16_t npf_flags; + npf_session_t *se; + npf_cache_t *npc; + int rc = NPF_RC_UNMATCHED; + int rv = 0; + + npf_flags = pkt->npf_flags; + m = pkt->mbuf; + + npc = npf_get_cache(&npf_flags, m, eth_type, &rc); + if (unlikely(!npc)) + goto end; + + se = nat64_session_find(npc, m, ifp, PFIL_OUT, &rc); + + if (unlikely(rc < 0)) + goto end; + + /* Hook */ + if (v4) + decision = npf_nat64_6to4_out(&se, ifp, npc, &m, &npf_flags, + &rc); + else + decision = npf_nat64_4to6_out(&se, ifp, npc, &m, &npf_flags, + &rc); + + if (se) { + if (decision != NAT64_DECISION_DROP) { + rc = npf_session_activate(se, ifp, npc, m); + if (rc == NPF_RC_OK) { + /* Attach the session to the packet */ + struct pktmbuf_mdata *mdata = pktmbuf_mdata(m); + mdata->md_session = se; + pktmbuf_mdata_set(m, PKT_MDATA_SESSION); + + /* Save session stats. */ + npf_save_stats(se, PFIL_OUT, + rte_pktmbuf_pkt_len(m)); + } else { + if (rc != -NPF_RC_ENOSTR) + decision = NAT64_DECISION_DROP; + } + } else if (!npf_session_is_active(se)) { + npf_session_destroy(se); + } else if (rc < 0) { + pktmbuf_mdata_clear(m, PKT_MDATA_SESSION); + npf_session_expire(se); + } + } + + if (unlikely(m != pkt->mbuf)) { + pkt->mbuf = m; + pkt->l3_hdr = dp_pktmbuf_mtol3(m, void *); + } + +end: + switch (decision) { + case NAT64_DECISION_UNMATCHED: + rv = v4 ? IPV4_NAT64_OUT_ACCEPT : IPV6_NAT46_OUT_ACCEPT; + rc = NPF_RC_UNMATCHED; + break; + case NAT64_DECISION_TO_V4: /* Will not occur in output. For compiler */ + case NAT64_DECISION_TO_V6: /* Will not occur in output. For compiler */ + case NAT64_DECISION_PASS: + rv = v4 ? IPV4_NAT64_OUT_ACCEPT : IPV6_NAT46_OUT_ACCEPT; + rc = NPF_RC_PASS; + break; + case NAT64_DECISION_DROP: + rv = v4 ? 
IPV4_NAT64_OUT_DROP : IPV6_NAT46_OUT_DROP; + break; + }; + + /* Increment return code counter */ + npf_rc_inc_nat64(ifp, NPF_RC_OUT, rc); + + return rv; +} + +/* + * NAT64 In. v6 packet + */ +ALWAYS_INLINE unsigned int ipv6_nat64_in_process(struct pl_packet *pkt, + void *context __unused) +{ + struct npf_if *nif = rcu_dereference(pkt->in_ifp->if_npf); + unsigned int rv; + + /* + * Input process expects either a session or active nat64 config. + */ + if (!npf_if_active(nif, NPF_IF_SESSION | NPF_NAT64)) + return IPV6_NAT64_IN_ACCEPT; + + /* + * Packet is IPv6 + */ + rv = nat64_in_process_common(pkt, nif, + V6_PKT, htons(RTE_ETHER_TYPE_IPV6)); + + return rv; +} + +/* + * NAT64 Out. v4 packet + */ +ALWAYS_INLINE unsigned int ipv4_nat64_out_process(struct pl_packet *pkt, + void *context __unused) +{ + unsigned int rv; + + /* + * Output process only expects packet to have switched paths from + * IPv6. An output session may or may not exist yet. + */ + if ((pkt->npf_flags & NPF_FLAG_FROM_IPV6) == 0) + return IPV4_NAT64_OUT_ACCEPT; + + /* + * Packet is IPv4 + */ + rv = nat64_out_process_common(pkt, V4_PKT, htons(RTE_ETHER_TYPE_IPV4)); + + return rv; +} + +/* + * NAT46 In. v4 packet + */ +ALWAYS_INLINE unsigned int ipv4_nat46_in_process(struct pl_packet *pkt, + void *context __unused) +{ + struct npf_if *nif = rcu_dereference(pkt->in_ifp->if_npf); + unsigned int rv; + + /* + * Input process expects either a session or active nat46 config. + */ + if (!npf_if_active(nif, NPF_IF_SESSION | NPF_NAT46)) + return IPV4_NAT46_IN_ACCEPT; + + /* + * Packet is IPv4 + */ + rv = nat64_in_process_common(pkt, nif, + V4_PKT, htons(RTE_ETHER_TYPE_IPV4)); + + return rv; +} + +/* + * NAT46 In. v6 packet + */ +ALWAYS_INLINE unsigned int ipv6_nat46_out_process(struct pl_packet *pkt, + void *context __unused) +{ + unsigned int rv; + + /* + * Output process only expects packet to have switched paths from + * IPv4. An output session may or may not exist yet. 
+ */ + if ((pkt->npf_flags & NPF_FLAG_FROM_IPV4) == 0) + return IPV6_NAT46_OUT_ACCEPT; + + /* + * Packet is IPv6 + */ + rv = nat64_out_process_common(pkt, V6_PKT, htons(RTE_ETHER_TYPE_IPV6)); + return rv; +} + +/* Register Node */ +PL_REGISTER_NODE(ipv6_nat64_in_node) = { + .name = "vyatta:ipv6-nat64-in", + .type = PL_PROC, + .handler = ipv6_nat64_in_process, + .num_next = IPV6_NAT64_IN_NUM, + .next = { + [IPV6_NAT64_IN_ACCEPT] = "term-noop", + [IPV6_NAT64_IN_TO_V4] = "term-v6-to-v4", + [IPV6_NAT64_IN_DROP] = "term-drop", + } +}; + +/* Register Node */ +PL_REGISTER_NODE(ipv4_nat64_out_node) = { + .name = "vyatta:ipv4-nat64-out", + .type = PL_PROC, + .handler = ipv4_nat64_out_process, + .num_next = IPV4_NAT64_OUT_NUM, + .next = { + [IPV4_NAT64_OUT_ACCEPT] = "term-noop", + [IPV4_NAT64_OUT_DROP] = "term-drop", + } +}; + +/* Register Node */ +PL_REGISTER_NODE(ipv4_nat46_in_node) = { + .name = "vyatta:ipv4-nat46-in", + .type = PL_PROC, + .handler = ipv4_nat46_in_process, + .num_next = IPV4_NAT46_IN_NUM, + .next = { + [IPV4_NAT46_IN_ACCEPT] = "term-noop", + [IPV4_NAT46_IN_TO_V6] = "term-v4-to-v6", + [IPV4_NAT46_IN_DROP] = "term-drop", + } +}; + +/* Register Node */ +PL_REGISTER_NODE(ipv6_nat46_out_node) = { + .name = "vyatta:ipv6-nat46-out", + .type = PL_PROC, + .handler = ipv6_nat46_out_process, + .num_next = IPV6_NAT46_OUT_NUM, + .next = { + [IPV6_NAT46_OUT_ACCEPT] = "term-noop", + [IPV6_NAT46_OUT_DROP] = "term-drop", + } +}; + +/* Register Features */ +PL_REGISTER_FEATURE(ipv6_nat64_in_feat) = { + .name = "vyatta:ipv6-nat64-in", + .node_name = "ipv6-nat64-in", + .feature_point = "ipv6-validate", + .id = PL_L3_V6_IN_FUSED_FEAT_NAT64, +}; + +/* Register Features */ +PL_REGISTER_FEATURE(ipv4_nat64_out_feat) = { + .name = "vyatta:ipv4-nat64-out", + .node_name = "ipv4-nat64-out", + .feature_point = "ipv4-out", + .id = PL_L3_V4_OUT_FUSED_FEAT_NAT64, + .visit_after = "vyatta:ipv4-fw-out", +}; + +/* Register Features */ +PL_REGISTER_FEATURE(ipv4_nat46_in_feat) = { + .name = 
"vyatta:ipv4-nat46-in", + .node_name = "ipv4-nat46-in", + .feature_point = "ipv4-validate", + .id = PL_L3_V4_IN_FUSED_FEAT_NAT46, +}; + +/* Register Features */ +PL_REGISTER_FEATURE(ipv6_nat46_out_feat) = { + .name = "vyatta:ipv6-nat46-out", + .node_name = "ipv6-nat46-out", + .feature_point = "ipv6-out", + .id = PL_L3_V6_OUT_FUSED_FEAT_NAT46, + .visit_after = "vyatta:ipv6-fw-out", +}; diff --git a/src/pipeline/nodes/l3_pbr.c b/src/pipeline/nodes/l3_pbr.c index 3c745c2b..5fca2095 100644 --- a/src/pipeline/nodes/l3_pbr.c +++ b/src/pipeline/nodes/l3_pbr.c @@ -2,7 +2,7 @@ * l3_pbr.c * * - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2016, 2017 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -24,7 +24,7 @@ #include "npf/npf.h" #include "npf/npf_if.h" #include "npf_shim.h" -#include "pktmbuf.h" +#include "pktmbuf_internal.h" #include "pl_common.h" #include "pl_fused.h" #include "route.h" @@ -32,7 +32,7 @@ #include "route_v6.h" #include "urcu.h" #include "util.h" -#include "vrf.h" +#include "vrf_internal.h" struct rte_mbuf; @@ -46,8 +46,8 @@ static ALWAYS_INLINE bool { if (v4) return rt_valid_tblid(pktmbuf_get_vrf(m), tblid); - else - return rt6_valid_tblid(pktmbuf_get_vrf(m), tblid); + + return rt6_valid_tblid(pktmbuf_get_vrf(m), tblid); } static ALWAYS_INLINE unsigned int @@ -70,7 +70,7 @@ ip_pbr_process_common(struct pl_packet *pkt, bool v4) return IPV4_PBR_ACCEPT; } else { struct ip6_hdr *ip6 = pkt->l3_hdr; - struct next_hop_v6 *nxt; + struct next_hop *nxt; nxt = rt6_lookup_fast(vrf, &ip6->ip6_dst, RT_TABLE_MAIN, @@ -86,12 +86,12 @@ ip_pbr_process_common(struct pl_packet *pkt, bool v4) npf_result_t result = npf_hook_notrack(npf_get_ruleset(npf_config, NPF_RS_PBR), &m, ifp, PFIL_IN, 0, - v4 ? htons(ETHER_TYPE_IPv4) - : htons(ETHER_TYPE_IPv6)); + v4 ? 
htons(RTE_ETHER_TYPE_IPV4) + : htons(RTE_ETHER_TYPE_IPV6), NULL); if (unlikely(m != pkt->mbuf)) { pkt->mbuf = m; - pkt->l3_hdr = pktmbuf_mtol3(m, void *); + pkt->l3_hdr = dp_pktmbuf_mtol3(m, void *); } if (unlikely(result.decision == NPF_DECISION_BLOCK)) @@ -100,20 +100,6 @@ ip_pbr_process_common(struct pl_packet *pkt, bool v4) if (result.tag_set) pkt->tblid = result.tag; - /* - * Tableids in PBR range must be mapped to kernel table - * id for non-default VRF. - */ - vrfid = pktmbuf_get_vrf(pkt->mbuf); - vrf = vrf_get_rcu_fast(vrfid); - if (vrfid != VRF_DEFAULT_ID && - tableid_in_pbr_range(pkt->tblid)) { - if (vrf && vrf->v_pbrtablemap[pkt->tblid]) - pkt->tblid = vrf->v_pbrtablemap[pkt->tblid]; - else - return v4 ? IPV4_PBR_DROP : IPV6_PBR_DROP; - } - if (unlikely(!ip_pbr_is_tblid_valid(pkt->mbuf, pkt->tblid, v4))) return v4 ? IPV4_PBR_DROP : IPV6_PBR_DROP; @@ -123,13 +109,13 @@ ip_pbr_process_common(struct pl_packet *pkt, bool v4) ALWAYS_INLINE unsigned int -ipv4_pbr_process(struct pl_packet *pkt) +ipv4_pbr_process(struct pl_packet *pkt, void *context __unused) { return ip_pbr_process_common(pkt, V4_PKT); } ALWAYS_INLINE unsigned int -ipv6_pbr_process(struct pl_packet *pkt) +ipv6_pbr_process(struct pl_packet *pkt, void *context __unused) { return ip_pbr_process_common(pkt, V6_PKT); } diff --git a/src/pipeline/nodes/l3_tcp_mss.c b/src/pipeline/nodes/l3_tcp_mss.c index 7c822049..7bc6853c 100644 --- a/src/pipeline/nodes/l3_tcp_mss.c +++ b/src/pipeline/nodes/l3_tcp_mss.c @@ -1,7 +1,7 @@ /* * TCP MSS Clamp pipeline feature node * - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. 
* * SPDX-License-Identifier: LGPL-2.1-only */ @@ -27,7 +27,7 @@ #include "if_var.h" #include "in_cksum.h" #include "npf/npf_mbuf.h" -#include "pktmbuf.h" +#include "pktmbuf_internal.h" #include "pl_common.h" #include "pl_fused.h" #include "pl_node.h" @@ -103,7 +103,7 @@ tcp_fetch_mss(struct rte_mbuf *mbuf, uint16_t l3_len, int opts_len, * buffer. This returns NULL if the packet is fragmented such that * all the requested data is not in the packet. */ - opts = (uint8_t *)rte_pktmbuf_read(mbuf, pktmbuf_l2_len(mbuf) + + opts = (uint8_t *)rte_pktmbuf_read(mbuf, dp_pktmbuf_l2_len(mbuf) + l3_len + sizeof(struct tcphdr), opts_len, buf); if (!opts) @@ -201,7 +201,7 @@ tcp_mss_clamp(struct rte_mbuf **mbuf, enum tcp_mss_af af, struct ifnet *ifp, */ rc = pktmbuf_prepare_for_header_change( mbuf, - pktmbuf_l2_len(*mbuf) + + dp_pktmbuf_l2_len(*mbuf) + l3_len + sizeof(struct tcphdr) + opts_len); if (rc) @@ -209,7 +209,7 @@ tcp_mss_clamp(struct rte_mbuf **mbuf, enum tcp_mss_af af, struct ifnet *ifp, /* Update MSS in mbuf */ n_ptr = rte_pktmbuf_mtod_offset(*mbuf, char *, - pktmbuf_l2_len(*mbuf) + l3_len); + dp_pktmbuf_l2_len(*mbuf) + l3_len); rc = nbuf_advstore(mbuf, &n_ptr, mss_offset, sizeof(mss), &mss); if (rc) return; @@ -223,7 +223,7 @@ tcp_mss_clamp(struct rte_mbuf **mbuf, enum tcp_mss_af af, struct ifnet *ifp, /* Update TCP checksum in mbuf */ n_ptr = rte_pktmbuf_mtod_offset(*mbuf, char *, - pktmbuf_l2_len(*mbuf) + l3_len); + dp_pktmbuf_l2_len(*mbuf) + l3_len); rc = nbuf_advfetch(mbuf, &n_ptr, offsetof(struct tcphdr, check), @@ -283,7 +283,7 @@ tcp_mss_process_common(struct rte_mbuf **mbuf, uint8_t *l3_hdr, * IPv4 input node */ ALWAYS_INLINE unsigned int -ipv4_tcp_mss_in_process(struct pl_packet *pkt) +ipv4_tcp_mss_in_process(struct pl_packet *pkt, void *context __unused) { struct rte_mbuf *mbuf = pkt->mbuf; struct iphdr *ip = pkt->l3_hdr; @@ -302,7 +302,7 @@ ipv4_tcp_mss_in_process(struct pl_packet *pkt) /* mbuf may have changed */ if (mbuf != pkt->mbuf) { pkt->mbuf = mbuf; 
- pkt->l3_hdr = pktmbuf_mtol3(mbuf, void *); + pkt->l3_hdr = dp_pktmbuf_mtol3(mbuf, void *); } return IPV4_TCP_MSS_IN_CONTINUE; @@ -312,7 +312,7 @@ ipv4_tcp_mss_in_process(struct pl_packet *pkt) * IPv6 input node */ ALWAYS_INLINE unsigned int -ipv6_tcp_mss_in_process(struct pl_packet *pkt) +ipv6_tcp_mss_in_process(struct pl_packet *pkt, void *context __unused) { struct rte_mbuf *mbuf = pkt->mbuf; uint8_t ipproto; @@ -326,14 +326,14 @@ ipv6_tcp_mss_in_process(struct pl_packet *pkt) if (ipproto != IPPROTO_TCP) return IPV6_TCP_MSS_IN_CONTINUE; - l3_len -= pktmbuf_l2_len(pkt->mbuf); + l3_len -= dp_pktmbuf_l2_len(pkt->mbuf); tcp_mss_process_common(&mbuf, pkt->l3_hdr, TCP_MSS_V6, pkt->in_ifp, l3_len); /* mbuf may have changed */ if (mbuf != pkt->mbuf) { pkt->mbuf = mbuf; - pkt->l3_hdr = pktmbuf_mtol3(mbuf, void *); + pkt->l3_hdr = dp_pktmbuf_mtol3(mbuf, void *); } return IPV6_TCP_MSS_IN_CONTINUE; @@ -379,7 +379,7 @@ PL_REGISTER_FEATURE(ipv6_tcp_mss_in_feat) = { * IPv4 output node */ ALWAYS_INLINE unsigned int -ipv4_tcp_mss_out_process(struct pl_packet *pkt) +ipv4_tcp_mss_out_process(struct pl_packet *pkt, void *context __unused) { struct rte_mbuf *mbuf = pkt->mbuf; struct iphdr *ip = pkt->l3_hdr; @@ -398,7 +398,7 @@ ipv4_tcp_mss_out_process(struct pl_packet *pkt) /* mbuf may have changed */ if (mbuf != pkt->mbuf) { pkt->mbuf = mbuf; - pkt->l3_hdr = pktmbuf_mtol3(mbuf, void *); + pkt->l3_hdr = dp_pktmbuf_mtol3(mbuf, void *); } return IPV4_TCP_MSS_OUT_CONTINUE; @@ -408,7 +408,7 @@ ipv4_tcp_mss_out_process(struct pl_packet *pkt) * IPv6 output node */ ALWAYS_INLINE unsigned int -ipv6_tcp_mss_out_process(struct pl_packet *pkt) +ipv6_tcp_mss_out_process(struct pl_packet *pkt, void *context __unused) { struct rte_mbuf *mbuf = pkt->mbuf; uint8_t ipproto; @@ -422,14 +422,14 @@ ipv6_tcp_mss_out_process(struct pl_packet *pkt) if (ipproto != IPPROTO_TCP) return IPV6_TCP_MSS_OUT_CONTINUE; - l3_len -= pktmbuf_l2_len(pkt->mbuf); + l3_len -= dp_pktmbuf_l2_len(pkt->mbuf); 
tcp_mss_process_common(&mbuf, pkt->l3_hdr, TCP_MSS_V6, pkt->out_ifp, l3_len); /* mbuf may have changed */ if (mbuf != pkt->mbuf) { pkt->mbuf = mbuf; - pkt->l3_hdr = pktmbuf_mtol3(mbuf, void *); + pkt->l3_hdr = dp_pktmbuf_mtol3(mbuf, void *); } return IPV6_TCP_MSS_OUT_CONTINUE; @@ -598,7 +598,7 @@ tcp_mss_feat_enable_cmd(TCPMSSConfig *tcpmss_msg, struct pb_msg *msg) struct ifnet *ifp; int rc; - ifp = ifnet_byifname(tcpmss_msg->ifname); + ifp = dp_ifnet_byifname(tcpmss_msg->ifname); if (!ifp) { enum tcp_mss_af af = TCP_MSS_V4; if (tcpmss_msg->af == TCPMSSCONFIG__ADDRESS_FAMILY__TCP_MSS_V6) @@ -609,9 +609,6 @@ tcp_mss_feat_enable_cmd(TCPMSSConfig *tcpmss_msg, struct pb_msg *msg) return rc; } - assert(ARRAY_SIZE(ifp->tcp_mss_type) == TCP_MSS_AF_SIZE); - assert(ARRAY_SIZE(ifp->tcp_mss_value) == TCP_MSS_AF_SIZE); - if (tcpmss_msg->mtu_option == TCPMSSCONFIG__MTUTYPE__MTU) { ifp->tcp_mss_type[tcpmss_msg->af] = TCP_MSS_MTU; ifp->tcp_mss_value[tcpmss_msg->af] = 0; @@ -621,14 +618,14 @@ tcp_mss_feat_enable_cmd(TCPMSSConfig *tcpmss_msg, struct pb_msg *msg) else if (tcpmss_msg->mtu_option == TCPMSSCONFIG__MTUTYPE__LIMIT) ifp->tcp_mss_type[tcpmss_msg->af] = TCP_MSS_LIMIT; else { - pb_cmd_err(msg, "Bad option %d\n", - tcpmss_msg->mtu_option); + dp_pb_cmd_err(msg, "Bad option %d\n", + tcpmss_msg->mtu_option); return -EINVAL; } /* 'val' is allowed to be 1-UINT16_MAX */ if (tcpmss_msg->value == 0 || tcpmss_msg->value > UINT16_MAX) { - pb_cmd_err(msg, "Bad value %d\n", tcpmss_msg->value); + dp_pb_cmd_err(msg, "Bad value %d\n", tcpmss_msg->value); return -EINVAL; } @@ -663,9 +660,10 @@ tcp_mss_feat_disable_cmd(TCPMSSConfig *tcpmss_msg, struct pb_msg *msg) if (!ret) return 1; - ifp = ifnet_byifname(tcpmss_msg->ifname); + ifp = dp_ifnet_byifname(tcpmss_msg->ifname); if (!ifp) { - pb_cmd_err(msg, "Missing interface %s\n", tcpmss_msg->ifname); + dp_pb_cmd_err(msg, "Missing interface %s\n", + tcpmss_msg->ifname); return -EINVAL; } @@ -712,7 +710,7 @@ tcp_mss_feat_cmd(struct pb_msg *msg) * 
Replay any stored configuration now that the interface has been created */ static void -tcp_mss_event_if_index_set(struct ifnet *ifp, uint32_t ifindex __unused) +tcp_mss_event_if_index_set(struct ifnet *ifp) { struct tcp_mss_if_list_entry *le; diff --git a/src/pipeline/nodes/l3_v4_cgnat.c b/src/pipeline/nodes/l3_v4_cgnat.c index c7d28a7f..b8265679 100644 --- a/src/pipeline/nodes/l3_v4_cgnat.c +++ b/src/pipeline/nodes/l3_v4_cgnat.c @@ -1,7 +1,7 @@ /* * l3_v4_cgnat.c * - * Copyright (c) 2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2019-2021, AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only */ @@ -19,57 +19,32 @@ #include "compiler.h" #include "ether.h" #include "if_var.h" -#include "pktmbuf.h" +#include "pktmbuf_internal.h" #include "pl_common.h" #include "pl_fused.h" #include "urcu.h" #include "util.h" +#include "ip_funcs.h" #include "ip_icmp.h" #include "in_cksum.h" #include "npf/npf.h" #include "npf/npf_mbuf.h" -#include "npf/nat/nat_pool.h" +#include "npf/nat/nat_pool_public.h" #include "npf/cgnat/cgn.h" #include "npf/apm/apm.h" -#include "npf/cgnat/cgn_errno.h" +#include "npf/cgnat/cgn_rc.h" #include "npf/cgnat/cgn_if.h" #include "npf/cgnat/cgn_map.h" #include "npf/cgnat/cgn_mbuf.h" #include "npf/cgnat/cgn_policy.h" +#include "npf/cgnat/cgn_public.h" #include "npf/cgnat/cgn_session.h" #include "npf/cgnat/cgn_source.h" +#include "npf/cgnat/cgn_test.h" -/* - * CGNAT Error Counters. - * - * Counts fall into three types: - * - * 1. Packets that did not match a cgnat policy - * 2. Packets unsuitable for translation - * 3. Packets that we tried to translate, but failed. 
- */ -rte_atomic64_t cgn_errors[CGN_DIR_SZ][CGN_ERRNO_SZ]; - -/* Global counters */ -rte_atomic64_t cgn_hairpinned_pkts; /* Hairpinned packets */ - -static inline void _cgn_error_inc(int error, int dir) -{ - if (error < 0) - error = -error; - if (error > CGN_ERRNO_LAST) - error = CGN_ERR_UNKWN; - rte_atomic64_inc(&cgn_errors[dir][error]); -} - -void cgn_error_inc(int error, int dir) -{ - _cgn_error_inc(error, dir); -} - enum cgnat_result { CGNAT_DROP, CGNAT_DROP_NO_MAP, /* Failed to get a mapping */ @@ -78,85 +53,93 @@ enum cgnat_result { CGNAT_REFLECT, }; +#include "npf/npf_if.h" +#include "npf/alg/alg_npf.h" + +static inline bool +ipv4_cgnat_out_bypass(struct ifnet *ifp, struct rte_mbuf *mbuf) +{ + /* Check bypass enable/disable option */ + if (likely(!cgn_snat_alg_bypass_gbl)) + return false; + + /* Is SNAT configured on interface? */ + if (!npf_snat_active(ifp)) + return false; + + /* Does pkt match an ALG session or tuple? */ + if (!npf_alg_bypass_cgnat(ifp, mbuf)) + return false; + + return true; +} + /* - * cgnat_try_initial. - * - * Currently this is only called with dir CGN_DIR_OUT, but that might not - * always be the case. + * cgnat_try_initial. Sessions are always created in an 'outbound' context. 
*/ static struct cgn_session * -cgnat_try_initial(struct ifnet *ifp, struct cgn_packet *cpk, int dir, - int *error) +cgnat_try_initial(struct ifnet *ifp, struct cgn_packet *cpk, + struct rte_mbuf *mbuf, int *error) { - struct cgn_source *src = NULL; struct cgn_session *cse; struct cgn_policy *cp; int rc = 0; - uint32_t oaddr, taddr; - uint16_t tport; vrfid_t vrfid = cpk->cpk_vrfid; - if (dir == CGN_DIR_OUT) - oaddr = cpk->cpk_saddr; - else - oaddr = cpk->cpk_daddr; + /* Mapping info */ + struct cgn_map cmi = { + .cmi_reserved = false, + .cmi_proto = cpk->cpk_proto, + .cmi_oid = cpk->cpk_sid, + .cmi_oaddr = cpk->cpk_saddr, + .cmi_tid = 0, + .cmi_taddr = 0, + .cmi_src = NULL, + }; - /* - * Lookup source address in policy list on the interface - */ - cp = cgn_if_find_policy_by_addr(ifp, oaddr); + /* Find policy from the source address */ + cp = cgn_if_find_policy_by_addr(ifp, cmi.cmi_oaddr); if (!cp) { *error = -CGN_PCY_ENOENT; - return NULL; + goto error; + } + + /* Should SNAT-ALG pkts bypass CGNAT? */ + if (unlikely(ipv4_cgnat_out_bypass(ifp, mbuf))) { + *error = -CGN_PCY_BYPASS; + goto error; } /* Check if session table is full *before* getting a mapping. */ if (unlikely(cgn_session_table_full)) { *error = -CGN_S1_ENOSPC; - return NULL; + goto error; } /* Allocate public address and port */ - rc = cgn_map_get(cp, vrfid, cpk->cpk_proto, oaddr, - &taddr, &tport, &src); + rc = cgn_map_get(&cmi, cp, vrfid); if (rc) { *error = rc; - return NULL; + goto error; } /* Create a session. 
*/ - cse = cgn_session_establish(cpk, dir, taddr, tport, error, src); - if (!cse) { - /* Release mapping */ - cgn_map_put(cp->cp_pool, vrfid, dir, cpk->cpk_proto, oaddr, - taddr, tport); - return NULL; - } - - return cse; -} + cse = cgn_session_establish(cpk, &cmi, error); + if (!cse) + goto error; -/* - * Create a mapping and session via control plane - */ -struct cgn_session * -cgn_session_map(struct ifnet *ifp, struct cgn_packet *cpk, int dir, - int *error) -{ - struct cgn_session *cse; + /* Check if we want to record sub-sessions */ + cgn_session_try_enable_sub_sess(cse, cp, cmi.cmi_oaddr); - cse = cgnat_try_initial(ifp, cpk, dir, error); - if (!cse || *error != 0) - return NULL; + return cse; - *error = cgn_session_activate(cse, cpk, dir); +error: + if (cmi.cmi_reserved) + /* Release mapping */ + cgn_map_put(&cmi, vrfid); - if (*error) { - cgn_session_destroy(cse, false); - return NULL; - } - return cse; + return NULL; } /* @@ -175,7 +158,7 @@ cgn_session_map(struct ifnet *ifp, struct cgn_packet *cpk, int dir, */ static ALWAYS_INLINE void cgn_translate_at(struct cgn_packet *cpk, struct cgn_session *cse, - int dir, void *n_ptr, bool embd, bool undo) + enum cgn_dir dir, void *n_ptr, bool embd, bool undo) { uint32_t taddr; uint16_t tport; @@ -257,7 +240,7 @@ cgn_translate_at(struct cgn_packet *cpk, struct cgn_session *cse, */ static void cgn_untranslate_at(struct cgn_packet *cpk, struct cgn_session *cse, - int dir, void *n_ptr) + enum cgn_dir dir, void *n_ptr) { cgn_translate_at(cpk, cse, dir, n_ptr, false, true); } @@ -268,7 +251,7 @@ cgn_untranslate_at(struct cgn_packet *cpk, struct cgn_session *cse, * Translate the outer IP header of an ICMP error message. 
*/ static void -cgn_translate_l3_at(struct cgn_packet *cpk, int dir, void *n_ptr, +cgn_translate_l3_at(struct cgn_packet *cpk, enum cgn_dir dir, void *n_ptr, uint32_t new_addr) { uint32_t old_addr; @@ -319,13 +302,13 @@ struct rte_mbuf *cgn_copy_or_clone_and_undo(struct rte_mbuf *mbuf, if (!cse) return NULL; - int dir = did_cgnat_out ? CGN_DIR_OUT : CGN_DIR_IN; + enum cgn_dir dir = did_cgnat_out ? CGN_DIR_OUT : CGN_DIR_IN; /* Validate the session */ struct ifnet *cse_ifp = (struct ifnet *)(did_cgnat_out ? out_ifp : in_ifp); - if (cgn_session_get_ifindex(cse) != cse_ifp->if_index) + if (cgn_session_ifindex(cse) != cse_ifp->if_index) return NULL; /* Make a clone or copy, and set up to untranslate */ @@ -342,14 +325,14 @@ struct rte_mbuf *cgn_copy_or_clone_and_undo(struct rte_mbuf *mbuf, struct cgn_packet cpk; /* Inspect the packet. */ - error = cgn_cache_all(unnat, pktmbuf_l2_len(unnat), cse_ifp, dir, + error = cgn_cache_all(unnat, dp_pktmbuf_l2_len(unnat), cse_ifp, dir, &cpk, false); if (error) { rte_pktmbuf_free(unnat); return NULL; } - void *n_ptr = pktmbuf_mtol3(unnat, void *); + void *n_ptr = dp_pktmbuf_mtol3(unnat, void *); cgn_untranslate_at(&cpk, cse, dir, n_ptr); @@ -360,12 +343,12 @@ struct rte_mbuf *cgn_copy_or_clone_and_undo(struct rte_mbuf *mbuf, * ICMP error message. Look for a cgnat'd embedded packet, and translate if * found. * - * If an error is encounted then we just return a single error number, + * If an error is encountered then we just return a single error number, * '-CGN_BUF_ICMP' regardless of the actual error. */ static int ipv4_cgnat_icmp_err(struct cgn_packet *ocpk, struct ifnet *ifp, - struct rte_mbuf **mbufp, int dir) + struct rte_mbuf **mbufp, enum cgn_dir dir) { struct rte_mbuf *mbuf = *mbufp; struct cgn_session *cse; @@ -375,7 +358,7 @@ ipv4_cgnat_icmp_err(struct cgn_packet *ocpk, struct ifnet *ifp, int error = 0; /* Find the start of the packet embedded in the ICMP error. 
*/ - offs = pktmbuf_l2_len(mbuf) + pktmbuf_l3_len(mbuf) + ICMP_MINLEN; + offs = dp_pktmbuf_l2_len(mbuf) + dp_pktmbuf_l3_len(mbuf) + ICMP_MINLEN; /* Inspect the embedded packet. */ error = cgn_cache_all(mbuf, offs, ifp, dir, &ecpk, true); @@ -392,7 +375,9 @@ ipv4_cgnat_icmp_err(struct cgn_packet *ocpk, struct ifnet *ifp, * Ensure both the outer and inner headers are both in the first mbuf * segment. */ - error = pktmbuf_prepare_for_header_change(mbufp, offs + ecpk.cpk_hlen); + error = pktmbuf_prepare_for_header_change(mbufp, offs + + ecpk.cpk_l3_len + + ecpk.cpk_l4_len); if (error) return -CGN_BUF_ICMP; @@ -452,7 +437,7 @@ ipv4_cgnat_icmp_err(struct cgn_packet *ocpk, struct ifnet *ifp, * With the embedded packet having now been translated, we adjust the * outer packet accordingly. */ - n_ptr = pktmbuf_mtol3(mbuf, void *); + n_ptr = dp_pktmbuf_mtol3(mbuf, void *); cgn_translate_l3_at(ocpk, dir, n_ptr, (dir == CGN_DIR_OUT) ? ecpk.cpk_daddr : ecpk.cpk_saddr); @@ -463,9 +448,9 @@ ipv4_cgnat_icmp_err(struct cgn_packet *ocpk, struct ifnet *ifp, if ((ecpk.cpk_info & CPK_ICMP_EMBD_SHORT) != 0) { struct icmp *icmp; - icmp = pktmbuf_mtol4(mbuf, struct icmp *); + icmp = dp_pktmbuf_mtol4(mbuf, struct icmp *); icmp->icmp_cksum = 0; - icmp->icmp_cksum = in4_cksum_mbuf(mbuf, NULL, icmp); + icmp->icmp_cksum = dp_in4_cksum_mbuf(mbuf, NULL, icmp); return 0; } @@ -486,7 +471,7 @@ ipv4_cgnat_icmp_err(struct cgn_packet *ocpk, struct ifnet *ifp, ~ecpk.cpk_saddr, ecpk.cpk_daddr); - ic = (struct icmp *)((char *)n_ptr + pktmbuf_l3_len(mbuf)); + ic = (struct icmp *)((char *)n_ptr + dp_pktmbuf_l3_len(mbuf)); ic->icmp_cksum = ip_fixup16_cksum(ic->icmp_cksum, 0, icmp_cksum_delta); @@ -500,13 +485,13 @@ ipv4_cgnat_icmp_err(struct cgn_packet *ocpk, struct ifnet *ifp, */ static enum cgnat_result ipv4_cgnat_common(struct cgn_packet *cpk, struct ifnet *ifp, - struct rte_mbuf **mbufp, int dir, int *errorp) + struct rte_mbuf **mbufp, enum cgn_dir dir, int *errorp) { enum cgnat_result result = 
CGNAT_ACCEPT; struct rte_mbuf *mbuf = NULL; struct cgn_session *cse; void *n_ptr = NULL; - bool new = false; + bool new_inactive_session = false; int error = 0; /* ICMP error message? */ @@ -520,7 +505,14 @@ ipv4_cgnat_common(struct cgn_packet *cpk, struct ifnet *ifp, } /* Look for existing session */ - cse = cgn_session_inspect(cpk, dir); + cse = cgn_session_inspect(cpk, dir, &error); + + /* + * One reason the inspect might fail is if max-dest-per-session is + * reached. + */ + if (unlikely(error < 0)) + goto error; if (unlikely(!cse)) { /* Only create sessions for outbound flows */ @@ -530,10 +522,10 @@ ipv4_cgnat_common(struct cgn_packet *cpk, struct ifnet *ifp, } /* Get policy and mapping. Create a session. */ - cse = cgnat_try_initial(ifp, cpk, dir, &error); + cse = cgnat_try_initial(ifp, cpk, *mbufp, &error); if (!cse) goto error; - new = true; + new_inactive_session = true; } /* We can jump back here for hairpinned packets */ @@ -544,8 +536,9 @@ ipv4_cgnat_common(struct cgn_packet *cpk, struct ifnet *ifp, * in the first segment, or if the mbuf is shared. */ error = pktmbuf_prepare_for_header_change(mbufp, - pktmbuf_l2_len(*mbufp) + - cpk->cpk_hlen); + dp_pktmbuf_l2_len(*mbufp) + + cpk->cpk_l3_len + + cpk->cpk_l4_len); if (unlikely(error)) { error = -CGN_BUF_ENOMEM; goto error; @@ -553,7 +546,7 @@ ipv4_cgnat_common(struct cgn_packet *cpk, struct ifnet *ifp, /* mbuf might have changed above, so dereference here */ mbuf = *mbufp; - n_ptr = pktmbuf_mtol3(mbuf, void *); + n_ptr = dp_pktmbuf_mtol3(mbuf, void *); /* Translate */ cgn_translate_at(cpk, cse, dir, n_ptr, false, false); @@ -566,7 +559,7 @@ ipv4_cgnat_common(struct cgn_packet *cpk, struct ifnet *ifp, pktmbuf_mdata_set(mbuf, pkt_flags); - if (new) { + if (new_inactive_session) { /* Activate new session */ error = cgn_session_activate(cse, cpk, dir); if (unlikely(error)) @@ -577,6 +570,14 @@ ipv4_cgnat_common(struct cgn_packet *cpk, struct ifnet *ifp, * 3. 
No memory for nested session */ goto error; + + /* + * Session is now activated and in hash tables, so clear + * new_inactive_session boolean. If an error occurs after + * this point then the session will be cleaned-up by the + * garbage collector instead of being destroyed here. + */ + new_inactive_session = false; } /* @@ -593,15 +594,19 @@ ipv4_cgnat_common(struct cgn_packet *cpk, struct ifnet *ifp, if (unlikely(cgn_hairpinning_gbl && dir == CGN_DIR_OUT)) { struct cgn_session *hp_cse; - hp_cse = cgn_session_lookup(cpk, CGN_DIR_IN); + /* Change pkt cache hash key to the dest addr and port */ + cpk->cpk_key.k_addr = cpk->cpk_daddr; + cpk->cpk_key.k_port = cpk->cpk_did; + + hp_cse = cgn_session_lookup(&cpk->cpk_key, CGN_DIR_IN); if (hp_cse != NULL) { cse = hp_cse; - error = cgn_cache_all(mbuf, pktmbuf_l2_len(mbuf), ifp, - dir, cpk, false); + error = cgn_cache_all(mbuf, dp_pktmbuf_l2_len(mbuf), + ifp, dir, cpk, false); if (error) goto error; - rte_atomic64_inc(&cgn_hairpinned_pkts); + cgn_rc_inc(CGN_DIR_OUT, CGN_HAIRPINNED); dir = CGN_DIR_IN; result = CGNAT_REFLECT; goto translate; @@ -624,7 +629,7 @@ ipv4_cgnat_common(struct cgn_packet *cpk, struct ifnet *ifp, * cleans up after itself if it encounters an error, so all we need to * do here is destroy a new session, if one was created. */ - if (new) + if (new_inactive_session) cgn_session_destroy(cse, false); switch (error) { @@ -635,6 +640,12 @@ ipv4_cgnat_common(struct cgn_packet *cpk, struct ifnet *ifp, */ result = CGNAT_ACCEPT; break; + case -CGN_PCY_BYPASS: + /* + * Bypass CGNAT for packets matching SNAT-ALG flows. 
+ */ + result = CGNAT_ACCEPT; + break; case -CGN_BUF_PROTO: case -CGN_BUF_ICMP: result = CGNAT_DROP_NO_PROTO; @@ -651,29 +662,78 @@ ipv4_cgnat_common(struct cgn_packet *cpk, struct ifnet *ifp, return result; } +/* + * Unit-test wrapper around cgnat + */ +bool ipv4_cgnat_test(struct rte_mbuf **mbufp, struct ifnet *ifp, + enum cgn_dir dir, int *error) +{ + enum cgnat_result result; + struct rte_mbuf *mbuf = *mbufp; + struct cgn_packet cpk; + bool rv = true; + + /* Extract interesting fields from packet */ + *error = cgn_cache_all(mbuf, dp_pktmbuf_l2_len(mbuf), ifp, dir, + &cpk, false); + + if (likely(*error == 0)) { + result = ipv4_cgnat_common(&cpk, ifp, &mbuf, + dir, error); + + if (unlikely(mbuf != *mbufp)) + *mbufp = mbuf; + } else + /* Packet not suitable for translation */ + result = CGNAT_DROP; + + switch (result) { + case CGNAT_ACCEPT: + break; + + case CGNAT_DROP_NO_MAP: + case CGNAT_DROP_NO_PROTO: + /* fall through (No ICMP error sent for incoming traffic) */ + case CGNAT_DROP: + /* + * Allow packets that matched a firewall or nat session to + * bypass CGNAT drops + */ + if (pktmbuf_mdata_exists(mbuf, PKT_MDATA_SESSION)) { + *error = 0; + rv = true; + } else + rv = false; + break; + + case CGNAT_REFLECT: + break; + } + + if (unlikely(*error)) + cgn_rc_inc(CGN_DIR_IN, *error); + + return rv; +} + /* * Is the given address a CGNAT pool address? */ static bool cgn_is_pool_address(struct ifnet *ifp, uint32_t addr) { - struct cgn_intf *ci; + struct cds_list_head *policy_list; struct cgn_policy *cp; - /* Get cgnat interface structure. */ - ci = npf_if_get_cgn(ifp); - if (!ci) + /* Get cgnat policy list */ + policy_list = cgn_if_get_policy_list(ifp); + if (!policy_list) return false; - /* NAT pool addresses are in host-byte order */ - addr = ntohl(addr); - /* For each cgnat policy ... 
*/ - cds_list_for_each_entry_rcu(cp, &ci->ci_policy_list, cp_list_node) { - if (!cp->cp_pool || !cp->cp_pool->np_ranges) - continue; + cds_list_for_each_entry_rcu(cp, policy_list, cp_list_node) { /* Is addr in one of this pools address ranges? */ - if (nat_pool_addr_range(cp->cp_pool, addr) >= 0) + if (nat_pool_is_pool_addr(cp->cp_pool, addr)) return true; } return false; @@ -683,7 +743,7 @@ static bool cgn_is_pool_address(struct ifnet *ifp, uint32_t addr) * cgnat in */ ALWAYS_INLINE unsigned int -ipv4_cgnat_in_process(struct pl_packet *pkt) +ipv4_cgnat_in_process(struct pl_packet *pkt, void *context __unused) { struct ifnet *ifp = pkt->in_ifp; enum cgnat_result result; @@ -693,7 +753,7 @@ ipv4_cgnat_in_process(struct pl_packet *pkt) uint rc = IPV4_CGNAT_IN_ACCEPT; /* Extract interesting fields from packet */ - error = cgn_cache_all(mbuf, pktmbuf_l2_len(mbuf), ifp, CGN_DIR_IN, + error = cgn_cache_all(mbuf, dp_pktmbuf_l2_len(mbuf), ifp, CGN_DIR_IN, &cpk, false); if (likely(error == 0)) { @@ -702,7 +762,7 @@ ipv4_cgnat_in_process(struct pl_packet *pkt) if (unlikely(mbuf != pkt->mbuf)) { pkt->mbuf = mbuf; - pkt->l3_hdr = pktmbuf_mtol3(mbuf, void *); + pkt->l3_hdr = dp_pktmbuf_mtol3(mbuf, void *); } } else /* Packet not suitable for translation */ @@ -731,8 +791,25 @@ ipv4_cgnat_in_process(struct pl_packet *pkt) * in any NAT pool used by CGNAT policies on this interface. */ if (!cgn_is_pool_address(ifp, cpk.cpk_daddr)) { + cgn_rc_inc(CGN_DIR_IN, CGN_POOL_ENOENT); rc = IPV4_CGNAT_IN_ACCEPT; error = 0; + + } else if ((cpk.cpk_info & CPK_ICMP_ECHO_REQ) != 0) { + /* + * If pkt is an ICMP echo req sent to a CGNAT pool + * address then send an echo reply to the sender, and + * drop the original pkt. + */ + if (icmp_echo_reply_out(pkt->in_ifp, pkt->mbuf, true)) { + /* + * Echo reply successfully sent. Set 'error' + * so that these are accounted for, and then + * drop the original packet. 
+ */ + rc = IPV4_CGNAT_IN_DROP; + error = -CGN_ICMP_ECHOREQ; + } } break; @@ -741,7 +818,7 @@ ipv4_cgnat_in_process(struct pl_packet *pkt) } if (unlikely(error)) - _cgn_error_inc(error, CGN_DIR_IN); + cgn_rc_inc(CGN_DIR_IN, error); return rc; } @@ -749,18 +826,18 @@ ipv4_cgnat_in_process(struct pl_packet *pkt) /* * cgnat out */ -ALWAYS_INLINE unsigned int -ipv4_cgnat_out_process(struct pl_packet *pkt) +ALWAYS_INLINE unsigned int ipv4_cgnat_out_process(struct pl_packet *pkt, + void *context __unused) { struct ifnet *ifp = pkt->out_ifp; - enum cgnat_result result; struct rte_mbuf *mbuf = pkt->mbuf; + enum cgnat_result result; struct cgn_packet cpk; int error = 0; uint rc = IPV4_CGNAT_OUT_ACCEPT; /* Extract interesting fields from packet */ - error = cgn_cache_all(mbuf, pktmbuf_l2_len(mbuf), ifp, CGN_DIR_OUT, + error = cgn_cache_all(mbuf, dp_pktmbuf_l2_len(mbuf), ifp, CGN_DIR_OUT, &cpk, false); if (likely(error == 0)) { @@ -774,7 +851,7 @@ ipv4_cgnat_out_process(struct pl_packet *pkt) if (unlikely(mbuf != pkt->mbuf)) { pkt->mbuf = mbuf; - pkt->l3_hdr = pktmbuf_mtol3(mbuf, void *); + pkt->l3_hdr = dp_pktmbuf_mtol3(mbuf, void *); } } else /* Packet not suitable for translation */ @@ -815,7 +892,7 @@ ipv4_cgnat_out_process(struct pl_packet *pkt) } if (unlikely(error)) - _cgn_error_inc(error, CGN_DIR_OUT); + cgn_rc_inc(CGN_DIR_OUT, error); return rc; } diff --git a/src/pipeline/nodes/l3_v4_defrag.c b/src/pipeline/nodes/l3_v4_defrag.c index f7ab84d7..d79be1c3 100644 --- a/src/pipeline/nodes/l3_v4_defrag.c +++ b/src/pipeline/nodes/l3_v4_defrag.c @@ -1,7 +1,7 @@ /* * l3_v4_defrag.c * - * Copyright (c) 2018-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2018-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2016, 2017 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -18,7 +18,7 @@ #include "urcu.h" ALWAYS_INLINE unsigned int -ipv4_defrag_in_process(struct pl_packet *pkt) +ipv4_defrag_in_process(struct pl_packet *pkt, void *context __unused) { struct rte_mbuf *m = pkt->mbuf; /* Reassemble packets if required */ @@ -67,13 +67,13 @@ ipv4_defrag_out_internal(struct pl_packet *pkt) } ALWAYS_INLINE unsigned int -ipv4_defrag_out_process(struct pl_packet *pkt) +ipv4_defrag_out_process(struct pl_packet *pkt, void *context __unused) { return ipv4_defrag_out_internal(pkt); } ALWAYS_INLINE unsigned int -ipv4_defrag_out_spath_process(struct pl_packet *pkt) +ipv4_defrag_out_spath_process(struct pl_packet *pkt, void *context __unused) { return ipv4_defrag_out_internal(pkt); } @@ -109,7 +109,7 @@ PL_REGISTER_NODE(ipv4_defrag_out_spath_node) = { .handler = ipv4_defrag_out_spath_process, .num_next = IPV4_DEFRAG_OUT_SPATH_NUM, .next = { - [IPV4_DEFRAG_OUT_SPATH_ACCEPT] = "ipv4-fw-out", + [IPV4_DEFRAG_OUT_SPATH_ACCEPT] = "term-noop", [IPV4_DEFRAG_OUT_SPATH_FINISH] = "term-finish" } }; @@ -128,3 +128,10 @@ PL_REGISTER_FEATURE(ipv4_defrag_out_feat) = { .feature_point = "ipv4-out", .id = PL_L3_V4_OUT_FUSED_FEAT_DEFRAG, }; + +PL_REGISTER_FEATURE(ipv4_defrag_out_spath_feat) = { + .name = "vyatta:ipv4-defrag-out-spath", + .node_name = "ipv4-defrag-out-spath", + .feature_point = "ipv4-out-spath", + .id = PL_L3_V4_OUT_SPATH_FUSED_FEAT_DEFRAG, +}; diff --git a/src/pipeline/nodes/l3_v4_encap.c b/src/pipeline/nodes/l3_v4_encap.c index 8a6f87c5..aa2a1edd 100644 --- a/src/pipeline/nodes/l3_v4_encap.c +++ b/src/pipeline/nodes/l3_v4_encap.c @@ -1,7 +1,7 @@ /* * l3_v4_encap.c * - * Copyright (c) 2018-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2019-2020, AT&T Intellectual Property. All rights reserved. 
* * SPDX-License-Identifier: LGPL-2.1-only */ @@ -14,14 +14,16 @@ #include "compiler.h" #include "if_var.h" +#include "if_llatbl.h" #include "ip_funcs.h" +#include "ip_mcast.h" #include "pl_common.h" #include "pl_fused.h" #include "pl_node.h" #include "pl_nodes_common.h" #include "route.h" #include "route_flags.h" -#include "nh.h" +#include "nh_common.h" #include "arp.h" #include "snmp_mib.h" @@ -48,18 +50,26 @@ static ALWAYS_INLINE bool ipv4_encap_eth_from_nh4(struct rte_mbuf *mbuf, const struct next_hop *nh, in_addr_t addr) { - struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(mbuf, struct ether_hdr *); - struct ifnet *out_ifp = nh4_get_ifp(nh); /* Needed for VRRP */ + struct rte_ether_hdr *eth_hdr = + rte_pktmbuf_mtod(mbuf, struct rte_ether_hdr *); + struct ifnet *out_ifp = dp_nh_get_ifp(nh); /* Needed for VRRP */ - ether_addr_copy(&out_ifp->eth_addr, &eth_hdr->s_addr); + rte_ether_addr_copy(&out_ifp->eth_addr, &eth_hdr->s_addr); /* If already resolved, use the link level encap */ - struct llentry *lle = nh4_get_lle(nh); + struct llentry *lle = nh_get_lle(nh); if (likely(lle != NULL)) { if (llentry_copy_mac(lle, &eth_hdr->d_addr)) return true; } + /* Derive a multicast MAC address from the IP address */ + if (unlikely(nh->flags & RTF_MULTICAST)) { + mcast_dst_eth_addr_t eth_daddr = mcast_dst_eth_addr(addr); + rte_ether_addr_copy(&eth_daddr.as_addr, &eth_hdr->d_addr); + return true; + } + /* Not yet resolved, so try to do so */ if (likely(arpresolve_fast(out_ifp, mbuf, addr, &eth_hdr->d_addr) == 0)) return true; @@ -103,11 +113,13 @@ ipv4_encap_features(struct pl_packet *pkt, enum pl_mode mode) static ALWAYS_INLINE unsigned int ipv4_encap_process_internal(struct pl_packet *pkt, enum pl_mode mode) { - if (!ipv4_encap_features(pkt, mode)) - return IPV4_ENCAP_DROPPED; + struct ifnet *in_ifp = pkt->in_ifp; + + /* Only run the encap features if this is a forwarded packet */ + if (in_ifp && !ipv4_encap_features(pkt, mode)) + return IPV4_ENCAP_FEAT_CONSUME; struct next_hop *nh = pkt->nxt.v4; 
- struct ifnet *in_ifp = pkt->in_ifp; struct ifnet *out_ifp = pkt->out_ifp; struct rte_mbuf *mbuf = pkt->mbuf; uint16_t l2_proto = pkt->l2_proto; @@ -116,7 +128,7 @@ ipv4_encap_process_internal(struct pl_packet *pkt, enum pl_mode mode) in_addr_t addr; if (nh->flags & RTF_GATEWAY) { - addr = nh->gateway; + addr = nh->gateway.address.ip_v4.s_addr; } else { struct iphdr *ip; @@ -136,46 +148,56 @@ ipv4_encap_process_internal(struct pl_packet *pkt, enum pl_mode mode) if (unlikely(out_ifp->if_type == IFT_TUNNEL_GRE)) { if (unlikely(!gre_tunnel_encap(in_ifp, out_ifp, &addr, mbuf, l2_proto))) - return IPV4_ENCAP_FAIL; + return IPV4_ENCAP_NEIGH_RES_CONSUME; return IPV4_ENCAP_L2_OUT; } /* Assume all other interface types use ethernet encap. */ if (!ipv4_encap_eth_from_nh4(mbuf, nh, addr)) - return IPV4_ENCAP_FAIL; + return IPV4_ENCAP_NEIGH_RES_CONSUME; return IPV4_ENCAP_L2_OUT; } ALWAYS_INLINE unsigned int -ipv4_encap_process_common(struct pl_packet *pkt, enum pl_mode mode) +ipv4_encap_process_common(struct pl_packet *pkt, void *context __unused, + enum pl_mode mode) { struct ifnet *out_ifp = pkt->out_ifp; int rc = ipv4_encap_process_internal(pkt, mode); - if (rc == IPV4_ENCAP_L2_OUT) - IPSTAT_INC_IFP(out_ifp, IPSTATS_MIB_OUTFORWDATAGRAMS); + /* + * Either way the packet has been handed to "lower layers" to + * be transmitted. 
+ */ + if (rc == IPV4_ENCAP_L2_OUT || rc == IPV4_ENCAP_NEIGH_RES_CONSUME) { + if (pkt->nxt.v4->flags & RTF_MULTICAST) + IPSTAT_INC_IFP(out_ifp, IPSTATS_MIB_OUTMCASTPKTS); + else + IPSTAT_INC_IFP(out_ifp, IPSTATS_MIB_OUTFORWDATAGRAMS); + } return rc; } ALWAYS_INLINE unsigned int -ipv4_encap_only_process_common(struct pl_packet *pkt, enum pl_mode mode) +ipv4_encap_only_process_common(struct pl_packet *pkt, void *context __unused, + enum pl_mode mode) { return ipv4_encap_process_internal(pkt, mode); } ALWAYS_INLINE unsigned int -ipv4_encap_process(struct pl_packet *p) +ipv4_encap_process(struct pl_packet *p, void *context) { - return ipv4_encap_process_common(p, PL_MODE_REGULAR); + return ipv4_encap_process_common(p, context, PL_MODE_REGULAR); } ALWAYS_INLINE unsigned int -ipv4_encap_only_process(struct pl_packet *p) +ipv4_encap_only_process(struct pl_packet *p, void *context) { - return ipv4_encap_only_process_common(p, PL_MODE_REGULAR); + return ipv4_encap_only_process_common(p, context, PL_MODE_REGULAR); } static int @@ -189,27 +211,46 @@ ipv4_encap_feat_change(struct pl_node *node, action); } +static int +ipv4_encap_feat_change_all(struct pl_feature_registration *feat, + enum pl_node_feat_action action) +{ + return if_node_instance_feat_change_all(feat, action, + ipv4_encap_feat_change); +} + ALWAYS_INLINE bool ipv4_encap_feat_iterate(struct pl_node *node, bool first, - unsigned int *feature_id, void **context) + unsigned int *feature_id, void **context, + void **storage_ctx) { struct ifnet *ifp = ipv4_encap_node_to_ifp(node); + bool ret; + + ret = pl_node_feat_iterate_u8(&ifp->ip_encap_features, first, + feature_id, context); + if (ret) + *storage_ctx = if_node_instance_get_storage_internal( + ifp, + PL_FEATURE_POINT_IPV4_ENCAP_ID, + *feature_id); - return pl_node_feat_iterate_u8(&ifp->ip_encap_features, first, - feature_id, context); + return ret; } ALWAYS_INLINE bool ipv4_encap_only_feat_iterate(struct pl_node *node, bool first, - unsigned int *feature_id, void 
**context) + unsigned int *feature_id, void **context, + void **storage_ctx) { - return ipv4_encap_feat_iterate(node, first, feature_id, context); + return ipv4_encap_feat_iterate(node, first, feature_id, context, + storage_ctx); } static struct pl_node * ipv4_encap_node_lookup(const char *name) { - struct ifnet *ifp = ifnet_byifname(name); + struct ifnet *ifp = dp_ifnet_byifname(name); return ifp ? ifp_to_ipv4_encap_node(ifp) : NULL; } @@ -219,13 +260,18 @@ PL_REGISTER_NODE(ipv4_encap_node) = { .type = PL_PROC, .handler = ipv4_encap_process, .feat_change = ipv4_encap_feat_change, + .feat_change_all = ipv4_encap_feat_change_all, .feat_iterate = ipv4_encap_feat_iterate, .lookup_by_name = ipv4_encap_node_lookup, + .feat_reg_context = if_node_instance_register_storage, + .feat_unreg_context = if_node_instance_unregister_storage, + .feat_get_context = if_node_instance_get_storage, + .feat_setup_cleanup_cb = if_node_instance_set_cleanup_cb, .num_next = IPV4_ENCAP_NUM, .next = { [IPV4_ENCAP_L2_OUT] = "l2-out", - [IPV4_ENCAP_DROPPED] = "term-finish", - [IPV4_ENCAP_FAIL] = "term-finish", + [IPV4_ENCAP_FEAT_CONSUME] = "term-finish", + [IPV4_ENCAP_NEIGH_RES_CONSUME] = "term-finish", } }; @@ -238,9 +284,22 @@ PL_REGISTER_NODE(ipv4_encap_only_node) = { .num_next = IPV4_ENCAP_ONLY_NUM, .next = { [IPV4_ENCAP_ONLY_L2_OUT] = "term-noop", - [IPV4_ENCAP_ONLY_DROPPED] = "term-finish", - [IPV4_ENCAP_ONLY_FAIL] = "term-finish", + [IPV4_ENCAP_ONLY_FEAT_CONSUME] = "term-finish", + [IPV4_ENCAP_ONLY_NEIGH_RES_CONSUME] = "term-finish", } }; struct pl_node_registration *const ipv4_encap_node_ptr = &ipv4_encap_node; + +/* + * show features ipv4_encap [interface ] + */ +static int cmd_pl_show_feat_ipv4_encap(struct pl_command *cmd) +{ + return if_node_instance_feat_print(cmd, ipv4_encap_node_ptr); +} + +PL_REGISTER_OPCMD(pl_show_feat_ipv4_encap) = { + .cmd = "show features ipv4_encap", + .handler = cmd_pl_show_feat_ipv4_encap, +}; diff --git a/src/pipeline/nodes/l3_v4_gre.c 
b/src/pipeline/nodes/l3_v4_gre.c new file mode 100644 index 00000000..9a4804aa --- /dev/null +++ b/src/pipeline/nodes/l3_v4_gre.c @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2019-2020, AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ +#include "vrf_internal.h" +#include "if/gre.h" +#include "ip_funcs.h" + +#include "../pl_node.h" +#include "../pl_fused.h" + +ALWAYS_INLINE unsigned int +ipv4_gre_in_process(struct pl_packet *pkt, void *context __unused) +{ + struct rte_mbuf *m = pkt->mbuf; + struct iphdr *ip = iphdr(m); + int rc; + + rc = ip_gre_tunnel_in(&m, ip); + if (likely(rc == 0)) + return IPV4_GRE_CONSUME; + if (rc < 0) + return IPV4_GRE_DROP; + + pkt->mbuf = m; + return IPV4_GRE_ACCEPT; +} + +/* GRE decap feature */ +PL_REGISTER_NODE(ipv4_gre_in_node) = { + .name = "vyatta:ipv4-gre-in", + .type = PL_PROC, + .handler = ipv4_gre_in_process, + .num_next = IPV4_GRE_NUM, + .next = { + [IPV4_GRE_ACCEPT] = "term-noop", + [IPV4_GRE_DROP] = "term-drop", + [IPV4_GRE_CONSUME] = "term-finish", + } +}; + +PL_REGISTER_FEATURE(ipv4_gre_in_feat) = { + .name = "vyatta:ipv4-gre-in", + .node_name = "ipv4-gre-in", + .feature_point = "ipv4-l4", + .always_on = true, + .id = PL_L3_V4_L4_FUSED_FEAT_GRE_IN, + .feat_type = IPPROTO_GRE, +}; diff --git a/src/pipeline/nodes/l3_v4_ipsec.c b/src/pipeline/nodes/l3_v4_ipsec.c index 27ee5044..ffe19b8f 100644 --- a/src/pipeline/nodes/l3_v4_ipsec.c +++ b/src/pipeline/nodes/l3_v4_ipsec.c @@ -2,7 +2,7 @@ * l3_v4_ipsec.c * * - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2021, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2016, 2017 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -14,24 +14,26 @@ #include #include "compiler.h" +#include "crypto/crypto.h" #include "crypto/crypto_forward.h" #include "ip_funcs.h" -#include "nh.h" -#include "pktmbuf.h" +#include "nh_common.h" +#include "pktmbuf_internal.h" #include "pl_common.h" #include "pl_fused.h" ALWAYS_INLINE unsigned int -ipv4_ipsec_out_process(struct pl_packet *pkt) +ipv4_ipsec_out_process(struct pl_packet *pkt, void *context __unused) { struct ifnet *ifp = pkt->in_ifp; - union next_hop_v4_or_v6_ptr nh = {NULL}; + struct next_hop *nh = NULL; /* Returns true if packet was consumed by IPsec */ struct rte_mbuf *m = pkt->mbuf; if (unlikely(crypto_policy_check_outbound(ifp, &m, pkt->tblid, - htons(ETHER_TYPE_IPv4), &nh))) - return IPV4_IPSEC_CONSUME; + htons(RTE_ETHER_TYPE_IPV4), + &nh))) + return IPV4_IPSEC_OUT_CONSUME; /* * If the crypto code returned a next hop, then the policy @@ -39,14 +41,33 @@ ipv4_ipsec_out_process(struct pl_packet *pkt) * the next hop is pointing at. The packet will then be put back * in the crypto path. 
+ */ - if (unlikely(nh.v4 != NULL)) - pkt->nxt.v4 = nh.v4; + if (unlikely(nh != NULL)) + pkt->nxt.v4 = nh; if (unlikely(m != pkt->mbuf)) { pkt->mbuf = m; pkt->l3_hdr = iphdr(m); } - return IPV4_IPSEC_ACCEPT; + return IPV4_IPSEC_OUT_ACCEPT; +} + +ALWAYS_INLINE unsigned int +ipv4_ipsec_in_process(struct pl_packet *pkt, void *context __unused) +{ + struct rte_mbuf *m = pkt->mbuf; + struct iphdr *ip = iphdr(m); + struct ifnet *ifp = pkt->in_ifp; + int rc, spi; + + spi = crypto_retrieve_spi((unsigned char *)ip + + dp_pktmbuf_l3_len(m)); + rc = crypto_enqueue_inbound_v4(m, ip, ifp, spi); + if (likely(rc == 0)) + return IPV4_IPSEC_IN_CONSUME; + if (rc < 0) + return IPV4_IPSEC_IN_DROP; + + return IPV4_IPSEC_IN_ACCEPT; } /* Crypto encryption feature */ @@ -54,10 +75,10 @@ PL_REGISTER_NODE(ipv4_ipsec_out_node) = { .name = "vyatta:ipv4-ipsec-out", .type = PL_PROC, .handler = ipv4_ipsec_out_process, - .num_next = IPV4_IPSEC_NUM, + .num_next = IPV4_IPSEC_OUT_NUM, .next = { - [IPV4_IPSEC_ACCEPT] = "term-noop", - [IPV4_IPSEC_CONSUME] = "term-finish", + [IPV4_IPSEC_OUT_ACCEPT] = "term-noop", + [IPV4_IPSEC_OUT_CONSUME] = "term-finish", } }; @@ -67,3 +88,25 @@ PL_REGISTER_FEATURE(ipv4_ipsec_out_feat) = { .feature_point = "ipv4-route-lookup", .id = PL_L3_V4_ROUTE_LOOKUP_FUSED_FEAT_IPSEC, }; + +/* Crypto inbound (ESP) feature */ +PL_REGISTER_NODE(ipv4_ipsec_in_node) = { + .name = "vyatta:ipv4-ipsec-in", + .type = PL_PROC, + .handler = ipv4_ipsec_in_process, + .num_next = IPV4_IPSEC_IN_NUM, + .next = { + [IPV4_IPSEC_IN_ACCEPT] = "term-noop", + [IPV4_IPSEC_IN_CONSUME] = "term-finish", + [IPV4_IPSEC_IN_DROP] = "term-drop", + } +}; + +PL_REGISTER_FEATURE(ipv4_ipsec_in_feat) = { + .name = "vyatta:ipv4-ipsec-in", + .node_name = "ipv4-ipsec-in", + .feature_point = "ipv4-l4", + .always_on = true, + .id = PL_L3_V4_L4_FUSED_FEAT_IPSEC_IN, + .feat_type = IPPROTO_ESP, +}; diff --git a/src/pipeline/nodes/l3_v4_l2tpv3.c b/src/pipeline/nodes/l3_v4_l2tpv3.c new file mode 100644 index 00000000..0c6641bf 
--- /dev/null +++ b/src/pipeline/nodes/l3_v4_l2tpv3.c @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2019-2021, AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ +#include "compiler.h" +#include "vrf_internal.h" +#include "if/gre.h" +#include "ip_funcs.h" +#include "l2tp/l2tpeth.h" + +#include "../pl_node.h" +#include "../pl_fused.h" + +ALWAYS_INLINE unsigned int +ipv4_l2tpv3_in_process(struct pl_packet *pkt, void *context __unused) +{ + struct rte_mbuf *m = pkt->mbuf; + struct iphdr *ip = iphdr(m); + int rc; + + rc = l2tp_ipv4_recv_encap(m, ip); + if (likely(rc == 0)) + return IPV4_L2TPV3_CONSUME; + if (rc < 0) + return IPV4_L2TPV3_DROP; + + return IPV4_L2TPV3_ACCEPT; +} + +/* L2TPv3 input feature */ +PL_REGISTER_NODE(ipv4_l2tpv3_in_node) = { + .name = "vyatta:ipv4-l2tpv3-in", + .type = PL_PROC, + .handler = ipv4_l2tpv3_in_process, + .num_next = IPV4_L2TPV3_NUM, + .next = { + [IPV4_L2TPV3_ACCEPT] = "term-noop", + [IPV4_L2TPV3_DROP] = "term-drop", + [IPV4_L2TPV3_CONSUME] = "term-finish", + } +}; + +PL_REGISTER_FEATURE(ipv4_l2tpv3_in_feat) = { + .name = "vyatta:ipv4-l2tpv3-in", + .node_name = "ipv4-l2tpv3-in", + .feature_point = "ipv4-l4", + .always_on = true, + .id = PL_L3_V4_L4_FUSED_FEAT_L2TPV3_IN, + .feat_type = IPPROTO_L2TPV3, +}; diff --git a/src/pipeline/nodes/l3_v4_l4.c b/src/pipeline/nodes/l3_v4_l4.c index a4f687d3..597ca724 100644 --- a/src/pipeline/nodes/l3_v4_l4.c +++ b/src/pipeline/nodes/l3_v4_l4.c @@ -2,65 +2,147 @@ * l3_v4_l4.c * * - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2021, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2016, 2017 by Brocade Communications Systems, Inc. * All rights reserved. 
* * SPDX-License-Identifier: LGPL-2.1-only */ #include +#include +#include #include "compiler.h" +#include "crypto/crypto_forward.h" +#include "crypto/crypto.h" #include "ip_funcs.h" +#include "l2tp/l2tpeth.h" #include "pl_common.h" #include "pl_fused.h" +#include "pl_node.h" +#include "pl_nodes_common.h" +#include "vrf_internal.h" struct pl_node; -ALWAYS_INLINE unsigned int -ipv4_l4_process_common(struct pl_packet *pkt, enum pl_mode mode __unused) +/* Size of the feat hash table */ +#define L4_FEAT_HASH_MIN 4 +#define L4_FEAT_HASH_MAX 32 + +static struct cds_lfht *l3_v4_l4_feat_ht; + +static inline int +ipv4_l4_feat_match(struct cds_lfht_node *node, const void *key) { - int rc; - struct rte_mbuf *m = pkt->mbuf; - struct ifnet *ifp = pkt->in_ifp; + const uint32_t *feat_type = key; + const struct pl_feature_registration *feat; - rc = l4_input(&m, ifp); - if (rc == 0) - return IPV4_L4_CONSUME; - else if (rc > 0) { - pkt->mbuf = m; - return IPV4_L4_ACCEPT; - } else - return IPV4_L4_DROP; + feat = caa_container_of(node, const struct pl_feature_registration, + feat_node); + + if (feat && feat->feat_type == *feat_type) + return 1; + + return 0; } -ALWAYS_INLINE unsigned int -ipv4_l4_process(struct pl_packet *pkt) +ALWAYS_INLINE int +ipv4_l4_find_feat_id_by_type(uint32_t feat_type) +{ + struct cds_lfht_iter iter; + struct cds_lfht_node *ret_node; + const struct pl_feature_registration *feat; + + if (!l3_v4_l4_feat_ht) + return 0; + + cds_lfht_lookup(l3_v4_l4_feat_ht, feat_type, + ipv4_l4_feat_match, &feat_type, + &iter); + + ret_node = cds_lfht_iter_get_node(&iter); + if (ret_node) { + feat = caa_container_of(ret_node, + struct pl_feature_registration, + feat_node); + return feat->id; + } + return 0; +} + +static ALWAYS_INLINE int +ipv4_l4_feat_add_type(struct pl_node_registration *node __unused, + struct pl_feature_registration *feat, + uint32_t feat_type) { - return ipv4_l4_process_common(pkt, PL_MODE_REGULAR); + struct cds_lfht_node *ret_node; + + if 
(!l3_v4_l4_feat_ht) { + l3_v4_l4_feat_ht = cds_lfht_new(L4_FEAT_HASH_MIN, + L4_FEAT_HASH_MIN, + L4_FEAT_HASH_MAX, + CDS_LFHT_AUTO_RESIZE, + NULL); + if (l3_v4_l4_feat_ht == NULL) + rte_panic("Can't allocate ft node hash\n"); + } + ret_node = cds_lfht_add_unique(l3_v4_l4_feat_ht, feat_type, + ipv4_l4_feat_match, &feat_type, + &feat->feat_node); + return (ret_node != &feat->feat_node) ? EEXIST : 0; } -static int -ipv4_l4_feat_change(struct pl_node *node __unused, - struct pl_feature_registration *feat __unused, - enum pl_node_feat_action action __unused) +static ALWAYS_INLINE bool +ipv4_l4_pre_process(struct pl_packet *pkt, struct ifnet *ifp) { - /* - * TODO - Will be added as hash table where only the relevant feature - * will be called based on the registered proto - */ - return 0; + struct rte_mbuf *m = pkt->mbuf; + + if (crypto_policy_check_inbound_terminating(ifp, &m, + htons(RTE_ETHER_TYPE_IPV4))) + return 0; + + return true; } -ALWAYS_INLINE bool -ipv4_l4_feat_iterate(struct pl_node *node __unused, bool first __unused, - unsigned int *feature_id __unused, void **context __unused) +ALWAYS_INLINE unsigned int +ipv4_l4_process_common(struct pl_packet *pkt, void *context __unused, + enum pl_mode mode) +{ + struct rte_mbuf *m = pkt->mbuf; + struct iphdr *ip = iphdr(m); + struct ifnet *ifp = pkt->in_ifp; + uint32_t feat_type = ip->protocol; + + if (!ipv4_l4_pre_process(pkt, ifp)) + return IPV4_L4_CONSUME; + + switch (mode) { + case PL_MODE_FUSED: + if (!pipeline_fused_ipv4_l4_features( + pkt, + ipv4_l4_find_feat_id_by_type_fused(feat_type))) + return IPV4_L4_CONSUME; + break; + case PL_MODE_FUSED_NO_DYN_FEATS: + if (!pipeline_fused_ipv4_l4_no_dyn_features( + pkt, + ipv4_l4_find_feat_id_by_type_fused_no_dyn_features(feat_type))) + return IPV4_L4_CONSUME; + break; + case PL_MODE_REGULAR: + if (!pl_node_invoke_feature_by_type( + ipv4_l4_node_ptr, + feat_type, pkt)) + return IPV4_L4_CONSUME; + break; + } + return IPV4_L4_ACCEPT; +} + +ALWAYS_INLINE unsigned int 
+ipv4_l4_process(struct pl_packet *pkt, void *context) { - /* - * TODO - Will be added as hash table where only the relevant feature - * will be called based on the registered proto - */ - return false; + return ipv4_l4_process_common(pkt, context, PL_MODE_REGULAR); } /* Register Node */ @@ -68,12 +150,44 @@ PL_REGISTER_NODE(ipv4_l4_node) = { .name = "vyatta:ipv4-l4", .type = PL_PROC, .handler = ipv4_l4_process, - .feat_change = ipv4_l4_feat_change, - .feat_iterate = ipv4_l4_feat_iterate, + .feat_type_insert = ipv4_l4_feat_add_type, + .feat_type_find = ipv4_l4_find_feat_id_by_type, .num_next = IPV4_L4_NUM, .next = { [IPV4_L4_ACCEPT] = "ipv4-local", - [IPV4_L4_DROP] = "term-drop", + [IPV4_L4_DROP] = "ipv4-drop", [IPV4_L4_CONSUME] = "term-finish", } }; + +struct pl_node_registration *const ipv4_l4_node_ptr = &ipv4_l4_node; + +/* + * show features ipv4_l4 + */ +static int cmd_pl_show_feat_ipv4_l4(struct pl_command *cmd) +{ + json_writer_t *wr; + + wr = jsonw_new(cmd->fp); + if (!wr) + return 0; + + jsonw_name(wr, "features"); + jsonw_start_object(wr); + + jsonw_name(wr, "global"); + jsonw_start_array(wr); + pl_node_iter_features(ipv4_l4_node_ptr, NULL, + pl_print_feats, wr); + jsonw_end_array(wr); + + jsonw_end_object(wr); + jsonw_destroy(&wr); + return 0; +} + +PL_REGISTER_OPCMD(pl_show_feat_ipv4_l4) = { + .cmd = "show features ipv4_l4", + .handler = cmd_pl_show_feat_ipv4_l4, +}; diff --git a/src/pipeline/nodes/l3_v4_no_address.c b/src/pipeline/nodes/l3_v4_no_address.c index 144316b2..02e289a2 100644 --- a/src/pipeline/nodes/l3_v4_no_address.c +++ b/src/pipeline/nodes/l3_v4_no_address.c @@ -1,7 +1,7 @@ /* * IPv4 no address feature * - * Copyright (c) 2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2018-2020, AT&T Intellectual Property. All rights reserved. 
* * SPDX-License-Identifier: LGPL-2.1-only */ @@ -12,16 +12,17 @@ #include "compiler.h" #include "if_var.h" -#include "pktmbuf.h" +#include "pktmbuf_internal.h" #include "pl_common.h" #include "pl_fused.h" #include "pl_node.h" #include "pl_nodes_common.h" -#include "vrf.h" +#include "vrf_internal.h" ALWAYS_INLINE unsigned int -ipv4_in_no_address_process(struct pl_packet *pkt __unused) +ipv4_in_no_address_process(struct pl_packet *pkt __unused, + void *context __unused) { /* * Special case of DHCP client, RFC2131 semantics diff --git a/src/pipeline/nodes/l3_v4_no_forwarding.c b/src/pipeline/nodes/l3_v4_no_forwarding.c index d8458ad5..6c40e7e3 100644 --- a/src/pipeline/nodes/l3_v4_no_forwarding.c +++ b/src/pipeline/nodes/l3_v4_no_forwarding.c @@ -1,7 +1,7 @@ /* * IPv4 no forwarding feature * - * Copyright (c) 2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2018-2019, AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only */ diff --git a/src/pipeline/nodes/l3_v4_out.c b/src/pipeline/nodes/l3_v4_out.c index 72e68fc1..dabe98c6 100644 --- a/src/pipeline/nodes/l3_v4_out.c +++ b/src/pipeline/nodes/l3_v4_out.c @@ -1,7 +1,7 @@ /* * l3_v4_out.c * - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2016, 2017 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -96,7 +96,8 @@ ipv4_out_features(struct pl_packet *pkt, enum pl_mode mode) } ALWAYS_INLINE unsigned int -ipv4_out_process_common(struct pl_packet *pkt, enum pl_mode mode) +ipv4_out_process_common(struct pl_packet *pkt, void *context __unused, + enum pl_mode mode) { if (!ipv4_out_features(pkt, mode)) return IPV4_OUT_FINISH; @@ -127,18 +128,18 @@ ipv4_out_process_common(struct pl_packet *pkt, enum pl_mode mode) if (likely(!too_big)) { pkt->l2_proto = ETH_P_IP; return IPV4_OUT_ENCAP; - } else { - struct ipv4_out_frag_ctx ctx = {nxt, in_ifp, pkt->l2_pkt_type}; - ip_fragment(out_ifp, pkt->mbuf, &ctx, ipv4_out_frag); } + struct ipv4_out_frag_ctx ctx = {nxt, in_ifp, pkt->l2_pkt_type}; + ip_fragment(out_ifp, pkt->mbuf, &ctx, ipv4_out_frag); + return IPV4_OUT_FINISH; } ALWAYS_INLINE unsigned int -ipv4_out_process(struct pl_packet *p) +ipv4_out_process(struct pl_packet *p, void *context) { - return ipv4_out_process_common(p, PL_MODE_REGULAR); + return ipv4_out_process_common(p, context, PL_MODE_REGULAR); } static int @@ -152,20 +153,38 @@ ipv4_out_feat_change(struct pl_node *node, action); } +static int +ipv4_out_feat_change_all(struct pl_feature_registration *feat, + enum pl_node_feat_action action) +{ + return if_node_instance_feat_change_all(feat, action, + ipv4_out_feat_change); +} + ALWAYS_INLINE bool ipv4_out_feat_iterate(struct pl_node *node, bool first, - unsigned int *feature_id, void **context) + unsigned int *feature_id, void **context, + void **storage_ctx) { struct ifnet *ifp = ipv4_out_node_to_ifp(node); + bool ret; + + + ret = pl_node_feat_iterate_u16(&ifp->ip_out_features, first, + feature_id, context); + if (ret) + *storage_ctx = if_node_instance_get_storage_internal( + ifp, + PL_FEATURE_POINT_IPV4_OUT_ID, + *feature_id); - return pl_node_feat_iterate_u16(&ifp->ip_out_features, first, - feature_id, context); + return ret; } static struct pl_node * ipv4_out_node_lookup(const char *name) { - struct ifnet *ifp = ifnet_byifname(name); + struct ifnet *ifp = 
dp_ifnet_byifname(name); return ifp ? ifp_to_ipv4_out_node(ifp) : NULL; } @@ -175,8 +194,13 @@ PL_REGISTER_NODE(ipv4_out_node) = { .type = PL_PROC, .handler = ipv4_out_process, .feat_change = ipv4_out_feat_change, + .feat_change_all = ipv4_out_feat_change_all, .feat_iterate = ipv4_out_feat_iterate, .lookup_by_name = ipv4_out_node_lookup, + .feat_reg_context = if_node_instance_register_storage, + .feat_unreg_context = if_node_instance_unregister_storage, + .feat_get_context = if_node_instance_get_storage, + .feat_setup_cleanup_cb = if_node_instance_set_cleanup_cb, .num_next = IPV4_OUT_NUM, .next = { [IPV4_OUT_ENCAP] = "ipv4-encap", @@ -186,3 +210,16 @@ PL_REGISTER_NODE(ipv4_out_node) = { }; struct pl_node_registration *const ipv4_out_node_ptr = &ipv4_out_node; + +/* + * show features ipv4_out [interface ] + */ +static int cmd_pl_show_feat_ipv4_out(struct pl_command *cmd) +{ + return if_node_instance_feat_print(cmd, ipv4_out_node_ptr); +} + +PL_REGISTER_OPCMD(pl_show_feat_ipv4_out) = { + .cmd = "show features ipv4_out", + .handler = cmd_pl_show_feat_ipv4_out, +}; diff --git a/src/pipeline/nodes/l3_v4_out_spath.c b/src/pipeline/nodes/l3_v4_out_spath.c new file mode 100644 index 00000000..e0162ff6 --- /dev/null +++ b/src/pipeline/nodes/l3_v4_out_spath.c @@ -0,0 +1,138 @@ +/* + * Copyright (c) 2020, AT&T Intellectual Property. All rights reserved. 
+ * SPDX-License-Identifier: LGPL-2.1-only + */ +#include +#include +#include +#include +#include +#include +#include + +#include "if_var.h" +#include "pktmbuf_internal.h" +#include "pl_common.h" +#include "pl_fused.h" +#include "pl_node.h" +#include "pl_nodes_common.h" + +struct pl_node; + +static inline struct pl_node *ifp_to_ipv4_out_spath_node(struct ifnet *ifp) +{ + /* our imaginary node */ + return (struct pl_node *)ifp; +} + +static inline struct ifnet *ipv4_out_spath_node_to_ifp(struct pl_node *node) +{ + /* the node is a fiction of our imagination */ + return (struct ifnet *)node; +} + +ALWAYS_INLINE unsigned int +ipv4_out_spath_process_common(struct pl_packet *pkt, void *context __unused, + enum pl_mode mode) +{ + struct ifnet *out_ifp = pkt->out_ifp; + + switch (mode) { + case PL_MODE_FUSED: + if (!pipeline_fused_ipv4_out_spath_features( + pkt, ifp_to_ipv4_out_spath_node(out_ifp))) + return IPV4_OUT_SPATH_FINISH; + break; + case PL_MODE_FUSED_NO_DYN_FEATS: + if (!pipeline_fused_ipv4_out_spath_no_dyn_features( + pkt, ifp_to_ipv4_out_spath_node(out_ifp))) + return IPV4_OUT_SPATH_FINISH; + break; + case PL_MODE_REGULAR: + if (!pl_node_invoke_enabled_features( + ipv4_out_spath_node_ptr, + ifp_to_ipv4_out_spath_node(out_ifp), + pkt)) + return IPV4_OUT_SPATH_FINISH; + break; + } + return IPV4_OUT_SPATH_ACCEPT; +} + +ALWAYS_INLINE unsigned int +ipv4_out_spath_process(struct pl_packet *p, void *context) +{ + return ipv4_out_spath_process_common(p, context, PL_MODE_REGULAR); +} + +static int +ipv4_out_spath_feat_change(struct pl_node *node, + struct pl_feature_registration *feat, + enum pl_node_feat_action action) +{ + struct ifnet *ifp = ipv4_out_spath_node_to_ifp(node); + + return pl_node_feat_change_u16(&ifp->ip_out_spath_features, feat, + action); +} + +static int +ipv4_out_spath_feat_change_all(struct pl_feature_registration *feat, + enum pl_node_feat_action action) +{ + return if_node_instance_feat_change_all(feat, action, + ipv4_out_spath_feat_change); +} 
+ +ALWAYS_INLINE bool +ipv4_out_spath_feat_iterate(struct pl_node *node, bool first, + unsigned int *feature_id, void **context, + void **storage_ctx __unused) +{ + struct ifnet *ifp = ipv4_out_spath_node_to_ifp(node); + bool ret; + + ret = pl_node_feat_iterate_u16(&ifp->ip_out_spath_features, first, + feature_id, context); + + return ret; +} + +static struct pl_node * +ipv4_out_spath_node_lookup(const char *name) +{ + struct ifnet *ifp = dp_ifnet_byifname(name); + return ifp ? ifp_to_ipv4_out_spath_node(ifp) : NULL; +} + +/* Register Node */ +PL_REGISTER_NODE(ipv4_out_spath_node) = { + .name = "vyatta:ipv4-out-spath", + .type = PL_PROC, + .handler = ipv4_out_spath_process, + .feat_change = ipv4_out_spath_feat_change, + .feat_change_all = ipv4_out_spath_feat_change_all, + .feat_iterate = ipv4_out_spath_feat_iterate, + .lookup_by_name = ipv4_out_spath_node_lookup, + .num_next = IPV4_OUT_SPATH_NUM, + .next = { + [IPV4_OUT_SPATH_ACCEPT] = "term-noop", + [IPV4_OUT_SPATH_FINISH] = "term-finish", + } +}; + +struct pl_node_registration *const ipv4_out_spath_node_ptr = + &ipv4_out_spath_node; + +/* + * show features ipv4_out_spath [interface ] + */ +static int cmd_pl_show_feat_ipv4_out_spath(struct pl_command *cmd) +{ + return if_node_instance_feat_print(cmd, ipv4_out_spath_node_ptr); +} + +PL_REGISTER_OPCMD(pl_show_feat_ipv4_out_spath) = { + .cmd = "show features ipv4_out_spath", + .handler = cmd_pl_show_feat_ipv4_out_spath, +}; diff --git a/src/pipeline/nodes/l3_v4_post_route_lookup.c b/src/pipeline/nodes/l3_v4_post_route_lookup.c index 55cab82c..a5fb97a1 100644 --- a/src/pipeline/nodes/l3_v4_post_route_lookup.c +++ b/src/pipeline/nodes/l3_v4_post_route_lookup.c @@ -2,7 +2,7 @@ * l3_v4_post_route_lookup.c * * - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2016, 2017 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -17,24 +17,25 @@ #include #include "compiler.h" +#include "if/macvlan.h" #include "if_var.h" #include "ip_funcs.h" #include "ip_icmp.h" #include "ip_ttl.h" -#include "macvlan.h" #include "mpls/mpls.h" #include "mpls/mpls_forward.h" -#include "nh.h" -#include "pktmbuf.h" +#include "nh_common.h" +#include "pktmbuf_internal.h" #include "pl_common.h" #include "pl_fused.h" #include "route.h" #include "route_flags.h" #include "snmp_mib.h" #include "urcu.h" +#include "npf/npf.h" ALWAYS_INLINE unsigned int -ipv4_post_route_lookup_process(struct pl_packet *pkt) +ipv4_post_route_lookup_process(struct pl_packet *pkt, void *context __unused) { struct next_hop *nxt = pkt->nxt.v4; struct ifnet *ifp = pkt->in_ifp; @@ -45,9 +46,9 @@ ipv4_post_route_lookup_process(struct pl_packet *pkt) icmp_error(ifp, pkt->mbuf, ICMP_DEST_UNREACH, ICMP_NET_UNREACH, 0); return IPV4_POST_ROUTE_LOOKUP_DROP; - } else if (unlikely(nxt->flags & (RTF_SLOWPATH | RTF_LOCAL))) { - return IPV4_POST_ROUTE_LOOKUP_LOCAL; } + if (unlikely(nxt->flags & (RTF_SLOWPATH | RTF_LOCAL))) + return IPV4_POST_ROUTE_LOOKUP_LOCAL; struct iphdr *ip = pkt->l3_hdr; decrement_ttl(ip); @@ -69,15 +70,13 @@ ipv4_post_route_lookup_process(struct pl_packet *pkt) /* MPLS imposition required because nh has given us a label */ if (unlikely(nh_outlabels_present(&nxt->outlabels))) { - union next_hop_v4_or_v6_ptr mpls_nh = { .v4 = nxt }; - - mpls_unlabeled_input(ifp, pkt->mbuf, - NH_TYPE_V4GW, mpls_nh, ip->ttl); + mpls_unlabeled_input(ifp, pkt->mbuf, MPT_IPV4, + NH_TYPE_V4GW, nxt, ip->ttl); return IPV4_POST_ROUTE_LOOKUP_CONSUME; } /* nxt->ifp may be changed by netlink messages. */ - struct ifnet *nxt_ifp = nh4_get_ifp(nxt); + struct ifnet *nxt_ifp = dp_nh_get_ifp(nxt); /* Destination device is not up? 
*/ if (unlikely(!nxt_ifp || !(nxt_ifp->if_flags & IFF_UP))) { @@ -99,13 +98,21 @@ ipv4_post_route_lookup_process(struct pl_packet *pkt) in_addr_t addr; /* Store next hop address */ if (nxt->flags & RTF_GATEWAY) - addr = nxt->gateway; + addr = nxt->gateway.address.ip_v4.s_addr; else addr = ip->daddr; if (ip_same_network(ifp, addr, ip->saddr) && - ip_redirects_get()) + ip_redirects_get()) { icmp_error(ifp, pkt->mbuf, ICMP_REDIRECT, ICMP_REDIR_HOST, addr); + /* + * Cache will have been used for handling + * the ICMP redirect, so ensure it is created + * again when continuing with the original + * packet. + */ + pkt->npf_flags |= NPF_FLAG_CACHE_EMPTY; + } } /* macvlan mac passthrough check & replace ifp */ diff --git a/src/pipeline/nodes/l3_v4_route_lookup.c b/src/pipeline/nodes/l3_v4_route_lookup.c index d15cc43f..bcc03cc0 100644 --- a/src/pipeline/nodes/l3_v4_route_lookup.c +++ b/src/pipeline/nodes/l3_v4_route_lookup.c @@ -1,7 +1,7 @@ /* * l3_v4_route_lookup.c * - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2016, 2017 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -20,7 +20,7 @@ #include "ip_icmp.h" #include "ip_mcast.h" #include "main.h" -#include "pktmbuf.h" +#include "pktmbuf_internal.h" #include "pl_common.h" #include "pl_fused.h" @@ -28,7 +28,7 @@ #include "pl_nodes_common.h" #include "route.h" #include "snmp_mib.h" -#include "vrf.h" +#include "vrf_internal.h" struct pl_node; @@ -52,7 +52,8 @@ ipv4_route_lookup_node_to_vrf(struct pl_node *node) } static ALWAYS_INLINE unsigned int -_ipv4_route_lookup_process_common(struct pl_packet *pkt, enum pl_mode mode, +_ipv4_route_lookup_process_common(struct pl_packet *pkt, void *context __unused, + enum pl_mode mode, enum ipv4_route_lookup_mode lkup_mode) { struct ifnet *ifp = pkt->in_ifp; @@ -147,23 +148,24 @@ _ipv4_route_lookup_process_common(struct pl_packet *pkt, enum pl_mode mode, } ALWAYS_INLINE unsigned int -ipv4_route_lookup_process_common(struct pl_packet *pkt, enum pl_mode mode) +ipv4_route_lookup_process_common(struct pl_packet *pkt, void *context __unused, + enum pl_mode mode) { - return _ipv4_route_lookup_process_common(pkt, mode, + return _ipv4_route_lookup_process_common(pkt, context, mode, IPV4_LKUP_MODE_ROUTER); } ALWAYS_INLINE unsigned int -ipv4_route_lookup_process(struct pl_packet *pkt) +ipv4_route_lookup_process(struct pl_packet *pkt, void *context) { - return _ipv4_route_lookup_process_common(pkt, PL_MODE_REGULAR, + return _ipv4_route_lookup_process_common(pkt, context, PL_MODE_REGULAR, IPV4_LKUP_MODE_ROUTER); } ALWAYS_INLINE unsigned int -ipv4_route_lookup_host_process(struct pl_packet *pkt) +ipv4_route_lookup_host_process(struct pl_packet *pkt, void *context) { - return _ipv4_route_lookup_process_common(pkt, PL_MODE_REGULAR, + return _ipv4_route_lookup_process_common(pkt, context, PL_MODE_REGULAR, IPV4_LKUP_MODE_HOST); } @@ -180,10 +182,14 @@ ipv4_route_lookup_feat_change(struct pl_node *node, ALWAYS_INLINE bool ipv4_route_lookup_feat_iterate(struct pl_node *node, bool first, - unsigned int *feature_id, void **context) + unsigned int *feature_id, void 
**context, + void **storage_ctx) { struct vrf *vrf = ipv4_route_lookup_node_to_vrf(node); + if (first) + *storage_ctx = NULL; + return pl_node_feat_iterate_u16(&vrf->v_ip_post_rlkup_features, first, feature_id, context); } @@ -238,3 +244,84 @@ PL_REGISTER_NODE(ipv4_route_lookup_host_node) = { struct pl_node_registration *const ipv4_route_lookup_node_ptr = &ipv4_route_lookup_node; + +struct pl_show_vrf_ctx { + json_writer_t *json; + char *vrfname; +}; + +static void +pl_show_ipv4_route_lookup(struct vrf *vrf, struct pl_show_vrf_ctx *ctx) +{ + json_writer_t *wr = ctx->json; + vrfid_t vrfid = dp_vrf_get_vid(vrf); + const char *vrfname; + + vrfname = (vrfid == VRF_DEFAULT_ID) ? "default" : vrf_get_name(vrfid); + + if (ctx->vrfname && (strcmp(ctx->vrfname, vrfname) != 0) && + (strcmp(ctx->vrfname, "all") != 0)) + return; + + jsonw_start_object(wr); + jsonw_name(wr, vrfname); + + jsonw_start_array(wr); + pl_node_iter_features(ipv4_route_lookup_node_ptr, vrf, + pl_print_feats, wr); + jsonw_end_array(wr); + + jsonw_end_object(wr); +} + +/* + * show features ipv4_route_lookup [vrf ] + */ +static int cmd_pl_show_feat_ipv4_route_lookup(struct pl_command *cmd) +{ + int argc = cmd->argc; + char **argv = cmd->argv; + char *opt, *vrfname = NULL; + json_writer_t *wr; + vrfid_t vrf_id; + struct vrf *vrf; + + while (argc > 0) { + opt = next_arg(&argc, &argv); + + if (!strcmp(opt, "vrf")) { + vrfname = next_arg(&argc, &argv); + if (!vrfname) + return 0; + } + } + + wr = jsonw_new(cmd->fp); + if (!wr) + return 0; + + struct pl_show_vrf_ctx ctx = { + .json = wr, + .vrfname = vrfname, + }; + + jsonw_name(wr, "features"); + jsonw_start_object(wr); + + jsonw_name(wr, "vrf"); + jsonw_start_array(wr); + + VRF_FOREACH(vrf, vrf_id) + pl_show_ipv4_route_lookup(vrf, &ctx); + + jsonw_end_array(wr); + + jsonw_end_object(wr); + jsonw_destroy(&wr); + return 0; +} + +PL_REGISTER_OPCMD(pl_show_feat_ipv4_route_lookup) = { + .cmd = "show features ipv4_route_lookup", + .handler = 
cmd_pl_show_feat_ipv4_route_lookup, +}; diff --git a/src/pipeline/nodes/l3_v4_rpf.c b/src/pipeline/nodes/l3_v4_rpf.c index a5eddc54..12988fc1 100644 --- a/src/pipeline/nodes/l3_v4_rpf.c +++ b/src/pipeline/nodes/l3_v4_rpf.c @@ -1,7 +1,7 @@ /* * l3_v4_rpf.c * - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2021, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2016, 2017 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -18,7 +18,7 @@ #include "compat.h" #include "compiler.h" #include "if_var.h" -#include "nh.h" +#include "nh_common.h" #include "pl_common.h" #include "pl_fused.h" #include "route.h" @@ -30,7 +30,7 @@ struct rte_mbuf; /* * Validate source address matches to prevent IP spoofing per RFC3704. */ -static __attribute__((noinline)) bool +static __noinline bool verify_path(in_addr_t src, struct ifnet *ifp, uint32_t tbl, struct rte_mbuf *m) { @@ -40,7 +40,7 @@ verify_path(in_addr_t src, struct ifnet *ifp, uint32_t tbl, if (!src) return true; - nxt = rt_lookup(src, tbl, m); + nxt = dp_rt_lookup(src, tbl, m); if (nxt == NULL) return false; @@ -51,7 +51,7 @@ verify_path(in_addr_t src, struct ifnet *ifp, uint32_t tbl, /* if ifp is in strict mode, check for that incoming * interface matches the route. */ - if (ifp->ip_rpf_strict && nh4_get_ifp(nxt) != ifp) + if (ifp->ip_rpf_strict && dp_nh_get_ifp(nxt) != ifp) return false; /* found valid route */ @@ -59,7 +59,7 @@ verify_path(in_addr_t src, struct ifnet *ifp, uint32_t tbl, } ALWAYS_INLINE unsigned int -ipv4_rpf_process(struct pl_packet *pkt) +ipv4_rpf_process(struct pl_packet *pkt, void *context __unused) { struct iphdr *ip = pkt->l3_hdr; struct ifnet *ifp = pkt->in_ifp; diff --git a/src/pipeline/nodes/l3_v4_udp.c b/src/pipeline/nodes/l3_v4_udp.c new file mode 100644 index 00000000..7a86884e --- /dev/null +++ b/src/pipeline/nodes/l3_v4_udp.c @@ -0,0 +1,210 @@ +/* + * Copyright (c) 2019-2021, AT&T Intellectual Property. 
All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ +#include +#include "ip_funcs.h" + +#include "compiler.h" +#include "../pl_common.h" +#include "../pl_node.h" +#include "../pl_fused.h" +#include "pl_nodes_common.h" + +/* Size of the feat hash table */ +#define L4_FEAT_HASH_MIN 4 +#define L4_FEAT_HASH_MAX 32 + +static struct cds_lfht *l3_v4_udp_feat_ht; + +static inline int +ipv4_udp_in_feat_match(struct cds_lfht_node *node, const void *key) +{ + const uint32_t *feat_type = key; + const struct pl_feature_registration *feat; + + feat = caa_container_of(node, const struct pl_feature_registration, + feat_node); + + if (feat && feat->feat_type == *feat_type) + return 1; + + return 0; +} + +ALWAYS_INLINE int +ipv4_udp_in_find_feat_id_by_type(uint32_t feat_type) +{ + struct cds_lfht_iter iter; + struct cds_lfht_node *ret_node; + const struct pl_feature_registration *feat; + + if (!l3_v4_udp_feat_ht) + return 0; + + cds_lfht_lookup(l3_v4_udp_feat_ht, feat_type, + ipv4_udp_in_feat_match, &feat_type, + &iter); + + ret_node = cds_lfht_iter_get_node(&iter); + if (ret_node) { + feat = caa_container_of(ret_node, + struct pl_feature_registration, + feat_node); + return feat->id; + } + return 0; +} + +static ALWAYS_INLINE int +ipv4_udp_in_feat_add_type(struct pl_node_registration *node __unused, + struct pl_feature_registration *feat, + uint32_t feat_type) +{ + struct cds_lfht_node *ret_node; + + if (!l3_v4_udp_feat_ht) { + l3_v4_udp_feat_ht = cds_lfht_new(L4_FEAT_HASH_MIN, + L4_FEAT_HASH_MIN, + L4_FEAT_HASH_MAX, + CDS_LFHT_AUTO_RESIZE, + NULL); + if (l3_v4_udp_feat_ht == NULL) + rte_panic("Can't allocate ft node hash\n"); + } + ret_node = cds_lfht_add_unique(l3_v4_udp_feat_ht, feat_type, + ipv4_udp_in_feat_match, &feat_type, + &feat->feat_node); + return (ret_node != &feat->feat_node) ? 
EEXIST : 0; +} + +static ALWAYS_INLINE int +ipv4_udp_in_feat_rem_type(struct pl_node_registration *node __unused, + struct pl_feature_registration *feat __unused, + uint32_t feat_type) +{ + struct cds_lfht_node *ret_node; + struct cds_lfht_iter iter; + + if (!l3_v4_udp_feat_ht) + return -ENOENT; + + cds_lfht_lookup(l3_v4_udp_feat_ht, feat_type, + ipv4_udp_in_feat_match, &feat_type, + &iter); + + ret_node = cds_lfht_iter_get_node(&iter); + if (!ret_node) + return -ENOENT; + + return cds_lfht_del(l3_v4_udp_feat_ht, ret_node); +} + +ALWAYS_INLINE unsigned int +ipv4_udp_in_process_common(struct pl_packet *pkt, void *context __unused, + enum pl_mode mode) +{ + struct rte_mbuf *m = pkt->mbuf; + struct iphdr *ip = iphdr(m); + struct udphdr *udp; + struct ifnet *ifp = pkt->in_ifp; + int rc; + uint32_t feat_type; + + + rc = ip_udp_tunnel_in(&m, ip, ifp); + if (likely(rc == 0)) + return IPV4_UDP_CONSUME; + if (rc < 0) + return IPV4_UDP_DROP; + + pkt->mbuf = m; + + udp = dp_pktmbuf_mtol4(m, struct udphdr *); + feat_type = udp->dest; + + switch (mode) { + case PL_MODE_FUSED: + if (!pipeline_fused_ipv4_udp_in_features( + pkt, + ipv4_udp_in_find_feat_id_by_type_fused(feat_type))) + return IPV4_UDP_CONSUME; + break; + case PL_MODE_FUSED_NO_DYN_FEATS: + if (!pipeline_fused_ipv4_udp_in_no_dyn_features( + pkt, + ipv4_udp_in_find_feat_id_by_type_fused_no_dyn_features(feat_type))) + return IPV4_UDP_CONSUME; + break; + case PL_MODE_REGULAR: + if (!pl_node_invoke_feature_by_type( + ipv4_udp_in_node_ptr, + feat_type, pkt)) + return IPV4_UDP_CONSUME; + break; + } + return IPV4_UDP_ACCEPT; +} + +ALWAYS_INLINE unsigned int +ipv4_udp_in_process(struct pl_packet *pkt, void *context) +{ + return ipv4_udp_in_process_common(pkt, context, PL_MODE_REGULAR); +} + +PL_REGISTER_NODE(ipv4_udp_in_node) = { + .name = "vyatta:ipv4-udp-in", + .type = PL_PROC, + .handler = ipv4_udp_in_process, + .feat_type_insert = ipv4_udp_in_feat_add_type, + .feat_type_remove = ipv4_udp_in_feat_rem_type, + 
.feat_type_find = ipv4_udp_in_find_feat_id_by_type, + .num_next = IPV4_UDP_NUM, + .next = { + [IPV4_UDP_ACCEPT] = "term-noop", + [IPV4_UDP_DROP] = "term-drop", + [IPV4_UDP_CONSUME] = "term-finish", + } +}; + +PL_REGISTER_FEATURE(ipv4_udp_in_feat) = { + .name = "vyatta:ipv4-udp-in", + .node_name = "ipv4-udp-in", + .feature_point = "ipv4-l4", + .always_on = true, + .id = PL_L3_V4_L4_FUSED_FEAT_UDP_IN, + .feat_type = IPPROTO_UDP, +}; + +struct pl_node_registration *const ipv4_udp_in_node_ptr = &ipv4_udp_in_node; + +/* + * show features ipv4_udp_in + */ +static int cmd_pl_show_feat_ipv4_udp_in(struct pl_command *cmd) +{ + json_writer_t *wr; + + wr = jsonw_new(cmd->fp); + if (!wr) + return 0; + + jsonw_name(wr, "features"); + jsonw_start_object(wr); + + jsonw_name(wr, "global"); + jsonw_start_array(wr); + pl_node_iter_features(ipv4_udp_in_node_ptr, NULL, + pl_print_feats, wr); + jsonw_end_array(wr); + + jsonw_end_object(wr); + jsonw_destroy(&wr); + return 0; +} + +PL_REGISTER_OPCMD(pl_show_feat_ipv4_udp_in) = { + .cmd = "show features ipv4_udp_in", + .handler = cmd_pl_show_feat_ipv4_udp_in, +}; diff --git a/src/pipeline/nodes/l3_v4_val.c b/src/pipeline/nodes/l3_v4_val.c index 89a4e3a1..023ea6a4 100644 --- a/src/pipeline/nodes/l3_v4_val.c +++ b/src/pipeline/nodes/l3_v4_val.c @@ -2,7 +2,7 @@ * l3_v4_val.c * * - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2016, 2017 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -18,14 +18,14 @@ #include "if_var.h" #include "ip_funcs.h" #include "npf/npf.h" -#include "pktmbuf.h" +#include "pktmbuf_internal.h" #include "pl_common.h" #include "pl_fused.h" #include "pl_node.h" #include "pl_nodes_common.h" #include "snmp_mib.h" #include "util.h" -#include "vrf.h" +#include "vrf_internal.h" struct pl_node; @@ -42,7 +42,8 @@ static inline struct ifnet *ipv4_val_node_to_ifp(struct pl_node *node) } ALWAYS_INLINE unsigned int -ipv4_validate_process_common(struct pl_packet *pkt, enum pl_mode mode) +ipv4_validate_process_common(struct pl_packet *pkt, void *context __unused, + enum pl_mode mode) { struct iphdr *ip = iphdr(pkt->mbuf); struct ifnet *ifp = pkt->in_ifp; @@ -88,9 +89,9 @@ ipv4_validate_process_common(struct pl_packet *pkt, enum pl_mode mode) } ALWAYS_INLINE unsigned int -ipv4_validate_process(struct pl_packet *p) +ipv4_validate_process(struct pl_packet *p, void *context) { - return ipv4_validate_process_common(p, PL_MODE_REGULAR); + return ipv4_validate_process_common(p, context, PL_MODE_REGULAR); } static int @@ -103,20 +104,37 @@ ipv4_validate_feat_change(struct pl_node *node, return pl_node_feat_change_u16(&ifp->ip_in_features, feat, action); } +static int +ipv4_validate_feat_change_all(struct pl_feature_registration *feat, + enum pl_node_feat_action action) +{ + return if_node_instance_feat_change_all(feat, action, + ipv4_validate_feat_change); +} + ALWAYS_INLINE bool ipv4_validate_feat_iterate(struct pl_node *node, bool first, - unsigned int *feature_id, void **context) + unsigned int *feature_id, void **context, + void **storage_ctx) { struct ifnet *ifp = ipv4_val_node_to_ifp(node); + bool ret; - return pl_node_feat_iterate_u16(&ifp->ip_in_features, first, + ret = pl_node_feat_iterate_u16(&ifp->ip_in_features, first, feature_id, context); + if (ret) + *storage_ctx = if_node_instance_get_storage_internal( + ifp, + PL_FEATURE_POINT_IPV4_VALIDATE_ID, + *feature_id); + + return ret; } static struct pl_node * 
ipv4_validate_node_lookup(const char *name) { - struct ifnet *ifp = ifnet_byifname(name); + struct ifnet *ifp = dp_ifnet_byifname(name); return ifp ? ifp_to_ipv4_val_node(ifp) : NULL; } @@ -126,8 +144,13 @@ PL_REGISTER_NODE(ipv4_validate_node) = { .type = PL_PROC, .handler = ipv4_validate_process, .feat_change = ipv4_validate_feat_change, + .feat_change_all = ipv4_validate_feat_change_all, .feat_iterate = ipv4_validate_feat_iterate, .lookup_by_name = ipv4_validate_node_lookup, + .feat_reg_context = if_node_instance_register_storage, + .feat_unreg_context = if_node_instance_unregister_storage, + .feat_get_context = if_node_instance_get_storage, + .feat_setup_cleanup_cb = if_node_instance_set_cleanup_cb, .num_next = IPV4_VAL_NUM, .next = { [IPV4_VAL_ACCEPT] = "ipv4-route-lookup", @@ -138,3 +161,16 @@ PL_REGISTER_NODE(ipv4_validate_node) = { struct pl_node_registration *const ipv4_validate_node_ptr = &ipv4_validate_node; + +/* + * show features ipv4_validate [interface ] + */ +static int cmd_pl_show_feat_ipv4_validate(struct pl_command *cmd) +{ + return if_node_instance_feat_print(cmd, ipv4_validate_node_ptr); +} + +PL_REGISTER_OPCMD(pl_show_feat_ipv4_validate) = { + .cmd = "show features ipv4_validate", + .handler = cmd_pl_show_feat_ipv4_validate, +}; diff --git a/src/pipeline/nodes/l3_v6_defrag.c b/src/pipeline/nodes/l3_v6_defrag.c index 3f53f40e..91aa3aa7 100644 --- a/src/pipeline/nodes/l3_v6_defrag.c +++ b/src/pipeline/nodes/l3_v6_defrag.c @@ -1,7 +1,7 @@ /* * l3_v6_defrag.c * - * Copyright (c) 2018-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2018-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2016, 2017 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -25,12 +25,13 @@ #include "urcu.h" ALWAYS_INLINE unsigned int -ipv6_defrag_in_process(struct pl_packet *pkt) +ipv6_defrag_in_process(struct pl_packet *pkt, void *context __unused) { pkt->npf_flags = NPF_FLAG_CACHE_EMPTY; /* - * Use firewall configuration for firewall and NAT + * If there is a zone use zone configuration for zone and NAT + * Otherwise use firewall configuration for firewall and NAT */ struct npf_if *nif = rcu_dereference(pkt->in_ifp->if_npf); @@ -104,13 +105,13 @@ ipv6_defrag_out_internal(struct pl_packet *pkt) } ALWAYS_INLINE unsigned int -ipv6_defrag_out_process(struct pl_packet *pkt) +ipv6_defrag_out_process(struct pl_packet *pkt, void *context __unused) { return ipv6_defrag_out_internal(pkt); } ALWAYS_INLINE unsigned int -ipv6_defrag_out_spath_process(struct pl_packet *pkt) +ipv6_defrag_out_spath_process(struct pl_packet *pkt, void *context __unused) { return ipv6_defrag_out_internal(pkt); } @@ -146,7 +147,7 @@ PL_REGISTER_NODE(ipv6_defrag_out_spath_node) = { .handler = ipv6_defrag_out_spath_process, .num_next = IPV6_DEFRAG_OUT_SPATH_NUM, .next = { - [IPV6_DEFRAG_OUT_SPATH_ACCEPT] = "ipv6-fw-out", + [IPV6_DEFRAG_OUT_SPATH_ACCEPT] = "term-noop", [IPV6_DEFRAG_OUT_SPATH_FINISH] = "term-finish" } }; @@ -165,3 +166,10 @@ PL_REGISTER_FEATURE(ipv6_defrag_out_feat) = { .feature_point = "ipv6-out", .id = PL_L3_V6_OUT_FUSED_FEAT_DEFRAG, }; + +PL_REGISTER_FEATURE(ipv6_defrag_out_spath_feat) = { + .name = "vyatta:ipv6-defrag-out-spath", + .node_name = "ipv6-defrag-out-spath", + .feature_point = "ipv6-out-spath", + .id = PL_L3_V6_OUT_SPATH_FUSED_FEAT_DEFRAG, +}; diff --git a/src/pipeline/nodes/l3_v6_encap.c b/src/pipeline/nodes/l3_v6_encap.c index 49caaed3..12289891 100644 --- a/src/pipeline/nodes/l3_v6_encap.c +++ b/src/pipeline/nodes/l3_v6_encap.c @@ -1,7 +1,7 @@ /* * l3_v6_encap.c * - * Copyright (c) 2018-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2019-2020, AT&T Intellectual Property. All rights reserved. 
* * SPDX-License-Identifier: LGPL-2.1-only */ @@ -15,9 +15,10 @@ #include "compiler.h" #include "if_var.h" #include "ip6_funcs.h" +#include "ip_mcast.h" #include "nd6_nbr.h" -#include "nh.h" -#include "pktmbuf.h" +#include "nh_common.h" +#include "pktmbuf_internal.h" #include "pl_common.h" #include "pl_fused.h" #include "pl_node.h" @@ -46,21 +47,29 @@ static inline struct ifnet *ipv6_encap_node_to_ifp(struct pl_node *node) * L2 dest mac resolution, and set from that. */ static ALWAYS_INLINE bool -ipv6_encap_eth_from_nh6(struct rte_mbuf *mbuf, const struct next_hop_v6 *nh, +ipv6_encap_eth_from_nh6(struct rte_mbuf *mbuf, const struct next_hop *nh, struct in6_addr *addr, struct ifnet *in_ifp) { - struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(mbuf, struct ether_hdr *); - struct ifnet *out_ifp = nh6_get_ifp(nh); /* Needed for VRRP */ + struct rte_ether_hdr *eth_hdr = + rte_pktmbuf_mtod(mbuf, struct rte_ether_hdr *); + struct ifnet *out_ifp = dp_nh_get_ifp(nh); /* Needed for VRRP */ - ether_addr_copy(&out_ifp->eth_addr, &eth_hdr->s_addr); + rte_ether_addr_copy(&out_ifp->eth_addr, &eth_hdr->s_addr); /* If already resolved, use the link level encap */ - struct llentry *lle = nh6_get_lle(nh); + struct llentry *lle = nh_get_lle(nh); if (likely(lle != NULL)) { if (llentry_copy_mac(lle, &eth_hdr->d_addr)) return true; } + /* Derive a multicast MAC address from the IP address */ + if (unlikely(nh->flags & RTF_MULTICAST)) { + mcast_dst_eth_addr_t eth_daddr = mcast6_dst_eth_addr(addr); + rte_ether_addr_copy(&eth_daddr.as_addr, &eth_hdr->d_addr); + return true; + } + /* Not yet resolved, so try to do so */ if (likely(nd6_resolve_fast(in_ifp, out_ifp, mbuf, addr, &eth_hdr->d_addr) == 0)) @@ -106,9 +115,9 @@ static ALWAYS_INLINE unsigned int ipv6_encap_process_internal(struct pl_packet *pkt, enum pl_mode mode) { if (!ipv6_encap_features(pkt, mode)) - return IPV6_ENCAP_DROPPED; + return IPV6_ENCAP_FEAT_CONSUME; - struct next_hop_v6 *nh = pkt->nxt.v6; + struct next_hop *nh = pkt->nxt.v6; struct ifnet 
*in_ifp = pkt->in_ifp; struct ifnet *out_ifp = pkt->out_ifp; struct rte_mbuf *mbuf = pkt->mbuf; @@ -117,7 +126,7 @@ ipv6_encap_process_internal(struct pl_packet *pkt, enum pl_mode mode) struct in6_addr addr; if (nh->flags & RTF_GATEWAY) { - addr = nh->gateway; + addr = nh->gateway.address.ip_v6; } else { struct ip6_hdr *ip6; @@ -135,40 +144,50 @@ ipv6_encap_process_internal(struct pl_packet *pkt, enum pl_mode mode) /* Assume all other interface types use ethernet encap. */ if (!ipv6_encap_eth_from_nh6(mbuf, nh, &addr, in_ifp)) - return IPV6_ENCAP_FAIL; + return IPV6_ENCAP_NEIGH_RES_CONSUME; return IPV6_ENCAP_L2_OUT; } ALWAYS_INLINE unsigned int -ipv6_encap_process_common(struct pl_packet *pkt, enum pl_mode mode) +ipv6_encap_process_common(struct pl_packet *pkt, void *context __unused, + enum pl_mode mode) { struct ifnet *out_ifp = pkt->out_ifp; int rc = ipv6_encap_process_internal(pkt, mode); - if (rc == IPV6_ENCAP_L2_OUT) - IP6STAT_INC_IFP(out_ifp, IPSTATS_MIB_OUTFORWDATAGRAMS); + /* + * Either way the packet has been handed to "lower layers" to + * be transmitted. 
+ */ + if (rc == IPV6_ENCAP_L2_OUT || rc == IPV6_ENCAP_NEIGH_RES_CONSUME) { + if (pkt->nxt.v6->flags & RTF_MULTICAST) + IP6STAT_INC_IFP(out_ifp, IPSTATS_MIB_OUTMCASTPKTS); + else + IP6STAT_INC_IFP(out_ifp, IPSTATS_MIB_OUTFORWDATAGRAMS); + } return rc; } ALWAYS_INLINE unsigned int -ipv6_encap_only_process_common(struct pl_packet *pkt, enum pl_mode mode) +ipv6_encap_only_process_common(struct pl_packet *pkt, void *context __unused, + enum pl_mode mode) { return ipv6_encap_process_internal(pkt, mode); } ALWAYS_INLINE unsigned int -ipv6_encap_process(struct pl_packet *p) +ipv6_encap_process(struct pl_packet *p, void *context) { - return ipv6_encap_process_common(p, PL_MODE_REGULAR); + return ipv6_encap_process_common(p, context, PL_MODE_REGULAR); } ALWAYS_INLINE unsigned int -ipv6_encap_only_process(struct pl_packet *p) +ipv6_encap_only_process(struct pl_packet *p, void *context) { - return ipv6_encap_only_process_common(p, PL_MODE_REGULAR); + return ipv6_encap_only_process_common(p, context, PL_MODE_REGULAR); } static int @@ -182,27 +201,46 @@ ipv6_encap_feat_change(struct pl_node *node, action); } +static int +ipv6_encap_feat_change_all(struct pl_feature_registration *feat, + enum pl_node_feat_action action) +{ + return if_node_instance_feat_change_all(feat, action, + ipv6_encap_feat_change); +} + ALWAYS_INLINE bool ipv6_encap_feat_iterate(struct pl_node *node, bool first, - unsigned int *feature_id, void **context) + unsigned int *feature_id, void **context, + void **storage_ctx) { struct ifnet *ifp = ipv6_encap_node_to_ifp(node); + bool ret; + + ret = pl_node_feat_iterate_u8(&ifp->ip6_encap_features, first, + feature_id, context); + if (ret) + *storage_ctx = if_node_instance_get_storage_internal( + ifp, + PL_FEATURE_POINT_IPV6_ENCAP_ID, + *feature_id); - return pl_node_feat_iterate_u8(&ifp->ip6_encap_features, first, - feature_id, context); + return ret; } ALWAYS_INLINE bool ipv6_encap_only_feat_iterate(struct pl_node *node, bool first, - unsigned int *feature_id, 
void **context) + unsigned int *feature_id, void **context, + void **storage_ctx) { - return ipv6_encap_feat_iterate(node, first, feature_id, context); + return ipv6_encap_feat_iterate(node, first, feature_id, context, + storage_ctx); } static struct pl_node * ipv6_encap_node_lookup(const char *name) { - struct ifnet *ifp = ifnet_byifname(name); + struct ifnet *ifp = dp_ifnet_byifname(name); return ifp ? ifp_to_ipv6_encap_node(ifp) : NULL; } @@ -212,13 +250,18 @@ PL_REGISTER_NODE(ipv6_encap_node) = { .type = PL_PROC, .handler = ipv6_encap_process, .feat_change = ipv6_encap_feat_change, + .feat_change_all = ipv6_encap_feat_change_all, .feat_iterate = ipv6_encap_feat_iterate, .lookup_by_name = ipv6_encap_node_lookup, + .feat_reg_context = if_node_instance_register_storage, + .feat_unreg_context = if_node_instance_unregister_storage, + .feat_get_context = if_node_instance_get_storage, + .feat_setup_cleanup_cb = if_node_instance_set_cleanup_cb, .num_next = IPV6_ENCAP_NUM, .next = { [IPV6_ENCAP_L2_OUT] = "l2-out", - [IPV6_ENCAP_DROPPED] = "term-finish", - [IPV6_ENCAP_FAIL] = "term-finish", + [IPV6_ENCAP_FEAT_CONSUME] = "term-finish", + [IPV6_ENCAP_NEIGH_RES_CONSUME] = "term-finish", } }; @@ -231,9 +274,22 @@ PL_REGISTER_NODE(ipv6_encap_only_node) = { .num_next = IPV6_ENCAP_ONLY_NUM, .next = { [IPV6_ENCAP_ONLY_L2_OUT] = "term-noop", - [IPV6_ENCAP_ONLY_DROPPED] = "term-finish", - [IPV6_ENCAP_ONLY_FAIL] = "term-finish", + [IPV6_ENCAP_ONLY_FEAT_CONSUME] = "term-finish", + [IPV6_ENCAP_ONLY_NEIGH_RES_CONSUME] = "term-finish", } }; struct pl_node_registration *const ipv6_encap_node_ptr = &ipv6_encap_node; + +/* + * show features ipv6_encap [interface ] + */ +static int cmd_pl_show_feat_ipv6_encap(struct pl_command *cmd) +{ + return if_node_instance_feat_print(cmd, ipv6_encap_node_ptr); +} + +PL_REGISTER_OPCMD(pl_show_feat_ipv6_encap) = { + .cmd = "show features ipv6_encap", + .handler = cmd_pl_show_feat_ipv6_encap, +}; diff --git a/src/pipeline/nodes/l3_v6_ipsec.c 
b/src/pipeline/nodes/l3_v6_ipsec.c index 1bd8c9d6..6ec5a2f7 100644 --- a/src/pipeline/nodes/l3_v6_ipsec.c +++ b/src/pipeline/nodes/l3_v6_ipsec.c @@ -2,7 +2,7 @@ * l3_v6_ipsec.c * * - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2016, 2017 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -16,20 +16,21 @@ #include "compiler.h" #include "crypto/crypto_forward.h" #include "netinet6/ip6_funcs.h" -#include "nh.h" +#include "nh_common.h" #include "pl_common.h" #include "pl_fused.h" ALWAYS_INLINE unsigned int -ipv6_ipsec_out_process(struct pl_packet *pkt) +ipv6_ipsec_out_process(struct pl_packet *pkt, void *context __unused) { struct ifnet *ifp = pkt->in_ifp; struct rte_mbuf *m = pkt->mbuf; - union next_hop_v4_or_v6_ptr nh = {NULL}; + struct next_hop *nh = NULL; /* Returns true if packet was consumed by IPsec */ if (unlikely(crypto_policy_check_outbound(ifp, &m, pkt->tblid, - htons(ETHER_TYPE_IPv6), &nh))) + htons(RTE_ETHER_TYPE_IPV6), + &nh))) return IPV6_IPSEC_CONSUME; /* @@ -38,8 +39,8 @@ ipv6_ipsec_out_process(struct pl_packet *pkt) * the next hop is pointing at. The packet will then be put back * in the crypto path. */ - if (unlikely(nh.v6 != NULL)) - pkt->nxt.v6 = nh.v6; + if (unlikely(nh != NULL)) + pkt->nxt.v6 = nh; if (unlikely(m != pkt->mbuf)) { pkt->mbuf = m; diff --git a/src/pipeline/nodes/l3_v6_l4.c b/src/pipeline/nodes/l3_v6_l4.c new file mode 100644 index 00000000..324d6666 --- /dev/null +++ b/src/pipeline/nodes/l3_v6_l4.c @@ -0,0 +1,231 @@ +/* + * l3_v6_l4.c + * + * Copyright (c) 2020-2021, AT&T Intellectual Property. All rights reserved. 
+ * + * SPDX-License-Identifier: LGPL-2.1-only + */ +#include +#include +#include + +#include "compiler.h" +#include "crypto/crypto_forward.h" +#include "crypto/crypto.h" +#include "ip6_funcs.h" +#include "l2tp/l2tpeth.h" +#include "pl_common.h" +#include "pl_fused.h" +#include "pl_node.h" +#include "pl_nodes_common.h" +#include "vrf.h" + +struct pl_node; + +/* Size of the feat hash table */ +#define L4_FEAT_HASH_MIN 4 +#define L4_FEAT_HASH_MAX 32 + +static struct cds_lfht *l3_v6_l4_feat_ht; + +static inline int +ipv6_l4_feat_match(struct cds_lfht_node *node, const void *key) +{ + const uint32_t *feat_type = key; + const struct pl_feature_registration *feat; + + feat = caa_container_of(node, const struct pl_feature_registration, + feat_node); + + if (feat && feat->feat_type == *feat_type) + return 1; + + return 0; +} + +ALWAYS_INLINE int +ipv6_l4_find_feat_id_by_type(uint32_t feat_type) +{ + struct cds_lfht_iter iter; + struct cds_lfht_node *ret_node; + const struct pl_feature_registration *feat; + + if (!l3_v6_l4_feat_ht) + return 0; + + cds_lfht_lookup(l3_v6_l4_feat_ht, feat_type, + ipv6_l4_feat_match, &feat_type, + &iter); + + ret_node = cds_lfht_iter_get_node(&iter); + if (ret_node) { + feat = caa_container_of(ret_node, + struct pl_feature_registration, + feat_node); + return feat->id; + } + return 0; +} + +static ALWAYS_INLINE int +ipv6_l4_feat_add_type(struct pl_node_registration *node __unused, + struct pl_feature_registration *feat, + uint32_t feat_type) +{ + struct cds_lfht_node *ret_node; + + if (!l3_v6_l4_feat_ht) { + l3_v6_l4_feat_ht = cds_lfht_new(L4_FEAT_HASH_MIN, + L4_FEAT_HASH_MIN, + L4_FEAT_HASH_MAX, + CDS_LFHT_AUTO_RESIZE, + NULL); + if (l3_v6_l4_feat_ht == NULL) + rte_panic("Can't allocate ft node hash\n"); + } + ret_node = cds_lfht_add_unique(l3_v6_l4_feat_ht, feat_type, + ipv6_l4_feat_match, &feat_type, + &feat->feat_node); + return (ret_node != &feat->feat_node) ? 
EEXIST : 0; +} + +static ALWAYS_INLINE bool +ipv6_l4_pre_process(struct pl_packet *pkt, void *context, struct ifnet *ifp) +{ + struct rte_mbuf *m = pkt->mbuf; + struct ip6_hdr *ip6 = ip6hdr(m); + int rc = -1; + uint32_t spi; + struct ip6_frag *ip6_frag; + + if (crypto_policy_check_inbound_terminating(ifp, &m, + htons(RTE_ETHER_TYPE_IPV6))) + return 0; + + switch (ip6->ip6_nxt) { + case IPPROTO_L2TPV3: + rc = l2tp_ipv6_recv_encap(m, ip6, + (unsigned char *)ip6 + sizeof(struct ip6_hdr)); + break; + case IPPROTO_GRE: + rc = ip6_gre_tunnel_in(&m, ip6); + break; + case IPPROTO_ESP: + spi = crypto_retrieve_spi((unsigned char *)ip6 + + dp_pktmbuf_l3_len(m)); + rc = crypto_enqueue_inbound_v6(m, ifp, spi); + break; + case IPPROTO_FRAGMENT: + /* + * If it is a fragment, and the next proto is ESP send + * to the crypto code. It will reassemble it and then find + * the SPI, so pass in 0. + */ + ip6_frag = (struct ip6_frag *)(ip6 + 1); + if (ip6_frag->ip6f_nxt == IPPROTO_ESP) { + rc = crypto_enqueue_inbound_v6(m, ifp, 0); + break; + } + return true; + default: + return true; + /* other protocols */ + } + if (rc == 0) + return false; + if (rc < 0) { + term_drop_process(pkt, context); + return false; + } + return true; +} + +ALWAYS_INLINE unsigned int +ipv6_l4_process_common(struct pl_packet *pkt, void *context, + enum pl_mode mode) +{ + struct rte_mbuf *m = pkt->mbuf; + struct ip6_hdr *ip6 = ip6hdr(m); + struct ifnet *ifp = pkt->in_ifp; + uint32_t feat_type = ip6->ip6_nxt; + + if (!ipv6_l4_pre_process(pkt, context, ifp)) + return IPV6_L4_CONSUME; + + switch (mode) { + case PL_MODE_FUSED: + if (!pipeline_fused_ipv6_l4_features( + pkt, + ipv6_l4_find_feat_id_by_type_fused(feat_type))) + return IPV6_L4_CONSUME; + break; + case PL_MODE_FUSED_NO_DYN_FEATS: + if (!pipeline_fused_ipv6_l4_no_dyn_features( + pkt, + ipv6_l4_find_feat_id_by_type_fused_no_dyn_features( + feat_type))) + return IPV6_L4_CONSUME; + break; + case PL_MODE_REGULAR: + if (!pl_node_invoke_feature_by_type( + 
ipv6_l4_node_ptr, + feat_type, pkt)) + return IPV6_L4_CONSUME; + break; + } + return IPV6_L4_ACCEPT; +} + + +ALWAYS_INLINE unsigned int +ipv6_l4_process(struct pl_packet *pkt, void *context) +{ + return ipv6_l4_process_common(pkt, context, PL_MODE_REGULAR); +} + +/* Register Node */ +PL_REGISTER_NODE(ipv6_l4_node) = { + .name = "vyatta:ipv6-l4", + .type = PL_PROC, + .handler = ipv6_l4_process, + .feat_type_insert = ipv6_l4_feat_add_type, + .feat_type_find = ipv6_l4_find_feat_id_by_type, + .num_next = IPV6_L4_NUM, + .next = { + [IPV6_L4_ACCEPT] = "ipv6-local", + [IPV6_L4_DROP] = "ipv6-drop", + [IPV6_L4_CONSUME] = "term-finish", + } +}; + +struct pl_node_registration *const ipv6_l4_node_ptr = &ipv6_l4_node; + +/* + * show features ipv6_l4 + */ +static int cmd_pl_show_feat_ipv6_l4(struct pl_command *cmd) +{ + json_writer_t *wr; + + wr = jsonw_new(cmd->fp); + if (!wr) + return 0; + + jsonw_name(wr, "features"); + jsonw_start_object(wr); + + jsonw_name(wr, "global"); + jsonw_start_array(wr); + pl_node_iter_features(ipv6_l4_node_ptr, NULL, + pl_print_feats, wr); + jsonw_end_array(wr); + + jsonw_end_object(wr); + jsonw_destroy(&wr); + return 0; +} + +PL_REGISTER_OPCMD(pl_show_feat_ipv6_l4) = { + .cmd = "show features ipv6_l4", + .handler = cmd_pl_show_feat_ipv6_l4, +}; diff --git a/src/pipeline/nodes/l3_v6_no_address.c b/src/pipeline/nodes/l3_v6_no_address.c index 5245a38b..22588e21 100644 --- a/src/pipeline/nodes/l3_v6_no_address.c +++ b/src/pipeline/nodes/l3_v6_no_address.c @@ -1,28 +1,47 @@ /* * IPv6 no address feature * - * Copyright (c) 2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2018-2020, AT&T Intellectual Property. All rights reserved. 
* * SPDX-License-Identifier: LGPL-2.1-only */ #include #include #include +#include +#include #include #include "compiler.h" +#include "ether.h" #include "if_var.h" -#include "pktmbuf.h" +#include "pktmbuf_internal.h" #include "pl_common.h" #include "pl_fused.h" #include "pl_node.h" #include "pl_nodes_common.h" -#include "vrf.h" +#include "vrf_internal.h" + +static struct rte_ether_addr micro_bfd_dst = { + { 0x01, 0x0, 0x5e, 0x90, 0x0, 0x01 } }; ALWAYS_INLINE unsigned int -ipv6_in_no_address_process(struct pl_packet *pkt __unused) +ipv6_in_no_address_process(struct pl_packet *pkt __unused, + void *context __unused) { + struct rte_mbuf *m = pkt->mbuf; + const struct rte_ether_hdr *eth = ethhdr(m); + + /* + * If this is a micro BFD packet, we need to handle it as + * normal, even if we don't have an IPv6 address. It's likely + * that this is a LAG member and will never have a valid IPv6 + * address. + */ + if (unlikely(rte_ether_addr_equal(&eth->d_addr, &micro_bfd_dst))) + return IPV6_IN_SPECIAL_PACKET; + /* * Special case of DHCP client, RFC2131 semantics * @@ -41,6 +60,7 @@ PL_REGISTER_NODE(ipv6_in_no_address_node) = { .num_next = IPV6_IN_NO_ADDRESS_NUM, .next = { [IPV6_IN_NO_ADDRESS_LOCAL] = "ipv6-local", + [IPV6_IN_SPECIAL_PACKET] = "ipv6-l4", } }; diff --git a/src/pipeline/nodes/l3_v6_no_forwarding.c b/src/pipeline/nodes/l3_v6_no_forwarding.c index de5d36e0..d34a392a 100644 --- a/src/pipeline/nodes/l3_v6_no_forwarding.c +++ b/src/pipeline/nodes/l3_v6_no_forwarding.c @@ -1,7 +1,7 @@ /* * IPv6 no forwarding feature * - * Copyright (c) 2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2018-2019, AT&T Intellectual Property. All rights reserved. 
* * SPDX-License-Identifier: LGPL-2.1-only */ diff --git a/src/pipeline/nodes/l3_v6_nptv6.c b/src/pipeline/nodes/l3_v6_nptv6.c index 358605eb..926107ff 100644 --- a/src/pipeline/nodes/l3_v6_nptv6.c +++ b/src/pipeline/nodes/l3_v6_nptv6.c @@ -1,7 +1,7 @@ /* * l3_v6_nptv6.c * - * Copyright (c) 2018-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2018-2020, AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only * @@ -36,7 +36,7 @@ #include "npf/npf_ruleset.h" #include "npf/rproc/npf_rproc.h" #include "npf/rproc/npf_ext_nptv6.h" -#include "pktmbuf.h" +#include "pktmbuf_internal.h" #include "pl_common.h" #include "pl_fused.h" #include "pl_node.h" @@ -59,6 +59,7 @@ nptv6_process_common(struct pl_packet *pkt, int dir) uint16_t npf_flags = 0; npf_rule_t *rl; void *handle; + int rc; if (!nif) return in ? NPTV6_IN_ACCEPT : NPTV6_OUT_ACCEPT; @@ -71,7 +72,8 @@ nptv6_process_common(struct pl_packet *pkt, int dir) return in ? NPTV6_IN_ACCEPT : NPTV6_OUT_ACCEPT; if (pktmbuf_mdata_exists(m, PKT_MDATA_DEFRAG)) { - npc = npf_get_cache(&npf_flags, m, htons(ETHER_TYPE_IPv6)); + npc = npf_get_cache(&npf_flags, m, + htons(RTE_ETHER_TYPE_IPV6), &rc); if (!npc) return in ? NPTV6_IN_DROP : NPTV6_OUT_DROP; } else { @@ -80,7 +82,8 @@ nptv6_process_common(struct pl_packet *pkt, int dir) npf_cache_init(npc); /* Cache everything. drop if junk. */ - if (unlikely(!npf_cache_all(npc, m, htons(ETHER_TYPE_IPv6)))) + rc = npf_cache_all(npc, m, htons(RTE_ETHER_TYPE_IPV6)); + if (unlikely(rc < 0)) return in ? 
NPTV6_IN_DROP : NPTV6_OUT_DROP; } @@ -104,7 +107,7 @@ nptv6_process_common(struct pl_packet *pkt, int dir) if (unlikely(m != pkt->mbuf)) { pkt->mbuf = m; - pkt->l3_hdr = pktmbuf_mtol3(m, void *); + pkt->l3_hdr = dp_pktmbuf_mtol3(m, void *); } if (unlikely(decision == NPF_DECISION_BLOCK)) { @@ -121,13 +124,13 @@ nptv6_process_common(struct pl_packet *pkt, int dir) } ALWAYS_INLINE unsigned int -nptv6_in_process(struct pl_packet *pkt) +nptv6_in_process(struct pl_packet *pkt, void *context __unused) { return nptv6_process_common(pkt, PFIL_IN); } ALWAYS_INLINE unsigned int -nptv6_out_process(struct pl_packet *pkt) +nptv6_out_process(struct pl_packet *pkt, void *context __unused) { return nptv6_process_common(pkt, PFIL_OUT); } diff --git a/src/pipeline/nodes/l3_v6_out.c b/src/pipeline/nodes/l3_v6_out.c index b15d2957..b1cc6664 100644 --- a/src/pipeline/nodes/l3_v6_out.c +++ b/src/pipeline/nodes/l3_v6_out.c @@ -2,7 +2,7 @@ * l3_v6_out.c * * - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2016, 2017 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -20,9 +20,10 @@ #include "compiler.h" #include "if_var.h" #include "ip6_funcs.h" +#include "netinet6/ip6_mroute.h" #include "npf/npf_cache.h" #include "npf_shim.h" -#include "pktmbuf.h" +#include "pktmbuf_internal.h" #include "pl_common.h" #include "pl_fused.h" #include "pl_node.h" @@ -47,7 +48,7 @@ static inline struct ifnet *ipv6_out_node_to_ifp(struct pl_node *node) } struct ipv6_out_frag_ctx { - struct next_hop_v6 *nh; + struct next_hop *nh; struct ifnet *in_ifp; enum l2_packet_type l2_pkt_type; }; @@ -99,12 +100,13 @@ ipv6_out_features(struct pl_packet *pkt, enum pl_mode mode) } ALWAYS_INLINE unsigned int -ipv6_out_process_common(struct pl_packet *pkt, enum pl_mode mode) +ipv6_out_process_common(struct pl_packet *pkt, void *context __unused, + enum pl_mode mode) { if (!ipv6_out_features(pkt, mode)) return IPV6_OUT_FINISH; - struct next_hop_v6 *nxt = pkt->nxt.v6; + struct next_hop *nxt = pkt->nxt.v6; struct ifnet *in_ifp = pkt->in_ifp; struct ifnet *out_ifp = pkt->out_ifp; struct ip6_hdr *ip6 = pkt->l3_hdr; @@ -123,6 +125,14 @@ ipv6_out_process_common(struct pl_packet *pkt, enum pl_mode mode) */ if (unlikely(too_big)) { if (!reassembled || npf_cache_mtu() > out_ifp->if_mtu) { + if (unlikely(nxt->flags & RTF_MULTICAST)) { + struct vrf *vrf = vrf_get_rcu(if_vrfid(in_ifp)); + if (vrf) { + struct mcast6_vrf *mvrf6 = + &vrf->v_mvrf6; + MRT6STAT_INC(mvrf6, mrt6s_pkttoobig); + } + } IP6STAT_INC_MBUF(pkt->mbuf, IPSTATS_MIB_FRAGFAILS); icmp6_error(in_ifp, pkt->mbuf, ICMP6_PACKET_TOO_BIG, 0, htonl(out_ifp->if_mtu)); @@ -135,18 +145,18 @@ ipv6_out_process_common(struct pl_packet *pkt, enum pl_mode mode) if (likely(!reassembled)) { pkt->l2_proto = ETH_P_IPV6; return IPV6_OUT_ENCAP; - } else { - struct ipv6_out_frag_ctx ctx = {nxt, in_ifp, pkt->l2_pkt_type}; - ip6_refragment_packet(out_ifp, pkt->mbuf, &ctx, ipv6_out_frag); } + struct ipv6_out_frag_ctx ctx = {nxt, in_ifp, pkt->l2_pkt_type}; + ip6_refragment_packet(out_ifp, pkt->mbuf, &ctx, ipv6_out_frag); + return 
IPV6_OUT_FINISH; } ALWAYS_INLINE unsigned int -ipv6_out_process(struct pl_packet *p) +ipv6_out_process(struct pl_packet *p, void *context __unused) { - return ipv6_out_process_common(p, PL_MODE_REGULAR); + return ipv6_out_process_common(p, context, PL_MODE_REGULAR); } static int @@ -160,20 +170,37 @@ ipv6_out_feat_change(struct pl_node *node, action); } +static int +ipv6_out_feat_change_all(struct pl_feature_registration *feat, + enum pl_node_feat_action action) +{ + return if_node_instance_feat_change_all(feat, action, + ipv6_out_feat_change); +} + ALWAYS_INLINE bool ipv6_out_feat_iterate(struct pl_node *node, bool first, - unsigned int *feature_id, void **context) + unsigned int *feature_id, void **context, + void **storage_ctx) { struct ifnet *ifp = ipv6_out_node_to_ifp(node); + bool ret; + + ret = pl_node_feat_iterate_u16(&ifp->ip6_out_features, first, + feature_id, context); + if (ret) + *storage_ctx = if_node_instance_get_storage_internal( + ifp, + PL_FEATURE_POINT_IPV6_OUT_ID, + *feature_id); - return pl_node_feat_iterate_u16(&ifp->ip6_out_features, first, - feature_id, context); + return ret; } static struct pl_node * ipv6_out_node_lookup(const char *name) { - struct ifnet *ifp = ifnet_byifname(name); + struct ifnet *ifp = dp_ifnet_byifname(name); return ifp ? 
ifp_to_ipv6_out_node(ifp) : NULL; } @@ -183,8 +210,13 @@ PL_REGISTER_NODE(ipv6_out_node) = { .type = PL_PROC, .handler = ipv6_out_process, .feat_change = ipv6_out_feat_change, + .feat_change_all = ipv6_out_feat_change_all, .feat_iterate = ipv6_out_feat_iterate, .lookup_by_name = ipv6_out_node_lookup, + .feat_reg_context = if_node_instance_register_storage, + .feat_unreg_context = if_node_instance_unregister_storage, + .feat_get_context = if_node_instance_get_storage, + .feat_setup_cleanup_cb = if_node_instance_set_cleanup_cb, .num_next = IPV6_OUT_NUM, .next = { [IPV6_OUT_ENCAP] = "ipv6-encap", @@ -193,3 +225,16 @@ PL_REGISTER_NODE(ipv6_out_node) = { }; struct pl_node_registration *const ipv6_out_node_ptr = &ipv6_out_node; + +/* + * show features ipv6_out [interface ] + */ +static int cmd_pl_show_feat_ipv6_out(struct pl_command *cmd) +{ + return if_node_instance_feat_print(cmd, ipv6_out_node_ptr); +} + +PL_REGISTER_OPCMD(pl_show_feat_ipv6_out) = { + .cmd = "show features ipv6_out", + .handler = cmd_pl_show_feat_ipv6_out, +}; diff --git a/src/pipeline/nodes/l3_v6_out_spath.c b/src/pipeline/nodes/l3_v6_out_spath.c new file mode 100644 index 00000000..58fa0d34 --- /dev/null +++ b/src/pipeline/nodes/l3_v6_out_spath.c @@ -0,0 +1,138 @@ +/* + * Copyright (c) 2020, AT&T Intellectual Property. All rights reserved. 
+ * SPDX-License-Identifier: LGPL-2.1-only + */ +#include +#include +#include +#include +#include +#include +#include + +#include "if_var.h" +#include "pktmbuf_internal.h" +#include "pl_common.h" +#include "pl_fused.h" +#include "pl_node.h" +#include "pl_nodes_common.h" + +struct pl_node; + +static inline struct pl_node *ifp_to_ipv6_out_spath_node(struct ifnet *ifp) +{ + /* our imaginary node */ + return (struct pl_node *)ifp; +} + +static inline struct ifnet *ipv6_out_spath_node_to_ifp(struct pl_node *node) +{ + /* the node is a fiction of our imagination */ + return (struct ifnet *)node; +} + +ALWAYS_INLINE unsigned int +ipv6_out_spath_process_common(struct pl_packet *pkt, void *context __unused, + enum pl_mode mode) +{ + struct ifnet *out_ifp = pkt->out_ifp; + + switch (mode) { + case PL_MODE_FUSED: + if (!pipeline_fused_ipv6_out_spath_features( + pkt, ifp_to_ipv6_out_spath_node(out_ifp))) + return IPV6_OUT_SPATH_FINISH; + break; + case PL_MODE_FUSED_NO_DYN_FEATS: + if (!pipeline_fused_ipv6_out_spath_no_dyn_features( + pkt, ifp_to_ipv6_out_spath_node(out_ifp))) + return IPV6_OUT_SPATH_FINISH; + break; + case PL_MODE_REGULAR: + if (!pl_node_invoke_enabled_features( + ipv6_out_spath_node_ptr, + ifp_to_ipv6_out_spath_node(out_ifp), + pkt)) + return IPV6_OUT_SPATH_FINISH; + break; + } + return IPV6_OUT_SPATH_ACCEPT; +} + +ALWAYS_INLINE unsigned int +ipv6_out_spath_process(struct pl_packet *p, void *context) +{ + return ipv6_out_spath_process_common(p, context, PL_MODE_REGULAR); +} + +static int +ipv6_out_spath_feat_change(struct pl_node *node, + struct pl_feature_registration *feat, + enum pl_node_feat_action action) +{ + struct ifnet *ifp = ipv6_out_spath_node_to_ifp(node); + + return pl_node_feat_change_u16(&ifp->ip6_out_spath_features, feat, + action); +} + +static int +ipv6_out_spath_feat_change_all(struct pl_feature_registration *feat, + enum pl_node_feat_action action) +{ + return if_node_instance_feat_change_all(feat, action, + ipv6_out_spath_feat_change); +} 
+ +ALWAYS_INLINE bool +ipv6_out_spath_feat_iterate(struct pl_node *node, bool first, + unsigned int *feature_id, void **context, + void **storage_ctx __unused) +{ + struct ifnet *ifp = ipv6_out_spath_node_to_ifp(node); + bool ret; + + ret = pl_node_feat_iterate_u16(&ifp->ip6_out_spath_features, first, + feature_id, context); + + return ret; +} + +static struct pl_node * +ipv6_out_spath_node_lookup(const char *name) +{ + struct ifnet *ifp = dp_ifnet_byifname(name); + return ifp ? ifp_to_ipv6_out_spath_node(ifp) : NULL; +} + +/* Register Node */ +PL_REGISTER_NODE(ipv6_out_spath_node) = { + .name = "vyatta:ipv6-out-spath", + .type = PL_PROC, + .handler = ipv6_out_spath_process, + .feat_change = ipv6_out_spath_feat_change, + .feat_change_all = ipv6_out_spath_feat_change_all, + .feat_iterate = ipv6_out_spath_feat_iterate, + .lookup_by_name = ipv6_out_spath_node_lookup, + .num_next = IPV6_OUT_SPATH_NUM, + .next = { + [IPV6_OUT_SPATH_ACCEPT] = "term-noop", + [IPV6_OUT_SPATH_FINISH] = "term-finish", + } +}; + +struct pl_node_registration *const ipv6_out_spath_node_ptr = + &ipv6_out_spath_node; + +/* + * show features ipv6_out_spath [interface ] + */ +static int cmd_pl_show_feat_ipv6_out_spath(struct pl_command *cmd) +{ + return if_node_instance_feat_print(cmd, ipv6_out_spath_node_ptr); +} + +PL_REGISTER_OPCMD(pl_show_feat_ipv6_out_spath) = { + .cmd = "show features ipv6_out_spath", + .handler = cmd_pl_show_feat_ipv6_out_spath, +}; diff --git a/src/pipeline/nodes/l3_v6_post_route_lookup.c b/src/pipeline/nodes/l3_v6_post_route_lookup.c index 87871632..d245d236 100644 --- a/src/pipeline/nodes/l3_v6_post_route_lookup.c +++ b/src/pipeline/nodes/l3_v6_post_route_lookup.c @@ -2,7 +2,7 @@ * l3_v4_post_route_lookup.c * * - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2016, 2017 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -17,25 +17,26 @@ #include #include "compiler.h" +#include "if/macvlan.h" #include "if_var.h" -#include "macvlan.h" #include "mpls/mpls.h" #include "mpls/mpls_forward.h" #include "netinet6/ip6_funcs.h" -#include "nh.h" -#include "pktmbuf.h" +#include "nh_common.h" +#include "pktmbuf_internal.h" #include "pl_common.h" #include "pl_fused.h" #include "route_flags.h" #include "route_v6.h" #include "urcu.h" +#include "npf/npf.h" -static RTE_DEFINE_PER_LCORE(struct next_hop_v6, ll_nexthop); +static RTE_DEFINE_PER_LCORE(struct next_hop, ll_nexthop); ALWAYS_INLINE unsigned int -ipv6_post_route_lookup_process(struct pl_packet *pkt) +ipv6_post_route_lookup_process(struct pl_packet *pkt, void *context __unused) { - struct next_hop_v6 *nxt = pkt->nxt.v6; + struct next_hop *nxt = pkt->nxt.v6; struct ifnet *ifp = pkt->in_ifp; struct ip6_hdr *ip6 = pkt->l3_hdr; @@ -43,7 +44,7 @@ ipv6_post_route_lookup_process(struct pl_packet *pkt) /* Can only forward LL out arrival interface */ RTE_PER_LCORE(ll_nexthop.flags) = 0; nxt = &RTE_PER_LCORE(ll_nexthop); - nh6_set_ifp(nxt, ifp); + nh_set_ifp(nxt, ifp); pkt->nxt.v6 = nxt; } @@ -51,13 +52,30 @@ ipv6_post_route_lookup_process(struct pl_packet *pkt) if (unlikely(!nxt)) { ip6_unreach(ifp, pkt->mbuf); return IPV6_POST_ROUTE_LOOKUP_FINISH; - } else if (unlikely(nxt->flags & (RTF_SLOWPATH | RTF_LOCAL))) + } + if (unlikely(nxt->flags & (RTF_SLOWPATH | RTF_LOCAL))) return IPV6_POST_ROUTE_LOOKUP_LOCAL; ip6->ip6_hlim -= IPV6_HLIMDEC; /* Immediately drop blackholed traffic. */ - if (unlikely(nxt->flags & RTF_BLACKHOLE)) + if (unlikely(nxt->flags & RTF_BLACKHOLE)) { + /* + * These are address errors, but we use the LPM to check for + * them. 
+ */ + if (unlikely(IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_dst)) || + unlikely(IN6_IS_ADDR_LOOPBACK(&ip6->ip6_dst)) || + unlikely(IN6_IS_ADDR_V4MAPPED(&ip6->ip6_dst))) { + if (pkt->in_ifp) + IP6STAT_INC_IFP(pkt->in_ifp, + IPSTATS_MIB_INADDRERRORS); + rte_pktmbuf_free(pkt->mbuf); + pkt->mbuf = NULL; + return IPV6_POST_ROUTE_LOOKUP_FINISH; + } + return IPV6_POST_ROUTE_LOOKUP_DROP; + } if (unlikely(nxt->flags & RTF_REJECT)) { icmp6_error(ifp, pkt->mbuf, ICMP6_DST_UNREACH, @@ -67,15 +85,13 @@ ipv6_post_route_lookup_process(struct pl_packet *pkt) /* MPLS imposition required because nh has given us a label */ if (unlikely(nh_outlabels_present(&nxt->outlabels))) { - union next_hop_v4_or_v6_ptr mpls_nh = { .v6 = nxt }; - - mpls_unlabeled_input(ifp, pkt->mbuf, - NH_TYPE_V6GW, mpls_nh, ip6->ip6_hops); + mpls_unlabeled_input(ifp, pkt->mbuf, MPT_IPV6, + NH_TYPE_V6GW, nxt, ip6->ip6_hops); return IPV6_POST_ROUTE_LOOKUP_FINISH; } /* nxt->ifp may be changed by netlink messages. */ - struct ifnet *nxt_ifp = nh6_get_ifp(nxt); + struct ifnet *nxt_ifp = dp_nh_get_ifp(nxt); /* Destination device is not up? */ if (unlikely(!nxt_ifp || !(nxt_ifp->if_flags & IFF_UP))) { @@ -113,6 +129,13 @@ ipv6_post_route_lookup_process(struct pl_packet *pkt) return IPV6_POST_ROUTE_LOOKUP_FINISH; } icmp6_redirect(ifp, pkt->mbuf, nxt); + /* + * Cache will have been used for handling + * the ICMPv6 redirect, so ensure it is created + * again when continuing with the original + * packet. + */ + pkt->npf_flags |= NPF_FLAG_CACHE_EMPTY; } /* macvlan mac passthrough check & replace ifp */ diff --git a/src/pipeline/nodes/l3_v6_route_lookup.c b/src/pipeline/nodes/l3_v6_route_lookup.c index fdbc31c9..7be1376d 100644 --- a/src/pipeline/nodes/l3_v6_route_lookup.c +++ b/src/pipeline/nodes/l3_v6_route_lookup.c @@ -1,7 +1,7 @@ /* * l3_v6_route_lookup.c * - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. 
* Copyright (c) 2016, 2017 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -19,7 +19,7 @@ #include "if_var.h" #include "ip_mcast.h" #include "netinet6/ip6_funcs.h" -#include "pktmbuf.h" +#include "pktmbuf_internal.h" #include "pl_common.h" #include "pl_fused.h" #include "pl_node.h" @@ -27,7 +27,7 @@ #include "route_flags.h" #include "route_v6.h" #include "snmp_mib.h" -#include "vrf.h" +#include "vrf_internal.h" struct pl_node; @@ -51,12 +51,13 @@ ipv6_route_lookup_node_to_vrf(struct pl_node *node) } static ALWAYS_INLINE unsigned int -_ipv6_route_lookup_process_common(struct pl_packet *pkt, enum pl_mode mode, +_ipv6_route_lookup_process_common(struct pl_packet *pkt, void *context __unused, + enum pl_mode mode, enum ipv6_route_lookup_mode lkup_mode) { struct ip6_hdr *ip6 = pkt->l3_hdr; struct ifnet *ifp = pkt->in_ifp; - struct next_hop_v6 *nxt; + struct next_hop *nxt; struct vrf *vrf; if (unlikely(ip6->ip6_nxt == IPPROTO_HOPOPTS)) { @@ -66,7 +67,7 @@ _ipv6_route_lookup_process_common(struct pl_packet *pkt, enum pl_mode mode, return IPV6_ROUTE_LOOKUP_FINISH; if (rtalert != ~0u) - return IPV6_ROUTE_LOOKUP_LOCAL; + return IPV6_ROUTE_LOOKUP_L4; } vrf = vrf_get_rcu_fast(pktmbuf_get_vrf(pkt->mbuf)); @@ -79,7 +80,7 @@ _ipv6_route_lookup_process_common(struct pl_packet *pkt, enum pl_mode mode, * till crypto out bound policy check is done */ if (nxt && unlikely(nxt->flags & RTF_LOCAL)) - return IPV6_ROUTE_LOOKUP_LOCAL; + return IPV6_ROUTE_LOOKUP_L4; if (unlikely(IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst))) { IP6STAT_INC_IFP(ifp, IPSTATS_MIB_INMCASTPKTS); @@ -141,22 +142,23 @@ _ipv6_route_lookup_process_common(struct pl_packet *pkt, enum pl_mode mode, } ALWAYS_INLINE unsigned int -ipv6_route_lookup_process_common(struct pl_packet *pkt, enum pl_mode mode) +ipv6_route_lookup_process_common(struct pl_packet *pkt, void *context __unused, + enum pl_mode mode) { - return _ipv6_route_lookup_process_common(pkt, mode, + return _ipv6_route_lookup_process_common(pkt, 
context, mode, IPV6_LKUP_MODE_ROUTER); } ALWAYS_INLINE unsigned int -ipv6_route_lookup_process(struct pl_packet *pkt) +ipv6_route_lookup_process(struct pl_packet *pkt, void *context) { - return ipv6_route_lookup_process_common(pkt, PL_MODE_REGULAR); + return ipv6_route_lookup_process_common(pkt, context, PL_MODE_REGULAR); } ALWAYS_INLINE unsigned int -ipv6_route_lookup_host_process(struct pl_packet *pkt) +ipv6_route_lookup_host_process(struct pl_packet *pkt, void *context) { - return _ipv6_route_lookup_process_common(pkt, PL_MODE_REGULAR, + return _ipv6_route_lookup_process_common(pkt, context, PL_MODE_REGULAR, IPV6_LKUP_MODE_HOST); } @@ -173,10 +175,15 @@ ipv6_route_lookup_feat_change(struct pl_node *node, ALWAYS_INLINE bool ipv6_route_lookup_feat_iterate(struct pl_node *node, bool first, - unsigned int *feature_id, void **context) + unsigned int *feature_id, void **context, + void **storage_ctx) + { struct vrf *vrf = ipv6_route_lookup_node_to_vrf(node); + if (first) + *storage_ctx = NULL; + return pl_node_feat_iterate_u16(&vrf->v_ipv6_post_rlkup_features, first, feature_id, context); } @@ -191,7 +198,7 @@ PL_REGISTER_NODE(ipv6_route_lookup_node) = { .num_next = IPV6_ROUTE_LOOKUP_NUM, .next = { [IPV6_ROUTE_LOOKUP_ACCEPT] = "ipv6-post-route-lookup", - [IPV6_ROUTE_LOOKUP_LOCAL] = "ipv6-local", + [IPV6_ROUTE_LOOKUP_L4] = "ipv6-l4", [IPV6_ROUTE_LOOKUP_DROP] = "ipv6-drop", [IPV6_ROUTE_LOOKUP_FINISH] = "term-finish" } @@ -205,7 +212,7 @@ _Static_assert(IPV6_ROUTE_LOOKUP_NUM == (int)IPV6_ROUTE_LOOKUP_HOST_NUM, "route-lookup and route-lookup-host next node defs differ"); _Static_assert(IPV6_ROUTE_LOOKUP_ACCEPT == (int)IPV6_ROUTE_LOOKUP_HOST_ACCEPT, "route-lookup and route-lookup-host next node defs differ"); -_Static_assert(IPV6_ROUTE_LOOKUP_LOCAL == (int)IPV6_ROUTE_LOOKUP_HOST_LOCAL, +_Static_assert(IPV6_ROUTE_LOOKUP_L4 == (int)IPV6_ROUTE_LOOKUP_HOST_L4, "route-lookup and route-lookup-host next node defs differ"); _Static_assert(IPV6_ROUTE_LOOKUP_DROP == 
(int)IPV6_ROUTE_LOOKUP_HOST_DROP, "route-lookup and route-lookup-host next node defs differ"); @@ -219,7 +226,7 @@ PL_REGISTER_NODE(ipv6_route_lookup_host_node) = { .num_next = IPV6_ROUTE_LOOKUP_HOST_NUM, .next = { [IPV6_ROUTE_LOOKUP_HOST_ACCEPT] = "ipv6-post-route-lookup", - [IPV6_ROUTE_LOOKUP_HOST_LOCAL] = "ipv6-local", + [IPV6_ROUTE_LOOKUP_HOST_L4] = "ipv6-l4", [IPV6_ROUTE_LOOKUP_HOST_DROP] = "ipv6-drop", [IPV6_ROUTE_LOOKUP_HOST_FINISH] = "term-finish" } @@ -227,3 +234,84 @@ PL_REGISTER_NODE(ipv6_route_lookup_host_node) = { struct pl_node_registration *const ipv6_route_lookup_node_ptr = &ipv6_route_lookup_node; + +struct pl_show_vrf_ctx { + json_writer_t *json; + char *vrfname; +}; + +static void +pl_show_ipv6_route_lookup(struct vrf *vrf, struct pl_show_vrf_ctx *ctx) +{ + json_writer_t *wr = ctx->json; + vrfid_t vrfid = dp_vrf_get_vid(vrf); + const char *vrfname; + + vrfname = (vrfid == VRF_DEFAULT_ID) ? "default" : vrf_get_name(vrfid); + + if (ctx->vrfname && (strcmp(ctx->vrfname, vrfname) != 0) && + (strcmp(ctx->vrfname, "all") != 0)) + return; + + jsonw_start_object(wr); + jsonw_name(wr, vrfname); + + jsonw_start_array(wr); + pl_node_iter_features(ipv6_route_lookup_node_ptr, vrf, + pl_print_feats, wr); + jsonw_end_array(wr); + + jsonw_end_object(wr); +} + +/* + * show features ipv6_route_lookup [vrf ] + */ +static int cmd_pl_show_feat_ipv6_route_lookup(struct pl_command *cmd) +{ + int argc = cmd->argc; + char **argv = cmd->argv; + char *opt, *vrfname = NULL; + json_writer_t *wr; + vrfid_t vrf_id; + struct vrf *vrf; + + while (argc > 0) { + opt = next_arg(&argc, &argv); + + if (!strcmp(opt, "vrf")) { + vrfname = next_arg(&argc, &argv); + if (!vrfname) + return 0; + } + } + + wr = jsonw_new(cmd->fp); + if (!wr) + return 0; + + struct pl_show_vrf_ctx ctx = { + .json = wr, + .vrfname = vrfname, + }; + + jsonw_name(wr, "features"); + jsonw_start_object(wr); + + jsonw_name(wr, "vrf"); + jsonw_start_array(wr); + + VRF_FOREACH(vrf, vrf_id) + 
pl_show_ipv6_route_lookup(vrf, &ctx); + + jsonw_end_array(wr); + + jsonw_end_object(wr); + jsonw_destroy(&wr); + return 0; +} + +PL_REGISTER_OPCMD(pl_show_feat_ipv6_route_lookup) = { + .cmd = "show features ipv6_route_lookup", + .handler = cmd_pl_show_feat_ipv6_route_lookup, +}; diff --git a/src/pipeline/nodes/l3_v6_udp.c b/src/pipeline/nodes/l3_v6_udp.c new file mode 100644 index 00000000..9e8ae6d3 --- /dev/null +++ b/src/pipeline/nodes/l3_v6_udp.c @@ -0,0 +1,209 @@ +/* + * Copyright (c) 2020-2021, AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ +#include +#include "ip6_funcs.h" + +#include "compiler.h" +#include "../pl_common.h" +#include "../pl_node.h" +#include "../pl_fused.h" +#include "pl_nodes_common.h" + +/* Size of the feat hash table */ +#define L4_FEAT_HASH_MIN 4 +#define L4_FEAT_HASH_MAX 32 + +static struct cds_lfht *l3_v6_udp_feat_ht; + +static inline int +ipv6_udp_in_feat_match(struct cds_lfht_node *node, const void *key) +{ + const uint32_t *feat_type = key; + const struct pl_feature_registration *feat; + + feat = caa_container_of(node, const struct pl_feature_registration, + feat_node); + + if (feat && feat->feat_type == *feat_type) + return 1; + + return 0; +} + +ALWAYS_INLINE int +ipv6_udp_in_find_feat_id_by_type(uint32_t feat_type) +{ + struct cds_lfht_iter iter; + struct cds_lfht_node *ret_node; + const struct pl_feature_registration *feat; + + if (!l3_v6_udp_feat_ht) + return 0; + + cds_lfht_lookup(l3_v6_udp_feat_ht, feat_type, + ipv6_udp_in_feat_match, &feat_type, + &iter); + + ret_node = cds_lfht_iter_get_node(&iter); + if (ret_node) { + feat = caa_container_of(ret_node, + struct pl_feature_registration, + feat_node); + return feat->id; + } + return 0; +} + +static ALWAYS_INLINE int +ipv6_udp_in_feat_add_type(struct pl_node_registration *node __unused, + struct pl_feature_registration *feat, + uint32_t feat_type) +{ + struct cds_lfht_node *ret_node; + + if (!l3_v6_udp_feat_ht) { + 
l3_v6_udp_feat_ht = cds_lfht_new(L4_FEAT_HASH_MIN, + L4_FEAT_HASH_MIN, + L4_FEAT_HASH_MAX, + CDS_LFHT_AUTO_RESIZE, + NULL); + if (l3_v6_udp_feat_ht == NULL) + rte_panic("Can't allocate ft node hash\n"); + } + ret_node = cds_lfht_add_unique(l3_v6_udp_feat_ht, feat_type, + ipv6_udp_in_feat_match, &feat_type, + &feat->feat_node); + return (ret_node != &feat->feat_node) ? EEXIST : 0; +} + +static ALWAYS_INLINE int +ipv6_udp_in_feat_rem_type(struct pl_node_registration *node __unused, + struct pl_feature_registration *feat __unused, + uint32_t feat_type) +{ + struct cds_lfht_node *ret_node; + struct cds_lfht_iter iter; + + if (!l3_v6_udp_feat_ht) + return -ENOENT; + + cds_lfht_lookup(l3_v6_udp_feat_ht, feat_type, + ipv6_udp_in_feat_match, &feat_type, + &iter); + + ret_node = cds_lfht_iter_get_node(&iter); + if (!ret_node) + return -ENOENT; + + return cds_lfht_del(l3_v6_udp_feat_ht, ret_node); +} + +ALWAYS_INLINE unsigned int +ipv6_udp_in_process_common(struct pl_packet *pkt, void *context __unused, + enum pl_mode mode) +{ + struct rte_mbuf *m = pkt->mbuf; + struct udphdr *udp; + struct ifnet *ifp = pkt->in_ifp; + int rc; + uint32_t feat_type; + + rc = ip6_udp_tunnel_in(m, ifp); + if (likely(rc == 0)) + return IPV6_UDP_CONSUME; + if (rc < 0) + return IPV6_UDP_DROP; + + pkt->mbuf = m; + + udp = dp_pktmbuf_mtol4(m, struct udphdr *); + feat_type = udp->dest; + + switch (mode) { + case PL_MODE_FUSED: + if (!pipeline_fused_ipv6_udp_in_features( + pkt, + ipv6_udp_in_find_feat_id_by_type_fused(feat_type))) + return IPV6_UDP_CONSUME; + break; + case PL_MODE_FUSED_NO_DYN_FEATS: + if (!pipeline_fused_ipv6_udp_in_no_dyn_features( + pkt, + ipv6_udp_in_find_feat_id_by_type_fused_no_dyn_features( + feat_type))) + return IPV6_UDP_CONSUME; + break; + case PL_MODE_REGULAR: + if (!pl_node_invoke_feature_by_type( + ipv6_udp_in_node_ptr, + feat_type, pkt)) + return IPV6_UDP_CONSUME; + break; + } + return IPV6_UDP_ACCEPT; +} + +ALWAYS_INLINE unsigned int +ipv6_udp_in_process(struct pl_packet 
*pkt, void *context) +{ + return ipv6_udp_in_process_common(pkt, context, PL_MODE_REGULAR); +} + +PL_REGISTER_NODE(ipv6_udp_in_node) = { + .name = "vyatta:ipv6-udp-in", + .type = PL_PROC, + .handler = ipv6_udp_in_process, + .feat_type_insert = ipv6_udp_in_feat_add_type, + .feat_type_remove = ipv6_udp_in_feat_rem_type, + .feat_type_find = ipv6_udp_in_find_feat_id_by_type, + .num_next = IPV6_UDP_NUM, + .next = { + [IPV6_UDP_ACCEPT] = "term-noop", + [IPV6_UDP_DROP] = "term-drop", + [IPV6_UDP_CONSUME] = "term-finish", + } +}; + +PL_REGISTER_FEATURE(ipv6_udp_in_feat) = { + .name = "vyatta:ipv6-udp-in", + .node_name = "ipv6-udp-in", + .feature_point = "ipv6-l4", + .always_on = true, + .id = PL_L3_V6_L4_FUSED_FEAT_UDP_IN, + .feat_type = IPPROTO_UDP, +}; + +struct pl_node_registration *const ipv6_udp_in_node_ptr = &ipv6_udp_in_node; + +/* + * show features ipv6_udp_in + */ +static int cmd_pl_show_feat_ipv6_udp_in(struct pl_command *cmd) +{ + json_writer_t *wr; + + wr = jsonw_new(cmd->fp); + if (!wr) + return 0; + + jsonw_name(wr, "features"); + jsonw_start_object(wr); + + jsonw_name(wr, "global"); + jsonw_start_array(wr); + pl_node_iter_features(ipv6_udp_in_node_ptr, NULL, + pl_print_feats, wr); + jsonw_end_array(wr); + + jsonw_end_object(wr); + jsonw_destroy(&wr); + return 0; +} + +PL_REGISTER_OPCMD(pl_show_feat_ipv6_udp_in) = { + .cmd = "show features ipv6_udp_in", + .handler = cmd_pl_show_feat_ipv6_udp_in, +}; diff --git a/src/pipeline/nodes/l3_v6_val.c b/src/pipeline/nodes/l3_v6_val.c index 9326b177..01e9f90f 100644 --- a/src/pipeline/nodes/l3_v6_val.c +++ b/src/pipeline/nodes/l3_v6_val.c @@ -2,7 +2,7 @@ * l3_v4_val.c * * - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2016, 2017 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -19,7 +19,7 @@ #include "if_var.h" #include "ip6_funcs.h" #include "npf/npf.h" -#include "pktmbuf.h" +#include "pktmbuf_internal.h" #include "pl_common.h" #include "pl_fused.h" #include "pl_node.h" @@ -27,7 +27,7 @@ #include "route_v6.h" #include "snmp_mib.h" #include "util.h" -#include "vrf.h" +#include "vrf_internal.h" struct pl_node; @@ -44,7 +44,8 @@ static inline struct ifnet *ipv6_val_node_to_ifp(struct pl_node *node) } ALWAYS_INLINE unsigned int -ipv6_validate_process_common(struct pl_packet *pkt, enum pl_mode mode) +ipv6_validate_process_common(struct pl_packet *pkt, void *context __unused, + enum pl_mode mode) { struct ip6_hdr *ip6 = ip6hdr(pkt->mbuf); struct ifnet *ifp = pkt->in_ifp; @@ -87,9 +88,9 @@ ipv6_validate_process_common(struct pl_packet *pkt, enum pl_mode mode) } ALWAYS_INLINE unsigned int -ipv6_validate_process(struct pl_packet *p) +ipv6_validate_process(struct pl_packet *p, void *context) { - return ipv6_validate_process_common(p, PL_MODE_REGULAR); + return ipv6_validate_process_common(p, context, PL_MODE_REGULAR); } static int @@ -102,20 +103,37 @@ ipv6_validate_feat_change(struct pl_node *node, return pl_node_feat_change_u16(&ifp->ip6_in_features, feat, action); } +static int +ipv6_validate_feat_change_all(struct pl_feature_registration *feat, + enum pl_node_feat_action action) +{ + return if_node_instance_feat_change_all(feat, action, + ipv6_validate_feat_change); +} + ALWAYS_INLINE bool ipv6_validate_feat_iterate(struct pl_node *node, bool first, - unsigned int *feature_id, void **context) + unsigned int *feature_id, void **context, + void **storage_ctx) { struct ifnet *ifp = ipv6_val_node_to_ifp(node); + bool ret; + + ret = pl_node_feat_iterate_u16(&ifp->ip6_in_features, first, + feature_id, context); + if (ret) + *storage_ctx = if_node_instance_get_storage_internal( + ifp, + PL_FEATURE_POINT_IPV6_VALIDATE_ID, + *feature_id); - return pl_node_feat_iterate_u16(&ifp->ip6_in_features, first, - feature_id, context); + return ret; } 
static struct pl_node * ipv6_validate_node_lookup(const char *name) { - struct ifnet *ifp = ifnet_byifname(name); + struct ifnet *ifp = dp_ifnet_byifname(name); return ifp ? ifp_to_ipv6_val_node(ifp) : NULL; } @@ -125,8 +143,13 @@ PL_REGISTER_NODE(ipv6_validate_node) = { .type = PL_PROC, .handler = ipv6_validate_process, .feat_change = ipv6_validate_feat_change, + .feat_change_all = ipv6_validate_feat_change_all, .feat_iterate = ipv6_validate_feat_iterate, .lookup_by_name = ipv6_validate_node_lookup, + .feat_reg_context = if_node_instance_register_storage, + .feat_unreg_context = if_node_instance_unregister_storage, + .feat_get_context = if_node_instance_get_storage, + .feat_setup_cleanup_cb = if_node_instance_set_cleanup_cb, .num_next = IPV6_VAL_NUM, .next = { [IPV6_VAL_ACCEPT] = "ipv6-route-lookup", @@ -137,3 +160,16 @@ PL_REGISTER_NODE(ipv6_validate_node) = { struct pl_node_registration *const ipv6_validate_node_ptr = &ipv6_validate_node; + +/* + * show features ipv6_validate [interface ] + */ +static int cmd_pl_show_feat_ipv6_validate(struct pl_command *cmd) +{ + return if_node_instance_feat_print(cmd, ipv6_validate_node_ptr); +} + +PL_REGISTER_OPCMD(pl_show_feat_ipv6_validate) = { + .cmd = "show features ipv6_validate", + .handler = cmd_pl_show_feat_ipv6_validate, +}; diff --git a/src/pipeline/nodes/pl_nodes_common.h b/src/pipeline/nodes/pl_nodes_common.h index f290afa9..94b0ef38 100644 --- a/src/pipeline/nodes/pl_nodes_common.h +++ b/src/pipeline/nodes/pl_nodes_common.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. 
* * SPDX-License-Identifier: LGPL-2.1-only * @@ -11,15 +11,29 @@ extern struct pl_node_registration *const ether_lookup_node_ptr; +extern struct pl_node_registration *const ipv4_drop_node_ptr; extern struct pl_node_registration *const ipv4_validate_node_ptr; extern struct pl_node_registration *const ipv4_out_node_ptr; extern struct pl_node_registration *const ipv4_encap_node_ptr; extern struct pl_node_registration *const ipv4_route_lookup_node_ptr; +extern struct pl_node_registration *const ipv4_l4_node_ptr; +extern struct pl_node_registration *const ipv4_udp_in_node_ptr; +extern struct pl_node_registration *const ipv4_out_spath_node_ptr; +extern struct pl_node_registration *const ipv6_drop_node_ptr; +extern struct pl_node_registration *const ipv6_l4_node_ptr; extern struct pl_node_registration *const ipv6_validate_node_ptr; extern struct pl_node_registration *const ipv6_out_node_ptr; extern struct pl_node_registration *const ipv6_encap_node_ptr; extern struct pl_node_registration *const ipv6_route_lookup_node_ptr; +extern struct pl_node_registration *const ipv6_udp_in_node_ptr; +extern struct pl_node_registration *const ipv6_out_spath_node_ptr; + +extern struct pl_node_registration *const l2_consume_node_ptr; +extern struct pl_node_registration *const l2_local_node_ptr; +extern struct pl_node_registration *const l2_output_node_ptr; + +extern struct pl_node_registration *const term_drop_node_ptr; PL_DECLARE_FEATURE(ipv4_rpf_feat); PL_DECLARE_FEATURE(ipv4_in_no_address_feat); @@ -30,17 +44,21 @@ PL_DECLARE_FEATURE(ipv4_ipsec_out_feat); PL_DECLARE_FEATURE(ipv6_ipsec_out_feat); PL_DECLARE_FEATURE(sw_vlan_in_feat); PL_DECLARE_FEATURE(capture_ether_in_feat); +PL_DECLARE_FEATURE(capture_l2_output_feat); PL_DECLARE_FEATURE(portmonitor_in_feat); +PL_DECLARE_FEATURE(portmonitor_out_feat); PL_DECLARE_FEATURE(bridge_in_feat); PL_DECLARE_FEATURE(cross_connect_ether_feat); -PL_DECLARE_FEATURE(flow_capture_ether_in_feat); PL_DECLARE_FEATURE(hw_hdr_in_feat); 
PL_DECLARE_FEATURE(vlan_mod_in_feat); +PL_DECLARE_FEATURE(vlan_mod_out_feat); PL_DECLARE_FEATURE(ipv4_defrag_in_feat); PL_DECLARE_FEATURE(ipv4_defrag_out_feat); PL_DECLARE_FEATURE(ipv6_defrag_in_feat); PL_DECLARE_FEATURE(ipv6_defrag_out_feat); +PL_DECLARE_FEATURE(ipv4_defrag_out_spath_feat); +PL_DECLARE_FEATURE(ipv6_defrag_out_spath_feat); PL_DECLARE_FEATURE(ipv4_dpi_in_feat); PL_DECLARE_FEATURE(ipv6_dpi_in_feat); @@ -51,11 +69,17 @@ PL_DECLARE_FEATURE(ipv4_acl_in_feat); PL_DECLARE_FEATURE(ipv4_acl_out_feat); PL_DECLARE_FEATURE(ipv6_acl_in_feat); PL_DECLARE_FEATURE(ipv6_acl_out_feat); +PL_DECLARE_FEATURE(ipv4_acl_out_spath_feat); +PL_DECLARE_FEATURE(ipv6_acl_out_spath_feat); PL_DECLARE_FEATURE(ipv4_fw_in_feat); PL_DECLARE_FEATURE(ipv4_fw_out_feat); PL_DECLARE_FEATURE(ipv6_fw_in_feat); PL_DECLARE_FEATURE(ipv6_fw_out_feat); +PL_DECLARE_FEATURE(ipv4_fw_orig_feat); +PL_DECLARE_FEATURE(ipv6_fw_orig_feat); +PL_DECLARE_FEATURE(ipv4_fw_out_spath_feat); +PL_DECLARE_FEATURE(ipv6_fw_out_spath_feat); PL_DECLARE_FEATURE(ipv4_pbr_feat); PL_DECLARE_FEATURE(ipv6_pbr_feat); @@ -66,4 +90,9 @@ PL_DECLARE_FEATURE(nptv6_out_feat); PL_DECLARE_FEATURE(ipv4_cgnat_in_feat); PL_DECLARE_FEATURE(ipv4_cgnat_out_feat); +PL_DECLARE_FEATURE(ipv4_nat46_in_feat); +PL_DECLARE_FEATURE(ipv6_nat64_in_feat); +PL_DECLARE_FEATURE(ipv6_nat46_out_feat); +PL_DECLARE_FEATURE(ipv4_nat64_out_feat); + #endif /* PL_NODES_COMMON_H */ diff --git a/src/pipeline/nodes/pppoe/l2_pppoe_cmd.c b/src/pipeline/nodes/pppoe/l2_pppoe_cmd.c index 8b650ca1..2aef2161 100644 --- a/src/pipeline/nodes/pppoe/l2_pppoe_cmd.c +++ b/src/pipeline/nodes/pppoe/l2_pppoe_cmd.c @@ -1,6 +1,7 @@ /* * l2_pppoe_cmd.c * + * Copyright (c) 2018-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2018-2019 AT&T Intellectual Property. All rights reserved. 
* * SPDX-License-Identifier: LGPL-2.1-only @@ -62,38 +63,38 @@ _pppoe_cmd_handler(PPPOEConfig *pppoe_msg, struct pb_msg *msg) char *under_name; struct ifnet *underlying; struct pppoe_connection *conn; - struct ether_addr my_eth; - struct ether_addr peer_eth; + struct rte_ether_addr my_eth; + struct rte_ether_addr peer_eth; pppname = pppoe_msg->pppname; - ppp_inter = ifnet_byifname(pppname); + ppp_inter = dp_ifnet_byifname(pppname); if (!ppp_inter) return 0; under_name = pppoe_msg->undername; - underlying = ifnet_byifname(under_name); + underlying = dp_ifnet_byifname(under_name); if (!ether_aton_r(pppoe_msg->ether, &my_eth)) { - pb_cmd_err(msg, "not a valid session id: %s\n", + dp_pb_cmd_err(msg, "not a valid session id: %s\n", pppoe_msg->ether); return -1; } if (!ether_aton_r(pppoe_msg->peer_ether, &peer_eth)) { - pb_cmd_err(msg, "not a valid ethernet net address: %s\n", + dp_pb_cmd_err(msg, "not a valid ethernet net address: %s\n", pppoe_msg->peer_ether); return -1; } if (ppp_inter->if_softc) { - pb_cmd_err(msg, "Can not modify PPP connection."); + dp_pb_cmd_err(msg, "Can not modify PPP connection."); return -1; } ppp_inter->if_softc = zmalloc_aligned(sizeof(struct pppoe_connection)); if (!ppp_inter->if_softc) { RTE_LOG(ERR, PPPOE, "Out of memory allocating connection struct."); - pb_cmd_err(msg, + dp_pb_cmd_err(msg, "Out of memory allocating connection struct."); return -1; } @@ -111,7 +112,7 @@ _pppoe_cmd_handler(PPPOEConfig *pppoe_msg, struct pb_msg *msg) conn->underlying_ifindex = underlying->if_index; if (!pppoe_init_session(ppp_inter, conn->session)) { - pb_cmd_err(msg, + dp_pb_cmd_err(msg, "could not initialize pppoe session\n"); free(ppp_inter->if_softc); ppp_inter->if_softc = NULL; diff --git a/src/pipeline/nodes/pppoe/l2_pppoe_node.c b/src/pipeline/nodes/pppoe/l2_pppoe_node.c index 79b8c2e0..d7fc2336 100644 --- a/src/pipeline/nodes/pppoe/l2_pppoe_node.c +++ b/src/pipeline/nodes/pppoe/l2_pppoe_node.c @@ -1,7 +1,7 @@ /* * l2_pppoe_node.c * - * Copyright 
(c) 2018-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2018-2020, AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only * @@ -14,7 +14,7 @@ #include "pppoe.h" ALWAYS_INLINE unsigned int -pppoe_in_process(struct pl_packet *pkt) +pppoe_in_process(struct pl_packet *pkt, void *context __unused) { struct rte_mbuf *m = pkt->mbuf; struct pppoe_packet *pppoe_hdr = @@ -35,7 +35,8 @@ pppoe_in_process(struct pl_packet *pkt) if (inner_proto == PPP_LCP || inner_proto == PPP_IPCP || inner_proto == PPP_IPV6CP || - inner_proto == PPP_PAP) + inner_proto == PPP_PAP || + inner_proto == PPP_CHAP) return PPP_FORWARD_LOCAL; /* Set input interface to corresponding PPP device */ @@ -48,22 +49,22 @@ pppoe_in_process(struct pl_packet *pkt) pkt->in_ifp = ppp; /* Trim ONLY PPP overhead, keep length for ether_hdr */ - struct ether_hdr *eh = - (struct ether_hdr *)rte_pktmbuf_adj( + struct rte_ether_hdr *eh = + (struct rte_ether_hdr *)rte_pktmbuf_adj( m, (sizeof(struct pppoe_packet) - - sizeof(struct ether_hdr))); - m->l2_len = sizeof(struct ether_hdr); + sizeof(struct rte_ether_hdr))); + m->l2_len = sizeof(struct rte_ether_hdr); memcpy(&eh->d_addr, &pppoe_hdr->eth_hdr.d_addr, - sizeof(struct ether_addr)); + sizeof(struct rte_ether_addr)); memcpy(&eh->s_addr, &pppoe_hdr->eth_hdr.s_addr, - sizeof(struct ether_addr)); + sizeof(struct rte_ether_addr)); switch (inner_proto) { case PPP_IP: - eh->ether_type = htons(ETHER_TYPE_IPv4); + eh->ether_type = htons(RTE_ETHER_TYPE_IPV4); return PPP_FORWARD_V4_ACCEPT; case PPP_IPV6: - eh->ether_type = htons(ETHER_TYPE_IPv6); + eh->ether_type = htons(RTE_ETHER_TYPE_IPV6); return PPP_FORWARD_V6_ACCEPT; default: /* Unsupported inner protocol */ @@ -80,9 +81,7 @@ pppoe_in_process(struct pl_packet *pkt) PL_REGISTER_NODE(pppoe_in_node) = { .name = "vyatta:pppoe-in", .type = PL_PROC, - .init = NULL, .handler = pppoe_in_process, - .disable = false, .num_next = PPP_FORWARD_NUM, .next = { 
[PPP_FORWARD_V4_ACCEPT] = "ipv4-validate", diff --git a/src/pipeline/nodes/pppoe/pppoe.c b/src/pipeline/nodes/pppoe/pppoe.c index aac6b461..b3cc7d67 100644 --- a/src/pipeline/nodes/pppoe/pppoe.c +++ b/src/pipeline/nodes/pppoe/pppoe.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2018-2020, AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only * @@ -109,7 +109,7 @@ static void pppoe_invalidate_conn(struct ifnet *ifp, uint32_t old_ifindex) * and have this interface as the one they need. */ static void -pppoe_track_if_index_set(struct ifnet *new_ifp, uint32_t ifindex __unused) +pppoe_track_if_index_set(struct ifnet *new_ifp) { struct pppoe_connection *conn; @@ -205,7 +205,7 @@ ppp_lookup_ses(struct ifnet *underlying_interface, uint16_t session) struct cds_lfht_iter iter; - rcu_read_lock(); + dp_rcu_read_lock(); cds_lfht_lookup(p_map_htbl, pppoe_classify_map_hash(&key), pppoe_classify_map_match, &key, &iter); struct cds_lfht_node *node = cds_lfht_iter_get_node(&iter); @@ -214,11 +214,11 @@ ppp_lookup_ses(struct ifnet *underlying_interface, uint16_t session) struct pppoe_map_node *pnode = caa_container_of(node, struct pppoe_map_node, pnode); if (pnode->ppp) { - rcu_read_unlock(); + dp_rcu_read_unlock(); return pnode->ppp; } } - rcu_read_unlock(); + dp_rcu_read_unlock(); return NULL; } @@ -243,7 +243,7 @@ ppp_remove_ses(uint32_t ifindex, uint16_t session) struct cds_lfht_iter iter; - rcu_read_lock(); + dp_rcu_read_lock(); cds_lfht_lookup(p_map_htbl, pppoe_classify_map_hash(&key), pppoe_classify_map_match, &key, &iter); struct cds_lfht_node *node = cds_lfht_iter_get_node(&iter); @@ -256,7 +256,7 @@ ppp_remove_ses(uint32_t ifindex, uint16_t session) call_rcu(&pnode->pppoe_rcu, pppoe_entry_free); } - rcu_read_unlock(); + dp_rcu_read_unlock(); } bool @@ -295,7 +295,7 @@ pppoe_init_session(struct ifnet *ppp_dev, uint16_t session) struct cds_lfht_node *node; /* Does session 
already exist? */ - rcu_read_lock(); + dp_rcu_read_lock(); cds_lfht_lookup(pppoe_tbl, pppoe_classify_map_hash(&key), pppoe_classify_map_match, &key, &iter); node = cds_lfht_iter_get_node(&iter); @@ -310,7 +310,7 @@ pppoe_init_session(struct ifnet *ppp_dev, uint16_t session) &pnode->pnode); } } - rcu_read_unlock(); + dp_rcu_read_unlock(); return true; } @@ -335,20 +335,20 @@ ppp_do_encap(struct rte_mbuf *m, struct pppoe_connection *conn, struct pppoe_packet *pheader = (struct pppoe_packet *)rte_pktmbuf_prepend( m, sizeof(struct pppoe_packet) - - sizeof(struct ether_hdr)); + sizeof(struct rte_ether_hdr)); if (unlikely(!pheader)) return false; pheader->session = htons(conn->session); if (output) { memcpy(&pheader->eth_hdr.d_addr, &conn->peer_eth, - sizeof(struct ether_addr)); + sizeof(struct rte_ether_addr)); memcpy(&pheader->eth_hdr.s_addr, &conn->my_eth, - sizeof(struct ether_addr)); + sizeof(struct rte_ether_addr)); } else { memcpy(&pheader->eth_hdr.d_addr, &conn->my_eth, - sizeof(struct ether_addr)); + sizeof(struct rte_ether_addr)); memcpy(&pheader->eth_hdr.s_addr, &conn->peer_eth, - sizeof(struct ether_addr)); + sizeof(struct rte_ether_addr)); } pheader->eth_hdr.ether_type = htons(ETH_P_PPP_SES); pheader->vertype = PPPOE_VER_TYPE(1, 1); @@ -425,8 +425,15 @@ ppp_tunnel_delete(struct ifnet *ifp) pppoe_no_track_underlying_interfaces(); } +static enum dp_ifnet_iana_type +ppp_iana_type(struct ifnet *ifp __unused) +{ + return DP_IFTYPE_IANA_PPP; +} + static const struct ift_ops ppp_if_ops = { .ifop_uninit = ppp_tunnel_delete, + .ifop_iana_type = ppp_iana_type, }; static void ppp_init(void) diff --git a/src/pipeline/nodes/pppoe/pppoe.h b/src/pipeline/nodes/pppoe/pppoe.h index a01b6e58..4d664e5c 100644 --- a/src/pipeline/nodes/pppoe/pppoe.h +++ b/src/pipeline/nodes/pppoe/pppoe.h @@ -1,6 +1,6 @@ /* pppoe.h * - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2018-2019, AT&T Intellectual Property. All rights reserved. 
* * SPDX-License-Identifier: LGPL-2.1-only */ @@ -14,14 +14,14 @@ #include #include -#include "include/pl_common.h" +#include "pl_common.h" #include "if_var.h" #define PPPOE_VER_TYPE(v, t)(((v) << 4) | (t)) /* A PPPoE Packet, including Ethernet headers */ struct pppoe_packet { - struct ether_hdr eth_hdr; /* Ethernet header */ + struct rte_ether_hdr eth_hdr; /* Ethernet header */ uint8_t vertype; /* PPPoE Version and Type (must both be 1) */ uint8_t code; /* PPPoE code */ uint16_t session; /* PPPoE session */ @@ -39,8 +39,8 @@ struct pppoe_tag { struct pppoe_connection { struct rcu_head scpppoe_rcu; - struct ether_addr my_eth; /* My MAC address */ - struct ether_addr peer_eth; /* Peer's MAC address */ + struct rte_ether_addr my_eth; /* My MAC address */ + struct rte_ether_addr peer_eth; /* Peer's MAC address */ uint16_t session; /* Session ID */ char *service_name; /* Desired service name, if any */ char *ac_name; /* Desired AC name, if any */ diff --git a/src/pipeline/nodes/sample/SampleFeatConfig.proto b/src/pipeline/nodes/sample/SampleFeatConfig.proto index 137bc44a..f8755322 100644 --- a/src/pipeline/nodes/sample/SampleFeatConfig.proto +++ b/src/pipeline/nodes/sample/SampleFeatConfig.proto @@ -1,4 +1,4 @@ -// Copyright (c) 2018, AT&T Intellectual Property. All rights reserved. +// Copyright (c) 2018-2019, AT&T Intellectual Property. All rights reserved. // // SPDX-License-Identifier: LGPL-2.1-only // diff --git a/src/pipeline/nodes/sample/SampleFeatOp.proto b/src/pipeline/nodes/sample/SampleFeatOp.proto new file mode 100644 index 00000000..e6aec9c3 --- /dev/null +++ b/src/pipeline/nodes/sample/SampleFeatOp.proto @@ -0,0 +1,16 @@ +// Copyright (c) 2019, AT&T Intellectual Property. All rights reserved. 
+// +// SPDX-License-Identifier: LGPL-2.1-only +// +// Sample feat protobuf definitions +// + +syntax="proto2"; + +message SampleFeatOpReq { + //empty +} + +message SampleFeatOpResp { + optional uint32 count = 1; +} \ No newline at end of file diff --git a/src/pipeline/nodes/sample/meson.build b/src/pipeline/nodes/sample/meson.build new file mode 100644 index 00000000..4e3d9dbc --- /dev/null +++ b/src/pipeline/nodes/sample/meson.build @@ -0,0 +1,29 @@ +# SPDX-License-Identifier: LGPL-2.1-only +# Copyright (c) 2020, AT&T Intellectual Property. All rights reserved. + +sample_plugin_sources = files('sample.c') + +sample_protobuf_sources = [ + 'SampleFeatConfig.proto', + 'SampleFeatOp.proto' +] + +sample_generated_protobuf_c = [] +sample_generated_protobuf_c_headers = [] +foreach protobuf_definition : sample_protobuf_sources + generated_c = custom_target('c_' + protobuf_definition, + command: [protoc, '--proto_path=@CURRENT_SOURCE_DIR@', '--c_out=@OUTDIR@', '@INPUT@'], + input: protobuf_definition, + output: ['@BASENAME@.pb-c.c', '@BASENAME@.pb-c.h'], + ) + sample_generated_protobuf_c += generated_c + sample_generated_protobuf_c_headers += generated_c[1] +endforeach + +sample_plugin = shared_module('sample_plugin', + sources: [sample_plugin_sources, sample_generated_protobuf_c], + include_directories: public_include, + name_prefix: '', + install: true, + install_dir: get_option('prefix') / get_option('libdir') / meson.project_name() / 'pipeline' / 'plugins' +) diff --git a/src/pipeline/nodes/sample/sample.c b/src/pipeline/nodes/sample/sample.c index 0b3d8601..f44e43c2 100644 --- a/src/pipeline/nodes/sample/sample.c +++ b/src/pipeline/nodes/sample/sample.c @@ -1,7 +1,7 @@ /* * Sample pipeline feature node * - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2021, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2017 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -16,13 +16,17 @@ * */ #include +#include +#include #include +#include +#include +#include #include -#include -#include +#include -#include "protobuf.h" +#include #include enum sample_dispositions { @@ -31,86 +35,142 @@ enum sample_dispositions { }; static uint32_t sample_pkt_count; +static uint32_t sample_ctx = 0x12345678; +static uint32_t sample_cleanup_cb_count; + +static void sample_cleanup_cb(const char *instance __unused, + void *context) +{ + sample_cleanup_cb_count++; + + assert(context == &sample_ctx); +} + static unsigned int -sample_process(struct pl_packet *pkt __attribute__((unused))) +sample_process(struct pl_packet *pkt __unused, + void *context) { uatomic_inc(&sample_pkt_count); + assert(context == &sample_ctx); return SAMPLE_ACCEPT; } -/* Register Node */ -PL_REGISTER_NODE(sample_node) = { - .name = "sample:sample", - .type = PL_PROC, - .handler = sample_process, - .num_next = SAMPLE_NUM, - .next = { - [SAMPLE_ACCEPT] = "vyatta:term-noop" - } -}; - -PL_REGISTER_FEATURE(sample_feat) = { - .name = "sample:sample", - .node_name = "sample", - .feature_point = "vyatta:ipv4-validate", - .visit_after = "vyatta:ipv4-pbr", -}; static int sample_feat_cmd(struct pb_msg *msg) { - int ret; + int ret = 0; SampleFeatConfig *sample_msg = sample_feat_config__unpack(NULL, msg->msg_len, msg->msg); if (!sample_msg) { - pb_cmd_err(msg, "failed to read sample protobuf command\n"); + dp_pb_cmd_err(msg, "failed to read sample protobuf command\n"); return -1; } if (!sample_msg->has_is_active) { - pb_cmd_err(msg, "error in sample protobuf command\n"); + dp_pb_cmd_err(msg, "error in sample protobuf command\n"); return -1; } - if (sample_msg->is_active == false) - ret = pl_node_remove_feature(&sample_feat, - sample_msg->if_name); - else - ret = pl_node_add_feature(&sample_feat, sample_msg->if_name); + if (sample_msg->is_active == false) { + dp_pipeline_disable_feature_by_inst("sample:sample", + sample_msg->if_name); + dp_pipeline_unregister_inst_storage("sample:sample", + 
sample_msg->if_name); + } else { + ret = dp_pipeline_register_inst_storage("sample:sample", + sample_msg->if_name, + &sample_ctx); + if (ret) + goto out; + ret = dp_pipeline_enable_feature_by_inst("sample:sample", + sample_msg->if_name); + + } +out: sample_feat_config__free_unpacked(sample_msg, NULL); return ret; } static int -cmd_sample_feat_show(struct pl_command *cmd) +cmd_sample_feat_show(struct pb_msg *msg) { - json_writer_t *json = jsonw_new(cmd->fp); + /* request */ + SampleFeatOpReq *sample_op_req_msg = + sample_feat_op_req__unpack(NULL, msg->msg_len, msg->msg); + if (!sample_op_req_msg) { + dp_pb_cmd_err(msg, + "failed to read sample protobuf op command\n"); + return -1; + } + sample_feat_op_req__free_unpacked(sample_op_req_msg, NULL); - if (!json) - return 0; + /* response */ + SampleFeatOpResp sample_op_resp_msg = SAMPLE_FEAT_OP_RESP__INIT; - jsonw_name(json, "sample-feat"); - jsonw_start_object(json); + sample_op_resp_msg.count = uatomic_read(&sample_pkt_count); + sample_op_resp_msg.has_count = true; - jsonw_uint_field(json, "ipv4-validate-packet-count", - uatomic_read(&sample_pkt_count)); + /* now convert this to binary and add back */ + int len = sample_feat_op_resp__get_packed_size(&sample_op_resp_msg); + void *buf2 = malloc(len); + sample_feat_op_resp__pack(&sample_op_resp_msg, buf2); + msg->ret_msg = buf2; + msg->ret_msg_len = len; - jsonw_end_object(json); - jsonw_destroy(&json); return 0; } -PB_REGISTER_CMD(sample_cmd) = { - .cmd = "sample:sample-feat", - .handler = sample_feat_cmd, +const char *sampler_next_nodes[] = { + "vyatta:term-noop", }; -PL_REGISTER_OPCMD(sample_show) = { - .cmd = "sample-feat show", - .handler = cmd_sample_feat_show, +static const char *plugin_name = "sample"; + +struct dp_pipeline_feat_registration sample_feat = { + .plugin_name = "sample", + .name = "sample:sample", + .node_name = "sample:sample", + .feature_point = "vyatta:ipv4-validate", + .visit_before = NULL, + .visit_after = "vyatta:ipv4-pbr", + .cleanup_cb = 
sample_cleanup_cb, }; + +int dp_feature_plugin_init(const char **name) +{ + int rv; + + rv = dp_pipeline_register_node("sample:sample", + 1, + sampler_next_nodes, + PL_PROC, + sample_process); + if (rv) + goto error; + + + rv = dp_pipeline_register_list_feature(&sample_feat); + if (rv) + goto error; + + rv = dp_feature_register_pb_cfg_handler("sample:sample-feat", + sample_feat_cmd); + if (rv) + goto error; + + rv = dp_feature_register_pb_op_handler("sample:sample-feat", + cmd_sample_feat_show); + if (rv) + goto error; + + *name = plugin_name; + return 0; +error: + return rv; +} diff --git a/src/pipeline/nodes/term.c b/src/pipeline/nodes/term.c index fb574f95..f088c15e 100644 --- a/src/pipeline/nodes/term.c +++ b/src/pipeline/nodes/term.c @@ -1,7 +1,7 @@ /* * term.c * - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2016, 2017 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -28,7 +28,7 @@ #include "snmp_mib.h" ALWAYS_INLINE unsigned int -term_v4_to_v6_process(struct pl_packet *pkt) +term_v4_to_v6_process(struct pl_packet *pkt, void *context __unused) { pkt->npf_flags |= NPF_FLAG_CACHE_EMPTY; pktmbuf_prepare_encap_out(pkt->mbuf); @@ -45,7 +45,7 @@ PL_REGISTER_NODE(term_v4_to_v6_node) = { }; ALWAYS_INLINE unsigned int -term_v6_to_v4_process(struct pl_packet *pkt) +term_v6_to_v4_process(struct pl_packet *pkt, void *context __unused) { pkt->npf_flags |= NPF_FLAG_CACHE_EMPTY; pktmbuf_prepare_encap_out(pkt->mbuf); @@ -63,41 +63,7 @@ PL_REGISTER_NODE(term_v6_to_v4_node) = { }; ALWAYS_INLINE unsigned int -term_drop_process(struct pl_packet *pkt) -{ - rte_pktmbuf_free(pkt->mbuf); - pkt->mbuf = NULL; - return 0; -} - -/* Register Node */ -PL_REGISTER_NODE(term_drop_node) = { - .name = "vyatta:term-drop", - .type = PL_OUTPUT, - .handler = term_drop_process, - .num_next = 0, -}; - -ALWAYS_INLINE unsigned int -ipv6_drop_process(struct pl_packet *pkt) -{ - if (pkt->in_ifp) - IP6STAT_INC_IFP(pkt->in_ifp, IPSTATS_MIB_INDISCARDS); - rte_pktmbuf_free(pkt->mbuf); - pkt->mbuf = NULL; - return 0; -} - -/* Register Node */ -PL_REGISTER_NODE(ipv6_drop_node) = { - .name = "vyatta:ipv6-drop", - .type = PL_OUTPUT, - .handler = ipv6_drop_process, - .num_next = 0, -}; - -ALWAYS_INLINE unsigned int -ipv4_local_process(struct pl_packet *pkt) +ipv4_local_process(struct pl_packet *pkt, void *context __unused) { ip_local_deliver(pkt->in_ifp, pkt->mbuf); return 0; @@ -112,7 +78,7 @@ PL_REGISTER_NODE(ipv4_local_node) = { }; ALWAYS_INLINE unsigned int -ipv6_local_process(struct pl_packet *pkt) +ipv6_local_process(struct pl_packet *pkt, void *context __unused) { ip6_local_deliver(pkt->in_ifp, pkt->mbuf); return 0; @@ -127,22 +93,7 @@ PL_REGISTER_NODE(ipv6_local_node) = { }; ALWAYS_INLINE unsigned int -l2_local_process(struct pl_packet *pkt) -{ - local_packet(pkt->in_ifp, pkt->mbuf); - return 0; -} - -/* Register Node */ -PL_REGISTER_NODE(l2_local_node) = { - .name = 
"vyatta:l2-local", - .type = PL_OUTPUT, - .handler = l2_local_process, - .num_next = 0, -}; - -ALWAYS_INLINE unsigned int -term_finish_process(struct pl_packet *p __unused) +term_finish_process(struct pl_packet *p __unused, void *context __unused) { return 0; } @@ -167,7 +118,7 @@ PL_REGISTER_NODE(term_finish_node) = { * feature processing. */ ALWAYS_INLINE unsigned int -term_noop_process(struct pl_packet *p __unused) +term_noop_process(struct pl_packet *p __unused, void *context __unused) { return 0; } @@ -181,9 +132,9 @@ PL_REGISTER_NODE(term_noop_node) = { }; ALWAYS_INLINE unsigned int -l2_out_process(struct pl_packet *pkt) +l2_out_process(struct pl_packet *pkt, void *context __unused) { - if_output(pkt->out_ifp, pkt->mbuf, pkt->in_ifp, pkt->l2_proto); + if_output_internal(pkt); return 0; } diff --git a/src/pipeline/nodes/term_drop.c b/src/pipeline/nodes/term_drop.c new file mode 100644 index 00000000..a8536c6e --- /dev/null +++ b/src/pipeline/nodes/term_drop.c @@ -0,0 +1,130 @@ +/* + * Copyright (c) 2020, AT&T Intellectual Property. All rights reserved. + * SPDX-License-Identifier: LGPL-2.1-only + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "compiler.h" +#include "ether.h" +#include "if_var.h" +#include "if/macvlan.h" +#include "main.h" +#include "pktmbuf_internal.h" +#include "pl_common.h" +#include "pl_fused.h" +#include "pl_node.h" +#include "pl_nodes_common.h" +#include "util.h" + +struct pl_node; + +/* + * Term drop feature instance is global, so we can store it in a global var. 
+ */ +uint16_t term_drop_features; + +static inline struct pl_node *term_drop_feat_list_to_node(void) +{ + /* our imaginary node */ + return (struct pl_node *)&term_drop_features; +} + +static inline uint16_t * +drop_node_to_term_drop_feat_list(struct pl_node *node) +{ + /* the node is a fiction of our imagination */ + return (uint16_t *)node; +} + +ALWAYS_INLINE unsigned int +term_drop_process_common(struct pl_packet *pkt, void *context __unused, + enum pl_mode mode) +{ + /* + * As this is a feature run once it is decided that the packet is + * to be dropped the features can not change that decision. It will + * still be dropped. The feature return value can not change that + * so don't check it. + */ + switch (mode) { + case PL_MODE_FUSED: + pipeline_fused_term_drop_features( + pkt, term_drop_feat_list_to_node()); + break; + case PL_MODE_FUSED_NO_DYN_FEATS: + pipeline_fused_term_drop_no_dyn_features( + pkt, term_drop_feat_list_to_node()); + break; + case PL_MODE_REGULAR: + pl_node_invoke_enabled_features(term_drop_node_ptr, + term_drop_feat_list_to_node(), + pkt); + break; + } + + rte_pktmbuf_free(pkt->mbuf); + pkt->mbuf = NULL; + + return TERM_DROP_ACCEPT; +} + +ALWAYS_INLINE unsigned int +term_drop_process(struct pl_packet *p, void *context) +{ + return term_drop_process_common(p, context, PL_MODE_REGULAR); +} + +static int +term_drop_feat_change(struct pl_node *node, + struct pl_feature_registration *feat, + enum pl_node_feat_action action) +{ + uint16_t *feature_list = drop_node_to_term_drop_feat_list(node); + + return pl_node_feat_change_u16(feature_list, feat, action); +} + +ALWAYS_INLINE bool +term_drop_feat_iterate(struct pl_node *node, bool first, + unsigned int *feature_id, void **context, + void **storage_ctx __unused) +{ + uint16_t *feature_list = drop_node_to_term_drop_feat_list(node); + + /* No support for instance context at the moment */ + return pl_node_feat_iterate_u16(feature_list, first, + feature_id, context); +} + +static struct pl_node * 
+term_drop_node_lookup(const char *name) +{ + if (strcmp(name, "all") == 0) + return term_drop_feat_list_to_node(); + + return NULL; +} + +/* Register Node */ +PL_REGISTER_NODE(term_drop_node) = { + .name = "vyatta:term-drop", + .type = PL_PROC, + .handler = term_drop_process, + .feat_change = term_drop_feat_change, + .feat_iterate = term_drop_feat_iterate, + .lookup_by_name = term_drop_node_lookup, + .num_next = TERM_DROP_NUM, + .next = { + [TERM_DROP_ACCEPT] = "term-finish", + } +}; + +struct pl_node_registration *const term_drop_node_ptr = + &term_drop_node; diff --git a/src/pipeline/pl_commands.c b/src/pipeline/pl_commands.c index 5c469e5e..1f52fca6 100644 --- a/src/pipeline/pl_commands.c +++ b/src/pipeline/pl_commands.c @@ -2,7 +2,7 @@ * pl_commands.c * * - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2016, 2017 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -13,6 +13,7 @@ #include #include "commands.h" +#include "feature_commands.h" #include "pl_commands.h" #include "pl_common.h" #include "pl_internal.h" @@ -191,3 +192,12 @@ PB_REGISTER_CMD(pipeline_stats_cmd) = { .cmd = "vyatta:pipeline-stats", .handler = cmd_pipeline_stats_cfg, }; + +bool pl_print_feats(struct pl_feature_registration *feat_reg, void *context) +{ + json_writer_t *wr = context; + + jsonw_string(wr, feat_reg->name); + + return true; +} diff --git a/src/pipeline/pl_commands.h b/src/pipeline/pl_commands.h index 776d5210..a68bbdaf 100644 --- a/src/pipeline/pl_commands.h +++ b/src/pipeline/pl_commands.h @@ -1,7 +1,7 @@ /* * pl_commands.h * - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2016, 2017 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -20,7 +20,7 @@ extern zhash_t *g_pl_cmds; extern zhash_t *g_pl_opcmds; void -pl_recurse_cmds(zhash_t *h, const struct pl_node_command *c, char *s); +pl_recurse_cmds(zhash_t *cmds, const struct pl_node_command *c, char *toks); void pl_dump_nodes(json_writer_t *json); diff --git a/include/pl_common.h b/src/pipeline/pl_common.h similarity index 72% rename from include/pl_common.h rename to src/pipeline/pl_common.h index b85de51b..327ffaf9 100644 --- a/include/pl_common.h +++ b/src/pipeline/pl_common.h @@ -1,7 +1,7 @@ /* * pl_common.h * - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017,2019-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2016, 2017 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -15,16 +15,19 @@ #include #include #include +#include "urcu.h" + +#include "pipeline.h" struct rte_mbuf; struct ifnet; struct pl_node; struct json_writer; struct pl_feature_registration; +struct pl_node_registration; #define PL_NODE_INPUT_MAX 16 #define PL_NODE_COLL_MAX 128 -#define PL_NODE_STORE_MAX 4 enum pl_mode { /* @@ -52,43 +55,12 @@ enum pl_mode { typedef void (pl_storage_delete) (void *s); -/* - * These are carry over from existing - * pipeline functionality but should be - * refactored out of existence if possible. 
- */ -enum validation_flags { - NEEDS_EMPTY = 0x0, - NEEDS_SLOWPATH = 0x1, -}; - -struct pl_packet { - struct rte_mbuf *mbuf; - void *l3_hdr; - int l2_pkt_type; - enum validation_flags val_flags; - union { - struct next_hop *v4; - struct next_hop_v6 *v6; - } nxt; - struct ifnet *in_ifp; - struct ifnet *out_ifp; - uint32_t tblid; - uint16_t npf_flags; - uint16_t l2_proto; - int max_data_used; - void *data[PL_NODE_STORE_MAX]; -} __rte_cache_aligned; enum pl_node_feat_action { PL_NODE_FEAT_ADD, PL_NODE_FEAT_REM, }; -/* main node processing entry point */ -typedef unsigned int -(pl_proc) (struct pl_packet *p); - /* node initialization function */ typedef void (pl_init_node) (const struct pl_node *); @@ -113,38 +85,73 @@ typedef int struct pl_feature_registration *feat, enum pl_node_feat_action action); +typedef int +(pl_node_feat_change_all) (struct pl_feature_registration *feat, + enum pl_node_feat_action action); + +typedef int +(pl_node_feat_type_insert) (struct pl_node_registration *node, + struct pl_feature_registration *feat, + uint32_t type); + +typedef int +(pl_node_feat_type_remove) (struct pl_node_registration *node, + struct pl_feature_registration *feat, + uint32_t type); + +typedef int +(pl_node_feat_type_find) (uint32_t type); + typedef bool (pl_node_feat_iterate) (struct pl_node *node, bool first, - unsigned int *feature_id, void **context); + unsigned int *feature_id, void **context, + void **storage_context); typedef struct pl_node * (pl_node_lookup_by_name_fn) (const char *name); -/* - * Types of nodes - */ -enum pl_node_type { - PL_PROC = 0, - PL_OUTPUT, - PL_CONTINUE, -}; +typedef int +(pl_node_register_context) (struct pl_node *node, + struct pl_feature_registration *feat, + void *context); + +typedef int +(pl_node_unregister_context) (struct pl_node *node, + struct pl_feature_registration *feat); + +typedef void * +(pl_node_get_context) (struct pl_node *node, + struct pl_feature_registration *feat); +typedef int +(pl_node_setup_cleanup_cb) (struct 
pl_feature_registration *feat); + +typedef void * +(pl_node_get_context) (struct pl_node *node, + struct pl_feature_registration *feat); /* registration */ struct pl_node_registration { const char *name; - pl_init_node *init; pl_proc *handler; pl_node_feat_change *feat_change; + pl_node_feat_change_all *feat_change_all; pl_node_feat_iterate *feat_iterate; pl_node_lookup_by_name_fn *lookup_by_name; + pl_node_feat_type_insert *feat_type_insert; + pl_node_feat_type_remove *feat_type_remove; + pl_node_feat_type_find *feat_type_find; + pl_node_register_context *feat_reg_context; + pl_node_unregister_context *feat_unreg_context; + pl_node_get_context *feat_get_context; + pl_node_setup_cleanup_cb *feat_setup_cleanup_cb; enum pl_node_type type; - bool disable; uint16_t num_next; /* internal state */ - void *data; + struct cds_lfht *pl_feat_node_ht; int node_decl_id; + int feature_point_id; TAILQ_ENTRY(pl_node_registration) links; uint16_t max_feature_reg_idx; struct pl_feature_registration **feature_regs; @@ -154,20 +161,32 @@ struct pl_node_registration { const char *next[]; }; +enum pl_feat_type { + PL_FEAT_LIST, + PL_FEAT_CASE, +}; + struct pl_feature_registration { + const char *plugin_name; const char *name; const char *feature_point; const char *node_name; const char *visit_before; const char *visit_after; + bool always_on; uint8_t id; + uint32_t feat_type; + enum pl_feat_type feature_type; /* internal state */ bool dynamic; + struct cds_lfht_node feat_node; + struct rcu_head feat_rcu; struct pl_node_registration *node; struct pl_node_registration *feature_point_node; TAILQ_ENTRY(pl_feature_registration) links; TAILQ_ENTRY(pl_feature_registration) feature_point_links; + dp_pipeline_inst_cleanup_cb *cleanup_cb; /* end internal state */ }; @@ -254,4 +273,8 @@ pl_add_node_op_command(struct pl_node_command *cmd); */ void pl_cmd_err(struct pl_command *cmd, const char *fmt, ...); +int pl_get_max_node_count(void); + +bool pl_print_feats(struct pl_feature_registration 
*feat_reg, void *context); + #endif /* PL_COMMON_H */ diff --git a/src/pipeline/pl_fused.h b/src/pipeline/pl_fused.h index b22ce564..3ec38891 100644 --- a/src/pipeline/pl_fused.h +++ b/src/pipeline/pl_fused.h @@ -2,7 +2,7 @@ * pl_fused.h * * - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2016, 2017 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -11,6 +11,8 @@ #ifndef PL_FUSED_H #define PL_FUSED_H +#include +#include "l2tp/l2tpeth.h" #include "pl_fused_gen.h" /* @@ -23,10 +25,10 @@ enum pl_ether_lookup_fused_feat { PL_ETHER_LOOKUP_FUSED_FEAT_SW_VLAN = 2, PL_ETHER_LOOKUP_FUSED_FEAT_CAPTURE = 3, PL_ETHER_LOOKUP_FUSED_FEAT_PORTMONITOR = 4, - PL_ETHER_LOOKUP_FUSED_FEAT_VLAN_MOD_INGRESS = 5, - PL_ETHER_LOOKUP_FUSED_FEAT_BRIDGE = 6, - PL_ETHER_LOOKUP_FUSED_FEAT_CROSS_CONNECT = 7, - PL_ETHER_LOOKUP_FUSED_FEAT_FLOW_CAPTURE = 8, + /* Leave a gap to allow other monitoring features */ + PL_ETHER_LOOKUP_FUSED_FEAT_VLAN_MOD_INGRESS = 10, + PL_ETHER_LOOKUP_FUSED_FEAT_BRIDGE = 11, + PL_ETHER_LOOKUP_FUSED_FEAT_CROSS_CONNECT = 12, }; enum pl_l3_v4_in_fused_feat { @@ -37,6 +39,7 @@ enum pl_l3_v4_in_fused_feat { PL_L3_V4_IN_FUSED_FEAT_FW, PL_L3_V4_IN_FUSED_FEAT_CGNAT, PL_L3_V4_IN_FUSED_FEAT_DPI, + PL_L3_V4_IN_FUSED_FEAT_NAT46, PL_L3_V4_IN_FUSED_FEAT_PBR, /* * no-address feature should be near the end to give other @@ -53,6 +56,7 @@ enum pl_l3_v6_in_fused_feat { PL_L3_V6_IN_FUSED_FEAT_FW, PL_L3_V6_IN_FUSED_FEAT_NPTV6, PL_L3_V6_IN_FUSED_FEAT_DPI, + PL_L3_V6_IN_FUSED_FEAT_NAT64, PL_L3_V6_IN_FUSED_FEAT_PBR, /* * no-address feature should be near the end to give other @@ -66,6 +70,7 @@ enum pl_l3_v4_out_fused_feat { PL_L3_V4_OUT_FUSED_FEAT_DEFRAG = 1, PL_L3_V4_OUT_FUSED_FEAT_CGNAT, PL_L3_V4_OUT_FUSED_FEAT_FW, + PL_L3_V4_OUT_FUSED_FEAT_NAT64, PL_L3_V4_OUT_FUSED_FEAT_DPI, PL_L3_V4_OUT_FUSED_FEAT_TCP_MSS, }; @@ -74,6 +79,7 @@ enum 
pl_l3_v6_out_fused_feat { PL_L3_V6_OUT_FUSED_FEAT_NPTV6 = 1, PL_L3_V6_OUT_FUSED_FEAT_DEFRAG, PL_L3_V6_OUT_FUSED_FEAT_FW, + PL_L3_V6_OUT_FUSED_FEAT_NAT46, PL_L3_V6_OUT_FUSED_FEAT_DPI, PL_L3_V6_OUT_FUSED_FEAT_TCP_MSS, }; @@ -86,6 +92,20 @@ enum pl_l3_v6_encap_fused_feat { PL_L3_V6_ENCAP_FUSED_FEAT_ACL = 1, }; +enum pl_l3_v4_out_spath_fused_feat { + PL_L3_V4_OUT_SPATH_FUSED_FEAT_DEFRAG = 1, + PL_L3_V4_OUT_FUSED_FEAT_FW_ORIG = 2, + PL_L3_V4_OUT_SPATH_FUSED_FEAT_FW = 3, + PL_L3_V4_OUT_SPATH_FUSED_FEAT_ACL = 4, +}; + +enum pl_l3_v6_out_spath_fused_feat { + PL_L3_V6_OUT_SPATH_FUSED_FEAT_DEFRAG = 1, + PL_L3_V6_OUT_FUSED_FEAT_FW_ORIG = 2, + PL_L3_V6_OUT_SPATH_FUSED_FEAT_FW = 3, + PL_L3_V6_OUT_SPATH_FUSED_FEAT_ACL = 4, +}; + enum pl_l3_v4_route_lookup_fused_feat { PL_L3_V4_ROUTE_LOOKUP_FUSED_FEAT_IPSEC = 1, }; @@ -94,4 +114,21 @@ enum pl_l3_v6_route_lookup_fused_feat { PL_L3_V6_ROUTE_LOOKUP_FUSED_FEAT_IPSEC = 1, }; +enum pl_l3_v4_l4_fused_feat { + PL_L3_V4_L4_FUSED_FEAT_GRE_IN = 1, + PL_L3_V4_L4_FUSED_FEAT_IPSEC_IN = 2, + PL_L3_V4_L4_FUSED_FEAT_UDP_IN = 3, + PL_L3_V4_L4_FUSED_FEAT_L2TPV3_IN = 4, +}; + +enum pl_l3_v6_l4_fused_feat { + PL_L3_V6_L4_FUSED_FEAT_UDP_IN = 1, +}; + +enum pl_l2_output_fused_feat { + PL_L2_OUTPUT_FUSED_FEAT_VLAN_MOD_EGRESS = 1, + PL_L2_OUTPUT_FUSED_FEAT_PORTMONITOR_OUT, + PL_L2_OUTPUT_FUSED_FEAT_CAPTURE_OUT, +}; + #endif /* PL_FUSED_H */ diff --git a/src/pipeline/pl_internal.h b/src/pipeline/pl_internal.h index 6aeb2cc5..9276b306 100644 --- a/src/pipeline/pl_internal.h +++ b/src/pipeline/pl_internal.h @@ -2,7 +2,7 @@ * pl_internal.h * * - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2016, 2017 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -12,6 +12,7 @@ #define PL_INTERNAL_H #include "compiler.h" +#include "json_writer.h" #include "util.h" extern int g_stats_enabled __hot_data; @@ -31,9 +32,9 @@ pl_inc_node_stat(int node_id) pl_node_stats_id(node_id, dp_lcore_id()))); } -void pl_load_plugins(void); void pl_graph_validate(void); uint64_t pl_get_node_stats(int id); +void pl_show_plugin_state(json_writer_t *json, const char *plugin_name); #endif /* PL_INTERNAL_H */ diff --git a/src/pipeline/pl_node.c b/src/pipeline/pl_node.c index 3e2114ea..0b4163b2 100644 --- a/src/pipeline/pl_node.c +++ b/src/pipeline/pl_node.c @@ -1,7 +1,7 @@ /* * pl_node.c * - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2021, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2016, 2017 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -78,6 +78,24 @@ pl_node_add_feature_by_inst(struct pl_feature_registration *feat, void *node) return ret; } +static int +pl_node_add_feature_all_inst(struct pl_feature_registration *feat) +{ + int ret; + + if (!feat->feature_point_node->feat_change_all) + return -ENOTSUP; + + ret = feat->feature_point_node->feat_change_all(feat, PL_NODE_FEAT_ADD); + + if (ret == 0 && feat->dynamic) { + if (uatomic_add_return(&dyn_feat_inst_count, 1) == 1) + set_packet_input_func(ether_input); + } + + return ret; +} + int pl_node_remove_feature_by_inst(struct pl_feature_registration *feat, void *node) { @@ -97,6 +115,24 @@ pl_node_remove_feature_by_inst(struct pl_feature_registration *feat, void *node) return ret; } +static int +pl_node_remove_feature_all_inst(struct pl_feature_registration *feat) +{ + int ret; + + if (!feat->feature_point_node->feat_change_all) + return -ENOTSUP; + + ret = feat->feature_point_node->feat_change_all(feat, PL_NODE_FEAT_REM); + + if (ret == 0 && feat->dynamic) { + if (uatomic_add_return(&dyn_feat_inst_count, -1) == 0) + set_packet_input_func(NULL); + } + + return ret; +} + int 
pl_node_add_feature(struct pl_feature_registration *feat, const char *node_inst_name) @@ -128,32 +164,65 @@ pl_node_remove_feature(struct pl_feature_registration *feat, } bool -pl_node_is_feature_enabled(struct pl_feature_registration *feat, void *node) +pl_node_is_feature_enabled_by_inst(struct pl_feature_registration *feat, + void *node) { pl_node_feat_iterate *iter_fn; unsigned int feature_id; void *context; bool more; + void *storage_ctx; iter_fn = feat->feature_point_node->feat_iterate; if (!iter_fn) return false; - for (more = iter_fn(node, true, &feature_id, &context); more; - more = iter_fn(node, false, &feature_id, &context)) + for (more = iter_fn(node, true, &feature_id, &context, &storage_ctx); + more; + more = iter_fn(node, false, &feature_id, &context, &storage_ctx)) if (feature_id == feat->id) return true; return false; } +bool +pl_node_is_feature_enabled(struct pl_feature_registration *feat, + const char *node_inst_name) +{ + struct pl_node *node; + + if (!feat->feature_point_node->lookup_by_name) + return false; + node = feat->feature_point_node->lookup_by_name(node_inst_name); + if (!node) + return false; + + return pl_node_is_feature_enabled_by_inst(feat, node); +} + ALWAYS_INLINE bool pl_node_invoke_feature(struct pl_node_registration *node_reg, - unsigned int feature, struct pl_packet *pkt) + unsigned int feature, struct pl_packet *pkt, + void *storage_ctx) { assert(feature < node_reg->max_feature_reg_idx); return pl_graph_walk(node_reg->feature_regs[feature]->node, - pkt); + pkt, storage_ctx); +} + +ALWAYS_INLINE bool +pl_node_invoke_feature_by_type(struct pl_node_registration *node_reg, + uint32_t feature_type, struct pl_packet *pkt) +{ + unsigned int feature_id; + + if (!node_reg->feat_type_find) + return true; + + feature_id = node_reg->feat_type_find(feature_type); + /* The case features are enabled globally not per instance */ + return pl_node_invoke_feature(node_reg, feature_id, pkt, NULL); } /* @@ -170,14 +239,17 @@ 
pl_node_invoke_enabled_features( unsigned int feature_id; void *context; bool more; + void *storage_ctx; iter_fn = node_reg->feat_iterate; if (!iter_fn) return true; - for (more = iter_fn(node, true, &feature_id, &context); more; - more = iter_fn(node, false, &feature_id, &context)) { - if (!pl_node_invoke_feature(node_reg, feature_id, pkt)) + for (more = iter_fn(node, true, &feature_id, &context, &storage_ctx); + more; + more = iter_fn(node, false, &feature_id, &context, &storage_ctx)) { + if (!pl_node_invoke_feature(node_reg, feature_id, + pkt, storage_ctx)) return false; } @@ -202,13 +274,15 @@ pl_node_iter_features(struct pl_node_registration *node_reg, unsigned int feature_id; void *context; bool more; + void *storage_ctx; iter_fn = node_reg->feat_iterate; if (!iter_fn) return false; - for (more = iter_fn(node, true, &feature_id, &context); more; - more = iter_fn(node, false, &feature_id, &context)) { + for (more = iter_fn(node, true, &feature_id, &context, &storage_ctx); + more; + more = iter_fn(node, false, &feature_id, &context, &storage_ctx)) { assert(feature_id < node_reg->max_feature_reg_idx); if (!callback(node_reg->feature_regs[feature_id], user_context)) @@ -228,13 +302,14 @@ pl_node_iter_features(struct pl_node_registration *node_reg, */ bool pl_graph_walk(struct pl_node_registration *node_reg, - struct pl_packet *pkt) + struct pl_packet *pkt, + void *storage_ctx) { int resp; while (true) { pl_inc_node_stat(node_reg->node_decl_id); - resp = node_reg->handler(pkt); + resp = node_reg->handler(pkt, storage_ctx); switch (node_reg->type) { case PL_OUTPUT: @@ -247,6 +322,7 @@ pl_graph_walk(struct pl_node_registration *node_reg, assert(resp < node_reg->num_next); node_reg = node_reg->next_nodes[resp]; + storage_ctx = NULL; } return true; @@ -346,3 +422,120 @@ pl_get_node_stats(int id) ct += *(g_pl_node_stats + pl_node_stats_id(id, i)); return ct; } + +static int +pl_node_enable_global_case_feature(struct pl_feature_registration *pl_feat) +{ + + if 
(!pl_feat->feature_point_node->feat_type_find) + return -ENOTSUP; + + if (!pl_feat->feature_point_node->feat_type_insert || + !pl_feat->feature_point_node->feat_type_remove) + return -ENOTSUP; + + if (pl_feat->feature_point_node->feat_type_insert( + pl_feat->feature_point_node, + pl_feat, + pl_feat->feat_type) != 0) + return -EINVAL; + + if (uatomic_add_return(&dyn_feat_inst_count, 1) == 1) + set_packet_input_func(ether_input); + + return 0; +} + +int pl_node_enable_global_feature(struct pl_feature_registration *pl_feat) +{ + if (!pl_feat) + return -EINVAL; + + if (pl_feat->feature_point_node->feat_type_find) + return pl_node_enable_global_case_feature(pl_feat); + + return pl_node_add_feature_all_inst(pl_feat); +} + +static int +pl_node_disable_global_case_feature(struct pl_feature_registration *pl_feat) +{ + if (!pl_feat) + return -EINVAL; + + if (!pl_feat->feature_point_node->feat_type_find) + return -ENOTSUP; + + if (!pl_feat->feature_point_node->feat_type_insert || + !pl_feat->feature_point_node->feat_type_remove) + return -ENOTSUP; + + if (pl_feat->feature_point_node->feat_type_remove( + pl_feat->feature_point_node, + pl_feat, + pl_feat->feat_type) != 0) + return -EINVAL; + + if (uatomic_add_return(&dyn_feat_inst_count, -1) == 0) + set_packet_input_func(NULL); + + return 0; +} + +int pl_node_disable_global_feature(struct pl_feature_registration *pl_feat) +{ + if (!pl_feat) + return -EINVAL; + + if (pl_feat->feature_point_node->feat_type_find) + return pl_node_disable_global_case_feature(pl_feat); + + return pl_node_remove_feature_all_inst(pl_feat); +} + +int pl_node_register_storage(struct pl_feature_registration *feat, + const char *node_inst_name, + void *context) +{ + struct pl_node *node; + + if (!feat->feature_point_node->feat_reg_context) + return -ENOTSUP; + + node = feat->feature_point_node->lookup_by_name(node_inst_name); + if (!node) + return -ENODEV; + + return feat->feature_point_node->feat_reg_context(node, feat, + context); +} + +int 
pl_node_unregister_storage(struct pl_feature_registration *feat, + const char *node_inst_name) +{ + struct pl_node *node; + + if (!feat->feature_point_node->feat_unreg_context) + return -ENOTSUP; + + node = feat->feature_point_node->lookup_by_name(node_inst_name); + if (!node) + return -ENODEV; + + return feat->feature_point_node->feat_unreg_context(node, feat); +} + +void *pl_node_get_storage(struct pl_feature_registration *feat, + const char *node_inst_name) +{ + struct pl_node *node; + + if (!feat->feature_point_node->feat_get_context) + return NULL; + + node = feat->feature_point_node->lookup_by_name(node_inst_name); + if (!node) + return NULL; + + return feat->feature_point_node->feat_get_context(node, feat); +} diff --git a/include/pl_node.h b/src/pipeline/pl_node.h similarity index 65% rename from include/pl_node.h rename to src/pipeline/pl_node.h index 1a4818e0..3a401d3c 100644 --- a/include/pl_node.h +++ b/src/pipeline/pl_node.h @@ -2,7 +2,7 @@ * pl_node.h * * - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017,2019-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2016, 2017 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -13,6 +13,7 @@ #include "compiler.h" #include "pl_common.h" +#include "pl_fused_gen.h" typedef bool (*pl_user_feat_iterate_fn)( struct pl_feature_registration *feat_reg, void *context); @@ -59,11 +60,21 @@ pl_node_remove_feature(struct pl_feature_registration *feat, const char *node_inst_name); bool -pl_node_is_feature_enabled(struct pl_feature_registration *feat, void *node); +pl_node_is_feature_enabled_by_inst(struct pl_feature_registration *feat, + void *node); + +bool +pl_node_is_feature_enabled(struct pl_feature_registration *feat, + const char *node_inst_name); bool pl_node_invoke_feature(struct pl_node_registration *node_reg, - unsigned int feature, struct pl_packet *pkt); + unsigned int feature, struct pl_packet *pkt, + void *storage_ctx); + +bool +pl_node_invoke_feature_by_type(struct pl_node_registration *node_reg, + uint32_t feature_type, struct pl_packet *pkt); bool pl_node_invoke_enabled_features( @@ -76,7 +87,9 @@ pl_node_iter_features(struct pl_node_registration *node_reg, void *context); bool -pl_graph_walk(struct pl_node_registration *node_reg, struct pl_packet *pkt); +pl_graph_walk(struct pl_node_registration *node_reg, + struct pl_packet *pkt, + void *storage_ctx); int pl_node_feat_change_u16(uint16_t *bitmask, @@ -96,4 +109,18 @@ bool pl_node_feat_iterate_u8(const uint8_t *bitmask, bool first, unsigned int *feature_id, void **context); +int pl_node_enable_global_feature(struct pl_feature_registration *pl_feat); +int pl_node_disable_global_feature(struct pl_feature_registration *pl_feat); + +uint32_t +pl_feat_point_node_get_max_features(enum pl_feature_point_id feat_point); + +int pl_node_register_storage(struct pl_feature_registration *feat, + const char *node_inst_name, + void *context); +int pl_node_unregister_storage(struct pl_feature_registration *feat, + const char *node_inst_name); +void *pl_node_get_storage(struct pl_feature_registration *feat, + const char *node_inst_name); + #endif /* PL_NODE_H */ diff --git 
a/src/pipeline/pl_node_boot.c b/src/pipeline/pl_node_boot.c index 0e81802e..c248e83a 100644 --- a/src/pipeline/pl_node_boot.c +++ b/src/pipeline/pl_node_boot.c @@ -1,7 +1,7 @@ /* * pl_node_boot.c * - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2016, 2017 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -18,9 +18,13 @@ #include #include "json_writer.h" +#include "lcore_sched.h" +#include "main.h" +#include "pipeline.h" #include "pl_commands.h" #include "pl_common.h" #include "pl_internal.h" +#include "pl_node.h" #include "util.h" #include "vplane_log.h" @@ -166,8 +170,8 @@ pl_feature_node_alloc_id(struct pl_feature_registration *feat, struct pl_feature_registration *before_feat = NULL; struct pl_feature_registration *after_feat = NULL; char *default_domain = parse_domain(feat->name); - char *before_feat_name; - char *after_feat_name; + char *before_feat_name = NULL; + char *after_feat_name = NULL; unsigned int max = UINT_MAX; unsigned int min = 0; unsigned int id; @@ -186,7 +190,6 @@ pl_feature_node_alloc_id(struct pl_feature_registration *feat, RTE_LOG(WARNING, DATAPLANE, "unknown before feature %s for feature %s\n", before_feat_name, feat->name); - free(before_feat_name); } if (feat->visit_after) { after_feat_name = construct_name_and_domain( @@ -199,7 +202,17 @@ pl_feature_node_alloc_id(struct pl_feature_registration *feat, RTE_LOG(WARNING, DATAPLANE, "unknown after feature %s for feature %s\n", after_feat_name, feat->name); - free(after_feat_name); + } + + if (feat->feature_type == PL_FEAT_CASE) { + /* + * This is a case feature so doesn't have before/after. + * IDs are first come first served. 
+ */ + if (feat->visit_after || feat->visit_before) + rte_panic("Case feature %s has before/after features\n", + feat->name); + min = feat->feature_point_node->max_feature_reg_idx + 1; } free(default_domain); @@ -215,6 +228,9 @@ pl_feature_node_alloc_id(struct pl_feature_registration *feat, before_feat->id, before_feat_name, after_feat->id, after_feat_name, feat->name); + free(before_feat_name); + free(after_feat_name); + for (id = min; id <= max; id++) { /* id 0 is reserved */ @@ -222,8 +238,8 @@ pl_feature_node_alloc_id(struct pl_feature_registration *feat, continue; /* candidate id already allocated */ - if (feat->node->max_feature_reg_idx > id && - feat->node->feature_regs[id]) + if (feat->feature_point_node->max_feature_reg_idx > id && + feat->feature_point_node->feature_regs[id]) continue; /* found an unallocated id */ @@ -241,6 +257,17 @@ pl_feature_node_alloc_id(struct pl_feature_registration *feat, feat->visit_after ? feat->visit_after : ""); } +/* + * Incrementing node id counter. Each dynamic node takes the next node id + * and increments the counter. 
+ */ +static int next_dyn_node_id = PL_NODE_NUM_IDS; + +int pl_get_max_node_count(void) +{ + return next_dyn_node_id; +} + void pl_graph_validate(void) { @@ -253,7 +280,6 @@ pl_graph_validate(void) char *default_domain; uint16_t next_idx; char *node_name; - int next_dyn_node_id = PL_NODE_NUM_IDS; if (!name_hash) rte_panic("unable to allocate pipeline graph node name hash\n"); @@ -328,9 +354,17 @@ pl_graph_validate(void) rte_panic( "unknown feature point node %s for feature %s\n", node_name, feat->name); + if (node->feat_type_find && (feat->feat_type == 0)) + rte_panic( + "feature point node %s expects a qualifier from %s\n", + feat->feature_point, feat->name); + free(node_name); feat->feature_point_node = node; free(default_domain); + + if (feat->node->feat_setup_cleanup_cb) + feat->node->feat_setup_cleanup_cb(feat); } /* check feature point constraints */ @@ -365,6 +399,26 @@ pl_graph_validate(void) feat->name, feat_point_node->feature_regs[feat->id]->name); feat_point_node->feature_regs[feat->id] = feat; + /* + * If the feature point node is case based then add it + * to the hash table, if we want to always have it enabled. + */ + if (!feat->always_on) + continue; + + if (feat_point_node->feat_type_find) { + if (!feat_point_node->feat_type_insert) + rte_panic( + "Cannot add features to feat attach node %s\n", + feat_point_node->name); + if (feat_point_node->feat_type_insert( + feat_point_node, + feat, + feat->feat_type) != 0) + rte_panic( + "Unable to add feat type: %s to feat attach node %s\n", + feat->name, feat_point_node->name); + } } zhashx_destroy(&name_hash); @@ -411,10 +465,10 @@ pl_dump_nodes(json_writer_t *json) TAILQ_FOREACH(node, &pl_node_reg_list, links) { jsonw_name(json, node->name); jsonw_start_object(json); + jsonw_uint_field(json, "node-id", node->node_decl_id); + jsonw_uint_field(json, "pkt-count", pl_get_node_stats(node->node_decl_id)); - jsonw_string_field(json, "disable", - node->disable ? 
"true" : "false"); jsonw_name(json, "next"); jsonw_start_array(json); int i; @@ -446,3 +500,297 @@ pl_dump_nodes(json_writer_t *json) } jsonw_end_object(json); } + +int dp_pipeline_register_node(const char *name, + int num_next_nodes, + const char **next_node_names, + enum pl_node_type node_type, + pl_proc handler) +{ + + struct pl_node_registration *pl_node; + int i; + + if (!name || num_next_nodes == 0 || !next_node_names || !handler) + return -EINVAL; + + if (!strchr(name, ':')) + return -EINVAL; + + for (i = 0; i < num_next_nodes; i++) { + /* domain is not required, use 'vyatta' if not given */ + if (!next_node_names[i]) + return -EINVAL; + } + + /* Extra size for the flexible array of next nodes at end of struct */ + pl_node = calloc(1, sizeof(*pl_node) + + (sizeof(char *) * num_next_nodes)); + if (!pl_node) + return -ENOMEM; + + pl_node->name = name; + pl_node->type = node_type; + pl_node->handler = handler; + pl_node->num_next = num_next_nodes; + + for (i = 0; i < num_next_nodes; i++) + pl_node->next[i] = next_node_names[i]; + + pl_add_node_registration(pl_node); + + return 0; +} + +static int +pipeline_register_feature_internal(struct dp_pipeline_feat_registration *feat, + enum pl_feat_type feature_type) +{ + struct pl_feature_registration *pl_feat; + + /* plugin_name, visit_before and visit_after are optional */ + if (!feat || !feat->name || !feat->node_name || !feat->feature_point) + return -EINVAL; + + /* names being registered must include a domain */ + if (!strchr(feat->name, ':') || + !strchr(feat->node_name, ':')) + return -EINVAL; + + /* a visit requirement only makes sense for a LIST feature*/ + if (feature_type == PL_FEAT_CASE) { + if (feat->visit_after || feat->visit_before) + return -EINVAL; + } + + pl_feat = calloc(1, sizeof(*pl_feat)); + if (!pl_feat) + return -ENOMEM; + + pl_feat->plugin_name = feat->plugin_name; + pl_feat->name = feat->name; + pl_feat->node_name = feat->node_name; + pl_feat->feature_point = feat->feature_point; + 
pl_feat->visit_after = feat->visit_after; + pl_feat->visit_before = feat->visit_before; + pl_feat->feat_type = feat->value; + pl_feat->cleanup_cb = feat->cleanup_cb; + pl_feat->feature_type = feature_type; + /* + * Always on only adds value in the 'fused' path, and that can not + * happen for external feature plugins so don't support it. + */ + pl_feat->always_on = false; + + pl_add_feature_registration(pl_feat); + + return 0; +} + +int +dp_pipeline_register_list_feature(struct dp_pipeline_feat_registration *feat) +{ + return pipeline_register_feature_internal(feat, PL_FEAT_LIST); +} + +int +dp_pipeline_register_case_feature(struct dp_pipeline_feat_registration *feat) +{ + return pipeline_register_feature_internal(feat, PL_FEAT_CASE); +} + +static struct pl_feature_registration * +pl_feat_registration_find_by_name(const char *name) +{ + struct pl_feature_registration *pl_feat; + + TAILQ_FOREACH(pl_feat, &pl_feature_reg_list, links) { + if (strcmp(name, pl_feat->name) == 0) + return pl_feat; + } + + return NULL; +} + +bool dp_pipeline_is_feature_enabled_by_inst(const char *name, + const char *instance) +{ + struct pl_feature_registration *pl_feat; + + if (!name || !instance) + return false; + + pl_feat = pl_feat_registration_find_by_name(name); + if (!pl_feat) + return false; + + return pl_node_is_feature_enabled(pl_feat, instance); +} + +int dp_pipeline_enable_feature_by_inst(const char *name, + const char *instance) +{ + struct pl_feature_registration *pl_feat; + + if (!name || !instance) + return -EINVAL; + + pl_feat = pl_feat_registration_find_by_name(name); + if (!pl_feat) + return -EINVAL; + + return pl_node_add_feature(pl_feat, instance); +} + +int dp_pipeline_disable_feature_by_inst(const char *name, + const char *instance) +{ + struct pl_feature_registration *pl_feat; + + if (!name || !instance) + return -EINVAL; + + pl_feat = pl_feat_registration_find_by_name(name); + if (!pl_feat) + return -EINVAL; + + return pl_node_remove_feature(pl_feat, instance); +} 
+ +int dp_pipeline_enable_global_feature(const char *name) +{ + struct pl_feature_registration *pl_feat; + + if (!name) + return -EINVAL; + + pl_feat = pl_feat_registration_find_by_name(name); + if (!pl_feat) + return -EINVAL; + + return pl_node_enable_global_feature(pl_feat); +} + +int dp_pipeline_disable_global_feature(const char *name) +{ + struct pl_feature_registration *pl_feat; + + if (!name) + return -EINVAL; + + pl_feat = pl_feat_registration_find_by_name(name); + if (!pl_feat) + return -EINVAL; + + return pl_node_disable_global_feature(pl_feat); +} + +void pl_show_plugin_state(json_writer_t *json, const char *plugin_name) +{ + struct pl_feature_registration *feat; + const char *type; + + jsonw_name(json, "feature_registrations"); + jsonw_start_array(json); + + TAILQ_FOREACH(feat, &pl_feature_reg_list, links) { + if (feat && feat->plugin_name && + strcmp(plugin_name, feat->plugin_name) == 0) { + + jsonw_start_object(json); + + jsonw_string_field(json, "node-name", + feat->node_name); + jsonw_string_field(json, "feature-point", + feat->feature_point_node->name); + + if (feat->feature_type == PL_FEAT_LIST) { + type = "list"; + if (feat->visit_before) + jsonw_string_field(json, + "before", + feat->visit_before); + if (feat->visit_after) + jsonw_string_field(json, + "after", + feat->visit_after); + } else { + type = "case"; + jsonw_uint_field(json, "case-value", + ntohs(feat->feat_type)); + } + jsonw_string_field(json, "feature-type", type); + jsonw_end_object(json); + } + } + + jsonw_end_array(json); +} + +uint32_t +pl_feat_point_node_get_max_features(enum pl_feature_point_id feat_point) +{ + struct pl_node_registration *node; + + if (feat_point == PL_FEATURE_POINT_NONE_ID || + feat_point >= PL_FEATURE_POINT_NUM_IDS) + return 0; + + TAILQ_FOREACH(node, &pl_node_reg_list, links) { + if ((enum pl_feature_point_id)node->feature_point_id == + feat_point) + return node->max_feature_reg_idx; + } + return 0; +} + +int dp_pipeline_register_inst_storage(const char 
*name, + const char *instance, + void *context) +{ + struct pl_feature_registration *pl_feat; + + ASSERT_MAIN(); + + /* Not providing a callback cleanup is allowed */ + if (!name || !instance || !context) + return -EINVAL; + + pl_feat = pl_feat_registration_find_by_name(name); + if (!pl_feat) + return -EINVAL; + + return pl_node_register_storage(pl_feat, instance, context); +} + +int dp_pipeline_unregister_inst_storage(const char *name, + const char *instance) +{ + struct pl_feature_registration *pl_feat; + + ASSERT_MAIN(); + + if (!name || !instance) + return -EINVAL; + + pl_feat = pl_feat_registration_find_by_name(name); + if (!pl_feat) + return -EINVAL; + + return pl_node_unregister_storage(pl_feat, instance); +} + +void *dp_pipeline_get_inst_storage(const char *node_name, + const char *instance) +{ + struct pl_feature_registration *pl_feat; + + if (!node_name || !instance) + return NULL; + + pl_feat = pl_feat_registration_find_by_name(node_name); + if (!pl_feat) + return NULL; + + return pl_node_get_storage(pl_feat, instance); +} diff --git a/src/pipeline/pl_plugin.c b/src/pipeline/pl_plugin.c deleted file mode 100644 index b28e883b..00000000 --- a/src/pipeline/pl_plugin.c +++ /dev/null @@ -1,66 +0,0 @@ -/* - * pl_plugin.c - * - * - * Copyright (c) 2016, 2017 by Brocade Communications Systems, Inc. - * All rights reserved. - * Copyright (c) 2017, AT&T Intellectual Property. All rights reserved. 
- * - * SPDX-License-Identifier: LGPL-2.1-only - */ - -#include -#include -#include -#include -#include -#include - -#include "pl_internal.h" -#include "vplane_log.h" - -#define PL_DLL_LOC PKGLIB_DIR"/pipeline/plugins" - -void pl_load_plugins(void) -{ - /* - * Iterate through directory loading pipeline plugins - */ - DIR *dp; - struct dirent *ep; - dp = opendir(PL_DLL_LOC); - if (dp != NULL) { - while ((ep = readdir(dp))) { - /* restrict to .so files only */ - char *tmp = strrchr(ep->d_name, '.'); - if (!tmp) - continue; - if (strcmp(tmp, ".so") != 0) - continue; - - char buf[1024]; - snprintf(buf, 1024, "%s/%s", - PL_DLL_LOC, ep->d_name); - void *handle = dlopen(buf, RTLD_NOW); - if (handle == NULL) { - RTE_LOG(ERR, DATAPLANE, - "failed to load pipeline plug-in: %s\n", - dlerror()); - continue; - } - RTE_LOG(INFO, DATAPLANE, - "loaded pipeline plug-in: %s\n", buf); - } - } else { - /* - * The directory not existing is normal so don't log - * an error in that case. - */ - if (errno != ENOENT) - RTE_LOG(ERR, DATAPLANE, - "error opening pipeline plug-in directory \"%s\": %s\n", - PL_DLL_LOC, strerror(errno)); - return; - } - closedir(dp); -} diff --git a/src/pktmbuf.c b/src/pktmbuf.c index 4e439b4d..fb37e933 100644 --- a/src/pktmbuf.c +++ b/src/pktmbuf.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2021, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2011-2016 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -9,15 +9,18 @@ */ #include +#include #include #include #include #include +#include "debug.h" #include "if_var.h" #include "ip_funcs.h" +#include "lcore_sched.h" #include "netinet6/ip6_funcs.h" -#include "pktmbuf.h" +#include "pktmbuf_internal.h" struct rte_mempool; @@ -64,6 +67,14 @@ char *pktmbuf_append_alloc(struct rte_mbuf *m, uint16_t len) __rte_mbuf_sanity_check(m, 1); + if (likely(m->nb_segs == 1 && + len < rte_pktmbuf_tailroom(m))) { + tail = (char *)m->buf_addr + m->data_off + m->data_len; + m->data_len = (uint16_t)(m->data_len + len); + m->pkt_len = (m->pkt_len + len); + return (char *)tail; + } + tail = rte_pktmbuf_append(m, len); if (tail) return tail; @@ -80,25 +91,75 @@ char *pktmbuf_append_alloc(struct rte_mbuf *m, uint16_t len) return rte_pktmbuf_append(m, len); } +ALWAYS_INLINE void +dp_pktmbuf_mdata_invar_ptr_set(struct rte_mbuf *m, + uint32_t feature_id, + void *ptr) +{ + struct pktmbuf_mdata *mdata = pktmbuf_mdata(m); + + /* userdata repurposed as flags + vrf field */ + mdata->md_feature_ptrs[feature_id] = ptr; + m->udata64 |= + ((PKT_MDATA_INVAR_FEATURE_PTRS << feature_id) & UINT16_MAX) + << 16; +} + +ALWAYS_INLINE bool +dp_pktmbuf_mdata_invar_ptr_get(const struct rte_mbuf *m, + uint32_t feature_id, + void **ptr) +{ + assert(feature_id < DP_PKTMBUF_MAX_INVAR_FEATURE_PTRS); + + if (m->udata64 & + (((PKT_MDATA_INVAR_FEATURE_PTRS << feature_id) & UINT16_MAX) + << 16)) { + struct pktmbuf_mdata *mdata = pktmbuf_mdata(m); + + *ptr = mdata->md_feature_ptrs[feature_id]; + return true; + } + return false; +} + +ALWAYS_INLINE void +dp_pktmbuf_mdata_invar_ptr_clear(struct rte_mbuf *m, + uint32_t feature_id) +{ + m->udata64 &= ~((uint64_t) + (((PKT_MDATA_INVAR_FEATURE_PTRS << feature_id) & + UINT16_MAX) << 16)); +} + void pktmbuf_move_mdata(struct rte_mbuf *md, struct rte_mbuf *ms) { - if (!pktmbuf_mdata_invar_exists(ms, PKT_MDATA_INVAR_FLOW | - PKT_MDATA_INVAR_SPATH | - PKT_MDATA_INVAR_ALT_FLOW | - PKT_MDATA_INVAR_NAT64) && - !pktmbuf_mdata_exists(ms, 
PKT_MDATA_SESSION | + int i; + int found = 0; + void *ptr; + + if (pktmbuf_mdata_invar_exists(ms, PKT_MDATA_INVAR_SPATH | + PKT_MDATA_INVAR_NAT64) || + pktmbuf_mdata_exists(ms, PKT_MDATA_SESSION | PKT_MDATA_CGNAT_SESSION | PKT_MDATA_DPI_SEEN | PKT_MDATA_SESSION_SENTRY)) + found = true; + + for (i = 0; i < DP_PKTMBUF_MAX_INVAR_FEATURE_PTRS; i++) + found |= dp_pktmbuf_mdata_invar_ptr_get(ms, i, &ptr); + + if (!found) return; struct pktmbuf_mdata *mdatad = pktmbuf_mdata(md); struct pktmbuf_mdata *mdatas = pktmbuf_mdata(ms); - if (pktmbuf_mdata_invar_exists(ms, PKT_MDATA_INVAR_FLOW)) { - pktmbuf_mdata_invar_set(md, PKT_MDATA_INVAR_FLOW); - mdatad->md_flowp = mdatas->md_flowp; - pktmbuf_mdata_invar_clear(ms, PKT_MDATA_INVAR_FLOW); + for (i = 0; i < DP_PKTMBUF_MAX_INVAR_FEATURE_PTRS; i++) { + if (dp_pktmbuf_mdata_invar_ptr_get(ms, i, &ptr)) { + dp_pktmbuf_mdata_invar_ptr_set(md, i, ptr); + dp_pktmbuf_mdata_invar_ptr_clear(ms, i); + } } if (pktmbuf_mdata_exists(ms, PKT_MDATA_SESSION)) { @@ -131,12 +192,6 @@ void pktmbuf_move_mdata(struct rte_mbuf *md, struct rte_mbuf *ms) pktmbuf_mdata_invar_clear(ms, PKT_MDATA_INVAR_SPATH); } - if (pktmbuf_mdata_invar_exists(ms, PKT_MDATA_INVAR_ALT_FLOW)) { - pktmbuf_mdata_invar_set(md, PKT_MDATA_INVAR_ALT_FLOW); - mdatad->md_flow = mdatas->md_flow; - pktmbuf_mdata_invar_clear(ms, PKT_MDATA_INVAR_ALT_FLOW); - } - if (pktmbuf_mdata_exists(ms, PKT_MDATA_DPI_SEEN)) { pktmbuf_mdata_set(md, PKT_MDATA_DPI_SEEN); pktmbuf_mdata_clear(ms, PKT_MDATA_DPI_SEEN); @@ -361,12 +416,12 @@ void *memcpy_from_mbuf(void *dest, struct rte_mbuf *m, unsigned int offset, void pktmbuf_ecn_set_ce(struct rte_mbuf *m) { - const struct ether_hdr *eh - = rte_pktmbuf_mtod(m, const struct ether_hdr *); + const struct rte_ether_hdr *eh + = rte_pktmbuf_mtod(m, const struct rte_ether_hdr *); - if (eh->ether_type == htons(ETHER_TYPE_IPv4)) + if (eh->ether_type == htons(RTE_ETHER_TYPE_IPV4)) ip_tos_set_ecn_ce(iphdr(m)); - else if (eh->ether_type == htons(ETHER_TYPE_IPv6)) + 
else if (eh->ether_type == htons(RTE_ETHER_TYPE_IPV6)) ip6_tos_set_ecn_ce(ip6hdr(m)); } @@ -386,7 +441,7 @@ struct ifnet *pktmbuf_restore_ifp(struct rte_mbuf *m) struct ifnet *ifp; if (pktmbuf_mdata_exists(m, PKT_MDATA_IFINDEX)) { - ifp = ifnet_byifindex(pktmbuf_mdata(m)->md_ifindex.ifindex); + ifp = dp_ifnet_byifindex(pktmbuf_mdata(m)->md_ifindex.ifindex); pktmbuf_mdata_clear(m, PKT_MDATA_IFINDEX); } else { assert(m->port < DATAPLANE_MAX_PORTS); @@ -402,12 +457,12 @@ int pktmbuf_tcp_header_is_usable(struct rte_mbuf *m) unsigned int tcphlen; struct tcphdr *tcp; - l2l3hlen = pktmbuf_l2_len(m) + pktmbuf_l3_len(m); + l2l3hlen = dp_pktmbuf_l2_len(m) + dp_pktmbuf_l3_len(m); tcphlen = l2l3hlen + sizeof(struct tcphdr); if (rte_pktmbuf_data_len(m) <= tcphlen) return 0; /* can not overlay header */ - tcp = pktmbuf_mtol4(m, struct tcphdr *); + tcp = dp_pktmbuf_mtol4(m, struct tcphdr *); if (rte_pktmbuf_pkt_len(m) - l2l3hlen < ntohs(tcp->th_off)*4) return 0; /* truncated */ @@ -421,15 +476,91 @@ int pktmbuf_udp_header_is_usable(struct rte_mbuf *m) unsigned int udphlen; struct udphdr *udp; - l2l3hlen = pktmbuf_l2_len(m) + pktmbuf_l3_len(m); + l2l3hlen = dp_pktmbuf_l2_len(m) + dp_pktmbuf_l3_len(m); udphlen = l2l3hlen + sizeof(struct udphdr); if (rte_pktmbuf_data_len(m) <= udphlen) return 0; /* can not overlay header */ - udp = pktmbuf_mtol4(m, struct udphdr *); + udp = dp_pktmbuf_mtol4(m, struct udphdr *); if (rte_pktmbuf_pkt_len(m) - l2l3hlen < ntohs(udp->len)) return 0; /* truncated */ return 1; } + +struct rte_mbuf *dp_pktmbuf_alloc_from_default(vrfid_t vrf_id) +{ + return pktmbuf_alloc(mbuf_pool(0), vrf_id); +} + +vrfid_t +dp_pktmbuf_get_vrf(const struct rte_mbuf *m) +{ + return pktmbuf_get_vrf(m); +} + +void +dp_pktmbuf_set_vrf(struct rte_mbuf *m, vrfid_t vrf_id) +{ + pktmbuf_set_vrf(m, vrf_id); +} + +void dp_pktmbuf_mark_locally_generated(struct rte_mbuf *m) +{ + pktmbuf_mdata_set(m, PKT_MDATA_FROM_US); +} + +static char 
*pktmbuf_mdata_feat_regs[DP_PKTMBUF_MAX_INVAR_FEATURE_PTRS]; + +int dp_pktmbuf_mdata_invar_feature_register(const char *name) +{ + int i; + + ASSERT_MAIN(); + + if (!name) + return -EINVAL; + + for (i = 0; i < DP_PKTMBUF_MAX_INVAR_FEATURE_PTRS; i++) { + if (!pktmbuf_mdata_feat_regs[i]) { + pktmbuf_mdata_feat_regs[i] = + strdup(name); + if (!pktmbuf_mdata_feat_regs[i]) { + RTE_LOG(ERR, DATAPLANE, + "Feature %s registration for meta data failed\n", + name); + return -ENOMEM; + } + RTE_LOG(INFO, DATAPLANE, + "Feature %s registered for meta data ptr %d\n", + name, i); + return i; + } + } + + return -ENOSPC; +} + +int dp_pktmbuf_mdata_invar_feature_unregister(const char *name, int slot) +{ + ASSERT_MAIN(); + + if (!name) + return -EINVAL; + + if (!pktmbuf_mdata_feat_regs[slot]) + return -EINVAL; + + if (strcmp(pktmbuf_mdata_feat_regs[slot], name) != 0) + return -EINVAL; + + free(pktmbuf_mdata_feat_regs[slot]); + pktmbuf_mdata_feat_regs[slot] = NULL; + + RTE_LOG(INFO, DATAPLANE, + "Feature %s unregistered for meta data ptr %d\n", + name, slot); + + return 0; +} diff --git a/src/pktmbuf.h b/src/pktmbuf_internal.h similarity index 87% rename from src/pktmbuf.h rename to src/pktmbuf_internal.h index 10c8963f..da9d48bb 100644 --- a/src/pktmbuf.h +++ b/src/pktmbuf_internal.h @@ -1,13 +1,13 @@ /*- - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2011-2016 by Brocade Communications Systems, Inc. * All rights reserved. 
* * SPDX-License-Identifier: LGPL-2.1-only */ -#ifndef PKTMBUF_H -#define PKTMBUF_H +#ifndef PKTMBUF_INTERNAL_H +#define PKTMBUF_INTERNAL_H /* * Extensions to rte mbuf library */ @@ -26,10 +26,12 @@ #include #include +#include "compat.h" #include "ip_addr.h" #include "main.h" +#include "pktmbuf.h" #include "util.h" -#include "compat.h" +#include "vrf.h" struct ifnet; struct rte_mempool; @@ -52,12 +54,6 @@ struct pkt_mdata_spath { struct tun_meta meta; } __attribute__ ((__packed__)); -struct pkt_mdata_flow { - struct ip_addr nexthop; - vrfid_t ingress_vrf_id; - uint32_t input_ifindex; -} __attribute__ ((__packed__)); - struct pkt_mdata_bridge { uint32_t member_ifindex; uint16_t outer_vlan; @@ -73,18 +69,21 @@ struct pkt_mdata_ifindex { * interface. */ enum pkt_mdata_invar_type { - PKT_MDATA_INVAR_FLOW = (1 << 0), - PKT_MDATA_INVAR_ALT_FLOW = (1 << 1), - PKT_MDATA_INVAR_SPATH = (1 << 2), + PKT_MDATA_INVAR_SPATH = (1 << 0), /* * This is invariant because there are certain types of * interface, e.g. l2tp, that expect to punt to the kernel on the * original arriving interface, even after a decap. */ - PKT_MDATA_INVAR_BRIDGE = (1 << 3), - PKT_MDATA_INVAR_NAT64 = (1 << 4), + PKT_MDATA_INVAR_BRIDGE = (1 << 1), + PKT_MDATA_INVAR_NAT64 = (1 << 2), + PKT_MDATA_INVAR_FEATURE_PTRS = (1 << 3), + PKT_MDATA_INVAR_MAX, }; +static_assert(DP_PKTMBUF_MAX_INVAR_FEATURE_PTRS == 1, + "Too many invar ptr meta data fields - update the non ptr values"); + /* * Packet metadata that is clear when the key character of the packet * is changed, i.e. 
when changing L3 protocol, when decapped or when @@ -110,9 +109,6 @@ enum pkt_mdata_type { struct npf_session; struct pktmbuf_mdata { - /* PKT_MDATA_INVAR_FLOW */ - struct flow_data *md_flowp; - /* PKT_MDATA_SESSION_SENTRY */ struct sentry *md_sentry; @@ -128,9 +124,6 @@ struct pktmbuf_mdata { /* PKT_MDATA_INVAR_SPATH */ struct pkt_mdata_spath md_spath; - /* PKT_MDATA_INVAR_ALT_FLOW */ - struct pkt_mdata_flow md_flow; - /* PKT_MDATA_CRYPTO_PR */ struct policy_rule *pr; @@ -142,10 +135,18 @@ struct pktmbuf_mdata { /* PKT_MDATA_L2_RCV_TYPE */ enum l2_packet_type md_l2_rcv_type; + + /* Pointers that features can register for ownership of */ + void *md_feature_ptrs[DP_PKTMBUF_MAX_INVAR_FEATURE_PTRS]; + } __rte_aligned(RTE_CACHE_LINE_SIZE * 2); +/* Ensure struct fits in two cache lines */ +static_assert(sizeof(struct pktmbuf_mdata) <= 128, + "struct is too large"); + static inline struct pktmbuf_mdata * -pktmbuf_mdata(struct rte_mbuf *m) +pktmbuf_mdata(const struct rte_mbuf *m) { assert(m->priv_size >= sizeof(struct pktmbuf_mdata)); return (struct pktmbuf_mdata *) @@ -263,57 +264,6 @@ static inline struct rte_mbuf *pktmbuf_alloc(struct rte_mempool *mp, */ void pktmbuf_free_bulk(struct rte_mbuf *pkts[], unsigned int n); -/** - * A macro that points to the start of the L3 data in the mbuf. - * - * The returned pointer is cast to type t. Before using this - * function, the user must ensure that m_headlen(m) is large enough to - * read its data, and must ensure that the L2 length is set in the mbuf. - * - * @param m - * The packet mbuf. - * @param t - * The type to cast the result into. - */ -#define pktmbuf_mtol3(m, t) ((t)(rte_pktmbuf_mtod(m, char *) + \ - (m)->l2_len)) - -/** - * A macro that points to the start of the L4 data in the mbuf. - * - * The returned pointer is cast to type t. Before using this - * function, the user must ensure that m_headlen(m) is large enough to - * read its data , and must ensure that the L2 and L3 lengths are set - * in the mbuf. 
- * - * @param m - * The packet mbuf. - * @param t - * The type to cast the result into. - */ -#define pktmbuf_mtol4(m, t) ((t)(rte_pktmbuf_mtod(m, char *) + \ - (m)->l2_len + (m)->l3_len)) - -/** - * A macro that returns the length of the L2 header in the mbuf. - * - * The value can be read or assigned. - * - * @param m - * The packet mbuf. - */ -#define pktmbuf_l2_len(m) ((m)->l2_len) - -/** - * A macro that returns the length of the L3 header in the mbuf. - * - * The value can be read or assigned. - * - * @param m - * The packet mbuf. - */ -#define pktmbuf_l3_len(m) ((m)->l3_len) - /** * A macro to clear the header lengths in the given mbuf. * @@ -340,7 +290,9 @@ char *pktmbuf_append_alloc(struct rte_mbuf *m, uint16_t len); * Moves the mdata metadata from one mbuf to another. This is intended for * when an mbuf with multiple references is copied due to needing to change it. * - * The source mbuf has its metadata cleared. It is expected that the + * The source mbuf has its metadata cleared. This is to ensure that there + * is no issue with features such as which expect there to be + * only one mbuf containing the same metadata. It is expected that the * source mbuf is subsequently freed, or no longer the first segment of the * packet. * @@ -633,4 +585,4 @@ pkt_mbuf_get_l2_traffic_type(struct rte_mbuf *m) return L2_PKT_UNICAST; } -#endif /* PKTMBUF_H */ +#endif /* PKTMBUF_INTERNAL_H */ diff --git a/src/portmonitor/portmonitor.h b/src/portmonitor/portmonitor.h index 50776fe2..c2dc634c 100644 --- a/src/portmonitor/portmonitor.h +++ b/src/portmonitor/portmonitor.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2021, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015-2016 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -12,6 +12,8 @@ #include #include +#include "compiler.h" + #define MAX_PORTMONITOR_SESSIONS 8 #define MAX_PORTMONITOR_SRC_INTF 8 @@ -82,12 +84,25 @@ struct portmonitor_filter { uint8_t type; /* filter type: in or out */ }; +struct vlan_info { + struct rcu_head vlan_rcu; /* Chain for RCU free */ + uint8_t num_vlans; /* Num of Rx vlans */ + uint16_t *vlanids; /* Rx vlan array ptr */ +}; + struct portmonitor_srcif { struct ifnet *ifp; /* source ifp */ char ifname[IFNAMSIZ]; /* source ifname */ struct portmonitor_session *pm_session; /* srcif session */ struct cds_list_head srcif_list; /* Linked list chain */ struct rcu_head srcif_rcu; /* Chain for rcu free */ + + /* Mirroring for specific vlans is not supported in dataplane + * This information not used in dataplane but is passed to + * FAL plugins as some platforms may support it in hardware + */ + struct vlan_info *rx_vinfo; /* rx vlan info */ + struct vlan_info *tx_vinfo; /* tx vlan info */ }; struct erspan_v2_hdr { @@ -135,15 +150,15 @@ struct portmonitor_info { }; void portmonitor_src_vif_rx_output(struct ifnet *ifp, struct rte_mbuf **m) - __attribute__((cold)); + __cold_func; void portmonitor_src_vif_tx_output(struct ifnet *ifp, struct rte_mbuf **m) - __attribute__((cold)); + __cold_func; void portmonitor_src_phy_rx_output(struct ifnet *ifp, struct rte_mbuf *mbi[], - unsigned int n) __attribute__((cold)); + unsigned int n) __cold_func; void portmonitor_src_phy_tx_output(struct ifnet *ifp, struct rte_mbuf *mbi[], - unsigned int n) __attribute__((cold)); + unsigned int n) __cold_func; int portmonitor_dest_output(struct ifnet *ifp, struct rte_mbuf *m) - __attribute__((cold)); + __cold_func; void portmonitor_cleanup(struct ifnet *ifp); #endif /* PORTMONITOR_H */ diff --git a/src/portmonitor/portmonitor_cmds.c b/src/portmonitor/portmonitor_cmds.c index e3b54c78..123dfb82 100644 --- a/src/portmonitor/portmonitor_cmds.c +++ b/src/portmonitor/portmonitor_cmds.c @@ -1,7 +1,7 @@ /* * Port Monitoring Command Processing 
* - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015-2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -34,77 +34,12 @@ #include "vplane_log.h" #include "fal.h" -#define ERSPAN_SESSION(pmsess) \ - ((pmsess->session_type == PORTMONITOR_ERSPAN_SOURCE) || \ - (pmsess->session_type == PORTMONITOR_ERSPAN_DESTINATION)) - static CDS_LIST_HEAD(pmsrcif_list); static CDS_LIST_HEAD(pmsession_list); static uint8_t num_sessions; static uint8_t num_srcif_for_all_sessions; -static struct cfg_if_list *portmonitor_cfg_list; - -static void -portmonitor_event_if_index_set(struct ifnet *ifp, uint32_t ifindex); -static void -portmonitor_event_if_index_unset(struct ifnet *ifp, uint32_t ifindex); - -static const struct dp_event_ops portmonitor_event_ops = { - .if_index_set = portmonitor_event_if_index_set, - .if_index_unset = portmonitor_event_if_index_unset, -}; - -static void -portmonitor_event_if_index_set(struct ifnet *ifp, uint32_t ifindex __unused) -{ - struct cfg_if_list_entry *le; - - if (!portmonitor_cfg_list) - return; - - le = cfg_if_list_lookup(portmonitor_cfg_list, ifp->if_name); - if (!le) - return; - - RTE_LOG(INFO, DATAPLANE, - "Replaying portmonitor %s for interface %s\n", - le->le_buf, ifp->if_name); - - cmd_portmonitor(NULL, le->le_argc, le->le_argv); - cfg_if_list_del(portmonitor_cfg_list, ifp->if_name); - - if (!portmonitor_cfg_list->if_list_count) { - cfg_if_list_destroy(&portmonitor_cfg_list); - dp_event_unregister(&portmonitor_event_ops); - } -} - -static void -portmonitor_event_if_index_unset(struct ifnet *ifp, uint32_t ifindex __unused) -{ - if (!portmonitor_cfg_list) - return; - - cfg_if_list_del(portmonitor_cfg_list, ifp->if_name); - if (!portmonitor_cfg_list->if_list_count) { - dp_event_unregister(&portmonitor_event_ops); - cfg_if_list_destroy(&portmonitor_cfg_list); - } -} - -static int 
portmonitor_replay_init(void) -{ - if (!portmonitor_cfg_list) { - portmonitor_cfg_list = cfg_if_list_create(); - if (!portmonitor_cfg_list) - return -ENOMEM; - } - dp_event_register(&portmonitor_event_ops); - return 0; -} - static struct portmonitor_info *portmonitor_info_alloc(struct ifnet *ifp) { struct portmonitor_info *pminfo; @@ -133,20 +68,28 @@ static void portmonitor_info_deinit(struct ifnet *ifp) } } +static bool is_erspan_session(struct portmonitor_session *pmsess) +{ + return (pmsess->session_type == PORTMONITOR_ERSPAN_SOURCE) || + (pmsess->session_type == PORTMONITOR_ERSPAN_DESTINATION); +} + static void set_srcif_enabled(struct ifnet *ifp, struct portmonitor_session *pmsess, bool enabled) { ifp->portmonitor = 0; pl_node_remove_feature_by_inst(&portmonitor_in_feat, ifp); + pl_node_remove_feature_by_inst(&portmonitor_out_feat, ifp); if (enabled && pmsess->session_type && pmsess->dest_ifp) { - if (ERSPAN_SESSION(pmsess)) { + if (is_erspan_session(pmsess)) { if (!pmsess->erspan_id || !pmsess->erspan_hdr_type) return; } ifp->portmonitor = 1; pl_node_add_feature_by_inst(&portmonitor_in_feat, ifp); + pl_node_add_feature_by_inst(&portmonitor_out_feat, ifp); if (pmsess->session_type == PORTMONITOR_ERSPAN_SOURCE) { if (pmsess->erspan_hdr_type == ERSPAN_TYPE_II) @@ -252,6 +195,7 @@ static void portmonitor_info_delete(struct ifnet *ifp) { ifp->portmonitor = 0; pl_node_remove_feature_by_inst(&portmonitor_in_feat, ifp); + pl_node_remove_feature_by_inst(&portmonitor_out_feat, ifp); portmonitor_info_deinit(ifp); } @@ -270,6 +214,7 @@ void portmonitor_cleanup(struct ifnet *ifp) } else { ifp->portmonitor = 0; pl_node_remove_feature_by_inst(&portmonitor_in_feat, ifp); + pl_node_remove_feature_by_inst(&portmonitor_out_feat, ifp); } rcu_assign_pointer(ifp->pminfo, NULL); @@ -342,14 +287,29 @@ static void pm_fal_src_update(const struct ifnet *ifp, pm_if_update_hw_mirroring(ifp); } +static void portmonitor_srcif_vlan_free(struct rcu_head *head) +{ + struct vlan_info *vinfo; 
+ + vinfo = caa_container_of(head, struct vlan_info, vlan_rcu); + + if (vinfo->vlanids) + free(vinfo->vlanids); + vinfo->num_vlans = 0; + vinfo->vlanids = NULL; + + free(vinfo); +} + static void portmonitor_srcif_delete(struct portmonitor_srcif *pmsrcif) { const struct portmonitor_session *pmsess; const struct portmonitor_info *pminfo; struct ifnet *ifp; + struct vlan_info *rxvi, *txvi; - ifp = ifnet_byifname(pmsrcif->ifname); + ifp = dp_ifnet_byifname(pmsrcif->ifname); if (ifp == NULL) { RTE_LOG(ERR, DATAPLANE, "PM: cannot delete source, no such interface %s\n", @@ -367,14 +327,33 @@ static void portmonitor_srcif_delete(struct portmonitor_srcif *pmsrcif) pm_fal_src_update(ifp, pmsess, FAL_PORT_ATTR_EGRESS_MIRROR_SESSION, true); + rxvi = rcu_dereference(pmsrcif->rx_vinfo); + txvi = rcu_dereference(pmsrcif->tx_vinfo); + if (txvi) { + rcu_assign_pointer(pmsrcif->tx_vinfo, NULL); + call_rcu(&txvi->vlan_rcu, portmonitor_srcif_vlan_free); + } + if (rxvi) { + rcu_assign_pointer(pmsrcif->rx_vinfo, NULL); + call_rcu(&rxvi->vlan_rcu, portmonitor_srcif_vlan_free); + } if (ifp == pmsrcif->ifp) portmonitor_info_delete(ifp); } static void -pm_event_if_hw_switching_change(struct ifnet *ifp, - bool enable) +pm_event_if_feat_mode_change(struct ifnet *ifp, + enum if_feat_mode_event event) { + bool enable; + + if (event == IF_FEAT_MODE_EVENT_L2_FAL_ENABLED) + enable = true; + else if (event == IF_FEAT_MODE_EVENT_L2_FAL_DISABLED) + enable = false; + else + return; + RTE_LOG(INFO, DATAPLANE, "Portmonitor(%s):Intf %s, hw forwarding changed to %s\n", __func__, @@ -383,7 +362,7 @@ pm_event_if_hw_switching_change(struct ifnet *ifp, } static const struct dp_event_ops pm_event_ops = { - .if_hw_switching_change = pm_event_if_hw_switching_change + .if_feat_mode_change = pm_event_if_feat_mode_change, }; static void portmonitor_del_all_srcif(uint8_t session_id) @@ -425,7 +404,7 @@ static struct ifnet *get_vif(const char *ifname, uint16_t vid) char if_name[IFNAMSIZ]; snprintf(if_name, IFNAMSIZ, 
"%s.%d", ifname, vid); - return ifnet_byifname(if_name); + return dp_ifnet_byifname(if_name); } static int portmonitor_session_del_srcif(struct portmonitor_session *pmsess, @@ -711,7 +690,7 @@ static int portmonitor_session_set_srcif(FILE *f, return -1; } } else { - ifp = ifnet_byifname(ifname); + ifp = dp_ifnet_byifname(ifname); if (!ifp) { fprintf(f, "Unknown source interface %s\n", ifname); return -1; @@ -800,7 +779,7 @@ static int portmonitor_session_set_dstif(FILE *f, return -1; } } else { - ifp = ifnet_byifname(ifname); + ifp = dp_ifnet_byifname(ifname); if (!ifp) { fprintf(f, "Unknown destination interface %s\n", ifname); return -1; @@ -905,6 +884,23 @@ static void show_one_session(struct portmonitor_session *s, json_writer_t *wr) jsonw_string_field(wr, "direction", "rx"); else if (pminfo->direction & PORTMONITOR_DIRECTION_TX) jsonw_string_field(wr, "direction", "tx"); + int i; + struct vlan_info *rxvi, *txvi; + + rxvi = rcu_dereference(pmsrcif->rx_vinfo); + txvi = rcu_dereference(pmsrcif->tx_vinfo); + jsonw_name(wr, "rx_vlan"); + jsonw_start_array(wr); + if (rxvi) + for (i = 0; i < rxvi->num_vlans; i++) + jsonw_uint(wr, rxvi->vlanids[i]); + jsonw_end_array(wr); + jsonw_name(wr, "tx_vlan"); + jsonw_start_array(wr); + if (txvi) + for (i = 0; i < txvi->num_vlans; i++) + jsonw_uint(wr, txvi->vlanids[i]); + jsonw_end_array(wr); } jsonw_end_object(wr); } @@ -976,6 +972,151 @@ static bool get_value(char *strval, uint32_t *val) return false; } +/* + * Validates source interface vlan information + * It assumes that first 7 argv (0to6) params are already + * validated and assumes the following + * portmonitor set session srcif + * vlan {rx|tx} ... 
+ * + * For a valid portmonitor vlan command at least 9 params + * are expected followed by vlans + */ +static int pm_validate_srcif_vlans(FILE *f, int argc, + char **argv, unsigned int *num, + bool *tx_vlan) +{ + if (argc < 9) { + fprintf(f, + "Invalid num of args portmonitor vlan direction %d\n", + argc); + return -1; + } + + if (strcmp(argv[7], "rx") == 0) { + *tx_vlan = false; + } else if (strcmp(argv[7], "tx") == 0) { + *tx_vlan = true; + } else { + fprintf(f, + "Invalid portmonitor vlan direction %s\n", + argv[7]); + return -1; + } + if (!get_value(argv[8], num)) { + fprintf(f, + "Invalid portmonitor vlans %s\n", + argv[8]); + return -1; + } + if (argc != (int) (9 + *num)) { + fprintf(f, + "Invalid number of vlans %d, expected %s\n", + argc, argv[8]); + return -1; + } + return 0; +} + +/* Update source interface vlans to be monitored for ingress (rx) + * and egress (tx). Currently functionality to monitor for specific + * vlans is only supported in hardware so dataplane data structures + * are only needed in control path and are used to update information + * to the FAL plugins. 
The update is always sent to FAL and plugin + * should decipher it to add and delete as needed + */ +static int pm_upd_srcif_vlans(FILE *f, + const char *ifname, bool tx_vlan, + int argc, char **argv) +{ + int i, rc; + uint32_t vlanid; + const struct ifnet *ifp; + struct portmonitor_srcif *pmsrcif; + struct fal_u32_list_t *vlist; + uint16_t *pvlanids = NULL; + struct vlan_info *old_vi, *new_vi; + + ifp = dp_ifnet_byifname(ifname); + if (!ifp) { + fprintf(f, "Portmonitor: unknown src interface %s\n", ifname); + return -1; + } + pmsrcif = get_pmsrcif_byifindex(ifp->if_index); + if (!pmsrcif) { + fprintf(f, "Portmonitor: vlans src interface %s not found\n", + ifp->if_name); + return -1; + } + vlist = calloc(1, sizeof(*vlist) + (argc * sizeof(vlist->list[0]))); + if (!vlist) { + RTE_LOG(ERR, DATAPLANE, "Portmonitor: out of memory\n"); + return -1; + } + struct fal_attribute_t attr = { + .id = FAL_PORT_ATTR_INGRESS_MIRROR_VLAN, + .value.u32list = vlist + }; + + if (argc) { + pvlanids = calloc(1, argc * sizeof(*pvlanids)); + if (!pvlanids) { + RTE_LOG(ERR, DATAPLANE, "Portmonitor: out of memory\n"); + free(vlist); + return -1; + } + } + /* if argc is 0, it indicates that no vlans are configured + * and plugins will interpret it to a delete all vlans for the + * direction, so always update FAL + */ + vlist->count = 0; + for (i = 0; i < argc; i++) { + if (!get_value(argv[i+1], &vlanid) || (vlanid > 4094)) { + fprintf(f, "Invalid portmonitor vlan %s\n", argv[i+1]); + free(vlist); + free(pvlanids); + return -1; + } + pvlanids[i] = vlanid; + vlist->list[i] = vlanid; + vlist->count++; + } + new_vi = calloc(1, sizeof(*new_vi)); + if (!new_vi) { + RTE_LOG(ERR, DATAPLANE, "Portmonitor: out of memory\n"); + free(vlist); + free(pvlanids); + return -1; + } + if (tx_vlan) { + attr.id = FAL_PORT_ATTR_EGRESS_MIRROR_VLAN; + + old_vi = rcu_dereference(pmsrcif->tx_vinfo); + new_vi->num_vlans = vlist->count; + new_vi->vlanids = pvlanids; + rcu_assign_pointer(pmsrcif->tx_vinfo, new_vi); + 
if (old_vi) + call_rcu(&old_vi->vlan_rcu, + portmonitor_srcif_vlan_free); + } else { + old_vi = rcu_dereference(pmsrcif->rx_vinfo); + new_vi->num_vlans = vlist->count; + new_vi->vlanids = pvlanids; + rcu_assign_pointer(pmsrcif->rx_vinfo, new_vi); + if (old_vi) + call_rcu(&old_vi->vlan_rcu, + portmonitor_srcif_vlan_free); + } + rc = fal_l2_upd_port(ifp->if_index, &attr); + if (rc < 0 && (rc != -EOPNOTSUPP)) + RTE_LOG(ERR, DATAPLANE, + "Portmonitor: FAL vlan upd failed (%s)\n", + strerror(-rc)); + free(vlist); + return 0; +} + int cmd_portmonitor(FILE *f, int argc, char **argv) { uint32_t session_id; @@ -986,6 +1127,9 @@ int cmd_portmonitor(FILE *f, int argc, char **argv) uint32_t erspan_hdr_type; struct portmonitor_session *pmsess; int rc; + unsigned int num; + bool tx_vlan; + bool upd_vlan = false; if (argc < 3) goto bad_command; @@ -1025,7 +1169,6 @@ int cmd_portmonitor(FILE *f, int argc, char **argv) fprintf(f, "Invalid vid %s\n", argv[6]); return -1; } - cfg_if_list_del(portmonitor_cfg_list, argv[5]); if (portmonitor_session_del_srcif(pmsess, argv[5], vid) < 0) { fprintf(f, "Cannot delete source interface %s\n", @@ -1037,7 +1180,6 @@ int cmd_portmonitor(FILE *f, int argc, char **argv) fprintf(f, "Invalid vid %s\n", argv[6]); return -1; } - cfg_if_list_del(portmonitor_cfg_list, argv[5]); if (portmonitor_session_del_dstif(pmsess, argv[5], vid) < 0) { fprintf(f, "Cannot delete destination interface %s\n", @@ -1170,25 +1312,38 @@ int cmd_portmonitor(FILE *f, int argc, char **argv) return -1; } } else if (strcmp(argv[4], "srcif") == 0) { - if (!get_value(argv[6], &vid)) { - fprintf(f, "Invalid vid %s\n", argv[6]); - return -1; + if (strcmp(argv[6], "vlan") == 0) { + if (pm_validate_srcif_vlans(f, argc, + argv, &num, &tx_vlan) < 0) + goto bad_command; + upd_vlan = true; + } else { + if (!get_value(argv[6], &vid)) { + fprintf(f, "Invalid vid %s\n", + argv[6]); + return -1; + } + if (!get_value(argv[7], &direction)) { + fprintf(f, "Invalid direction %s\n", + argv[7]); + 
return -1; + } } - if (!get_value(argv[7], &direction)) { - fprintf(f, "Invalid direction %s\n", argv[7]); + if (!dp_ifnet_byifname(argv[5])) { + RTE_LOG(INFO, DATAPLANE, + "portmonitor src if set but interface missing %s\n", + argv[5]); return -1; } - if (!ifnet_byifname(argv[5])) { - if (portmonitor_replay_init() < 0) { - RTE_LOG(ERR, DATAPLANE, - "Portmonitor could not set up replay cache\n"); - return -ENOMEM; + if (upd_vlan) { + char **argv8 = argv + 8; + + if (pm_upd_srcif_vlans(f, argv[5], + tx_vlan, num, argv8) < 0) { + fprintf(f, "Upd src vlans %s failed\n", + argv[5]); + return -1; } - RTE_LOG(INFO, DATAPLANE, - "Caching portmonitor srcif for %s\n", - argv[5]); - cfg_if_list_add(portmonitor_cfg_list, - argv[5], argc, argv); return 0; } if (portmonitor_session_set_srcif(f, pmsess, argv[5], @@ -1199,18 +1354,11 @@ int cmd_portmonitor(FILE *f, int argc, char **argv) fprintf(f, "Invalid vid %s\n", argv[6]); return -1; } - if (!ifnet_byifname(argv[5])) { - if (portmonitor_replay_init() < 0) { - RTE_LOG(ERR, DATAPLANE, - "Portmonitor could not set up replay cache\n"); - return -ENOMEM; - } + if (!dp_ifnet_byifname(argv[5])) { RTE_LOG(INFO, DATAPLANE, - "Caching portmonitor dstif for %s\n", + "portmonitor dst if set but interface missing %s\n", argv[5]); - cfg_if_list_add(portmonitor_cfg_list, - argv[5], argc, argv); - return 0; + return -1; } if (portmonitor_session_set_dstif(f, pmsess, argv[5], vid) < 0) diff --git a/src/portmonitor/portmonitor_dp.c b/src/portmonitor/portmonitor_dp.c index b98acc9a..3382c558 100644 --- a/src/portmonitor/portmonitor_dp.c +++ b/src/portmonitor/portmonitor_dp.c @@ -1,7 +1,7 @@ /* * SPAN, RSPAN and ERSPAN Port Monitoring * - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015-2017 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -21,7 +21,7 @@ #include "capture.h" #include "ether.h" -#include "gre.h" +#include "if/gre.h" #include "if_var.h" #include "main.h" #include "npf/npf.h" @@ -29,7 +29,7 @@ #include "npf/config/npf_ruleset_type.h" #include "npf/npf_if.h" #include "npf_shim.h" -#include "pktmbuf.h" +#include "pktmbuf_internal.h" #include "portmonitor/portmonitor.h" #include "portmonitor/portmonitor_hw.h" #include "urcu.h" @@ -184,12 +184,12 @@ static int portmonitor_encap_erspan_hdr(struct ifnet *ifp, pktmbuf_clear_tx_vlan(m); } frame_size = rte_pktmbuf_pkt_len(m); - if (frame_size < ETHER_MIN_LEN) { + if (frame_size < RTE_ETHER_MIN_LEN) { v3_hdr->cos_bso_t_id = htons((pktmbuf_get_vlan_pcp(m) << 13) | (ERSPAN_ORIG_FRAME_SHORT << 11) | pmsess->erspan_id); - } else if (frame_size > ETHER_MAX_LEN) { + } else if (frame_size > RTE_ETHER_MAX_LEN) { v3_hdr->cos_bso_t_id = htons((pktmbuf_get_vlan_pcp(m) << 13) | (ERSPAN_ORIG_FRAME_OVERSIZED << 11) | @@ -234,7 +234,7 @@ static void portmonitor_source_output(struct ifnet *ifp, if (!dest_ifp) return; - pktmbuf_l2_len(*m) = ETHER_HDR_LEN; + dp_pktmbuf_l2_len(*m) = RTE_ETHER_HDR_LEN; struct npf_if *nif = rcu_dereference(ifp->if_npf); if (direction == PORTMONITOR_DIRECTION_RX) { @@ -250,7 +250,8 @@ static void portmonitor_source_output(struct ifnet *ifp, struct npf_config *npf_config = npf_if_conf(nif); result = npf_hook_notrack( npf_get_ruleset(npf_config, ruleset_type), - m, ifp, filter_dir, 0, htons(ETHER_TYPE_IPv4)); + m, ifp, filter_dir, 0, + htons(RTE_ETHER_TYPE_IPV4), NULL); if (result.decision != NPF_DECISION_PASS) return; } @@ -261,7 +262,7 @@ static void portmonitor_source_output(struct ifnet *ifp, if (((*m)->ol_flags & PKT_RX_VLAN) && ifp->qinq_inner) { if (unlikely(vid_encap(ifp->if_vlan, &mirror_pkt, - ETHER_TYPE_VLAN) == NULL)) { + RTE_ETHER_TYPE_VLAN) == NULL)) { rte_pktmbuf_free(mirror_pkt); return; } diff --git a/src/portmonitor/portmonitor_hw.h b/src/portmonitor/portmonitor_hw.h index 7fd89ccb..0820191e 100644 --- 
a/src/portmonitor/portmonitor_hw.h +++ b/src/portmonitor/portmonitor_hw.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2018-2019, AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only * Portmonitor hardware specific functions diff --git a/src/power.c b/src/power.c index e49aaa7b..14a1e4c7 100644 --- a/src/power.c +++ b/src/power.c @@ -1,7 +1,7 @@ /*- * Copyright (c) 2015-2016 by Brocade Communications Systems, Inc. * All rights reserved. - * Copyright (c) 2017, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017,2019, AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only */ @@ -46,12 +46,12 @@ static void change_power_mode(struct power_profile *pm) struct power_profile *old = rcu_xchg_pointer(&cur_pm, pm); /* unsafe to call defer_rcu with rcu read lock held. */ - rcu_read_unlock(); + dp_rcu_read_unlock(); if (!strcmp(old->name, "custom")) defer_rcu(free, old); - rcu_read_lock(); + dp_rcu_read_lock(); } diff --git a/src/power.h b/src/power.h index dc6f1e84..7271c433 100644 --- a/src/power.h +++ b/src/power.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2011-2016 by Brocade Communications Systems, Inc. * All rights reserved. * diff --git a/src/protobuf.c b/src/protobuf.c index f90afd58..5b4ca965 100644 --- a/src/protobuf.c +++ b/src/protobuf.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2018, AT&T Intellectual Property. + * Copyright (c) 2018-2020, AT&T Intellectual Property. * All rights reserved. 
* * SPDX-License-Identifier: LGPL-2.1-only @@ -9,14 +9,16 @@ #include "compiler.h" #include "vplane_log.h" +#include "zmq_dp.h" #include "protobuf/DataplaneEnvelope.pb-c.h" #include "commands.h" #include "protobuf.h" static zhash_t *g_pb_cmds; +static zhash_t *g_pb_opcmds; __attribute__((format(printf, 2, 3))) -void pb_cmd_err(struct pb_msg *msg, const char *fmt, ...) +void dp_pb_cmd_err(struct pb_msg *msg, const char *fmt, ...) { if (!msg || !msg->fp) return; @@ -74,18 +76,94 @@ pb_cmd(void *data, size_t size, FILE *f) return status; } +/* + * Dispatcher for received protobuf commands. + */ +int +pb_op_cmd(zsock_t *sock, void *data, size_t size, FILE *f) +{ + int status = -1; + + if (!g_pb_opcmds) { + RTE_LOG(ERR, DATAPLANE, + "protobuf not initialized\n"); + return status; + } + + /* first validate against pb command set */ + DataplaneEnvelope * dmsg = + dataplane_envelope__unpack(NULL, + size, + (unsigned char *)data); + if (!dmsg) { + RTE_LOG(ERR, DATAPLANE, + "failed to read protobuf command\n"); + return status; + } + + if (!dmsg->type) { + RTE_LOG(ERR, DATAPLANE, + "protobuf type not found\n"); + goto cleanup; + } + + struct pb_msg_handler *c_entry = zhash_lookup(g_pb_opcmds, dmsg->type); + if (c_entry) { + struct pb_msg cmd = {.fp = f, + .msg = dmsg->msg.data, + .msg_len = dmsg->msg.len}; + status = c_entry->handler(&cmd); + + DataplaneEnvelope msg = DATAPLANE_ENVELOPE__INIT; + msg.msg.data = cmd.ret_msg; + msg.msg.len = cmd.ret_msg_len; + + int len = dataplane_envelope__get_packed_size(&msg); + + void *buf = malloc(len); + if (!buf) { + RTE_LOG(ERR, DATAPLANE, "Failed to allocate buffer\n"); + free(cmd.ret_msg); + goto cleanup; + } + + dataplane_envelope__pack(&msg, buf); + + zmsg_t *m = zmsg_new(); + if (!m) { + RTE_LOG(ERR, DATAPLANE, "Failed to allocate zmsg\n"); + free(cmd.ret_msg); + free(buf); + goto cleanup; + } + zmsg_addmem(m, buf, len); + zmsg_send_and_destroy(&m, sock); + + free(cmd.ret_msg); + free(buf); + goto cleanup; + } + + RTE_LOG(ERR, 
DATAPLANE, "unknown op mode protobuf command\n"); +cleanup: + dataplane_envelope__free_unpacked(dmsg, NULL); + return status; +} + /* * Registers new protocol buffer commands * via the PB_REGISTER_CMD macro. */ int -pb_add_command(const struct pb_msg_handler *cmd) +pb_add_command(const struct pb_msg_handler *cmd, int mode) { + zhash_t **pb_cmds = (mode == 0 ? &g_pb_cmds : &g_pb_opcmds); + char *tok = strdupa(cmd->cmd); - if (!g_pb_cmds) { - g_pb_cmds = zhash_new(); - if (g_pb_cmds == 0) { + if (!*pb_cmds) { + *pb_cmds = zhash_new(); + if (*pb_cmds == 0) { RTE_LOG(ERR, DATAPLANE, "memory allocation failure: protobuf collection\n"); return -1; @@ -100,7 +178,7 @@ pb_add_command(const struct pb_msg_handler *cmd) return -1; } c_entry->handler = cmd->handler; - if (zhash_insert(g_pb_cmds, tok, c_entry) != 0) { + if (zhash_insert(*pb_cmds, tok, c_entry) != 0) { RTE_LOG(ERR, DATAPLANE, "failed to register protobuf cmd: %s (%s)\n", tok, strerror(errno)); @@ -127,3 +205,36 @@ void pb_register_cmd_err(const char *cmd) cmd); } +int +dp_feature_register_pb_cfg_handler(const char *name, + pb_cmd_proc handler) +{ + struct pb_msg_handler msg_handler; + + if (!name || !handler) + return -EINVAL; + + msg_handler.handler = handler; + msg_handler.version = 0; + msg_handler.cmd = name; + + return pb_add_command(&msg_handler, 0); +} + +int +dp_feature_register_pb_op_handler(const char *name, + pb_cmd_proc handler) +{ + struct pb_msg_handler msg_handler; + + if (!name || !handler) + return -EINVAL; + + msg_handler.handler = handler; + msg_handler.version = 0; + msg_handler.cmd = name; + + pb_add_command(&msg_handler, 1); + + return 0; +} diff --git a/src/protobuf.h b/src/protobuf.h new file mode 100644 index 00000000..05ddd024 --- /dev/null +++ b/src/protobuf.h @@ -0,0 +1,51 @@ +/*- + * Copyright (c) 2018-2019, AT&T Intellectual Property. + * All rights reserved. 
+ * + * SPDX-License-Identifier: LGPL-2.1-only + */ +#ifndef PROTOBUF_H +#define PROTOBUF_H + +#include + +#include "feature_commands.h" + +int pb_cmd(void *data, size_t size, FILE *f); +int pb_op_cmd(zsock_t *sock, void *data, size_t size, FILE *f); + +struct pb_msg_handler { + uint32_t version; + const char *cmd; + pb_cmd_proc *handler; +}; + +int +pb_add_command(const struct pb_msg_handler *cmd, int mode); + +void +pb_register_cmd_err(const char *cmd); + + +#define PB_REGISTER_CMD(x, ...) \ + __VA_ARGS__ struct pb_msg_handler x; \ + static void __pb_add_command_##x(void) \ + __attribute__((__constructor__)); \ + static void __pb_add_command_##x(void) \ + { if (pb_add_command(&x, 0) != 0) \ + pb_register_cmd_err(x.cmd); } \ + __VA_ARGS__ struct pb_msg_handler x + + +#define PB_REGISTER_OPCMD(x, ...) \ + __VA_ARGS__ struct pb_msg_handler x; \ + static void __pb_add_command_##x(void) \ + __attribute__((__constructor__)); \ + static void __pb_add_command_##x(void) \ + { if (pb_add_command(&x, 1) != 0) \ + pb_register_cmd_err(x.cmd); } \ + __VA_ARGS__ struct pb_msg_handler x + +void list_all_protobuf_msg_versions(FILE *f); + +#endif diff --git a/src/protobuf_util.c b/src/protobuf_util.c index b4de1b1f..988f86b0 100644 --- a/src/protobuf_util.c +++ b/src/protobuf_util.c @@ -5,11 +5,15 @@ * SPDX-License-Identifier: LGPL-2.1-only */ +#include "vplane_log.h" #include "protobuf_util.h" #include "stdio.h" -int protobuf_get_ipaddr(IPAddress *addr_msg, struct ip_addr *addr) +int dp_protobuf_get_ipaddr(IPAddress *addr_msg, struct ip_addr *addr) { + if (!addr_msg) + return -1; + if (addr_msg->address_oneof_case == IPADDRESS__ADDRESS_ONEOF_IPV4_ADDR) { memcpy(&addr->address.ip_v4, @@ -17,9 +21,11 @@ int protobuf_get_ipaddr(IPAddress *addr_msg, struct ip_addr *addr) sizeof(addr->address.ip_v4)); addr->type = AF_INET; return 0; - } else if (addr_msg->address_oneof_case == - IPADDRESS__ADDRESS_ONEOF_IPV6_ADDR && - sizeof(addr->address.ip_v6) == addr_msg->ipv6_addr.len) { + } + + 
if (addr_msg->address_oneof_case == + IPADDRESS__ADDRESS_ONEOF_IPV6_ADDR && + sizeof(addr->address.ip_v6) == addr_msg->ipv6_addr.len) { memcpy(&addr->address.ip_v6, addr_msg->ipv6_addr.data, addr_msg->ipv6_addr.len); @@ -29,3 +35,42 @@ int protobuf_get_ipaddr(IPAddress *addr_msg, struct ip_addr *addr) return -1; } +int dp_protobuf_create_ipaddr(IPAddress **addr_msg) +{ + if (!addr_msg) { + RTE_LOG(ERR, DATAPLANE, + "Error in addr value\n"); + return -1; + } + + const IPAddress addr = IPADDRESS__INIT; + *addr_msg = malloc(sizeof(addr)); + if (!*addr_msg) { + RTE_LOG(ERR, DATAPLANE, + "Failed to allocate protobuf ipaddr\n"); + return -1; + } + memcpy(*addr_msg, &addr, sizeof(addr)); + (*addr_msg)->ipv6_addr.data = malloc(sizeof(uint8_t) * 16); + if (!(*addr_msg)->ipv6_addr.data) { + free(*addr_msg); + RTE_LOG(ERR, DATAPLANE, + "Failed to allocate protobuf ipaddr\n"); + return -1; + } + return 0; +} + +int dp_protobuf_set_ipaddr(IPAddress *to, struct ip_addr *from) +{ + if (from->type == AF_INET) { + to->address_oneof_case = IPADDRESS__ADDRESS_ONEOF_IPV4_ADDR; + to->ipv4_addr = from->address.ip_v4.s_addr; + } else if (from->type == AF_INET6) { + to->address_oneof_case = IPADDRESS__ADDRESS_ONEOF_IPV6_ADDR; + memcpy(to->ipv6_addr.data, &from->address.ip_v6, 16); + to->ipv6_addr.len = 16; + } else + return -1; + return 0; +} diff --git a/src/protobuf_util.h b/src/protobuf_util.h deleted file mode 100644 index d8bc43df..00000000 --- a/src/protobuf_util.h +++ /dev/null @@ -1,16 +0,0 @@ -/*- - * Copyright (c) 2019, AT&T Intellectual Property. - * All rights reserved. 
- * - * SPDX-License-Identifier: LGPL-2.1-only - */ - -#ifndef PROTOBUF_UTIL_H -#define PROTOBUF_UTIL_H - -#include "ip_addr.h" -#include "protobuf/IPAddress.pb-c.h" - -int protobuf_get_ipaddr(IPAddress *addr_msg, struct ip_addr *addr); - -#endif diff --git a/src/ptp.c b/src/ptp.c index 9f27b27a..6aaf9fc4 100644 --- a/src/ptp.c +++ b/src/ptp.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, AT&T Intellectual Property. + * Copyright (c) 2019-2020, AT&T Intellectual Property. * All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only @@ -14,10 +14,10 @@ #include #include "control.h" -#include "bridge_port.h" #include "dp_event.h" #include "ether.h" #include "fal.h" +#include "if/bridge/bridge_port.h" #include "if_llatbl.h" #include "netinet6/nd6_nbr.h" #include "ptp.h" @@ -44,16 +44,35 @@ struct ptp_port_t { struct cds_list_head list; }; +/* Group the peers with the same IP address. This avoids some + * O(n^2) behaviors when searching for the best peer to a + * particular IP address. + * + * a.b.c.d, port 1 -> e.g.h.i, port 1 -> j.k.l.m, port 2 -> null + * | + * V + * e.g.h.i, port 2 + * | + * V + * e.g.h.i, port 3 + * | + * V + * null + */ + struct ptp_peer_t { enum fal_ptp_peer_type_t type; struct fal_ip_address_t ipaddr; fal_object_t obj_id; /**< returned FAL object id */ struct ptp_port_t *port; - struct ether_addr mac; + struct rte_ether_addr mac; bool installed; struct rcu_head rcu; - struct cds_list_head list; + struct cds_list_head list; /**< for ptp_peer_list */ + + struct cds_list_head siblings; /**< peers with same IP address */ + struct cds_list_head slist; /**< for siblings */ }; static CDS_LIST_HEAD(ptp_clock_list); @@ -61,6 +80,7 @@ static CDS_LIST_HEAD(ptp_port_list); static CDS_LIST_HEAD(ptp_peer_list); static struct rte_timer ptp_peer_resolver; +static bool ptp_peer_resolver_running; static unsigned int ptp_peer_resolver_period = 15; /* seconds */ static void ptp_peer_resolver_cb(struct rte_timer *timer, void *arg); @@ -95,20 +115,59 @@ struct 
ptp_port_t *ptp_find_port(uint32_t clock_id, uint16_t port_id) return NULL; } +static int +ptp_peer_compare(struct ptp_peer_t *peer, + struct ptp_port_t *port, + enum fal_ptp_peer_type_t type, + struct fal_ip_address_t *ipaddr) +{ + if (rcu_dereference(peer->port) == port && + peer->type == type && + memcmp(&peer->ipaddr, ipaddr, sizeof(*ipaddr)) == 0) + return 1; + + return 0; +} + +/* Search the entire list of peers, descending into the siblings + * if necessary. + */ static -struct ptp_peer_t *ptp_find_peer(uint32_t clock_id, uint16_t port_id, +struct ptp_peer_t *ptp_find_peer(struct ptp_port_t *port, enum fal_ptp_peer_type_t type, struct fal_ip_address_t *ipaddr) { - struct ptp_port_t *port; + struct ptp_peer_t *peer, *sibling; + + cds_list_for_each_entry_rcu(peer, &ptp_peer_list, list) { + if (ptp_peer_compare(peer, port, type, ipaddr)) + return peer; + + /* check siblings for a match */ + cds_list_for_each_entry_rcu(sibling, &peer->siblings, slist) { + if (ptp_peer_compare(sibling, port, type, ipaddr)) + return sibling; + } + } + + return NULL; +} + +/* Only search the list of unique peer IP addresses. 
*/ +static +struct ptp_peer_t *ptp_find_parent(uint32_t clock_id, + enum fal_ptp_peer_type_t type, + struct fal_ip_address_t *ipaddr) +{ + struct ptp_clock_t *clock; struct ptp_peer_t *peer; - port = ptp_find_port(clock_id, port_id); - if (!port) + clock = ptp_find_clock(clock_id); + if (!clock) return NULL; cds_list_for_each_entry_rcu(peer, &ptp_peer_list, list) { - if (rcu_dereference(peer->port) == port && + if (rcu_dereference(peer->port)->clock == clock && peer->type == type && memcmp(&peer->ipaddr, ipaddr, sizeof(*ipaddr)) == 0) return peer; @@ -161,6 +220,14 @@ static int get_signed_char_token(const char *token, signed char *ptr) return get_signed_char(str, ptr); } +static int get_signed_token(const char *token, int *ptr) +{ + char *str; + + str = check_token(token); + return get_signed(str, ptr); +} + static int get_bool_token(const char *token, bool *ptr) { char *str; @@ -272,6 +339,17 @@ static int ptp_clock_create(FILE *f, uint32_t clock_id, int argc, char **argv) if (strcmp(profile_string, "default-profile") == 0) profile = FAL_PTP_CLOCK_DEFAULT_PROFILE; + else if (strcmp(profile_string, "g82752-profile") == 0) + profile = FAL_PTP_CLOCK_G82752_PROFILE; + else if (strcmp(profile_string, + "g82752-apts-profile") == 0) + profile = FAL_PTP_CLOCK_G82752_APTS_PROFILE; + else if (strcmp(profile_string, + "g82751-forwardable-profile") == 0) + profile = FAL_PTP_CLOCK_G82751_FWD_PROFILE; + else if (strcmp(profile_string, + "g82751-non-forwardable-profile") == 0) + profile = FAL_PTP_CLOCK_G82751_NON_FWD_PROFILE; else { fprintf(f, "ptp: bad profile: %s\n", profile_string); @@ -281,6 +359,16 @@ static int ptp_clock_create(FILE *f, uint32_t clock_id, int argc, char **argv) attrs[num_attrs].id = FAL_PTP_CLOCK_PROFILE; attrs[num_attrs].value.u32 = profile; + } else if (strstr(*argv, "antenna-delay=")) { + int antenna_delay; + + rc = get_signed_token(*argv, &antenna_delay); + if (rc < 0) + goto error; + + attrs[num_attrs].id = FAL_PTP_CLOCK_ANTENNA_DELAY; + 
attrs[num_attrs].value.i32 = antenna_delay; + } else { fprintf(f, "ptp: bad option: %s\n", *argv); goto out; @@ -312,6 +400,7 @@ static int ptp_clock_create(FILE *f, uint32_t clock_id, int argc, char **argv) if (!cds_list_empty(&ptp_clock_list)) { rte_timer_init(&ptp_peer_resolver); + ptp_peer_resolver_running = true; rte_timer_reset_sync(&ptp_peer_resolver, rte_get_timer_hz() * ptp_peer_resolver_period, PERIODICAL, rte_get_master_lcore(), @@ -358,8 +447,10 @@ static int ptp_clock_delete(FILE *f, uint32_t clock_id, cds_list_del_rcu(&clock->list); call_rcu(&clock->rcu, ptp_clock_free); - if (cds_list_empty(&ptp_clock_list)) + if (cds_list_empty(&ptp_clock_list)) { rte_timer_stop_sync(&ptp_peer_resolver); + ptp_peer_resolver_running = false; + } error: return rc; @@ -401,9 +492,12 @@ int ptp_port_create(FILE *f, uint16_t port_id, int argc, char **argv) char *ifname; ifname = strchr(*argv, '=') + 1; - ifp = ifnet_byifname(ifname); + ifp = dp_ifnet_byifname(ifname); if (!ifp) { - // TBD -- create a replay cache + RTE_LOG(ERR, DATAPLANE, + "%s: %s is missing, bad replay?\n", + __func__, ifname); + rc = 0; goto error; } rcu_assign_pointer(port->ifp, ifp); @@ -421,6 +515,33 @@ int ptp_port_create(FILE *f, uint16_t port_id, int argc, char **argv) attrs[num_attrs].id = FAL_PTP_PORT_VLAN_ID; attrs[num_attrs].value.u16 = vlan_id; + } else if (strstr(*argv, "additional-path=")) { + struct ifnet *ifp; + char *ifname, *vlan_str; + uint16_t vlan_id; + + /* additional-path=ifname,vlan-id */ + + ifname = strchr(*argv, '=') + 1; + vlan_str = strchr(ifname, ','); + if (!vlan_str) { + rc = -EINVAL; + goto error; + } + *vlan_str++ = '\0'; /* remove ',' from ifname */ + + ifp = dp_ifnet_byifname(ifname); + if (!ifp) + goto error; + + rc = get_unsigned_short(vlan_str, &vlan_id); + if (rc < 0) + goto error; + + attrs[num_attrs].id = FAL_PTP_PORT_ADDITIONAL_PATH; + attrs[num_attrs].value.ptp_port_path.ifindex = ifp->if_index; + attrs[num_attrs].value.ptp_port_path.vlan_id = vlan_id; + } 
else if (strstr(*argv, "log-min-delay-req-interval=")) { int8_t log_min_delay_req_interval; @@ -608,8 +729,8 @@ int ptp_port_delete(FILE *f, uint16_t port_id, int argc, char **argv) port = ptp_find_port(clock_id, port_id); if (!port) { - fprintf(f, "ptp: clock %d has no port %d\n", - clock_id, port_id); + /* interface never arrived, not an error. */ + rc = 0; goto error; } @@ -627,14 +748,6 @@ int ptp_port_delete(FILE *f, uint16_t port_id, int argc, char **argv) return rc; } -static -bool is_ipaddr_empty(struct fal_ip_address_t *ipaddr) -{ - struct fal_ip_address_t empty_ipaddr = { 0 }; - - return memcmp(ipaddr, &empty_ipaddr, sizeof(empty_ipaddr)) == 0; -} - static int ptp_peer_install(struct ptp_peer_t *peer) { @@ -686,6 +799,7 @@ int ptp_peer_uninstall(struct ptp_peer_t *peer) if (peer->installed) { rc = fal_delete_ptp_peer(peer->obj_id); peer->installed = false; + memset(&peer->mac, 0, sizeof(peer->mac)); } return rc; @@ -727,116 +841,225 @@ struct ifnet *ptp_port_port_to_vlan(struct ptp_port_t *port) return vlan_ifp; } +/* Find the nexthop interface for the peer if it exists */ static -void ptp_peer_update(struct ptp_peer_t *peer) +struct ifnet *ptp_peer_dst_lookup(struct ptp_peer_t *peer, bool *connected) { - struct ptp_port_t *port; - struct ifnet *ifp, *nh_ifp; - struct ether_addr newmac = { { 0 } }; - char buf[INET_ADDRSTRLEN], buf2[INET_ADDRSTRLEN]; - const char *peerip = - fal_ip_address_t_to_str(&peer->ipaddr, buf2, sizeof(buf2)); + struct ifnet *nh_ifp = NULL; const struct vrf *vrf; - bool connected; - port = rcu_dereference(peer->port); - if (!port) - goto out; - - ifp = ptp_port_port_to_vlan(port); - if (!ifp) - goto out; - - /* Determine the nexthop for this address. 
*/ vrf = get_vrf(VRF_DEFAULT_ID); if (!vrf) { RTE_LOG(ERR, DATAPLANE, "%s: no default VRF?\n", __func__); - goto out; + return NULL; } + *connected = false; if (peer->ipaddr.addr_family == FAL_IP_ADDR_FAMILY_IPV4) { nh_ifp = nhif_dst_lookup(vrf, peer->ipaddr.addr.ip4, - &connected); + connected); } else if (peer->ipaddr.addr_family == FAL_IP_ADDR_FAMILY_IPV6) { nh_ifp = nhif_dst_lookup6(vrf, &peer->ipaddr.addr.addr6, - &connected); + connected); } else { + char buf[INET6_ADDRSTRLEN]; + const char *ip = fal_ip_address_t_to_str(&peer->ipaddr, + buf, + sizeof(buf)); + RTE_LOG(ERR, DATAPLANE, "%s: peer %s bad address family?\n", - __func__, peerip); - goto out; + __func__, ip); } - if (nh_ifp == ifp && connected) { - struct llentry *lle; + return nh_ifp; +} - /* Next hop is directly reachable from switch interface. */ - if (peer->ipaddr.addr_family == FAL_IP_ADDR_FAMILY_IPV4) - lle = in_lltable_find(ifp, peer->ipaddr.addr.ip4); - else if (peer->ipaddr.addr_family == FAL_IP_ADDR_FAMILY_IPV6) - lle = in6_lltable_find(ifp, &peer->ipaddr.addr.addr6); - else { - RTE_LOG(ERR, DATAPLANE, "%s: bad address family?\n", +static +void ptp_peer_dst_resolve(struct ptp_peer_t *peer, + struct ifnet *ifp, + struct rte_ether_addr *dst) + +{ + struct rte_mbuf *m; + struct llentry *lle; + struct sockaddr_in taddr; + char buf[INET6_ADDRSTRLEN]; + const char *peerip = fal_ip_address_t_to_str(&peer->ipaddr, + buf, + sizeof(buf)); + + /* Next hop is directly reachable from switch interface. */ + if (peer->ipaddr.addr_family == FAL_IP_ADDR_FAMILY_IPV4) + lle = in_lltable_find(ifp, peer->ipaddr.addr.ip4); + else if (peer->ipaddr.addr_family == FAL_IP_ADDR_FAMILY_IPV6) + lle = in6_lltable_find(ifp, &peer->ipaddr.addr.addr6); + else { + RTE_LOG(ERR, DATAPLANE, "%s: bad address family?\n", + __func__); + return; + } + + if (llentry_copy_mac(lle, dst)) + return; + + /* The lle isn't valid (yet), attempt to resolve locally. 
*/ + + DP_DEBUG(PTP, ERR, DATAPLANE, "%s: resolving %s...\n", + __func__, peerip); + + if (peer->ipaddr.addr_family == FAL_IP_ADDR_FAMILY_IPV4) { + taddr.sin_family = AF_INET; + taddr.sin_addr.s_addr = peer->ipaddr.addr.ip4; + + m = arprequest(ifp, (struct sockaddr *) &taddr); + if (m) + if_output(ifp, m, NULL, RTE_ETHER_TYPE_ARP); + + } else if (peer->ipaddr.addr_family == FAL_IP_ADDR_FAMILY_IPV6) { + m = pktmbuf_alloc(mbuf_pool(0), if_vrfid(ifp)); + if (!m) { + RTE_LOG(ERR, DATAPLANE, "%s: no mbufs for ND\n", __func__); - goto out; + return; } - /* If the lle isn't valid, attempt to resolve locally. */ - if (!llentry_copy_mac(lle, &newmac)) { - struct rte_mbuf *m; - struct sockaddr_in taddr; - - DP_DEBUG(PTP, ERR, DATAPLANE, "%s: resolving %s...\n", - __func__, peerip); - - if (peer->ipaddr.addr_family == - FAL_IP_ADDR_FAMILY_IPV4) { - taddr.sin_family = AF_INET; - taddr.sin_addr.s_addr = peer->ipaddr.addr.ip4; - - m = arprequest(ifp, (struct sockaddr *) &taddr); - if (m) - if_output(ifp, m, NULL, ETHER_TYPE_ARP); - } else if (peer->ipaddr.addr_family == - FAL_IP_ADDR_FAMILY_IPV6) { - m = pktmbuf_alloc(mbuf_pool(0), - if_vrfid(ifp)); - if (!m) { - RTE_LOG(ERR, DATAPLANE, - "%s: no mbufs for ND\n", - __func__); - goto out; - } - - if (!nd6_resolve(NULL, ifp, m, - &peer->ipaddr.addr.addr6, - &newmac)) - if_output(ifp, m, NULL, - ETHER_TYPE_IPv6); - } else { - RTE_LOG(ERR, DATAPLANE, - "%s: peer %s bad address family?\n", - __func__, peerip); - } - } - } else if (nh_ifp) { - /* Send packets to switch interface for routing. */ - DP_DEBUG(PTP, ERR, DATAPLANE, - "%s: peer %s routed via switch interface.\n", - __func__, peerip); - ether_addr_copy(&ifp->eth_addr, &newmac); + if (!nd6_resolve(NULL, ifp, m, + &peer->ipaddr.addr.addr6, + dst)) + if_output(ifp, m, NULL, RTE_ETHER_TYPE_IPV6); + } else { + RTE_LOG(ERR, DATAPLANE, + "%s: peer %s bad address family?\n", + __func__, peerip); + } +} + +enum ptp_peer_state { + NO_ROUTE, + ROUTED, /* ptp_port -> ifp -> nh_ifp -> ... 
*/ + ONE_HOP, /* ptp_port -> ifp (== nh_ifp) -> peer */ + CONNECTED, /* ptp_port -> peer */ +}; + +static +enum ptp_peer_state ptp_peer_find_nexthop(struct ptp_peer_t *peer, + struct ifnet **ifp, + struct ifnet **nh_ifp) +{ + struct ptp_port_t *port; + enum ptp_peer_state state = NO_ROUTE; + bool is_connected; + + *ifp = NULL; + *nh_ifp = NULL; + + port = rcu_dereference(peer->port); + if (!port) + return state; + + *ifp = ptp_port_port_to_vlan(port); + if (!*ifp || !((*ifp)->if_flags & IFF_UP)) + return state; + + *nh_ifp = ptp_peer_dst_lookup(peer, &is_connected); + if (!*nh_ifp || !((*nh_ifp)->if_flags & IFF_UP)) + return state; + + if (*nh_ifp == *ifp && is_connected) + state = CONNECTED; + else if (*nh_ifp == *ifp) + state = ONE_HOP; + else if (*nh_ifp) + state = ROUTED; + + return state; +} + +static +void ptp_peer_update(struct ptp_peer_t *peer) +{ + struct ifnet *ifp, *nh_ifp; + struct rte_ether_addr newmac = { { 0 } }; + char buf[INET6_ADDRSTRLEN], buf2[INET6_ADDRSTRLEN]; + const char *peerip = + fal_ip_address_t_to_str(&peer->ipaddr, buf2, sizeof(buf2)); + struct ptp_peer_t *parent = peer, *sibling; + enum ptp_peer_state state; + + state = ptp_peer_find_nexthop(peer, &ifp, &nh_ifp); + + /* Is this the best way to reach the peer? There are potentially + * three different way to reach a.b.c.d from the peers configured + * on two different PTP ports: + * + * PTP port 1 + * peer a.b.c.d -------> sw0. ---- ? --> a.b.c.d + * + * PTP port 2 + * peer a.b.c.d -------> sw0. ---- ? --> a.b.c.d + * + * sw0. ---- ? --> a.b.c.d + * + * ifp can be either sw0. or sw0.. However, + * nh_ifp could be sw0. or sw0. or sw0.. + * Ideally, we should use the ifp that is also the nh_ifp. 
+ */ + cds_list_for_each_entry_rcu(sibling, &parent->siblings, slist) { + struct ifnet *sib_ifp, *sib_nh_ifp; + enum ptp_peer_state sib_state; + + sib_state = ptp_peer_find_nexthop(sibling, + &sib_ifp, &sib_nh_ifp); + + /* If the nexthop is on the same interface, and the + * interface is up, prefer this peer over any other. + * The sibling might also be better if the current + * peer isn't reachable or IFF_UP. + */ + if (sib_state != NO_ROUTE && sib_state > state) { + DP_DEBUG(PTP, ERR, DATAPLANE, + "%s: choosing peer %s on %s\n", + __func__, peerip, sib_ifp->if_name); + ptp_peer_uninstall(peer); + peer = sibling; + nh_ifp = sib_nh_ifp; + ifp = sib_ifp; + state = sib_state; + continue; + } + + /* This peer might have been active, so always uninstall. */ + ptp_peer_uninstall(sibling); + } + + switch (state) { + case CONNECTED: + /* Next hop is directly reachable from switch interface. */ + DP_DEBUG(PTP, INFO, DATAPLANE, + "%s: peer %s is directly connected via %s.\n", + __func__, peerip, ifp->if_name); + ptp_peer_dst_resolve(peer, ifp, &newmac); + break; + case ONE_HOP: + case ROUTED: + /* Send packets to sw0. for routing. */ + DP_DEBUG(PTP, INFO, DATAPLANE, + "%s: peer %s ROUTED via switch interface %s.\n", + __func__, peerip, nh_ifp->if_name); + rte_ether_addr_copy(&ifp->eth_addr, &newmac); + break; + default: DP_DEBUG(PTP, ERR, DATAPLANE, - "%s: peer %s is unreachable from clock port.\n", - __func__, peerip); + "%s: peer %s is unreachable\n", __func__, peerip); } -out: /* If the MAC address changed (or finally resolved), * we need to update (or install) the peer in the FAL. 
*/ - if (!ether_addr_equal(&newmac, &peer->mac)) { + if (!rte_ether_addr_equal(&newmac, &peer->mac)) { if (ptp_peer_uninstall(peer) < 0) { RTE_LOG(ERR, DATAPLANE, "%s: ptp_peer_uninstall for %s failed!\n", @@ -844,7 +1067,7 @@ void ptp_peer_update(struct ptp_peer_t *peer) return; } - ether_addr_copy(&newmac, &peer->mac); + rte_ether_addr_copy(&newmac, &peer->mac); if (!ether_is_empty(&peer->mac)) { DP_DEBUG(PTP, ERR, DATAPLANE, "%s: peer %s is at %s.\n", __func__, peerip, ether_ntoa_r(&newmac, buf)); @@ -876,7 +1099,7 @@ int ptp_peer_create(FILE *f, int argc, char **argv) uint32_t clock_id = 0; uint16_t port_id = 0; struct ptp_port_t *port = NULL; - struct ptp_peer_t *peer = NULL; + struct ptp_peer_t *parent, *peer = NULL; int rc = -EINVAL; bool have_clock = false; bool have_port = false; @@ -961,24 +1184,26 @@ int ptp_peer_create(FILE *f, int argc, char **argv) goto error; } - if (is_ipaddr_empty(&peer->ipaddr)) { + if (fal_is_ipaddr_empty(&peer->ipaddr)) { fprintf(f, "ptp: ip address required for peer\n"); goto error; } - if (ptp_find_peer(clock_id, port_id, peer->type, &peer->ipaddr)) { - fprintf(f, "ptp: peer already exists\n"); - rc = -EEXIST; + port = ptp_find_port(clock_id, port_id); + if (!port) { + RTE_LOG(ERR, DATAPLANE, + "%s: port-id %d is missing, bad replay?\n", + __func__, port_id); + rc = 0; goto error; } - port = ptp_find_port(clock_id, port_id); - if (!port) { - fprintf(f, "ptp: clock %d port %d doesn't exist\n", - clock_id, port_id); - rc = -ENODEV; + if (ptp_find_peer(port, peer->type, &peer->ipaddr)) { + fprintf(f, "ptp: peer already exists\n"); + rc = -EEXIST; goto error; } + rcu_assign_pointer(peer->port, port); /* If we already have a MAC or this is an allowed peer entry, @@ -993,7 +1218,19 @@ int ptp_peer_create(FILE *f, int argc, char **argv) } } - cds_list_add_rcu(&peer->list, &ptp_peer_list); + CDS_INIT_LIST_HEAD(&peer->list); + CDS_INIT_LIST_HEAD(&peer->slist); + CDS_INIT_LIST_HEAD(&peer->siblings); + + /* If we already have a peer with 
this IP address, we + * keep this new peer on the sibling list with that + * peer. + */ + parent = ptp_find_parent(clock_id, peer->type, &peer->ipaddr); + if (parent) + cds_list_add_rcu(&peer->slist, &parent->siblings); + else + cds_list_add_rcu(&peer->list, &ptp_peer_list); return 0; error: @@ -1016,6 +1253,7 @@ int ptp_peer_delete(FILE *f, int argc, char **argv) uint16_t port_id = 0; bool have_clock = false; bool have_port = false; + struct ptp_port_t *port; struct ptp_peer_t *peer; enum fal_ptp_peer_type_t peer_type = 0; bool have_peer_type = false; @@ -1080,12 +1318,19 @@ int ptp_peer_delete(FILE *f, int argc, char **argv) goto error; } - if (is_ipaddr_empty(&ipaddr)) { + if (fal_is_ipaddr_empty(&ipaddr)) { fprintf(f, "ptp: ip address required for peer\n"); goto error; } - peer = ptp_find_peer(clock_id, port_id, peer_type, &ipaddr); + port = ptp_find_port(clock_id, port_id); + if (!port) { + /* interface never arrived, not an error. */ + rc = 0; + goto error; + } + + peer = ptp_find_peer(port, peer_type, &ipaddr); if (!peer) { fprintf(f, "ptp: can't find object for peer\n"); rc = -ENODEV; @@ -1099,7 +1344,25 @@ int ptp_peer_delete(FILE *f, int argc, char **argv) } rcu_assign_pointer(peer->port, NULL); + if (!cds_list_empty(&peer->siblings)) { + struct ptp_peer_t *sibling, *new_parent = NULL; + + /* Move remaining siblings to the new parent peer. 
*/ + cds_list_for_each_entry_rcu(sibling, &peer->siblings, slist) { + if (!new_parent) { + new_parent = sibling; + cds_list_add_rcu(&new_parent->list, + &ptp_peer_list); + cds_list_del_rcu(&new_parent->slist); + } else { + cds_list_del_rcu(&sibling->slist); + cds_list_add_rcu(&sibling->slist, + &new_parent->siblings); + } + } + } cds_list_del_rcu(&peer->list); + cds_list_del_rcu(&peer->slist); call_rcu(&peer->rcu, ptp_peer_free); error: @@ -1112,36 +1375,11 @@ enum ptp_obj_type { PTP_PEER, }; -int cmd_ptp_op(FILE *f, int argc, char **argv) +static int ptp_clock_dump(FILE *f, struct ptp_clock_t *clock) { - struct ptp_clock_t *clock; json_writer_t *wr; - uint32_t clock_id; int rc = -EINVAL; - if (argc < 4) - goto usage; - argc--; - argv++; - - if (strcmp(*argv, "clock") != 0) - goto usage; - argc--; - argv++; - - if (strcmp(*argv, "dump") != 0) - goto usage; - argc--; - argv++; - - if (get_unsigned(*argv, &clock_id) < 0) - goto error; - clock = ptp_find_clock(clock_id); - if (!clock) { - fprintf(f, "ptp: clock %d does not exist\n", clock_id); - goto error; - } - wr = jsonw_new(f); if (!wr) { fprintf(f, "ptp: could not create json writer\n"); @@ -1153,17 +1391,143 @@ int cmd_ptp_op(FILE *f, int argc, char **argv) rc = fal_dump_ptp_clock(clock->obj_id, wr); jsonw_end_array(wr); if (rc < 0) { - fprintf(f, "ptp: dump failed\n"); + fprintf(f, "ptp: clock dump failed\n"); goto error; } + jsonw_destroy(&wr); + +error: + return rc; +} + +static const char *ptp_peer_type_to_name(enum fal_ptp_peer_type_t type) +{ + const char *name; + + switch (type) { + case FAL_PTP_PEER_MASTER: + name = "master"; + break; + case FAL_PTP_PEER_SLAVE: + name = "slave"; + break; + case FAL_PTP_PEER_ALLOWED: + name = "allowed"; + break; + default: + name = "unknown"; + } + + return name; +} + +static void ptp_resolver_peer_dump(json_writer_t *wr, struct ptp_peer_t *peer) +{ + struct ptp_port_t *port = rcu_dereference(peer->port); + char buf[INET6_ADDRSTRLEN]; + const char *peerip = 
fal_ip_address_t_to_str(&peer->ipaddr, + buf, + sizeof(buf)); + + jsonw_start_object(wr); + jsonw_string_field(wr, "peer", peerip); + jsonw_bool_field(wr, "installed", peer->installed); + if (port) + jsonw_uint_field(wr, "port-id", port->port_id); + if (peer->installed) + jsonw_string_field(wr, "mac", + ether_ntoa_r(&peer->mac, buf)); + jsonw_string_field(wr, "type", + ptp_peer_type_to_name(peer->type)); + jsonw_end_object(wr); +} +static int ptp_resolver_dump(FILE *f) +{ + struct ptp_peer_t *peer, *sibling; + json_writer_t *wr; + int rc = -EINVAL; + + wr = jsonw_new(f); + if (!wr) { + fprintf(f, "ptp: could not create json writer\n"); + goto error; + } + jsonw_pretty(wr, true); + jsonw_start_array(wr); + + cds_list_for_each_entry_rcu(peer, &ptp_peer_list, list) { + ptp_resolver_peer_dump(wr, peer); + + if (!cds_list_empty(&peer->siblings)) { + jsonw_start_array(wr); + cds_list_for_each_entry_rcu(sibling, + &peer->siblings, slist) + ptp_resolver_peer_dump(wr, sibling); + jsonw_end_array(wr); + } + } + + jsonw_end_array(wr); jsonw_destroy(&wr); + rc = 0; + +error: + return rc; +} + +int cmd_ptp_op(FILE *f, int argc, char **argv) +{ + struct ptp_clock_t *clock; + uint32_t clock_id; + int rc = -EINVAL; + + if (argc < 3) + goto usage; + + /* ptp clock dump */ + if (strcmp(argv[1], "clock") == 0 && + strcmp(argv[2], "dump") == 0 && argc == 4) { + if (get_unsigned(argv[3], &clock_id) < 0) + goto error; + clock = ptp_find_clock(clock_id); + if (!clock) { + fprintf(f, "ptp: clock %d does not exist\n", clock_id); + goto error; + } + rc = ptp_clock_dump(f, clock); + } + + /* ptp resolver dump */ + if (strcmp(argv[1], "resolver") == 0 && + strcmp(argv[2], "dump") == 0 && argc == 3) { + rc = ptp_resolver_dump(f); + } + + /* ptp resolver trigger */ + if (strcmp(argv[1], "resolver") == 0 && + strcmp(argv[2], "trigger") == 0 && argc == 3) { + fprintf(f, "ptp: calling peer resolver...\n"); + if (ptp_peer_resolver_running) + rte_timer_stop_sync(&ptp_peer_resolver); + 
ptp_peer_resolver_cb(NULL, NULL); + if (ptp_peer_resolver_running) + rte_timer_reset_sync(&ptp_peer_resolver, + rte_get_timer_hz() * + ptp_peer_resolver_period, + PERIODICAL, rte_get_master_lcore(), + ptp_peer_resolver_cb, NULL); + fprintf(f, "ptp: peer resolver done!\n"); + rc = 0; + } error: return rc; usage: fprintf(f, "ptp clock dump \n"); + fprintf(f, "ptp resolver dump\n"); + fprintf(f, "ptp resolver trigger\n"); goto error; } diff --git a/src/ptp.h b/src/ptp.h index d0af9195..6f55cc41 100644 --- a/src/ptp.h +++ b/src/ptp.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, AT&T Intellectual Property. + * Copyright (c) 2019-2020, AT&T Intellectual Property. * All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only @@ -7,7 +7,7 @@ #ifndef PTP_H #define PTP_H -#include "config.h" +#include "config_internal.h" int ptp_init(struct pci_list *bp_list); int cmd_ptp_cfg(FILE *f, int argc, char **argv); diff --git a/src/qos.h b/src/qos.h index e8ce066b..13b7d8a4 100644 --- a/src/qos.h +++ b/src/qos.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2013-2016 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -19,13 +19,14 @@ struct rte_sched_port; -#define DEFAULT_QSIZE 64 -#define MAX_QSIZE 8192 -#define DEFAULT_TBSIZE 10000 +#define DEFAULT_QSIZE 64 /* 64 packets */ +#define MAX_QSIZE 8192 /* 8192 packets */ +#define DEFAULT_TBSIZE 10000 /* 10000 bytes or credits (1 byte/credit) */ +#define DEFAULT_PERIOD 10000 /* 10000 microseconds */ #define DEFAULT_Q 3 /* class 3: queue 0 */ #define MAX_PIPES 256 -#define MAX_RED_QUEUE_LENGTH 8192 +#define MAX_RED_QUEUE_LENGTH 8192 /* 8192 packets */ #define QOS_DPDK_ID 0 #define QOS_HW_ID 1 @@ -33,48 +34,107 @@ struct rte_sched_port; #define QOS_MAX_DROP_PRECEDENCE 2 +/* + * We support 2 queues with up to 4 WRED profiles configured per queue + * so allow a maximum of 8 per profile. + */ +#define QOS_NUM_DSCP_MAPS 8 +#define QOS_MAX_DSCP_MAPS (QOS_NUM_DSCP_MAPS - 1) + +/* + * Maximum burst size in bytes supported by DPDK. This value must match the + * burst-size maximum range given in vyatta-npf-v1.yang. + */ +#define QOS_MAX_BURST_SIZE_DPDK (312500000) // 100ms at 25Gbit/sec +#define QOS_MAX_BURST_SIZE_DEFAULT QOS_MAX_BURST_SIZE_DPDK + struct npf_act_grp; -struct red_params { - uint32_t min_th; /* Minimum threshold in bytes for queue */ - uint32_t max_th; /* Maximum threshold in bytes for queue */ - uint16_t maxp_inv; /* Inverse of packet marking probability */ +enum qos_queue_size_type { + QOS_QUEUE_SIZE_PACKETS, + QOS_QUEUE_SIZE_BYTES, + QOS_QUEUE_SIZE_USEC }; -enum wred_unit { - WRED_PACKETS, - WRED_BYTES +enum qos_state { + QOS_INSTALL, + QOS_NPF_READY, + QOS_NPF_COMMIT }; -struct queue_wred_info { - union { - struct rte_red_params map_params[QOS_MAX_DROP_PRECEDENCE + 1]; - struct red_params map_params_bytes[QOS_MAX_DROP_PRECEDENCE + 1]; - } params; - char *dscp_grp_names[QOS_MAX_DROP_PRECEDENCE + 1]; - uint8_t filter_weight; - uint8_t num_maps; - enum wred_unit unit; +struct qos_red_params { + uint32_t min_th; + uint32_t max_th; + enum qos_queue_size_type qsize_type; + uint16_t maxp_inv; + /* Negated log2 of queue 
weight + * (wq = 1 / (2 ^ wq_log2)) + */ + uint16_t wq_log2; +}; + +/* + * This holds the names of the dscp groups and the masks. + * We need to save this incase a resource group is changed + * and we need to reset the classification indices. + */ +struct qos_dscp_map { + unsigned int num_maps; + uint8_t qmap[QOS_NUM_DSCP_MAPS]; + char *dscp_grp_names[QOS_NUM_DSCP_MAPS]; + uint64_t dscp_mask[QOS_NUM_DSCP_MAPS]; +}; + +enum egress_map_type { + EGRESS_UNDEF = 0, + EGRESS_DSCP = 1, + EGRESS_DESIGNATION = 2, + EGRESS_DSCPGRP_DSCP = 3, + EGRESS_DESIGNATION_PCP = 4 +}; + +struct qos_mark_map_entry { + uint8_t des; + enum fal_packet_colour color; + uint8_t pcp_value; }; -struct profile_wred_info { - struct queue_wred_info queue_wred[RTE_SCHED_QUEUES_PER_PIPE]; +struct dscp_grp_list { + SLIST_ENTRY(dscp_grp_list) list; + uint8_t pcp_val; + char name[0]; }; struct qos_mark_map { struct rcu_head obj_rcu; struct cds_list_head list; - uint8_t pcp_value[MAX_DSCP]; + enum egress_map_type type; + union { + uint8_t des_used; + uint64_t dscp_used; + }; + union { + struct qos_mark_map_entry entries[FAL_QOS_MAP_DES_DP_VALUES]; + uint8_t pcp_value[MAX_DSCP]; + }; + fal_object_t mark_obj; + SLIST_HEAD(dscp_grps, dscp_grp_list) dscp_grps; char map_name[0]; }; struct qos_rate_info { bool bw_is_percent; union _bw_info { - uint8_t bw_percent; - uint32_t bandwidth; + float bw_percent; + uint64_t bandwidth; } rate; - uint32_t burst; /* value of 0 indicates burst should be calculated */ + + bool burst_is_time; + union _burst_info { + uint32_t size; + uint32_t time_ms; + } burst; + uint32_t period; }; @@ -82,6 +142,20 @@ struct qos_tc_rate_info { struct qos_rate_info tc_rate[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; }; +struct qos_shaper_conf { + uint64_t tb_rate; /* bytes/sec */ + uint64_t tc_rate[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; + uint32_t tc_period; + uint32_t tb_size; +#ifdef RTE_SCHED_SUBPORT_TC_OV + uint8_t tc_ov_weight; /* Weight TC 3 oversubscription */ +#endif +}; + 
+static_assert(sizeof(struct qos_shaper_conf) == + sizeof(struct rte_sched_subport_params), + "qos and dpdk structures are not of same size"); + /* Qos Scheduler sub port (one per vlan) */ struct subport_info { char attach_name[IFNAMSIZ + sizeof("/4294967295")]; @@ -90,7 +164,7 @@ struct subport_info { struct npf_act_grp *act_grp_list; struct mark_reqs *marks; uint8_t *profile_map; /* pipe to profile */ - struct rte_sched_subport_params params; + struct qos_shaper_conf params; struct qos_rate_info subport_rate; struct qos_tc_rate_info sp_tc_rates; @@ -99,10 +173,12 @@ struct subport_info { struct rte_sched_subport_stats64 queue_stats; /* Non-zeroing counts */ struct rte_sched_subport_stats64 clear_stats; /* Counts at last clear */ uint32_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; - struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE] - [e_RTE_METER_COLORS]; + enum qos_queue_size_type qsize_type; + struct qos_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE] + [RTE_COLORS]; bool pipe_configured[MAX_PIPES]; struct qos_mark_map *mark_map; + bool auto_speed; }; /* DSCP and PCP maps (per profile) */ @@ -113,14 +189,41 @@ struct queue_map { uint8_t pcp_enabled:1, /* Flag to track user-defined PCP map */ dscp_enabled:1, /* Flag to track user-defined DSCP map */ local_priority:1, /* Local priority queue enabled */ - unused:5; + designation:1, + unused:4; uint8_t conf_ids[RTE_SCHED_QUEUES_PER_PIPE]; /* The configured Q ids */ + struct qos_dscp_map *dscp_maps; + uint64_t reset_mask; +}; + +/* Egress map sub-port/VIF information */ +struct egress_map_subport_info { + SLIST_ENTRY(egress_map_subport_info) egr_map_list; + int vlan_id; + fal_object_t egr_map_obj; +}; + +/* Egress map infprmation per Physical port */ +struct egress_map_info { + SLIST_HEAD(egr_map_head, egress_map_subport_info) egr_map_head; }; #define CONF_ID_Q_CONFIG 0x80 #define CONF_ID_Q_DEFAULT 0x40 #define CONF_ID_Q_IN_USE (CONF_ID_Q_CONFIG | CONF_ID_Q_DEFAULT) +enum ingress_map_type 
{ + INGRESS_UNDEF = 0, + INGRESS_DSCP, + INGRESS_PCP +}; + +#define INGRESS_DESIGNATORS 8 +#define MAX_DESIGNATOR 7 +/* We currently support 3 levels of drop precedence; green, yellow and red */ +#define NUM_DPS 3 +#define MAX_DP 2 + /* Qos queue counters (one per queue) */ struct queue_stats { /* The ever-increasing counts */ @@ -139,9 +242,46 @@ struct queue_stats { uint64_t n_pkts_red_dscp_dropped_lc[RTE_NUM_DSCP_MAPS]; }; +struct qos_red_q_params { + uint64_t dscp_set[RTE_NUM_DSCP_MAPS]; + struct qos_red_params qparams[RTE_NUM_DSCP_MAPS]; + char *grp_names[RTE_NUM_DSCP_MAPS]; + uint8_t num_maps; + uint8_t filter_weight; + uint8_t dps_in_use; +}; + +struct qos_red_pipe_params { + SLIST_ENTRY(qos_red_pipe_params) list; + struct qos_red_q_params red_q_params; + uint32_t qindex; + bool alloced; +}; + +struct qos_pipe_params { + struct qos_shaper_conf shaper; + uint8_t wrr_weights[RTE_SCHED_QUEUES_PER_PIPE]; + uint8_t designation[INGRESS_DESIGNATORS]; + uint8_t des_set; + SLIST_HEAD(red_head, qos_red_pipe_params) red_head; +}; + +struct qos_port_params { + struct qos_pipe_params *pipe_profiles; + uint32_t n_pipe_profiles; + uint32_t mtu; + uint64_t rate; /* Port rate in bytes/sec */ + int32_t frame_overhead; + uint32_t n_subports_per_port; + uint32_t n_pipes_per_subport; + uint32_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; + enum qos_queue_size_type qsize_type; +}; + /* Qos Scheduler handles (one per physical port) */ struct sched_info { int dev_id; /* Device ID - DPDK or FAL */ + struct ifnet *ifp; union _dev_info { struct _dpdk { struct rte_sched_port *port; /* DPDK object */ @@ -152,11 +292,12 @@ struct sched_info { } fal; } dev_info; struct subport_info *subport; /* Subport's */ - struct rte_sched_port_params port_params; + struct qos_port_params port_params; struct qos_rate_info *profile_rates; struct qos_tc_rate_info *profile_tc_rates; struct qos_rate_info port_rate; bool enabled; + enum qos_state reset_port; struct rcu_head rcu; /* subports and pipes as 
configured, actual size is in port_params */ @@ -167,7 +308,7 @@ struct sched_info { struct queue_map *queue_map; struct queue_stats *queue_stats; rte_spinlock_t stats_lock; /* To control access to queue-stats */ - struct profile_wred_info *wred_profiles; + SLIST_ENTRY(sched_info) list; }; struct mark_reqs { @@ -184,6 +325,7 @@ struct qos_show_context { json_writer_t *wr; bool optimised_json; bool is_platform; + bool sent_sysdef_map; }; /* @@ -192,6 +334,7 @@ struct qos_show_context { * one if there's also a hardware forwarding path. */ struct qos_dev { + int (*qos_init)(void); int (*qos_disable)(struct ifnet *ifp, struct sched_info *q); int (*qos_enable)(struct ifnet *ifp, struct sched_info *q); int (*qos_start)(struct ifnet *ifp, struct sched_info *q, @@ -212,10 +355,13 @@ struct qos_dev { uint32_t pipe, uint32_t tc, uint32_t q, uint64_t *random_dscp_drop, json_writer_t *wr); + uint64_t (*qos_check_rate)(uint64_t rate, uint64_t parent_bw); }; extern struct qos_dev qos_devices[]; +fal_object_t qos_global_map_obj; + /* Encode DPDK Traffic-Class and Queue */ #define QMAP(tc, wrr) (wrr << RTE_SCHED_TC_BITS | (tc)) @@ -234,6 +380,7 @@ extern struct qos_dev qos_devices[]; #define QOS_ENABLE(qinfo) qos_devices[qinfo->dev_id].qos_enable #define QOS_DSCP_RESGRP_JSON(qinfo) \ qos_devices[qinfo->dev_id].qos_dscp_resgrp_json +#define QOS_CHECK_RATE(qinfo) qos_devices[qinfo->dev_id].qos_check_rate #define QOS_CONFIGURED(qinfo) \ (qinfo->dev_info.dpdk.port || qinfo->dev_info.fal.hw_port_id) @@ -280,18 +427,47 @@ static inline uint8_t qmap_to_dp(uint8_t m) return m >> RTE_SCHED_TC_WRR_BITS; } +#define DES_IN_USE 0x80 + +/* + * Search for the designation value for this queue. + * If there isn't one allocate the next free one. 
+ */ +static inline bool qos_qmap_to_des(uint8_t q, uint8_t *des2q, int *ind) +{ + int i; + + for (i = 0; i < INGRESS_DESIGNATORS; i++) { + if (des2q[i] == (DES_IN_USE | q_from_mask(q))) { + *ind = i; + return true; + } + } + for (i = 0; i < INGRESS_DESIGNATORS; i++) { + if (des2q[i] & DES_IN_USE) + continue; + des2q[i] = DES_IN_USE | q_from_mask(q); + *ind = i; + return true; + } + return false; +} + _Static_assert(sizeof(struct rte_sched_subport_params) != sizeof(struct rte_sched_pipe_params), "Structures should be the same size."); -void qos_sched_subport_params_check(struct rte_sched_subport_params *params, +void qos_sched_subport_params_check(struct qos_shaper_conf *params, struct qos_rate_info *config_rate, struct qos_rate_info *config_tc_rate, - uint16_t max_pkt_len, uint32_t bps); + uint16_t max_pkt_len, uint32_t max_burst_size, + uint64_t bps, + struct sched_info *qinfo); static inline void qos_sched_pipe_check(struct sched_info *qinfo, - uint16_t max_pkt_len, uint32_t bps) + uint16_t max_pkt_len, + uint32_t max_burst_size, uint64_t bps) { unsigned int profile; @@ -299,8 +475,8 @@ static inline void qos_sched_pipe_check(struct sched_info *qinfo, profile < qinfo->port_params.n_pipe_profiles; profile++) { unsigned int subport; - uint32_t parent_rate = bps; - struct rte_sched_pipe_params *p + uint64_t parent_rate = bps; + struct qos_pipe_params *p = qinfo->port_params.pipe_profiles + profile; /* @@ -332,30 +508,76 @@ static inline void qos_sched_pipe_check(struct sched_info *qinfo, * breaks, check if these structures have changed. 
*/ qos_sched_subport_params_check( - (struct rte_sched_subport_params *)p, + &p->shaper, &qinfo->profile_rates[profile], qinfo->profile_tc_rates[profile].tc_rate, - max_pkt_len, parent_rate); + max_pkt_len, max_burst_size, parent_rate, qinfo); } } +struct ingress_designator { + uint8_t dps_in_use; + uint64_t mask[NUM_DPS]; +}; + +struct qos_ingress_map { + struct rcu_head obj_rcu; + struct cds_list_head list; + enum ingress_map_type type; + struct ingress_designator designation[INGRESS_DESIGNATORS]; + bool sysdef; + fal_object_t map_obj; + char name[0]; +}; + +/* + * The ingress map plugin structure. + */ +struct qos_ingressm { + int (*qos_ingressm_attach)(unsigned int ifindex, unsigned int vlan, + struct qos_ingress_map *map); + int (*qos_ingressm_detach)(unsigned int ifindex, unsigned int vlan); + int (*qos_ingressm_config)(struct qos_ingress_map *map, bool create); +}; + +extern struct qos_ingressm qos_ingressm; + +/* + * The egress map plugin structure. + */ +struct qos_egressm { + int (*qos_egressm_attach)(unsigned int ifindex, unsigned int vlan, + struct qos_mark_map *map); + int (*qos_egressm_detach)(unsigned int ifindex, unsigned int vlan, + struct qos_mark_map *map); + int (*qos_egressm_config)(struct qos_mark_map *map, bool create); +}; + +extern struct qos_egressm qos_egressm; + void qos_init(void); int qos_sched_start(struct ifnet *ifp, uint64_t link_speed); void qos_sched_stop(struct ifnet *ifp); uint32_t qos_sched_calc_qindex(struct sched_info *qinfo, unsigned int subport, unsigned int pipe, unsigned int tc, unsigned int q); +bool qos_wred_threshold_get(struct qos_red_params *wred_params, + uint64_t rate, uint32_t *wred_min_th, uint32_t *wred_max_th); +uint32_t qos_queue_size_get(uint32_t qsize, + enum qos_queue_size_type qsize_type, + uint64_t rate); +uint32_t qos_sp_qsize_get(struct qos_port_params *pp, + struct subport_info *sinfo, int tc); struct sched_info; -int qos_sched(struct ifnet *ifp, struct sched_info *info, - struct rte_mbuf **in, 
uint32_t n_in, - struct rte_mbuf **out, uint32_t n_out); +int qos_sched(struct ifnet *ifp, struct sched_info *qinfo, + struct rte_mbuf *enq_pkts[], uint32_t n_pkts, + struct rte_mbuf *deq_pkts[], uint32_t space); struct subport_info *qos_get_subport(const char *name, struct ifnet **ifp); struct npf_act_grp *qos_ag_get_head(struct subport_info *subport); struct npf_act_grp *qos_ag_set_or_get_head(struct subport_info *subport, struct npf_act_grp *act_grp); int16_t qos_get_overhead(const char *name); int16_t qos_get_overhead_from_ifnet(struct ifnet *ifp); -void qos_sched_update_if_stats(const struct ifnet *ifp); bool qos_sched_subport_get_stats(struct sched_info *qinfo, uint16_t vlan_id, struct rte_sched_subport_stats64 *stats); struct ifnet *qos_get_vlan_ifp(const char *att_pnt, uint16_t *vlan_id); @@ -370,12 +592,18 @@ struct sched_info *qos_sched_new(struct ifnet *ifp, unsigned int subports, int32_t overhead); void qos_sched_free(struct sched_info *qinfo); void qos_sched_free_rcu(struct rcu_head *head); +uint8_t qos_get_prio_lp_des(void); int qos_hw_show_port(struct ifnet *ifp, void *arg); void qos_hw_dump_map(json_writer_t *wr, const struct sched_info *qinfo, uint32_t subport, uint32_t pipe); void qos_hw_dump_subport(json_writer_t *wr, const struct sched_info *qinfo, uint32_t subport); +void qos_hw_dump_buf_errors(json_writer_t *wr); +struct qos_red_pipe_params * +qos_red_find_q_params(struct qos_pipe_params *pipe, unsigned int qindex); +struct qos_red_pipe_params * +qos_red_alloc_q_params(struct qos_pipe_params *pipe, unsigned int qindex); /* The DPDK plugin functions */ void qos_dpdk_dscp_resgrp_json(struct sched_info *qinfo, uint32_t subport, @@ -387,7 +615,7 @@ int qos_dpdk_subport_read_stats(struct sched_info *qinfo, uint32_t subport, int qos_dpdk_subport_clear_stats(struct sched_info *qinfo, uint32_t subport); int qos_dpdk_queue_read_stats(struct sched_info *qinfo, uint32_t subport, uint32_t pipe, uint32_t tc, uint32_t q, - struct queue_stats *st, uint64_t 
*qlen, + struct queue_stats *queue_stats, uint64_t *qlen, bool *qlen_in_pkts); int qos_dpdk_queue_clear_stats(struct sched_info *qinfo, uint32_t subport, uint32_t pipe, @@ -402,6 +630,7 @@ int qos_dpdk_enable(struct ifnet *ifp, int qos_dpdk_stop(__unused struct ifnet *ifp, struct sched_info *qinfo); int qos_dpdk_start(struct ifnet *ifp, struct sched_info *qinfo, uint64_t bps, uint16_t max_pkt_len); +uint64_t qos_dpdk_check_rate(uint64_t rate, uint64_t parent_bw); /* The HW forwarding plugin functions */ fal_object_t @@ -435,5 +664,17 @@ int qos_hw_stop(__unused struct ifnet *ifp, __unused struct sched_info *qinfo); int qos_hw_start(__unused struct ifnet *ifp, struct sched_info *qinfo, uint64_t bps, uint16_t max_pkt_len); +uint64_t qos_hw_check_rate(uint64_t rate, uint64_t parent_bw); +int qos_hw_init(void); +void qos_hw_del_map(fal_object_t mark_obj); +void qos_hw_show_legacy_map(struct queue_map *qmap, json_writer_t *wr); +fal_object_t qos_hw_get_att_ingress_map(struct ifnet *ifp, unsigned int vlan); +fal_object_t qos_hw_get_att_egress_map(struct ifnet *ifp, unsigned int vlan); +struct qos_mark_map *qos_egress_map_find(char const *name); +void qos_abs_rate_save(struct qos_rate_info *bw_info, uint64_t abs_bw); +struct egress_map_subport_info *qos_egress_map_subport_get( + struct ifnet *parent_ifp, int vlan_id); +struct egress_map_subport_info *qos_egress_map_subport_new(struct ifnet *ifp, + struct ifnet *parent_ifp, bool is_sub_if); #endif /* QOS_H */ diff --git a/src/qos_dpdk.c b/src/qos_dpdk.c index 81580e85..147b7f5c 100644 --- a/src/qos_dpdk.c +++ b/src/qos_dpdk.c @@ -1,6 +1,6 @@ /*- - * Copyright (c) 2018-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2018-2020, AT&T Intellectual Property. All rights reserved. 
* * SPDX-License-Identifier: LGPL-2.1-only */ @@ -21,14 +21,46 @@ #include "vplane_log.h" #include "ether.h" +/* + * Only allow a child shaper to use 99.6% of the parent bandwidth so when we + * borrow tokens we don't set the time into the future. + * 99.6 was chosen by testing, no failures were seen using it. If the time + * is incorrectly advanced we drop the packets on the backplane which is far + * harder to diagnose so forcing them into the shaper is preferrable. + */ +#define MAX_RATE_FLOAT 99.6 +#define MAX_RATE_SCALED 996 + +uint64_t qos_dpdk_check_rate(uint64_t rate, uint64_t parent_bw) +{ + /* + * Check whether the rate is close or equal to the parent bandwidth. + * If it is we may run into time issues when borrowing tokens which + * turns off the shaper so make sure we only set the rate to 99.6% + * of the parent + */ + if (parent_bw) { + float percent; + + percent = (((float)rate * 100.0) / (float)parent_bw); + if (percent > MAX_RATE_FLOAT) { + uint64_t tmp_rate; + + tmp_rate = (uint64_t)parent_bw * MAX_RATE_SCALED; + rate = tmp_rate / 1000; + } + } + return rate; +} + /* * Return the DSCP wred resource group name associated with a map entry * in a queue index. 
*/ static char *qos_get_dscp_grp(struct sched_info *qinfo, uint32_t qid, int i) { - struct rte_sched_pipe_params *pp; - struct rte_red_pipe_params *wred_params; + struct qos_pipe_params *pp; + struct qos_red_pipe_params *wred_params; int profile; profile = rte_sched_get_profile_for_pipe(qinfo->dev_info.dpdk.port, @@ -37,7 +69,7 @@ static char *qos_get_dscp_grp(struct sched_info *qinfo, uint32_t qid, int i) return NULL; pp = &qinfo->port_params.pipe_profiles[profile]; - wred_params = rte_red_find_q_params(pp, qid); + wred_params = qos_red_find_q_params(pp, qid); if (wred_params) return wred_params->red_q_params.grp_names[i]; @@ -269,10 +301,9 @@ int qos_dpdk_port(struct ifnet *ifp, int qos_dpdk_disable(struct ifnet *ifp, struct sched_info *qinfo) { + qos_dpdk_stop(ifp, qinfo); rcu_assign_pointer(ifp->if_qos, NULL); - disable_transmit_thread(ifp->if_port); - qos_subport_npf_free(qinfo); call_rcu(&qinfo->rcu, qos_sched_free_rcu); @@ -282,27 +313,28 @@ int qos_dpdk_disable(struct ifnet *ifp, struct sched_info *qinfo) int qos_dpdk_enable(struct ifnet *ifp, struct sched_info *qinfo) { - /* If link is already up, then start now */ - struct if_link_status link; - - if_get_link_status(ifp, &link); - - if (link.link_status && - qos_sched_start(ifp, link.link_speed) < 0) { - DP_DEBUG(QOS_DP, ERR, DATAPLANE, "Qos start failed\n"); - qinfo->enabled = false; - return -ENODEV; - } - - if (enable_transmit_thread(ifp->if_port) < 0) { - DP_DEBUG(QOS_DP, ERR, DATAPLANE, - "Transmit thread setup failed\n"); - qinfo->enabled = false; - return -ENODEV; + struct dp_ifnet_link_status link; + + if (!ifp->hw_forwarding) { + /* If link is already up, then start now */ + dp_ifnet_link_status(ifp, &link); + + if (link.link_status && + link.link_speed != ETH_SPEED_NUM_NONE && + qos_sched_start(ifp, link.link_speed) < 0) { + DP_DEBUG(QOS_DP, ERR, DATAPLANE, "Qos start failed\n"); + qinfo->enabled = false; + return -ENODEV; + } + DP_DEBUG(QOS_DP, DEBUG, DATAPLANE, + "link status %s, speed %d, QoS 
not started\n", + link.link_status ? "up" : "down", + link.link_speed); + } else { + DP_DEBUG(QOS_DP, DEBUG, DATAPLANE, + "interface not sw forwarding, QoS not started\n"); } - ifp->qos_software_fwd = 1; - return 0; } @@ -316,25 +348,141 @@ static void qos_dpdk_port_free_rcu(void *arg) * If the subport doesn't have its TC queue-limits explicitly defined inherit * the port's queue-limits. */ -static uint32_t qos_sched_subport_qsize(struct rte_sched_port_params *pp, - uint32_t *qsize) +static uint32_t qos_sched_subport_qsize(struct qos_port_params *pp, + struct subport_info *sinfo) { uint32_t queue_array_size = 0; uint32_t tc; for (tc = 0; tc < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; tc++) { - if (qsize[tc] == 0) - qsize[tc] = pp->qsize[tc]; - - queue_array_size += qsize[tc]; + uint32_t qsize = qos_sp_qsize_get(pp, sinfo, tc); + queue_array_size += qsize; } return (queue_array_size * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS * pp->n_pipes_per_subport * sizeof(struct rte_mbuf *)); } +static void qos_copy_red_params(struct rte_red_params + dpdk[][RTE_COLORS], + struct subport_info *sinfo) +{ + int i, j; + + for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) { + for (j = 0; j < RTE_COLORS; j++) { + uint32_t wred_min_th = 0; + uint32_t wred_max_th = 0; + qos_wred_threshold_get(&sinfo->red_params[i][j], + sinfo->params.tc_rate[i], + &wred_min_th, &wred_max_th); + + dpdk[i][j].min_th = (uint16_t)wred_min_th; + dpdk[i][j].max_th = (uint16_t)wred_max_th; + dpdk[i][j].maxp_inv = + (uint16_t)sinfo->red_params[i][j].maxp_inv; + dpdk[i][j].wq_log2 = + (uint16_t)sinfo->red_params[i][j].wq_log2; + } + } +} + +static int qos_dpdk_setup_params(struct ifnet *ifp, struct sched_info *qinfo, + struct rte_sched_port_params *dpdk_port_params) +{ + struct qos_port_params *qos_params = &qinfo->port_params; + int socketid = rte_eth_dev_socket_id(ifp->if_port); + unsigned int i, j; + struct rte_sched_pipe_params *pipe_profiles; + + pipe_profiles = calloc(qos_params->n_pipe_profiles, + 
sizeof(*pipe_profiles)); + if (!pipe_profiles) + return -1; + + dpdk_port_params->pipe_profiles = pipe_profiles; + + if (socketid < 0) /* SOCKET_ID_ANY */ + socketid = 0; + + dpdk_port_params->socket = socketid; + dpdk_port_params->n_pipe_profiles = qos_params->n_pipe_profiles; + dpdk_port_params->rate = qos_params->rate; + dpdk_port_params->mtu = qos_params->mtu; + dpdk_port_params->frame_overhead = qos_params->frame_overhead; + dpdk_port_params->n_subports_per_port = qos_params->n_subports_per_port; + dpdk_port_params->n_pipes_per_subport = qos_params->n_pipes_per_subport; + for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) + dpdk_port_params->qsize[i] = qos_queue_size_get( + qos_params->qsize[i], + qos_params->qsize_type, + qos_params->rate); + for (i = 0; i < qos_params->n_pipe_profiles; i++) { + struct rte_sched_pipe_params *to = &pipe_profiles[i]; + struct qos_pipe_params *from = + qos_params->pipe_profiles + i; + struct qos_red_pipe_params *wred_params = NULL; + + to->tc_period = from->shaper.tc_period; + to->tb_size = from->shaper.tb_size; +#ifdef RTE_SCHED_SUBPORT_TC_OV + to->tc_tc_ov_weight = from->shaper.tc_ov_weight; +#endif + to->tb_rate = from->shaper.tb_rate; + for (j = 0; j < RTE_SCHED_QUEUES_PER_PIPE; j++) + to->wrr_weights[j] = from->wrr_weights[j]; + for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j++) + to->tc_rate[j] = from->shaper.tc_rate[j]; + SLIST_FOREACH(wred_params, &from->red_head, list) { + struct rte_red_pipe_params *qred_info; + int err, k; + + qred_info = rte_red_alloc_q_params(to, + wred_params->qindex); + if (!qred_info) + return -ENOMEM; + for (k = 0; k < wred_params->red_q_params.num_maps; + k++) { + struct qos_red_q_params *params; + params = &wred_params->red_q_params; + + uint32_t wred_min_th = 0; + uint32_t wred_max_th = 0; + qos_wred_threshold_get(¶ms->qparams[k], + from->shaper.tb_rate, + &wred_min_th, &wred_max_th); + + err = rte_red_init_q_params( + &qred_info->red_q_params, + wred_max_th, + wred_min_th, + 
params->qparams[k].maxp_inv, + params->dscp_set[k], + params->grp_names[k]); + if (err < 0) + return -ENOMEM; + qred_info->red_q_params.qparams[k].wq_log2 = + params->qparams[k].wq_log2; + } + } + } + return 0; +} + +static void qos_dpdk_free_params(struct rte_sched_port_params *dpdk_port_params) +{ + unsigned int i; + + for (i = 0; i < dpdk_port_params->n_pipe_profiles; i++) { + struct rte_sched_pipe_params *pp = + dpdk_port_params->pipe_profiles + i; + rte_red_free_q_params(pp, i); + } + free(dpdk_port_params->pipe_profiles); +} + /* Allocate and initialize a handle to QoS scheduler. - * Only called by master thread. + * Only called by main thread. */ int qos_dpdk_start(struct ifnet *ifp, struct sched_info *qinfo, uint64_t bps, uint16_t max_pkt_len) @@ -343,6 +491,18 @@ int qos_dpdk_start(struct ifnet *ifp, struct sched_info *qinfo, unsigned int subport, pipe; int ret; uint32_t q_array_size; + struct rte_sched_port_params dpdk_port_params = {0}; + const uint32_t max_burst_size = QOS_MAX_BURST_SIZE_DPDK; + + if (enable_transmit_thread(ifp->if_port) < 0) { + DP_DEBUG(QOS_DP, ERR, DATAPLANE, + "Transmit thread setup failed on %s, portid %u\n", + ifp->if_name, ifp->if_port); + qinfo->enabled = false; + return -ENODEV; + } + + ifp->qos_software_fwd = 1; /* * Allow subports to inherit their queue sizes from the port, and @@ -353,7 +513,14 @@ int qos_dpdk_start(struct ifnet *ifp, struct sched_info *qinfo, struct subport_info *sinfo = &qinfo->subport[subport]; q_array_size += qos_sched_subport_qsize(&qinfo->port_params, - sinfo->qsize); + sinfo); + + /* + * If we've received a rate auto we use the reported + * interface speed as the subport rate. 
+ */ + if (sinfo->auto_speed) + qos_abs_rate_save(&sinfo->subport_rate, bps); /* * Establish subport rates before checking pipes so that the @@ -361,29 +528,46 @@ int qos_dpdk_start(struct ifnet *ifp, struct sched_info *qinfo, */ qos_sched_subport_params_check( &sinfo->params, &sinfo->subport_rate, - sinfo->sp_tc_rates.tc_rate, max_pkt_len, bps); + sinfo->sp_tc_rates.tc_rate, max_pkt_len, + max_burst_size, bps, qinfo); } - qos_sched_pipe_check(qinfo, max_pkt_len, bps); + qos_sched_pipe_check(qinfo, max_pkt_len, max_burst_size, bps); - port = rte_sched_port_config_v2(&qinfo->port_params, q_array_size); + if (qos_dpdk_setup_params(ifp, qinfo, &dpdk_port_params)) { + qos_dpdk_free_params(&dpdk_port_params); + DP_DEBUG(QOS_DP, ERR, DATAPLANE, + "QoS DPDK config setup failed\n"); + goto out_disable_tx; + } + + port = rte_sched_port_config_v2(&dpdk_port_params, q_array_size); if (port == NULL) { DP_DEBUG(QOS_DP, ERR, DATAPLANE, "QoS config port failed\n"); - return -1; + qos_dpdk_free_params(&dpdk_port_params); + goto out_disable_tx; } for (subport = 0; subport < qinfo->n_subports; subport++) { struct subport_info *sinfo = &qinfo->subport[subport]; - struct rte_sched_subport_params *params = &sinfo->params; + struct qos_shaper_conf *qos_params = &sinfo->params; + struct rte_sched_subport_params dpdk_params; uint16_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; + struct rte_red_params + dpdk_red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE] + [RTE_COLORS]; int i; for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) - qsize[i] = (uint16_t)sinfo->qsize[i]; + qsize[i] = (uint16_t)qos_sp_qsize_get( + &qinfo->port_params, sinfo, i); - ret = rte_sched_subport_config_v2(port, subport, params, - &qsize[0], sinfo->red_params); + memcpy(&dpdk_params, qos_params, sizeof(*qos_params)); + qos_copy_red_params(dpdk_red_params, sinfo); + + ret = rte_sched_subport_config_v2(port, subport, &dpdk_params, + &qsize[0], dpdk_red_params); if (ret != 0) { DP_DEBUG(QOS_DP, ERR, DATAPLANE, "Qos 
config subport %u failed: %d\n", @@ -396,7 +580,7 @@ int qos_dpdk_start(struct ifnet *ifp, struct sched_info *qinfo, ret = rte_sched_pipe_config_v2(port, subport, pipe, profile, - &qinfo->port_params); + &dpdk_port_params); if (ret != 0) { DP_DEBUG(QOS_DP, ERR, DATAPLANE, "Qos config pipe subport %u pipe %u" @@ -410,7 +594,7 @@ int qos_dpdk_start(struct ifnet *ifp, struct sched_info *qinfo, npf_cfg_commit_all(); } - /* Use RCU to set the pointer because changed by master thread + /* Use RCU to set the pointer because changed by main thread * but referenced by Tx thread */ DP_DEBUG(QOS_DP, DEBUG, DATAPLANE, "QoS on port %s enabled\n", @@ -418,14 +602,19 @@ int qos_dpdk_start(struct ifnet *ifp, struct sched_info *qinfo, old_port = qinfo->dev_info.dpdk.port; rcu_assign_pointer(qinfo->dev_info.dpdk.port, port); defer_rcu(qos_dpdk_port_free_rcu, old_port); + qos_dpdk_free_params(&dpdk_port_params); return 0; out_free_sched: rte_sched_port_free(port); + qos_dpdk_free_params(&dpdk_port_params); + out_disable_tx: + ifp->qos_software_fwd = 0; + disable_transmit_thread(ifp->if_port); return -1; } -int qos_dpdk_stop(__unused struct ifnet *ifp, struct sched_info *qinfo) +int qos_dpdk_stop(struct ifnet *ifp, struct sched_info *qinfo) { struct rte_sched_port *port = qinfo->dev_info.dpdk.port; @@ -435,6 +624,9 @@ int qos_dpdk_stop(__unused struct ifnet *ifp, struct sched_info *qinfo) rcu_assign_pointer(qinfo->dev_info.dpdk.port, NULL); defer_rcu(qos_dpdk_port_free_rcu, port); + ifp->qos_software_fwd = 0; + disable_transmit_thread(ifp->if_port); + return 0; } @@ -450,7 +642,7 @@ static int qos_npf_classify(struct ifnet *ifp, const struct sched_info *qinfo, struct rte_mbuf **m) { - uint16_t ether_type = ethtype(*m, ETHER_TYPE_VLAN); + uint16_t ether_type = ethtype(*m, RTE_ETHER_TYPE_VLAN); uint32_t subport, pipe = 0, q = DEFAULT_Q; npf_result_t result = { .decision = NPF_DECISION_PASS }; @@ -474,7 +666,7 @@ int qos_npf_classify(struct ifnet *ifp, const struct sched_info *qinfo, if 
(npf_active(npf_config, NPF_QOS)) { result = npf_hook_notrack(npf_get_ruleset(npf_config, NPF_RS_QOS), m, ifp, PFIL_OUT, 0, - ether_type); + ether_type, NULL); if (result.tag_set) pipe = result.tag; } @@ -496,9 +688,9 @@ int qos_npf_classify(struct ifnet *ifp, const struct sched_info *qinfo, if (vlan != 0 && !qmap->dscp_enabled && qmap->pcp_enabled) { q = qmap->pcp2q[pcp]; } else { - if (ether_type == htons(ETHER_TYPE_IPv4)) + if (ether_type == htons(RTE_ETHER_TYPE_IPV4)) dscp = ip_dscp_get(iphdr(*m)); - else if (ether_type == htons(ETHER_TYPE_IPv6)) + else if (ether_type == htons(RTE_ETHER_TYPE_IPV6)) dscp = ip6_dscp_get(ip6hdr(*m)); /* @@ -523,7 +715,7 @@ int qos_npf_classify(struct ifnet *ifp, const struct sched_info *qinfo, rte_sched_port_pkt_write_v2(*m, subport, pipe, qmap_to_tc(q), qmap_to_wrr(q), - e_RTE_METER_GREEN, dscp); + RTE_COLOR_GREEN, dscp); return result.decision; } @@ -583,6 +775,5 @@ int qos_sched(struct ifnet *ifp, struct sched_info *qinfo, /* Get what is available to send */ if (space > 0) return rte_sched_port_dequeue(port, deq_pkts, space); - else - return 0; + return 0; } diff --git a/src/qos_ext_buf_monitor.c b/src/qos_ext_buf_monitor.c new file mode 100644 index 00000000..e434d0f4 --- /dev/null +++ b/src/qos_ext_buf_monitor.c @@ -0,0 +1,457 @@ +/* + * Copyright (c) 2019-2020, AT&T Intellectual Property. All rights reserved. 
+ * + * SPDX-License-Identifier: LGPL-2.1-only + */ + +#include +#include +#include +#include + +#include "event.h" +#include "fal.h" +#include "fal_plugin.h" +#include "qos.h" +#include "qos_ext_buf_monitor.h" +#include "vplane_debug.h" +#include "vplane_log.h" + +enum qos_ext_buf_msg_type { + EXT_BUF_MSG_MIBINIT = 0, + EXT_BUF_MSG_CLEAR, + EXT_BUF_MSG_WARNING, + EXT_BUF_MSG_ALERT, + EXT_BUF_MSG_UPDATE, + EXT_BUF_MSG_NONE +}; + +struct qos_ext_buf_notification_set { + enum qos_ext_buf_evt_notify_mode mode; + uint32_t max_samples; + uint32_t max_periods; +}; + +struct qos_external_buffer_congest_stats buf_stats; +static struct rte_timer qos_external_buf_timer; + +static const struct qos_ext_buf_notification_set +notifi_mode_set[EXT_BUF_EVT_NOTIFY_MODE_NUM] = { + {EXT_BUF_EVT_NOTIFY_MODE_TEN_SEC, 1, 3}, + {EXT_BUF_EVT_NOTIFY_MODE_MINUTE, 6, 3}, + {EXT_BUF_EVT_NOTIFY_MODE_HOUR, 360, 1} +}; + +static const char * const notification_tag[] = { + "MIBINIT", "CLEAR", "WARNING", "ALERT", "UPDATE" +}; + +static uint32_t qos_external_buf_counter_ids[] = { + FAL_QOS_EXTERNAL_BUFFER_COUNTER_ID, + FAL_QOS_EXTERNAL_BUFFER_PKT_REJECT_COUNTER_ID, + FAL_QOS_EXTERNAL_BUFFER_MAX_ID +}; + +static pthread_mutex_t mtx; + +static void +qos_ext_buf_stats_data_init(void) +{ + time_t t = time(NULL); + + buf_stats.initial_tm = *localtime(&t); + buf_stats.prev_sample_idx = EXT_BUF_STATUS_STATS_CNT - 1; + pthread_mutex_init(&mtx, NULL); +} + +static int +qos_ext_buf_state_evt_compare(enum qos_ext_buf_state state, + enum qos_ext_buf_event event) +{ + switch (state) { + case EXT_BUF_S_THRESHOLD_ONLY: + return event < EXT_BUF_EVT_THRESHOLD_ONLY; + case EXT_BUF_S_REJECTPKT_ONLY: + return event < EXT_BUF_EVT_REJECTPKT_ONLY; + case EXT_BUF_S_THRESHOLD_REJECTPKT: + return event < EXT_BUF_EVT_THRESHOLD_REJECTPKT; + default: + break; + } + return 0; +} + +static int +qos_ext_buf_send_notification( + struct qos_external_buffer_congest_stats *stats, + enum qos_ext_buf_msg_type msg_type) +{ + if (!stats) 
+ return -EINVAL; + + zmsg_t *msg = zmsg_new(); + + if (!msg) + return -ENOMEM; + + if (zmsg_addstr(msg, EXT_BUF_ZMSG_QUEUE) < 0 || + zmsg_addstr(msg, notification_tag[msg_type]) < 0) + goto err; + + if (msg_type == EXT_BUF_MSG_MIBINIT) { + if (zmsg_addu32(msg, buf_stats.max_buf_desc) < 0) + goto err; + } else if (msg_type == EXT_BUF_MSG_UPDATE) { + struct qos_external_buffer_sample *sample = + &stats->buf_samples[stats->cur_sample_idx]; + if (zmsg_addu32(msg, buf_stats.max_buf_desc - + sample->ext_buf_free) < 0 || + zmsg_addu32(msg, stats->rejected_pkt_cnt)) + goto err; + } else if (msg_type == EXT_BUF_MSG_CLEAR) { + if (zmsg_addu32(msg, MAX_CONSECUTIVE_SAMPLES_ON_CLEAR) < 0) + goto err; + } else { + /* For tag WARNING or ALERT */ + struct qos_external_buffer_sample *sample = + &stats->buf_samples[stats->cur_sample_idx]; + struct qos_ext_buf_notify_period *period_data = + &stats->cur_state.period_data; + uint32_t *cnt = period_data->results_cnt; + + if (zmsg_addu32(msg, buf_stats.buf_cfg_threshold) < 0 || + zmsg_addu32(msg, + cnt[EXT_BUF_SPL_R_THRESHOLD_ONLY]) < 0 || + zmsg_addu32(msg, + cnt[EXT_BUF_SPL_R_REJECTPKT_ONLY]) < 0 || + zmsg_addu32(msg, + cnt[EXT_BUF_SPL_R_THRESHOLD_REJECTPKT]) < 0 || + zmsg_addu32(msg, period_data->notify_mode) < 0) + goto err; + + if (period_data->notify_mode == + EXT_BUF_EVT_NOTIFY_MODE_TEN_SEC) { + if (zmsg_addu32(msg, sample->utilization_rate) < 0) + goto err; + } + } + return dp_send_event_to_vplaned(msg); +err: + RTE_LOG(ERR, DATAPLANE, + "Could not send QoS ext buffer congestion notification.\n"); + zmsg_destroy(&msg); + return -EINVAL; +} + +static void +qos_ext_buf_exec_state_action(struct qos_ext_buf_state_record *cur_state) +{ + if (!cur_state || !buf_stats.buf_cfg_threshold) + return; + + if (cur_state->state == EXT_BUF_S_CLEAR) + qos_ext_buf_send_notification(&buf_stats, EXT_BUF_MSG_CLEAR); + else if (cur_state->state == EXT_BUF_S_THRESHOLD_ONLY) { + cur_state->msg_warning_cnt++; + qos_ext_buf_send_notification(&buf_stats, 
EXT_BUF_MSG_WARNING); + } else if (cur_state->state == EXT_BUF_S_REJECTPKT_ONLY) { + cur_state->msg_alert_cnt++; + qos_ext_buf_send_notification(&buf_stats, EXT_BUF_MSG_ALERT); + } else if (cur_state->state == EXT_BUF_S_THRESHOLD_REJECTPKT) { + cur_state->msg_alert_cnt++; + qos_ext_buf_send_notification(&buf_stats, EXT_BUF_MSG_ALERT); + } +} + +int +qos_ext_buf_state_transit(struct qos_ext_buf_state_record *cur_state, + enum qos_ext_buf_event evt) +{ + if (!cur_state || evt == EXT_BUF_EVT_NONE) + return 0; + + enum qos_ext_buf_state new_state = cur_state->state; + + if (evt == EXT_BUF_EVT_CLEAR) + new_state = EXT_BUF_S_CLEAR; + else { + switch (cur_state->state) { + case EXT_BUF_S_CLEAR: + if (evt == EXT_BUF_EVT_THRESHOLD_ONLY) + new_state = EXT_BUF_S_THRESHOLD_ONLY; + else if (evt == EXT_BUF_EVT_REJECTPKT_ONLY) + new_state = EXT_BUF_S_REJECTPKT_ONLY; + else if (evt == EXT_BUF_EVT_THRESHOLD_REJECTPKT) + new_state = EXT_BUF_S_THRESHOLD_REJECTPKT; + break; + case EXT_BUF_S_THRESHOLD_ONLY: + if (evt == EXT_BUF_EVT_REJECTPKT_ONLY) + new_state = EXT_BUF_S_REJECTPKT_ONLY; + else if (evt == EXT_BUF_EVT_THRESHOLD_REJECTPKT) + new_state = EXT_BUF_S_THRESHOLD_REJECTPKT; + break; + case EXT_BUF_S_REJECTPKT_ONLY: + if (evt == EXT_BUF_EVT_THRESHOLD_REJECTPKT) + new_state = EXT_BUF_S_THRESHOLD_REJECTPKT; + break; + case EXT_BUF_S_THRESHOLD_REJECTPKT: + default: + break; + } + } + + if (cur_state->state != new_state) { + cur_state->state = new_state; + qos_ext_buf_exec_state_action(cur_state); + if (new_state != EXT_BUF_S_CLEAR) { + cur_state->bad_periods_in_notification_mode = 1; + cur_state->consecutive_periods_cnt = 1; + } else { + cur_state->bad_periods_in_notification_mode = 0; + cur_state->consecutive_periods_cnt = 0; + } + cur_state->consecutive_good_samples_cnt = 0; + memset(&cur_state->period_data, 0, + sizeof(cur_state->period_data)); + return 1; + } + return 0; +} + +enum qos_ext_buf_event +qos_ext_buf_get_evt_by_sample_result( + struct qos_ext_buf_state_record 
*cur_state, + enum qos_ext_buf_sample_result sample_result) +{ + if (!cur_state) + return EXT_BUF_EVT_NONE; + + if (sample_result == EXT_BUF_SPL_R_THRESHOLD_ONLY) + return EXT_BUF_EVT_THRESHOLD_ONLY; + if (sample_result == EXT_BUF_SPL_R_REJECTPKT_ONLY) + return EXT_BUF_EVT_REJECTPKT_ONLY; + if (sample_result == EXT_BUF_SPL_R_THRESHOLD_REJECTPKT) + return EXT_BUF_EVT_THRESHOLD_REJECTPKT; + if (sample_result == EXT_BUF_SPL_R_NONE) { + if (cur_state->state == EXT_BUF_S_CLEAR) + return EXT_BUF_EVT_NONE; + cur_state->consecutive_good_samples_cnt++; + if (cur_state->consecutive_good_samples_cnt >= + MAX_CONSECUTIVE_SAMPLES_ON_CLEAR) + return EXT_BUF_EVT_CLEAR; + } + return EXT_BUF_EVT_NONE; +} + +static void +qos_ext_buf_tune_notification_rate(struct qos_ext_buf_state_record *cur_state, + struct qos_external_buffer_sample *sample, + enum qos_ext_buf_event evt) +{ + if (!cur_state || !sample) + return; + + enum qos_ext_buf_evt_notify_mode *mode = + &cur_state->period_data.notify_mode; + uint32_t max_periods = notifi_mode_set[*mode].max_periods; + int update_notify_mode = 0; + + if (sample->result != EXT_BUF_SPL_R_NONE) { + cur_state->consecutive_good_samples_cnt = 0; + /* Because current state event is more severe, + * bypass less severe event + */ + if (!qos_ext_buf_state_evt_compare(cur_state->state, evt)) + cur_state->period_data.bad_sample_in_period++; + } + + /* check if end of notification period is reached */ + if (cur_state->period_data.samples_cnt < + notifi_mode_set[*mode].max_samples) + return; + + cur_state->consecutive_periods_cnt++; + + if (cur_state->period_data.bad_sample_in_period > 0) { + cur_state->bad_periods_in_notification_mode++; + qos_ext_buf_exec_state_action(cur_state); + + update_notify_mode = + (cur_state->consecutive_periods_cnt >= max_periods) || + (cur_state->consecutive_periods_cnt != + cur_state->bad_periods_in_notification_mode); + } else { + update_notify_mode = + cur_state->consecutive_periods_cnt >= (max_periods/2); + } + + /* one 
sampling period is over, reset data */ + cur_state->period_data.samples_cnt = 0; + cur_state->period_data.bad_sample_in_period = 0; + memset(&cur_state->period_data.results_cnt, 0, + sizeof(cur_state->period_data.results_cnt)); + + /* Update notification mode for samples in next period */ + if (update_notify_mode) { + cur_state->bad_periods_in_notification_mode = 0; + cur_state->consecutive_periods_cnt = 0; + *mode = (*mode == EXT_BUF_EVT_NOTIFY_MODE_TEN_SEC) ? + EXT_BUF_EVT_NOTIFY_MODE_MINUTE : + ((*mode == EXT_BUF_EVT_NOTIFY_MODE_MINUTE) ? + EXT_BUF_EVT_NOTIFY_MODE_HOUR : *mode); + } +} + +void +qos_ext_buf_schedule_state_machine( + struct qos_ext_buf_state_record *cur_state, + struct qos_external_buffer_sample *sample) +{ + enum qos_ext_buf_event event = 0; + + if (!cur_state || !sample) + return; + + if (cur_state->state == EXT_BUF_S_CLEAR && + sample->result == EXT_BUF_SPL_R_NONE) + return; + + event = qos_ext_buf_get_evt_by_sample_result(cur_state, + sample->result); + + cur_state->period_data.samples_cnt++; + cur_state->period_data.results_cnt[sample->result]++; + + if (!qos_ext_buf_state_transit(cur_state, event)) + qos_ext_buf_tune_notification_rate(cur_state, sample, event); +} + +static void +qos_ext_buf_process_sample_value(uint64_t buf_free, uint64_t rejected_pkt) +{ + struct qos_external_buffer_sample *sample = + &buf_stats.buf_samples[buf_stats.cur_sample_idx]; + time_t t = time(NULL); + struct tm smp_tm = *localtime(&t); + int e1, e2, e3; + + pthread_mutex_lock(&mtx); + + buf_stats.results_cnt[sample->result]++; + buf_stats.rejected_pkt_cnt += rejected_pkt; + buf_stats.total_samples_cnt++; + sample->sample_tm = smp_tm; + sample->ext_buf_free = buf_free; + sample->ext_buf_pkt_reject = rejected_pkt; + sample->utilization_rate = + 100 - (buf_free * 100) / buf_stats.max_buf_desc; + e1 = sample->utilization_rate > buf_stats.buf_cfg_threshold; + e2 = rejected_pkt > 0; + e3 = e1 && e2; + + sample->result = e3 ? EXT_BUF_SPL_R_THRESHOLD_REJECTPKT : + (e2 ? 
EXT_BUF_SPL_R_REJECTPKT_ONLY : + (e1 ? EXT_BUF_SPL_R_THRESHOLD_ONLY : EXT_BUF_SPL_R_NONE)); + /* update SNMP MIB if values change */ + if ((buf_stats.buf_samples[buf_stats.prev_sample_idx].ext_buf_free != + buf_free) || rejected_pkt) + qos_ext_buf_send_notification(&buf_stats, EXT_BUF_MSG_UPDATE); + + qos_ext_buf_schedule_state_machine(&buf_stats.cur_state, sample); + + buf_stats.prev_sample_idx = buf_stats.cur_sample_idx; + buf_stats.cur_sample_idx = (buf_stats.cur_sample_idx + 1) % + EXT_BUF_STATUS_STATS_CNT; + + pthread_mutex_unlock(&mtx); +} + +void +qos_external_buffer_congestion_tmr_hdlr(struct rte_timer *tim __rte_unused, + void *arg __rte_unused) +{ + int ret; + uint64_t values[FAL_QOS_EXTERNAL_BUFFER_MAX_COUNTER] = { 0 }; + + if (buf_stats.total_samples_cnt == 0) + qos_ext_buf_send_notification(&buf_stats, EXT_BUF_MSG_MIBINIT); + + ret = fal_qos_get_counters(qos_external_buf_counter_ids, + ARRAY_SIZE(qos_external_buf_counter_ids), values); + + if (ret) { + DP_DEBUG(QOS, ERR, DATAPLANE, + "FAL failed to get external buffer counters, status: %d\n", + ret); + return; + } + qos_ext_buf_process_sample_value( + values[FAL_QOS_EXTERNAL_BUFFER_DESC_FREE], + values[FAL_QOS_EXTERNAL_BUFFER_PKT_REJECT]); +} + +void +qos_external_buf_monitor_init(void) +{ + int ret; + struct fal_attribute_t max_buffers; + + if (!fal_plugins_present()) { + DP_DEBUG(QOS, DEBUG, DATAPLANE, + "FAL plugins not present, external buffer monitor init failed."); + return; + } + + max_buffers.id = FAL_SWITCH_ATTR_MAX_BUF_DESCRIPTOR; + ret = fal_get_switch_attrs(1, &max_buffers); + + if (ret) { + DP_DEBUG(QOS, DEBUG, DATAPLANE, + "FAL failed to get max buffer descriptors, status: %d\n", + ret); + return; + } + + qos_ext_buf_stats_data_init(); + /* TODO: need to clarify if 2 BDBs are reserved in Broadcom TM */ + buf_stats.max_buf_desc = max_buffers.value.u32 - 2; + + rte_timer_init(&qos_external_buf_timer); +} + +void +qos_external_buf_threshold_interval(unsigned int value) +{ + 
buf_stats.buf_cfg_threshold = value; + if (!buf_stats.monitor_started && buf_stats.buf_cfg_threshold) { + buf_stats.monitor_started = 1; + rte_timer_reset(&qos_external_buf_timer, + EXT_BUF_STATUS_SAMPLE_INTERVAL * rte_get_timer_hz(), + PERIODICAL, rte_get_master_lcore(), + qos_external_buffer_congestion_tmr_hdlr, NULL); + } +} + +int +qos_ext_buf_get_stats(struct qos_external_buffer_congest_stats *stats) +{ + if (!stats) + return 0; + + pthread_mutex_lock(&mtx); + + memcpy(stats, &buf_stats, sizeof(buf_stats)); + + pthread_mutex_unlock(&mtx); + + return 1; +} + +int qos_ext_buf_get_threshold(uint32_t *threshold) +{ + if (!buf_stats.monitor_started) + return 0; + + *threshold = buf_stats.buf_cfg_threshold; + return 1; +} diff --git a/src/qos_ext_buf_monitor.h b/src/qos_ext_buf_monitor.h new file mode 100644 index 00000000..971c4113 --- /dev/null +++ b/src/qos_ext_buf_monitor.h @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2019, AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ + +#ifndef QOS_EXT_BUF_MONITOR_H +#define QOS_EXT_BUF_MONITOR_H + +#include + +#define EXT_BUF_STATUS_STATS_CNT 6 +#define EXT_BUF_STATUS_SAMPLE_INTERVAL 10 /* 10 sec */ +#define MAX_CONSECUTIVE_SAMPLES_ON_CLEAR 360 /* 360 consecutive samples */ + +#define EXT_BUF_ZMSG_QUEUE "QosExtBufCongestion" + +enum qos_ext_buf_evt_notify_mode { + EXT_BUF_EVT_NOTIFY_MODE_TEN_SEC = 0, + EXT_BUF_EVT_NOTIFY_MODE_MINUTE, + EXT_BUF_EVT_NOTIFY_MODE_HOUR, + EXT_BUF_EVT_NOTIFY_MODE_NUM +}; + +enum qos_ext_buf_event { + EXT_BUF_EVT_NONE = 0, + EXT_BUF_EVT_THRESHOLD_ONLY, + EXT_BUF_EVT_REJECTPKT_ONLY, + EXT_BUF_EVT_THRESHOLD_REJECTPKT, + EXT_BUF_EVT_CLEAR +}; + +enum qos_ext_buf_state { + EXT_BUF_S_CLEAR = 0, + EXT_BUF_S_THRESHOLD_ONLY, + EXT_BUF_S_REJECTPKT_ONLY, + EXT_BUF_S_THRESHOLD_REJECTPKT +}; + +enum qos_ext_buf_sample_result { + EXT_BUF_SPL_R_NONE = 0, + EXT_BUF_SPL_R_THRESHOLD_ONLY, + EXT_BUF_SPL_R_REJECTPKT_ONLY, + EXT_BUF_SPL_R_THRESHOLD_REJECTPKT, + 
EXT_BUF_SPL_R_NUM +}; + +struct qos_ext_buf_notify_period { + enum qos_ext_buf_evt_notify_mode notify_mode; + uint16_t samples_cnt; /* total samples in a period */ + uint16_t bad_sample_in_period; + uint32_t results_cnt[EXT_BUF_SPL_R_NUM]; +}; + +struct qos_ext_buf_state_record { + enum qos_ext_buf_state state; + uint32_t consecutive_good_samples_cnt; + uint32_t consecutive_periods_cnt; + uint32_t bad_periods_in_notification_mode; + uint32_t msg_warning_cnt; + uint32_t msg_alert_cnt; + struct qos_ext_buf_notify_period period_data; +}; + +struct qos_external_buffer_sample { + uint64_t ext_buf_free; + uint64_t ext_buf_pkt_reject; + uint32_t utilization_rate; + enum qos_ext_buf_sample_result result; + struct tm sample_tm; +}; + +struct qos_external_buffer_congest_stats { + struct qos_external_buffer_sample + buf_samples[EXT_BUF_STATUS_STATS_CNT]; + int cur_sample_idx; + int prev_sample_idx; + struct qos_ext_buf_state_record cur_state; + uint64_t rejected_pkt_cnt; + uint64_t results_cnt[EXT_BUF_SPL_R_NUM]; + uint64_t total_samples_cnt; + uint32_t max_buf_desc; + uint32_t buf_cfg_threshold; + int monitor_started; + struct tm initial_tm; +}; + +int qos_ext_buf_state_transit( + struct qos_ext_buf_state_record *cur_state, + enum qos_ext_buf_event evt); +enum qos_ext_buf_event qos_ext_buf_get_evt_by_sample_result( + struct qos_ext_buf_state_record *cur_state, + enum qos_ext_buf_sample_result sample_result); +void qos_ext_buf_schedule_state_machine( + struct qos_ext_buf_state_record *cur_state, + struct qos_external_buffer_sample *sample); +int qos_ext_buf_get_stats(struct qos_external_buffer_congest_stats *stats); +int qos_ext_buf_get_threshold(uint32_t *threshold); +void qos_external_buffer_congestion_tmr_hdlr( + struct rte_timer *tim __rte_unused, + void *arg __rte_unused); + +void qos_external_buf_monitor_init(void); +void qos_external_buf_threshold_interval(unsigned int value); + +#endif /* QOS_EXT_BUF_MONITOR_H */ diff --git a/src/qos_hw.c b/src/qos_hw.c index 
714e3b8c..ceee03ee 100644 --- a/src/qos_hw.c +++ b/src/qos_hw.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2018-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2018-2021, AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only */ @@ -17,14 +17,29 @@ #include "netinet6/ip6_funcs.h" #include "npf/config/npf_config.h" #include "npf_shim.h" +#include "npf/npf_rule_gen.h" #include "vplane_debug.h" #include "vplane_log.h" #include "ether.h" #include "fal.h" +#include "if_var.h" +#include "dp_event.h" _Static_assert(MAX_DSCP == FAL_QOS_MAP_DSCP_VALUES, "max DSCP value mismatch"); _Static_assert(MAX_PCP == FAL_QOS_MAP_PCP_VALUES, "max PCP value mismatch"); +static struct egress_map_subport_info * +qos_egress_map_subport_get_or_create(struct ifnet *ifp, + struct ifnet *parent_ifp, bool is_sub_if); +static int +qos_egress_map_subport_delete(struct ifnet *ifp, struct ifnet *parent_ifp, + bool is_sub_if); + +uint64_t qos_hw_check_rate(uint64_t rate, uint64_t parent_bw __unused) +{ + return rate; +} + static fal_object_t qos_hw_get_map(uint32_t port_obj_id, uint32_t subport_id, uint32_t pipe_id, enum qos_obj_hw_type hw_type) @@ -117,37 +132,832 @@ qos_hw_get_wred(uint32_t port_obj_id, uint32_t subport_id, uint32_t pipe_id, return wred_obj; } -void qos_hw_dscp_resgrp_json(struct sched_info *qinfo, uint32_t subport, - uint32_t pipe, uint32_t tc, uint32_t q, - uint64_t *random_dscp_drop, json_writer_t *wr) +void qos_hw_dscp_resgrp_json(struct sched_info *qinfo, uint32_t subport, + uint32_t pipe, uint32_t tc, uint32_t q, + uint64_t *random_dscp_drop, json_writer_t *wr) +{ + int i, num_maps; + + struct subport_info *sinfo = qinfo->subport + subport; + uint8_t profile_id = sinfo->profile_map[pipe]; + struct qos_pipe_params *prof = + &qinfo->port_params.pipe_profiles[profile_id]; + uint8_t qindex = (tc * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS) + q; + struct qos_red_pipe_params *wred; + + wred = qos_red_find_q_params(prof, qindex); + 
if (!wred) + return; + + num_maps = wred->red_q_params.num_maps; + if (num_maps) { + char *grp_name; + + jsonw_name(wr, "wred_map"); + jsonw_start_array(wr); + for (i = 0; i < NUM_DPS; i++) { + if (!(wred->red_q_params.dps_in_use & (1 << i))) + continue; + grp_name = wred->red_q_params.grp_names[i]; + if (grp_name == NULL) + break; + jsonw_start_object(wr); + jsonw_string_field(wr, "res_grp", grp_name); + jsonw_uint_field(wr, "random_dscp_drop", + random_dscp_drop[i]); + jsonw_end_object(wr); + } + jsonw_end_array(wr); + } +} + +static void qos_hw_setup_maplist(struct fal_qos_map_list_t *map_list, + struct ingress_designator *des, int ind, + bool is_dscp) +{ + int i, k, l; + uint64_t j; + int max_entries = is_dscp ? FAL_QOS_MAP_DSCP_VALUES : + FAL_QOS_MAP_PCP_VALUES; + + for (i = 0; i < NUM_DPS; i++) { + if (!(des->dps_in_use & (1 << i))) + continue; + for (k = 0, j = 1 ; k < max_entries ; j <<= 1, k++) { + if (des->mask[i] & j) { + l = map_list->count++; + if (is_dscp) + map_list->list[l].key.dscp = k; + else + map_list->list[l].key.dot1p = k; + map_list->list[l].value.des = ind; + switch (i) { + case 0: + map_list->list[l].value.colour = + FAL_PACKET_COLOUR_GREEN; + break; + case 1: + map_list->list[l].value.colour = + FAL_PACKET_COLOUR_YELLOW; + break; + case 2: + map_list->list[l].value.colour = + FAL_PACKET_COLOUR_RED; + break; + } + } + } + } +} + +static int qos_hw_setup_des2q(struct queue_map *qmap, uint8_t *des2q) +{ + int cp, des; + + for (cp = 0; cp < MAX_DSCP; cp++) { + if (!qos_qmap_to_des(qmap->dscp2q[cp], &des2q[0], &des)) { + DP_DEBUG(QOS_HW, ERR, DATAPLANE, + "map create, out of designators\n"); + return -EINVAL; + } + } + return 0; +} + +void qos_hw_show_legacy_map(struct queue_map *qmap, json_writer_t *wr) +{ + uint8_t cp; + int des; + uint8_t des2q[INGRESS_DESIGNATORS] = { 0 }; + struct ingress_designator designation[INGRESS_DESIGNATORS] = {{ 0 }}; + uint8_t dp; + + for (cp = 0; cp < MAX_DSCP; cp++) { + dp = qmap_to_dp(qmap->dscp2q[cp]); + + if 
(!qos_qmap_to_des(qmap->dscp2q[cp], &des2q[0], &des)) { + DP_DEBUG(QOS_HW, ERR, DATAPLANE, + "show legacy map, out of designators\n"); + return; + } + designation[des].dps_in_use |= (1 << dp); + designation[des].mask[dp] |= (1ULL << cp); + } + + jsonw_name(wr, "ingress-maps"); + jsonw_start_array(wr); + + jsonw_start_object(wr); + jsonw_string_field(wr, "name", "legacy-map"); + jsonw_string_field(wr, "type", "dscp"); + jsonw_bool_field(wr, "system-default", true); + jsonw_name(wr, "map"); + jsonw_start_array(wr); + + for (des = 0; des < INGRESS_DESIGNATORS; des++) { + if (!designation[des].dps_in_use) + continue; + jsonw_start_object(wr); + jsonw_uint_field(wr, "designation", des); + jsonw_name(wr, "DPs"); + jsonw_start_array(wr); + for (dp = 0; dp < NUM_DPS; dp++) { + if (!(designation[des].dps_in_use & (1 << dp))) + continue; + jsonw_start_object(wr); + jsonw_uint_field(wr, "DP", dp); + jsonw_uint_field(wr, "pcp/mask", + designation[des].mask[dp]); + jsonw_end_object(wr); + } + jsonw_end_array(wr); /* DPs */ + jsonw_end_object(wr); + } + jsonw_end_array(wr); /* map */ + jsonw_end_object(wr); + jsonw_end_array(wr); /* ingress-maps */ +} + +static void qos_hw_ingressm_attrs(struct qos_ingress_map *map, + struct fal_qos_map_list_t *map_list) +{ + int i; + + for (i = 0; i < INGRESS_DESIGNATORS; i++) { + if (map->designation[i].dps_in_use) + qos_hw_setup_maplist(map_list, + &map->designation[i], + i, (map->type == INGRESS_DSCP)); + } +} + +static int qos_hw_ingressm_attach(unsigned int ifindex, unsigned int vlan, + struct qos_ingress_map *map) +{ + if (map->map_obj == FAL_QOS_NULL_OBJECT_ID) { + DP_DEBUG(QOS_HW, ERR, DATAPLANE, + "Invalid ingress-map attach, not created %s\n", + map->name); + + return -ENOENT; + } + + if (!vlan) { + struct fal_attribute_t port_attr_list = { + .id = FAL_PORT_ATTR_QOS_INGRESS_MAP_ID, + .value.objid = map->map_obj + }; + fal_l2_upd_port(ifindex, &port_attr_list); + + DP_DEBUG(QOS_HW, DEBUG, DATAPLANE, + "Created ingress feature on if 
%u\n", ifindex); + + return 0; + } + + struct fal_attribute_t vlan_attr[] = { + { .id = FAL_VLAN_FEATURE_INTERFACE_ID, + .value.u32 = ifindex }, + { .id = FAL_VLAN_FEATURE_VLAN_ID, + .value.u16 = vlan }, + { .id = FAL_VLAN_FEATURE_ATTR_QOS_INGRESS_MAP_ID, + .value.objid = map->map_obj } + }; + int ret; + struct if_vlan_feat *vlan_feat; + struct ifnet *ifp = dp_ifnet_byifindex(ifindex); + + if (!ifp) { + DP_DEBUG(QOS_HW, ERR, DATAPLANE, + "Failed to retrieve ifp for ingress feature %u\n", + ifindex); + + return -ENOENT; + } + + vlan_feat = if_vlan_feat_get(ifp, vlan); + if (!vlan_feat) { + ret = if_vlan_feat_create(ifp, vlan, FAL_NULL_OBJECT_ID); + if (ret) { + DP_DEBUG(QOS_HW, ERR, DATAPLANE, + "Failed to create feature for if %s vlan %u\n", + ifp->if_name, vlan); + return ret; + } + vlan_feat = if_vlan_feat_get(ifp, vlan); + if (!vlan_feat) + return -ENOENT; + ret = fal_vlan_feature_create(ARRAY_SIZE(vlan_attr), vlan_attr, + &vlan_feat->fal_vlan_feat); + if (ret && ret != -EOPNOTSUPP) { + DP_DEBUG(QOS_HW, ERR, DATAPLANE, + "Can not create vlan_feat for vlan %u fal %d\n", + vlan, ret); + if_vlan_feat_delete(ifp, vlan); + return ret; + } + } else { + ret = fal_vlan_feature_set_attr(vlan_feat->fal_vlan_feat, + &vlan_attr[2]); + if (ret) { + DP_DEBUG(QOS_HW, ERR, DATAPLANE, + "Failed to add ingress map to if %s vlan %u\n", + ifp->if_name, vlan); + return ret; + } + } + + vlan_feat->refcount++; + + DP_DEBUG(QOS_HW, DEBUG, DATAPLANE, + "Created ingress feature on if %u vlan %u\n", + ifindex, vlan); + + return ret; +} + +static int qos_hw_ingressm_detach(unsigned int ifindex, unsigned int vlan) +{ + if (!vlan) { + struct fal_attribute_t port_attr_list[] = { + { .id = FAL_PORT_ATTR_QOS_INGRESS_MAP_ID, + .value.objid = FAL_NULL_OBJECT_ID } + }; + + fal_l2_upd_port(ifindex, &port_attr_list[0]); + DP_DEBUG(QOS_HW, DEBUG, DATAPLANE, + "Removed ingress feature on if %u\n", ifindex); + return 0; + } + + int ret; + struct if_vlan_feat *vlan_feat = NULL; + struct fal_attribute_t 
vlan_attr[1] = { + { .id = FAL_VLAN_FEATURE_ATTR_QOS_INGRESS_MAP_ID, + .value.objid = FAL_NULL_OBJECT_ID } + }; + struct ifnet *ifp = dp_ifnet_byifindex(ifindex); + if (!ifp) { + DP_DEBUG(QOS_HW, ERR, DATAPLANE, + "Failed to retrieve ifp for ingress feat %u\n", + ifindex); + + return -ENOENT; + } + + vlan_feat = if_vlan_feat_get(ifp, vlan); + if (!vlan_feat) { + DP_DEBUG(QOS_HW, ERR, DATAPLANE, + "Could not find vlan feat for intf %s vlan %d\n", + ifp->if_name, vlan); + return -ENOENT; + } + + ret = fal_vlan_feature_set_attr(vlan_feat->fal_vlan_feat, + vlan_attr); + if (ret && ret != -EOPNOTSUPP) { + DP_DEBUG(QOS_HW, ERR, DATAPLANE, + "Could not remove vlan_feat for vlan %d in fal (%d)\n", + vlan, ret); + return ret; + } + + vlan_feat->refcount--; + + if (!vlan_feat->refcount) { + ret = fal_vlan_feature_delete(vlan_feat->fal_vlan_feat); + if (ret) { + DP_DEBUG(QOS_HW, ERR, DATAPLANE, + "Could not destroy fal vlan feature obj" + " for %s vlan %d (%d)\n", + ifp->if_name, vlan, ret); + return ret; + } + + ret = if_vlan_feat_delete(ifp, vlan); + if (ret) { + DP_DEBUG(QOS_HW, ERR, DATAPLANE, + "Could not destroy vlan feature obj for " + "%s vlan %d (%d)\n", + ifp->if_name, vlan, ret); + return ret; + } + } + + DP_DEBUG(QOS_HW, DEBUG, DATAPLANE, + "Deleted vlan ingress feature obj for %s vlan %u\n", + ifp->if_name, vlan); + + return 0; +} + +static int qos_hw_ingressm_config(struct qos_ingress_map *map, + bool create) +{ + if (!create) { + /* Make sure the attach went ok */ + if (map->map_obj != FAL_QOS_NULL_OBJECT_ID) { + fal_qos_del_map(map->map_obj); + map->map_obj = FAL_QOS_NULL_OBJECT_ID; + } + DP_DEBUG(QOS_HW, DEBUG, DATAPLANE, + "Deleted fal ingress map %s\n", map->name); + return 0; + } + + struct fal_qos_map_list_t map_list = {0}; + struct fal_attribute_t attr_list[] = { + { .id = FAL_QOS_MAP_ATTR_TYPE, + .value.u8 = FAL_QOS_MAP_TYPE_DSCP_TO_DESIGNATOR }, + { .id = FAL_QOS_MAP_ATTR_MAP_TO_VALUE_LIST, + .value.maplist = &map_list }, + { .id = 
FAL_QOS_MAP_ATTR_INGRESS_SYSTEM_DEFAULT, + .value.booldata = map->sysdef }, + }; + int ret; + + if (map->type == INGRESS_PCP) + attr_list[0].value.u8 = FAL_QOS_MAP_TYPE_DOT1P_TO_DESIGNATOR; + + qos_hw_ingressm_attrs(map, &map_list); + + if ((map->type == INGRESS_DSCP && + map_list.count != FAL_QOS_MAP_DSCP_VALUES) || + (map->type == INGRESS_PCP && + map_list.count != FAL_QOS_MAP_PCP_VALUES)) { + DP_DEBUG(QOS_HW, ERR, DATAPLANE, + "Invalid map, not all values used %d\n", + map_list.count); + return -EINVAL; + } + + ret = fal_qos_new_map(FAL_QOS_NULL_OBJECT_ID, ARRAY_SIZE(attr_list), + attr_list, &map->map_obj); + + DP_DEBUG(QOS_HW, DEBUG, DATAPLANE, "Created ingress map %s\n", + map->name); + + return ret; +} + +fal_object_t qos_hw_get_att_egress_map(struct ifnet *ifp, unsigned int vlan) +{ + struct fal_attribute_t qos_map_attr; + int rv = 0; + + if (!ifp) + return FAL_NULL_OBJECT_ID; + + if (ifp->if_type == IFT_ETHER) { + qos_map_attr.id = FAL_ROUTER_INTERFACE_ATTR_EGRESS_QOS_MAP; + rv = if_get_l3_intf_attr(ifp, 1, &qos_map_attr); + if (rv != -EOPNOTSUPP && qos_map_attr.value.objid) + return qos_map_attr.value.objid; + } else { + if (!vlan) { + struct fal_attribute_t port_attr_list[] = { + { .id = FAL_PORT_ATTR_QOS_EGRESS_MAP_ID, + .value.objid = FAL_QOS_NULL_OBJECT_ID } + }; + if (fal_l2_get_attrs(ifp->if_index, 1, + &port_attr_list[0]) == 0) + return port_attr_list[0].value.objid; + + return FAL_NULL_OBJECT_ID; + } + + struct fal_attribute_t vlan_attr[1] = { + { .id = FAL_VLAN_FEATURE_ATTR_QOS_EGRESS_MAP_ID, + .value.objid = FAL_QOS_NULL_OBJECT_ID } + }; + + struct if_vlan_feat *vlan_feat = if_vlan_feat_get(ifp, vlan); + if (!vlan_feat) { + DP_DEBUG(QOS_HW, ERR, DATAPLANE, + "Egress-map failed to retrieve intf %s vlan %d\n", + ifp->if_name, vlan); + return FAL_NULL_OBJECT_ID; + } + if (!fal_vlan_feature_get_attr(vlan_feat->fal_vlan_feat, 1, + &vlan_attr[0])) + return vlan_attr[0].value.objid; + } + return FAL_NULL_OBJECT_ID; +} + +fal_object_t 
qos_hw_get_att_ingress_map(struct ifnet *ifp, unsigned int vlan) +{ + if (!ifp) + return 0; + + if (!vlan) { + struct fal_attribute_t port_attr_list[] = { + { .id = FAL_PORT_ATTR_QOS_INGRESS_MAP_ID, + .value.objid = FAL_QOS_NULL_OBJECT_ID } + }; + if (fal_l2_get_attrs(ifp->if_index, 1, &port_attr_list[0]) == 0) + return port_attr_list[0].value.objid; + + return 0; + } + + struct fal_attribute_t vlan_attr[1] = { + { .id = FAL_VLAN_FEATURE_ATTR_QOS_INGRESS_MAP_ID, + .value.objid = FAL_QOS_NULL_OBJECT_ID } + }; + + struct if_vlan_feat *vlan_feat = if_vlan_feat_get(ifp, vlan); + if (!vlan_feat) { + DP_DEBUG(QOS_HW, ERR, DATAPLANE, + "Ingress-map failed to retrieve intf %s vlan %d\n", + ifp->if_name, vlan); + return 0; + } + if (!fal_vlan_feature_get_attr(vlan_feat->fal_vlan_feat, 1, + &vlan_attr[0])) + return vlan_attr[0].value.objid; + + return 0; +} +/* Build map_list with one entry per bit set in the map's in-use mask. */ +static void qos_hw_egressm_attrs(struct qos_mark_map *map, + struct fal_qos_map_list_t *map_list) +{ + int i; + int max_entries = ((map->type == EGRESS_DSCP) || + (map->type == EGRESS_DSCPGRP_DSCP)) ? + FAL_QOS_MAP_DSCP_VALUES : + FAL_QOS_MAP_DESIGNATION_VALUES; + uint64_t mask = ((map->type == EGRESS_DSCP) || + (map->type == EGRESS_DSCPGRP_DSCP)) ? 
+ map->dscp_used : + map->des_used; + + map_list->des_used = map->des_used; + for (i = 0; i < max_entries; i++) { + if (mask & (1UL << i)) { + map_list->count++; + map_list->list[i].key.des = i; + if (map->type == EGRESS_DSCPGRP_DSCP) { + map_list->list[i].value.dscp = + map->pcp_value[i]; + } else if (map->type == EGRESS_DESIGNATION_PCP) { + map_list->list[i].value.dot1p = + map->pcp_value[i]; + } + } + } +} + +static int +qos_hw_if_set_egress_map(struct ifnet *ifp, struct qos_mark_map *map) +{ + struct fal_attribute_t l3_egr_map_attr = { + .id = FAL_ROUTER_INTERFACE_ATTR_EGRESS_QOS_MAP, + }; + int ret = 0; + struct ifnet *child_ifp = NULL; + struct ifnet *parent_ifp = NULL; + struct ifnet *temp_ifp = NULL; + struct egress_map_subport_info *egr_map_subport = NULL; + struct egress_map_subport_info *parent_egr_map_subport = NULL; + bool is_sub_if = false; + + if (!map) { + /* Delete case */ + l3_egr_map_attr.value.objid = FAL_QOS_NULL_OBJECT_ID; + RTE_LOG(ERR, DATAPLANE, + "%s Egress map object is NULL\n", + ifp->if_name); + } else { + l3_egr_map_attr.value.objid = map->mark_obj; + } + + if (ifp->if_type == IFT_L2VLAN) { + parent_ifp = ifp->if_parent; + if ((!map) || (map->mark_obj == FAL_QOS_NULL_OBJECT_ID)) { + parent_egr_map_subport = qos_egress_map_subport_get( + parent_ifp, 0); + if (!parent_egr_map_subport) { + DP_DEBUG(QOS, ERR, DATAPLANE, + "Failed to get info for parent egr_map_subport ifp:%s\n", + parent_ifp->if_name); + return -EINVAL; + } + l3_egr_map_attr.value.objid = + (parent_egr_map_subport) ? 
+ parent_egr_map_subport->egr_map_obj : + FAL_QOS_NULL_OBJECT_ID; + } + temp_ifp = parent_ifp; + is_sub_if = true; + } else { + temp_ifp = ifp; + is_sub_if = false; + } + if (map && map->mark_obj != FAL_QOS_NULL_OBJECT_ID) { + egr_map_subport = qos_egress_map_subport_get_or_create( + ifp, temp_ifp, is_sub_if); + if (!egr_map_subport) + return -ENOMEM; + } else { + qos_egress_map_subport_delete(ifp, temp_ifp, + is_sub_if); + } + + /* + * For VIF interface and no egress map, have to apply + * parent interface egress map + */ + ret = if_set_l3_intf_attr(ifp, &l3_egr_map_attr); + + if (ret != 0) { + RTE_LOG(ERR, DATAPLANE, + "%s Setting Egress map %s failed: %d (%s)\n", + ifp->if_name, map ? map->map_name : "", ret, + strerror(-ret)); + return ret; + } + if (egr_map_subport) + egr_map_subport->egr_map_obj = (map) ? map->mark_obj : + FAL_QOS_NULL_OBJECT_ID; + + /* + * If the interface has sub-ports, need to check if egress-map + * has to be inherited to sub-ports which does not have any + * specific egress-map configured + */ + cds_list_for_each_entry_rcu(child_ifp, &ifp->if_list, if_list) { + if (!child_ifp || child_ifp->if_parent != ifp + || child_ifp->if_type != IFT_L2VLAN) + continue; + + egr_map_subport = + qos_egress_map_subport_get_or_create( + child_ifp, ifp, true); + if (!egr_map_subport) + return -ENOMEM; + + if (egr_map_subport->egr_map_obj == + FAL_QOS_NULL_OBJECT_ID) { + ret = if_set_l3_intf_attr(child_ifp, + &l3_egr_map_attr); + if (ret != 0) { + RTE_LOG(ERR, DATAPLANE, + "%s Setting Egress map %s failed: %d (%s)\n", + child_ifp->if_name, + map ? 
map->map_name : + "", ret, + strerror(-ret)); + return ret; + } + } + } + if (map) { + ifp->egr_map_obj = map->mark_obj; + RTE_LOG(INFO, DATAPLANE, + "%s Setting egress map:%s\n", + ifp->if_name, map->map_name); + } + return 0; +} +/* Attach a created egress map to a port, VLAN feature or L3 interface. */ +static int qos_hw_egressm_attach(unsigned int ifindex, unsigned int vlan, + struct qos_mark_map *map) +{ + int ret = 0; + struct ifnet *ifp = dp_ifnet_byifindex(ifindex); + + if (map->mark_obj == FAL_QOS_NULL_OBJECT_ID) { + DP_DEBUG(QOS_HW, ERR, DATAPLANE, + "Invalid egress-map attach, not created %s\n", + map->map_name); + + return -ENOENT; + } + + if (!ifp) { + DP_DEBUG(QOS_HW, ERR, DATAPLANE, + "Failed to retrieve ifp for egress feature %u\n", + ifindex); + return -ENOENT; + } + if (map->type != EGRESS_DSCPGRP_DSCP) { + if (!vlan) { + struct fal_attribute_t port_attr_list = { + .id = FAL_PORT_ATTR_QOS_EGRESS_MAP_ID, + .value.objid = map->mark_obj + }; + fal_l2_upd_port(ifindex, &port_attr_list); + + DP_DEBUG(QOS_HW, DEBUG, DATAPLANE, + "Created egress feature on if %u\n", ifindex); + + return 0; + } + struct if_vlan_feat *vlan_feat; + + struct fal_attribute_t vlan_attr[] = { + { .id = FAL_VLAN_FEATURE_INTERFACE_ID, + .value.u32 = ifindex }, + { .id = FAL_VLAN_FEATURE_VLAN_ID, + .value.u16 = vlan }, + { .id = FAL_VLAN_FEATURE_ATTR_QOS_EGRESS_MAP_ID, + .value.objid = map->mark_obj } + }; + + vlan_feat = if_vlan_feat_get(ifp, vlan); + if (!vlan_feat) { + ret = if_vlan_feat_create(ifp, vlan, + FAL_NULL_OBJECT_ID); + if (ret) { + DP_DEBUG(QOS_HW, ERR, DATAPLANE, + "Failed to create feature for if %s" + " vlan %u\n", + ifp->if_name, vlan); + return ret; + } + vlan_feat = if_vlan_feat_get(ifp, vlan); + if (!vlan_feat) { + DP_DEBUG(QOS_HW, ERR, DATAPLANE, + "Failed to get feature for if %s " + "vlan %u\n", + ifp->if_name, vlan); + return -ENOENT; + } + ret = fal_vlan_feature_create(ARRAY_SIZE(vlan_attr), + vlan_attr, &vlan_feat->fal_vlan_feat); + if (ret && ret != -EOPNOTSUPP) { + DP_DEBUG(QOS_HW, ERR, DATAPLANE, + "Can not create 
vlan_feat for vlan %u " + "fal %d\n", + vlan, ret); + if_vlan_feat_delete(ifp, vlan); + return ret; + } + } else { + ret = fal_vlan_feature_set_attr( + vlan_feat->fal_vlan_feat, + &vlan_attr[2]); + if (ret) { + DP_DEBUG(QOS_HW, ERR, DATAPLANE, + "Failed to add egress map to if %s " + "vlan %u\n", + ifp->if_name, vlan); + return ret; + } + } + vlan_feat->refcount++; + DP_DEBUG(QOS_HW, DEBUG, DATAPLANE, + "Successfully added feature for if %s vlan %u\n", + ifp->if_name, vlan); + } else if (map->type == EGRESS_DSCPGRP_DSCP) { + qos_hw_if_set_egress_map(ifp, map); + } else { + DP_DEBUG(QOS_HW, ERR, DATAPLANE, + "Unknown Egress map type:%d for ifindex:%u\n", + map->type, ifindex); + return -EOPNOTSUPP; + } + DP_DEBUG(QOS_HW, DEBUG, DATAPLANE, + "Created egress feature on if:%s ifindex:%d vlan %u\n", + ifp->if_name, ifp->if_index, vlan); + + return ret; +} + +static int qos_hw_egressm_detach(unsigned int ifindex, unsigned int vlan, + struct qos_mark_map *map) +{ + int ret; + struct ifnet *ifp = dp_ifnet_byifindex(ifindex); + if (!ifp) { + DP_DEBUG(QOS_HW, ERR, DATAPLANE, + "Failed to retrieve ifp for egress feat %u\n", + ifindex); + + return -ENOENT; + } + + if (!vlan && ifp->if_type != IFT_ETHER) { + struct fal_attribute_t port_attr_list[] = { + { .id = FAL_PORT_ATTR_QOS_EGRESS_MAP_ID, + .value.objid = FAL_NULL_OBJECT_ID } + }; + + fal_l2_upd_port(ifindex, &port_attr_list[0]); + DP_DEBUG(QOS_HW, DEBUG, DATAPLANE, + "Removed egress feature on if %u\n", ifindex); + return 0; + } + if (map->type != EGRESS_DSCPGRP_DSCP) { + struct if_vlan_feat *vlan_feat = NULL; + struct fal_attribute_t vlan_attr[1] = { + { .id = FAL_VLAN_FEATURE_ATTR_QOS_EGRESS_MAP_ID, + .value.objid = FAL_NULL_OBJECT_ID } + }; + vlan_feat = if_vlan_feat_get(ifp, vlan); + if (!vlan_feat) { + DP_DEBUG(QOS_HW, ERR, DATAPLANE, + "Could not find vlan feat for intf %s vlan %d\n", + ifp->if_name, vlan); + return -ENOENT; + } + + ret = fal_vlan_feature_set_attr(vlan_feat->fal_vlan_feat, + vlan_attr); + if (ret && 
ret != -EOPNOTSUPP) { + DP_DEBUG(QOS_HW, ERR, DATAPLANE, + "Could not remove vlan_feat for vlan %d in fal (%d)\n", + vlan, ret); + return ret; + } + + vlan_feat->refcount--; + + if (!vlan_feat->refcount) { + ret = fal_vlan_feature_delete(vlan_feat->fal_vlan_feat); + if (ret) { + DP_DEBUG(QOS_HW, ERR, DATAPLANE, + "Could not destroy fal vlan feature obj" + " for %s vlan %d (%d)\n", + ifp->if_name, vlan, ret); + return ret; + } + + ret = if_vlan_feat_delete(ifp, vlan); + if (ret) { + DP_DEBUG(QOS_HW, ERR, DATAPLANE, + "Could not destroy vlan feature obj for " + "%s vlan %d (%d)\n", + ifp->if_name, vlan, ret); + return ret; + } + } + } else if (map->type == EGRESS_DSCPGRP_DSCP) { + qos_hw_if_set_egress_map(ifp, NULL); + } else { + DP_DEBUG(QOS_HW, ERR, DATAPLANE, + "Unknown Egress map type:%d for ifindex:%u\n", + map->type, ifindex); + return -EOPNOTSUPP; + } + + DP_DEBUG(QOS_HW, DEBUG, DATAPLANE, + "Deleted vlan egress feature obj for %s vlan %u\n", + ifp->if_name, vlan); + + return 0; +} + +static int qos_hw_egressm_config(struct qos_mark_map *map, + bool create) +{ + if (!create) { + /* Make sure the attach went ok */ + if (map->mark_obj != FAL_QOS_NULL_OBJECT_ID) { + fal_qos_del_map(map->mark_obj); + map->mark_obj = FAL_QOS_NULL_OBJECT_ID; + } + DP_DEBUG(QOS_HW, DEBUG, DATAPLANE, + "Deleted fal egress map %s\n", map->map_name); + return 0; + } + + struct fal_qos_map_list_t map_list = {0}; + struct fal_attribute_t attr_list[] = { + { .id = FAL_QOS_MAP_ATTR_TYPE, + .value.u8 = FAL_QOS_MAP_TYPE_DESIGNATOR_TO_DOT1P }, + { .id = FAL_QOS_MAP_ATTR_MAP_TO_VALUE_LIST, + .value.maplist = &map_list }, + }; + int ret; + + if (map->type == EGRESS_DSCPGRP_DSCP) + attr_list[0].value.u8 = FAL_QOS_MAP_TYPE_DSCP_TO_DSCP; + + qos_hw_egressm_attrs(map, &map_list); + + ret = fal_qos_new_map(FAL_QOS_NULL_OBJECT_ID, ARRAY_SIZE(attr_list), + attr_list, &map->mark_obj); + + DP_DEBUG(QOS_HW, DEBUG, DATAPLANE, "Created egress map %s with" + " mark_obj:%lu\n", map->map_name, map->mark_obj); 
+ + return ret; +} + +int qos_hw_init(void) { - int i, num_maps; - - struct subport_info *sinfo = qinfo->subport + subport; - uint8_t profile_id = sinfo->profile_map[pipe]; - struct profile_wred_info *p_wred_info = - &qinfo->wred_profiles[profile_id]; - uint8_t qindex = (tc * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS) + q; - struct queue_wred_info *q_wred_info = &p_wred_info->queue_wred[qindex]; + qos_ingressm.qos_ingressm_attach = qos_hw_ingressm_attach; + qos_ingressm.qos_ingressm_detach = qos_hw_ingressm_detach; + qos_ingressm.qos_ingressm_config = qos_hw_ingressm_config; - num_maps = q_wred_info->num_maps; - if (num_maps) { - char *grp_name; + qos_egressm.qos_egressm_attach = qos_hw_egressm_attach; + qos_egressm.qos_egressm_detach = qos_hw_egressm_detach; + qos_egressm.qos_egressm_config = qos_hw_egressm_config; - jsonw_name(wr, "wred_map"); - jsonw_start_array(wr); - for (i = 0; i < num_maps; i++) { - grp_name = q_wred_info->dscp_grp_names[i]; - if (grp_name == NULL) - break; - jsonw_start_object(wr); - jsonw_string_field(wr, "res_grp", grp_name); - jsonw_uint_field(wr, "random_dscp_drop", - random_dscp_drop[i]); - jsonw_end_object(wr); - } - jsonw_end_array(wr); - } + return 0; } /* @@ -502,12 +1312,6 @@ int qos_hw_port(struct ifnet *ifp, unsigned int subports, unsigned int pipes, { int retval = 0; - /* - * No hardware support - */ - if (!ifp->hw_forwarding) - return -ENODEV; - /* Drop old config if any */ struct sched_info *qinfo = ifp->if_qos; @@ -571,6 +1375,12 @@ int qos_hw_enable(struct ifnet *ifp, struct sched_info *qinfo) DP_DEBUG(QOS_HW, DEBUG, DATAPLANE, "hardware enable, if-index: %u\n", ifp->if_index); + if (!ifp->hw_forwarding) { + DP_DEBUG(QOS_HW, DEBUG, DATAPLANE, + "interface not hw forwarding, QoS not started\n"); + return 0; + } + rte_eth_link_get_nowait(ifp->if_port, &link); if (link.link_status) { ret = qos_sched_start(ifp, link.link_speed); @@ -694,7 +1504,8 @@ qos_hw_create_group_and_sched(struct qos_obj_db_obj *db_obj, uint8_t level, uint16_t 
max_children, uint8_t sched_type, uint64_t bandwidth, uint64_t burst, int8_t overhead, - fal_object_t *child_obj, uint16_t vlan) + fal_object_t *child_obj, uint16_t vlan, + uint8_t lp_des) { fal_object_t sch_obj; fal_object_t grp_obj = FAL_QOS_NULL_OBJECT_ID; @@ -733,7 +1544,7 @@ qos_hw_create_group_and_sched(struct qos_obj_db_obj *db_obj, return ret; } - struct fal_attribute_t grp_attr_list[6] = { + struct fal_attribute_t grp_attr_list[7] = { { .id = FAL_QOS_SCHED_GROUP_ATTR_SG_INDEX, .value.u32 = sched_group_id }, { .id = FAL_QOS_SCHED_GROUP_ATTR_LEVEL, @@ -764,6 +1575,14 @@ qos_hw_create_group_and_sched(struct qos_obj_db_obj *db_obj, attr_count++; } + if ((level == FAL_QOS_SCHED_GROUP_LEVEL_PIPE) && + (lp_des != INGRESS_DESIGNATORS)) { + grp_attr_list[attr_count].id = + FAL_QOS_SCHED_GROUP_ATTR_LOCAL_PRIORITY_DESIGNATOR; + grp_attr_list[attr_count].value.u8 = lp_des; + attr_count++; + } + ret = fal_qos_new_sched_group(switch_id, attr_count, grp_attr_list, &grp_obj); @@ -779,6 +1598,11 @@ qos_hw_create_group_and_sched(struct qos_obj_db_obj *db_obj, return ret; } +void qos_hw_del_map(fal_object_t mark_obj) +{ + (void)fal_qos_del_map(mark_obj); +} + static void qos_hw_delete_callback(struct qos_obj_db_obj *db_obj) { @@ -786,7 +1610,6 @@ qos_hw_delete_callback(struct qos_obj_db_obj *db_obj) fal_object_t scheduler_obj; fal_object_t queue_obj; fal_object_t wred_obj; - fal_object_t ingress_map_obj; fal_object_t egress_map_obj; int32_t hw_status; @@ -798,9 +1621,6 @@ qos_hw_delete_callback(struct qos_obj_db_obj *db_obj) qos_obj_db_hw_get(db_obj, QOS_OBJ_HW_TYPE_SCHEDULER, &hw_status, &scheduler_obj); - qos_obj_db_hw_get(db_obj, QOS_OBJ_HW_TYPE_INGRESS_MAP, &hw_status, - &ingress_map_obj); - qos_obj_db_hw_get(db_obj, QOS_OBJ_HW_TYPE_EGRESS_MAP, &hw_status, &egress_map_obj); @@ -815,11 +1635,6 @@ qos_hw_delete_callback(struct qos_obj_db_obj *db_obj) FAL_QOS_SCHED_GROUP_ATTR_SCHEDULER_ID, FAL_QOS_NULL_OBJECT_ID); - if (sched_group_obj && ingress_map_obj) - 
(void)qos_hw_update_sched_group(sched_group_obj, - FAL_QOS_SCHED_GROUP_ATTR_INGRESS_MAP_ID, - FAL_QOS_NULL_OBJECT_ID); - if (sched_group_obj && egress_map_obj) (void)qos_hw_update_sched_group(sched_group_obj, FAL_QOS_SCHED_GROUP_ATTR_EGRESS_MAP_ID, @@ -850,12 +1665,6 @@ qos_hw_delete_callback(struct qos_obj_db_obj *db_obj) if (scheduler_obj) (void)fal_qos_del_scheduler(scheduler_obj); - if (ingress_map_obj) - (void)fal_qos_del_map(ingress_map_obj); - - if (egress_map_obj) - (void)fal_qos_del_map(egress_map_obj); - if (queue_obj) (void)fal_qos_del_queue(queue_obj); @@ -895,38 +1704,47 @@ qos_hw_upd_u32_attr(struct fal_attribute_t *attr_list, uint32_t array_size, static int qos_hw_create_wred(struct qos_obj_db_obj *db_obj, - struct rte_red_params *wred_params, - struct queue_wred_info *q_wred_info, fal_object_t *wred_obj) + uint64_t tc_rate, + struct qos_red_params *wred_params, + struct qos_red_pipe_params *q_wred_info, + fal_object_t *wred_obj) { uint32_t switch_id = 0; + uint32_t wred_min_th = 0; + uint32_t wred_max_th = 0; int ret = 0; - /* - * We can get WRED configurations from two different places. Either - * from the "... traffic-class <0..3> random-detect ..." command or - * the "... queue <0..31> wred-map dscp-group ..." command, but the - * QoS perl validation scripts should mean that we never have both. - */ - if ((wred_params->min_th != 0 && wred_params->max_th != 0) && - (q_wred_info != NULL && q_wred_info->num_maps != 0)) { - DP_DEBUG(QOS, ERR, DATAPLANE, - "Conflicting WRED configurations\n"); + if (!qos_wred_threshold_get(wred_params, tc_rate, + &wred_min_th, &wred_max_th)) return -EINVAL; - } /* * The queue can have an optional wred object associated with it. * Create the wred object if wred has been configured. * When wred is configured both min_th and max_th are non-zero. */ - if (wred_params->min_th != 0 && wred_params->max_th != 0) { + if (wred_min_th != 0 && wred_max_th != 0) { + /* + * We can get WRED configurations from two different places. 
+ * Either from the + * "... traffic-class <0..3> random-detect ..." command or + * the "... queue <0..31> wred-map dscp-group ..." command, + * but the QoS perl validation scripts should mean that we + * never have both. + */ + if (q_wred_info != NULL && + q_wred_info->red_q_params.num_maps != 0) { + DP_DEBUG(QOS, ERR, DATAPLANE, + "Conflicting WRED configurations\n"); + return -EINVAL; + } struct fal_attribute_t wred_attr_list[] = { { .id = FAL_QOS_WRED_ATTR_GREEN_ENABLE, .value.u8 = true }, { .id = FAL_QOS_WRED_ATTR_GREEN_MIN_THRESHOLD, - .value.u32 = wred_params->min_th }, + .value.u32 = wred_min_th }, { .id = FAL_QOS_WRED_ATTR_GREEN_MAX_THRESHOLD, - .value.u32 = wred_params->max_th }, + .value.u32 = wred_max_th }, { .id = FAL_QOS_WRED_ATTR_GREEN_DROP_PROBABILITY, .value.u32 = wred_params->maxp_inv }, { .id = FAL_QOS_WRED_ATTR_WEIGHT, @@ -938,7 +1756,8 @@ qos_hw_create_wred(struct qos_obj_db_obj *db_obj, ret = fal_qos_new_wred(switch_id, ARRAY_SIZE(wred_attr_list), wred_attr_list, wred_obj); - } else if (q_wred_info != NULL && q_wred_info->num_maps != 0) { + } else if (q_wred_info != NULL && + q_wred_info->red_q_params.num_maps != 0) { uint8_t colour; /* @@ -948,7 +1767,7 @@ qos_hw_create_wred(struct qos_obj_db_obj *db_obj, */ struct fal_attribute_t wred_attr_list[] = { { .id = FAL_QOS_WRED_ATTR_WEIGHT, - .value.u8 = q_wred_info->filter_weight }, + .value.u8 = q_wred_info->red_q_params.filter_weight }, { .id = FAL_QOS_WRED_ATTR_GREEN_ENABLE, .value.booldata = false }, { .id = FAL_QOS_WRED_ATTR_GREEN_MIN_THRESHOLD, @@ -976,12 +1795,22 @@ qos_hw_create_wred(struct qos_obj_db_obj *db_obj, }; for (colour = FAL_PACKET_COLOUR_GREEN; - colour < q_wred_info->num_maps; colour++) { - struct red_params *colour_params = - &q_wred_info->params.map_params_bytes[colour]; + colour < NUM_DPS; colour++) { + struct qos_red_params *colour_params; + + if (!(q_wred_info->red_q_params.dps_in_use & + (1 << colour))) + continue; + + colour_params = + 
&q_wred_info->red_q_params.qparams[colour]; + + uint32_t min_th = 0; + uint32_t max_th = 0; + if (!qos_wred_threshold_get(colour_params, tc_rate, + &min_th, &max_th)) + return -EINVAL; - uint32_t min_th = colour_params->min_th; - uint32_t max_th = colour_params->max_th; bool enabled; enabled = (min_th != 0 && max_th != 0) ? true : false; @@ -1068,28 +1897,77 @@ qos_hw_create_wred(struct qos_obj_db_obj *db_obj, return ret; } +/* Max bandwidth rate of the scheduler bound to tc_sched_obj; 0 on error. */ +static uint64_t +qos_hw_get_rate(fal_object_t tc_sched_obj) +{ + int ret = 0; + fal_object_t scheduler_obj; + + /* Get Scheduler object */ + struct fal_attribute_t attr_list[] = { + { .id = FAL_QOS_SCHED_GROUP_ATTR_SCHEDULER_ID, + .value.objid = FAL_QOS_NULL_OBJECT_ID }, + }; + + ret = fal_qos_get_sched_group_attrs(tc_sched_obj, ARRAY_SIZE(attr_list), + attr_list); + if (ret) { + DP_DEBUG(QOS_HW, ERR, DATAPLANE, + "FAL failed to get sched-group attributes, status: " + "%d\n", ret); + return 0; + } + scheduler_obj = attr_list[0].value.objid; + + /* Get max_bandwidth from scheduler object */ + if (scheduler_obj != FAL_QOS_NULL_OBJECT_ID) { + attr_list[0].id = FAL_QOS_SCHEDULER_ATTR_MAX_BANDWIDTH_RATE; + attr_list[0].value.u64 = 0; + + ret = fal_qos_get_scheduler_attrs(scheduler_obj, + ARRAY_SIZE(attr_list), attr_list); + if (ret) { + DP_DEBUG(QOS_HW, ERR, DATAPLANE, + "FAL failed to get scheduler attributes, status: " + "%d\n", ret); + return 0; + } + return (attr_list[0].value.u64); + } + + DP_DEBUG(QOS_HW, ERR, DATAPLANE, + "Failed to get Bandwidth rate.\n"); + return 0; +} + static int qos_hw_create_queue_and_sched(struct qos_obj_db_obj *db_obj, fal_object_t parent_obj, uint32_t queue_limit, - uint8_t wrr_weight, - struct rte_red_params *wred_params, - struct queue_wred_info *q_wred_info, + uint8_t wrr_weight, uint8_t designator, + struct qos_red_params *wred_params, + struct qos_red_pipe_params *q_wred_info, uint32_t tc_id, uint32_t q_id, - bool local_priority_queue, fal_object_t *child_obj) { fal_object_t wred_obj = 
FAL_QOS_NULL_OBJECT_ID; fal_object_t sch_obj = FAL_QOS_NULL_OBJECT_ID; fal_object_t queue_obj = FAL_QOS_NULL_OBJECT_ID; uint32_t switch_id = 0; + uint64_t tc_rate = 0; int ret; DP_DEBUG(QOS_HW, DEBUG, DATAPLANE, "creating queue and scheduler\n"); *child_obj = FAL_QOS_NULL_OBJECT_ID; - ret = qos_hw_create_wred(db_obj, wred_params, q_wred_info, &wred_obj); + /* Get bandwidth to convert usec limits to bytes */ + tc_rate = qos_hw_get_rate(parent_obj); + + ret = qos_hw_create_wred(db_obj, tc_rate, wred_params, + q_wred_info, &wred_obj); + if (ret) return ret; @@ -1137,8 +2015,8 @@ qos_hw_create_queue_and_sched(struct qos_obj_db_obj *db_obj, .value.u8 = FAL_QOS_QUEUE_TYPE_ALL }, { .id = FAL_QOS_QUEUE_ATTR_TC, .value.u8 = tc_id }, - { .id = FAL_QOS_QUEUE_ATTR_LOCAL_PRIORITY, - .value.booldata = local_priority_queue }, + { .id = FAL_QOS_QUEUE_ATTR_DESIGNATOR, + .value.u8 = designator }, }; ret = fal_qos_new_queue(switch_id, ARRAY_SIZE(queue_attr_list), @@ -1158,9 +2036,9 @@ qos_hw_create_queue_and_sched(struct qos_obj_db_obj *db_obj, static int qos_hw_new_wrr_queue(fal_object_t tc_sched_obj, uint32_t queue_limit, - uint8_t local_priority_wrr, uint8_t wrr_weight, - struct rte_red_params *red_params, - struct queue_wred_info *q_wred_info, uint32_t *ids) + uint8_t wrr_weight, uint8_t designator, + struct qos_red_params *red_params, + struct qos_red_pipe_params *q_wred_info, uint32_t *ids) { char ids_str[QOS_OBJ_DB_MAX_ID_LEN + 1]; struct qos_obj_db_obj *db_obj; @@ -1186,12 +2064,12 @@ qos_hw_new_wrr_queue(fal_object_t tc_sched_obj, uint32_t queue_limit, qos_obj_db_sw_set(db_obj, QOS_OBJ_SW_STATE_HW_PROG_IN_PROGRESS); ret = qos_hw_create_queue_and_sched(db_obj, tc_sched_obj, - queue_limit, wrr_weight, red_params, + queue_limit, wrr_weight, + designator, + red_params, q_wred_info, ids[QOS_OBJ_DB_LEVEL_TC], ids[QOS_OBJ_DB_LEVEL_QUEUE], - (local_priority_wrr == - ids[QOS_OBJ_DB_LEVEL_QUEUE]), &queue_obj); if (ret) qos_obj_db_sw_set(db_obj, QOS_OBJ_SW_STATE_HW_PROG_FAILED); @@ 
-1203,10 +2081,10 @@ qos_hw_new_wrr_queue(fal_object_t tc_sched_obj, uint32_t queue_limit, static int qos_hw_new_tc(uint32_t tc_id, fal_object_t pipe_sched_obj, - uint32_t tc_rate, uint32_t tc_size, uint32_t queue_limit, - uint8_t local_priority_wrr, uint8_t *wrr_weight, - struct rte_red_params *red_params, - struct queue_wred_info **q_wred_info, uint32_t *ids, + uint64_t tc_rate, uint32_t tc_size, uint32_t queue_limit, + uint8_t *wrr_weight, uint8_t *designators, + struct qos_red_params *red_params, + struct qos_red_pipe_params **q_wred_info, uint32_t *ids, int8_t overhead) { char ids_str[QOS_OBJ_DB_MAX_ID_LEN + 1]; @@ -1247,7 +2125,8 @@ qos_hw_new_tc(uint32_t tc_id, fal_object_t pipe_sched_obj, queues_configured, FAL_QOS_SCHEDULING_TYPE_WRR, tc_rate, tc_size, overhead, - &tc_sched_obj, 0); + &tc_sched_obj, 0, + INGRESS_DESIGNATORS); for (q_id = 0; !ret && q_id < RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS; q_id++) { @@ -1255,8 +2134,8 @@ qos_hw_new_tc(uint32_t tc_id, fal_object_t pipe_sched_obj, ids[QOS_OBJ_DB_LEVEL_QUEUE] = q_id; ret = qos_hw_new_wrr_queue(tc_sched_obj, queue_limit, - local_priority_wrr, wrr_weight[q_id], + designators[q_id], red_params, q_wred_info[q_id], ids); @@ -1272,46 +2151,83 @@ qos_hw_new_tc(uint32_t tc_id, fal_object_t pipe_sched_obj, } static int -qos_hw_create_and_attach_map(struct qos_obj_db_obj *db_obj, - fal_object_t pipe_sched_obj, - enum qos_obj_hw_type hw_type, - enum fal_qos_map_type_t map_type, - struct fal_qos_map_list_t *map_list, - bool local_priority) +qos_hw_egress_map_attach(struct qos_obj_db_obj *db_obj, + fal_object_t pipe_sched_obj, + enum fal_qos_map_type_t map_type, + struct fal_qos_map_list_t *map_list, + fal_object_t *mark_obj) { - enum fal_qos_sched_group_attr_t sched_group_map_id; struct fal_attribute_t attr_list[] = { { .id = FAL_QOS_MAP_ATTR_TYPE, .value.u8 = map_type }, { .id = FAL_QOS_MAP_ATTR_MAP_TO_VALUE_LIST, .value.maplist = map_list }, - { .id = FAL_QOS_MAP_ATTR_LOCAL_PRIORITY_QUEUE, - .value.booldata = 
local_priority }, }; - fal_object_t map_obj; + fal_object_t map_obj = FAL_QOS_NULL_OBJECT_ID; int ret; - /* - * Create the map object and attach it to the pipe sched-group. - */ - ret = fal_qos_new_map(pipe_sched_obj, ARRAY_SIZE(attr_list), attr_list, - &map_obj); - - qos_obj_db_hw_set(db_obj, hw_type, ret, map_obj); + if (!*mark_obj) { + /* + * Create the map object and attach it to the pipe + * sched-group. + */ + ret = fal_qos_new_map(pipe_sched_obj, ARRAY_SIZE(attr_list), + attr_list, &map_obj); + *mark_obj = map_obj; + } else { + map_obj = *mark_obj; + ret = 0; + } - if (hw_type == QOS_OBJ_HW_TYPE_INGRESS_MAP) - sched_group_map_id = FAL_QOS_SCHED_GROUP_ATTR_INGRESS_MAP_ID; - else - sched_group_map_id = FAL_QOS_SCHED_GROUP_ATTR_EGRESS_MAP_ID; + qos_obj_db_hw_set(db_obj, QOS_OBJ_HW_TYPE_EGRESS_MAP, ret, map_obj); if (ret) DP_DEBUG(QOS_HW, ERR, DATAPLANE, "FAL failed to create qos-map, status: %d\n", ret); else ret = qos_hw_update_sched_group(pipe_sched_obj, - sched_group_map_id, - map_obj); + FAL_QOS_SCHED_GROUP_ATTR_EGRESS_MAP_ID, + map_obj); + + return ret; +} + +static int +qos_hw_ingress_map_attach(fal_object_t pipe_sched_obj, + enum fal_qos_map_type_t map_type, + struct fal_qos_map_list_t *map_list) + +{ + struct fal_attribute_t attr_list[] = { + { .id = FAL_QOS_MAP_ATTR_TYPE, + .value.u8 = map_type }, + { .id = FAL_QOS_MAP_ATTR_MAP_TO_VALUE_LIST, + .value.maplist = map_list }, + { .id = FAL_QOS_MAP_ATTR_INGRESS_SYSTEM_DEFAULT, + .value.booldata = true }, + }; + fal_object_t map_obj; + int ret; + + /* + * If we're using the legacy config we setup a single system-default + * map and use it so only install the first map, all the others should + * be the same since we only support a single ingress map. + */ + if (map_type == FAL_QOS_MAP_TYPE_DSCP_TO_DESIGNATOR) { + if (qos_global_map_obj != FAL_QOS_NULL_OBJECT_ID) + return 0; + } + + /* + * Create the map object and attach it to the pipe sched-group. 
+ */ + ret = fal_qos_new_map(pipe_sched_obj, ARRAY_SIZE(attr_list), attr_list, + &map_obj); + + if (map_type == FAL_QOS_MAP_TYPE_DSCP_TO_DESIGNATOR) + qos_global_map_obj = map_obj; return ret; } @@ -1344,8 +2260,8 @@ qmap_to_fal_colour(uint8_t q, enum fal_packet_colour *fal_colour) } static int -qos_hw_create_ingress_map(struct qos_obj_db_obj *db_obj, - fal_object_t pipe_sched_obj, struct queue_map *qmap) +qos_hw_create_ingress_map(fal_object_t pipe_sched_obj, struct queue_map *qmap, + const uint8_t *des2q) { uint8_t cp; uint8_t q; @@ -1355,38 +2271,57 @@ qos_hw_create_ingress_map(struct qos_obj_db_obj *db_obj, DP_DEBUG(QOS_HW, DEBUG, DATAPLANE, "creating ingress qos-map\n"); - if (qmap->pcp_enabled == 1) { - map_type = FAL_QOS_MAP_TYPE_DOT1P_TO_TC; - map_list.count = MAX_PCP; - for (cp = 0; cp < MAX_PCP; cp++) { - q = qmap->pcp2q[cp]; - map_list.list[cp].key.dot1p = cp; - map_list.list[cp].value.tc = qmap_to_tc(q); - map_list.list[cp].value.wrr = qmap_to_wrr(q); - ret = qmap_to_fal_colour( - q, &map_list.list[cp].value.color); - if (ret < 0) - return ret; - } - } else { - map_type = FAL_QOS_MAP_TYPE_DSCP_TO_TC; + if (qmap->dscp_enabled == 1) { + map_type = FAL_QOS_MAP_TYPE_DSCP_TO_DESIGNATOR; map_list.count = MAX_DSCP; for (cp = 0; cp < MAX_DSCP; cp++) { + int i; + uint8_t des = 0; + bool found_des = false; + q = qmap->dscp2q[cp]; map_list.list[cp].key.dscp = cp; - map_list.list[cp].value.tc = qmap_to_tc(q); - map_list.list[cp].value.wrr = qmap_to_wrr(q); + for (i = 0; i < INGRESS_DESIGNATORS; i++) { + if (des2q[i] == (DES_IN_USE | q_from_mask(q))) { + found_des = true; + des = i; + break; + } + } + if (!found_des) { + DP_DEBUG(QOS_HW, ERR, DATAPLANE, + "map create, no designator\n"); + return -EINVAL; + } + map_list.list[cp].value.des = des; ret = qmap_to_fal_colour( - q, &map_list.list[cp].value.color); + q, &map_list.list[cp].value.colour); if (ret < 0) return ret; + + DP_DEBUG(QOS_HW, DEBUG, DATAPLANE, + "map DSCP %d to tc/wrr %d/%d, des %d col %d\n", + cp, 
qmap_to_tc(q), qmap_to_wrr(q), des, + map_list.list[cp].value.colour); } + /* + * If we're using the designation CLI the ingress-map has been + * moved out of the policy and is attached to the interface or + * vlan using a separate CLI which calls qos_hw_ingressm_attach() + * to setup the classification designators. + * The cases above use the legacy CLI where the ingress-maps are + * derived from the policy, we only support a single map in this + * setup. + */ + } else if (qmap->designation == 1) { + return 0; + } else { + DP_DEBUG(QOS_HW, ERR, DATAPLANE, "Invalid map type\n"); + return -EINVAL; } - ret = qos_hw_create_and_attach_map(db_obj, pipe_sched_obj, - QOS_OBJ_HW_TYPE_INGRESS_MAP, - map_type, &map_list, - qmap->local_priority); + ret = qos_hw_ingress_map_attach(pipe_sched_obj, + map_type, &map_list); return ret; } @@ -1397,30 +2332,105 @@ qos_hw_create_egress_map(struct qos_obj_db_obj *db_obj, struct qos_mark_map *mark_map) { struct fal_qos_map_list_t map_list; - uint32_t dscp; - - for (dscp = 0; dscp < FAL_QOS_MAP_DSCP_VALUES; dscp++) { - map_list.list[dscp].key.dscp = dscp; - map_list.list[dscp].value.dot1p = mark_map->pcp_value[dscp]; + uint32_t dscp, entry; + enum fal_qos_map_type_t map_type; + + if (mark_map->type == EGRESS_DSCP) { + for (dscp = 0; dscp < FAL_QOS_MAP_DSCP_VALUES; dscp++) { + map_list.list[dscp].key.dscp = dscp; + map_list.list[dscp].value.dot1p = + mark_map->pcp_value[dscp]; + } + map_list.count = FAL_QOS_MAP_DSCP_VALUES; + map_type = FAL_QOS_MAP_TYPE_DSCP_TO_DOT1P; + } else { + map_list.count = FAL_QOS_MAP_DES_DP_VALUES; + map_type = FAL_QOS_MAP_TYPE_DESIGNATOR_TO_DOT1P; + for (entry = 0; entry < FAL_QOS_MAP_DES_DP_VALUES; entry++) { + map_list.list[entry].key.des = + entry/FAL_NUM_PACKET_COLOURS; + map_list.list[entry].key.colour = + entry%FAL_NUM_PACKET_COLOURS; + map_list.list[entry].value.dot1p = + mark_map->entries[entry].pcp_value; + } } - map_list.count = FAL_QOS_MAP_DSCP_VALUES; - DP_DEBUG(QOS_HW, DEBUG, DATAPLANE, "creating 
egress qos-map\n"); - return qos_hw_create_and_attach_map(db_obj, pipe_sched_obj, - QOS_OBJ_HW_TYPE_EGRESS_MAP, - FAL_QOS_MAP_TYPE_DSCP_TO_DOT1P, - &map_list, false); + DP_DEBUG(QOS_HW, DEBUG, DATAPLANE, "creating egress qos-map type %u\n", + map_type); + return qos_hw_egress_map_attach(db_obj, pipe_sched_obj, + map_type, &map_list, + &mark_map->mark_obj); +} + +static int qos_hw_setup_queues(struct queue_map *qmap, + struct qos_pipe_params *pipe_params, + uint32_t tc_id, + uint8_t *wrr_weight, uint8_t *designators, + struct qos_red_pipe_params **q_wred_info, + uint8_t *lp_wrr, uint64_t *dscp_bitmap, + const uint8_t *des2q, uint8_t lp_des) +{ + int cp; + uint8_t q; + uint8_t qindex; + uint8_t weight; + uint8_t tc; + uint8_t wrr; + + for (cp = 0; cp < MAX_DSCP; cp++) { + q = qmap->dscp2q[cp]; + qindex = q_from_mask(q); + weight = pipe_params->wrr_weights[qindex]; + tc = qmap_to_tc(q); + wrr = qmap_to_wrr(q); + if (tc == tc_id) { + int i; + uint8_t des = 0; + bool found_des = false; + + for (i = 0; i < INGRESS_DESIGNATORS; i++) { + if (des2q[i] == (DES_IN_USE | q_from_mask(q))) { + found_des = true; + des = i; + break; + } + } + if (!found_des) { + DP_DEBUG(QOS_HW, ERR, DATAPLANE, + "queue create, no designator\n"); + return -EINVAL; + } + designators[wrr] = des; + *dscp_bitmap |= 1ul << cp; + wrr_weight[wrr] = weight; + q_wred_info[wrr] = qos_red_find_q_params( + pipe_params, qindex); + } + } + if (qmap->local_priority) { + q = qmap->local_priority_queue; + qindex = q_from_mask(q); + weight = pipe_params->wrr_weights[qindex]; + tc = qmap_to_tc(q); + wrr = qmap_to_wrr(q); + if (tc == tc_id) { + *lp_wrr = wrr; + wrr_weight[wrr] = weight; + designators[wrr] = lp_des; + } + } + return 0; } static int qos_hw_new_pipe(uint32_t pipe_id, fal_object_t subport_sched_obj, - uint16_t *port_qsize, struct subport_info *sinfo, - struct rte_sched_pipe_params *pipe_params, - struct queue_map *qmap, struct profile_wred_info *p_wred_info, + struct qos_port_params *port_params, struct 
subport_info *sinfo, + struct qos_pipe_params *pipe_params, struct queue_map *qmap, uint32_t *ids, int8_t overhead) { - uint32_t tb_rate = pipe_params->tb_rate; - uint32_t tb_size = pipe_params->tb_size; + uint64_t tb_rate = pipe_params->shaper.tb_rate; + uint32_t tb_size = pipe_params->shaper.tb_size; char ids_str[QOS_OBJ_DB_MAX_ID_LEN + 1]; struct qos_obj_db_obj *db_obj; enum qos_obj_db_status db_ret; @@ -1428,6 +2438,15 @@ qos_hw_new_pipe(uint32_t pipe_id, fal_object_t subport_sched_obj, uint32_t tc_id; char *out_str; int ret; + uint8_t local_priority_wrr = RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS; + uint8_t bit; + uint8_t cp; + + /* Map of Designators to Queues */ + uint8_t des2q[INGRESS_DESIGNATORS] = {0}; + + /* Designator for the local prio queue */ + uint8_t lp_des = INGRESS_DESIGNATORS; out_str = qos_obj_db_get_ids_string(QOS_OBJ_DB_LEVEL_PIPE, ids, QOS_OBJ_DB_MAX_ID_LEN, ids_str); @@ -1445,16 +2464,58 @@ qos_hw_new_pipe(uint32_t pipe_id, fal_object_t subport_sched_obj, qos_obj_db_sw_set(db_obj, QOS_OBJ_SW_STATE_HW_PROG_IN_PROGRESS); + if (qmap->dscp_enabled) { + int i, ret; + + ret = qos_hw_setup_des2q(qmap, &des2q[0]); + if (ret) + return ret; + + if (qmap->local_priority) { + for (i = 0; i < INGRESS_DESIGNATORS; i++) { + if (des2q[i] & DES_IN_USE) + continue; + des2q[i] = DES_IN_USE | + q_from_mask(qmap->local_priority_queue); + lp_des = i; + break; + } + if (i == INGRESS_DESIGNATORS) { + DP_DEBUG(QOS_HW, ERR, DATAPLANE, + "no designator for PLQ\n"); + return -EINVAL; + } + } + } else if (qmap->designation && qmap->local_priority) { + /* + * If a designator for the PLQ was supplied by + * config then use it. Otherwise, look for a + * designation not currently in use. 
+ */ + lp_des = qos_get_prio_lp_des(); + + if (lp_des == INGRESS_DESIGNATORS) { + for (cp = 0, bit = 1; cp < INGRESS_DESIGNATORS; + cp++, bit <<= 1) { + if (!(pipe_params->des_set & bit)) { + if (lp_des == INGRESS_DESIGNATORS) + lp_des = cp; + break; + } + } + } + } + ret = qos_hw_create_group_and_sched(db_obj, pipe_id, subport_sched_obj, FAL_QOS_SCHED_GROUP_LEVEL_PIPE, RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE, FAL_QOS_SCHEDULING_TYPE_STRICT, tb_rate, tb_size, overhead, - &pipe_sched_obj, 0); + &pipe_sched_obj, 0, lp_des); if (!ret) - ret = qos_hw_create_ingress_map(db_obj, pipe_sched_obj, qmap); + ret = qos_hw_create_ingress_map(pipe_sched_obj, qmap, des2q); if (!ret && sinfo->mark_map) ret = qos_hw_create_egress_map(db_obj, pipe_sched_obj, @@ -1464,45 +2525,43 @@ qos_hw_new_pipe(uint32_t pipe_id, fal_object_t subport_sched_obj, tc_id++) { uint64_t dscp_bitmap = 0; uint8_t pcp_bitmap = 0; - uint8_t cp; + uint8_t des_bitmap = 0; uint8_t q; uint8_t qindex; uint8_t weight; uint8_t tc; uint8_t wrr; - uint8_t local_priority_wrr = RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS; uint8_t wrr_weight[RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS] = { 0 }; - struct queue_wred_info *q_wred_info + uint8_t designators[RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS] = { 0 }; + struct qos_red_pipe_params *q_wred_info [RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS] = { NULL }; - if (qmap->pcp_enabled == 1) { - for (cp = 0; cp < MAX_PCP; cp++) { - q = qmap->pcp2q[cp]; - qindex = q_from_mask(q); - weight = pipe_params->wrr_weights[qindex]; - tc = qmap_to_tc(q); - wrr = qmap_to_wrr(q); - if (tc == tc_id) { - pcp_bitmap |= 1 << cp; - wrr_weight[wrr] = weight; - } - } - } else { - /* - * If PCP mapping isn't explicitly configured, we're - * using DSCP mapping. 
- */ - for (cp = 0; cp < MAX_DSCP; cp++) { - q = qmap->dscp2q[cp]; + if (qmap->dscp_enabled == 1) { + ret = qos_hw_setup_queues(qmap, pipe_params, tc_id, + &wrr_weight[0], &designators[0], + &q_wred_info[0], + &local_priority_wrr, + &dscp_bitmap, &des2q[0], lp_des); + if (ret) + return ret; + } else if (qmap->designation == 1) { + for (cp = 0, bit = 1; cp < INGRESS_DESIGNATORS; + cp++, bit <<= 1) { + if (!(pipe_params->des_set & bit)) + continue; + + q = pipe_params->designation[cp]; qindex = q_from_mask(q); weight = pipe_params->wrr_weights[qindex]; tc = qmap_to_tc(q); wrr = qmap_to_wrr(q); if (tc == tc_id) { - dscp_bitmap |= 1ul << cp; + des_bitmap |= 1 << cp; wrr_weight[wrr] = weight; + designators[wrr] = cp; q_wred_info[wrr] = - &p_wred_info->queue_wred[qindex]; + qos_red_find_q_params( + pipe_params, qindex); } } if (qmap->local_priority) { @@ -1512,10 +2571,14 @@ qos_hw_new_pipe(uint32_t pipe_id, fal_object_t subport_sched_obj, tc = qmap_to_tc(q); wrr = qmap_to_wrr(q); if (tc == tc_id) { - local_priority_wrr = wrr; + des_bitmap |= 1 << lp_des; wrr_weight[wrr] = weight; + designators[wrr] = lp_des; } } + } else { + DP_DEBUG(QOS_HW, ERR, DATAPLANE, "No map type set\n"); + return -EINVAL; } /* @@ -1525,18 +2588,20 @@ qos_hw_new_pipe(uint32_t pipe_id, fal_object_t subport_sched_obj, if (!ret && ((local_priority_wrr < RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS) || pcp_bitmap || - dscp_bitmap)) { - uint32_t queue_limit = sinfo->qsize[tc_id] ? 
- sinfo->qsize[tc_id] : port_qsize[tc_id]; + dscp_bitmap || des_bitmap)) { + + uint32_t queue_limit = + qos_sp_qsize_get(port_params, + sinfo, tc_id); ids[QOS_OBJ_DB_LEVEL_TC] = tc_id; ret = qos_hw_new_tc(tc_id, pipe_sched_obj, - pipe_params->tc_rate[tc_id], + pipe_params->shaper.tc_rate[tc_id], tb_size, queue_limit, - local_priority_wrr, &wrr_weight[0], + &designators[0], &sinfo->red_params[tc_id] - [e_RTE_METER_GREEN], + [RTE_COLOR_GREEN], &q_wred_info[0], ids, overhead); } } @@ -1555,7 +2620,7 @@ qos_hw_new_subport(uint32_t subport_id, fal_object_t port_sched_obj, { struct subport_info *sinfo = qinfo->subport + ids[QOS_OBJ_DB_LEVEL_SUBPORT]; - uint32_t tb_rate = sinfo->params.tb_rate; + uint64_t tb_rate = sinfo->params.tb_rate; uint32_t tb_size = sinfo->params.tb_size; char ids_str[QOS_OBJ_DB_MAX_ID_LEN + 1]; fal_object_t subport_sched_obj; @@ -1594,22 +2659,22 @@ qos_hw_new_subport(uint32_t subport_id, fal_object_t port_sched_obj, FAL_QOS_SCHEDULING_TYPE_WRR, tb_rate, tb_size, overhead, &subport_sched_obj, - qinfo->subport[subport_id].vlan_id); + qinfo->subport[subport_id].vlan_id, + INGRESS_DESIGNATORS); for (pipe_id = 0; !ret && pipe_id < MAX_PIPES; pipe_id++) { if (sinfo->pipe_configured[pipe_id]) { uint8_t profile_id = sinfo->profile_map[pipe_id]; - struct rte_sched_pipe_params *pipe_params = + struct qos_pipe_params *pipe_params = qinfo->port_params.pipe_profiles + profile_id; struct queue_map *qmap = &qinfo->queue_map[profile_id]; - uint16_t *port_qsize = &qinfo->port_params.qsize[0]; - struct profile_wred_info *p_wred_info = - &qinfo->wred_profiles[profile_id]; + struct qos_port_params *port_params = + &qinfo->port_params; ids[QOS_OBJ_DB_LEVEL_PIPE] = pipe_id; ret = qos_hw_new_pipe(pipe_id, subport_sched_obj, - port_qsize, sinfo, pipe_params, - qmap, p_wred_info, ids, overhead); + port_params, sinfo, pipe_params, + qmap, ids, overhead); } } @@ -1668,7 +2733,8 @@ qos_hw_new_port(struct ifnet *ifp, struct sched_info *qinfo, uint64_t linerate) 
qinfo->n_subports, FAL_QOS_SCHEDULING_TYPE_WRR, linerate, 0, overhead, - &port_sched_group_obj, 0); + &port_sched_group_obj, 0, + INGRESS_DESIGNATORS); if (ret) qos_obj_db_sw_set(db_obj, QOS_OBJ_SW_STATE_HW_PROG_FAILED); else { @@ -1714,26 +2780,84 @@ int qos_hw_start(struct ifnet *ifp, struct sched_info *qinfo, uint64_t bps, uint16_t max_pkt_len) { unsigned int subport; - int ret; + int ret, i; + static uint32_t max_burst_size = 0; + struct fal_attribute_t max_burst_attr = { + .id = FAL_SWITCH_ATTR_MAX_BURST_SIZE}; DP_DEBUG(QOS_HW, DEBUG, DATAPLANE, "hardware start, if-index: %u", ifp->if_index); + if (!max_burst_size) { + if (!fal_get_switch_attrs(1, &max_burst_attr)) + max_burst_size = max_burst_attr.value.u32; + else + max_burst_size = QOS_MAX_BURST_SIZE_DEFAULT; + } + for (subport = 0; subport < qinfo->n_subports; subport++) { struct subport_info *sinfo = &qinfo->subport[subport]; - struct rte_sched_subport_params *params = &sinfo->params; + struct qos_shaper_conf *params = &sinfo->params; + + /* + * If we've received a rate auto we use the reported + * interface speed as the subport rate. + */ + if (sinfo->auto_speed) + qos_abs_rate_save(&sinfo->subport_rate, bps); /* * Establish subport rates before checking pipes so that the * pipes can be checked against their actual subport rates. */ qos_sched_subport_params_check(params, &sinfo->subport_rate, - sinfo->sp_tc_rates.tc_rate, max_pkt_len, bps); + sinfo->sp_tc_rates.tc_rate, max_pkt_len, + max_burst_size, bps, qinfo); + + /* + * If it's been a resource group change and we have a + * mark-map check whether it uses dscp-groups and recalculate + * the masks before resetting the port. 
+ */ + if (qinfo->reset_port == QOS_NPF_COMMIT && sinfo->mark_map) { + struct qos_mark_map *mark_map = sinfo->mark_map; + + if (SLIST_EMPTY(&mark_map->dscp_grps)) + continue; + + /* Clear previous config */ + for (i = 0; i < MAX_DSCP; i++) + mark_map->pcp_value[i] = 0; + + if (!SLIST_EMPTY(&mark_map->dscp_grps) && + mark_map->mark_obj) { + qos_hw_del_map(mark_map->mark_obj); + mark_map->mark_obj = FAL_QOS_NULL_OBJECT_ID; + } + struct dscp_grp_list *dscp_grp; + SLIST_FOREACH(dscp_grp, &mark_map->dscp_grps, list) { + uint64_t dscp_set = 0; + uint8_t dscp; + + if (npf_dscp_group_getmask(dscp_grp->name, + &dscp_set)) { + DP_DEBUG(QOS, ERR, DATAPLANE, + "Failed to retrieve dscp-group %s\n", + dscp_grp->name); + break; + } + for (dscp = 0; dscp < MAX_DSCP; dscp++) { + if (dscp_set & (1ul << dscp)) + mark_map->pcp_value[dscp] = + dscp_grp->pcp_val; + } + } + } /* Update NPF rules */ npf_cfg_commit_all(); } - qos_sched_pipe_check(qinfo, max_pkt_len, bps); + qos_sched_pipe_check(qinfo, max_pkt_len, max_burst_size, bps); ret = qos_hw_new_port(ifp, qinfo, bps); if (ret) @@ -1742,3 +2866,144 @@ int qos_hw_start(struct ifnet *ifp, struct sched_info *qinfo, uint64_t bps, return ret; } + +static struct egress_map_subport_info * +qos_egress_map_subport_get_or_create(struct ifnet *ifp, + struct ifnet *parent_ifp, bool is_sub_if) +{ + struct egress_map_subport_info *egr_map_subport = NULL; + struct ifnet *temp_ifp; + + if (is_sub_if) + temp_ifp = parent_ifp; + else + temp_ifp = ifp; + + egr_map_subport = qos_egress_map_subport_get(temp_ifp, + ifp->if_vlan); + if (!egr_map_subport) { + egr_map_subport = qos_egress_map_subport_new( + ifp, temp_ifp, is_sub_if); + if (!egr_map_subport) { + DP_DEBUG(QOS, ERR, DATAPLANE, + "Failed to allocate memory for egr_map_subport for %s\n", + (is_sub_if) ? 
"Child" : "Parent"); + return NULL; + } + } + return egr_map_subport; +} + +/* Delete egress map object */ +static int +qos_egress_map_subport_delete(struct ifnet *ifp, struct ifnet *parent_ifp, + bool is_sub_if) +{ + struct egress_map_subport_info *list_entry = NULL; + int list_cnt = 0; + + if (is_sub_if) { + SLIST_FOREACH(list_entry, + &parent_ifp->egr_map_info->egr_map_head, + egr_map_list) { + if (list_entry->vlan_id == ifp->if_vlan) + break; + } + if (list_entry) { + SLIST_REMOVE(&parent_ifp->egr_map_info->egr_map_head, + list_entry, egress_map_subport_info, + egr_map_list); + free(list_entry); + list_entry = NULL; + } + } else { + SLIST_FOREACH(list_entry, + &parent_ifp->egr_map_info->egr_map_head, + egr_map_list) { + list_cnt++; + if (list_entry->vlan_id == ifp->if_vlan) + break; + } + if (list_cnt == 1) { + /* Only parent egress map info exists */ + SLIST_REMOVE_HEAD( + &parent_ifp->egr_map_info->egr_map_head, + egr_map_list); + + free(list_entry); + list_entry = NULL; + free(parent_ifp->egr_map_info); + parent_ifp->egr_map_info = NULL; + } + } + return 0; +} + +static void +qos_hw_if_feat_mode_change(struct ifnet *ifp, + enum if_feat_mode_event event) +{ + struct ifnet *parent_ifp; + struct egress_map_subport_info *egr_map_subport = NULL; + struct egress_map_subport_info *parent_egr_map_subport = NULL; + struct fal_attribute_t l3_egr_map_attr = { + .id = FAL_ROUTER_INTERFACE_ATTR_EGRESS_QOS_MAP, + }; + int ret = 0; + + /* + * This function primarily handles the inheritance of egress map + * from parent interface when sub-interface is created + */ + if (event != IF_FEAT_MODE_EVENT_L3_FAL_ENABLED) + return; + + if (ifp->if_type != IFT_L2VLAN) + return; + + /* + * Check if sub-interface already has egress map configured, if + * so then nothing to do here + */ + parent_ifp = ifp->if_parent; + if (parent_ifp) { + parent_egr_map_subport = qos_egress_map_subport_get(parent_ifp, + 0); + if (!parent_egr_map_subport) { + DP_DEBUG(QOS, ERR, DATAPLANE, + "Failed to 
get info for parent_egr_map_subport ifp:%s\n", + ifp->if_name); + return; + } + egr_map_subport = qos_egress_map_subport_get(ifp, ifp->if_vlan); + } else { + DP_DEBUG(QOS, ERR, DATAPLANE, + "Parent interface does not exist for this subinterface ifp:%s\n", + ifp->if_name); + return; + } + if (egr_map_subport) { + DP_DEBUG(QOS, ERR, DATAPLANE, + "Sub interface ifp:%s already have a egress map attached!\n", + ifp->if_name); + return; + } + + l3_egr_map_attr.value.objid = (parent_egr_map_subport) ? + parent_egr_map_subport->egr_map_obj : FAL_NULL_OBJECT_ID; + + ret = if_set_l3_intf_attr(ifp, &l3_egr_map_attr); + + if (ret != 0) { + RTE_LOG(ERR, DATAPLANE, + "%s Setting Egress map_objid %lu failed: %d (%s)\n", + ifp->if_name, ifp->egr_map_obj, ret, + strerror(-ret)); + } +} + +static const struct dp_event_ops qos_events = { + .if_feat_mode_change = qos_hw_if_feat_mode_change, +}; + +DP_STARTUP_EVENT_REGISTER(qos_events); diff --git a/src/qos_hw_show.c b/src/qos_hw_show.c index 50e260f7..a03ddf62 100644 --- a/src/qos_hw_show.c +++ b/src/qos_hw_show.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2018-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2018-2020, AT&T Intellectual Property. All rights reserved. 
* * SPDX-License-Identifier: LGPL-2.1-only */ @@ -261,7 +261,7 @@ void qos_hw_show_queue(fal_object_t queue, uint32_t id, json_writer_t *wr) uint32_t queue_type; uint8_t queue_index; uint8_t tc; - bool local_priority; + uint8_t designator; int ret; struct fal_attribute_t attr_list[] = { @@ -281,8 +281,8 @@ void qos_hw_show_queue(fal_object_t queue, uint32_t id, json_writer_t *wr) .value.u32 = 0 }, { .id = FAL_QOS_QUEUE_ATTR_TC, .value.u8 = 0xFF }, - { .id = FAL_QOS_QUEUE_ATTR_LOCAL_PRIORITY, - .value.booldata = false }, + { .id = FAL_QOS_QUEUE_ATTR_DESIGNATOR, + .value.u8 = 0 }, }; ret = fal_qos_get_queue_attrs(queue, ARRAY_SIZE(attr_list), attr_list); @@ -301,7 +301,7 @@ void qos_hw_show_queue(fal_object_t queue, uint32_t id, json_writer_t *wr) scheduler_id = attr_list[5].value.objid; queue_limit = attr_list[6].value.u32; tc = attr_list[7].value.u8; - local_priority = attr_list[8].value.booldata; + designator = attr_list[8].value.u8; jsonw_name(wr, "queue"); jsonw_start_object(wr); @@ -310,7 +310,7 @@ void qos_hw_show_queue(fal_object_t queue, uint32_t id, json_writer_t *wr) jsonw_uint_field(wr, "queue-limit", queue_limit); jsonw_uint_field(wr, "queue-index", queue_index); jsonw_uint_field(wr, "tc", tc); - jsonw_bool_field(wr, "local-priority", local_priority); + jsonw_uint_field(wr, "designation", designator); if (scheduler_id != FAL_QOS_NULL_OBJECT_ID) qos_hw_show_scheduler(scheduler_id, wr); @@ -322,13 +322,12 @@ void qos_hw_show_queue(fal_object_t queue, uint32_t id, json_writer_t *wr) } static -void qos_hw_show_to_tc_map_list(uint8_t map_type, +void qos_hw_show_to_tc_map_list(uint8_t map_type __unused, struct fal_qos_map_list_t *map_list, json_writer_t *wr) { uint64_t cp_bitmap[RTE_SCHED_QUEUES_PER_PIPE * (QOS_MAX_DROP_PRECEDENCE + 1)] = { 0 }; - uint32_t queue; uint32_t tc; uint32_t dp; uint32_t mli; @@ -338,47 +337,33 @@ void qos_hw_show_to_tc_map_list(uint8_t map_type, struct fal_qos_map_t *map = &map_list->list[mli]; uint8_t key; - if (map_type == 
FAL_QOS_MAP_TYPE_DOT1P_TO_TC) - key = map->key.dot1p; - else /* map_type == FAL_QOS_MAP_TYPE_DSCP_TO_TC */ - key = map->key.dscp; + key = map->key.dscp; if (mli != key) DP_DEBUG(QOS, ERR, DATAPLANE, "map-list not in order\n"); bmi = (map->value.dp * RTE_SCHED_QUEUES_PER_PIPE) + - (map->value.tc * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS) + - map->value.wrr; + (map->value.des * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS); cp_bitmap[bmi] |= (1ul << key); } - for (tc = 0; tc < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; tc++) { - for (queue = 0; queue < RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS; - queue++) { - for (dp = 0; dp <= QOS_MAX_DROP_PRECEDENCE; dp++) { - bmi = (dp * RTE_SCHED_QUEUES_PER_PIPE) + - (tc * - RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS) + - queue; - - if (cp_bitmap[bmi]) { - char str_bitmap[22]; - - jsonw_start_object(wr); - snprintf(str_bitmap, 21, "%lu", - cp_bitmap[bmi]); - jsonw_string_field(wr, "cp-bitmap", - str_bitmap); - jsonw_uint_field(wr, "traffic-class", - tc); - jsonw_uint_field(wr, "queue", queue); - jsonw_uint_field(wr, "drop-precedence", - dp); - jsonw_end_object(wr); - } + for (dp = 0; dp <= QOS_MAX_DROP_PRECEDENCE; dp++) { + bmi = (dp * RTE_SCHED_QUEUES_PER_PIPE) + + (tc * + RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS); + + if (cp_bitmap[bmi]) { + char str_bitmap[22]; + + jsonw_start_object(wr); + snprintf(str_bitmap, 21, "%lu", cp_bitmap[bmi]); + jsonw_string_field(wr, "cp-bitmap", str_bitmap); + jsonw_uint_field(wr, "designator", tc); + jsonw_uint_field(wr, "drop-precedence", dp); + jsonw_end_object(wr); } } } @@ -421,30 +406,29 @@ qos_hw_show_to_dot1p_map_list(struct fal_qos_map_list_t *map_list, static void qos_hw_show_map_list(uint8_t map_type, struct fal_qos_map_list_t *map_list, - bool local_priority, json_writer_t *wr) + json_writer_t *wr) { - if (map_type != FAL_QOS_MAP_TYPE_DOT1P_TO_TC && - map_type != FAL_QOS_MAP_TYPE_DSCP_TO_TC && - map_type != FAL_QOS_MAP_TYPE_DSCP_TO_DOT1P) { + switch (map_type) { + case FAL_QOS_MAP_TYPE_DSCP_TO_DESIGNATOR: + case 
FAL_QOS_MAP_TYPE_DOT1P_TO_DESIGNATOR: + jsonw_name(wr, "ingress-map"); + break; + case FAL_QOS_MAP_TYPE_DSCP_TO_DOT1P: + case FAL_QOS_MAP_TYPE_DESIGNATOR_TO_DOT1P: + jsonw_name(wr, "egress-map"); + break; + default: DP_DEBUG(QOS, ERR, DATAPLANE, "unsupported map-type: %u\n", map_type); return; } - if (map_type == FAL_QOS_MAP_TYPE_DOT1P_TO_TC || - map_type == FAL_QOS_MAP_TYPE_DSCP_TO_TC) - jsonw_name(wr, "ingress-map"); - else - jsonw_name(wr, "egress-map"); - jsonw_start_object(wr); jsonw_uint_field(wr, "map-type", map_type); - jsonw_bool_field(wr, "local-priority", local_priority); jsonw_name(wr, "map-list"); jsonw_start_array(wr); - if (map_type == FAL_QOS_MAP_TYPE_DOT1P_TO_TC || - map_type == FAL_QOS_MAP_TYPE_DSCP_TO_TC) + if (map_type == FAL_QOS_MAP_TYPE_DSCP_TO_DESIGNATOR) qos_hw_show_to_tc_map_list(map_type, map_list, wr); else qos_hw_show_to_dot1p_map_list(map_list, wr); @@ -459,14 +443,17 @@ void qos_hw_show_map_list(uint8_t map_type, struct fal_qos_map_list_t *map_list, */ const uint8_t qos_map_entries[FAL_QOS_MAP_TYPE_MAX + 1] = { 8, /* FAL_QOS_MAP_TYPE_DOT1P_TO_TC */ - 8, /* FAL_QOS_MAP_TYPE_DOT1P_TO_COLOR */ + 8, /* FAL_QOS_MAP_TYPE_DOT1P_TO_COLOUR */ 64, /* FAL_QOS_MAP_TYPE_DSCP_TO_TC */ - 64, /* FAL_QOS_MAP_TYPE_DSCP_TO_COLOR */ + 64, /* FAL_QOS_MAP_TYPE_DSCP_TO_COLOUR */ 8, /* FAL_QOS_MAP_TYPE_TC_TO_QUEUE */ - 8, /* FAL_QOS_MAP_TYPE_TC_AND_COLOR_TO_DSCP */ - 8, /* FAL_QOS_MAP_TYPE_TC_AND_COLOR_TO_DOT1P */ + 8, /* FAL_QOS_MAP_TYPE_TC_AND_COLOUR_TO_DSCP */ + 8, /* FAL_QOS_MAP_TYPE_TC_AND_COLOUR_TO_DOT1P */ 8, /* FAL_QOS_MAP_TYPE_TC_TO_PRIORITY_GROUP */ 64, /* FAL_QOS_MAP_TYPE_DSCP_TO_DOT1P */ + 64, /* FAL_QOS_MAP_TYPE_DSCP_TO_DESIGNATOR */ + 8, /* FAL_QOS_MAP_TYPE_DOT1P_TO_DESIGNATOR */ + 8, /* FAL_QOS_MAP_TYPE_DESIGNATOR_TO_DOT1P */ }; static @@ -474,14 +461,11 @@ void qos_hw_show_map(fal_object_t map, json_writer_t *wr) { struct fal_qos_map_list_t map_list; uint8_t map_type; - bool local_priority; int ret; struct fal_attribute_t attr_list[] = { { .id 
= FAL_QOS_MAP_ATTR_TYPE, .value.u8 = FAL_QOS_MAP_TYPE_MAX + 1 }, - { .id = FAL_QOS_MAP_ATTR_LOCAL_PRIORITY_QUEUE, - .value.booldata = false } }; /* @@ -514,13 +498,11 @@ void qos_hw_show_map(fal_object_t map, json_writer_t *wr) DP_DEBUG(QOS, ERR, DATAPLANE, "FAL failed to get map attributes, status: %d\n", ret); else { - local_priority = attr_list[1].value.booldata; if (map_list.count != qos_map_entries[map_type]) DP_DEBUG(QOS, ERR, DATAPLANE, "wrong map-list count returned\n"); else - qos_hw_show_map_list(map_type, &map_list, - local_priority, wr); + qos_hw_show_map_list(map_type, &map_list, wr); } } @@ -596,6 +578,7 @@ void qos_hw_show_sched_group(fal_object_t sched_group, uint32_t id, uint32_t max_children; uint8_t level; uint16_t vlan; + uint8_t lp_des; int ret; struct fal_attribute_t attr_list[] = { @@ -617,6 +600,8 @@ void qos_hw_show_sched_group(fal_object_t sched_group, uint32_t id, .value.objid = FAL_QOS_NULL_OBJECT_ID }, { .id = FAL_QOS_SCHED_GROUP_ATTR_VLAN_ID, .value.u16 = 0 }, + { .id = FAL_QOS_SCHED_GROUP_ATTR_LOCAL_PRIORITY_DESIGNATOR, + .value.u8 = 0 }, }; ret = fal_qos_get_sched_group_attrs(sched_group, ARRAY_SIZE(attr_list), @@ -639,6 +624,7 @@ void qos_hw_show_sched_group(fal_object_t sched_group, uint32_t id, ingress_map_id = attr_list[6].value.objid; egress_map_id = attr_list[7].value.objid; vlan = attr_list[8].value.u16; + lp_des = attr_list[9].value.u8; jsonw_name(wr, "sched-group"); jsonw_start_object(wr); @@ -651,13 +637,14 @@ void qos_hw_show_sched_group(fal_object_t sched_group, uint32_t id, jsonw_uint_field(wr, "max-children", max_children); jsonw_uint_field(wr, "current-children", child_count); - if (level == FAL_QOS_SCHED_GROUP_LEVEL_PIPE && - ingress_map_id != FAL_QOS_NULL_OBJECT_ID) - qos_hw_show_map(ingress_map_id, wr); + if (level == FAL_QOS_SCHED_GROUP_LEVEL_PIPE) { + jsonw_uint_field(wr, "local-priority-des", lp_des); + if (ingress_map_id != FAL_QOS_NULL_OBJECT_ID) + qos_hw_show_map(ingress_map_id, wr); - if (level == 
FAL_QOS_SCHED_GROUP_LEVEL_PIPE && - egress_map_id != FAL_QOS_NULL_OBJECT_ID) - qos_hw_show_map(egress_map_id, wr); + if (egress_map_id != FAL_QOS_NULL_OBJECT_ID) + qos_hw_show_map(egress_map_id, wr); + } /* * Don't show the vlan for subport 0 which corresponds to @@ -720,3 +707,8 @@ void qos_hw_dump_subport(json_writer_t *wr, const struct sched_info *qinfo, if (subport_sg) fal_qos_dump_sched_group(subport_sg, wr); } + +void qos_hw_dump_buf_errors(json_writer_t *wr) +{ + fal_qos_dump_buf_errors(wr); +} diff --git a/src/qos_obj_db.c b/src/qos_obj_db.c index 79295f72..ccd7028a 100644 --- a/src/qos_obj_db.c +++ b/src/qos_obj_db.c @@ -131,8 +131,8 @@ qos_obj_db_get_ids_string(enum qos_obj_db_level level, uint32_t *ids, if (max_len >= 0 && len >= 0) { ids_string[total_len - 1] = '\0'; return ids_string; - } else - return NULL; + } + return NULL; } static void @@ -260,7 +260,7 @@ void qos_obj_db_sw_get(struct qos_obj_db_obj *db_obj, enum qos_obj_sw_state *sw_state) { - assert(db_obj != NULL || sw_state != NULL); + assert(db_obj != NULL && sw_state != NULL); *sw_state = db_obj->sw_state; } diff --git a/src/qos_obj_db.h b/src/qos_obj_db.h index 4e7d2c4a..1b909a5f 100644 --- a/src/qos_obj_db.h +++ b/src/qos_obj_db.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2018-2019, AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only */ diff --git a/src/qos_public.h b/src/qos_public.h new file mode 100644 index 00000000..a3b2fb94 --- /dev/null +++ b/src/qos_public.h @@ -0,0 +1,12 @@ +/*- + * Copyright (c) 2019, AT&T Intellectual Property. All rights reserved. 
+ * + * SPDX-License-Identifier: LGPL-2.1-only + */ + +#ifndef QOS_PUBLIC_H +#define QOS_PUBLIC_H + +extern void qos_sched_res_grp_update(char *grp); + +#endif /* QOS_PUBLIC_H */ diff --git a/src/qos_sched.c b/src/qos_sched.c index 1048f419..19e71ab5 100644 --- a/src/qos_sched.c +++ b/src/qos_sched.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2013-2017 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -35,6 +35,7 @@ #include "compiler.h" #include "dp_event.h" #include "ether.h" +#include "fal.h" #include "if_var.h" #include "ip_funcs.h" #include "json_writer.h" @@ -51,16 +52,47 @@ #include "npf/rproc/npf_rproc.h" #include "npf/npf_rule_gen.h" #include "npf_shim.h" -#include "pktmbuf.h" +#include "pktmbuf_internal.h" #include "qos.h" +#include "qos_ext_buf_monitor.h" #include "qos_obj_db.h" +#include "qos_public.h" #include "urcu.h" #include "util.h" #include "vplane_debug.h" #include "vplane_log.h" +#define MAX_LINERATE (100000000000/8) /* 100Gbits converted to bytes */ + +/* + * Queue limit range from vyatta-policy-qos-groupings-v1: + * 9000..500000000 + */ +#define MAX_QUEUE_LIMIT_BYTES 500000000 +#define MIN_QUEUE_LIMIT_BYTES 9000 + +/* + * Threshold range from vyatta-policy-qos-groupings-v1: + * min_th = 64..499999998 + * max_th = 128..499999999 + */ +#define MAX_THRESHOLD_RANGE_MIN 128 +#define MAX_THRESHOLD_RANGE_MAX (MAX_QUEUE_LIMIT_BYTES - 1) +#define MIN_THRESHOLD_RANGE_MIN 64 +#define MIN_THRESHOLD_RANGE_MAX (MAX_THRESHOLD_RANGE_MAX - 1) + +static CDS_LIST_HEAD(qos_ingress_maps); +static CDS_LIST_HEAD(qos_egress_maps); + +struct qos_qinfo_list { + SLIST_HEAD(qinfo_head, sched_info) qinfo_head; +}; + +static struct qos_qinfo_list qos_qinfos; + struct qos_dev qos_devices[NUM_DEVS] = { - { qos_dpdk_disable, + { NULL, + qos_dpdk_disable, qos_dpdk_enable, qos_dpdk_start, qos_dpdk_stop, @@ -70,8 
+102,10 @@ struct qos_dev qos_devices[NUM_DEVS] = { qos_dpdk_queue_read_stats, qos_dpdk_queue_clear_stats, qos_dpdk_dscp_resgrp_json, + qos_dpdk_check_rate, }, - { qos_hw_disable, + { qos_hw_init, + qos_hw_disable, qos_hw_enable, qos_hw_start, qos_hw_stop, @@ -81,9 +115,240 @@ struct qos_dev qos_devices[NUM_DEVS] = { qos_hw_queue_read_stats, qos_hw_queue_clear_stats, qos_hw_dscp_resgrp_json, + qos_hw_check_rate, } }; +struct qos_ingressm qos_ingressm = {0}; +struct qos_egressm qos_egressm = {0}; + +struct qos_ingress_map *qos_im_sysdef; + +/* Used for legacy configs */ +fal_object_t qos_global_map_obj = FAL_QOS_NULL_OBJECT_ID; + +static const char *qos_dps[NUM_DPS] = {"green", "yellow", "red"}; + +static inline void QOS_RM_GLOBAL_MAP(void) +{ + if (SLIST_EMPTY(&qos_qinfos.qinfo_head)) { + if (qos_global_map_obj) { + qos_hw_del_map(qos_global_map_obj); + qos_global_map_obj = FAL_QOS_NULL_OBJECT_ID; + } + } +} + +static void qos_sched_npf_commit(void) +{ + struct sched_info *qinfo; + unsigned int i; + + SLIST_FOREACH(qinfo, &qos_qinfos.qinfo_head, list) { + + for (i = 0; i < qinfo->port_params.n_pipe_profiles; i++) { + struct queue_map *qmap = &qinfo->queue_map[i]; + + qmap->reset_mask = 0; + } + + if (qinfo->reset_port != QOS_NPF_COMMIT) + continue; + + struct rte_eth_link link; + + QOS_STOP(qinfo)(qinfo->ifp, qinfo); + + /* + * If it exists, the global map obj must apply to all + * policy instances and all policy instances must have + * been affected by the resource group change we are + * responding to. So it is safe to delete it now + * and it will be reinstalled when the first policy is + * reinstalled. 
+ */ + if (qos_global_map_obj) { + qos_hw_del_map(qos_global_map_obj); + qos_global_map_obj = FAL_QOS_NULL_OBJECT_ID; + } + + rte_eth_link_get_nowait(qinfo->ifp->if_port, &link); + if (link.link_status) { + int ret; + + ret = qos_sched_start(qinfo->ifp, link.link_speed); + if (ret != 0) + qinfo->enabled = false; + } + qinfo->reset_port = QOS_NPF_READY; + + DP_DEBUG(QOS, DEBUG, DATAPLANE, + "Port restart via npf res grp, link state %s\n", + (link.link_status) ? "up" : "down"); + } +} + +/* + * The mask passed in assigned dscp values to queues, the group used + * to setup the mask is being changed so reset the dscp values to their + * default queues. + */ +static void qos_dscp_reset_map(struct queue_map *qmap, uint64_t dscp_mask) +{ + unsigned int i; + uint64_t j; + + for (i = 0, j = 1; i < MAX_DSCP; i++, j <<= 1) { + if (j & dscp_mask) { + /* + * If the dscp value has already been reassigned to + * another queue in a previous resource update do not + * reset it to the default queue otherwise we'll be + * overwriting a previous classifier. + */ + if (j & qmap->reset_mask) + continue; + + qmap->dscp2q[i] = + (~i >> (DSCP_BITS - RTE_SCHED_TC_BITS)) + & RTE_SCHED_TC_MASK; + } + } +} + +/* + * This will assign new dscp to queue classification entries when a + * resource group is changed. 
+ */ +static void qos_dscp_init_map(struct queue_map *qmap, uint64_t dscp_mask, + uint8_t q_class) +{ + uint64_t i, j; + + for (i = 0, j = 1; i < MAX_DSCP; i++, j <<= 1) { + if (dscp_mask & j) + qmap->dscp2q[i] = q_class; + } + + /* Keep track of the values we've already assigned */ + qmap->reset_mask |= dscp_mask; +} + +static int qos_sched_update_wred_prof(struct sched_info *qinfo, char *grp, + struct qos_red_pipe_params *wred, + uint64_t new_dscp_mask) +{ + int j; + + for (j = 0; j < wred->red_q_params.num_maps; j++) { + if (!strcmp(wred->red_q_params.grp_names[j], grp)) { + if (wred->red_q_params.dscp_set[j] == new_dscp_mask) + return -1; + wred->red_q_params.dscp_set[j] = new_dscp_mask; + qinfo->reset_port = QOS_NPF_COMMIT; + return 0; + } + } + return 0; +} + +static int qos_sched_update_map(struct sched_info *qinfo, char *grp, + struct queue_map *qmap, + uint64_t new_dscp_mask) +{ + unsigned int j; + struct qos_dscp_map *map; + + map = qmap->dscp_maps; + if (!map) + return 0; + + for (j = 0; j < map->num_maps; j++) { + if (!strcmp(grp, map->dscp_grp_names[j])) { + if (map->dscp_mask[j] == new_dscp_mask) + return -1; + qos_dscp_reset_map(qmap, map->dscp_mask[j]); + qos_dscp_init_map(qmap, new_dscp_mask, map->qmap[j]); + map->dscp_mask[j] = new_dscp_mask; + qinfo->reset_port = QOS_NPF_COMMIT; + return 0; + } + } + return 0; +} + +/* + * Search the installed policies to check if any are using the resource group + * which has been changed and update the dscp mask. 
+ */ +void +qos_sched_res_grp_update(char *grp) +{ + struct sched_info *qinfo; + unsigned int i; + uint64_t new_dscp_mask; + int ret; + + ret = npf_dscp_group_getmask(grp, &new_dscp_mask); + if (ret) { + DP_DEBUG(QOS, ERR, DATAPLANE, + "Failed to retrieve resource group %s\n", grp); + return; + } + + DP_DEBUG(QOS, DEBUG, DATAPLANE, "Qos resource grp %s mask %"PRIx64"\n", + grp, new_dscp_mask); + + SLIST_FOREACH(qinfo, &qos_qinfos.qinfo_head, list) { + /* + * We're called at policy install which we want to ignore. + * Set the state to NPF_READY which means any indications + * after policy install we need to check the resource groups. + */ + if (qinfo->reset_port == QOS_INSTALL) { + qinfo->reset_port = QOS_NPF_READY; + continue; + } + + for (i = 0; i < qinfo->port_params.n_pipe_profiles; i++) { + struct qos_pipe_params *prof = + &qinfo->port_params.pipe_profiles[i]; + struct qos_red_pipe_params *wred; + + SLIST_FOREACH(wred, &prof->red_head, list) { + ret = qos_sched_update_wred_prof(qinfo, grp, + wred, + new_dscp_mask); + if (ret == -1) + return; + } + + struct queue_map *qmap = &qinfo->queue_map[i]; + + ret = qos_sched_update_map(qinfo, grp, qmap, + new_dscp_mask); + if (ret == -1) + return; + } + + for (i = 0; i < qinfo->n_subports; i++) { + struct subport_info *subport = &qinfo->subport[i]; + struct qos_mark_map *mark_map = subport->mark_map; + + if (!mark_map || SLIST_EMPTY(&mark_map->dscp_grps)) + continue; + + struct dscp_grp_list *dscp_grp; + SLIST_FOREACH(dscp_grp, &mark_map->dscp_grps, list) { + if (!strcmp(grp, dscp_grp->name)) { + qinfo->reset_port = QOS_NPF_COMMIT; + break; + } + } + } + } +} + /* * Carry out any one-time initialisation that required when the * vyatta-dataplane starts up. 
@@ -91,8 +356,126 @@ struct qos_dev qos_devices[NUM_DEVS] = { void qos_init(void) { + int i, ret; + if (rte_red_set_scaling(MAX_RED_QUEUE_LENGTH) != 0) rte_panic("Failed to set RED scaling\n"); + + qos_external_buf_monitor_init(); + SLIST_INIT(&qos_qinfos.qinfo_head); + + for (i = 0; i < NUM_DEVS; i++) { + if (qos_devices[i].qos_init) { + ret = (qos_devices[i].qos_init)(); + if (ret) + rte_panic("Failed to initialize dev %d\n", i); + } + } +} + +/* Create new QoS egress map object */ +struct egress_map_subport_info * +qos_egress_map_subport_new(struct ifnet *ifp, struct ifnet *parent_ifp, + bool is_sub_if) +{ + struct egress_map_subport_info *parent_egr_map = NULL; + struct egress_map_subport_info *egr_map_info = NULL; + struct egress_map_subport_info *temp_egr_map_info = NULL; + struct egress_map_subport_info *list_entry = NULL; + struct egress_map_subport_info *list_prev_entry = NULL; + bool new_list = false; + + if (is_sub_if && parent_ifp->egr_map_info) { + SLIST_FOREACH(list_entry, + &parent_ifp->egr_map_info->egr_map_head, + egr_map_list) { + if (list_entry->vlan_id < ifp->if_vlan) + list_prev_entry = list_entry; + else + break; + } + if (list_entry && list_entry->vlan_id == ifp->if_vlan) + return list_entry; + temp_egr_map_info = list_prev_entry; + } else if (!parent_ifp->egr_map_info) { + new_list = true; + /* Create an entry for parent first */ + parent_ifp->egr_map_info = calloc(1, sizeof( + struct egress_map_info)); + if (!parent_ifp->egr_map_info) { + RTE_LOG(ERR, DATAPLANE, + "Failed to allocate memory for egress map info\n"); + return NULL; + } + parent_egr_map = calloc(1, sizeof( + struct egress_map_subport_info)); + if (!parent_egr_map) { + RTE_LOG(ERR, DATAPLANE, + "Failed to allocate memory for egress map subport for parent\n"); + free(ifp->egr_map_info); + return NULL; + } + parent_egr_map->vlan_id = 0; + SLIST_INIT(&parent_ifp->egr_map_info->egr_map_head); + SLIST_INSERT_HEAD(&parent_ifp->egr_map_info->egr_map_head, + parent_egr_map, 
egr_map_list); + temp_egr_map_info = parent_egr_map; + } else { + temp_egr_map_info = SLIST_FIRST( + &parent_ifp->egr_map_info->egr_map_head); + } + + /* Create a entry for the sub-port now */ + if (ifp->if_type == IFT_L2VLAN && ifp->if_vlan != 0) { + egr_map_info = calloc(1, sizeof( + struct egress_map_subport_info)); + if (!egr_map_info) { + RTE_LOG(ERR, DATAPLANE, + "Failed to allocate memory for egress map subport\n"); + if (new_list) { + SLIST_REMOVE_HEAD( + &ifp->egr_map_info->egr_map_head, + egr_map_list); + free(parent_egr_map); + free(parent_ifp->egr_map_info); + parent_ifp->egr_map_info = NULL; + } + return NULL; + } + egr_map_info->vlan_id = ifp->if_vlan; + if (temp_egr_map_info) { + SLIST_INSERT_AFTER(temp_egr_map_info, + egr_map_info, egr_map_list); + temp_egr_map_info = egr_map_info; + } + } + return temp_egr_map_info; +} + +/* Get QoS egress map object for a given parent ifp and vlan */ +struct egress_map_subport_info * +qos_egress_map_subport_get(struct ifnet *parent_ifp, + int vlan_id) +{ + struct egress_map_subport_info *list_entry = NULL; + + if (!parent_ifp) + return NULL; + + if (!parent_ifp->egr_map_info) { + DP_DEBUG(QOS, ERR, DATAPLANE, + "%s: parent egr_map_info is NULL\n", __func__); + return NULL; + } + + SLIST_FOREACH(list_entry, &parent_ifp->egr_map_info->egr_map_head, + egr_map_list) { + if (list_entry->vlan_id == vlan_id) + return list_entry; + if (list_entry->vlan_id > vlan_id) + break; + } + return NULL; } /* Sets the PCP value to map to the given queue for a particular profile. 
*/ @@ -152,7 +535,7 @@ static int qos_sched_profile_dscp_map_set(struct sched_info *qinfo, } else qmap->dscp2q[dscp] = q; - if (qmap->dscp_enabled == 0) { + if (qmap->designation == 0 && qmap->dscp_enabled == 0) { DP_DEBUG(QOS, INFO, DATAPLANE, "DSCP map not enabled, enabling\n"); qmap->dscp_enabled = 1; @@ -161,33 +544,130 @@ static int qos_sched_profile_dscp_map_set(struct sched_info *qinfo, return 1; } +static int qos_sched_setup_dscp_map(struct sched_info *qinfo, + unsigned int profile, uint64_t dscp_mask, + char *name, uint8_t q) +{ + struct queue_map *qmap = &qinfo->queue_map[profile]; + struct qos_dscp_map *map = qmap->dscp_maps; + int i; + + if (!map) { + map = calloc(1, sizeof(struct qos_dscp_map)); + if (!map) { + DP_DEBUG(QOS, ERR, DATAPLANE, + "Queue dscp map allocation failure\n"); + return -1; + } + qmap->dscp_maps = map; + } else if (map->num_maps == QOS_MAX_DSCP_MAPS) { + DP_DEBUG(QOS, ERR, DATAPLANE, "Too many dscp maps\n"); + return -1; + } + + i = strlen(name) + 1; + map->dscp_grp_names[map->num_maps] = calloc(1, i); + if (!map->dscp_grp_names[map->num_maps]) { + DP_DEBUG(QOS, ERR, DATAPLANE, + "Failed to alloc dscp map\n"); + if (!map->num_maps) { + free(qmap->dscp_maps); + qmap->dscp_maps = NULL; + } + return -1; + } + + strcpy(map->dscp_grp_names[map->num_maps], name); + map->dscp_mask[map->num_maps] = dscp_mask; + map->qmap[map->num_maps] = q; + map->num_maps++; + + return 0; +} + /* * Returns the rate (bytes/sec) for the given bandwidth structure. If bandwidth * is given as a percentage, calculates the rate from the parent. Otherwise * returns the rate provided in the bandwidth structure. 
*/ -static uint32_t qos_rate_get(struct qos_rate_info *bw_info, uint32_t parent_bw) +static uint64_t qos_rate_get(struct qos_rate_info *bw_info, uint64_t parent_bw, + struct sched_info *qinfo, bool limit) +{ + uint64_t rate; + + if (bw_info->bw_is_percent) { + const float precision = 0.0001; + float full_pct = bw_info->rate.bw_percent; + uint32_t whole_pct = (uint32_t)bw_info->rate.bw_percent; + + if (fabsf(full_pct - (float)whole_pct) < precision) + rate = (parent_bw * whole_pct) / 100; + else + rate = (uint64_t) (parent_bw * full_pct) / 100; + } else + rate = bw_info->rate.bandwidth; + + if (!limit) + return rate; + + rate = QOS_CHECK_RATE(qinfo)(rate, parent_bw); + + return rate; +} + +void qos_abs_rate_save(struct qos_rate_info *bw_info, uint64_t abs_bw) { - return bw_info->bw_is_percent ? - ((uint64_t)parent_bw * bw_info->rate.bw_percent) / 100 : - bw_info->rate.bandwidth; + bw_info->bw_is_percent = false; + bw_info->rate.bandwidth = abs_bw; +} + +static void qos_percent_rate_save(struct qos_rate_info *bw_info, + float percent_bw) +{ + bw_info->bw_is_percent = true; + bw_info->rate.bw_percent = percent_bw; } /* - * Sets the rate (bytes/sec) into the given bandwidth structure. Returns - * the calculated rate of the entity (see qos_rate_get for details) + * Returns the burst (bytes) for the given bandwidth structure. If burst + * is specified in msec, calculates the burst value based on the given + * rate (bytes/sec). 
*/ -static uint32_t qos_rate_set(struct qos_rate_info *bw_info, - uint32_t bw, bool is_percent, uint32_t parent_bw) +static uint32_t qos_burst_get(struct qos_rate_info *bw_info, uint64_t rate) { - bw_info->bw_is_percent = is_percent; + #define DEFAULT_BURST_MS (4) - if (is_percent) - bw_info->rate.bw_percent = bw; - else - bw_info->rate.bandwidth = bw; + if (bw_info->burst_is_time) + return (rate * bw_info->burst.time_ms) / 1000; + + if (bw_info->burst.size) + return bw_info->burst.size; + + return (rate * DEFAULT_BURST_MS) / 1000; +} + +/* + * Sets the burst size (bytes) into the given bandwidth structure. Returns + * the burst size provided. + */ +static uint32_t qos_abs_burst_set(struct qos_rate_info *bw_info, + uint32_t burst) +{ + bw_info->burst_is_time = false; + bw_info->burst.size = burst; + return burst; +} - return qos_rate_get(bw_info, parent_bw); +/* + * Sets the burst time (msec) into the given bandwidth structure. Returns + * the calculated burst of the entity (see qos_burst_get for details) + */ +static uint32_t qos_time_burst_set(struct qos_rate_info *bw_info, + uint32_t burst, uint64_t rate) +{ + bw_info->burst_is_time = true; + bw_info->burst.time_ms = burst; + return qos_burst_get(bw_info, rate); } /* @@ -210,82 +690,290 @@ static uint32_t qos_period_set(struct qos_rate_info *bw_info, uint32_t period) return qos_period_get(bw_info, period); } -/* - * NB: Releasing of NPF resources needs done outside of RCU as a) - * the database of config is not designed for RCU and so will result - * in out-of-order events and b) the NPF running config does its own - * RCU actions, so should not be called from within an RCU callback. 
- */ -void qos_subport_npf_free(struct sched_info *qinfo) +static bool qos_qsize_type_get(const char *size_type_arg, + enum qos_queue_size_type *size_type) { - unsigned int i; - uint32_t j; + if (!strcmp(size_type_arg, "packets")) + *size_type = QOS_QUEUE_SIZE_PACKETS; + else if (!strcmp(size_type_arg, "bytes")) + *size_type = QOS_QUEUE_SIZE_BYTES; + else if (!strcmp(size_type_arg, "usec")) + *size_type = QOS_QUEUE_SIZE_USEC; + else { + DP_DEBUG(QOS, ERR, DATAPLANE, + "Invalid queue size type field\n"); + return false; + } + return true; +} - for (i = 0, j = 0; - i < qinfo->port_params.n_subports_per_port; - j = 0, i++) { - struct subport_info *sinfo = qinfo->subport + i; - int ret_val; - struct mark_reqs *mark_list, *free_mark; +bool qos_wred_threshold_get(struct qos_red_params *wred_params, + uint64_t rate, uint32_t *wred_min_th, uint32_t *wred_max_th) +{ + if (wred_params->qsize_type == QOS_QUEUE_SIZE_BYTES || + wred_params->qsize_type == QOS_QUEUE_SIZE_PACKETS) { + *wred_min_th = wred_params->min_th; + *wred_max_th = wred_params->max_th; + return true; + } - while (j++ < sinfo->match_id) { - ret_val = npf_cfg_auto_attach_rule_delete( - NPF_RULE_CLASS_QOS, sinfo->attach_name, - j, NULL); - if (ret_val < 0) { - DP_DEBUG(QOS, ERR, DATAPLANE, - "Deleting match for class failed.\n"); + if (wred_params->qsize_type == QOS_QUEUE_SIZE_USEC && + rate != 0) { + uint64_t min_th = ((rate * wred_params->min_th) / USEC_PER_SEC); + uint64_t max_th = ((rate * wred_params->max_th) / USEC_PER_SEC); + + /* Range check after byte conversion. 
*/ + if ((min_th != 0) && (max_th != 0)) { + /* Squash the values if out of range */ + if (min_th < MIN_THRESHOLD_RANGE_MIN) { + RTE_LOG(INFO, QOS, + "Rounding up min_th from " + "%"PRIu64" to %d\n", + min_th, MIN_THRESHOLD_RANGE_MIN); + min_th = MIN_THRESHOLD_RANGE_MIN; + } else if (min_th > MIN_THRESHOLD_RANGE_MAX) { + RTE_LOG(INFO, QOS, + "Rounding down min_th from " + "%"PRIu64" to %d\n", + min_th, MIN_THRESHOLD_RANGE_MAX); + min_th = MIN_THRESHOLD_RANGE_MAX; } + + if (max_th < MAX_THRESHOLD_RANGE_MIN) { + RTE_LOG(INFO, QOS, + "Rounding up max_th from " + "%"PRIu64" to %d\n", + max_th, MAX_THRESHOLD_RANGE_MIN); + max_th = MAX_THRESHOLD_RANGE_MIN; + } else if (max_th > MAX_THRESHOLD_RANGE_MAX) { + RTE_LOG(INFO, QOS, + "Rounding down max_th from " + "%"PRIu64" to %d\n", + max_th, MAX_THRESHOLD_RANGE_MAX); + max_th = MAX_THRESHOLD_RANGE_MAX; + } + *wred_min_th = (uint32_t)min_th; + *wred_max_th = (uint32_t)max_th; } - for (mark_list = sinfo->marks; mark_list; ) { - free_mark = mark_list; - mark_list = mark_list->next; - free(free_mark); - DP_DEBUG(QOS, DEBUG, DATAPLANE, - "freeing mark from subport %s\n", - sinfo->attach_name); - } - npf_attpt_item_set_down(NPF_ATTACH_TYPE_QOS, - sinfo->attach_name); + return true; } + + RTE_LOG(ERR, QOS, "Invalid threshold type\n"); + return false; } -static void qos_subport_free(struct sched_info *qinfo) +uint32_t qos_queue_size_get(uint32_t qsize, + enum qos_queue_size_type qsize_type, + uint64_t rate) { - unsigned int i; + if (qsize_type == QOS_QUEUE_SIZE_BYTES || + qsize_type == QOS_QUEUE_SIZE_PACKETS) + return qsize; + + if (qsize_type == QOS_QUEUE_SIZE_USEC) { + uint64_t queue_limit = ((rate * qsize) / USEC_PER_SEC); + + /* Queue limit range check after byte conversion. 
*/ + if (queue_limit) { + if (queue_limit > MAX_QUEUE_LIMIT_BYTES) { + RTE_LOG(INFO, QOS, + "Rounding down queue limit from " + "%"PRIu64" to %d\n", + queue_limit, MAX_QUEUE_LIMIT_BYTES); + queue_limit = MAX_QUEUE_LIMIT_BYTES; + } else if (queue_limit < MIN_QUEUE_LIMIT_BYTES) { + RTE_LOG(INFO, QOS, + "Rounding up queue limit from " + "%"PRIu64" to %d\n", + queue_limit, MIN_QUEUE_LIMIT_BYTES); + queue_limit = MIN_QUEUE_LIMIT_BYTES; + } + } + return (uint32_t)queue_limit; + } - for (i = 0; i < qinfo->port_params.n_subports_per_port; i++) { - struct subport_info *sinfo = qinfo->subport + i; + DP_DEBUG(QOS, ERR, DATAPLANE, + "Invalid Queue size type.\n"); + return 0; +} - free(sinfo->profile_map); +uint32_t qos_sp_qsize_get(struct qos_port_params *pp, + struct subport_info *sinfo, int tc) +{ + uint32_t qsize = qos_queue_size_get(sinfo->qsize[tc], + sinfo->qsize_type, + sinfo->params.tb_rate); + + if (!qsize) { + /* + * If subports don't have Queue size defined, + * inherit their queue sizes from the port. 
+ */ + qsize = qos_queue_size_get(pp->qsize[tc], + pp->qsize_type, + pp->rate); } - free(qinfo->subport); + + return qsize; } -/* Destroy QoS scheduler object */ -void qos_sched_free(struct sched_info *qinfo) +struct qos_red_pipe_params * +qos_red_find_q_params(struct qos_pipe_params *pipe, unsigned int qindex) +{ + struct qos_red_pipe_params *wred_params = NULL; + + SLIST_FOREACH(wred_params, &pipe->red_head, list) { + if (wred_params->qindex == qindex) + break; + } + return wred_params; +} + +static int +qos_red_init_q_params(struct qos_red_q_params *wred_params, + enum qos_queue_size_type qsize_type, unsigned int qmax, + unsigned int qmin, unsigned int prob, + bool wred_per_dscp, uint64_t dscp_set, + char *grp_name, uint8_t dp) +{ + int wred_index, ret; + + if (!wred_params || wred_params->num_maps > RTE_MAX_DSCP_MAPS) { + RTE_LOG(ERR, SCHED, "Invalid DSCP map init params\n"); + return -1; + } + + if (wred_per_dscp) + wred_index = wred_params->num_maps; + else + wred_index = dp; + wred_params->dps_in_use |= (1 << wred_index); + wred_params->qparams[wred_index].max_th = qmax; + wred_params->qparams[wred_index].min_th = qmin; + wred_params->qparams[wred_index].qsize_type = qsize_type; + wred_params->qparams[wred_index].maxp_inv = prob; + wred_params->dscp_set[wred_index] = dscp_set; + ret = asprintf(&wred_params->grp_names[wred_index], "%s", grp_name); + if (ret < 0) { + wred_params->grp_names[wred_index] = NULL; + return ret; + } + wred_params->num_maps++; + return 0; +} + +struct qos_red_pipe_params * +qos_red_alloc_q_params(struct qos_pipe_params *pipe, unsigned int qindex) +{ + struct qos_red_pipe_params *wred_params; + + wred_params = calloc(1, sizeof(struct qos_red_pipe_params)); + if (!wred_params) { + RTE_LOG(ERR, SCHED, "qred_info calloc failed\n"); + return NULL; + } + wred_params->qindex = qindex; + SLIST_INSERT_HEAD(&pipe->red_head, wred_params, list); + return wred_params; +} + +static void qos_free_q_params(struct qos_pipe_params *pipe, int i) +{ + 
struct qos_red_pipe_params *wred_params; + struct qos_red_q_params *qparams; + + while ((wred_params = SLIST_FIRST(&pipe->red_head)) != NULL) { + int j; + + SLIST_REMOVE_HEAD(&pipe->red_head, list); + DP_DEBUG(QOS, DEBUG, DATAPLANE, + "Freeing Q RED params qindex %u profile " + "%u pipe %p wred_params %p\n", + wred_params->qindex, i, pipe, wred_params); + qparams = &(wred_params->red_q_params); + for (j = 0; j < RTE_NUM_DSCP_MAPS; j++) { + if (qparams->grp_names[j]) + free(qparams->grp_names[j]); + } + free(wred_params); + } +} + +/* + * NB: Releasing of NPF resources needs done outside of RCU as a) + * the database of config is not designed for RCU and so will result + * in out-of-order events and b) the NPF running config does its own + * RCU actions, so should not be called from within an RCU callback. + */ +void qos_subport_npf_free(struct sched_info *qinfo) +{ + unsigned int i; + uint32_t j; + + if (!qinfo->subport) + return; + + for (i = 0, j = 0; + i < qinfo->port_params.n_subports_per_port; + j = 0, i++) { + struct subport_info *sinfo = qinfo->subport + i; + int ret_val; + struct mark_reqs *mark_list, *free_mark; + + while (j++ < sinfo->match_id) { + ret_val = npf_cfg_auto_attach_rule_delete( + NPF_RULE_CLASS_QOS, sinfo->attach_name, + j, NULL); + if (ret_val < 0) { + DP_DEBUG(QOS, ERR, DATAPLANE, + "Deleting match for class failed.\n"); + } + } + for (mark_list = sinfo->marks; mark_list; ) { + free_mark = mark_list; + mark_list = mark_list->next; + free(free_mark); + DP_DEBUG(QOS, DEBUG, DATAPLANE, + "freeing mark from subport %s\n", + sinfo->attach_name); + } + npf_attpt_item_set_down(NPF_ATTACH_TYPE_QOS, + sinfo->attach_name); + } +} + +static void qos_subport_free(struct sched_info *qinfo) +{ + unsigned int i; + + for (i = 0; i < qinfo->port_params.n_subports_per_port; i++) { + struct subport_info *sinfo = qinfo->subport + i; + + free(sinfo->profile_map); + } + free(qinfo->subport); +} + +/* Destroy QoS scheduler object */ +void qos_sched_free(struct 
sched_info *qinfo) { unsigned int i; - struct rte_sched_pipe_params *pp; + struct qos_pipe_params *pp; for (i = 0; i < qinfo->port_params.n_pipe_profiles; i++) { - struct profile_wred_info *profile_wred = - &qinfo->wred_profiles[i]; unsigned int j; + struct queue_map *qmap; pp = &qinfo->port_params.pipe_profiles[i]; - rte_red_free_q_params(pp, i); - for (j = 0; j < RTE_SCHED_QUEUES_PER_PIPE; j++) { - struct queue_wred_info *queue_wred = - &profile_wred->queue_wred[j]; - unsigned int k; - - for (k = 0; k < queue_wred->num_maps; k++) - free(queue_wred->dscp_grp_names[k]); + qos_free_q_params(pp, i); + qmap = &qinfo->queue_map[i]; + if (qmap && qmap->dscp_maps) { + for (j = 0; j < qmap->dscp_maps->num_maps; j++) + free(qmap->dscp_maps->dscp_grp_names[j]); + free(qmap->dscp_maps); } } - free(qinfo->wred_profiles); free(qinfo->port_params.pipe_profiles); free(qinfo->profile_rates); free(qinfo->profile_tc_rates); @@ -315,16 +1003,11 @@ struct sched_info *qos_sched_new(struct ifnet *ifp, { struct sched_info *qinfo; unsigned int i, j; - struct rte_sched_pipe_params *pipe_params; + struct qos_pipe_params *pipe_params; struct qos_rate_info *profile_rates; struct qos_tc_rate_info *profile_tc_rates; - int socketid = rte_eth_dev_socket_id(ifp->if_port); - char sched_name[32]; unsigned int queues; - if (socketid < 0) /* SOCKET_ID_ANY */ - socketid = 0; - qinfo = zmalloc_aligned(sizeof(struct sched_info)); if (!qinfo) goto nomem0; @@ -342,10 +1025,6 @@ struct sched_info *qos_sched_new(struct ifnet *ifp, if (!qinfo->subport) goto nomem1; - qinfo->wred_profiles = calloc(profiles, sizeof(*qinfo->wred_profiles)); - if (!qinfo->wred_profiles) - goto nomem1; - profile_rates = calloc(profiles, sizeof(struct qos_rate_info)); if (!profile_rates) goto nomem1; @@ -356,47 +1035,45 @@ struct sched_info *qos_sched_new(struct ifnet *ifp, goto nomem1; qinfo->profile_tc_rates = profile_tc_rates; - pipe_params = calloc(profiles, sizeof(struct rte_sched_pipe_params)); + pipe_params = 
calloc(profiles, sizeof(struct qos_pipe_params)); if (!pipe_params) goto nomem1; qinfo->port_params.pipe_profiles = pipe_params; - - /* XXX this is really unused by current DPDK code. */ - snprintf(sched_name, sizeof(sched_name), - "qos_port_%u", ifp->if_port); - qinfo->port_params.name = sched_name; - qinfo->enabled = false; - qinfo->port_params.socket = socketid; + qinfo->ifp = ifp; qinfo->port_params.frame_overhead = overhead; qinfo->port_params.n_subports_per_port = subports; qinfo->port_params.n_pipes_per_subport = pipes; qinfo->port_params.n_pipe_profiles = profiles; + qinfo->reset_port = QOS_INSTALL; rte_spinlock_init(&qinfo->stats_lock); + qinfo->port_params.qsize_type = QOS_QUEUE_SIZE_PACKETS; for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) qinfo->port_params.qsize[i] = DEFAULT_QSIZE; /* Default parms for pipes */ for (i = 0; i < profiles; i++) { - struct rte_sched_pipe_params *pp = &pipe_params[i]; + struct qos_pipe_params *pp = &pipe_params[i]; struct queue_map *qmap = &qinfo->queue_map[i]; - pp->tb_rate = qos_rate_set(&profile_rates[i], - UINT32_MAX, false, 0); - pp->tb_size = profile_rates[i].burst = DEFAULT_TBSIZE; - pp->tc_period = qos_period_set(&profile_rates[i], 10); + qos_abs_rate_save(&profile_rates[i], MAX_LINERATE); + pp->shaper.tb_rate = MAX_LINERATE; + pp->shaper.tb_size = qos_abs_burst_set(&profile_rates[i], + DEFAULT_TBSIZE); + pp->shaper.tc_period = qos_period_set(&profile_rates[i], + DEFAULT_PERIOD); #ifdef RTE_SCHED_SUBPORT_TC_OV - pp->tc_ov_weight = 0; + pp->shaper.tc_ov_weight = 0; #endif for (j = 0; j < RTE_SCHED_QUEUES_PER_PIPE; j++) pp->wrr_weights[j] = 1; for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j++) { - pp->tc_rate[j] = - qos_rate_set(&profile_tc_rates[i].tc_rate[j], - UINT32_MAX, false, 0); + pp->shaper.tc_rate[j] = MAX_LINERATE; + qos_abs_rate_save(&profile_tc_rates[i].tc_rate[j], + MAX_LINERATE); } qmap->dscp_enabled = 0; @@ -411,6 +1088,7 @@ struct sched_info *qos_sched_new(struct ifnet *ifp, & 
RTE_SCHED_TC_MASK; qmap->local_priority = 0; + qmap->designation = 0; /* * Set up the default pipe-queue to tc-n/wrr-0 qmap information @@ -419,7 +1097,7 @@ struct sched_info *qos_sched_new(struct ifnet *ifp, qmap->conf_ids[QMAP(j, 0)] = CONF_ID_Q_DEFAULT | (j * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS); - SLIST_INIT(&pp->qred_head); + SLIST_INIT(&pp->red_head); } for (i = 0; i < subports; i++) { @@ -441,21 +1119,26 @@ struct sched_info *qos_sched_new(struct ifnet *ifp, sp->profile_map = calloc(pipes, sizeof(uint8_t)); /* Default params */ - sp->params.tb_rate = qos_rate_set(&sp->subport_rate, UINT32_MAX, - false, 0); - sp->params.tb_size = sp->subport_rate.burst = DEFAULT_TBSIZE; - sp->params.tc_period = qos_period_set(&sp->subport_rate, 10); - + qos_abs_rate_save(&sp->subport_rate, MAX_LINERATE); + sp->params.tb_rate = MAX_LINERATE; + sp->params.tb_size = qos_abs_burst_set(&sp->subport_rate, + DEFAULT_TBSIZE); + sp->params.tc_period = qos_period_set(&sp->subport_rate, + DEFAULT_PERIOD); + + sp->qsize_type = QOS_QUEUE_SIZE_PACKETS; for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j++) { - sp->params.tc_rate[j] = - qos_rate_set(&sp->sp_tc_rates.tc_rate[j], - UINT32_MAX, false, 0); + qos_abs_rate_save(&sp->sp_tc_rates.tc_rate[j], + MAX_LINERATE); + sp->params.tc_rate[j] = MAX_LINERATE; sp->qsize[j] = 0; // Default to inherit from port } } DP_DEBUG(QOS, DEBUG, DATAPLANE, - "New Qos configuration %s\n", qinfo->port_params.name); + "New Qos configuration qos_port_%u\n", ifp->if_port); + + SLIST_INSERT_HEAD(&qos_qinfos.qinfo_head, qinfo, list); return qinfo; @@ -469,37 +1152,41 @@ struct sched_info *qos_sched_new(struct ifnet *ifp, /* Ensure the parameters are within acceptable bounds */ void qos_sched_subport_params_check( - struct rte_sched_subport_params *params, + struct qos_shaper_conf *params, struct qos_rate_info *config_rate, struct qos_rate_info *config_tc_rate, - uint16_t max_pkt_len, uint32_t bps) + uint16_t max_pkt_len, uint32_t max_burst_size, uint64_t bps, + 
struct sched_info *qinfo) { - uint32_t min_rate = (max_pkt_len * 1000) / params->tc_period; + uint32_t min_rate = (max_pkt_len * 1000 * 1000) / params->tc_period; uint32_t tc_period = 0, period = 0; unsigned int i; - params->tb_rate = qos_rate_get(config_rate, bps); + params->tb_rate = qos_rate_get(config_rate, bps, qinfo, true); /* squash rate down to actual line rate */ if (params->tb_rate > bps) params->tb_rate = bps; - params->tb_size = config_rate->burst; + params->tb_size = qos_burst_get(config_rate, params->tb_rate); if (params->tb_size < max_pkt_len) params->tb_size = max_pkt_len; + if (params->tb_size > max_burst_size) + params->tb_size = max_burst_size; + period = params->tc_period; for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) { params->tc_rate[i] = qos_rate_get(&config_tc_rate[i], - params->tb_rate); - if (params->tc_rate[i] > bps) - params->tc_rate[i] = bps; + params->tb_rate, qinfo, + false); if (params->tc_rate[i] > params->tb_rate) params->tc_rate[i] = params->tb_rate; if (params->tc_rate[i] < min_rate) { - tc_period = (max_pkt_len * 1000) / params->tc_rate[i]; + tc_period = (max_pkt_len * 1000 * 1000) / + params->tc_rate[i]; /* account for rounding, ensure non-zero */ tc_period++; if (tc_period > period) @@ -511,12 +1198,12 @@ void qos_sched_subport_params_check( } /* Allocate and initialize a handle to QoS scheduler. - * Only called by master thread. + * Only called by main thread. 
*/ int qos_sched_start(struct ifnet *ifp, uint64_t speed) { struct sched_info *qinfo = ifp->if_qos; - uint32_t bps; + uint64_t bps; uint16_t max_pkt_len; /* NB if_mtu_adjusted allows for any QinQ vlan headers @@ -538,7 +1225,7 @@ int qos_sched_start(struct ifnet *ifp, uint64_t speed) bps = (speed * 1000 * 1000) / 8; /* bytes/sec */ DP_DEBUG(QOS, INFO, DATAPLANE, - "Qos start %s rate = %"PRIu32" bytes/sec\n", + "Qos start %s rate = %"PRIu64" bytes/sec\n", ifp->if_name, bps); qinfo->port_params.mtu = ifp->if_mtu_adjusted; @@ -551,11 +1238,13 @@ int qos_sched_start(struct ifnet *ifp, uint64_t speed) return -1; } + qinfo->reset_port = QOS_NPF_READY; + return 0; } /* Cleanup scheduler when link goes down - * Use RCU to set the pointer because destroyed by master thread + * Use RCU to set the pointer because destroyed by main thread * but referenced by Tx thread */ void qos_sched_stop(struct ifnet *ifp) @@ -624,7 +1313,7 @@ static void qos_show_pipe_config(json_writer_t *wr, unsigned int subport, unsigned int pipe) { const struct subport_info *sinfo = &qinfo->subport[subport]; - struct rte_sched_pipe_params *p = + struct qos_pipe_params *p = qinfo->port_params.pipe_profiles + sinfo->profile_map[pipe]; unsigned int i; @@ -632,20 +1321,23 @@ static void qos_show_pipe_config(json_writer_t *wr, jsonw_start_object(wr); jsonw_name(wr, "tb_rate"); - jsonw_uint(wr, p->tb_rate); + jsonw_uint(wr, p->shaper.tb_rate); jsonw_name(wr, "tb_size"); - jsonw_uint(wr, p->tb_size); + jsonw_uint(wr, p->shaper.tb_size); jsonw_name(wr, "tc_rates"); jsonw_start_array(wr); for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) - jsonw_uint(wr, p->tc_rate[i]); + jsonw_uint(wr, p->shaper.tc_rate[i]); jsonw_end_array(wr); jsonw_name(wr, "tc_period"); - jsonw_uint(wr, p->tc_period); + jsonw_uint(wr, (p->shaper.tc_period / 1000)); + + jsonw_name(wr, "tc_period_us"); + jsonw_uint(wr, p->shaper.tc_period); jsonw_name(wr, "wrr_weights"); jsonw_start_array(wr); @@ -672,7 +1364,7 @@ static void 
qos_show_map(json_writer_t *wr, const struct sched_info *qinfo, * We only ever use the PCP map if it has been explicitly enabled. * See qos_npf_classify. */ - if (!qmap->pcp_enabled || !optimised_json) { + if (qmap->dscp_enabled || !optimised_json) { jsonw_name(wr, "dscp2q"); jsonw_start_array(wr); for (i = 0; i < MAX_DSCP; i++) @@ -687,6 +1379,16 @@ static void qos_show_map(json_writer_t *wr, const struct sched_info *qinfo, jsonw_uint(wr, qmap->pcp2q[i]); jsonw_end_array(wr); } + if (qmap->designation || !optimised_json) { + struct qos_pipe_params *params = + &qinfo->port_params.pipe_profiles[profile]; + + jsonw_name(wr, "designation"); + jsonw_start_array(wr); + for (i = 0; i < INGRESS_DESIGNATORS; i++) + jsonw_uint(wr, params->designation[i]); + jsonw_end_array(wr); + } } uint32_t qos_sched_calc_qindex(struct sched_info *qinfo, unsigned int subport, @@ -888,18 +1590,162 @@ static void qos_show_ifp_platform(json_writer_t *wr, jsonw_end_array(wr); /* subports */ } +struct qos_sched_map_info { + uint16_t vlan; + fal_object_t qos_map; +}; + static void show_ifp_qos(struct ifnet *ifp, void *arg) { struct qos_show_context *context = arg; json_writer_t *wr = context->wr; struct sched_info *qinfo = ifp->if_qos; - unsigned int i; + unsigned int i, num_maps = 0; + struct cds_lfht_iter iter; + struct if_vlan_feat *vlan_feat; + struct fal_attribute_t qos_map_attr; + struct qos_sched_map_info ingress_maps[VLAN_N_VID]; + bool is_ingress_map_exist = false; + struct qos_sched_map_info egress_maps[VLAN_N_VID]; + int rv; - if (qinfo == NULL) - return; + if (context->is_platform) { + qos_map_attr.id = FAL_PORT_ATTR_QOS_INGRESS_MAP_ID; + rv = fal_l2_get_attrs(ifp->if_index, 1, &qos_map_attr); - jsonw_name(wr, ifp->if_name); - jsonw_start_object(wr); + if (rv != -ENOENT && qos_map_attr.value.objid) { + ingress_maps[0].qos_map = qos_map_attr.value.objid; + ingress_maps[0].vlan = 0; + num_maps++; + } + + if (ifp->vlan_feat_table) { + cds_lfht_for_each_entry(ifp->vlan_feat_table, 
&iter, + vlan_feat, vlan_feat_node) { + struct fal_attribute_t qos_map_attr; + + qos_map_attr.id = + FAL_VLAN_FEATURE_ATTR_QOS_INGRESS_MAP_ID; + fal_vlan_feature_get_attr( + vlan_feat->fal_vlan_feat, 1, + &qos_map_attr); + if (qos_map_attr.value.objid) { + ingress_maps[num_maps].qos_map = + qos_map_attr.value.objid; + ingress_maps[num_maps].vlan = + vlan_feat->vlan; + num_maps++; + } + } + } + + if (!context->sent_sysdef_map) { + if (qos_im_sysdef) { + jsonw_name(wr, "sysdef-map"); + jsonw_start_object(wr); + jsonw_uint_field(wr, "vlan", VLAN_N_VID); + fal_qos_dump_map(qos_im_sysdef->map_obj, wr); + jsonw_end_object(wr); + context->sent_sysdef_map = true; + } + } + } + + if (qinfo != NULL || num_maps != 0) { + jsonw_name(wr, ifp->if_name); + jsonw_start_object(wr); + is_ingress_map_exist = true; + } + + if (context->is_platform && num_maps) { + jsonw_name(wr, "ingress-maps"); + jsonw_start_array(wr); + for (i = 0; i < num_maps; i++) { + jsonw_start_object(wr); + jsonw_uint_field(wr, "vlan", ingress_maps[i].vlan); + fal_qos_dump_map(ingress_maps[i].qos_map, wr); + jsonw_end_object(wr); + } + jsonw_end_array(wr); /* ingress maps */ + + } + + num_maps = 0; + if (context->is_platform) { + qos_map_attr.id = FAL_ROUTER_INTERFACE_ATTR_EGRESS_QOS_MAP; + rv = if_get_l3_intf_attr(ifp, 1, &qos_map_attr); + + if (rv != -EOPNOTSUPP && qos_map_attr.value.objid) { + egress_maps[num_maps].qos_map = + qos_map_attr.value.objid; + egress_maps[num_maps].vlan = ifp->if_vlan; + num_maps++; + } + + if (num_maps == 0) { + qos_map_attr.id = FAL_PORT_ATTR_QOS_EGRESS_MAP_ID; + rv = fal_l2_get_attrs(ifp->if_index, 1, &qos_map_attr); + + if (rv != -ENOENT && qos_map_attr.value.objid) { + egress_maps[0].qos_map = + qos_map_attr.value.objid; + egress_maps[0].vlan = 0; + num_maps++; + } + + if (ifp->vlan_feat_table) { + cds_lfht_for_each_entry(ifp->vlan_feat_table, + &iter, vlan_feat, vlan_feat_node) { + struct fal_attribute_t qos_map_attr; + + qos_map_attr.id = + 
FAL_VLAN_FEATURE_ATTR_QOS_EGRESS_MAP_ID; + fal_vlan_feature_get_attr( + vlan_feat->fal_vlan_feat, 1, + &qos_map_attr); + if (qos_map_attr.value.objid) { + egress_maps[num_maps].qos_map = + qos_map_attr.value.objid; + egress_maps[num_maps].vlan = + vlan_feat->vlan; + num_maps++; + } + } + } + } + + if (qinfo == NULL && num_maps == 0) { + if (!is_ingress_map_exist) + return; + jsonw_end_object(wr); /* ifname */ + return; + } + + if (num_maps) { + if (!is_ingress_map_exist) { + jsonw_name(wr, ifp->if_name); + jsonw_start_object(wr); + } + jsonw_name(wr, "egress-maps"); + jsonw_start_array(wr); + for (i = 0; i < num_maps; i++) { + jsonw_start_object(wr); + jsonw_uint_field(wr, "vlan", egress_maps[ + i].vlan); + fal_qos_dump_map(egress_maps[i].qos_map, wr); + jsonw_end_object(wr); + } + jsonw_end_array(wr); /* egress maps */ + + if (qinfo == NULL) { + jsonw_end_object(wr); /* ifname */ + return; + } + } + } + + if ((qinfo == NULL) && (num_maps == 0)) + return; /* Put "shaper" tag on to allow for future alternates */ jsonw_name(wr, "shaper"); @@ -963,43 +1809,26 @@ static struct qos_mark_map *qos_mark_map_find(char *map_name) { struct qos_mark_map *mark_map; - cds_list_for_each_entry(mark_map, &qos_mark_map_list_head, list) { + cds_list_for_each_entry_rcu(mark_map, &qos_mark_map_list_head, list) { if (strcmp(mark_map->map_name, map_name) == 0) return mark_map; } return NULL; } -static int qos_mark_map_store(char *map_name, uint64_t dscp_set, - uint8_t pcp_value) -{ - struct qos_mark_map *mark_map; - uint8_t dscp; - - mark_map = qos_mark_map_find(map_name); - if (!mark_map) { - /* Allocate enough memory for the mark_map and its name */ - mark_map = calloc(1, sizeof(*mark_map) + strlen(map_name) + 1); - if (!mark_map) { - DP_DEBUG(QOS, ERR, DATAPLANE, - "no memory for mark-map\n"); - return -ENOMEM; - } - strcpy(mark_map->map_name, map_name); - cds_list_add_tail_rcu(&mark_map->list, - &qos_mark_map_list_head); - } - for (dscp = 0; dscp < MAX_DSCP; dscp++) { - if (dscp_set 
& (1ul << dscp)) - mark_map->pcp_value[dscp] = pcp_value; - } - return 0; -} - static void qos_mark_map_delete_rcu(struct rcu_head *head) { struct qos_mark_map *mark_map = caa_container_of(head, struct qos_mark_map, obj_rcu); + struct dscp_grp_list *dscp_grp; + + if (mark_map->mark_obj) + qos_hw_del_map(mark_map->mark_obj); + + while ((dscp_grp = SLIST_FIRST(&mark_map->dscp_grps)) != NULL) { + SLIST_REMOVE_HEAD(&mark_map->dscp_grps, list); + free(dscp_grp); + } free(mark_map); } @@ -1020,21 +1849,88 @@ static int qos_mark_map_delete(char *map_name) return 0; } +static int qos_mark_map_store(char *map_name, enum egress_map_type type, + uint64_t dscp_set, char *grp_name, + uint8_t designation, + enum fal_packet_colour color, + uint8_t remark_value) +{ + struct qos_mark_map *mark_map; + uint8_t dscp; + + mark_map = qos_mark_map_find(map_name); + if (!mark_map) { + /* Allocate enough memory for the mark_map and its name */ + mark_map = calloc(1, sizeof(*mark_map) + strlen(map_name) + 1); + if (!mark_map) { + DP_DEBUG(QOS, ERR, DATAPLANE, + "no memory for mark-map\n"); + return -ENOMEM; + } + strcpy(mark_map->map_name, map_name); + cds_list_add_tail_rcu(&mark_map->list, + &qos_mark_map_list_head); + mark_map->type = type; + SLIST_INIT(&mark_map->dscp_grps); + } else if (mark_map->type != type) { + DP_DEBUG(QOS, ERR, DATAPLANE, + "Invalid mark-map type, types must be the same\n"); + return -EINVAL; + } + + if (type == EGRESS_DSCP) { + for (dscp = 0; dscp < MAX_DSCP; dscp++) { + if (dscp_set & (1ul << dscp)) + mark_map->pcp_value[dscp] = remark_value; + } + struct dscp_grp_list *dscp_grp; + dscp_grp = calloc(1, sizeof(*dscp_grp) + strlen(grp_name) + 1); + if (!dscp_grp) { + DP_DEBUG(QOS, ERR, DATAPLANE, + "Failed to allocate mark-map\n"); + qos_mark_map_delete(mark_map->map_name); + return -ENOMEM; + } + strcpy(dscp_grp->name, grp_name); + dscp_grp->pcp_val = remark_value; + SLIST_INSERT_HEAD(&mark_map->dscp_grps, dscp_grp, list); + } else { + int index = designation * 
FAL_NUM_PACKET_COLOURS + color; + struct qos_mark_map_entry *entry = + &mark_map->entries[index]; + + entry->des = designation; + entry->color = color; + entry->pcp_value = remark_value; + } + return 0; +} + static void show_qos_mark_map(struct qos_show_context *context) { json_writer_t *wr = context->wr; struct qos_mark_map *mark_map; - uint32_t i; + uint32_t i, num; jsonw_name(wr, "mark-maps"); jsonw_start_array(wr); - cds_list_for_each_entry(mark_map, &qos_mark_map_list_head, list) { + cds_list_for_each_entry_rcu(mark_map, &qos_mark_map_list_head, list) { jsonw_start_object(wr); jsonw_string_field(wr, "map-name", mark_map->map_name); + if (mark_map->type == EGRESS_DSCP) { + jsonw_string_field(wr, "map-type", "dscp"); + num = MAX_DSCP; + } else { + jsonw_string_field(wr, "map-type", "designation"); + num = FAL_QOS_MAP_DES_DP_VALUES; + } jsonw_name(wr, "pcp-values"); jsonw_start_array(wr); - for (i = 0; i < MAX_DSCP; i++) - jsonw_uint(wr, mark_map->pcp_value[i]); + for (i = 0; i < num; i++) + if (mark_map->type == EGRESS_DSCP) + jsonw_uint(wr, mark_map->pcp_value[i]); + else + jsonw_uint(wr, mark_map->entries[i].pcp_value); jsonw_end_array(wr); jsonw_end_object(wr); @@ -1042,45 +1938,390 @@ static void show_qos_mark_map(struct qos_show_context *context) jsonw_end_array(wr); } -/* Handle: "qos show [interface]" - * Output is in JSON - */ -static int cmd_qos_show(FILE *f, int argc, char **argv) +static void show_qos_buf_threshold( + struct qos_show_context *context) { - struct qos_show_context context; + json_writer_t *wr = context->wr; + char str[40]; + uint32_t threshold = 0; - if (argc >= 2 && !strcmp(argv[1], "platform")) { - context.is_platform = true; - argc--; - argv++; - } else - context.is_platform = false; + if (!qos_ext_buf_get_threshold(&threshold)) + sprintf(str, "Not configured yet"); + else + sprintf(str, "%d%%", threshold); - context.wr = jsonw_new(f); - if (!context.wr) - return -1; + jsonw_name(wr, "buf-threshold"); + jsonw_start_object(wr); + 
jsonw_string_field(wr, "threshold", str); + jsonw_end_object(wr); +} - jsonw_pretty(context.wr, true); +static void show_qos_buf_utilization( + struct qos_show_context *context) +{ + json_writer_t *wr = context->wr; + struct qos_external_buffer_congest_stats buf_stats; + struct qos_external_buffer_sample *samples = 0; + enum qos_ext_buf_evt_notify_mode n_mode = 0; - context.optimised_json = false; - if (argc == 1) - ifnet_walk(show_ifp_qos, &context); - else { - if (!strcmp(argv[1], "action-groups")) { - ifnet_walk(show_ifp_qos_act_grps, &context); - } else if (strcmp(argv[1], "mark-maps") == 0) { - show_qos_mark_map(&context); - } else { - while (--argc > 0) { - struct ifnet *ifp = ifnet_byifname(*++argv); + if (!qos_ext_buf_get_stats(&buf_stats)) { + DP_DEBUG(QOS, DEBUG, DATAPLANE, + "failed to get buffer-utilization\n"); + return; + } - if (!ifp) { - fprintf(f, "Unknown interface: %s\n", - *argv); - jsonw_destroy(&context.wr); - return -1; - } - show_ifp_qos(ifp, &context); + samples = buf_stats.buf_samples; + n_mode = buf_stats.cur_state.period_data.notify_mode; + + jsonw_name(wr, "ext-buf-stats"); + jsonw_start_object(wr); + jsonw_uint_field(wr, "total-buf-units", buf_stats.max_buf_desc); + jsonw_uint_field(wr, "total-rejected-packets", + buf_stats.rejected_pkt_cnt); + + if (n_mode == EXT_BUF_EVT_NOTIFY_MODE_MINUTE) + jsonw_string_field(wr, "mode", "1-minute"); + else if (n_mode == EXT_BUF_EVT_NOTIFY_MODE_TEN_SEC) + jsonw_string_field(wr, "mode", "10-seconds"); + else if (n_mode == EXT_BUF_EVT_NOTIFY_MODE_HOUR) { + if (buf_stats.cur_state.period_data.bad_sample_in_period) + jsonw_string_field(wr, "mode", + "1-hour (with pending SNMP notification)"); + else + jsonw_string_field(wr, "mode", "1-hour"); + } + + jsonw_name(wr, "latest-samples"); + jsonw_start_array(wr); + for (int i = 0; i < EXT_BUF_STATUS_STATS_CNT; i++) { + /* show latest sample at first */ + int idx = (buf_stats.cur_sample_idx - i + + EXT_BUF_STATUS_STATS_CNT) % EXT_BUF_STATUS_STATS_CNT; + 
jsonw_start_object(wr); + jsonw_uint_field(wr, "free", samples[idx].ext_buf_free); + jsonw_uint_field(wr, "used", buf_stats.max_buf_desc - + samples[idx].ext_buf_free); + jsonw_uint_field(wr, "uti-rate", + samples[idx].utilization_rate); + jsonw_uint_field(wr, "rejected", + samples[idx].ext_buf_pkt_reject); + jsonw_end_object(wr); + } + jsonw_end_array(wr); + + jsonw_end_object(wr); +} + +static void show_qos_ingress_map(struct qos_show_context *context, + struct qos_ingress_map *map) +{ + json_writer_t *wr = context->wr; + int i, j; + + jsonw_start_object(wr); + jsonw_string_field(wr, "name", map->name); + jsonw_string_field(wr, "type", + (map->type == INGRESS_DSCP) ? "dscp" : "pcp"); + jsonw_bool_field(wr, "system-default", map->sysdef); + jsonw_name(wr, "map"); + jsonw_start_array(wr); + for (i = 0; i < INGRESS_DESIGNATORS; i++) { + if (!map->designation[i].dps_in_use) + continue; + jsonw_start_object(wr); + jsonw_uint_field(wr, "designation", i); + jsonw_name(wr, "DPs"); + jsonw_start_array(wr); + for (j = 0; j < NUM_DPS; j++) { + if (!(map->designation[i].dps_in_use & (1 << j))) + continue; + jsonw_start_object(wr); + jsonw_uint_field(wr, "DP", j); + jsonw_uint_field(wr, "pcp/mask", + map->designation[i].mask[j]); + jsonw_end_object(wr); + } + jsonw_end_array(wr); + jsonw_end_object(wr); + } + jsonw_end_array(wr); + jsonw_end_object(wr); +} + +static struct qos_ingress_map *qos_lookup_map_byobj(fal_object_t objid) +{ + struct qos_ingress_map *map; + + cds_list_for_each_entry_rcu(map, &qos_ingress_maps, list) + if (map->map_obj == objid) + return map; + + return NULL; +} + +static struct qos_mark_map *qos_lookup_egress_map_byobj(fal_object_t objid) +{ + struct qos_mark_map *map; + + cds_list_for_each_entry_rcu(map, &qos_egress_maps, list) + if (map->mark_obj == objid) + return map; + + return NULL; +} + +static void show_qos_ingress_maps(struct qos_show_context *context, + struct ifnet *ifp, unsigned int vlan) +{ + json_writer_t *wr = context->wr; + 
fal_object_t objid; + struct qos_ingress_map *map; + + if (ifp) { + objid = qos_hw_get_att_ingress_map(ifp, vlan); + if (!objid) { + DP_DEBUG(QOS, ERR, DATAPLANE, + "No ingress-map created\n"); + return; + } + map = qos_lookup_map_byobj(objid); + if (!map) { + DP_DEBUG(QOS, ERR, DATAPLANE, + "No ingress-map matching obj %lu\n", objid); + return; + } + jsonw_name(wr, "ingress-maps"); + jsonw_start_array(wr); + + show_qos_ingress_map(context, map); + + jsonw_end_array(wr); + return; + } + + /* + * Let's see what map type we have if any + * The first if is the new api where we separate the classfication + * into ingress maps separate from the policy. + * The second is a legacy config where the classification is still + * part of the policy. + */ + if (!cds_list_empty(&qos_ingress_maps)) { + jsonw_name(wr, "ingress-maps"); + jsonw_start_array(wr); + + cds_list_for_each_entry_rcu(map, &qos_ingress_maps, list) + show_qos_ingress_map(context, map); + + jsonw_end_array(wr); + } else if (!SLIST_EMPTY(&qos_qinfos.qinfo_head)) { + struct sched_info *qinfo; + struct queue_map *qmap; + + /* + * We only support a single profile on this platform so + * the qmap will always be index 0 + */ + qinfo = SLIST_FIRST(&qos_qinfos.qinfo_head); + qmap = &qinfo->queue_map[0]; + if (!qmap || !qmap->dscp_enabled) { + DP_DEBUG(QOS, ERR, DATAPLANE, + "Invalid map type configuration\n"); + return; + } + qos_hw_show_legacy_map(qmap, wr); + } +} + +static void show_qos_egress_map(struct qos_show_context *context, + struct qos_mark_map *map) +{ + json_writer_t *wr = context->wr; + int i; + + jsonw_start_object(wr); + jsonw_string_field(wr, "name", map->map_name); + jsonw_string_field(wr, "type", + (map->type == EGRESS_DSCPGRP_DSCP) ? 
+ "dscp" : "pcp"); + jsonw_name(wr, "map"); + jsonw_start_array(wr); + for (i = 0; i < MAX_DSCP; i++) { + jsonw_start_object(wr); + jsonw_uint_field(wr, "indscp", i); + jsonw_uint_field(wr, "value", map->pcp_value[i]); + jsonw_end_object(wr); + } + jsonw_end_array(wr); + jsonw_end_object(wr); +} + +static void show_qos_egress_maps(struct qos_show_context *context, + struct ifnet *ifp, unsigned int vlan) +{ + json_writer_t *wr = context->wr; + fal_object_t objid; + struct qos_mark_map *map; + + if (ifp) { + if ((ifp->if_type == IFT_BRIDGE) || + (ifp->if_type == IFT_L2VLAN)) { + objid = ifp->egr_map_obj; + } else { + objid = qos_hw_get_att_egress_map(ifp, vlan); + if (!objid) { + DP_DEBUG(QOS, ERR, DATAPLANE, + "No egress-map created\n"); + return; + } + } + map = qos_lookup_egress_map_byobj(objid); + if (!map) { + DP_DEBUG(QOS, ERR, DATAPLANE, + "No egress-map matching obj %lu\n", objid); + return; + } + jsonw_name(wr, "egress-maps"); + jsonw_start_array(wr); + + show_qos_egress_map(context, map); + + jsonw_end_array(wr); + return; + } + + if (!cds_list_empty(&qos_egress_maps)) { + jsonw_name(wr, "egress-maps"); + jsonw_start_array(wr); + + cds_list_for_each_entry_rcu(map, &qos_egress_maps, list) { + DP_DEBUG(QOS, ERR, DATAPLANE, + "egress-map %s\n", map->map_name); + show_qos_egress_map(context, map); + } + + jsonw_end_array(wr); + } +} + +/* Handle: "qos show [interface]" + * "qos show platform" + * "qos show platform buf-threshold" + * "qos show platform buf-utilization" + * "qos show ingress-maps" + * "qos show [interface] ingress-map" + * "qos show policers [interface]" + * "qos show egress-maps" + * "qos show [interface] egress-map" + * Output is in JSON + */ +static int cmd_qos_show(FILE *f, int argc, char **argv) +{ + struct qos_show_context context; + + if (argc >= 2 && !strcmp(argv[1], "platform")) { + context.is_platform = true; + context.sent_sysdef_map = false; + argc--; + argv++; + } else + context.is_platform = false; + + context.wr = jsonw_new(f); + 
if (!context.wr) + return -1; + + jsonw_pretty(context.wr, true); + + context.optimised_json = false; + if (argc == 1) + dp_ifnet_walk(show_ifp_qos, &context); + else { + if (!strcmp(argv[1], "action-groups")) { + dp_ifnet_walk(show_ifp_qos_act_grps, &context); + } else if (strcmp(argv[1], "mark-maps") == 0) { + show_qos_mark_map(&context); + } else if (strcmp(argv[1], "buf-threshold") == 0) { + show_qos_buf_threshold(&context); + } else if (strcmp(argv[1], "buf-utilization") == 0) { + show_qos_buf_utilization(&context); + } else if (strcmp(argv[1], "ingress-maps") == 0) { + struct ifnet *ifp = NULL; + unsigned int vlan = 0; + + if (argc == 5) { + if ((strcmp("vlan", argv[2]) != 0) || + get_unsigned(argv[3], &vlan) < 0) { + fprintf(f, + "Invalid syntax interface\n"); + return -1; + } + ifp = dp_ifnet_byifname(argv[4]); + if (!ifp) { + fprintf(f, "Unknown interface: %s\n", + *argv); + return -1; + } + } + show_qos_ingress_maps(&context, ifp, vlan); + } else if (strcmp(argv[1], "egress-maps") == 0) { + struct ifnet *ifp = NULL; + unsigned int vlan = 0; + + if (argc == 5) { + if ((strcmp("vlan", argv[2]) != 0) || + get_unsigned(argv[3], &vlan) < 0) { + fprintf(f, + "Invalid syntax interface\n"); + return -1; + } + ifp = dp_ifnet_byifname(argv[4]); + if (!ifp) { + fprintf(f, "Unknown interface: %s\n", + *argv); + return -1; + } + } + show_qos_egress_maps(&context, ifp, vlan); + } else if (argc > 2 && !strcmp(argv[1], "policers")) { + argv += 2; + + struct ifnet *ifp = dp_ifnet_byifname(*argv); + if (!ifp) { + fprintf(f, "Unknown interface: %s\n", *argv); + return -1; + } + struct sched_info *qinfo = ifp->if_qos; + if (!qinfo) { + fprintf(f, "No qos on interface: %s\n", *argv); + return -1; + } + struct subport_info *sport; + unsigned int i; + for (i = 0; i < qinfo->port_params.n_subports_per_port; + i++) { + struct npf_act_grp *act_grp; + sport = &qinfo->subport[i]; + act_grp = sport->act_grp_list; + if (!act_grp) + continue; + + npf_action_group_show_policer(act_grp, 
+ &context); + } + } else if (argc == 2 && !strcmp(argv[1], "buffer-errors")) { + qos_hw_dump_buf_errors(context.wr); + } else { + while (--argc > 0) { + struct ifnet *ifp = dp_ifnet_byifname(*++argv); + + if (!ifp) { + fprintf(f, "Unknown interface: %s\n", + *argv); + jsonw_destroy(&context.wr); + return -1; + } + show_ifp_qos(ifp, &context); } } } @@ -1109,9 +2350,9 @@ static int cmd_qos_optimised_show(FILE *f, int argc, char **argv) context.is_platform = false; if (argc == 1) - ifnet_walk(show_ifp_qos, &context); + dp_ifnet_walk(show_ifp_qos, &context); else { - struct ifnet *ifp = ifnet_byifname(*++argv); + struct ifnet *ifp = dp_ifnet_byifname(*++argv); if (!ifp) { fprintf(f, "Unknown interface: %s\n", @@ -1197,7 +2438,7 @@ static int cmd_qos_clear(FILE *f, int argc, char **argv) /* * No interface name, clear all interfaces. */ - ifnet_walk(clear_ifp_qos_stats, NULL); + dp_ifnet_walk(clear_ifp_qos_stats, NULL); } else if (argc == 2) { /* * Clear the selected interface. @@ -1209,7 +2450,7 @@ static int cmd_qos_clear(FILE *f, int argc, char **argv) struct ifnet *ifp; /* Initial interface name check */ - ifp = ifnet_byifname(*++argv); + ifp = dp_ifnet_byifname(*++argv); if (!ifp) { fprintf(f, "Unknown interface: %s\n", *argv); return -1; @@ -1233,7 +2474,7 @@ static int cmd_qos_clear(FILE *f, int argc, char **argv) } /* Get the trunk interface */ - ifp = ifnet_byifname(if_name); + ifp = dp_ifnet_byifname(if_name); if (!ifp) { fprintf(f, "Unknown interface: %s\n", *argv); return -1; @@ -1270,12 +2511,12 @@ static int cmd_qos_hw(FILE *f, int argc, char **argv) context.optimised_json = false; if (argc == 1) { - ifnet_walk(show_ifp_qos_hw, &context); + dp_ifnet_walk(show_ifp_qos_hw, &context); } else if (argc == 2) { struct ifnet *ifp; /* Initial interface name check */ - ifp = ifnet_byifname(*++argv); + ifp = dp_ifnet_byifname(*++argv); if (!ifp) { fprintf(f, "Unknown interface: %s\n", *argv); jsonw_destroy(&context.wr); @@ -1359,23 +2600,39 @@ static int 
cmd_qos_port(struct ifnet *ifp, int argc, char **argv) { unsigned int subports = 0, pipes = 0, profiles = 1; int32_t overhead = RTE_SCHED_FRAME_OVERHEAD_DEFAULT; + bool hw_config = false; int ret; /* * Expected command format: * - * "port subports pipes profiles [overhead ]" + * "port subports pipes profiles [overhead ] " * * - port-id * - number of configured subports * - number of configured pipes * - number of configured profiles * - frame-overhead + * - queue limit type, "ql_packets" or "ql_bytes" + * + * Note that we can currently only support queue limits in + * bytes in hardware and only support queue limits in packets + * in software (DPDK). So use this setting to force the port to + * have hardware or software qos. If the current port type is + * such that the config cannot be applied, ie. byte limits on + * a software port or packet limits on a hw port then the port + * will not be qos enabled unless/until hardware forwarding is + * enabled/disabled. */ --argc, ++argv; /* skip "port" */ while (argc > 0) { unsigned int value; + if (argc == 1 && !strncmp(argv[0], "ql_", 3)) { + hw_config = !strcmp(argv[0], "ql_bytes"); + break; + } + if (argc < 2) { DP_DEBUG(QOS, ERR, DATAPLANE, "missing value qos port ... 
%s\n", argv[0]); @@ -1414,19 +2671,16 @@ static int cmd_qos_port(struct ifnet *ifp, int argc, char **argv) argc -= 2, argv += 2; } - if (subports == 0 || subports > ETHER_MAX_VLAN_ID) { + if (subports == 0 || subports > RTE_ETHER_MAX_VLAN_ID) { DP_DEBUG(QOS, ERR, DATAPLANE, "bad subports value: %u\n", subports); return -EINVAL; } - /* - * ENODEV means there's no hardware support for this device - */ - ret = qos_hw_port(ifp, subports, pipes, profiles, overhead); - if (ret == -ENODEV) - return qos_dpdk_port(ifp, subports, pipes, profiles, overhead); - + if (hw_config) + ret = qos_hw_port(ifp, subports, pipes, profiles, overhead); + else + ret = qos_dpdk_port(ifp, subports, pipes, profiles, overhead); return ret; } @@ -1449,47 +2703,39 @@ static int cmd_qos_subport_queue(struct subport_info *sinfo, unsigned int qid, */ /* parse qos subport S queue Q rate R */ - bool rate_given = false; - bool rate_is_percent = false; + struct qos_shaper_conf *params = &sinfo->params; if (argc < 4) { DP_DEBUG(QOS, ERR, DATAPLANE, "queue missing tc rate\n"); return -EINVAL; } - if (strcmp(argv[2], "rate") == 0) { - rate_given = true; - } else if (strcmp(argv[2], "percent") == 0) { - rate_given = true; - rate_is_percent = true; + if (qid >= RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE) { + RTE_LOG(ERR, QOS, "traffic-class %u out of range\n", qid); + return -EINVAL; } - if (rate_given) { - unsigned int rate; - struct rte_sched_subport_params *params = &sinfo->params; - - if (get_unsigned(argv[3], &rate) < 0) { - RTE_LOG(ERR, QOS, "missing rate for queue\n"); - return -EINVAL; - } + if (strcmp(argv[2], "percent") == 0) { + float rate; - if (rate_is_percent && rate > 100) { + if (get_float(argv[3], &rate) < 0 || + rate < 0 || rate > 100) { RTE_LOG(ERR, QOS, - "rate percentage %u out of range\n", rate); - return -EINVAL; + "rate percentage %s out of range\n", argv[3]); + return -EINVAL; } + qos_percent_rate_save(&sinfo->sp_tc_rates.tc_rate[qid], + rate); + } else if (strcmp(argv[2], "rate") == 0) { + 
unsigned long rate; - if (qid >= RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE) { - RTE_LOG(ERR, QOS, "traffic-class %u out of range\n", - qid); + if (get_unsigned_long(argv[3], &rate) < 0) { + RTE_LOG(ERR, QOS, "missing rate for queue\n"); return -EINVAL; } - params->tc_rate[qid] = - qos_rate_set( - &sinfo->sp_tc_rates.tc_rate[qid], - rate, rate_is_percent, - params->tb_rate); + qos_abs_rate_save(&sinfo->sp_tc_rates.tc_rate[qid], rate); + params->tc_rate[qid] = rate; } else { RTE_LOG(ERR, QOS, "unknown subport queue parameter: '%s'\n", argv[2]); @@ -1514,8 +2760,11 @@ static int cmd_qos_subport(struct ifnet *ifp, int argc, char **argv) /* * Expected command format: * + * "subport auto size [period ]" * "subport rate size [period ]" + * "subport rate msec [period ]" * "subport percent size [period ]" + * "subport percent msec [period ]" * "subport queue rate size " * "subport queue percent size " * "subport mark-map @@ -1523,13 +2772,14 @@ static int cmd_qos_subport(struct ifnet *ifp, int argc, char **argv) * - subport-id * - subport shaper bandwidth rate * - subport shaper max-burst size - * - subport token-bucket period + * - subport token-bucket period in microseconds * - traffic-class-id (0..3) * - traffic-class shaper bandwidth rate * - traffic-class shaper max-burst size (not-used) * - mark-map name * - subport shaper percentage bandwidth rate * - traffic class shaper percentage bandwidth rate + * - subport shaper max-burst size in msec */ --argc, ++argv; /* skip "subport" */ if (argc < 2) { @@ -1551,7 +2801,7 @@ static int cmd_qos_subport(struct ifnet *ifp, int argc, char **argv) --argc, ++argv; struct subport_info *sinfo = qinfo->subport + subport; - struct rte_sched_subport_params *params = &sinfo->params; + struct qos_shaper_conf *params = &sinfo->params; while (argc > 0) { unsigned int value; @@ -1576,41 +2826,55 @@ static int cmd_qos_subport(struct ifnet *ifp, int argc, char **argv) * Save the mark-map pointer in the subport */ sinfo->mark_map = mark_map; - } else 
{ - if (get_unsigned(argv[1], &value) < 0) { - RTE_LOG(ERR, QOS, "number expected after %s\n", - argv[0]); + } else if (strcmp(argv[0], "percent") == 0) { + float percent_bw; + + if (get_float(argv[1], &percent_bw) < 0 || + percent_bw < 0 || percent_bw > 100) { + DP_DEBUG(QOS, ERR, DATAPLANE, + "rate percentage %s out of range\n", argv[1]); return -EINVAL; } + /* bytes/sec */ + qos_percent_rate_save(&sinfo->subport_rate, percent_bw); + } else if (strcmp(argv[0], "auto") == 0) { + sinfo->auto_speed = true; + argc--, argv++; + continue; + } else if (strcmp(argv[0], "rate") == 0) { + unsigned long rate; - if (strcmp(argv[0], "rate") == 0) { - /* bytes/sec */ - params->tb_rate = - qos_rate_set(&sinfo->subport_rate, - value, false, 0); - } else if (strcmp(argv[0], "percent") == 0) { - /* bytes/sec */ - params->tb_rate = - qos_rate_set(&sinfo->subport_rate, - value, true, - ifp->if_qos->port_params.rate); - } else if (strcmp(argv[0], "size") == 0) { - /* credits (bytes) */ - params->tb_size = sinfo->subport_rate.burst = - value; - } else if (strcmp(argv[0], "period") == 0) { - params->tc_period = - qos_period_set(&sinfo->subport_rate, - value); - } else if (strcmp(argv[0], "queue") == 0) { - /* - * Parse qos subport S queue Q rate R - * Nothing more to parse after queue so can - * just return. 
- */ - return cmd_qos_subport_queue(sinfo, value, - argc, argv); + if (get_unsigned_long(argv[1], &rate)) { + RTE_LOG(ERR, QOS, "number expected after %s\n", + argv[0]); + return -EINVAL; } + /* bytes/sec */ + qos_abs_rate_save(&sinfo->subport_rate, rate); + params->tb_rate = rate; + } else if (get_unsigned(argv[1], &value) < 0) { + RTE_LOG(ERR, QOS, "number expected after %s\n", + argv[0]); + return -EINVAL; + } else if (strcmp(argv[0], "size") == 0) { + /* credits (bytes) */ + params->tb_size = + qos_abs_burst_set(&sinfo->subport_rate, value); + } else if (strcmp(argv[0], "msec") == 0) { + /* credits (bytes) */ + params->tb_size = + qos_time_burst_set(&sinfo->subport_rate, value, + params->tb_rate); + } else if (strcmp(argv[0], "period") == 0) { + params->tc_period = + qos_period_set(&sinfo->subport_rate, value); + } else if (strcmp(argv[0], "queue") == 0) { + /* + * Parse qos subport S queue Q rate R + * Nothing more to parse after queue so can + * just return. + */ + return cmd_qos_subport_queue(sinfo, value, argc, argv); } argc -= 2, argv += 2; } @@ -1660,8 +2924,13 @@ static int cmd_qos_pipe(struct ifnet *ifp, int argc, char **argv) DP_DEBUG(QOS, ERR, DATAPLANE, "profile %u out of range %u\n", profile, qinfo->port_params.n_pipe_profiles); else { + struct queue_map *qmap = &qinfo->queue_map[profile]; + qinfo->subport[subport].profile_map[pipe] = profile; qinfo->subport[subport].pipe_configured[pipe] = true; + /* Default map is DSCP */ + if (!qmap->pcp_enabled && !qmap->designation) + qmap->dscp_enabled = 1; return 0; } return -EINVAL; @@ -1705,13 +2974,14 @@ static int cmd_qos_profile_queue(struct sched_info *qinfo, unsigned int profile, * "queue rate size " * "queue percent size " * "queue wrr-weight " - * "queue dscp-group " + * "queue dscp-group " + * "queue drop-prec " * "queue wred-weight " * * - traffic-class-id (0..3) * - traffic-class shaper bandwidth rate * - traffic-class burst size (not-used) - * - qmap (wrr-id << 3 | tc_id) + * - qmap: (dp << 5) | 
(wrr-queue-id << 2) | tc-id (0x0-0x3) * - pipe-queue's wrr-weight (1..100) * - Name of the DSCP resource group * - wred max threshold (1..8191) @@ -1719,24 +2989,21 @@ static int cmd_qos_profile_queue(struct sched_info *qinfo, unsigned int profile, * - wred mark probability (1..255) * - wred filter weight (1..12) * - traffic-class shaper percentage bandwidth rate + * - drop precedence; "green", "yellow" or "red" + * - units ("bytes", "packets" or "usec") */ - struct rte_sched_pipe_params *pipe + struct qos_pipe_params *pipe = qinfo->port_params.pipe_profiles + profile; struct qos_rate_info *pipe_tc_rates = qinfo->profile_tc_rates[profile].tc_rate; - bool rate_given = false; - bool rate_is_percent = false; - if (strcmp(argv[2], "rate") == 0) { - rate_given = true; - } else if (strcmp(argv[2], "percent") == 0) { - rate_given = true; - rate_is_percent = true; + if (argc < 4) { + DP_DEBUG(QOS, ERR, DATAPLANE, "not enough arguments\n"); + return -EINVAL; } - if (rate_given) { - unsigned int rate; - bool rate_valid = false; + if (strcmp(argv[2], "percent") == 0) { + float percent_bw; if (value >= RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE) { DP_DEBUG(QOS, ERR, DATAPLANE, @@ -1744,23 +3011,30 @@ static int cmd_qos_profile_queue(struct sched_info *qinfo, unsigned int profile, return -EINVAL; } - if (argc >= 4) { - if (get_unsigned(argv[3], &rate) == 0) { - if (!rate_is_percent || rate <= 100) - rate_valid = true; - } + if (get_float(argv[3], &percent_bw) < 0 || + percent_bw < 0 || percent_bw > 100) { + DP_DEBUG(QOS, ERR, DATAPLANE, + "rate percentage %s out of range\n", argv[3]); + return -EINVAL; + } + + qos_percent_rate_save(&pipe_tc_rates[value], percent_bw); + } else if (strcmp(argv[2], "rate") == 0) { + unsigned long rate; + + if (value >= RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE) { + DP_DEBUG(QOS, ERR, DATAPLANE, + "traffic-class %u out of range\n", value); + return -EINVAL; } - if (!rate_valid) { - const char *err_msg = rate_is_percent ? 
- "bad percentage rate for queue" : - "bad rate for queue"; - DP_DEBUG(QOS, ERR, DATAPLANE, "%s\n", err_msg); + if (get_unsigned_long(argv[3], &rate) < 0) { + DP_DEBUG(QOS, ERR, DATAPLANE, + "bad rate %s for queue\n", argv[3]); return -EINVAL; } - pipe->tc_rate[value] = qos_rate_set(&pipe_tc_rates[value], - rate, rate_is_percent, - pipe->tb_rate); + qos_abs_rate_save(&pipe_tc_rates[value], rate); + pipe->shaper.tc_rate[value] = rate; } else if (strcmp(argv[2], "wrr-weight") == 0) { unsigned int weight; unsigned int qindex; @@ -1773,7 +3047,7 @@ static int cmd_qos_profile_queue(struct sched_info *qinfo, unsigned int profile, "q mask 0x%x out of range\n", value); return -EINVAL; } - if (argc < 4 || get_unsigned(argv[3], &weight) < 0) { + if (get_unsigned(argv[3], &weight) < 0) { DP_DEBUG(QOS, ERR, DATAPLANE, "bad weight for queue\n"); return -EINVAL; } @@ -1796,129 +3070,109 @@ static int cmd_qos_profile_queue(struct sched_info *qinfo, unsigned int profile, value, true)) return -EINVAL; } - } else if (strcmp(argv[2], "dscp-group") == 0) { + } else if ((strcmp(argv[2], "dscp-group") == 0) || + (strcmp(argv[2], "drop-prec") == 0)) { unsigned int qmax, qmin, prob; unsigned int qindex; - uint64_t dscp_set; + bool wred_per_dscp; + uint64_t dscp_set = 0; + uint8_t dp = 0; int err; - struct rte_red_pipe_params *qred_info; - struct profile_wred_info *profile_wred; - struct queue_wred_info *queue_wred; - uint8_t map_index; + struct qos_red_pipe_params *qred_info; + enum qos_queue_size_type qsize_type; - if (argc < 7 || + if (argc < 8 || get_unsigned(argv[5], &qmax) < 0 || get_unsigned(argv[6], &qmin) < 0 || - get_unsigned(argv[7], &prob) < 0) { + get_unsigned(argv[7], &prob) < 0 || + !qos_qsize_type_get(argv[4], &qsize_type)) { DP_DEBUG(QOS, ERR, DATAPLANE, "Invalid per queue RED input\n"); return -EINVAL; } - err = npf_dscp_group_getmask(argv[3], &dscp_set); - if (err) { - DP_DEBUG(QOS, ERR, DATAPLANE, - "dscp mask retrieval failed\n"); - return -EINVAL; - } - /* - * Store 
the wred-map information for the DPDK - */ - qindex = q_from_mask(value); - profile_wred = &qinfo->wred_profiles[profile]; - queue_wred = &profile_wred->queue_wred[qindex]; - map_index = queue_wred->num_maps; - if (!strcmp(argv[4], "packets")) { - qred_info = rte_red_find_q_params(pipe, qindex); - if (!qred_info) - qred_info = rte_red_alloc_q_params(pipe, - qindex); - if (!qred_info) - return -EINVAL; - err = rte_red_init_q_params(&qred_info->red_q_params, - qmax, qmin, prob, dscp_set, argv[3]); - if (err < 0) + wred_per_dscp = strcmp(argv[2], "dscp-group") == 0; + + if (wred_per_dscp) { + err = npf_dscp_group_getmask(argv[3], &dscp_set); + if (err) { + DP_DEBUG(QOS, ERR, DATAPLANE, + "dscp mask retrieval failed\n"); return -EINVAL; - queue_wred->unit = WRED_PACKETS; - } else if (!strcmp(argv[4], "bytes")) { + } + } else { /* - * Store the wred-map information for the FAL + * If we are using ingress maps, the wred parameters + * are identified directly against a drop precedence + * (colour) rather than a dscp-group as the same + * dscp group could classify to different colours in + * different ingress maps. 
*/ - uint8_t name_len; - char *name_ptr; - struct red_params *red_ptr; - - if (map_index > QOS_MAX_DROP_PRECEDENCE) { + for (dp = 0; dp < NUM_DPS; dp++) { + if (!strcmp(argv[3], qos_dps[dp])) + break; + } + if (dp == NUM_DPS) { DP_DEBUG(QOS, ERR, DATAPLANE, - "profile %u queue %u has too many" - " wred-maps\n", - profile, qindex); + "Invalid drop-precedence value\n"); return -EINVAL; } - name_len = strlen(argv[3]); - name_ptr = malloc(name_len + 1); - if (name_ptr == NULL) { - DP_DEBUG(QOS, ERR, DATAPLANE, - "out of memory\n"); - return -ENOMEM; + } + + /* + * Store the wred-map information for the DPDK + */ + qindex = q_from_mask(value); + qred_info = qos_red_find_q_params(pipe, qindex); + if (!qred_info) + qred_info = qos_red_alloc_q_params(pipe, qindex); + if (!qred_info) + return -EINVAL; + + err = qos_red_init_q_params(&qred_info->red_q_params, + qsize_type, qmax, qmin, + prob, wred_per_dscp, dscp_set, + argv[3], dp); + + if (err < 0) { + if (qred_info->red_q_params.num_maps == 0) { + SLIST_REMOVE_HEAD(&pipe->red_head, list); + free(qred_info); } - strcpy(name_ptr, argv[3]); - queue_wred->dscp_grp_names[map_index] = name_ptr; - red_ptr = - &queue_wred->params.map_params_bytes[map_index]; - red_ptr->min_th = qmin; - red_ptr->max_th = qmax; - red_ptr->maxp_inv = prob; - queue_wred->num_maps++; - queue_wred->unit = WRED_BYTES; - } else { - DP_DEBUG(QOS, ERR, DATAPLANE, "Invalid unit field\n"); return -EINVAL; } + DP_DEBUG(QOS, DEBUG, DATAPLANE, "per Q red prof %d dscp-grp %s %u %u prob %u " - "mask %"PRIx64", map %u\n", profile, argv[3], qmin, - qmax, prob, dscp_set, map_index); + "mask %"PRIx64"\n", profile, argv[3], qmin, + qmax, prob, dscp_set); } else if (strcmp(argv[2], "wred-weight") == 0) { unsigned int wred_weight; unsigned int qindex; - struct rte_red_pipe_params *qred_info; - struct rte_red_q_params *qred; + struct qos_red_pipe_params *qred_info; + struct qos_red_q_params *qred; int i; - struct profile_wred_info *profile_wred; - struct queue_wred_info 
*queue_wred; - if (argc < 3 || get_unsigned(argv[3], &wred_weight) < 0) { + if (get_unsigned(argv[3], &wred_weight) < 0) { DP_DEBUG(QOS, ERR, DATAPLANE, "Invalid per queue RED weight\n"); return -EINVAL; } qindex = q_from_mask(value); - profile_wred = &qinfo->wred_profiles[profile]; - queue_wred = &profile_wred->queue_wred[qindex]; + qred_info = qos_red_find_q_params(pipe, qindex); + if (!qred_info) { + DP_DEBUG(QOS, ERR, DATAPLANE, + "Invalid wred-weight command\n"); + return -EINVAL; + } - /* - * Store the queue's filter weight for the DPDK - */ - if (queue_wred->unit == WRED_PACKETS) { - qred_info = rte_red_find_q_params(pipe, qindex); - if (!qred_info) { - DP_DEBUG(QOS, ERR, DATAPLANE, - "Invalid wred-weight command\n"); - return -EINVAL; - } - for (i = 0, qred = &qred_info->red_q_params; - i < qred->num_maps; i++) { + for (i = 0, qred = &qred_info->red_q_params; + i < NUM_DPS; i++) { + if (qred->dps_in_use & (1 << i)) qred->qparams[i].wq_log2 = wred_weight; - } - } else { - - /* - * Store the queue's filter weight for the FAL - */ - queue_wred->filter_weight = wred_weight; } + qred->filter_weight = wred_weight; } else { DP_DEBUG(QOS, ERR, DATAPLANE, "unknown profile queue parameter: '%s'\n", argv[2]); @@ -1927,6 +3181,50 @@ static int cmd_qos_profile_queue(struct sched_info *qinfo, unsigned int profile, return 0; } +static int cmd_qos_profile_designation(struct queue_map *qmap, + struct qos_pipe_params *pipe, + int argc, char **argv) +{ + unsigned int des; + unsigned int value; + + /* + * Expected command format: + * + * "qos profile designation queue " + * + * - port id + * - profile id + * - designation, classifier to queue (0..7) + * - queue, tc and wrr mask + */ + if ((get_unsigned(argv[0], &des) < 0) || des > MAX_DESIGNATOR) { + DP_DEBUG(QOS, ERR, DATAPLANE, "Invalid designation id: %s\n", + argv[0]); + return -EINVAL; + } + argc--; argv++; + + if (strcmp(argv[0], "queue") != 0) { + DP_DEBUG(QOS, ERR, DATAPLANE, "Invalid designation cmd: %s\n", + 
argv[0]); + return -EINVAL; + } + argc--; argv++; + + if (get_unsigned(argv[0], &value) < 0) { + DP_DEBUG(QOS, ERR, DATAPLANE, "Invalid queue index: %s\n", + argv[0]); + return -EINVAL; + } + + pipe->designation[des] = value; + pipe->des_set |= (1 << des); + qmap->designation = 1; + + return 0; +} + static int cmd_qos_profile(struct ifnet *ifp, int argc, char **argv) { struct sched_info *qinfo = ifp->if_qos; @@ -1941,23 +3239,28 @@ static int cmd_qos_profile(struct ifnet *ifp, int argc, char **argv) * Expected command formats: * * "profile rate size [period ]" + * "profile rate msec [period ]" * "profile percent size [period ]" + * "profile percent msec [period ]" * "profile queue rate size " * "profile [queue wrr-weight ]" - * "profile [queue dscp-group

]" + * "profile [queue dscp-group

]" + * "profile [queue drop-prec

]" * "profile [queue wred-weight ]" * "profile [over-weight ]" * "profile [pcp ]" * "profile [dscp ]" + * "profile [dscp-group ]" + * "profile designation queue * * - profile-id * - profile shaper bandwidth rate * - profile shaper max-burst size - * - profile token-bucket period + * - profile token-bucket period in microseconds * - traffic-class-id (0..3) * - traffic-class shaper bandwidth rate * - traffic-class burst size (not-used) - * - (dp << 5) | (wrr-queue-id << 3) | traffic-class-id (0x0..0x1F) + * - qmap: (dp << 5) | (wrr-queue-id << 2) | tc-id (0x0-0x3) * - pipe-queue's wrr-weight * - profile overweight value * - PCP value (0..7) @@ -1968,6 +3271,10 @@ static int cmd_qos_profile(struct ifnet *ifp, int argc, char **argv) *

- wred mark probability (1..255) * - wred filter weight (1..12) * - profile shaper percentage bandwidth rate + * - profile shaper max-burst size in msec + * - classification value used to determine queue + * - drop precedence; "green", "yellow" or "red" + * - units ("bytes", "packets" or "usec") */ --argc, ++argv; /* skip "profile" */ if (argc < 2) { @@ -1986,13 +3293,11 @@ static int cmd_qos_profile(struct ifnet *ifp, int argc, char **argv) } --argc, ++argv; /* skip profile id */ - struct rte_sched_pipe_params *pipe + struct qos_pipe_params *pipe = qinfo->port_params.pipe_profiles + profile; while (argc > 0) { unsigned int value; - bool rate_given = false; - bool rate_is_percent = false; if (argc < 2) { DP_DEBUG(QOS, ERR, DATAPLANE, @@ -2000,41 +3305,91 @@ static int cmd_qos_profile(struct ifnet *ifp, int argc, char **argv) return -EINVAL; } - if (get_unsigned(argv[1], &value) < 0) { - DP_DEBUG(QOS, ERR, DATAPLANE, - "number expected after %s\n", argv[0]); - return -EINVAL; - } + if (strcmp(argv[0], "dscp-group") == 0) { + unsigned int q; + uint64_t dscp_mask, i; + int err, j; - if (strcmp(argv[0], "rate") == 0) { - rate_given = true; - } else if (strcmp(argv[0], "percent") == 0) { - rate_given = true; - rate_is_percent = true; + err = npf_dscp_group_getmask(argv[1], &dscp_mask); + if (err) { + DP_DEBUG(QOS, ERR, DATAPLANE, + "Failed to extract dscp mask from group\n"); + return -EINVAL; + } + if (get_unsigned(argv[2], &q) < 0) { + DP_DEBUG(QOS, ERR, DATAPLANE, + "missing queue for dscp-group\n"); + return -EINVAL; + } + if (!valid_qmap(q)) + return -EINVAL; + + for (i = 1, j = 0; j <= 63; i = i << 1, j++) { + if (dscp_mask & i) { + if (!qos_sched_profile_dscp_map_set + (qinfo, profile, j, + q, false)) { + DP_DEBUG(QOS, ERR, DATAPLANE, + "profile_dscp_set failed\n"); + return -1; + } + } + } + + if (qos_sched_setup_dscp_map(qinfo, profile, dscp_mask, + argv[1], (uint8_t)q)) { + DP_DEBUG(QOS, ERR, DATAPLANE, + "dscp map setup failed\n"); + return -1; + } + + 
DP_DEBUG(QOS, DEBUG, DATAPLANE, + "map dscp-group %s %"PRIx64" %x\n", + argv[1], dscp_mask, q); + break; /* don't continue parsing line */ } - if (rate_given) { - if (rate_is_percent && value > 100) { + if (strcmp(argv[0], "percent") == 0) { + float percent_bw; + + if (get_float(argv[1], &percent_bw) < 0 || + percent_bw < 0 || percent_bw > 100) { DP_DEBUG(QOS, ERR, DATAPLANE, - "bad percentage rate for queue\n"); + "rate percentage %s out of range\n", argv[1]); return -EINVAL; } /* bytes/sec */ - pipe->tb_rate = qos_rate_set( - &qinfo->profile_rates[profile], - value, rate_is_percent, - qinfo->port_params.rate); - } else if (strcmp(argv[0], "size") == 0) { - pipe->tb_size = qinfo->profile_rates[profile].burst = - value; /*credits*/ + qos_percent_rate_save(&qinfo->profile_rates[profile], + percent_bw); + } else if (strcmp(argv[0], "rate") == 0) { + unsigned long rate; + if (get_unsigned_long(argv[1], &rate)) { + DP_DEBUG(QOS, ERR, DATAPLANE, + "Failed to retrieve rate %s\n", argv[1]); + return -EINVAL; + } + qos_abs_rate_save(&qinfo->profile_rates[profile], rate); + pipe->shaper.tb_rate = rate; + } else if (get_unsigned(argv[1], &value) < 0) { + DP_DEBUG(QOS, ERR, DATAPLANE, + "number expected after %s\n", argv[0]); + return -EINVAL; + } else if (strcmp(argv[0], "size") == 0) { + pipe->shaper.tb_size = qos_abs_burst_set( + &qinfo->profile_rates[profile], + value); /*credits*/ + } else if (strcmp(argv[0], "msec") == 0) { + pipe->shaper.tb_size = qos_time_burst_set( + &qinfo->profile_rates[profile], + value, pipe->shaper.tb_rate); } else if (strcmp(argv[0], "period") == 0) { - pipe->tc_period = qos_period_set( - &qinfo->profile_rates[profile], - value); /* ms */ + pipe->shaper.tc_period = + qos_period_set(&qinfo->profile_rates[profile], + value); /* microseconds */ #ifdef RTE_SCHED_SUBPORT_TC_OV } else if (strcmp(argv[0], "over-weight") == 0) { - pipe->tc_ov_weight = value; + pipe->shaper.tc_ov_weight = value; #endif } else if (strcmp(argv[0], "pcp") == 0) { unsigned 
int q; @@ -2088,6 +3443,11 @@ static int cmd_qos_profile(struct ifnet *ifp, int argc, char **argv) return status; break; /* don't continue parsing line */ + } else if (strcmp(argv[0], "designation") == 0) { + argc--; argv++; + return cmd_qos_profile_designation( + &qinfo->queue_map[profile], + pipe, argc, argv); } else { DP_DEBUG(QOS, ERR, DATAPLANE, "unknown pipe parameter: %s\n", argv[0]); @@ -2208,46 +3568,52 @@ static int cmd_qos_match(struct ifnet *ifp, int argc, char **argv) } /* configure RED parameters */ -static int cmd_qos_red(struct rte_red_params red_params[][e_RTE_METER_COLORS], +static int cmd_qos_red(struct qos_red_params red_params[][RTE_COLORS], unsigned int tc, int argc, char *argv[]) { unsigned int value, color; - struct rte_red_params red; + struct qos_red_params red; /* * Expected command format: * - * "red " + * "red " * * - meter-colour (not-used: green/yellow/red) - * - min-threshold - * - max-threshold - * - mark-probability - * - filter-weight + * - units ("bytes", "packets" or "usec") + * - min-threshold + * - max-threshold + * - mark-probability + * - filter-weight */ - if (argc < 6) + + if (argc < 7) return -1; if (get_unsigned(argv[1], &color) < 0) return -2; - if (color >= e_RTE_METER_COLORS) + if (color >= RTE_COLORS) return -3; - if (get_unsigned(argv[2], &value) < 0) + if (!qos_qsize_type_get(argv[2], &value)) return -4; - red.min_th = value; + red.qsize_type = value; if (get_unsigned(argv[3], &value) < 0) return -5; - red.max_th = value; + red.min_th = value; if (get_unsigned(argv[4], &value) < 0) return -6; - red.maxp_inv = value; + red.max_th = value; if (get_unsigned(argv[5], &value) < 0) return -7; + red.maxp_inv = value; + + if (get_unsigned(argv[6], &value) < 0) + return -8; red.wq_log2 = value; red_params[tc][color] = red; @@ -2269,7 +3635,8 @@ static int cmd_qos_params(struct ifnet *ifp, int argc, char **argv) /* * Expected command format: * - * "param [subport ] [limit ] [red ]" + * "param [subport ] [limit ] + * [red ]" 
* * - subport-id * - traffic-class-id (0..3) @@ -2279,6 +3646,7 @@ static int cmd_qos_params(struct ifnet *ifp, int argc, char **argv) * - max-threshold * - mark-probability * - filter-weight + * - size type ("packets", "bytes", "usec") */ --argc, ++argv; /* skip "param" */ if (argc < 2) { @@ -2320,6 +3688,7 @@ static int cmd_qos_params(struct ifnet *ifp, int argc, char **argv) if (strcmp(argv[0], "limit") == 0) { unsigned int value; + enum qos_queue_size_type qsize_type; if (argc < 3) { DP_DEBUG(QOS, ERR, DATAPLANE, @@ -2327,9 +3696,12 @@ static int cmd_qos_params(struct ifnet *ifp, int argc, char **argv) return -EINVAL; } + if (!qos_qsize_type_get(argv[1], &qsize_type)) + return -EINVAL; + if (get_unsigned(argv[2], &value) < 0) { DP_DEBUG(QOS, ERR, DATAPLANE, - "number expected after limit units\n"); + "number expected after limit type\n"); return -EINVAL; } @@ -2337,18 +3709,20 @@ static int cmd_qos_params(struct ifnet *ifp, int argc, char **argv) * If it's packets we round down the value to 8192 * otherwise it's bytes which aren't limited */ - if (strcmp(argv[1], "packets") == 0) { - if (value > MAX_QSIZE) { - value = MAX_QSIZE; - RTE_LOG(INFO, QOS, - "Rounding down qsize to %d on %s\n", - MAX_QSIZE, ifp->if_name); - } + if ((qsize_type == QOS_QUEUE_SIZE_PACKETS) && + (value > MAX_QSIZE)) { + value = MAX_QSIZE; + RTE_LOG(INFO, QOS, + "Rounding down qsize to %d on %s\n", + MAX_QSIZE, ifp->if_name); } - if (subport_id == 0) + if (subport_id == 0) { qinfo->port_params.qsize[tc_id] = value; + qinfo->port_params.qsize_type = qsize_type; + } sinfo->qsize[tc_id] = value; + sinfo->qsize_type = qsize_type; argc--, argv++; /* Allow for new unit field */ } else if (strcmp(argv[0], "red") == 0) { int rc; @@ -2377,186 +3751,1002 @@ static int cmd_qos_disable(struct ifnet *ifp, { struct sched_info *qinfo = ifp->if_qos; - if (!qinfo) - return 0; + if (!qinfo) + return 0; + + /* + * Expected command format: + * + * "disable" + */ + DP_DEBUG(QOS, DEBUG, DATAPLANE, "QoS disabled 
on %s\n", ifp->if_name); + + SLIST_REMOVE(&qos_qinfos.qinfo_head, qinfo, sched_info, list); + + QOS_RM_GLOBAL_MAP(); + + return QOS_DISABLE(qinfo)(ifp, qinfo); +} + +static int cmd_qos_enable(struct ifnet *ifp, + int argc __unused, char **argv __unused) +{ + struct sched_info *qinfo = ifp->if_qos; + + /* + * Expected command format: + * + * "enable" + */ + if (!qinfo) { + DP_DEBUG(QOS, ERR, DATAPLANE, "Qos not configured\n"); + return -ENOENT; + } + + if (qinfo->enabled) { + DP_DEBUG(QOS, ERR, DATAPLANE, "Qos already enabled\n"); + return -EINVAL; + } + + qinfo->enabled = true; + + if (QOS_ENABLE(qinfo)(ifp, qinfo)) + return -ENODEV; + + DP_DEBUG(QOS, DEBUG, DATAPLANE, "QoS enabled on %s\n", ifp->if_name); + return 0; +} + +static int cmd_qos_mark_map(int argc, char **argv) +{ + char *map_name; + char *dscp_group_name = NULL; + int err; + uint32_t pcp_value; + uint64_t dscp_set = 0; + uint32_t designation = 0; + enum egress_map_type type; + enum fal_packet_colour color = 0; /* green */ + + /* + * Expected command format: + * + * "mark-map dscp-group pcp " + * "mark-map designation drop-prec pcp " + * "mark-map delete" + * + * - mark-map name + * - dscp-group resource group name + * - pcp-value (0..7) + * - designation value (0..7) + * - drop precedence ("green", "yellow", "red") + */ + + --argc, ++argv; /* skip "mark-map" */ + if (argc < 1) { + DP_DEBUG(QOS, DEBUG, DATAPLANE, "mark-map name missing\n"); + return -EINVAL; + } + + map_name = argv[0]; + + --argc, ++argv; /* skip "map-name" */ + if (argc == 1 && strcmp(argv[0], "delete") == 0) { + /* + * We are deleting a mark-map + */ + return qos_mark_map_delete(map_name); + } + + if (!strcmp(argv[0], "dscp-group")) { + if (argc != 4) { + DP_DEBUG(QOS, DEBUG, DATAPLANE, + "wrong number of dscp mark-map arguments\n"); + return -EINVAL; + } + dscp_group_name = argv[1]; + err = npf_dscp_group_getmask(dscp_group_name, &dscp_set); + if (err) { + DP_DEBUG(QOS, DEBUG, DATAPLANE, + "dscp mark retrieval failed\n"); + return 
-EINVAL; + } + type = EGRESS_DSCP; + } else if (!strcmp(argv[0], "designation")) { + if (argc != 6) { + DP_DEBUG(QOS, DEBUG, DATAPLANE, + "wrong number of des mark-map arguments\n"); + return -EINVAL; + } + if ((get_unsigned(argv[1], &designation) < 0) || + designation > MAX_DESIGNATOR) { + DP_DEBUG(QOS, DEBUG, DATAPLANE, + "invalid mark-map designation value %s\n", + argv[3]); + return -EINVAL; + } + if (!strcmp(argv[2], "drop-prec")) { + for (color = 0; color < NUM_DPS; color++) { + if (!strcmp(argv[3], qos_dps[color])) + break; + } + if (color == NUM_DPS) { + DP_DEBUG(QOS, ERR, DATAPLANE, + "Invalid drop-precedence value\n"); + return -EINVAL; + } + } else { + DP_DEBUG(QOS, ERR, DATAPLANE, + "Missing ingress map drop-precedence\n"); + return -EINVAL; + } + type = EGRESS_DESIGNATION; + + /* account for extra drop-prec args */ + argc -= 2; + argv += 2; + + (void) argc; + (void) argv; + } else { + + DP_DEBUG(QOS, DEBUG, DATAPLANE, + "unknown mark-map keyword %s\n", argv[0]); + return -EINVAL; + } + + if (strcmp(argv[2], "pcp") != 0) { + DP_DEBUG(QOS, DEBUG, DATAPLANE, + "unknown mark-map keyword %s\n", argv[2]); + return -EINVAL; + } + if (get_unsigned(argv[3], &pcp_value) < 0) { + DP_DEBUG(QOS, DEBUG, DATAPLANE, + "invalid mark-map pcp value %s\n", argv[3]); + return -EINVAL; + } + if (pcp_value > 7) { + DP_DEBUG(QOS, DEBUG, DATAPLANE, + "invalid mark-map pcp value %u\n", pcp_value); + return -EINVAL; + } + return qos_mark_map_store(map_name, type, dscp_set, dscp_group_name, + (uint8_t)designation, color, + (uint8_t)pcp_value); +} + +static int cmd_qos_platform_buf_threshold(int argc, char **argv) +{ + unsigned int threshold; + + /* + * Expected command format: + * + * "buffer-threshold " + * "buffer-threshold delete" + * + * - threshold value in percentage + */ + --argc, ++argv; /* skip "buffer-threshold" */ + if (argc < 1) { + DP_DEBUG(QOS, ERR, DATAPLANE, + "buffer-threshold missing threshold value\n"); + return -EINVAL; + } + + if (get_unsigned(argv[0], 
&threshold) < 0) { + DP_DEBUG(QOS, ERR, DATAPLANE, + "Buffer threshold is not a number: %s\n", argv[0]); + return -EINVAL; + } + + if (argc == 1) + qos_external_buf_threshold_interval(threshold); + else if (argc == 2 && (strcmp(argv[1], "delete") == 0)) + qos_external_buf_threshold_interval(0); + else { + char str[512] = {0}; + for (int i = 0; i < argc; i++) + sprintf(str + strlen(str), "%s ", argv[i]); + DP_DEBUG(QOS, ERR, DATAPLANE, + "Buffer threshold unknown parameter: %s\n", str); + return -EINVAL; + } + + return 0; +} + +static int cmd_qos_platform(int argc, char **argv) +{ + --argc, ++argv; /* skip "platform" */ + if (argc < 1) { + DP_DEBUG(QOS, ERR, DATAPLANE, "platform parameter missing\n"); + return -EINVAL; + } + + if (strcmp(argv[0], "buffer-threshold") == 0) + return cmd_qos_platform_buf_threshold(argc, argv); + + return 0; +} + +static uint8_t priority_local_designator = INGRESS_DESIGNATORS; + +uint8_t qos_get_prio_lp_des(void) +{ + return priority_local_designator; +} + +static int cmd_qos_local_prio_des(int argc, char **argv) +{ + uint8_t des; + + /* + * Expected command format: + * + * "lp-des " + * "lp-des delete" + * + * - designator value, 0-7 + */ + --argc, ++argv; /* skip "lp-des" */ + if (argc != 1) { + DP_DEBUG(QOS, ERR, DATAPLANE, "lp-des wrong number of args\n"); + return -EINVAL; + } + + if (!strcmp(argv[0], "delete")) { + priority_local_designator = INGRESS_DESIGNATORS; + return 0; + } + + if (!get_unsigned_char(argv[0], &des)) { + if (des >= INGRESS_DESIGNATORS) { + DP_DEBUG(QOS, ERR, DATAPLANE, + "lp-des value %d out of range (0-7)\n", des); + return -EINVAL; + } + priority_local_designator = des; + return 0; + } + + DP_DEBUG(QOS, ERR, DATAPLANE, "Invalid designation value\n"); + return -EINVAL; +} + +/* Echo command to log */ +static void debug_cmd(int argc, char **argv) +{ + char buf[BUFSIZ], *cp; + int i; + + cp = buf; + for (i = 0; i < argc; i++) { + sprintf(cp, " %s", argv[i]); + cp += strlen(argv[i]) + 1; + } + *cp = 0; + + 
DP_DEBUG(QOS, INFO, DATAPLANE, "qos%s\n", buf); +} + +/* Process qos related op-mode commands */ +int cmd_qos_op(FILE *f, int argc, char **argv) +{ + --argc, ++argv; /* skip "qos" */ + if (argc < 1) { + fprintf(f, "usage: missing qos command\n"); + return -1; + } + + /* Check for op-mode commands first */ + if (strcmp(argv[0], "show") == 0) + return cmd_qos_show(f, argc, argv); + if (strcmp(argv[0], "optimised-show") == 0) + return cmd_qos_optimised_show(f, argc, argv); + if (strcmp(argv[0], "clear") == 0) + return cmd_qos_clear(f, argc, argv); + if (strcmp(argv[0], "hw") == 0) + return cmd_qos_hw(f, argc, argv); + if (strcmp(argv[0], "obj-db") == 0) + return cmd_qos_obj_db(f); + + fprintf(f, "unknown qos command: %s\n", argv[0]); + return -1; +} + +static struct qos_ingress_map *qos_ingress_map_find(char const *name) +{ + struct qos_ingress_map *map; + + cds_list_for_each_entry(map, &qos_ingress_maps, list) { + if (!strcmp(map->name, name)) + return map; + } + return NULL; +} + +static int qos_ingressm_trgt_attach(unsigned int ifindex, unsigned int vlan, + char const *name) +{ + struct qos_ingress_map *map; + int ret; + + if (!qos_ingressm.qos_ingressm_attach) { + DP_DEBUG(QOS, ERR, DATAPLANE, + "Device doesn't support ingress maps"); + return -EOPNOTSUPP; + } + + map = qos_ingress_map_find(name); + if (!map) { + DP_DEBUG(QOS, ERR, DATAPLANE, + "Ingress target cmd failed no map %s\n", name); + return -EINVAL; + } + + ret = (*qos_ingressm.qos_ingressm_attach)(ifindex, vlan, map); + if (ret) + return ret; + + DP_DEBUG(QOS, DEBUG, DATAPLANE, + "Attaching ingress map %s ifindex %u vlan %u\n", + name, ifindex, vlan); + return 0; +} + +static int qos_ingressm_trgt_detach(unsigned int ifindex, unsigned int vlan) +{ + int ret; + + if (!qos_ingressm.qos_ingressm_detach) { + DP_DEBUG(QOS, ERR, DATAPLANE, + "Device doesn't support ingress maps"); + return -EOPNOTSUPP; + } + + DP_DEBUG(QOS, DEBUG, DATAPLANE, "Detach ingress map target %u %u\n", + ifindex, vlan); + + ret = 
(*qos_ingressm.qos_ingressm_detach)(ifindex, vlan); + + return ret; +} + +static struct qos_ingress_map *qos_ingress_map_create(char const *name) +{ + struct qos_ingress_map *map; + + DP_DEBUG(QOS, DEBUG, DATAPLANE, "Create ingress-map %s\n", name); + map = calloc(1, sizeof(struct qos_ingress_map) + strlen(name) + 1); + if (!map) + return NULL; + strcpy(map->name, name); + map->type = INGRESS_UNDEF; + cds_list_add_tail_rcu(&map->list, &qos_ingress_maps); + return map; +} + +static void qos_ingress_map_delete_rcu(struct rcu_head *head) +{ + struct qos_ingress_map *map = + caa_container_of(head, struct qos_ingress_map, obj_rcu); + free(map); +} + +static int qos_ingress_map_delete(char const *name) +{ + struct qos_ingress_map *map; + int ret = 0; + + map = qos_ingress_map_find(name); + if (!map) { + DP_DEBUG(QOS, ERR, DATAPLANE, + "Invalid ingress-map delete %s\n", name); + return -ENOENT; + } + DP_DEBUG(QOS, ERR, DATAPLANE, "Delete ingress-map %s\n", name); + cds_list_del_rcu(&map->list); + ret = (*qos_ingressm.qos_ingressm_config)(map, false); + if (qos_im_sysdef == map) + qos_im_sysdef = NULL; + call_rcu(&map->obj_rcu, qos_ingress_map_delete_rcu); + return ret; +} + +static int +qos_ingress_map_get(char const *name, enum ingress_map_type type, + uint64_t mask, uint8_t des, uint8_t dp) +{ + struct qos_ingress_map *map; + + map = qos_ingress_map_find(name); + if (!map) { + map = qos_ingress_map_create(name); + if (!map) + return -ENOMEM; + map->type = type; + } else if (map->type != type) { + DP_DEBUG(QOS, ERR, DATAPLANE, + "Invalid type for ingress-map %s\n", name); + return -EINVAL; + } + + map->designation[des].dps_in_use |= (1 << dp); + map->designation[des].mask[dp] |= mask; + + DP_DEBUG(QOS, DEBUG, DATAPLANE, + "Added map name %s type %d des %u dp %u mask %"PRIx64"\n", + name, type, des, dp, mask); + + return 0; +} + +static int qos_ingress_map_sysdef(char const *name) +{ + struct qos_ingress_map *map; + + if (!qos_ingressm.qos_ingressm_config) { + 
DP_DEBUG(QOS, ERR, DATAPLANE, + "Device doesn't support ingress maps"); + return -EOPNOTSUPP; + } + + map = qos_ingress_map_find(name); + if (!map) { + if (qos_im_sysdef) { + DP_DEBUG(QOS, ERR, DATAPLANE, + "Ingress map system-default already alloced"); + return -EINVAL; + } + map = qos_ingress_map_create(name); + if (!map) + return -ENOMEM; + } else { + if (qos_im_sysdef && + (strcmp(qos_im_sysdef->name, map->name) != 0)) { + DP_DEBUG(QOS, ERR, DATAPLANE, + "Ingress map system-default already alloced"); + return -EINVAL; + } + } + + map->sysdef = true; + qos_im_sysdef = map; + + DP_DEBUG(QOS, DEBUG, DATAPLANE, + "Set system default ingress-map to %s\n", name); + + return 0; +} + +static int qos_ingress_map_complete(char const *name) +{ + struct qos_ingress_map *map; + int ret; + + map = qos_ingress_map_find(name); + if (!map) { + DP_DEBUG(QOS, ERR, DATAPLANE, + "No ingress map found %s\n", name); + return -ENOENT; + } + + DP_DEBUG(QOS, ERR, DATAPLANE, "Completed ingress map %s\n", name); + + ret = (*qos_ingressm.qos_ingressm_config)(map, true); + + return ret; +} + +static int cmd_qos_ingress_map(struct ifnet *ifp, int argc, char **argv) +{ + const char *map_name; + uint64_t mask = 0; + enum ingress_map_type type = INGRESS_UNDEF; + uint8_t des, dp; + int ret; + + /* Skip ingress-map */ + argc--; argv++; + + /* + * Expected command format: + * + * "ingress-map dscp-group designator drop-prec " + * "ingress-map pcp designator drop-prec " + * "ingress-map complete" + * "ingress-map delete" + * "ingress-map system-default" + * "ingress-map vlan " + * "ingress-map vlan delete" + * + * - ingress-map name + * - dscp-group resource group name + * - TC queue designation (0..7) + * - PCP value (0..7) + * - vlan (0..4095) + * - drop precedence ("green", "yellow", "red") + */ + + map_name = argv[0]; + --argc, ++argv; /* skip name */ + + switch (argc) { + case 1: + /* + * delete - We are deleting an ingress-map + * system-default - We are setting a system-default + * complete 
- The definition of an ingress-map is complete + */ + if (!strcmp(argv[0], "delete")) + return qos_ingress_map_delete(map_name); + else if (!strcmp(argv[0], "system-default")) + return qos_ingress_map_sysdef(map_name); + else if (!strcmp(argv[0], "complete")) + return qos_ingress_map_complete(map_name); + break; + + case 2: + case 3: + if (strcmp(argv[0], "vlan") != 0) + break; + + unsigned int vlan; + argc--; argv++; + if ((get_unsigned(argv[0], &vlan) < 0) || + vlan >= VLAN_N_VID) { + DP_DEBUG(QOS, ERR, DATAPLANE, + "Invalid vlan value\n"); + return -EINVAL; + } + + if (!ifp) { + DP_DEBUG(QOS, ERR, DATAPLANE, + "Invalid ifp\n"); + return -EINVAL; + } + + argc--; argv++; + if (!argc) + return(qos_ingressm_trgt_attach(ifp->if_index, vlan, + map_name)); + if (!strcmp(argv[0], "delete")) + return(qos_ingressm_trgt_detach(ifp->if_index, + vlan)); + break; + + case 6: + if (!strcmp(argv[0], "dscp-group")) { + argc--; argv++; + ret = npf_dscp_group_getmask(argv[0], &mask); + if (ret) { + DP_DEBUG(QOS, ERR, DATAPLANE, + "Failed to retrieve dscp group %s\n", + argv[0]); + return -ENOENT; + } + argc--; argv++; + type = INGRESS_DSCP; + } else if (!strcmp(argv[0], "pcp")) { + unsigned int pcp; + + argc--; argv++; + if ((get_unsigned(argv[0], &pcp) < 0) || + pcp > MAX_DESIGNATOR) { + DP_DEBUG(QOS, ERR, DATAPLANE, + "Invalid PCP value\n"); + return -EINVAL; + } + argc--; argv++; + mask = 1UL << pcp; + type = INGRESS_PCP; + } else { + DP_DEBUG(QOS, ERR, DATAPLANE, + "Missing ingress map type\n"); + return -EINVAL; + } + + + if (!strcmp(argv[0], "designation")) { + argc--; argv++; + if ((get_unsigned_char(argv[0], &des) < 0) || + des > MAX_DESIGNATOR) { + DP_DEBUG(QOS, ERR, DATAPLANE, + "Invalid designation value\n"); + return -EINVAL; + } + argc--; argv++; + } else { + DP_DEBUG(QOS, ERR, DATAPLANE, + "Missing ingress map designation\n"); + return -EINVAL; + } + + if (!strcmp(argv[0], "drop-prec")) { + argc--; argv++; + for (dp = 0; dp < NUM_DPS; dp++) { + if (!strcmp(argv[0], 
qos_dps[dp])) + break; + } + if (dp == NUM_DPS) { + DP_DEBUG(QOS, ERR, DATAPLANE, + "Invalid drop-precedence value\n"); + return -EINVAL; + } + } else { + DP_DEBUG(QOS, ERR, DATAPLANE, + "Missing ingress map drop-precedence\n"); + return -EINVAL; + } + + return(qos_ingress_map_get(map_name, type, mask, des, dp)); + default: + DP_DEBUG(QOS, ERR, DATAPLANE, + "Ingress map command has wrong number of args\n"); + break; + } + + DP_DEBUG(QOS, ERR, DATAPLANE, + "Invalid ingress-map command\n"); + + return -EINVAL; +} + +struct qos_mark_map *qos_egress_map_find(char const *name) +{ + struct qos_mark_map *map; + + cds_list_for_each_entry(map, &qos_egress_maps, list) { + if (!strcmp(map->map_name, name)) + return map; + } + return NULL; +} + +static int qos_egressm_trgt_attach(unsigned int ifindex, unsigned int vlan, + char const *name) +{ + struct qos_mark_map *map; + int ret; + + if (!qos_egressm.qos_egressm_attach) { + DP_DEBUG(QOS, ERR, DATAPLANE, + "Device doesn't support egress maps"); + return -EOPNOTSUPP; + } + + map = qos_egress_map_find(name); + if (!map) { + DP_DEBUG(QOS, ERR, DATAPLANE, + "Egress target cmd failed no map %s\n", name); + return -EINVAL; + } + + ret = (*qos_egressm.qos_egressm_attach)(ifindex, vlan, map); + if (ret) { + DP_DEBUG(QOS, ERR, DATAPLANE, + "Failed to attach egress " + "map:%s on ifindex:%d, vlan:%d ret:%d\n", + name, ifindex, vlan, ret); + return ret; + } + + DP_DEBUG(QOS, DEBUG, DATAPLANE, + "Attaching egress map %s ifindex %u vlan %u\n", + name, ifindex, vlan); + return 0; +} + +static int qos_egressm_trgt_detach(unsigned int ifindex, unsigned int vlan, + char const *name) +{ + struct qos_mark_map *map; + int ret; + + if (!qos_egressm.qos_egressm_detach) { + DP_DEBUG(QOS, ERR, DATAPLANE, + "Device doesn't support egress maps"); + return -EOPNOTSUPP; + } + + map = qos_egress_map_find(name); + if (!map) { + DP_DEBUG(QOS, ERR, DATAPLANE, + "Egress target cmd failed no map %s\n", name); + return -EINVAL; + } + + ret = 
(*qos_egressm.qos_egressm_detach)(ifindex, vlan, map); + + return ret; +} + +static struct qos_mark_map *qos_egress_map_create(char const *name) +{ + struct qos_mark_map *map; + + DP_DEBUG(QOS, DEBUG, DATAPLANE, "Create egress-map %s\n", name); + map = calloc(1, sizeof(struct qos_mark_map) + strlen(name) + 1); + if (!map) + return NULL; + strcpy(map->map_name, name); + map->type = EGRESS_UNDEF; + cds_list_add_tail_rcu(&map->list, &qos_egress_maps); + return map; +} + +static void qos_egress_map_delete_rcu(struct rcu_head *head) +{ + struct qos_mark_map *map = + caa_container_of(head, struct qos_mark_map, obj_rcu); + free(map); +} + +static int qos_egress_map_delete(char const *name) +{ + struct qos_mark_map *map; + int ret = 0; + + map = qos_egress_map_find(name); + if (!map) { + DP_DEBUG(QOS, ERR, DATAPLANE, + "Invalid egress-map delete %s\n", name); + return -ENOENT; + } + DP_DEBUG(QOS, ERR, DATAPLANE, "Delete egress-map %s\n", name); + cds_list_del_rcu(&map->list); + ret = (*qos_egressm.qos_egressm_config)(map, false); + call_rcu(&map->obj_rcu, qos_egress_map_delete_rcu); + return ret; +} + +static int +qos_egress_map_set(char const *name, enum egress_map_type type, + uint64_t mask, uint8_t remark_value) +{ + struct qos_mark_map *map; + uint8_t dscp; + + map = qos_egress_map_find(name); + if (!map) { + map = qos_egress_map_create(name); + if (!map) + return -ENOMEM; + map->type = type; + } else if (map->type != type) { + DP_DEBUG(QOS, ERR, DATAPLANE, + "Invalid type for egress-map %s\n", name); + return -EINVAL; + } - /* - * Expected command format: - * - * "disable" - */ - ifp->qos_software_fwd = 0; - DP_DEBUG(QOS, DEBUG, DATAPLANE, "QoS disabled on %s\n", ifp->if_name); + map->type = type; + map->dscp_used |= mask; + for (dscp = 0; dscp < MAX_DSCP; dscp++) { + if (mask & (1ul << dscp)) + map->pcp_value[dscp] = remark_value; + } - return QOS_DISABLE(qinfo)(ifp, qinfo); + return 0; } -static int cmd_qos_enable(struct ifnet *ifp, - int argc __unused, char **argv 
__unused) +static int qos_egress_map_complete(char const *name) { - struct sched_info *qinfo = ifp->if_qos; + struct qos_mark_map *map; + int ret; - /* - * Expected command format: - * - * "enable" - */ - if (!qinfo) { - DP_DEBUG(QOS, ERR, DATAPLANE, "Qos not configured\n"); + map = qos_egress_map_find(name); + if (!map) { + DP_DEBUG(QOS, ERR, DATAPLANE, + "No egress map found %s\n", name); return -ENOENT; } - if (qinfo->enabled) { - DP_DEBUG(QOS, ERR, DATAPLANE, "Qos already enabled\n"); - return -EINVAL; - } - - qinfo->enabled = true; + DP_DEBUG(QOS, ERR, DATAPLANE, "Completed egress map %s\n", name); - if (QOS_ENABLE(qinfo)(ifp, qinfo)) - return -ENODEV; + ret = (*qos_egressm.qos_egressm_config)(map, true); - DP_DEBUG(QOS, DEBUG, DATAPLANE, "QoS enabled on %s\n", ifp->if_name); - return 0; + return ret; } -static int cmd_qos_mark_map(int argc, char **argv) +static int cmd_qos_egress_map(struct ifnet *ifp, int argc, char **argv) { - char *map_name; - char *dscp_group_name; - int err; - uint32_t pcp_value; - uint64_t dscp_set; + const char *map_name; + enum egress_map_type type = EGRESS_UNDEF; + uint8_t remark_value; + uint64_t mask = 0; + int ret; + + /* Skip egress-map */ + argc--; argv++; /* * Expected command format: * - * "mark-map dscp-group pcp " - * "mark-map delete" + * "egress-map designation dscp " + * "egress-map delete" + * "egress-map complete" + * "egress-map vlan " + * "egress-map vlan delete" + * + * - egress-map name + * - designation value (0..7) + * - dscp-value (0..63) + * - vlan * - * - mark-map name - * - dscp-group resource group name - * - pcp-value (0..7) */ - --argc, ++argv; /* skip "mark-map" */ - if (argc < 1) { - DP_DEBUG(QOS, DEBUG, DATAPLANE, "mark-map name missing\n"); - return -EINVAL; - } - map_name = argv[0]; + --argc, ++argv; /* skip name */ - --argc, ++argv; /* skip "map-name" */ - if (argc == 1 && strcmp(argv[0], "delete") == 0) { + switch (argc) { + case 1: /* - * We are deleting a mark-map + * delete - We are deleting an 
ingress-map + * system-default - We are setting a system-default + * complete - The definition of an ingress-map is complete */ - return qos_mark_map_delete(map_name); - } else if (argc != 4) { - DP_DEBUG(QOS, DEBUG, DATAPLANE, - "wrong number of mark-map arguments\n"); - return -EINVAL; - } + if (!strcmp(argv[0], "delete")) + return qos_egress_map_delete(map_name); + else if (!strcmp(argv[0], "complete")) + return qos_egress_map_complete(map_name); + break; + + case 2: + case 3: + if (strcmp(argv[0], "vlan") != 0) + break; - if (strcmp(argv[0], "dscp-group") != 0) { - DP_DEBUG(QOS, DEBUG, DATAPLANE, - "unknown mark-map keyword %s\n", argv[0]); - return -EINVAL; - } - dscp_group_name = argv[1]; - err = npf_dscp_group_getmask(dscp_group_name, &dscp_set); - if (err) { - DP_DEBUG(QOS, DEBUG, DATAPLANE, - "dscp mark retrieval failed\n"); - return -EINVAL; - } + unsigned int vlan; + argc--; argv++; + if ((get_unsigned(argv[0], &vlan) < 0) || + vlan >= VLAN_N_VID) { + DP_DEBUG(QOS, ERR, DATAPLANE, + "Invalid vlan value\n"); + return -EINVAL; + } - if (strcmp(argv[2], "pcp") != 0) { - DP_DEBUG(QOS, DEBUG, DATAPLANE, - "unknown mark-map keyword %s\n", argv[2]); + if (!ifp) { + DP_DEBUG(QOS, ERR, DATAPLANE, + "Invalid ifp\n"); return -EINVAL; } - if (get_unsigned(argv[3], &pcp_value) < 0) { - DP_DEBUG(QOS, DEBUG, DATAPLANE, - "invalid mark-map pcp value %s\n", argv[3]); - return -EINVAL; - } - if (pcp_value > 7) { - DP_DEBUG(QOS, DEBUG, DATAPLANE, - "invalid mark-map pcp value %u\n", pcp_value); - return -EINVAL; + + argc--; argv++; + if (!argc) + return(qos_egressm_trgt_attach(ifp->if_index, vlan, + map_name)); + if (!strcmp(argv[0], "delete")) + return(qos_egressm_trgt_detach(ifp->if_index, vlan, + map_name)); + break; + + case 4: + if (!strcmp(argv[0], "dscp-group")) { + argc--; argv++; + ret = npf_dscp_group_getmask(argv[0], &mask); + if (ret) { + DP_DEBUG(QOS, ERR, DATAPLANE, + "Failed to retrieve dscp group %s\n", + argv[0]); + return -ENOENT; + } + argc--; argv++; + 
} else { + DP_DEBUG(QOS, ERR, DATAPLANE, + "Missing egress map type\n"); + return -EINVAL; + } + + if (!strcmp(argv[0], "dscp")) { + argc--; argv++; + if ((get_unsigned_char(argv[0], &remark_value) < 0) || + remark_value > MAX_DSCP) { + DP_DEBUG(QOS, ERR, DATAPLANE, + "Invalid DSCP value\n"); + return -EINVAL; + } + argc--; argv++; + type = EGRESS_DSCPGRP_DSCP; + } else { + DP_DEBUG(QOS, ERR, DATAPLANE, + "Missing egress map DSCP value\n"); + return -EINVAL; + } + + return(qos_egress_map_set(map_name, type, mask, remark_value)); + + default: + DP_DEBUG(QOS, ERR, DATAPLANE, + "Egress map command has wrong number of args\n"); + break; } - return qos_mark_map_store(map_name, dscp_set, (uint8_t)pcp_value); + + DP_DEBUG(QOS, ERR, DATAPLANE, + "Invalid egress-map command\n"); + + return -EINVAL; } -/* Echo command to log */ -static void debug_cmd(int argc, char **argv) +/* + * There is a race between NEWLINK messages from the kernel + * and the NEWPORT response from the controller. So we need + * to store interface specific commands for which there is no + * interface in the expectation that the ifp will exist shortly :-( + */ +static struct cfg_if_list *qos_cfg_list; + +static void +qos_if_index_set(struct ifnet *ifp) { - char buf[BUFSIZ], *cp; - int i; + int rv; + struct ifnet *parent_ifp = NULL; + struct egress_map_subport_info *egr_map_subport = NULL; - cp = buf; - for (i = 0; i < argc; i++) { - sprintf(cp, " %s", argv[i]); - cp += strlen(argv[i]) + 1; - } - *cp = 0; + rv = cfg_if_list_replay(&qos_cfg_list, ifp->if_name, cmd_qos_cfg); - DP_DEBUG(QOS, INFO, DATAPLANE, "qos%s\n", buf); + if (rv) + DP_DEBUG(QOS, ERR, DATAPLANE, + "qos cache replay failed for %s, rv %d (%s)", + ifp->if_name, rv, strerror(-rv)); + + parent_ifp = ifp->if_parent; + if (parent_ifp) { + /* + * Create a egress map object for sub-ports by default + * since it might inherit the same from parent + */ + if (ifp->if_type == IFT_L2VLAN) { + egr_map_subport = qos_egress_map_subport_new(ifp, + 
parent_ifp, true); + if (!egr_map_subport) { + DP_DEBUG(QOS, ERR, DATAPLANE, + "Failed to create egr_map_subport\n"); + return; + } + } + } } -/* Process qos related op-mode commands */ -int cmd_qos_op(FILE *f, int argc, char **argv) +static void +qos_if_index_unset(struct ifnet *ifp, uint32_t ifindex __unused) { - --argc, ++argv; /* skip "qos" */ - if (argc < 1) { - fprintf(f, "usage: missing qos command\n"); - return -1; - } + int rv; - /* Check for op-mode commands first */ - if (strcmp(argv[0], "show") == 0) - return cmd_qos_show(f, argc, argv); - else if (strcmp(argv[0], "optimised-show") == 0) - return cmd_qos_optimised_show(f, argc, argv); - else if (strcmp(argv[0], "clear") == 0) - return cmd_qos_clear(f, argc, argv); - else if (strcmp(argv[0], "hw") == 0) - return cmd_qos_hw(f, argc, argv); - else if (strcmp(argv[0], "obj-db") == 0) - return cmd_qos_obj_db(f); - else - fprintf(f, "unknown qos command: %s\n", argv[0]); + rv = cfg_if_list_replay(&qos_cfg_list, ifp->if_name, NULL); - return -1; + if (rv) + DP_DEBUG(QOS, ERR, DATAPLANE, + "qos cache remove failed for %s, rv %d (%s)", + ifp->if_name, rv, strerror(-rv)); } /* Process qos related config commands */ int cmd_qos_cfg(__unused FILE * f, int argc, char **argv) { - unsigned int ifindex; + int rv; - --argc, ++argv; /* skip "qos" */ - if (argc < 1) { + if (argc < 2) { DP_DEBUG(QOS, ERR, DATAPLANE, "usage: missing qos command\n"); return -EINVAL; } - debug_cmd(argc, argv); + debug_cmd(argc-1, argv+1); - /* Config-mode commands start with ifindex */ - if (get_unsigned(argv[0], &ifindex) < 0) { - DP_DEBUG(QOS, ERR, DATAPLANE, "usage: qos IFINDEX ...\n"); - return -ENODEV; + if (argc == 2 && !strcmp(argv[1], "commit")) { + qos_sched_npf_commit(); + return 0; } /* - * QoS uses an if-index of zero to signal a global object, i.e. - * one that isn't tied to one particular interface. + * QoS uses a special marker to signal a global object, i.e. + * one that isn't tied to one particular interface. 
The string + * is deliberately longer than IFNAMSIZ so it can never be confused + * with a real ifname. */ - if (ifindex == 0) { - --argc, ++argv; /* skip IFINDEX */ + if (!strcmp(argv[1], "global-object-cmd")) { + --argc, ++argv; /* skip "qos" */ + --argc, ++argv; /* skip global marker*/ if (argc < 1) { RTE_LOG(ERR, QOS, "missing qos subcommand\n"); return -EINVAL; @@ -2564,52 +4754,85 @@ int cmd_qos_cfg(__unused FILE * f, int argc, char **argv) if (strcmp(argv[0], "mark-map") == 0) return cmd_qos_mark_map(argc, argv); + if (strcmp(argv[0], "platform") == 0) + return cmd_qos_platform(argc, argv); + if (strcmp(argv[0], "ingress-map") == 0) + return cmd_qos_ingress_map(NULL, argc, argv); + if (strcmp(argv[0], "egress-map") == 0) + return cmd_qos_egress_map(NULL, argc, argv); + if (strcmp(argv[0], "lp-des") == 0) + return cmd_qos_local_prio_des(argc, argv); return -EINVAL; } - struct ifnet *ifp = ifnet_byifindex(ifindex); + /* + * All other Config-mode commands start with an interface name which + * vplaned should guarantee will be present. Unfortunately due to + * a race condition we currently have a cache/replay mechanism to + * cope with that not being the case. + */ + struct ifnet *ifp = dp_ifnet_byifname(argv[1]); if (!ifp) { - DP_DEBUG(QOS, ERR, DATAPLANE, "unknown ifindex %u\n", ifindex); - return -ENODEV; - } - if (ifp->if_type != IFT_ETHER) { - DP_DEBUG(QOS, ERR, DATAPLANE, - "Qos only possible on physical ports\n"); - return -EINVAL; + /* + * Interface not found, attempt to cache the command for replay + * if it turns up later. 
+ */ + DP_DEBUG(QOS, DEBUG, DATAPLANE, + "qos interface %s not found, cache cmd\n", argv[1]); + rv = cfg_if_list_cache_command(&qos_cfg_list, argv[1], + argc, argv); + if (rv) + DP_DEBUG(QOS, ERR, DATAPLANE, + "qos cache cmd for %s failed %d(%s)\n", + argv[1], rv, strerror(-rv)); + return rv; } - if (ifp->if_port == IF_PORT_ID_INVALID) - return 0; - - --argc, ++argv; /* skip IFINDEX */ + --argc, ++argv; /* skip "qos" */ + --argc, ++argv; /* skip IFNAME */ if (argc < 1) { DP_DEBUG(QOS, ERR, DATAPLANE, "missing qos subcommand\n"); return -EINVAL; } + /* + * Egress-map is still supported on VIF although its part of + * policymap + */ + if ((ifp->if_type != IFT_ETHER) && + (strcmp(argv[0], "egress-map") != 0)) { + DP_DEBUG(QOS, ERR, DATAPLANE, + "Qos only possible on physical ports\n"); + return -EINVAL; + } + if (strcmp(argv[0], "port") == 0) return cmd_qos_port(ifp, argc, argv); - else if (strcmp(argv[0], "param") == 0) + if (strcmp(argv[0], "param") == 0) return cmd_qos_params(ifp, argc, argv); - else if (strcmp(argv[0], "subport") == 0) + if (strcmp(argv[0], "subport") == 0) return cmd_qos_subport(ifp, argc, argv); - else if (strcmp(argv[0], "pipe") == 0) + if (strcmp(argv[0], "pipe") == 0) return cmd_qos_pipe(ifp, argc, argv); - else if (strcmp(argv[0], "profile") == 0) + if (strcmp(argv[0], "profile") == 0) return cmd_qos_profile(ifp, argc, argv); - else if (strcmp(argv[0], "vlan") == 0) + if (strcmp(argv[0], "vlan") == 0) return cmd_qos_vlan(ifp, argc, argv); - else if (strcmp(argv[0], "match") == 0) + if (strcmp(argv[0], "match") == 0) return cmd_qos_match(ifp, argc, argv); - else if (strcmp(argv[0], "disable") == 0) + if (strcmp(argv[0], "disable") == 0) return cmd_qos_disable(ifp, argc, argv); - else if (strcmp(argv[0], "enable") == 0) + if (strcmp(argv[0], "enable") == 0) return cmd_qos_enable(ifp, argc, argv); - else - DP_DEBUG(QOS, ERR, DATAPLANE, "unknown qos command: %s\n", - argv[0]); + if (strcmp(argv[0], "ingress-map") == 0) + return 
cmd_qos_ingress_map(ifp, argc, argv); + if (strcmp(argv[0], "egress-map") == 0) + return cmd_qos_egress_map(ifp, argc, argv); + + DP_DEBUG(QOS, ERR, DATAPLANE, "unknown qos command: %s\n", + argv[0]); return -EINVAL; } @@ -2626,7 +4849,7 @@ qos_extract_attachpoint(char const *name, struct ifnet **ifp) continue; ifname[len] = '\0'; - *ifp = ifnet_byifname(ifname); + *ifp = dp_ifnet_byifname(ifname); if (!*ifp) return NULL; return name + len + 1; @@ -2678,7 +4901,7 @@ struct ifnet *qos_get_vlan_ifp(const char *att_pnt, uint16_t *vlan_id) return ifp; snprintf(&vlan_ifp_name[0], IFNAMSIZ, "%s.%d", ifp->if_name, *vlan_id); - ifp = ifnet_byifname(vlan_ifp_name); + ifp = dp_ifnet_byifname(vlan_ifp_name); return ifp; } @@ -2724,59 +4947,6 @@ int16_t qos_get_overhead(const char *name) return qos_get_overhead_from_ifnet(ifp); } -static void -qos_sched_update_subport_stats(struct sched_info *qinfo, unsigned int subport) -{ - struct subport_info *sinfo = qinfo->subport + subport; - struct rte_sched_subport_stats64 *queue_stats = &sinfo->queue_stats; - - QOS_SUBPORT_RD_STATS(qinfo)(qinfo, subport, queue_stats); -} - -static void -qos_sched_update_pipe_stats(struct sched_info *qinfo, unsigned int subport, - unsigned int pipe) -{ - uint32_t tc; - uint32_t q; - - for (tc = 0; tc < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; tc++) { - for (q = 0; q < RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS; q++) { - uint32_t qid; - uint64_t qlen; - bool qlen_in_pkts; - struct queue_stats *queue_stats; - - qid = qos_sched_calc_qindex(qinfo, subport, pipe, tc, - q); - queue_stats = qinfo->queue_stats + qid; - - QOS_QUEUE_RD_STATS(qinfo)(qinfo, subport, pipe, tc, - q, queue_stats, &qlen, - &qlen_in_pkts); - } - } -} - -void qos_sched_update_if_stats(const struct ifnet *ifp) -{ - struct sched_info *qinfo = ifp->if_qos; - unsigned int subport; - unsigned int pipe; - - if (qinfo == NULL) - return; - - for (subport = 0; subport < qinfo->n_subports; subport++) { - if (QOS_CONFIGURED(qinfo)) { - 
qos_sched_update_subport_stats(qinfo, subport); - for (pipe = 0; pipe < qinfo->n_pipes; pipe++) - qos_sched_update_pipe_stats(qinfo, subport, - pipe); - } - } -} - bool qos_sched_subport_get_stats(struct sched_info *qinfo, uint16_t vlan_id, struct rte_sched_subport_stats64 *stats) { @@ -2816,7 +4986,7 @@ bool qos_sched_subport_get_stats(struct sched_info *qinfo, uint16_t vlan_id, } void qos_save_mark_req(const char *att_pnt, enum qos_mark_type type, - uint16_t no_qinqs, void **handle) + uint16_t refs, void **handle) { struct subport_info *subport; struct ifnet *ifp = NULL; @@ -2836,7 +5006,7 @@ void qos_save_mark_req(const char *att_pnt, enum qos_mark_type type, mark_req->handle = handle; mark_req->type = type; mark_req->next = subport->marks; - mark_req->refs = no_qinqs; + mark_req->refs = refs; subport->marks = mark_req; DP_DEBUG(QOS, DEBUG, DATAPLANE, @@ -2936,13 +5106,45 @@ static void qos_if_link_change(struct ifnet *ifp, bool up, uint32_t speed) { + if (!ifp->if_qos || speed == ETH_SPEED_NUM_NONE) + return; + + /* + * We can only start QoS if the config (hw vs sw) matches + * the current state of the port (hw vs sw). + */ + if ((ifp->if_qos->dev_id == QOS_HW_ID && ifp->hw_forwarding) || + (ifp->if_qos->dev_id == QOS_DPDK_ID && !ifp->hw_forwarding)) { + + if (up) + qos_sched_start(ifp, speed); + else + qos_sched_stop(ifp); + } +} + +static void +qos_if_mtu_change(struct ifnet *ifp, uint32_t mtu __unused) +{ + struct rte_eth_link link; + if (!ifp->if_qos) return; - if (up) - qos_sched_start(ifp, speed); - else - qos_sched_stop(ifp); + rte_eth_link_get_nowait(ifp->if_port, &link); + if (link.link_status) { + /* + * Since changing the MTU can influence the burst size and as + * result affect the shaper functionality, ensure that for + * software based QoS support the scheduler is stopped and + * started. HW Qos support is able to cope with this and + * as a result doesn't need changing. 
+ */ + if (ifp->if_qos->dev_id == QOS_DPDK_ID && !ifp->hw_forwarding) { + qos_sched_stop(ifp); + qos_sched_start(ifp, link.link_speed); + } + } } static void @@ -2956,12 +5158,54 @@ qos_if_delete(struct ifnet *ifp) DP_DEBUG(QOS, DEBUG, DATAPLANE, "QoS disabled for interface %s delete\n", ifp->if_name); + SLIST_REMOVE(&qos_qinfos.qinfo_head, qinfo, sched_info, list); + + QOS_RM_GLOBAL_MAP(); + QOS_DISABLE(qinfo)(ifp, qinfo); } +static void +qos_if_feat_mode_change(struct ifnet *ifp, enum if_feat_mode_event event) +{ + struct sched_info *qinfo = ifp->if_qos; + bool up = false; + + if (!qinfo) + return; + + if (event == IF_FEAT_MODE_EVENT_L2_FAL_ENABLED) { + DP_DEBUG(QOS, DEBUG, DATAPLANE, + "Hw switching enabled for Interface %s\n", + ifp->if_name); + if (ifp->if_qos->dev_id == QOS_HW_ID) + up = true; + } else if (event == IF_FEAT_MODE_EVENT_L2_FAL_DISABLED) { + DP_DEBUG(QOS, DEBUG, DATAPLANE, + "Hw switching disabled for Interface %s\n", + ifp->if_name); + if (ifp->if_qos->dev_id == QOS_DPDK_ID) + up = true; + } + + if (up) { + struct rte_eth_link link; + + rte_eth_link_get_nowait(ifp->if_port, &link); + if (link.link_status && link.link_speed != ETH_SPEED_NUM_NONE) + qos_sched_start(ifp, link.link_speed); + } else { + qos_sched_stop(ifp); + } +} + static const struct dp_event_ops qos_events = { .if_link_change = qos_if_link_change, .if_delete = qos_if_delete, + .if_feat_mode_change = qos_if_feat_mode_change, + .if_index_set = qos_if_index_set, + .if_index_unset = qos_if_index_unset, + .if_mtu_change = qos_if_mtu_change, }; DP_STARTUP_EVENT_REGISTER(qos_events); diff --git a/src/rcu.c b/src/rcu.c new file mode 100644 index 00000000..9d18197b --- /dev/null +++ b/src/rcu.c @@ -0,0 +1,74 @@ +/*- + * Copyright (c) 2017-2021, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2011-2017 by Brocade Communications Systems, Inc. + * All rights reserved. 
+ */ + +#include +#include + +#include "debug.h" + +#include "rcu.h" + +struct rte_rcu_qsbr *dp_qsbr_rcu_v; + +static __thread int rcu_registered; + +int dp_rcu_setup(void) +{ + size_t sz; + + if (dp_qsbr_rcu_v) + return 0; + + /* Allocate global DPDK QSBR RCU variable */ + sz = rte_rcu_qsbr_get_memsize(RTE_MAX_LCORE); + dp_qsbr_rcu_v = rte_zmalloc(NULL, sz, RTE_CACHE_LINE_SIZE); + if (!dp_qsbr_rcu_v) { + RTE_LOG(ERR, DATAPLANE, + "Could not allocate DPDK QSBR RCU variable\n"); + return -ENOMEM; + } + + if (rte_rcu_qsbr_init(dp_qsbr_rcu_v, RTE_MAX_LCORE)) { + RTE_LOG(ERR, DATAPLANE, + "Failed to initialize DPDK QSBR RCU variable\n"); + return -rte_errno; + } + + return 0; +} + +struct rte_rcu_qsbr *dp_rcu_qsbr_get(void) +{ + return dp_qsbr_rcu_v; +} + +void dp_rcu_register_thread(void) +{ + unsigned int lcore_id = dp_lcore_id(); + + if (rcu_registered++ == 0) { + /* userspace RCU */ + rcu_register_thread(); + + /* DPDK RCU QSBR */ + rte_rcu_qsbr_thread_register(dp_qsbr_rcu_v, lcore_id); + rte_rcu_qsbr_thread_online(dp_qsbr_rcu_v, lcore_id); + } +} + +void dp_rcu_unregister_thread(void) +{ + unsigned int lcore_id = dp_lcore_id(); + + if (--rcu_registered == 0) { + /* userspace RCU */ + rcu_unregister_thread(); + + /* DPDK RCU QSBR */ + rte_rcu_qsbr_thread_offline(dp_qsbr_rcu_v, lcore_id); + rte_rcu_qsbr_thread_unregister(dp_qsbr_rcu_v, lcore_id); + } +} diff --git a/src/route.c b/src/route.c index 384a9935..ef802ec1 100644 --- a/src/route.c +++ b/src/route.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2011-2016 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include @@ -37,87 +36,52 @@ #include "dp_event.h" #include "ecmp.h" #include "fal.h" +#include "ip_forward.h" +#include "if_llatbl.h" #include "if_var.h" #include "json_writer.h" +#include "lcore_sched.h" #include "lpm/lpm.h" /* Use Vyatta modified version */ #include "mpls/mpls.h" -#include "pktmbuf.h" +#include "mpls/mpls_label_table.h" +#include "pktmbuf_internal.h" #include "pd_show.h" #include "route.h" #include "urcu.h" #include "vplane_debug.h" #include "vplane_log.h" -#include "vrf.h" +#include "vrf_internal.h" struct rte_mbuf; -/* - * The nexthop in LPM is 22 bits but dpdk hash tables currently have a - * limit of 2^20 entries. - */ -#define NEXTHOP_HASH_TBL_SIZE RTE_FBK_HASH_ENTRIES_MAX -#define NEXTHOP_HASH_TBL_MIN (UINT8_MAX + 1) - /* These are stored in a memory pool to allow for mapping * index/offset into pointer: * * addr +----------+ * ---->| | - * | L P M | idx +-----------+ - * | +---->| nexthop_u | - * | | +-----------+ - * | | | | - * +----------+ +-----------+ - * | nexthop | - * | 0 | - * +-----------+ - * | ... | - * +-----------+ - * | nexthop | - * | count - 1 | - * +-----------+ + * | L P M | idx +--------------+ + * | +---->| nexthop_list | + * | | +--------------+ + * | | | | + * +----------+ +--------------+ + * | nexthop | + * | 0 | + * +--------------+ + * | ... 
| + * +--------------+ + * | nexthop | + * | count - 1 | + * +--------------+ */ -/* - * This is the nexthop information result of route lookup - allows for - * multiple nexthops in the case of ECMP - */ -struct next_hop_u { - struct next_hop *siblings; /* array of next_hop */ - uint8_t nsiblings; /* # of next_hops */ - uint8_t proto; /* routing protocol */ - uint32_t index; - uint32_t refcount; /* # of LPM's referring */ - struct next_hop hop0; /* optimization for non-ECMP */ - struct cds_lfht_node nh_node; - enum pd_obj_state pd_state; - fal_object_t nhg_fal_obj; /* FAL handle for next_hop_group */ - fal_object_t *nh_fal_obj; /* Per-nh FAL handles */ - struct rcu_head rcu; -} __rte_cache_aligned; - -struct nexthop_table { - uint32_t in_use; /* # of entries used */ - uint32_t rover; /* next free slot to look at */ - struct next_hop_u *entry[NEXTHOP_HASH_TBL_SIZE]; /* array of entries */ - uint32_t neigh_present; - uint32_t neigh_created; -}; - /* Nexthop entry table, could be per-namespace */ static struct nexthop_table nh_tbl __hot_data; /* Index for next hop table */ static struct cds_lfht *nexthop_hash; -/* Well-known blackhole next_hop_u for failure cases */ -static struct next_hop_u *nextu_blackhole; - -struct nexthop_hash_key { - const struct next_hop *nh; - size_t size; - uint8_t proto; -}; +/* Well-known blackhole next_hop_list for failure cases */ +static struct next_hop_list *nextl_blackhole; static pthread_mutex_t route_mutex = PTHREAD_MUTEX_INITIALIZER; @@ -151,6 +115,16 @@ static const struct reserved_route { static uint32_t route_sw_stats[PD_OBJ_STATE_LAST]; static uint32_t route_hw_stats[PD_OBJ_STATE_LAST]; +static struct nexthop_table *route_get_nh_table(void) +{ + return &nh_tbl; +} + +static struct cds_lfht *route_get_nh_hash_table(void) +{ + return nexthop_hash; +} + /* * Wrapper round the nexthop_new function. This one keeps track of the * failures and successes. 
@@ -161,7 +135,7 @@ route_nexthop_new(const struct next_hop *nh, uint16_t size, uint8_t proto, { int rc; - rc = nexthop_new(nh, size, proto, slot); + rc = nexthop_new(AF_INET, nh, size, proto, FAL_NHG_USE_IP, slot); if (rc >= 0) return rc; @@ -182,17 +156,19 @@ route_nexthop_new(const struct next_hop *nh, uint16_t size, uint8_t proto, * failures and successes. */ static int -route_lpm_add(vrfid_t vrf_id, struct lpm *lpm, uint32_t ip, - uint8_t depth, uint32_t next_hop, int16_t scope) +route_lpm_add(vrfid_t vrf_id, fal_object_t vrf_obj, struct lpm *lpm, + uint32_t ip, uint8_t depth, uint32_t next_hop, int16_t scope) { int rc; struct pd_obj_state_and_flags *pd_state; struct pd_obj_state_and_flags *old_pd_state; + enum pd_obj_state nhl_pd_state; + fal_object_t nhg_fal_obj; + struct next_hop *hops; uint32_t old_nh; bool demoted = false; - struct next_hop_u *nextu = - rcu_dereference(nh_tbl.entry[next_hop]); bool update_pd_state = true; + size_t size; rc = lpm_add(lpm, ntohl(ip), depth, next_hop, scope, &pd_state, &old_nh, &old_pd_state); @@ -225,25 +201,25 @@ route_lpm_add(vrfid_t vrf_id, struct lpm *lpm, uint32_t ip, return rc; } - if (nextu->pd_state != PD_OBJ_STATE_FULL) { - pd_state->state = nextu->pd_state; - nextu = nextu_blackhole; + size = next_hop_list_get_fal_nhs(AF_INET, next_hop, &hops); + nhg_fal_obj = next_hop_list_get_fal_obj( + AF_INET, next_hop, &nhl_pd_state); + + if (nhl_pd_state != PD_OBJ_STATE_FULL && + nhl_pd_state != PD_OBJ_STATE_NOT_NEEDED) { + pd_state->state = nhl_pd_state; update_pd_state = false; } if (demoted) { if (old_pd_state->created) { - rc = fal_ip4_upd_route(vrf_id, ip, depth, + rc = fal_ip4_upd_route(vrf_id, vrf_obj, ip, depth, lpm_get_id(lpm), - nextu->siblings, - nextu->nsiblings, - nextu->nhg_fal_obj); + hops, size, nhg_fal_obj); } else { - rc = fal_ip4_new_route(vrf_id, ip, depth, + rc = fal_ip4_new_route(vrf_id, vrf_obj, ip, depth, lpm_get_id(lpm), - nextu->siblings, - nextu->nsiblings, - nextu->nhg_fal_obj); + hops, size, 
nhg_fal_obj); } if (update_pd_state) pd_state->state = fal_state_to_pd_state(rc); @@ -260,10 +236,8 @@ route_lpm_add(vrfid_t vrf_id, struct lpm *lpm, uint32_t ip, * We have successfully added to the lpm, and now need to update the * platform, if there is one. */ - rc = fal_ip4_new_route(vrf_id, ip, depth, lpm_get_id(lpm), - nextu->siblings, - nextu->nsiblings, - nextu->nhg_fal_obj); + rc = fal_ip4_new_route(vrf_id, vrf_obj, ip, depth, lpm_get_id(lpm), + hops, size, nhg_fal_obj); if (update_pd_state) pd_state->state = fal_state_to_pd_state(rc); if (!rc) @@ -279,8 +253,110 @@ route_lpm_add(vrfid_t vrf_id, struct lpm *lpm, uint32_t ip, } static int -route_lpm_delete(vrfid_t vrf_id, struct lpm *lpm, uint32_t ip, - uint8_t depth, uint32_t *next_hop, int16_t scope) +route_lpm_update(vrfid_t vrf_id, fal_object_t vrf_obj, struct lpm *lpm, + uint32_t ip, uint8_t depth, + uint32_t *old_nh, + uint32_t next_hop, int16_t scope) +{ + int rc; + struct pd_obj_state_and_flags pd_state; + struct pd_obj_state_and_flags *old_pd_state; + struct pd_obj_state_and_flags *new_pd_state; + uint32_t new_nh; + uint32_t dummy_old_nh; + bool update_new_pd_state = true; + enum pd_obj_state nhl_pd_state; + fal_object_t nhg_fal_obj; + struct next_hop *hops; + size_t size; + + /* + * Remove an old entry from the lpm, and add a new one. 
lpm + * does not currently support make-before-break + */ + rc = lpm_delete(lpm, ntohl(ip), depth, old_nh, + scope, &pd_state, &new_nh, + &new_pd_state); + switch (rc) { + case LPM_SUCCESS: + /* Success */ + route_sw_stats[PD_OBJ_STATE_FULL]--; + break; + case LPM_HIGHER_SCOPE_EXISTS: + route_sw_stats[PD_OBJ_STATE_NOT_NEEDED]--; + break; + case LPM_LOWER_SCOPE_EXISTS: + /* Deleted, but lower scope was promoted so is now programmed */ + route_sw_stats[PD_OBJ_STATE_NOT_NEEDED]--; + break; + + default: + return rc; + } + + /* + * This is a replace, so the old_nh was got from the delete above, + * so make sure we don't overwrite that value here + */ + rc = lpm_add(lpm, ntohl(ip), depth, next_hop, scope, + &new_pd_state, &dummy_old_nh, &old_pd_state); + switch (rc) { + case LPM_SUCCESS: + /* Success */ + route_sw_stats[PD_OBJ_STATE_FULL]++; + break; + case LPM_HIGHER_SCOPE_EXISTS: + /* + * Success, but there is a higher scope rule, so this is + * not needed in the fal. + */ + route_sw_stats[PD_OBJ_STATE_NOT_NEEDED]++; + break; + case LPM_LOWER_SCOPE_EXISTS: + /* Added, but lower scope route was demoted. 
*/ + route_sw_stats[PD_OBJ_STATE_NOT_NEEDED]++; + break; + case -ENOSPC: + route_sw_stats[PD_OBJ_STATE_NO_RESOURCE]++; + break; + default: + route_sw_stats[PD_OBJ_STATE_ERROR]++; + } + + size = next_hop_list_get_fal_nhs(AF_INET, next_hop, &hops); + nhg_fal_obj = next_hop_list_get_fal_obj( + AF_INET, next_hop, &nhl_pd_state); + + if (nhl_pd_state != PD_OBJ_STATE_FULL && + nhl_pd_state != PD_OBJ_STATE_NOT_NEEDED) { + new_pd_state->state = nhl_pd_state; + update_new_pd_state = false; + } + + if (pd_state.created) { + rc = fal_ip4_upd_route(vrf_id, vrf_obj, ip, depth, + lpm_get_id(lpm), hops, size, + nhg_fal_obj); + } else { + rc = fal_ip4_new_route(vrf_id, vrf_obj, ip, depth, + lpm_get_id(lpm), hops, size, + nhg_fal_obj); + } + + route_hw_stats[pd_state.state]--; + if (!rc || pd_state.created) + new_pd_state->created = true; + if (update_new_pd_state) + new_pd_state->state = fal_state_to_pd_state(rc); + route_hw_stats[new_pd_state->state]++; + /* Successfully added to SW, so return success. 
*/ + return 0; +} + +static int +route_lpm_delete(vrfid_t vrf_id, fal_object_t vrf_obj, struct lpm *lpm, + uint32_t ip, uint8_t depth, uint32_t *next_hop, + int16_t scope) { int rc; @@ -312,28 +388,30 @@ route_lpm_delete(vrfid_t vrf_id, struct lpm *lpm, uint32_t ip, } if (promoted) { - struct next_hop_u *nextu = - rcu_dereference(nh_tbl.entry[new_nh]); bool update_new_pd_state = true; - - if (nextu->pd_state != PD_OBJ_STATE_FULL) { - new_pd_state->state = nextu->pd_state; - nextu = nextu_blackhole; + enum pd_obj_state nhl_pd_state; + fal_object_t nhg_fal_obj; + struct next_hop *hops; + size_t size; + + size = next_hop_list_get_fal_nhs(AF_INET, new_nh, &hops); + nhg_fal_obj = next_hop_list_get_fal_obj( + AF_INET, new_nh, &nhl_pd_state); + + if (nhl_pd_state != PD_OBJ_STATE_FULL && + nhl_pd_state != PD_OBJ_STATE_NOT_NEEDED) { + new_pd_state->state = nhl_pd_state; update_new_pd_state = false; } if (pd_state.created) { - rc = fal_ip4_upd_route(vrf_id, ip, depth, + rc = fal_ip4_upd_route(vrf_id, vrf_obj, ip, depth, lpm_get_id(lpm), - nextu->siblings, - nextu->nsiblings, - nextu->nhg_fal_obj); + hops, size, nhg_fal_obj); } else { - rc = fal_ip4_new_route(vrf_id, ip, depth, + rc = fal_ip4_new_route(vrf_id, vrf_obj, ip, depth, lpm_get_id(lpm), - nextu->siblings, - nextu->nsiblings, - nextu->nhg_fal_obj); + hops, size, nhg_fal_obj); } if (update_new_pd_state) new_pd_state->state = fal_state_to_pd_state(rc); @@ -346,7 +424,8 @@ route_lpm_delete(vrfid_t vrf_id, struct lpm *lpm, uint32_t ip, /* successfully removed and no lower scope promoted */ if (pd_state.created) { - rc = fal_ip4_del_route(vrf_id, ip, depth, lpm_get_id(lpm)); + rc = fal_ip4_del_route(vrf_id, vrf_obj, ip, depth, + lpm_get_id(lpm)); switch (rc) { case 0: route_hw_stats[pd_state.state]--; @@ -426,6 +505,10 @@ rt_lpm_add_reserved_routes(struct lpm *lpm, struct vrf *vrf) { char b[INET_ADDRSTRLEN]; unsigned int rt_idx; + struct ip_addr addr_any = { + .type = AF_INET, + .address.ip_v4.s_addr = INADDR_ANY, + }; if 
(vrf->v_id == VRF_INVALID_ID) return true; @@ -436,7 +519,7 @@ rt_lpm_add_reserved_routes(struct lpm *lpm, struct vrf *vrf) uint32_t nh_idx; int err_code; - nhop = nexthop_create(NULL, INADDR_ANY, + nhop = nexthop_create(NULL, &addr_any, reserved_routes[rt_idx].flags, 0, NULL); if (!nhop) @@ -457,6 +540,7 @@ rt_lpm_add_reserved_routes(struct lpm *lpm, struct vrf *vrf) err_code = route_lpm_add( vrf->v_id, + vrf->v_fal_obj, lpm, addr, reserved_routes[rt_idx].prefix_length, @@ -472,7 +556,7 @@ rt_lpm_add_reserved_routes(struct lpm *lpm, struct vrf *vrf) } free(nhop); if (err_code != 0) { - nexthop_put(nh_idx); + nexthop_put(AF_INET, nh_idx); return false; } } @@ -496,6 +580,7 @@ rt_lpm_del_reserved_routes(struct lpm *lpm, struct vrf *vrf) err_code = route_lpm_delete( vrf->v_id, + vrf->v_fal_obj, lpm, addr, reserved_routes[rt_idx].prefix_length, @@ -511,7 +596,7 @@ rt_lpm_del_reserved_routes(struct lpm *lpm, struct vrf *vrf) nh_idx, err_code); return false; } - nexthop_put(nh_idx); + nexthop_put(AF_INET, nh_idx); } return true; @@ -542,23 +627,6 @@ static struct lpm *rt_create_lpm(uint32_t id, struct vrf *vrf) rcu_assign_pointer(vrf->v_rt4_head.rt_table[id], lpm); - /* - * Alias all tables other than the main one from the default - * VRF into other VRFs. 
- */ - if (vrf->v_id == VRF_DEFAULT_ID && id != RT_TABLE_MAIN) { - struct vrf *dst_vrf; - vrfid_t vrf_id; - - VRF_FOREACH_KERNEL(dst_vrf, vrf_id) { - if (rt_lpm_resize(&dst_vrf->v_rt4_head, id) < 0) - return NULL; - - rcu_assign_pointer(dst_vrf->v_rt4_head.rt_table[id], - lpm); - } - } - return lpm; } @@ -570,60 +638,13 @@ static struct lpm *rt_get_lpm(struct route_head *rt_head, uint32_t id) return rcu_dereference(rt_head->rt_table[id]); } -static struct next_hop *nexthop_mp_select(struct next_hop *next, - uint32_t size, - uint32_t hash) -{ - uint16_t path; - - if (ecmp_max_path && ecmp_max_path < size) - size = ecmp_max_path; - - path = ecmp_lookup(size, hash); - if (unlikely(next[path].flags & RTF_DEAD)) { - /* retry to find a good path */ - for (path = 0; path < size; path++) { - if (!(next[path].flags & RTF_DEAD)) - break; - } - - if (path == size) - return NULL; - } - return next + path; -} - -/* - * Obtain a nexthop from a nexthop(_u) index - */ -inline struct next_hop *nexthop_select(uint32_t nh_idx, - const struct rte_mbuf *m, - uint16_t ether_type) -{ - struct next_hop_u *nextu; - struct next_hop *next; - uint32_t size; - - nextu = rcu_dereference(nh_tbl.entry[nh_idx]); - if (unlikely(!nextu)) - return NULL; - - size = nextu->nsiblings; - next = nextu->siblings; - - if (likely(size == 1)) - return next; - - return nexthop_mp_select(next, size, ecmp_mbuf_hash(m, ether_type)); -} - struct next_hop *nexthop_get(uint32_t nh_idx, uint8_t *size) { - struct next_hop_u *nextu; + struct next_hop_list *nextl; - nextu = rcu_dereference(nh_tbl.entry[nh_idx]); - *size = nextu->nsiblings; - return nextu->siblings; + nextl = rcu_dereference(nh_tbl.entry[nh_idx]); + *size = nextl->nsiblings; + return nextl->siblings; } /* Check if route table exists */ @@ -642,9 +663,9 @@ bool rt_valid_tblid(vrfid_t vrfid, uint32_t tbl_id) * * Returns RCU protected nexthop structure or NULL. 
*/ -ALWAYS_INLINE -struct next_hop *rt_lookup(in_addr_t dst, uint32_t tblid, - const struct rte_mbuf *m) +ALWAYS_INLINE __hot_func +struct next_hop *dp_rt_lookup(in_addr_t dst, uint32_t tbl_id, + const struct rte_mbuf *m) { vrfid_t vrfid = pktmbuf_get_vrf(m); struct vrf *vrf = vrf_get_rcu(vrfid); @@ -652,7 +673,7 @@ struct next_hop *rt_lookup(in_addr_t dst, uint32_t tblid, if (!vrf) return NULL; - return rt_lookup_fast(vrf, dst, tblid, m); + return rt_lookup_fast(vrf, dst, tbl_id, m); } /* @@ -676,7 +697,7 @@ struct next_hop *rt_lookup_fast(struct vrf *vrf, in_addr_t dst, if (unlikely(lpm_lookup(lpm, ntohl(dst), &idx) != 0)) return NULL; - nh = nexthop_select(idx, m, ETHER_TYPE_IPv4); + nh = nexthop_select(AF_INET, idx, m, RTE_ETHER_TYPE_IPV4); if (nh && unlikely(nh->flags & RTF_NOROUTE)) return NULL; return nh; @@ -685,7 +706,7 @@ struct next_hop *rt_lookup_fast(struct vrf *vrf, in_addr_t dst, inline bool is_local_ipv4(vrfid_t vrf_id, in_addr_t dst) { struct vrf *vrf = vrf_get_rcu(vrf_id); - struct next_hop_u *nextu; + struct next_hop_list *nextl; struct next_hop *next; struct lpm *lpm; uint32_t idx; @@ -698,459 +719,63 @@ inline bool is_local_ipv4(vrfid_t vrf_id, in_addr_t dst) if (unlikely(lpm_lookup(lpm, ntohl(dst), &idx) != 0)) return false; - nextu = rcu_dereference(nh_tbl.entry[idx]); - if (unlikely(!nextu)) + nextl = rcu_dereference(nh_tbl.entry[idx]); + if (unlikely(!nextl)) return false; - next = rcu_dereference(nextu->siblings); + next = rcu_dereference(nextl->siblings); if (next->flags & RTF_LOCAL) return true; return false; } -struct next_hop * -nexthop_create(struct ifnet *ifp, in_addr_t gw, uint32_t flags, - uint16_t num_labels, label_t *labels) -{ - struct next_hop *next = malloc(sizeof(struct next_hop)); - - if (next) { - next->gateway = gw; - next->flags = flags; - nh4_set_ifp(next, ifp); - - if (!nh_outlabels_set(&next->outlabels, num_labels, - labels)) { - RTE_LOG(ERR, ROUTE, - "Failed to set outlabels for nexthop with %u labels\n", - 
num_labels); - free(next); - return NULL; - } - } - return next; -} - -/* - * Create an array of next_hops based on the hops in the NHU. - */ -static struct next_hop *nexthop_create_copy(struct next_hop_u *nhu, int *size) -{ - struct next_hop *next, *n; - struct next_hop *array = rcu_dereference(nhu->siblings); - int i; - - *size = nhu->nsiblings; - n = next = calloc(sizeof(struct next_hop), *size); - if (!next) - return NULL; - - for (i = 0; i < nhu->nsiblings; i++) { - struct next_hop *nhu_next = array + i; - - memcpy(n, nhu_next, sizeof(struct next_hop)); - nh_outlabels_copy(&nhu_next->outlabels, &n->outlabels); - n++; - } - return next; -} - static int nexthop_hashfn(const struct nexthop_hash_key *key, unsigned long seed __rte_unused) { size_t size = key->size; - uint32_t hash_keys[size * 3]; + uint32_t hash_keys[size * 3 + 1]; struct ifnet *ifp; uint16_t i, j = 0; for (i = 0; i < size; i++, j += 3) { - hash_keys[j] = key->nh[i].gateway; - ifp = nh4_get_ifp(&key->nh[i]); + hash_keys[j] = key->nh[i].gateway.address.ip_v4.s_addr; + ifp = dp_nh_get_ifp(&key->nh[i]); hash_keys[j+1] = ifp ? 
ifp->if_index : 0; hash_keys[j+2] = key->nh[i].flags & NH_FLAGS_CMP_MASK; } - return rte_jhash_32b(hash_keys, size * 3, 0); + hash_keys[size * 3] = key->use; + + return rte_jhash_32b(hash_keys, size * 3 + 1, 0); } static int nexthop_cmpfn(struct cds_lfht_node *node, const void *key) { const struct nexthop_hash_key *h_key = key; - const struct next_hop_u *nu = - caa_container_of(node, const struct next_hop_u, nh_node); + const struct next_hop_list *nl = + caa_container_of(node, const struct next_hop_list, nh_node); uint16_t i; - if (h_key->size != nu->nsiblings) + if (h_key->size != nl->nsiblings || + h_key->use != nl->use || h_key->proto != nl->proto) return false; for (i = 0; i < h_key->size; i++) { - if ((nu->proto != h_key->proto) || - (nh4_get_ifp(&nu->siblings[i]) != - nh4_get_ifp(&h_key->nh[i])) || - ((nu->siblings[i].flags & NH_FLAGS_CMP_MASK) != + if ((dp_nh_get_ifp(&nl->siblings[i]) != + dp_nh_get_ifp(&h_key->nh[i])) || + ((nl->siblings[i].flags & NH_FLAGS_CMP_MASK) != (h_key->nh[i].flags & NH_FLAGS_CMP_MASK)) || - (nu->siblings[i].gateway != h_key->nh[i].gateway) || - !nh_outlabels_cmpfn(&nu->siblings[i].outlabels, + (nl->siblings[i].gateway.address.ip_v4.s_addr != + h_key->nh[i].gateway.address.ip_v4.s_addr) || + !nh_outlabels_cmpfn(&nl->siblings[i].outlabels, &h_key->nh[i].outlabels)) return false; } return true; } -static struct next_hop_u *nexthop_lookup(const struct nexthop_hash_key *key) -{ - struct cds_lfht_iter iter; - struct cds_lfht_node *node; - - cds_lfht_lookup(nexthop_hash, - nexthop_hashfn(key, 0), - nexthop_cmpfn, key, &iter); - node = cds_lfht_iter_get_node(&iter); - if (node) - return caa_container_of(node, struct next_hop_u, nh_node); - else - return NULL; -} - -static int -nexthop_hash_insert(struct next_hop_u *nu, - const struct nexthop_hash_key *key) -{ - struct cds_lfht_node *ret_node; - - cds_lfht_node_init(&nu->nh_node); - unsigned long hash = nexthop_hashfn(key, 0); - - ret_node = cds_lfht_add_unique(nexthop_hash, hash, - 
nexthop_cmpfn, key, - &nu->nh_node); - - return (ret_node != &nu->nh_node) ? EEXIST : 0; -} - -static struct next_hop_u *nexthop_alloc(int size) -{ - struct next_hop_u *nextu; - - nextu = calloc(1, sizeof(*nextu)); - if (unlikely(!nextu)) { - RTE_LOG(ERR, ROUTE, "can't alloc next_hop_u\n"); - return NULL; - } - - nextu->nh_fal_obj = calloc(size, sizeof(*nextu->nh_fal_obj)); - if (!nextu->nh_fal_obj) { - free(nextu); - return NULL; - } - - if (size == 1) { - /* Optimize for non-ECMP case by staying in cache line */ - nextu->siblings = &nextu->hop0; - } else { - nextu->siblings = calloc(1, size * sizeof(struct next_hop)); - if (unlikely(nextu->siblings == NULL)) { - free(nextu->nh_fal_obj); - free(nextu); - return NULL; - } - } - nextu->nsiblings = size; - return nextu; -} - -static void __nexthop_destroy(struct next_hop_u *nextu) -{ - unsigned int i; - - for (i = 0; i < nextu->nsiblings; i++) - nh_outlabels_destroy(&nextu->siblings[i].outlabels); - if (nextu->siblings != &nextu->hop0) - free(nextu->siblings); - - free(nextu->nh_fal_obj); - free(nextu); -} - -/* - * Remove the old NH from the hash and add the new one. Can not - * use a call to cds_lfht_add_replace() or any of the variants - * as the key for the new NH may be very different in the case - * where there are a different number of paths. 
- */ -static int -nexthop_hash_del_add(struct next_hop_u *old_nu, - struct next_hop_u *new_nu) -{ - struct nexthop_hash_key key = {.nh = new_nu->siblings, - .size = new_nu->nsiblings, - .proto = new_nu->proto }; - int rc; - - /* Remove old one */ - rc = cds_lfht_del(nexthop_hash, &old_nu->nh_node); - assert(rc == 0); - if (rc != 0) - return rc; - - /* add new one */ - return nexthop_hash_insert(new_nu, &key); -} - -/* Reuse existing next hop entry */ -static struct next_hop_u *nexthop_reuse(const struct nexthop_hash_key *key, - uint32_t *slot) -{ - struct next_hop_u *nu; - int index; - - nu = nexthop_lookup(key); - if (!nu) - return NULL; - - index = nu->index; - - *slot = index; - ++nu->refcount; - - DP_DEBUG(ROUTE, DEBUG, ROUTE, - "nexthop reuse: nexthop %d, refs %u\n", - index, nu->refcount); - - return nu; -} - -/* Callback from RCU after all other threads are done. */ -static void nexthop_destroy(struct rcu_head *head) -{ - struct next_hop_u *nextu - = caa_container_of(head, struct next_hop_u, rcu); - - __nexthop_destroy(nextu); -} - -/* Lookup (or create) nexthop based on hop information */ -int nexthop_new(const struct next_hop *nh, uint16_t size, uint8_t proto, - uint32_t *slot) -{ - struct nexthop_hash_key key = { - .nh = nh, .size = size, .proto = proto }; - struct next_hop_u *nextu; - uint32_t rover = nh_tbl.rover; - uint32_t nh_iter; - int ret; - - nextu = nexthop_reuse(&key, slot); - if (nextu) - return 0; - - if (unlikely(nh_tbl.in_use == NEXTHOP_HASH_TBL_SIZE)) { - RTE_LOG(ERR, ROUTE, "next_hop_u full\n"); - return -ENOSPC; - } - - nextu = nexthop_alloc(size); - if (!nextu) - return -ENOMEM; - - nextu->nsiblings = size; - nextu->refcount = 1; - nextu->index = rover; - nextu->proto = proto; - if (size == 1) { - nextu->hop0 = *nh; - } else { - memcpy(nextu->siblings, nh, size * sizeof(struct next_hop)); - } - if (unlikely(nexthop_hash_insert(nextu, &key))) { - __nexthop_destroy(nextu); - return -ENOMEM; - } - - ret = 
fal_ip4_new_next_hops(nextu->nsiblings, nextu->siblings, - &nextu->nhg_fal_obj, - nextu->nh_fal_obj); - if (ret < 0) { - if (ret != -EOPNOTSUPP) - RTE_LOG(ERR, ROUTE, - "FAL IPv4 next-hop-group create failed: %s\n", - strerror(-ret)); - nextu->pd_state = fal_state_to_pd_state(ret); - } else - nextu->pd_state = PD_OBJ_STATE_FULL; - - nh_iter = rover; - do { - nh_iter++; - if (nh_iter >= NEXTHOP_HASH_TBL_SIZE) - nh_iter = 0; - } while ((rcu_dereference(nh_tbl.entry[nh_iter]) != NULL) && - likely(nh_iter != rover)); - - nh_tbl.rover = nh_iter; - *slot = rover; - nh_tbl.in_use++; - - rcu_assign_pointer(nh_tbl.entry[rover], nextu); - - return 0; -} - -static bool nh_is_connected(const struct next_hop *nh) -{ - if (nh->flags & (RTF_BLACKHOLE | RTF_REJECT | - RTF_SLOWPATH | RTF_GATEWAY | - RTF_LOCAL | RTF_NOROUTE)) - return false; - - return true; -} - -static bool nh_is_local(const struct next_hop *nh) -{ - if (nh->flags & RTF_LOCAL) - return true; - - return false; -} - -static bool nh_is_gw(const struct next_hop *nh) -{ - if (nh->flags & RTF_GATEWAY) - return true; - - return false; -} - -/* - * Modifying a NH in non atomic way, so this must be atomically swapped - * into the forwarding state when ready - */ -static void nh4_set_neigh_present(struct next_hop *next_hop, - struct llentry *lle) -{ - assert((next_hop->flags & RTF_NEIGH_PRESENT) == 0); - next_hop->flags |= RTF_NEIGH_PRESENT; - next_hop->u.lle = lle; - nh_tbl.neigh_present++; -} - -static void nh4_clear_neigh_present(struct next_hop *next_hop) -{ - assert(next_hop->flags & RTF_NEIGH_PRESENT); - next_hop->flags &= ~RTF_NEIGH_PRESENT; - next_hop->u.ifp = next_hop->u.lle->ifp; - nh_tbl.neigh_present--; -} - -static void nh4_set_neigh_created(struct next_hop *next_hop, - struct llentry *lle) -{ - assert((next_hop->flags & RTF_NEIGH_CREATED) == 0); - next_hop->flags |= RTF_NEIGH_CREATED; - next_hop->u.lle = lle; - nh_tbl.neigh_created++; -} - -static void nh4_clear_neigh_created(struct next_hop *next_hop) -{ - 
assert(next_hop->flags & RTF_NEIGH_CREATED); - next_hop->flags &= ~RTF_NEIGH_CREATED; - next_hop->u.ifp = next_hop->u.lle->ifp; - nh_tbl.neigh_created--; -} - -static int nextu_nc_count(const struct next_hop_u *nhu) -{ - int count = 0; - int i; - struct next_hop *array = rcu_dereference(nhu->siblings); - - for (i = 0; i < nhu->nsiblings; i++) { - struct next_hop *next = array + i; - - if (nh4_is_neigh_created(next)) - count++; - } - return count; -} - -static struct next_hop *nextu_find_path_using_ifp(struct next_hop_u *nhu, - struct ifnet *ifp, - int *sibling) -{ - int i; - struct next_hop *array = rcu_dereference(nhu->siblings); - - for (i = 0; i < nhu->nsiblings; i++) { - struct next_hop *next = array + i; - - if (nh4_get_ifp(next) == ifp) { - *sibling = i; - return next; - } - } - return NULL; -} - -static bool nextu_is_any_connected(const struct next_hop_u *nhu) -{ - int i; - struct next_hop *array = rcu_dereference(nhu->siblings); - - for (i = 0; i < nhu->nsiblings; i++) { - struct next_hop *next = array + i; - - if (nh_is_connected(next)) - return true; - } - return false; -} - -/* - * Drops reference to nexthop - * and if last reference frees it for reuse. 
- */ -void nexthop_put(uint32_t idx) -{ - struct next_hop_u *nextu = rcu_dereference(nh_tbl.entry[idx]); - - if (--nextu->refcount == 0) { - struct next_hop *array = nextu->siblings; - int ret; - int i; - - nh_tbl.entry[idx] = NULL; - --nh_tbl.in_use; - - for (i = 0; i < nextu->nsiblings; i++) { - struct next_hop *nh = array + i; - - if (nh4_is_neigh_present(nh)) - nh_tbl.neigh_present--; - if (nh4_is_neigh_created(nh)) - nh_tbl.neigh_created--; - } - - if (nextu->pd_state == PD_OBJ_STATE_FULL) { - ret = fal_ip4_del_next_hops(nextu->nhg_fal_obj, - nextu->nsiblings, - nextu->siblings, - nextu->nh_fal_obj); - if (ret < 0) { - RTE_LOG(ERR, ROUTE, - "FAL IPv4 next-hop-group delete failed: %s\n", - strerror(-ret)); - } - } - - cds_lfht_del(nexthop_hash, &nextu->nh_node); - call_rcu(&nextu->rcu, nexthop_destroy); - } -} - enum nh_change { NH_NO_CHANGE, NH_SET_NEIGH_CREATED, @@ -1165,14 +790,15 @@ enum nh_change { * then remove it. */ static int -route_nh_replace(struct next_hop_u *nextu, uint32_t nh_idx, struct llentry *lle, - uint32_t *new_nextu_idx_for_del, +route_nh_replace(int family, struct next_hop_list *nextl, + uint32_t nh_idx, struct llentry *lle, + uint32_t *new_nextl_idx_for_del, enum nh_change (*nh_processing_cb)(struct next_hop *next, int sibling, void *arg), void *arg) { - struct next_hop_u *new_nextu = NULL; + struct next_hop_list *new_nextl = NULL; struct next_hop *old_array; struct next_hop *new_array = NULL; enum nh_change nh_change; @@ -1180,50 +806,50 @@ route_nh_replace(struct next_hop_u *nextu, uint32_t nh_idx, struct llentry *lle, int i; int deleted = 0; - ASSERT_MASTER(); + ASSERT_MAIN(); /* walk all the NHs, copying as we go */ - old_array = rcu_dereference(nextu->siblings); - new_nextu = nexthop_alloc(nextu->nsiblings); - if (!new_nextu) + old_array = rcu_dereference(nextl->siblings); + new_nextl = next_hop_list_create_copy_start(AF_INET, nextl); + if (!new_nextl) return 0; - new_nextu->proto = nextu->proto; - new_nextu->index = nextu->index; - 
new_nextu->refcount = nextu->refcount; - new_array = rcu_dereference(new_nextu->siblings); - for (i = 0; i < nextu->nsiblings; i++) { + new_array = rcu_dereference(new_nextl->siblings); + + for (i = 0; i < nextl->nsiblings; i++) { struct next_hop *next = old_array + i; struct next_hop *new_next = new_array + i - deleted; nh_change = nh_processing_cb(next, i, arg); /* Copy across old NH */ - memcpy(new_next, next, sizeof(struct next_hop)); - nh_outlabels_copy(&next->outlabels, &new_next->outlabels); + if (next_hop_copy(next, new_next) < 0) { + __nexthop_destroy(new_nextl); + return 0; + } switch (nh_change) { case NH_NO_CHANGE: break; case NH_SET_NEIGH_CREATED: any_change = true; - nh4_set_neigh_created(new_next, lle); + nh_set_neigh_created(family, new_next, lle); break; case NH_CLEAR_NEIGH_CREATED: any_change = true; - nh4_clear_neigh_created(new_next); + nh_clear_neigh_created(family, new_next); break; case NH_SET_NEIGH_PRESENT: any_change = true; - nh4_set_neigh_present(new_next, lle); + nh_set_neigh_present(family, new_next, lle); break; case NH_CLEAR_NEIGH_PRESENT: any_change = true; - nh4_clear_neigh_present(new_next); + nh_clear_neigh_present(family, new_next); break; case NH_DELETE: - if (!new_nextu_idx_for_del) { - __nexthop_destroy(new_nextu); + if (!new_nextl_idx_for_del) { + __nexthop_destroy(new_nextl); return -1; } any_change = true; @@ -1234,42 +860,27 @@ route_nh_replace(struct next_hop_u *nextu, uint32_t nh_idx, struct llentry *lle, /* Did we make any changes? If not then we can return */ if (!any_change) { - __nexthop_destroy(new_nextu); + __nexthop_destroy(new_nextl); return 0; } if (deleted) { /* * We are deleting at least one nh - create a new - * nextu for caller to deal with. + * next_hop_list for caller to deal with. 
*/ - if (deleted != nextu->nsiblings && - route_nexthop_new(nextu->siblings, nextu->nsiblings, - nextu->proto, new_nextu_idx_for_del) < 0) - deleted = nextu->nsiblings; - __nexthop_destroy(new_nextu); + if (deleted != nextl->nsiblings && + route_nexthop_new(nextl->siblings, nextl->nsiblings, + nextl->proto, new_nextl_idx_for_del) < 0) + deleted = nextl->nsiblings; + __nexthop_destroy(new_nextl); return deleted; } - if (nexthop_hash_del_add(nextu, new_nextu)) { - __nexthop_destroy(new_nextu); + if (next_hop_list_create_copy_finish(AF_INET, nextl, new_nextl, + nh_idx) < 0) RTE_LOG(ERR, ROUTE, "nh replace failed\n"); - return 0; - } - /* - * It's safe to copy over the FAL objects without - * notifications as there are no FAL-visible changes to the - * object - it maintains its own linkage to the neighbour - */ - new_nextu->nhg_fal_obj = nextu->nhg_fal_obj; - memcpy(new_nextu->nh_fal_obj, nextu->nh_fal_obj, - new_nextu->nsiblings * sizeof(*new_nextu->nh_fal_obj)); - - assert(nh_tbl.entry[nh_idx] == nextu); - rcu_xchg_pointer(&nh_tbl.entry[nh_idx], new_nextu); - - call_rcu(&nextu->rcu, nexthop_destroy); return 0; } @@ -1286,17 +897,17 @@ static void subtree_walk_route_cleanup_cb(struct lpm *lpm, void *arg) { struct subtree_walk_arg *changing = arg; - struct next_hop_u *nextu = rcu_dereference(nh_tbl.entry[idx]); + struct next_hop_list *nextl = rcu_dereference(nh_tbl.entry[idx]); uint32_t cover_ip; uint8_t cover_depth; uint32_t cover_nh_idx; int neigh_created = 0; int ret; - if (!nextu) + if (!nextl) return; - neigh_created = nextu_nc_count(nextu); + neigh_created = next_hop_list_nc_count(nextl); if (neigh_created == 0) return; @@ -1335,9 +946,9 @@ static void subtree_walk_route_cleanup_cb(struct lpm *lpm, * still be forwarded, but with an arp lookup required until the * entry is recreaetd with correct values. 
*/ - ret = route_lpm_delete(changing->vrf->v_id, - lpm, htonl(masked_ip), 32, &cover_nh_idx, - RT_SCOPE_LINK); + ret = route_lpm_delete(changing->vrf->v_id, changing->vrf->v_fal_obj, + lpm, htonl(masked_ip), 32, &cover_nh_idx, + RT_SCOPE_LINK); if (ret < 0) { char b[INET_ADDRSTRLEN]; in_addr_t dst = htonl(masked_ip); @@ -1349,7 +960,7 @@ static void subtree_walk_route_cleanup_cb(struct lpm *lpm, ret); } - nexthop_put(idx); + nexthop_put(AF_INET, idx); } static unsigned int lle_routing_insert_arp_cb(struct lltable *llt __unused, @@ -1363,7 +974,7 @@ static unsigned int lle_routing_insert_arp_cb(struct lltable *llt __unused, } -static void route_change_process_nh(struct next_hop_u *nhu, +static void route_change_process_nh(struct next_hop_list *nhl, enum nh_change (*upd_neigh_present_cb)( struct next_hop *next, int sibling, @@ -1374,11 +985,11 @@ static void route_change_process_nh(struct next_hop_u *nhu, int i; - index = nhu->index; - array = rcu_dereference(nhu->siblings); - for (i = 0; i < nhu->nsiblings; i++) { + index = nhl->index; + array = rcu_dereference(nhl->siblings); + for (i = 0; i < nhl->nsiblings; i++) { const struct next_hop *next = array + i; - const struct ifnet *ifp = nh4_get_ifp(next); + const struct ifnet *ifp = dp_nh_get_ifp(next); if (!ifp) /* happens for local routes */ @@ -1391,10 +1002,11 @@ static void route_change_process_nh(struct next_hop_u *nhu, * Is there an lle on this interface with a * matching address. */ - struct llentry *lle = in_lltable_find((struct ifnet *)ifp, - next->gateway); + struct llentry *lle = in_lltable_find( + (struct ifnet *)ifp, + next->gateway.address.ip_v4.s_addr); if (lle) { - route_nh_replace(nhu, nhu->index, lle, NULL, + route_nh_replace(AF_INET, nhl, nhl->index, lle, NULL, upd_neigh_present_cb, lle); /* @@ -1402,8 +1014,8 @@ static void route_change_process_nh(struct next_hop_u *nhu, * replaced by prev func, and will not * then be found in hash table. 
*/ - nhu = rcu_dereference(nh_tbl.entry[index]); - if (!nhu) + nhl = rcu_dereference(nh_tbl.entry[index]); + if (!nhl) break; } } @@ -1415,16 +1027,16 @@ walk_nhs_for_route_change(enum nh_change (*upd_neigh_present_cb)( int sibling, void *arg)) { - struct next_hop_u *nhu; + struct next_hop_list *nhl; struct cds_lfht_iter iter; struct cds_lfht_node *node; - ASSERT_MASTER(); + ASSERT_MAIN(); cds_lfht_for_each(nexthop_hash, &iter, node) { - nhu = caa_container_of(node, struct next_hop_u, nh_node); + nhl = caa_container_of(node, struct next_hop_list, nh_node); - route_change_process_nh(nhu, upd_neigh_present_cb); + route_change_process_nh(nhl, upd_neigh_present_cb); } } @@ -1439,12 +1051,13 @@ static enum nh_change routing_arp_add_gw_nh_replace_cb(struct next_hop *next, struct in_addr *ip = ll_ipv4_addr(lle); struct ifnet *ifp = rcu_dereference(lle->ifp); - if (!nh_is_gw(next) || (next->gateway != ip->s_addr)) + if (!nh_is_gw(next) || + (next->gateway.address.ip_v4.s_addr != ip->s_addr)) return NH_NO_CHANGE; - if (nh4_get_ifp(next) != ifp) + if (dp_nh_get_ifp(next) != ifp) return NH_NO_CHANGE; - if (nh_is_local(next) || nh4_is_neigh_present(next) || - nh4_is_neigh_created(next)) + if (nh_is_local(next) || nh_is_neigh_present(next) || + nh_is_neigh_created(next)) return NH_NO_CHANGE; return NH_SET_NEIGH_PRESENT; @@ -1462,7 +1075,7 @@ route_change_link_arp(struct vrf *vrf, struct lpm *lpm, int16_t scope __unused) { int i; - const struct next_hop_u *nextu; + const struct next_hop_list *nextl; const struct next_hop *array; struct subtree_walk_arg subtree_arg = { .ip = ip, @@ -1485,17 +1098,17 @@ route_change_link_arp(struct vrf *vrf, struct lpm *lpm, * as the cover need to be checked to see if they are still accurate, * and removed if not. 
*/ - nextu = rcu_dereference(nh_tbl.entry[next_hop]); - if (nextu_is_any_connected(nextu)) { + nextl = rcu_dereference(nh_tbl.entry[next_hop]); + if (next_hop_list_is_any_connected(nextl)) { lpm_subtree_walk(lpm, ip, depth, subtree_walk_route_cleanup_cb, &subtree_arg); } else if (lpm_find_cover(lpm, ip, depth, &cover_ip, &cover_depth, &cover_idx) == 0) { - const struct next_hop_u *cover_nextu; + const struct next_hop_list *cover_nextl; - cover_nextu = rcu_dereference(nh_tbl.entry[cover_idx]); - if (nextu_is_any_connected(cover_nextu)) { + cover_nextl = rcu_dereference(nh_tbl.entry[cover_idx]); + if (next_hop_list_is_any_connected(cover_nextl)) { lpm_subtree_walk(lpm, ip, depth, subtree_walk_route_cleanup_cb, &subtree_arg); @@ -1503,10 +1116,10 @@ route_change_link_arp(struct vrf *vrf, struct lpm *lpm, } /* Walk all the interfaces arp entries to do /32 processing */ - array = rcu_dereference(nextu->siblings); - for (i = 0; i < nextu->nsiblings; i++) { + array = rcu_dereference(nextl->siblings); + for (i = 0; i < nextl->nsiblings; i++) { const struct next_hop *next = array + i; - const struct ifnet *ifp = nh4_get_ifp(next); + const struct ifnet *ifp = dp_nh_get_ifp(next); if (!ifp) /* happens for local routes */ @@ -1528,7 +1141,7 @@ static void route_delete_unlink_arp(struct vrf *vrf, struct lpm *lpm, uint32_t ip, uint8_t depth) { - const struct next_hop_u *nextu; + const struct next_hop_list *nextl; uint32_t nh_idx; struct subtree_walk_arg subtree_arg = { .ip = ip, @@ -1554,8 +1167,8 @@ route_delete_unlink_arp(struct vrf *vrf, struct lpm *lpm, uint32_t ip, if (lpm_lookup_exact(lpm, ip, depth, &nh_idx)) return; - nextu = rcu_dereference(nh_tbl.entry[nh_idx]); - if (nextu_is_any_connected(nextu)) { + nextl = rcu_dereference(nh_tbl.entry[nh_idx]); + if (next_hop_list_is_any_connected(nextl)) { subtree_walk_route_cleanup_cb(lpm, ip, depth, nh_idx, &subtree_arg); lpm_subtree_walk(lpm, ip, depth, @@ -1563,10 +1176,10 @@ route_delete_unlink_arp(struct vrf *vrf, struct lpm 
*lpm, uint32_t ip, &subtree_arg); } else if (lpm_find_cover(lpm, ip, depth, &cover_ip, &cover_depth, &cover_idx) == 0) { - const struct next_hop_u *cover_nextu; + const struct next_hop_list *cover_nextl; - cover_nextu = rcu_dereference(nh_tbl.entry[cover_idx]); - if (nextu_is_any_connected(cover_nextu)) { + cover_nextl = rcu_dereference(nh_tbl.entry[cover_idx]); + if (next_hop_list_is_any_connected(cover_nextl)) { lpm_subtree_walk(lpm, ip, depth, subtree_walk_route_cleanup_cb, &subtree_arg); @@ -1582,7 +1195,7 @@ route_delete_unlink_arp(struct vrf *vrf, struct lpm *lpm, uint32_t ip, static void route_delete_relink_arp(struct lpm *lpm, uint32_t ip, uint8_t depth) { - const struct next_hop_u *nextu; + const struct next_hop_list *nextl; uint32_t cover_ip; uint8_t cover_depth; uint32_t cover_nh_idx; @@ -1599,11 +1212,11 @@ route_delete_relink_arp(struct lpm *lpm, uint32_t ip, uint8_t depth) } /* Walk all the interfaces arp entries to do /32 processing */ - nextu = rcu_dereference(nh_tbl.entry[cover_nh_idx]); - array = rcu_dereference(nextu->siblings); - for (i = 0; i < nextu->nsiblings; i++) { + nextl = rcu_dereference(nh_tbl.entry[cover_nh_idx]); + array = rcu_dereference(nextl->siblings); + for (i = 0; i < nextl->nsiblings; i++) { const struct next_hop *next = array + i; - const struct ifnet *ifp = nh4_get_ifp(next); + const struct ifnet *ifp = dp_nh_get_ifp(next); if (!ifp) /* happens for local routes */ @@ -1625,6 +1238,7 @@ int rt_insert(vrfid_t vrf_id, in_addr_t dst, uint8_t depth, uint32_t tableid, uint8_t scope, uint8_t proto, struct next_hop hops[], size_t size, bool replace) { + uint32_t old_idx; uint32_t idx = 0; int err_code; char b[INET_ADDRSTRLEN]; @@ -1643,35 +1257,15 @@ int rt_insert(vrfid_t vrf_id, in_addr_t dst, uint8_t depth, uint32_t tableid, return -ENOENT; vrf = vrf_get_rcu(vrf_id); - lpm = vrf ? 
rt_get_lpm(&vrf->v_rt4_head, tableid) : NULL; + if (!vrf) + return -ENOENT; + lpm = rt_get_lpm(&vrf->v_rt4_head, tableid); if (lpm == NULL) { - if (is_nondefault_vrf(vrf_id)) { - /* - * Should have a VRF by this point since we - * needed it to find the VRF ID from the table ID - */ - if (!vrf) - return -ENOENT; - /* Add refcount on default VRF */ - if (!vrf_find_or_create(VRF_DEFAULT_ID)) - return -ENOENT; - } else { - vrf = vrf_find_or_create(vrf_id); - if (vrf == NULL) - return -ENOENT; - } - lpm = rt_create_lpm(tableid, vrf); if (lpm == NULL) { err_code = -ENOENT; goto err; } - } else if (rt_lpm_is_empty(lpm)) { - /* Incr ref count when first route is added */ - if (!is_nondefault_vrf(vrf_id)) - vrf = vrf_find_or_create(vrf_id); - else if (!vrf_find_or_create(VRF_DEFAULT_ID)) - return -ENOENT; } /* @@ -1687,8 +1281,9 @@ int rt_insert(vrfid_t vrf_id, in_addr_t dst, uint8_t depth, uint32_t tableid, if (hops[i].flags & RTF_GATEWAY) continue; - assert(hops[i].gateway == 0); - hops[i].gateway = dst; + assert(hops[i].gateway.address.ip_v4.s_addr == 0); + hops[i].gateway.address.ip_v4.s_addr = dst; + hops[i].gateway.type = AF_INET; } } @@ -1704,20 +1299,26 @@ int rt_insert(vrfid_t vrf_id, in_addr_t dst, uint8_t depth, uint32_t tableid, } pthread_mutex_lock(&route_mutex); - if (replace) { - uint32_t old; - route_delete_unlink_arp(vrf, lpm, ntohl(dst), depth); - if (route_lpm_delete(vrf_id, lpm, dst, depth, &old, - scope) >= 0) - nexthop_put(old); - else + route_delete_unlink_arp(vrf, lpm, ntohl(dst), depth); + if (replace) { + if (lpm_nexthop_lookup(lpm, ntohl(dst), depth, scope, + &old_idx) != 0) replace = false; } + if (replace) { + err_code = route_lpm_update(vrf_id, vrf->v_fal_obj, + lpm, dst, depth, + &old_idx, idx, scope); + } else + err_code = route_lpm_add(vrf_id, vrf->v_fal_obj, lpm, + dst, depth, idx, scope); - err_code = route_lpm_add(vrf_id, lpm, dst, depth, idx, scope); - if (err_code >= 0) + if (err_code >= 0) { + if (replace) + nexthop_put(AF_INET, 
old_idx); route_change_link_arp(vrf, lpm, ntohl(dst), depth, idx, scope); + } pthread_mutex_unlock(&route_mutex); @@ -1727,7 +1328,7 @@ int rt_insert(vrfid_t vrf_id, in_addr_t dst, uint8_t depth, uint32_t tableid, replace ? "replace" : "add", inet_ntop(AF_INET, &dst, b, sizeof(b)), depth, idx, err_code); - nexthop_put(idx); + nexthop_put(AF_INET, idx); goto err; } @@ -1739,13 +1340,6 @@ int rt_insert(vrfid_t vrf_id, in_addr_t dst, uint8_t depth, uint32_t tableid, return 0; err: - if (vrf && (lpm == NULL || (lpm && rt_lpm_is_empty(lpm)))) { - if (!is_nondefault_vrf(vrf->v_id)) - vrf_delete_by_ptr(vrf); - else - vrf_delete(VRF_DEFAULT_ID); - } - return err_code; } @@ -1778,23 +1372,16 @@ int rt_delete(vrfid_t vrf_id, in_addr_t dst, uint8_t depth, pthread_mutex_lock(&route_mutex); route_delete_unlink_arp(vrf, lpm, ntohl(dst), depth); - err = route_lpm_delete(vrf_id, lpm, dst, depth, &idx, scope); + err = route_lpm_delete(vrf_id, vrf->v_fal_obj, lpm, dst, + depth, &idx, scope); if (err >= 0) { /* Drop reference count on nexthop entry. 
*/ - nexthop_put(idx); + nexthop_put(AF_INET, idx); route_delete_relink_arp(lpm, ntohl(dst), depth); } pthread_mutex_unlock(&route_mutex); - /*unlock the VRF if this is the last route in this LPM*/ - if (rt_lpm_is_empty(lpm)) { - if (!is_nondefault_vrf(vrf->v_id)) - vrf_delete_by_ptr(vrf); - else - vrf_delete(VRF_DEFAULT_ID); - } - if (err) /* * Expected now we get all deletes from RIB and still act on @@ -1809,12 +1396,9 @@ int rt_delete(vrfid_t vrf_id, in_addr_t dst, uint8_t depth, } /* cleaner for the next hop */ -static void flush_cleanup(struct lpm *lpm __rte_unused, - uint32_t ip, - uint8_t depth, - int16_t scope __rte_unused, - uint32_t idx, - struct pd_obj_state_and_flags pd_state, +static void flush_cleanup(struct lpm *lpm, + struct lpm_walk_params *params, + struct pd_obj_state_and_flags *pd_state, void *arg) { struct vrf *vrf = arg; @@ -1822,31 +1406,33 @@ static void flush_cleanup(struct lpm *lpm __rte_unused, route_sw_stats[PD_OBJ_STATE_FULL]--; - if (pd_state.created) { - ret = fal_ip4_del_route(vrf->v_id, htonl(ip), depth, + if (pd_state->created) { + ret = fal_ip4_del_route(vrf->v_id, vrf->v_fal_obj, + htonl(params->ip), + params->depth, lpm_get_id(lpm)); switch (ret) { case 0: - route_hw_stats[pd_state.state]--; + route_hw_stats[pd_state->state]--; break; default: /* General failure */ if (ret < 0) { char b[INET_ADDRSTRLEN]; - in_addr_t dst = htonl(ip); + in_addr_t dst = htonl(params->ip); DP_LOG_W_VRF( ERR, ROUTE, vrf->v_id, "route delete %s/%d failed via FAL (%d)\n", inet_ntop(AF_INET, &dst, b, sizeof(b)), - depth, ret); + params->depth, ret); } break; } } else - route_hw_stats[pd_state.state]--; + route_hw_stats[pd_state->state]--; - nexthop_put(idx); + nexthop_put(AF_INET, params->next_hop); } void rt_flush(struct vrf *vrf) @@ -1854,18 +1440,13 @@ void rt_flush(struct vrf *vrf) unsigned int id; struct route_head rt_head = vrf->v_rt4_head; + if (vrf->v_id == VRF_INVALID_ID) + return; + pthread_mutex_lock(&route_mutex); for (id = 0; id < 
rt_head.rt_rtm_max; id++) { struct lpm *lpm = rt_head.rt_table[id]; - /* - * Tables in VRFs alias those in the default VRF, so - * don't flush them - */ - if (is_nondefault_vrf(vrf->v_id) && - id != RT_TABLE_UNSPEC) - continue; - if (lpm && !rt_lpm_is_empty(lpm)) { lpm_delete_all(lpm, flush_cleanup, vrf); /* decrement ref cnt for empty LPM */ @@ -1874,11 +1455,6 @@ void rt_flush(struct vrf *vrf) "Failed to replace reserved routes %s\n", vrf->v_name); } - if (!is_nondefault_vrf(vrf->v_id)) - vrf_delete_by_ptr(vrf); - else - vrf_delete(VRF_DEFAULT_ID); - } } pthread_mutex_unlock(&route_mutex); @@ -1897,6 +1473,20 @@ void rt_flush_all(enum cont_src_en cont_src) rt_flush(vrf); } +static struct next_hop_list * +route_get_nh_blackhole(void) +{ + return nextl_blackhole; +} + +struct nh_common nh4_common = { + .nh_hash = nexthop_hashfn, + .nh_compare = nexthop_cmpfn, + .nh_get_hash_tbl = route_get_nh_hash_table, + .nh_get_nh_tbl = route_get_nh_table, + .nh_get_blackhole = route_get_nh_blackhole, +}; + void nexthop_tbl_init(void) { struct next_hop nh_drop = { @@ -1912,12 +1502,15 @@ void nexthop_tbl_init(void) if (!nexthop_hash) rte_panic("nexthop_tbl_init: can't create nexthop hash\n"); + nh_common_register(AF_INET, &nh4_common); + /* reserve a drop nexthop */ - if (nexthop_new(&nh_drop, 1, RTPROT_UNSPEC, &idx)) + if (nexthop_new(AF_INET, &nh_drop, 1, RTPROT_UNSPEC, FAL_NHG_USE_IP, + &idx)) rte_panic("%s: can't create drop nexthop\n", __func__); - nextu_blackhole = + nextl_blackhole = rcu_dereference(nh_tbl.entry[idx]); - if (!nextu_blackhole) + if (!nextl_blackhole) rte_panic("%s: can't create drop nexthop\n", __func__); } @@ -1932,37 +1525,6 @@ int route_init(struct vrf *vrf) return -1; } - /* - * All tables other than main alias tables in the default VRF. - * This is necessary in order to easily support PBR setvrf + tableid. 
- */ - if (is_nondefault_vrf(vrf->v_id)) { - struct vrf *default_vrf = vrf_get_rcu(VRF_DEFAULT_ID); - struct route_head *rt_head = &default_vrf->v_rt4_head; - uint32_t id; - - /* - * Stash the main table LPM point so it can be freed - * and also so it can be assigned over RT_TABLE_MAIN - * when unaliasing later. - */ - rcu_assign_pointer(vrf->v_rt4_head.rt_table[RT_TABLE_UNSPEC], - lpm); - - for (id = 1; id < rt_head->rt_rtm_max; id++) { - struct lpm *src_lpm = rt_head->rt_table[id]; - - if (!src_lpm || id == RT_TABLE_MAIN) - continue; - - if (rt_lpm_resize(&vrf->v_rt4_head, id) < 0) - return -1; - - rcu_assign_pointer(vrf->v_rt4_head.rt_table[id], - src_lpm); - } - } - return 0; } @@ -1975,14 +1537,6 @@ void route_uninit(struct vrf *vrf, struct route_head *rt_head) for (id = 0; id < rt_head->rt_rtm_max; id++) { struct lpm *lpm = rt_head->rt_table[id]; - /* - * VRF tables alias those in the default VRF so don't - * free them. - */ - if (is_nondefault_vrf(vrf->v_id) && - id != RT_TABLE_UNSPEC) - continue; - if (lpm) { /* rule_count == 0, means table has been flushed */ if (lpm_rule_count(lpm) != 0) { @@ -2002,59 +1556,40 @@ void route_uninit(struct vrf *vrf, struct route_head *rt_head) rt_head->rt_table = NULL; } -bool route_link_vrf_to_table(struct vrf *vrf, uint32_t tableid) -{ - struct vrf *default_vrf = vrf_get_rcu(VRF_DEFAULT_ID); - struct route_head *rt_head = &default_vrf->v_rt4_head; - struct lpm *new_lpm; - - new_lpm = rt_get_lpm(rt_head, tableid); - if (!new_lpm) { - new_lpm = rt_create_lpm(tableid, default_vrf); - if (!new_lpm) - return false; - } - - /* - * Alias the main table to the given tableid in the default - * VRF. - */ - rcu_assign_pointer(vrf->v_rt4_head.rt_table[RT_TABLE_MAIN], - new_lpm); - - return true; -} - -bool route_unlink_vrf_from_table(struct vrf *vrf) -{ - struct lpm *old_main_lpm = vrf->v_rt4_head.rt_table[ - RT_TABLE_UNSPEC]; - - /* - * Unalias the main table. 
We require the pointer to be valid - * so we use the table we initially created the VRF with. - */ - rcu_assign_pointer(vrf->v_rt4_head.rt_table[RT_TABLE_MAIN], - old_main_lpm); - - return true; -} - -void rt_print_nexthop(json_writer_t *json, uint32_t next_hop) +void rt_print_nexthop(json_writer_t *json, uint32_t next_hop, + enum rt_print_nexthop_verbosity v) { - const struct next_hop_u *nextu = + const struct next_hop_list *nextl = rcu_dereference(nh_tbl.entry[next_hop]); const struct next_hop *array; unsigned int i, j; jsonw_uint_field(json, "nh_index", next_hop); - if (unlikely(!nextu)) + if (unlikely(!nextl)) return; - array = rcu_dereference(nextu->siblings); - jsonw_uint_field(json, "nh_refcount", nextu->refcount); + array = rcu_dereference(nextl->siblings); + jsonw_uint_field(json, "nh_refcount", nextl->refcount); + /* + * FAL may access hardware which may be slow or may otherwise + * increase the data returned greatly, so only output this + * information if requested. + */ + if (v == RT_PRINT_NH_DETAIL && + fal_state_is_obj_present(nextl->pd_state)) { + /* + * name disambiguates between next-hop-group state + * and possible future route state given we don't have a + * separate JSON object for the two. 
+ */ + jsonw_name(json, "nhg_platform_state"); + jsonw_start_object(json); + fal_ip_dump_next_hop_group(nextl->nhg_fal_obj, json); + jsonw_end_object(json); + } + nexthop_map_display(nextl, json); jsonw_name(json, "next_hop"); jsonw_start_array(json); - for (i = 0; i < nextu->nsiblings; i++) { + for (i = 0; i < nextl->nsiblings; i++) { const struct next_hop *next = array + i; const struct ifnet *ifp; @@ -2074,8 +1609,10 @@ void rt_print_nexthop(json_writer_t *json, uint32_t next_hop) jsonw_string_field(json, "state", "gateway"); jsonw_string_field(json, "via", - inet_ntop(AF_INET, &next->gateway, - b1, sizeof(b1))); + inet_ntop( + AF_INET, + &next->gateway.address.ip_v4, + b1, sizeof(b1))); } else jsonw_string_field(json, "state", "directly connected"); @@ -2085,8 +1622,10 @@ void rt_print_nexthop(json_writer_t *json, uint32_t next_hop) jsonw_bool_field(json, "neigh_present", true); if (next->flags & RTF_NEIGH_CREATED) jsonw_bool_field(json, "neigh_created", true); + if (next->flags & RTF_BACKUP) + jsonw_bool_field(json, "backup", true); - ifp = nh4_get_ifp(next); + ifp = dp_nh_get_ifp(next); if (ifp) jsonw_string_field(json, "ifname", ifp->if_name); @@ -2102,6 +1641,19 @@ void rt_print_nexthop(json_writer_t *json, uint32_t next_hop) jsonw_end_array(json); } + /* + * FAL may access hardware which may be slow or may otherwise + * increase the data returned greatly, so only output this + * information if requested. 
+ */ + if (v == RT_PRINT_NH_DETAIL && + fal_state_is_obj_present(nextl->pd_state)) { + jsonw_name(json, "platform_state"); + jsonw_start_object(json); + fal_ip_dump_next_hop(nextl->nh_fal_obj[i], json); + jsonw_end_object(json); + } + jsonw_end_object(json); } jsonw_end_array(json); @@ -2112,33 +1664,34 @@ void rt_print_nexthop(json_writer_t *json, uint32_t next_hop) */ static void rt_local_display( struct lpm *lpm __rte_unused, - uint32_t ip, uint8_t depth __rte_unused, - int16_t scope __rte_unused, - uint32_t next_hop, - struct pd_obj_state_and_flags pd_state __rte_unused, + struct lpm_walk_params *params, + struct pd_obj_state_and_flags *pd_state __rte_unused, void *arg) { FILE *f = arg; - in_addr_t dst = htonl(ip); + in_addr_t dst = htonl(params->ip); char b[INET_ADDRSTRLEN]; - const struct next_hop_u *nextu = - rcu_dereference(nh_tbl.entry[next_hop]); + const struct next_hop_list *nextl = + rcu_dereference(nh_tbl.entry[params->next_hop]); const struct next_hop *nh; - if (unlikely(!nextu)) + if (unlikely(!nextl)) return; - nh = rcu_dereference(nextu->siblings); + nh = rcu_dereference(nextl->siblings); - if (nh->flags & RTF_LOCAL && !rt_is_reserved(ip, depth, scope)) + if (nh->flags & RTF_LOCAL && !rt_is_reserved(params->ip, + params->depth, + params->scope)) fprintf(f, "\t%s\n", inet_ntop(AF_INET, &dst, b, sizeof(b))); } static void __rt_display(json_writer_t *json, in_addr_t *dst, uint8_t depth, - int16_t scope, const struct next_hop_u *nextu, + int16_t scope, const struct next_hop_list *nextl, uint32_t next_hop) { char b1[INET_ADDRSTRLEN]; char b2[INET6_ADDRSTRLEN]; /* extra room for mask, not for ipv6 here */ + const char *use_str = NULL; jsonw_start_object(json); @@ -2146,56 +1699,66 @@ static void __rt_display(json_writer_t *json, in_addr_t *dst, uint8_t depth, inet_ntop(AF_INET, dst, b1, sizeof(b1)), depth); jsonw_string_field(json, "prefix", b2); jsonw_int_field(json, "scope", scope); - jsonw_uint_field(json, "proto", nextu->proto); - 
rt_print_nexthop(json, next_hop); + jsonw_uint_field(json, "proto", nextl->proto); + switch (nextl->use) { + case FAL_NHG_USE_IP: + use_str = "ip"; + break; + case FAL_NHG_USE_MPLS_LABEL_SWITCH: + use_str = "mpls-lswitch"; + break; + } + if (use_str) + jsonw_string_field(json, "use", use_str); + rt_print_nexthop(json, next_hop, RT_PRINT_NH_BRIEF); jsonw_end_object(json); } static void rt_display(struct lpm *lpm __rte_unused, - uint32_t ip, uint8_t depth, int16_t scope, - uint32_t next_hop, - struct pd_obj_state_and_flags pd_state __rte_unused, + struct lpm_walk_params *params, + struct pd_obj_state_and_flags *pd_state __rte_unused, void *arg) { json_writer_t *json = arg; - in_addr_t dst = htonl(ip); + in_addr_t dst = htonl(params->ip); - const struct next_hop_u *nextu = - rcu_dereference(nh_tbl.entry[next_hop]); + const struct next_hop_list *nextl = + rcu_dereference(nh_tbl.entry[params->next_hop]); const struct next_hop *nh; - if (unlikely(!nextu)) + if (unlikely(!nextl)) return; - nh = rcu_dereference(nextu->siblings); + nh = rcu_dereference(nextl->siblings); /* Filter local route being displayed */ if (nh->flags & RTF_LOCAL) return; /* Don't show if any paths are NEIGH_CREATED. 
*/ - if (nextu_nc_count(nextu)) + if (next_hop_list_nc_count(nextl)) return; - if (rt_is_reserved(ip, depth, scope)) + if (rt_is_reserved(params->ip, params->depth, params->scope)) return; - __rt_display(json, &dst, depth, scope, nextu, next_hop); + __rt_display(json, &dst, params->depth, params->scope, nextl, + params->next_hop); } static void rt_display_all(struct lpm *lpm __rte_unused, - uint32_t ip, uint8_t depth, int16_t scope, - uint32_t next_hop, - struct pd_obj_state_and_flags pd_state __rte_unused, + struct lpm_walk_params *params, + struct pd_obj_state_and_flags *pd_state __rte_unused, void *arg) { json_writer_t *json = arg; - in_addr_t dst = htonl(ip); - const struct next_hop_u *nextu = - rcu_dereference(nh_tbl.entry[next_hop]); + in_addr_t dst = htonl(params->ip); + const struct next_hop_list *nextl = + rcu_dereference(nh_tbl.entry[params->next_hop]); - if (unlikely(!nextu)) + if (unlikely(!nextl)) return; - __rt_display(json, &dst, depth, scope, nextu, next_hop); + __rt_display(json, &dst, params->depth, params->scope, nextl, + params->next_hop); } /* Route rule list (RB-tree) is not RCU safe */ @@ -2228,17 +1791,18 @@ enum if_state_rx { }; static void rt_if_dead(struct lpm *lpm, struct vrf *vrf, - uint32_t ip, uint8_t depth, int16_t scope, - uint32_t idx, void *arg, enum if_state_rx state_rx) + struct lpm_walk_params *params, + void *arg, enum if_state_rx state_rx) { - struct next_hop_u *nextu = rcu_dereference(nh_tbl.entry[idx]); + struct next_hop_list *nextl = + rcu_dereference(nh_tbl.entry[params->next_hop]); const struct ifnet *ifp = arg; unsigned int i, matches = 0; - for (i = 0; i < nextu->nsiblings; i++) { - struct next_hop *nh = nextu->siblings + i; + for (i = 0; i < nextl->nsiblings; i++) { + struct next_hop *nh = nextl->siblings + i; - if (nh4_get_ifp(nh) == ifp) { + if (dp_nh_get_ifp(nh) == ifp) { /* No longer check if connected, as kernel will not * signal explicitly for flushing */ @@ -2251,47 +1815,46 @@ static void rt_if_dead(struct lpm 
*lpm, struct vrf *vrf, if (matches == 0) return; - if (matches == nextu->nsiblings || state_rx == IF_RX_LINK_DEL) { + if (matches == nextl->nsiblings || state_rx == IF_RX_LINK_DEL) { /* * Delete entire route if; * Either all nh's for this route are dead * Or interface on one nh has been deleted. This mimics Kernel * behaviour but is bad as we have other ECMP nh's available */ - route_lpm_delete(vrf->v_id, lpm, htonl(ip), depth, NULL, - scope); - nexthop_put(idx); + route_lpm_delete(vrf->v_id, vrf->v_fal_obj, lpm, + htonl(params->ip), + params->depth, NULL, + params->scope); + nexthop_put(AF_INET, params->next_hop); } } /* Interface is being deleted */ static void rt_if_deleted(struct lpm *lpm, struct vrf *vrf, - uint32_t ip, uint8_t depth, int16_t scope, - uint32_t idx, - struct pd_obj_state_and_flags pd_state __rte_unused, + struct lpm_walk_params *params, + struct pd_obj_state_and_flags *pd_state __rte_unused, void *arg) { - rt_if_dead(lpm, vrf, ip, depth, scope, idx, arg, IF_RX_LINK_DEL); + rt_if_dead(lpm, vrf, params, arg, IF_RX_LINK_DEL); } static void rt_if_clear_slowpath_flag( struct lpm *lpm __rte_unused, struct vrf *vrf __rte_unused, - uint32_t ip __rte_unused, - uint8_t depth __rte_unused, - int16_t scope __rte_unused, - uint32_t idx, - struct pd_obj_state_and_flags pd_state __rte_unused, + struct lpm_walk_params *params, + struct pd_obj_state_and_flags *pd_state __rte_unused, void *arg) { - struct next_hop_u *nextu = rcu_dereference(nh_tbl.entry[idx]); + struct next_hop_list *nextl = + rcu_dereference(nh_tbl.entry[params->next_hop]); const struct ifnet *ifp = arg; unsigned int i; - for (i = 0; i < nextu->nsiblings; i++) { - struct next_hop *nh = nextu->siblings + i; + for (i = 0; i < nextl->nsiblings; i++) { + struct next_hop *nh = nextl->siblings + i; - if (nh4_get_ifp(nh) == ifp) + if (dp_nh_get_ifp(nh) == ifp) nh->flags &= ~RTF_SLOWPATH; } } @@ -2299,21 +1862,19 @@ static void rt_if_clear_slowpath_flag( static void rt_if_set_slowpath_flag( struct lpm 
*lpm __rte_unused, struct vrf *vrf __rte_unused, - uint32_t ip __rte_unused, - uint8_t depth __rte_unused, - int16_t scope __rte_unused, - uint32_t idx, - struct pd_obj_state_and_flags pd_state __rte_unused, + struct lpm_walk_params *params, + struct pd_obj_state_and_flags *pd_state __rte_unused, void *arg) { - struct next_hop_u *nextu = rcu_dereference(nh_tbl.entry[idx]); + struct next_hop_list *nextl = + rcu_dereference(nh_tbl.entry[params->next_hop]); const struct ifnet *ifp = arg; unsigned int i; - for (i = 0; i < nextu->nsiblings; i++) { - struct next_hop *nh = nextu->siblings + i; + for (i = 0; i < nextl->nsiblings; i++) { + struct next_hop *nh = nextl->siblings + i; - if (nh4_get_ifp(nh) == ifp) + if (dp_nh_get_ifp(nh) == ifp) nh->flags |= RTF_SLOWPATH; } } @@ -2321,28 +1882,27 @@ static void rt_if_set_slowpath_flag( struct rt_vrf_lpm_walk_ctx { struct vrf *vrf; void (*func)(struct lpm *lpm, struct vrf *vrf, - uint32_t ip, uint8_t depth, int16_t scope, - uint32_t next_hop, struct pd_obj_state_and_flags pd_state, + struct lpm_walk_params *params, + struct pd_obj_state_and_flags *pd_state, void *arg); void *arg; }; -static void rt_vrf_lpm_walk_cb(struct lpm *lpm, uint32_t ip, - uint8_t depth, int16_t scope, - uint32_t idx, - struct pd_obj_state_and_flags pd_state, +static void rt_vrf_lpm_walk_cb(struct lpm *lpm, + struct lpm_walk_params *params, + struct pd_obj_state_and_flags *pd_state, void *arg) { const struct rt_vrf_lpm_walk_ctx *ctx = arg; - ctx->func(lpm, ctx->vrf, ip, depth, scope, idx, pd_state, ctx->arg); + ctx->func(lpm, ctx->vrf, params, pd_state, ctx->arg); } static void rt_lpm_walk_util( void (*func)(struct lpm *lpm, struct vrf *vrf, - uint32_t ip, uint8_t depth, int16_t scope, - uint32_t next_hop, struct pd_obj_state_and_flags pd_state, + struct lpm_walk_params *params, + struct pd_obj_state_and_flags *pd_state, void *arg), void *arg) { @@ -2359,32 +1919,9 @@ static void rt_lpm_walk_util( .arg = arg, }; - /* - * We have some alaising of vrf 
table. - * - * Each table is linked into every vrf, - * so only show tables less than MAIN - * in the default vrf. Each non default vrf has - * aliased a default vrf table, starting with an - * ID of 256. Show these in the non default vrf. - */ - if (vrf->v_id == VRF_DEFAULT_ID && - id > RT_TABLE_MAIN) - continue; - if (vrf->v_id != VRF_DEFAULT_ID && - id != RT_TABLE_MAIN) - continue; - - if (lpm && !rt_lpm_is_empty(lpm)) { + if (lpm && !rt_lpm_is_empty(lpm)) lpm_walk_all_safe(lpm, rt_vrf_lpm_walk_cb, &ctx); - if (rt_lpm_is_empty(lpm)) { - if (!is_nondefault_vrf(vrf->v_id)) - vrf_delete_by_ptr(vrf); - else - vrf_delete(VRF_DEFAULT_ID); - } - } } } } @@ -2505,37 +2042,63 @@ int rt_show(struct route_head *rt_head, json_writer_t *json, uint32_t tblid, if (lpm_lookup(lpm, ntohl(addr->s_addr), &next_hop) != 0) jsonw_string_field(json, "state", "nomatch"); else - rt_print_nexthop(json, next_hop); + rt_print_nexthop(json, next_hop, RT_PRINT_NH_DETAIL); + jsonw_end_object(json); + return 0; +} + +int rt_show_exact(struct route_head *rt_head, json_writer_t *json, + uint32_t tblid, const struct in_addr *addr, uint8_t plen) +{ + char b1[INET_ADDRSTRLEN]; + char b2[INET_ADDRSTRLEN + sizeof("/255")]; + struct lpm *lpm = rt_get_lpm(rt_head, tblid); + uint32_t next_hop; + + if (lpm == NULL) { + RTE_LOG(ERR, ROUTE, "Unknown route table\n"); + return 0; + } + + jsonw_start_object(json); + + sprintf(b2, "%s/%u", + inet_ntop(AF_INET, addr, b1, sizeof(b1)), plen); + jsonw_string_field(json, "prefix", b2); + + if (lpm_lookup_exact(lpm, ntohl(addr->s_addr), plen, &next_hop) != 0) + jsonw_string_field(json, "state", "nomatch"); + else + rt_print_nexthop(json, next_hop, RT_PRINT_NH_DETAIL); jsonw_end_object(json); return 0; } static void rt_summarize(struct lpm *lpm __rte_unused, - uint32_t ip, uint8_t depth, - int16_t scope, - uint32_t nh_idx, - struct pd_obj_state_and_flags pd_state __rte_unused, + struct lpm_walk_params *params, + struct pd_obj_state_and_flags *pd_state __rte_unused, 
void *arg) { uint32_t *rt_used = arg; - const struct next_hop_u *nextu = rcu_dereference(nh_tbl.entry[nh_idx]); + const struct next_hop_list *nextl = + rcu_dereference(nh_tbl.entry[params->next_hop]); const struct next_hop *nh; - if (unlikely(!nextu)) + if (unlikely(!nextl)) return; - nh = rcu_dereference(nextu->siblings); + nh = rcu_dereference(nextl->siblings); /* Filter local route being displayed */ if (nh->flags & RTF_LOCAL) return; /* Don't show if any paths are NEIGH_CREATED */ - if (nextu_nc_count(nextu)) + if (next_hop_list_nc_count(nextl)) return; - if (rt_is_reserved(ip, depth, scope)) + if (rt_is_reserved(params->ip, params->depth, params->scope)) return; - ++rt_used[depth]; + ++rt_used[params->depth]; } static double nexthop_hash_load_factor(void) @@ -2545,7 +2108,7 @@ static double nexthop_hash_load_factor(void) double factor; cds_lfht_count_nodes(nexthop_hash, &dummy, &count, &dummy); - factor = count / NEXTHOP_HASH_TBL_SIZE; + factor = (double) count / (double) NEXTHOP_HASH_TBL_SIZE; return factor; } @@ -2584,7 +2147,7 @@ int rt_stats(struct route_head *rt_head, json_writer_t *json, uint32_t id) jsonw_uint_field(json, "used", nh_tbl.in_use); jsonw_uint_field(json, "free", NEXTHOP_HASH_TBL_SIZE - nh_tbl.in_use); jsonw_uint_field(json, "hash", - 100. * nexthop_hash_load_factor()); + (unsigned int) (100. * nexthop_hash_load_factor())); jsonw_uint_field(json, "neigh_present", nh_tbl.neigh_present); jsonw_uint_field(json, "neigh_created", nh_tbl.neigh_created); jsonw_end_object(json); @@ -2595,7 +2158,7 @@ int rt_stats(struct route_head *rt_head, json_writer_t *json, uint32_t id) /* * Get egress interface for destination address. * - * Must only be used on master thread. + * Must only be used on main thread. * Note for multipath routes, the first interface is always returned. 
*/ struct ifnet *nhif_dst_lookup(const struct vrf *vrf, @@ -2603,7 +2166,7 @@ struct ifnet *nhif_dst_lookup(const struct vrf *vrf, bool *connected) { struct ifnet *ifp; - const struct next_hop_u *nextu; + const struct next_hop_list *nextl; const struct next_hop *next; uint32_t nhindex; @@ -2611,15 +2174,15 @@ struct ifnet *nhif_dst_lookup(const struct vrf *vrf, ntohl(dst), &nhindex) != 0) return NULL; - nextu = nh_tbl.entry[nhindex]; - if (nextu == NULL) + nextl = nh_tbl.entry[nhindex]; + if (nextl == NULL) return NULL; - next = nextu->siblings; + next = nextl->siblings; if (next == NULL) return NULL; - ifp = nh4_get_ifp(next); + ifp = dp_nh_get_ifp(next); if (ifp && connected) *connected = nh_is_connected(next); @@ -2638,32 +2201,32 @@ struct ifnet *nhif_dst_lookup(const struct vrf *vrf, * nh - IP address of the next hop * ifindex - If index of the outgoing interface */ -int nh_lookup_by_index(uint32_t nhindex, uint32_t hash, in_addr_t *nh, +int dp_nh_lookup_by_index(uint32_t nhindex, uint32_t hash, in_addr_t *nh, uint32_t *ifindex) { - const struct next_hop_u *nextu; + const struct next_hop_list *nextl; struct next_hop *next; struct ifnet *ifp; uint32_t size; - nextu = rcu_dereference(nh_tbl.entry[nhindex]); - if (nextu == NULL) + nextl = rcu_dereference(nh_tbl.entry[nhindex]); + if (nextl == NULL) return -1; - next = nextu->siblings; + next = nextl->siblings; if (!next) return -1; - size = nextu->nsiblings; + size = nextl->nsiblings; if (size > 1) - next = nexthop_mp_select(next, size, hash); + next = nexthop_mp_select(nextl, next, size, hash); if (next->flags & RTF_GATEWAY) - *nh = next->gateway; + *nh = next->gateway.address.ip_v4.s_addr; else *nh = INADDR_ANY; - ifp = nh4_get_ifp(next); + ifp = dp_nh_get_ifp(next); if (!ifp) return -1; @@ -2675,7 +2238,7 @@ static void route_create_arp(struct vrf *vrf, struct lpm *lpm, struct in_addr *ip, struct llentry *lle) { - struct next_hop_u *nextu; + struct next_hop_list *nextl; uint32_t nh_idx; struct next_hop *nh; 
struct next_hop *cover_nh; @@ -2684,44 +2247,46 @@ route_create_arp(struct vrf *vrf, struct lpm *lpm, int size; if (lpm_lookup(lpm, ntohl(ip->s_addr), &nh_idx) == 0) { - nextu = rcu_dereference(nh_tbl.entry[nh_idx]); + nextl = rcu_dereference(nh_tbl.entry[nh_idx]); /* * Note that this does not support a connected with multiple * paths that use the same ifp. */ - cover_nh = nextu_find_path_using_ifp(nextu, ifp, &sibling); + cover_nh = next_hop_list_find_path_using_ifp(nextl, ifp, + &sibling); if (cover_nh && nh_is_connected(cover_nh)) { /* * Have a connected cover so create a new entry for * this. Will only be 1 NEIGH_CREATED path, but * need to inherit other paths from the cover. */ - nh = nexthop_create_copy(nextu, &size); + nh = next_hop_list_copy_next_hops(nextl, &size); if (!nh) return; /* * Set the correct NH to be NEIGH_CREATED. As this - * is copied from the cover nextu, the sibling gives + * is copied from the cover nextl, the sibling gives * the NH for the correct interface */ - nh4_set_neigh_created(&nh[sibling], lle); + nh_set_neigh_created(AF_INET, &nh[sibling], lle); /* * This is a /32 we are creating, therefore not a GW. * Set the GW (but not the flag) so that we do not * share with non /32 routes such as the connected * cover. 
*/ - assert(nh[sibling].gateway == 0); - nh[sibling].gateway = ip->s_addr; + assert(nh[sibling].gateway.address.ip_v4.s_addr == 0); + nh[sibling].gateway.address.ip_v4.s_addr = ip->s_addr; + nh[sibling].gateway.type = AF_INET; if (route_nexthop_new(nh, size, RTPROT_UNSPEC, &nh_idx) < 0) { free(nh); return; } - route_lpm_add(vrf->v_id, lpm, ip->s_addr, - 32, nh_idx, RT_SCOPE_LINK); + route_lpm_add(vrf->v_id, vrf->v_fal_obj, lpm, + ip->s_addr, 32, nh_idx, RT_SCOPE_LINK); free(nh); } } @@ -2738,11 +2303,12 @@ static enum nh_change routing_arp_del_gw_nh_replace_cb(struct next_hop *next, struct in_addr *ip = ll_ipv4_addr(lle); struct ifnet *ifp = rcu_dereference(lle->ifp); - if (!nh_is_gw(next) || (next->gateway != ip->s_addr)) + if (!nh_is_gw(next) || + (next->gateway.address.ip_v4.s_addr != ip->s_addr)) return NH_NO_CHANGE; - if (nh4_get_ifp(next) != ifp) + if (dp_nh_get_ifp(next) != ifp) return NH_NO_CHANGE; - if (nh_is_local(next) || !nh4_is_neigh_present(next)) + if (nh_is_local(next) || !nh_is_neigh_present(next)) return NH_NO_CHANGE; return NH_CLEAR_NEIGH_PRESENT; @@ -2756,15 +2322,15 @@ walk_nhs_for_arp_change(struct llentry *lle, int sibling, void *arg)) { - struct next_hop_u *nhu; + struct next_hop_list *nhl; struct cds_lfht_iter iter; struct cds_lfht_node *node; - ASSERT_MASTER(); + ASSERT_MAIN(); cds_lfht_for_each(nexthop_hash, &iter, node) { - nhu = caa_container_of(node, struct next_hop_u, nh_node); - route_nh_replace(nhu, nhu->index, lle, NULL, + nhl = caa_container_of(node, struct next_hop_list, nh_node); + route_nh_replace(AF_INET, nhl, nhl->index, lle, NULL, upd_neigh_present_cb, lle); } } @@ -2790,9 +2356,9 @@ static enum nh_change routing_arp_add_nh_replace_cb(struct next_hop *next, if (!nh_is_connected(next)) return NH_NO_CHANGE; - if (nh4_is_neigh_present(next) || nh4_is_neigh_created(next)) + if (nh_is_neigh_present(next) || nh_is_neigh_created(next)) return NH_NO_CHANGE; - if (args->ifp != nh4_get_ifp(next)) + if (args->ifp != 
dp_nh_get_ifp(next)) return NH_NO_CHANGE; if (args->count) @@ -2810,16 +2376,16 @@ static enum nh_change routing_arp_del_nh_replace_cb(struct next_hop *next, { struct ifnet *ifp = arg; - if (!nh_is_connected(next) || !nh4_is_neigh_present(next)) + if (!nh_is_connected(next) || !nh_is_neigh_present(next)) return NH_NO_CHANGE; - if (ifp != nh4_get_ifp(next)) + if (ifp != dp_nh_get_ifp(next)) return NH_NO_CHANGE; return NH_CLEAR_NEIGH_PRESENT; } struct arp_remove_purge_arg { - int count; /* Count of number of NEIGH_CREATED in parent nextu */ + int count; /* Count of number of NEIGH_CREATED in parent nextl */ int sibling; /* Sibling that had the arp entry removed */ }; @@ -2840,8 +2406,7 @@ static enum nh_change arp_removal_nh_purge_cb(struct next_hop *next __unused, if (sibling == args->sibling) { if (args->count > 1) return NH_CLEAR_NEIGH_CREATED; - else - return NH_DELETE; + return NH_DELETE; } if (args->count > 1) @@ -2872,7 +2437,7 @@ routing_insert_arp_safe(struct llentry *lle, bool arp_change) struct in_addr *ip = ll_ipv4_addr(lle); struct vrf *vrf = get_vrf(if_vrfid(lle->ifp)); struct lpm *lpm; - struct next_hop_u *nextu; + struct next_hop_list *nextl; uint32_t nh_idx; struct ifnet *ifp = rcu_dereference(lle->ifp); struct next_hop *nh; @@ -2882,7 +2447,7 @@ routing_insert_arp_safe(struct llentry *lle, bool arp_change) pthread_mutex_lock(&route_mutex); if (lpm_lookup_exact(lpm, ntohl(ip->s_addr), 32, &nh_idx) == 0) { /* We already have a /32 so add the shortcut if connected */ - nextu = rcu_dereference(nh_tbl.entry[nh_idx]); + nextl = rcu_dereference(nh_tbl.entry[nh_idx]); /* * Do we already have a nh for this interface? @@ -2891,14 +2456,14 @@ routing_insert_arp_safe(struct llentry *lle, bool arp_change) * modify the set of NHs, to reflect the ones the * cover has. 
*/ - nh = nextu_find_path_using_ifp(nextu, ifp, &sibling); + nh = next_hop_list_find_path_using_ifp(nextl, ifp, &sibling); if (nh) { struct arp_add_nh_replace_arg arg = { .ifp = ifp, - .count = nextu_nc_count(nextu), + .count = next_hop_list_nc_count(nextl), }; - route_nh_replace(nextu, nh_idx, lle, NULL, + route_nh_replace(AF_INET, nextl, nh_idx, lle, NULL, routing_arp_add_nh_replace_cb, &arg); } } else { @@ -2931,7 +2496,7 @@ routing_remove_arp_safe(struct llentry *lle) struct in_addr *ip = ll_ipv4_addr(lle); struct vrf *vrf = get_vrf(if_vrfid(lle->ifp)); struct lpm *lpm; - struct next_hop_u *nextu; + struct next_hop_list *nextl; uint32_t nh_idx; struct ifnet *ifp = rcu_dereference(lle->ifp); int sibling; @@ -2941,43 +2506,48 @@ routing_remove_arp_safe(struct llentry *lle) pthread_mutex_lock(&route_mutex); if (lpm_lookup_exact(lpm, ntohl(ip->s_addr), 32, &nh_idx) == 0) { /* We have a /32 so unlink the arp (if there) */ - nextu = rcu_dereference(nh_tbl.entry[nh_idx]); + nextl = rcu_dereference(nh_tbl.entry[nh_idx]); + if (unlikely(!nextl)) + goto unlock; /* Do we already have a nh for this interface? 
*/ - nh = nextu_find_path_using_ifp(nextu, ifp, &sibling); - if (nh && nh4_is_neigh_created(nh)) { + nh = next_hop_list_find_path_using_ifp(nextl, ifp, &sibling); + if (nh && nh_is_neigh_created(nh)) { /* Are we removing a path or the entire NH */ - if (nextu->nsiblings == 1) { - route_lpm_delete(vrf->v_id, - lpm, ip->s_addr, 32, - &nh_idx, RT_SCOPE_LINK); - nexthop_put(nh_idx); + if (nextl->nsiblings == 1) { + route_lpm_delete(vrf->v_id, vrf->v_fal_obj, + lpm, ip->s_addr, 32, + &nh_idx, RT_SCOPE_LINK); + nexthop_put(AF_INET, nh_idx); } else { struct arp_remove_purge_arg args = { - .count = nextu_nc_count(nextu), + .count = next_hop_list_nc_count(nextl), .sibling = sibling, }; int del; uint32_t new_nh_idx; - del = route_nh_replace(nextu, nh_idx, lle, + del = route_nh_replace(AF_INET, + nextl, nh_idx, lle, &new_nh_idx, arp_removal_nh_purge_cb, &args); /* Can not delete a subset of paths here */ - if (del == nextu->nsiblings) { + if (del == nextl->nsiblings) { route_lpm_delete(vrf->v_id, - lpm, ip->s_addr, - 32, &nh_idx, - RT_SCOPE_LINK); - nexthop_put(nh_idx); + vrf->v_fal_obj, + lpm, ip->s_addr, + 32, &nh_idx, + RT_SCOPE_LINK); + nexthop_put(AF_INET, nh_idx); } } } else { - route_nh_replace(nextu, nh_idx, NULL, NULL, + route_nh_replace(AF_INET, nextl, nh_idx, NULL, NULL, routing_arp_del_nh_replace_cb, ifp); } } +unlock: pthread_mutex_unlock(&route_mutex); /* @@ -3006,9 +2576,8 @@ struct rt_show_subset { }; static void rt_show_subset(struct lpm *lpm, struct vrf *vrf, - uint32_t ip, uint8_t depth, int16_t scope, - uint32_t idx, - struct pd_obj_state_and_flags pd_state, + struct lpm_walk_params *params, + struct pd_obj_state_and_flags *pd_state, void *arg) { struct rt_show_subset *subset = arg; @@ -3017,15 +2586,14 @@ static void rt_show_subset(struct lpm *lpm, struct vrf *vrf, subset->vrf = vrf->v_id; jsonw_start_object(subset->json); jsonw_uint_field(subset->json, "vrf_id", - vrf_get_external_id(vrf->v_id)); + dp_vrf_get_external_id(vrf->v_id)); 
jsonw_uint_field(subset->json, "table", lpm_get_id(lpm)); jsonw_end_object(subset->json); } - if (subset->subset == pd_state.state) - rt_display_all(lpm, ip, depth, scope, idx, pd_state, - subset->json); + if (subset->subset == pd_state->state) + rt_display_all(lpm, params, pd_state, subset->json); } /* @@ -3043,8 +2611,95 @@ int route_get_pd_subset_data(json_writer_t *json, enum pd_obj_state subset) return 0; } +static void route_fal_upd_for_changed_nhl( + struct lpm *lpm, struct vrf *vrf, + struct lpm_walk_params *params, + struct pd_obj_state_and_flags *pd_state, + void *arg) +{ + const uint32_t *filter_nhl_index = arg; + int rc; + + if (params->next_hop != *filter_nhl_index) + return; + + if (pd_state->state != PD_OBJ_STATE_FULL) + return; + + struct next_hop_list *nextl = + rcu_dereference(nh_tbl.entry[params->next_hop]); + + rc = fal_ip4_upd_route(vrf->v_id, vrf->v_fal_obj, + htonl(params->ip), params->depth, + lpm_get_id(lpm), nextl->siblings, + nextl->nsiblings, nextl->nhg_fal_obj); + + pd_state->state = fal_state_to_pd_state(rc); + + /* Kick trackers so that clients can learn about FAL changes */ + params->call_tracker_cbs = true; +} + +static void +route_handle_fal_l3_enable_change(struct ifnet *ifp) +{ + struct cds_lfht_node *node; + fal_object_t *old_nh_objs; + struct next_hop_list *nhl; + uint32_t nhls_updated = 0; + struct cds_lfht_iter iter; + fal_object_t old_nhg_obj; + + cds_lfht_for_each(nexthop_hash, &iter, node) { + nhl = caa_container_of(node, struct next_hop_list, nh_node); + + if (!next_hop_list_fal_l3_enable_changed(AF_INET, + nhl, ifp, + &old_nhg_obj, + &old_nh_objs)) + continue; + + /* + * This is going to be very expensive if there are a + * lot of routes present in the system. The only + * consolation is that this is only anticipated to be + * done on major changes such as interface creation + * and removal. 
+ */ + rt_lpm_walk_util(route_fal_upd_for_changed_nhl, + &nhl->index); + mpls_update_all_routes_for_nh_change(AF_INET, nhl->index); + + next_hop_list_fal_l3_enable_changed_finish( + AF_INET, nhl, old_nhg_obj, old_nh_objs); + + nhls_updated++; + } + + if (nhls_updated) + RTE_LOG(DEBUG, ROUTE, + "Updated %u IPv4 next hop lists due to FAL L3 state change of interface %s\n", + nhls_updated, ifp->if_name); +} + +static void +rt_if_feat_mode_change(struct ifnet *ifp, + enum if_feat_mode_event event) +{ + switch (event) { + case IF_FEAT_MODE_EVENT_L3_FAL_ENABLED: + case IF_FEAT_MODE_EVENT_L3_FAL_DISABLED: + route_handle_fal_l3_enable_change(ifp); + break; + default: + break; + } +} + static const struct dp_event_ops route_events = { .if_index_unset = rt_if_purge, + .if_feat_mode_change = rt_if_feat_mode_change, + .vrf_delete = rt_flush, }; DP_STARTUP_EVENT_REGISTER(route_events); diff --git a/src/route.h b/src/route.h index 9fa4a6ff..89356d43 100644 --- a/src/route.h +++ b/src/route.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2011-2016 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -13,12 +13,15 @@ #include #include +#include "fal_plugin.h" #include "compiler.h" #include "control.h" +#include "ip_forward.h" #include "json_writer.h" #include "mpls/mpls.h" +#include "nh_common.h" #include "pd_show.h" -#include "pktmbuf.h" +#include "pktmbuf_internal.h" #include "route_flags.h" #include "urcu.h" #include "util.h" @@ -41,40 +44,18 @@ tableid_in_pbr_range(uint32_t tableid) return (tableid > 0 && tableid <= PBR_TABLEID_MAX); } -/* Output information associated with a single nexthop */ -struct next_hop { - union { - struct ifnet *ifp; /* target interface */ - struct llentry *lle; /* lle entry to use when sending */ - } u; - uint32_t flags; /* routing flags */ - union next_hop_outlabels outlabels; - in_addr_t gateway; /* nexthop IP address */ -}; - /* * Nexthop (output information) related APIs */ -struct next_hop * -nexthop_create(struct ifnet *ifp, in_addr_t gw, uint32_t flags, - uint16_t num_labels, label_t *labels); -void nexthop_put(uint32_t idx); -int nexthop_new(const struct next_hop *nh, uint16_t size, uint8_t proto, - uint32_t *slot); -struct next_hop *nexthop_select(uint32_t nh_idx, const struct rte_mbuf *m, - uint16_t ether_type); struct next_hop *nexthop_get(uint32_t nh_idx, uint8_t *size); -void rt_print_nexthop(json_writer_t *json, uint32_t next_hop); +void rt_print_nexthop(json_writer_t *json, uint32_t next_hop, + enum rt_print_nexthop_verbosity v); /* * IPv4 route table apis. 
*/ int route_init(struct vrf *vrf); void route_uninit(struct vrf *vrf, struct route_head *rt_head); -bool route_link_vrf_to_table(struct vrf *vrf, uint32_t tableid); -bool route_unlink_vrf_from_table(struct vrf *vrf); -struct next_hop *rt_lookup(in_addr_t dst, uint32_t tbl_id, - const struct rte_mbuf *m) __hot_func; struct next_hop *rt_lookup_fast(struct vrf *vrf, in_addr_t dst, uint32_t tblid, const struct rte_mbuf *m); @@ -101,6 +82,8 @@ void rt_if_handle_in_dataplane(struct ifnet *ifp); void rt_if_punt_to_slowpath(struct ifnet *ifp); int rt_show(struct route_head *rt_head, json_writer_t *json, uint32_t tblid, const struct in_addr *addr); +int rt_show_exact(struct route_head *rt_head, json_writer_t *json, + uint32_t tblid, const struct in_addr *addr, uint8_t plen); void nexthop_tbl_init(void); bool rt_valid_tblid(vrfid_t vrfid, uint32_t tblid); int rt_local_show(struct route_head *rt_head, uint32_t id, FILE *f); @@ -115,8 +98,6 @@ nexthop_is_local(const struct next_hop *nh) struct ifnet *nhif_dst_lookup(const struct vrf *vrf, in_addr_t dst, bool *connected); -int nh_lookup_by_index(uint32_t nhindex, uint32_t hash, in_addr_t *nh, - uint32_t *nh_ifindex); void routing_insert_arp_safe(struct llentry *lle, bool arp_change); void routing_remove_arp_safe(struct llentry *lle); @@ -126,4 +107,9 @@ uint32_t *route_hw_stats_get(void); int route_get_pd_subset_data(json_writer_t *json, enum pd_obj_state subset); +int rt_show_platform_routes(const struct fal_ip_address_t *pfx, + uint8_t prefixlen, + uint32_t attr_count, + const struct fal_attribute_t *attr_list, + void *arg); #endif diff --git a/src/route_broker.c b/src/route_broker.c index 3059eba7..76547e6f 100644 --- a/src/route_broker.c +++ b/src/route_broker.c @@ -1,5 +1,6 @@ /* - * Copyright (c) 2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2018-2020, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2018,2020 AT&T Intellectual Property. All rights reserved. 
* * SPDX-License-Identifier: LGPL-2.1-only */ @@ -13,16 +14,22 @@ #include #include -#include "config.h" +#include "config_internal.h" #include "control.h" -#include "event.h" -#include "master.h" +#include "event_internal.h" +#include "ip_rt_protobuf.h" +#include "controller.h" #include "netlink.h" #include "route_broker.h" #include "vplane_debug.h" #include "vplane_log.h" #include "zmq_dp.h" +/* netlink format */ +#define ROUTE_BROKER_FORMAT_NL 0x0 +/* protobuf format */ +#define ROUTE_BROKER_FORMAT_PB 0x1 + #define BROKER_KEEPALIVE_TIMER_SEC 10 static struct rte_timer broker_keepalive_timer[CONT_SRC_COUNT]; @@ -54,7 +61,7 @@ static int dp_rt_msg_recv(zsock_t *sock, zmq_msg_t *route_msg) return -1; } -static int route_recv(void *arg) +static int route_netlink_recv(void *arg) { zmq_msg_t route_msg; zsock_t *sock = arg; @@ -80,12 +87,37 @@ static int route_recv(void *arg) return 0; } +static int route_pb_recv(void *arg) +{ + zmq_msg_t route_msg; + zsock_t *sock = arg; + + errno = 0; + int rc = dp_rt_msg_recv(sock, &route_msg); + if (rc != 0) { + if (errno == 0) + return 0; + return -1; + } + + rc = ip_route_pb_handler(zmq_msg_data(&route_msg), + zmq_msg_size(&route_msg), + CONT_SRC_MAIN); + if (rc) + DP_DEBUG(ROUTE, NOTICE, DATAPLANE, + "route message not handled\n"); + + zmq_msg_close(&route_msg); + + return 0; +} + /* * Open a pull socket using the given url and register the event handler. */ static int open_route_broker_data_sock(enum cont_src_en cont_src, - const char *data_url) + const char *data_url, bool protobuf_fmt) { zsock_t *data_sock; @@ -98,8 +130,10 @@ open_route_broker_data_sock(enum cont_src_en cont_src, cont_src_set_broker_data(cont_src, data_sock); - register_event_socket(zsock_resolve(data_sock), route_recv, - data_sock); + dp_register_event_socket( + zsock_resolve(data_sock), + protobuf_fmt ? 
route_pb_recv : route_netlink_recv, + data_sock); return 0; } @@ -230,6 +264,7 @@ static int broker_ctrl_recv(void *src) { enum cont_src_en cont_src = (enum cont_src_en)src; zsock_t *zsocket = cont_src_get_broker_ctrl(cont_src); + uint32_t data_format = ROUTE_BROKER_FORMAT_NL; char *uuid = NULL; char *data_url = NULL; char *str = NULL; @@ -247,7 +282,7 @@ static int broker_ctrl_recv(void *src) reset_dataplane(cont_src, false); goto out; } - if (strcmp("ACCEPT", str)) { + if (strcmp("ACCEPT", str) != 0) { RTE_LOG(ERR, DATAPLANE, "unrecognized message from broker ctrl %s\n", str); @@ -256,7 +291,7 @@ static int broker_ctrl_recv(void *src) } uuid = zmsg_popstr(msg); - if (strcmp(uuid, config.uuid)) { + if (strcmp(uuid, config.uuid) != 0) { RTE_LOG(ERR, DATAPLANE, "route broker(%s) ACCEPT message mis-match on UUID\n", cont_src_name(cont_src)); @@ -272,8 +307,10 @@ static int broker_ctrl_recv(void *src) rc = -1; goto out; } + zmsg_popu32(msg, &data_format); - open_route_broker_data_sock(cont_src, data_url); + open_route_broker_data_sock(cont_src, data_url, + data_format == ROUTE_BROKER_FORMAT_PB); start_route_broker_keepalives(cont_src); out: free(str); @@ -306,7 +343,7 @@ int init_route_broker_ctrl_connection(zsock_t *socket, rc = send_route_broker_ctrl_connect(socket, cont_src); if (rc < 0) RTE_LOG(ERR, DATAPLANE, - "master(%s) ZMQ failed to connect to route broker\n", + "main(%s) ZMQ failed to connect to route broker\n", cont_src_name(cont_src)); return rc; } @@ -361,13 +398,13 @@ void route_broker_unsubscribe(enum cont_src_en cont_src) zsock_t *broker_data = cont_src_get_broker_data(cont_src); if (broker_ctrl) { - unregister_event_socket(zsock_resolve(broker_ctrl)); + dp_unregister_event_socket(zsock_resolve(broker_ctrl)); zsock_destroy(&broker_ctrl); cont_src_set_broker_ctrl(cont_src, NULL); } if (broker_data) { - unregister_event_socket(zsock_resolve(broker_data)); + dp_unregister_event_socket(zsock_resolve(broker_data)); zsock_destroy(&broker_data); 
cont_src_set_broker_data(cont_src, NULL); rte_timer_stop(&broker_keepalive_timer[cont_src]); diff --git a/src/route_broker.h b/src/route_broker.h index 328b4206..cf682b19 100644 --- a/src/route_broker.h +++ b/src/route_broker.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2018-2019, AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only */ diff --git a/src/route_flags.h b/src/route_flags.h index 1d515779..cfae0bb7 100644 --- a/src/route_flags.h +++ b/src/route_flags.h @@ -30,7 +30,7 @@ */ /*- - * Copyright (c) 2017-2018, AT&T Intellectual Property. + * Copyright (c) 2017-2019, AT&T Intellectual Property. * All rights reserved. * Copyright (c) 2011-2016 by Brocade Communications Systems, Inc. * All rights reserved. @@ -54,14 +54,23 @@ #define RTF_MULTICAST 0x100 /* route represents a mcast address */ #define RTF_OUTLABEL 0x200 /* output label rather than local label */ #define RTF_NOROUTE 0x400 /* trigger no-route behaviour */ +/* backup path only to be used once primary path(s) are unusable */ +#define RTF_BACKUP 0x800 +#define RTF_UNUSABLE 0x1000 /* A primary path with a backup is unusable */ #define RTF_NEIGH_CREATED 0x10000 /* Nexthop was created to store neigh info */ #define RTF_NEIGH_PRESENT 0x20000 /* Nexthop contains neigh info */ /* * When comparing NHs for equality, mask the flags as the NEIGH_ ones are - * local optimisations. + * local optimisations and UNUSABLE is local state */ -#define NH_FLAGS_CMP_MASK ~(RTF_NEIGH_CREATED | RTF_NEIGH_PRESENT) +#define NH_FLAGS_CMP_MASK ~(RTF_NEIGH_CREATED | RTF_NEIGH_PRESENT | \ + RTF_UNUSABLE) + +enum rt_print_nexthop_verbosity { + RT_PRINT_NH_BRIEF, + RT_PRINT_NH_DETAIL, +}; #endif /* ROUTE_FLAGS_H */ diff --git a/src/rt_commands.c b/src/rt_commands.c index 175d8c51..46eb2784 100644 --- a/src/rt_commands.c +++ b/src/rt_commands.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2019, AT&T Intellectual Property. 
All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2011-2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -33,19 +33,25 @@ #include "netinet6/nd6_nbr.h" #include "route.h" #include "util.h" -#include "vrf.h" +#include "vrf_internal.h" +#include "vrf_if.h" #include "fal.h" #include "control.h" #include "dp_event.h" #include "vplane_log.h" +#include "protobuf.h" +#include "protobuf/GArpConfig.pb-c.h" + /* * Commands: * route - show main table * route [vrf_id ID] table - show PBR table * route [vrf_id ID] [table N] summary - show main table summary - * route [vrf_id ID] [table N] lookup address + * route [vrf_id ID] [table N] lookup

[] * route [vrf_id ID] [table N] all - show all local optimisations + * route [vrf_id ID] [table N] platform [cnt]- show routes in + * the platform (hardware) */ int cmd_route(FILE *f, int argc, char **argv) { @@ -60,12 +66,6 @@ int cmd_route(FILE *f, int argc, char **argv) argv += 2; } - vrf = vrf_get_rcu_from_external(vrf_id); - if (vrf == NULL) { - fprintf(f, "no vrf exist\n"); - return -1; - } - if (argc > 1 && strcmp(argv[1], "table") == 0) { if (argc == 2) { fprintf(f, "missing table id\n"); @@ -85,6 +85,21 @@ int cmd_route(FILE *f, int argc, char **argv) argv += 2; } + if (vrf_is_vrf_table_id(tblid)) { + if (vrf_lookup_by_tableid(tblid, &vrf_id, &tblid) < 0) { + fprintf(f, "no vrf exists for table %u\n", tblid); + return -1; + } + vrf = vrf_get_rcu(vrf_id); + } else { + vrf = dp_vrf_get_rcu_from_external(vrf_id); + } + + if (vrf == NULL) { + fprintf(f, "no vrf exists\n"); + return -1; + } + json_writer_t *json = jsonw_new(f); int err = -1; @@ -147,6 +162,7 @@ int cmd_route(FILE *f, int argc, char **argv) jsonw_end_object(json); } else if (strcmp(argv[1], "lookup") == 0) { struct in_addr in; + long plen = -1; if (argc == 2) { fprintf(f, "missing address\n"); @@ -158,16 +174,64 @@ int cmd_route(FILE *f, int argc, char **argv) goto error; } + if (argc > 3) { + plen = strtol(argv[3], NULL, 10); + if (plen < 0 || plen > 32) { + fprintf(f, "invalid prefix length\n"); + goto error; + } + } + jsonw_name(json, "route_lookup"); jsonw_start_array(json); - err = rt_show(&vrf->v_rt4_head, json, tblid, &in); + if (plen >= 0) + err = rt_show_exact(&vrf->v_rt4_head, json, tblid, &in, + plen); + else + err = rt_show(&vrf->v_rt4_head, json, tblid, &in); + jsonw_end_array(json); + } else if (strcmp(argv[1], "platform") == 0) { + + long cnt = UINT32_MAX; + + if (argc > 2) { + cnt = strtol(argv[2], NULL, 10); + if (cnt < 0 || cnt > UINT32_MAX) { + fprintf(f, "invalid count\n"); + goto error; + } + } + struct fal_attribute_t attr_list[] = { + { FAL_ROUTE_WALK_ATTR_VRFID, + 
.value.u32 = vrf_id }, + { FAL_ROUTE_WALK_ATTR_TABLEID, + .value.u32 = tblid }, + { FAL_ROUTE_WALK_ATTR_CNT, + .value.u32 = cnt }, + { FAL_ROUTE_WALK_ATTR_FAMILY, + .value.u32 = FAL_IP_ADDR_FAMILY_IPV4 }, + { FAL_ROUTE_WALK_ATTR_TYPE, + .value.u32 = FAL_ROUTE_WALK_TYPE_ALL }, + }; + + jsonw_name(json, "route_platform_show"); + + jsonw_start_array(json); + + err = fal_ip_walk_routes(rt_show_platform_routes, + RTE_DIM(attr_list), + attr_list, json); jsonw_end_array(json); + + /*TODO For scale, get_next from a prefix can be added */ + } else { fprintf(f, "Usage: route [vrf_id ID] [table N] [show]\n" " route [vrf_id ID] [table N] all\n" " route [vrf_id ID] [table N] summary\n" - " route [vrf_id ID] [table N] lookup ADDR\n"); + " route [vrf_id ID] [table N] lookup ADDR [PREFIXLENGTH]\n" + " route [vrf_id ID] [table N] platform [cnt]\n"); } error: @@ -200,7 +264,7 @@ int cmd_multicast(FILE *f, int argc, char **argv) if (argc >= 4 && strcmp(argv[2], "vrf_id") == 0) vrf_id = strtoul(argv[3], NULL, 10); - vrf = vrf_get_rcu_from_external(vrf_id); + vrf = dp_vrf_get_rcu_from_external(vrf_id); if (!vrf) { fprintf(f, "vrf %u does not exist\n", vrf_id); return -1; @@ -223,6 +287,8 @@ static const char *arp_flags(uint16_t flags) { static char buf[32]; + flags &= ~LLE_INTERNAL_MASK; + if (flags & LLE_DELETED) return "DELETED"; if (flags & LLE_STATIC) @@ -231,7 +297,7 @@ static const char *arp_flags(uint16_t flags) return "VALID"; if (flags & LLE_LOCAL) return "LOCAL"; - if (flags == 0 || flags == LLE_FWDING) + if (flags == 0) return "PENDING"; snprintf(buf, sizeof(buf), "%#x", flags); @@ -256,6 +322,14 @@ static void lle_dump(const struct ifnet *ifp, struct llentry *la, void *arg) ether_ntoa_r(&la->ll_addr, mac); jsonw_string_field(json, "mac", mac); jsonw_string_field(json, "ifname", ifp->if_name); + + if (la->la_flags & LLE_CREATED_IN_HW) { + jsonw_name(json, "platform_state"); + jsonw_start_object(json); + fal_ip4_dump_neigh(ifp->if_index, ifp->fal_l3, sin, json); + 
jsonw_end_object(json); + } + jsonw_end_object(json); } @@ -280,7 +354,7 @@ static void arp_flush_addr(const struct ifnet *ifp, if (satosin(sa)->sin_addr.s_addr == in->s_addr) { rte_spinlock_lock(&la->ll_lock); - llentry_destroy(ifp->if_lltable, la); + arp_entry_destroy(ifp->if_lltable, la); rte_spinlock_unlock(&la->ll_lock); } } @@ -296,7 +370,6 @@ static unsigned int arp_flush_entry(struct lltable *llt, struct llentry *la, void *arg __unused) { struct sockaddr *sa = ll_sockaddr(la); - unsigned int count; if (la->la_flags & LLE_STATIC) return 0; @@ -304,9 +377,10 @@ static unsigned int arp_flush_entry(struct lltable *llt, struct llentry *la, if (sa->sa_family != AF_INET) return 0; - count = llentry_destroy(llt, la); + arp_entry_destroy(llt, la); - return count; + /* Dropped pkts are tracked in the ARP stats */ + return 0; } static const char *const nd6_state[ND6_LLINFO_MAX + 1] = { @@ -332,6 +406,14 @@ static void lle6_dump(const struct ifnet *ifp, struct llentry *la, void *arg) ether_ntoa_r(&la->ll_addr, mac); jsonw_string_field(json, "mac", mac); jsonw_string_field(json, "ifname", ifp->if_name); + + if (la->la_flags & LLE_CREATED_IN_HW) { + jsonw_name(json, "platform_state"); + jsonw_start_object(json); + fal_ip6_dump_neigh(ifp->if_index, ifp->fal_l3, sin6, json); + jsonw_end_object(json); + } + jsonw_end_object(json); } @@ -395,14 +477,14 @@ static int nbr_res_show(FILE *f, sa_family_t af, int argc, char **argv) if (argc == 1) { if (af == AF_INET) - ifnet_walk(arp_dump, json); + dp_ifnet_walk(arp_dump, json); else - ifnet_walk(nd6_dump, json); + dp_ifnet_walk(nd6_dump, json); goto end; } while (--argc) { - struct ifnet *ifp = ifnet_byifname(*++argv); + struct ifnet *ifp = dp_ifnet_byifname(*++argv); if (!ifp) { err = -1; @@ -437,11 +519,11 @@ static int nbr_res_flush(FILE *f, sa_family_t af, int argc, char **argv) } if (af == AF_INET) - ifnet_walk(arp_flush_dev, &addr.address.ip_v4); + dp_ifnet_walk(arp_flush_dev, &addr.address.ip_v4); else - 
ifnet_walk(nd6_flush_dev, &addr.address.ip_v6); + dp_ifnet_walk(nd6_flush_dev, &addr.address.ip_v6); } else if (strcmp(argv[1], "dev") == 0) { - struct ifnet *ifp = ifnet_byifname(argv[2]); + struct ifnet *ifp = dp_ifnet_byifname(argv[2]); if (!ifp) { fprintf(f, "unknown interface\n"); @@ -459,6 +541,15 @@ static int nbr_res_flush(FILE *f, sa_family_t af, int argc, char **argv) return 0; } +/* Process get sub-command */ +static int nbr_res_get_cfg(FILE *f, sa_family_t af) +{ + if (af == AF_INET) + return cmd_arp_get_cfg(f); + + return cmd_nd6_get_cfg(f); +} + struct garp_op_ctx { const char *if_name; bool set; @@ -477,97 +568,19 @@ static void if_garp_op_update(struct ifnet *ifp, void *param) ifp->ip_garp_op.garp_rep_action = ctx->action; } -static struct cfg_if_list *cfg_garp_list; -static int cmd_garp(FILE *f, int argc, char **argv); - -static void -garp_event_if_index_set(struct ifnet *ifp, uint32_t ifindex); -static void -garp_event_if_index_unset(struct ifnet *ifp, uint32_t ifindex); - -static const struct dp_event_ops garp_event_ops = { - .if_index_set = garp_event_if_index_set, - .if_index_unset = garp_event_if_index_unset, -}; - -static void -garp_event_if_index_set(struct ifnet *ifp, uint32_t ifindex __unused) -{ - struct cfg_if_list_entry *le; - - if (!cfg_garp_list) - return; - - le = cfg_if_list_lookup(cfg_garp_list, ifp->if_name); - if (!le) - return; - - RTE_LOG(INFO, DATAPLANE, - "Replaying garp command %s for interface %s\n", - le->le_buf, ifp->if_name); - cmd_garp(NULL, le->le_argc, le->le_argv); - cfg_if_list_del(cfg_garp_list, ifp->if_name); - if (!cfg_garp_list->if_list_count) { - cfg_if_list_destroy(&cfg_garp_list); - dp_event_unregister(&garp_event_ops); - } -} - -static void -garp_event_if_index_unset(struct ifnet *ifp, uint32_t ifindex __unused) -{ - if (!cfg_garp_list) - return; - - cfg_if_list_del(cfg_garp_list, ifp->if_name); - if (!cfg_garp_list->if_list_count) { - cfg_if_list_destroy(&cfg_garp_list); - 
dp_event_unregister(&garp_event_ops); - } -} - -static int garp_replay_init(void) -{ - if (!cfg_garp_list) { - cfg_garp_list = cfg_if_list_create(); - if (!cfg_garp_list) - return -ENOMEM; - } - dp_event_register(&garp_event_ops); - return 0; -} - static int cmd_garp_global(struct garp_op_ctx *ctx) { if (!ctx->set) ctx->action = GARP_PKT_UPDATE; set_garp_cfg(ctx->op, ctx->action); - ifnet_walk(if_garp_op_update, ctx); + dp_ifnet_walk(if_garp_op_update, ctx); return 0; } -static int cmd_garp_intf(struct garp_op_ctx *ctx, - int argc, char **argv) +static void cmd_garp_intf_arpop(struct garp_op_ctx *ctx, struct ifnet *ifp) { struct garp_cfg glob_cfg; - struct ifnet *ifp; - - ifp = ifnet_byifname(ctx->if_name); - if (!ifp) { - if (!cfg_garp_list && garp_replay_init()) { - RTE_LOG(ERR, DATAPLANE, - "Could not set up cmd replay cache\n"); - return -ENOMEM; - } - - RTE_LOG(INFO, DATAPLANE, - "Caching garp command for interface %s\n", - argv[3]); - cfg_if_list_add(cfg_garp_list, ctx->if_name, argc, argv); - return 0; - } - if (ctx->set) { if (ctx->op == ARPOP_REQUEST) { ifp->ip_garp_op.garp_req_default = 0; @@ -588,6 +601,22 @@ static int cmd_garp_intf(struct garp_op_ctx *ctx, glob_cfg.garp_rep_action; } } +} + +static int cmd_garp_intf_pb(struct garp_op_ctx *ctx) +{ + + struct ifnet *ifp; + + ifp = dp_ifnet_byifname(ctx->if_name); + if (!ifp) { + RTE_LOG(INFO, DATAPLANE, + "garp applied, but interface missing %s\n", + ctx->if_name); + return -1; + } + + cmd_garp_intf_arpop(ctx, ifp); return 0; } @@ -601,70 +630,25 @@ static int cmd_garp_intf(struct garp_op_ctx *ctx, * DELETE all -> set default to UPDATE. 
update all interfaces which * don't have an override */ -static int cmd_garp(FILE *f, int argc, char **argv) -{ - struct garp_op_ctx ctx; - - if (argc != 6) - goto error; - - if (!strcmp(argv[2], "SET")) - ctx.set = true; - else if (!strcmp(argv[2], "DELETE")) - ctx.set = false; - else - goto error; - - if (!strcmp(argv[3], "all")) - ctx.if_name = NULL; - else - ctx.if_name = argv[3]; - - if (!strcmp(argv[4], "request")) - ctx.op = ARPOP_REQUEST; - else if (!strcmp(argv[4], "reply")) - ctx.op = ARPOP_REPLY; - else - goto error; - - if (!strcmp(argv[5], "update")) - ctx.action = GARP_PKT_UPDATE; - else if (!strcmp(argv[5], "drop")) - ctx.action = GARP_PKT_DROP; - else - goto error; - - if (!ctx.if_name) - cmd_garp_global(&ctx); - else - cmd_garp_intf(&ctx, argc, argv); - - return 0; - -error: - if (f) - fprintf(f, - "Usage: arp gratuitous \n"); - return -1; -} -/* Process neighbor resolution command */ +/* Process neighbor resolution operational command */ static int cmd_nbr_res(FILE *f, sa_family_t af, int argc, char **argv) { if (argc == 1) return nbr_res_show(f, af, argc, argv); - --argc, ++argv; /* skip "arp" */ + --argc, ++argv; /* skip "arp" or "nd6" keyword */ if (strcmp(argv[0], "show") == 0) return nbr_res_show(f, af, argc, argv); - else if (strcmp(argv[0], "flush") == 0) + if (strcmp(argv[0], "flush") == 0) return nbr_res_flush(f, af, argc, argv); - else { - fprintf(f, "unknown command action\n"); - return -1; - } + if (!strcmp(argv[0], "get")) + return nbr_res_get_cfg(f, af); + + fprintf(f, "unknown command action\n"); + return -1; } /* Process "arp ..." command */ @@ -673,23 +657,205 @@ int cmd_arp(FILE *f, int argc, char **argv) return cmd_nbr_res(f, AF_INET, argc, argv); } -/* Process "arp ..." config command */ -int cmd_arp_cfg(FILE *f, int argc, char **argv) +/* + * cmd_garp_cfg_handler (replacing cmd_garp) + * Protobuf handler for gratuitous arp commands. + * See the GArpConfig.proto file for details. 
+ */ +static int +cmd_garp_cfg_handler(struct pb_msg *msg) { - if (argc < 2) - goto error; + void *payload = (void *)((char *)msg->msg); + int len = msg->msg_len; + int ret = 0; - if (strcmp(argv[1], "gratuitous") == 0) - return cmd_garp(f, argc, argv); + GArpConfig *smsg = garp_config__unpack(NULL, len, payload); -error: - fprintf(f, "unknown command action\n"); - return -1; + if (!smsg) { + RTE_LOG(ERR, DATAPLANE, + "failed to read GArpConfig protobuf command\n"); + return -1; + } + + struct garp_op_ctx ctx; + + ctx.set = smsg->set; + ctx.if_name = smsg->ifname; + switch (smsg->op) { + case GARP_CONFIG__ARP_OP__ARPOP_REQUEST: + ctx.op = ARPOP_REQUEST; + break; + case GARP_CONFIG__ARP_OP__ARPOP_REPLY: + ctx.op = ARPOP_REPLY; + break; + default: + RTE_LOG(ERR, DATAPLANE, + "Error: Invalid garp command\n"); + ret = -1; + goto end; + } + + switch (smsg->action) { + case GARP_CONFIG__GARP_PKT_ACTION__GARP_PKT_DROP: + ctx.action = GARP_PKT_DROP; + break; + case GARP_CONFIG__GARP_PKT_ACTION__GARP_PKT_UPDATE: + ctx.action = GARP_PKT_UPDATE; + break; + default: + RTE_LOG(ERR, DATAPLANE, + "Error: Invalid garp command\n"); + ret = -1; + goto end; + } + + if (*ctx.if_name == '\0' || !strcmp(ctx.if_name, "all")) + cmd_garp_global(&ctx); + else + cmd_garp_intf_pb(&ctx); +end: + garp_config__free_unpacked(smsg, NULL); + return ret; } +PB_REGISTER_CMD(garp_cfg_cmd) = { + .cmd = "vyatta:garp", + .handler = cmd_garp_cfg_handler, +}; /* Process "nd6 ..." 
command */ int cmd_nd6(FILE *f, int argc, char **argv) { return cmd_nbr_res(f, AF_INET6, argc, argv); } + +int rt_show_platform_routes(const struct fal_ip_address_t *pfx, + uint8_t prefixlen, + uint32_t attr_count, + const struct fal_attribute_t *attr_list, + void *arg) +{ + uint32_t i, nh_idx; + char buf[INET6_ADDRSTRLEN+4]; + json_writer_t *wr = (json_writer_t *)arg; + const char *ifname = NULL; + fal_object_t nhg = 0; + struct fal_attribute_t attr; + struct fal_attribute_t *nhg_attr_list; + int rv; + uint32_t nhc; + enum fal_packet_action_t action = UINT32_MAX; + + if (!arg || !pfx) + return -1; + sprintf(buf, "%s/%u", fal_ip_address_t_to_str(pfx, buf, + sizeof(buf)), prefixlen); + + jsonw_start_object(wr); + jsonw_string_field(wr, "prefix", buf); + + for (i = 0; i < attr_count; i++) { + switch (attr_list[i].id) { + case FAL_ROUTE_ENTRY_ATTR_NEXT_HOP_GROUP: + nhg = attr_list[i].value.objid; + break; + case FAL_ROUTE_ENTRY_ATTR_PACKET_ACTION: + action = attr_list[i].value.u32; + break; + default: + RTE_LOG(INFO, DATAPLANE, + "%s: Unhandled list attribute %d\n", + __func__, attr_list[i].id); + } + } + switch (action) { + case FAL_PACKET_ACTION_DROP: + jsonw_string_field(wr, "action", "Drop"); + break; + case FAL_PACKET_ACTION_FORWARD: + jsonw_string_field(wr, "action", "Forward"); + break; + case FAL_PACKET_ACTION_TRAP: + jsonw_string_field(wr, "action", "Punt"); + break; + default: + break; + } + if (!nhg) { + jsonw_end_object(wr); + return 0; + } + /* Get next hop count */ + attr.id = FAL_NEXT_HOP_GROUP_ATTR_NEXTHOP_COUNT; + + rv = fal_ip_get_next_hop_group_attrs(nhg, 1, &attr); + if (rv) { + jsonw_end_object(wr); + return 0; + } + nhc = attr.value.u32; + if (!nhc) { + jsonw_end_object(wr); + return 0; + } + /* Get list of next hop object ids from next hop group object */ + nhg_attr_list = calloc(nhc, sizeof(*nhg_attr_list)); + if (!nhg_attr_list) { + RTE_LOG(ERR, DATAPLANE, "%s: out of memory\n", __func__); + return -ENOMEM; + } + for (nh_idx = 0; nh_idx < nhc; 
nh_idx++) + nhg_attr_list[nh_idx].id = + FAL_NEXT_HOP_GROUP_ATTR_NEXTHOP_OBJECT; + + rv = fal_ip_get_next_hop_group_attrs(nhg, nhc, nhg_attr_list); + if (rv) { + jsonw_end_object(wr); + free(nhg_attr_list); + return 0; + } + jsonw_name(wr, "nexthop"); + jsonw_start_array(wr); + + for (nh_idx = 0; nh_idx < nhc; nh_idx++) { + struct fal_attribute_t nh_attr_list[] = { + { FAL_NEXT_HOP_ATTR_INTF, + .value.u32 = UINT32_MAX }, + { FAL_NEXT_HOP_ATTR_IP, + .value.ipaddr = { 0 } }, + }; + + rv = fal_ip_get_next_hop_attrs( + nhg_attr_list[nh_idx].value.objid, + RTE_DIM(nh_attr_list), + nh_attr_list); + if (rv) { + RTE_LOG(ERR, DATAPLANE, + "%s: nhg get attr failed rv %d\n", + __func__, rv); + jsonw_end_array(wr); + jsonw_end_object(wr); + free(nhg_attr_list); + return 0; + } + jsonw_start_object(wr); + + if (nh_attr_list[0].value.u32 != UINT32_MAX) { + ifname = ifnet_indextoname_safe( + nh_attr_list[0].value.u32); + if (ifname) + jsonw_string_field(wr, "ifname", ifname); + } + if (!fal_is_ipaddr_empty(&nh_attr_list[1].value.ipaddr)) { + + fal_ip_address_t_to_str(&nh_attr_list[1].value.ipaddr, + buf, sizeof(buf)); + jsonw_string_field(wr, "via", buf); + } + jsonw_end_object(wr); + } + jsonw_end_array(wr); + jsonw_end_object(wr); + free(nhg_attr_list); + return 0; +} diff --git a/src/rt_tracker.c b/src/rt_tracker.c index 674f53ff..d8034a8d 100644 --- a/src/rt_tracker.c +++ b/src/rt_tracker.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2019-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2019 AT&T Intellectual Property. All rights reserved. 
* * SPDX-License-Identifier: LGPL-2.1-only @@ -12,6 +12,7 @@ #include +#include "ip_forward.h" #include "lpm/lpm.h" #include "lpm/lpm6.h" #include "rt_tracker.h" @@ -186,7 +187,7 @@ rt_tracker_match_dst(struct cds_lfht_node *node, const void *key) = caa_container_of(node, const struct rt_tracker_info, rti_node); - return addr_eq(addr, &ti_info->dst_addr); + return dp_addr_eq(addr, &ti_info->dst_addr); } static struct rt_tracker_info * @@ -242,7 +243,7 @@ rt_tracker_insert(struct cds_lfht *table, struct rt_tracker_info *ti_info) * rt_tracker_info */ struct rt_tracker_info * -rt_tracker_add(struct vrf *vrf, struct ip_addr *addr, void *cb_ctx, +dp_rt_tracker_add(struct vrf *vrf, struct ip_addr *addr, void *cb_ctx, tracker_change_notif cb) { int ret = -1; @@ -306,18 +307,18 @@ rt_tracker_add(struct vrf *vrf, struct ip_addr *addr, void *cb_ctx, } void -rt_tracker_delete(const struct vrf *vrf, struct ip_addr *ip, void *cb_ctx) +dp_rt_tracker_delete(const struct vrf *vrf, struct ip_addr *addr, void *cb_ctx) { struct rt_tracker_info *ti_info; - ti_info = rt_tracker_lookup(vrf->v_rt_tracker_tbl, ip); + ti_info = rt_tracker_lookup(vrf->v_rt_tracker_tbl, addr); if (!ti_info) { RTE_LOG(ERR, LPM, "Delete tracker: NOT FOUND\n"); return; } rt_tracker_client_delete(ti_info, cb_ctx); if (cds_list_empty(&ti_info->rti_client_list)) { - switch (ip->type) { + switch (addr->type) { case AF_INET: lpm_tracker_delete(ti_info); break; @@ -330,6 +331,34 @@ rt_tracker_delete(const struct vrf *vrf, struct ip_addr *ip, void *cb_ctx) } } +/* + * Get tracking status from RT Tracker + * @param[in] rt_info Route tracker information + * + * @return true if being tracking, false otherwise. + */ +bool dp_get_rt_tracker_tracking(struct rt_tracker_info *rt_info) +{ + if (!rt_info) + return false; + + return rt_info->tracking; +} + +/* + * Get tracking status from RT Tracker + * @param[in] rt_info Route tracker information + * + * @return Index of NH. 
+ */ +uint32_t dp_get_rt_tracker_nh_index(struct rt_tracker_info *rt_info) +{ + if (!rt_info) + return 0; + + return rt_info->nhindex; +} + static void rt_tracker_walk(struct vrf *vrf, void (*cb)(struct rt_tracker_info *ti_info, void *cb_ctx), @@ -412,7 +441,7 @@ int cmd_rt_tracker_op(FILE *f, int argc, char **argv) if (argc != 2) return -1; - if (strcmp("show", argv[1])) + if (strcmp("show", argv[1]) != 0) return -1; wr = jsonw_new(f); diff --git a/src/rt_tracker.h b/src/rt_tracker.h index 9e944da8..aad9179a 100644 --- a/src/rt_tracker.h +++ b/src/rt_tracker.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2019-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2019 AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only @@ -14,7 +14,7 @@ #include #include "urcu.h" -#include "vrf.h" +#include "vrf_internal.h" typedef void (*tracker_change_notif)(void *cb_ctx); @@ -38,16 +38,10 @@ struct rt_tracker_info { bool tracking; }; - -struct rt_tracker_info * -rt_tracker_add(struct vrf *vrf, struct ip_addr *addr, void *cb_ctx, - tracker_change_notif cb); -void -rt_tracker_delete(const struct vrf *vrf, struct ip_addr *addr, void *cb_ctx); void rt_tracker_uninit(struct vrf *vrf); uint32_t -rt_tracker_client_count(struct rt_tracker_info *ti_info); +rt_tracker_client_count(struct rt_tracker_info *tracker); int cmd_rt_tracker_op(FILE *f, int argc, char **argv); diff --git a/src/session/session.c b/src/session/session.c index b7234acc..ab4f3507 100644 --- a/src/session/session.c +++ b/src/session/session.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2021, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2017 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -30,16 +30,18 @@ #include "compiler.h" #include "dp_event.h" +#include "dp_session.h" #include "if_var.h" #include "main.h" #include "netinet6/in6.h" #include "npf_shim.h" #include "netinet6/ip6_funcs.h" -#include "pktmbuf.h" +#include "pktmbuf_internal.h" #include "session.h" #include "session_feature.h" #include "urcu.h" #include "vplane_log.h" +#include "npf_pack.h" /* * Session implementation for dataplane features. @@ -187,6 +189,8 @@ static rte_atomic32_t sessions_used; static int32_t sessions_max = DEFAULT_MAX_SESSIONS; static bool session_gc_run = true; +static int32_t user_data_id = -1; + /* Global session logging configuration */ static struct session_log_cfg session_global_log_cfg; @@ -340,9 +344,8 @@ void se_expire(struct session *s) { uint16_t exp = s->se_flags & ~SESSION_EXPIRED; - if (rte_atomic16_cmpset(&s->se_flags, exp, (exp | SESSION_EXPIRED))) { + if (rte_atomic16_cmpset(&s->se_flags, exp, (exp | SESSION_EXPIRED))) session_feature_session_expire(s); - } } static inline void sl_unlink(struct session_link *sl) @@ -413,6 +416,25 @@ void sentry_delete(struct sentry *sen) } } +/* + * Determine a sessions time-to-expire. Note that this can go negative due + * the periodic nature of the garbage collection. Used by show command. + */ +int sess_time_to_expire(const struct session *s) +{ + int tmp; + + if (s->se_flags & SESSION_EXPIRED) + tmp = 0; + else if (!s->se_etime) + tmp = s->se_custom_timeout ? 
+ s->se_custom_timeout : s->se_timeout; + else + tmp = (int) (s->se_etime - get_dp_uptime()); + + return tmp; +} + /* Get etime based on config */ static inline uint32_t se_timeout(struct session *s) { @@ -426,8 +448,9 @@ int reclaim_session(struct session *s, uint64_t uptime) int rc = 0; /* Expired is the same as a timeout */ - if (s->se_flags & SESSION_EXPIRED) + if (s->se_flags & SESSION_EXPIRED) { return 1; + } if (!s->se_idle) { /* Session not idle, update etime */ @@ -560,7 +583,7 @@ unsigned long sentry_hash(const struct sentry_packet *sp) { unsigned long hash; - hash = rte_jhash_2words(sp->sp_vrfid, sp->sp_protocol, sp->sp_ifindex); + hash = rte_jhash_1word(sp->sp_protocol, sp->sp_ifindex); return rte_jhash_32b(sp->sp_addrids, sp->sp_len, hash); } @@ -728,13 +751,13 @@ int session_table_destroy_all(void) } /* Get counts of nodes in sentry and session ht's - for UTs */ -void session_table_counts(unsigned long *sen_ht, unsigned long *sess_ht) +void session_table_counts(unsigned long *sen_cnt, unsigned long *sess_cnt) { long dummy; - cds_lfht_count_nodes(sentry_ht, &dummy, sen_ht, &dummy); - cds_lfht_count_nodes(session_ht, &dummy, sess_ht, &dummy); + cds_lfht_count_nodes(sentry_ht, &dummy, sen_cnt, &dummy); + cds_lfht_count_nodes(session_ht, &dummy, sess_cnt, &dummy); } /* @@ -749,9 +772,9 @@ void session_counts(uint32_t *used, uint32_t *max, struct session_counts *sc) } /* Set the max session limit */ -void session_set_max_sessions(uint32_t count) +void session_set_max_sessions(uint32_t max) { - sessions_max = count ? count : DEFAULT_MAX_SESSIONS; + sessions_max = max ? 
max : DEFAULT_MAX_SESSIONS; } void session_set_global_logging_cfg(struct session_log_cfg *scfg) @@ -781,7 +804,7 @@ void ids_set(uint32_t *loc, uint16_t sid, uint16_t did) } static ALWAYS_INLINE -void ids_extract(uint32_t *loc, uint16_t *sid, uint16_t *did) +void ids_extract(const uint32_t *loc, uint16_t *sid, uint16_t *did) { *sid = *loc >> 16; *did = *loc & 0xFFFF; @@ -879,7 +902,7 @@ static int pkt_parse_ipv4(struct rte_mbuf *m, uint32_t if_index, uint16_t did; /* Ensure IP header is available */ - off = pktmbuf_l2_len(m); + off = dp_pktmbuf_l2_len(m); ip = (struct iphdr *)rte_pktmbuf_read(m, off, sizeof(struct iphdr), buf); if (!ip) @@ -894,7 +917,7 @@ static int pkt_parse_ipv4(struct rte_mbuf *m, uint32_t if_index, /* Length of the array match */ sp->sp_len = SENTRY_LEN_IPV4; - off = pktmbuf_l2_len(m) + pktmbuf_l3_len(m); + off = dp_pktmbuf_l2_len(m) + dp_pktmbuf_l3_len(m); rc = se_parse_ids(m, off, ip->protocol, &sid, &did); if (rc) return rc; @@ -1199,6 +1222,15 @@ static struct session *se_alloc(void) return s; } +/* + * Reset the session_id to 0. Used by UTs between tests so we start each test + * with a known session ID value. 
+ */ +void session_reset_session_id(void) +{ + rte_atomic64_set(&session_id, 0); +} + /* Initialise the logging requirements of the session */ static void se_init_logging(struct session *s) { @@ -1213,11 +1245,9 @@ static void se_init_logging(struct session *s) } /* Allocate and init a session */ -static struct session *se_create(struct sentry_packet *sp, uint32_t timeout, - struct rte_mbuf *m) +static struct session *se_create(struct sentry_packet *sp, uint32_t timeout) { struct session *s; - uint16_t eth_type; s = se_alloc(); if (!s) @@ -1226,16 +1256,12 @@ static struct session *se_create(struct sentry_packet *sp, uint32_t timeout, s->se_protocol = sp->sp_protocol; s->se_timeout = timeout; - /* set custom etime */ - if (sp->sp_sentry_flags & SENTRY_IPv4) - eth_type = htons(ETHER_TYPE_IPv4); - else - eth_type = htons(ETHER_TYPE_IPv6); - s->se_custom_timeout = npf_custom_session_timeout(sp->sp_vrfid, - eth_type, m); - s->se_vrfid = sp->sp_vrfid; s->se_create_time = rte_get_timer_cycles(); + rte_atomic64_init(&s->se_pkts_in); + rte_atomic64_init(&s->se_bytes_in); + rte_atomic64_init(&s->se_pkts_out); + rte_atomic64_init(&s->se_bytes_out); se_init_logging(s); return s; @@ -1264,10 +1290,18 @@ int session_establish(struct rte_mbuf *m, const struct ifnet *ifp, /* Set the protocol timeout, and current protocol state */ void session_set_protocol_state_timeout(struct session *s, uint8_t state, - uint32_t timeout) + enum dp_session_state gen_state, + uint32_t timeout) { s->se_timeout = timeout; s->se_protocol_state = state; + s->se_gen_state = gen_state; +} + +/* Set the custom timeout */ +void session_set_custom_timeout(struct session *s, uint32_t timeout) +{ + s->se_custom_timeout = timeout; } /* Insert forw/back sentries based on packet. 
*/ @@ -1355,6 +1389,25 @@ void session_sentry_extract(struct sentry *sen, uint32_t *if_index, int *af, ids_extract(&sen->sen_addrids[0], sid, did); } +/* Extract addrs from a sentry */ +void session_sentry_extract_addrs(const struct sentry *sen, int *af, + const void **saddr, const void **daddr) +{ + if (sen->sen_flags & SENTRY_IPv4) { + *af = AF_INET; + *saddr = &sen->sen_addrids[1]; + *daddr = &sen->sen_addrids[2]; + } else if (sen->sen_flags & SENTRY_IPv6) { + *af = AF_INET6; + *saddr = &sen->sen_addrids[1]; + *daddr = &sen->sen_addrids[5]; + } else { + *af = 0; + *saddr = NULL; + *daddr = NULL; + } +} + /* Destroy a session */ void session_expire(struct session *s, struct rte_mbuf *m) { @@ -1445,7 +1498,7 @@ int session_link(struct session *parent, struct session *child) /* * Check for expired sessions now, after the lock. * - * This prevents a race where master could be expiring sessions + * This prevents a race where main could be expiring sessions * while a link is about to occur. 
*/ if ((parent->se_flags | child->se_flags) & SESSION_EXPIRED) { @@ -1465,7 +1518,7 @@ int session_link(struct session *parent, struct session *child) return 0; } - cds_list_add_tail(&psl->sl_children, &csl->sl_link); + cds_list_add_tail(&csl->sl_link, &psl->sl_children); rte_atomic16_inc(&parent->se_link_cnt); csl->sl_parent = parent; @@ -1606,7 +1659,7 @@ int session_create_from_sentry_packets(struct rte_mbuf *m, if (rc) return rc; - s = se_create(sp_forw, timeout, m); + s = se_create(sp_forw, timeout); if (!s) { slot_put(); return -ENOMEM; @@ -1697,3 +1750,286 @@ static void __attribute__ ((constructor)) session_event_init(void) dp_event_register(&ops); } + +int session_npf_pack_stats_pack(struct session *s, + struct npf_pack_dp_sess_stats *stats) +{ + if (!s || !stats) + return -EINVAL; + + stats->pdss_pkts_in = rte_atomic64_read(&s->se_pkts_in); + stats->pdss_bytes_in = rte_atomic64_read(&s->se_bytes_in); + stats->pdss_pkts_out = rte_atomic64_read(&s->se_pkts_out); + stats->pdss_bytes_out = rte_atomic64_read(&s->se_bytes_out); + + return 0; +} + +int session_npf_pack_stats_restore(struct session *s, + struct npf_pack_dp_sess_stats *stats) +{ + if (!s || !stats) + return -EINVAL; + + rte_atomic64_set(&s->se_pkts_in, stats->pdss_pkts_in); + rte_atomic64_set(&s->se_bytes_in, stats->pdss_bytes_in); + rte_atomic64_set(&s->se_pkts_out, stats->pdss_pkts_out); + rte_atomic64_set(&s->se_bytes_out, stats->pdss_bytes_out); + + return 0; +} + +int session_npf_pack_sentry_pack(struct session *s, + struct npf_pack_sentry_packet *psp) +{ + struct sentry *s_sen; + struct sentry_packet *psp_forw; + struct sentry_packet *psp_back; + struct ifnet *ifp; + int i; + + if (!s || !psp) + return -EINVAL; + + s_sen = s->se_sen; + if (!s_sen) + return -EINVAL; + + ifp = dp_ifnet_byifindex(s_sen->sen_ifindex); + if (!ifp) + return -EINVAL; + strncpy(psp->psp_ifname, ifp->if_name, IFNAMSIZ); + + psp_forw = &psp->psp_forw; + psp_forw->sp_sentry_flags = s_sen->sen_flags; + + if 
(s_sen->sen_flags & SENTRY_IPv4) + psp_forw->sp_sentry_flags = SENTRY_IPv4; + else + psp_forw->sp_sentry_flags = SENTRY_IPv6; + + psp_forw->sp_protocol = s_sen->sen_protocol; + psp_forw->sp_len = s_sen->sen_len; + + for (i = 0; i < s_sen->sen_len; i++) + psp_forw->sp_addrids[i] = s_sen->sen_addrids[i]; + + psp_back = &psp->psp_back; + memset(psp_back, 0, sizeof(*psp_back)); + sentry_packet_reverse(psp_forw, psp_back); + + return 0; +} + +int session_npf_pack_sentry_restore(struct npf_pack_sentry_packet *psp, + struct ifnet **ifp) +{ + struct ifnet *s_ifp; + + if (!psp) + return -EINVAL; + + s_ifp = dp_ifnet_byifname(psp->psp_ifname); + if (!s_ifp) + return -EINVAL; + + psp->psp_forw.sp_vrfid = s_ifp->if_vrfid; + psp->psp_back.sp_vrfid = s_ifp->if_vrfid; + psp->psp_forw.sp_ifindex = s_ifp->if_index; + psp->psp_back.sp_ifindex = s_ifp->if_index; + + *ifp = s_ifp; + + return 0; +} + +int session_npf_pack_pack(struct session *s, struct npf_pack_dp_session *pds, + struct npf_pack_sentry_packet *psp, + struct npf_pack_dp_sess_stats *stats) +{ + if (!s || !pds || !psp || !stats) + return -EINVAL; + + pds->pds_id = s->se_id; + pds->pds_flags = s->se_flags; + pds->pds_protocol = s->se_protocol; + pds->pds_custom_timeout = s->se_custom_timeout; + pds->pds_timeout = s->se_timeout; + pds->pds_protocol_state = s->se_protocol_state; + pds->pds_gen_state = s->se_gen_state; + pds->pds_fw = session_is_fw(s); + pds->pds_snat = session_is_snat(s); + pds->pds_dnat = session_is_dnat(s); + pds->pds_nat64 = session_is_nat64(s); + pds->pds_nat46 = session_is_nat46(s); + pds->pds_alg = session_is_alg(s); + pds->pds_in = session_is_in(s); + pds->pds_out = session_is_out(s); + pds->pds_app = session_is_app(s); + + if (session_npf_pack_stats_pack(s, stats)) + return -EINVAL; + + return session_npf_pack_sentry_pack(s, psp); +} + +int session_npf_pack_restore(struct npf_pack_dp_session *pds, + struct npf_pack_sentry_packet *psp, + struct npf_pack_dp_sess_stats *stats, + struct session 
**session) +{ + struct session *s; + struct sentry_packet psp_forw; + struct sentry_packet psp_back; + struct sentry_packet *forw = &psp_forw; + struct sentry_packet *back = &psp_back; + struct sentry *sen_forw; + struct ifnet *ifp; + bool created = false; + int rc; + + if (!pds || !psp) + return -EINVAL; + + rc = session_npf_pack_sentry_restore(psp, &ifp); + if (rc) + return rc; + + rc = slot_get(); + if (rc) + return rc; + + s = session_alloc(); + if (!s) { + slot_put(); + return -ENOMEM; + } + + s->se_vrfid = ifp->if_vrfid; + s->se_flags = pds->pds_flags; + s->se_protocol = pds->pds_protocol; + s->se_custom_timeout = pds->pds_custom_timeout; + s->se_timeout = pds->pds_timeout; + s->se_etime = get_dp_uptime() + se_timeout(s); + s->se_protocol_state = pds->pds_protocol_state; + s->se_gen_state = pds->pds_gen_state; + s->se_fw = pds->pds_fw; + s->se_snat = pds->pds_snat; + s->se_dnat = pds->pds_dnat; + s->se_nat64 = pds->pds_nat64; + s->se_nat46 = pds->pds_nat46; + s->se_alg = pds->pds_alg; + s->se_in = pds->pds_in; + s->se_out = pds->pds_out; + s->se_app = pds->pds_app; + + s->se_create_time = rte_get_timer_cycles(); + rte_atomic64_init(&s->se_pkts_in); + rte_atomic64_init(&s->se_bytes_in); + rte_atomic64_init(&s->se_pkts_out); + rte_atomic64_init(&s->se_bytes_out); + se_init_logging(s); + + memcpy(forw, &psp->psp_forw, sizeof(*forw)); + memcpy(back, &psp->psp_back, sizeof(*back)); + + rc = sentry_packet_insert_both(s, forw, back, SENTRY_INIT, + &sen_forw, &created); + if (rc || !created) + goto error; + + /* Add the session to the session hash table. 
*/ + cds_lfht_add(session_ht, s->se_id, &s->se_node); + s->se_flags = SESSION_INSERTED; + + rc = session_npf_pack_stats_restore(s, stats); + if (rc) + goto error; + + *session = s; + return 0; + +error: + slot_put(); + free(s); + return rc; +} + +uint32_t session_get_npf_pack_timeout(struct session *s) +{ + if (s) + return se_timeout(s); + return 0; +} + +int dp_session_user_data_register(void) +{ + int old = uatomic_cmpxchg(&user_data_id, -1, 0); + if (old != -1) + return -EBUSY; + return 0; +} + +int dp_session_user_data_unregister(int id) +{ + int old = uatomic_cmpxchg(&user_data_id, -1, id); + if (old != id) + return -ENOENT; + return 0; + +} + +bool dp_session_set_private(int id __unused, + struct session *session, void *data) +{ + void *old; + + if (!session) + return 0; + + if (data == NULL) { + old = rcu_xchg_pointer(&session->se_private, NULL); + return old != NULL; + } + + old = rcu_cmpxchg_pointer(&session->se_private, NULL, data); + return old == NULL; +} + +void *dp_session_get_private(int id __unused, + const struct session *session) +{ + if (!session) + return NULL; + + return rcu_dereference(session->se_private); +} + +bool dp_session_is_established(const struct session *session) +{ + if (!session) + return false; + return session->se_gen_state == SESSION_STATE_ESTABLISHED; +} + +bool dp_session_is_expired(const struct session *session) +{ + return !session || (session->se_flags & SESSION_EXPIRED); +} + +enum dp_session_state dp_session_get_state(const struct session *session) +{ + return session->se_gen_state; +} + +const char *dp_session_get_state_name(const struct session *session, bool upper) +{ + return dp_session_state_name(session->se_gen_state, upper); +} + +uint64_t dp_session_unique_id(const struct session *session) +{ + if (!session) + return 0; + return session->se_id; +} diff --git a/src/session/session.h b/src/session/session.h index 88a953e7..aea9b70a 100644 --- a/src/session/session.h +++ b/src/session/session.h @@ -1,5 +1,5 @@ /* - 
* Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2021, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2017 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -19,9 +19,13 @@ #include "if_var.h" #include "urcu.h" #include "util.h" +#include "dp_session.h" struct ifnet; struct rte_mbuf; +struct npf_pack_dp_session; +struct npf_pack_sentry_packet; +struct npf_pack_dp_sess_stats; /* * For polling during UT cleanup. @@ -48,7 +52,7 @@ enum session_feature_type { SESSION_FEATURE_TEST, /* For UTs, never delete */ SESSION_FEATURE_NPF, SESSION_FEATURE_END, /* Must be last */ -}; +} __attribute__ ((__packed__)); /* Session flags */ #define SESSION_EXPIRED 0x01 @@ -78,7 +82,7 @@ struct session_feature; * * The 'pack' and 'unpack' ops require some detail. These routines are * called (if defined) during session syncing (aka: Connsync) on the - * master node (pack) and peer node (unpack). + * active node (pack) and backup node (unpack). * * The pack routine must write whatever data, in whatever format is desired. * It should only write data that is required to restore (unpack) the @@ -101,6 +105,7 @@ struct session_feature_ops { void (*json)(json_writer_t *json, struct session_feature *sf); void (*log)(enum session_log_event event, struct session *s, struct session_feature *sf); + int (*nat_info)(void *data, uint32_t *taddr, uint16_t *tport); }; #define SESS_FEAT_REQ_EXPIRY 0x01 /* feature marked for expiry */ @@ -111,12 +116,13 @@ struct session_feature { struct cds_lfht_node sf_session_node; struct session *sf_session; void *sf_data; - uint32_t sf_idx; - enum session_feature_type sf_type; const struct session_feature_ops *sf_ops; struct rcu_head sf_rcu_head; uint64_t sf_expire_time; + uint32_t sf_idx; uint16_t sf_flags; + enum session_feature_type sf_type; + uint8_t sf_pad; }; /* Session sentry structs. 
*/ @@ -170,7 +176,7 @@ struct session { rte_atomic16_t se_sen_cnt; /* Sentry count */ uint16_t se_flags; uint8_t se_protocol; - uint8_t pad[1]; + uint8_t pad1; struct session_link *se_link; /* For linking of sessions */ struct sentry *se_sen; /* Cached INIT sentry */ uint64_t se_id; /* id of this session */ @@ -181,18 +187,38 @@ struct session { uint64_t se_etime; /* Expiration timeout */ uint8_t se_protocol_state; /* For display */ uint8_t se_idle:1; - uint8_t se_nat:1; /* nat? */ + + /* The following bit flags are used for op-mode commands */ + uint8_t se_fw:1; /* firewall? */ + uint8_t se_snat:1; /* snat? */ + uint8_t se_dnat:1; /* dnat? */ uint8_t se_nat64:1; /* nat64? */ uint8_t se_nat46:1; /* nat46? */ uint8_t se_alg:1; /* alg? */ + uint8_t se_in:1; /* inbound? */ + uint8_t se_out:1; /* outbound? */ + uint8_t se_app:1; /* application (dpi)? */ + uint8_t se_log_creation:1; uint8_t se_log_deletion:1; uint8_t se_log_periodic:1; + uint8_t se_gen_state; /* Generic state for display */ uint32_t se_log_interval; uint64_t se_ltime; /* time of next periodic log */ uint64_t se_create_time; /* time session was created */ + rte_atomic64_t se_pkts_in; + rte_atomic64_t se_bytes_in; + /* --- cacheline 2 boundary (128 bytes) --- */ + rte_atomic64_t se_pkts_out; + rte_atomic64_t se_bytes_out; + void *se_private; }; +static_assert(offsetof(struct session, se_rcu_head) == 64, + "first cache line exceeded"); +static_assert(offsetof(struct session, se_pkts_out) == 128, + "second cache line exceeded"); + /* For UTs, counts of various sessions */ struct session_counts { uint32_t sc_nat; /* Num of NPF features with NAT */ @@ -221,6 +247,19 @@ static inline void session_set_alg(struct session *s) s->se_alg = 1; } +/** + * Mark a session as being a firewall session + * + * This state remains until the session is deleted. + * + * @param s + * The session + */ +static inline void session_set_fw(struct session *s) +{ + s->se_fw = 1; +} + /** * Mark a session as being natted. 
* @@ -229,9 +268,14 @@ static inline void session_set_alg(struct session *s) * @param s * The session */ -static inline void session_set_nat(struct session *s) +static inline void session_set_snat(struct session *s) { - s->se_nat = 1; + s->se_snat = 1; +} + +static inline void session_set_dnat(struct session *s) +{ + s->se_dnat = 1; } /** @@ -260,6 +304,36 @@ static inline void session_set_nat46(struct session *s) s->se_nat46 = 1; } +/** + * Mark session created by inbound flow. + * + * @param s The session + */ +static inline void session_set_in(struct session *s) +{ + s->se_in = 1; +} + +/** + * Mark session created by outbound flow. + * + * @param s The session + */ +static inline void session_set_out(struct session *s) +{ + s->se_out = 1; +} + +/** + * Mark session as a dpi session + * + * @param s The session + */ +static inline void session_set_app(struct session *s) +{ + s->se_app = 1; +} + /** * Test an ALG session. * @@ -267,11 +341,21 @@ static inline void session_set_nat46(struct session *s) * * @param s The session */ -static inline bool session_is_alg(struct session *s) +static inline bool session_is_alg(const struct session *s) { return s->se_alg == 1; } +/** + * Is this a firewall session + * + * @param s The session + */ +static inline bool session_is_fw(const struct session *s) +{ + return s->se_fw == 1; +} + /** * Test a session as being natted. 
* @@ -280,9 +364,19 @@ static inline bool session_is_alg(struct session *s) * @param s * The session */ -static inline bool session_is_nat(struct session *s) +static inline bool session_is_snat(const struct session *s) { - return s->se_nat == 1; + return s->se_snat == 1; +} + +static inline bool session_is_dnat(const struct session *s) +{ + return s->se_dnat == 1; +} + +static inline bool session_is_nat(const struct session *s) +{ + return s->se_snat == 1 || s->se_dnat == 1; } /** @@ -291,7 +385,7 @@ static inline bool session_is_nat(struct session *s) * @param s * The session */ -static inline bool session_is_nat64(struct session *s) +static inline bool session_is_nat64(const struct session *s) { return s->se_nat64 == 1; } @@ -302,11 +396,37 @@ static inline bool session_is_nat64(struct session *s) * @param s * The session */ -static inline bool session_is_nat46(struct session *s) +static inline bool session_is_nat46(const struct session *s) { return s->se_nat46 == 1; } +/** + * Test session in/out. + * + * @param s The session + */ +static inline bool session_is_in(const struct session *s) +{ + return s->se_in == 1; +} + +static inline bool session_is_out(const struct session *s) +{ + return s->se_out == 1; +} + +/** + * Test if this is a dpi session + * + * @param s + * The session + */ +static inline bool session_is_app(const struct session *s) +{ + return s->se_app == 1; +} + /** * Establish a session from a packet. * @@ -401,7 +521,7 @@ int session_sentry_insert_pkt(struct session *s, uint32_t if_index, * * Create and insert an additional sentry for this session. * - * @param s + * @param se * The session. * * @param if_index @@ -414,18 +534,18 @@ int session_sentry_insert_pkt(struct session *s, uint32_t if_index, * @param sid * The source id for matching. * - * @param saddr + * @param sa * The source address (IPv4 or 6) * * @param did * The destination id. * - * @param daddr + * @param da * The destination address. 
*/ -int session_sentry_insert(struct session *m, uint32_t if_index, uint16_t flags, - uint16_t sid, const void *saddr, - uint16_t did, const void *daddr); +int session_sentry_insert(struct session *se, uint32_t if_index, uint16_t flags, + uint16_t sid, const void *sa, + uint16_t did, const void *da); /** * Max sessions @@ -507,11 +627,26 @@ struct session *session_base_parent(struct session *s); * @param state * The current protocol state. * + * @param gen_state + * The generic or common protocol state. Derived from 'state'. + * * @param timeout * The protocol state timeout. */ void session_set_protocol_state_timeout(struct session *s, uint8_t state, - uint32_t timeout); + enum dp_session_state gen_state, + uint32_t timeout); + +/** + * Set custom timeout + * + * @param s + * The session. + * + * @param timeout + * The custom timeout. + */ +void session_set_custom_timeout(struct session *s, uint32_t timeout); /** * Init @@ -702,8 +837,8 @@ void session_unlink_all(struct session *s); typedef int (*sentry_walk_t)(struct sentry *sen, void *data); typedef int (*session_walk_t)(struct session *s, void *data); -int sentry_table_walk(sentry_walk_t func, void *data); -int session_table_walk(session_walk_t func, void *data); +int sentry_table_walk(sentry_walk_t cb, void *data); +int session_table_walk(session_walk_t cb, void *data); /** @@ -725,7 +860,7 @@ int session_table_walk(session_walk_t func, void *data); * @param do_unlink * If TRUE, unlink the sessions. * - * @param func + * @param cb * The callback to execute. * * @param data @@ -734,7 +869,7 @@ int session_table_walk(session_walk_t func, void *data); typedef void (session_link_walk_t)(struct session *s, void *data); void session_link_walk(struct session *s, bool do_unlink, - session_link_walk_t *func, void *data); + session_link_walk_t *cb, void *data); /** * Destroy all sentries/sessions. 
@@ -743,6 +878,13 @@ void session_link_walk(struct session *s, bool do_unlink, */ int session_table_destroy_all(void); +/** + * Reset the session_id to 0. + * + * Used by UTs between tests. + */ +void session_reset_session_id(void); + /** * Extract elements of a sentry. * @@ -771,6 +913,10 @@ void session_sentry_extract(struct sentry *sen, uint32_t *if_index, int *af, const void **saddr, uint16_t *sid, const void **daddr, uint16_t *did); +/* Extract addrs from a sentry */ +void session_sentry_extract_addrs(const struct sentry *sen, int *af, + const void **saddr, const void **daddr); + /** * Execute the session GC path. * @@ -788,6 +934,24 @@ void session_gc(void); */ struct session *session_alloc(void); +int session_npf_pack_pack(struct session *s, struct npf_pack_dp_session *pds, + struct npf_pack_sentry_packet *psp, + struct npf_pack_dp_sess_stats *stats); +int session_npf_pack_sentry_pack(struct session *s, + struct npf_pack_sentry_packet *psp); +int session_npf_pack_restore(struct npf_pack_dp_session *pds, + struct npf_pack_sentry_packet *psp, + struct npf_pack_dp_sess_stats *stats, + struct session **session); +int session_npf_pack_sentry_restore(struct npf_pack_sentry_packet *psp, + struct ifnet **ifp); +uint32_t session_get_npf_pack_timeout(struct session *s); +int session_npf_pack_stats_pack(struct session *s, + struct npf_pack_dp_sess_stats *stats); +int session_npf_pack_stats_restore(struct session *s, + struct npf_pack_dp_sess_stats *stats); +int sess_time_to_expire(const struct session *s); + static inline uint64_t session_get_id(struct session *s) { if (s) @@ -795,4 +959,29 @@ static inline uint64_t session_get_id(struct session *s) return 0; } +/** + * Save session stats. 
+ * + * The specified number of bytes and one packet are added to + * the specified session's counters + * + * @param s The session + * @param dir_in True if the direction is "in"; false for "out" + * @param bytes Byte count to add to the session + */ +static inline void se_save_stats(struct session *s, + bool dir_in, + uint64_t bytes) +{ + assert(s); + + if (dir_in) { + rte_atomic64_inc(&s->se_pkts_in); + rte_atomic64_add(&s->se_bytes_in, bytes); + } else { + rte_atomic64_inc(&s->se_pkts_out); + rte_atomic64_add(&s->se_bytes_out, bytes); + } +} + #endif /* _SESSION_H_ */ diff --git a/src/session/session_cmds.c b/src/session/session_cmds.c index 30a48815..22e94db0 100644 --- a/src/session/session_cmds.c +++ b/src/session/session_cmds.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2017 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -25,6 +25,7 @@ #include "session.h" #include "session_cmds.h" #include "session_feature.h" +#include "session_op.h" #include "session_private.h" #include "urcu.h" #include "util.h" @@ -48,10 +49,12 @@ struct cmd_entry { cmd_handler _hndlr; }; -#define SD_FILTER_NAT 0x01 -#define SD_FILTER_NAT64 0x02 -#define SD_FILTER_NAT46 0x04 -#define SD_FILTER_ALG 0x08 +#define SD_FILTER_NONE 0x00 +#define SD_FILTER_NAT 0x01 +#define SD_FILTER_NAT64 0x02 +#define SD_FILTER_NAT46 0x04 +#define SD_FILTER_ALG 0x08 +#define SD_FILTER_CONN_ID 0x10 struct session_dump { FILE *sd_fp; @@ -60,6 +63,7 @@ struct session_dump { void *sd_data; bool sd_features; uint8_t sd_filter; + ulong sd_conn_id; }; /* Parameters for session expiration by filtering */ @@ -68,6 +72,7 @@ struct session_dump { #define FILTER_BY_ANY_SRC_ID 0x04 #define FILTER_BY_ANY_DST_ID 0x08 #define FILTER_BY_ID 0x10 +#define FILTER_BY_ANY_PROTO 0x20 struct session_filter_params { uint32_t sf_srcip[4]; @@ -78,6 +83,7 @@ struct session_filter_params { uint8_t sf_d_af; uint16_t sf_flags; uint64_t sf_id; + uint16_t sf_proto; }; /* @@ -148,49 +154,33 @@ static int cmd_feature_json(struct session *s __unused, return 0; } -static int cmd_session_json(struct session *s, void *data) +void +cmd_session_json(struct session *s, json_writer_t *json, bool add_feat, + bool is_json_array) { - struct session_dump *sd = data; char buf[INET6_ADDRSTRLEN]; uint32_t if_index; const void *saddr; const void *daddr; uint16_t sid; uint16_t did; - json_writer_t *json = sd->sd_data; struct sentry *init_sen = rcu_dereference(s->se_sen); int tmp; - if (sd->sd_filter) { - if ((sd->sd_filter & SD_FILTER_NAT) && - !session_is_nat(s)) - return 0; - if ((sd->sd_filter & SD_FILTER_NAT64) && - !session_is_nat64(s)) - return 0; - if ((sd->sd_filter & SD_FILTER_NAT46) && - !session_is_nat46(s)) - return 0; - if ((sd->sd_filter & SD_FILTER_ALG) && - !session_is_alg(s)) - return 0; - } - - /* Skip? */ - if (sd->sd_start-- > 0) - return 0; - - /* Filled? 
*/ - if (sd->sd_count-- <= 0) - return -1; /* Stop walk we are full */ - /* No sentry? (racing with expiration) */ if (!init_sen) - return 0; + return; - sprintf(buf, "%lu", s->se_id); - jsonw_name(json, buf); - jsonw_start_object(json); + if (is_json_array) { + /* New array element */ + jsonw_start_object(json); + jsonw_uint_field(json, "id", s->se_id); + } else { + /* New named object (session ID is the name) */ + sprintf(buf, "%lu", s->se_id); + jsonw_name(json, buf); + jsonw_start_object(json); + } /* Extract addrs/ids from the sentry */ session_sentry_extract(init_sen, &if_index, &tmp, &saddr, &sid, &daddr, @@ -207,25 +197,26 @@ static int cmd_session_json(struct session *s, void *data) jsonw_uint_field(json, "proto", s->se_protocol); jsonw_string_field(json, "interface", ifnet_indextoname_safe(if_index)); - if (s->se_flags & SESSION_EXPIRED) - tmp = 0; - else if (!s->se_etime) - tmp = s->se_custom_timeout ? - s->se_custom_timeout : s->se_timeout; - else - tmp = (int) (s->se_etime - get_dp_uptime()); - - jsonw_int_field(json, "time_to_expire", tmp); + jsonw_int_field(json, "time_to_expire", sess_time_to_expire(s)); jsonw_int_field(json, "state_expire_window", s->se_timeout); jsonw_int_field(json, "state", s->se_protocol_state); + jsonw_int_field(json, "gen_state", s->se_gen_state); if (s->se_link && s->se_link->sl_parent) jsonw_uint_field(json, "parent", s->se_link->sl_parent->se_id); else jsonw_uint_field(json, "parent", 0); + uint64_t ts = rte_get_timer_cycles(); + if (ts > s->se_create_time) + jsonw_uint_field(json, "duration", + (ts - s->se_create_time) / rte_get_timer_hz()); + + /* Bitmap of features enabled on this session */ + jsonw_uint_field(json, "feature_type", sess_feature_type_bm(s)); + /* Add feature json if desired */ - if (sd->sd_features) { + if (add_feat) { jsonw_int_field(json, "features_count", rte_atomic16_read(&s->se_feature_count)); jsonw_name(json, "features"); @@ -235,7 +226,55 @@ static int cmd_session_json(struct session *s, void 
*data) jsonw_end_array(json); } + /* Session counters */ + jsonw_name(json, "counters"); + jsonw_start_object(json); + jsonw_uint_field(json, "packets_in", + rte_atomic64_read(&s->se_pkts_in)); + jsonw_uint_field(json, "bytes_in", + rte_atomic64_read(&s->se_bytes_in)); + jsonw_uint_field(json, "packets_out", + rte_atomic64_read(&s->se_pkts_out)); + jsonw_uint_field(json, "bytes_out", + rte_atomic64_read(&s->se_bytes_out)); + jsonw_end_object(json); /* End of counters */ + jsonw_end_object(json); +} + +static int cmd_session_json_cb(struct session *s, void *data) +{ + struct session_dump *sd = data; + json_writer_t *json = sd->sd_data; + + if (sd->sd_filter) { + if ((sd->sd_filter & SD_FILTER_NAT) && + !session_is_nat(s)) + return 0; + if ((sd->sd_filter & SD_FILTER_NAT64) && + !session_is_nat64(s)) + return 0; + if ((sd->sd_filter & SD_FILTER_NAT46) && + !session_is_nat46(s)) + return 0; + if ((sd->sd_filter & SD_FILTER_ALG) && + !session_is_alg(s)) + return 0; + if ((sd->sd_filter & SD_FILTER_CONN_ID) && + (s->se_id != sd->sd_conn_id)) + return 0; + } + + /* Skip? */ + if (sd->sd_start-- > 0) + return 0; + + /* Filled? 
*/ + if (sd->sd_count-- <= 0) + return -1; /* Stop walk we are full */ + + /* Add the session json */ + cmd_session_json(s, json, sd->sd_features, false); return 0; } @@ -311,30 +350,20 @@ static void cmd_session_show_summary(FILE *fp) jsonw_destroy(&json); } -static void cmd_session_show(FILE *fp, bool features, uint8_t filter, - int start, int count) +static void cmd_session_show(struct session_dump *sd) { - struct session_dump sd; - json_writer_t *json; - - sd.sd_fp = fp; - sd.sd_start = start; - sd.sd_count = count; - sd.sd_features = features; - sd.sd_filter = filter; - - json = jsonw_new(fp); - sd.sd_data = json; + json_writer_t *json = jsonw_new(sd->sd_fp); + sd->sd_data = json; - if (count <= 0 || count >= MAX_JSON_SESSIONS) - sd.sd_count = MAX_JSON_SESSIONS; + if (sd->sd_count <= 0 || sd->sd_count >= MAX_JSON_SESSIONS) + sd->sd_count = MAX_JSON_SESSIONS; jsonw_name(json, "config"); jsonw_start_object(json); jsonw_name(json, "sessions"); jsonw_start_object(json); - session_table_walk(cmd_session_json, &sd); + session_table_walk(cmd_session_json_cb, sd); jsonw_end_object(json); jsonw_end_object(json); @@ -404,12 +433,12 @@ static int cmd_session_expire_id(struct session *s, void *data) /* Init session filter params */ static int cmd_init_sf(FILE *f, struct session_filter_params *sf, const char *saddr, const char *sid, const char *daddr, - const char *did) + const char *did, const char *proto) { int tmp; /* Source address */ - if (strncmp(saddr, "any", 3)) { + if (strncmp(saddr, "any", 3) != 0) { if (inet_pton(AF_INET, saddr, sf->sf_srcip) == 1) sf->sf_s_af = AF_INET; else if (inet_pton(AF_INET6, saddr, sf->sf_srcip) == 1) @@ -422,7 +451,7 @@ static int cmd_init_sf(FILE *f, struct session_filter_params *sf, sf->sf_flags |= FILTER_BY_ANY_SRCIP; /* Destination address */ - if (strncmp(daddr, "any", 3)) { + if (strncmp(daddr, "any", 3) != 0) { if (inet_pton(AF_INET, daddr, sf->sf_dstip) == 1) sf->sf_d_af = AF_INET; else if (inet_pton(AF_INET6, daddr, 
sf->sf_dstip) == 1) @@ -435,7 +464,7 @@ static int cmd_init_sf(FILE *f, struct session_filter_params *sf, sf->sf_flags |= FILTER_BY_ANY_DSTIP; /* Source port/id */ - if (strncmp(sid, "any", 3)) { + if (strncmp(sid, "any", 3) != 0) { tmp = arg_to_int(sid); if (tmp < 0 || tmp > USHRT_MAX) { cmd_err(f, "invalid filter source id: %s\n", sid); @@ -446,7 +475,7 @@ static int cmd_init_sf(FILE *f, struct session_filter_params *sf, sf->sf_flags |= FILTER_BY_ANY_SRC_ID; /* Destination port/id */ - if (strncmp(did, "any", 3)) { + if (strncmp(did, "any", 3) != 0) { tmp = arg_to_int(did); if (tmp < 0 || tmp > USHRT_MAX) { cmd_err(f, "invalid filter destination id: %s\n", did); @@ -456,6 +485,17 @@ static int cmd_init_sf(FILE *f, struct session_filter_params *sf, } else sf->sf_flags |= FILTER_BY_ANY_DST_ID; + /* protocol */ + if (strncmp(proto, "any", 3) != 0) { + tmp = arg_to_int(proto); + if (tmp < 0 || tmp > USHRT_MAX) { + cmd_err(f, "invalid filter protocol: %s\n", proto); + return -1; + } + sf->sf_proto = tmp; + } else + sf->sf_flags |= FILTER_BY_ANY_PROTO; + return 0; } @@ -478,18 +518,27 @@ static bool cmd_filter_match(struct sentry *sen, uint32_t if_index; uint16_t sid; uint16_t did; + uint16_t proto; const void *daddr; const void *saddr; int af; session_sentry_extract(sen, &if_index, &af, &saddr, &sid, &daddr, &did); + proto = sen->sen_protocol; + + /* protocol */ + if (!(sf->sf_flags & FILTER_BY_ANY_PROTO) && + proto != sf->sf_proto) + return false; /* Source port/id */ - if (!(sf->sf_flags & FILTER_BY_ANY_SRC_ID) && sid != sf->sf_src_id) + if (!(sf->sf_flags & FILTER_BY_ANY_SRC_ID) && + ntohs(sid) != sf->sf_src_id) return false; /* Destination port/id */ - if (!(sf->sf_flags & FILTER_BY_ANY_DST_ID) && did != sf->sf_dst_id) + if (!(sf->sf_flags & FILTER_BY_ANY_DST_ID) && + ntohs(did) != sf->sf_dst_id) return false; /* Source addr */ @@ -699,7 +748,25 @@ static int cmd_op_walk_sessions_summary(FILE *f, int argc __unused, static int cmd_op_delete_sessions(FILE *f, int 
argc, char **argv) { - struct session_filter_params sf = { {0} }; + /* + * argv: [1 ] [2] [3 ] [4] [5 ] [6] [7 ] [8] [9 ] [10] + * cli ex: saddr any sport 300 daddr any dport any proto any + */ + enum { + FT_SRC_ADDR_NAME = 1, + FT_SRC_ADDR_VALUE = 2, + FT_SRC_PORT_NAME = 3, + FT_SRC_PORT_VALUE = 4, + FT_DST_ADDR_NAME = 5, + FT_DST_ADDR_VALUE = 6, + FT_DST_PORT_NAME = 7, + FT_DST_PORT_VALUE = 8, + FT_PROTO_NAME = 9, + FT_PROTO_VALUE = 10, + NUM_FLT_PARAMS = 11 + }; + + struct session_filter_params sf = { .sf_id = 0 }; int rc; if (argc < 1) { @@ -722,7 +789,17 @@ cmd_op_delete_sessions(FILE *f, int argc, char **argv) return 0; } if (strcmp(argv[0], "filter") == 0) { - rc = cmd_init_sf(f, &sf, argv[2], argv[4], argv[6], argv[8]); + if (argc < NUM_FLT_PARAMS) { + cmd_err(f, "%s", err_str_missing_arg); + return -1; + } + + rc = cmd_init_sf(f, &sf, + argv[FT_SRC_ADDR_VALUE], + argv[FT_SRC_PORT_VALUE], + argv[FT_DST_ADDR_VALUE], + argv[FT_DST_PORT_VALUE], + argv[FT_PROTO_VALUE]); if (rc) return rc; sentry_table_walk(cmd_sentry_expire_filter, &sf); @@ -773,13 +850,31 @@ static int cmd_op_walk_sessions(FILE *f, int argc, char **argv) int start = 0; int count = 0; int rc; + ulong conn_id = 0; + bool have_conn_id = false; + + /* Parse an initial "id N" pair, if any; both tokens must be present */ + if (argc >= 2 && !strcmp(argv[0], "id")) { + conn_id = arg_to_long(argv[1]); + have_conn_id = true; + argc -= 2; + argv += 2; + } + /* Now parse the "start" and "count" limits, if any */ rc = cmd_parse_limits(f, argc, argv, &start, &count); if (rc) return rc; - /* No feature json */ - cmd_session_show(f, false, 0, start, count); + struct session_dump sd = { + .sd_fp = f, + .sd_features = false, /* No feature json */ + .sd_filter = have_conn_id ? 
SD_FILTER_CONN_ID : SD_FILTER_NONE, + .sd_conn_id = conn_id, + .sd_start = start, + .sd_count = count, + }; + cmd_session_show(&sd); return 0; } @@ -793,8 +888,15 @@ static int cmd_op_walk_sessions_full(FILE *f, int argc, char **argv) if (rc) return rc; - /* Include feature json */ - cmd_session_show(f, true, 0, start, count); + struct session_dump sd = { + .sd_fp = f, + .sd_features = true, /* Include feature json */ + .sd_filter = 0, + .sd_conn_id = 0, + .sd_start = start, + .sd_count = count, + }; + cmd_session_show(&sd); return 0; } @@ -811,8 +913,15 @@ static int cmd_op_walk_sessions_nat64(FILE *f, int argc, char **argv) if (rc) return rc; - /* Include feature json */ - cmd_session_show(f, true, SD_FILTER_NAT64, start, count); + struct session_dump sd = { + .sd_fp = f, + .sd_features = true, /* Include feature json */ + .sd_filter = SD_FILTER_NAT64, + .sd_conn_id = 0, + .sd_start = start, + .sd_count = count, + }; + cmd_session_show(&sd); return 0; } @@ -829,8 +938,15 @@ static int cmd_op_walk_sessions_nat46(FILE *f, int argc, char **argv) if (rc) return rc; - /* Include feature json */ - cmd_session_show(f, true, SD_FILTER_NAT46, start, count); + struct session_dump sd = { + .sd_fp = f, + .sd_features = true, /* Include feature json */ + .sd_filter = SD_FILTER_NAT46, + .sd_conn_id = 0, + .sd_start = start, + .sd_count = count, + }; + cmd_session_show(&sd); return 0; } @@ -961,6 +1077,9 @@ enum cmd_op { OP_SHOW_SESSIONS, OP_SHOW_SENTRIES, OP_DELETE, + OP_LIST, + OP_SHOW_DP_SESSIONS, + OP_CLEAR_DP_SESSIONS, }; enum cmd_cfg { @@ -997,6 +1116,18 @@ static const struct session_command session_cmd_op[] = { .tokens = "clear session", .handler = cmd_op_delete_sessions, }, + [OP_LIST] = { + .tokens = "list", + .handler = cmd_op_list, + }, + [OP_SHOW_DP_SESSIONS] = { + .tokens = "show dataplane sessions", + .handler = cmd_op_show_dp_sessions, + }, + [OP_CLEAR_DP_SESSIONS] = { + .tokens = "clear dataplane sessions", + .handler = cmd_op_clear_dp_sessions, + }, }; static 
const struct session_command session_cmd_cfg[] = { diff --git a/src/session/session_cmds.h b/src/session/session_cmds.h index 1ad1fef7..adf8f97b 100644 --- a/src/session/session_cmds.h +++ b/src/session/session_cmds.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2017 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -11,6 +11,10 @@ #include +struct session; + +void cmd_session_json(struct session *s, json_writer_t *json, bool add_feat, + bool is_json_array); int cmd_session_op(FILE *f, int argc, char **argv); int cmd_session_ut(FILE *f, int argc, char **argv); int cmd_session_cfg(FILE *f, int argc, char **argv); diff --git a/src/session/session_op.c b/src/session/session_op.c new file mode 100644 index 00000000..dda03d85 --- /dev/null +++ b/src/session/session_op.c @@ -0,0 +1,1465 @@ +/* + * Copyright (c) 2020-2021, AT&T Intellectual Property. All rights reserved. 
+ * + * SPDX-License-Identifier: LGPL-2.1-only + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "if_var.h" +#include "in6_var.h" +#include "ip_addr.h" +#include "compiler.h" +#include "json_writer.h" +#include "npf_shim.h" +#include "session.h" +#include "session_cmds.h" +#include "session_feature.h" +#include "session_op.h" +#include "session_private.h" +#include "urcu.h" +#include "util.h" +#include "vplane_log.h" + +enum sd_order { + SD_ORDER_NONE, + SD_ORDER_DESCENDING, + SD_ORDER_ASCENDING +}; + +enum sd_orderby { + SD_ORDERBY_NONE, + SD_ORDERBY_SADDR, + SD_ORDERBY_DADDR, + SD_ORDERBY_TADDR, + SD_ORDERBY_ID, + SD_ORDERBY_TO, +}; + +static inline bool sd_orderby_addr(enum sd_orderby orderby) +{ + return orderby == SD_ORDERBY_SADDR || + orderby == SD_ORDERBY_DADDR || + orderby == SD_ORDERBY_TADDR; +} + +enum sf_dir { + SF_DIR_NONE, + SF_DIR_IN, + SF_DIR_OUT, +}; + +/* + * 'other' is any session for which the feature is none or unknown, i.e. *not* + * nat, nat64, alg, or dpi. 
+ */ +#define SF_FEATURE_ANY 0x00 +#define SF_FEATURE_FW 0x01 +#define SF_FEATURE_SNAT 0x02 +#define SF_FEATURE_DNAT 0x04 +#define SF_FEATURE_NAT64 0x08 +#define SF_FEATURE_NAT46 0x10 +#define SF_FEATURE_ALG 0x20 +#define SF_FEATURE_APP 0x40 +#define SF_FEATURE_CONN_ID 0x80 +#define SF_FEATURE_OTHER 0x100 + +/* + * Session filter for list, show and clear commands + */ +struct session_filter { + uint32_t sf_addrids[SENTRY_LEN_IPV6]; + uint32_t sf_mask[SENTRY_LEN_IPV6]; + uint sf_addrids_depth; /* Number of words to cmp */ + + bool sf_ip; + bool sf_ip6; + enum sf_dir sf_dir; + uint8_t sf_s_af; + uint8_t sf_d_af; + uint16_t sf_features; /* Session feature fltr */ + uint16_t sf_proto; + uint32_t sf_ifindex; + uint64_t sf_id; + + /* Translation address, mask, port, and addr family */ + struct in6_addr sf_taddr; + struct in6_addr sf_tmask; + uint16_t sf_tport; + uint8_t sf_taf; +}; + +/* + * Session dump for show command + */ +struct session_dump { + FILE *sd_fp; + json_writer_t *sd_json; + int sd_start; + int sd_count; + bool sd_features; /* Add features to json */ + bool sd_summary; + struct session_filter *sd_sf; /* Session filter */ + + /* For ordered retrieval */ + enum sd_order sd_order; + enum sd_orderby sd_orderby; + uint8_t sd_af; /* For sd_start_addr and sd_end_addr */ + + union { + struct in6_addr sd_start_addr; + uint64_t sd_start_id; + uint32_t sd_start_timeout; + }; + union { + struct in6_addr sd_end_addr; + uint64_t sd_end_id; + uint32_t sd_end_timeout; + }; +}; + +struct sess_summary_proto { + uint32_t sp_total; + uint32_t sp_closed; + uint32_t sp_opening; + uint32_t sp_estbd; + uint32_t sp_closing; +}; + +struct session_summary { + uint32_t ss_total; + uint32_t ss_ip; + uint32_t ss_ip6; + uint32_t ss_in; + uint32_t ss_out; + uint32_t ss_ft_snat; + uint32_t ss_ft_dnat; + uint32_t ss_ft_nat64; + uint32_t ss_ft_nat46; + uint32_t ss_ft_alg; + uint32_t ss_ft_app; + uint32_t ss_ft_other; + + struct sess_summary_proto ss_tcp; + struct sess_summary_proto ss_udp; 
+ struct sess_summary_proto ss_other; +}; + +static void __attribute__((format(printf, 2, 3))) cmd_err(FILE *f, + const char *format, ...) +{ + char str[100]; + va_list ap; + + va_start(ap, format); + vsnprintf(str, sizeof(str), format, ap); + va_end(ap); + + RTE_LOG(DEBUG, DATAPLANE, "%s\n", str); + + if (f) { + json_writer_t *json = jsonw_new(f); + if (json) { + jsonw_string_field(json, "__error", str); + jsonw_destroy(&json); + } + } +} + +/* Word offset into sf_addrids */ +static inline uint sess_addrids_offs(uint8_t af, bool src) +{ + if (src) + return 1; + return (af == AF_INET) ? 2 : 5; +} + +static inline struct in6_addr *sess_addrids_saddr(uint8_t af, uint32_t *addrids) +{ + uint offs = sess_addrids_offs(af, true); + return (struct in6_addr *)&addrids[offs]; +} + +static inline struct in6_addr *sess_addrids_daddr(uint8_t af, uint32_t *addrids) +{ + uint offs = sess_addrids_offs(af, false); + return (struct in6_addr *)&addrids[offs]; +} + +/* Extract an uint from a string */ +static uint arg_to_uint(const char *arg, int *error) +{ + char *p; + unsigned long val; + + if (!arg) { + *error = -EINVAL; + return 0; + } + + val = strtoul(arg, &p, 10); + if (p == arg || val > UINT_MAX) { + *error = -EINVAL; + return 0; + } + return (uint32_t) val; +} + +/* Extract an ulong from a string */ +static ulong arg_to_ulong(const char *arg, int *error) +{ + char *p; + unsigned long val; + + if (!arg) { + *error = -EINVAL; + return 0; + } + + val = strtoul(arg, &p, 10); + if (p == arg) { + *error = -EINVAL; + return 0ul; + } + return val; +} + +/* + * Parse address or prefix and mask. Returns < 0 for failure. 
+ */ +static int +cmd_op_parse_addr_mask(const char *addr_str, struct in6_addr *addr, + struct in6_addr *mask, uint8_t *af) +{ + char *p, *pp = NULL; + uint pfx_len = 0; + int rc = 0; + + /* Separate address from prefix length (if present) */ + p = strchr(addr_str, '/'); + if (p) { + pp = p + 1; + *p = '\0'; + + pfx_len = arg_to_uint(pp, &rc); + if (rc < 0) + return rc; + } + + /* IPv4 or IPv6? */ + if (inet_pton(AF_INET, addr_str, addr) == 1) { + *af = AF_INET; + + if (pfx_len == 0 || pfx_len > 32) + pfx_len = 32; + + mask->s6_addr32[0] = prefixlen_to_mask(pfx_len); + + } else if (inet_pton(AF_INET6, addr_str, addr) == 1) { + *af = AF_INET6; + + if (pfx_len == 0 || pfx_len > 128) + pfx_len = 128; + + in6_prefixlen2mask(mask, pfx_len); + + } else + return -EINVAL; + + return 0; +} + +/* + * Finish setting up the addrids filter + */ +static int +cmd_op_finalize_addrids_fltr(FILE *f, struct session_filter *sf, + uint16_t sport, uint16_t dport) +{ + uint offs, len; + + if (sf->sf_s_af && sf->sf_d_af && + (sf->sf_s_af != sf->sf_d_af)) { + cmd_err(f, + "Mismatch between src-addr and dest-addr filters\n"); + return -EINVAL; + } + + /* Setup port filters */ + if (sport || dport) { + sf->sf_addrids[0] = htons(sport) << 16 | htons(dport); + + if (dport) + sf->sf_mask[0] = 0x0000FFFF; + if (sport) + sf->sf_mask[0] |= 0xFFFF0000; + + /* Depth is the number of words to compare */ + sf->sf_addrids_depth = 1; + } + + /* Overwrite depth if src-addr specified */ + if (sf->sf_s_af) { + offs = sess_addrids_offs(sf->sf_s_af, true); + len = (sf->sf_s_af == AF_INET) ? 1 : 4; + + sf->sf_addrids_depth = offs + len; + } + + /* Overwrite depth if dst-addr specified */ + if (sf->sf_d_af) { + offs = sess_addrids_offs(sf->sf_d_af, false); + len = (sf->sf_d_af == AF_INET) ? 
1 : 4; + + sf->sf_addrids_depth = offs + len; + } + return 0; +} + +/* + * Finalize the ordering parameters + */ +static int +cmd_op_finalize_orderby(FILE *f, struct session_filter *sf, + struct session_dump *sd, char *start, char *end) +{ + uint start_alen = 0, end_alen = 0; + int error = 0; + + /* + * The 'start' and 'end' options are dependent on the 'orderby' + * option. Note that when requesting a list of items from sessions, + * then start and end will not be specified. + */ + if (!start || !sd) + return 0; + + /* + * If fetching session unordered, then just a 'start' and 'count' will + * be specified. + */ + if (sd->sd_orderby == SD_ORDERBY_NONE) { + uint tmp = arg_to_uint(start, &error); + if (error < 0 || tmp > INT_MAX) + goto error; + + sd->sd_start = (int)tmp; + return 0; + } + + if (!end) + return -EINVAL; + + if (sd_orderby_addr(sd->sd_orderby)) { + + if (inet_pton(AF_INET, start, &sd->sd_start_addr) == 1) + start_alen = 4; + else if (inet_pton(AF_INET6, start, &sd->sd_start_addr) == 1) + start_alen = 16; + else + goto error; + + if (inet_pton(AF_INET, end, &sd->sd_end_addr) == 1) + end_alen = 4; + else if (inet_pton(AF_INET6, end, &sd->sd_end_addr) == 1) + end_alen = 16; + else + goto error; + + if (end_alen != start_alen) + goto error; + + if (memcmp(&sd->sd_start_addr, &sd->sd_end_addr, + start_alen) > 0) + sd->sd_order = SD_ORDER_DESCENDING; + else + sd->sd_order = SD_ORDER_ASCENDING; + + if (start_alen == 4) { + sf->sf_ip = true; + sf->sf_ip6 = false; + sd->sd_af = AF_INET; + } else { + sf->sf_ip = false; + sf->sf_ip6 = true; + sd->sd_af = AF_INET6; + } + } + + /* + * If ordering by trans addr then session filter MUST be SNAT or DNAT. 
+ */ + if (sd->sd_orderby == SD_ORDERBY_TADDR) { + if (sf->sf_features != SF_FEATURE_SNAT && + sf->sf_features != SF_FEATURE_DNAT) + goto error; + } + + if (sd->sd_orderby == SD_ORDERBY_ID) { + ulong tmp; + + tmp = arg_to_ulong(start, &error); + if (error < 0) + goto error; + sd->sd_start_id = tmp; + + tmp = arg_to_ulong(end, &error); + if (error < 0) + goto error; + sd->sd_end_id = tmp; + + if (sd->sd_start_id > sd->sd_end_id) + sd->sd_order = SD_ORDER_DESCENDING; + else + sd->sd_order = SD_ORDER_ASCENDING; + } + + if (sd->sd_orderby == SD_ORDERBY_TO) { + uint tmp; + + tmp = arg_to_uint(start, &error); + if (error < 0) + goto error; + sd->sd_start_timeout = tmp; + + tmp = arg_to_uint(end, &error); + if (error < 0) + goto error; + sd->sd_end_timeout = tmp; + + if (sd->sd_start_timeout > sd->sd_end_timeout) + sd->sd_order = SD_ORDER_DESCENDING; + else + sd->sd_order = SD_ORDER_ASCENDING; + } + + return 0; + +error: + cmd_err(f, "Error with orderby params\n"); + return -EINVAL; +} + +static int +cmd_op_parse_src_addr(FILE *f, int *argcp, char ***argvp, + struct session_filter *sf) +{ + struct in6_addr addr, mask; + struct in6_addr *sf_addr, *sf_mask; + char *val; + int error; + uint8_t af; + + val = next_arg(argcp, argvp); + if (!val) + goto error; + + error = cmd_op_parse_addr_mask(val, &addr, &mask, &af); + if (error < 0) + goto error; + + sf_addr = sess_addrids_saddr(af, sf->sf_addrids); + memcpy(sf_addr, &addr, (af == AF_INET) ? 4 : 16); /* IPv4: 1 word, keep dst slot */ + sf->sf_s_af = af; + + sf_mask = sess_addrids_saddr(af, sf->sf_mask); + memcpy(sf_mask, &mask, (af == AF_INET) ? 4 : 16); /* only initialised bytes */ + + return 0; + +error: + cmd_err(f, "Error with src-addr filter params\n"); + return -EINVAL; +} + +static int +cmd_op_parse_dst_addr(FILE *f, int *argcp, char ***argvp, + struct session_filter *sf) +{ + struct in6_addr addr, mask; + struct in6_addr *sf_addr, *sf_mask; + char *val; + int error; + uint8_t af; + + val = next_arg(argcp, argvp); + if (!val) + goto error; + + error = cmd_op_parse_addr_mask(val, &addr, &mask, &af); + if (error < 0) + goto error; + + sf_addr = 
sess_addrids_daddr(af, sf->sf_addrids); + *sf_addr = addr; + sf->sf_d_af = af; /* destination family, not source */ + + sf_mask = sess_addrids_daddr(af, sf->sf_mask); + *sf_mask = mask; + + return 0; + +error: + cmd_err(f, "Error with dest-addr filter params\n"); + return -EINVAL; +} + +static int +cmd_op_parse_trans_addr(FILE *f, int *argcp, char ***argvp, + struct session_filter *sf) +{ + char *val; + int error, i, alen = 0; + + val = next_arg(argcp, argvp); + if (!val) + goto error; + + error = cmd_op_parse_addr_mask(val, &sf->sf_taddr, &sf->sf_tmask, + &sf->sf_taf); + if (error < 0) + goto error; + + if (sf->sf_taf == AF_INET) + alen = 4; + else if (sf->sf_taf == AF_INET6) + alen = 16; + else + goto error; + + /* Clear the host bits */ + for (i = 0; i < alen; i++) + sf->sf_taddr.s6_addr[i] = + sf->sf_taddr.s6_addr[i] & sf->sf_tmask.s6_addr[i]; + + return 0; + +error: + cmd_err(f, "Error with translation address filter params\n"); + return -EINVAL; +} + +static int cmd_op_parse_feat(FILE *f, int *argcp, char ***argvp, + struct session_filter *sf) +{ + char *val; + + val = next_arg(argcp, argvp); + if (!val) { + cmd_err(f, "Missing parameter to feat command\n"); + return -EINVAL; + } + + if (!strcmp(val, "other")) + sf->sf_features |= SF_FEATURE_OTHER; + else if (!strcmp(val, "firewall")) + sf->sf_features |= SF_FEATURE_FW; + else if (!strcmp(val, "snat")) + sf->sf_features |= SF_FEATURE_SNAT; + else if (!strcmp(val, "dnat")) + sf->sf_features |= SF_FEATURE_DNAT; + else if (!strcmp(val, "nat64")) + sf->sf_features |= SF_FEATURE_NAT64; + else if (!strcmp(val, "nat46")) + sf->sf_features |= SF_FEATURE_NAT46; + else if (!strcmp(val, "alg")) + sf->sf_features |= SF_FEATURE_ALG; + else if (!strcmp(val, "application")) + sf->sf_features |= SF_FEATURE_APP; + else { + cmd_err(f, "Invalid parameter \"%s\" to feat command\n", val); + return -EINVAL; + } + + return 0; +} + +static int cmd_op_parse_orderby(FILE *f, int *argcp, char ***argvp, + struct session_dump *sd) +{ + char *val; + + val = next_arg(argcp, 
argvp); + if (!val) { + cmd_err(f, "Missing parameter to orderby command\n"); + return -EINVAL; + } + + if (!sd) + return 0; + + if (!strcmp(val, "dst_addr")) + sd->sd_orderby = SD_ORDERBY_DADDR; + else if (!strcmp(val, "src_addr")) + sd->sd_orderby = SD_ORDERBY_SADDR; + else if (!strcmp(val, "trans_addr")) + sd->sd_orderby = SD_ORDERBY_TADDR; + else if (!strcmp(val, "id")) + sd->sd_orderby = SD_ORDERBY_ID; + else if (!strcmp(val, "time_to_expire")) + sd->sd_orderby = SD_ORDERBY_TO; + else { + cmd_err(f, "Invalid option \"%s\" to orderby command\n", val); + return -EINVAL; + } + + return 0; +} + +/* + * Parse arguments. + */ +static int +cmd_op_parse(FILE *f, int argc, char **argv, struct session_filter *sf, + struct session_dump *sd) +{ + char *start = NULL, *end = NULL; + uint16_t sport = 0, dport = 0; + char *cmd, *val; + int error = 0; + + while (argc > 0) { + cmd = next_arg(&argc, &argv); + + if (!strcmp(cmd, "ip")) + /* IP sessions */ + sf->sf_ip = true; + + else if (!strcmp(cmd, "ip6")) + /* IPv6 sessions */ + sf->sf_ip6 = true; + + else if (!strcmp(cmd, "id")) { + /* + * Session ID filter + */ + ulong tmp; + val = next_arg(&argc, &argv); + if (!val) + goto error_missing_param; + + tmp = arg_to_ulong(val, &error); + if (error < 0) + goto error_param_value; + + sf->sf_id = tmp; + + } else if (!strcmp(cmd, "dir")) { + val = next_arg(&argc, &argv); + if (!val) + goto error_missing_param; + + if (!strcmp(val, "in")) + sf->sf_dir = SF_DIR_IN; + else if (!strcmp(val, "out")) + sf->sf_dir = SF_DIR_OUT; + + } else if (!strcmp(cmd, "intf")) { + /* + * Interface filter + */ + struct ifnet *ifp; + val = next_arg(&argc, &argv); + if (!val) + goto error_missing_param; + + ifp = dp_ifnet_byifname(val); + if (!ifp) + goto error_param_value; + + sf->sf_ifindex = ifp->if_index; + + } else if (!strcmp(cmd, "src-addr")) { + /* + * Source address filter + */ + error = cmd_op_parse_src_addr(f, &argc, &argv, sf); + if (error < 0) + return error; + + } else if (!strcmp(cmd, 
"dst-addr")) { + /* + * Destination address filter + */ + error = cmd_op_parse_dst_addr(f, &argc, &argv, sf); + if (error < 0) + return error; + + } else if (!strcmp(cmd, "src-port")) { + /* + * Source port filter + */ + uint tmp; + val = next_arg(&argc, &argv); + if (!val) + goto error_missing_param; + + tmp = arg_to_uint(val, &error); + if (error < 0 || tmp > USHRT_MAX) + goto error_param_value; + + sport = tmp; + + } else if (!strcmp(cmd, "dst-port")) { + /* + * Destination port filter + */ + uint tmp; + val = next_arg(&argc, &argv); + if (!val) + goto error_missing_param; + + tmp = arg_to_uint(val, &error); + if (error < 0 || tmp > USHRT_MAX) + goto error_param_value; + + dport = tmp; + + } else if (!strcmp(cmd, "proto")) { + /* + * Protocol filter + */ + uint tmp; + val = next_arg(&argc, &argv); + if (!val) + goto error_missing_param; + + tmp = arg_to_uint(val, &error); + if (error < 0 || tmp > UCHAR_MAX) + goto error_param_value; + + sf->sf_proto = tmp; + + } else if (!strcmp(cmd, "trans-addr")) { + /* + * Translation address filter + */ + error = cmd_op_parse_trans_addr(f, &argc, &argv, sf); + if (error < 0) + return error; + + } else if (!strcmp(cmd, "trans-port")) { + /* + * Translation port filter + */ + uint tmp; + val = next_arg(&argc, &argv); + if (!val) + goto error_missing_param; + + tmp = arg_to_uint(val, &error); + if (error < 0 || tmp > USHRT_MAX) + goto error_param_value; + + /* cmd_feature_nat_info returns network order */ + sf->sf_tport = htons(tmp); + + } else if (!strcmp(cmd, "feat")) { + /* + * Session feature filter + */ + error = cmd_op_parse_feat(f, &argc, &argv, sf); + if (error < 0) + return error; + + } else if (!strcmp(cmd, "count")) { + /* + * Session dump count + */ + uint tmp; + + val = next_arg(&argc, &argv); + if (!val) + goto error_missing_param; + + tmp = arg_to_uint(val, &error); + if (error < 0) + goto error_param_value; + + if (sd) + sd->sd_count = tmp; + + } else if (!strcmp(cmd, "orderby")) { + + error = 
cmd_op_parse_orderby(f, &argc, &argv, sd); + if (error < 0) + return error; + + } else if (!strcmp(cmd, "start")) { + val = next_arg(&argc, &argv); + if (!val) + goto error_missing_param; + start = val; + + } else if (!strcmp(cmd, "end")) { + val = next_arg(&argc, &argv); + if (!val) + goto error_missing_param; + end = val; + + } else if (!strcmp(cmd, "brief")) { + if (sd) + sd->sd_features = false; + + } else if (!strcmp(cmd, "summary")) { + if (sd) + sd->sd_summary = true; + } + } + cmd = NULL; + + error = cmd_op_finalize_addrids_fltr(f, sf, sport, dport); + if (error < 0) + return error; + + if (cmd_op_finalize_orderby(f, sf, sd, start, end) < 0) + return error; + + /* Default to both IP and IPv6 if neither is specified */ + if (!sf->sf_ip && !sf->sf_ip6) { + sf->sf_ip = true; + sf->sf_ip6 = true; + } + + return 0; + +error_missing_param: + cmd_err(f, "Missing parameter to \"%s\" command\n", cmd); + return -EINVAL; + +error_param_value: + cmd_err(f, "Error with parameter to \"%s\" command\n", cmd); + return -EINVAL; +} + +uint16_t sess_feature_type_bm(const struct session *s) +{ + uint16_t sess_feat = 0; + + if (session_is_fw(s)) + sess_feat |= SF_FEATURE_FW; + + if (session_is_snat(s)) + sess_feat |= SF_FEATURE_SNAT; + + if (session_is_dnat(s)) + sess_feat |= SF_FEATURE_DNAT; + + if (session_is_nat64(s)) + sess_feat |= SF_FEATURE_NAT64; + + if (session_is_nat46(s)) + sess_feat |= SF_FEATURE_NAT46; + + if (session_is_alg(s)) + sess_feat |= SF_FEATURE_ALG; + + if (session_is_app(s)) + sess_feat |= SF_FEATURE_APP; + + if (sess_feat == 0) + sess_feat |= SF_FEATURE_OTHER; + + return sess_feat; +} + +static int cmd_feature_nat_info(const struct session *s, uint32_t *taddr, + uint16_t *tport) +{ + const struct session_feature_ops *ops; + void *sf_data; + + ops = feature_operations[SESSION_FEATURE_NPF]; + + /* Only if the feature has a nat_info op */ + if (!ops->nat_info) + return -ENOENT; + + sf_data = session_feature_get((struct session *)s, + s->se_sen->sen_ifindex, 
+ SESSION_FEATURE_NPF); + if (!sf_data) + return -ENOENT; + + return ops->nat_info(sf_data, taddr, tport); +} + +/* + * Filter. Returns false if pkt is to be blocked by the filter. + */ +static bool +cmd_session_filter(const struct session *s, const struct session_filter *sf) +{ + const struct sentry *sen = rcu_dereference(s->se_sen); + uint i; + + /* Session ID */ + if (sf->sf_id && sf->sf_id != s->se_id) + return false; + + /* Address family */ + if ((sen->sen_flags & SENTRY_IPv4) != 0) { + if (!sf->sf_ip) + return false; + } else if ((sen->sen_flags & SENTRY_IPv6) != 0) { + if (!sf->sf_ip6) + return false; + } + + /* Direction */ + if (sf->sf_dir) { + if (sf->sf_dir == SF_DIR_IN && !session_is_in(s)) + return false; + + if (sf->sf_dir == SF_DIR_OUT && !session_is_out(s)) + return false; + } + + /* Interface */ + if (sf->sf_ifindex && sf->sf_ifindex != sen->sen_ifindex) + return false; + + /* Protocol */ + if (sf->sf_proto && sf->sf_proto != sen->sen_protocol) + return false; + + /* Port numbers, Source address, Dest address */ + for (i = 0; i < sf->sf_addrids_depth; i++) { + uint32_t mask = sf->sf_mask[i]; + + if ((sf->sf_addrids[i] & mask) != (sen->sen_addrids[i] & mask)) + return false; + } + + /* Session features */ + if (sf->sf_features) { + uint16_t sess_feat = sess_feature_type_bm(s); + + if ((sf->sf_features & sess_feat) == 0) + return false; + } + + /* Translation address and/or port */ + if (sf->sf_taf || sf->sf_tport) { + + /* Only SNAT and DNAT are supported for now */ + if (session_is_snat(s) || session_is_dnat(s)) { + uint32_t taddr; + uint16_t tport; + int rc; + + /* Get sessions translation address and port */ + rc = cmd_feature_nat_info(s, &taddr, &tport); + if (rc < 0) + return false; + + /* + * If a translation address filter is specified then + * compare with session translation address + */ + if (sf->sf_taf == AF_INET && + (taddr & sf->sf_tmask.s6_addr32[0]) != + sf->sf_taddr.s6_addr32[0]) + return false; + + /* + * If a translation port 
filter is specified then + * compare with session translation port. Both + * sf_tport and tport are in network order. + */ + if (sf->sf_tport && tport != sf->sf_tport) + return false; + } else + return false; + } + + return true; +} + +/* + * Does this session belong to the current batch? + * + * We consider both ascending and descending ordering. As such we use + * convenience variables a1, a2, b1, and b2 in the two comparison operations, + * and initialise them according to ascending/descending and whats being + * compared. + * + * For ascending, so we want target (a1) >= start (a2), and + * target (b1) <= end (b2) + * + * For descending, so we want target (a2) <= start (a1), and + * target (b2) >= end (b1) + */ +static bool +cmd_session_batch(const struct session *s, struct session_dump *sd) +{ + const struct sentry *sen = rcu_dereference(s->se_sen); + + if (sd->sd_orderby == SD_ORDERBY_NONE) { + /* Skip? */ + if (sd->sd_start-- > 0) + return false; + + /* Filled? */ + if (sd->sd_count-- <= 0) + return false; + + } else if (sd->sd_orderby == SD_ORDERBY_SADDR || + sd->sd_orderby == SD_ORDERBY_DADDR) { + + bool src = (sd->sd_orderby == SD_ORDERBY_SADDR); + const struct in6_addr *sentry_addr; + const void *saddr; + const void *daddr; + uint alen; + int af; + + if (sd->sd_af == 0) + return false; + + if (sd->sd_af == AF_INET && + (sen->sen_flags & SENTRY_IPv4) == 0) + return false; + + if (sd->sd_af == AF_INET6 && + (sen->sen_flags & SENTRY_IPv6) == 0) + return false; + + /* Extract addrs from the sentry */ + session_sentry_extract_addrs(sen, &af, &saddr, &daddr); + sentry_addr = src ? saddr : daddr; + + /* Is sentry < start addr? */ + alen = (af == AF_INET) ? 
4 : 16; + + const void *a1, *a2, *b1, *b2; + + if (sd->sd_order == SD_ORDER_DESCENDING) { + a1 = &sd->sd_start_addr; + b1 = &sd->sd_end_addr; + a2 = b2 = sentry_addr; + } else { + a1 = b1 = sentry_addr; + a2 = &sd->sd_start_addr; + b2 = &sd->sd_end_addr; + } + + if (memcmp(a1, a2, alen) < 0 || memcmp(b1, b2, alen) > 0) + return false; + + } else if (sd->sd_orderby == SD_ORDERBY_TADDR) { + uint32_t taddr; + uint16_t tport; + int rc; + + rc = cmd_feature_nat_info(s, &taddr, &tport); + if (rc < 0) + return false; + + const void *a1, *a2, *b1, *b2; + + if (sd->sd_order == SD_ORDER_DESCENDING) { + a1 = &sd->sd_start_addr; + b1 = &sd->sd_end_addr; + a2 = b2 = &taddr; + } else { + a1 = b1 = &taddr; + a2 = &sd->sd_start_addr; + b2 = &sd->sd_end_addr; + } + + if (memcmp(a1, a2, sizeof(taddr)) < 0 || + memcmp(b1, b2, sizeof(taddr)) > 0) + return false; + + } else if (sd->sd_orderby == SD_ORDERBY_ID) { + uint64_t a1, a2, b1, b2; + + if (sd->sd_order == SD_ORDER_DESCENDING) { + a1 = sd->sd_start_id; + b1 = sd->sd_end_id; + a2 = b2 = s->se_id; + } else { + a1 = b1 = s->se_id; + a2 = sd->sd_start_id; + b2 = sd->sd_end_id; + } + if (a1 < a2 || b1 > b2) + return false; + + } else if (sd->sd_orderby == SD_ORDERBY_TO) { + sess_time_to_expire(s); + uint32_t a1, a2, b1, b2; + + if (sd->sd_order == SD_ORDER_DESCENDING) { + a1 = sd->sd_start_timeout; + b1 = sd->sd_end_timeout; + a2 = b2 = sess_time_to_expire(s); + } else { + a1 = b1 = sess_time_to_expire(s); + a2 = sd->sd_start_timeout; + b2 = sd->sd_end_timeout; + } + if (a1 < a2 || b1 > b2) + return false; + + } + return true; +} + +/* + * Callback for session table walk + */ +static int cmd_session_show_cb(struct session *s, void *data) +{ + struct session_dump *sd = data; + struct session_filter *sf = sd->sd_sf; + + if (!cmd_session_filter(s, sf)) + return 0; + + if (!cmd_session_batch(s, sd)) + return 0; + + cmd_session_json(s, sd->sd_json, sd->sd_features, true); + return 0; +} + +static int cmd_session_show(struct session_dump 
*sd) +{ + json_writer_t *json; + + json = jsonw_new(sd->sd_fp); + if (!json) + return -EINVAL; + sd->sd_json = json; + + jsonw_name(json, "sessions"); + jsonw_start_array(json); + + session_table_walk(cmd_session_show_cb, sd); + + jsonw_end_array(json); + jsonw_destroy(&json); + + return 0; +} + +/* + * Callback for session table summary walk + */ +static int cmd_session_show_summary_cb(struct session *s, void *data) +{ + const struct sentry *sen = rcu_dereference(s->se_sen); + struct session_summary *ss = data; + + ss->ss_total++; + + /* Address family */ + if ((sen->sen_flags & SENTRY_IPv4) != 0) + ss->ss_ip++; + else if ((sen->sen_flags & SENTRY_IPv6) != 0) + ss->ss_ip6++; + + if (session_is_in(s)) + ss->ss_in++; + else if (session_is_out(s)) + ss->ss_out++; + + if (session_is_snat(s)) + ss->ss_ft_snat++; + else if (session_is_dnat(s)) + ss->ss_ft_dnat++; + else if (session_is_nat64(s)) + ss->ss_ft_nat64++; + else if (session_is_nat46(s)) + ss->ss_ft_nat46++; + else if (session_is_alg(s)) + ss->ss_ft_alg++; + else if (session_is_app(s)) + ss->ss_ft_app++; + else + ss->ss_ft_other++; + + struct sess_summary_proto *sp; + + if (sen->sen_protocol == IPPROTO_TCP) + sp = &ss->ss_tcp; + else if (sen->sen_protocol == IPPROTO_UDP) + sp = &ss->ss_udp; + else + sp = &ss->ss_other; + + sp->sp_total++; + + switch (s->se_gen_state) { + case SESSION_STATE_NONE: + case SESSION_STATE_CLOSED: + sp->sp_closed++; + break; + case SESSION_STATE_NEW: + sp->sp_opening++; + break; + case SESSION_STATE_ESTABLISHED: + sp->sp_estbd++; + break; + case SESSION_STATE_TERMINATING: + sp->sp_closing++; + break; + }; + + return 0; +} + +static void +cmd_sess_summary_proto(json_writer_t *json, const char *name, + struct sess_summary_proto *sp) +{ + jsonw_name(json, name); + jsonw_start_object(json); + + jsonw_uint_field(json, "total", sp->sp_total); + jsonw_uint_field(json, "closed", sp->sp_closed); + jsonw_uint_field(json, "opening", sp->sp_opening); + jsonw_uint_field(json, "established", 
sp->sp_estbd); + jsonw_uint_field(json, "closing", sp->sp_closing); + + jsonw_end_object(json); +} + +static int cmd_session_show_summary(struct session_dump *sd) +{ + json_writer_t *json; + struct session_summary summary = {0}; + struct session_summary *ss = &summary; + + json = jsonw_new(sd->sd_fp); + if (!json) + return -EINVAL; + + session_table_walk(cmd_session_show_summary_cb, ss); + + jsonw_name(json, "summary"); + jsonw_start_object(json); + + jsonw_uint_field(json, "total", ss->ss_total); + + jsonw_name(json, "address-family"); + jsonw_start_object(json); + jsonw_uint_field(json, "ip", ss->ss_ip); + jsonw_uint_field(json, "ip6", ss->ss_ip6); + jsonw_end_object(json); + + jsonw_name(json, "direction"); + jsonw_start_object(json); + jsonw_uint_field(json, "in", ss->ss_in); + jsonw_uint_field(json, "out", ss->ss_out); + jsonw_end_object(json); + + jsonw_name(json, "protocol"); + jsonw_start_object(json); + cmd_sess_summary_proto(json, "tcp", &ss->ss_tcp); + cmd_sess_summary_proto(json, "udp", &ss->ss_udp); + cmd_sess_summary_proto(json, "other", &ss->ss_other); + jsonw_end_object(json); + + jsonw_name(json, "feature"); + jsonw_start_object(json); + jsonw_uint_field(json, "snat", ss->ss_ft_snat); + jsonw_uint_field(json, "dnat", ss->ss_ft_dnat); + jsonw_uint_field(json, "nat64", ss->ss_ft_nat64); + jsonw_uint_field(json, "nat46", ss->ss_ft_nat46); + jsonw_uint_field(json, "alg", ss->ss_ft_alg); + jsonw_uint_field(json, "app", ss->ss_ft_app); + jsonw_uint_field(json, "other", ss->ss_ft_other); + jsonw_end_object(json); + + jsonw_end_object(json); + jsonw_destroy(&json); + + return 0; +} + +/* + * cmd_op_show_dp_sessions + */ +int cmd_op_show_dp_sessions(FILE *f, int argc, char **argv) +{ + struct session_filter sf; + struct session_dump sd; + int rc; + + memset(&sf, 0, sizeof(sf)); + memset(&sd, 0, sizeof(sd)); + + sd.sd_fp = f; + sd.sd_sf = &sf; + + /* Return features in json unless 'brief' option is specified */ + sd.sd_features = true; + + rc = 
cmd_op_parse(f, argc, argv, &sf, &sd); + if (rc < 0) + return rc; + + if (sd.sd_summary) { + cmd_session_show_summary(&sd); + return 0; + } + + cmd_session_show(&sd); + + return 0; +} + +/* + * Session walker callback function for returning a list of items from the + * sessions. + */ +static int cmd_session_json_list(struct session *s, void *data) +{ + struct sentry *sen = rcu_dereference(s->se_sen); + struct session_dump *sd = data; + struct session_filter *sf = sd->sd_sf; + + if (!cmd_session_filter(s, sf)) + return 0; + + switch (sd->sd_orderby) { + case SD_ORDERBY_SADDR: + case SD_ORDERBY_DADDR: { + const struct in6_addr *addr; + const void *saddr; + const void *daddr; + int af; + + /* Extract addrs from the sentry */ + session_sentry_extract_addrs(sen, &af, &saddr, &daddr); + addr = (sd->sd_orderby == SD_ORDERBY_SADDR) ? saddr : daddr; + + /* + * IP addresses are returned as uints. IPv6 addrs are + * returned as strings. + */ + if (af == AF_INET) + jsonw_uint(sd->sd_json, ntohl(addr->s6_addr32[0])); + else { + char addr_str[INET6_ADDRSTRLEN]; + + inet_ntop(AF_INET6, addr, addr_str, sizeof(addr_str)); + jsonw_string(sd->sd_json, addr_str); + } + break; + } + case SD_ORDERBY_TADDR: { + uint32_t taddr; + uint16_t tport; + int rc; + + rc = cmd_feature_nat_info(s, &taddr, &tport); + if (rc < 0) + return 0; + + jsonw_uint(sd->sd_json, ntohl(taddr)); + + break; + } + case SD_ORDERBY_ID: + jsonw_uint(sd->sd_json, s->se_id); + break; + + case SD_ORDERBY_TO: + jsonw_uint(sd->sd_json, sess_time_to_expire(s)); + break; + + case SD_ORDERBY_NONE: + break; + }; + + return 0; +} + +/* + * cmd_op_list + */ +int cmd_op_list(FILE *f, int argc, char **argv) +{ + struct session_filter sf; + struct session_dump sd; + int rc; + + memset(&sf, 0, sizeof(sf)); + memset(&sd, 0, sizeof(sd)); + + sd.sd_fp = f; + sd.sd_sf = &sf; + + /* Default to return list of source addresses */ + sd.sd_orderby = SD_ORDERBY_SADDR; + + rc = cmd_op_parse(f, argc, argv, &sf, &sd); + if (rc < 0) + return rc; 
+ + json_writer_t *json; + + json = jsonw_new(sd.sd_fp); + if (!json) + return -EINVAL; + sd.sd_json = json; + + jsonw_name(json, "list"); + jsonw_start_array(json); + + session_table_walk(cmd_session_json_list, &sd); + + jsonw_end_array(json); + jsonw_destroy(&json); + + return 0; +} + +/* + * Callback for session table walk by 'clear' command + */ +static int cmd_session_clear_cb(struct session *s, void *data) +{ + struct session_filter *sf = data; + + if (!cmd_session_filter(s, sf)) + return 0; + + session_expire(s, NULL); + return 0; +} + +/* + * cmd_op_clear_dp_sessions + */ +int cmd_op_clear_dp_sessions(FILE *f, int argc, char **argv) +{ + struct session_filter sf; + int rc; + + memset(&sf, 0, sizeof(sf)); + + rc = cmd_op_parse(f, argc, argv, &sf, NULL); + if (rc < 0) + return rc; + + session_table_walk(cmd_session_clear_cb, &sf); + + return 0; +} diff --git a/src/session/session_op.h b/src/session/session_op.h new file mode 100644 index 00000000..4ad50878 --- /dev/null +++ b/src/session/session_op.h @@ -0,0 +1,22 @@ +/* + * Copyright (c) 2020, AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ + +#ifndef SESSION_OP_H +#define SESSION_OP_H + +#include +#include +#include +#include +#include +#include + +uint16_t sess_feature_type_bm(const struct session *s); +int cmd_op_list(FILE *f, int argc, char **argv); +int cmd_op_show_dp_sessions(FILE *f, int argc, char **argv); +int cmd_op_clear_dp_sessions(FILE *f, int argc, char **argv); + +#endif /* SESSION_OP_H */ diff --git a/src/session/session_private.h b/src/session/session_private.h index f16852c9..12fca931 100644 --- a/src/session/session_private.h +++ b/src/session/session_private.h @@ -1,6 +1,6 @@ /* - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. 
* * SPDX-License-Identifier: LGPL-2.1-only */ diff --git a/src/session/session_watch.c b/src/session/session_watch.c new file mode 100644 index 00000000..387a7ecb --- /dev/null +++ b/src/session/session_watch.c @@ -0,0 +1,118 @@ +/* + * Copyright (c) 2020, AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ +#include +#include +#include +#include +#include +#include + +#include "dp_session.h" +#include "npf/npf_state.h" +#include "session/session.h" +#include "session/session_watch.h" + +/* + * Hold session watch pointer. + */ +struct session_watch_info { + struct session_watch *watch; + bool watch_on; +}; + +static struct session_watch_info watch_ctx; + +int dp_session_watch_register(struct session_watch *se_watch) +{ + if (!rcu_cmpxchg_pointer(&watch_ctx.watch, NULL, se_watch)) { + watch_ctx.watch_on = true; + return 0; + } + + return -EBUSY; +} + +int dp_session_watch_unregister(int watcher_id __unused) +{ + struct session_watch **p = &watch_ctx.watch; + uint8_t old = watch_ctx.watch_on; + + watch_ctx.watch_on = false; + if (rcu_xchg_pointer(p, NULL) != NULL) + return 0; + watch_ctx.watch_on = old; + return -ENOENT; +} + +bool is_watch_on(void) +{ + return watch_ctx.watch_on; +} + +static struct session_watch *session_watch_get(void) +{ + struct session_watch *p = rcu_dereference(watch_ctx.watch); + return p; +} + +static bool check_session_type(struct session *session, unsigned int flags) +{ + if (dp_is_session_type(flags, FW) && session_is_fw(session)) + return true; + if (dp_is_session_type(flags, NAT) && session_is_nat(session)) + return true; + if (dp_is_session_type(flags, NAT64) && session_is_nat64(session)) + return true; + if (dp_is_session_type(flags, NAT46) && session_is_nat46(session)) + return true; + if (dp_is_session_type(flags, ALG) && session_is_alg(session)) + return true; + + return false; +} + +/* + * call notfication function for established sessions. 
+ * The call back function is called unconditionally. + */ +void session_do_watch(struct session *session, enum dp_session_hook hook) +{ + struct session_watch *wt = session_watch_get(); + + if (wt == NULL) + return; + + if (!check_session_type(session, wt->types)) + return; + + if (wt->fn) + wt->fn(session, hook, wt->data); +} + +struct dp_session_walk_data { + unsigned int types; + dp_session_walk_t *fn; + void *data; +}; + +static int session_walk_cb(struct session *session, void *data) +{ + struct dp_session_walk_data *wd = (struct dp_session_walk_data *)data; + + if (!check_session_type(session, wd->types)) + return 0; + return wd->fn(session, wd->data); +} + +int dp_session_table_walk(dp_session_walk_t *fn, void *data, unsigned int types) +{ + struct dp_session_walk_data wd = { .types = types, + .fn = fn, + .data = data, + }; + return session_table_walk(session_walk_cb, &wd); +} + diff --git a/src/session/session_watch.h b/src/session/session_watch.h new file mode 100644 index 00000000..568ed918 --- /dev/null +++ b/src/session/session_watch.h @@ -0,0 +1,22 @@ +/* + * Copyright (c) 2020, AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ +#ifndef SESSION_WATCH_H +#define SESSION_WATCH_H + +#include +#include "dp_session.h" + +bool is_watch_on(void); + +/* + * call notfication function for established sessions. + * skip closed/closing sessions if the sessions were never + * + * Skip session with pending acks. 
+ */ +void session_do_watch(struct session *session, enum dp_session_hook hook); +#endif + diff --git a/src/sfp.c b/src/sfp.c index 2d046a18..9b8c4356 100644 --- a/src/sfp.c +++ b/src/sfp.c @@ -54,6 +54,7 @@ #include #include #include +#include struct _nv { int v; @@ -319,7 +320,6 @@ static struct _nv warning_flags[] = { { 0x00, NULL } }; - static struct _nv rx_pwr_aw_chan_upper_flags[] = { { 0x4, "rx_power_low_warn" }, { 0x5, "rx_power_high_warn" }, @@ -386,6 +386,41 @@ static struct _nv voltage_alarm_warn_flags[] = { { 0x00, NULL } }; +#define SFP_CALIB_CONST_RX_PWR_SIZE 4 +#define SFP_CALIB_CONST_RX_PWR_CNT 5 +#define SFP_CALIB_CONST_SL_OFF_START 0x4c +#define SFP_CALIB_CONST_SL_OFF_SIZE 2 + +/* + * Type of calibration constant + * The enum values are in the order in which the + * entries appear in EEPROM + */ +enum sfp_calib_const_type { + SFP_CALIB_CONST_LASER_BIAS, + SFP_CALIB_CONST_TX_PWR, + SFP_CALIB_CONST_TEMPERATURE, + SFP_CALIB_CONST_VOLTAGE, + SFP_CALIB_CONST_MAX +}; + +static const char *sfp_calib_const_strs[SFP_CALIB_CONST_MAX] = { + "tx_laser", + "tx_pwr", + "temperature", + "voltage" +}; + +struct slope_off { + float slope; + int16_t offset; +}; + +struct sfp_calibration_constants { + union ieee754_float rx_pwr[SFP_CALIB_CONST_RX_PWR_CNT]; + struct slope_off slope_offs[SFP_CALIB_CONST_MAX]; +}; + /* * Retrieves a section of eeprom data for parsing & display */ @@ -1041,15 +1076,21 @@ print_qsfp_vendor(const struct rte_dev_eeprom_info *eeprom_info, */ static void convert_sff_temp(json_writer_t *wr, const char *field_name, - uint8_t *xbuf) + const uint8_t *xbuf, + const struct sfp_calibration_constants *c_consts) { int16_t temp; double d; + const struct slope_off *so; temp = (xbuf[0] > 0x7f) ? 
xbuf[0] - (0xff + 1) : xbuf[0]; d = (double)temp + (double)xbuf[1] / 256; + if (c_consts) { + so = &c_consts->slope_offs[SFP_CALIB_CONST_TEMPERATURE]; + d = (so->slope * d) + so->offset; + } jsonw_float_field(wr, field_name, d); } @@ -1059,11 +1100,18 @@ convert_sff_temp(json_writer_t *wr, const char *field_name, */ static void convert_sff_voltage(json_writer_t *wr, const char *field_name, - uint8_t *xbuf) + const uint8_t *xbuf, + const struct sfp_calibration_constants *c_consts) { double d; + const struct slope_off *so; d = (double)((xbuf[0] << 8) | xbuf[1]); + + if (c_consts) { + so = &c_consts->slope_offs[SFP_CALIB_CONST_VOLTAGE]; + d = (so->slope * d) + so->offset; + } jsonw_float_field(wr, field_name, d / 10000); } @@ -1073,46 +1121,72 @@ convert_sff_voltage(json_writer_t *wr, const char *field_name, */ static void convert_sff_power(json_writer_t *wr, const char *field_name, - uint8_t *xbuf) + const uint8_t *xbuf, bool rx, + const struct sfp_calibration_constants *c_consts) { - double mW; + double mW, tmp_mW; + int i; + + tmp_mW = (xbuf[0] << 8) + xbuf[1]; + + if (c_consts) { + if (rx) { + mW = (c_consts->rx_pwr[0].f + + c_consts->rx_pwr[1].f * tmp_mW); + for (i = 2; i < SFP_CALIB_CONST_RX_PWR_CNT; i++) + mW += c_consts->rx_pwr[i].f * tmp_mW * + pow(10, i); + } else { + const struct slope_off *so = + &c_consts->slope_offs[SFP_CALIB_CONST_TX_PWR]; - mW = (xbuf[0] << 8) + xbuf[1]; + mW = (so->slope * tmp_mW) + so->offset; + } + } else + mW = tmp_mW; jsonw_float_field(wr, field_name, mW / 10000); } static void convert_sff_bias(json_writer_t *wr, const char *field_name, - uint8_t *xbuf) + const uint8_t *xbuf, + const struct sfp_calibration_constants *c_consts) { double mA; + const struct slope_off *so; mA = (xbuf[0] << 8) + xbuf[1]; + if (c_consts) { + so = &c_consts->slope_offs[SFP_CALIB_CONST_LASER_BIAS]; + mA = (so->slope * mA) + so->offset; + } jsonw_float_field(wr, field_name, mA / 500); } static void print_sfp_temp(const struct rte_dev_eeprom_info 
*eeprom_info, + const struct sfp_calibration_constants *c_consts, json_writer_t *wr) { uint8_t xbuf[2]; memset(xbuf, 0, sizeof(xbuf)); get_eeprom_data(eeprom_info, SFF_8472_DIAG, SFF_8472_TEMP, 2, xbuf); - convert_sff_temp(wr, "temperature_C", xbuf); + convert_sff_temp(wr, "temperature_C", xbuf, c_consts); } static void print_sfp_voltage(const struct rte_dev_eeprom_info *eeprom_info, + const struct sfp_calibration_constants *c_consts, json_writer_t *wr) { uint8_t xbuf[2]; memset(xbuf, 0, sizeof(xbuf)); get_eeprom_data(eeprom_info, SFF_8472_DIAG, SFF_8472_VCC, 2, xbuf); - convert_sff_voltage(wr, "voltage_V", xbuf); + convert_sff_voltage(wr, "voltage_V", xbuf, c_consts); } static void @@ -1214,6 +1288,95 @@ print_qsfp_encoding(const struct rte_dev_eeprom_info *eeprom_info, convert_sff_8436_encoding(wr, xbuf); } +static void +get_sfp_calibration_constants(const struct rte_dev_eeprom_info *eeprom_info, + struct sfp_calibration_constants *c_consts, + json_writer_t *wr) +{ + uint16_t i, offset, cursor; + uint8_t xbuf[4]; + union ieee754_float rx_pwr; + char json_field_name[30], json_str[40]; + + jsonw_name(wr, "raw_calibration_data"); + jsonw_start_object(wr); + cursor = SFF_8472_RX_POWER4; + for (i = 0; i < SFP_CALIB_CONST_RX_PWR_CNT; i++) { + get_eeprom_data(eeprom_info, SFF_8472_DIAG, + cursor, SFP_CALIB_CONST_RX_PWR_SIZE, + xbuf); + snprintf(json_field_name, 30, "%2d: rx_pwr_%d", + cursor, SFP_CALIB_CONST_MAX - i); + snprintf(json_str, 40, "%02x %02x %02x %02x", + xbuf[0], xbuf[1], xbuf[2], xbuf[3]); + jsonw_string_field(wr, json_field_name, json_str); + + rx_pwr.ieee.negative = (xbuf[0] & 0x80) >> 7; + rx_pwr.ieee.exponent = (((xbuf[0] & 0x7f) << 1) | + ((xbuf[1] & 0x80) >> 7)); + rx_pwr.ieee.mantissa += (((xbuf[1] & 0x7f) << 16) | + (xbuf[2] << 8) | xbuf[3]); + + c_consts->rx_pwr[SFP_CALIB_CONST_MAX - i] = rx_pwr; + cursor += SFP_CALIB_CONST_RX_PWR_SIZE; + } + + cursor = SFF_8472_TX_I_SLOPE; + for (i = 0; i < SFP_CALIB_CONST_MAX; i++) { + 
get_eeprom_data(eeprom_info, SFF_8472_DIAG, + cursor, SFP_CALIB_CONST_SL_OFF_SIZE, + xbuf); + + snprintf(json_field_name, 30, "%02d: %s_slope", + cursor, sfp_calib_const_strs[i]); + snprintf(json_str, 40, "%02x %02x", xbuf[0], xbuf[1]); + jsonw_string_field(wr, json_field_name, json_str); + + c_consts->slope_offs[i].slope = (float)xbuf[0] + + (float)xbuf[1]/256; + cursor += SFP_CALIB_CONST_SL_OFF_SIZE; + + get_eeprom_data(eeprom_info, SFF_8472_DIAG, + cursor, SFP_CALIB_CONST_SL_OFF_SIZE, + (uint8_t *)&offset); + + snprintf(json_field_name, 30, "%02d: %s_offset", + cursor, sfp_calib_const_strs[i]); + snprintf(json_str, 40, "%02x %02x", ((uint8_t *)&offset)[0], + ((uint8_t *)&offset)[1]); + jsonw_string_field(wr, json_field_name, json_str); + + c_consts->slope_offs[i].offset = ntohs(offset); + cursor += SFP_CALIB_CONST_SL_OFF_SIZE; + } + jsonw_end_object(wr); +} + +static void +print_sfp_calibration_constants(struct sfp_calibration_constants *c_consts, + json_writer_t *wr) +{ +#define CONST_STR_LEN 20 + char const_str[CONST_STR_LEN]; + uint8_t i; + + for (i = 0; i < SFP_CALIB_CONST_RX_PWR_CNT; i++) { + snprintf(const_str, CONST_STR_LEN, "rx_pwr_%1d", i); + jsonw_float_field(wr, const_str, c_consts->rx_pwr[i].f); + } + + for (i = 0; i < SFP_CALIB_CONST_MAX; i++) { + snprintf(const_str, CONST_STR_LEN, "%s_slope", + sfp_calib_const_strs[i]); + jsonw_float_field(wr, const_str, + c_consts->slope_offs[i].slope); + snprintf(const_str, CONST_STR_LEN, "%s_offset", + sfp_calib_const_strs[i]); + jsonw_int_field(wr, const_str, + c_consts->slope_offs[i].offset); + } +} + static void print_qsfp_temp(const struct rte_dev_eeprom_info *eeprom_info, json_writer_t *wr) @@ -1222,7 +1385,7 @@ print_qsfp_temp(const struct rte_dev_eeprom_info *eeprom_info, memset(xbuf, 0, sizeof(xbuf)); get_eeprom_data(eeprom_info, SFF_8436_BASE, SFF_8436_TEMP, 2, xbuf); - convert_sff_temp(wr, "temperature_C", xbuf); + convert_sff_temp(wr, "temperature_C", xbuf, NULL); } static void @@ -1233,40 +1396,43 @@ 
print_qsfp_voltage(const struct rte_dev_eeprom_info *eeprom_info, memset(xbuf, 0, sizeof(xbuf)); get_eeprom_data(eeprom_info, SFF_8436_BASE, SFF_8436_VCC, 2, xbuf); - convert_sff_voltage(wr, "voltage_V", xbuf); + convert_sff_voltage(wr, "voltage_V", xbuf, NULL); } static void print_sfp_rx_power(const struct rte_dev_eeprom_info *eeprom_info, + const struct sfp_calibration_constants *c_consts, json_writer_t *wr) { uint8_t xbuf[2]; memset(xbuf, 0, sizeof(xbuf)); get_eeprom_data(eeprom_info, SFF_8472_DIAG, SFF_8472_RX_POWER, 2, xbuf); - convert_sff_power(wr, "rx_power_mW", xbuf); + convert_sff_power(wr, "rx_power_mW", xbuf, true, c_consts); } static void print_sfp_tx_power(const struct rte_dev_eeprom_info *eeprom_info, + const struct sfp_calibration_constants *c_consts, json_writer_t *wr) { uint8_t xbuf[2]; memset(xbuf, 0, sizeof(xbuf)); get_eeprom_data(eeprom_info, SFF_8472_DIAG, SFF_8472_TX_POWER, 2, xbuf); - convert_sff_power(wr, "tx_power_mW", xbuf); + convert_sff_power(wr, "tx_power_mW", xbuf, false, c_consts); } static void print_sfp_laser_bias(const struct rte_dev_eeprom_info *eeprom_info, + const struct sfp_calibration_constants *c_consts, json_writer_t *wr) { uint8_t xbuf[2]; memset(xbuf, 0, sizeof(xbuf)); get_eeprom_data(eeprom_info, SFF_8472_DIAG, SFF_8472_TX_BIAS, 2, xbuf); - convert_sff_bias(wr, "laser_bias", xbuf); + convert_sff_bias(wr, "laser_bias", xbuf, c_consts); } static void @@ -1278,7 +1444,7 @@ print_qsfp_rx_power(const struct rte_dev_eeprom_info *eeprom_info, memset(xbuf, 0, sizeof(xbuf)); get_eeprom_data(eeprom_info, SFF_8436_BASE, SFF_8436_RX_CH1_MSB + (chan * 2), 2, xbuf); - convert_sff_power(wr, "rx_power_mW", xbuf); + convert_sff_power(wr, "rx_power_mW", xbuf, true, NULL); } static void @@ -1290,7 +1456,7 @@ print_qsfp_tx_power(const struct rte_dev_eeprom_info *eeprom_info, memset(xbuf, 0, sizeof(xbuf)); get_eeprom_data(eeprom_info, SFF_8436_BASE, SFF_8436_TX_CH1_MSB + (chan * 2), 2, xbuf); - convert_sff_power(wr, "tx_power_mW", xbuf); + 
convert_sff_power(wr, "tx_power_mW", xbuf, false, NULL); } static void @@ -1302,7 +1468,7 @@ print_qsfp_laser_bias(const struct rte_dev_eeprom_info *eeprom_info, memset(xbuf, 0, sizeof(xbuf)); get_eeprom_data(eeprom_info, SFF_8436_BASE, SFF_8436_TX_BIAS_CH1_MSB + (chan * 2), 2, xbuf); - convert_sff_power(wr, "laser_bias", xbuf); + convert_sff_bias(wr, "laser_bias", xbuf, NULL); } static void @@ -1344,16 +1510,16 @@ print_qsfp_temp_thresholds(const struct rte_dev_eeprom_info *eeprom_info, memset(xbuf, 0, sizeof(xbuf)); if (!get_eeprom_data(eeprom_info, SFF_8436_BASE, SFF_8636_TEMP_HIGH_ALARM, 2, xbuf)) - convert_sff_temp(wr, "high_temp_alarm_thresh", xbuf); + convert_sff_temp(wr, "high_temp_alarm_thresh", xbuf, NULL); if (!get_eeprom_data(eeprom_info, SFF_8436_BASE, SFF_8636_TEMP_LOW_ALARM, 2, xbuf)) - convert_sff_temp(wr, "low_temp_alarm_thresh", xbuf); + convert_sff_temp(wr, "low_temp_alarm_thresh", xbuf, NULL); if (!get_eeprom_data(eeprom_info, SFF_8436_BASE, SFF_8636_TEMP_HIGH_WARN, 2, xbuf)) - convert_sff_temp(wr, "high_temp_warn_thresh", xbuf); + convert_sff_temp(wr, "high_temp_warn_thresh", xbuf, NULL); if (!get_eeprom_data(eeprom_info, SFF_8436_BASE, SFF_8636_TEMP_LOW_WARN, 2, xbuf)) - convert_sff_temp(wr, "low_temp_warn_thresh", xbuf); + convert_sff_temp(wr, "low_temp_warn_thresh", xbuf, NULL); } static void @@ -1365,16 +1531,20 @@ print_qsfp_voltage_thresholds(const struct rte_dev_eeprom_info *eeprom_info, memset(xbuf, 0, sizeof(xbuf)); if (!get_eeprom_data(eeprom_info, SFF_8436_BASE, SFF_8636_VOLTAGE_HIGH_ALARM, 2, xbuf)) - convert_sff_voltage(wr, "high_voltage_alarm_thresh", xbuf); + convert_sff_voltage(wr, "high_voltage_alarm_thresh", + xbuf, NULL); if (!get_eeprom_data(eeprom_info, SFF_8436_BASE, SFF_8636_VOLTAGE_LOW_ALARM, 2, xbuf)) - convert_sff_voltage(wr, "low_voltage_alarm_thresh", xbuf); + convert_sff_voltage(wr, "low_voltage_alarm_thresh", + xbuf, NULL); if (!get_eeprom_data(eeprom_info, SFF_8436_BASE, SFF_8636_VOLTAGE_HIGH_WARN, 2, xbuf)) - 
convert_sff_voltage(wr, "high_voltage_warn_thresh", xbuf); + convert_sff_voltage(wr, "high_voltage_warn_thresh", + xbuf, NULL); if (!get_eeprom_data(eeprom_info, SFF_8436_BASE, SFF_8636_VOLTAGE_LOW_WARN, 2, xbuf)) - convert_sff_voltage(wr, "low_voltage_warn_thresh", xbuf); + convert_sff_voltage(wr, "low_voltage_warn_thresh", + xbuf, NULL); } static void @@ -1386,16 +1556,16 @@ print_qsfp_bias_thresholds(const struct rte_dev_eeprom_info *eeprom_info, memset(xbuf, 0, sizeof(xbuf)); if (!get_eeprom_data(eeprom_info, SFF_8436_BASE, SFF_8636_TX_BIAS_HIGH_ALARM, 2, xbuf)) - convert_sff_bias(wr, "high_bias_alarm_thresh", xbuf); + convert_sff_bias(wr, "high_bias_alarm_thresh", xbuf, NULL); if (!get_eeprom_data(eeprom_info, SFF_8436_BASE, SFF_8636_TX_BIAS_LOW_ALARM, 2, xbuf)) - convert_sff_bias(wr, "low_bias_alarm_thresh", xbuf); + convert_sff_bias(wr, "low_bias_alarm_thresh", xbuf, NULL); if (!get_eeprom_data(eeprom_info, SFF_8436_BASE, SFF_8636_TX_BIAS_HIGH_WARN, 2, xbuf)) - convert_sff_bias(wr, "high_bias_warn_thresh", xbuf); + convert_sff_bias(wr, "high_bias_warn_thresh", xbuf, NULL); if (!get_eeprom_data(eeprom_info, SFF_8436_BASE, SFF_8636_TX_BIAS_LOW_WARN, 2, xbuf)) - convert_sff_bias(wr, "low_bias_warn_thresh", xbuf); + convert_sff_bias(wr, "low_bias_warn_thresh", xbuf, NULL); } static void @@ -1407,16 +1577,20 @@ print_qsfp_tx_power_thresholds(const struct rte_dev_eeprom_info *eeprom_info, memset(xbuf, 0, sizeof(xbuf)); if (!get_eeprom_data(eeprom_info, SFF_8436_BASE, SFF_8636_TX_POWER_HIGH_ALARM, 2, xbuf)) - convert_sff_power(wr, "high_tx_power_alarm_thresh", xbuf); + convert_sff_power(wr, "high_tx_power_alarm_thresh", xbuf, + false, NULL); if (!get_eeprom_data(eeprom_info, SFF_8436_BASE, SFF_8636_TX_POWER_LOW_ALARM, 2, xbuf)) - convert_sff_power(wr, "low_tx_power_alarm_thresh", xbuf); + convert_sff_power(wr, "low_tx_power_alarm_thresh", xbuf, + false, NULL); if (!get_eeprom_data(eeprom_info, SFF_8436_BASE, SFF_8636_TX_POWER_HIGH_WARN, 2, xbuf)) - 
convert_sff_power(wr, "high_tx_power_warn_thresh", xbuf); + convert_sff_power(wr, "high_tx_power_warn_thresh", xbuf, + false, NULL); if (!get_eeprom_data(eeprom_info, SFF_8436_BASE, SFF_8636_TX_POWER_LOW_WARN, 2, xbuf)) - convert_sff_power(wr, "low_tx_power_warn_thresh", xbuf); + convert_sff_power(wr, "low_tx_power_warn_thresh", xbuf, + false, NULL); } static void @@ -1428,16 +1602,20 @@ print_qsfp_rx_power_thresholds(const struct rte_dev_eeprom_info *eeprom_info, memset(xbuf, 0, sizeof(xbuf)); if (!get_eeprom_data(eeprom_info, SFF_8436_BASE, SFF_8636_RX_POWER_HIGH_ALARM, 2, xbuf)) - convert_sff_power(wr, "high_rx_power_alarm_thresh", xbuf); + convert_sff_power(wr, "high_rx_power_alarm_thresh", xbuf, + true, NULL); if (!get_eeprom_data(eeprom_info, SFF_8436_BASE, SFF_8636_RX_POWER_LOW_ALARM, 2, xbuf)) - convert_sff_power(wr, "low_rx_power_alarm_thresh", xbuf); + convert_sff_power(wr, "low_rx_power_alarm_thresh", xbuf, + true, NULL); if (!get_eeprom_data(eeprom_info, SFF_8436_BASE, SFF_8636_RX_POWER_HIGH_WARN, 2, xbuf)) - convert_sff_power(wr, "high_rx_power_warn_thresh", xbuf); + convert_sff_power(wr, "high_rx_power_warn_thresh", xbuf, + true, NULL); if (!get_eeprom_data(eeprom_info, SFF_8436_BASE, SFF_8636_RX_POWER_LOW_WARN, 2, xbuf)) - convert_sff_power(wr, "low_rx_power_warn_thresh", xbuf); + convert_sff_power(wr, "low_rx_power_warn_thresh", xbuf, + true, NULL); } static void @@ -1460,16 +1638,16 @@ print_temp_thresholds(const struct rte_dev_eeprom_info *eeprom_info, memset(xbuf, 0, sizeof(xbuf)); if (!get_eeprom_data(eeprom_info, SFF_8472_DIAG, SFF_8472_TEMP_HIGH_ALM, 2, xbuf)) - convert_sff_temp(wr, "high_temp_alarm_thresh", xbuf); + convert_sff_temp(wr, "high_temp_alarm_thresh", xbuf, NULL); if (!get_eeprom_data(eeprom_info, SFF_8472_DIAG, SFF_8472_TEMP_LOW_ALM, 2, xbuf)) - convert_sff_temp(wr, "low_temp_alarm_thresh", xbuf); + convert_sff_temp(wr, "low_temp_alarm_thresh", xbuf, NULL); if (!get_eeprom_data(eeprom_info, SFF_8472_DIAG, SFF_8472_TEMP_HIGH_WARN, 
2, xbuf)) - convert_sff_temp(wr, "high_temp_warn_thresh", xbuf); + convert_sff_temp(wr, "high_temp_warn_thresh", xbuf, NULL); if (!get_eeprom_data(eeprom_info, SFF_8472_DIAG, SFF_8472_TEMP_LOW_WARN, 2, xbuf)) - convert_sff_temp(wr, "low_temp_warn_thresh", xbuf); + convert_sff_temp(wr, "low_temp_warn_thresh", xbuf, NULL); } static void @@ -1481,16 +1659,20 @@ print_voltage_thresholds(const struct rte_dev_eeprom_info *eeprom_info, memset(xbuf, 0, sizeof(xbuf)); if (!get_eeprom_data(eeprom_info, SFF_8472_DIAG, SFF_8472_VOLTAGE_HIGH_ALM, 2, xbuf)) - convert_sff_voltage(wr, "high_voltage_alarm_thresh", xbuf); + convert_sff_voltage(wr, "high_voltage_alarm_thresh", xbuf, + NULL); if (!get_eeprom_data(eeprom_info, SFF_8472_DIAG, SFF_8472_VOLTAGE_LOW_ALM, 2, xbuf)) - convert_sff_voltage(wr, "low_voltage_alarm_thresh", xbuf); + convert_sff_voltage(wr, "low_voltage_alarm_thresh", xbuf, + NULL); if (!get_eeprom_data(eeprom_info, SFF_8472_DIAG, SFF_8472_VOLTAGE_HIGH_WARN, 2, xbuf)) - convert_sff_voltage(wr, "high_voltage_warn_thresh", xbuf); + convert_sff_voltage(wr, "high_voltage_warn_thresh", xbuf, + NULL); if (!get_eeprom_data(eeprom_info, SFF_8472_DIAG, SFF_8472_VOLTAGE_LOW_WARN, 2, xbuf)) - convert_sff_voltage(wr, "low_voltage_warn_thresh", xbuf); + convert_sff_voltage(wr, "low_voltage_warn_thresh", xbuf, + NULL); } static void @@ -1502,16 +1684,16 @@ print_bias_thresholds(const struct rte_dev_eeprom_info *eeprom_info, memset(xbuf, 0, sizeof(xbuf)); if (!get_eeprom_data(eeprom_info, SFF_8472_DIAG, SFF_8472_BIAS_HIGH_ALM, 2, xbuf)) - convert_sff_bias(wr, "high_bias_alarm_thresh", xbuf); + convert_sff_bias(wr, "high_bias_alarm_thresh", xbuf, NULL); if (!get_eeprom_data(eeprom_info, SFF_8472_DIAG, SFF_8472_BIAS_LOW_ALM, 2, xbuf)) - convert_sff_bias(wr, "low_bias_alarm_thresh", xbuf); + convert_sff_bias(wr, "low_bias_alarm_thresh", xbuf, NULL); if (!get_eeprom_data(eeprom_info, SFF_8472_DIAG, SFF_8472_BIAS_HIGH_WARN, 2, xbuf)) - convert_sff_bias(wr, "high_bias_warn_thresh", 
xbuf); + convert_sff_bias(wr, "high_bias_warn_thresh", xbuf, NULL); if (!get_eeprom_data(eeprom_info, SFF_8472_DIAG, SFF_8472_BIAS_LOW_WARN, 2, xbuf)) - convert_sff_bias(wr, "low_bias_warn_thresh", xbuf); + convert_sff_bias(wr, "low_bias_warn_thresh", xbuf, NULL); } static void @@ -1523,16 +1705,20 @@ print_tx_power_thresholds(const struct rte_dev_eeprom_info *eeprom_info, memset(xbuf, 0, sizeof(xbuf)); if (!get_eeprom_data(eeprom_info, SFF_8472_DIAG, SFF_8472_TX_POWER_HIGH_ALM, 2, xbuf)) - convert_sff_power(wr, "high_tx_power_alarm_thresh", xbuf); + convert_sff_power(wr, "high_tx_power_alarm_thresh", xbuf, + false, NULL); if (!get_eeprom_data(eeprom_info, SFF_8472_DIAG, SFF_8472_TX_POWER_LOW_ALM, 2, xbuf)) - convert_sff_power(wr, "low_tx_power_alarm_thresh", xbuf); + convert_sff_power(wr, "low_tx_power_alarm_thresh", xbuf, + false, NULL); if (!get_eeprom_data(eeprom_info, SFF_8472_DIAG, SFF_8472_TX_POWER_HIGH_WARN, 2, xbuf)) - convert_sff_power(wr, "high_tx_power_warn_thresh", xbuf); + convert_sff_power(wr, "high_tx_power_warn_thresh", xbuf, + false, NULL); if (!get_eeprom_data(eeprom_info, SFF_8472_DIAG, SFF_8472_TX_POWER_LOW_WARN, 2, xbuf)) - convert_sff_power(wr, "low_tx_power_warn_thresh", xbuf); + convert_sff_power(wr, "low_tx_power_warn_thresh", xbuf, + false, NULL); } static void @@ -1544,16 +1730,20 @@ print_rx_power_thresholds(const struct rte_dev_eeprom_info *eeprom_info, memset(xbuf, 0, sizeof(xbuf)); if (!get_eeprom_data(eeprom_info, SFF_8472_DIAG, SFF_8472_RX_POWER_HIGH_ALM, 2, xbuf)) - convert_sff_power(wr, "high_rx_power_alarm_thresh", xbuf); + convert_sff_power(wr, "high_rx_power_alarm_thresh", xbuf, + false, NULL); if (!get_eeprom_data(eeprom_info, SFF_8472_DIAG, SFF_8472_RX_POWER_LOW_ALM, 2, xbuf)) - convert_sff_power(wr, "low_rx_power_alarm_thresh", xbuf); + convert_sff_power(wr, "low_rx_power_alarm_thresh", xbuf, + false, NULL); if (!get_eeprom_data(eeprom_info, SFF_8472_DIAG, SFF_8472_RX_POWER_HIGH_WARN, 2, xbuf)) - convert_sff_power(wr, 
"high_rx_power_warn_thresh", xbuf); + convert_sff_power(wr, "high_rx_power_warn_thresh", xbuf, + false, NULL); if (!get_eeprom_data(eeprom_info, SFF_8472_DIAG, SFF_8472_RX_POWER_LOW_WARN, 2, xbuf)) - convert_sff_power(wr, "low_rx_power_warn_thresh", xbuf); + convert_sff_power(wr, "low_rx_power_warn_thresh", xbuf, + false, NULL); } static void @@ -1568,7 +1758,7 @@ print_sfp_thresholds(const struct rte_dev_eeprom_info *eeprom_info, } static void -convert_aw_flags(json_writer_t *wr, struct _nv *x, uint8_t *xbuf) +convert_aw_flags(json_writer_t *wr, struct _nv *x, const uint8_t *xbuf) { uint16_t flags; @@ -1714,7 +1904,8 @@ print_sfp_status(const struct rte_eth_dev_module_info *module_info, const struct rte_dev_eeprom_info *eeprom_info, json_writer_t *wr) { - uint8_t diag_type, flags; + struct sfp_calibration_constants c_consts, *c_const_p; + uint8_t diag_type; int do_diag = 0; /* Read diagnostic monitoring type */ @@ -1726,8 +1917,7 @@ print_sfp_status(const struct rte_eth_dev_module_info *module_info, * Read monitoring data IFF it is supplied AND is * internally calibrated */ - flags = SFF_8472_DDM_DONE | SFF_8472_DDM_INTERNAL; - if ((diag_type & flags) == flags) + if (diag_type & SFF_8472_DDM_DONE) do_diag = 1; /* Transceiver type */ @@ -1751,11 +1941,18 @@ print_sfp_status(const struct rte_eth_dev_module_info *module_info, * Request current measurements iff they are provided: */ if (do_diag != 0) { - print_sfp_temp(eeprom_info, wr); - print_sfp_voltage(eeprom_info, wr); - print_sfp_rx_power(eeprom_info, wr); - print_sfp_tx_power(eeprom_info, wr); - print_sfp_laser_bias(eeprom_info, wr); + if (diag_type & SFF_8472_DDM_EXTERNAL) { + c_const_p = &c_consts; + get_sfp_calibration_constants(eeprom_info, c_const_p, + wr); + print_sfp_calibration_constants(c_const_p, wr); + } else + c_const_p = NULL; + print_sfp_temp(eeprom_info, c_const_p, wr); + print_sfp_voltage(eeprom_info, c_const_p, wr); + print_sfp_rx_power(eeprom_info, c_const_p, wr); + 
print_sfp_tx_power(eeprom_info, c_const_p, wr); + print_sfp_laser_bias(eeprom_info, c_const_p, wr); } print_sfp_thresholds(eeprom_info, wr); print_sfp_alarm_flags(eeprom_info, wr); diff --git a/src/shadow.c b/src/shadow.c index 2fdad1e8..3819250d 100644 --- a/src/shadow.c +++ b/src/shadow.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2011-2017 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -47,31 +47,30 @@ #include "compat.h" #include "compiler.h" -#include "config.h" +#include "config_internal.h" #include "crypto/crypto_forward.h" #include "crypto/vti.h" +#include "dp_event.h" #include "ether.h" -#include "gre.h" +#include "if/gre.h" #include "if_var.h" #include "ip_funcs.h" #include "json_writer.h" #include "lag.h" #include "main.h" -#include "nh.h" -#include "pktmbuf.h" +#include "nh_common.h" +#include "pktmbuf_internal.h" #include "pl_common.h" #include "pl_fused.h" +#include "rcu.h" #include "route.h" #include "route_v6.h" #include "shadow.h" -#include "urcu.h" #include "vplane_log.h" /* Get a port number for spathintf out of range of physical ports */ #define DATAPLANE_SPATH_PORT (DATAPLANE_MAX_PORTS) -/* Number of buffers queued from dataplane to slowpath thread */ -#define SHADOW_IO_RING_SIZE 256 #define SHADOW_IO_RING_HWM 32 #define SHADOW_IO_RING_BURST 8 @@ -115,8 +114,8 @@ local_shadow_if(struct rte_mbuf *m, struct ifnet *inp_ifp) if (pktmbuf_mdata_invar_exists(m, PKT_MDATA_INVAR_SPATH)) { struct pktmbuf_mdata *mdata = pktmbuf_mdata(m); - if (mdata->md_spath.pi.proto != htons(ETHER_TYPE_TEB) && - unlikely(!rte_pktmbuf_adj(m, sizeof(struct ether_hdr)))) + if (mdata->md_spath.pi.proto != htons(RTE_ETHER_TYPE_TEB) && + unlikely(!rte_pktmbuf_adj(m, sizeof(struct rte_ether_hdr)))) return NULL; portid = DATAPLANE_SPATH_PORT; @@ -133,7 +132,7 @@ local_shadow_if(struct rte_mbuf *m, struct ifnet 
*inp_ifp) */ set_spath_rx_meta_data(m, inp_ifp, ntohs(ethhdr(m)->ether_type), TUN_META_FLAGS_DEFAULT); - if (!rte_pktmbuf_adj(m, sizeof(struct ether_hdr))) + if (!rte_pktmbuf_adj(m, sizeof(struct rte_ether_hdr))) return NULL; portid = DATAPLANE_SPATH_PORT; @@ -144,11 +143,11 @@ local_shadow_if(struct rte_mbuf *m, struct ifnet *inp_ifp) /* * Pass received packets into the Linux TCP/IP stack. - * Use ring to pass packets to master thread. + * Use ring to pass packets to main thread. * * Always consumes (free) mbuf */ -void local_packet(struct ifnet *ifp, struct rte_mbuf *m) +void local_packet_internal(struct ifnet *ifp, struct rte_mbuf *m) { unsigned int free_space; @@ -171,7 +170,7 @@ void local_packet(struct ifnet *ifp, struct rte_mbuf *m) struct pktmbuf_mdata *mdata = pktmbuf_mdata(m); struct ifnet *member_ifp; - member_ifp = ifnet_byifindex( + member_ifp = dp_ifnet_byifindex( mdata->md_bridge.member_ifindex); if (member_ifp) ifp = member_ifp; @@ -192,7 +191,7 @@ void local_packet(struct ifnet *ifp, struct rte_mbuf *m) sii->congested = false; if (CMM_LOAD_SHARED(sii->wake_me)) { - /* wake up slowpath thread on the master. */ + /* wake up slowpath thread on the main. */ static const uint64_t incr = 1; if (unlikely(write(event_fd, &incr, sizeof(incr)) < 0)) @@ -207,7 +206,31 @@ full: __cold_label; drop: __cold_label; if_incr_dropped(ifp); - rte_pktmbuf_free(m); + { + struct pl_packet pkt = { + .mbuf = m, + .l2_pkt_type = pkt_mbuf_get_l2_traffic_type(m), + .in_ifp = ifp + }; + pipeline_fused_term_drop(&pkt); + } +} + +/* + * Pass received packets into the Linux TCP/IP stack. + * Use ring to pass packets to main thread. 
+ * + * Always consumes (free) mbuf + */ +void local_packet(struct ifnet *ifp, struct rte_mbuf *m) +{ + struct pl_packet pkt = { + .mbuf = m, + .l2_pkt_type = pkt_mbuf_get_l2_traffic_type(m), + .in_ifp = ifp + }; + + pipeline_fused_l2_local(&pkt); } /* @@ -273,7 +296,7 @@ static int shadow_writer(zloop_t *loop __rte_unused, return -1; } - rcu_thread_online(); + dp_rcu_thread_online(); for (i = 0; i < SHADOW_WRITE_POLLS; i++) { npkts = 0; @@ -318,20 +341,7 @@ static int shadow_writer(zloop_t *loop __rte_unused, strerror(errno)); } - rcu_thread_offline(); - return 0; -} - -/* Destroy obsolete TUN/TAP device - * This is used when dataplane and controller are on the same machine. - */ -static int tap_destroy(portid_t port) -{ - const struct ifnet *ifp = ifport_table[port]; - - if (ifp) - tap_teardown(ifp->if_name); - + dp_rcu_thread_offline(); return 0; } @@ -365,13 +375,13 @@ void shadow_init_spath_ring(int tun_fd) static uint8_t shadow_feature_if_output(struct ifnet *ifp, struct rte_mbuf *m, - struct ether_hdr *hdr) + struct rte_ether_hdr *hdr) { - if (hdr->ether_type == htons(ETHER_TYPE_IPv4)) { + if (hdr->ether_type == htons(RTE_ETHER_TYPE_IPV4)) { if (ip_spath_output(ifp, m) < 0) /* pak freed, but not yet counted */ return 1; - } else if (hdr->ether_type == htons(ETHER_TYPE_IPv6)) { + } else if (hdr->ether_type == htons(RTE_ETHER_TYPE_IPV6)) { if (ip6_spath_output(ifp, m) < 0) /* pak freed, but not yet counted */ return 1; @@ -384,29 +394,28 @@ shadow_feature_if_output(struct ifnet *ifp, struct rte_mbuf *m, static int shadow_output(struct shadow_if_info *sii, struct rte_mbuf *m, struct ifnet *ifp) { - struct ether_hdr *hdr = ethhdr(m); - struct ifnet *master; + struct rte_ether_hdr *hdr = ethhdr(m); + struct ifnet *team; if (!(ifp->if_flags & IFF_UP)) return -1; - pktmbuf_l2_len(m) = ETHER_HDR_LEN; + dp_pktmbuf_l2_len(m) = RTE_ETHER_HDR_LEN; - master = rcu_dereference(ifp->aggregator); + team = rcu_dereference(ifp->aggregator); - if (unlikely(hdr->ether_type == 
htons(ETHER_TYPE_SLOW))) { - if (master) { - int ret = lag_etype_slow_tx(master, ifp, m); + if (unlikely(hdr->ether_type == htons(RTE_ETHER_TYPE_SLOW))) { + if (team) { + int ret = lag_etype_slow_tx(team, ifp, m); return ret; } - return -1; } - if (master) { - if (!(master->if_flags & IFF_UP)) + if (team) { + if (!(team->if_flags & IFF_UP)) return -1; - ifp = master; + ifp = team; } uint16_t vif = vid_from_pkt(m, if_tpid(ifp)); @@ -420,7 +429,8 @@ static int shadow_output(struct shadow_if_info *sii, struct rte_mbuf *m, if (vifp->qinq_outer) { struct ifnet *cvlan; - uint16_t vid = vid_decap(m, ETHER_TYPE_VLAN); + uint16_t vid = vid_decap(m, + RTE_ETHER_TYPE_VLAN); cvlan = if_vlan_lookup(vifp, vid & VLAN_VID_MASK); @@ -458,19 +468,19 @@ int tap_reader(zloop_t *loop, zmq_pollitem_t *item, void *arg) if (ret <= 0) return ret; - rcu_thread_online(); + dp_rcu_thread_online(); if (shadow_output(sii, m, ifp) < 0) goto drop; ++sii->ts_packets; - rcu_thread_offline(); + dp_rcu_thread_offline(); return 0; drop: ++sii->ts_errors; rte_pktmbuf_free(m); - rcu_thread_offline(); + dp_rcu_thread_offline(); return 0; } @@ -480,26 +490,26 @@ int spath_reader(zloop_t *loop __rte_unused, zmq_pollitem_t *item, { struct tun_pi pi; struct tun_meta meta; - struct ether_hdr *ether; + struct rte_ether_hdr *ether; struct ifnet *ifp = NULL, *host_ifp, *s2s_ifp = NULL; struct rte_mbuf *m = NULL; enum cont_src_en cont_src = CONT_SRC_MAIN; struct shadow_if_info *sii = arg; - union next_hop_v4_or_v6_ptr nh = {NULL}; + struct next_hop *nh = NULL; int ret = spath_receive(item, &pi, &meta, sii, &m); if (ret <= 0) return ret; - rcu_thread_online(); + dp_rcu_thread_online(); if (!(meta.flags & TUN_META_FLAG_IIF)) { RTE_LOG(ERR, DATAPLANE, "spath missing iif\n"); goto drop; } - ifp = ifnet_byifindex(meta.iif); + ifp = dp_ifnet_byifindex(meta.iif); if (ifp) cont_src = ifp->if_cont_src; @@ -516,10 +526,11 @@ int spath_reader(zloop_t *loop __rte_unused, zmq_pollitem_t *item, */ if (!ifp || (!(is_gre(ifp) && 
gre_encap_l2_frame(ntohs(pi.proto))) && !(is_bridge(ifp) || is_l2vlan(ifp)))) { - if (rte_pktmbuf_prepend(m, sizeof(struct ether_hdr)) == NULL) + if (rte_pktmbuf_prepend(m, + sizeof(struct rte_ether_hdr)) == NULL) goto drop; - pktmbuf_l2_len(m) = ETHER_HDR_LEN; - ether = rte_pktmbuf_mtod(m, struct ether_hdr *); + dp_pktmbuf_l2_len(m) = RTE_ETHER_HDR_LEN; + ether = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); ether->ether_type = pi.proto; /* @@ -530,8 +541,6 @@ int spath_reader(zloop_t *loop __rte_unused, zmq_pollitem_t *item, */ set_spath_rx_meta_data(m, NULL, ntohs(pi.proto), TUN_META_FLAGS_NONE); - if (likely(pi.proto == ETHER_TYPE_IPv4)) - pktmbuf_l3_len(m) = iphdr(m)->ihl << 2; } pktmbuf_mdata_set(m, PKT_MDATA_FROM_US); @@ -540,19 +549,27 @@ int spath_reader(zloop_t *loop __rte_unused, zmq_pollitem_t *item, /* * This is the s2s case. If there is a mark then it * represents the ifindex that is part of the selector, - * or if no ifindex in the selector then the vrf master. + * or if no ifindex in the selector then the vrf. */ if (meta.flags & TUN_META_FLAG_MARK) { - struct ifnet *temp_ifp = ifnet_byifindex(meta.mark); + struct ifnet *temp_ifp = dp_ifnet_byifindex(meta.mark); if (temp_ifp) { pktmbuf_set_vrf(m, if_vrfid(temp_ifp)); - if (temp_ifp->if_type != IFT_VRFMASTER) { + if (temp_ifp->if_type != IFT_VRF) { /* set s2s_ifp for later */ s2s_ifp = temp_ifp; } } } + /* + * Need to setup the L3 len in the mbuf if this is an + * IPv4 packet. Site to site packets , are + * arriving with their proto in the reverse byte + * order. + */ + if (ntohs(pi.proto) == RTE_ETHER_TYPE_IPV4) + dp_pktmbuf_l3_len(m) = iphdr(m)->ihl << 2; } if (!ifp) { @@ -563,36 +580,22 @@ int spath_reader(zloop_t *loop __rte_unused, zmq_pollitem_t *item, * feature point configured which might have * output features we need to run before encryption. 
*/ - if (likely((pi.proto) == ETHER_TYPE_IPv4)) { - struct next_hop nh4 = {.u.ifp = s2s_ifp}; - - if (s2s_ifp) - nh.v4 = &nh4; - if (unlikely - (crypto_policy_check_outbound(host_ifp, &m, - RT_TABLE_MAIN, - htons(pi.proto), - &nh))) - goto rcu_offline; - else if (nh.v4) - ifp = nh4_get_ifp(nh.v4); - else - goto drop; - } else if (likely((pi.proto) == ETHER_TYPE_IPv6)) { - struct next_hop_v6 nh6 = {.u.ifp = s2s_ifp}; + if (likely((ntohs(pi.proto)) == RTE_ETHER_TYPE_IPV4) || + likely((ntohs(pi.proto)) == RTE_ETHER_TYPE_IPV6)) { + struct next_hop nh46 = {.u.ifp = s2s_ifp}; if (s2s_ifp) - nh.v6 = &nh6; + nh = &nh46; if (unlikely (crypto_policy_check_outbound(host_ifp, &m, RT_TABLE_MAIN, - htons(pi.proto), + pi.proto, &nh))) goto rcu_offline; - else if (nh.v6) - ifp = nh6_get_ifp(nh.v6); + else if (nh) + ifp = dp_nh_get_ifp(nh); else goto drop; } @@ -606,8 +609,8 @@ int spath_reader(zloop_t *loop __rte_unused, zmq_pollitem_t *item, is_l2vlan(ifp) || is_s2s_feat_attach(ifp))) { if (is_bridge(ifp) || is_l2vlan(ifp)) { - ether = rte_pktmbuf_mtod(m, struct ether_hdr *); - pktmbuf_l2_len(m) = ETHER_HDR_LEN; + ether = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); + dp_pktmbuf_l2_len(m) = RTE_ETHER_HDR_LEN; shadow_feature_if_output(ifp, m, ether); } else if (is_gre(ifp)) { const in_addr_t *dst; @@ -618,18 +621,16 @@ int spath_reader(zloop_t *loop __rte_unused, zmq_pollitem_t *item, dst = mgre_nbma_to_tun_addr(ifp, &meta.mark); bool consumed = false; - if (likely(pi.proto == htons(ETHER_TYPE_IPv4))) + if (likely(pi.proto == htons(RTE_ETHER_TYPE_IPV4))) consumed = ip_spath_filter(ifp, &m); - else if (likely(pi.proto == htons(ETHER_TYPE_IPv6))) + else if (likely(pi.proto == htons(RTE_ETHER_TYPE_IPV6))) consumed = ip6_spath_filter(ifp, &m); if (!consumed) gre_tunnel_fragment_and_send( host_ifp, ifp, dst, m, ntohs(pi.proto)); } else if (is_vti(ifp) || is_s2s_feat_attach(ifp)) { - /* Fix ether proto endian-ness */ - ether = rte_pktmbuf_mtod(m, struct ether_hdr *); - ether->ether_type 
= ntohs(ether->ether_type); + ether = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); struct iphdr *ip = iphdr(m); bool consumed = false; if (likely(ip->version == 4)) @@ -641,14 +642,15 @@ int spath_reader(zloop_t *loop __rte_unused, zmq_pollitem_t *item, ntohs(ether->ether_type)); } } else { - if (likely(pi.proto == htons(ETHER_TYPE_IPv4))) { + if (likely(pi.proto == htons(RTE_ETHER_TYPE_IPV4))) { struct pl_packet pl_pkt = { .mbuf = m, .l2_pkt_type = L2_PKT_UNICAST, .in_ifp = ifp, }; pipeline_fused_ipv4_validate(&pl_pkt); - } else if (likely(pi.proto == htons(ETHER_TYPE_IPv6))) { + } else if (likely(pi.proto == + htons(RTE_ETHER_TYPE_IPV6))) { struct pl_packet pl_pkt = { .mbuf = m, .in_ifp = ifp, @@ -671,7 +673,7 @@ int spath_reader(zloop_t *loop __rte_unused, zmq_pollitem_t *item, rcu_offline: if (sii) ++sii->ts_packets; - rcu_thread_offline(); + dp_rcu_thread_offline(); return 0; } @@ -690,7 +692,7 @@ static void del_handler_tap_fd(zloop_t *loop, struct shadow_if_info *sii) static int shadow_send_event(enum shadow_ev type, portid_t port, - const char *ifname, const struct ether_addr *eth) + const char *ifname, const struct rte_ether_addr *eth) { zsock_t *sock = zsock_new_req(shadow_inproc); int rv; @@ -726,7 +728,7 @@ shadow_port_needed(portid_t port) /* Initialize a shadow interface. 
*/ int shadow_init_port(portid_t port, const char *ifname, - const struct ether_addr *eth) + const struct rte_ether_addr *eth) { if (!shadow_port_needed(port)) return 0; @@ -740,6 +742,13 @@ void shadow_uninit_port(portid_t port) if (!shadow_port_needed(port)) return; + /* + * if called during shutdown then ignore - the thread has + * already or is about to terminate + */ + if (zsys_interrupted) + return; + shadow_send_event(SHADOW_REMOVE, port, NULL, NULL); } @@ -844,7 +853,6 @@ static void shadow_remove_event(zloop_t *loop, portid_t port) rcu_assign_pointer(shadow_if[port], NULL); del_handler_tap_fd(loop, sii); - tap_destroy(port); /* * Drain ring @@ -861,13 +869,13 @@ static void shadow_cleanup(void *arg) zloop_t **loop = arg; zloop_destroy(loop); - rcu_unregister_thread(); + dp_rcu_unregister_thread(); } static int shadow_handle_event(zloop_t *loop, zsock_t *sock, void *arg __rte_unused) { - const struct ether_addr *addr; + const struct rte_ether_addr *addr; const char *ifname; portid_t port; uint8_t type; @@ -878,8 +886,8 @@ static int shadow_handle_event(zloop_t *loop, zsock_t *sock, &call_rv); if (rv >= 0) { *call_rv = 0; - rcu_thread_online(); - rcu_read_lock(); + dp_rcu_thread_online(); + dp_rcu_read_lock(); switch (type) { case SHADOW_ADD: *call_rv = shadow_add_event(loop, port, ifname); @@ -893,8 +901,8 @@ static int shadow_handle_event(zloop_t *loop, zsock_t *sock, *call_rv = -EINVAL; break; } - rcu_read_unlock(); - rcu_thread_offline(); + dp_rcu_read_unlock(); + dp_rcu_thread_offline(); } else { RTE_LOG(ERR, DATAPLANE, "shadow-event: failed to receive event\n"); @@ -953,8 +961,8 @@ static void *shadow_handler(void *args) shadow_if[DATAPLANE_SPATH_PORT]) < 0) rte_panic("spath poller setup failed\n"); - rcu_register_thread(); - rcu_thread_offline(); + dp_rcu_register_thread(); + dp_rcu_thread_offline(); while (!zsys_interrupted) { if (zloop_start(loop) != 0) @@ -967,7 +975,8 @@ static void *shadow_handler(void *args) } /* Setup global data for shadow */ 
-void shadow_init(void) +static void +shadow_init(void) { event_fd = eventfd(0, EFD_NONBLOCK); if (event_fd < 0) @@ -988,7 +997,8 @@ void shadow_init(void) rte_panic("shadow thread creation failed\n"); } -void shadow_destroy(void) +static void +shadow_destroy(void) { int join_rc; struct shadow_if_info *sii; @@ -1030,7 +1040,7 @@ void shadow_show_summary(FILE *f, const char *name) (!ifp || !rte_eth_dev_is_valid_port(port)))) continue; - if (name && ifp && strcmp(name, ifp->if_name)) + if (name && ifp && strcmp(name, ifp->if_name) != 0) continue; jsonw_start_object(wr); @@ -1111,3 +1121,10 @@ struct shadow_if_info *get_fd2shadowif(int fd) return NULL; } + +static const struct dp_event_ops shadow_events = { + .init = shadow_init, + .uninit = shadow_destroy, +}; + +DP_STARTUP_EVENT_REGISTER(shadow_events); diff --git a/src/shadow.h b/src/shadow.h index ceb55f1b..eecbdb9c 100644 --- a/src/shadow.h +++ b/src/shadow.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2011-2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -16,16 +16,19 @@ #include #include "control.h" -#include "pktmbuf.h" +#include "pktmbuf_internal.h" #include "urcu.h" #include "util.h" -struct ether_addr; +struct rte_ether_addr; struct ifnet; struct rte_mbuf; struct tun_meta; struct tun_pi; +/* Number of buffers queued from dataplane to slowpath thread */ +#define SHADOW_IO_RING_SIZE 1024 + /* per interface data structure * rx - packets received on NIC and going to kernel * tx - packets from kernel going to NIC @@ -55,20 +58,15 @@ void shadow_stop_port(portid_t portid); /* Initialize a shadow interface port. 
*/ int shadow_init_port(portid_t portid, const char *ifname, - const struct ether_addr *eth_addr); + const struct rte_ether_addr *eth_addr); void shadow_uninit_port(portid_t port); -/* Initialize state for shadow tunnel commnication */ -void shadow_init(void); -void shadow_destroy(void); - /* Display shadow interface statistics */ void shadow_show_summary(FILE *f, const char *name); struct ifnet *get_lo_ifp(enum cont_src_en cont_src); int shadow_add_event(zloop_t *loop, portid_t port, const char *ifname); int tap_attach(const char *ifname); -void tap_teardown(const char *ifname); void shadow_init_spath_ring(int tun_fd); int slowpath_init(void); @@ -76,7 +74,7 @@ int slowpath_init(void); void set_spath_rx_meta_data(struct rte_mbuf *m, const struct ifnet *ifp, uint16_t proto, uint8_t meta_mask); int tap_receive(zloop_t *loop, zmq_pollitem_t *item, struct shadow_if_info *sii, - struct rte_mbuf **m); + struct rte_mbuf **pkt); int spath_receive(zmq_pollitem_t *item, struct tun_pi *pi, struct tun_meta *meta, struct shadow_if_info *sii, struct rte_mbuf **mbuf); diff --git a/src/shadow_receive.c b/src/shadow_receive.c index 11383718..2d8c09b6 100644 --- a/src/shadow_receive.c +++ b/src/shadow_receive.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2012-2016 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -10,7 +10,6 @@ #include #include #include -#include #include #include #include @@ -38,28 +37,32 @@ #include #include -#include "bridge_port.h" +#include +#include + #include "compat.h" -#include "config.h" +#include "config_internal.h" #include "ether.h" +#include "if/bridge/bridge_port.h" #include "if_var.h" #include "l2_rx_fltr.h" #include "l2tp/l2tpeth.h" -#include "pktmbuf.h" +#include "pktmbuf_internal.h" #include "pipeline/nodes/pppoe/pppoe.h" #include "shadow.h" #include "urcu.h" #include "util.h" #include "vplane_log.h" -#include "vrf.h" +#include "vrf_internal.h" -struct mnl_socket; struct rte_mempool; #define GRE_OVERHEAD_IPV4 32 /* IP + GRE + VLAN */ #define GRE_OVERHEAD_IPV6 52 /* IPv6 + GRE + VLAN */ #define MIN_GRE_PKT 42 /* IP + GRE + Ether */ +#define ESMC_ETH_SUBTYPE 0x0A + static struct rte_mbuf *pkt_to_mbuf(struct rte_mempool *mp, vrfid_t vrf_id, const uint8_t *pkt, int len) { @@ -108,7 +111,7 @@ int tap_receive(zloop_t *loop, zmq_pollitem_t *item, { struct ifnet *ifp = ifnet_byport(sii->port); /* 8 is added to allow space for 2 VLAN headers (i.e. QinQ) */ - const size_t max_pkt = ifp->if_mtu + ETHER_HDR_LEN + 8; + const size_t max_pkt = ifp->if_mtu + RTE_ETHER_HDR_LEN + 8; void *base; ssize_t len; struct rte_mbuf *m = NULL; @@ -171,7 +174,7 @@ int spath_receive(zmq_pollitem_t *item, struct tun_pi *pi, { ssize_t len; struct iovec io[3]; - uint8_t pkt[ETHER_MAX_JUMBO_FRAME_LEN]; + uint8_t pkt[RTE_ETHER_MAX_JUMBO_FRAME_LEN]; struct ifnet *ifp = NULL; vrfid_t vrf_id = VRF_DEFAULT_ID; portid_t portid; @@ -207,7 +210,7 @@ int spath_receive(zmq_pollitem_t *item, struct tun_pi *pi, return -1; } - ifp = ifnet_byifindex(meta->iif); + ifp = dp_ifnet_byifindex(meta->iif); if (ifp) { portid = ifp->if_port == IF_PORT_ID_INVALID ? 
0 : ifp->if_port; vrf_id = if_vrfid(ifp); @@ -275,40 +278,9 @@ int slowpath_init(void) return fd; } -/* Send request and parse response */ -static int mnl_talk(struct mnl_socket *nl, struct nlmsghdr *nlh) -{ - unsigned int portid = mnl_socket_get_portid(nl); - uint32_t seq = time(NULL); - - nlh->nlmsg_flags |= NLM_F_ACK; - nlh->nlmsg_seq = seq; - - if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) < 0) { - RTE_LOG(ERR, DATAPLANE, - "mnl_socket_sendto failed: %s\n", strerror(errno)); - return MNL_CB_ERROR; - } - - char buf[MNL_SOCKET_BUFFER_SIZE]; - ssize_t count = mnl_socket_recvfrom(nl, buf, sizeof(buf)); - - if (count < 0) { - RTE_LOG(ERR, DATAPLANE, - "mnl_socket_recvfrom failed: %s\n", strerror(errno)); - return MNL_CB_ERROR; - } - - return mnl_cb_run(buf, count, seq, portid, NULL, NULL); -} - /* Setup TUN/TAP device */ int tap_attach(const char *ifname) { - char buf[MNL_SOCKET_BUFFER_SIZE]; - struct mnl_socket *nl; - struct nlmsghdr *nlh; - struct ifinfomsg *ifi; struct ifreq ifr; int fd; @@ -337,43 +309,6 @@ int tap_attach(const char *ifname) goto fail; } - /* Default to no carrier, link up happens later */ - nl = mnl_socket_open(NETLINK_ROUTE); - if (!nl) { - RTE_LOG(ERR, DATAPLANE, - "%s(): mnl_socket_open failed\n", __func__); - goto fail; - } - - if (mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID) < 0) { - RTE_LOG(ERR, DATAPLANE, - "%s(): mnl_socket_bind failed\n", __func__); - goto fail; - } - - nlh = mnl_nlmsg_put_header(buf); - nlh->nlmsg_type = RTM_NEWLINK; - nlh->nlmsg_flags = NLM_F_REQUEST; - - ifi = mnl_nlmsg_put_extra_header(nlh, sizeof(struct ifinfomsg)); - ifi->ifi_family = AF_UNSPEC; - ifi->ifi_flags = 0; - mnl_attr_put_strz(nlh, IFLA_IFNAME, ifname); - - /* Setup modes of TAP device to allow controlling - * link state from daemon. 
- */ - mnl_attr_put_u8(nlh, IFLA_LINKMODE, IF_LINK_MODE_DORMANT); - /* Default to no carrier, link up happens later */ - mnl_attr_put_u8(nlh, IFLA_OPERSTATE, IF_OPER_DORMANT); - - if (mnl_talk(nl, nlh) < 0) { - RTE_LOG(ERR, DATAPLANE, "%s(): can not setup %s: %s\n", - __func__, ifname, strerror(errno)); - goto fail; - } - - mnl_socket_close(nl); return fd; fail: @@ -381,38 +316,6 @@ int tap_attach(const char *ifname) return -1; } -/* Teardown TUN/TAP device */ -void tap_teardown(const char *ifname) -{ - char buf[MNL_SOCKET_BUFFER_SIZE]; - struct mnl_socket *nl = mnl_socket_open(NETLINK_ROUTE); - - if (!nl) - RTE_LOG(ERR, DATAPLANE, - "%s(): mnl_socket_open failed\n", __func__); - - if (mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID) < 0) - RTE_LOG(ERR, DATAPLANE, - "%s(): mnl_socket_bind failed\n", __func__); - - struct nlmsghdr *nlh = mnl_nlmsg_put_header(buf); - - nlh->nlmsg_type = RTM_DELLINK; - nlh->nlmsg_flags = NLM_F_REQUEST; - - struct ifinfomsg *ifi - = mnl_nlmsg_put_extra_header(nlh, sizeof(struct ifinfomsg)); - ifi->ifi_family = AF_UNSPEC; - - mnl_attr_put_strz(nlh, IFLA_IFNAME, ifname); - - if (mnl_talk(nl, nlh) < 0) - RTE_LOG(ERR, DATAPLANE, "%s(): can not teardown %s: %s\n", - __func__, ifname, strerror(errno)); - - mnl_socket_close(nl); -} - /* Collect fragmented mbuf and send to TAP device * Note: this function builds meta data to send to TAP device * onto stack by using alloca() before sending. 
@@ -445,8 +348,8 @@ int tuntap_write(int fd, struct rte_mbuf *m, struct ifnet *ifp) if (m->ol_flags & PKT_RX_VLAN) { bool sw_qinq_inner = false; uint16_t sw_outer_vlan; - const struct ether_hdr *oeh - = rte_pktmbuf_mtod(m, const struct ether_hdr *); + const struct rte_ether_hdr *oeh + = rte_pktmbuf_mtod(m, const struct rte_ether_hdr *); if (!ifp) { /* @@ -468,11 +371,11 @@ int tuntap_write(int fd, struct rte_mbuf *m, struct ifnet *ifp) if (!ifp->qinq_inner && !sw_qinq_inner) { struct { - struct ether_hdr eh; - struct vlan_hdr vh; + struct rte_ether_hdr eh; + struct rte_vlan_hdr vh; } *vhdr = alloca(sizeof(*vhdr)); - memcpy(&vhdr->eh, oeh, 2 * ETHER_ADDR_LEN); + memcpy(&vhdr->eh, oeh, 2 * RTE_ETHER_ADDR_LEN); vhdr->eh.ether_type = htons(if_tpid(ifp)); vhdr->vh.vlan_tci = htons(m->vlan_tci); vhdr->vh.eth_proto = oeh->ether_type; @@ -482,12 +385,12 @@ int tuntap_write(int fd, struct rte_mbuf *m, struct ifnet *ifp) ++n; } else { struct { - struct ether_hdr eh; - struct vlan_hdr vh1; - struct vlan_hdr vh2; + struct rte_ether_hdr eh; + struct rte_vlan_hdr vh1; + struct rte_vlan_hdr vh2; } *qinqhdr = alloca(sizeof(*qinqhdr)); - memcpy(&qinqhdr->eh, oeh, 2 * ETHER_ADDR_LEN); + memcpy(&qinqhdr->eh, oeh, 2 * RTE_ETHER_ADDR_LEN); if (!sw_qinq_inner) { qinqhdr->eh.ether_type = htons(if_tpid(ifp->if_parent)); @@ -498,7 +401,7 @@ int tuntap_write(int fd, struct rte_mbuf *m, struct ifnet *ifp) qinqhdr->vh1.vlan_tci = htons(sw_outer_vlan); qinqhdr->vh2.vlan_tci = htons(m->vlan_tci); } - qinqhdr->vh1.eth_proto = htons(ETHER_TYPE_VLAN); + qinqhdr->vh1.eth_proto = htons(RTE_ETHER_TYPE_VLAN); qinqhdr->vh2.eth_proto = oeh->ether_type; iov[n].iov_base = qinqhdr; @@ -508,8 +411,9 @@ int tuntap_write(int fd, struct rte_mbuf *m, struct ifnet *ifp) } /* Skip original Ethernet header in the data packet */ - iov[n].iov_base = pktmbuf_mtol3(m, char *); - iov[n].iov_len = rte_pktmbuf_data_len(m) - pktmbuf_l2_len(m); + iov[n].iov_base = dp_pktmbuf_mtol3(m, char *); + iov[n].iov_len = 
rte_pktmbuf_data_len(m) - + dp_pktmbuf_l2_len(m); ++n; m = m->next; @@ -531,10 +435,11 @@ int tuntap_write(int fd, struct rte_mbuf *m, struct ifnet *ifp) */ bool local_packet_filter(const struct ifnet *ifp, struct rte_mbuf *m) { - const struct ether_hdr *eh = ethhdr(m); + const struct rte_ether_hdr *eh = ethhdr(m); + struct slow_protocol_frame *slow_hdr; /* Filter out unwanted multicasts */ - if (is_multicast_ether_addr(&eh->d_addr) && + if (rte_is_multicast_ether_addr(&eh->d_addr) && ifp->if_mac_filtr_active && l2_mcfltr_node_lookup(ifp, &eh->d_addr) == NULL) return false; @@ -587,8 +492,14 @@ bool local_packet_filter(const struct ifnet *ifp, struct rte_mbuf *m) set_spath_rx_meta_data(m, ifp, ntohs(eh->ether_type), TUN_META_FLAGS_DEFAULT); } else if (!ifp->aggregator) { - if (eh->ether_type == htons(ETHER_TYPE_SLOW)) - return false; + if (eh->ether_type == htons(RTE_ETHER_TYPE_SLOW)) { + slow_hdr = rte_pktmbuf_mtod(m, + struct slow_protocol_frame *); + /* Allow ESMC frames on the interface */ + if (slow_hdr && (slow_hdr->slow_protocol.subtype != + ESMC_ETH_SUBTYPE)) + return false; + } } return true; diff --git a/src/snmp_mib.h b/src/snmp_mib.h index 58c388f1..b66202ec 100644 --- a/src/snmp_mib.h +++ b/src/snmp_mib.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2018-2019, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2011-2016 by Brocade Communications Systems, Inc. * All rights reserved. * diff --git a/src/soft_ticks.h b/src/soft_ticks.h index f1e8cbc0..b1eadbca 100644 --- a/src/soft_ticks.h +++ b/src/soft_ticks.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2019-2021, AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only */ @@ -7,10 +7,19 @@ #define _SOFT_TICKS_H_ /* - * Export of soft_ticks from master.c + * Export of soft_ticks etc. 
from controller.c + * + * get_dp_uptime() may also be used to return the dataplane uptime in seconds. + * (This is also derived from soft_ticks) */ -/* Milliseconds */ +/* Milliseconds since dataplane started. Updated every 10ms */ extern volatile uint64_t soft_ticks; +/* Microsecs since dataplane started. Updated every 10ms */ +extern uint64_t soft_ticks_us; + +/* Unix epoch in microsecs. Updated every 10ms. */ +extern uint64_t unix_epoch_us; + #endif /* _SOFT_TICKS_H_ */ diff --git a/src/storm_ctl.c b/src/storm_ctl.c index 6cd666a4..799a3725 100644 --- a/src/storm_ctl.c +++ b/src/storm_ctl.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2018-2019, AT&T Intellectual Property. + * Copyright (c) 2018-2021, AT&T Intellectual Property. * All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only @@ -8,17 +8,21 @@ */ #include -#include -#include -#include -#include -#include -#include #include #include -#include -#include -#include "bridge_port.h" + +#include "control.h" +#include "commands.h" +#include "dp_event.h" +#include "event.h" +#include "fal.h" +#include "if/bridge/bridge_port.h" +#include "if_var.h" +#include "controller.h" +#include "vplane_debug.h" +#include "vplane_log.h" +#include "zmq_dp.h" +#include "util.h" enum dp_storm_ctl_threshold { DP_STORM_CTL_THRESHOLD_NONE = 0, @@ -81,7 +85,6 @@ struct if_storm_ctl_info { struct cds_lfht *sc_instance_tbl; }; -static struct cfg_if_list *cfg_list_storm; static unsigned int storm_ctl_policy_cnt; #define STORM_CTL_DETECTION_DEFAULT_INTERVAL 5 @@ -97,66 +100,10 @@ static struct cds_lfht *storm_ctl_profile_tbl; static bool storm_ctl_monitor_running; -static void -storm_ctl_event_if_index_set(struct ifnet *ifp, uint32_t ifindex); -static void -storm_ctl_event_if_index_unset(struct ifnet *ifp, uint32_t ifindex); static void storm_ctl_trigger_actions(struct storm_ctl_instance *instance, enum fal_traffic_type tr_type, uint64_t pkt_drops); -static const struct dp_event_ops storm_ctl_event_ops = { - .if_index_set = 
storm_ctl_event_if_index_set, - .if_index_unset = storm_ctl_event_if_index_unset, -}; - -static void -storm_ctl_event_if_index_set(struct ifnet *ifp, uint32_t ifindex __unused) -{ - struct cfg_if_list_entry *le; - - if (!cfg_list_storm) - return; - - le = cfg_if_list_lookup(cfg_list_storm, ifp->if_name); - if (!le) - return; - - RTE_LOG(INFO, DATAPLANE, - "Replaying storm_ctl command %s for interface %s\n", - le->le_buf, ifp->if_name); - cmd_storm_ctl_cfg(NULL, le->le_argc, le->le_argv); - cfg_if_list_del(cfg_list_storm, ifp->if_name); - if (!cfg_list_storm->if_list_count) { - cfg_if_list_destroy(&cfg_list_storm); - dp_event_unregister(&storm_ctl_event_ops); - } -} - -static void -storm_ctl_event_if_index_unset(struct ifnet *ifp, uint32_t ifindex __unused) -{ - if (!cfg_list_storm) - return; - - cfg_if_list_del(cfg_list_storm, ifp->if_name); - if (!cfg_list_storm->if_list_count) { - cfg_if_list_destroy(&cfg_list_storm); - dp_event_unregister(&storm_ctl_event_ops); - } -} - -static int storm_ctl_replay_init(void) -{ - if (!cfg_list_storm) { - cfg_list_storm = cfg_if_list_create(); - if (!cfg_list_storm) - return -ENOMEM; - } - dp_event_register(&storm_ctl_event_ops); - return 0; -} - static struct rte_timer storm_ctl_monitor_tmr; static void storm_ctl_compare_stats(struct ifnet *ifp, void *arg __rte_unused) @@ -181,7 +128,8 @@ static void storm_ctl_compare_stats(struct ifnet *ifp, void *arg __rte_unused) if (!instance->sci_policy[tr_type].threshold_val) continue; - fal_obj = instance->sci_fal_obj[tr_type]; + fal_obj = rcu_dereference( + instance->sci_fal_obj[tr_type]); if (fal_obj == FAL_NULL_OBJECT_ID) continue; @@ -220,7 +168,7 @@ static void storm_ctl_compare_stats(struct ifnet *ifp, void *arg __rte_unused) static void storm_ctl_tmr_hdlr(struct rte_timer *timer __rte_unused, void *arg __rte_unused) { - ifnet_walk(storm_ctl_compare_stats, NULL); + dp_ifnet_walk(storm_ctl_compare_stats, NULL); } /* @@ -348,12 +296,51 @@ static void storm_ctl_free_instance(struct 
rcu_head *head) free(instance); } +/* Returns true if should be kept around, or false otherwise */ +static bool storm_ctl_cfg_check_profile(struct storm_ctl_profile *profile) +{ + enum fal_traffic_type traf; + + if (profile->scp_recovery_interval || + profile->scp_actions) + return true; + + for (traf = FAL_TRAFFIC_UCAST; traf < FAL_TRAFFIC_MAX; traf++) { + if (profile->scp_policies[traf].threshold_val) + return true; + } + + if (!cds_list_empty(&profile->scp_instance_list)) + return true; + + return false; +} + +static void storm_ctl_free_profile(struct rcu_head *head) +{ + struct storm_ctl_profile *profile; + + profile = caa_container_of(head, struct storm_ctl_profile, scp_rcu); + free(profile->scp_name); + free(profile); +} + +static void +storm_ctl_delete_profile(struct storm_ctl_profile *profile) +{ + cds_lfht_del(storm_ctl_profile_tbl, &profile->scp_node); + call_rcu(&profile->scp_rcu, storm_ctl_free_profile); +} + static void storm_ctl_del_instance_internal(struct cds_lfht *sc_instance_tbl, struct storm_ctl_instance *instance) { - cds_list_del(&instance->sci_profile_list); cds_lfht_del(sc_instance_tbl, &instance->sci_node); + cds_list_del(&instance->sci_profile_list); + if (!storm_ctl_cfg_check_profile(instance->sci_profile)) + storm_ctl_delete_profile(instance->sci_profile); + if (storm_ctl_policy_cnt == 1) storm_ctl_monitor_stop(); storm_ctl_policy_cnt--; @@ -377,6 +364,29 @@ static int storm_ctl_del_instance(struct if_storm_ctl_info *sc_info, return 0; } +static bool +storm_control_can_create_in_fal(struct ifnet *ifp, uint16_t vlan) +{ + if (if_check_any_except_emb_feat( + ifp, IF_EMB_FEAT_BRIDGE_MEMBER)) { + DP_DEBUG(STORM_CTL, DEBUG, DATAPLANE, + "interface %s not ready for FAL updates due to embellished features\n", + ifp->if_name); + return false; + } + + if (vlan && !ifp->if_l3_enabled && + (!ifp->if_brport || + !bridge_port_is_vlan_member(ifp->if_brport, vlan))) { + DP_DEBUG(STORM_CTL, DEBUG, DATAPLANE, + "interface %s vlan %u not ready for FAL 
updates due to VLAN not created\n", + ifp->if_name, vlan); + return false; + } + + return true; +} + static enum fal_port_attr_t fal_traffic_t_to_storm_ctl_type(enum fal_traffic_type traffic) { @@ -408,17 +418,20 @@ fal_traffic_t_to_vlan_feat_type(enum fal_traffic_type traffic) } static void fal_policer_get_sc_stats(struct storm_ctl_instance *instance, + uint32_t num_stats, enum fal_policer_stat_type cntr_ids[], uint64_t cntrs[], enum fal_traffic_type traf) { + fal_object_t fal_obj; int rv; - if (!instance->sci_fal_obj[traf]) + fal_obj = CMM_LOAD_SHARED(instance->sci_fal_obj[traf]); + if (!fal_obj) return; - rv = fal_policer_get_stats_ext(instance->sci_fal_obj[traf], - FAL_POLICER_STAT_MAX, + rv = fal_policer_get_stats_ext(fal_obj, + num_stats, cntr_ids, FAL_STATS_MODE_READ, cntrs); @@ -435,15 +448,17 @@ static int fal_policer_get_cfg(struct storm_ctl_instance *instance, enum fal_traffic_type traf) { struct fal_attribute_t policer_attr[2] = {}; + fal_object_t fal_obj; int rv; - if (!instance->sci_fal_obj[traf]) + fal_obj = CMM_LOAD_SHARED(instance->sci_fal_obj[traf]); + if (!fal_obj) return 0; policer_attr[0].id = FAL_POLICER_ATTR_CIR; policer_attr[1].id = FAL_POLICER_ATTR_CBS; - rv = fal_policer_get_attr(instance->sci_fal_obj[traf], + rv = fal_policer_get_attr(fal_obj, ARRAY_SIZE(policer_attr), policer_attr); if (rv && rv != -EOPNOTSUPP) { @@ -452,9 +467,9 @@ static int fal_policer_get_cfg(struct storm_ctl_instance *instance, instance->sci_ifp->if_name, instance->sci_vlan, rv); return rv; } - /* convert from bytes to kilobits */ - *max_rate = policer_attr[0].value.u64 * 8 / 1024; - *max_burst = policer_attr[1].value.u64 * 8 / 1024; + + *max_rate = BYTES_TO_METRIC_KBITS(policer_attr[0].value.u64); + *max_burst = BYTES_TO_METRIC_KBITS(policer_attr[1].value.u64); return 0; } @@ -463,12 +478,14 @@ static uint64_t storm_ctl_policy_get_fal_rate(struct dp_storm_ctl_policy *policy, struct ifnet *ifp) { - struct if_link_status link; + struct dp_ifnet_link_status link; if 
(policy->threshold_type == DP_STORM_CTL_THRESHOLD_ABS) return policy->threshold_val; - else if (policy->threshold_type == DP_STORM_CTL_THRESHOLD_PCT) { - if_get_link_status(ifp, &link); + if (policy->threshold_type == DP_STORM_CTL_THRESHOLD_PCT) { + if (ifp->if_type == IFT_L2VLAN) + ifp = ifp->if_parent; + dp_ifnet_link_status(ifp, &link); return ((uint64_t)link.link_speed * 1000 * policy->threshold_val)/10000; } @@ -482,7 +499,8 @@ static int fal_policer_apply_profile(struct storm_ctl_profile *profile, { uint64_t rate = 0; /* burst needs to be non 0 to start policer */ - uint64_t burst = 1 * (1024 / 8); /* convert from kilobits into bytes */ + uint64_t burst = METRIC_KBITS_TO_BYTES(1); + uint64_t kbits; int rv = 0; struct if_vlan_feat *vlan_feat; struct ifnet *ifp; @@ -500,38 +518,43 @@ static int fal_policer_apply_profile(struct storm_ctl_profile *profile, .value.u64 = rate} }; struct fal_attribute_t vlan_attr[3] = { - { .id = FAL_VLAN_FEATURE_INTERFACE_ID, - .value.u32 = instance->sci_ifp->if_index }, - { .id = FAL_VLAN_FEATURE_VLAN_ID, - .value.u16 = vlan } + { .id = FAL_VLAN_FEATURE_INTERFACE_ID }, + { .id = FAL_VLAN_FEATURE_VLAN_ID } }; struct fal_attribute_t port_attr; - + fal_object_t fal_obj; /* Work out rate. 
If this is an absolute value then use it */ - policer_attr[4].value.u64 = storm_ctl_policy_get_fal_rate( - &profile->scp_policies[traf], instance->sci_ifp) - * (1024 / 8); /* convert from kilobits into bytes */ - + kbits = storm_ctl_policy_get_fal_rate(&profile->scp_policies[traf], + instance->sci_ifp); + policer_attr[4].value.u64 = METRIC_KBITS_TO_BYTES(kbits); rv = fal_policer_create(ARRAY_SIZE(policer_attr), policer_attr, - &instance->sci_fal_obj[traf]); + &fal_obj); if (rv && rv != -EOPNOTSUPP) { RTE_LOG(ERR, STORM_CTL, "Could not create policer for %s %d in fal (%d)\n", instance->sci_ifp->if_name, vlan, rv); return rv; } + CMM_STORE_SHARED(instance->sci_fal_obj[traf], fal_obj); + + ifp = instance->sci_ifp; + if (ifp->if_type == IFT_L2VLAN) { + vlan = ifp->if_vlan; + ifp = ifp->if_parent; + } if (vlan) { /* * We have to create a vlan_feat, apply the policer to it, and * then apply the vlan_feat to the port directly. */ + vlan_attr[0].value.u32 = ifp->if_index; + vlan_attr[1].value.u16 = vlan; vlan_attr[2].id = fal_traffic_t_to_vlan_feat_type(traf); vlan_attr[2].value.objid = instance->sci_fal_obj[traf]; - ifp = instance->sci_ifp; vlan_feat = if_vlan_feat_get(ifp, vlan); if (!vlan_feat) { rv = if_vlan_feat_create(ifp, vlan, FAL_NULL_OBJECT_ID); @@ -569,44 +592,12 @@ static int fal_policer_apply_profile(struct storm_ctl_profile *profile, } else { port_attr.id = fal_traffic_t_to_storm_ctl_type(traf); port_attr.value.objid = instance->sci_fal_obj[traf]; - fal_l2_upd_port(instance->sci_ifp->if_index, &port_attr); + fal_l2_upd_port(ifp->if_index, &port_attr); } return rv; } -/* - * The rate has changed, update fal - */ -static int fal_policer_modify_profile(struct storm_ctl_profile *profile, - uint16_t vlan, - struct storm_ctl_instance *instance, - enum fal_traffic_type traf) -{ - - struct fal_attribute_t policer_bind_attr = {}; - int rv; - - if (!instance->sci_fal_obj[traf]) - fal_policer_apply_profile(profile, vlan, - instance, traf); - - policer_bind_attr.id = 
FAL_POLICER_ATTR_CIR; - policer_bind_attr.value.u64 = storm_ctl_policy_get_fal_rate( - &profile->scp_policies[traf], instance->sci_ifp) - * (1024 / 8); /* convert from kilobits into bytes */ - - rv = fal_policer_set_attr(instance->sci_fal_obj[traf], - &policer_bind_attr); - if (rv && rv != -EOPNOTSUPP) { - RTE_LOG(ERR, STORM_CTL, - "Could not update policer for %s %d in fal (%d)\n", - instance->sci_ifp->if_name, vlan, rv); - return rv; - } - return rv; -} - static int fal_policer_unapply_profile(struct ifnet *ifp, uint16_t vlan, struct storm_ctl_instance *instance, @@ -615,10 +606,12 @@ static int fal_policer_unapply_profile(struct ifnet *ifp, int rv = 0; struct fal_attribute_t port_attr; struct if_vlan_feat *vlan_feat = NULL; - struct fal_attribute_t vlan_attr[2] = { - { .id = FAL_VLAN_FEATURE_VLAN_ID, - .value.u16 = vlan } - }; + struct fal_attribute_t vlan_attr; + + if (ifp->if_type == IFT_L2VLAN) { + vlan = ifp->if_vlan; + ifp = ifp->if_parent; + } if (vlan) { vlan_feat = if_vlan_feat_get(ifp, vlan); @@ -630,11 +623,11 @@ static int fal_policer_unapply_profile(struct ifnet *ifp, } /* Remove the storm control from the vlan feature */ - vlan_attr[1].id = fal_traffic_t_to_vlan_feat_type(traf); - vlan_attr[1].value.objid = FAL_NULL_OBJECT_ID; + vlan_attr.id = fal_traffic_t_to_vlan_feat_type(traf); + vlan_attr.value.objid = FAL_NULL_OBJECT_ID; rv = fal_vlan_feature_set_attr(vlan_feat->fal_vlan_feat, - &vlan_attr[1]); + &vlan_attr); if (rv && rv != -EOPNOTSUPP) { RTE_LOG(ERR, STORM_CTL, "Could not remove vlan_feat for vlan %d in fal (%d)\n", @@ -656,7 +649,7 @@ static int fal_policer_unapply_profile(struct ifnet *ifp, ifp->if_name, vlan, rv); return rv; } - instance->sci_fal_obj[traf] = FAL_NULL_OBJECT_ID; + CMM_STORE_SHARED(instance->sci_fal_obj[traf], FAL_NULL_OBJECT_ID); if (vlan_feat && !vlan_feat->refcount) { rv = fal_vlan_feature_delete(vlan_feat->fal_vlan_feat); @@ -682,6 +675,47 @@ static int fal_policer_unapply_profile(struct ifnet *ifp, return rv; } +/* + * 
The rate has changed, update fal + */ +static int fal_policer_modify_profile(struct storm_ctl_profile *profile, + uint16_t vlan, + struct storm_ctl_instance *instance, + enum fal_traffic_type traf) +{ + + struct fal_attribute_t policer_bind_attr = {}; + int rv; + uint64_t kbits; + + if (!storm_control_can_create_in_fal(instance->sci_ifp, vlan)) + return 0; + + if (!instance->sci_fal_obj[traf]) + return fal_policer_apply_profile(profile, vlan, + instance, traf); + if (instance->sci_fal_obj[traf] && + profile->scp_policies[traf].threshold_type == + DP_STORM_CTL_THRESHOLD_NONE) + return fal_policer_unapply_profile(instance->sci_ifp, vlan, + instance, traf); + + kbits = storm_ctl_policy_get_fal_rate(&profile->scp_policies[traf], + instance->sci_ifp); + policer_bind_attr.id = FAL_POLICER_ATTR_CIR; + policer_bind_attr.value.u64 = METRIC_KBITS_TO_BYTES(kbits); + + rv = fal_policer_set_attr(instance->sci_fal_obj[traf], + &policer_bind_attr); + if (rv && rv != -EOPNOTSUPP) { + RTE_LOG(ERR, STORM_CTL, + "Could not update policer for %s %d in fal (%d)\n", + instance->sci_ifp->if_name, vlan, rv); + return rv; + } + return rv; +} + static bool storm_ctl_fal_update_needed(struct dp_storm_ctl_policy *prof_pol, struct dp_storm_ctl_policy *inst_pol) { @@ -709,19 +743,22 @@ static void storm_ctl_fal_update_profile(struct storm_ctl_profile *profile) &profile->scp_policies[i], &instance->sci_policy[i])) continue; - - rv = fal_policer_modify_profile(profile, - instance->sci_vlan, - instance, i); - if (rv) { - RTE_LOG(ERR, STORM_CTL, - "Could not update %s threshold for interface %s vlan %d\n", - fal_traffic_type_to_str(i), - instance->sci_ifp->if_name, - instance->sci_vlan); - } else - instance->sci_policy[i] = - profile->scp_policies[i]; + if (storm_control_can_create_in_fal( + instance->sci_ifp, instance->sci_vlan)) { + rv = fal_policer_modify_profile( + profile, instance->sci_vlan, + instance, i); + if (rv) { + RTE_LOG(ERR, STORM_CTL, + "Could not update %s threshold for interface 
%s vlan %d\n", + fal_traffic_type_to_str(i), + instance->sci_ifp->if_name, + instance->sci_vlan); + continue; + } + } + instance->sci_policy[i] = + profile->scp_policies[i]; } } } @@ -826,7 +863,8 @@ static int storm_ctl_set_threshold(bool set, * The value is restricted to have 2 fractional * digits in the yang model */ - policy->threshold_val = strtof(val, NULL) * 100; + policy->threshold_val = (unsigned long) + (strtof(val, NULL) * 100); } else { profile->scp_policies[tr_type].threshold_type = DP_STORM_CTL_THRESHOLD_NONE; @@ -924,38 +962,6 @@ static inline int storm_ctl_profile_name_match_fn(struct cds_lfht_node *node, return 0; } -static bool storm_ctl_cfg_check_profile(struct storm_ctl_profile *profile) -{ - enum fal_traffic_type traf; - - if (profile->scp_recovery_interval || - profile->scp_actions) - return true; - - for (traf = FAL_TRAFFIC_UCAST; traf < FAL_TRAFFIC_MAX; traf++) { - if (profile->scp_policies[traf].threshold_val) - return true; - } - - return false; -} - -static void storm_ctl_free_profile(struct rcu_head *head) -{ - struct storm_ctl_profile *profile; - - profile = caa_container_of(head, struct storm_ctl_profile, scp_rcu); - free(profile->scp_name); - free(profile); -} - -static void -storm_ctl_delete_profile(struct storm_ctl_profile *profile) -{ - cds_lfht_del(storm_ctl_profile_tbl, &profile->scp_node); - call_rcu(&profile->scp_rcu, storm_ctl_free_profile); -} - static struct storm_ctl_profile * storm_ctl_add_profile(const char *name) { @@ -1078,7 +1084,8 @@ static int storm_ctl_set_profile(bool set, FILE *f, int argc, char **argv) return storm_ctl_set_threshold(set, tr_type, bw_type, argv[6], profile); - } else if (!set) { + } + if (!set) { storm_ctl_set_threshold(set, tr_type, DP_STORM_CTL_THRESHOLD_NONE, 0, profile); @@ -1205,7 +1212,7 @@ static int send_storm_ctl_notification(struct storm_ctl_instance *instance, if (result < 0) goto err; - return send_dp_event(msg); + return dp_send_event_to_vplaned(msg); err: if (instance->sci_vlan) 
snprintf(vlan_str, 13, " (vlan %d)", instance->sci_vlan); @@ -1234,73 +1241,15 @@ static void storm_ctl_trigger_actions(struct storm_ctl_instance *instance, if_stop(ifp); storm_ctl_recovery_tmr_start(ifp, interval); } - } else { - RTE_LOG(ERR, STORM_CTL, - "Could not find storm-ctl instance for %s", - ifp->if_name); } if (storm_ctl_notification) send_storm_ctl_notification(instance, tr_type, pkt_drops); } -int storm_ctl_set_profile_on_intf(bool set, struct ifnet *ifp, - struct storm_ctl_profile *profile, - uint16_t vlan); - -/* - * storm-ctl SET vlan profile - * storm-ctl DELETE vlan - */ -static int storm_ctl_set_intf_vlan_cfg(bool set, struct ifnet *ifp, - const char *vlan_str, - const char *profile_name) -{ - uint16_t vlan; - struct storm_ctl_profile *profile = NULL; - struct if_storm_ctl_info *sc_info; - int rv; - - vlan = atoi(vlan_str); - sc_info = ifp->sc_info; - if (set) { - profile = storm_ctl_find_profile(profile_name); - if (!profile) { - profile = storm_ctl_add_profile(profile_name); - if (!profile) { - RTE_LOG(ERR, STORM_CTL, - "Could not create storm control profile %s\n", - profile_name); - return -ENOMEM; - } - } - - rv = storm_ctl_set_profile_on_intf(set, ifp, profile, vlan); - if (rv) { - RTE_LOG(ERR, STORM_CTL, - "Could not update (%s, %d) with profile %s\n", - ifp->if_name, vlan, profile->scp_name); - return rv; - } - return 0; - } - - if (!sc_info) - return -ENOENT; - - rv = storm_ctl_set_profile_on_intf(set, ifp, NULL, vlan); - if (rv) { - RTE_LOG(ERR, STORM_CTL, - "Could not remove profile from (%s, %d)\n", - ifp->if_name, vlan); - return rv; - } - return 0; -} - -int storm_ctl_set_profile_on_intf(bool set, struct ifnet *ifp, - struct storm_ctl_profile *profile, - uint16_t vlan) +static int storm_ctl_set_profile_on_intf(bool set, struct ifnet *ifp, + struct storm_ctl_profile *profile, + uint16_t vlan) { struct storm_ctl_instance *instance; enum fal_traffic_type i; @@ -1343,11 +1292,8 @@ int storm_ctl_set_profile_on_intf(bool set, struct ifnet 
*ifp, if (storm_ctl_fal_update_needed( &profile->scp_policies[i], &instance->sci_policy[i])) { - - if (!vlan || - (ifp->if_brport && - bridge_port_is_vlan_member(ifp->if_brport, - vlan))) + if (storm_control_can_create_in_fal( + instance->sci_ifp, vlan)) fal_policer_apply_profile(profile, vlan, instance, i); instance->sci_policy[i] = @@ -1385,6 +1331,56 @@ int storm_ctl_set_profile_on_intf(bool set, struct ifnet *ifp, return rv; } +/* + * storm-ctl SET vlan profile + * storm-ctl DELETE vlan + */ +static int storm_ctl_set_intf_vlan_cfg(bool set, struct ifnet *ifp, + const char *vlan_str, + const char *profile_name) +{ + uint16_t vlan; + struct storm_ctl_profile *profile = NULL; + struct if_storm_ctl_info *sc_info; + int rv; + + vlan = atoi(vlan_str); + sc_info = ifp->sc_info; + if (set) { + profile = storm_ctl_find_profile(profile_name); + if (!profile) { + profile = storm_ctl_add_profile(profile_name); + if (!profile) { + RTE_LOG(ERR, STORM_CTL, + "Could not create storm control profile %s\n", + profile_name); + return -ENOMEM; + } + } + + rv = storm_ctl_set_profile_on_intf(set, ifp, profile, vlan); + if (rv) { + RTE_LOG(ERR, STORM_CTL, + "Could not update (%s, %d) with profile %s\n", + ifp->if_name, vlan, profile->scp_name); + return rv; + } + return 0; + } + + if (!sc_info) + return -ENOENT; + + rv = storm_ctl_set_profile_on_intf(set, ifp, NULL, vlan); + if (rv) { + RTE_LOG(ERR, STORM_CTL, + "Could not remove profile from (%s, %d)\n", + ifp->if_name, vlan); + return rv; + } + return 0; +} + /* * storm-ctl profile * storm-ctl profile @@ -1394,28 +1390,23 @@ static int storm_ctl_set_intf_cfg(bool set, FILE *f, int argc, char **argv) char *ifname; struct ifnet *ifp = NULL; struct storm_ctl_profile *profile; - int rv = 0; + int rv = 0, vlan; if (argc < 4) goto error; ifname = argv[2]; - ifp = ifnet_byifname(ifname); + ifp = dp_ifnet_byifname(ifname); if (!ifp) { - if (!cfg_list_storm && storm_ctl_replay_init()) { - RTE_LOG(ERR, DATAPLANE, - "Could not set up command 
replay cache\n"); - return -ENOMEM; - } - - RTE_LOG(INFO, DATAPLANE, - "Caching storm-ctl command for interface %s\n", + RTE_LOG(ERR, DATAPLANE, + "Storm control applied, but interface missing %s\n", ifname); - cfg_if_list_add(cfg_list_storm, ifname, argc, argv); - return 0; + return -1; } - if (ifp->if_type != IFT_ETHER) { + vlan = ifp->if_vlan; + + if (ifp->if_type != IFT_ETHER && ifp->if_type != IFT_L2VLAN) { fprintf(f, "storm-ctl command not supported on %s", ifp->if_name); return -1; @@ -1451,20 +1442,20 @@ static int storm_ctl_set_intf_cfg(bool set, FILE *f, int argc, char **argv) } } return storm_ctl_set_profile_on_intf(set, ifp, - profile, 0); - } else { - storm_ctl_set_profile_on_intf(set, ifp, NULL, 0); - goto check_ifp_sc; + profile, vlan); } + storm_ctl_set_profile_on_intf(set, ifp, NULL, vlan); + goto check_ifp_sc; } if (!strcmp(argv[3], "vlan")) { if (set && argc == 7) { - if (strcmp(argv[5], "profile")) + if (strcmp(argv[5], "profile") != 0) goto error; return storm_ctl_set_intf_vlan_cfg(set, ifp, argv[4], argv[6]); - } else if (!set && argc == 5) { + } + if (!set && argc == 5) { storm_ctl_set_intf_vlan_cfg(set, ifp, argv[4], argv[6]); goto check_ifp_sc; } else { @@ -1530,13 +1521,13 @@ int cmd_storm_ctl_cfg(FILE *f, int argc, char **argv) if (!strcmp(argv[2], "detection-interval")) return storm_ctl_set_detection_interval(set, f, argc, argv); - else if (!strcmp(argv[2], "notification")) { + if (!strcmp(argv[2], "notification")) { storm_ctl_set_notification(set); return 0; - } else if (!strcmp(argv[2], "profile")) { + } + if (!strcmp(argv[2], "profile")) return storm_ctl_set_profile(set, f, argc, argv); - } else - return storm_ctl_set_intf_cfg(set, f, argc, argv); + return storm_ctl_set_intf_cfg(set, f, argc, argv); error: fprintf(f, "Usage: storm-ctl < cmd | ifname >"); @@ -1547,7 +1538,7 @@ static struct ifnet *storm_ctl_intf_check(char *ifname, FILE *f) { struct ifnet *ifp; - ifp = ifnet_byifname(ifname); + ifp = dp_ifnet_byifname(ifname); if 
(!ifp) { fprintf(f, "Could not find interface %s\n", ifname); @@ -1588,10 +1579,15 @@ static void storm_ctl_show_instance(json_writer_t *wr, [FAL_POLICER_STAT_RED_PACKETS] = "pkts_dropped", [FAL_POLICER_STAT_RED_BYTES] = "bytes_dropped" }; - enum fal_policer_stat_type cntr_ids[FAL_POLICER_STAT_MAX], j; - - for (j = 0; j < FAL_POLICER_STAT_MAX; j++) - cntr_ids[j] = j; + enum fal_policer_stat_type cntr_ids[] = { + FAL_POLICER_STAT_GREEN_PACKETS, + FAL_POLICER_STAT_GREEN_BYTES, + FAL_POLICER_STAT_RED_PACKETS, + FAL_POLICER_STAT_RED_BYTES + }; + uint32_t num_stats = ARRAY_SIZE(cntr_ids); + enum fal_policer_stat_type j; + fal_object_t fal_obj; jsonw_start_object(wr); jsonw_string_field(wr, "profile", @@ -1611,14 +1607,17 @@ static void storm_ctl_show_instance(json_writer_t *wr, jsonw_uint_field(wr, "max_rate_kbps", max_rate); jsonw_uint_field(wr, "burst_kbps", burst_rate); - if (instance->sci_fal_obj[i]) - fal_policer_dump(instance->sci_fal_obj[i], wr); + fal_obj = CMM_LOAD_SHARED(instance->sci_fal_obj[i]); + if (fal_obj) + fal_policer_dump(fal_obj, wr); memset(cntrs, 0, sizeof(cntrs)); - fal_policer_get_sc_stats(instance, cntr_ids, cntrs, i); + fal_policer_get_sc_stats(instance, num_stats, cntr_ids, + cntrs, i); - for (j = 0; j < FAL_POLICER_STAT_MAX; j++) - jsonw_uint_field(wr, fal_stat_strs[j], cntrs[j]); + for (j = 0; j < num_stats; j++) + jsonw_uint_field(wr, fal_stat_strs[cntr_ids[j]], + cntrs[j]); jsonw_end_object(wr); } jsonw_end_object(wr); @@ -1765,7 +1764,7 @@ static int cmd_storm_ctl_show(FILE *f, int argc, char **argv) if (ifp) storm_ctl_show_intf(ifp, wr); else - ifnet_walk(storm_ctl_show_intf, wr); + dp_ifnet_walk(storm_ctl_show_intf, wr); jsonw_end_array(wr); jsonw_end_object(wr); jsonw_destroy(&wr); @@ -1779,13 +1778,17 @@ static int cmd_storm_ctl_show(FILE *f, int argc, char **argv) static void storm_ctl_clear_intf_stats(struct ifnet *ifp, void *ctx __unused) { int i, rc; - enum fal_policer_stat_type cntr_ids[FAL_POLICER_STAT_MAX]; + enum 
fal_policer_stat_type cntr_ids[] = { + FAL_POLICER_STAT_GREEN_PACKETS, + FAL_POLICER_STAT_GREEN_BYTES, + FAL_POLICER_STAT_RED_PACKETS, + FAL_POLICER_STAT_RED_BYTES + }; + uint32_t num_stats = ARRAY_SIZE(cntr_ids); struct if_storm_ctl_info *sc_info; struct storm_ctl_instance *instance; struct cds_lfht_iter iter; - - for (i = 0; i < FAL_POLICER_STAT_MAX; i++) - cntr_ids[i] = i; + fal_object_t fal_obj; sc_info = rcu_dereference(ifp->sc_info); if (!sc_info) @@ -1797,11 +1800,12 @@ static void storm_ctl_clear_intf_stats(struct ifnet *ifp, void *ctx __unused) memset(instance->sci_pkt_drops, 0, sizeof(instance->sci_pkt_drops)); for (i = 0; i < FAL_TRAFFIC_MAX; i++) { - if (!instance->sci_fal_obj[i]) + fal_obj = CMM_LOAD_SHARED(instance->sci_fal_obj[i]); + if (!fal_obj) continue; - rc = fal_policer_clear_stats(instance->sci_fal_obj[i], - FAL_POLICER_STAT_MAX, + rc = fal_policer_clear_stats(fal_obj, + num_stats, cntr_ids); if (rc) { RTE_LOG(ERR, DATAPLANE, @@ -1820,11 +1824,11 @@ static int cmd_storm_ctl_clear(FILE *f, int argc, char **argv) if (argc < 3) goto error; - if (strcmp(argv[2], "stats")) + if (strcmp(argv[2], "stats") != 0) goto error; if (argc == 4) { - ifp = ifnet_byifname(argv[3]); + ifp = dp_ifnet_byifname(argv[3]); if (!ifp) { fprintf(f, "Could not find interface %s", argv[3]); @@ -1835,7 +1839,7 @@ static int cmd_storm_ctl_clear(FILE *f, int argc, char **argv) if (ifp) storm_ctl_clear_intf_stats(ifp, NULL); else - ifnet_walk(storm_ctl_clear_intf_stats, NULL); + dp_ifnet_walk(storm_ctl_clear_intf_stats, NULL); return 0; @@ -1886,6 +1890,10 @@ storm_ctl_if_vlan_add(struct ifnet *ifp, if (!instance) return; + if (!storm_control_can_create_in_fal( + instance->sci_ifp, instance->sci_vlan)) + return; + /* Apply rates from the profile */ for (int i = FAL_TRAFFIC_UCAST; i < FAL_TRAFFIC_MAX; i++) { fal_policer_apply_profile(instance->sci_profile, @@ -1908,38 +1916,112 @@ storm_ctl_if_vlan_del(struct ifnet *ifp, /* Apply rates from the profile */ for (int i = 
FAL_TRAFFIC_UCAST; i < FAL_TRAFFIC_MAX; i++) { - fal_policer_unapply_profile(instance->sci_ifp, vlan, - instance, i); + if (instance->sci_fal_obj[i]) + fal_policer_unapply_profile(instance->sci_ifp, vlan, + instance, i); } } static void -storm_ctl_if_index_pre_unset(struct ifnet *ifp) +storm_ctl_if_fal_apply(struct ifnet *ifp) { struct storm_ctl_instance *instance; struct cds_lfht_iter iter; - if (!ifp->sc_info || !ifp->sc_info->sc_instance_tbl) - return; + DP_DEBUG(STORM_CTL, DEBUG, DATAPLANE, + "trigger FAL apply storm control to interface %s\n", + ifp->if_name); cds_lfht_for_each_entry(ifp->sc_info->sc_instance_tbl, &iter, instance, sci_node) { + if (instance->sci_vlan && + (!ifp->if_brport || + !bridge_port_is_vlan_member(ifp->if_brport, + instance->sci_vlan))) + continue; for (int i = FAL_TRAFFIC_UCAST; i < FAL_TRAFFIC_MAX; i++) { - fal_policer_unapply_profile(instance->sci_ifp, - instance->sci_vlan, - instance, i); + if (instance->sci_fal_obj[i]) + continue; + fal_policer_apply_profile(instance->sci_profile, + instance->sci_vlan, + instance, i); + } + } +} + +static void +storm_ctl_if_fal_unapply(struct ifnet *ifp) +{ + struct storm_ctl_instance *instance; + struct cds_lfht_iter iter; + + DP_DEBUG(STORM_CTL, DEBUG, DATAPLANE, + "trigger FAL unapply storm control to interface %s\n", + ifp->if_name); + + cds_lfht_for_each_entry(ifp->sc_info->sc_instance_tbl, &iter, + instance, sci_node) { + for (int i = FAL_TRAFFIC_UCAST; i < FAL_TRAFFIC_MAX; i++) { + if (instance->sci_fal_obj[i]) + fal_policer_unapply_profile(instance->sci_ifp, + instance->sci_vlan, + instance, i); } + } +} + +static void +storm_ctl_if_del(struct ifnet *ifp) +{ + struct storm_ctl_instance *instance; + struct cds_lfht_iter iter; + + cds_lfht_for_each_entry(ifp->sc_info->sc_instance_tbl, &iter, + instance, sci_node) { storm_ctl_del_instance_internal(ifp->sc_info->sc_instance_tbl, instance); } storm_ctl_del_ctx(ifp); } +static void +storm_ctl_if_feat_mode_change(struct ifnet *ifp, + enum 
if_feat_mode_event event) +{ + if (!ifp->sc_info || !ifp->sc_info->sc_instance_tbl) + /* nothing to do */ + return; + + switch (event) { + case IF_FEAT_MODE_EVENT_L2_FAL_ENABLED: + if (storm_control_can_create_in_fal(ifp, 0)) + storm_ctl_if_fal_apply(ifp); + break; + case IF_FEAT_MODE_EVENT_L2_FAL_DISABLED: + storm_ctl_if_fal_unapply(ifp); + break; + case IF_FEAT_MODE_EVENT_EMB_FEAT_CHANGED: + if (storm_control_can_create_in_fal(ifp, 0)) + storm_ctl_if_fal_apply(ifp); + else + storm_ctl_if_fal_unapply(ifp); + break; + case IF_FEAT_MODE_EVENT_L2_DELETED: + DP_DEBUG(STORM_CTL, DEBUG, DATAPLANE, + "trigger storm control delete for interface %s\n", + ifp->if_name); + storm_ctl_if_del(ifp); + break; + default: + break; + } +} + static const struct dp_event_ops storm_ctl_events = { .if_link_change = storm_ctl_if_link_change, .if_vlan_add = storm_ctl_if_vlan_add, .if_vlan_del = storm_ctl_if_vlan_del, - .if_index_pre_unset = storm_ctl_if_index_pre_unset, + .if_feat_mode_change = storm_ctl_if_feat_mode_change, }; DP_STARTUP_EVENT_REGISTER(storm_ctl_events); diff --git a/src/switchport.c b/src/switchport.c index 60c9f085..06a13876 100644 --- a/src/switchport.c +++ b/src/switchport.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2018-2019, AT&T Intellectual Property. + * Copyright (c) 2018-2020, AT&T Intellectual Property. * All rights reserved. 
* * SPDX-License-Identifier: LGPL-2.1-only @@ -8,79 +8,17 @@ */ #include -#include "bridge.h" #include "control.h" #include "fal.h" #include "if_var.h" #include #include "vplane_log.h" #include -#include "dp_event.h" #include "commands.h" -struct cfg_if_list *cfg_list; - -static void -switchport_event_if_index_set(struct ifnet *ifp, uint32_t ifindex); -static void -switchport_event_if_index_unset(struct ifnet *ifp, uint32_t ifindex); - -static const struct dp_event_ops switchport_event_ops = { - .if_index_set = switchport_event_if_index_set, - .if_index_unset = switchport_event_if_index_unset, -}; - -static void -switchport_event_if_index_set(struct ifnet *ifp, uint32_t ifindex __unused) -{ - struct cfg_if_list_entry *le; - - if (!cfg_list) - return; - - le = cfg_if_list_lookup(cfg_list, ifp->if_name); - if (!le) - return; - - RTE_LOG(INFO, DATAPLANE, - "Replaying switchport command %s for interface %s\n", - le->le_buf, ifp->if_name); - cmd_switchport(NULL, le->le_argc, le->le_argv); - cfg_if_list_del(cfg_list, ifp->if_name); - if (!cfg_list->if_list_count) { - cfg_if_list_destroy(&cfg_list); - dp_event_unregister(&switchport_event_ops); - } -} - -static void -switchport_event_if_index_unset(struct ifnet *ifp, uint32_t ifindex __unused) -{ - if (!cfg_list) - return; - - cfg_if_list_del(cfg_list, ifp->if_name); - if (!cfg_list->if_list_count) { - cfg_if_list_destroy(&cfg_list); - dp_event_unregister(&switchport_event_ops); - } -} - -static int switchport_replay_init(void) -{ - if (!cfg_list) { - cfg_list = cfg_if_list_create(); - if (!cfg_list) - return -ENOMEM; - } - dp_event_register(&switchport_event_ops); - return 0; -} - int cmd_switchport(FILE *f, int argc, char **argv) { struct ifnet *ifp; - struct fal_attribute_t attr; if (argc != 4) { if (f) { @@ -93,36 +31,26 @@ int cmd_switchport(FILE *f, int argc, char **argv) return -EINVAL; } - ifp = ifnet_byifname(argv[1]); + ifp = dp_ifnet_byifname(argv[1]); if (!ifp) { - if (!cfg_list && switchport_replay_init()) { 
- RTE_LOG(ERR, DATAPLANE, - "Could not set up command replay cache\n"); - return -ENOMEM; - } - RTE_LOG(INFO, DATAPLANE, - "Caching switchport command for interface %s\n", - argv[1]); - cfg_if_list_add(cfg_list, argv[1], argc, argv); - return 0; + "switchport command but interface missing %s\n", + argv[1]); + fprintf(f, "%s: failed to find %s\n", __func__, argv[1]); + return -EINVAL; } if (!strcmp(argv[2], "hw-switching")) { - if (!strcmp(argv[3], "enable")) { - attr.value.u8 = FAL_PORT_HW_SWITCHING_ENABLE; + if (!strcmp(argv[3], "enable")) ifp->hw_forwarding = true; - } else if (!strcmp(argv[3], "disable")) { - attr.value.u8 = FAL_PORT_HW_SWITCHING_DISABLE; + else if (!strcmp(argv[3], "disable")) ifp->hw_forwarding = false; - } else + else return -EINVAL; - attr.id = FAL_PORT_ATTR_HW_SWITCH_MODE; - fal_l2_upd_port(ifp->if_index, &attr); - dp_event(DP_EVT_IF_HW_SWITCHING_CHANGE, 0, ifp, - ifp->hw_forwarding, 0, NULL); - /* TODO move bridge code to using event */ - bridge_upd_hw_forwarding(ifp); + if_change_features_mode(ifp, + ifp->hw_forwarding ? + IF_FEAT_MODE_FLAG_L2_FAL_ENABLE : + IF_FEAT_MODE_FLAG_L2_FAL_DISABLE); return 0; } diff --git a/src/team.c b/src/team.c index aec90546..13757e1c 100644 --- a/src/team.c +++ b/src/team.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015-2016 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -26,8 +26,8 @@ struct nlmsghdr; #include struct team_port_info { - struct ifnet *ifp_master; - struct ifnet *ifp_slave; + struct ifnet *ifp_bond; + struct ifnet *ifp_member; uint32_t ifindex; uint32_t port_ifindex; int changed; @@ -38,8 +38,8 @@ struct team_port_info { }; struct team_option_info { - struct ifnet *ifp_master; - struct ifnet *ifp_slave; + struct ifnet *ifp_bond; + struct ifnet *ifp_member; uint32_t ifindex; uint32_t changed; union { @@ -111,9 +111,9 @@ static int process_team_ports(const struct team_port_info *info) return MNL_CB_OK; if (info->removed) - rv = lag_slave_delete(info->ifp_master, info->ifp_slave); + rv = lag_member_delete(info->ifp_bond, info->ifp_member); else - rv = lag_slave_add(info->ifp_master, info->ifp_slave); + rv = lag_member_add(info->ifp_bond, info->ifp_member); DP_DEBUG(LAG, INFO, DATAPLANE, "team %s %u %u %s%s\n", info->removed ? "remove" : "add", @@ -146,28 +146,28 @@ static int process_team_portlist(const struct nlmsghdr *nlh) if (ret != MNL_CB_OK) return ret; - info.ifp_master = ifnet_byteam(info.ifindex); - if (info.ifp_master == NULL) { + info.ifp_bond = ifnet_byteam(info.ifindex); + if (info.ifp_bond == NULL) { DP_DEBUG(LAG, ERR, DATAPLANE, - "team unable to find master for slave ifindex %d\n", + "team unable to find team for member ifindex %d\n", info.port_ifindex); return MNL_CB_OK; } if (info.port_ifindex) { - info.ifp_slave = ifnet_byifindex(info.port_ifindex); + info.ifp_member = dp_ifnet_byifindex(info.port_ifindex); - if (info.ifp_slave == NULL) { + if (info.ifp_member == NULL) { DP_DEBUG(LAG, ERR, DATAPLANE, - "team unable to find slave ifindex %d\n", + "team unable to find member ifindex %d\n", info.port_ifindex); return MNL_CB_OK; } - if (info.ifp_slave->aggregator != NULL && - info.ifp_slave->aggregator != info.ifp_master) { + if (info.ifp_member->aggregator != NULL && + info.ifp_member->aggregator != info.ifp_bond) { DP_DEBUG(LAG, ERR, DATAPLANE, - "team slave ifindex %d unexpected master\n", + "team 
member ifindex %d unexpected team\n", info.port_ifindex); return MNL_CB_OK; } @@ -274,13 +274,13 @@ static int team_option_list(const struct nlattr *attr, void *data) static int process_team_options(const struct team_option_info *info) { if (!strcmp(info->name, "enabled")) { - lag_select(info->ifp_slave, info->data.u32); - lag_slave_sync_mac_address(info->ifp_slave); + lag_select(info->ifp_member, info->data.u32); + lag_member_sync_mac_address(info->ifp_member); } else if (!strcmp(info->name, "mode")) { if (!strcmp(info->data.str, "activebackup")) - lag_mode_set_activebackup(info->ifp_master); + lag_mode_set_activebackup(info->ifp_bond); else if (!strcmp(info->data.str, "loadbalance")) - lag_mode_set_balance(info->ifp_master); + lag_mode_set_balance(info->ifp_bond); else { DP_DEBUG(LAG, ERR, DATAPLANE, "team unknown mode \"%s\"\n", info->data.str); @@ -294,10 +294,10 @@ static int process_team_options(const struct team_option_info *info) /* future work */ return MNL_CB_OK; else if (!strcmp(info->name, "activeport")) { - struct ifnet *ifp_slave = ifnet_byifindex(info->data.u32); + struct ifnet *ifp_member = dp_ifnet_byifindex(info->data.u32); - if (ifp_slave) - lag_activeport(info->ifp_master, ifp_slave); + if (ifp_member) + lag_set_activeport(info->ifp_bond, ifp_member); else DP_DEBUG(LAG, ERR, DATAPLANE, "team cannot find activeport ifindex %u\n", @@ -332,19 +332,19 @@ static int process_team_optionlist(const struct nlmsghdr *nlh) if (ret != MNL_CB_OK) return ret; - info.ifp_master = ifnet_byteam(info.ifindex); - if (info.ifp_master == NULL) { + info.ifp_bond = ifnet_byteam(info.ifindex); + if (info.ifp_bond == NULL) { DP_DEBUG(LAG, ERR, DATAPLANE, - "team cannot find master ifindex %d\n", info.ifindex); + "team cannot find team ifindex %d\n", info.ifindex); return MNL_CB_OK; } if (info.port_ifindex) { - info.ifp_slave = ifnet_byifindex(info.port_ifindex); - if (!info.ifp_slave || - info.ifp_slave->aggregator != info.ifp_master) { + info.ifp_member = 
dp_ifnet_byifindex(info.port_ifindex); + if (!info.ifp_member || + info.ifp_member->aggregator != info.ifp_bond) { DP_DEBUG(LAG, ERR, DATAPLANE, - "team master changed for slave ifindex %d\n", + "team team changed for member ifindex %d\n", info.port_ifindex); return MNL_CB_OK; } diff --git a/src/transceiver.h b/src/transceiver.h index 08e38f80..ed2a08e7 100644 --- a/src/transceiver.h +++ b/src/transceiver.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2018, AT&T Intellectual Property. + * Copyright (c) 2018-2019, AT&T Intellectual Property. * All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only diff --git a/src/udp_handler.c b/src/udp_handler.c index 8dd524a0..10b2179a 100644 --- a/src/udp_handler.c +++ b/src/udp_handler.c @@ -3,7 +3,7 @@ * * Copyright (c) 2016 by Brocade Communications Systems, Inc. * All rights reserved. - * Copyright (c) 2017, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017,2019-2020, AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only */ @@ -20,7 +20,7 @@ #include "compiler.h" #include "l2tp/l2tpeth.h" -#include "pktmbuf.h" +#include "pktmbuf_internal.h" #include "snmp_mib.h" #include "udp_handler.h" @@ -47,14 +47,14 @@ int udp_handler_lookup(short af, uint32_t dest_port, udp_port_handler *handler) (void **)handler); } -/* Not thread safe, must be called on master thread */ +/* Not thread safe, must be called on main thread */ int udp_handler_register(short af, uint32_t dest_port, udp_port_handler handler) { return rte_hash_add_key_data(af == AF_INET ? ipv4_udp_table : ipv6_udp_table, &dest_port, handler); } -/* Not thread safe, must be called on master thread */ +/* Not thread safe, must be called on main thread */ void udp_handler_unregister(short af, uint32_t dest_port) { rte_hash_del_key(af == AF_INET ? 
ipv4_udp_table : ipv6_udp_table, @@ -110,18 +110,17 @@ int udp_input(struct rte_mbuf *m, int af, struct ifnet *input_ifp) return -1; } - udp = pktmbuf_mtol4(m, struct udphdr *); + udp = dp_pktmbuf_mtol4(m, struct udphdr *); if ((udp_handler_lookup(af, udp->dest, &handler) >= 0) && - !handler(m, pktmbuf_mtol3(m, void *), udp, input_ifp)) { + !handler(m, dp_pktmbuf_mtol3(m, void *), udp, input_ifp)) { UDPSTAT_INC(UDP_MIB_INDATAGRAMS); return 0; } if (af == AF_INET) return l2tp_udpv4_recv_encap( - m, pktmbuf_mtol3(m, const void *), udp); - else - return l2tp_udpv6_recv_encap( - m, pktmbuf_mtol3(m, const void *), udp); + m, dp_pktmbuf_mtol3(m, const void *), udp); + return l2tp_udpv6_recv_encap( + m, dp_pktmbuf_mtol3(m, const void *), udp); } diff --git a/src/udp_handler.h b/src/udp_handler.h index e0082739..2845fcd1 100644 --- a/src/udp_handler.h +++ b/src/udp_handler.h @@ -1,7 +1,7 @@ /* * Handlers for well known UDP ports, registered at init time. * - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -14,9 +14,8 @@ #include #include -#include "if_var.h" +#include "interface.h" -struct ifnet; struct rte_mbuf; struct udphdr; diff --git a/src/util.c b/src/util.c index e0072b99..5dc7737e 100644 --- a/src/util.c +++ b/src/util.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2011-2017 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -25,7 +25,11 @@ #include #include #include +#include #include +#include +#include +#include #include "bitmask.h" #include "urcu.h" @@ -143,13 +147,14 @@ static int __net_ratelimit(uint64_t now) printed = 0; epoch = 0; return 1; - } else if (printed < 10) { + } + if (printed < 10) { ++printed; return 1; - } else { - ++missed; - return 0; } + + ++missed; + return 0; } @@ -174,10 +179,10 @@ static unsigned int xdigit2val(unsigned char c) { if (isdigit(c)) return c - '0'; - else if (isupper(c)) + if (isupper(c)) return c - 'A' + 10; - else - return c - 'a' + 10; + + return c - 'a' + 10; } @@ -316,6 +321,19 @@ int str_unsplit(char *buf, size_t n, int argc, char **argv) return 0; } +/* Pop next argument from list */ +char *next_arg(int *argcp, char ***argvp) +{ + char *arg = NULL; + + if (*argcp > 0) { + arg = *argvp[0]; + *argcp -= 1; + *argvp += 1; + } + return arg; +} + /* Like snprintf but concatinates to existing string */ size_t snprintfcat(char *buf, size_t size, const char *fmt, ...) { @@ -333,6 +351,25 @@ size_t snprintfcat(char *buf, size_t size, const char *fmt, ...) * returns 0 on success, -errno on error */ int get_unsigned(const char *str, unsigned int *ptr) +{ + int ret; + unsigned long val = 0; + + ret = get_unsigned_long(str, &val); + if (ret) + return ret; + + if (val > UINT_MAX) + return -ERANGE; + + *ptr = val; + return 0; +} + +/* convert string to unsigned value. 
+ * returns 0 on success, -errno on error + */ +int get_unsigned_long(const char *str, unsigned long *ptr) { char *endp = NULL; unsigned long val; @@ -343,8 +380,6 @@ int get_unsigned(const char *str, unsigned int *ptr) return -EINVAL; if (val == ULONG_MAX && errno == ERANGE) return -ERANGE; - if (val > UINT_MAX) - return -ERANGE; *ptr = val; return 0; @@ -377,7 +412,7 @@ int get_signed(const char *str, int *ptr) int get_unsigned_short(const char *str, unsigned short *ptr) { int result; - unsigned int val; + unsigned int val = 0; result = get_unsigned(str, &val); if (result < 0) @@ -396,7 +431,7 @@ int get_unsigned_short(const char *str, unsigned short *ptr) int get_unsigned_char(const char *str, unsigned char *ptr) { int result; - unsigned int val; + unsigned int val = 0; result = get_unsigned(str, &val); if (result < 0) @@ -451,15 +486,34 @@ int get_bool(const char *str, bool *ptr) return 0; } +/* convert string to float value. + * returns 0 on success, -errno on error + */ +float get_float(const char *str, float *ptr) +{ + char *endp = NULL; + float val; + + errno = 0; + val = strtof(str, &endp); + if (*str == '\0' || !endp || *endp) + return -EINVAL; + if (errno == ERANGE) + return -ERANGE; + + *ptr = val; + return 0; +} + static unsigned char xdigit(int c) { if (isdigit(c)) return c - '0'; - else if (isupper(c)) + if (isupper(c)) return c - 'A' + 10; - else - return c - 'a' + 10; + return c - 'a' + 10; } + /* * Parse bitmask expressed as hex * needs to handle up to RTE_MAX_LCORE and RTE_MAX_ETHPORTS bits (ie 128) @@ -493,6 +547,27 @@ int bitmask_parse(bitmask_t *msk, const char *str) return 0; } +int bitmask_parse_bytes(bitmask_t *mask, const uint8_t *bytes, uint8_t len) +{ + unsigned int offs; + int i; + + if (len > BITMASK_BYTESZ) + return -EINVAL; + + bitmask_zero(mask); + + for (i = len - 1, offs = 0; i >= 0; --i, ++offs) { + uint64_t val; + + val = bytes[i]; + val <<= (8 * (offs % (UINT64_BIT / 8))); + mask->_bits[offs / (UINT64_BIT / 8)] |= val; + } + + 
return 0; +} + /* Print out bitmask as long hex string. */ void bitmask_sprint(const bitmask_t *msk, char *buf, size_t sz) { @@ -569,57 +644,9 @@ int defer_rcu_huge(void *ptr, size_t sz) return 0; } -static struct rte_timer dp_ht_defer_timer = RTE_TIMER_INITIALIZER; -struct cds_list_head dp_ht_defer_list = CDS_LIST_HEAD_INIT(dp_ht_defer_list); -static rte_spinlock_t dp_ht_defer_lock = RTE_SPINLOCK_INITIALIZER; - -struct dp_ht_defer_entry { - struct cds_list_head list; - struct cds_lfht *table; -}; - -static void dp_ht_destroy_event(struct rte_timer *tim __rte_unused, - void *arg __rte_unused) -{ - struct dp_ht_defer_entry *entry; - struct dp_ht_defer_entry *next; - - rte_spinlock_lock(&dp_ht_defer_lock); - cds_list_for_each_entry_safe(entry, next, &dp_ht_defer_list, list) { - if (!cds_lfht_destroy(entry->table, NULL)) - RTE_LOG(ERR, DATAPLANE, - "hash table could not be deleted"); - cds_list_del(&entry->list); - free(entry); - } - rte_spinlock_unlock(&dp_ht_defer_lock); -} - -/* - * We can't call cds_lfht_destroy from a call_rcu thread. In that case - * we can call this func which will queue it and set a timer event so that - * the master thread can destroy it. 
- */ void dp_ht_destroy_deferred(struct cds_lfht *table) { - struct dp_ht_defer_entry *new; - - new = malloc(sizeof(*new)); - if (!new) { - RTE_LOG(ERR, DATAPLANE, - "No mem to store hash table for later destruction"); - return; - } - - new->table = table; - rte_spinlock_lock(&dp_ht_defer_lock); - cds_list_add_tail(&new->list, &dp_ht_defer_list); - rte_spinlock_unlock(&dp_ht_defer_lock); - - /* fire the timer immediately on master */ - rte_timer_reset(&dp_ht_defer_timer, 0, SINGLE, - rte_get_master_lcore(), dp_ht_destroy_event, NULL); - + cds_lfht_destroy(table, NULL); } static inline bool is_switch_driver(const char *driver_name) @@ -642,7 +669,7 @@ bool get_switch_dev_info(const char *drv_name, const char *drv_dev_name, return false; drv_len = strlen(drv_name); - if (strncmp(drv_dev_name, drv_name, drv_len)) + if (strncmp(drv_dev_name, drv_name, drv_len) != 0) return false; /* strip driver prefix + swX from name */ @@ -660,3 +687,77 @@ bool get_switch_dev_info(const char *drv_name, const char *drv_dev_name, return true; } + +/* + * Add or remove a flag from the effective capability set. + * Note the flag must already be present in the permitted set. + */ +int +change_capability(cap_value_t capability, bool on) +{ + cap_t caps; + cap_value_t cap_flag[1]; + int rc; + + if (!cap_valid(capability)) { + RTE_LOG(ERR, DATAPLANE, + "Invalid capability %d\n", capability); + return -1; + } + + caps = cap_get_proc(); + if (caps == NULL) { + RTE_LOG(ERR, DATAPLANE, + "Failed to get current capabilities\n"); + return -1; + } + + cap_flag[0] = capability; + rc = cap_set_flag(caps, CAP_EFFECTIVE, 1, cap_flag, + on ? CAP_SET : CAP_CLEAR); + if (rc < 0) { + RTE_LOG(ERR, DATAPLANE, + "Failed to %s flag for capability %d\n", + on ? "set" : "clear", capability); + goto out; + } + + rc = cap_set_proc(caps); + if (rc < 0) + RTE_LOG(ERR, DATAPLANE, + "Failed to %s capability %d\n", + on ? 
"enable" : "disable", capability); + +out: + cap_free(caps); + return rc; +} + +/* + * There is no wrapper for this function. The value returned by + * gettid is the thread id and this is not the same as the pid + * or the POSIX thread id. It represents the value used by + * the kernel's native thread implementation. + */ +static unsigned long gettid(void) +{ + return syscall(SYS_gettid); +} + +/* Change the nice value of the current thread (not pthread) */ +void renice(int value) +{ + int rc; + + if (change_capability(CAP_SYS_NICE, true) == 0) { + rc = setpriority(PRIO_PROCESS, gettid(), value); + if (rc < 0) + RTE_LOG(ERR, DATAPLANE, + "%s: failed to set thread priority: %s\n", + __func__, strerror(errno)); + change_capability(CAP_SYS_NICE, false); + } else + RTE_LOG(ERR, DATAPLANE, + "%s: failed to set CAP_SYS_NICE: %s\n", + __func__, strerror(errno)); +} diff --git a/src/util.h b/src/util.h index 7b0d3fed..73c1c2e3 100644 --- a/src/util.h +++ b/src/util.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2021, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2011-2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -26,13 +26,16 @@ #include #include #include +#include #include #include "compiler.h" -#include "urcu.h" +#include "lcore_sched.h" +#include "soft_ticks.h" +#include "vrf.h" struct cds_lfht; -struct ether_addr; +struct rte_ether_addr; #ifndef ARRAY_SIZE #define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0])) @@ -44,18 +47,20 @@ struct ether_addr; #define US_PER_MS 1000u #define S_PER_DAY 86400u #define USEC_PER_SEC 1000000u +#define MSEC_PER_SEC 1000 +#define USEC_PER_MSEC 1000 #define NSEC_PER_USEC 1000 #ifndef _NETINET_ETHER_H /* Convert 48 bit Ethernet ADDRess to ASCII. 
*/ -char *ether_ntoa(const struct ether_addr *__addr); -char *ether_ntoa_r(const struct ether_addr *__addr, char *__buf); +char *ether_ntoa(const struct rte_ether_addr *__addr); +char *ether_ntoa_r(const struct rte_ether_addr *__addr, char *__buf); /* Convert ASCII string S to 48 bit Ethernet address. */ -struct ether_addr *ether_aton(const char *__asc); -struct ether_addr *ether_aton_r(const char *__asc, - struct ether_addr *__addr); +struct rte_ether_addr *ether_aton(const char *__asc); +struct rte_ether_addr *ether_aton_r(const char *__asc, + struct rte_ether_addr *__addr); #endif /* @@ -64,9 +69,6 @@ struct ether_addr *ether_aton_r(const char *__asc, */ #define barrier() asm volatile("" : : : "memory") -/* vrfid type */ -typedef uint32_t vrfid_t; - struct free_huge_info { void *ptr; size_t sz; @@ -124,24 +126,20 @@ static inline void set_bit_32(uint32_t *field32, uint8_t bit_num) *field32 |= (1U << bit_num); } -/* Like rte_lcore_id() - * but for all non-dataplane threads returns 0 instead of LCORE_ID_ANY - */ -RTE_DECLARE_PER_LCORE(unsigned int, _dp_lcore_id); -static ALWAYS_INLINE -unsigned int dp_lcore_id(void) -{ - return RTE_PER_LCORE(_dp_lcore_id); -} - /* Iterate each lcore id that dp_lcore_id could return */ #define FOREACH_DP_LCORE(_i) \ for ((_i) = 0; (_i) <= get_lcore_max(); (_i)++) -/* Current time since boot */ +/* + * Convert bytes to mertic kbits and vice versa + */ +#define BYTES_TO_METRIC_KBITS(bytes) ((uint32_t)((bytes) / 125)) +#define METRIC_KBITS_TO_BYTES(kbits) (((uint64_t)(kbits)) * 125) + +/* Current time in seconds since boot */ static inline time_t get_dp_uptime(void) { - return (time_t) (rte_get_timer_cycles() / rte_get_timer_hz()); + return (time_t) (soft_ticks / MSEC_PER_SEC); } static inline uint64_t timespec_diff_us(struct timespec *start, @@ -165,13 +163,16 @@ const char *nlmsg_type(unsigned int type); const char *ndm_state(uint16_t); int get_bool(const char *str, bool *ptr); int get_unsigned(const char *str, unsigned int *ptr); 
+int get_unsigned_long(const char *str, unsigned long *ptr); int get_signed(const char *str, int *ptr); int get_signed_char(const char *str, signed char *ptr); int get_unsigned_short(const char *str, unsigned short *ptr); int get_unsigned_char(const char *str, unsigned char *ptr); +float get_float(const char *str, float *ptr); int net_ratelimit(void); bool secondary_cpu(unsigned int id); int str_unsplit(char *, size_t, int, char **); +char *next_arg(int *argcp, char ***argvp); size_t snprintfcat(char *buf, size_t size, const char *fmt, ...) __attribute__ ((__format__(__printf__, 3, 4))); @@ -191,4 +192,7 @@ bool get_switch_dev_info(const char *drv_name, int *switch_id, char *dev_name); +int change_capability(cap_value_t capability, bool on); +void renice(int value); + #endif /* UTIL_H */ diff --git a/src/version.h.in b/src/version.h similarity index 52% rename from src/version.h.in rename to src/version.h index d9031e41..7419e26f 100644 --- a/src/version.h.in +++ b/src/version.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2018-2019, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2011-2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -9,7 +9,6 @@ #define _VERSION_H_ #define DATAPLANE_PROGNAME "Dataplane" -#define DATAPLANE_VERSION "@PACKAGE_VERSION@" -#define DATAPLANE_COPYRIGHT "Copyright (c) @COPYRIGHT_YEAR@, AT&T Intellectual Property. All rights reserved." +#define DATAPLANE_VERSION PACKAGE_VERSION #endif diff --git a/src/vlan_modify.c b/src/vlan_modify.c index 1298ba2f..ef960f47 100644 --- a/src/vlan_modify.c +++ b/src/vlan_modify.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2019-2020, AT&T Intellectual Property. All rights reserved. 
* * SPDX-License-Identifier: LGPL-2.1-only * @@ -134,10 +134,10 @@ vlan_mod_flt_log_action(struct vlan_mod_ft_cls_action *action) vlan_mod_flt_log_act_vlan(&action->data.vlan); break; case VLAN_MOD_FILTER_CLS_ACTION_MIRRED: - RTE_LOG(NOTICE, DATAPLANE, "Act_mirred:\n"); + RTE_LOG(DEBUG, DATAPLANE, "Act_mirred:\n"); break; default: - RTE_LOG(NOTICE, DATAPLANE, "Act_unknown:\n"); + RTE_LOG(DEBUG, DATAPLANE, "Act_unknown:\n"); } } @@ -428,7 +428,7 @@ vlan_mod_flt_get_classify_vlan(struct vlan_mod_filter_list_entry *entry, static bool vlan_mod_enable_fwding(struct vlan_mod_chain_list_entry *entry) { - struct ifnet *intf = ifnet_byifindex(entry->key.ifindex); + struct ifnet *intf = dp_ifnet_byifindex(entry->key.ifindex); struct vlan_mod_tbl_entry *vlan_mod_tbl; static struct vlan_mod_tbl_entry *vlan_mod_default; @@ -470,9 +470,9 @@ vlan_mod_enable_fwding(struct vlan_mod_chain_list_entry *entry) rcu_assign_pointer(intf->vlan_mod_tbl, vlan_mod_tbl); rcu_assign_pointer(intf->vlan_mod_default, vlan_mod_default); - intf->vlan_modify = true; pl_node_add_feature_by_inst(&vlan_mod_in_feat, intf); + pl_node_add_feature_by_inst(&vlan_mod_out_feat, intf); return true; } @@ -616,6 +616,8 @@ vlan_mod_flt_alloc_filter_entry(struct vlan_mod_tc_filter_key *key) struct vlan_mod_filter_list_entry *entry; entry = zmalloc_aligned(sizeof(*entry)); + if (!entry) + return NULL; entry->key = *key; return entry; @@ -642,7 +644,6 @@ vlan_mod_flt_lookup_chain(struct vlan_mod_tc_filter_key *key, { struct vlan_mod_chain_list_entry *entry; struct vlan_mod_tc_filter_key s_key; - char key_string[VLAN_MOD_FLT_KEY_STR_LEN + 1]; /* The chain list is search with a less specific key * than the filter has, so copy the key, and mask out @@ -678,9 +679,6 @@ vlan_mod_flt_lookup_chain(struct vlan_mod_tc_filter_key *key, vlan_mod_flt_head_init(&entry->filter_head); entry->key = s_key; - RTE_LOG(INFO, DATAPLANE, "vlan_mod: new chain entry: %s\n", - vlan_mod_flt_key_str(key_string, &entry->key)); - 
filter_chain_head->list_count++; if (!vlan_mod_enable_fwding(entry)) { @@ -711,14 +709,6 @@ static int vlan_mod_flt_add_entry(struct vlan_mod_tc_filter_key *key, return MNL_CB_ERROR; } - chain_entry = vlan_mod_flt_lookup_chain(key, true, true); - - if (!chain_entry) - return MNL_CB_ERROR; - - list_head = &chain_entry->filter_head; - - old = vlan_mod_flt_lookup_filter(list_head, key); new = vlan_mod_flt_alloc_filter_entry(key); if (!new) { RTE_LOG(ERR, DATAPLANE, @@ -726,16 +716,6 @@ static int vlan_mod_flt_add_entry(struct vlan_mod_tc_filter_key *key, return MNL_CB_ERROR; } - /* - * To save looking up the chain head when dealing with an filter - * entry stash it. - */ - new->parent = chain_entry; - - RTE_LOG(INFO, DATAPLANE, "vlan_mod: %s chain entry: %s\n", - old ? "update" : "new", - vlan_mod_flt_key_str(key_string, &new->key)); - if (vlan_mod_flt_extr_base_attr(new, tcm, tb) != MNL_CB_OK) { RTE_LOG(INFO, DATAPLANE, "vlan_mod: %s chain entry: update ignore\n", @@ -754,6 +734,25 @@ static int vlan_mod_flt_add_entry(struct vlan_mod_tc_filter_key *key, return MNL_CB_ERROR; } + chain_entry = vlan_mod_flt_lookup_chain(key, true, true); + + if (!chain_entry) + return MNL_CB_ERROR; + + list_head = &chain_entry->filter_head; + + /* + * To save looking up the chain head when dealing with an filter + * entry stash it. + */ + new->parent = chain_entry; + + old = vlan_mod_flt_lookup_filter(list_head, key); + + RTE_LOG(INFO, DATAPLANE, "vlan_mod: %s chain entry: %s\n", + old ? 
"update" : "new", + vlan_mod_flt_key_str(key_string, &new->key)); + if (old) { if (vlan_mod_flt_get_classify_vlan(old, &old_vlan) != MNL_CB_OK) { @@ -905,7 +904,7 @@ vlan_mod_flt_chain_entry_delete(struct vlan_mod_chain_list_entry *chain_entry) } else { struct ifnet *intf; - intf = ifnet_byifindex(chain_entry->key.ifindex); + intf = dp_ifnet_byifindex(chain_entry->key.ifindex); if (!intf) { RTE_LOG(ERR, DATAPLANE, "vlan_mod: no intf %d\n", @@ -913,7 +912,8 @@ vlan_mod_flt_chain_entry_delete(struct vlan_mod_chain_list_entry *chain_entry) return MNL_CB_ERROR; } pl_node_remove_feature_by_inst(&vlan_mod_in_feat, intf); - intf->vlan_modify = false; + pl_node_remove_feature_by_inst(&vlan_mod_out_feat, intf); + rcu_assign_pointer(intf->vlan_mod_tbl, NULL); rcu_assign_pointer(intf->vlan_mod_default, NULL); } @@ -1101,7 +1101,7 @@ static void vlan_mod_show_entry_chain(json_writer_t *wr, struct ifnet *intf; if (!append) { - intf = ifnet_byifindex(chain->key.ifindex); + intf = dp_ifnet_byifindex(chain->key.ifindex); if (!intf) { RTE_LOG(ERR, DATAPLANE, "vlan_mod: no intf %d\n", chain->key.ifindex); @@ -1135,7 +1135,7 @@ static void vlan_mod_show_fwding_table(json_writer_t *wr, struct vlan_mod_tbl_entry *vlan_mod_tbl, *vlan_mod_default; struct vlan_mod_ft_cls_action *act; - intf = ifnet_byifindex(entry->key.ifindex); + intf = dp_ifnet_byifindex(entry->key.ifindex); if (!intf) { RTE_LOG(ERR, DATAPLANE, "vlan_mod: no intf %d\n", entry->key.ifindex); @@ -1222,7 +1222,7 @@ void vlan_mod_cmd(FILE *f, int argc, char **argv) bool intf_mode = false; wr = jsonw_new(f); - if (!wr || !filter_chain_head) + if (!wr) return; if ((argc == 2) && (streq(argv[1], "intf"))) intf_mode = true; @@ -1240,6 +1240,12 @@ void vlan_mod_cmd(FILE *f, int argc, char **argv) jsonw_name(wr, "intfs"); jsonw_start_array(wr); + if (!filter_chain_head) { + jsonw_end_array(wr); + jsonw_destroy(&wr); + return; + } + cds_list_for_each_entry_rcu(entry, &filter_chain_head->list_head, chain_next) { @@ -1258,10 
+1264,11 @@ void vlan_mod_cmd(FILE *f, int argc, char **argv) ifindex = entry->key.ifindex; } - if (ifindex != -1) + if (ifindex != -1) { jsonw_end_array(wr); + jsonw_end_object(wr); + } - jsonw_end_object(wr); jsonw_end_array(wr); if (!intf_mode) { @@ -1289,7 +1296,7 @@ int vlan_mod_flt_entry_add(const struct nlmsghdr *nlh) if (ret != MNL_CB_OK) return ret; - if (!filter_type || strcmp(filter_type, "u32")) { + if (!filter_type || strcmp(filter_type, "u32") != 0) { RTE_LOG(NOTICE, DATAPLANE, "Unsupported tc filter type %s\n", filter_type); @@ -1346,31 +1353,3 @@ int vlan_mod_flt_chain_delete(const struct nlmsghdr *nlh __unused) return MNL_CB_OK; } - -struct rte_mbuf * -vlan_modify_egress(struct ifnet *ifp, struct rte_mbuf **m) -{ - uint16_t vlan; - struct vlan_mod_ft_cls_action *action; - struct rte_mbuf *buf = *m; - - vlan = vlan_mod_get_vlan(buf, ifp, VLAN_MOD_DIR_EGRESS); - if (vlan == 0) - return *m; - action = vlan_modify_get_action(ifp, vlan, VLAN_MOD_DIR_EGRESS); - if (!action) - return *m; - - switch (action->data.vlan.action) { - case VLAN_MOD_FILTER_ACT_VLAN_POP: - return vlan_mod_tag_pop(ifp, m, VLAN_MOD_DIR_EGRESS); - case VLAN_MOD_FILTER_ACT_VLAN_PUSH: - return vlan_mod_tag_push(ifp, m, action, VLAN_MOD_DIR_EGRESS); - case VLAN_MOD_FILTER_ACT_VLAN_MOD: - return vlan_mod_tag_modify(ifp, m, action, VLAN_MOD_DIR_EGRESS); - default: - return NULL; - } - - return *m; -} diff --git a/src/vlan_modify.h b/src/vlan_modify.h index 94254858..f10fe245 100644 --- a/src/vlan_modify.h +++ b/src/vlan_modify.h @@ -64,8 +64,8 @@ vlan_mod_pcp_arbitrate(struct vlan_mod_ft_cls_action *action, uint8_t pcp) static inline uint16_t vlan_mod_alt_proto(uint16_t proto) { - return (proto == ETHER_TYPE_VLAN) ? - ETHER_TYPE_QINQ : ETHER_TYPE_VLAN; + return (proto == RTE_ETHER_TYPE_VLAN) ? 
+ RTE_ETHER_TYPE_QINQ : RTE_ETHER_TYPE_VLAN; } static inline void @@ -301,7 +301,4 @@ vlan_modify_get_action(struct ifnet *ifp, uint16_t vlan, return action; } -struct rte_mbuf * -vlan_modify_egress(struct ifnet *ifp, struct rte_mbuf **m); - #endif /* VLAN_MODIFY_H */ diff --git a/src/vplane_debug.h b/src/vplane_debug.h index 8dfcab9f..bf6e43a6 100644 --- a/src/vplane_debug.h +++ b/src/vplane_debug.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2013-2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -18,7 +18,7 @@ * Flags controlling which debug messages show up in the * system log. * - * Keep this in sync with debug_bits[] in commands.c + * Keep this in sync with debug_bits[] in debug.c */ #define DP_DBG_INIT (1u << 0) #define DP_DBG_LINK (1u << 1) @@ -50,12 +50,16 @@ #define DP_DBG_MULTICAST (1u << 27) #define DP_DBG_MPLS_CTRL (1u << 28) #define DP_DBG_MPLS_PKTERR (1ull << 29) -#define DP_DBG_DPI (1ull << 31) /* Deep Packet Inspection */ -#define DP_DBG_QOS_DP (1ull << 32) -#define DP_DBG_QOS_HW (1ull << 33) -#define DP_DBG_STORM_CTL (1ull << 34) -#define DP_DBG_CPP_RL (1ull << 35) -#define DP_DBG_PTP (1ull << 36) +#define DP_DBG_DPI (1ull << 30) /* Deep Packet Inspection */ +#define DP_DBG_QOS_DP (1ull << 31) +#define DP_DBG_QOS_HW (1ull << 32) +#define DP_DBG_STORM_CTL (1ull << 33) +#define DP_DBG_CPP_RL (1ull << 34) +#define DP_DBG_PTP (1ull << 35) +#define DP_DBG_CGNAT (1ull << 36) +#define DP_DBG_FLOW_CACHE (1ull << 37) +#define DP_DBG_MAC_LIMIT (1ull << 38) +#define DP_DBG_GPC (1ull << 39) /* Default to only debugging startup and link events. * Skip ARP and route since they can flood log. @@ -64,6 +68,7 @@ (DP_DBG_INIT | DP_DBG_LINK | DP_DBG_NETLINK_IF) extern uint64_t dp_debug; +extern uint64_t dp_debug_init; /* * Macro to selectively enable logging by feature. 
@@ -80,4 +85,7 @@ extern uint64_t dp_debug; */ #define DP_DEBUG_ENABLED(m) (unlikely(dp_debug & DP_DBG_##m)) +int cmd_debug(FILE *f, int argc, char **argv); +int cmd_log(FILE *f, int argc, char **argv); +void debug_init(void); #endif /* _MAIN_H_ */ diff --git a/src/vplane_log.h b/src/vplane_log.h index b174237e..825b4e49 100644 --- a/src/vplane_log.h +++ b/src/vplane_log.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2013-2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -10,9 +10,9 @@ #define VPLANE_LOG_H #include +#include "debug.h" /* USER1 - infrastructure */ -#define RTE_LOGTYPE_DATAPLANE RTE_LOGTYPE_USER1 #define RTE_LOGTYPE_SHADOW RTE_LOGTYPE_USER1 #define RTE_LOGTYPE_DIST RTE_LOGTYPE_USER1 #define RTE_LOGTYPE_FAL RTE_LOGTYPE_USER1 @@ -48,6 +48,8 @@ #define RTE_LOGTYPE_CPP_RL RTE_LOGTYPE_USER4 #define RTE_LOGTYPE_CGNAT RTE_LOGTYPE_USER4 #define RTE_LOGTYPE_ACL_HW RTE_LOGTYPE_USER4 +#define RTE_LOGTYPE_MAC_LIMIT RTE_LOGTYPE_USER4 +#define RTE_LOGTYPE_GPC RTE_LOGTYPE_USER4 /* USER5 - crypto */ #define RTE_LOGTYPE_VTI RTE_LOGTYPE_USER5 diff --git a/src/vrf.c b/src/vrf.c index 3a51696e..d7e5df50 100644 --- a/src/vrf.c +++ b/src/vrf.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015-2016 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -22,7 +22,8 @@ #include "compiler.h" #include "crypto/vti.h" #include "dp_event.h" -#include "gre.h" +#include "fal.h" +#include "if/gre.h" #include "ip_mcast.h" #include "lpm/lpm.h" #include "main.h" @@ -35,13 +36,15 @@ #include "util.h" #include "vplane_debug.h" #include "vplane_log.h" -#include "vrf.h" +#include "vrf_internal.h" #include "vrf_if.h" struct nlattr; struct vrf *vrf_table[VRF_ID_MAX] __hot_data = {NULL}; +static uint32_t vrf_table_hw_stats[PD_OBJ_STATE_LAST]; + /* * Infrastructure to handle table maps received out of order * w.r.t netlink. @@ -169,12 +172,10 @@ vrf_destroy(struct rcu_head *head) /* * Ref count records one count per - * 1. Interface bound to the VRF - * 2. Non-empty route table - i.e. each struct lpm - * belonging to the VRF holds a ref count so long as it has at - * least one route. - * 3. Explicit vrf creation cmd - each of these is interpreted + * 2. Explicit vrf creation cmd - each of these is interpreted * as denoting the existence of a 'reference' held in the * kernel or above until an explicit delete is received. + * 3. 
Other features referencing VRF */ static inline void vrf_inc_ref_count(struct vrf *vrf) @@ -206,7 +207,7 @@ static void vrf_find_saved_tablemap(struct vrf *vrf) } static struct vrf * -vrf_alloc(vrfid_t vrf_id) +vrf_alloc(vrfid_t vrf_id, fal_object_t vrf_obj, enum pd_obj_state pd_state) { struct vrf *vrf_var; @@ -217,6 +218,8 @@ vrf_alloc(vrfid_t vrf_id) goto err; vrf_var->v_id = vrf_id; + vrf_var->v_fal_obj = vrf_obj; + vrf_var->v_pd_state = pd_state; if (route_init(vrf_var) < 0) goto err; @@ -249,6 +252,8 @@ vrf_alloc(vrfid_t vrf_id) } } + vrf_table_hw_stats[vrf_var->v_pd_state]++; + return vrf_var; err: if (vrf_var) @@ -257,10 +262,19 @@ vrf_alloc(vrfid_t vrf_id) return NULL; } -static struct vrf* +static struct vrf * vrf_create(vrfid_t vrf_id) { + struct fal_attribute_t attr_list[] = { + { + .id = FAL_VRF_ATTR_ID, + .value.u32 = vrf_id, + }, + }; + enum pd_obj_state pd_state; + fal_object_t vrf_obj; struct vrf *vrf_var; + int ret; if (vrf_id >= VRF_ID_MAX) { DP_LOG_W_VRF(ERR, DATAPLANE, vrf_id, "ID > %d\n", @@ -268,6 +282,13 @@ vrf_create(vrfid_t vrf_id) return NULL; } + ret = fal_vrf_create(ARRAY_SIZE(attr_list), attr_list, + &vrf_obj); + if (ret < 0 && ret != -EOPNOTSUPP) + DP_LOG_W_VRF(ERR, DATAPLANE, vrf_id, + "FAL create failed: %s\n", strerror(-ret)); + pd_state = fal_state_to_pd_state(ret); + vrf_var = get_vrf(vrf_id); if (vrf_var) { /* this is an error - if the vrf might exist already then @@ -278,12 +299,21 @@ vrf_create(vrfid_t vrf_id) return NULL; } - vrf_var = vrf_alloc(vrf_id); - if (vrf_var == NULL) + vrf_var = vrf_alloc(vrf_id, vrf_obj, pd_state); + if (vrf_var == NULL) { + if (vrf_obj) { + ret = fal_vrf_delete(vrf_obj); + if (ret < 0 && ret != -EOPNOTSUPP) + DP_LOG_W_VRF(ERR, DATAPLANE, vrf_id, + "FAL delete failed: %s\n", + strerror(-ret)); + } return NULL; + } vrf_inc_ref_count(vrf_var); rcu_assign_pointer(vrf_table[vrf_id], vrf_var); + return vrf_var; } @@ -314,6 +344,7 @@ vrf_find_or_create(vrfid_t vrf_id) void vrf_delete_by_ptr(struct vrf 
*vrf) { vrfid_t vrf_id; + int ret; if (unlikely(vrf == NULL)) { DP_LOG_W_VRF(ERR, DATAPLANE, VRF_INVALID_ID, @@ -345,6 +376,15 @@ void vrf_delete_by_ptr(struct vrf *vrf) */ vrf_table[vrf_id] = NULL; + if (vrf->v_fal_obj) { + ret = fal_vrf_delete(vrf->v_fal_obj); + if (ret < 0 && ret != -EOPNOTSUPP) + DP_LOG_W_VRF(ERR, DATAPLANE, vrf_id, + "FAL delete failed: %s\n", + strerror(-ret)); + } + vrf_table_hw_stats[vrf->v_pd_state]--; + call_rcu(&vrf->rcu, vrf_destroy); } @@ -359,13 +399,13 @@ void vrf_delete(vrfid_t vrf_id) vrf_delete_by_ptr(vrf_var); } -struct ifnet *vrfmaster_create(const char *ifname, uint32_t if_index, +struct ifnet *vrf_if_create(const char *ifname, uint32_t if_index, uint32_t vrf_tableid) { struct vrf_softc *vrsc; struct ifnet *ifp; - ifp = if_alloc(ifname, IFT_VRFMASTER, 65535, NULL, SOCKET_ID_ANY); + ifp = if_alloc(ifname, IFT_VRF, 65535, NULL, SOCKET_ID_ANY, NULL); if (!ifp) { RTE_LOG(ERR, DATAPLANE, "out of memory for vrf_ifnet\n"); @@ -385,7 +425,7 @@ struct ifnet *vrfmaster_create(const char *ifname, uint32_t if_index, return ifp; } -static void vrfmaster_free_rcu(struct rcu_head *head) +static void vrf_if_free_rcu(struct rcu_head *head) { struct vrf_softc *vrsc = caa_container_of(head, struct vrf_softc, vrfsc_rcu); @@ -393,33 +433,29 @@ static void vrfmaster_free_rcu(struct rcu_head *head) free(vrsc); } -static void vrfmaster_delete(struct ifnet *ifp) +static void vrf_if_delete(struct ifnet *ifp) { struct vrf_softc *vrsc = ifp->if_softc; - struct vrf *vrf = vrf_get_rcu(vrfmaster_get_vrfid(ifp)); - - route_unlink_vrf_from_table(vrf); - route6_unlink_vrf_from_table(vrf); - call_rcu(&vrsc->vrfsc_rcu, vrfmaster_free_rcu); + call_rcu(&vrsc->vrfsc_rcu, vrf_if_free_rcu); } -static void vrfmaster_show_info(json_writer_t *wr, struct ifnet *ifp) +static void vrf_if_show_info(json_writer_t *wr, struct ifnet *ifp) { struct vrf_softc *vrsc = ifp->if_softc; - jsonw_name(wr, "vrfmaster"); + jsonw_name(wr, "vrf"); jsonw_start_object(wr); 
jsonw_uint_field(wr, "tableid", vrsc->vrfsc_tableid); jsonw_end_object(wr); } -vrfid_t vrfmaster_get_vrfid(struct ifnet *ifp) +vrfid_t vrf_if_get_vrfid(struct ifnet *ifp) { struct vrf *vrf; vrfid_t i; - assert(ifp->if_type == IFT_VRFMASTER); + assert(ifp->if_type == IFT_VRF); if (ifp->if_vrfid != VRF_DEFAULT_ID) return ifp->if_vrfid; @@ -435,13 +471,13 @@ vrfid_t vrfmaster_get_vrfid(struct ifnet *ifp) return VRF_INVALID_ID; } -void vrf_set_external_id(struct vrf *vrf, vrfid_t xid) +void vrf_set_external_id(struct vrf *vrf, vrfid_t external_id) { - vrf->v_external_id = xid; + vrf->v_external_id = external_id; vrf_find_saved_tablemap(vrf); } -vrfid_t vrf_get_external_id(uint32_t internal_id) +vrfid_t dp_vrf_get_external_id(uint32_t internal_id) { struct vrf *vrf; @@ -452,51 +488,78 @@ vrfid_t vrf_get_external_id(uint32_t internal_id) return vrf ? vrf->v_external_id : VRF_INVALID_ID; } -vrfid_t vrfmaster_get_tableid(struct ifnet *ifp) +vrfid_t vrf_if_get_tableid(struct ifnet *ifp) { struct vrf_softc *vrsc = ifp->if_softc; - assert(ifp->if_type == IFT_VRFMASTER); + assert(ifp->if_type == IFT_VRF); return vrsc->vrfsc_tableid; } -struct vrfmaster_lookup_by_tableid_ctx { - uint32_t tableid; +vrfid_t dp_vrf_get_vid(struct vrf *vrf) +{ + return vrf->v_id; +} + +struct vrf_lookup_by_tableid_ctx { + uint32_t kernel_tableid; struct ifnet *ifp; + uint32_t user_tableid; }; -static void vrfmaster_lookup_by_tableid_worker(struct ifnet *ifp, void *arg) +static void vrf_lookup_by_tableid_worker(struct ifnet *ifp, void *arg) { - struct vrfmaster_lookup_by_tableid_ctx *ctx = arg; + struct vrf_lookup_by_tableid_ctx *ctx = arg; + struct vrf *vrf; + unsigned int i; - if (ifp->if_type == IFT_VRFMASTER && - vrfmaster_get_tableid(ifp) == ctx->tableid) + if (ctx->ifp || ifp->if_type != IFT_VRF) + return; + + if (vrf_if_get_tableid(ifp) == ctx->kernel_tableid) { ctx->ifp = ifp; + ctx->user_tableid = RT_TABLE_MAIN; + } else { + vrf = vrf_get_rcu(vrf_if_get_vrfid(ifp)); + for (i = 0; i <= 
PBR_TABLEID_MAX; i++) { + if (vrf->v_pbrtablemap[i] == ctx->kernel_tableid) { + ctx->ifp = ifp; + ctx->user_tableid = i; + } + } + } } -struct ifnet *vrfmaster_lookup_by_tableid(uint32_t tableid) +int vrf_lookup_by_tableid(uint32_t kernel_tableid, vrfid_t *vrfid, + uint32_t *user_tableid) { - struct vrfmaster_lookup_by_tableid_ctx ctx = { - .tableid = tableid, + struct vrf_lookup_by_tableid_ctx ctx = { + .kernel_tableid = kernel_tableid, .ifp = NULL, }; - ifnet_walk(vrfmaster_lookup_by_tableid_worker, &ctx); - return ctx.ifp; + dp_ifnet_walk(vrf_lookup_by_tableid_worker, &ctx); + if (!ctx.ifp) + return -ENOENT; + + *vrfid = vrf_if_get_vrfid(ctx.ifp); + *user_tableid = ctx.user_tableid; + + return 0; } -struct vrf *vrf_get_rcu_from_external(vrfid_t external_id) +struct vrf *dp_vrf_get_rcu_from_external(vrfid_t external_id) { - struct ifnet *master_ifp; + struct ifnet *vrf_ifp; if (!is_nondefault_vrf(external_id)) return vrf_get_rcu(external_id); - master_ifp = ifnet_byifindex(external_id); - if (!master_ifp || master_ifp->if_type != IFT_VRFMASTER) + vrf_ifp = dp_ifnet_byifindex(external_id); + if (!vrf_ifp || vrf_ifp->if_type != IFT_VRF) return VRF_INVALID_ID; - return vrf_get_rcu(vrfmaster_get_vrfid(master_ifp)); + return vrf_get_rcu(vrf_if_get_vrfid(vrf_ifp)); } static void vrf_save_tablemap_for_replay(vrfid_t vrf_id, uint8_t pbr_tblid, @@ -535,25 +598,33 @@ int cmd_tablemap_cfg(FILE *f, int argc, char **argv) if (!tableid_in_pbr_range(pbr_tblid)) return 0; - vrf = vrf_get_rcu_from_external(vrf_id); + vrf = dp_vrf_get_rcu_from_external(vrf_id); if (vrf && vrf->v_id == VRF_DEFAULT_ID) return 0; if (vrf == NULL) vrf_save_tablemap_for_replay(vrf_id, pbr_tblid, kernel_tblid); - else + else { vrf->v_pbrtablemap[pbr_tblid] = kernel_tblid; + /* + * Routes may have been incomplete pending the + * appearance of this tablemap, so try to complete + * them now. 
+ */ + incomplete_routes_make_complete(); + } + return 0; } static int -vrfmaster_dump(struct ifnet *ifp, json_writer_t *wr, +vrf_if_dump(struct ifnet *ifp, json_writer_t *wr, enum if_dump_state_type type) { switch (type) { case IF_DS_STATE: - vrfmaster_show_info(wr, ifp); + vrf_if_show_info(wr, ifp); break; default: break; @@ -562,17 +633,24 @@ vrfmaster_dump(struct ifnet *ifp, json_writer_t *wr, return 0; } -static const struct ift_ops vrfmaster_if_ops = { - .ifop_uninit = vrfmaster_delete, - .ifop_dump = vrfmaster_dump, +static enum dp_ifnet_iana_type +vrf_if_iana_type(struct ifnet *ifp __unused) +{ + return DP_IFTYPE_IANA_OTHER; +} + +static const struct ift_ops vrf_if_ops = { + .ifop_uninit = vrf_if_delete, + .ifop_dump = vrf_if_dump, + .ifop_iana_type = vrf_if_iana_type, }; void vrf_init(void) { - int ret = if_register_type(IFT_VRFMASTER, &vrfmaster_if_ops); + int ret = if_register_type(IFT_VRF, &vrf_if_ops); if (ret < 0) - rte_panic("Failed to register VRF Master type: %s", + rte_panic("Failed to register VRF type: %s", strerror(-ret)); /* @@ -583,7 +661,7 @@ void vrf_init(void) * drop all received traffic. * * We also ensure we send a create notification event for - * these two vrs which are not associated with a vrfmaster + * these two vrs which are not associated with a vrf * interface. 
*/ struct vrf *vrf; @@ -593,6 +671,9 @@ void vrf_init(void) rte_panic("Can't init the default vrf\n"); dp_event(DP_EVT_VRF_CREATE, 0, vrf, 0, 0, NULL); + _Static_assert(VRF_INVALID_ID == FAL_INVALID_VRF_ID, + "Invalid VRF ID for dataplane and FAL don't match"); + vrf = vrf_find_or_create(VRF_INVALID_ID); if (!vrf) rte_panic("Can't init the invalid vrf\n"); @@ -604,3 +685,47 @@ void vrf_cleanup(void) vrf_delete(VRF_DEFAULT_ID); vrf_delete(VRF_INVALID_ID); } + +uint32_t *vrf_table_hw_stats_get(void) +{ + return vrf_table_hw_stats; +} + +int vrf_table_get_pd_subset_data(json_writer_t *json, + enum pd_obj_state subset) +{ + struct vrf *vrf; + vrfid_t vrf_id; + + jsonw_start_object(json); + jsonw_name(json, "vrf_table"); + jsonw_start_array(json); + + for (vrf_id = 0; vrf_id < ARRAY_SIZE(vrf_table); vrf_id++) { + /* + * skip invalid VRF since it doesn't have an external + * ID mapping. + */ + if (vrf_id == VRF_INVALID_ID) + continue; + + vrf = rcu_dereference(vrf_table[vrf_id]); + if (!vrf) + continue; + + if (subset != PD_OBJ_STATE_LAST && + subset != vrf->v_pd_state) + continue; + + jsonw_start_object(json); + + jsonw_uint_field(json, "id", vrf->v_external_id); + + jsonw_end_object(json); + } + + jsonw_end_array(json); + jsonw_end_object(json); + + return 0; +} diff --git a/src/vrf_if.h b/src/vrf_if.h index 3ddfe8bc..ee4da362 100644 --- a/src/vrf_if.h +++ b/src/vrf_if.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2018-2020, AT&T Intellectual Property. All rights reserved. 
* * SPDX-License-Identifier: LGPL-2.1-only */ @@ -11,7 +11,7 @@ #include "json_writer.h" #include "urcu.h" -#include "vrf.h" +#include "vrf_internal.h" struct ifnet; @@ -20,12 +20,13 @@ struct vrf_softc { struct rcu_head vrfsc_rcu; }; -struct ifnet *vrfmaster_create(const char *ifname, uint32_t if_index, +struct ifnet *vrf_if_create(const char *ifname, uint32_t if_index, uint32_t vrf_tableid); -vrfid_t vrfmaster_get_vrfid(struct ifnet *ifp); -vrfid_t vrfmaster_get_tableid(struct ifnet *ifp); +vrfid_t vrf_if_get_vrfid(struct ifnet *ifp); +vrfid_t vrf_if_get_tableid(struct ifnet *ifp); -struct ifnet *vrfmaster_lookup_by_tableid(uint32_t tableid); +int vrf_lookup_by_tableid(uint32_t kernel_tableid, vrfid_t *vrfid, + uint32_t *user_tableid); #endif /* VRF_IF_H */ diff --git a/src/vrf.h b/src/vrf_internal.h similarity index 87% rename from src/vrf.h rename to src/vrf_internal.h index 5710e6df..055c9b42 100644 --- a/src/vrf.h +++ b/src/vrf_internal.h @@ -1,7 +1,7 @@ -#ifndef VRF_H -#define VRF_H +#ifndef VRF_INTERNAL_H +#define VRF_INTERNAL_H /*- - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015-2016 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -17,7 +17,7 @@ #include "arp.h" #include "compat.h" -#include "gre.h" +#include "if/gre.h" #include "ip_mcast.h" #include "netinet/ip_mroute.h" #include "netinet6/ip6_mroute.h" @@ -27,13 +27,12 @@ #include "snmp_mib.h" #include "urcu.h" #include "util.h" +#include "vrf.h" struct npf_config; struct npf_alg_instance; struct npf_timeout; - -#define VRF_INVALID_ID 0 -#define VRF_DEFAULT_ID 1 +struct apt_instance; struct vrf_per_core_stats { struct ipstats_mib ip; @@ -61,12 +60,15 @@ struct vrf { struct crypto_vrf_ctx *crypto; struct npf_config *v_npf; struct npf_alg_instance *v_ai; + struct apt_instance *v_apt; struct npf_timeout *v_to; struct cds_lfht *v_rt_tracker_tbl; struct rcu_head rcu; char v_name[VRF_NAME_SIZE]; uint32_t v_external_id; + fal_object_t v_fal_obj; + enum pd_obj_state v_pd_state; /* SNMP Statistics */ struct arp_stats v_arpstat; @@ -75,6 +77,9 @@ struct vrf { struct vrf_per_core_stats v_stats[]; }; +static_assert(offsetof(struct vrf, v_pbrtablemap) == 64, + "first cache line exceeded"); + #define VRF_ID_KERNEL_MAX 4096 #define VRF_ID_UPLINK_COUNT 1 #define VRF_ID_MAX (VRF_ID_KERNEL_MAX + VRF_ID_UPLINK_COUNT) @@ -125,6 +130,11 @@ static inline bool is_nondefault_vrf(vrfid_t vrf_id) return vrf_id != VRF_DEFAULT_ID && vrf_id != VRF_UPLINK_ID; } +static inline bool vrf_is_vrf_table_id(uint32_t tableid) +{ + return tableid > RT_TABLE_LOCAL; +} + #define DP_LOG_W_VRF(l, t, vrf_id, fmt, args...) 
do { \ if (vrf_id > VRF_DEFAULT_ID) \ RTE_LOG(l, t, "[%s] ID: %u " fmt, \ @@ -160,9 +170,11 @@ void vrf_delete(uint32_t vrf_id); void vrf_delete_all(enum cont_src_en cont_src); void vrf_init(void); void vrf_cleanup(void); -vrfid_t vrf_get_external_id(vrfid_t internal_id); void vrf_set_external_id(struct vrf *vrf, uint32_t external_id); -struct vrf *vrf_get_rcu_from_external(vrfid_t external_id); + +uint32_t *vrf_table_hw_stats_get(void); +int vrf_table_get_pd_subset_data(json_writer_t *json, + enum pd_obj_state subset); /* * Set up PBR tablemap in vrf to map PBR tables (1-128) @@ -170,4 +182,4 @@ struct vrf *vrf_get_rcu_from_external(vrfid_t external_id); */ int cmd_tablemap_cfg(FILE *f, int argc, char **argv); -#endif /* VRF_H */ +#endif /* VRF_INTERNAL_H */ diff --git a/src/zmq_dp.c b/src/zmq_dp.c index b401ae5c..efe9d6dc 100644 --- a/src/zmq_dp.c +++ b/src/zmq_dp.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015-2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -13,7 +13,7 @@ #include #include -#include "config.h" +#include "config_internal.h" #include "vplane_log.h" #include "zmq_dp.h" @@ -306,12 +306,3 @@ send_controller_connect(zsock_t *csocket, enum cont_src_en cont_src) return rc; } - -/* - * The following functions should be used exclusively to enable unit-tests. - * They should **not** be used to implement real functionality. - */ -int __test_build_connect_msg(zmsg_t *msg, enum cont_src_en cont_src) -{ - return build_connect_msg(msg, cont_src); -} diff --git a/src/zmq_dp.h b/src/zmq_dp.h index 93b91990..9365759d 100644 --- a/src/zmq_dp.h +++ b/src/zmq_dp.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. 
* Copyright (c) 2015-2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -59,10 +59,4 @@ int zmsg_send_and_destroy(zmsg_t **msg, void *dest); */ int zactor_terminated(zloop_t *loop, zsock_t *sock, void *arg); -/* - * The following APIs are exclusively to be used for unit-testing purposes. They - * should not be used in production code. - */ -int __test_build_connect_msg(zmsg_t *msg, enum cont_src_en cont_src); - #endif diff --git a/tests/common/README b/tests/common/README deleted file mode 100644 index 91bc9108..00000000 --- a/tests/common/README +++ /dev/null @@ -1 +0,0 @@ -Common code for use by per-file unit tests and whole_dp tests. diff --git a/tests/whole_dp/XML_for_JUnit.xsl b/tests/whole_dp/XML_for_JUnit.xsl new file mode 100644 index 00000000..3e5ff8e8 --- /dev/null +++ b/tests/whole_dp/XML_for_JUnit.xsl @@ -0,0 +1,33 @@ + + + + + + + + + + + + 0 + + + + + + + + + + + + + + + + + + / + + + diff --git a/tests/whole_dp/dataplane_test.sh b/tests/whole_dp/dataplane_test.sh index 64b4a231..9eb8bc8a 100755 --- a/tests/whole_dp/dataplane_test.sh +++ b/tests/whole_dp/dataplane_test.sh @@ -19,4 +19,4 @@ if [ -z "$VALGRIND" ]; then CATCHSEGV="catchsegv" fi -${CATCHSEGV} ./dataplane_test "$@" +${CATCHSEGV} ./dataplane_test -P .libs "$@" diff --git a/tests/whole_dp/dummyfs/proc/cpuinfo b/tests/whole_dp/dummyfs/proc/cpuinfo deleted file mode 100644 index 0826206c..00000000 --- a/tests/whole_dp/dummyfs/proc/cpuinfo +++ /dev/null @@ -1,52 +0,0 @@ -processor : 0 -vendor_id : GenuineIntel -cpu family : 6 -model : 69 -model name : Intel(R) Core(TM) i7-4600U CPU @ 2.10GHz -stepping : 1 -microcode : 0x1c -cpu MHz : 2700.000 -cache size : 4096 KB -physical id : 0 -siblings : 2 -core id : 0 -cpu cores : 2 -apicid : 0 -initial apicid : 0 -fpu : yes -fpu_exception : yes -cpuid level : 13 -wp : yes -flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm 
constant_tsc arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc aperfmperf eagerfpu pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 fma cx16 xtpr pdcm pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm ida arat epb xsaveopt pln pts dtherm tpr_shadow vnmi flexpriority ept vpid fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid -bogomips : 5387.35 -clflush size : 64 -cache_alignment : 64 -address sizes : 39 bits physical, 48 bits virtual -power management: - -processor : 1 -vendor_id : GenuineIntel -cpu family : 6 -model : 69 -model name : Intel(R) Core(TM) i7-4600U CPU @ 2.10GHz -stepping : 1 -microcode : 0x1c -cpu MHz : 2698.417 -cache size : 4096 KB -physical id : 0 -siblings : 2 -core id : 1 -cpu cores : 2 -apicid : 2 -initial apicid : 2 -fpu : yes -fpu_exception : yes -cpuid level : 13 -wp : yes -flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc aperfmperf eagerfpu pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 fma cx16 xtpr pdcm pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm ida arat epb xsaveopt pln pts dtherm tpr_shadow vnmi flexpriority ept vpid fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid -bogomips : 5387.35 -clflush size : 64 -cache_alignment : 64 -address sizes : 39 bits physical, 48 bits virtual -power management: - diff --git a/tests/whole_dp/dummyfs/run/dataplane/platform.conf b/tests/whole_dp/dummyfs/run/dataplane/platform.conf new file mode 100644 index 00000000..f1540d9b --- /dev/null +++ b/tests/whole_dp/dummyfs/run/dataplane/platform.conf @@ -0,0 +1,2 @@ +[dataplane] +fal_plugin = ./fal_plugin_test.so diff --git a/tests/whole_dp/dummyfs/sys/bus/pci/devices/.dummy b/tests/whole_dp/dummyfs/sys/bus/pci/devices/.dummy deleted file mode 100644 
index e69de29b..00000000 diff --git a/tests/whole_dp/dummyfs/sys/devices/system/cpu/cpu0/topology/core_id b/tests/whole_dp/dummyfs/sys/devices/system/cpu/cpu0/topology/core_id deleted file mode 100644 index 573541ac..00000000 --- a/tests/whole_dp/dummyfs/sys/devices/system/cpu/cpu0/topology/core_id +++ /dev/null @@ -1 +0,0 @@ -0 diff --git a/tests/whole_dp/dummyfs/sys/devices/system/cpu/cpu0/topology/physical_package_id b/tests/whole_dp/dummyfs/sys/devices/system/cpu/cpu0/topology/physical_package_id deleted file mode 100644 index 573541ac..00000000 --- a/tests/whole_dp/dummyfs/sys/devices/system/cpu/cpu0/topology/physical_package_id +++ /dev/null @@ -1 +0,0 @@ -0 diff --git a/tests/whole_dp/dummyfs/sys/devices/system/cpu/cpu0/topology/thread_siblings b/tests/whole_dp/dummyfs/sys/devices/system/cpu/cpu0/topology/thread_siblings deleted file mode 100644 index b49fce72..00000000 --- a/tests/whole_dp/dummyfs/sys/devices/system/cpu/cpu0/topology/thread_siblings +++ /dev/null @@ -1 +0,0 @@ -00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000005 diff --git a/tests/whole_dp/dummyfs/sys/devices/system/cpu/cpu1/topology/core_id b/tests/whole_dp/dummyfs/sys/devices/system/cpu/cpu1/topology/core_id deleted file mode 100644 index d00491fd..00000000 --- a/tests/whole_dp/dummyfs/sys/devices/system/cpu/cpu1/topology/core_id +++ /dev/null @@ -1 +0,0 @@ -1 diff --git a/tests/whole_dp/dummyfs/sys/devices/system/cpu/cpu1/topology/physical_package_id b/tests/whole_dp/dummyfs/sys/devices/system/cpu/cpu1/topology/physical_package_id deleted file mode 100644 index 573541ac..00000000 --- a/tests/whole_dp/dummyfs/sys/devices/system/cpu/cpu1/topology/physical_package_id +++ /dev/null @@ -1 +0,0 @@ -0 diff --git a/tests/whole_dp/dummyfs/sys/devices/system/cpu/cpu1/topology/thread_siblings b/tests/whole_dp/dummyfs/sys/devices/system/cpu/cpu1/topology/thread_siblings deleted file mode 100644 index 
202edc0b..00000000 --- a/tests/whole_dp/dummyfs/sys/devices/system/cpu/cpu1/topology/thread_siblings +++ /dev/null @@ -1 +0,0 @@ -00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,0000000a diff --git a/tests/whole_dp/dummyfs/sys/devices/system/cpu/online b/tests/whole_dp/dummyfs/sys/devices/system/cpu/online deleted file mode 100644 index 8b0fab86..00000000 --- a/tests/whole_dp/dummyfs/sys/devices/system/cpu/online +++ /dev/null @@ -1 +0,0 @@ -0-1 diff --git a/tests/whole_dp/dummyfs/sys/module/.dummy b/tests/whole_dp/dummyfs/sys/module/.dummy deleted file mode 100644 index e69de29b..00000000 diff --git a/tests/whole_dp/meson.build b/tests/whole_dp/meson.build new file mode 100644 index 00000000..ee111c8b --- /dev/null +++ b/tests/whole_dp/meson.build @@ -0,0 +1,298 @@ +# SPDX-License-Identifier: LGPL-2.1-only +# Copyright (c) 2020-2021, AT&T Intellectual Property. All rights reserved. + +internal_test_inc = include_directories( + 'src' +) + +test_lib_sources = files( + 'src/dp_test.c', + 'src/dp_test_cmd_check.c', + 'src/dp_test_cmd_state.c', + 'src/dp_test_console.c', + 'src/dp_test_controller.c', + 'src/dp_test_cpp_lim.c', + 'src/dp_test_crypto_lib.c', + 'src/dp_test_crypto_utils.c', + 'src/dp_test_json_utils.c', + 'src/dp_test_lib.c', + 'src/dp_test_lib_exp.c', + 'src/dp_test_lib_intf.c', + 'src/dp_test_lib_pb.c', + 'src/dp_test_lib_pkt.c', + 'src/dp_test_lib_portmonitor.c', + 'src/dp_test_lib_tcp.c', + 'src/dp_test_netlink_state.c', + 'src/dp_test_npf_alg_lib.c', + 'src/dp_test_npf_alg_sip_data.c', + 'src/dp_test_npf_alg_sip_data1.c', + 'src/dp_test_npf_alg_sip_data2.c', + 'src/dp_test_npf_alg_sip_data3.c', + 'src/dp_test_npf_alg_sip_data4.c', + 'src/dp_test_npf_fw_lib.c', + 'src/dp_test_npf_lib.c', + 'src/dp_test_npf_nat_lib.c', + 'src/dp_test_npf_portmap_lib.c', + 'src/dp_test_npf_sess_lib.c', + 'src/dp_test_pktmbuf_lib.c', + 'src/dp_test_qos_lib.c', + 
'src/dp_test_route_broker.c', + 'src/dp_test_session_internal_lib.c', + 'src/dp_test_session_lib.c', + 'src/dp_test_str.c', + 'src/dp_test_stubs.c', + 'src/dp_test_stubs_linux.c', + 'src/dp_test_wrapped_funcs.c', + 'src/dp_test_xfrm_server.c', +) + +# Only files that declare a CK test suite (using DP_DECL_TEST_SUITE()) +# These will be iterated over using CK_RUN_SUITE for testing +# Everything else belongs in test_lib_sources above +check_tests = [ + 'dp_test_arp.c', + 'dp_test_bitmask.c', + 'dp_test_bridge.c', + 'dp_test_bridge_n.c', + 'dp_test_bridge_vlan_filter.c', + 'dp_test_cpp_lim_fal.c', + 'dp_test_cross_connect.c', + 'dp_test_crypto_block_policy.c', + 'dp_test_crypto_multi_tunnel.c', + 'dp_test_crypto_perf_scale.c', + 'dp_test_crypto_policy.c', + 'dp_test_crypto_site_to_site.c', + 'dp_test_crypto_site_to_site_passthru.c', + 'dp_test_esp.c', + 'dp_test_fails.c', + 'dp_test_gpc_pb.c', + 'dp_test_gre.c', + 'dp_test_gre6.c', + 'dp_test_if_config.c', + 'dp_test_intf_incomplete.c', + 'dp_test_ip.c', + 'dp_test_ip6.c', + 'dp_test_ip6_icmp.c', + 'dp_test_ip6_neigh.c', + 'dp_test_ip_arp.c', + 'dp_test_ip_icmp.c', + 'dp_test_ip_multicast.c', + 'dp_test_ip_n.c', + 'dp_test_ip_pic_edge.c', + 'dp_test_mac_limit.c', + 'dp_test_mpls.c', + 'dp_test_mstp_cmds.c', + 'dp_test_mstp_fwd.c', + 'dp_test_nat.c', + 'dp_test_npf_acl.c', + 'dp_test_npf_addrgrp.c', + 'dp_test_npf_alg_ftp.c', + 'dp_test_npf_alg_sip_nat.c', + 'dp_test_npf_alg_rpc.c', + 'dp_test_npf_alg_tftp.c', + 'dp_test_npf_bridge.c', + 'dp_test_npf_cgnat.c', + 'dp_test_npf_commands.c', + 'dp_test_npf_defrag.c', + 'dp_test_npf_dscp.c', + 'dp_test_npf_feat.c', + 'dp_test_npf_fw.c', + 'dp_test_npf_fw_ipv6.c', + 'dp_test_npf_golden.c', + 'dp_test_npf_hairpin.c', + 'dp_test_npf_icmp.c', + 'dp_test_npf_local.c', + 'dp_test_npf_mbuf.c', + 'dp_test_npf_nat.c', + 'dp_test_npf_nat64.c', + 'dp_test_npf_nptv6.c', + 'dp_test_npf_prot_group.c', + 'dp_test_npf_ptree.c', + 'dp_test_npf_qos.c', + 'dp_test_npf_rldb.c', + 
'dp_test_npf_ruleset_state.c', + 'dp_test_npf_session_limit.c', + 'dp_test_npf_snat_overrun.c', + 'dp_test_npf_tblset.c', + 'dp_test_npf_tcp.c', + 'dp_test_npf_vti.c', + 'dp_test_npf_zone.c', + 'dp_test_pbr.c', + 'dp_test_poe_cmds.c', + 'dp_test_portmonitor.c', + 'dp_test_portmonitor_commands.c', + 'dp_test_ppp.c', + 'dp_test_ptp.c', + 'dp_test_qos_basic.c', + 'dp_test_qos_burst.c', + 'dp_test_qos_class.c', + 'dp_test_qos_ext_buf_monitor.c', + 'dp_test_qos_fal.c', + 'dp_test_route_tracker.c', + 'dp_test_session.c', + 'dp_test_session_cmds.c', + 'dp_test_sfp.c', + 'dp_test_slow_path.c', + 'dp_test_storm_ctl.c', + 'dp_test_switch.c', + 'dp_test_switch_vlan.c', + 'dp_test_tcp_mss_clamp.c', + 'dp_test_vrf.c', + 'dp_test_vti.c', + 'dp_test_vxlan.c', + 'dp_test_xfrm.c', +] + +check_test_sources = [] +foreach test : check_tests + check_test_sources += files('src' / test) +endforeach + +if get_option('all_tests') + dataplane_test_full_run = ['-DDP_TEST_FULL_RUN'] + test_timeout = 600 +else + dataplane_test_full_run = [] + test_timeout = 120 +endif + +dataplane_test = executable( + 'dataplane_test', + sources: [ + check_test_sources, + dataplane_common_sources, + test_lib_sources + ], + dependencies: [ + check_dep, + dataplane_deps, + json_dep, + rte_pmd_ring_dep, + ], + include_directories: [ + public_include, + internal_inc, + public_test_include, + internal_test_inc + ], + override_options: [ + 'optimization=0', + 'b_lto=false' + ], + c_args: [ + dataplane_test_full_run, + '-U_FILE_OFFSET_BITS', # dp_test_stubs_linux.c does not like this + cc.get_supported_arguments([ + '-Wno-unused-parameter', + '-Wno-format-overflow' + ]) + ], + link_args : [ + '-Wl,-wrap,main', + '-Wl,-wrap,RAND_bytes', + '-Wl,-wrap,rte_pktmbuf_pool_create', + '-Wl,-wrap,rte_mempool_create', + '-Wl,-wrap,rte_eal_init', + '-Wl,-wrap,popen', + '-Wl,-wrap,pclose' + ], + link_with: [jsonw_library], + export_dynamic: true, + install: true, + install_dir: get_option('prefix') / get_option('bindir') +) + 
+sample_test_plugin_sources = files( + 'src/dp_test_pipeline.c', +) + +sample_test_plugin = shared_module('sample_test', + sources: [sample_test_plugin_sources, sample_generated_protobuf_c], + include_directories: [public_include, public_test_include], + dependencies: [dpdk_dep, protobuf_generated_c_dependency] +) + +fal_test_plugin_sources = files( + 'src/fal_plugin_test.c', + 'src/fal_plugin_sw_port.c', + 'src/fal_plugin_framer.c', + 'src/fal_plugin_qos.c', + 'src/fal_plugin_pm.c', + 'src/fal_plugin_policer.c', + 'src/fal_plugin_cpp_limiter.c', + 'src/fal_plugin_ptp.c' +) + +fal_test_plugin = shared_module('fal_plugin_test', + sources: [fal_test_plugin_sources], + dependencies: [ + check_dep, + dpdk_dep, + json_dep, + pipeline_dep, + protobuf_generated_c_dependency, + swport_dep, + ], + include_directories: [public_include, internal_inc], + override_options: [ + 'optimization=0', + 'b_lto=false' + ], + c_args: [ + '-Wno-unused-parameter' + ], + gnu_symbol_visibility: 'hidden', + name_prefix: '', + install: true, + install_dir: get_option('prefix') / get_option('libdir') / meson.project_name() +) + +test_driver_config = configure_file( + input: '../../dataplane-drivers-default.conf', + output: 'dataplane-drivers-default.conf', + copy: true) + +dummyfs = custom_target('dummyfs', + command: ['ln', '-s', + meson.source_root() / 'tests/whole_dp/dummyfs', + meson.current_build_dir() / 'dummyfs'], + output: 'dummyfs' +) + +if get_option('b_sanitize') == 'address' + dataplane_test_env=['ASAN_OPTIONS=verify_asan_link_order=0:detect_leaks=0'] +else + dataplane_test_env = [] +endif + +lcore_number = 0 +cores_available = run_command('nproc').stdout().to_int() + +foreach suite : check_tests + ['dp_test_pipeline.c'] + + suite_env = ['CK_RUN_SUITE=@0@'.format(suite), 'CK_XML_LOG_FILE_NAME=test_@0@.xml'.format(suite)] + dataplane_test_env + + test(suite, dataplane_test, + depends: [sample_plugin, sample_test_plugin, fal_test_plugin, dummyfs], + workdir: 
meson.current_build_dir(), + args: ['-l @0@'.format(lcore_number), '-d1', '-F', meson.build_root() / 'src/pipeline/nodes/sample', '-P', meson.current_build_dir()], + env: suite_env, + timeout: test_timeout + ) + + lcore_number += 1 + if (lcore_number >= cores_available) + lcore_number = 0 + endif + +endforeach + +valgrind = find_program('valgrind', required: false) +if valgrind.found() + add_test_setup('valgrind', + exe_wrapper : [valgrind, + '--error-exitcode=1', '--tool=memcheck', '--leak-check=full', '--show-reachable=no', + '--suppressions=@0@'.format(meson.current_source_dir() / 'valgrind_suppressions') + ], + timeout_multiplier : 100) +endif diff --git a/tests/whole_dp/readme.md b/tests/whole_dp/readme.md index d1efd68c..6f21de6f 100644 --- a/tests/whole_dp/readme.md +++ b/tests/whole_dp/readme.md @@ -18,13 +18,24 @@ build and run the tests. Setting `DEB_BUILD_OPTIONS="verbose"` `DH_VERBOSE=1` will generate detailed test output. -For rebuilds, you can simply do `make -j4 check` or -`make -j4 dataplane_test_run dataplane_test_run` from the build directory -(most likely `obj-x86_64-linux-gnu`). This will also re-run configure and -automake as required if you update any of the makefiles. +Additional 'slow' tests can optionally be enabled with either: + +- If using the debian package build: `DEB_BUILD_OPTIONS="all_tests"` +- If using meson directly: `meson setup -Dall_tests=true ` + +It's often useful to disable Link Time Optimization to reduce rebuild times: + +- If using the debian package build: `DEB_BUILD_OPTIONS="no_lto"` +- If using meson directly: `meson setup -Db_lto=false ` + +The tests are integrated into the [Meson Unit Test execution framework][1]. +Use `meson test --help` for more information. + +For rebuilds, you can simply do `meson test` from the build directory +(most likely `build`). 
Detailed test output can also be generated with the following: -`make -j4 check V=1 VERBOSE=1` +`meson test -v` ### Chroot Environment Whilst the tests build and run via OBS and osc-buildpackage, for ease @@ -42,21 +53,36 @@ to avoid the need to ensure the settings in the chroot match those outside. Note that binding `/proc` isn't necessary to build and run the unit tests, but it avoids warnings if gdb is used on a running process. + ## Running VR Tests -These are run automatically as part of `make check` and the package build. -However, they can be run manually using `make -j4 dataplane_test_run`. +These are run automatically as part of `meson test` and the package build. ## Running Individual Tests -Run a single suite using the `CK_RUN_SUITE` environment variable: +You can get a list of CK test suites with: +`meson test --list` -`make -j4 dataplane_test_run CK_RUN_SUITE=dp_test_bridge.c` +Individual CK test suites can be run with: +`meson test dp_test_bridge.c` Run a single test using the `CK_RUN_CASE` environment variable: -`make -j4 dataplane_test_run CK_RUN_CASE=bridge_unicast` +`CK_RUN_CASE=bridge_unicast meson test dp_test_bridge.c` -## Directly executing the test binary -The test binary can be directly executed, which is particularly useful with GDB: +or run directly after building only the test executable. + +``` shell +ninja tests/whole_dp/dummyfs tests/whole_dp/fal_plugin_test.so tests/whole_dp/libsample_test.so src/pipeline/nodes/sample/sample_plugin.so +ninja tests/whole_dp/dataplane_test +cd tests/whole_dp +CK_RUN_CASE=bridge_unicast ./dataplane_test -d2 -F ../../src/pipeline/nodes/sample -P . 
+``` + +## Running test in GDB +The test binary can be executed in gdb: + +`CK_RUN_CASE=bridge_unicast meson test --gdb -v dp_test_bridge.c` + +or directly: `CK_RUN_CASE=bridge_unicast gdb --args ./dataplane_test -f -d2` @@ -64,6 +90,29 @@ The test binary can be directly executed, which is particularly useful with GDB: Use `./dataplane_test -h` for more help +## Adding tests via plugins +The dataplane supports adding features via plugins that live in different git repos. +To be able to test this there is support for adding UT plugins too. These can then +be used to test the feature plugins. + +The vyatta-dataplane-dev package will install all the files needed to build the +feature plugins against the dataplane and also to build and run the unit tests. + +To run the unit tests from outside the dataplane source tree you need to run in +'external' mode. This is done by passing in the -E flag. Doing this causes the +tests to use some different paths for files, pulling in the ones that are provided +by the dev package. + +For example, the bfd dataplane plugin will run the tests as: +`/usr/bin/dataplane_test -d 0 -E` + +When doing this the feat plugins will be picked up from: +`/usr/lib/*/vyatta-dataplane/pipeline/plugins/sample_plugin.so` + +And the unit test plugins will be picked up from the directory that the test binary +is invoked from. + + ## Checking for memory leaks It is good practice to ensure your code is not leaking memory. 
To that @@ -108,6 +157,8 @@ Dataplane: * pub/sub to controller * dealer/router to controller + * push/pull to broker + * push/pull for xfrms * console connection Test code: @@ -115,9 +166,10 @@ Test code: * Wrapper functions (main and a version of random crypto uses) * Dummy controller providing the zmq connections and ability to use them * zmq console connection - send commands and get output + * zmq broker connection - to send route updates * JSON parsing to allow us to parse command replies - * netlink generation to inject state - * dummy /proc /sys filesystems + * netlink generation to inject state (interfaces etc) + * injection of routes (via a protobuf based format) * Check UT infra http://libcheck.github.io/check/ * New 'main' which brings the tests up and runs them * stubs for dataplane code not included, shadow.c and a few other files @@ -131,20 +183,21 @@ then calls. The dataplane code then goes through the normal init sequence. It calls 'rte_eal_init' to initialise dpdk. The standard dpdk init queries the pci bus to find the set of interfaces, and it queries the filesystem to get the number of cores. The test -environment fakes this up so that consistent results are returned irrespective of -where the tests are being run. Arguments are passed into the dpdk init to stop it using -hugepages (we are not testing performance) and to provide a set of interfaces - each of -the interfaces we use is using the rte_eth_null driver, which is a standard PMD. +environment by default uses only a single core so we can be sure that we are not +asking for cores the processor does not have. Arguments are passed into the dpdk init +to stop it using hugepages (we are not testing performance) and to provide a set of +interfaces - each of the interfaces we use is using the rte_eth_null driver, which is +a standard PMD. 
Once it has gone through the dpdk init, it proceeds through the rest of the init as normal, and then the forwarding thread drops into the forwarding_loop, and the -master thread is in the master_loop. +main thread is in the main_loop. To allow the dataplane to get through the init handshake the test thread has to provide the controller/console ends of the zmq connections. It does this by spawning a further thread to provide the controller request thread side. This thread will listen to the MYPORT messages, and reply as required to allow init to proceed. Meanwhile the test -thread (which is the zmq publisher) waits until the master_state is ready (i.e the +thread (which is the zmq publisher) waits until the main loop is ready (i.e the dataplane is ready). It then creates the test interfaces (sends the default netlink state) for each of them, and then is ready to start the tests. @@ -162,7 +215,7 @@ the next test. ### Why did we choose this model -We chose this model (as opposed to per file tests) because we wanted to be able to +We chose this model (as opposed to the per file tests) because we wanted to be able to test the dataplane as a whole. This approach gives the following benefits: - test the dataplane from observable input/output @@ -191,7 +244,7 @@ behaviour. Most tests want to do some variation of: * clean The state is injected to the dataplane via the zmq pub/sub socket, so we don't want to -send the packets until that state has been fully applied in the dataplane by the master +send the packets until that state has been fully applied in the dataplane by the main thread (in the standard way). The forwarding thread (we pretend we are on a 2 core system) is polling the interfaces rx queues to see if there are packets to forward. They are forwarded as soon as they are found, so we need to make sure we don't insert the @@ -230,6 +283,8 @@ Interfaces are named 'dpxTy' and we create 20 interfaces. 
These interfaces get setup at init via the netlink APIs (see below). +We also add 2 switchport interfaces to allow testing on switchports. + Within the dpdk, each of these PMDs has an rx and a tx ring associated with it. When the PMD is queried to see if it has any packets, it returns the packets on the rx ring. To inject packets we simply add them to the ring on the receiving interface. @@ -279,7 +334,7 @@ Verifying with JSON follows this standard pattern: * given a string (for example a route) turn this into the set of 'expected JSON' * create the 'cmd' string - * every millisec for 1 second: + * every millisec for 2 seconds: * send the cmd to dataplane via the console zmq * wait for the response * compare the expected JSON with the returned JSON. The comparisons can be @@ -320,12 +375,8 @@ add/remove a route via netlink. The netlink func will pass the line etc through a failure the user knows what was being attempted when the failure happened. - - - ### Creating a packet and the expectations for it - #### Interface Test name vs Real name Tests are written using the distributed interface format @@ -411,8 +462,8 @@ transformed to be on the same vplane. ####### Switch Ports -Two switch ports have been added to the system, "sw_port_0_0" and -"sw_port_0_7". These interfaces are in addition to all the local +Two switch ports have been added to the system, "dp1sw_port_0_0" and +"dp1sw_port_0_7". These interfaces are in addition to all the local interfaces. Specific switch port tests have been added. The switch ports are created with dp1T0 as their hardware switch backplane interconnect. Specific tests use this interconnet to queue and receive @@ -489,3 +540,4 @@ These are some of the things on the todo list: * Potential improvements to the way packets are displayed on failures. 
+[1]: https://mesonbuild.com/Unit-tests.html "Meson Unit Test Execution Framework" diff --git a/tests/whole_dp/src/dp_test.c b/tests/whole_dp/src/dp_test.c index dd2d25c8..20d1cb35 100644 --- a/tests/whole_dp/src/dp_test.c +++ b/tests/whole_dp/src/dp_test.c @@ -1,11 +1,12 @@ /* - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015-2016 by Brocade Communications Systems, Inc. * All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only */ - +#include +#include #include #include #include @@ -20,17 +21,19 @@ #include #include #include "vplane_debug.h" -#include "master.h" +#include "controller.h" #include "if_llatbl.h" #include "dp_test_controller.h" #include "dp_test.h" -#include "dp_test_lib.h" -#include "dp_test_lib_intf.h" -#include "dp_test_cmd_check.h" -#include "dp_test_pktmbuf_lib.h" +#include "dp_test_lib_internal.h" +#include "dp_test_lib_intf_internal.h" +#include "dp_test/dp_test_cmd_check.h" +#include "dp_test_pktmbuf_lib_internal.h" #include "dp_test_console.h" +#include "dp_test/dp_test_macros.h" #include "dp_test_route_broker.h" +#include "dp_test_xfrm_server.h" /* DPDK debug level */ char rte_log_level[2]; @@ -47,23 +50,23 @@ char rte_log_level[2]; * - setup controller: * - create ZMQ subscriber (controller is publisher) * - register handlers for incoming pub messages - * - prepare master thread (master_loop) + * - prepare main thread (main_loop) * - create ZMQ dealer (connected to controller ROUTER) * - * - MASTER_SETUP: + * - MAIN_SETUP: * setup interfaces (build shadow state, tuntaps ...) * send data to controller: * "MYPORT" + 6 part with data in. * wait for response: * "OK" + seq + ifindex assigned for this port. 
* - * - MASTER_RESYNC: + * - MAIN_RESYNC: * send "WHATSUP" on DEALER/ROUTER socket * do loop polling for messages: * process netlink/cmd messages * until msg is THATSALLFOLKS * - * - MASTER_READY: + * - MAIN_READY: * do while not shutdown: * - process incoming events. * @@ -143,9 +146,12 @@ dp_test_usage(int status) " -u, --uplink Run uplink tests (remote controller)\n" " -h, --help Display this help and exit\n" " -p, --poison Poison mbuf data before each test\n" - " -r, --routing-domain Use routing-domain VRF model\n" + " -F --feat_plugin_dir Extra directory to check for feat plugins\n" + " -P --plugin-directory Unit-Test plugin directory\n" + " -E, --external When being run from plugin code\n" + " -H, --platform Specify the platform_conf file to use\n" + " -l, --lcore_list Specify the lcore list passed to dpdk. eg. '0,1'\n" "ENV VARS:\n" - " CK_RUN_SUITE Run a single suite\n" " CK_RUN_CASE Run a single test\n" " eg CK_RUN_CASE=bridge_unicast dp_test\n", @@ -154,6 +160,27 @@ dp_test_usage(int status) exit(status); } +#define MAX_UT_PLUGIN_DIR_LEN 128 + +static char dp_ut_plugin_dir[MAX_UT_PLUGIN_DIR_LEN] = "."; +char dp_ut_dummyfs_dir[PATH_MAX] = "dummyfs/"; +static char drv_cfgfile[PATH_MAX] = "dataplane-drivers-default.conf"; +static const char *dp_feat_plugin_dir = "."; +static const char *dp_test_platform_file = PLATFORM_FILE; + +/* + * Is the test being run from an external code tree, in which case a different + * set of paths are used. + */ +bool from_external; + +/* + * lcore list to pass through to dpdk. By default assume only 1 cpu as we + * know that all processors always have that. + * eg. 
Using cores 0-3, could be presented as "0,1,2,3" + */ +static const char *lcore_list = "0"; + static void dp_test_debug_default(void) { @@ -207,6 +234,8 @@ dp_test_debug_arg(const char *optarg) bool dp_test_poison; uint32_t count = 1; /* The number of times to run each test */ +static char *extra_cfg_buf; + static int dp_test_parse_args(int argc, char **argv) { @@ -219,11 +248,15 @@ dp_test_parse_args(int argc, char **argv) { "help", no_argument, NULL, 'h' }, { "poison", no_argument, NULL, 'p' }, { "count", required_argument, NULL, 'c' }, - { "routing-domain", no_argument, NULL, 'r' }, + { "feat_plugin_dir", required_argument, NULL, 'F'}, + { "plugin-directory", required_argument, NULL, 'P' }, + { "platform", required_argument, NULL, 'H' }, + { "external", no_argument, NULL, 'E' }, + { "lcore_list", required_argument, NULL, 'l' }, { NULL, 0, NULL, 0} }; - while ((opt = getopt_long(argc, argv, "c:d:uhpr", + while ((opt = getopt_long(argc, argv, "c:d:P:F:uhpEH:l:", lgopts, &option_index)) != EOF) { switch (opt) { @@ -242,7 +275,24 @@ dp_test_parse_args(int argc, char **argv) case 'c': count = strtoul(optarg, NULL, 0); break; - + case 'F': + dp_feat_plugin_dir = optarg; + break; + case 'H': + dp_test_platform_file = optarg; + break; + case 'P': + snprintf(dp_ut_plugin_dir, sizeof(dp_ut_plugin_dir), + "%s", optarg); /* bounded, always NUL-terminated */ + printf("%s: plug-in directory\n", dp_ut_plugin_dir); + break; + case 'E': + from_external = true; + printf("UTs being run from external repo, using paths from dev package\n"); + break; + case 'l': + lcore_list = optarg; + break; default: fprintf(stderr, "Unknown option %c\n", opt); dp_test_usage(1); @@ -325,7 +375,7 @@ dp_test_thread_run(zsock_t *pipe, void *args) * Wait for the vplaned-local ready state to be reached. 
* For VR this is a no-op */ - while (!dp_test_master_ready(CONT_SRC_UPLINK)) + while (!dp_test_main_ready(CONT_SRC_UPLINK)) sleep(1); json_object *intf_set; @@ -336,7 +386,7 @@ dp_test_thread_run(zsock_t *pipe, void *args) * Wait for the VR vplaned / vplaned-remote via uplink ready * state to be reached */ - while (!dp_test_master_ready(CONT_SRC_MAIN)) + while (!dp_test_main_ready(CONT_SRC_MAIN)) sleep(1); dp_test_intf_create_default_set(intf_set); @@ -379,8 +429,33 @@ dp_test_thread_run(zsock_t *pipe, void *args) zsock_send(pipe, "i", dp_test_thread_internal_retval); } -int stat(const char *path, struct stat *buf) +int dp_test_add_to_cfg_file(int argc, char **argv) { + int i; + int size = 0; + int remaining; + char *ptr; + + if (argc > DP_MAX_EXTRA_CFG_LINES) + return -EINVAL; + + for (i = 0; i < argc; i++) + size += strlen(argv[i]); + + size += argc * 2; + + extra_cfg_buf = malloc(size); + if (!extra_cfg_buf) + return -ENOMEM; + + remaining = size; + ptr = extra_cfg_buf; + for (i = 0; i < argc; i++) { + size = snprintf(ptr, remaining, "%s\n", argv[i]); + remaining -= size; + ptr += size; + } + return 0; } @@ -394,9 +469,10 @@ static char *get_conf_file_name(void) } static void generate_conf_file(const char *cfgfile, const char *console_ep, - const char *console_ep_uplink, char *req_ipc, char *req_ipc_uplink, - const char *broker_ctrl_ep) + const char *broker_ctrl_ep, + const char *xfrm_server_push_ep, + const char *xfrm_server_pull_ep) { char buf[1024]; FILE *f; @@ -407,6 +483,9 @@ static void generate_conf_file(const char *cfgfile, const char *console_ep, exit(2); } + if (!extra_cfg_buf) + extra_cfg_buf = (char *)""; + const char *controller_ip_str, *dp_ip_str, *comment_str, *uplink_mac; const char *control_intf; uint16_t uuid; @@ -432,14 +511,17 @@ static void generate_conf_file(const char *cfgfile, const char *console_ep, "%s%s\n" /* vr defines local ip */ "%s%s\n" /* uplink uses dynamic address for local ip */ "control=%s\n" - "control-uplink=%s\n" 
"interface=lo\n" "uuid=%i\n" "dataplane-id=%i\n" "uplink-mac=%s\n" + "%s\n" "[RIB]\n" "%s%s\n" /* vr defines local ip */ - "control=%s\n", + "control=%s\n" + "[XFRM_CLIENT]\n" + "pull=%s\n" + "push=%s\n", dp_test_pname, comment_str, controller_ip_str, @@ -450,13 +532,15 @@ static void generate_conf_file(const char *cfgfile, const char *console_ep, control_intf ? "control-interface=" : "", control_intf ? control_intf : "", console_ep, - console_ep_uplink, uuid, dp_id, uplink_mac, + extra_cfg_buf, dp_ip_str ? "ip=" : "", dp_ip_str ? dp_ip_str : "", - broker_ctrl_ep); + broker_ctrl_ep, + xfrm_server_push_ep, + xfrm_server_pull_ep); if (fwrite(buf, 1, strlen(buf) + 1, f) != strlen(buf) + 1) { fprintf(stderr, "Unable to write config\n"); @@ -475,54 +559,116 @@ static const char *get_rte_file_prefix(void) return file_prefix; } -static void cleanup_temp_files(const char *cfgfile, - const char *rte_file_prefix) +static void cleanup_temp_files(const char *cfgfile) { - char buffer[PATH_MAX]; - if (unlink(cfgfile)) perror("unlink dp config file"); - /* refer to eal_runtime_config_path in dpdk */ - snprintf(buffer, sizeof(buffer), "%s/.%s_config", getenv("HOME"), - rte_file_prefix); - if (unlink(buffer)) - perror("unlink dpdk config file"); } +static void unit_test_load_plugin(const char *buf) +{ + int (*unit_test_plugin_init)(const char **name); + int rv; + void *handle; + const char *signature_buf; + + handle = dlopen(buf, RTLD_NOW); + if (handle == NULL) { + RTE_LOG(ERR, DATAPLANE, + "failed to load unit_test plug-in: %s\n", + dlerror()); + return; + } + + /* Check it has an init func */ + unit_test_plugin_init = dlsym(handle, "dp_ut_plugin_init"); + if (!unit_test_plugin_init) { + /* Not a unit_test plugin library */ + dlclose(handle); + return; + } + + RTE_LOG(INFO, DATAPLANE, + "loaded unit-test plug-in: %s\n", buf); + rv = unit_test_plugin_init(&signature_buf); + if (rv) { + RTE_LOG(INFO, DATAPLANE, + "Failed to initialised unit-test plug-in: %s\n", buf); + 
dlclose(handle); + return; + } + + RTE_LOG(INFO, DATAPLANE, + "initialised unit plug-in: %s %s\n", buf, signature_buf); +} + + +static void unit_test_load_plugins(const char *directory) +{ + /* + * Try to load every "*.so" entry of directory as a unit-test plugin + */ + DIR *dp; + struct dirent *ep; + + RTE_LOG(INFO, DATAPLANE, "Checking for unit-test plugins in %s\n", + directory); + dp = opendir(directory); /* nothing may touch errno before the check below */ + + if (dp != NULL) { + while ((ep = readdir(dp))) { + /* restrict to .so files only */ + char *tmp = strrchr(ep->d_name, '.'); + + if (!tmp) + continue; + if (strcmp(tmp, ".so") != 0) + continue; + + char buf[1024]; + + snprintf(buf, sizeof(buf), "%s/%s", + directory, ep->d_name); + unit_test_load_plugin(buf); + } + } else { + /* + * The directory not existing is normal so don't log + * an error in that case. + */ + if (errno != ENOENT) + RTE_LOG(ERR, DATAPLANE, + "error opening unit-test plug-in directory \"%s\": %s\n", + directory, strerror(errno)); + return; + } + closedir(dp); +} + + + bool dp_test_fal_plugin_called; +uint32_t dp_test_fal_plugin_state; +void *dp_test_fal_plugin_ptr; bool dp_test_abort_on_fail = true; int __wrap_main(int argc, char **argv) { char *cfgfile = get_conf_file_name(); - const char *drv_cfgfile = "dataplane-drivers-default.conf"; const char *console_ep = dp_test_console_set_endpoint(CONT_SRC_MAIN); - const char *console_ep_uplink = - dp_test_console_set_endpoint(CONT_SRC_UPLINK); char *broker_ctrl_ep; char *req_ipc, *req_ipc_uplink = NULL; const char *rte_file_prefix = get_rte_file_prefix(); - const char *dp_args[] = { - "/usr/sbin/dataplane_test", - "-f", cfgfile, - "-c", drv_cfgfile, - "-C", console_ep, - "-g", "root", - "--", - "-n", "1", - "-c", "0x1", - "--syslog", "local6", - "--no-huge", - "-m", "1024", - "--file-prefix", rte_file_prefix, - "--log-level", rte_log_level, - }; int ret; int dp_test_real_main_retval; int dp_test_thread_internal_retval; zactor_t *dp_test_actor; zactor_t *dp_test_broker_actor; + zactor_t *dp_test_xfrm_server_actor; + 
char *xfrm_server_resp; + char xfrm_push_url[MAX_XFRM_SOCKET_NAME_SIZE]; + char xfrm_pull_url[MAX_XFRM_SOCKET_NAME_SIZE]; /* Preserve name of myself. */ dp_test_pname = strrchr(argv[0], '/'); @@ -542,6 +688,20 @@ int __wrap_main(int argc, char **argv) ret = dp_test_parse_args(argc, argv); if (ret < 0) return -1; + + if (from_external) { + /* Setup paths if running from an external src tree */ + strncpy(dp_ut_dummyfs_dir, + "/usr/share/vyatta-dataplane/tests/whole_dp/dummyfs/", + PATH_MAX); + strncpy(drv_cfgfile, + "/usr/share/vyatta-dataplane/tests/dataplane-drivers-default.conf", + PATH_MAX); + } + + /* Load unit-test plugins if present */ + unit_test_load_plugins(dp_ut_plugin_dir); + /* Start req and pub threads to emulate vplaned */ dp_test_actor = zactor_new(dp_test_thread_run, NULL); if (!dp_test_actor) { @@ -554,8 +714,17 @@ int __wrap_main(int argc, char **argv) dp_test_broker_actor = zactor_new(dp_test_broker_thread_run, NULL); broker_ctrl_ep = zstr_recv(dp_test_broker_actor); - generate_conf_file(cfgfile, console_ep, console_ep_uplink, req_ipc, - req_ipc_uplink, broker_ctrl_ep); + dp_test_xfrm_server_actor = + zactor_new(dp_test_xfrm_server_thread_run, NULL); + xfrm_server_resp = zstr_recv(dp_test_xfrm_server_actor); + dp_test_assert_internal(sscanf(xfrm_server_resp, "%s %s", + xfrm_push_url, xfrm_pull_url) == 2); + + generate_conf_file(cfgfile, console_ep, req_ipc, + req_ipc_uplink, broker_ctrl_ep, + xfrm_push_url, xfrm_pull_url); + + zstr_free(&xfrm_server_resp); zstr_free(&broker_ctrl_ep); zstr_free(&req_ipc); if (req_ipc_uplink) @@ -568,6 +737,24 @@ int __wrap_main(int argc, char **argv) lltable_probe_timer_set_enabled(false); dp_test_intf_init(); + const char *dp_args[] = { + "/usr/sbin/dataplane_test", + "-f", cfgfile, + "-c", drv_cfgfile, + "-F", dp_feat_plugin_dir, + "-C", console_ep, + "-g", "root", + "-P", dp_test_platform_file, + "--", + "-n", "1", + "-l", lcore_list, + "--syslog", "local6", + "--no-huge", + "-m", "1024", + "--file-prefix", 
rte_file_prefix, + "--log-level", rte_log_level, + }; + dp_test_real_main_retval = __real_main(ARRAY_SIZE(dp_args), (char **)dp_args); @@ -577,7 +764,8 @@ int __wrap_main(int argc, char **argv) zsock_recv(dp_test_actor, "i", &dp_test_thread_internal_retval); zactor_destroy(&dp_test_actor); zactor_destroy(&dp_test_broker_actor); - cleanup_temp_files(cfgfile, rte_file_prefix); + zactor_destroy(&dp_test_xfrm_server_actor); + cleanup_temp_files(cfgfile); /* * Since return code is see by shell as a uchar we effectively return - @@ -591,6 +779,6 @@ int __wrap_main(int argc, char **argv) */ if (dp_test_real_main_retval != 0) return dp_test_real_main_retval - 128; - else - return dp_test_thread_internal_retval; + + return dp_test_thread_internal_retval; } diff --git a/tests/whole_dp/src/dp_test.h b/tests/whole_dp/src/dp_test.h index 5ab56465..e5a216a7 100644 --- a/tests/whole_dp/src/dp_test.h +++ b/tests/whole_dp/src/dp_test.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017,2019-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015-2016 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -19,8 +19,8 @@ */ #include #include -#include "dp_test_cmd_check.h" -#include "dp_test_macros.h" +#include "dp_test/dp_test_cmd_check.h" +#include "dp_test/dp_test_macros.h" #include "if_var.h" @@ -45,7 +45,6 @@ int dp_test_debug_get(void); /* The entry point into the dataplane test process */ int dataplane_test_main(int argc, char **argv); int dp_test_run_tests(void *ctx); -Suite *dp_test_get_suite(const char *filename); int __wrap_main(int argc, char **argv); int __real_main(int argc, char **argv); @@ -70,4 +69,7 @@ int __real_rte_eal_init(int argc, char **argv); FILE *__wrap_popen(const char *command, const char *type); int __wrap_pclose(FILE *stream); +extern bool from_external; +extern char dp_ut_dummyfs_dir[PATH_MAX]; + #endif /* _DP_TEST_H_ */ diff --git a/tests/whole_dp/src/dp_test_arp.c b/tests/whole_dp/src/dp_test_arp.c index 712a737a..4bc87913 100644 --- a/tests/whole_dp/src/dp_test_arp.c +++ b/tests/whole_dp/src/dp_test_arp.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015-2017 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -19,13 +19,16 @@ #include "dp_test.h" #include "dp_test_controller.h" -#include "dp_test_netlink_state.h" -#include "dp_test_lib.h" +#include "dp_test_netlink_state_internal.h" +#include "dp_test_lib_internal.h" #include "dp_test_lib_exp.h" -#include "dp_test_lib_intf.h" -#include "dp_test_pktmbuf_lib.h" +#include "dp_test_lib_intf_internal.h" +#include "dp_test_pktmbuf_lib_internal.h" #include "dp_test_cmd_state.h" +#include "protobuf/GArpConfig.pb-c.h" +#include "protobuf/DataplaneEnvelope.pb-c.h" + /* * Arp for interface address * - no existing arp entry - create and reply @@ -53,13 +56,13 @@ struct ether_arp { struct arphdr ea_hdr; /* fixed-size header */ - struct ether_addr arp_sha; /* sender hardware address */ + struct rte_ether_addr arp_sha; /* sender hardware address */ in_addr_t arp_spa; /* sender protocol address */ - struct ether_addr arp_tha; /* target hardware address */ + struct rte_ether_addr arp_tha; /* target hardware address */ in_addr_t arp_tpa; /* target protocol address */ -} __attribute__ ((__packed__)); +} __attribute__ ((__packed__)) __attribute__((aligned(2))); -static struct ether_addr peer_mac, peer_mac2; +static struct rte_ether_addr peer_mac, peer_mac2; static in_addr_t peer_ip, not_our_ip; static const char *iifmac; static struct arp_stats zero_arp_stats; @@ -72,8 +75,8 @@ dp_test_zero_arp_stats(const char *ifname) struct vrf *vrf; ifindex = dp_test_intf_name2index(ifname); - ifp = ifnet_byifindex(ifindex); - vrf = vrf_get_rcu_from_external(ifp->if_vrfid); + ifp = dp_ifnet_byifindex(ifindex); + vrf = dp_vrf_get_rcu_from_external(ifp->if_vrfid); if (vrf) memset(&vrf->v_arpstat, 0, sizeof(vrf->v_arpstat)); @@ -86,11 +89,11 @@ dp_test_zero_arp_stats(const char *ifname) struct vrf *vrf; \ \ ifindex = dp_test_intf_name2index(ifname); \ - ifp = ifnet_byifindex(ifindex); \ - vrf = vrf_get_rcu_from_external(ifp->if_vrfid); \ + ifp = dp_ifnet_byifindex(ifindex); \ + vrf = dp_vrf_get_rcu_from_external(ifp->if_vrfid); \ \ if (vrf) { \ - 
dp_test_fail_unless(vrf->v_arpstat.stat == value, \ + dp_test_fail_unless(vrf->v_arpstat.stat == (value), \ "\nIncorrect ARP stats counter " \ #stat \ " %"PRIu64" - should be %d\n", \ @@ -108,8 +111,8 @@ static void _dp_test_verify_all_arp_stats_zero(const char *ifname, struct vrf *vrf; ifindex = dp_test_intf_name2index(ifname); - ifp = ifnet_byifindex(ifindex); - vrf = vrf_get_rcu_from_external(ifp->if_vrfid); + ifp = dp_ifnet_byifindex(ifindex); + vrf = dp_vrf_get_rcu_from_external(ifp->if_vrfid); if (vrf) { _dp_test_fail_unless( @@ -444,37 +447,71 @@ static void dp_test_verify_garp_state(const char *file, const char *func, ctx.rep_str = garp_rep_str; if (ifname) { - ifp = ifnet_byifname(ifname); + ifp = dp_ifnet_byifname(ifname); dp_test_verify_intf_garp_state(ifp, &ctx); } else - ifnet_walk(dp_test_verify_intf_garp_state, &ctx); + dp_ifnet_walk(dp_test_verify_intf_garp_state, &ctx); +} + +static void +dp_test_create_and_send_garp_msg(const char *ifname, + const bool set, + const GArpConfig__ArpOp op, + const GArpConfig__GarpPktAction action) +{ + int len; + GArpConfig garp = GARP_CONFIG__INIT; + garp.ifname = (char *)ifname; + garp.set = set; + garp.has_set = true; + garp.op = op; + garp.has_op = true; + garp.action = action; + garp.has_action = true; + + len = garp_config__get_packed_size(&garp); + + void *buf2 = malloc(len); + dp_test_assert_internal(buf2); + + garp_config__pack(&garp, buf2); + + dp_test_lib_pb_wrap_and_send_pb("vyatta:garp", buf2, len); +} + +static void +dp_test_garp_execute(const char *ifname, + const bool set, + const GArpConfig__ArpOp op, + const GArpConfig__GarpPktAction action) +{ + dp_test_create_and_send_garp_msg(ifname, set, op, action); } static void dp_test_garp_drop(int arp_op, const char *peer_mac, const char *exp_peer_mac, const char *file, const char *function, int line) { - const char *op_str; const char *exp_req, *exp_rep; struct dp_test_expected *exp; struct rte_mbuf *arp_pak; char real_ifname[IFNAMSIZ]; + GArpConfig__ArpOp 
garp_op; if (arp_op == ARPOP_REQUEST) { - op_str = "request"; + garp_op = GARP_CONFIG__ARP_OP__ARPOP_REQUEST; exp_req = "Drop"; exp_rep = "Update"; } else { - op_str = "reply"; + garp_op = GARP_CONFIG__ARP_OP__ARPOP_REPLY; exp_req = "Update"; exp_rep = "Drop"; } /* set default action to drop */ - dp_test_send_config_src(dp_test_cont_src_get(), - "arp gratuitous SET %s %s drop", - dp_test_intf_real(IIFNAME, real_ifname), - op_str); + dp_test_garp_execute(dp_test_intf_real(IIFNAME, real_ifname), + true, garp_op, + GARP_CONFIG__GARP_PKT_ACTION__GARP_PKT_DROP); dp_test_verify_garp_state(file, function, line, real_ifname, exp_req, exp_rep); @@ -505,9 +542,9 @@ static void dp_test_garp_drop(int arp_op, const char *peer_mac, dp_test_verify_all_arp_stats_zero(IIFNAME); /* restore default operation */ - dp_test_send_config_src(dp_test_cont_src_get(), - "arp gratuitous DELETE %s %s drop", - real_ifname, op_str); + dp_test_garp_execute(dp_test_intf_real(IIFNAME, real_ifname), + false, garp_op, + GARP_CONFIG__GARP_PKT_ACTION__GARP_PKT_DROP); dp_test_verify_garp_state(file, function, line, real_ifname, "Update", "Update"); } @@ -678,33 +715,38 @@ DP_START_TEST(garp, garp_cmd) char real_ifname[IFNAMSIZ]; /* set default action to drop */ - dp_test_send_config_src(dp_test_cont_src_get(), - "arp gratuitous SET all request drop"); - dp_test_send_config_src(dp_test_cont_src_get(), - "arp gratuitous SET all reply drop"); + dp_test_garp_execute("", true, GARP_CONFIG__ARP_OP__ARPOP_REQUEST, + GARP_CONFIG__GARP_PKT_ACTION__GARP_PKT_DROP); + + dp_test_garp_execute("", true, GARP_CONFIG__ARP_OP__ARPOP_REPLY, + GARP_CONFIG__GARP_PKT_ACTION__GARP_PKT_DROP); + dp_test_verify_garp_state(__FILE__, __func__, __LINE__, NULL, "Drop", "Drop"); /* override one action on one interface */ dp_test_intf_real("dp1T3", real_ifname); - dp_test_send_config_src(dp_test_cont_src_get(), - "arp gratuitous SET %s request update", - real_ifname); + + dp_test_garp_execute(real_ifname, true, + 
GARP_CONFIG__ARP_OP__ARPOP_REQUEST, + GARP_CONFIG__GARP_PKT_ACTION__GARP_PKT_UPDATE); + dp_test_verify_garp_state(__FILE__, __func__, __LINE__, real_ifname, "Update", "Drop"); /* clear one action on one interface */ - dp_test_send_config_src(dp_test_cont_src_get(), - "arp gratuitous DELETE %s request update", - real_ifname); + dp_test_garp_execute(real_ifname, false, + GARP_CONFIG__ARP_OP__ARPOP_REQUEST, + GARP_CONFIG__GARP_PKT_ACTION__GARP_PKT_UPDATE); + dp_test_verify_garp_state(__FILE__, __func__, __LINE__, real_ifname, "Drop", "Drop"); /* restore default action */ - dp_test_send_config_src(dp_test_cont_src_get(), - "arp gratuitous DELETE all request drop"); - dp_test_send_config_src(dp_test_cont_src_get(), - "arp gratuitous DELETE all reply drop"); + dp_test_garp_execute("", false, GARP_CONFIG__ARP_OP__ARPOP_REQUEST, + GARP_CONFIG__GARP_PKT_ACTION__GARP_PKT_DROP); + dp_test_garp_execute("", false, GARP_CONFIG__ARP_OP__ARPOP_REPLY, + GARP_CONFIG__GARP_PKT_ACTION__GARP_PKT_DROP); dp_test_verify_garp_state(__FILE__, __func__, __LINE__, NULL, "Update", "Update"); } DP_END_TEST; @@ -941,14 +983,14 @@ DP_START_TEST(l3_arp_vlan, l3_arp_vlan_proto) PEER_MAC, DONTCARE_MAC, PEER_IP, OUR_IP, 0); dp_test_insert_8021q_hdr(arp_pak, vlan_id, ETH_P_8021AD, - ETHER_TYPE_ARP); + RTE_ETHER_TYPE_ARP); exp_pak = dp_test_create_arp_pak(ARPOP_REPLY, iifmac, PEER_MAC, iifmac, PEER_MAC, OUR_IP, PEER_IP, 0); dp_test_insert_8021q_hdr(exp_pak, vlan_id, ETH_P_8021AD, - ETHER_TYPE_ARP); + RTE_ETHER_TYPE_ARP); exp = dp_test_exp_create_with_packet(exp_pak); @@ -978,7 +1020,7 @@ DP_START_TEST(l3_arp_vlan, l3_arp_vlan_proto) PEER_MAC, DONTCARE_MAC, PEER_IP, OUR_IP, 0); dp_test_insert_8021q_hdr(arp_pak, vlan_id, ETH_P_8021AD, - ETHER_TYPE_ARP); + RTE_ETHER_TYPE_ARP); dp_test_insert_8021q_hdr(arp_pak, vlan_id, ETH_P_8021AD, ETH_P_8021AD); @@ -1176,7 +1218,7 @@ DP_START_TEST(arp_macvlan, req_dp_parent) dp_test_pktmbuf_eth_init(ip_pak, dp_test_intf_name2mac_str(IIFNAME2), DP_TEST_INTF_DEF_SRC_MAC, 
- ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); exp_pak = dp_test_create_arp_pak(ARPOP_REQUEST, macvlan_mac, BCAST_MAC, @@ -1198,7 +1240,7 @@ DP_START_TEST(arp_macvlan, req_dp_parent) exp_pak = dp_test_create_ipv4_pak("10.42.42.42", PEER_IP, 1, &len); dp_test_pktmbuf_eth_init(exp_pak, PEER_MAC, macvlan_mac, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); dp_test_ipv4_decrement_ttl(exp_pak); exp = dp_test_exp_create_m(NULL, 2); @@ -1399,7 +1441,7 @@ DP_START_TEST(bridge_arp_req_l3_fwd, bridge_arp_req_l3_fwd) (void)dp_test_pktmbuf_eth_init(ip_pak, dp_test_intf_name2mac_str(IIFNAME2), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); exp_pak = dp_test_create_arp_pak(ARPOP_REQUEST, dp_test_intf_name2mac_str(bname), @@ -1419,6 +1461,8 @@ DP_START_TEST(bridge_arp_req_l3_fwd, bridge_arp_req_l3_fwd) /* Clean Up */ dp_test_neigh_clear_entry(bname, PEER_IP); + DP_TEST_VERIFY_AND_CLEAR_ARP_STAT(bname, dropped, 1); + dp_test_nl_del_ip_addr_and_connected(bname, OUR_IP "/24"); dp_test_nl_del_ip_addr_and_connected(IIFNAME2, OUR_IP2 "/24"); dp_test_intf_bridge_remove_port(bname, bport); diff --git a/tests/whole_dp/src/dp_test_bitmask.c b/tests/whole_dp/src/dp_test_bitmask.c index ea04557a..c881fce9 100644 --- a/tests/whole_dp/src/dp_test_bitmask.c +++ b/tests/whole_dp/src/dp_test_bitmask.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, AT&T Intellectual Property. + * Copyright (c) 2017-2020, AT&T Intellectual Property. * All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only @@ -8,8 +8,7 @@ */ #include "dp_test_controller.h" -#include "dp_test_lib_cmd.h" -#include "dp_test_macros.h" +#include "dp_test/dp_test_macros.h" #include "bitmask.h" diff --git a/tests/whole_dp/src/dp_test_bridge.c b/tests/whole_dp/src/dp_test_bridge.c index 103958b7..bbbabc5e 100644 --- a/tests/whole_dp/src/dp_test_bridge.c +++ b/tests/whole_dp/src/dp_test_bridge.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. 
+ * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015-2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -10,12 +10,12 @@ #include "dp_test.h" #include "dp_test_console.h" -#include "dp_test_lib.h" +#include "dp_test_lib_internal.h" #include "dp_test_lib_exp.h" -#include "dp_test_lib_intf.h" +#include "dp_test_lib_intf_internal.h" #include "dp_test_lib_pkt.h" -#include "dp_test_pktmbuf_lib.h" -#include "dp_test_netlink_state.h" +#include "dp_test_pktmbuf_lib_internal.h" +#include "dp_test_netlink_state_internal.h" #include "ip_funcs.h" #include "in_cksum.h" @@ -224,7 +224,7 @@ DP_START_TEST(broadcast_2port_ipv4, broadcast_2port_ipv4) test_pak = dp_test_create_ipv4_pak("10.73.0.1", "3.3.3.3", 1, &len); (void)dp_test_pktmbuf_eth_init(test_pak, mac_bcast, mac_a, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); /* We expect the dataplane to flood to member ports and to slowpath */ exp = dp_test_exp_create_m(test_pak, 2); @@ -243,7 +243,7 @@ DP_START_TEST(broadcast_2port_ipv4, broadcast_2port_ipv4) /* Create frame from mac_b to mac_a, reply will be L2 unicast */ test_pak = dp_test_create_ipv4_pak("3.3.3.3", "10.73.0.1", 1, &len); (void)dp_test_pktmbuf_eth_init(test_pak, mac_a, mac_b, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); /* * Create pak we expect to receive on the tx ring @@ -290,7 +290,7 @@ DP_START_TEST(broadcast_2port_ipv6, broadcast_2port_ipv6) test_pak = dp_test_create_ipv6_pak("2001:1:1::2", "2002:2:2::2", 1, &len); (void)dp_test_pktmbuf_eth_init(test_pak, mac_bcast, mac_a, - ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV6); /* We expect the dataplane to flood to member ports and to slowpath */ exp = dp_test_exp_create_m(test_pak, 2); @@ -310,7 +310,7 @@ DP_START_TEST(broadcast_2port_ipv6, broadcast_2port_ipv6) test_pak = dp_test_create_ipv6_pak("2002:2:2::2", "2001:1:1::2", 1, &len); (void)dp_test_pktmbuf_eth_init(test_pak, mac_a, mac_b, - ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV6); /* * Create pak we 
expect to receive on the tx ring @@ -351,7 +351,7 @@ DP_START_TEST(broadcast_2port_ipv4_multi, broadcast_2port_ipv4_multi) test_pak = dp_test_create_ipv4_pak(ipv4_multi, "3.3.3.3", 1, &len); (void)dp_test_pktmbuf_eth_init(test_pak, mac_multi, mac_a, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); /* We expect the dataplane to flood to member ports and to slowpath */ exp = dp_test_exp_create_m(test_pak, 2); @@ -651,7 +651,7 @@ DP_START_TEST(bridge_2port_2vif_comb, 2vif_comb_ucast) struct dp_test_pkt_desc_t pktA = { .text = "Neighbour 1 -> Bridge IP address", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "10.0.1.2", .l2_src = "0:0:a4:0:0:aa", .l3_dst = "10.0.1.1", @@ -853,7 +853,7 @@ DP_START_TEST(bridge_2port_2vif_comb, 2vif_comb_ucast_vlan_proto) struct dp_test_pkt_desc_t pktA = { .text = "Neighbour 1 -> Bridge IP address", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "10.0.1.2", .l2_src = "0:0:a4:0:0:aa", .l3_dst = "10.0.1.1", @@ -877,7 +877,7 @@ DP_START_TEST(bridge_2port_2vif_comb, 2vif_comb_ucast_vlan_proto) /* Set the vlan in the test pak after we have created the exp pak */ dp_test_insert_8021q_hdr(test_pak, 10, ETH_P_8021AD, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); dp_test_exp_set_vlan_tci(exp, 10); /* Run the test */ @@ -1125,12 +1125,12 @@ DP_START_TEST(bridge_gre, unicast) "1.1.2.2", "1.1.2.1", 1, &gre_pl_len, ETH_P_TEB, 0, 0, &gre_payload); memcpy(gre_payload, rte_pktmbuf_mtod(payload_pak, - const struct ether_hdr *), + const struct rte_ether_hdr *), gre_pl_len); dp_test_set_pak_ip_field(iphdr(test_pak), DP_TEST_SET_DF, 1); dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), mac_ip_neigh, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); exp = dp_test_exp_create(payload_pak); rte_pktmbuf_free(payload_pak); @@ -1151,12 +1151,12 @@ DP_START_TEST(bridge_gre, unicast) "1.1.2.1", "1.1.2.2", 1, &gre_pl_len, ETH_P_TEB, 0, 0, &gre_payload); memcpy(gre_payload, 
rte_pktmbuf_mtod(test_pak, - const struct ether_hdr *), + const struct rte_ether_hdr *), gre_pl_len); dp_test_set_pak_ip_field(iphdr(expected_pak), DP_TEST_SET_DF, 1); dp_test_pktmbuf_eth_init(expected_pak, mac_ip_neigh, dp_test_intf_name2mac_str("dp1T1"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); exp = dp_test_exp_create(expected_pak); dp_test_exp_set_oif_name(exp, "dp1T1"); @@ -1175,12 +1175,12 @@ DP_START_TEST(bridge_gre, unicast) "1.1.2.1", "1.1.2.2", 1, &gre_pl_len, ETH_P_TEB, 0, 0, &gre_payload); memcpy(gre_payload, - rte_pktmbuf_mtod(test_pak, const struct ether_hdr *), + rte_pktmbuf_mtod(test_pak, const struct rte_ether_hdr *), gre_pl_len); dp_test_set_pak_ip_field(iphdr(expected_pak), DP_TEST_SET_DF, 1); dp_test_pktmbuf_eth_init(expected_pak, mac_ip_neigh, dp_test_intf_name2mac_str("dp1T1"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); exp = dp_test_exp_create(expected_pak); dp_test_exp_set_oif_name(exp, "dp1T1"); @@ -1199,12 +1199,12 @@ DP_START_TEST(bridge_gre, unicast) "1.1.2.2", "1.1.2.1", 1, &gre_pl_len, ETH_P_TEB, 0, 0, &gre_payload); memcpy(gre_payload, rte_pktmbuf_mtod(payload_pak, - const struct ether_hdr *), + const struct rte_ether_hdr *), gre_pl_len); dp_test_set_pak_ip_field(iphdr(test_pak), DP_TEST_SET_DF, 1); dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), mac_ip_neigh, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); exp = dp_test_exp_create(payload_pak); rte_pktmbuf_free(payload_pak); @@ -1224,12 +1224,12 @@ DP_START_TEST(bridge_gre, unicast) "1.1.2.1", "1.1.2.2", 1, &gre_pl_len, ETH_P_TEB, 0, 0, &gre_payload); memcpy(gre_payload, rte_pktmbuf_mtod(test_pak, - const struct ether_hdr *), + const struct rte_ether_hdr *), gre_pl_len); dp_test_set_pak_ip_field(iphdr(expected_pak), DP_TEST_SET_DF, 1); dp_test_pktmbuf_eth_init(expected_pak, mac_ip_neigh, dp_test_intf_name2mac_str("dp1T1"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); exp = dp_test_exp_create(expected_pak); dp_test_exp_set_oif_name(exp, "dp1T1"); @@ -1279,7 
+1279,7 @@ DP_START_TEST(bridge_gre, frag) dp_test_pktmbuf_eth_init(payload_pak, mac_b, mac_a, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); struct rte_mbuf *frag_payload[2]; uint16_t frag_sizes[2] = { @@ -1297,14 +1297,14 @@ DP_START_TEST(bridge_gre, frag) dp_test_set_pak_ip_field(iphdr(frag_payload[0]), DP_TEST_SET_DF, 1); dp_test_pktmbuf_eth_prepend(frag_payload[0], mac_ip_neigh, dp_test_intf_name2mac_str("dp1T1"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); dp_test_pktmbuf_gre_prepend(frag_payload[1], ETH_P_TEB, 0); dp_test_pktmbuf_ip_prepend(frag_payload[1], "1.1.2.1", "1.1.2.2", IPPROTO_GRE); dp_test_set_pak_ip_field(iphdr(frag_payload[1]), DP_TEST_SET_DF, 1); dp_test_pktmbuf_eth_prepend(frag_payload[1], mac_ip_neigh, dp_test_intf_name2mac_str("dp1T1"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); exp = dp_test_exp_create_m(NULL, 2); @@ -1322,11 +1322,11 @@ DP_START_TEST(bridge_gre, frag) dp_test_pktmbuf_eth_init(payload_pak, mac_b, mac_a, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); ip = iphdr(payload_pak); ip->tot_len = htons(2000); ip->check = 0; - ip->check = in_cksum_hdr(ip); + ip->check = dp_in_cksum_hdr(ip); exp = dp_test_exp_create(payload_pak); dp_test_exp_set_fwd_status(exp, DP_TEST_FWD_DROPPED); @@ -1338,7 +1338,7 @@ DP_START_TEST(bridge_gre, frag) dp_test_pktmbuf_eth_init(payload_pak, mac_b, mac_a, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); ip = iphdr(payload_pak); ip->ihl = 0; @@ -1398,12 +1398,12 @@ DP_START_TEST(bridge_gre, vlan) "1.1.2.2", "1.1.2.1", 1, &gre_pl_len, ETH_P_TEB, 0, 0, &gre_payload); memcpy(gre_payload, rte_pktmbuf_mtod(payload_pak, - const struct ether_hdr *), + const struct rte_ether_hdr *), gre_pl_len); dp_test_set_pak_ip_field(iphdr(test_pak), DP_TEST_SET_DF, 1); dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), mac_ip_neigh, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); exp = dp_test_exp_create(payload_pak); rte_pktmbuf_free(payload_pak); @@ -1426,12 +1426,12 @@ DP_START_TEST(bridge_gre, vlan) "1.1.2.1", 
"1.1.2.2", 1, &gre_pl_len, ETH_P_TEB, 0, 0, &gre_payload); memcpy(gre_payload, rte_pktmbuf_mtod(test_pak, - const struct ether_hdr *), + const struct rte_ether_hdr *), gre_pl_len); dp_test_set_pak_ip_field(iphdr(expected_pak), DP_TEST_SET_DF, 1); dp_test_pktmbuf_eth_init(expected_pak, mac_ip_neigh, dp_test_intf_name2mac_str("dp1T1"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); exp = dp_test_exp_create(expected_pak); dp_test_exp_set_oif_name(exp, "dp1T1"); @@ -1456,7 +1456,7 @@ DP_START_TEST(bridge_gre, vlan) /* * Check that the flush code can delete entries safely at the same time as - * they are removed from the master thread. + * they are removed from the main thread. */ DP_DECL_TEST_CASE(bridge_suite, bridge_flush, NULL, NULL); DP_START_TEST(bridge_flush, bridge_flush) diff --git a/tests/whole_dp/src/dp_test_bridge_n.c b/tests/whole_dp/src/dp_test_bridge_n.c index 6d2022e7..c4f192b5 100644 --- a/tests/whole_dp/src/dp_test_bridge_n.c +++ b/tests/whole_dp/src/dp_test_bridge_n.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -12,8 +12,8 @@ * single test. */ #include "dp_test_lib_exp.h" -#include "dp_test_lib_intf.h" -#include "dp_test_macros.h" +#include "dp_test_lib_intf_internal.h" +#include "dp_test/dp_test_macros.h" DP_DECL_TEST_SUITE(bridge_suite_n); diff --git a/tests/whole_dp/src/dp_test_bridge_vlan_filter.c b/tests/whole_dp/src/dp_test_bridge_vlan_filter.c index 58f9c241..e755d1b3 100644 --- a/tests/whole_dp/src/dp_test_bridge_vlan_filter.c +++ b/tests/whole_dp/src/dp_test_bridge_vlan_filter.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015-2016 by Brocade Communications Systems, Inc. 
* All rights reserved. * @@ -9,12 +9,12 @@ */ #include "dp_test.h" -#include "dp_test_lib.h" +#include "dp_test_lib_internal.h" #include "dp_test_lib_exp.h" -#include "dp_test_lib_intf.h" +#include "dp_test_lib_intf_internal.h" #include "dp_test_lib_pkt.h" -#include "dp_test_pktmbuf_lib.h" -#include "dp_test_netlink_state.h" +#include "dp_test_pktmbuf_lib_internal.h" +#include "dp_test_netlink_state_internal.h" #include "ip_funcs.h" @@ -312,7 +312,7 @@ DP_START_TEST(broadcast_vlan_filter_2port_ipv4, test_pak = dp_test_create_ipv4_pak("10.73.0.1", "3.3.3.3", 1, &len); (void)dp_test_pktmbuf_eth_init(test_pak, mac_bcast, mac_a, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); dp_test_pktmbuf_vlan_init(test_pak, 10); /* We expect the dataplane to flood to member ports and to slowpath */ @@ -332,7 +332,7 @@ DP_START_TEST(broadcast_vlan_filter_2port_ipv4, /* Create frame from mac_b to mac_a, reply will be L2 unicast */ test_pak = dp_test_create_ipv4_pak("3.3.3.3", "10.73.0.1", 1, &len); (void)dp_test_pktmbuf_eth_init(test_pak, mac_a, mac_b, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); dp_test_pktmbuf_vlan_init(test_pak, 10); /* @@ -392,7 +392,7 @@ DP_START_TEST(broadcast_vlan_filter_2port_ipv6, test_pak = dp_test_create_ipv6_pak("2001:1:1::2", "2002:2:2::2", 1, &len); (void)dp_test_pktmbuf_eth_init(test_pak, mac_bcast, mac_a, - ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV6); dp_test_pktmbuf_vlan_init(test_pak, 10); /* We expect the dataplane to flood to member ports and to slowpath */ @@ -413,7 +413,7 @@ DP_START_TEST(broadcast_vlan_filter_2port_ipv6, test_pak = dp_test_create_ipv6_pak("2002:2:2::2", "2001:1:1::2", 1, &len); (void)dp_test_pktmbuf_eth_init(test_pak, mac_a, mac_b, - ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV6); dp_test_pktmbuf_vlan_init(test_pak, 10); /* @@ -467,7 +467,7 @@ DP_START_TEST(broadcast_vlan_filter_2port_ipv4_multi, test_pak = dp_test_create_ipv4_pak(ipv4_multi, "3.3.3.3", 1, &len); (void)dp_test_pktmbuf_eth_init(test_pak, mac_multi, mac_a, - ETHER_TYPE_IPv4); 
+ RTE_ETHER_TYPE_IPV4); dp_test_pktmbuf_vlan_init(test_pak, 10); /* We expect the dataplane to flood to member ports and to slowpath */ diff --git a/tests/whole_dp/src/dp_test_cmd_check.c b/tests/whole_dp/src/dp_test_cmd_check.c index 49eba047..65c69622 100644 --- a/tests/whole_dp/src/dp_test_cmd_check.c +++ b/tests/whole_dp/src/dp_test_cmd_check.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015-2017 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -7,21 +7,22 @@ * * Check dataplane internal state using operational commands */ -#include "dp_test_cmd_check.h" +#include "dp_test/dp_test_cmd_check.h" #include +#include "if_var.h" #include "mpls/mpls.h" #include "npf/npf_if.h" -#include "dp_test_lib.h" +#include "dp_test_lib_internal.h" #include "dp_test_controller.h" #include "dp_test_console.h" #include "dp_test_json_utils.h" -#include "dp_test_lib_intf.h" +#include "dp_test_lib_intf_internal.h" #include "dp_test.h" #include "dp_test_npf_lib.h" -#include "vrf.h" +#include "vrf_internal.h" #define STRINGIFZ(x) #x #define STRINGIFY(x) STRINGIFZ(x) @@ -42,22 +43,29 @@ char expected_npf_fw_portmap_str[DP_TEST_TMP_BUF]; "{" \ " \"apm\":" \ " { \"section_size\": 512," \ -" \"hash_memory\": 0," \ -" \"instances\": " \ -" [" \ -" { \"npf_id\": 1, " \ -" \"portmaps\": [ ]," \ -" }" \ -" ]," \ -" \"mapping_count\": 0," \ -" }" \ +" \"protocols\":" \ +" [ " \ +" { " \ +" \"protocol\": \"tcp\", " \ +" \"mapping_count\": 0" \ +" }, " \ +" { " \ +" \"protocol\": \"udp\", " \ +" \"mapping_count\": 0" \ +" }, " \ +" { " \ +" \"protocol\": \"other\", " \ +" \"mapping_count\": 0" \ +" } " \ +" ] " \ +" } " \ "}" char expected_vrf_str[DP_TEST_TMP_BUF]; /* VR vrf clean refcounts vrf 0 (invalid) = 1 for invalid vrf table - * vrf 1 (default) = 20 dpdk ports +1 loopback +2 switch ports +1 for - * default vrf table = 24 + * 
vrf 1 (default) = 20 dpdk ports +1 loopback +1 for + * default vrf table = 22 */ #define DP_TEST_EXP_VRF_VR_STR \ "{ \"vrf_table\":" \ @@ -69,7 +77,7 @@ char expected_vrf_str[DP_TEST_TMP_BUF]; " },{" \ " \"vrf_id\": 1," \ " \"internal_vrf_id\": 1," \ -" \"ref_count\": 24" \ +" \"ref_count\": 22" \ " } " \ " ] " \ "}" @@ -96,13 +104,7 @@ char expected_route_stats_str[DP_TEST_TMP_BUF]; char parse_err_str[10000]; static char mismatch_str[10000]; -#define DP_TEST_POLL_INTERVAL 1 /* ms */ -#define DP_TEST_POLL_TOTAL_TIME 2000 /* ms */ -#define DP_TEST_POLL_COUNT (DP_TEST_POLL_TOTAL_TIME / DP_TEST_POLL_INTERVAL) - -#define DP_TEST_WAIT_SEC_DEFAULT 1 - -static uint32_t dp_test_wait_sec = DP_TEST_WAIT_SEC_DEFAULT; +uint32_t dp_test_wait_sec = DP_TEST_WAIT_SEC_DEFAULT; void dp_test_wait_set(uint8_t wait_sec) { @@ -122,13 +124,17 @@ dp_test_check_state(zloop_t *loop, int poller, void *arg) char *reply; bool match; bool err; + int i; state->poll_cnt--; snprintf(buf, DP_TEST_TMP_BUF, "%s", state->cmd); - if (state->print) - printf("console req: looking for %s'%s'\n", - state->negate_match ? "absence of " : "", - state->expected); + if (state->print) { + for (i = 0; i < state->exp_count; i++) { + printf("console req: looking for %s'%s'\n", + state->negate_match ? "absence of " : "", + state->expected[i]); + } + } reply = dp_test_console_request_w_err(buf, &err, state->print); if (state->print) printf("console rep content: '%s'", reply ? 
reply : ""); @@ -137,13 +143,19 @@ dp_test_check_state(zloop_t *loop, int poller, void *arg) match = false; else { match = false; - switch (state->type) { - case DP_TEST_CHECK_STR_SUBSET: - match = strstr(reply, state->expected) != NULL; - break; - case DP_TEST_CHECK_STR_EXACT: - match = !strcmp(reply, state->expected); - break; + for (i = 0; i < state->exp_count; i++) { + switch (state->type) { + case DP_TEST_CHECK_STR_SUBSET: + match = strstr(reply, + state->expected[i]) != NULL; + break; + case DP_TEST_CHECK_STR_EXACT: + match = !strcmp(reply, state->expected[i]); + break; + } + if (match) + /* Can't negate if multiple matches */ + break; } } free(state->actual); @@ -160,7 +172,8 @@ dp_test_check_state(zloop_t *loop, int poller, void *arg) static void dp_test_check_state_with_show(const char *file, int line, const char *cmd, - const char *expected, bool exp_err, + int expected_count, + const char **expected, bool exp_err, bool negate_match, bool print, dp_test_check_str_type type, int poll_cnt) { @@ -168,6 +181,7 @@ dp_test_check_state_with_show(const char *file, int line, const char *cmd, poll_cnt = DP_TEST_POLL_COUNT; struct dp_test_cmd_check state = { .cmd = cmd, + .exp_count = expected_count, .expected = expected, .actual = NULL, .print = print, @@ -179,6 +193,7 @@ dp_test_check_state_with_show(const char *file, int line, const char *cmd, }; int timer; zloop_t *loop = zloop_new(); + int i; dp_test_assert_internal(loop); @@ -200,11 +215,22 @@ dp_test_check_state_with_show(const char *file, int line, const char *cmd, (poll_cnt - state.poll_cnt) * DP_TEST_POLL_INTERVAL, poll_cnt - state.poll_cnt, poll_cnt); - _dp_test_fail_unless(state.result, - file, line, - "\nUnexpected show state: '%s' %spresent in show %s:\n%s", - state.expected, state.negate_match ? 
"" : "not ", - state.cmd, state.actual); + if (!state.result && expected_count > 1) { + /* We have failed */ + printf("Expected one of %d:\n", expected_count); + for (i = 0; i < expected_count; i++) + printf("%s\n", expected[i]); + + _dp_test_fail_unless(state.result, + file, line, + "\nstate not present in show %s:\n%s", + state.cmd, state.actual); + } + _dp_test_fail_unless( + state.result, file, line, + "\nUnexpected show state: '%s' %spresent in show %s:\n%s", + state.expected[0], state.negate_match ? "" : "not ", + state.cmd, state.actual); free(state.actual); /* TODO: Remove me when ck_assert_msg is reliable. */ dp_test_assert_internal(state.result); @@ -212,11 +238,13 @@ dp_test_check_state_with_show(const char *file, int line, const char *cmd, void _dp_test_check_state_poll_show(const char *file, int line, - const char *cmd, const char *expected, - bool exp_ok, bool print, int poll_cnt, - dp_test_check_str_type type) + const char *cmd, + const char *expected, + bool exp_ok, bool print, int poll_cnt, + dp_test_check_str_type type) { - dp_test_check_state_with_show(file, line, cmd, expected, !exp_ok, + dp_test_check_state_with_show(file, line, cmd, 1, + &expected, !exp_ok, false, print, type, poll_cnt); } @@ -225,7 +253,18 @@ _dp_test_check_state_show(const char *file, int line, const char *cmd, const char *expected, bool print, dp_test_check_str_type type) { - dp_test_check_state_with_show(file, line, cmd, expected, false, false, + dp_test_check_state_with_show(file, line, cmd, 1, + &expected, false, false, + print, type, 0); +} + +void +_dp_test_check_state_show_one_of(const char *file, int line, const char *cmd, + int exp_count, const char **expected, + bool print, dp_test_check_str_type type) +{ + dp_test_check_state_with_show(file, line, cmd, exp_count, + expected, false, false, print, type, 0); } @@ -234,7 +273,8 @@ _dp_test_check_state_gone_show(const char *file, int line, const char *cmd, const char *expected, bool print, dp_test_check_str_type type) 
{ - dp_test_check_state_with_show(file, line, cmd, expected, false, true, + dp_test_check_state_with_show(file, line, cmd, 1, + &expected, false, true, print, type, 0); } @@ -285,9 +325,9 @@ static struct cmd_expect_json cmd_expect_clean_json[] = { " \"tbl8s\":" " {" " \"used\":" - " 0," + " 14," /* for reserved routes */ " \"free\":" - " 256" + " 242" " }," " \"nexthop\": " \ " { " \ @@ -333,6 +373,9 @@ static struct cmd_expect_json cmd_expect_clean_json[] = { " }, " " \"l2_mcast_filters\": " " { " + " }, " + " \"eth-info\": " + " { " " } " " } " " ] " @@ -346,7 +389,12 @@ static struct cmd_expect_json cmd_expect_clean_json[] = { " {" " \"rekey_requests\": 0" " }," - " \"policy_count\": " + " \"total_policy_count\": " + " { " + " \"ipv4\": 0," + " \"ipv6\": 0" + " }, " + " \"live_policy_count\": " " { " " \"ipv4\": 0," " \"ipv6\": 0" @@ -404,8 +452,22 @@ static struct cmd_expect_json cmd_expect_clean_json[] = { "{" " \"apm\":" " { \"section_size\": 512," - " \"mapping_count\": 0," - " }" + " \"protocols\":" + " [ " + " { " + " \"protocol\": \"tcp\", " + " \"mapping_count\": 0" + " }, " + " { " + " \"protocol\": \"udp\", " + " \"mapping_count\": 0" + " }, " + " { " + " \"protocol\": \"other\", " + " \"mapping_count\": 0" + " } " + " ] " + " } " "}", "" }, { @@ -492,7 +554,7 @@ _dp_test_check_state_clean(const char *file, int line, bool print) if (!expected_json) { printf("%s:%d %s - %s", __FILE__, __LINE__, __func__, parse_err_str); - dp_test_assert_internal(!"dp_test_json_create failed"); + dp_test_abort_internal(); } if (cmd_expect_clean_json[i].filter[0]) @@ -534,13 +596,19 @@ dp_test_json_create(const char *fmt_str, ...) 
if (!jobj) { printf("%s:%d %s - %s", __FILE__, __LINE__, __func__, parse_err_str); - dp_test_assert_internal(!"dp_test_json_create failed"); + dp_test_abort_internal(); } return jobj; }; struct dp_test_show_cmd_poll_state { char request_str[DP_TEST_TMP_BUF]; /* poll req */ + void *pb_req; + int pb_req_len; + void *pb_resp; + int pb_resp_len; + dp_test_state_pb_cb pb_func; + void *pb_arg; json_object *json_resp; /* latest reply */ int poll_cnt; @@ -618,9 +686,40 @@ poll_for_matching_state(zloop_t *loop, int poller, void *arg) (cmd->poll_cnt == 0)) ? -1 : 0; } +static int +poll_for_matching_state_pb(zloop_t *loop, int poller, void *arg) +{ + struct dp_test_show_cmd_poll_state *cmd = arg; + + --(cmd->poll_cnt); + + zmsg_t *resp_msg; + dp_test_console_request_pb(cmd->pb_req, cmd->pb_req_len, + &resp_msg, + cmd->print); + + char *resp; + int resp_len = 0; + if (resp_msg && zmsg_size(resp_msg) > 0) { + zframe_t *frame = zmsg_first(resp_msg); + resp = (char *)zframe_data(frame); + resp_len = zframe_size(frame); + } + + if (resp_len > 0) + cmd->result = cmd->pb_func(resp, resp_len, cmd->pb_arg); + + zmsg_destroy(&resp_msg); + + /* return -1 to stop if we got what we want or run out of retries */ + return (cmd->result || + (cmd->poll_cnt == 0)) ? -1 : 0; +} + static bool dp_test_wait_for_expected_json(struct dp_test_show_cmd_poll_state *cmd, - json_object **actual_resp) + json_object **actual_resp, + unsigned int poll_interval) { zloop_t *loop = zloop_new(); int timer; @@ -630,7 +729,7 @@ dp_test_wait_for_expected_json(struct dp_test_show_cmd_poll_state *cmd, /* * loop every millisec, for up to dp_test_wait_sec. 
*/ - timer = zloop_timer(loop, dp_test_wait_sec, 0, + timer = zloop_timer(loop, poll_interval, 0, poll_for_matching_state, cmd); dp_test_assert_internal(timer >= 0); @@ -645,13 +744,39 @@ dp_test_wait_for_expected_json(struct dp_test_show_cmd_poll_state *cmd, return cmd->result; } -void -_dp_test_check_json_poll_state(const char *cmd_str, json_object *expected_json, - json_object *filter_json, - enum dp_test_check_json_mode mode, - bool negate_match, int poll_cnt, - const char *file, const char *func __unused, - int line) +static bool +dp_test_wait_for_expected_pb(struct dp_test_show_cmd_poll_state *cmd) +{ + zloop_t *loop = zloop_new(); + int timer; + + assert(loop); + + /* + * loop every millisec, for up to dp_test_wait_sec. + */ + timer = zloop_timer(loop, dp_test_wait_sec, 0, + poll_for_matching_state_pb, cmd); + dp_test_assert_internal(timer >= 0); + + zloop_start(loop); + zloop_destroy(&loop); + + dp_test_wait_sec = DP_TEST_WAIT_SEC_DEFAULT; + + return cmd->result; +} + +static void +_dp_test_check_json_poll_state_internal(const char *cmd_str, + json_object *expected_json, + json_object *filter_json, + enum dp_test_check_json_mode mode, + bool negate_match, int poll_cnt, + unsigned int poll_interval, + const char *file, + const char *func __unused, + int line) { if (!poll_cnt) poll_cnt = DP_TEST_POLL_COUNT; @@ -686,7 +811,8 @@ _dp_test_check_json_poll_state(const char *cmd_str, json_object *expected_json, break; } - result = dp_test_wait_for_expected_json(&cmd, &actual_json); + result = dp_test_wait_for_expected_json(&cmd, &actual_json, + poll_interval); if (cmd.mismatches) dp_test_json_mismatch_print(cmd.mismatches, 2, mismatch_str, sizeof(mismatch_str)); @@ -719,6 +845,68 @@ _dp_test_check_json_poll_state(const char *cmd_str, json_object *expected_json, json_object_put(actual_json); } +void +_dp_test_check_json_poll_state(const char *cmd_str, json_object *expected_json, + json_object *filter_json, + enum dp_test_check_json_mode mode, + bool negate_match, 
int poll_cnt, + const char *file, const char *func, + int line) +{ + _dp_test_check_json_poll_state_internal(cmd_str, expected_json, + filter_json, mode, negate_match, + poll_cnt, DP_TEST_POLL_INTERVAL, + file, func, line); +} + +void +_dp_test_check_json_poll_state_interval(const char *cmd_str, + json_object *expected_json, + json_object *filter_json, + enum dp_test_check_json_mode mode, + bool negate_match, int poll_cnt, + unsigned int poll_interval, + const char *file, const char *func, + int line) +{ + _dp_test_check_json_poll_state_internal(cmd_str, expected_json, + filter_json, mode, negate_match, + poll_cnt, poll_interval, + file, func, line); +} + +void +_dp_test_check_pb_poll_state(void *cmd, int len, + dp_test_state_pb_cb cb, + void *arg, + int poll_cnt, + const char *file, const char *func __unused, + int line) +{ + if (!poll_cnt) + poll_cnt = DP_TEST_POLL_COUNT; + struct dp_test_show_cmd_poll_state show_cmd = { + .pb_req = cmd, + .pb_req_len = len, + .pb_func = cb, + .pb_arg = arg, + .print = false, + .json_resp = NULL, + .required_superset = NULL, + .required_subset = NULL, + .required_exact = NULL, + .poll_cnt = poll_cnt, + .mismatches = NULL, + .result = false, + }; + + bool result = dp_test_wait_for_expected_pb(&show_cmd); + if (!result) { + printf("failed to get response\n"); + abort(); + } +} + void _dp_test_check_json_state(const char *cmd_str, json_object *expected_json, json_object *filter_json, @@ -732,6 +920,19 @@ _dp_test_check_json_state(const char *cmd_str, json_object *expected_json, file, func, line); } +void +_dp_test_check_pb_state(void *buf, int len, + dp_test_state_pb_cb cb, + void *arg, + const char *file, const char *func __unused, + int line) +{ + _dp_test_check_pb_poll_state(buf, len, + cb, arg, + DP_TEST_POLL_COUNT, + file, func, line); +} + /* * Construct a route show json object without any nexthop information. 
* Nexthop info can be added via dp_test_json_route_add_nh() @@ -760,13 +961,13 @@ dp_test_json_route_add(json_object *route_set, mpls_ls_get_label(route->prefix.addr.addr.mpls)); if (!json_object_object_get_ex(route_set, "mpls_tables", &mpls_tables_json)) - dp_test_assert_internal(0); + dp_test_abort_internal(); mpls_table_json = json_object_array_get_idx( mpls_tables_json, 0); if (!json_object_object_get_ex(mpls_table_json, "mpls_routes", &routes)) - dp_test_assert_internal(0); + dp_test_abort_internal(); } else { if (route->prefix.addr.family == AF_INET) { route_lookup_str = "route_lookup"; @@ -794,7 +995,7 @@ dp_test_json_route_add(json_object *route_set, if (!json_object_object_get_ex(route_set, route_lookup_str, &routes)) - dp_test_assert_internal(0); + dp_test_abort_internal(); } else { json_route = dp_test_json_create( " {" @@ -813,12 +1014,12 @@ dp_test_json_route_add(json_object *route_set, if (!json_object_object_get_ex(route_set, route_show_str, &routes)) - dp_test_assert_internal(0); + dp_test_abort_internal(); } } if (json_object_array_add(routes, json_route) != 0) - dp_test_assert_internal(0); + dp_test_abort_internal(); return json_route; } @@ -902,6 +1103,7 @@ dp_test_json_route_add_nh(json_object *route_show, int route_family, written = 0; if (nh->nh_int) { + const char *backup_str; const char *neigh; if (nh->neigh_created) @@ -911,6 +1113,11 @@ dp_test_json_route_add_nh(json_object *route_show, int route_family, else neigh = ""; + if (nh->backup) + backup_str = " \"backup\": true, "; + else + backup_str = ""; + if (nh->nh_addr.family == AF_UNSPEC) state_str = "directly connected"; else @@ -919,9 +1126,11 @@ dp_test_json_route_add_nh(json_object *route_show, int route_family, " {" " \"state\": \"%s\", " "%s" + "%s" " \"ifname\": \"%s\", ", state_str, neigh, + backup_str, real_ifname); } else { if (route_family == AF_MPLS && @@ -960,6 +1169,8 @@ dp_test_json_route_add_nh(json_object *route_show, int route_family, written += spush(json_str + written, 
sizeof(json_str) - written, " } "); + (void) written; + nh_obj = dp_test_json_create("%s", json_str); /* add to the route show object */ @@ -1337,7 +1548,7 @@ json_object * dp_test_json_intf_add(json_object *intf_set, const char *ifname, const char *addr_prefix, bool uplink) { - struct ether_addr *mac_addr; + struct rte_ether_addr *mac_addr; char real_ifname[IFNAMSIZ]; const char *link_str; json_object *intfs; @@ -1351,7 +1562,7 @@ dp_test_json_intf_add(json_object *intf_set, const char *ifname, if (!json_object_object_get_ex(intf_set, "interfaces", &intfs)) - dp_test_assert_internal(0); + dp_test_abort_internal(); link_str = " \"link\": " @@ -1405,6 +1616,7 @@ dp_test_json_intf_add(json_object *intf_set, const char *ifname, " \"mtu\": 1500, " " \"flags\": 69699, " " \"hw_forwarding\": 0, " + " \"hw_l3\": 0, " " \"tpid_offloaded\": 1, " " \"ip_forwarding\": %u, " " \"ip_proxy_arp\": 0, " @@ -1467,7 +1679,7 @@ dp_test_json_intf_add(json_object *intf_set, const char *ifname, dp_test_exp_ipv6_out_feat()); if (json_object_array_add(intfs, intf) != 0) - dp_test_assert_internal(0); + dp_test_abort_internal(); return intf; } @@ -1495,7 +1707,7 @@ dp_test_json_intf_add_lo(json_object *intf_set, const char *ifname) if (!json_object_object_get_ex(intf_set, "interfaces", &intfs)) - dp_test_assert_internal(0); + dp_test_abort_internal(); intf = dp_test_json_create("{" " \"name\": \"%s\"," @@ -1508,6 +1720,7 @@ dp_test_json_intf_add_lo(json_object *intf_set, const char *ifname) " \"mtu\": 0, " " \"flags\": 73, " " \"hw_forwarding\": 0, " + " \"hw_l3\": 0, " " \"tpid_offloaded\": 1, " " \"ip_forwarding\": 0, " " \"ip_proxy_arp\": 0, " @@ -1562,7 +1775,7 @@ dp_test_json_intf_add_lo(json_object *intf_set, const char *ifname) dp_test_assert_internal(intf); if (json_object_array_add(intfs, intf) != 0) - dp_test_assert_internal(0); + dp_test_abort_internal(); return intf; } diff --git a/tests/whole_dp/src/dp_test_cmd_state.c b/tests/whole_dp/src/dp_test_cmd_state.c index 
51b9ab4f..3c7ddc1a 100644 --- a/tests/whole_dp/src/dp_test_cmd_state.c +++ b/tests/whole_dp/src/dp_test_cmd_state.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2018-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015-2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -17,13 +17,13 @@ #include "if_llatbl.h" #include "netinet6/nd6_nbr.h" -#include "dp_test_macros.h" -#include "dp_test_lib.h" -#include "dp_test_lib_intf.h" -#include "dp_test_cmd_check.h" +#include "dp_test/dp_test_macros.h" +#include "dp_test_lib_internal.h" +#include "dp_test_lib_intf_internal.h" +#include "dp_test/dp_test_cmd_check.h" #include "dp_test_controller.h" #include "dp_test_console.h" -#include "dp_test_netlink_state.h" +#include "dp_test_netlink_state_internal.h" /* * Example CLI -> string cmd for nat @@ -48,7 +48,7 @@ * 2) Translates to this string command executed on the dataplane. * * cmd = "npf-ut add dnat:\"dpT10\" 10 nat-type=dnat trans-addr=90.90.90.90 " - * "action=accept src-addr=70.70.70.70"; + * "src-addr=70.70.70.70"; * * 3) And this JSON show reply to the command "npf-op show all: dnat" * @@ -65,7 +65,7 @@ * "bytes":0, * "packets":0, * "action": "pass in ", - * "match": "on dpT10 proto 6 from 100.0.100.100", + * "match": "on dpT10 proto-final 6 from 100.0.100.100", * "map": "dynamic 10.0.10.10 port 80-80 <- any", * "total_ts": 0, * "used_ts": 0 @@ -85,7 +85,7 @@ dp_test_cmd_replace_dnat(int rule_num, const char *ifname, const char *orig_ip, dp_test_intf_real(ifname, real_if_name); snprintf(cmd, sizeof(cmd), "npf-ut add dnat:%s %i nat-type=dnat " - "trans-addr=%s trans-port=%i action=accept proto=%d " + "trans-addr=%s trans-port=%i proto-final=%d " "src-addr=%s", real_if_name, rule_num, dnat_ip, dnat_port, proto, orig_ip); dp_test_console_request_reply(cmd, false); @@ -96,7 +96,7 @@ dp_test_cmd_replace_dnat(int rule_num, const char *ifname, const char *orig_ip, char 
expected[TEST_MAX_REPLY_LEN]; snprintf(cmd, TEST_MAX_CMD_LEN, "npf-op show all: dnat"); - snprintf(expected, TEST_MAX_REPLY_LEN, "proto %d from %s", + snprintf(expected, TEST_MAX_REPLY_LEN, "proto-final %d from %s", proto, orig_ip); dp_test_check_state_show(cmd, expected, false); } @@ -111,7 +111,7 @@ dp_test_cmd_delete_dnat(int rule_num, const char *ifname, const char *orig_ip, char real_if_name[IFNAMSIZ]; snprintf(state_cmd, TEST_MAX_CMD_LEN, "npf-op show all: dnat"); - snprintf(expected, TEST_MAX_REPLY_LEN, "proto %d from %s", + snprintf(expected, TEST_MAX_REPLY_LEN, "proto-final %d from %s", proto, orig_ip); dp_test_check_state_show(state_cmd, expected, false); @@ -148,7 +148,7 @@ dp_test_cmd_replace_snat(int rule_num, const char *ifname, const char *orig_ip, snprintf(cmd, sizeof(cmd), "npf-ut add snat:%s %i nat-type=snat trans-addr=%s" - "%s action=accept src-addr=%s", + "%s src-addr=%s", real_if_name, rule_num, snat_ip, tmp, orig_ip); dp_test_console_request_reply(cmd, false); @@ -199,7 +199,7 @@ _dp_test_neigh_clear_entry(const char *ifname, const char *ipaddr, struct llentry *lle; ifindex = dp_test_intf_name2index(ifname); - ifp = ifnet_byifindex(ifindex); + ifp = dp_ifnet_byifindex(ifindex); _dp_test_fail_unless(dp_test_addr_str_to_addr(ipaddr, &addr), file, line, @@ -210,7 +210,7 @@ _dp_test_neigh_clear_entry(const char *ifname, const char *ipaddr, lle = in_lltable_lookup(ifp, 0, addr.addr.ipv4); if (lle) rte_atomic16_test_and_set(&lle->ll_idle); - in_lltable_lookup(ifp, LLE_DELETE, addr.addr.ipv4); + in_lltable_lookup(ifp, LLE_DELETE | LLE_LOCAL, addr.addr.ipv4); break; case AF_INET6: lle = in6_lltable_lookup(ifp, 0, &addr.addr.ipv6); diff --git a/tests/whole_dp/src/dp_test_cmd_state.h b/tests/whole_dp/src/dp_test_cmd_state.h index 803f3683..9c04cd8a 100644 --- a/tests/whole_dp/src/dp_test_cmd_state.h +++ b/tests/whole_dp/src/dp_test_cmd_state.h @@ -1,4 +1,5 @@ /* + * Copyright (c) 2019, AT&T Intellectual Property. All rights reserved. 
* Copyright (c) 2015-2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -22,7 +23,7 @@ dp_test_cmd_replace_dnat(int rule_num, const char *ifname, const char *orig_ip, uint16_t dnat_port); void dp_test_cmd_delete_dnat(int rule_num, const char *ifname, - const char *origin_ip, uint8_t proto); + const char *orig_ip, uint8_t proto); void dp_test_cmd_replace_snat(int rule_num, const char *ifname, const char *orig_ip, const char *snat_ip, struct dp_test_port_range *ports); diff --git a/tests/whole_dp/src/dp_test_console.c b/tests/whole_dp/src/dp_test_console.c index 35935457..c892bd16 100644 --- a/tests/whole_dp/src/dp_test_console.c +++ b/tests/whole_dp/src/dp_test_console.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015-2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -13,8 +13,8 @@ #include #include "dp_test_controller.h" -#include "dp_test_lib.h" -#include "dp_test_lib_intf.h" +#include "dp_test_lib_internal.h" +#include "dp_test_lib_intf_internal.h" #include "dp_test.h" #include "dp_test_console.h" @@ -42,7 +42,7 @@ dp_test_cmd_sock_recv1(enum cont_src_en cont_src, zsock_t *cmd_sock, char *topic = zmsg_popstr(msg); if (!return_err) - dp_test_assert_internal(strncmp("ERROR", topic, 5)); + dp_test_assert_internal(strncmp("ERROR", topic, 5) != 0); else *return_err = !strncmp("ERROR", topic, 5); @@ -108,7 +108,7 @@ dp_test_console_request_w_err_src(enum cont_src_en cont_src, request); ret = zmsg_send(&msg, cmd_sock); if (ret == -1) - dp_test_assert_internal(0); + dp_test_abort_internal(); /* * Send message and await reply @@ -124,6 +124,52 @@ dp_test_console_request_w_err_src(enum cont_src_en cont_src, return reply; } +/* + * Execute a console request and return either the response and/or + * an error state flag (if one is provided). 
+ * Protobuf version + */ +void +dp_test_console_request_pb_src(enum cont_src_en cont_src, + const char *req, int req_len, + zmsg_t **resp_msg, + bool print) +{ + int ret; + zsock_t *cmd_sock; + + /* + * Create ephemeral ZMQ channel to the console for sending + * show commands etc. to the dataplane. + */ + cmd_sock = zsock_new_req(cont_src_console[cont_src].console_ep); + dp_test_assert_internal(cmd_sock); + + /* + * Create request message + */ + zmsg_t *msg = zmsg_new(); + zmsg_addstr(msg, "protobuf"); + zmsg_addmem(msg, req, req_len); + if (print) + printf("console: send protobuf command of size: %d", req_len); + + /* + * Send message and await reply + */ + ret = zmsg_send(&msg, cmd_sock); + if (ret == -1) + dp_test_abort_internal(); + + *resp_msg = zmsg_recv(cmd_sock); + dp_test_assert_internal(*resp_msg); + + /* + * Kill ZMQ connection. + */ + zsock_destroy(&cmd_sock); +} + char * dp_test_console_request_w_err(const char *request, bool *err_ret, bool print) @@ -134,6 +180,18 @@ dp_test_console_request_w_err(const char *request, err_ret, print); } +void +dp_test_console_request_pb(const char *req, int req_len, + zmsg_t **resp_msg, + bool print) +{ + enum cont_src_en cont_src = dp_test_cont_src_get(); + dp_test_console_request_pb_src(cont_src, + req, req_len, + resp_msg, + print); +} + char * dp_test_console_request(const char *request, bool print) { @@ -153,23 +211,3 @@ dp_test_console_request_reply(const char *cmd, bool print) printf("console rep value: %s\n", reply ? reply : ""); free(reply); } - -char * -dp_test_console_request_src(enum cont_src_en cont_src, const char *request, - bool print) -{ - return dp_test_console_request_w_err_src(cont_src, request, NULL, - print); -} - -void -dp_test_console_request_reply_src(enum cont_src_en cont_src, const char *cmd, - bool print) -{ - char *reply; - - reply = dp_test_console_request_src(cont_src, cmd, print); - if (print) - printf("console rep value: %s\n", reply ? 
reply : ""); - free(reply); -} diff --git a/tests/whole_dp/src/dp_test_console.h b/tests/whole_dp/src/dp_test_console.h index cef8bb75..5798beb0 100644 --- a/tests/whole_dp/src/dp_test_console.h +++ b/tests/whole_dp/src/dp_test_console.h @@ -1,4 +1,5 @@ /*- + * Copyright (c) 2019-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015-2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -10,17 +11,19 @@ #ifndef _DP_TEST_CONSOLE_H_ #define _DP_TEST_CONSOLE_H_ -char *dp_test_console_request(const char *request, bool print); -char *dp_test_console_request_src(enum cont_src_en cont_src, - const char *request, bool print); char *dp_test_console_request_w_err(const char *request, bool *err_ret, bool print); char *dp_test_console_request_w_err_src(enum cont_src_en cont_src, const char *request, bool *err_ret, bool print); +void dp_test_console_request_pb_src(enum cont_src_en cont_src, + const char *req, int req_len, + zmsg_t **resp_msg, + bool print); +void dp_test_console_request_pb(const char *req, int req_len, + zmsg_t **resp_msg, + bool print); void dp_test_console_request_reply(const char *cmd, bool print); -void dp_test_console_request_reply_src(enum cont_src_en cont_src, - const char *cmd, bool print); char *dp_test_console_set_endpoint(enum cont_src_en cont_src); #endif /* _DP_TEST_CONSOLE_H_ */ diff --git a/tests/whole_dp/src/dp_test_controller.c b/tests/whole_dp/src/dp_test_controller.c index b9a3a88e..72c1f6b6 100644 --- a/tests/whole_dp/src/dp_test_controller.c +++ b/tests/whole_dp/src/dp_test_controller.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015-2016 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -29,10 +29,10 @@ #include "compat.h" #include "dp_test_controller.h" -#include "dp_test_netlink_state.h" +#include "dp_test_netlink_state_internal.h" #include "dp_test_json_utils.h" -#include "dp_test_lib.h" -#include "dp_test_lib_intf.h" +#include "dp_test_lib_internal.h" +#include "dp_test_lib_intf_internal.h" #include "dp_test.h" #include "dp_test_route_broker.h" @@ -293,19 +293,19 @@ snapshot_new(void) static void delport_request(zsock_t *sock, zmsg_t *msg, zframe_t **envelope) { - return; + /* nothing to do */ } static void snapshot_send(snapshot_t *self, zsock_t *socket, zframe_t *to) { - return; + /* nothing to do */ } static void config_send(zsock_t *socket, zframe_t *to) { - return; + /* nothing to do */ } static void send_som(snapshot_t *snap, zsock_t *sock, zframe_t **envelope, @@ -337,8 +337,8 @@ send_eom(snapshot_t *snap, zsock_t *sock, zframe_t **envelope) nlmsg_send(msg, sock, false); } -static int -zmsg_popu32(zmsg_t *msg, uint32_t *p) +int +dp_test_zmsg_popu32(zmsg_t *msg, uint32_t *p) { zframe_t *frame = zmsg_pop(msg); @@ -400,7 +400,7 @@ zactor_terminated(zloop_t *loop __rte_unused, zsock_t *sock, * -1 if failed */ static int -port_create(zmsg_t *msg, uint64_t *seqno) +port_create(zmsg_t *msg, uint64_t *seqno, bool ippresent) { struct ip_addr myip; char err_str[BUFSIZ]; @@ -413,10 +413,11 @@ port_create(zmsg_t *msg, uint64_t *seqno) return -1; } - if (zmsg_popip(msg, &myip) < 0) { - err("missing local ip"); - return -1; - } + if (ippresent) + if (zmsg_popip(msg, &myip) < 0) { + err("missing local ip"); + return -1; + } json_str = zmsg_popstr(msg); if (!json_str) { @@ -447,7 +448,7 @@ connect_request(zsock_t *sock, zmsg_t *msg, zframe_t **envelope) char *control = NULL; char *uuid = NULL; - if (zmsg_popu32(msg, &version) < 0) { + if (dp_test_zmsg_popu32(msg, &version) < 0) { err("no version in connect"); return; } @@ -498,7 +499,7 @@ connect_request(zsock_t *sock, zmsg_t *msg, zframe_t **envelope) } static void -addport_request(zsock_t *sock, 
zmsg_t *msg, zframe_t **envelope) +newport_request(zsock_t *sock, zmsg_t *msg, zframe_t **envelope) { uint64_t seqno; int port, ifindex; @@ -506,7 +507,7 @@ addport_request(zsock_t *sock, zmsg_t *msg, zframe_t **envelope) dp_test_assert_internal(msg); - port = port_create(msg, &seqno); + port = port_create(msg, &seqno, true); if (port < 0) return; @@ -526,28 +527,101 @@ addport_request(zsock_t *sock, zmsg_t *msg, zframe_t **envelope) zstr_send(sock, ifname); } +static void +iniport_request(zsock_t *sock, zmsg_t *msg, zframe_t **envelope) +{ + uint64_t seqno; + int port; + uint32_t cookie; + char ifname[IFNAMSIZ]; + + dp_test_assert_internal(msg); + + port = port_create(msg, &seqno, false); + if (port < 0) + return; + + if (port >= dp_test_intf_count_local() + + dp_test_intf_switch_port_count()) { + err("port %u out of range", port); + return; + } + + dp_test_intf_port2name(port, ifname); + cookie = port; + zframe_send(envelope, sock, ZFRAME_MORE); + zstr_sendm(sock, "OK"); + seqno_sendm(sock, seqno); + ifindex_sendm(sock, cookie); + zstr_send(sock, ifname); +} + +static void +addport_request(zsock_t *sock, zmsg_t *msg, zframe_t **envelope) +{ + uint64_t seqno; + int port, ifindex; + char *ifname1; + uint32_t cookie; + char ifname2[IFNAMSIZ]; + + dp_test_assert_internal(msg); + + if (zmsg_popu64(msg, &seqno) < 0) { + err("missing sequence no"); + return; + } + if (dp_test_zmsg_popu32(msg, &cookie) < 0) { + err("missing cookie"); + return; + } + ifname1 = zmsg_popstr(msg); + port = cookie; + if ((port < 0) || (port >= dp_test_intf_count_local() + + dp_test_intf_switch_port_count())) { + err("port %u out of range", port); + return; + } + + ifindex = dp_test_intf_port2index(port); + dp_test_intf_port2name(port, ifname2); + + if (!streq(ifname1, ifname2)) { + err("port %u name mismatch %s %s", port, + ifname1, ifname2); + return; + } + + zframe_send(envelope, sock, ZFRAME_MORE); + zstr_sendm(sock, "OK"); + seqno_sendm(sock, seqno); + ifindex_sendm(sock, (uint32_t) 
ifindex); + zstr_send(sock, ifname1); + free(ifname1); +} + static void link_request(const char *state, zsock_t *sock, zmsg_t *msg, zframe_t **envelope) { - return; + /* nothing to do */ } static void stats_update(zmsg_t *msg) { - return; + /* nothing to do */ } static void mrt_request(zsock_t *sock, zmsg_t *msg, zframe_t **envelope) { - return; + /* nothing to do */ } static void mrt6_request(zsock_t *sock, zmsg_t *msg, zframe_t **envelope) { - return; + /* nothing to do */ } static void @@ -556,7 +630,7 @@ ifquery_request(snapshot_t *snap, zsock_t *sock, zmsg_t *msg, { uint32_t ifindex; - if (zmsg_popu32(msg, &ifindex) < 0) { + if (dp_test_zmsg_popu32(msg, &ifindex) < 0) { err("no ifindex in connect"); return; } @@ -629,6 +703,24 @@ config_error(zmsg_t *msg) expected_conf_err = 0; } +static void +ext_buf_congestion(zmsg_t *msg) +{ + +} + +static dp_test_event_msg_hdlr *msg_call_back; + +void dp_test_register_event_msg(dp_test_event_msg_hdlr handler) +{ + msg_call_back = handler; +} + +void dp_test_unregister_event_msg(void) +{ + msg_call_back = NULL; +} + static void dp_event_msg(zmsg_t *msg) { @@ -638,9 +730,14 @@ dp_event_msg(zmsg_t *msg) if (streq(event, "CONFERR")) config_error(msg); - else - dp_test_assert_internal(0); - + else if (streq(event, "QosExtBufCongestion")) + ext_buf_congestion(msg); + else if (msg_call_back) { + if ((msg_call_back)(event, msg)) + dp_test_abort_internal(); + } else { + dp_test_abort_internal(); + } free(event); } @@ -662,9 +759,13 @@ process_msg(enum cont_src_en cont_src, snapshot_t *snap, zsock_t *sock, if (streq(action, "CONNECT")) connect_request(sock, msg, &envelope); else if (streq(action, "NEWPORT")) - addport_request(sock, msg, &envelope); + newport_request(sock, msg, &envelope); else if (streq(action, "DELPORT")) delport_request(sock, msg, &envelope); + else if (streq(action, "INIPORT")) + iniport_request(sock, msg, &envelope); + else if (streq(action, "ADDPORT")) + addport_request(sock, msg, &envelope); else if 
(streq(action, "WHATSUP?")) { snapshot_send(snap, sock, envelope); config_send(sock, envelope); @@ -688,7 +789,7 @@ process_msg(enum cont_src_en cont_src, snapshot_t *snap, zsock_t *sock, else if (strncmp(action, "DPEVENT", 7) == 0) dp_event_msg(msg); else - dp_test_assert_internal(0); + dp_test_abort_internal(); free(action); zframe_destroy(&envelope); @@ -850,8 +951,8 @@ static int address_topic(const struct nlmsghdr *nlh, char *buf, size_t len) return -1; } - if (tb[IFA_ADDRESS]) - addr = mnl_attr_get_payload(tb[IFA_ADDRESS]); + if (tb[IFA_LOCAL]) + addr = mnl_attr_get_payload(tb[IFA_LOCAL]); else { notice("missing address in netlink message\n"); return -1; @@ -1199,14 +1300,42 @@ nl_propagate(const char *topic, const struct nlmsghdr *nlh) cont_info[cont_src_current].pub_sock, false); } +static void +data_send_free(void *data, void *hint) +{ + free(data); +} + +void nl_propagate_xfrm(zsock_t *sock, const struct nlmsghdr *nlh, + size_t size, const char *hdr) +{ + zmq_msg_t m; + + if (nlh) + zmq_msg_init_data(&m, (void *)nlh, size, + data_send_free, NULL); + zmq_send_const(zsock_resolve(sock), hdr, + strlen(hdr) + 1, nlh ? 
ZMQ_SNDMORE : 0); + if (nlh) + zmq_msg_send(&m, zsock_resolve(sock), 0); +} + void -nl_propagate_broker(const char *topic, const struct nlmsghdr *nlh) +nl_propagate_broker(const char *topic, void *data, size_t size) { - if (cont_src_current == CONT_SRC_MAIN) - nl_propagate_src(cont_src_current, topic, nlh, + if (cont_src_current == CONT_SRC_MAIN && + dp_test_route_broker_protobuf) { + zmq_msg_t m; + + zmq_msg_init_data(&m, data, size, + data_send_free, NULL); + + zmq_msg_send(&m, zsock_resolve(broker_data_sock), 0); + } else if (cont_src_current == CONT_SRC_MAIN) + nl_propagate_src(cont_src_current, topic, data, broker_data_sock, true); else - nl_propagate_src(cont_src_current, topic, nlh, + nl_propagate_src(cont_src_current, topic, data, cont_info[cont_src_current].pub_sock, false); } @@ -1262,10 +1391,10 @@ dp_test_request_thread(zsock_t *pipe, void *args) /* zactor API will send a $TERM ZMQ message on termination */ if (zloop_reader(loop, pipe, zactor_terminated, NULL)) - dp_test_assert_internal(0); + dp_test_abort_internal(); if (zloop_reader(loop, requests, req_handler, snap)) - dp_test_assert_internal(0); + dp_test_abort_internal(); ret = zloop_timer(loop, 1000, 0, check_expired, snap); dp_test_assert_internal(ret >= 0); @@ -1338,19 +1467,18 @@ extract_topic(const char *line) return NULL; } -void dp_test_send_config_src(enum cont_src_en cont_src, - const char *cmd_fmt_str, ...) 
+ +static +void dp_test_send_config_inner(enum cont_src_en cont_src, + const char *cmd_fmt_str, va_list ap) { char cmd[DP_TEST_TMP_BUF]; char *cmd_copy; nlmsg_t *nmsg; char *topic; - va_list ap; int len; - va_start(ap, cmd_fmt_str); len = vsnprintf(cmd, sizeof(cmd), cmd_fmt_str, ap); - va_end(ap); dp_test_assert_internal(len < DP_TEST_TMP_BUF); cmd_copy = strdup(cmd); @@ -1372,6 +1500,25 @@ void dp_test_send_config_src(enum cont_src_en cont_src, nlmsg_send(nmsg, cont_info[cont_src].pub_sock, false); } +void dp_test_send_config_src(enum cont_src_en cont_src, + const char *cmd_fmt_str, ...) +{ + va_list ap; + + va_start(ap, cmd_fmt_str); + dp_test_send_config_inner(cont_src, cmd_fmt_str, ap); + va_end(ap); +} + +void dp_test_send_config(const char *cmd_fmt_str, ...) +{ + va_list args; + + va_start(args, cmd_fmt_str); + dp_test_send_config_inner(dp_test_cont_src_get(), cmd_fmt_str, args); + va_end(args); +} + void dp_test_send_config_src_pb(enum cont_src_en cont_src, void *cmd, size_t cmd_len) { diff --git a/tests/whole_dp/src/dp_test_controller.h b/tests/whole_dp/src/dp_test_controller.h index 3138f7fe..b589fd35 100644 --- a/tests/whole_dp/src/dp_test_controller.h +++ b/tests/whole_dp/src/dp_test_controller.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015-2016 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -25,7 +25,9 @@ void dp_test_controller_debug_set(int debug_val); int nl_generate_topic(const struct nlmsghdr *nlh, char *buf, size_t buflen); void nl_propagate(const char *topic, const struct nlmsghdr *nlh); -void nl_propagate_broker(const char *topic, const struct nlmsghdr *nlh); +void nl_propagate_broker(const char *topic, void *data, size_t size); +void nl_propagate_xfrm(zsock_t *sock, const struct nlmsghdr *nlh, size_t size, + const char *hdr); void dp_test_controller_init(enum cont_src_en cont_src, char **req_ipc); void dp_test_controller_close(enum cont_src_en cont_src); diff --git a/tests/whole_dp/src/dp_test_cpp_lim.c b/tests/whole_dp/src/dp_test_cpp_lim.c index 20e52ec2..986aab4f 100644 --- a/tests/whole_dp/src/dp_test_cpp_lim.c +++ b/tests/whole_dp/src/dp_test_cpp_lim.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2019-2020, AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only */ @@ -196,7 +196,7 @@ void check_cpp_rate_limiter_stats(void) int ret; int i; - enum fal_policer_stat_type cntr_ids[FAL_POLICER_STAT_MAX] = { + enum fal_policer_stat_type cntr_ids[] = { FAL_POLICER_STAT_GREEN_PACKETS, FAL_POLICER_STAT_GREEN_BYTES, FAL_POLICER_STAT_RED_PACKETS, diff --git a/tests/whole_dp/src/dp_test_cpp_lim_fal.c b/tests/whole_dp/src/dp_test_cpp_lim_fal.c index 0d88c7f3..d79914ed 100644 --- a/tests/whole_dp/src/dp_test_cpp_lim_fal.c +++ b/tests/whole_dp/src/dp_test_cpp_lim_fal.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2019-2020, AT&T Intellectual Property. All rights reserved. 
* * SPDX-License-Identifier: LGPL-2.1-only */ @@ -17,11 +17,11 @@ #include "dp_test.h" #include "dp_test_str.h" -#include "dp_test_lib.h" +#include "dp_test_lib_internal.h" #include "dp_test_lib_exp.h" -#include "dp_test_lib_intf.h" -#include "dp_test_pktmbuf_lib.h" -#include "dp_test_netlink_state.h" +#include "dp_test_lib_intf_internal.h" +#include "dp_test_pktmbuf_lib_internal.h" +#include "dp_test_netlink_state_internal.h" #include "dp_test_console.h" #include "dp_test_controller.h" #include "dp_test_json_utils.h" diff --git a/tests/whole_dp/src/dp_test_cross_connect.c b/tests/whole_dp/src/dp_test_cross_connect.c index 1a030ab6..e44ce340 100644 --- a/tests/whole_dp/src/dp_test_cross_connect.c +++ b/tests/whole_dp/src/dp_test_cross_connect.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only * @@ -7,13 +7,13 @@ */ #include "dp_test.h" #include "dp_test_str.h" -#include "dp_test_lib.h" +#include "dp_test_lib_internal.h" #include "dp_test_lib_exp.h" #include "dp_test_lib_pkt.h" -#include "dp_test_pktmbuf_lib.h" +#include "dp_test_pktmbuf_lib_internal.h" #include "dp_test_controller.h" #include "dp_test_json_utils.h" -#include "dp_test_netlink_state.h" +#include "dp_test_netlink_state_internal.h" #include "dp_test_console.h" @@ -62,37 +62,24 @@ _dp_test_wait_for_xconnect(const char *src_intf, const char *dst_intf, __FILE__, __func__, __LINE__) static void -dp_test_create_xconnect_msg(const XConnectConfig__CommandType cmd, - const char *dp_ifname, - const char *new_ifname, - void **buf, int *len) +dp_test_create_and_send_xconnect_msg(const XConnectConfig__CommandType cmd, + const char *dp_ifname, + const char *new_ifname) { + int len; XConnectConfig xcon = XCONNECT_CONFIG__INIT; xcon.has_cmd = true; xcon.cmd = cmd; xcon.dp_ifname = (char *)dp_ifname; xcon.new_ifname = (char *)new_ifname; - *len = 
xconnect_config__get_packed_size(&xcon); - void *buf2 = malloc(*len); + len = xconnect_config__get_packed_size(&xcon); + void *buf2 = malloc(len); dp_test_assert_internal(buf2); xconnect_config__pack(&xcon, buf2); - DataplaneEnvelope msg = DATAPLANE_ENVELOPE__INIT; - msg.type = strdup("vyatta:xconnect"); - msg.msg.data = buf2; - msg.msg.len = *len; - - *len = dataplane_envelope__get_packed_size(&msg); - - *buf = malloc(*len); - dp_test_assert_internal(*buf); - - dataplane_envelope__pack(&msg, *buf); - - free(buf2); - free(msg.type); + dp_test_lib_pb_wrap_and_send_pb("vyatta:xconnect", buf2, len); } static void @@ -102,18 +89,11 @@ dp_test_execute(const XConnectConfig__CommandType cmd, { char real_ifname_src[IFNAMSIZ]; char real_ifname_dst[IFNAMSIZ]; - int len; - void *buf; - - dp_test_create_xconnect_msg(cmd, - dp_test_intf_real(intf1, real_ifname_src), - dp_test_intf_real(intf2, real_ifname_dst), - &buf, &len); - - dp_test_send_config_src_pb(dp_test_cont_src_get(), - buf, len); - free(buf); + dp_test_create_and_send_xconnect_msg( + cmd, + dp_test_intf_real(intf1, real_ifname_src), + dp_test_intf_real(intf2, real_ifname_dst)); } DP_START_TEST(xconnect_switching, xconnect_switching1) diff --git a/tests/whole_dp/src/dp_test_crypto_block_policy.c b/tests/whole_dp/src/dp_test_crypto_block_policy.c index 95542fa2..779f2ef6 100644 --- a/tests/whole_dp/src/dp_test_crypto_block_policy.c +++ b/tests/whole_dp/src/dp_test_crypto_block_policy.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2016 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -16,13 +16,14 @@ #include "ip_funcs.h" #include "dp_test.h" -#include "dp_test_lib.h" +#include "dp_test_lib_internal.h" #include "dp_test_lib_exp.h" -#include "dp_test_macros.h" -#include "dp_test_lib_intf.h" -#include "dp_test_pktmbuf_lib.h" +#include "dp_test_lib_intf_internal.h" +#include "dp_test_pktmbuf_lib_internal.h" #include "dp_test_crypto_utils.h" -#include "dp_test_netlink_state.h" +#include "dp_test_netlink_state_internal.h" +#include "dp_test/dp_test_macros.h" + /* * The tests in this module check the operation of IPsec BLOCK policies @@ -173,7 +174,7 @@ generate_expectation(bool forward, struct udphdr *udphdr) (void)dp_test_pktmbuf_eth_init( pkt, PEER_MAC_ADDRESS, dp_test_intf_name2mac_str("dp2T2"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); expectation = dp_test_exp_create(pkt); rte_pktmbuf_free(pkt); dp_test_exp_set_oif_name(expectation, "dp2T2"); @@ -221,9 +222,10 @@ static void setup(void) output_policy.dir = XFRM_POLICY_OUT; output_policy.family = AF_INET; output_policy.reqid = TUNNEL_REQID; - output_policy.priority = 0; + output_policy.priority = 200000; output_policy.mark = 0; output_policy.vrfid = VRF_DEFAULT_ID; + output_policy.rule_no = 0; /* * Add a route to the EAST network. 
This is to make sure @@ -264,6 +266,7 @@ DP_START_TEST(single_policy, single_allow_policy) struct dp_test_expected *expectation; struct rte_mbuf *input_pkt; + output_policy.rule_no++; output_policy.action = XFRM_POLICY_ALLOW; dp_test_crypto_create_policy(&output_policy); @@ -273,7 +276,7 @@ DP_START_TEST(single_policy, single_allow_policy) (void)dp_test_pktmbuf_eth_init(input_pkt, dp_test_intf_name2mac_str("dp1T1"), - NULL, ETHER_TYPE_IPv4); + NULL, RTE_ETHER_TYPE_IPV4); dp_test_pak_receive(input_pkt, "dp1T1", expectation); @@ -287,12 +290,13 @@ DP_START_TEST(single_policy, single_allow_policy) * This checks that a packet is correctly encrypted when it * matches a policy with an action of ALLOW */ -DP_START_TEST(single_policy, single_block_policy) +DP_START_TEST_FULL_RUN(single_policy, single_block_policy) { struct dp_test_expected *expectation; struct rte_mbuf *input_pkt; int payload_len; + output_policy.rule_no++; output_policy.action = XFRM_POLICY_BLOCK; dp_test_crypto_create_policy(&output_policy); @@ -306,7 +310,7 @@ DP_START_TEST(single_policy, single_block_policy) (void)dp_test_pktmbuf_eth_init(input_pkt, dp_test_intf_name2mac_str("dp1T1"), - NULL, ETHER_TYPE_IPv4); + NULL, RTE_ETHER_TYPE_IPV4); dp_test_pak_receive(input_pkt, "dp1T1", expectation); @@ -320,12 +324,13 @@ DP_START_TEST(single_policy, single_block_policy) * This checks that a policy can be modified from ALLOW * to BLOCK and that traffic is no longer forwarded. 
*/ -DP_START_TEST(single_policy, modfy_allow_to_block) +DP_START_TEST_FULL_RUN(single_policy, modfy_allow_to_block) { struct dp_test_expected *expectation; struct rte_mbuf *input_pkt; int payload_len; + output_policy.rule_no++; output_policy.action = XFRM_POLICY_ALLOW; dp_test_crypto_create_policy(&output_policy); @@ -335,7 +340,7 @@ DP_START_TEST(single_policy, modfy_allow_to_block) (void)dp_test_pktmbuf_eth_init(input_pkt, dp_test_intf_name2mac_str("dp1T1"), - NULL, ETHER_TYPE_IPv4); + NULL, RTE_ETHER_TYPE_IPV4); dp_test_pak_receive(input_pkt, "dp1T1", expectation); @@ -353,7 +358,7 @@ DP_START_TEST(single_policy, modfy_allow_to_block) (void)dp_test_pktmbuf_eth_init(input_pkt, dp_test_intf_name2mac_str("dp1T1"), - NULL, ETHER_TYPE_IPv4); + NULL, RTE_ETHER_TYPE_IPV4); dp_test_pak_receive(input_pkt, "dp1T1", expectation); @@ -367,12 +372,13 @@ DP_START_TEST(single_policy, modfy_allow_to_block) * This checks that a policy can be modified from BLOCK * to ALLOW and that traffic is then encrypted and forwarded. 
*/ -DP_START_TEST(single_policy, modfy_block_to_allow) +DP_START_TEST_FULL_RUN(single_policy, modfy_block_to_allow) { struct dp_test_expected *expectation; struct rte_mbuf *input_pkt; int payload_len; + output_policy.rule_no++; output_policy.action = XFRM_POLICY_BLOCK; dp_test_crypto_create_policy(&output_policy); @@ -386,7 +392,7 @@ DP_START_TEST(single_policy, modfy_block_to_allow) (void)dp_test_pktmbuf_eth_init(input_pkt, dp_test_intf_name2mac_str("dp1T1"), - NULL, ETHER_TYPE_IPv4); + NULL, RTE_ETHER_TYPE_IPV4); dp_test_pak_receive(input_pkt, "dp1T1", expectation); @@ -400,7 +406,7 @@ DP_START_TEST(single_policy, modfy_block_to_allow) (void)dp_test_pktmbuf_eth_init(input_pkt, dp_test_intf_name2mac_str("dp1T1"), - NULL, ETHER_TYPE_IPv4); + NULL, RTE_ETHER_TYPE_IPV4); dp_test_pak_receive(input_pkt, "dp1T1", expectation); diff --git a/tests/common/src/dp_test_crypto_lib.c b/tests/whole_dp/src/dp_test_crypto_lib.c similarity index 98% rename from tests/common/src/dp_test_crypto_lib.c rename to tests/whole_dp/src/dp_test_crypto_lib.c index dc129018..3503d1f1 100644 --- a/tests/common/src/dp_test_crypto_lib.c +++ b/tests/whole_dp/src/dp_test_crypto_lib.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2018-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015-2016 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -17,7 +17,7 @@ #include #include "dp_test_crypto_lib.h" -#include "dp_test_pktmbuf_lib.h" +#include "dp_test_pktmbuf_lib_internal.h" /* * dp_test_prefix_str_to_xfrm_addr() diff --git a/tests/common/inc/dp_test_crypto_lib.h b/tests/whole_dp/src/dp_test_crypto_lib.h similarity index 92% rename from tests/common/inc/dp_test_crypto_lib.h rename to tests/whole_dp/src/dp_test_crypto_lib.h index fbcdf50f..b77aa569 100644 --- a/tests/common/inc/dp_test_crypto_lib.h +++ b/tests/whole_dp/src/dp_test_crypto_lib.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2018-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2018-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015-2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -13,7 +13,7 @@ #include #include #include -#include "../tests/whole_dp/src/dp_test_lib.h" +#include "../tests/whole_dp/src/dp_test_lib_internal.h" /* * A virtual feature point interface can be bound diff --git a/tests/whole_dp/src/dp_test_crypto_multi_tunnel.c b/tests/whole_dp/src/dp_test_crypto_multi_tunnel.c index 5c97e7a2..2ca19fd4 100644 --- a/tests/whole_dp/src/dp_test_crypto_multi_tunnel.c +++ b/tests/whole_dp/src/dp_test_crypto_multi_tunnel.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015-2016 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -15,14 +15,14 @@ #include "ip_funcs.h" #include "dp_test.h" -#include "dp_test_lib.h" -#include "dp_test_macros.h" -#include "dp_test_lib_intf.h" -#include "dp_test_pktmbuf_lib.h" +#include "dp_test_lib_internal.h" +#include "dp_test_lib_intf_internal.h" +#include "dp_test/dp_test_macros.h" +#include "dp_test_pktmbuf_lib_internal.h" #include "dp_test_crypto_utils.h" -#include "dp_test_netlink_state.h" +#include "dp_test_netlink_state_internal.h" #include "dp_test_lib_exp.h" - +#include "dp_test_npf_lib.h" /* * +-----------+ dp2T2 * | | @@ -156,7 +156,7 @@ static struct dp_test_expected *create_expected_packet(const char *src, NULL /* transport_hdr*/); (void)dp_test_pktmbuf_eth_init(pak, dst_mac, dp_test_intf_name2mac_str(ifname), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); exp = dp_test_exp_create(pak); dp_test_exp_set_oif_name(exp, ifname); @@ -164,7 +164,7 @@ static struct dp_test_expected *create_expected_packet(const char *src, * Validate just the L2 header, IP header and the eight * bytes of the ESP header (including the SPI). 
*/ - dp_test_exp_set_check_len(exp, (pktmbuf_l2_len(pak) + + dp_test_exp_set_check_len(exp, (dp_pktmbuf_l2_len(pak) + sizeof(struct iphdr) + 8)); rte_pktmbuf_free(pak); free(payload); @@ -189,6 +189,7 @@ static const struct dp_test_crypto_policy tun_1_in_policy = { .family = AF_INET, .reqid = TUN_1_REQID, .priority = 1000, + .rule_no = 1, .mark = 0, .vrfid = VRF_DEFAULT_ID }; @@ -203,6 +204,7 @@ static const struct dp_test_crypto_policy tun_1_out_policy = { .family = AF_INET, .reqid = TUN_1_REQID, .priority = 1000, + .rule_no = 2, .mark = 0, .vrfid = VRF_DEFAULT_ID }; @@ -245,6 +247,7 @@ static const struct dp_test_crypto_policy tun_2_in_policy = { .family = AF_INET, .reqid = TUN_2_REQID, .priority = 1000, + .rule_no = 3, .mark = 0, .vrfid = VRF_DEFAULT_ID }; @@ -259,6 +262,7 @@ static const struct dp_test_crypto_policy tun_2_out_policy = { .family = AF_INET, .reqid = TUN_2_REQID, .priority = 1000, + .rule_no = 4, .mark = 0, .vrfid = VRF_DEFAULT_ID }; @@ -341,6 +345,7 @@ static void teardown_two_tunnels_two_peers(void) dp_test_crypto_delete_sa(&tun_2_out_sa); dp_test_crypto_check_sa_count(VRF_DEFAULT_ID, 0); + dp_test_npf_cleanup(); } /* @@ -356,7 +361,7 @@ static void teardown_two_tunnels_two_peers(void) * is used based on the IP and ESP headers of the encrypted packet. It * does NOT check that the packet is correctly encrypted. 
*/ -DP_START_TEST(multi_s2s_tunnel, two_tunnels_two_peers) +DP_START_TEST_FULL_RUN(multi_s2s_tunnel, two_tunnels_two_peers) { struct dp_test_expected *exp1, *exp2; struct rte_mbuf *pkt_tun1, *pkt_tun2; @@ -379,7 +384,7 @@ DP_START_TEST(multi_s2s_tunnel, two_tunnels_two_peers) (void)dp_test_pktmbuf_eth_init(pkt_tun1, dp_test_intf_name2mac_str("dp1T1"), SOURCE_MAC_ADDR, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); /* Expect an ESP packet to TUN_1_REMOTE_IP_ADDR on dp2T2 */ exp1 = create_expected_packet(TUN_1_LOCAL_IP_ADDR, @@ -406,7 +411,7 @@ DP_START_TEST(multi_s2s_tunnel, two_tunnels_two_peers) (void)dp_test_pktmbuf_eth_init(pkt_tun2, dp_test_intf_name2mac_str("dp1T1"), SOURCE_MAC_ADDR, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); /* Expect an ESP packet to TUN_2_REMOTE_IP_ADDR on dp3T3 */ exp2 = create_expected_packet(TUN_2_LOCAL_IP_ADDR, @@ -437,6 +442,7 @@ static const struct dp_test_crypto_policy tun_3_in_policy = { .family = AF_INET, .reqid = TUN_3_REQID, .priority = 1000, + .rule_no = 5, .mark = 0, .vrfid = VRF_DEFAULT_ID }; @@ -451,6 +457,7 @@ static const struct dp_test_crypto_policy tun_3_out_policy = { .family = AF_INET, .reqid = TUN_3_REQID, .priority = 1000, + .rule_no = 6, .mark = 0, .vrfid = VRF_DEFAULT_ID }; @@ -533,6 +540,7 @@ static void teardown_two_tunnels_one_peer(void) dp_test_crypto_delete_sa(&tun_3_out_sa); dp_test_crypto_check_sa_count(VRF_DEFAULT_ID, 0); + dp_test_npf_cleanup(); } /* @@ -548,7 +556,7 @@ static void teardown_two_tunnels_one_peer(void) * is used based on the IP and ESP headers of the encrypted packet. It * does NOT check that the packet is correctly encrypted. 
*/ -DP_START_TEST(multi_s2s_tunnel, two_tunnels_one_peer) +DP_START_TEST_FULL_RUN(multi_s2s_tunnel, two_tunnels_one_peer) { struct rte_mbuf *pkt_tun1, *pkt_tun3; struct dp_test_expected *exp1, *exp3; @@ -570,7 +578,7 @@ DP_START_TEST(multi_s2s_tunnel, two_tunnels_one_peer) &ip, &icmp); (void)dp_test_pktmbuf_eth_init(pkt_tun1, dp_test_intf_name2mac_str("dp1T1"), - NULL, ETHER_TYPE_IPv4); + NULL, RTE_ETHER_TYPE_IPV4); /* Expect an ESP packet to TUN_1_REMOTE_IP_ADD on dp2T2 */ exp1 = create_expected_packet(TUN_1_LOCAL_IP_ADDR, @@ -596,7 +604,7 @@ DP_START_TEST(multi_s2s_tunnel, two_tunnels_one_peer) &ip, &icmp); (void)dp_test_pktmbuf_eth_init(pkt_tun3, dp_test_intf_name2mac_str("dp1T1"), - NULL, ETHER_TYPE_IPv4); + NULL, RTE_ETHER_TYPE_IPV4); exp3 = create_expected_packet(TUN_3_LOCAL_IP_ADDR, TUN_3_REMOTE_IP_ADDR, @@ -681,7 +689,7 @@ static void teardown_more_than_one_sa_for_tunnel(void) * is used based on the IP and ESP headers of the encrypted packet. It * does NOT check that the packet is correctly encrypted. */ -DP_START_TEST(multi_s2s_tunnel, more_than_one_sa_for_tunnel) +DP_START_TEST_FULL_RUN(multi_s2s_tunnel, more_than_one_sa_for_tunnel) { struct dp_test_expected *exp3; struct rte_mbuf *pkt_tun3; @@ -703,7 +711,7 @@ DP_START_TEST(multi_s2s_tunnel, more_than_one_sa_for_tunnel) &ip, &icmp); (void)dp_test_pktmbuf_eth_init(pkt_tun3, dp_test_intf_name2mac_str("dp1T1"), - NULL, ETHER_TYPE_IPv4); + NULL, RTE_ETHER_TYPE_IPV4); /* * Expect an ESP packet to TUN_3_REMOTE_IP_ADD on dp2T2. diff --git a/tests/whole_dp/src/dp_test_crypto_perf_scale.c b/tests/whole_dp/src/dp_test_crypto_perf_scale.c new file mode 100644 index 00000000..403135da --- /dev/null +++ b/tests/whole_dp/src/dp_test_crypto_perf_scale.c @@ -0,0 +1,213 @@ +/* + * Copyright (c) 2020, AT&T Intellectual Property. + * All rights reserved. 
+ * + * SPDX-License-Identifier: LGPL-2.1-only + * + * Measure performance of IPsec tunnel setup + */ +#include + +#include +#include +#include + +#include "ip_funcs.h" + +#include "dp_test.h" +#include "dp_test_lib_internal.h" +#include "dp_test/dp_test_macros.h" +#include "dp_test/dp_test_lib_intf.h" +#include "dp_test/dp_test_pktmbuf_lib.h" +#include "dp_test_crypto_utils.h" +#include "dp_test/dp_test_netlink_state.h" +#include "dp_test_npf_lib.h" + +/* + * +-----------+ + * | | + * dp1T1 | | dp2T2 + * | | + * +-----+ UUT +-----+ 2.2.2.2/24 + * | | + * 1.1.1.2/24 | | + * | | + * +-----------+ + */ + +#define TEST_VRF 42 +#define SOURCE_IP_ADDR "1.1.1.1" +#define SOURCE_MAC_ADDR "aa:bb:cc:dd:1:1" + +/* + * Tunnel 1 parameters + */ +#define TUN_1_LOCAL_IP_ADDR "2.2.2.2" +#define TUN_1_REMOTE_IP_ADDR "2.2.2.3" +#define TUN_1_REMOTE_MAC_ADDR "aa:bb:cc:dd:2:3" +#define TUN_1_LOCAL_PREFIX "1.%d.%d.0/24" +#define TUN_1_REMOTE_PREFIX "8.%d.%d.0/24" +#define TUN_1_SINK_IP_ADDR "8.8.8.8" +#define TUN_1_IN_SA_SPI 0x22223333 +#define TUN_1_OUT_SA_SPI 0x33332222 +#define TUN_1_REQID_START 0x1111 + +#define TUN_1_EXTRA_OUT_SA_SPI 0x10002000 + +/* + * Tunnel 1 IPsec policies and SAs + * + * 1.1.1.0/2 -[2.2.2.2]========[2.2.2.3] -- 8.8.8.0/24 + */ +static struct dp_test_crypto_policy tun_1_in_policy = { + .d_prefix = TUN_1_LOCAL_PREFIX, + .s_prefix = TUN_1_REMOTE_PREFIX, + .proto = 0, + .dst = TUN_1_LOCAL_IP_ADDR, + .dst_family = AF_INET, + .dir = XFRM_POLICY_IN, + .family = AF_INET, + .reqid = TUN_1_REQID_START, + .priority = 1000, + .mark = 0, + .vrfid = VRF_DEFAULT_ID +}; + +static struct dp_test_crypto_policy tun_1_out_policy = { + .d_prefix = TUN_1_REMOTE_PREFIX, + .s_prefix = TUN_1_LOCAL_PREFIX, + .proto = 0, + .dst = TUN_1_REMOTE_IP_ADDR, + .dst_family = AF_INET, + .dir = XFRM_POLICY_OUT, + .family = AF_INET, + .reqid = TUN_1_REQID_START, + .priority = 1000, + .mark = 0, + .vrfid = VRF_DEFAULT_ID +}; + +DP_DECL_TEST_SUITE(crypto_perf_scale_suite); + +/* + * setup() + * + 
* Setup the interfaces, address and ARP neighbours which are common + * to all test cases in this module. + */ +static void setup(vrfid_t vrfid) +{ + if (vrfid != VRF_DEFAULT_ID) + dp_test_netlink_add_vrf(vrfid, 1); + + /* Set up local addresses and ARP cache entries for neighbours */ + dp_test_nl_add_ip_addr_and_connected_vrf("dp1T1", "1.1.1.0/24", vrfid); + dp_test_netlink_add_neigh("dp1T1", SOURCE_IP_ADDR, SOURCE_MAC_ADDR); + + dp_test_nl_add_ip_addr_and_connected_vrf("dp2T2", "2.2.2.0/24", vrfid); + dp_test_netlink_add_neigh("dp2T2", TUN_1_REMOTE_IP_ADDR, + TUN_1_REMOTE_MAC_ADDR); +} + +static void setup_or_teardown_tunnels(uint32_t tunnel_cnt, bool setup) +{ +#define PREFIX_SIZE 20 + char local_prefix[PREFIX_SIZE]; + char remote_prefix[PREFIX_SIZE]; + uint8_t i, j; + uint32_t reqid = TUN_1_REQID_START; + struct timespec start, end; + uint64_t ptime = 0; + uint32_t outer = tunnel_cnt / 253; + uint64_t cur_cnt; + + clock_gettime(CLOCK_REALTIME, &start); + + cur_cnt = 0; + for (i = 1; i <= outer; i++) { + for (j = 1; j <= 253; j++) { + snprintf(local_prefix, PREFIX_SIZE, + TUN_1_LOCAL_PREFIX, i, j); + snprintf(remote_prefix, PREFIX_SIZE, + TUN_1_REMOTE_PREFIX, i, j); + tun_1_in_policy.s_prefix = remote_prefix; + tun_1_in_policy.d_prefix = local_prefix; + tun_1_in_policy.reqid = reqid; + + tun_1_out_policy.s_prefix = local_prefix; + tun_1_out_policy.d_prefix = remote_prefix; + tun_1_out_policy.reqid = reqid++; + + if (setup) { + dp_test_crypto_create_policy_verify( + &tun_1_in_policy, false); + dp_test_crypto_create_policy_verify( + &tun_1_out_policy, false); + } else { + dp_test_crypto_delete_policy_verify( + &tun_1_in_policy, false); + dp_test_crypto_delete_policy_verify( + &tun_1_out_policy, false); + } + } + cur_cnt += 253 * 2; + } + + clock_gettime(CLOCK_REALTIME, &end); + ptime = timespec_diff_us(&start, &end); + printf("Time taken to request %s of %lu policies = %lu us\n", + setup ? 
"creation" : "deletion", cur_cnt, ptime); + + dp_test_crypto_check_policy_count(VRF_DEFAULT_ID, + setup ? cur_cnt : 0, AF_INET); + + if (setup) + dp_test_npf_cleanup(); + + clock_gettime(CLOCK_REALTIME, &end); + ptime = timespec_diff_us(&start, &end); + printf("Time taken to %s %lu policies = %lu us\n", + setup ? "install" : "delete", cur_cnt, ptime); +} + +/* + * teardown() + * + * Tear down the interfaces, address and ARP neighbours which are common + * to all test cases in this module. + */ +static void teardown(vrfid_t vrfid) +{ + /* Remove local addresses and ARP cache entries for neighbours */ + dp_test_netlink_del_neigh("dp2T2", TUN_1_REMOTE_IP_ADDR, + TUN_1_REMOTE_MAC_ADDR); + dp_test_nl_del_ip_addr_and_connected_vrf("dp2T2", "2.2.2.0/24", vrfid); + dp_test_netlink_del_neigh("dp1T1", SOURCE_IP_ADDR, SOURCE_MAC_ADDR); + dp_test_nl_del_ip_addr_and_connected_vrf("dp1T1", "1.1.1.0/24", vrfid); + + if (vrfid != VRF_DEFAULT_ID) + dp_test_netlink_del_vrf(vrfid, 0); +} + +DP_DECL_TEST_CASE(crypto_perf_scale_suite, crypto_policy_scale, NULL, NULL); + +/* + * TESTCASE: Policy scale + * + * This testcase tests the amount of time taken to build a set of policies + * incrementally. It is not run as part of the build as it can fail on + * slow machines or machines with a heavy workload. It is however useful + * for comparisons between different runs on the same machine. + */ +DP_START_TEST_DONT_RUN(crypto_policy_scale, policy_update_scale) +{ +#define MAX_TUNNEL_CNT 512 + setup(VRF_DEFAULT_ID); + + setup_or_teardown_tunnels(MAX_TUNNEL_CNT, true); + setup_or_teardown_tunnels(MAX_TUNNEL_CNT, false); + + teardown(VRF_DEFAULT_ID); + +} DP_END_TEST; + diff --git a/tests/whole_dp/src/dp_test_crypto_policy.c b/tests/whole_dp/src/dp_test_crypto_policy.c index 2b9c299e..0a0388e7 100644 --- a/tests/whole_dp/src/dp_test_crypto_policy.c +++ b/tests/whole_dp/src/dp_test_crypto_policy.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. 
+ * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015-2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -15,12 +15,13 @@ #include "ip_funcs.h" #include "dp_test.h" -#include "dp_test_lib.h" -#include "dp_test_macros.h" -#include "dp_test_lib_intf.h" -#include "dp_test_pktmbuf_lib.h" +#include "dp_test_lib_internal.h" +#include "dp_test_lib_intf_internal.h" +#include "dp_test/dp_test_macros.h" +#include "dp_test_pktmbuf_lib_internal.h" #include "dp_test_crypto_utils.h" -#include "dp_test_netlink_state.h" +#include "dp_test_netlink_state_internal.h" +#include "dp_test_npf_lib.h" /* * +-----------+ @@ -68,6 +69,7 @@ static const struct dp_test_crypto_policy tun_1_in_policy = { .family = AF_INET, .reqid = TUN_1_REQID, .priority = 1000, + .rule_no = 1, .mark = 0, .vrfid = VRF_DEFAULT_ID }; @@ -82,6 +84,7 @@ static const struct dp_test_crypto_policy tun_1_out_policy = { .family = AF_INET, .reqid = TUN_1_REQID, .priority = 1000, + .rule_no = 2, .mark = 0, .vrfid = VRF_DEFAULT_ID }; @@ -165,9 +168,8 @@ DP_DECL_TEST_CASE(crypto_policy_suite, crypto_policy, NULL, NULL); /* * TESTCASE: Simple policy update * - * This test exercises the scenario when a policy is - * updated and caused a list corruption leading to an infinite loop - * when a new entry is subsequently created for the policy. + * This test exercises a policy update that caused a list corruption leading + * to an infinite loop when a new entry is subsequently created for the policy. */ DP_START_TEST(crypto_policy, simple_policy_update) { @@ -195,6 +197,8 @@ DP_START_TEST(crypto_policy, simple_policy_update) dp_test_crypto_check_sa_count(VRF_DEFAULT_ID, 0); + dp_test_npf_cleanup(); + teardown(VRF_DEFAULT_ID); } DP_END_TEST; @@ -205,7 +209,7 @@ DP_START_TEST(crypto_policy, simple_policy_update) * of block, updated to change the action to allow and then * back to block.
*/ -DP_START_TEST(crypto_policy, update_policy_action) +DP_START_TEST_FULL_RUN(crypto_policy, update_policy_action) { static struct dp_test_crypto_policy the_policy = { .d_prefix = "16.1.2.0/24", @@ -218,6 +222,7 @@ DP_START_TEST(crypto_policy, update_policy_action) .family = AF_INET, .reqid = 1234, .priority = 1000, + .rule_no = 3, .mark = 0, .vrfid = VRF_DEFAULT_ID }; @@ -245,7 +250,7 @@ DP_START_TEST(crypto_policy, update_policy_action) teardown(VRF_DEFAULT_ID); } DP_END_TEST; -DP_START_TEST(crypto_policy, update_policy_action_vrf) +DP_START_TEST_FULL_RUN(crypto_policy, update_policy_action_vrf) { static struct dp_test_crypto_policy the_policy = { .d_prefix = "16.1.2.0/24", @@ -258,6 +263,7 @@ DP_START_TEST(crypto_policy, update_policy_action_vrf) .family = AF_INET, .reqid = 1234, .priority = 1000, + .rule_no = 4, .mark = 0, .vrfid = TEST_VRF }; diff --git a/tests/whole_dp/src/dp_test_crypto_site_to_site.c b/tests/whole_dp/src/dp_test_crypto_site_to_site.c index 1c0564d5..470d998a 100644 --- a/tests/whole_dp/src/dp_test_crypto_site_to_site.c +++ b/tests/whole_dp/src/dp_test_crypto_site_to_site.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015-2016 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -9,15 +9,17 @@ */ #include "dp_test.h" -#include "dp_test_lib.h" -#include "dp_test_netlink_state.h" +#include "dp_test_lib_internal.h" +#include "dp_test_netlink_state_internal.h" #include "dp_test_crypto_utils.h" -#include "dp_test_pktmbuf_lib.h" +#include "dp_test_pktmbuf_lib_internal.h" #include "dp_test_crypto_lib.h" #include "dp_test_lib_exp.h" -#include "dp_test_lib_intf.h" +#include "dp_test_lib_intf_internal.h" #include "dp_test_console.h" #include "dp_test_controller.h" +#include "dp_test_npf_lib.h" +#include "dp_test_xfrm_server.h" #include "main.h" #include "in_cksum.h" @@ -98,6 +100,8 @@ #define LINK_LOCAL "169.254.0.1/32" #define LINK_LOCAL6 "fe80::1/128" +#define RULE_PRIORITY 1 + static void dp_test_create_and_send_s2s_msg(CryptoPolicyConfig__Action action, int af, @@ -112,7 +116,6 @@ dp_test_create_and_send_s2s_msg(CryptoPolicyConfig__Action action, uint32_t proto, int sel_ifindex) { - void *buf; int len; CryptoPolicyConfig con = CRYPTO_POLICY_CONFIG__INIT; @@ -136,58 +139,20 @@ dp_test_create_and_send_s2s_msg(CryptoPolicyConfig__Action action, uint32_t v6_saddr[4], v6_daddr[4]; IPAddress ip_daddr = IPADDRESS__INIT; IPAddress ip_saddr = IPADDRESS__INIT; - if (af == AF_INET) { - ip_daddr.address_oneof_case = - IPADDRESS__ADDRESS_ONEOF_IPV4_ADDR; - ip_saddr.address_oneof_case = - IPADDRESS__ADDRESS_ONEOF_IPV4_ADDR; - - inet_pton(AF_INET, daddr, &ip_daddr.ipv4_addr); - inet_pton(AF_INET, saddr, &ip_saddr.ipv4_addr); - con.sel_daddr = &ip_daddr; - con.sel_saddr = &ip_saddr; - } else { - ip_daddr.address_oneof_case = - IPADDRESS__ADDRESS_ONEOF_IPV6_ADDR; - ip_saddr.address_oneof_case = - IPADDRESS__ADDRESS_ONEOF_IPV6_ADDR; + dp_test_lib_pb_set_ip_addr(&ip_saddr, saddr, &v6_saddr); + con.sel_saddr = &ip_saddr; - inet_pton(AF_INET6, daddr, &v6_daddr); - inet_pton(AF_INET6, saddr, &v6_saddr); - - ip_daddr.ipv6_addr.len = 16; - ip_saddr.ipv6_addr.len = 16; - ip_daddr.ipv6_addr.data = (uint8_t *)v6_daddr; - ip_saddr.ipv6_addr.data = (uint8_t *)v6_saddr; 
- - con.sel_daddr = &ip_daddr; - con.sel_saddr = &ip_saddr; - } + dp_test_lib_pb_set_ip_addr(&ip_daddr, daddr, &v6_daddr); + con.sel_daddr = &ip_daddr; len = crypto_policy_config__get_packed_size(&con); void *buf2 = malloc(len); dp_test_assert_internal(buf2); crypto_policy_config__pack(&con, buf2); - DataplaneEnvelope msg = DATAPLANE_ENVELOPE__INIT; - msg.type = strdup("vyatta:crypto-policy"); - msg.msg.data = buf2; - msg.msg.len = len; - - len = dataplane_envelope__get_packed_size(&msg); - - buf = malloc(len); - dp_test_assert_internal(buf); - - dataplane_envelope__pack(&msg, buf); - free(buf2); - free(msg.type); - - dp_test_send_config_src_pb(dp_test_cont_src_get(), - buf, len); - free(buf); + dp_test_lib_pb_wrap_and_send_pb("vyatta:crypto-policy", buf2, len); } /* @@ -202,7 +167,8 @@ static struct dp_test_crypto_policy output_policy = { .dir = XFRM_POLICY_OUT, .family = AF_INET, .reqid = TUNNEL_REQID, - .priority = 0, + .priority = RULE_PRIORITY, + .rule_no = 1, .mark = 0, .vrfid = VRF_DEFAULT_ID }; @@ -216,7 +182,8 @@ static struct dp_test_crypto_policy output_policy6 = { .dir = XFRM_POLICY_OUT, .family = AF_INET6, .reqid = TUNNEL_REQID, - .priority = 0, + .priority = RULE_PRIORITY, + .rule_no = 2, .mark = 0, .vrfid = VRF_DEFAULT_ID }; @@ -230,7 +197,8 @@ static struct dp_test_crypto_policy output_policy46 = { .dir = XFRM_POLICY_OUT, .family = AF_INET, .reqid = TUNNEL_REQID, - .priority = 0, + .priority = RULE_PRIORITY, + .rule_no = 3, .mark = 0, .vrfid = VRF_DEFAULT_ID }; @@ -244,7 +212,8 @@ static struct dp_test_crypto_policy output_policy64 = { .dir = XFRM_POLICY_OUT, .family = AF_INET6, .reqid = TUNNEL_REQID, - .priority = 0, + .priority = RULE_PRIORITY, + .rule_no = 4, .mark = 0, .vrfid = VRF_DEFAULT_ID }; @@ -258,7 +227,8 @@ static struct dp_test_crypto_policy input_policy = { .dir = XFRM_POLICY_IN, .family = AF_INET, .reqid = TUNNEL_REQID, - .priority = 0, + .priority = RULE_PRIORITY, + .rule_no = 5, .mark = 0, .vrfid = VRF_DEFAULT_ID }; @@ -272,7 +242,8 
@@ static struct dp_test_crypto_policy input_policy6 = { .dir = XFRM_POLICY_IN, .family = AF_INET6, .reqid = TUNNEL_REQID, - .priority = 0, + .priority = RULE_PRIORITY, + .rule_no = 6, .mark = 0, .vrfid = VRF_DEFAULT_ID }; @@ -287,7 +258,8 @@ static struct dp_test_crypto_policy input_policy64 = { .dir = XFRM_POLICY_IN, .family = AF_INET, .reqid = TUNNEL_REQID, - .priority = 0, + .priority = RULE_PRIORITY, + .rule_no = 7, .mark = 0, .vrfid = VRF_DEFAULT_ID }; @@ -302,7 +274,8 @@ static struct dp_test_crypto_policy input_policy46 = { .dir = XFRM_POLICY_IN, .family = AF_INET6, .reqid = TUNNEL_REQID, - .priority = 0, + .priority = RULE_PRIORITY, + .rule_no = 8, .mark = 0, .vrfid = VRF_DEFAULT_ID }; @@ -452,7 +425,6 @@ dp_test_create_and_send_vfp_set_msg(const char *intf, uint32_t ifindex, VFPSetConfig__Action action) { - void *buf; int len; VFPSetConfig vfp = VFPSET_CONFIG__INIT; @@ -469,24 +441,8 @@ dp_test_create_and_send_vfp_set_msg(const char *intf, dp_test_assert_internal(buf2); vfpset_config__pack(&vfp, buf2); - DataplaneEnvelope msg = DATAPLANE_ENVELOPE__INIT; - msg.type = strdup("vyatta:vfp-set"); - msg.msg.data = buf2; - msg.msg.len = len; - - len = dataplane_envelope__get_packed_size(&msg); - - buf = malloc(len); - dp_test_assert_internal(buf); - - dataplane_envelope__pack(&msg, buf); - - free(buf2); - free(msg.type); - dp_test_send_config_src_pb(dp_test_cont_src_get(), - buf, len); - free(buf); + dp_test_lib_pb_wrap_and_send_pb("vyatta:vfp-set", buf2, len); } static void _s2s_add_vfp_and_bind(vrfid_t vrfid, const char *file, @@ -804,7 +760,7 @@ static void _s2s_teardown_interfaces6(vrfid_t vrfid, enum vfp_presence with_vfp, #define s2s_teardown_interfaces_v4_v6(vrfid, vfp) \ { \ - vrfid == VRF_DEFAULT_ID ? \ + (vrfid) == VRF_DEFAULT_ID ? 
\ s2s_teardown_interfaces(vrfid, vfp) : \ s2s_teardown_interfaces_leave_vrf(vrfid, vfp), \ s2s_teardown_interfaces6(vrfid, vfp); \ @@ -815,11 +771,12 @@ static void _setup_policies(struct dp_test_crypto_policy *input, vrfid_t vrfid, const char *file, int line) { bool verify = true; + bool update = false; input->vrfid = vrfid; output->vrfid = vrfid; - _dp_test_crypto_create_policy(file, line, input, verify); - _dp_test_crypto_create_policy(file, line, output, verify); + _dp_test_crypto_create_policy(file, line, input, verify, update); + _dp_test_crypto_create_policy(file, line, output, verify, update); } #define setup_policies(input, output, vrf) \ _setup_policies(input, output, vrf, __FILE__, __LINE__) @@ -829,8 +786,8 @@ static void _teardown_policies(struct dp_test_crypto_policy *input, struct dp_test_crypto_policy *output, const char *file, int line) { - _dp_test_crypto_delete_policy(file, line, input); - _dp_test_crypto_delete_policy(file, line, output); + _dp_test_crypto_delete_policy(file, line, input, true); + _dp_test_crypto_delete_policy(file, line, output, true); } #define teardown_policies(input, output) \ _teardown_policies(input, output, __FILE__, __LINE__) @@ -866,8 +823,8 @@ static void _teardown_sas(struct dp_test_crypto_sa *input, const char *file, const char *func, int line) { - _dp_test_crypto_delete_sa(file, line, input); - _dp_test_crypto_delete_sa(file, line, output); + _dp_test_crypto_delete_sa_verify(file, line, input, true); + _dp_test_crypto_delete_sa_verify(file, line, output, true); } #define teardown_sas(input, output) \ @@ -878,6 +835,7 @@ static void s2s_common_setup(vrfid_t vrfid, enum dp_test_crypo_auth_algo auth_algo, struct dp_test_crypto_policy *ipolicy, struct dp_test_crypto_policy *opolicy, + uint8_t nipols, uint8_t nopols, unsigned int mode, enum vfp_presence with_vfp, enum vrf_and_xfrm_order out_of_order) { @@ -886,20 +844,35 @@ static void s2s_common_setup(vrfid_t vrfid, */ struct dp_test_crypto_policy *ipol, *opol; bool 
verify = true; + int i; ipol = ipolicy ? ipolicy : &input_policy; opol = opolicy ? opolicy : &output_policy; + if (!ipolicy) + nipols = 1; + if (!opolicy) + nopols = 1; s2s_setup_interfaces(vrfid, with_vfp, out_of_order); - ipol->vrfid = vrfid; - opol->vrfid = vrfid; - - if (out_of_order == VRF_XFRM_OUT_OF_ORDER) + if (out_of_order == VRF_XFRM_OUT_OF_ORDER) { verify = false; + /* + * We expect the update to fail due to incomplete + * interfaces so check for that + */ + dp_test_crypto_xfrm_set_nack(nipols + nopols); + } - dp_test_crypto_create_policy_verify(ipol, verify); - dp_test_crypto_create_policy_verify(opol, verify); + for (i = 0; i < nipols; i++) { + ipol[i].vrfid = vrfid; + dp_test_crypto_create_policy_verify(&ipol[i], verify); + } + + for (i = 0; i < nopols; i++) { + opol[i].vrfid = vrfid; + dp_test_crypto_create_policy_verify(&opol[i], verify); + } dp_test_crypto_check_sa_count(VRF_DEFAULT_ID, 0); if (with_vfp == VFP_TRUE) @@ -916,15 +889,37 @@ static void s2s_common_setup(vrfid_t vrfid, input_sa.vrfid = vrfid; output_sa.vrfid = vrfid; + if (out_of_order == VRF_XFRM_OUT_OF_ORDER) + /* + * We expect the sa creates to fail due to incomplete + * interfaces so check for that + */ + dp_test_crypto_xfrm_set_nack(2); + dp_test_crypto_create_sa_verify(&input_sa, verify); dp_test_crypto_create_sa_verify(&output_sa, verify); if (out_of_order == VRF_XFRM_OUT_OF_ORDER) { + /* + * We need to put a scheduling barrier between the two + * SA creations above and the completion of interface + * setup below. There is a potential reordering + * race where the interface could become complete + * in the dataplane before the attempted creation of + * the SAs above in the dataplane, and so rather than + * return an error as expected it returns OK.
+ */ + dp_test_crypto_check_xfrm_acks(); + s2s_setup_interfaces_finish(vrfid, with_vfp); - wait_for_policy(ipol, true); - wait_for_policy(opol, true); - wait_for_sa(&input_sa, true); - wait_for_sa(&output_sa, true); + + for (i = 0; i < nipols; i++) + dp_test_crypto_create_policy_verify(&ipol[i], true); + for (i = 0; i < nopols; i++) + dp_test_crypto_create_policy_verify(&opol[i], true); + + dp_test_crypto_create_sa_verify(&input_sa, true); + dp_test_crypto_create_sa_verify(&output_sa, true); } if (with_vfp == VFP_TRUE) @@ -937,8 +932,11 @@ static void s2s_common_setup6(vrfid_t vrfid, enum dp_test_crypo_auth_algo auth_algo, struct dp_test_crypto_policy *ipolicy, struct dp_test_crypto_policy *opolicy, + uint8_t nipols, uint8_t nopols, unsigned int mode, enum vfp_presence with_vfp) { + int i; + /*************************************************** * Configure underlying topology */ @@ -948,11 +946,19 @@ static void s2s_common_setup6(vrfid_t vrfid, ipol = ipolicy ? ipolicy : &input_policy6; opol = opolicy ? opolicy : &output_policy6; + if (!ipolicy) + nipols = 1; + if (!opolicy) + nopols = 1; + for (i = 0; i < nipols; i++) { + ipol[i].vrfid = vrfid; + dp_test_crypto_create_policy(&ipol[i]); + } - ipol->vrfid = vrfid; - opol->vrfid = vrfid; - dp_test_crypto_create_policy(ipol); - dp_test_crypto_create_policy(opol); + for (i = 0; i < nopols; i++) { + opol[i].vrfid = vrfid; + dp_test_crypto_create_policy(&opol[i]); + } dp_test_crypto_check_sa_count(VRF_DEFAULT_ID, 0); if (with_vfp == VFP_TRUE) @@ -975,38 +981,80 @@ static void s2s_common_setup6(vrfid_t vrfid, static void s2s_common_teardown(vrfid_t vrfid, struct dp_test_crypto_policy *ipolicy, struct dp_test_crypto_policy *opolicy, + uint8_t nipols, uint8_t nopols, enum vfp_presence with_vfp, enum vrf_and_xfrm_order out_of_order) { + struct dp_test_crypto_policy *ipol, *opol; + int i; + + if (out_of_order == VRF_XFRM_OUT_OF_ORDER) { + /* + * Tear down the vrf first, this should cause + * a flush of all the ipsec state.
+ */ + s2s_teardown_interfaces(vrfid, with_vfp); + return; + } + + /* If no policies were supplied use defaults */ + ipol = ipolicy ? ipolicy : &input_policy; + opol = opolicy ? opolicy : &output_policy; + if (!ipolicy) + nipols = 1; + if (!opolicy) + nopols = 1; + dp_test_crypto_delete_sa(&input_sa); dp_test_crypto_delete_sa(&output_sa); - dp_test_crypto_delete_policy(ipolicy ? ipolicy : &input_policy); - dp_test_crypto_delete_policy(opolicy ? opolicy : &output_policy); + for (i = 0; i < nipols; i++) + dp_test_crypto_delete_policy(&ipol[i]); + + for (i = 0; i < nopols; i++) + dp_test_crypto_delete_policy(&opol[i]); /*************************************************** * Tear down topology */ s2s_teardown_interfaces(vrfid, with_vfp); + dp_test_npf_cleanup(); } static void s2s_common_teardown6(vrfid_t vrfid, struct dp_test_crypto_policy *ipolicy, struct dp_test_crypto_policy *opolicy, + uint8_t nipols, uint8_t nopols, enum vfp_presence with_vfp) { + struct dp_test_crypto_policy *ipol, *opol; + int i; + + /* If no policies were supplied use defaults */ + ipol = ipolicy ? ipolicy : &input_policy6; + opol = opolicy ? opolicy : &output_policy6; + if (!ipolicy) + nipols = 1; + if (!opolicy) + nopols = 1; + dp_test_crypto_delete_sa(&input_sa6); dp_test_crypto_delete_sa(&output_sa6); - dp_test_crypto_delete_policy(ipolicy ? ipolicy : &input_policy6); - dp_test_crypto_delete_policy(opolicy ? 
opolicy : &output_policy6); + for (i = 0; i < nipols; i++) + dp_test_crypto_delete_policy(&ipol[i]); + + for (i = 0; i < nopols; i++) + dp_test_crypto_delete_policy(&opol[i]); + /*************************************************** * Tear down topology */ s2s_teardown_interfaces6(vrfid, with_vfp); + dp_test_npf_cleanup(); } static void _build_pak_and_expected_encrypt(struct rte_mbuf **ping_pkt_p, @@ -1045,7 +1093,7 @@ static void _build_pak_and_expected_encrypt(struct rte_mbuf **ping_pkt_p, /* Fixup checksum too, bytes 11,12*/ expected_payload[11] = 0; expected_payload[12] = 0; - cksum = in_cksum_hdr((struct iphdr *)&expected_payload[0]); + cksum = dp_in_cksum_hdr((struct iphdr *)&expected_payload[0]); *((uint16_t *)&expected_payload[11]) = htons(cksum); } else { ping_pkt = build_input_packet6(local, remote); @@ -1062,7 +1110,8 @@ static void _build_pak_and_expected_encrypt(struct rte_mbuf **ping_pkt_p, dp_test_intf_name2mac_str(rx_intf), NULL, inner_addr.family == AF_INET ? - ETHER_TYPE_IPv4 : ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV4 : + RTE_ETHER_TYPE_IPV6); /* * Construct the expected encrypted packet. If src/dst are v4 @@ -1102,7 +1151,8 @@ static void _build_pak_and_expected_encrypt(struct rte_mbuf **ping_pkt_p, PEER_MAC_ADDR, dp_test_intf_name2mac_str(tx_intf), outer_addr.family == AF_INET ? - ETHER_TYPE_IPv4 : ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV4 : + RTE_ETHER_TYPE_IPV6); exp = dp_test_exp_create(encrypted_pkt); rte_pktmbuf_free(encrypted_pkt); @@ -1156,7 +1206,8 @@ static void _build_pak_and_expected_decrypt(struct rte_mbuf **enc_pkt_p, (void)dp_test_pktmbuf_eth_init(expected_pkt, CLIENT_LOCAL_MAC_ADDR, dp_test_intf_name2mac_str(tx_intf), inner_addr.family == AF_INET ? - ETHER_TYPE_IPv4 : ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV4 : + RTE_ETHER_TYPE_IPV6); /* * Construct the expected encrypted packet. 
If src/dst are v4 @@ -1190,7 +1241,8 @@ static void _build_pak_and_expected_decrypt(struct rte_mbuf **enc_pkt_p, dp_test_intf_name2mac_str(rx_intf), PEER_MAC_ADDR, outer_addr.family == AF_INET ? - ETHER_TYPE_IPv4 : ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV4 : + RTE_ETHER_TYPE_IPV6); exp = dp_test_exp_create(expected_pkt); rte_pktmbuf_free(expected_pkt); @@ -1210,7 +1262,6 @@ static void _build_pak_and_expected_decrypt(struct rte_mbuf **enc_pkt_p, transport_vrf, __FILE__, \ __func__, __LINE__) - static void null_encrypt_transport_main(vrfid_t vrfid) { /* @@ -1236,7 +1287,7 @@ static void null_encrypt_transport_main(vrfid_t vrfid) int payload_len; s2s_common_setup(vrfid, CRYPTO_CIPHER_NULL, CRYPTO_AUTH_NULL, - NULL, NULL, + NULL, NULL, 0, 0, XFRM_MODE_TRANSPORT, VFP_FALSE, VRF_XFRM_IN_ORDER); /* @@ -1245,13 +1296,13 @@ static void null_encrypt_transport_main(vrfid_t vrfid) ping_pkt = build_input_packet(CLIENT_LOCAL, CLIENT_REMOTE); (void)dp_test_pktmbuf_eth_init(ping_pkt, dp_test_intf_name2mac_str("dp1T1"), - NULL, ETHER_TYPE_IPv4); + NULL, RTE_ETHER_TYPE_IPV4); dp_test_set_pak_ip_field(iphdr(ping_pkt), DP_TEST_SET_PROTOCOL, 224); /* * Construct the expected encrypted packet */ - trans_mode_hdr = pktmbuf_mtol3(ping_pkt, struct iphdr *); + trans_mode_hdr = dp_pktmbuf_mtol3(ping_pkt, struct iphdr *); payload_len = sizeof(expected_payload); encrypted_pkt = dp_test_create_esp_ipv4_pak(PORT_EAST, PEER, 1, &payload_len, @@ -1267,7 +1318,7 @@ static void null_encrypt_transport_main(vrfid_t vrfid) (void)dp_test_pktmbuf_eth_init(encrypted_pkt, PEER_MAC_ADDR, dp_test_intf_name2mac_str("dp2T2"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); exp = dp_test_exp_create(encrypted_pkt); rte_pktmbuf_free(encrypted_pkt); @@ -1277,7 +1328,8 @@ static void null_encrypt_transport_main(vrfid_t vrfid) dp_test_pak_receive(ping_pkt, "dp1T1", exp); dp_test_crypto_check_sad_packets(vrfid, 1, 64); - s2s_common_teardown(vrfid, NULL, NULL, VFP_FALSE, VRF_XFRM_IN_ORDER); + s2s_common_teardown(vrfid, 
NULL, NULL, 0, 0, + VFP_FALSE, VRF_XFRM_IN_ORDER); } static void encrypt_aesgcm_main(vrfid_t vrfid) @@ -1308,7 +1360,7 @@ static void encrypt_aesgcm_main(vrfid_t vrfid) s2s_common_setup(vrfid, CRYPTO_CIPHER_AES128GCM, CRYPTO_AUTH_HMAC_SHA1, - NULL, NULL, + NULL, NULL, 0, 0, XFRM_MODE_TUNNEL, VFP_FALSE, VRF_XFRM_IN_ORDER); /* @@ -1317,7 +1369,7 @@ static void encrypt_aesgcm_main(vrfid_t vrfid) ping_pkt = build_input_packet(CLIENT_LOCAL, CLIENT_REMOTE); (void)dp_test_pktmbuf_eth_init(ping_pkt, dp_test_intf_name2mac_str("dp1T1"), - NULL, ETHER_TYPE_IPv4); + NULL, RTE_ETHER_TYPE_IPV4); /* * Construct the expected encrypted packet @@ -1337,7 +1389,7 @@ static void encrypt_aesgcm_main(vrfid_t vrfid) (void)dp_test_pktmbuf_eth_init(encrypted_pkt, PEER_MAC_ADDR, dp_test_intf_name2mac_str("dp2T2"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); exp = dp_test_exp_create(encrypted_pkt); rte_pktmbuf_free(encrypted_pkt); @@ -1370,7 +1422,8 @@ static void encrypt_aesgcm_main(vrfid_t vrfid) dp_test_assert_internal(stats_dp2T2.ifi_idropped == 0); dp_test_assert_internal(ifi_odropped(&stats_dp2T2) == 0); - s2s_common_teardown(vrfid, NULL, NULL, VFP_FALSE, VRF_XFRM_IN_ORDER); + s2s_common_teardown(vrfid, NULL, NULL, 0, 0, + VFP_FALSE, VRF_XFRM_IN_ORDER); } static void encrypt_main(vrfid_t vrfid, enum vrf_and_xfrm_order out_of_order) @@ -1402,16 +1455,21 @@ static void encrypt_main(vrfid_t vrfid, enum vrf_and_xfrm_order out_of_order) s2s_common_setup(vrfid, CRYPTO_CIPHER_AES_CBC, CRYPTO_AUTH_HMAC_SHA1, - NULL, NULL, + NULL, NULL, 0, 0, XFRM_MODE_TUNNEL, VFP_FALSE, out_of_order); + if (out_of_order) { + s2s_common_teardown(vrfid, NULL, NULL, 0, 0, + VFP_FALSE, out_of_order); + return; + } /* * Construct the input ICMP ping packet. 
*/ ping_pkt = build_input_packet(CLIENT_LOCAL, CLIENT_REMOTE); (void)dp_test_pktmbuf_eth_init(ping_pkt, dp_test_intf_name2mac_str("dp1T1"), - NULL, ETHER_TYPE_IPv4); + NULL, RTE_ETHER_TYPE_IPV4); /* * Construct the expected encrypted packet @@ -1431,7 +1489,7 @@ static void encrypt_main(vrfid_t vrfid, enum vrf_and_xfrm_order out_of_order) (void)dp_test_pktmbuf_eth_init(encrypted_pkt, PEER_MAC_ADDR, dp_test_intf_name2mac_str("dp2T2"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); exp = dp_test_exp_create(encrypted_pkt); rte_pktmbuf_free(encrypted_pkt); @@ -1464,7 +1522,8 @@ static void encrypt_main(vrfid_t vrfid, enum vrf_and_xfrm_order out_of_order) dp_test_assert_internal(stats_dp2T2.ifi_idropped == 0); dp_test_assert_internal(ifi_odropped(&stats_dp2T2) == 0); - s2s_common_teardown(vrfid, NULL, NULL, VFP_FALSE, out_of_order); + s2s_common_teardown(vrfid, NULL, NULL, 0, 0, + VFP_FALSE, out_of_order); } static void encrypt6_main(vrfid_t vrfid) @@ -1498,7 +1557,7 @@ static void encrypt6_main(vrfid_t vrfid) s2s_common_setup6(vrfid, CRYPTO_CIPHER_AES_CBC, CRYPTO_AUTH_HMAC_SHA1, - NULL, NULL, + NULL, NULL, 0, 0, XFRM_MODE_TUNNEL, VFP_FALSE); /* @@ -1509,7 +1568,7 @@ static void encrypt6_main(vrfid_t vrfid) (void)dp_test_pktmbuf_eth_init(ping_pkt, dp_test_intf_name2mac_str("dp1T1"), - NULL, ETHER_TYPE_IPv6); + NULL, RTE_ETHER_TYPE_IPV6); /* * Construct the expected encrypted packet @@ -1528,7 +1587,7 @@ static void encrypt6_main(vrfid_t vrfid) (void)dp_test_pktmbuf_eth_init(encrypted_pkt, PEER_MAC_ADDR, dp_test_intf_name2mac_str("dp2T2"), - ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV6); exp = dp_test_exp_create(encrypted_pkt); rte_pktmbuf_free(encrypted_pkt); @@ -1561,7 +1620,7 @@ static void encrypt6_main(vrfid_t vrfid) dp_test_assert_internal(stats_dp2T2.ifi_idropped == 0); dp_test_assert_internal(ifi_odropped(&stats_dp2T2) == 0); - s2s_common_teardown6(vrfid, NULL, NULL, VFP_FALSE); + s2s_common_teardown6(vrfid, NULL, NULL, 0, 0, VFP_FALSE); } static void 
bad_hash_algorithm_main(vrfid_t vrfid) @@ -1572,13 +1631,14 @@ static void bad_hash_algorithm_main(vrfid_t vrfid) s2s_common_setup(vrfid, CRYPTO_CIPHER_AES_CBC, CRYPTO_AUTH_HMAC_XCBC, - NULL, NULL, + NULL, NULL, 0, 0, XFRM_MODE_TUNNEL, VFP_FALSE, VRF_XFRM_IN_ORDER); dp_test_exp_set_fwd_status(exp, DP_TEST_FWD_DROPPED); dp_test_pak_receive(ping, "dp1T1", exp); - s2s_common_teardown(vrfid, NULL, NULL, VFP_FALSE, VRF_XFRM_IN_ORDER); + s2s_common_teardown(vrfid, NULL, NULL, 0, 0, + VFP_FALSE, VRF_XFRM_IN_ORDER); } static void bad_hash_algorithm6_main(vrfid_t vrfid) @@ -1589,13 +1649,13 @@ static void bad_hash_algorithm6_main(vrfid_t vrfid) s2s_common_setup6(vrfid, CRYPTO_CIPHER_AES_CBC, CRYPTO_AUTH_HMAC_XCBC, - NULL, NULL, + NULL, NULL, 0, 0, XFRM_MODE_TUNNEL, VFP_FALSE); dp_test_exp_set_fwd_status(exp, DP_TEST_FWD_DROPPED); dp_test_pak_receive(ping, "dp1T1", exp); - s2s_common_teardown6(vrfid, NULL, NULL, VFP_FALSE); + s2s_common_teardown6(vrfid, NULL, NULL, 0, 0, VFP_FALSE); } static void null_encrypt_main(vrfid_t vrfid, enum vfp_presence with_vfp) @@ -1627,7 +1687,7 @@ static void null_encrypt_main(vrfid_t vrfid, enum vfp_presence with_vfp) sizeof(payload_v4_icmp_null_enc)); s2s_common_setup(vrfid, CRYPTO_CIPHER_NULL, CRYPTO_AUTH_NULL, - NULL, NULL, + NULL, NULL, 0, 0, XFRM_MODE_TUNNEL, with_vfp, VRF_XFRM_IN_ORDER); /* @@ -1636,7 +1696,7 @@ static void null_encrypt_main(vrfid_t vrfid, enum vfp_presence with_vfp) ping_pkt = build_input_packet(CLIENT_LOCAL, CLIENT_REMOTE); (void)dp_test_pktmbuf_eth_init(ping_pkt, dp_test_intf_name2mac_str("dp1T1"), - NULL, ETHER_TYPE_IPv4); + NULL, RTE_ETHER_TYPE_IPV4); /* * Construct the expected encrypted packet @@ -1656,7 +1716,7 @@ static void null_encrypt_main(vrfid_t vrfid, enum vfp_presence with_vfp) (void)dp_test_pktmbuf_eth_init(encrypted_pkt, PEER_MAC_ADDR, dp_test_intf_name2mac_str("dp2T2"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); exp = dp_test_exp_create(encrypted_pkt); rte_pktmbuf_free(encrypted_pkt); @@ -1669,7 
+1729,8 @@ static void null_encrypt_main(vrfid_t vrfid, enum vfp_presence with_vfp) dp_test_check_state_show("ifconfig vfp1", "tx_packets\": 1", false); - s2s_common_teardown(vrfid, NULL, NULL, with_vfp, VRF_XFRM_IN_ORDER); + s2s_common_teardown(vrfid, NULL, NULL, 0, 0, + with_vfp, VRF_XFRM_IN_ORDER); } static void null_encrypt6_transport_main(vrfid_t vrfid) @@ -1695,7 +1756,7 @@ static void null_encrypt6_transport_main(vrfid_t vrfid) int payload_len; s2s_common_setup6(vrfid, CRYPTO_CIPHER_NULL, CRYPTO_AUTH_NULL, - NULL, NULL, + NULL, NULL, 0, 0, XFRM_MODE_TRANSPORT, VFP_FALSE); /* @@ -1704,12 +1765,12 @@ static void null_encrypt6_transport_main(vrfid_t vrfid) ping_pkt = build_input_packet6(CLIENT_LOCAL6, CLIENT_REMOTE6); (void)dp_test_pktmbuf_eth_init(ping_pkt, dp_test_intf_name2mac_str("dp1T1"), - NULL, ETHER_TYPE_IPv6); + NULL, RTE_ETHER_TYPE_IPV6); /* * Construct the expected encrypted packet */ - trans_mode_hdr = pktmbuf_mtol3(ping_pkt, struct ip6_hdr *); + trans_mode_hdr = dp_pktmbuf_mtol3(ping_pkt, struct ip6_hdr *); payload_len = sizeof(expected_payload); encrypted_pkt = dp_test_create_esp_ipv6_pak(PORT_EAST6, PEER6, 1, &payload_len, @@ -1723,7 +1784,7 @@ static void null_encrypt6_transport_main(vrfid_t vrfid) (void)dp_test_pktmbuf_eth_init(encrypted_pkt, PEER_MAC_ADDR, dp_test_intf_name2mac_str("dp2T2"), - ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV6); exp = dp_test_exp_create(encrypted_pkt); rte_pktmbuf_free(encrypted_pkt); @@ -1733,7 +1794,7 @@ static void null_encrypt6_transport_main(vrfid_t vrfid) dp_test_pak_receive(ping_pkt, "dp1T1", exp); dp_test_crypto_check_sad_packets(vrfid, 1, 64); - s2s_common_teardown6(vrfid, NULL, NULL, VFP_FALSE); + s2s_common_teardown6(vrfid, NULL, NULL, 0, 0, VFP_FALSE); } static void null_encrypt6_main(vrfid_t vrfid, enum vfp_presence with_vfp) @@ -1782,7 +1843,7 @@ static void null_encrypt6_main(vrfid_t vrfid, enum vfp_presence with_vfp) int payload_len; s2s_common_setup6(vrfid, CRYPTO_CIPHER_NULL, CRYPTO_AUTH_NULL, - NULL, 
NULL, + NULL, NULL, 0, 0, XFRM_MODE_TUNNEL, with_vfp); /* @@ -1791,7 +1852,7 @@ static void null_encrypt6_main(vrfid_t vrfid, enum vfp_presence with_vfp) ping_pkt = build_input_packet6(CLIENT_LOCAL6, CLIENT_REMOTE6); (void)dp_test_pktmbuf_eth_init(ping_pkt, dp_test_intf_name2mac_str("dp1T1"), - NULL, ETHER_TYPE_IPv6); + NULL, RTE_ETHER_TYPE_IPV6); /* * Construct the expected encrypted packet @@ -1809,7 +1870,7 @@ static void null_encrypt6_main(vrfid_t vrfid, enum vfp_presence with_vfp) (void)dp_test_pktmbuf_eth_init(encrypted_pkt, PEER_MAC_ADDR, dp_test_intf_name2mac_str("dp2T2"), - ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV6); exp = dp_test_exp_create(encrypted_pkt); rte_pktmbuf_free(encrypted_pkt); @@ -1823,7 +1884,7 @@ static void null_encrypt6_main(vrfid_t vrfid, enum vfp_presence with_vfp) dp_test_check_state_show("ifconfig vfp1", "tx_packets\": 1", false); - s2s_common_teardown6(vrfid, NULL, NULL, with_vfp); + s2s_common_teardown6(vrfid, NULL, NULL, 0, 0, with_vfp); } static void s2s_toobig6_main(vrfid_t vrfid) @@ -1838,7 +1899,7 @@ static void s2s_toobig6_main(vrfid_t vrfid) s2s_common_setup6(vrfid, CRYPTO_CIPHER_AES_CBC, CRYPTO_AUTH_HMAC_SHA1, - NULL, NULL, + NULL, NULL, 0, 0, XFRM_MODE_TUNNEL, VFP_FALSE); /* @@ -1847,7 +1908,7 @@ static void s2s_toobig6_main(vrfid_t vrfid) test_pak = dp_test_create_ipv6_pak(CLIENT_LOCAL6, CLIENT_REMOTE6, 1, &len); dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), - CLIENT_LOCAL_MAC_ADDR, ETHER_TYPE_IPv6); + CLIENT_LOCAL_MAC_ADDR, RTE_ETHER_TYPE_IPV6); /* * Expected ICMP response @@ -1868,7 +1929,7 @@ static void s2s_toobig6_main(vrfid_t vrfid) (void)dp_test_pktmbuf_eth_init(icmp_pak, CLIENT_LOCAL_MAC_ADDR, dp_test_intf_name2mac_str("dp1T1"), - ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV6); icmp6->icmp6_cksum = 0; icmp6->icmp6_cksum = dp_test_ipv6_icmp_cksum(icmp_pak, ip6, icmp6); @@ -1880,7 +1941,7 @@ static void s2s_toobig6_main(vrfid_t vrfid) /* now send test pak and check we get expected back */ 
dp_test_pak_receive(test_pak, "dp1T1", exp); - s2s_common_teardown6(vrfid, NULL, NULL, VFP_FALSE); + s2s_common_teardown6(vrfid, NULL, NULL, 0, 0, VFP_FALSE); } static void null_decrypt_main(vrfid_t vrfid, enum inner_validity valid) @@ -1893,7 +1954,7 @@ static void null_decrypt_main(vrfid_t vrfid, enum inner_validity valid) int payload_len; s2s_common_setup(vrfid, CRYPTO_CIPHER_NULL, CRYPTO_AUTH_NULL, - NULL, NULL, + NULL, NULL, 0, 0, XFRM_MODE_TUNNEL, VFP_FALSE, VRF_XFRM_IN_ORDER); /* @@ -1905,8 +1966,8 @@ static void null_decrypt_main(vrfid_t vrfid, enum inner_validity valid) dp_test_pktmbuf_eth_init(expected_pkt, dp_test_intf_name2mac_str("dp2T2"), - "10:00:00:00:00:00", - ETHER_TYPE_IPv4); + PEER_MAC_ADDR, + RTE_ETHER_TYPE_IPV4); } else { expected_pkt = build_input_packet(CLIENT_REMOTE, CLIENT_LOCAL); @@ -1916,7 +1977,7 @@ static void null_decrypt_main(vrfid_t vrfid, enum inner_validity valid) dp_test_pktmbuf_eth_init(expected_pkt, CLIENT_LOCAL_MAC_ADDR, dp_test_intf_name2mac_str("dp1T1"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); } /* @@ -1955,7 +2016,7 @@ static void null_decrypt_main(vrfid_t vrfid, enum inner_validity valid) (void)dp_test_pktmbuf_eth_init(encrypted_pkt, dp_test_intf_name2mac_str("dp2T2"), PEER_MAC_ADDR, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); exp = dp_test_exp_create(expected_pkt); rte_pktmbuf_free(expected_pkt); @@ -1999,7 +2060,8 @@ static void null_decrypt_main(vrfid_t vrfid, enum inner_validity valid) dp_test_assert_internal(stats_dp2T2.ifi_idropped == 0); dp_test_assert_internal(ifi_odropped(&stats_dp2T2) == 0); - s2s_common_teardown(vrfid, NULL, NULL, VFP_FALSE, VRF_XFRM_IN_ORDER); + s2s_common_teardown(vrfid, NULL, NULL, 0, 0, + VFP_FALSE, VRF_XFRM_IN_ORDER); } static void null_decrypt_main6(vrfid_t vrfid, enum inner_validity valid) @@ -2012,7 +2074,7 @@ static void null_decrypt_main6(vrfid_t vrfid, enum inner_validity valid) int payload_len; s2s_common_setup6(vrfid, CRYPTO_CIPHER_NULL, CRYPTO_AUTH_NULL, - NULL, NULL, + NULL, 
NULL, 0, 0, XFRM_MODE_TUNNEL, VFP_FALSE); if (valid == INNER_LOCAL) { @@ -2020,8 +2082,8 @@ static void null_decrypt_main6(vrfid_t vrfid, enum inner_validity valid) dp_test_pktmbuf_eth_init(expected_pkt, dp_test_intf_name2mac_str("dp2T2"), - "10:00:00:00:00:00", - ETHER_TYPE_IPv6); + PEER_MAC_ADDR, + RTE_ETHER_TYPE_IPV6); } else { /* * Construct the output ICMP ping packet. We need to reduce @@ -2033,7 +2095,7 @@ static void null_decrypt_main6(vrfid_t vrfid, enum inner_validity valid) dp_test_pktmbuf_eth_init(expected_pkt, CLIENT_LOCAL_MAC_ADDR, dp_test_intf_name2mac_str("dp1T1"), - ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV6); } /* @@ -2069,7 +2131,7 @@ static void null_decrypt_main6(vrfid_t vrfid, enum inner_validity valid) (void)dp_test_pktmbuf_eth_init(encrypted_pkt, dp_test_intf_name2mac_str("dp2T2"), PEER_MAC_ADDR, - ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV6); rte_pktmbuf_trim(expected_pkt, 8); if (valid != INNER_LOCAL) @@ -2118,7 +2180,7 @@ static void null_decrypt_main6(vrfid_t vrfid, enum inner_validity valid) dp_test_assert_internal(stats_dp2T2.ifi_idropped == 0); dp_test_assert_internal(ifi_odropped(&stats_dp2T2) == 0); - s2s_common_teardown6(vrfid, NULL, NULL, VFP_FALSE); + s2s_common_teardown6(vrfid, NULL, NULL, 0, 0, VFP_FALSE); } static void @@ -2129,6 +2191,7 @@ test_plaintext_packet_matching_input_policy(vrfid_t vrfid, struct if_data *exp_stats_ifin, struct dp_test_crypto_policy *ipol, struct dp_test_crypto_policy *opol, + uint8_t nipols, uint8_t nopols, const char *saddr, const char *daddr, uint16_t udp_port, @@ -2145,14 +2208,14 @@ test_plaintext_packet_matching_input_policy(vrfid_t vrfid, s2s_common_setup(vrfid, CRYPTO_CIPHER_AES_CBC, CRYPTO_AUTH_HMAC_SHA1, - ipol, opol, + ipol, opol, nipols, nopols, XFRM_MODE_TUNNEL, VFP_FALSE, VRF_XFRM_IN_ORDER); pkt = dp_test_create_udp_ipv4_pak(saddr, daddr, udp_port, udp_port, 1, &len); (void)dp_test_pktmbuf_eth_init(pkt, dp_test_intf_name2mac_str(ifin), - NULL, ETHER_TYPE_IPv4); + NULL, RTE_ETHER_TYPE_IPV4); /* 
* The packet should be dropped because it is received in @@ -2191,7 +2254,8 @@ test_plaintext_packet_matching_input_policy(vrfid_t vrfid, inp2 = dp_test_get_vrf_stat(vrfid, AF_INET, IPSTATS_MIB_INPKTS); dp_test_verify_vrf_stats(inp, inp2, dis, dis2, del, del2, exp_status); - s2s_common_teardown(vrfid, ipol, opol, VFP_FALSE, VRF_XFRM_IN_ORDER); + s2s_common_teardown(vrfid, ipol, opol, nipols, nopols, + VFP_FALSE, VRF_XFRM_IN_ORDER); } static void drop_plaintext_packet_matching_input_policy_main(vrfid_t vrfid) @@ -2205,7 +2269,7 @@ static void drop_plaintext_packet_matching_input_policy_main(vrfid_t vrfid) "dp1T1", "dp2T2", &exp_stats_ifout, &exp_stats_ifin, - NULL, NULL, + NULL, NULL, 0, 0, CLIENT_REMOTE, CLIENT_LOCAL, 0, @@ -2235,7 +2299,7 @@ static void drop_plaintext_local_pkt_match_inpolicy(vrfid_t vrfid) "dp1T1", "dp2T2", &exp_stats_ifout, &exp_stats_ifin, - &my_ipol, &my_opol, + &my_ipol, &my_opol, 1, 1, CLIENT_REMOTE, PORT_EAST, 0, @@ -2251,7 +2315,7 @@ static void drop_plaintext_local_pkt_match_inpolicy(vrfid_t vrfid) "dp1T1", "dp2T2", &exp_stats_ifout, &exp_stats_ifin, - &my_ipol, &my_opol, + &my_ipol, &my_opol, 1, 1, CLIENT_REMOTE, PORT_EAST, 500, @@ -2280,7 +2344,7 @@ static void rx_plaintext_local_pkt_notmatch_inpolicy(vrfid_t vrfid) "dp2T2", "dp1T1", &exp_stats_ifout, &exp_stats_ifin, - &my_ipol, &my_opol, + &my_ipol, &my_opol, 1, 1, CLIENT_REMOTE, PORT_WEST, 0, @@ -2295,6 +2359,7 @@ test_plaintext_packet_matching_input_policy6(vrfid_t vrfid, struct if_data *exp_stats_ifin, struct dp_test_crypto_policy *ipol, struct dp_test_crypto_policy *opol, + uint8_t nipols, uint8_t nopols, const char *saddr, const char *daddr, uint16_t udp_port, @@ -2310,7 +2375,7 @@ test_plaintext_packet_matching_input_policy6(vrfid_t vrfid, s2s_common_setup6(vrfid, CRYPTO_CIPHER_AES_CBC, CRYPTO_AUTH_HMAC_SHA1, - ipol, opol, + ipol, opol, nipols, nopols, XFRM_MODE_TUNNEL, VFP_FALSE); pkt = dp_test_create_udp_ipv6_pak(saddr, daddr, udp_port, udp_port, @@ -2318,7 +2383,7 @@ 
test_plaintext_packet_matching_input_policy6(vrfid_t vrfid, dp_test_assert_internal(pkt != NULL); (void)dp_test_pktmbuf_eth_init(pkt, dp_test_intf_name2mac_str(ifin), - NULL, ETHER_TYPE_IPv6); + NULL, RTE_ETHER_TYPE_IPV6); /* * The packet may be dropped because if it is received in @@ -2355,7 +2420,7 @@ test_plaintext_packet_matching_input_policy6(vrfid_t vrfid, inp2 = dp_test_get_vrf_stat(vrfid, AF_INET6, IPSTATS_MIB_INPKTS); dp_test_verify_vrf_stats(inp, inp2, dis, dis2, del, del2, exp_status); - s2s_common_teardown6(vrfid, ipol, opol, VFP_FALSE); + s2s_common_teardown6(vrfid, ipol, opol, nipols, nopols, VFP_FALSE); } static void drop_plaintext_packet_matching_input_policy6_main(vrfid_t vrfid) @@ -2369,7 +2434,7 @@ static void drop_plaintext_packet_matching_input_policy6_main(vrfid_t vrfid) "dp1T1", "dp2T2", &exp_stats_ifout, &exp_stats_ifin, - NULL, NULL, + NULL, NULL, 0, 0, CLIENT_REMOTE6, CLIENT_LOCAL6, 0, @@ -2399,7 +2464,7 @@ static void drop_plaintext_local_pkt_match_inpolicy6(vrfid_t vrfid) "dp1T1", "dp2T2", &exp_stats_ifout, &exp_stats_ifin, - &my_ipol, &my_opol, + &my_ipol, &my_opol, 1, 1, CLIENT_REMOTE6, PORT_EAST6, 0, @@ -2415,7 +2480,7 @@ static void drop_plaintext_local_pkt_match_inpolicy6(vrfid_t vrfid) "dp1T1", "dp2T2", &exp_stats_ifout, &exp_stats_ifin, - &my_ipol, &my_opol, + &my_ipol, &my_opol, 1, 1, CLIENT_REMOTE6, PORT_EAST6, 500, @@ -2444,7 +2509,159 @@ static void rx_plaintext_local_pkt_notmatch_inpolicy6(vrfid_t vrfid) "dp2T2", "dp1T1", &exp_stats_ifout, &exp_stats_ifin, - &my_ipol, &my_opol, + &my_ipol, &my_opol, 1, 1, + CLIENT_REMOTE6, + PORT_WEST6, + 0, + DP_TEST_FWD_LOCAL); +} + +static void rx_match_policy_proto(vrfid_t vrfid) +{ + struct if_data exp_stats_ifout = {0}, exp_stats_ifin = {0}; + + struct dp_test_crypto_policy my_opol = output_policy; + + exp_stats_ifin.ifi_ipackets = 1; + + /* + * Add multiple policies to verify that we don't wrongly + * match a policy with the wrong protocol. 
+ */ + static struct dp_test_crypto_policy my_ipol[3] = { + { + .d_prefix = NETWORK_WEST, + .s_prefix = NETWORK_REMOTE, + .proto = IPPROTO_UDP - 1, + .dst = PORT_EAST, + .dst_family = AF_INET, + .dir = XFRM_POLICY_IN, + .family = AF_INET, + .reqid = TUNNEL_REQID, + .priority = RULE_PRIORITY, + .rule_no = 1, + .mark = 0, + .vrfid = VRF_DEFAULT_ID, + .action = XFRM_POLICY_BLOCK, + }, + { + .d_prefix = NETWORK_WEST, + .s_prefix = NETWORK_REMOTE, + .proto = IPPROTO_UDP, + .dst = PORT_EAST, + .dst_family = AF_INET, + .dir = XFRM_POLICY_IN, + .family = AF_INET, + .reqid = TUNNEL_REQID, + .priority = RULE_PRIORITY, + .rule_no = 2, + .mark = 0, + .vrfid = VRF_DEFAULT_ID, + .action = XFRM_POLICY_ALLOW, + .passthrough = true + }, + { + .d_prefix = NETWORK_WEST, + .s_prefix = NETWORK_REMOTE, + .proto = IPPROTO_UDP + 1, + .dst = PORT_EAST, + .dst_family = AF_INET, + .dir = XFRM_POLICY_IN, + .family = AF_INET, + .reqid = TUNNEL_REQID, + .priority = RULE_PRIORITY, + .rule_no = 3, + .mark = 0, + .vrfid = VRF_DEFAULT_ID, + .action = XFRM_POLICY_BLOCK, + }, + }; + + my_opol.proto = IPPROTO_TCP; + my_opol.s_prefix = NETWORK_WEST; + my_opol.d_prefix = NETWORK_REMOTE; + + test_plaintext_packet_matching_input_policy(vrfid, + "dp2T2", "dp1T1", + &exp_stats_ifout, + &exp_stats_ifin, + my_ipol, &my_opol, 3, 1, + CLIENT_REMOTE, + PORT_WEST, + 0, + DP_TEST_FWD_LOCAL); +} + +static void rx_match_policy_proto6(vrfid_t vrfid) +{ + struct if_data exp_stats_ifout = {0}, exp_stats_ifin = {0}; + + struct dp_test_crypto_policy my_opol = output_policy6; + + exp_stats_ifin.ifi_ipackets = 1; + + /* + * Add multiple policies to verify that we don't wrongly + * match a policy with the wrong protocol. 
+ */ + static struct dp_test_crypto_policy my_ipol[3] = { + { + .d_prefix = NETWORK_WEST6, + .s_prefix = NETWORK_REMOTE6, + .proto = IPPROTO_UDP - 1, + .dst = PORT_EAST6, + .dst_family = AF_INET6, + .dir = XFRM_POLICY_IN, + .family = AF_INET6, + .reqid = TUNNEL_REQID, + .priority = RULE_PRIORITY, + .rule_no = 1, + .mark = 0, + .vrfid = VRF_DEFAULT_ID, + .action = XFRM_POLICY_BLOCK, + }, + { + .d_prefix = NETWORK_WEST6, + .s_prefix = NETWORK_REMOTE6, + .proto = IPPROTO_UDP, + .dst = PORT_EAST6, + .dst_family = AF_INET6, + .dir = XFRM_POLICY_IN, + .family = AF_INET6, + .reqid = TUNNEL_REQID, + .priority = RULE_PRIORITY, + .rule_no = 2, + .mark = 0, + .vrfid = VRF_DEFAULT_ID, + .action = XFRM_POLICY_ALLOW, + .passthrough = true + }, + { + .d_prefix = NETWORK_WEST6, + .s_prefix = NETWORK_REMOTE6, + .proto = IPPROTO_UDP + 1, + .dst = PORT_EAST6, + .dst_family = AF_INET6, + .dir = XFRM_POLICY_IN, + .family = AF_INET6, + .reqid = TUNNEL_REQID, + .priority = RULE_PRIORITY, + .rule_no = 3, + .mark = 0, + .vrfid = VRF_DEFAULT_ID, + .action = XFRM_POLICY_BLOCK, + }, + }; + + my_opol.proto = IPPROTO_TCP; + my_opol.s_prefix = NETWORK_WEST6; + my_opol.d_prefix = NETWORK_REMOTE6; + + test_plaintext_packet_matching_input_policy6(vrfid, + "dp2T2", "dp1T1", + &exp_stats_ifout, + &exp_stats_ifin, + my_ipol, &my_opol, 3, 1, CLIENT_REMOTE6, PORT_WEST6, 0, @@ -2465,37 +2682,37 @@ DP_DECL_TEST_CASE(site_to_site_suite, encryption, NULL, NULL); * "encrypt" a packet using null encryption and null authentication * in transport mode. 
*/ -DP_START_TEST(encryption, null_encrypt_transport) +DP_START_TEST_FULL_RUN(encryption, null_encrypt_transport) { null_encrypt_transport_main(VRF_DEFAULT_ID); } DP_END_TEST; -DP_START_TEST(encryption, null_encrypt_transport_vrf) +DP_START_TEST_FULL_RUN(encryption, null_encrypt_transport_vrf) { null_encrypt_transport_main(TEST_VRF); } DP_END_TEST; -DP_START_TEST(encryption, encrypt_aesgcm) +DP_START_TEST_FULL_RUN(encryption, encrypt_aesgcm) { encrypt_aesgcm_main(VRF_DEFAULT_ID); } DP_END_TEST; -DP_START_TEST(encryption, encrypt_aesgcm_vrf) +DP_START_TEST_FULL_RUN(encryption, encrypt_aesgcm_vrf) { encrypt_aesgcm_main(TEST_VRF); } DP_END_TEST; -DP_START_TEST(encryption, encrypt) +DP_START_TEST_FULL_RUN(encryption, encrypt) { encrypt_main(VRF_DEFAULT_ID, VRF_XFRM_IN_ORDER); } DP_END_TEST; -DP_START_TEST(encryption, encrypt_vrf) +DP_START_TEST_FULL_RUN(encryption, encrypt_vrf) { encrypt_main(TEST_VRF, VRF_XFRM_IN_ORDER); } DP_END_TEST; -DP_START_TEST(encryption, encrypt_vrf_out_of_order) +DP_START_TEST_FULL_RUN(encryption, encrypt_vrf_out_of_order) { encrypt_main(TEST_VRF, VRF_XFRM_OUT_OF_ORDER); } DP_END_TEST; @@ -2505,29 +2722,29 @@ DP_START_TEST(encryption, encrypt6) encrypt6_main(VRF_DEFAULT_ID); } DP_END_TEST; -DP_START_TEST(encryption, encrypt6_vrf) +DP_START_TEST_FULL_RUN(encryption, encrypt6_vrf) { encrypt6_main(TEST_VRF); } DP_END_TEST; /* test that an SA with an unrecognised algorithm will block traffic */ -DP_START_TEST(encryption, bad_hash_algorithm) +DP_START_TEST_FULL_RUN(encryption, bad_hash_algorithm) { bad_hash_algorithm_main(VRF_DEFAULT_ID); } DP_END_TEST; -DP_START_TEST(encryption, bad_hash_algorithm_vrf) +DP_START_TEST_FULL_RUN(encryption, bad_hash_algorithm_vrf) { bad_hash_algorithm_main(TEST_VRF); } DP_END_TEST; /* test that an SA with an unrecognised algorithm will block traffic */ -DP_START_TEST(encryption, bad_hash_algorithm6) +DP_START_TEST_FULL_RUN(encryption, bad_hash_algorithm6) { bad_hash_algorithm6_main(VRF_DEFAULT_ID); } DP_END_TEST; 
-DP_START_TEST(encryption, bad_hash_algorithm6_vrf) +DP_START_TEST_FULL_RUN(encryption, bad_hash_algorithm6_vrf) { bad_hash_algorithm6_main(TEST_VRF); } DP_END_TEST; @@ -2537,93 +2754,93 @@ DP_START_TEST(encryption, bad_hash_algorithm6_vrf) * * "encrypt" a packet using null encryption and null authentication. */ -DP_START_TEST(encryption, null_encrypt) +DP_START_TEST_FULL_RUN(encryption, null_encrypt) { null_encrypt_main(VRF_DEFAULT_ID, VFP_FALSE); } DP_END_TEST; -DP_START_TEST(encryption, null_encrypt_vfp) +DP_START_TEST_FULL_RUN(encryption, null_encrypt_vfp) { null_encrypt_main(VRF_DEFAULT_ID, VFP_TRUE); } DP_END_TEST; -DP_START_TEST(encryption, null_encrypt_vrf) +DP_START_TEST_FULL_RUN(encryption, null_encrypt_vrf) { null_encrypt_main(TEST_VRF, VFP_FALSE); } DP_END_TEST; -DP_START_TEST(encryption, null_encrypt6_transport) +DP_START_TEST_FULL_RUN(encryption, null_encrypt6_transport) { null_encrypt6_transport_main(VRF_DEFAULT_ID); } DP_END_TEST; -DP_START_TEST(encryption, null_encrypt6_transport_vrf) +DP_START_TEST_FULL_RUN(encryption, null_encrypt6_transport_vrf) { null_encrypt6_transport_main(TEST_VRF); } DP_END_TEST; -DP_START_TEST(encryption, null_encrypt6) +DP_START_TEST_FULL_RUN(encryption, null_encrypt6) { null_encrypt6_main(VRF_DEFAULT_ID, VFP_FALSE); } DP_END_TEST; -DP_START_TEST(encryption, null_encrypt6_vfp) +DP_START_TEST_FULL_RUN(encryption, null_encrypt6_vfp) { null_encrypt6_main(VRF_DEFAULT_ID, VFP_TRUE); } DP_END_TEST; -DP_START_TEST(encryption, null_encrypt6_vrf) +DP_START_TEST_FULL_RUN(encryption, null_encrypt6_vrf) { null_encrypt6_main(TEST_VRF, VFP_FALSE); } DP_END_TEST; DP_DECL_TEST_CASE(site_to_site_suite, s2s_toobig6, NULL, NULL); -DP_START_TEST(s2s_toobig6, s2s_toobig6) +DP_START_TEST_FULL_RUN(s2s_toobig6, s2s_toobig6) { s2s_toobig6_main(VRF_DEFAULT_ID); } DP_END_TEST; -DP_START_TEST(s2s_toobig6, s2s_toobig6_vrf) +DP_START_TEST_FULL_RUN(s2s_toobig6, s2s_toobig6_vrf) { s2s_toobig6_main(TEST_VRF); } DP_END_TEST; 
DP_DECL_TEST_CASE(site_to_site_suite, decryption, NULL, NULL); -DP_START_TEST(decryption, decrypt_null) +DP_START_TEST_FULL_RUN(decryption, decrypt_null) { null_decrypt_main(VRF_DEFAULT_ID, INNER_VALID); } DP_END_TEST; -DP_START_TEST(decryption, decrypt_null_invalid) +DP_START_TEST_FULL_RUN(decryption, decrypt_null_invalid) { null_decrypt_main(VRF_DEFAULT_ID, INNER_INVALID); } DP_END_TEST; DP_DECL_TEST_CASE(site_to_site_suite, decryption_local, NULL, NULL); -DP_START_TEST(decryption_local, decrypt_null_local) +DP_START_TEST_FULL_RUN(decryption_local, decrypt_null_local) { null_decrypt_main(VRF_DEFAULT_ID, INNER_LOCAL); } DP_END_TEST; -DP_START_TEST(decryption, decrypt_null_vrf) +DP_START_TEST_FULL_RUN(decryption, decrypt_null_vrf) { null_decrypt_main(TEST_VRF, INNER_VALID); } DP_END_TEST; -DP_START_TEST(decryption, decrypt_null6) +DP_START_TEST_FULL_RUN(decryption, decrypt_null6) { null_decrypt_main6(VRF_DEFAULT_ID, INNER_VALID); } DP_END_TEST; -DP_START_TEST(decryption, decrypt_null_invalid6) +DP_START_TEST_FULL_RUN(decryption, decrypt_null_invalid6) { null_decrypt_main6(VRF_DEFAULT_ID, INNER_INVALID); } DP_END_TEST; -DP_START_TEST(decryption_local, decrypt_null_local6) +DP_START_TEST_FULL_RUN(decryption_local, decrypt_null_local6) { null_decrypt_main6(VRF_DEFAULT_ID, INNER_LOCAL); } DP_END_TEST; @@ -2633,49 +2850,142 @@ DP_START_TEST(decryption_local, decrypt_null_local6) * plaintext, then it might be a spoof and must be dropped with * prejudice. 
*/ -DP_START_TEST(decryption, drop_plaintext_packet_matching_input_policy) +DP_START_TEST_FULL_RUN(decryption, drop_plaintext_packet_matching_input_policy) { drop_plaintext_packet_matching_input_policy_main(VRF_DEFAULT_ID); } DP_END_TEST; -DP_START_TEST(decryption, drop_plaintext_local_pkt_match_inpolicy) +DP_START_TEST_FULL_RUN(decryption, drop_plaintext_local_pkt_match_inpolicy) { drop_plaintext_local_pkt_match_inpolicy(VRF_DEFAULT_ID); } DP_END_TEST; -DP_START_TEST(decryption, rx_plaintext_local_pkt_notmatch_inpolicy) +DP_START_TEST_FULL_RUN(decryption, rx_plaintext_local_pkt_notmatch_inpolicy) { rx_plaintext_local_pkt_notmatch_inpolicy(VRF_DEFAULT_ID); } DP_END_TEST; -DP_START_TEST(decryption, rx_plaintext_local_pkt_notmatch_inpolicy_vrf) +DP_START_TEST_FULL_RUN(decryption, rx_match_policy_proto) +{ + rx_match_policy_proto(VRF_DEFAULT_ID); +} DP_END_TEST; + +DP_START_TEST_FULL_RUN(decryption, rx_match_policy_proto_vrf) +{ + rx_match_policy_proto(TEST_VRF); +} DP_END_TEST; + +DP_START_TEST(decryption, rx_match_policy_proto6) +{ + rx_match_policy_proto6(VRF_DEFAULT_ID); +} DP_END_TEST; + +DP_START_TEST(decryption, rx_match_policy_proto6_vrf) +{ + rx_match_policy_proto6(TEST_VRF); +} DP_END_TEST; + +/* + * This test no longer works with overlay vrf support with the underlay in + * default because the following happens. + * Packet arrives unencrypted, but the dest address (10.10.1.1) is in the + * TEST_VRF, not the default, so the route lookup does not find it. There is + * no route, so an icmp is sent. + * + * I don't see a good way to detect that the packet should have been encrypted + * as we would have to check all policies that have the transport in this vrf. + * At the moment the check is once we have decided it is local, but we can not + * even use that as the trigger. So, let's leave this test out.
+ */ +DP_START_TEST_DONT_RUN(decryption, + drop_plaintext_packet_matching_input_policy_vrf) +{ + drop_plaintext_packet_matching_input_policy_main(TEST_VRF); +} DP_END_TEST; + +/* + * This test no longer works with overlay vrf support with the underlay in + * default because the following happens. + * Packet arrives unencrypted, but the dest address (10.10.1.1) is in the + * TEST_VRF, not the default, so the route lookup does not find it. There is + * no route, so an icmp is sent. + * + * I don't see a good way to detect that the packet should have been encrypted + * as we would have to check all policies that have the transport in this vrf. + * At the moment the check is once we have decided it is local, but we can not + * even use that as the trigger. So, let's leave this test out. + */ +DP_START_TEST_DONT_RUN(decryption, + drop_plaintext_local_pkt_match_inpolicy_vrf) +{ + drop_plaintext_local_pkt_match_inpolicy(TEST_VRF); +} DP_END_TEST; + +DP_START_TEST_FULL_RUN(decryption, rx_plaintext_local_pkt_notmatch_inpolicy_vrf) { rx_plaintext_local_pkt_notmatch_inpolicy(TEST_VRF); } DP_END_TEST; -DP_START_TEST(decryption, drop_plaintext_packet_matching_input_policy6) +DP_START_TEST_FULL_RUN(decryption, drop_plaintext_packet_matching_input_policy6) { drop_plaintext_packet_matching_input_policy6_main(VRF_DEFAULT_ID); } DP_END_TEST; -DP_START_TEST(decryption, drop_plaintext_local_pkt_match_inpolicy6) +DP_START_TEST_FULL_RUN(decryption, drop_plaintext_local_pkt_match_inpolicy6) { drop_plaintext_local_pkt_match_inpolicy6(VRF_DEFAULT_ID); } DP_END_TEST; -DP_START_TEST(decryption, rx_plaintext_local_pkt_notmatch_inpolicy6) +DP_START_TEST_FULL_RUN(decryption, rx_plaintext_local_pkt_notmatch_inpolicy6) { rx_plaintext_local_pkt_notmatch_inpolicy6(VRF_DEFAULT_ID); } DP_END_TEST; -DP_START_TEST(decryption, rx_plaintext_local_pkt_notmatch_inpolicy6_vrf) +/* + * This test no longer works with overlay vrf support with the underlay in + * default because the following happens.
+ * Packet arrives unencrypted, but the dest address is in the + * TEST_VRF, not the default, so the route lookup does not find it. There is + * no route, so an icmp is sent. + * + * I don't see a good way to detect that the packet should have been encrypted + * as we would have to check all policies that have the transport in this vrf. + * At the moment the check is once we have decided it is local, but we can not + * even use that as the trigger. So, let's leave this test out. + */ +DP_START_TEST_DONT_RUN(decryption, + drop_plaintext_packet_matching_input_policy6_vrf) +{ + drop_plaintext_packet_matching_input_policy6_main(TEST_VRF); +} DP_END_TEST; + +/* + * This test no longer works with overlay vrf support with the underlay in + * default because the following happens. + * Packet arrives unencrypted, but the dest address is in the + * TEST_VRF, not the default, so the route lookup does not find it. There is + * no route, so an icmp is sent. + * + * I don't see a good way to detect that the packet should have been encrypted + * as we would have to check all policies that have the transport in this vrf. + * At the moment the check is once we have decided it is local, but we can not + * even use that as the trigger. So, let's leave this test out.
+ */ +DP_START_TEST_DONT_RUN(decryption, + drop_plaintext_local_pkt_match_inpolicy6_vrf) +{ + drop_plaintext_local_pkt_match_inpolicy6(TEST_VRF); +} DP_END_TEST; + +DP_START_TEST_FULL_RUN(decryption, + rx_plaintext_local_pkt_notmatch_inpolicy6_vrf) { rx_plaintext_local_pkt_notmatch_inpolicy6(TEST_VRF); } DP_END_TEST; DP_DECL_TEST_CASE(site_to_site_suite, encryption46, NULL, NULL); -DP_START_TEST(encryption46, encrypt46_tunnel) +DP_START_TEST_FULL_RUN(encryption46, encrypt46_tunnel) { vrfid_t vrfid = VRF_DEFAULT_ID; char expected_payload[sizeof(payload_v4_icmp_null_enc)]; @@ -2706,7 +3016,7 @@ DP_START_TEST(encryption46, encrypt46_tunnel) } DP_END_TEST; -DP_START_TEST(encryption46, encrypt46_ecn_ect) +DP_START_TEST_FULL_RUN(encryption46, encrypt46_ecn_ect) { vrfid_t vrfid = VRF_DEFAULT_ID; char expected_payload[sizeof(payload_v4_icmp_null_enc)]; @@ -2736,7 +3046,7 @@ DP_START_TEST(encryption46, encrypt46_ecn_ect) s2s_teardown_interfaces_v4_v6(vrfid, VFP_FALSE); } DP_END_TEST; -DP_START_TEST(encryption46, encrypt46_ecn_ce) +DP_START_TEST_FULL_RUN(encryption46, encrypt46_ecn_ce) { vrfid_t vrfid = VRF_DEFAULT_ID; char expected_payload[sizeof(payload_v4_icmp_null_enc)]; @@ -2766,7 +3076,7 @@ DP_START_TEST(encryption46, encrypt46_ecn_ce) s2s_teardown_interfaces_v4_v6(vrfid, VFP_FALSE); } DP_END_TEST; -DP_START_TEST(encryption46, encrypt46_no_ecn) +DP_START_TEST_FULL_RUN(encryption46, encrypt46_no_ecn) { vrfid_t vrfid = VRF_DEFAULT_ID; char expected_payload[sizeof(payload_v4_icmp_null_enc)]; @@ -2801,7 +3111,7 @@ DP_START_TEST(encryption46, encrypt46_no_ecn) } DP_END_TEST; /* ecn3 is modified to ecn2, and dscp 1 is dropped */ -DP_START_TEST(encryption46, encrypt46_no_dscp) +DP_START_TEST_FULL_RUN(encryption46, encrypt46_no_dscp) { vrfid_t vrfid = VRF_DEFAULT_ID; char expected_payload[sizeof(payload_v4_icmp_null_enc)]; @@ -2835,7 +3145,7 @@ DP_START_TEST(encryption46, encrypt46_no_dscp) s2s_teardown_interfaces_v4_v6(vrfid, VFP_FALSE); } DP_END_TEST; 
-DP_START_TEST(encryption46, encrypt46_no_dscp_no_ecn) +DP_START_TEST_FULL_RUN(encryption46, encrypt46_no_dscp_no_ecn) { vrfid_t vrfid = VRF_DEFAULT_ID; char expected_payload[sizeof(payload_v4_icmp_null_enc)]; @@ -2873,7 +3183,7 @@ DP_START_TEST(encryption46, encrypt46_no_dscp_no_ecn) s2s_teardown_interfaces_v4_v6(vrfid, VFP_FALSE); } DP_END_TEST; -DP_START_TEST(encryption46, encrypt46_tunnel_test_vrf) +DP_START_TEST_FULL_RUN(encryption46, encrypt46_tunnel_test_vrf) { vrfid_t vrfid = TEST_VRF; char expected_payload[sizeof(payload_v4_icmp_null_enc)]; @@ -2907,7 +3217,7 @@ DP_START_TEST(encryption46, encrypt46_tunnel_test_vrf) DP_DECL_TEST_CASE(site_to_site_suite, encryption64, NULL, NULL); -DP_START_TEST(encryption64, encrypt64) +DP_START_TEST_FULL_RUN(encryption64, encrypt64) { vrfid_t vrfid = VRF_DEFAULT_ID; char expected_payload[sizeof(payload_v6_icmp_null_enc)]; @@ -2938,7 +3248,7 @@ DP_START_TEST(encryption64, encrypt64) s2s_teardown_interfaces_v4_v6(vrfid, VFP_FALSE); } DP_END_TEST; -DP_START_TEST(encryption64, encrypt64_ecn_ect) +DP_START_TEST_FULL_RUN(encryption64, encrypt64_ecn_ect) { vrfid_t vrfid = VRF_DEFAULT_ID; char expected_payload[sizeof(payload_v6_icmp_null_enc)]; @@ -2969,7 +3279,7 @@ DP_START_TEST(encryption64, encrypt64_ecn_ect) s2s_teardown_interfaces_v4_v6(vrfid, VFP_FALSE); } DP_END_TEST; -DP_START_TEST(encryption64, encrypt64_ecn_ce) +DP_START_TEST_FULL_RUN(encryption64, encrypt64_ecn_ce) { vrfid_t vrfid = VRF_DEFAULT_ID; char expected_payload[sizeof(payload_v6_icmp_null_enc)]; @@ -3000,7 +3310,7 @@ DP_START_TEST(encryption64, encrypt64_ecn_ce) s2s_teardown_interfaces_v4_v6(vrfid, VFP_FALSE); } DP_END_TEST; -DP_START_TEST(encryption64, encrypt64_no_ecn) +DP_START_TEST_FULL_RUN(encryption64, encrypt64_no_ecn) { vrfid_t vrfid = VRF_DEFAULT_ID; char expected_payload[sizeof(payload_v6_icmp_null_enc)]; @@ -3035,7 +3345,7 @@ DP_START_TEST(encryption64, encrypt64_no_ecn) } DP_END_TEST; /* ecn3 is modified to ecn2, and dscp 1 is dropped */ 
-DP_START_TEST(encryption64, encrypt64_no_dscp) +DP_START_TEST_FULL_RUN(encryption64, encrypt64_no_dscp) { vrfid_t vrfid = VRF_DEFAULT_ID; char expected_payload[sizeof(payload_v6_icmp_null_enc)]; @@ -3069,7 +3379,7 @@ DP_START_TEST(encryption64, encrypt64_no_dscp) s2s_teardown_interfaces_v4_v6(vrfid, VFP_FALSE); } DP_END_TEST; -DP_START_TEST(encryption64, encrypt64_no_dscp_no_ecn) +DP_START_TEST_FULL_RUN(encryption64, encrypt64_no_dscp_no_ecn) { vrfid_t vrfid = VRF_DEFAULT_ID; char expected_payload[sizeof(payload_v6_icmp_null_enc)]; @@ -3107,7 +3417,7 @@ DP_START_TEST(encryption64, encrypt64_no_dscp_no_ecn) s2s_teardown_interfaces_v4_v6(vrfid, VFP_FALSE); } DP_END_TEST; -DP_START_TEST(encryption64, encrypt64_test_vrf) +DP_START_TEST_FULL_RUN(encryption64, encrypt64_test_vrf) { vrfid_t vrfid = TEST_VRF; char expected_payload[sizeof(payload_v6_icmp_null_enc)]; @@ -3141,7 +3451,7 @@ DP_START_TEST(encryption64, encrypt64_test_vrf) DP_DECL_TEST_CASE(site_to_site_suite, decryption64, NULL, NULL); -DP_START_TEST(decryption64, decrypt64_tunnel) +DP_START_TEST_FULL_RUN(decryption64, decrypt64_tunnel) { vrfid_t vrfid = VRF_DEFAULT_ID; char transmit_payload[sizeof(payload_v4_icmp_null_enc_rem_to_loc)]; @@ -3171,7 +3481,7 @@ DP_START_TEST(decryption64, decrypt64_tunnel) s2s_teardown_interfaces_v4_v6(vrfid, VFP_FALSE); } DP_END_TEST; -DP_START_TEST(decryption64, decrypt64_tunnel_test_vrf) +DP_START_TEST_FULL_RUN(decryption64, decrypt64_tunnel_test_vrf) { vrfid_t vrfid = TEST_VRF; char transmit_payload[sizeof(payload_v4_icmp_null_enc_rem_to_loc)]; @@ -3203,7 +3513,7 @@ DP_START_TEST(decryption64, decrypt64_tunnel_test_vrf) DP_DECL_TEST_CASE(site_to_site_suite, decryption46, NULL, NULL); -DP_START_TEST(decryption46, decrypt46_tunnel) +DP_START_TEST_FULL_RUN(decryption46, decrypt46_tunnel) { vrfid_t vrfid = VRF_DEFAULT_ID; char transmit_payload[sizeof(payload_v6_icmp_null_enc_rem_to_loc)]; @@ -3233,7 +3543,7 @@ DP_START_TEST(decryption46, decrypt46_tunnel) 
s2s_teardown_interfaces_v4_v6(vrfid, VFP_FALSE); } DP_END_TEST; -DP_START_TEST(decryption46, decrypt46_tunnel_test_vrf) +DP_START_TEST_FULL_RUN(decryption46, decrypt46_tunnel_test_vrf) { vrfid_t vrfid = TEST_VRF; char transmit_payload[sizeof(payload_v6_icmp_null_enc_rem_to_loc)]; diff --git a/tests/whole_dp/src/dp_test_crypto_site_to_site_passthru.c b/tests/whole_dp/src/dp_test_crypto_site_to_site_passthru.c index 14896f88..7175f5f9 100644 --- a/tests/whole_dp/src/dp_test_crypto_site_to_site_passthru.c +++ b/tests/whole_dp/src/dp_test_crypto_site_to_site_passthru.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2019-2020, AT&T Intellectual Property. All rights reserved. * All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only @@ -8,15 +8,16 @@ */ #include "dp_test.h" -#include "dp_test_lib.h" -#include "dp_test_netlink_state.h" +#include "dp_test_lib_internal.h" +#include "dp_test_netlink_state_internal.h" #include "dp_test_crypto_utils.h" -#include "dp_test_pktmbuf_lib.h" +#include "dp_test_pktmbuf_lib_internal.h" #include "dp_test_crypto_lib.h" #include "dp_test_lib_exp.h" -#include "dp_test_lib_intf.h" +#include "dp_test_lib_intf_internal.h" #include "dp_test_console.h" #include "dp_test_controller.h" +#include "dp_test_npf_lib.h" #include "main.h" #include "in_cksum.h" @@ -105,6 +106,7 @@ static struct dp_test_crypto_policy output_policy = { .family = AF_INET, .reqid = TUNNEL_REQID, .priority = 3000, + .rule_no = 1, .mark = 0, .vrfid = VRF_DEFAULT_ID }; @@ -119,10 +121,11 @@ static struct dp_test_crypto_policy output_passthru_policy = { .family = AF_INET, .reqid = TUNNEL_REQID, .priority = 1000, + .rule_no = 2, .mark = 0, .action = XFRM_POLICY_ALLOW, .vrfid = VRF_DEFAULT_ID, - .passthrough = TRUE + .passthrough = true }; static struct dp_test_crypto_policy output_policy6 = { @@ -135,6 +138,7 @@ static struct dp_test_crypto_policy output_policy6 = { .family = AF_INET6, .reqid = 
TUNNEL_REQID, .priority = 3000, + .rule_no = 3, .mark = 0, .vrfid = VRF_DEFAULT_ID }; @@ -149,10 +153,11 @@ static struct dp_test_crypto_policy output_passthru_policy6 = { .family = AF_INET6, .reqid = TUNNEL_REQID, .priority = 1000, + .rule_no = 4, .mark = 0, .action = XFRM_POLICY_ALLOW, .vrfid = VRF_DEFAULT_ID, - .passthrough = TRUE + .passthrough = true }; static struct dp_test_crypto_policy input_policy = { @@ -165,6 +170,7 @@ static struct dp_test_crypto_policy input_policy = { .family = AF_INET, .reqid = TUNNEL_REQID, .priority = 3000, + .rule_no = 5, .mark = 0, .vrfid = VRF_DEFAULT_ID }; @@ -179,10 +185,11 @@ static struct dp_test_crypto_policy input_passthru_policy = { .family = AF_INET, .reqid = TUNNEL_REQID, .priority = 1000, + .rule_no = 6, .mark = 0, .action = XFRM_POLICY_ALLOW, .vrfid = VRF_DEFAULT_ID, - .passthrough = TRUE + .passthrough = true }; static struct dp_test_crypto_policy input_policy6 = { @@ -195,6 +202,7 @@ static struct dp_test_crypto_policy input_policy6 = { .family = AF_INET6, .reqid = TUNNEL_REQID, .priority = 3000, + .rule_no = 7, .mark = 0, .vrfid = VRF_DEFAULT_ID }; @@ -209,10 +217,11 @@ static struct dp_test_crypto_policy input_passthru_policy6 = { .family = AF_INET6, .reqid = TUNNEL_REQID, .priority = 1000, + .rule_no = 8, .mark = 0, .action = XFRM_POLICY_ALLOW, .vrfid = VRF_DEFAULT_ID, - .passthrough = TRUE + .passthrough = true }; @@ -526,6 +535,8 @@ static void s2s_common_teardown(vrfid_t vrfid, dp_test_crypto_delete_policy(&opol[i]); } + dp_test_npf_cleanup(); + /*************************************************** * Tear down topology */ @@ -555,6 +566,8 @@ static void s2s_common_teardown6(vrfid_t vrfid, dp_test_crypto_delete_policy(&opol[i]); } + dp_test_npf_cleanup(); + /*************************************************** * Tear down topology */ @@ -599,7 +612,7 @@ static void encrypt_main(vrfid_t vrfid) ping_pkt = build_input_packet(CLIENT_LOCAL, CLIENT_REMOTE); (void)dp_test_pktmbuf_eth_init(ping_pkt, 
dp_test_intf_name2mac_str("dp1T1"), - NULL, ETHER_TYPE_IPv4); + NULL, RTE_ETHER_TYPE_IPV4); /* * Construct the expected encrypted packet @@ -619,7 +632,7 @@ static void encrypt_main(vrfid_t vrfid) (void)dp_test_pktmbuf_eth_init(encrypted_pkt, PEER_MAC_ADDR, dp_test_intf_name2mac_str("dp2T2"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); exp = dp_test_exp_create(encrypted_pkt); rte_pktmbuf_free(encrypted_pkt); @@ -698,7 +711,7 @@ static void encrypt6_main(vrfid_t vrfid) (void)dp_test_pktmbuf_eth_init(ping_pkt, dp_test_intf_name2mac_str("dp1T1"), - NULL, ETHER_TYPE_IPv6); + NULL, RTE_ETHER_TYPE_IPV6); /* * Construct the expected encrypted packet @@ -717,7 +730,7 @@ static void encrypt6_main(vrfid_t vrfid) (void)dp_test_pktmbuf_eth_init(encrypted_pkt, PEER_MAC_ADDR, dp_test_intf_name2mac_str("dp2T2"), - ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV6); exp = dp_test_exp_create(encrypted_pkt); rte_pktmbuf_free(encrypted_pkt); @@ -785,7 +798,7 @@ receive_packet(vrfid_t vrfid, 1, &len); (void)dp_test_pktmbuf_eth_init(pkt, dp_test_intf_name2mac_str(ifin), - NULL, ETHER_TYPE_IPv4); + NULL, RTE_ETHER_TYPE_IPV4); /* * The packet may need to be dropped because it is received @@ -859,7 +872,7 @@ receive_packet6(vrfid_t vrfid, dp_test_assert_internal(pkt != NULL); (void)dp_test_pktmbuf_eth_init(pkt, dp_test_intf_name2mac_str(ifin), - NULL, ETHER_TYPE_IPv6); + NULL, RTE_ETHER_TYPE_IPV6); /* * The packet should be dropped because it is received in @@ -1006,32 +1019,32 @@ DP_DECL_TEST_SUITE(site_to_site_suite); DP_DECL_TEST_CASE(site_to_site_suite, passthrough, NULL, NULL); -DP_START_TEST(passthrough, encrypt) +DP_START_TEST_FULL_RUN(passthrough, encrypt) { encrypt_main(VRF_DEFAULT_ID); } DP_END_TEST; -DP_START_TEST(passthrough, encrypt_vrf) +DP_START_TEST_FULL_RUN(passthrough, encrypt_vrf) { encrypt_main(TEST_VRF); } DP_END_TEST; -DP_START_TEST(passthrough, rx_pkt_on_int) +DP_START_TEST_FULL_RUN(passthrough, rx_pkt_on_int) { rx_pkt_on_int(VRF_DEFAULT_ID); } DP_END_TEST; 
-DP_START_TEST(passthrough, encrypt6) +DP_START_TEST_FULL_RUN(passthrough, encrypt6) { encrypt6_main(VRF_DEFAULT_ID); } DP_END_TEST; -DP_START_TEST(passthrough, encrypt6_vrf) +DP_START_TEST_FULL_RUN(passthrough, encrypt6_vrf) { encrypt6_main(TEST_VRF); } DP_END_TEST; -DP_START_TEST(passthrough, rx_pkt_on_int6) +DP_START_TEST_FULL_RUN(passthrough, rx_pkt_on_int6) { rx_pkt_on_int6(VRF_DEFAULT_ID); } DP_END_TEST; diff --git a/tests/whole_dp/src/dp_test_crypto_utils.c b/tests/whole_dp/src/dp_test_crypto_utils.c index 15f64e7b..1d1172d5 100644 --- a/tests/whole_dp/src/dp_test_crypto_utils.c +++ b/tests/whole_dp/src/dp_test_crypto_utils.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015-2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -15,20 +15,22 @@ #include -#include "pktmbuf.h" +#include "pktmbuf_internal.h" #include "ip_funcs.h" #include "util.h" #include "crypto/crypto.h" -#include "dp_test_lib.h" +#include "dp_test_lib_internal.h" #include "dp_test_lib_exp.h" -#include "dp_test_macros.h" -#include "dp_test_pktmbuf_lib.h" +#include "dp_test/dp_test_macros.h" +#include "dp_test_pktmbuf_lib_internal.h" #include "dp_test_crypto_utils.h" -#include "dp_test_netlink_state.h" -#include "dp_test_cmd_check.h" +#include "dp_test_netlink_state_internal.h" +#include "dp_test/dp_test_cmd_check.h" #include "dp_test_crypto_lib.h" #include "dp_test_json_utils.h" +#include "dp_test_xfrm_server.h" +#include "dp_test_controller.h" static const unsigned char default_cipher_key[] = { 0x1c, 0x53, 0xfa, 0xd5, 0xb5, 0x23, 0xb3, 0xe1, @@ -59,7 +61,7 @@ static struct xfrm_algo *cipher_algo_alloc(const char *alg_name, if (key_len_in_bits != 128 && key_len_in_bits != 256 && key_len_in_bits == AES128GM_KEY_LEN && strcmp(alg_name, "eNULL") != 0) - dp_test_assert_internal(0); + dp_test_abort_internal(); cipher_algo = 
malloc(sizeof(*cipher_algo) + key_len_in_bytes); dp_test_assert_internal(cipher_algo); @@ -81,7 +83,7 @@ static struct xfrm_algo_aead *algo_aead_alloc(const char *alg_name, if (key_len_in_bits != AES128GM_KEY_LEN || strcmp(alg_name, "rfc4106(gcm(aes))") != 0) - dp_test_assert_internal(0); + dp_test_abort_internal(); aead_algo = malloc(sizeof(*aead_algo) + key_len_in_bytes); dp_test_assert_internal(aead_algo); @@ -103,7 +105,7 @@ static struct xfrm_algo_auth *auth_algo_alloc(const char *alg_name, /* Only 160 bit keys are supported */ if (key_len_in_bits != 160 && strcmp(alg_name, "aNULL") != 0) - dp_test_assert_internal(0); + dp_test_abort_internal(); algo_auth = malloc(sizeof(*algo_auth) + key_len_in_bytes); dp_test_assert_internal(algo_auth); @@ -142,7 +144,7 @@ dp_test_pktmbuf_esp_init(struct rte_mbuf *m, uint16_t udphdrlen, return NULL; } - esp = pktmbuf_mtol4(m, struct ip_esp_hdr *); + esp = dp_pktmbuf_mtol4(m, struct ip_esp_hdr *); esp = (struct ip_esp_hdr *)((unsigned char *)esp + udphdrlen); memset(esp, 0, sizeof(*esp)); esp->spi = spi; @@ -156,7 +158,7 @@ dp_test_create_transport_hdr(struct rte_mbuf *m, struct iphdr *iphdr) struct iphdr *ip; m->l3_len = sizeof(*iphdr); - ip = pktmbuf_mtol3(m, struct iphdr *); + ip = dp_pktmbuf_mtol3(m, struct iphdr *); memmove(ip, iphdr, sizeof(*iphdr)); @@ -200,7 +202,7 @@ dp_test_create_esp_ipv4_pak(const char *saddr, const char *daddr, if (!pak) return NULL; - if (!dp_test_pktmbuf_eth_init(pak, NULL, NULL, ETHER_TYPE_IPv4)) { + if (!dp_test_pktmbuf_eth_init(pak, NULL, NULL, RTE_ETHER_TYPE_IPV4)) { rte_pktmbuf_free(pak); return NULL; } @@ -223,7 +225,7 @@ dp_test_create_esp_ipv4_pak(const char *saddr, const char *daddr, /* recalculation checksum */ ip->check = 0; - ip->check = rte_ipv4_cksum((const struct ipv4_hdr *)ip); + ip->check = rte_ipv4_cksum((const struct rte_ipv4_hdr *)ip); /* Payload offset and length */ uint32_t poff = pak->l2_len + pak->l3_len + udphdrlen + sizeof(*esp); @@ -258,7 +260,7 @@ 
dp_test_create_transport_hdr6(struct rte_mbuf *m, struct ip6_hdr *ip6_hdr) struct ip6_hdr *ip6; m->l3_len = sizeof(*ip6_hdr); - ip6 = pktmbuf_mtol3(m, struct ip6_hdr *); + ip6 = dp_pktmbuf_mtol3(m, struct ip6_hdr *); memmove(ip6, ip6_hdr, sizeof(*ip6_hdr)); @@ -298,7 +300,7 @@ dp_test_create_esp_ipv6_pak(const char *saddr, const char *daddr, if (!pak) return NULL; - if (!dp_test_pktmbuf_eth_init(pak, NULL, NULL, ETHER_TYPE_IPv6)) { + if (!dp_test_pktmbuf_eth_init(pak, NULL, NULL, RTE_ETHER_TYPE_IPV6)) { rte_pktmbuf_free(pak); return NULL; } @@ -355,67 +357,113 @@ static void build_xfrm_selector(struct xfrm_selector *sel, if (dp_test_prefix_str_to_xfrm_addr(d_prefix, &sel->daddr, &sel->prefixlen_d, family)) - dp_test_assert_internal(0); + dp_test_abort_internal(); if (dp_test_prefix_str_to_xfrm_addr(s_prefix, &sel->saddr, &sel->prefixlen_s, family)) - dp_test_assert_internal(0); + dp_test_abort_internal(); sel->family = family; sel->proto = proto; } -static void wait_for_npf_policy(const struct dp_test_crypto_policy *policy, - bool check_present, uint32_t vrf_id, - const char *file, int line) +static uint32_t poll_cnt; + +struct dp_test_crypto_reponses_cb { + enum dp_test_crypto_check_resp_type type; + bool match; + bool valid; + /* Expected values */ + uint64_t pkts; + uint64_t bytes; + /* Record the tested values */ + uint32_t tx_ack; + uint32_t rx_ack; + uint64_t actual_pkts; + uint64_t actual_bytes; +}; + +static int _dp_test_crypto_poll_response(zloop_t *loop, int poller, void *arg) { - json_object *expected_json; - char proto_str[100]; - char vrf_str[100]; - static const char template[] = - "{" - "\"config\": [{" - "\"attach_type\": \"vrf\"," - "\"attach_point\": \"%d\"," - "\"rulesets\": [{" - "\"ruleset_type\": \"ipsec\"," - "\"groups\": [{" - "\"class\": \"ipsec\"," - "\"name\": \"%s\"," - "\"direction\": \"out\"," - "\"rules\": {" - "\""__JSON_ANY_KEY_VAL__"\": {" - "\"action\": \"%s \"," - "\"match\": \"%sfrom %s to %s *\"," - "}" - "}" - "}]" - "}]" - 
"}]" - "}"; - - if (policy->proto) - snprintf(proto_str, 100, "proto %d ", policy->proto); - - snprintf(vrf_str, 100, "out-%d", vrf_id); - - char const *npf_action = - (policy->action == XFRM_POLICY_ALLOW) ? "pass" : "block"; - expected_json = dp_test_json_create(template, - vrf_id, - vrf_str, - npf_action, - policy->proto ? proto_str : "", - policy->s_prefix, - policy->d_prefix); + struct dp_test_crypto_reponses_cb *aux; - _dp_test_check_json_state("npf-op show all: ipsec", - expected_json, NULL, - DP_TEST_JSON_CHECK_SUBSET, - !check_present, - file, "", line); + aux = (struct dp_test_crypto_reponses_cb *) arg; + poll_cnt--; - json_object_put(expected_json); + switch (aux->type) { + case DP_TEST_CHECK_CRYPTO_SEQ: + aux->tx_ack = xfrm_seq; + aux->rx_ack = xfrm_seq_received; + if (aux->tx_ack == aux->rx_ack) + aux->valid = true; + break; + case DP_TEST_CHECK_CRYPTO_SA_STATS: + aux->actual_pkts = xfrm_packets; + aux->actual_bytes = xfrm_bytes; + if (aux->match) { + if (aux->pkts == aux->actual_pkts && + aux->bytes == aux->actual_bytes) + aux->valid = true; + } else { + if (aux->pkts != aux->actual_pkts || + aux->bytes != aux->actual_bytes) + aux->valid = true; + } + break; + default: + dp_test_assert_internal(false); + } + /* return -1 to stop if we got what we want or run out of retries */ + return (aux->valid || poll_cnt == 0) ? -1 : 0; +} + +/* + * Check the xfrm responses received, either + * - The number of rx acks sent versus the number of tx acks received. + * - The stats on a particular SA versus those as expected. 
+ */ +void +_dp_test_crypto_check_xfrm_resp(const char *file, int line, + enum dp_test_crypto_check_resp_type type, + uint64_t exp_bytes, + uint64_t exp_packets, + bool match) +{ + struct dp_test_crypto_reponses_cb aux; + int timer; + zloop_t *loop = zloop_new(); + + aux.type = type; + aux.match = match; + aux.valid = false; + aux.bytes = exp_bytes; + aux.pkts = exp_packets; + + poll_cnt = DP_TEST_POLL_COUNT; + timer = zloop_timer(loop, DP_TEST_POLL_INTERVAL, 0, + _dp_test_crypto_poll_response, + &aux); + dp_test_assert_internal(timer >= 0); + + zloop_start(loop); + zloop_destroy(&loop); + + if (!aux.valid) + switch (type) { + case DP_TEST_CHECK_CRYPTO_SEQ: + _dp_test_fail(file, line, "Missing acks Tx %d Rx %d:\n", + aux.tx_ack, aux.rx_ack); + break; + case DP_TEST_CHECK_CRYPTO_SA_STATS: + _dp_test_fail(file, line, "SA stats expected " + "pkts %lu bytes %lu, " + "got pkts %lu btyes %lu\n", + aux.pkts, aux.bytes, aux.actual_pkts, + aux.actual_bytes); + break; + default: + dp_test_assert_internal(false); + } } /* @@ -434,6 +482,7 @@ void _wait_for_policy(const struct dp_test_crypto_policy *policy, "\"policies\": [{" "\"dst\": \"%s\"," "\"src\": \"%s\"," + "\"proto\": %d," "\"priority\": %d,%s" "\"peer\": \"%s\"," "\"direction\": \"%s\"," @@ -472,6 +521,7 @@ void _wait_for_policy(const struct dp_test_crypto_policy *policy, expected_json = dp_test_json_create(template, policy->d_prefix, policy->s_prefix, + policy->proto, policy->priority, reqid_str, peer_str, @@ -488,31 +538,29 @@ void _wait_for_policy(const struct dp_test_crypto_policy *policy, file, "", line); json_object_put(expected_json); - - if (policy->dir == XFRM_POLICY_OUT && !policy->mark) - wait_for_npf_policy(policy, check_present, vrf_id, file, line); } /* * _dp_test_create_ipsec_policy() * - * Create an IPsec policy in the dataplane + * Create or Update an IPsec policy in the dataplane */ void _dp_test_crypto_create_policy(const char *file, int line, const struct dp_test_crypto_policy *policy, - bool 
verify) + bool verify, bool update) { struct xfrm_selector sel; xfrm_address_t dst; + int action = update ? XFRM_MSG_UPDPOLICY : XFRM_MSG_NEWPOLICY; build_xfrm_selector(&sel, policy->d_prefix, policy->s_prefix, policy->proto, policy->family); if (dp_test_prefix_str_to_xfrm_addr(policy->dst, &dst, NULL, policy->dst_family)) - dp_test_assert_internal(0); + dp_test_abort_internal(); - _dp_test_netlink_xfrm_policy(XFRM_MSG_NEWPOLICY, + _dp_test_netlink_xfrm_policy(action, &sel, &dst, policy->dst_family, policy->dir, @@ -521,7 +569,8 @@ void _dp_test_crypto_create_policy(const char *file, int line, policy->mark, policy->action, policy->vrfid, - policy->passthrough, + policy->passthrough, + policy->rule_no, file, line); @@ -530,12 +579,13 @@ void _dp_test_crypto_create_policy(const char *file, int line, } /* - * _dp_test_crypto_update_policy() + * _dp_test_delete_ipsec_policy() * - * Create an IPsec policy in the dataplane + * Delete an IPsec policy from the dataplane */ -void _dp_test_crypto_update_policy(const char *file, int line, - const struct dp_test_crypto_policy *policy) +void _dp_test_crypto_delete_policy(const char *file, int line, + const struct dp_test_crypto_policy *policy, + bool verify) { struct xfrm_selector sel; xfrm_address_t dst; @@ -545,9 +595,9 @@ void _dp_test_crypto_update_policy(const char *file, int line, if (dp_test_prefix_str_to_xfrm_addr(policy->dst, &dst, NULL, policy->dst_family)) - dp_test_assert_internal(0); + dp_test_abort_internal(); - _dp_test_netlink_xfrm_policy(XFRM_MSG_UPDPOLICY, + _dp_test_netlink_xfrm_policy(XFRM_MSG_DELPOLICY, &sel, &dst, policy->dst_family, policy->dir, @@ -556,45 +606,48 @@ void _dp_test_crypto_update_policy(const char *file, int line, policy->mark, policy->action, policy->vrfid, - policy->passthrough, + policy->passthrough, + policy->rule_no, file, line); - _wait_for_policy(policy, true, file, line); + if (verify) + _wait_for_policy(policy, false, file, line); } -/* - * _dp_test_delete_ipsec_policy() - * - * 
Delete an IPsec policy from the dataplane - */ -void _dp_test_crypto_delete_policy(const char *file, int line, - const struct dp_test_crypto_policy *policy) +void _dp_test_crypto_check_policy_count(vrfid_t vrfid, + unsigned int num_policies, int af, + const char *file, int line) { - struct xfrm_selector sel; - xfrm_address_t dst; +#define POLL_CNT 1000 +#define POLL_INTERVAL 50 + char cmd_str[100]; + char exp_str[100]; + static const char template[] = "{" + "\"ipsec_policies\": {" + "\"vrf\": %d," + "\"live_policy_count\": {" + "\"%s\": %d," + " }" + "}}"; + json_object *jexp; - build_xfrm_selector(&sel, policy->d_prefix, policy->s_prefix, - policy->proto, policy->family); + vrfid = dp_test_translate_vrf_id(vrfid); - if (dp_test_prefix_str_to_xfrm_addr(policy->dst, &dst, - NULL, policy->dst_family)) - dp_test_assert_internal(0); + snprintf(cmd_str, sizeof(cmd_str), "ipsec spd vrf_id %d brief", vrfid); + snprintf(exp_str, sizeof(exp_str), template, vrfid, + af == AF_INET ? "ipv4" : "ipv6", num_policies); - _dp_test_netlink_xfrm_policy(XFRM_MSG_DELPOLICY, - &sel, &dst, - policy->dst_family, - policy->dir, - policy->priority, - policy->reqid, - policy->mark, - policy->action, - policy->vrfid, - policy->passthrough, - file, line); + jexp = dp_test_json_create("%s", exp_str); + dp_test_check_json_poll_state_interval(cmd_str, jexp, + DP_TEST_JSON_CHECK_SUBSET, + false, POLL_CNT, POLL_INTERVAL); + json_object_put(jexp); + + dp_test_crypto_check_xfrm_acks(); - _wait_for_policy(policy, false, file, line); } + /* * wait_for_sa() * @@ -739,14 +792,23 @@ void _dp_test_crypto_create_sa(const char *file, const char *func, int line, free(algo_auth); } -void _dp_test_crypto_delete_sa(const char *file, int line, - const struct dp_test_crypto_sa *sa) +void _dp_test_crypto_delete_sa_verify(const char *file, int line, + const struct dp_test_crypto_sa *sa, + bool verify) { dp_test_netlink_xfrm_delsa(sa->spi, sa->d_addr, sa->s_addr, sa->family, sa->mode, sa->reqid, sa->vrfid); + if 
(verify) + _wait_for_sa(sa, false, file, line); +} - _wait_for_sa(sa, false, file, line); +void _dp_test_crypto_get_sa(const char *file, int line, + const struct dp_test_crypto_sa *sa) +{ + dp_test_netlink_xfrm_getsa(sa->spi, sa->d_addr, sa->s_addr, + sa->family, sa->mode, sa->reqid, + sa->vrfid); } void _dp_test_crypto_expire_sa(const char *file, int line, @@ -794,6 +856,7 @@ void _dp_test_crypto_check_sa_count( snprintf(exp_str, sizeof(exp_str), "\"total-sas\": %u", num_sas); _dp_test_check_state_show(file, line, cmd_str, exp_str, false, DP_TEST_CHECK_STR_SUBSET); + dp_test_crypto_check_xfrm_acks(); } struct dp_test_expected * @@ -819,7 +882,7 @@ generate_exp_unreachable(struct rte_mbuf *input_pkt, int payload_len, (void)dp_test_pktmbuf_eth_init(icmp_pak, dmac, dp_test_intf_name2mac_str(oif), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); dp_test_set_pak_ip_field(ip, DP_TEST_SET_TOS, IPTOS_PREC_INTERNETCONTROL); @@ -853,7 +916,7 @@ generate_exp_unreachable6(struct rte_mbuf *input_pkt, int payload_len, (void)dp_test_pktmbuf_eth_init(icmp_pak, dmac, dp_test_intf_name2mac_str(oif), - ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV6); exp = dp_test_exp_create(icmp_pak); @@ -869,4 +932,23 @@ generate_exp_unreachable6(struct rte_mbuf *input_pkt, int payload_len, return exp; } +void _dp_test_xfrm_set_nack(uint32_t err_count) +{ + xfrm_ack_err = err_count; +} +void _dp_test_crypto_flush(void) +{ + nl_propagate_xfrm(xfrm_server_push_sock, NULL, 0, "FLUSH"); +} + +void _dp_test_crypto_commit(void) +{ + nl_propagate_xfrm(xfrm_server_push_sock, NULL, 0, "COMMIT"); +} + +void _dp_test_xfrm_poison_sa_stats(void) +{ + xfrm_packets = 0xcafe; + xfrm_bytes = 0xf00d; +} diff --git a/tests/whole_dp/src/dp_test_crypto_utils.h b/tests/whole_dp/src/dp_test_crypto_utils.h index d2af6012..4a199fb6 100644 --- a/tests/whole_dp/src/dp_test_crypto_utils.h +++ b/tests/whole_dp/src/dp_test_crypto_utils.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. 
+ * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015-2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -15,7 +15,7 @@ #include #include -#include "vrf.h" +#include "vrf_internal.h" struct dp_test_expected; @@ -30,7 +30,7 @@ struct rte_mbuf *dp_test_create_esp_ipv6_pak(const char *saddr, int n, int *len, const char *payload, uint32_t spi, uint32_t seq_no, - uint16_t id, uint8_t ttl, + uint16_t id, uint8_t hlim, struct ip6_hdr *transport); struct dp_test_crypto_policy { @@ -47,26 +47,38 @@ struct dp_test_crypto_policy { uint8_t action; vrfid_t vrfid; bool passthrough; + uint32_t rule_no; }; void _dp_test_crypto_create_policy(const char *file, int line, const struct dp_test_crypto_policy *policy, - bool verify); + bool verify, bool update); void _dp_test_crypto_delete_policy(const char *file, int line, - const struct dp_test_crypto_policy *policy); -void _dp_test_crypto_update_policy(const char *file, int line, - const struct dp_test_crypto_policy *policy); + const struct dp_test_crypto_policy *policy, + bool verify); +void _dp_test_crypto_check_policy_count(vrfid_t vrfid, + unsigned int num_policies, int af, + const char *file, int line); #define dp_test_crypto_create_policy(_policy) \ - _dp_test_crypto_create_policy(__FILE__, __LINE__, _policy, true) -#define dp_test_crypto_create_policy_verify(_policy, _verify) \ - _dp_test_crypto_create_policy(__FILE__, __LINE__, _policy, _verify) + _dp_test_crypto_create_policy(__FILE__, __LINE__, _policy, true, false) + +#define dp_test_crypto_create_policy_verify(_policy, _verify) \ + _dp_test_crypto_create_policy(__FILE__, __LINE__, _policy, _verify, \ + false) #define dp_test_crypto_update_policy(_policy) \ - _dp_test_crypto_create_policy(__FILE__, __LINE__, _policy, true) + _dp_test_crypto_create_policy(__FILE__, __LINE__, _policy, true, true) #define dp_test_crypto_delete_policy(_policy) \ - _dp_test_crypto_delete_policy(__FILE__, __LINE__, _policy) + 
_dp_test_crypto_delete_policy(__FILE__, __LINE__, _policy, true) + +#define dp_test_crypto_delete_policy_verify(_policy, _verify) \ + _dp_test_crypto_delete_policy(__FILE__, __LINE__, _policy, _verify) + +#define dp_test_crypto_check_policy_count(vrfid, num_policies, af) \ + _dp_test_crypto_check_policy_count(vrfid, num_policies, af, __FILE__, \ + __LINE__) /* * Cipher algorithms supported by test suite. @@ -118,18 +130,26 @@ struct dp_test_crypto_sa { void _dp_test_crypto_create_sa(const char *file, const char *func, int line, const struct dp_test_crypto_sa *sa, bool verify); -void _dp_test_crypto_delete_sa(const char *file, int line, - const struct dp_test_crypto_sa *sa); +void _dp_test_crypto_delete_sa_verify(const char *file, int line, + const struct dp_test_crypto_sa *sa, + bool verify); void _dp_test_crypto_expire_sa(const char *file, int line, const struct dp_test_crypto_sa *sa, bool hard); +void _dp_test_crypto_get_sa(const char *file, int line, + const struct dp_test_crypto_sa *sa); -#define dp_test_crypto_create_sa(_sa) \ +#define dp_test_crypto_create_sa(_sa) \ _dp_test_crypto_create_sa(__FILE__, __func__, __LINE__, _sa, true) #define dp_test_crypto_create_sa_verify(_sa, verify) \ _dp_test_crypto_create_sa(__FILE__, __func__, __LINE__, _sa, verify) #define dp_test_crypto_delete_sa(_sa) \ - _dp_test_crypto_delete_sa(__FILE__, __LINE__, _sa) + _dp_test_crypto_delete_sa_verify(__FILE__, __LINE__, _sa, true) +#define dp_test_crypto_delete_sa_verify(_sa, verify) \ + _dp_test_crypto_delete_sa_verify(__FILE__, __LINE__, _sa, verify) + +#define dp_test_crypto_get_sa(_sa) \ + _dp_test_crypto_get_sa(__FILE__, __LINE__, _sa) #define dp_test_crypto_expire_sa(_sa, _hard) \ _dp_test_crypto_expire_sa(__FILE__, __LINE__, _sa, _hard) @@ -169,4 +189,41 @@ generate_exp_unreachable6(struct rte_mbuf *input_pkt, int payload_len, const char *source_ip, const char *dest_ip, const char *oif, const char *dmac); +enum dp_test_crypto_check_resp_type { + DP_TEST_CHECK_CRYPTO_SEQ, + 
DP_TEST_CHECK_CRYPTO_SA_STATS, +}; + +void +_dp_test_crypto_check_xfrm_resp(const char *file, int line, + enum dp_test_crypto_check_resp_type type, + uint64_t exp_bytes, uint64_t exp_packets, + bool match); +#define dp_test_crypto_check_xfrm_acks() \ + _dp_test_crypto_check_xfrm_resp(__FILE__, __LINE__, \ + DP_TEST_CHECK_CRYPTO_SEQ, \ + 0, 0, true) + +#define dp_test_crypto_check_xfrm_sa_cntrs(_pkts, _bytes, _match) \ + _dp_test_crypto_check_xfrm_resp(__FILE__, __LINE__, \ + DP_TEST_CHECK_CRYPTO_SA_STATS, \ + _pkts, _bytes, _match) + +void _dp_test_xfrm_set_nack(uint32_t err_count); + +#define dp_test_crypto_xfrm_set_nack(count) \ + _dp_test_xfrm_set_nack(count) + +void _dp_test_crypto_flush(void); +#define dp_test_crypto_flush() \ + _dp_test_crypto_flush() + +void _dp_test_crypto_commit(void); +#define dp_test_crypto_commit() \ + _dp_test_crypto_commit() + +void _dp_test_xfrm_poison_sa_stats(void); +#define dp_test_xfrm_poison_sa_stats() \ + _dp_test_xfrm_poison_sa_stats() + #endif /*_DP_TEST_CRYPTO_UTILS_H_ */ diff --git a/tests/whole_dp/src/dp_test_esp.c b/tests/whole_dp/src/dp_test_esp.c index f8fdc02d..060162ca 100644 --- a/tests/whole_dp/src/dp_test_esp.c +++ b/tests/whole_dp/src/dp_test_esp.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2018-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2016 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -11,7 +11,7 @@ */ #include "dp_test.h" -#include "dp_test_lib.h" +#include "dp_test_lib_internal.h" #include "crypto/crypto_internal.h" #include "crypto/esp.h" @@ -38,6 +38,7 @@ DP_START_TEST(sequence_number_check, sequence_number_check) sa.replay_window = 0; sa.replay_bitmap = 0; sa.seq = 0; + sa.spi = 0; hdr.spi = 0; hdr.seq = 1; diff --git a/tests/whole_dp/src/dp_test_fails.c b/tests/whole_dp/src/dp_test_fails.c index 32146dac..036e9f53 100644 --- a/tests/whole_dp/src/dp_test_fails.c +++ b/tests/whole_dp/src/dp_test_fails.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2018-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015-2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -22,10 +22,10 @@ #include "dp_test.h" #include "dp_test_controller.h" -#include "dp_test_netlink_state.h" -#include "dp_test_lib.h" -#include "dp_test_pktmbuf_lib.h" -#include "dp_test_lib_intf.h" +#include "dp_test_netlink_state_internal.h" +#include "dp_test_lib_internal.h" +#include "dp_test_lib_intf_internal.h" +#include "dp_test_pktmbuf_lib_internal.h" DP_DECL_TEST_SUITE(failure_suite); @@ -81,3 +81,215 @@ DP_START_TEST(internals, string_overflow) ck_assert(strcmp(buffer_ut, "fit1fit2fit3fit4fit") == 0); } DP_END_TEST; + +DP_DECL_TEST_CASE(failure_suite, rx_pkt, NULL, NULL); + +DP_START_TEST_DONT_RUN(rx_pkt, wrong_oif) +{ + struct dp_test_expected *exp; + struct rte_mbuf *test_pak; + const char *nh_mac_str; + int len = 22; + + /* Set up the interface addresses */ + dp_test_nl_add_ip_addr_and_connected("dp2T1", "1.1.1.1/24"); + dp_test_nl_add_ip_addr_and_connected("dp3T2", "2.2.2.2/24"); + + /* Add the route / nh arp we want the packet to follow */ + dp_test_netlink_add_route("10.73.2.0/24 nh 2.2.2.1 int:dp3T2"); + nh_mac_str = "aa:bb:cc:dd:ee:ff"; + dp_test_netlink_add_neigh("dp3T2", "2.2.2.1", nh_mac_str); + + /* Create pak to match the route added above */ + test_pak = 
dp_test_create_ipv4_pak("10.73.0.0", "10.73.2.0", + 1, &len); + + /* Create pak we expect to receive on the tx ring */ + exp = dp_test_exp_create(test_pak); + /* deliberate error - should be "dp3T2" */ + dp_test_exp_set_oif_name(exp, "dp2T1"); + + (void)dp_test_pktmbuf_eth_init(dp_test_exp_get_pak(exp), + nh_mac_str, "aa:aa:aa:aa:aa:3", + RTE_ETHER_TYPE_IPV4); + + dp_test_ipv4_decrement_ttl(dp_test_exp_get_pak(exp)); + + dp_test_pak_receive(test_pak, "dp2T1", exp); + + /* Clean Up */ + dp_test_nl_del_ip_addr_and_connected("dp2T1", "1.1.1.1/24"); + dp_test_nl_del_ip_addr_and_connected("dp3T2", "2.2.2.2/24"); +} DP_END_TEST; + +DP_START_TEST_DONT_RUN(rx_pkt, cleanup1) +{ + /* Clean Up - tests don't cleanup if they fail */ + dp_test_nl_del_ip_addr_and_connected("dp2T1", "1.1.1.1/24"); + dp_test_nl_del_ip_addr_and_connected("dp3T2", "2.2.2.2/24"); +} DP_END_TEST; + +DP_START_TEST_DONT_RUN(rx_pkt, wrong_pkt) +{ + struct dp_test_expected *exp; + struct rte_mbuf *test_pak; + const char *nh_mac_str; + int len = 22; + + /* Set up the interface addresses */ + dp_test_nl_add_ip_addr_and_connected("dp2T1", "1.1.1.1/24"); + dp_test_nl_add_ip_addr_and_connected("dp3T2", "2.2.2.2/24"); + + /* Add the route / nh arp we want the packet to follow */ + dp_test_netlink_add_route("10.73.2.0/24 nh 2.2.2.1 int:dp3T2"); + nh_mac_str = "aa:bb:cc:dd:ee:ff"; + dp_test_netlink_add_neigh("dp3T2", "2.2.2.1", nh_mac_str); + + /* Create pak to match the route added above */ + test_pak = dp_test_create_ipv4_pak("10.73.0.0", "10.73.2.0", + 1, &len); + + /* Create pak we expect to receive on the tx ring */ + exp = dp_test_exp_create(test_pak); + dp_test_exp_set_oif_name(exp, "dp3T2"); + + /* deliberate error - should be aa:aa:aa:aa:aa:3 */ + (void)dp_test_pktmbuf_eth_init(dp_test_exp_get_pak(exp), + nh_mac_str, "aa:aa:aa:aa:aa:99", + RTE_ETHER_TYPE_IPV4); + + dp_test_ipv4_decrement_ttl(dp_test_exp_get_pak(exp)); + + dp_test_pak_receive(test_pak, "dp2T1", exp); + + /* Clean Up */ + 
dp_test_nl_del_ip_addr_and_connected("dp2T1", "1.1.1.1/24"); + dp_test_nl_del_ip_addr_and_connected("dp3T2", "2.2.2.2/24"); +} DP_END_TEST; + +DP_START_TEST_DONT_RUN(rx_pkt, cleanup2) +{ + /* Clean Up - tests don't cleanup if they fail */ + dp_test_nl_del_ip_addr_and_connected("dp2T1", "1.1.1.1/24"); + dp_test_nl_del_ip_addr_and_connected("dp3T2", "2.2.2.2/24"); +} DP_END_TEST; + +DP_DECL_TEST_CASE(failure_suite, cleanup, NULL, NULL); + +DP_START_TEST_DONT_RUN(cleanup, routes_and_ifaddrs) +{ + struct dp_test_expected *exp; + struct rte_mbuf *test_pak; + const char *nh_mac_str; + int len = 22; + + /* Set up the interface addresses */ + dp_test_nl_add_ip_addr_and_connected("dp2T1", "1.1.1.1/24"); + dp_test_nl_add_ip_addr_and_connected("dp3T2", "2.2.2.2/24"); + + /* Add the route / nh arp we want the packet to follow */ + dp_test_netlink_add_route("10.73.2.0/24 nh 2.2.2.1 int:dp3T2"); + nh_mac_str = "aa:bb:cc:dd:ee:ff"; + dp_test_netlink_add_neigh("dp3T2", "2.2.2.1", nh_mac_str); + + /* Create pak to match the route added above */ + test_pak = dp_test_create_ipv4_pak("10.73.0.0", "10.73.2.0", + 1, &len); + + /* Create pak we expect to receive on the tx ring */ + exp = dp_test_exp_create(test_pak); + dp_test_exp_set_oif_name(exp, "dp3T2"); + + (void)dp_test_pktmbuf_eth_init(dp_test_exp_get_pak(exp), + nh_mac_str, "aa:aa:aa:aa:aa:2", + RTE_ETHER_TYPE_IPV4); + + dp_test_ipv4_decrement_ttl(dp_test_exp_get_pak(exp)); + + dp_test_pak_receive(test_pak, "dp2T1", exp); + + /* cleanup deliberately missing */ + /* dp_test_nl_del_ip_addr_and_connected("dp2T1", "1.1.1.1/24"); */ + /* dp_test_nl_del_ip_addr_and_connected("dp3T2", "2.2.2.2/24"); */ + +} DP_END_TEST; + +DP_START_TEST_DONT_RUN(cleanup, cleanup3) +{ + /* Clean Up */ + dp_test_nl_del_ip_addr_and_connected("dp2T1", "1.1.1.1/24"); + dp_test_nl_del_ip_addr_and_connected("dp3T2", "2.2.2.2/24"); +} DP_END_TEST; + +DP_DECL_TEST_CASE(failure_suite, pkt_drop, NULL, NULL); + +DP_START_TEST_DONT_RUN(pkt_drop, no_route) +{ + 
struct dp_test_expected *exp; + struct rte_mbuf *test_pak; + const char *nh_mac_str; + int len = 22; + + /* Set up the interface addresses */ + dp_test_nl_add_ip_addr_and_connected("dp2T1", "1.1.1.1/24"); + dp_test_nl_add_ip_addr_and_connected("dp3T2", "2.2.2.2/24"); + + /* Add the route / nh arp we want the packet to follow */ + dp_test_netlink_add_route("10.73.2.0/24 nh 2.2.2.1 int:dp3T2"); + nh_mac_str = "aa:bb:cc:dd:ee:ff"; + dp_test_netlink_add_neigh("dp3T2", "2.2.2.1", nh_mac_str); + + /* Deliberately wrong - add pkt to non-existent addr */ + test_pak = dp_test_create_ipv4_pak("88.88.88.88", "99.99.99.9", + 1, &len); + + /* Create pak we expect to receive on the tx ring */ + exp = dp_test_exp_create(test_pak); + dp_test_exp_set_oif_name(exp, "dp3T2"); + + (void)dp_test_pktmbuf_eth_init(dp_test_exp_get_pak(exp), + nh_mac_str, "aa:aa:aa:aa:aa:3", + RTE_ETHER_TYPE_IPV4); + + dp_test_ipv4_decrement_ttl(dp_test_exp_get_pak(exp)); + + dp_test_pak_receive(test_pak, "dp2T1", exp); + + /* cleanup deliberately missing */ + dp_test_nl_del_ip_addr_and_connected("dp2T1", "1.1.1.1/24"); + dp_test_nl_del_ip_addr_and_connected("dp3T2", "2.2.2.2/24"); + +} DP_END_TEST; + +DP_START_TEST_DONT_RUN(pkt_drop, cleanup3) +{ + /* Clean Up - tests don't cleanup if they fail */ + dp_test_nl_del_ip_addr_and_connected("dp2T1", "1.1.1.1/24"); + dp_test_nl_del_ip_addr_and_connected("dp3T2", "2.2.2.2/24"); +} DP_END_TEST; + +DP_DECL_TEST_CASE(failure_suite, bad_operstate, NULL, NULL); + +DP_START_TEST_DONT_RUN(bad_operstate, no_local) +{ + char cmd[TEST_MAX_CMD_LEN]; + char expected[TEST_MAX_REPLY_LEN]; + + dp_test_nl_add_ip_addr_and_connected("dp2T1", "1.1.1.1/24"); + + /* deliberate mistake */ + strcpy(expected, "1.1.1.2"); + snprintf(cmd, TEST_MAX_CMD_LEN, "local"); + dp_test_check_state_gone_show(cmd, expected, 0); + /* cleanup */ + dp_test_nl_del_ip_addr_and_connected("dp2T1", "1.1.1.1/24"); + +} DP_END_TEST; + +DP_START_TEST_DONT_RUN(bad_operstate, cleanup4) +{ + /* Clean Up - tests 
don't cleanup if they fail */ + dp_test_intf_bridge_remove_port("br0", "dp1T0"); + dp_test_intf_bridge_remove_port("br0", "dp2T1"); + dp_test_intf_bridge_del("br0"); +} DP_END_TEST; diff --git a/tests/whole_dp/src/dp_test_gpc_pb.c b/tests/whole_dp/src/dp_test_gpc_pb.c new file mode 100644 index 00000000..854dd881 --- /dev/null +++ b/tests/whole_dp/src/dp_test_gpc_pb.c @@ -0,0 +1,601 @@ +/* + * Copyright (c) 2020-2021, AT&T Intellectual Property. All rights reserved. + * All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + * + * GPC protobuf parsing test cases + */ + +#include "dp_test.h" +#include "dp_test_controller.h" +#include "dp_test_console.h" +#include "dp_test_json_utils.h" +#include "dp_test_lib_internal.h" + +#include "protobuf/GPCConfig.pb-c.h" +#include "protobuf/IPAddress.pb-c.h" + +static bool dp_test_gpc_debug_state; + +static void +dp_test_gpc_debug(bool enable) +{ + char cmd[TEST_MAX_CMD_LEN]; + + if (enable != dp_test_gpc_debug_state) { + snprintf(cmd, TEST_MAX_CMD_LEN, "debug %sgpc", enable ? "":"-"); + dp_test_console_request_reply(cmd, false); + + rte_log_set_level(RTE_LOGTYPE_SCHED, + enable ? 
RTE_LOG_DEBUG : RTE_LOG_INFO); + + dp_test_gpc_debug_state = enable; + } +} + +#ifdef NOT_YET +static void +dp_test_gpc_json_dump(json_object *j_obj) +{ + const char *str; + + str = json_object_to_json_string_ext(j_obj, + JSON_C_TO_STRING_PRETTY); + if (str) + printf("%s\n", str); +} +#endif + +static void +dp_test_gpc_setup_action(RuleAction *action, PolicerParams *policer, + RuleAction__ActionValueCase action_type, + uint32_t value) +{ + action->action_value_case = action_type; + switch (action_type) { + case RULE_ACTION__ACTION_VALUE__NOT_SET: + dp_test_fail("invalid action type - not-set\n"); + break; + case RULE_ACTION__ACTION_VALUE_DECISION: + action->decision = value; + break; + case RULE_ACTION__ACTION_VALUE_DESIGNATION: + action->designation = value; + break; + case RULE_ACTION__ACTION_VALUE_COLOUR: + action->colour = value; + break; + case RULE_ACTION__ACTION_VALUE_POLICER: + /* limited policer testing */ + policer->has_bw = true; + policer->bw = value; + action->policer = policer; + break; + default: + dp_test_fail("invalid rule action value case: %u\n", + action_type); + break; + } +} + +static void +dp_test_lib_pb_set_ip_prefix(IPPrefix *prefix, const char *addr_str, + uint32_t prefix_length, void *data) +{ + IPAddress *ip_address = prefix->address; + + dp_test_lib_pb_set_ip_addr(ip_address, addr_str, data); + prefix->has_length = true; + prefix->length = prefix_length; +} + +static void +dp_test_gpc_setup_match(RuleMatch *match, + RuleMatch__MatchValueCase match_type, + char *addr_str, uint32_t value, IPPrefix *ip_prefix, + void *v6_addr, + RuleMatch__ICMPTypeAndCode *icmp_type_code) +{ + match->match_value_case = match_type; + switch (match_type) { + case RULE_MATCH__MATCH_VALUE__NOT_SET: + dp_test_fail("invalid match type - not-set\n"); + break; + case RULE_MATCH__MATCH_VALUE_SRC_IP: + if (!addr_str || !ip_prefix) + dp_test_fail("required argument is NULL\n"); + + dp_test_lib_pb_set_ip_prefix(ip_prefix, addr_str, value, + v6_addr); + match->src_ip = 
ip_prefix; + break; + case RULE_MATCH__MATCH_VALUE_DEST_IP: + if (!addr_str || !ip_prefix) + dp_test_fail("required argument is NULL\n"); + + dp_test_lib_pb_set_ip_prefix(ip_prefix, addr_str, value, + v6_addr); + + match->dest_ip = ip_prefix; + break; + case RULE_MATCH__MATCH_VALUE_SRC_PORT: + match->src_port = value; + break; + case RULE_MATCH__MATCH_VALUE_DEST_PORT: + match->dest_port = value; + break; + case RULE_MATCH__MATCH_VALUE_FRAGMENT: + match->fragment = value; + break; + case RULE_MATCH__MATCH_VALUE_DSCP: + match->dscp = value; + break; + case RULE_MATCH__MATCH_VALUE_TTL: + match->ttl = value; + break; + case RULE_MATCH__MATCH_VALUE_ICMPV4: + if (!icmp_type_code) + dp_test_fail("required argument is NULL\n"); + + /* code and type packed into lower 16-bits of value */ + icmp_type_code->has_code = true; + icmp_type_code->code = value >> 8; + icmp_type_code->has_typenum = true; + icmp_type_code->typenum = value & 0xFF; + match->icmpv4 = icmp_type_code; + break; + case RULE_MATCH__MATCH_VALUE_ICMPV6: + if (!icmp_type_code) + dp_test_fail("required argument is NULL\n"); + + /* code and type packed into lower 16-bits of value */ + icmp_type_code->has_code = true; + icmp_type_code->code = value >> 8; + icmp_type_code->has_typenum = true; + icmp_type_code->typenum = value & 0xFF; + match->icmpv6 = icmp_type_code; + break; + case RULE_MATCH__MATCH_VALUE_ICMPV6_CLASS: + match->icmpv6_class = value; + break; + case RULE_MATCH__MATCH_VALUE_PROTO_BASE: + match->proto_base = value; + break; + case RULE_MATCH__MATCH_VALUE_PROTO_FINAL: + match->proto_final = value; + break; + default: + dp_test_fail("invalid rule match type: %u\n", + match_type); + break; + } +} + +static void +dp_test_gpc_setup_match_src_ip(RuleMatch *match, char *addr_str, + uint32_t prefix_length, IPPrefix *ip_prefix, + void *v6_addr) +{ + dp_test_gpc_setup_match(match, RULE_MATCH__MATCH_VALUE_SRC_IP, addr_str, + prefix_length, ip_prefix, v6_addr, NULL); +} + +static void 
+dp_test_gpc_setup_match_dest_ip(RuleMatch *match, char *addr_str, + uint32_t prefix_length, IPPrefix *ip_prefix, + void *v6_addr) +{ + dp_test_gpc_setup_match(match, RULE_MATCH__MATCH_VALUE_DEST_IP, + addr_str, prefix_length, ip_prefix, v6_addr, + NULL); +} + +static void +dp_test_gpc_setup_match_src_port(RuleMatch *match, uint32_t port) +{ + dp_test_gpc_setup_match(match, RULE_MATCH__MATCH_VALUE_SRC_PORT, + NULL, port, NULL, NULL, NULL); +} + +static void +dp_test_gpc_setup_match_dest_port(RuleMatch *match, uint32_t port) +{ + dp_test_gpc_setup_match(match, RULE_MATCH__MATCH_VALUE_DEST_PORT, + NULL, port, NULL, NULL, NULL); + +} + +static void +dp_test_gpc_setup_match_fragment(RuleMatch *match, uint32_t fragment) +{ + dp_test_gpc_setup_match(match, RULE_MATCH__MATCH_VALUE_FRAGMENT, + NULL, fragment, NULL, NULL, NULL); +} + +static void +dp_test_gpc_setup_match_dscp(RuleMatch *match, uint32_t dscp) +{ + dp_test_gpc_setup_match(match, RULE_MATCH__MATCH_VALUE_DSCP, + NULL, dscp, NULL, NULL, NULL); +} + +static void +dp_test_gpc_setup_match_ttl(RuleMatch *match, uint32_t ttl) +{ + dp_test_gpc_setup_match(match, RULE_MATCH__MATCH_VALUE_TTL, + NULL, ttl, NULL, NULL, NULL); +} + +static void +dp_test_gpc_setup_match_icmpv4(RuleMatch *match, + RuleMatch__ICMPTypeAndCode *icmp_type_code, + uint32_t type, uint32_t code) +{ + uint32_t value; + + dp_test_fail_unless(type < 256, "icmp type value too large: %u\n", + type); + dp_test_fail_unless(code < 256, "icmp code value too large: %u\n", + code); + + /* pack code and type into lower 16-bit of value */ + value = (code << 8) | type; + dp_test_gpc_setup_match(match, RULE_MATCH__MATCH_VALUE_ICMPV4, + NULL, value, NULL, NULL, icmp_type_code); +} + +static void +dp_test_gpc_setup_match_icmpv6(RuleMatch *match, + RuleMatch__ICMPTypeAndCode *icmp_type_code, + uint32_t type, uint32_t code) +{ + uint32_t value; + + dp_test_fail_unless(type < 256, "icmp type value too large: %u\n", + type); + dp_test_fail_unless(code < 256, "icmp code 
value too large: %u\n", + code); + + /* pack code and type into lower 16-bit of value */ + value = (code << 8) | type; + dp_test_gpc_setup_match(match, RULE_MATCH__MATCH_VALUE_ICMPV6, + NULL, value, NULL, NULL, icmp_type_code); +} + +static void +dp_test_gpc_setup_match_proto_base(RuleMatch *match, uint32_t proto) +{ + dp_test_gpc_setup_match(match, RULE_MATCH__MATCH_VALUE_PROTO_BASE, + NULL, proto, NULL, NULL, NULL); +} + +static void +dp_test_gpc_setup_match_proto_final(RuleMatch *match, uint32_t proto) +{ + dp_test_gpc_setup_match(match, RULE_MATCH__MATCH_VALUE_PROTO_FINAL, + NULL, proto, NULL, NULL, NULL); +} + +static void +dp_test_gpc_setup_rule(Rule *gpc_rule, uint32_t rule_number, + size_t n_matches, RuleMatch **matches, + size_t n_actions, RuleAction **actions, + RuleCounter *counter) +{ + gpc_rule->has_number = true; + gpc_rule->number = rule_number; + + gpc_rule->n_matches = n_matches; + gpc_rule->matches = matches; + + gpc_rule->n_actions = n_actions; + gpc_rule->actions = actions; + + gpc_rule->counter = counter; + + gpc_rule->has_table_index = true; + gpc_rule->table_index = 1; + + gpc_rule->has_orig_number = true; + gpc_rule->orig_number = rule_number; +} + + +static void +dp_test_gpc_setup_rules(Rules *gpc_rules, TrafficType traffic_type, + size_t n_rules, Rule **rules) +{ + gpc_rules->has_traffic_type = true; + gpc_rules->traffic_type = traffic_type; + gpc_rules->n_rules = n_rules; + gpc_rules->rules = rules; +} + +static void +dp_test_gpc_setup_table(GPCTable *table, const char *ifname, + GPCTable__FeatureLocation location, + TrafficType traffic_type, Rules *rules, + uint32_t n_table_names, char **table_names) +{ + table->ifname = "dp1T0"; + table->has_location = true; + table->location = location; + table->has_traffic_type = true; + table->traffic_type = traffic_type; + table->rules = rules; + table->n_table_names = n_table_names; + table->table_names = table_names; +} + +static void +dp_test_create_and_send_gpc_config_msg() +{ + TrafficType 
traffic_type = TRAFFIC_TYPE__IPV4; + + /* set match values here */ + RuleMatch match_1_1 = RULE_MATCH__INIT; + RuleMatch match_1_2 = RULE_MATCH__INIT; + RuleMatch match_1_3 = RULE_MATCH__INIT; + RuleMatch match_1_4 = RULE_MATCH__INIT; + RuleMatch match_1_5 = RULE_MATCH__INIT; + RuleMatch match_2_1 = RULE_MATCH__INIT; + RuleMatch match_2_2 = RULE_MATCH__INIT; + RuleMatch match_2_3 = RULE_MATCH__INIT; + RuleMatch match_2_4 = RULE_MATCH__INIT; + RuleMatch match_2_5 = RULE_MATCH__INIT; + RuleMatch match_2_6 = RULE_MATCH__INIT; + + IPPrefix ip_prefix_1 = IPPREFIX__INIT; + IPAddress ip_address_1 = IPADDRESS__INIT; + uint32_t v6_addr[4]; + + ip_prefix_1.address = &ip_address_1; + dp_test_gpc_setup_match_src_ip(&match_1_1, "10.10.10.0", 24, + &ip_prefix_1, &v6_addr); + + IPPrefix ip_prefix_2 = IPPREFIX__INIT; + IPAddress ip_address_2 = IPADDRESS__INIT; + + ip_prefix_2.address = &ip_address_2; + dp_test_gpc_setup_match_dest_ip(&match_1_2, "20.0.0.0", 8, + &ip_prefix_2, &v6_addr); + + dp_test_gpc_setup_match_src_port(&match_1_3, 1234); + + dp_test_gpc_setup_match_dest_port(&match_1_4, 4321); + + dp_test_gpc_setup_match_ttl(&match_1_5, 64); + + RuleMatch__ICMPTypeAndCode icmp_type_code_1 = + RULE_MATCH__ICMPTYPE_AND_CODE__INIT; + + dp_test_gpc_setup_match_icmpv4(&match_2_1, &icmp_type_code_1, 3, 9); + + RuleMatch__ICMPTypeAndCode icmp_type_code_2 = + RULE_MATCH__ICMPTYPE_AND_CODE__INIT; + + dp_test_gpc_setup_match_icmpv6(&match_2_2, &icmp_type_code_2, 1, 3); + + dp_test_gpc_setup_match_fragment(&match_2_3, 1); + + dp_test_gpc_setup_match_dscp(&match_2_4, 63); + + dp_test_gpc_setup_match_proto_base(&match_2_5, 19); + + dp_test_gpc_setup_match_proto_final(&match_2_6, 100); + + /* set action values here */ + RuleAction action_1_1 = RULE_ACTION__INIT; + RuleAction action_1_2 = RULE_ACTION__INIT; + RuleAction action_2_1 = RULE_ACTION__INIT; + RuleAction action_2_2 = RULE_ACTION__INIT; + PolicerParams policer = POLICER_PARAMS__INIT; + + dp_test_gpc_setup_action(&action_1_1, NULL, + 
RULE_ACTION__ACTION_VALUE_DECISION, + RULE_ACTION__PACKET_DECISION__PASS); + + dp_test_gpc_setup_action(&action_1_2, &policer, + RULE_ACTION__ACTION_VALUE_POLICER, + 12345678); + + dp_test_gpc_setup_action(&action_2_1, NULL, + RULE_ACTION__ACTION_VALUE_DESIGNATION, + 6); + + dp_test_gpc_setup_action(&action_2_2, NULL, + RULE_ACTION__ACTION_VALUE_COLOUR, + RULE_ACTION__COLOUR_VALUE__YELLOW); + + /* set rule values here */ + Rule rule_1 = RULE__INIT; + Rule rule_2 = RULE__INIT; + RuleMatch *match_array_1[] = { &match_1_1, &match_1_2, &match_1_3, + &match_1_4, &match_1_5 }; + RuleMatch *match_array_2[] = { &match_2_1, &match_2_2, &match_2_3, + &match_2_4, &match_2_5, &match_2_6 }; + RuleAction *action_array_1[] = { &action_1_1, &action_1_2 }; + RuleAction *action_array_2[] = { &action_2_1, &action_2_2 }; + + dp_test_gpc_setup_rule(&rule_1, 1, ARRAY_SIZE(match_array_1), + match_array_1, ARRAY_SIZE(action_array_1), + action_array_1, NULL); + + dp_test_gpc_setup_rule(&rule_2, 2, ARRAY_SIZE(match_array_2), + match_array_2, ARRAY_SIZE(action_array_2), + action_array_2, NULL); + + /* set rules values here */ + Rules rules = RULES__INIT; + Rule *rules_array[] = { &rule_1, &rule_2 }; + + dp_test_gpc_setup_rules(&rules, traffic_type, ARRAY_SIZE(rules_array), + rules_array); + + /* set table values here */ + GPCTable table = GPCTABLE__INIT; + char *table_name = "gpc-table-name-1"; + char *table_names[1]; + + table_names[0] = table_name; + dp_test_gpc_setup_table(&table, "dp1T0", + GPCTABLE__FEATURE_LOCATION__INGRESS, + traffic_type, &rules, + ARRAY_SIZE(table_names), table_names); + + /* set config values here */ + GPCConfig config = GPCCONFIG__INIT; + GPCTable * table_array[] = { &table }; + + config.has_feature_type = true; + config.feature_type = GPCCONFIG__FEATURE_TYPE__QOS; + config.n_counters = 0; + config.n_tables = 1; + config.tables = table_array; + + size_t len = gpcconfig__get_packed_size(&config); + void *buf = malloc(len); + dp_test_assert_internal(buf); + + 
gpcconfig__pack(&config, buf); + + dp_test_lib_pb_wrap_and_send_pb("vyatta:gpc-config", buf, len); +} + +const char *expected_reply_1 = + "{" + " \"gpc\":{" + " \"features\":[" + " {" + " \"type\":\"qos\"," + " \"tables\":[" + " {" + " \"table-id\":\"dp1T0/ingress/ipv4\"," + " \"rules\":[" + " {" + " \"rule-number\":1," + " \"matches\":[" + " {" + " \"match\":\"src-ip\"," + " \"value\":\"0.10.10.10/24\"" + " },{" + " \"match\":\"dest-ip\"," + " \"value\": \"0.0.0.20/8\"" + " },{" + " \"match\":\"src-port\"," + " \"value\":1234" + " },{" + " \"match\":\"dest-port\"," + " \"value\":4321" + " },{" + " \"match\":\"ttl\"," + " \"value\":64" + " }" + " ]," + " \"decision\":\"pass\"," + " \"police\":{" + " \"bandwidth\": 12345678" + " }," + " \"table-index\":1," + " \"orig-number\":1" + " },{" + " \"rule-number\":2," + " \"matches\":[" + " {" + " \"match\":\"icmpv4\"," + /* type 3, code 9 -> (3 << 8) | 9 = 777 */ + " \"value\":777" + " },{" + " \"match\":\"icmpv6\"," + /* type 1, code 3 -> (1 << 8) | 3 = 259 */ + " \"value\":259" + " },{" + " \"match\":\"fragment\"," + " \"value\":1" + " },{" + " \"match\":\"dscp\"," + " \"value\":63" + " },{" + " \"match\":\"base-protocol\"," + " \"value\":19" + " },{" + " \"match\":\"final-protocol\"," + " \"value\":100" + " }" + " ]," + " \"designation\":6," + " \"colour\":\"yellow\"," + " \"table-index\":1," + " \"orig-number\":2" + " }" + " ]," + " \"table-names\":[" + " {" + " \"table-index\": 1," + " \"name\": \"gpc-table-name-1\"" + " }" + " ]" + " }" + " ]," + " \"counters\":[" + " ]" + " }" + " ]" + " }" + "}"; + +static void +dp_test_gpc_check_state(const char *expected_reply) +{ + json_object *jexp; + + jexp = dp_test_json_create("%s", expected_reply); + dp_test_check_json_poll_state("gpc show", jexp, + DP_TEST_JSON_CHECK_SUBSET, false, + DP_TEST_POLL_COUNT); + json_object_put(jexp); +} + +static void +dp_test_create_and_send_gpc_delete_msg() +{ + /* set config values here */ + GPCConfig config = GPCCONFIG__INIT; + + 
config.has_feature_type = true; + config.feature_type = GPCCONFIG__FEATURE_TYPE__QOS; + + /* A gpc config message with zero tables deletes the feature */ + config.n_tables = 0; + + size_t len = gpcconfig__get_packed_size(&config); + void *buf = malloc(len); + dp_test_assert_internal(buf); + + gpcconfig__pack(&config, buf); + + dp_test_lib_pb_wrap_and_send_pb("vyatta:gpc-config", buf, len); +} + +const char *expected_reply_2 = + "{" + " \"gpc\":{\"features\":[]}" + "}"; + +DP_DECL_TEST_SUITE(gpc_pb_suite); + +DP_DECL_TEST_CASE(gpc_pb_suite, gpc_pb_parsing, NULL, NULL); + +DP_START_TEST(gpc_pb_parsing, test1) +{ + + dp_test_gpc_debug(true); + + dp_test_create_and_send_gpc_config_msg(); + dp_test_gpc_check_state(expected_reply_1); + + dp_test_create_and_send_gpc_delete_msg(); + dp_test_gpc_check_state(expected_reply_2); + +} DP_END_TEST; diff --git a/tests/whole_dp/src/dp_test_gre.c b/tests/whole_dp/src/dp_test_gre.c index 9daa168a..63b8cf18 100644 --- a/tests/whole_dp/src/dp_test_gre.c +++ b/tests/whole_dp/src/dp_test_gre.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015-2016 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -16,21 +16,22 @@ #include #include "ip_funcs.h" #include "in_cksum.h" +#include "if/gre.h" #include "if_var.h" #include "main.h" -#include "gre.h" #include "iptun_common.h" #include "netinet6/ip6_funcs.h" #include "compat.h" #include "dp_test.h" #include "dp_test_controller.h" -#include "dp_test_netlink_state.h" -#include "dp_test_cmd_check.h" -#include "dp_test_lib.h" -#include "dp_test_pktmbuf_lib.h" -#include "dp_test_lib_intf.h" +#include "dp_test_netlink_state_internal.h" +#include "dp_test/dp_test_cmd_check.h" +#include "dp_test_lib_internal.h" +#include "dp_test_lib_intf_internal.h" +#include "dp_test_pktmbuf_lib_internal.h" #include "dp_test_lib_exp.h" +#include "dp_test_gre.h" /* @@ -89,7 +90,7 @@ gre_test_create_pak(const char *outer_sip, const char *outer_dip, return m; } -static void +void gre_test_build_expected_pak(struct dp_test_expected **expected, struct iphdr *payload[], struct iphdr *outer[], @@ -100,16 +101,19 @@ gre_test_build_expected_pak(struct dp_test_expected **expected, struct iphdr *inner; struct rte_mbuf *m; + *expected = NULL; exp = dp_test_exp_create_m(NULL, num_paks); for (i = 0; i < num_paks; i++) { m = gre_test_create_pak( "1.1.2.1", "1.1.2.2", payload[i], &inner, &outer[i]); + if (!m) + return; dp_test_pktmbuf_eth_init(m, "aa:bb:cc:dd:ee:ff", dp_test_intf_name2mac_str("dp2T2"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); dp_test_exp_set_pak_m(exp, i, m); @@ -183,7 +187,7 @@ gre_test_build_expected_pak_ipv6(struct dp_test_expected **expected, * subsequent exp paks are not set up correctly for the full * data len. */ - exp->check_start[i] = pktmbuf_l2_len(exp->exp_pak[i]); + exp->check_start[i] = dp_pktmbuf_l2_len(exp->exp_pak[i]); exp->check_len[i] = rte_pktmbuf_data_len(exp->exp_pak[0]) - exp->check_start[i]; } @@ -208,7 +212,7 @@ gre_test_build_expected_icmp_pak(struct dp_test_expected **exp, for (i = 0; i < num_paks; i++) { /* Jump over the ether hdr before checking. 
*/ - expected->check_start[i] = sizeof(struct ether_hdr); + expected->check_start[i] = sizeof(struct rte_ether_hdr); expected->exp_pak[i] = dp_test_create_icmp_ipv4_pak( "1.1.1.1", "1.1.1.2", ICMP_DEST_UNREACH, @@ -229,7 +233,7 @@ gre_test_build_expected_icmp_pak(struct dp_test_expected **exp, *exp = expected; } -static void +void dp_test_gre_setup_tunnel(uint32_t vrfid, const char *tun_src, const char *tun_dst) { @@ -260,7 +264,7 @@ dp_test_gre_setup_tunnel(uint32_t vrfid, const char *tun_src, dp_test_netlink_add_neigh("dp1T1", "1.1.1.2", nh_mac_str); } -static void +void dp_test_gre_teardown_tunnel(uint32_t vrfid, const char *tun_src, const char *tun_dst) { @@ -300,7 +304,7 @@ DP_START_TEST(gre_encap, simple_encap) struct dp_test_expected *exp_frag; struct dp_test_expected *exp_icmp; struct iphdr *inner_ip; - struct iphdr *exp_ip_outer[DP_TEST_MAX_EXPECTED_PAKS]; + struct iphdr *exp_ip_outer[DP_TEST_MAX_EXPECTED_PAKS] = { 0 }; int len = 32; dp_test_gre_setup_tunnel(VRF_DEFAULT_ID, "1.1.2.1", "1.1.2.2"); @@ -337,12 +341,13 @@ DP_START_TEST(gre_encap, simple_encap) 1, &len); (void)dp_test_pktmbuf_eth_init(m, dp_test_intf_name2mac_str("dp1T1"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); inner_ip = iphdr(m); dp_test_set_pak_ip_field(inner_ip, DP_TEST_SET_IP_ECN, IPTOS_ECN_NOT_ECT); gre_test_build_expected_pak(&exp_no_frag, &inner_ip, exp_ip_outer, 1); + dp_test_assert_internal(exp_ip_outer[0] != NULL); dp_test_set_pak_ip_field(exp_ip_outer[0], DP_TEST_SET_IP_ECN, IPTOS_ECN_NOT_ECT); dp_test_pak_receive(m, "dp1T1", exp_no_frag); @@ -352,7 +357,7 @@ DP_START_TEST(gre_encap, simple_encap) 1, &len); (void)dp_test_pktmbuf_eth_init(m, dp_test_intf_name2mac_str("dp1T1"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); inner_ip = iphdr(m); dp_test_set_pak_ip_field(inner_ip, DP_TEST_SET_IP_ECN, IPTOS_ECN_ECT1); @@ -367,7 +372,7 @@ DP_START_TEST(gre_encap, simple_encap) 1, &len); (void)dp_test_pktmbuf_eth_init(m, 
dp_test_intf_name2mac_str("dp1T1"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); inner_ip = iphdr(m); dp_test_set_pak_ip_field(inner_ip, DP_TEST_SET_IP_ECN, IPTOS_ECN_ECT0); @@ -382,7 +387,7 @@ DP_START_TEST(gre_encap, simple_encap) 1, &len); (void)dp_test_pktmbuf_eth_init(m, dp_test_intf_name2mac_str("dp1T1"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); inner_ip = iphdr(m); dp_test_set_pak_ip_field(inner_ip, DP_TEST_SET_IP_ECN, IPTOS_ECN_CE); @@ -417,7 +422,7 @@ DP_START_TEST(gre_encap, simple_encap) 1, &len); (void)dp_test_pktmbuf_eth_init(m, dp_test_intf_name2mac_str("dp1T1"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); inner_ip = iphdr(m); gre_test_build_expected_pak(&exp_no_frag, &inner_ip, exp_ip_outer, 1); @@ -429,7 +434,7 @@ DP_START_TEST(gre_encap, simple_encap) 1, &len); (void)dp_test_pktmbuf_eth_init(m, dp_test_intf_name2mac_str("dp1T1"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); inner_ip = iphdr(m); gre_test_build_expected_pak(&exp_no_frag, &inner_ip, exp_ip_outer, 1); @@ -441,7 +446,7 @@ DP_START_TEST(gre_encap, simple_encap) 1, &len); (void)dp_test_pktmbuf_eth_init(m, dp_test_intf_name2mac_str("dp1T1"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); /* * Set frag 0 which is the last x bytes of the pak, * then frag 1 which is the first 'mtu' bytes @@ -459,6 +464,7 @@ DP_START_TEST(gre_encap, simple_encap) frag_payload[0] = iphdr(frag_payload_m[0]); frag_payload[1] = iphdr(frag_payload_m[1]); gre_test_build_expected_pak(&exp_frag, frag_payload, frag_outer, 2); + dp_test_assert_internal(exp_frag != NULL); rte_pktmbuf_free(frag_payload_m[0]); rte_pktmbuf_free(frag_payload_m[1]); @@ -474,7 +480,7 @@ DP_START_TEST(gre_encap, simple_encap) 1, &len); (void)dp_test_pktmbuf_eth_init(m, dp_test_intf_name2mac_str("dp1T1"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); inner_ip = iphdr(m); 
dp_test_set_pak_ip_field(inner_ip, DP_TEST_SET_DF, 1); gre_test_build_expected_pak(&exp_no_frag, &inner_ip, @@ -489,7 +495,7 @@ DP_START_TEST(gre_encap, simple_encap) (void)dp_test_pktmbuf_eth_init(m, dp_test_intf_name2mac_str("dp1T1"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); inner_ip = iphdr(m); dp_test_set_pak_ip_field(inner_ip, DP_TEST_SET_DF, 1); gre_test_build_expected_pak(&exp_no_frag, &inner_ip, @@ -504,7 +510,7 @@ DP_START_TEST(gre_encap, simple_encap) (void)dp_test_pktmbuf_eth_init(m, dp_test_intf_name2mac_str("dp1T1"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); inner_ip = iphdr(m); dp_test_set_pak_ip_field(inner_ip, DP_TEST_SET_DF, 1); @@ -544,7 +550,7 @@ DP_START_TEST(gre_encap, simple_encap_ipv6) m = dp_test_create_ipv6_pak("1:1:1::2", "10::1", 1, &len); (void)dp_test_pktmbuf_eth_init(m, dp_test_intf_name2mac_str("dp1T1"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV6); inner_ip = ip6hdr(m); ip6_ver_tc_flow_hdr(inner_ip, IPTOS_ECN_NOT_ECT, 0); gre_test_build_expected_pak_ipv6(&exp_no_frag, &inner_ip, @@ -558,7 +564,7 @@ DP_START_TEST(gre_encap, simple_encap_ipv6) 1, &len); (void)dp_test_pktmbuf_eth_init(m, dp_test_intf_name2mac_str("dp1T1"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV6); inner_ip = ip6hdr(m); ip6_ver_tc_flow_hdr(inner_ip, IPTOS_ECN_ECT1, 0); gre_test_build_expected_pak_ipv6(&exp_no_frag, &inner_ip, @@ -572,7 +578,7 @@ DP_START_TEST(gre_encap, simple_encap_ipv6) 1, &len); (void)dp_test_pktmbuf_eth_init(m, dp_test_intf_name2mac_str("dp1T1"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV6); inner_ip = ip6hdr(m); ip6_ver_tc_flow_hdr(inner_ip, IPTOS_ECN_ECT0, 0); gre_test_build_expected_pak_ipv6(&exp_no_frag, &inner_ip, @@ -586,7 +592,7 @@ DP_START_TEST(gre_encap, simple_encap_ipv6) 1, &len); (void)dp_test_pktmbuf_eth_init(m, dp_test_intf_name2mac_str("dp1T1"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv6); + 
RTE_ETHER_TYPE_IPV6); inner_ip = ip6hdr(m); ip6_ver_tc_flow_hdr(inner_ip, IPTOS_ECN_CE, 0); gre_test_build_expected_pak_ipv6(&exp_no_frag, &inner_ip, @@ -614,7 +620,7 @@ DP_START_TEST(gre_encap, simple_encap_ipv6) 1, &len); dp_test_pktmbuf_eth_init(m, dp_test_intf_name2mac_str("dp1T1"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV6); inner_ip = ip6hdr(m); gre_test_build_expected_pak_ipv6(&exp_no_frag, &inner_ip, exp_ip_outer, 1); @@ -626,7 +632,7 @@ DP_START_TEST(gre_encap, simple_encap_ipv6) 1, &len); dp_test_pktmbuf_eth_init(m, dp_test_intf_name2mac_str("dp1T1"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV6); inner_ip = ip6hdr(m); gre_test_build_expected_pak_ipv6(&exp_no_frag, &inner_ip, exp_ip_outer, 1); @@ -638,7 +644,7 @@ DP_START_TEST(gre_encap, simple_encap_ipv6) 1, &len); dp_test_pktmbuf_eth_init(m, dp_test_intf_name2mac_str("dp1T1"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV6); inner_ip = ip6hdr(m); exp_icmp = dp_test_exp_create_m(NULL, 1); @@ -656,7 +662,7 @@ DP_START_TEST(gre_encap, simple_encap_ipv6) icmp6); dp_test_pktmbuf_eth_init(exp_icmp->exp_pak[0], "aa:bb:cc:dd:ee:ff", dp_test_intf_name2mac_str("dp1T1"), - ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV6); dp_test_exp_set_oif_name(exp_icmp, "dp1T1"); dp_test_exp_set_fwd_status(exp_icmp, DP_TEST_FWD_FORWARDED); @@ -723,11 +729,12 @@ DP_START_TEST(gre_encap, ignore_df) 1, &len); (void)dp_test_pktmbuf_eth_init(m, dp_test_intf_name2mac_str("dp1T1"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); inner_ip = iphdr(m); dp_test_set_pak_ip_field(inner_ip, DP_TEST_SET_DF, 1); gre_test_build_expected_pak(&exp_no_frag, &inner_ip, exp_ip_outer, 1); + dp_test_assert_internal(exp_no_frag != NULL); dp_test_pak_receive(m, "dp1T1", exp_no_frag); /* 1476 */ @@ -736,7 +743,7 @@ DP_START_TEST(gre_encap, ignore_df) 1, &len); (void)dp_test_pktmbuf_eth_init(m, dp_test_intf_name2mac_str("dp1T1"), DP_TEST_INTF_DEF_SRC_MAC, - 
ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); inner_ip = iphdr(m); dp_test_set_pak_ip_field(inner_ip, DP_TEST_SET_DF, 1); gre_test_build_expected_pak(&exp_no_frag, &inner_ip, @@ -749,7 +756,7 @@ DP_START_TEST(gre_encap, ignore_df) 1, &len); (void)dp_test_pktmbuf_eth_init(m, dp_test_intf_name2mac_str("dp1T1"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); inner_ip = iphdr(m); dp_test_set_pak_ip_field(inner_ip, DP_TEST_SET_DF, 1); @@ -770,6 +777,7 @@ DP_START_TEST(gre_encap, ignore_df) frag_payload[0] = iphdr(frag_payload_m[0]); frag_payload[1] = iphdr(frag_payload_m[1]); gre_test_build_expected_pak(&exp_frag, frag_payload, frag_outer, 2); + dp_test_assert_internal(exp_frag != NULL); rte_pktmbuf_free(frag_payload_m[0]); rte_pktmbuf_free(frag_payload_m[1]); @@ -786,7 +794,7 @@ static inline void dp_test_gre_tos_encap(bool inherit, uint8_t val) struct rte_mbuf *m; struct dp_test_expected *exp; struct iphdr *inner_ip; - struct iphdr *exp_ip_outer[DP_TEST_MAX_EXPECTED_PAKS]; + struct iphdr *exp_ip_outer[DP_TEST_MAX_EXPECTED_PAKS] = { 0 }; int len = 32; /* Tos 0 */ @@ -794,11 +802,12 @@ static inline void dp_test_gre_tos_encap(bool inherit, uint8_t val) 1, &len); (void)dp_test_pktmbuf_eth_init(m, dp_test_intf_name2mac_str("dp1T1"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); inner_ip = iphdr(m); dp_test_set_pak_ip_field(inner_ip, DP_TEST_SET_TOS, 0); gre_test_build_expected_pak(&exp, &inner_ip, exp_ip_outer, 1); + dp_test_assert_internal(exp_ip_outer[0] != NULL); dp_test_set_pak_ip_field(exp_ip_outer[0], DP_TEST_SET_TOS, 0); dp_test_pak_receive(m, "dp1T1", exp); @@ -808,7 +817,7 @@ static inline void dp_test_gre_tos_encap(bool inherit, uint8_t val) 1, &len); (void)dp_test_pktmbuf_eth_init(m, dp_test_intf_name2mac_str("dp1T1"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); inner_ip = iphdr(m); dp_test_set_pak_ip_field(inner_ip, DP_TEST_SET_TOS, 0xc0); @@ -867,7 +876,7 @@ DP_START_TEST(gre_encap, 
no_route) 1, &len); dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); /* * Expect the packet to be dropped - an ICMP might attempt to @@ -901,7 +910,7 @@ dp_test_gre_build_encapped_pak(const struct iphdr *payload_ip, (void)dp_test_pktmbuf_eth_init(m, dp_test_intf_name2mac_str("dp2T2"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); return m; } @@ -922,13 +931,13 @@ static struct dp_test_expected *gre_test_build_expected_ecn_pak( (void)dp_test_pktmbuf_eth_init(exp_mbuf, dp_test_intf_name2mac_str("dp1T1"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); dp_test_set_pak_ip_field(iphdr(exp_mbuf), DP_TEST_SET_TTL, DP_TEST_PAK_DEFAULT_TTL - 2); exp = dp_test_exp_create_m(NULL, 1); exp->exp_pak[0] = exp_mbuf; dp_test_exp_set_oif_name(exp, "dp1T1"); - exp->check_start[0] = sizeof(struct ether_hdr); + exp->check_start[0] = sizeof(struct rte_ether_hdr); exp->check_len[0] = rte_pktmbuf_data_len(exp_mbuf) - exp->check_start[0]; *exp_mbuf_p = exp_mbuf; @@ -973,6 +982,8 @@ DP_START_TEST(gre_decap, ecn_decap) /* loop through all inners for outer 00 */ exp = gre_test_build_expected_ecn_pak(&e); m = dp_test_gre_build_encapped_pak(iphdr(e), &outer_ip, &inner_ip); + dp_test_assert_internal(outer_ip != NULL); + dp_test_assert_internal(inner_ip != NULL); dp_test_set_pak_ip_field(outer_ip, DP_TEST_SET_IP_ECN, IPTOS_ECN_NOT_ECT); dp_test_set_pak_ip_field(inner_ip, @@ -1169,7 +1180,7 @@ DP_START_TEST(gre_decap, invalid_paks) IPPROTO_GRE, 1, &len); dp_test_pktmbuf_eth_init(m, dp_test_intf_name2mac_str("dp1T1"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); exp = dp_test_exp_create(m); dp_test_exp_set_fwd_status(exp, DP_TEST_FWD_DROPPED); dp_test_pak_receive(m, "dp1T1", exp); @@ -1180,7 +1191,7 @@ DP_START_TEST(gre_decap, invalid_paks) IPPROTO_GRE, 1, &len); (void)dp_test_pktmbuf_eth_init(m, dp_test_intf_name2mac_str("dp1T1"), 
DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); gre = (struct gre_hdr *)(iphdr(m) + 1); memset(gre, 0, sizeof(struct gre_hdr) - 1); exp = dp_test_exp_create(m); @@ -1193,7 +1204,7 @@ DP_START_TEST(gre_decap, invalid_paks) IPPROTO_GRE, 1, &len); (void)dp_test_pktmbuf_eth_init(m, dp_test_intf_name2mac_str("dp1T1"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); gre = (struct gre_hdr *)(iphdr(m) + 1); memset(gre, 0, sizeof(struct gre_hdr) + 4 - 1); gre->flags |= GRE_CSUM; @@ -1207,7 +1218,7 @@ DP_START_TEST(gre_decap, invalid_paks) IPPROTO_GRE, 1, &len); (void)dp_test_pktmbuf_eth_init(m, dp_test_intf_name2mac_str("dp1T1"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); gre = (struct gre_hdr *)(iphdr(m) + 1); memset(gre, 0, sizeof(struct gre_hdr) + 4 - 1); gre->flags |= GRE_KEY; @@ -1221,7 +1232,7 @@ DP_START_TEST(gre_decap, invalid_paks) IPPROTO_GRE, 1, &len); (void)dp_test_pktmbuf_eth_init(m, dp_test_intf_name2mac_str("dp1T1"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); gre = (struct gre_hdr *)(iphdr(m) + 1); memset(gre, 0, sizeof(struct gre_hdr) + 4 - 1); gre->flags |= GRE_SEQ; @@ -1249,7 +1260,7 @@ DP_START_TEST(mgre_encap, simple_encap) m = dp_test_create_ipv4_pak("1.1.1.2", "10.0.0.1", 1, &len); (void)dp_test_pktmbuf_eth_init(m, dp_test_intf_name2mac_str("dp1T1"), - NULL, ETHER_TYPE_IPv4); + NULL, RTE_ETHER_TYPE_IPV4); exp = dp_test_exp_create(m); dp_test_ipv4_decrement_ttl(dp_test_exp_get_pak(exp)); dp_test_exp_set_fwd_status(exp, DP_TEST_FWD_LOCAL); @@ -1260,7 +1271,7 @@ DP_START_TEST(mgre_encap, simple_encap) m = dp_test_create_ipv4_pak("1.1.1.2", "10.0.0.1", 1, &len); (void)dp_test_pktmbuf_eth_init(m, dp_test_intf_name2mac_str("dp1T1"), - NULL, ETHER_TYPE_IPv4); + NULL, RTE_ETHER_TYPE_IPV4); ip_inner = iphdr(m); gre_test_build_expected_pak(&exp, &ip_inner, &exp_ip_outer, 1); @@ -1271,7 +1282,7 @@ DP_START_TEST(mgre_encap, simple_encap) m = 
dp_test_create_ipv4_pak("1.1.1.2", "10.0.0.1", 1, &len); (void)dp_test_pktmbuf_eth_init(m, dp_test_intf_name2mac_str("dp1T1"), - NULL, ETHER_TYPE_IPv4); + NULL, RTE_ETHER_TYPE_IPV4); ip_inner = iphdr(m); gre_test_build_expected_pak(&exp, &ip_inner, &exp_ip_outer, 1); @@ -1283,7 +1294,7 @@ DP_START_TEST(mgre_encap, simple_encap) m = dp_test_create_ipv4_pak("1.1.1.2", "10.0.0.1", 1, &len); (void)dp_test_pktmbuf_eth_init(m, dp_test_intf_name2mac_str("dp1T1"), - NULL, ETHER_TYPE_IPv4); + NULL, RTE_ETHER_TYPE_IPV4); exp = dp_test_exp_create(m); dp_test_ipv4_decrement_ttl(dp_test_exp_get_pak(exp)); dp_test_exp_set_fwd_status(exp, DP_TEST_FWD_LOCAL); @@ -1305,7 +1316,7 @@ DP_START_TEST(gre_vrf_encap, simple_vrf_encap) struct dp_test_expected *exp_frag; struct dp_test_expected *exp_icmp; struct iphdr *inner_ip; - struct iphdr *exp_ip_outer[DP_TEST_MAX_EXPECTED_PAKS]; + struct iphdr *exp_ip_outer[DP_TEST_MAX_EXPECTED_PAKS] = { 0 }; int len = 32; dp_test_gre_setup_tunnel(TEST_VRF, "1.1.2.1", "1.1.2.2"); @@ -1342,12 +1353,13 @@ DP_START_TEST(gre_vrf_encap, simple_vrf_encap) 1, &len); (void)dp_test_pktmbuf_eth_init(m, dp_test_intf_name2mac_str("dp1T1"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); inner_ip = iphdr(m); dp_test_set_pak_ip_field(inner_ip, DP_TEST_SET_IP_ECN, IPTOS_ECN_NOT_ECT); gre_test_build_expected_pak(&exp_no_frag, &inner_ip, exp_ip_outer, 1); + dp_test_assert_internal(exp_ip_outer[0] != NULL); dp_test_set_pak_ip_field(exp_ip_outer[0], DP_TEST_SET_IP_ECN, IPTOS_ECN_NOT_ECT); @@ -1358,7 +1370,7 @@ DP_START_TEST(gre_vrf_encap, simple_vrf_encap) 1, &len); (void)dp_test_pktmbuf_eth_init(m, dp_test_intf_name2mac_str("dp1T1"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); inner_ip = iphdr(m); dp_test_set_pak_ip_field(inner_ip, DP_TEST_SET_IP_ECN, IPTOS_ECN_ECT1); @@ -1373,7 +1385,7 @@ DP_START_TEST(gre_vrf_encap, simple_vrf_encap) 1, &len); (void)dp_test_pktmbuf_eth_init(m, dp_test_intf_name2mac_str("dp1T1"), 
DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); inner_ip = iphdr(m); dp_test_set_pak_ip_field(inner_ip, DP_TEST_SET_IP_ECN, IPTOS_ECN_ECT0); @@ -1388,7 +1400,7 @@ DP_START_TEST(gre_vrf_encap, simple_vrf_encap) 1, &len); (void)dp_test_pktmbuf_eth_init(m, dp_test_intf_name2mac_str("dp1T1"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); inner_ip = iphdr(m); dp_test_set_pak_ip_field(inner_ip, DP_TEST_SET_IP_ECN, IPTOS_ECN_CE); @@ -1427,7 +1439,7 @@ DP_START_TEST(gre_vrf_encap, simple_vrf_encap) 1, &len); (void)dp_test_pktmbuf_eth_init(m, dp_test_intf_name2mac_str("dp1T1"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); inner_ip = iphdr(m); gre_test_build_expected_pak(&exp_no_frag, &inner_ip, exp_ip_outer, 1); @@ -1439,7 +1451,7 @@ DP_START_TEST(gre_vrf_encap, simple_vrf_encap) 1, &len); (void)dp_test_pktmbuf_eth_init(m, dp_test_intf_name2mac_str("dp1T1"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); inner_ip = iphdr(m); gre_test_build_expected_pak(&exp_no_frag, &inner_ip, exp_ip_outer, 1); @@ -1451,7 +1463,7 @@ DP_START_TEST(gre_vrf_encap, simple_vrf_encap) 1, &len); (void)dp_test_pktmbuf_eth_init(m, dp_test_intf_name2mac_str("dp1T1"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); /* * Set frag 0 which is the last x bytes of the pak, * then frag 1 which is the first 'mtu' bytes @@ -1469,6 +1481,7 @@ DP_START_TEST(gre_vrf_encap, simple_vrf_encap) frag_payload[0] = iphdr(frag_payload_m[0]); frag_payload[1] = iphdr(frag_payload_m[1]); gre_test_build_expected_pak(&exp_frag, frag_payload, frag_outer, 2); + dp_test_assert_internal(exp_frag != NULL); rte_pktmbuf_free(frag_payload_m[0]); rte_pktmbuf_free(frag_payload_m[1]); @@ -1483,7 +1496,7 @@ DP_START_TEST(gre_vrf_encap, simple_vrf_encap) 1, &len); (void)dp_test_pktmbuf_eth_init(m, dp_test_intf_name2mac_str("dp1T1"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); inner_ip = iphdr(m); 
dp_test_set_pak_ip_field(inner_ip, DP_TEST_SET_DF, 1); gre_test_build_expected_pak(&exp_no_frag, &inner_ip, @@ -1498,7 +1511,7 @@ DP_START_TEST(gre_vrf_encap, simple_vrf_encap) (void)dp_test_pktmbuf_eth_init(m, dp_test_intf_name2mac_str("dp1T1"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); inner_ip = iphdr(m); dp_test_set_pak_ip_field(inner_ip, DP_TEST_SET_DF, 1); gre_test_build_expected_pak(&exp_no_frag, &inner_ip, @@ -1513,7 +1526,7 @@ DP_START_TEST(gre_vrf_encap, simple_vrf_encap) (void)dp_test_pktmbuf_eth_init(m, dp_test_intf_name2mac_str("dp1T1"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); inner_ip = iphdr(m); dp_test_set_pak_ip_field(inner_ip, DP_TEST_SET_DF, 1); diff --git a/tests/whole_dp/src/dp_test_gre.h b/tests/whole_dp/src/dp_test_gre.h new file mode 100644 index 00000000..db1dbf00 --- /dev/null +++ b/tests/whole_dp/src/dp_test_gre.h @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2020, AT&T Intellectual Property. All rights reserved. 
+ * + * SPDX-License-Identifier: LGPL-2.1-only + */ + +#ifndef _DP_TEST_GRE_H_ +#define _DP_TEST_GRE_H_ + +void dp_test_gre_setup_tunnel(uint32_t vrfid, const char *tun_src, + const char *tun_dst); +void dp_test_gre_teardown_tunnel(uint32_t vrfid, const char *tun_src, + const char *tun_dst); + +void gre_test_build_expected_pak(struct dp_test_expected **expected, + struct iphdr *payload[], + struct iphdr *outer[], + int num_paks); + +void dp_test_gre6_setup_tunnel(uint32_t vrfid, const char *tun_src, + const char *tun_dst); +void dp_test_gre6_teardown_tunnel(uint32_t vrfid, const char *tun_src, + const char *tun_dst); + +void gre6_test_build_expected_pak(struct dp_test_expected **expected, + struct ip6_hdr *payload, + struct ip6_hdr *outer); +#endif diff --git a/tests/whole_dp/src/dp_test_gre6.c b/tests/whole_dp/src/dp_test_gre6.c index 264aa58e..d58df228 100644 --- a/tests/whole_dp/src/dp_test_gre6.c +++ b/tests/whole_dp/src/dp_test_gre6.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2019-2020, AT&T Intellectual Property. All rights reserved. * All rights reserved. 
* * SPDX-License-Identifier: LGPL-2.1-only @@ -17,19 +17,20 @@ #include "in_cksum.h" #include "if_var.h" #include "main.h" -#include "gre.h" +#include "if/gre.h" #include "iptun_common.h" #include "netinet6/ip6_funcs.h" #include "compat.h" #include "dp_test.h" #include "dp_test_controller.h" -#include "dp_test_netlink_state.h" -#include "dp_test_cmd_check.h" -#include "dp_test_lib.h" -#include "dp_test_pktmbuf_lib.h" -#include "dp_test_lib_intf.h" +#include "dp_test_netlink_state_internal.h" +#include "dp_test/dp_test_cmd_check.h" +#include "dp_test_lib_internal.h" +#include "dp_test_lib_intf_internal.h" +#include "dp_test_pktmbuf_lib_internal.h" #include "dp_test_lib_exp.h" +#include "dp_test_gre.h" /* * Start with a simple topology, 2 interfaces both with addresses, and @@ -80,8 +81,8 @@ gre_test_create_pak(const char *outer_sip, const char *outer_dip, return m; } -static void -gre_test_build_expected_pak(struct dp_test_expected **expected, +void +gre6_test_build_expected_pak(struct dp_test_expected **expected, struct ip6_hdr *payload, struct ip6_hdr *outer) { @@ -95,7 +96,7 @@ gre_test_build_expected_pak(struct dp_test_expected **expected, dp_test_pktmbuf_eth_init(m, "aa:bb:cc:dd:ee:ff", dp_test_intf_name2mac_str("dp2T2"), - ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV6); /* currently doing hlim propagation */ outer->ip6_hlim = inner->ip6_hlim; @@ -106,7 +107,7 @@ gre_test_build_expected_pak(struct dp_test_expected **expected, *expected = exp; } -static void +void dp_test_gre6_setup_tunnel(uint32_t vrfid, const char *tun_src, const char *tun_dst) { @@ -137,7 +138,7 @@ dp_test_gre6_setup_tunnel(uint32_t vrfid, const char *tun_src, dp_test_netlink_add_neigh("dp1T1", "1:1:1::2", nh_mac_str); } -static void +void dp_test_gre6_teardown_tunnel(uint32_t vrfid, const char *tun_src, const char *tun_dst) { @@ -184,10 +185,10 @@ DP_START_TEST(gre6_encap, simple_encap_6O6) 1, &len); (void)dp_test_pktmbuf_eth_init(m, dp_test_intf_name2mac_str("dp1T1"), DP_TEST_INTF_DEF_SRC_MAC, - 
ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV6); inner_ip = ip6hdr(m); - gre_test_build_expected_pak(&exp, inner_ip, - exp_ip_outer); + gre6_test_build_expected_pak(&exp, inner_ip, + exp_ip_outer); dp_test_pak_receive(m, "dp1T1", exp); dp_test_gre6_teardown_tunnel(VRF_DEFAULT_ID, "1:1:2::1", "1:1:2::2"); @@ -207,14 +208,14 @@ static struct dp_test_expected *gre6_test_build_expected_decapped_pak( (void)dp_test_pktmbuf_eth_init(exp_mbuf, dp_test_intf_name2mac_str("dp1T1"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV6); ip6 = ip6hdr(exp_mbuf); ip6->ip6_hlim -= 2; exp = dp_test_exp_create_m(NULL, 1); exp->exp_pak[0] = exp_mbuf; dp_test_exp_set_oif_name(exp, "dp1T1"); - exp->check_start[0] = sizeof(struct ether_hdr); + exp->check_start[0] = sizeof(struct rte_ether_hdr); exp->check_len[0] = rte_pktmbuf_data_len(exp_mbuf) - exp->check_start[0]; *exp_mbuf_p = exp_mbuf; @@ -234,7 +235,7 @@ dp_test_gre6_build_encapped_pak(const struct ip6_hdr *payload_ip, (void)dp_test_pktmbuf_eth_init(m, dp_test_intf_name2mac_str("dp2T2"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV6); return m; } diff --git a/tests/whole_dp/src/dp_test_if_config.c b/tests/whole_dp/src/dp_test_if_config.c index db24a288..58e5e70c 100644 --- a/tests/whole_dp/src/dp_test_if_config.c +++ b/tests/whole_dp/src/dp_test_if_config.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2018-2020, AT&T Intellectual Property. All rights reserved. * All rights reserved. 
* * SPDX-License-Identifier: LGPL-2.1-only @@ -12,44 +12,15 @@ #include "dp_test.h" #include "dp_test_controller.h" -#include "dp_test_netlink_state.h" -#include "dp_test_lib.h" -#include "dp_test_lib_intf.h" +#include "dp_test_netlink_state_internal.h" +#include "dp_test_lib_internal.h" +#include "dp_test_lib_intf_internal.h" #include "dp_test_lib_exp.h" -#include "dp_test_pktmbuf_lib.h" +#include "dp_test_pktmbuf_lib_internal.h" DP_DECL_TEST_SUITE(if_cfg_suite); -DP_DECL_TEST_CASE(if_cfg_suite, if_config_switchport, NULL, NULL); - -/* - * Check that the out of order infra works. Use switchport command - * as it already uses the infra. - */ -DP_START_TEST(if_config_switchport, add_cmd) -{ - dp_test_send_config_src(dp_test_cont_src_get(), - "switchport sw1 hw-switching enable"); - - dp_test_send_config_src(dp_test_cont_src_get(), - "switchport sw1 hw-switching disable"); - - /* - * Using a loopback here as we can create one of those easily. - * It does not matter that the command can not be applied to - * a loopback, because we are testing the replay infra here. - */ - dp_test_intf_loopback_create("sw2"); - - dp_test_send_config_src(dp_test_cont_src_get(), - "switchport sw1 hw-switching disable"); - - dp_test_intf_loopback_delete("sw2"); - -} DP_END_TEST; - DP_DECL_TEST_CASE(if_cfg_suite, if_config_vtun, NULL, NULL); - /* * Test dataplane allocates an ifp and index for OpenVPN "vtun" * interfaces. 
@@ -65,7 +36,7 @@ DP_START_TEST(if_config_vtun, add_vtun) idx = ifnet_nametoindex("vtun0"); dp_test_fail_unless(idx != 0, "Expected non-zero ifindex for vtun0"); - vifp = ifnet_byifname("vtun0"); + vifp = dp_ifnet_byifname("vtun0"); dp_test_fail_unless(vifp != NULL, "Expected non-NULL ifp for vtun0"); dp_test_fail_unless(vifp->if_name != NULL, "Expected non-NULL ifp->if_name for vtun0"); diff --git a/tests/whole_dp/src/dp_test_intf_incomplete.c b/tests/whole_dp/src/dp_test_intf_incomplete.c index 5fe84dc1..b0f983d9 100644 --- a/tests/whole_dp/src/dp_test_intf_incomplete.c +++ b/tests/whole_dp/src/dp_test_intf_incomplete.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2018-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -18,11 +18,11 @@ #include "dp_test.h" #include "dp_test_controller.h" -#include "dp_test_netlink_state.h" -#include "dp_test_lib.h" -#include "dp_test_lib_intf.h" +#include "dp_test_netlink_state_internal.h" +#include "dp_test_lib_internal.h" +#include "dp_test_lib_intf_internal.h" #include "dp_test_lib_exp.h" -#include "dp_test_pktmbuf_lib.h" +#include "dp_test_pktmbuf_lib_internal.h" #include "dp_test_console.h" DP_DECL_TEST_SUITE(ip_incomplete); @@ -57,14 +57,14 @@ static void _build_and_send_pak(const char *src_addr, const char *dest_addr, dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T0"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); } else { test_pak = dp_test_create_ipv6_pak(src_addr, dest_addr, 1, &len); dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T0"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV6); } /* Create pak we expect to receive on the tx ring */ @@ -82,14 +82,14 @@ static void _build_and_send_pak(const char *src_addr, const char *dest_addr, 
(void)dp_test_pktmbuf_eth_init(dp_test_exp_get_pak(exp), nh.nh_mac_str, src_mac, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); dp_test_ipv4_decrement_ttl(dp_test_exp_get_pak(exp)); } else { (void)dp_test_pktmbuf_eth_init(dp_test_exp_get_pak(exp), nh.nh_mac_str, src_mac, - ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV6); dp_test_ipv6_decrement_ttl(dp_test_exp_get_pak(exp)); } diff --git a/tests/whole_dp/src/dp_test_ip.c b/tests/whole_dp/src/dp_test_ip.c index 241e5f2d..0a98c1dd 100644 --- a/tests/whole_dp/src/dp_test_ip.c +++ b/tests/whole_dp/src/dp_test_ip.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2018-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015-2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -18,12 +18,12 @@ #include "dp_test.h" #include "dp_test_controller.h" -#include "dp_test_netlink_state.h" -#include "dp_test_lib.h" -#include "dp_test_lib_intf.h" +#include "dp_test_netlink_state_internal.h" +#include "dp_test_lib_internal.h" +#include "dp_test_lib_intf_internal.h" #include "dp_test_lib_exp.h" -#include "dp_test_pktmbuf_lib.h" +#include "dp_test_pktmbuf_lib_internal.h" DP_DECL_TEST_SUITE(ip_suite); @@ -53,7 +53,7 @@ DP_START_TEST(ip_cfg, route_add_del) * Verifying adding and deleting a scale of routes such that the LPM * grows */ -DP_START_TEST(ip_cfg, route_add_del_scale) +DP_START_TEST_FULL_RUN(ip_cfg, route_add_del_scale) { json_object *expected_json; char summary_cmd[256]; @@ -169,7 +169,7 @@ DP_START_TEST(ip_rx, this_ifs_addr) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T0"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); /* Create pak we expect to see in local_packet */ exp = dp_test_exp_create(test_pak); @@ -202,7 +202,7 @@ DP_START_TEST(ip_rx, nondp_intf) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T0"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + 
RTE_ETHER_TYPE_IPV4); /* Create pak we expect to see in local_packet */ exp = dp_test_exp_create(test_pak); @@ -243,7 +243,7 @@ DP_START_TEST(ip_rx, fwd_ping_nondp_intf) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T0"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); /* Create pak we expect to see in local_packet */ exp = dp_test_exp_create(test_pak); @@ -275,7 +275,7 @@ DP_START_TEST(ip_rx, lo_intf) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T0"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); exp = dp_test_exp_create(test_pak); dp_test_exp_set_fwd_status(exp, DP_TEST_FWD_LOCAL); @@ -291,7 +291,7 @@ DP_START_TEST(ip_rx, lo_intf) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T0"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); exp = dp_test_exp_create(test_pak); dp_test_exp_set_fwd_status(exp, DP_TEST_FWD_LOCAL); @@ -324,7 +324,7 @@ DP_START_TEST(ip_rx, other_ifs_addr) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T0"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); /* Create pak we expect to see in local_packet */ exp = dp_test_exp_create(test_pak); @@ -363,7 +363,7 @@ DP_START_TEST(ip_rx, other_ifs_addr_this_no_ip) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T0"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); /* Create pak we expect to see in local_packet */ exp = dp_test_exp_create(test_pak); @@ -392,7 +392,7 @@ DP_START_TEST(ip_rx, subnet_bcast) (void)dp_test_pktmbuf_eth_init(test_pak, "ff:ff:ff:ff:ff:ff", DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); exp = dp_test_exp_create(test_pak); dp_test_exp_set_fwd_status(exp, DP_TEST_FWD_LOCAL); @@ -405,7 +405,7 @@ DP_START_TEST(ip_rx, subnet_bcast) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T0"), DP_TEST_INTF_DEF_SRC_MAC, - 
ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); exp = dp_test_exp_create(test_pak); dp_test_exp_set_fwd_status(exp, DP_TEST_FWD_DROPPED); @@ -427,6 +427,7 @@ DP_START_TEST(ip_rx, invalid_paks) const char *nh_mac_str; struct iphdr *ip; int len = 22; + int newlen; dp_test_nl_add_ip_addr_and_connected("dp1T1", "1.1.1.1/24"); @@ -443,7 +444,7 @@ DP_START_TEST(ip_rx, invalid_paks) (void)dp_test_pktmbuf_eth_init(good_pak, dp_test_intf_name2mac_str("dp1T1"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); /* * Test 1 - check that the payload packet without errors is @@ -457,7 +458,7 @@ DP_START_TEST(ip_rx, invalid_paks) (void)dp_test_pktmbuf_eth_init(dp_test_exp_get_pak(exp), nh_mac_str, dp_test_intf_name2mac_str("dp2T2"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); dp_test_ipv4_decrement_ttl(dp_test_exp_get_pak(exp)); dp_test_exp_set_oif_name(exp, "dp2T2"); @@ -469,8 +470,8 @@ DP_START_TEST(ip_rx, invalid_paks) */ test_pak = dp_test_cp_pak(good_pak); ip = iphdr(test_pak); - rte_pktmbuf_data_len(test_pak) = (char *)ip - - rte_pktmbuf_mtod(test_pak, char *) + 1; + newlen = (char *)ip - rte_pktmbuf_mtod(test_pak, char *) + 1; + rte_pktmbuf_trim(test_pak, test_pak->pkt_len - newlen); exp = dp_test_exp_create(test_pak); dp_test_exp_set_fwd_status(exp, DP_TEST_FWD_DROPPED); @@ -483,7 +484,7 @@ DP_START_TEST(ip_rx, invalid_paks) ip = iphdr(test_pak); ip->ihl = DP_TEST_PAK_DEFAULT_IHL - 1; ip->check = 0; - ip->check = in_cksum_hdr(ip); + ip->check = dp_in_cksum_hdr(ip); exp = dp_test_exp_create(test_pak); dp_test_exp_set_fwd_status(exp, DP_TEST_FWD_DROPPED); @@ -507,7 +508,7 @@ DP_START_TEST(ip_rx, invalid_paks) ip = iphdr(test_pak); ip->tot_len = htons(2000); ip->check = 0; - ip->check = in_cksum_hdr(ip); + ip->check = dp_in_cksum_hdr(ip); exp = dp_test_exp_create(test_pak); dp_test_exp_set_fwd_status(exp, DP_TEST_FWD_DROPPED); @@ -521,7 +522,7 @@ DP_START_TEST(ip_rx, invalid_paks) ip = iphdr(test_pak); ip->tot_len = htons(sizeof(struct iphdr) - 1); ip->check = 
0; - ip->check = in_cksum_hdr(ip); + ip->check = dp_in_cksum_hdr(ip); exp = dp_test_exp_create(test_pak); dp_test_exp_set_fwd_status(exp, DP_TEST_FWD_DROPPED); @@ -535,7 +536,7 @@ DP_START_TEST(ip_rx, invalid_paks) dp_test_fail_unless(inet_pton(AF_INET, "127.0.0.1", &ip->daddr) == 1, "Couldn't parse ip address"); ip->check = 0; - ip->check = in_cksum_hdr(ip); + ip->check = dp_in_cksum_hdr(ip); exp = dp_test_exp_create(test_pak); dp_test_exp_set_fwd_status(exp, DP_TEST_FWD_DROPPED); @@ -549,7 +550,7 @@ DP_START_TEST(ip_rx, invalid_paks) dp_test_fail_unless(inet_pton(AF_INET, "127.0.0.1", &ip->daddr) == 1, "Couldn't parse ip address"); ip->check = 0; - ip->check = in_cksum_hdr(ip); + ip->check = dp_in_cksum_hdr(ip); exp = dp_test_exp_create(test_pak); dp_test_exp_set_fwd_status(exp, DP_TEST_FWD_DROPPED); @@ -562,7 +563,7 @@ DP_START_TEST(ip_rx, invalid_paks) (void)dp_test_pktmbuf_eth_init(test_pak, "ff:ff:ff:ff:ff:ff", DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); exp = dp_test_exp_create(test_pak); dp_test_exp_set_fwd_status(exp, DP_TEST_FWD_DROPPED); @@ -575,7 +576,7 @@ DP_START_TEST(ip_rx, invalid_paks) ip = iphdr(test_pak); ip->version = 6; ip->check = 0; - ip->check = in_cksum_hdr(ip); + ip->check = dp_in_cksum_hdr(ip); exp = dp_test_exp_create(test_pak); dp_test_exp_set_fwd_status(exp, DP_TEST_FWD_DROPPED); @@ -609,7 +610,7 @@ DP_START_TEST(ip_fwd_basic, if_fwd_basic) test_pak = dp_test_create_ipv4_pak("10.73.1.1", "10.73.2.1", 1, &len); dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T0"), - DP_TEST_INTF_DEF_SRC_MAC, ETHER_TYPE_IPv4); + DP_TEST_INTF_DEF_SRC_MAC, RTE_ETHER_TYPE_IPV4); /* Create pak we expect to receive on the tx ring */ exp = dp_test_exp_create(test_pak); @@ -618,7 +619,7 @@ DP_START_TEST(ip_fwd_basic, if_fwd_basic) (void)dp_test_pktmbuf_eth_init(dp_test_exp_get_pak(exp), nh_mac_str, dp_test_intf_name2mac_str("dp2T1"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); 
dp_test_ipv4_decrement_ttl(dp_test_exp_get_pak(exp)); @@ -656,14 +657,14 @@ DP_START_TEST(ip_fwd, cover) /* Test sending the packet via more specific prefix */ test_pak = dp_test_create_ipv4_pak(ip_src, "10.73.2.0", 1, &len); dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T0"), - DP_TEST_INTF_DEF_SRC_MAC, ETHER_TYPE_IPv4); + DP_TEST_INTF_DEF_SRC_MAC, RTE_ETHER_TYPE_IPV4); exp = dp_test_exp_create(test_pak); dp_test_exp_set_oif_name(exp, "dp2T1"); (void)dp_test_pktmbuf_eth_init(dp_test_exp_get_pak(exp), nh_mac_str, dp_test_intf_name2mac_str("dp2T1"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); dp_test_ipv4_decrement_ttl(dp_test_exp_get_pak(exp)); dp_test_pak_receive(test_pak, "dp1T0", exp); @@ -672,14 +673,14 @@ DP_START_TEST(ip_fwd, cover) test_pak = dp_test_create_ipv4_pak("10.73.0.0", "10.0.0.2", 1, &len); dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T0"), - DP_TEST_INTF_DEF_SRC_MAC, ETHER_TYPE_IPv4); + DP_TEST_INTF_DEF_SRC_MAC, RTE_ETHER_TYPE_IPV4); exp = dp_test_exp_create(test_pak); dp_test_exp_set_oif_name(exp, "dp3T1"); (void)dp_test_pktmbuf_eth_init(dp_test_exp_get_pak(exp), nh_mac_str, dp_test_intf_name2mac_str("dp3T1"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); dp_test_ipv4_decrement_ttl(dp_test_exp_get_pak(exp)); dp_test_pak_receive(test_pak, "dp1T0", exp); @@ -693,14 +694,14 @@ DP_START_TEST(ip_fwd, cover) test_pak = dp_test_create_ipv4_pak("10.73.0.0", "10.73.2.0", 1, &len); dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T0"), - DP_TEST_INTF_DEF_SRC_MAC, ETHER_TYPE_IPv4); + DP_TEST_INTF_DEF_SRC_MAC, RTE_ETHER_TYPE_IPV4); exp = dp_test_exp_create(test_pak); dp_test_exp_set_oif_name(exp, "dp3T1"); (void)dp_test_pktmbuf_eth_init(dp_test_exp_get_pak(exp), nh_mac_str, dp_test_intf_name2mac_str("dp3T1"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); dp_test_ipv4_decrement_ttl(dp_test_exp_get_pak(exp)); dp_test_pak_receive(test_pak, "dp1T0", exp); @@ -714,14 +715,14 @@ DP_START_TEST(ip_fwd, cover) 
test_pak = dp_test_create_ipv4_pak(ip_src, "10.73.2.0", 1, &len); dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T0"), - DP_TEST_INTF_DEF_SRC_MAC, ETHER_TYPE_IPv4); + DP_TEST_INTF_DEF_SRC_MAC, RTE_ETHER_TYPE_IPV4); exp = dp_test_exp_create(test_pak); dp_test_exp_set_oif_name(exp, "dp2T1"); (void)dp_test_pktmbuf_eth_init(dp_test_exp_get_pak(exp), nh_mac_str, dp_test_intf_name2mac_str("dp2T1"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); dp_test_ipv4_decrement_ttl(dp_test_exp_get_pak(exp)); dp_test_pak_receive(test_pak, "dp1T0", exp); @@ -758,7 +759,7 @@ dp_test_frag_setup_exp(struct rte_mbuf **test_p, const char *nh_mac_str, test_pak = dp_test_create_ipv4_pak("10.73.0.0", "10.73.2.0", num_segs, seg_lens); dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T0"), - DP_TEST_INTF_DEF_SRC_MAC, ETHER_TYPE_IPv4); + DP_TEST_INTF_DEF_SRC_MAC, RTE_ETHER_TYPE_IPV4); /* * Fragmentation happens as follows: @@ -808,7 +809,7 @@ dp_test_frag_setup_exp(struct rte_mbuf **test_p, const char *nh_mac_str, IPPROTO_UDP); (void)dp_test_pktmbuf_eth_init(m, nh_mac_str, dp_test_intf_name2mac_str("dp1T1"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); ip = iphdr(m); dp_test_ipv4_decrement_ttl(m); @@ -828,8 +829,8 @@ dp_test_frag_setup_exp(struct rte_mbuf **test_p, const char *nh_mac_str, } dp_test_exp_set_pak_m(exp, i, m); - foff += rte_pktmbuf_pkt_len(m) - pktmbuf_l2_len(m) - - pktmbuf_l3_len(m); + foff += rte_pktmbuf_pkt_len(m) - dp_pktmbuf_l2_len(m) - + dp_pktmbuf_l3_len(m); } /* And the last pak out has the start of the initial packet */ @@ -838,7 +839,7 @@ dp_test_frag_setup_exp(struct rte_mbuf **test_p, const char *nh_mac_str, frag_lens[0].segs); (void)dp_test_pktmbuf_eth_init(m, nh_mac_str, dp_test_intf_name2mac_str("dp1T1"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); ip = iphdr(m); dp_test_ipv4_decrement_ttl(m); if (num_frags > 1) @@ -852,7 +853,7 @@ dp_test_frag_setup_exp(struct rte_mbuf **test_p, const char *nh_mac_str, return exp; } 
-DP_START_TEST(ip_fwd, fragment_smoke) +DP_START_TEST_FULL_RUN(ip_fwd, fragment_smoke) { struct dp_test_expected *exp; struct rte_mbuf *test_pak; @@ -901,7 +902,7 @@ DP_START_TEST(ip_fwd, fragment_smoke) dp_test_netlink_set_interface_mtu("dp1T1", 1500); } DP_END_TEST; -DP_START_TEST(ip_fwd, fragment_boundary_values) +DP_START_TEST_FULL_RUN(ip_fwd, fragment_boundary_values) { struct dp_test_expected *exp; struct rte_mbuf *test_pak; @@ -1004,7 +1005,7 @@ DP_START_TEST(ip_fwd, fragment_boundary_values) dp_test_netlink_set_interface_mtu("dp1T1", 1500); } DP_END_TEST; -DP_START_TEST(ip_fwd, fragment) +DP_START_TEST_FULL_RUN(ip_fwd, fragment) { struct dp_test_expected *exp; struct rte_mbuf *test_pak; @@ -1162,7 +1163,7 @@ DP_START_TEST(ip_fwd, router_alert) test_pak = dp_test_create_ipv4_pak("10.73.0.0", "10.73.2.0", 1, &len); dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T0"), - DP_TEST_INTF_DEF_SRC_MAC, ETHER_TYPE_IPv4); + DP_TEST_INTF_DEF_SRC_MAC, RTE_ETHER_TYPE_IPV4); ip = iphdr(test_pak); cp = (uint8_t *)(ip + 1); cp[IPOPT_OPTVAL] = IPOPT_RA; @@ -1213,7 +1214,7 @@ DP_START_TEST(ip_fwd, timestamp_opt) test_pak = dp_test_create_ipv4_pak("10.73.0.0", "10.73.2.0", 1, &len); dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T0"), - DP_TEST_INTF_DEF_SRC_MAC, ETHER_TYPE_IPv4); + DP_TEST_INTF_DEF_SRC_MAC, RTE_ETHER_TYPE_IPV4); ip = iphdr(test_pak); cp = (uint8_t *)(ip + 1); cp[IPOPT_OPTVAL] = IPOPT_TIMESTAMP; @@ -1233,7 +1234,7 @@ DP_START_TEST(ip_fwd, timestamp_opt) (void)dp_test_pktmbuf_eth_init(dp_test_exp_get_pak(exp), nh_mac_str, dp_test_intf_name2mac_str("dp2T1"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); dp_test_ipv4_decrement_ttl(dp_test_exp_get_pak(exp)); dp_test_pak_receive(test_pak, "dp1T0", exp); @@ -1246,7 +1247,7 @@ DP_START_TEST(ip_fwd, timestamp_opt) test_pak = dp_test_create_ipv4_pak("10.73.0.0", "10.73.2.0", 1, &len); dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T0"), - DP_TEST_INTF_DEF_SRC_MAC, 
ETHER_TYPE_IPv4); + DP_TEST_INTF_DEF_SRC_MAC, RTE_ETHER_TYPE_IPV4); ip = iphdr(test_pak); cp = (uint8_t *)(ip + 1); cp[IPOPT_OPTVAL] = IPOPT_TIMESTAMP; @@ -1270,7 +1271,7 @@ DP_START_TEST(ip_fwd, timestamp_opt) (void)dp_test_pktmbuf_eth_init(dp_test_exp_get_pak(exp), nh_mac_str, dp_test_intf_name2mac_str("dp2T1"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); dp_test_ipv4_decrement_ttl(dp_test_exp_get_pak(exp)); dp_test_pak_receive(test_pak, "dp1T0", exp); @@ -1283,7 +1284,7 @@ DP_START_TEST(ip_fwd, timestamp_opt) test_pak = dp_test_create_ipv4_pak("10.73.0.0", "10.73.2.0", 1, &len); dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T0"), - DP_TEST_INTF_DEF_SRC_MAC, ETHER_TYPE_IPv4); + DP_TEST_INTF_DEF_SRC_MAC, RTE_ETHER_TYPE_IPV4); ip = iphdr(test_pak); cp = (uint8_t *)(ip + 1); cp[IPOPT_OPTVAL] = IPOPT_TIMESTAMP; @@ -1307,7 +1308,7 @@ DP_START_TEST(ip_fwd, timestamp_opt) (void)dp_test_pktmbuf_eth_init(dp_test_exp_get_pak(exp), nh_mac_str, dp_test_intf_name2mac_str("dp2T1"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); dp_test_ipv4_decrement_ttl(dp_test_exp_get_pak(exp)); dp_test_pak_receive(test_pak, "dp1T0", exp); @@ -1320,7 +1321,7 @@ DP_START_TEST(ip_fwd, timestamp_opt) test_pak = dp_test_create_ipv4_pak("10.73.0.0", "10.73.2.0", 1, &len); dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T0"), - DP_TEST_INTF_DEF_SRC_MAC, ETHER_TYPE_IPv4); + DP_TEST_INTF_DEF_SRC_MAC, RTE_ETHER_TYPE_IPV4); ip = iphdr(test_pak); cp = (uint8_t *)(ip + 1); cp[IPOPT_OPTVAL] = IPOPT_TIMESTAMP; @@ -1342,7 +1343,7 @@ DP_START_TEST(ip_fwd, timestamp_opt) (void)dp_test_pktmbuf_eth_init(dp_test_exp_get_pak(exp), nh_mac_str, dp_test_intf_name2mac_str("dp2T1"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); dp_test_ipv4_decrement_ttl(dp_test_exp_get_pak(exp)); dp_test_pak_receive(test_pak, "dp1T0", exp); @@ -1355,7 +1356,7 @@ DP_START_TEST(ip_fwd, timestamp_opt) test_pak = dp_test_create_ipv4_pak("10.73.0.0", "1.1.1.1", 1, &len); dp_test_pktmbuf_eth_init(test_pak, 
dp_test_intf_name2mac_str("dp1T0"), - DP_TEST_INTF_DEF_SRC_MAC, ETHER_TYPE_IPv4); + DP_TEST_INTF_DEF_SRC_MAC, RTE_ETHER_TYPE_IPV4); ip = iphdr(test_pak); cp = (uint8_t *)(ip + 1); cp[IPOPT_OPTVAL] = IPOPT_TIMESTAMP; @@ -1383,7 +1384,7 @@ DP_START_TEST(ip_fwd, timestamp_opt) test_pak = dp_test_create_ipv4_pak("10.73.0.0", "1.1.1.1", 1, &len); dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T0"), - DP_TEST_INTF_DEF_SRC_MAC, ETHER_TYPE_IPv4); + DP_TEST_INTF_DEF_SRC_MAC, RTE_ETHER_TYPE_IPV4); ip = iphdr(test_pak); cp = (uint8_t *)(ip + 1); cp[IPOPT_OPTVAL] = IPOPT_TIMESTAMP; @@ -1436,14 +1437,14 @@ DP_START_TEST(ip_fwd, multi_scope) */ test_pak = dp_test_create_ipv4_pak("1.1.1.2", "2.2.2.1", 1, &len); dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T0"), - DP_TEST_INTF_DEF_SRC_MAC, ETHER_TYPE_IPv4); + DP_TEST_INTF_DEF_SRC_MAC, RTE_ETHER_TYPE_IPV4); exp = dp_test_exp_create(test_pak); dp_test_exp_set_oif_name(exp, "dp2T1"); (void)dp_test_pktmbuf_eth_init(dp_test_exp_get_pak(exp), nh_mac_str1, dp_test_intf_name2mac_str("dp2T1"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); dp_test_ipv4_decrement_ttl(dp_test_exp_get_pak(exp)); dp_test_pak_receive(test_pak, "dp1T0", exp); @@ -1456,14 +1457,14 @@ DP_START_TEST(ip_fwd, multi_scope) test_pak = dp_test_create_ipv4_pak("1.1.1.2", "2.2.2.1", 1, &len); dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T0"), - DP_TEST_INTF_DEF_SRC_MAC, ETHER_TYPE_IPv4); + DP_TEST_INTF_DEF_SRC_MAC, RTE_ETHER_TYPE_IPV4); exp = dp_test_exp_create(test_pak); dp_test_exp_set_oif_name(exp, "dp2T1"); (void)dp_test_pktmbuf_eth_init(dp_test_exp_get_pak(exp), nh_mac_str1, dp_test_intf_name2mac_str("dp2T1"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); dp_test_ipv4_decrement_ttl(dp_test_exp_get_pak(exp)); dp_test_pak_receive(test_pak, "dp1T0", exp); @@ -1476,14 +1477,14 @@ DP_START_TEST(ip_fwd, multi_scope) test_pak = dp_test_create_ipv4_pak("1.1.1.2", "2.2.2.1", 1, &len); dp_test_pktmbuf_eth_init(test_pak, 
dp_test_intf_name2mac_str("dp1T0"), - DP_TEST_INTF_DEF_SRC_MAC, ETHER_TYPE_IPv4); + DP_TEST_INTF_DEF_SRC_MAC, RTE_ETHER_TYPE_IPV4); exp = dp_test_exp_create(test_pak); dp_test_exp_set_oif_name(exp, "dp3T1"); (void)dp_test_pktmbuf_eth_init(dp_test_exp_get_pak(exp), nh_mac_str2, dp_test_intf_name2mac_str("dp3T1"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); dp_test_ipv4_decrement_ttl(dp_test_exp_get_pak(exp)); dp_test_pak_receive(test_pak, "dp1T0", exp); @@ -1526,7 +1527,7 @@ DP_START_TEST(ecmp, ecmp) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); /* Create pak we expect to receive on the tx ring */ exp = dp_test_exp_create(test_pak); @@ -1535,7 +1536,7 @@ DP_START_TEST(ecmp, ecmp) (void)dp_test_pktmbuf_eth_init(dp_test_exp_get_pak(exp), nh_mac_str1, dp_test_intf_name2mac_str("dp3T2"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); dp_test_ipv4_decrement_ttl(dp_test_exp_get_pak(exp)); @@ -1549,7 +1550,7 @@ DP_START_TEST(ecmp, ecmp) 1112, 1010, 1, &len); (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), - NULL, ETHER_TYPE_IPv4); + NULL, RTE_ETHER_TYPE_IPV4); /* Create pak we expect to receive on the tx ring */ exp = dp_test_exp_create(test_pak); @@ -1558,7 +1559,7 @@ DP_START_TEST(ecmp, ecmp) (void)dp_test_pktmbuf_eth_init(dp_test_exp_get_pak(exp), nh_mac_str2, dp_test_intf_name2mac_str("dp4T3"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); dp_test_ipv4_decrement_ttl(dp_test_exp_get_pak(exp)); @@ -1604,14 +1605,14 @@ DP_START_TEST(ecmp, bad_l4) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); exp = dp_test_exp_create(test_pak); dp_test_exp_set_oif_name(exp, "dp3T2"); dp_test_pktmbuf_eth_init(dp_test_exp_get_pak(exp), nh_mac_str1, dp_test_intf_name2mac_str("dp3T2"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); 
dp_test_ipv4_decrement_ttl(dp_test_exp_get_pak(exp)); dp_test_pak_receive(test_pak, "dp1T1", exp); @@ -1625,14 +1626,14 @@ DP_START_TEST(ecmp, bad_l4) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); exp = dp_test_exp_create(test_pak); dp_test_exp_set_oif_name(exp, "dp3T2"); dp_test_pktmbuf_eth_init(dp_test_exp_get_pak(exp), nh_mac_str1, dp_test_intf_name2mac_str("dp3T2"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); dp_test_ipv4_decrement_ttl(dp_test_exp_get_pak(exp)); dp_test_pak_receive(test_pak, "dp1T1", exp); @@ -1646,14 +1647,14 @@ DP_START_TEST(ecmp, bad_l4) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); exp = dp_test_exp_create(test_pak); dp_test_exp_set_oif_name(exp, "dp3T2"); dp_test_pktmbuf_eth_init(dp_test_exp_get_pak(exp), nh_mac_str1, dp_test_intf_name2mac_str("dp3T2"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); dp_test_ipv4_decrement_ttl(dp_test_exp_get_pak(exp)); dp_test_pak_receive(test_pak, "dp1T1", exp); @@ -1667,14 +1668,14 @@ DP_START_TEST(ecmp, bad_l4) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); exp = dp_test_exp_create(test_pak); dp_test_exp_set_oif_name(exp, "dp3T2"); dp_test_pktmbuf_eth_init(dp_test_exp_get_pak(exp), nh_mac_str1, dp_test_intf_name2mac_str("dp3T2"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); dp_test_ipv4_decrement_ttl(dp_test_exp_get_pak(exp)); dp_test_pak_receive(test_pak, "dp1T1", exp); @@ -1720,7 +1721,7 @@ DP_START_TEST(vif_ingress, vif_ingress) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T3"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); /* Create pak we expect to receive on the tx ring */ exp = dp_test_exp_create(test_pak); @@ -1731,7 +1732,7 @@ DP_START_TEST(vif_ingress, 
vif_ingress) (void)dp_test_pktmbuf_eth_init(dp_test_exp_get_pak(exp), nh_mac_str, dp_test_intf_name2mac_str("dp3T3"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); dp_test_ipv4_decrement_ttl(dp_test_exp_get_pak(exp)); @@ -1776,7 +1777,7 @@ DP_START_TEST(vif_egress, vif_egress) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); /* Create pak we expect to receive on the tx ring */ exp = dp_test_exp_create(test_pak); @@ -1786,7 +1787,7 @@ DP_START_TEST(vif_egress, vif_egress) (void)dp_test_pktmbuf_eth_init(dp_test_exp_get_pak(exp), nh_mac_str, dp_test_intf_name2mac_str("dp3T3"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); dp_test_ipv4_decrement_ttl(dp_test_exp_get_pak(exp)); @@ -1837,7 +1838,7 @@ DP_START_TEST(vif_ecmp2, vif_ecmp2) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); /* set udp ports to influence ecmp loadbalance choice */ dp_test_pktmbuf_udp_init(test_pak, 1001, 2009, true); /* route 100 */ @@ -1850,7 +1851,7 @@ DP_START_TEST(vif_ecmp2, vif_ecmp2) (void)dp_test_pktmbuf_eth_init(dp_test_exp_get_pak(exp), nh_mac_str[0], dp_test_intf_name2mac_str("dp3T3"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); dp_test_ipv4_decrement_ttl(dp_test_exp_get_pak(exp)); @@ -1864,7 +1865,7 @@ DP_START_TEST(vif_ecmp2, vif_ecmp2) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); dp_test_pktmbuf_udp_init(test_pak, 1257, 1003, true); /* route 101 */ @@ -1876,7 +1877,7 @@ DP_START_TEST(vif_ecmp2, vif_ecmp2) (void)dp_test_pktmbuf_eth_init(dp_test_exp_get_pak(exp), nh_mac_str[1], dp_test_intf_name2mac_str("dp3T3"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); dp_test_ipv4_decrement_ttl(dp_test_exp_get_pak(exp)); @@ -2040,7 +2041,7 @@ DP_START_TEST(ip_primary, ip_primary) 
(void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T0"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); /* Create pak we expect to see in local_packet */ exp = dp_test_exp_create(test_pak); @@ -2073,7 +2074,7 @@ DP_START_TEST(ip_secondary, ip_secondary) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T0"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); /* Create pak we expect to see in local_packet */ exp = dp_test_exp_create(test_pak); @@ -2110,7 +2111,7 @@ DP_START_TEST(ip_tertiary, ip_tertiary) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T0"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); /* Create pak we expect to see in local_packet */ exp = dp_test_exp_create(test_pak); @@ -2128,7 +2129,7 @@ DP_START_TEST(ip_tertiary, ip_tertiary) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T0"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); exp = dp_test_exp_create(test_pak); dp_test_exp_set_fwd_status(exp, DP_TEST_FWD_DROPPED); dp_test_pak_receive(test_pak, "dp1T0", exp); @@ -2156,7 +2157,7 @@ static void _build_and_send_pak(const char *src_addr, const char *dest_addr, test_pak = dp_test_create_ipv4_pak(src_addr, dest_addr, 1, &len); dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T3"), - NULL, ETHER_TYPE_IPv4); + NULL, RTE_ETHER_TYPE_IPV4); /* Create pak we expect to receive on the tx ring */ exp = dp_test_exp_create(test_pak); if (nh.unreach) { @@ -2167,7 +2168,7 @@ static void _build_and_send_pak(const char *src_addr, const char *dest_addr, (void)dp_test_pktmbuf_eth_init(dp_test_exp_get_pak(exp), nh.nh_mac_str, dp_test_intf_name2mac_str(nh.nh_int), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); dp_test_ipv4_decrement_ttl(dp_test_exp_get_pak(exp)); } _dp_test_pak_receive(test_pak, "dp1T3", exp, __FILE__, func, line); @@ -2220,7 +2221,7 @@ static void 
_build_and_send_pak(const char *src_addr, const char *dest_addr, * entries in the lpm due to the connecteds, so test more. */ DP_DECL_TEST_CASE(ip_suite, ip_route_scopes_and_covers, NULL, NULL); -DP_START_TEST(ip_route_scopes_and_covers, ip_route_scopes_and_covers) +DP_START_TEST_FULL_RUN(ip_route_scopes_and_covers, ip_route_scopes_and_covers) { struct nh_info nh1_0 = {.nh_mac_str = "11:11:11:11:11:0", .nh_addr = "1.1.1.1", @@ -2493,7 +2494,7 @@ DP_START_TEST(ip_default_route, ip_default_route) test_pak = dp_test_create_ipv4_pak("10.73.1.1", "10.73.2.1", 1, &len); dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T0"), - DP_TEST_INTF_DEF_SRC_MAC, ETHER_TYPE_IPv4); + DP_TEST_INTF_DEF_SRC_MAC, RTE_ETHER_TYPE_IPV4); /* Create pak we expect to receive on the tx ring */ exp = dp_test_exp_create(test_pak); @@ -2502,7 +2503,7 @@ DP_START_TEST(ip_default_route, ip_default_route) (void)dp_test_pktmbuf_eth_init(dp_test_exp_get_pak(exp), nh_mac_str, dp_test_intf_name2mac_str("dp2T1"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); dp_test_ipv4_decrement_ttl(dp_test_exp_get_pak(exp)); @@ -2545,4 +2546,3 @@ DP_START_TEST(ip_default_route, ip_default_route2) dp_test_intf_vif_del("dp1T1.103", 103); } DP_END_TEST; - diff --git a/tests/whole_dp/src/dp_test_ip6.c b/tests/whole_dp/src/dp_test_ip6.c index cd3a100f..7dac06ee 100644 --- a/tests/whole_dp/src/dp_test_ip6.c +++ b/tests/whole_dp/src/dp_test_ip6.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015-2016 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -10,10 +10,11 @@ #include "ip6_funcs.h" #include "dp_test.h" -#include "dp_test_netlink_state.h" -#include "dp_test_lib.h" +#include "dp_test_controller.h" +#include "dp_test_netlink_state_internal.h" +#include "dp_test_lib_internal.h" #include "dp_test_lib_exp.h" -#include "dp_test_pktmbuf_lib.h" +#include "dp_test_pktmbuf_lib_internal.h" #include "dp_test_console.h" #include "dp_test_cmd_state.h" @@ -68,9 +69,9 @@ DP_START_TEST(ip6_cfg, route_add_del_scale) " \"tbl8s\":" " {" " \"used\":" - " 0," + " 14," " \"free\":" - " 256" + " 242" " }," " }," "}"); @@ -115,9 +116,9 @@ DP_START_TEST(ip6_cfg, route_add_del_scale) " \"tbl8s\":" " {" " \"used\":" - " 0," + " 14," " \"free\":" - " 512" + " 498" " }," " }," "}"); @@ -158,7 +159,7 @@ DP_START_TEST(ip6_rx, lo_intf) (void)dp_test_pktmbuf_eth_init( test_pak, dp_test_intf_name2mac_str("dp1T0"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV6); exp = dp_test_exp_create(test_pak); dp_test_exp_set_fwd_status(exp, DP_TEST_FWD_LOCAL); @@ -175,7 +176,7 @@ DP_START_TEST(ip6_rx, lo_intf) (void)dp_test_pktmbuf_eth_init( test_pak, dp_test_intf_name2mac_str("dp1T0"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV6); exp = dp_test_exp_create(test_pak); dp_test_exp_set_fwd_status(exp, DP_TEST_FWD_LOCAL); @@ -210,7 +211,7 @@ DP_START_TEST(ip6_rx, this_ifs_addr) /* Ingress dp1T0 */ (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T0"), - NULL, ETHER_TYPE_IPv6); + NULL, RTE_ETHER_TYPE_IPV6); /* Create pak we expect to see in local_packet */ exp = dp_test_exp_create(test_pak); @@ -248,7 +249,7 @@ DP_START_TEST(ip6_rx, fwd_ping6_nondp_intf) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T0"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV6); /* Create pak we expect to see in local_packet */ exp = dp_test_exp_create(test_pak); @@ -273,6 +274,7 @@ DP_START_TEST(ip6_rx, invalid_paks) const char *nh_mac_str; struct ip6_hdr *ip6; 
int len = 22; + int newlen; dp_test_nl_add_ip_addr_and_connected("dp1T1", "2001:1:1::1/64"); dp_test_nl_add_ip_addr_and_connected("dp2T2", "2002:2:2::2/64"); @@ -287,7 +289,7 @@ DP_START_TEST(ip6_rx, invalid_paks) (void)dp_test_pktmbuf_eth_init(good_pak, dp_test_intf_name2mac_str("dp1T1"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV6); /* * Test 1 - check that the payload packet without errors is @@ -301,7 +303,7 @@ DP_START_TEST(ip6_rx, invalid_paks) (void)dp_test_pktmbuf_eth_init(dp_test_exp_get_pak(exp), nh_mac_str, dp_test_intf_name2mac_str("dp2T2"), - ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV6); dp_test_ipv6_decrement_ttl(dp_test_exp_get_pak(exp)); dp_test_exp_set_oif_name(exp, "dp2T2"); @@ -313,9 +315,8 @@ DP_START_TEST(ip6_rx, invalid_paks) */ test_pak = dp_test_cp_pak(good_pak); ip6 = ip6hdr(test_pak); - rte_pktmbuf_data_len(test_pak) = (char *)ip6 - - rte_pktmbuf_mtod(test_pak, char *) + 1; - + newlen = (char *)ip6 - rte_pktmbuf_mtod(test_pak, char *) + 1; + rte_pktmbuf_trim(test_pak, test_pak->pkt_len - newlen); exp = dp_test_exp_create(test_pak); dp_test_exp_set_fwd_status(exp, DP_TEST_FWD_DROPPED); dp_test_pak_receive(test_pak, "dp1T1", exp); @@ -447,7 +448,7 @@ DP_START_TEST(ip6_fwd, basic) test_pak = dp_test_create_ipv6_pak("2010:73::", "2010:73:2::", 1, &len); dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T0"), - NULL, ETHER_TYPE_IPv6); + NULL, RTE_ETHER_TYPE_IPV6); /* Create pak we expect to receive on the tx ring */ exp = dp_test_exp_create(test_pak); @@ -456,7 +457,7 @@ DP_START_TEST(ip6_fwd, basic) (void)dp_test_pktmbuf_eth_init(dp_test_exp_get_pak(exp), nh_mac_str, dp_test_intf_name2mac_str("dp2T1"), - ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV6); dp_test_ipv6_decrement_ttl(dp_test_exp_get_pak(exp)); @@ -503,7 +504,7 @@ DP_START_TEST(ip6_fwd, bad_hbh) test_pak = dp_test_create_ipv6_pak("2010:73::", "2010:73:2::", 1, &len); dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T0"), - NULL, 
ETHER_TYPE_IPv6); + NULL, RTE_ETHER_TYPE_IPV6); ip6 = ip6hdr(test_pak); hbh = (struct ip6_hbh *)(ip6 + 1); hbh->ip6h_nxt = ip6->ip6_nxt; @@ -521,7 +522,7 @@ DP_START_TEST(ip6_fwd, bad_hbh) test_pak = dp_test_create_ipv6_pak("2010:73::", "2010:73:2::", 1, &len); dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T0"), - NULL, ETHER_TYPE_IPv6); + NULL, RTE_ETHER_TYPE_IPV6); ip6 = ip6hdr(test_pak); hbh = (struct ip6_hbh *)(ip6 + 1); hbh->ip6h_nxt = ip6->ip6_nxt; @@ -545,7 +546,7 @@ DP_START_TEST(ip6_fwd, bad_hbh) test_pak = dp_test_create_ipv6_pak("2010:73::", "2010:73:2::", 1, &len); dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T0"), - NULL, ETHER_TYPE_IPv6); + NULL, RTE_ETHER_TYPE_IPV6); ip6 = ip6hdr(test_pak); hbh = (struct ip6_hbh *)(ip6 + 1); hbh->ip6h_nxt = ip6->ip6_nxt; @@ -567,7 +568,7 @@ DP_START_TEST(ip6_fwd, bad_hbh) test_pak = dp_test_create_ipv6_pak("2010:73::", "2010:73:2::", 1, &len); dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T0"), - NULL, ETHER_TYPE_IPv6); + NULL, RTE_ETHER_TYPE_IPV6); ip6 = ip6hdr(test_pak); hbh = (struct ip6_hbh *)(ip6 + 1); hbh->ip6h_nxt = ip6->ip6_nxt; @@ -591,7 +592,7 @@ DP_START_TEST(ip6_fwd, bad_hbh) test_pak = dp_test_create_ipv6_pak("2010:73::", "2010:73:2::", 1, &len); dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T0"), - NULL, ETHER_TYPE_IPv6); + NULL, RTE_ETHER_TYPE_IPV6); ip6 = ip6hdr(test_pak); hbh = (struct ip6_hbh *)(ip6 + 1); hbh->ip6h_nxt = ip6->ip6_nxt; @@ -613,7 +614,7 @@ DP_START_TEST(ip6_fwd, bad_hbh) test_pak = dp_test_create_ipv6_pak("2010:73::", "2010:73:2::", 1, &len); dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T0"), - NULL, ETHER_TYPE_IPv6); + NULL, RTE_ETHER_TYPE_IPV6); ip6 = ip6hdr(test_pak); hbh = (struct ip6_hbh *)(ip6 + 1); hbh->ip6h_nxt = ip6->ip6_nxt; @@ -636,7 +637,7 @@ DP_START_TEST(ip6_fwd, bad_hbh) "fe80::5054:ff:fe79:3f5", 1, &len); dp_test_pktmbuf_eth_init(test_pak, 
dp_test_intf_name2mac_str("dp1T0"), - nh_mac_str2, ETHER_TYPE_IPv6); + nh_mac_str2, RTE_ETHER_TYPE_IPV6); ip6 = ip6hdr(test_pak); hbh = (struct ip6_hbh *)(ip6 + 1); hbh->ip6h_nxt = ip6->ip6_nxt; @@ -658,7 +659,7 @@ DP_START_TEST(ip6_fwd, bad_hbh) (void)dp_test_pktmbuf_eth_init(icmp_pak, nh_mac_str2, dp_test_intf_name2mac_str("dp1T0"), - ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV6); exp = dp_test_exp_create(icmp_pak); rte_pktmbuf_free(icmp_pak); @@ -676,7 +677,7 @@ DP_START_TEST(ip6_fwd, bad_hbh) "ff02::1", 1, &len); dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T0"), - nh_mac_str2, ETHER_TYPE_IPv6); + nh_mac_str2, RTE_ETHER_TYPE_IPV6); ip6 = ip6hdr(test_pak); hbh = (struct ip6_hbh *)(ip6 + 1); hbh->ip6h_nxt = ip6->ip6_nxt; @@ -698,7 +699,7 @@ DP_START_TEST(ip6_fwd, bad_hbh) (void)dp_test_pktmbuf_eth_init(icmp_pak, nh_mac_str2, dp_test_intf_name2mac_str("dp1T0"), - ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV6); exp = dp_test_exp_create(icmp_pak); rte_pktmbuf_free(icmp_pak); @@ -742,7 +743,7 @@ DP_START_TEST(ip6_fwd, router_alert) test_pak = dp_test_create_ipv6_pak("2010:73::", "2010:73:2::", 1, &len); dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T0"), - NULL, ETHER_TYPE_IPv6); + NULL, RTE_ETHER_TYPE_IPV6); ip6 = ip6hdr(test_pak); hbh = (struct ip6_hbh *)(ip6 + 1); hbh->ip6h_nxt = ip6->ip6_nxt; @@ -772,7 +773,7 @@ DP_START_TEST(ip6_fwd, router_alert) test_pak = dp_test_create_ipv6_pak("2010:73::", "2010:73:2::", 1, &len); dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T0"), - NULL, ETHER_TYPE_IPv6); + NULL, RTE_ETHER_TYPE_IPV6); ip6 = ip6hdr(test_pak); ip6->ip6_nxt = IPPROTO_HOPOPTS; hbh = (struct ip6_hbh *)(ip6 + 1); @@ -832,14 +833,14 @@ DP_START_TEST(ip6_fwd, multi_scope) test_pak = dp_test_create_ipv6_pak("2001:1:1::2", "2002:2:2::1", 1, &len); dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T0"), - DP_TEST_INTF_DEF_SRC_MAC, ETHER_TYPE_IPv6); + DP_TEST_INTF_DEF_SRC_MAC, RTE_ETHER_TYPE_IPV6); exp = 
dp_test_exp_create(test_pak); dp_test_exp_set_oif_name(exp, "dp2T1"); (void)dp_test_pktmbuf_eth_init(dp_test_exp_get_pak(exp), nh_mac_str1, dp_test_intf_name2mac_str("dp2T1"), - ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV6); dp_test_ipv6_decrement_ttl(dp_test_exp_get_pak(exp)); dp_test_pak_receive(test_pak, "dp1T0", exp); @@ -854,14 +855,14 @@ DP_START_TEST(ip6_fwd, multi_scope) test_pak = dp_test_create_ipv6_pak("2001:1:1::2", "2002:2:2::1", 1, &len); dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T0"), - DP_TEST_INTF_DEF_SRC_MAC, ETHER_TYPE_IPv6); + DP_TEST_INTF_DEF_SRC_MAC, RTE_ETHER_TYPE_IPV6); exp = dp_test_exp_create(test_pak); dp_test_exp_set_oif_name(exp, "dp2T1"); (void)dp_test_pktmbuf_eth_init(dp_test_exp_get_pak(exp), nh_mac_str1, dp_test_intf_name2mac_str("dp2T1"), - ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV6); dp_test_ipv6_decrement_ttl(dp_test_exp_get_pak(exp)); dp_test_pak_receive(test_pak, "dp1T0", exp); @@ -875,14 +876,14 @@ DP_START_TEST(ip6_fwd, multi_scope) test_pak = dp_test_create_ipv6_pak("2001:1:1::2", "2002:2:2::1", 1, &len); dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T0"), - DP_TEST_INTF_DEF_SRC_MAC, ETHER_TYPE_IPv6); + DP_TEST_INTF_DEF_SRC_MAC, RTE_ETHER_TYPE_IPV6); exp = dp_test_exp_create(test_pak); dp_test_exp_set_oif_name(exp, "dp3T1"); (void)dp_test_pktmbuf_eth_init(dp_test_exp_get_pak(exp), nh_mac_str2, dp_test_intf_name2mac_str("dp3T1"), - ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV6); dp_test_ipv6_decrement_ttl(dp_test_exp_get_pak(exp)); dp_test_pak_receive(test_pak, "dp1T0", exp); @@ -928,7 +929,7 @@ DP_START_TEST(ecmp6, ecmp) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV6); /* Create pak we expect to receive on the tx ring */ exp = dp_test_exp_create(test_pak); @@ -937,7 +938,7 @@ DP_START_TEST(ecmp6, ecmp) (void)dp_test_pktmbuf_eth_init(dp_test_exp_get_pak(exp), nh_mac_str2, 
dp_test_intf_name2mac_str("dp4T3"), - ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV6); dp_test_ipv6_decrement_ttl(dp_test_exp_get_pak(exp)); @@ -951,7 +952,7 @@ DP_START_TEST(ecmp6, ecmp) 1111, 1005, 1, &len); (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), - NULL, ETHER_TYPE_IPv6); + NULL, RTE_ETHER_TYPE_IPV6); /* Create pak we expect to receive on the tx ring */ exp = dp_test_exp_create(test_pak); @@ -960,7 +961,7 @@ DP_START_TEST(ecmp6, ecmp) (void)dp_test_pktmbuf_eth_init(dp_test_exp_get_pak(exp), nh_mac_str1, dp_test_intf_name2mac_str("dp3T2"), - ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV6); dp_test_ipv6_decrement_ttl(dp_test_exp_get_pak(exp)); @@ -1059,14 +1060,14 @@ DP_START_TEST(ecmp6, bad_l4) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV6); exp = dp_test_exp_create(test_pak); dp_test_exp_set_oif_name(exp, "dp4T3"); dp_test_pktmbuf_eth_init(dp_test_exp_get_pak(exp), nh_mac_str2, dp_test_intf_name2mac_str("dp4T3"), - ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV6); dp_test_ipv6_decrement_ttl(dp_test_exp_get_pak(exp)); dp_test_pak_receive(test_pak, "dp1T1", exp); @@ -1080,14 +1081,14 @@ DP_START_TEST(ecmp6, bad_l4) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV6); exp = dp_test_exp_create(test_pak); dp_test_exp_set_oif_name(exp, "dp4T3"); dp_test_pktmbuf_eth_init(dp_test_exp_get_pak(exp), nh_mac_str2, dp_test_intf_name2mac_str("dp4T3"), - ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV6); dp_test_ipv6_decrement_ttl(dp_test_exp_get_pak(exp)); dp_test_pak_receive(test_pak, "dp1T1", exp); @@ -1101,14 +1102,14 @@ DP_START_TEST(ecmp6, bad_l4) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV6); exp = dp_test_exp_create(test_pak); dp_test_exp_set_oif_name(exp, "dp4T3"); 
dp_test_pktmbuf_eth_init(dp_test_exp_get_pak(exp), nh_mac_str2, dp_test_intf_name2mac_str("dp4T3"), - ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV6); dp_test_ipv6_decrement_ttl(dp_test_exp_get_pak(exp)); dp_test_pak_receive(test_pak, "dp1T1", exp); @@ -1122,14 +1123,14 @@ DP_START_TEST(ecmp6, bad_l4) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV6); exp = dp_test_exp_create(test_pak); dp_test_exp_set_oif_name(exp, "dp4T3"); dp_test_pktmbuf_eth_init(dp_test_exp_get_pak(exp), nh_mac_str2, dp_test_intf_name2mac_str("dp4T3"), - ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV6); dp_test_ipv6_decrement_ttl(dp_test_exp_get_pak(exp)); dp_test_pak_receive(test_pak, "dp1T1", exp); @@ -1163,7 +1164,7 @@ DP_START_TEST(ip6_primary, ip6_primary) /* Ingress dp1T0 */ (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T0"), - NULL, ETHER_TYPE_IPv6); + NULL, RTE_ETHER_TYPE_IPV6); /* Create pak we expect to see in local_packet */ exp = dp_test_exp_create(test_pak); @@ -1195,7 +1196,7 @@ DP_START_TEST(ip6_secondary, ip6_secondary) /* Ingress dp1T0 */ (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T0"), - NULL, ETHER_TYPE_IPv6); + NULL, RTE_ETHER_TYPE_IPV6); /* Create pak we expect to see in local_packet */ exp = dp_test_exp_create(test_pak); @@ -1231,7 +1232,7 @@ DP_START_TEST(ip6_tertiary, ip6_tertiary) /* Ingress dp1T0 */ (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T0"), - NULL, ETHER_TYPE_IPv6); + NULL, RTE_ETHER_TYPE_IPV6); /* Create pak we expect to see in local_packet */ exp = dp_test_exp_create(test_pak); @@ -1248,7 +1249,7 @@ DP_START_TEST(ip6_tertiary, ip6_tertiary) 1, &len); (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T0"), - NULL, ETHER_TYPE_IPv6); + NULL, RTE_ETHER_TYPE_IPV6); exp = dp_test_exp_create(test_pak); dp_test_exp_set_fwd_status(exp, DP_TEST_FWD_DROPPED); @@ -1358,19 +1359,19 @@ struct 
nh_info { test_pak = dp_test_create_ipv6_pak(src_addr, dest_addr, \ 1, &len); \ dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T0"), \ - SRC_MAC_STR, ETHER_TYPE_IPv6); \ + SRC_MAC_STR, RTE_ETHER_TYPE_IPV6); \ /* Create pak we expect to receive on the tx ring */ \ exp = dp_test_exp_create(test_pak); \ - if (nh.drop) { \ + if ((nh).drop) { \ /* Is dropped, but we send an icmp unreachable */ \ dp_test_exp_set_oif_name(exp, "dp1T0"); \ dp_test_exp_set_check_len(exp, 0); \ } else { \ - dp_test_exp_set_oif_name(exp, nh.nh_int); \ + dp_test_exp_set_oif_name(exp, (nh).nh_int); \ (void)dp_test_pktmbuf_eth_init(dp_test_exp_get_pak(exp), \ - nh.nh_mac_str, \ - dp_test_intf_name2mac_str(nh.nh_int), \ - ETHER_TYPE_IPv6); \ + (nh).nh_mac_str, \ + dp_test_intf_name2mac_str((nh).nh_int), \ + RTE_ETHER_TYPE_IPV6); \ dp_test_ipv6_decrement_ttl(dp_test_exp_get_pak(exp)); \ } \ dp_test_pak_receive(test_pak, "dp1T0", exp); \ @@ -1737,7 +1738,7 @@ DP_START_TEST_DONT_RUN(ip6_fwd, ll_dst) "fe80::5054:ff:fe79:3f5", 1, &len); dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T0"), - NULL, ETHER_TYPE_IPv6); + NULL, RTE_ETHER_TYPE_IPV6); exp = dp_test_exp_create(test_pak); dp_test_exp_set_fwd_status(exp, DP_TEST_FWD_LOCAL); @@ -1752,7 +1753,7 @@ DP_START_TEST_DONT_RUN(ip6_fwd, ll_dst) "fe80::5054:ff:fe79:3f5", 1, &len); dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T0"), - NULL, ETHER_TYPE_IPv6); + NULL, RTE_ETHER_TYPE_IPV6); exp = dp_test_exp_create(test_pak); /* the kernel will deal with verifying the scope */ @@ -1769,7 +1770,7 @@ DP_START_TEST_DONT_RUN(ip6_fwd, ll_dst) "fe80::7054:ff:fe79:3f5", 1, &len); dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T0"), - NULL, ETHER_TYPE_IPv6); + NULL, RTE_ETHER_TYPE_IPV6); exp = dp_test_exp_create_m(test_pak, 2); dp_test_exp_set_oif_name(exp, "dp1T0"); @@ -1787,7 +1788,7 @@ DP_START_TEST_DONT_RUN(ip6_fwd, ll_dst) dp_test_pktmbuf_eth_init(icmp_pak, nh_mac_str1, 
dp_test_intf_name2mac_str("dp1T0"), - ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV6); nd_rd = (struct nd_redirect *)icmp6; nd_rd->nd_rd_type = ND_REDIRECT; nd_rd->nd_rd_code = 0; @@ -1811,7 +1812,7 @@ DP_START_TEST_DONT_RUN(ip6_fwd, ll_dst) dp_test_exp_set_pak_m(exp, 0, icmp_pak); - optlen = (sizeof(struct nd_opt_hdr) + ETHER_ADDR_LEN + 7) & ~7; + optlen = (sizeof(struct nd_opt_hdr) + RTE_ETHER_ADDR_LEN + 7) & ~7; icmplen = sizeof(struct nd_neighbor_solicit) - sizeof(struct icmp6_hdr) + optlen; icmp_pak = dp_test_create_icmp_ipv6_pak("fe80::5054:ff:fe79:3f5", @@ -1825,7 +1826,7 @@ DP_START_TEST_DONT_RUN(ip6_fwd, ll_dst) dp_test_pktmbuf_eth_init(icmp_pak, "33:33:ff:79:03:f5", dp_test_intf_name2mac_str("dp1T0"), - ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV6); ip6->ip6_hlim = 255; @@ -1836,8 +1837,9 @@ DP_START_TEST_DONT_RUN(ip6_fwd, ll_dst) memset((void *)nd_opt, 0, optlen); nd_opt->nd_opt_type = ND_OPT_SOURCE_LINKADDR; nd_opt->nd_opt_len = optlen >> 3; - ether_addr_copy(&rte_pktmbuf_mtod(test_pak, struct ether_hdr *)->d_addr, - (struct ether_addr *)(nd_opt + 1)); + rte_ether_addr_copy(&rte_pktmbuf_mtod(test_pak, + struct rte_ether_hdr *)->d_addr, + (struct rte_ether_addr *)(nd_opt + 1)); icmp6->icmp6_cksum = 0; icmp6->icmp6_cksum = @@ -1997,3 +1999,25 @@ DP_START_TEST(ip6_default_route, ip6_default_route2) } DP_END_TEST; +DP_DECL_TEST_CASE(ip6_suite, ip6_pic_edge, NULL, NULL); +DP_START_TEST(ip6_pic_edge, ip6_pic_edge) +{ + dp_test_nl_add_ip_addr_and_connected("dp1T1", "2001:1:1::1/64"); + dp_test_nl_add_ip_addr_and_connected("dp2T1", "2002:2:2::2/64"); + + dp_test_netlink_add_route( + "2010:0:1::/64 nh 2001:1:1::2 int:dp1T1 nh 2002:2:2::1 int:dp2T1 backup"); + dp_test_netlink_del_route( + "2010:0:1::/64 nh 2001:1:1::2 int:dp1T1 nh 2002:2:2::1 int:dp2T1 backup"); + + /* This is a full service dataplane - we support both orders! 
*/ + dp_test_netlink_add_route( + "2010:0:1::/64 nh 2001:1:1::2 int:dp1T1 backup nh 2002:2:2::1 int:dp2T1"); + dp_test_netlink_del_route( + "2010:0:1::/64 nh 2001:1:1::2 int:dp1T1 backup nh 2002:2:2::1 int:dp2T1"); + + /* Clean Up */ + dp_test_nl_del_ip_addr_and_connected("dp1T1", "2001:1:1::1/64"); + dp_test_nl_del_ip_addr_and_connected("dp2T1", "2002:2:2::2/64"); + +} DP_END_TEST; diff --git a/tests/whole_dp/src/dp_test_ip6_icmp.c b/tests/whole_dp/src/dp_test_ip6_icmp.c index 2f87fcc3..2883c484 100644 --- a/tests/whole_dp/src/dp_test_ip6_icmp.c +++ b/tests/whole_dp/src/dp_test_ip6_icmp.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -11,10 +11,10 @@ #include "ip6_funcs.h" #include "dp_test.h" -#include "dp_test_netlink_state.h" -#include "dp_test_lib.h" +#include "dp_test_netlink_state_internal.h" +#include "dp_test_lib_internal.h" #include "dp_test_lib_exp.h" -#include "dp_test_pktmbuf_lib.h" +#include "dp_test_pktmbuf_lib_internal.h" DP_DECL_TEST_SUITE(ip6_icmp_suite); @@ -51,7 +51,7 @@ DP_START_TEST(ip6_icmp, too_big) test_pak = dp_test_create_ipv6_pak("2001:1:1::2", "2010:73:2::2", 1, &len); dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T0"), - neigh1_mac_str, ETHER_TYPE_IPv6); + neigh1_mac_str, RTE_ETHER_TYPE_IPV6); /* @@ -68,7 +68,7 @@ DP_START_TEST(ip6_icmp, too_big) (void)dp_test_pktmbuf_eth_init(icmp_pak, neigh1_mac_str, dp_test_intf_name2mac_str("dp1T0"), - ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV6); /* Forwarding code will have already decremented hop limit */ in6_inner = (struct ip6_hdr *)(icmp6 + 1); @@ -122,7 +122,7 @@ DP_START_TEST(ip6_icmp, ttl) test_pak = dp_test_create_ipv6_pak("2001:1:1::2", "2010:73:2::2", 1, &len); dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T0"), - neigh1_mac_str, 
ETHER_TYPE_IPv6); + neigh1_mac_str, RTE_ETHER_TYPE_IPV6); ip6hdr(test_pak)->ip6_hlim = 1; /* @@ -139,7 +139,7 @@ DP_START_TEST(ip6_icmp, ttl) (void)dp_test_pktmbuf_eth_init(icmp_pak, neigh1_mac_str, dp_test_intf_name2mac_str("dp1T0"), - ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV6); exp = dp_test_exp_create(icmp_pak); rte_pktmbuf_free(icmp_pak); diff --git a/tests/whole_dp/src/dp_test_ip6_neigh.c b/tests/whole_dp/src/dp_test_ip6_neigh.c index cc9353b3..3fbead47 100644 --- a/tests/whole_dp/src/dp_test_ip6_neigh.c +++ b/tests/whole_dp/src/dp_test_ip6_neigh.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2018-2020, AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only */ @@ -8,6 +8,7 @@ #include #include "ip_funcs.h" +#include "in6.h" #include "in_cksum.h" #include "if_var.h" #include "main.h" @@ -15,11 +16,11 @@ #include "dp_test.h" #include "dp_test_cmd_state.h" #include "dp_test_controller.h" -#include "dp_test_netlink_state.h" -#include "dp_test_lib.h" -#include "dp_test_lib_intf.h" +#include "dp_test_netlink_state_internal.h" +#include "dp_test_lib_internal.h" +#include "dp_test_lib_intf_internal.h" #include "dp_test_lib_exp.h" -#include "dp_test_pktmbuf_lib.h" +#include "dp_test_pktmbuf_lib_internal.h" struct nh_info { const char *nh_int; @@ -44,7 +45,7 @@ static void _build_and_send_pak(const char *src_addr, const char *dest_addr, dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T0"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV6); if (nh.drop || nh.resolve) { exp = dp_test_exp_create(test_pak); dp_test_exp_set_fwd_status(exp, DP_TEST_FWD_DROPPED); @@ -55,7 +56,7 @@ static void _build_and_send_pak(const char *src_addr, const char *dest_addr, nh.nh_mac_str, dp_test_intf_name2mac_str( nh.nh_int), - ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV6); dp_test_ipv6_decrement_ttl(dp_test_exp_get_pak(exp)); dp_test_exp_set_oif_name(exp, 
nh.nh_int); } @@ -1213,3 +1214,74 @@ DP_START_TEST(ip_neigh_nh_share, dp_test_nl_del_ip_addr_and_connected("dp1T2", "2004:4:4::4/64"); } DP_END_TEST; + +DP_DECL_TEST_CASE(ip_neigh_suite, ip_neigh_nh_scale, NULL, NULL); +/* + * Not run by default due to the time taken to set up and remove all + * the neigh entries. This test is useful for testing efficiency of + * the route processing code, with minor additions into the code + * to print out time taken processing routes. + */ +DP_START_TEST_DONT_RUN(ip_neigh_nh_scale, + ip_neigh_nh_scale) +{ + char nh_mac_str[18]; + struct rte_ether_addr start_eth_addr = { + { 0xf0, 0x0, 0x0, 0x0, 0x0, 0x0 } + }; + struct rte_ether_addr rte_ether_addr; + struct in6_addr ip_addr, start_ip_addr, tmp_ip_addr; + char ip_addr_str[INET6_ADDRSTRLEN] = "2002:2:2::3"; + int i; + int num_neighs = 3000; + uint32_t *ether; + uint32_t tmp_s6_addr32; + + /* Set up the interface addresses */ + dp_test_nl_add_ip_addr_and_connected("dp1T0", "2001:1:1::1/64"); + dp_test_nl_add_ip_addr_and_connected("dp1T1", "2002:2:2::2/64"); + + /* Initialise IP and MAC addresses */ + if (!inet_pton(AF_INET6, ip_addr_str, &start_ip_addr)) + assert(0); + ip_addr = start_ip_addr; + rte_ether_addr = start_eth_addr; + ether = (uint32_t *)&rte_ether_addr; + + /* Add neighbours */ + for (i = 0; i < num_neighs; i++) { + tmp_ip_addr = ip_addr; + if (!inet_ntop(AF_INET6, &tmp_ip_addr, ip_addr_str, + INET6_ADDRSTRLEN)) + assert(0); + if (!ether_ntoa_r(&rte_ether_addr, nh_mac_str)) + assert(0); + dp_test_netlink_add_neigh("dp1T1", ip_addr_str, nh_mac_str); + tmp_s6_addr32 = ntohl(ip_addr.s6_addr32[3]); + ip_addr.s6_addr32[3] = htonl(++tmp_s6_addr32); + (*ether)++; + } + + dp_test_netlink_add_route("3003:3:3::3/128 nh 2002:2:2::3 int:dp1T1"); + dp_test_netlink_del_route("3003:3:3::3/128 nh 2002:2:2::3 int:dp1T1"); + + /* Delete neighbours */ + ip_addr = start_ip_addr; + rte_ether_addr = start_eth_addr; + for (i = 0; i < num_neighs; i++) { + tmp_ip_addr = ip_addr; + if
(!inet_ntop(AF_INET6, &tmp_ip_addr, ip_addr_str, + INET6_ADDRSTRLEN)) + assert(0); + if (!ether_ntoa_r(&rte_ether_addr, nh_mac_str)) + assert(0); + dp_test_netlink_del_neigh("dp1T1", ip_addr_str, nh_mac_str); + tmp_s6_addr32 = ntohl(ip_addr.s6_addr32[3]); + ip_addr.s6_addr32[3] = htonl(++tmp_s6_addr32); + (*ether)++; + } + + dp_test_nl_del_ip_addr_and_connected("dp1T0", "2001:1:1::1/64"); + dp_test_nl_del_ip_addr_and_connected("dp1T1", "2002:2:2::2/64"); + +} DP_END_TEST; diff --git a/tests/whole_dp/src/dp_test_ip_arp.c b/tests/whole_dp/src/dp_test_ip_arp.c index 70475680..15e6f0ce 100644 --- a/tests/whole_dp/src/dp_test_ip_arp.c +++ b/tests/whole_dp/src/dp_test_ip_arp.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only */ @@ -16,11 +16,11 @@ #include "dp_test.h" #include "dp_test_cmd_state.h" #include "dp_test_controller.h" -#include "dp_test_netlink_state.h" -#include "dp_test_lib.h" -#include "dp_test_lib_intf.h" +#include "dp_test_netlink_state_internal.h" +#include "dp_test_lib_internal.h" +#include "dp_test_lib_intf_internal.h" #include "dp_test_lib_exp.h" -#include "dp_test_pktmbuf_lib.h" +#include "dp_test_pktmbuf_lib_internal.h" #include "dp_test_console.h" struct nh_info { @@ -47,7 +47,7 @@ static void _build_and_send_pak(const char *src_addr, const char *dest_addr, dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T0"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); if (nh.arp) { char intf_addr[INET_ADDRSTRLEN]; @@ -76,7 +76,7 @@ static void _build_and_send_pak(const char *src_addr, const char *dest_addr, nh.nh_mac_str, dp_test_intf_name2mac_str( nh.nh_int), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); dp_test_ipv4_decrement_ttl(dp_test_exp_get_pak(exp)); dp_test_exp_set_oif_name(exp, nh.nh_int); } @@ -1371,8 +1371,9 @@ 
DP_START_TEST_DONT_RUN(ip_arp_nh_scale, ip_arp_nh_scale) { char nh_mac_str[18]; - struct ether_addr start_eth_addr = { { 0xf0, 0x0, 0x0, 0x0, 0x0, 0x0 } }; - struct ether_addr ether_addr; + struct rte_ether_addr start_eth_addr = { + { 0xf0, 0x0, 0x0, 0x0, 0x0, 0x0 } }; + struct rte_ether_addr rte_ether_addr; int start_ip_addr = 0x02020301; char ip_addr_str[INET_ADDRSTRLEN]; int ip_addr; @@ -1386,14 +1387,14 @@ DP_START_TEST_DONT_RUN(ip_arp_nh_scale, /* add neighbours */ ip_addr = start_ip_addr; - ether_addr = start_eth_addr; - ether = (uint32_t *)&ether_addr; + rte_ether_addr = start_eth_addr; + ether = (uint32_t *)&rte_ether_addr; for (i = 0; i < num_neighs; i++) { int tmp_ip = htonl(ip_addr); if (!inet_ntop(AF_INET, &tmp_ip, ip_addr_str, INET_ADDRSTRLEN)) assert(0); - if (!ether_ntoa_r(&ether_addr, nh_mac_str)) + if (!ether_ntoa_r(&rte_ether_addr, nh_mac_str)) assert(0); dp_test_netlink_add_neigh("dp1T1", ip_addr_str, nh_mac_str); ip_addr++; @@ -1405,14 +1406,14 @@ DP_START_TEST_DONT_RUN(ip_arp_nh_scale, /* del neighbours */ ip_addr = start_ip_addr; - ether_addr = start_eth_addr; + rte_ether_addr = start_eth_addr; for (i = 0; i < num_neighs; i++) { int tmp_ip = htonl(ip_addr); if (!inet_ntop(AF_INET, &tmp_ip, ip_addr_str, INET_ADDRSTRLEN)) assert(0); - if (!ether_ntoa_r(&ether_addr, nh_mac_str)) + if (!ether_ntoa_r(&rte_ether_addr, nh_mac_str)) assert(0); dp_test_netlink_del_neigh("dp1T1", ip_addr_str, nh_mac_str); diff --git a/tests/whole_dp/src/dp_test_ip_icmp.c b/tests/whole_dp/src/dp_test_ip_icmp.c index a56d5c4e..dfdb7a92 100644 --- a/tests/whole_dp/src/dp_test_ip_icmp.c +++ b/tests/whole_dp/src/dp_test_ip_icmp.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015-2016 by Brocade Communications Systems, Inc. * All rights reserved.
* @@ -10,10 +10,10 @@ #include "ip_funcs.h" #include "dp_test.h" -#include "dp_test_netlink_state.h" -#include "dp_test_lib.h" +#include "dp_test_netlink_state_internal.h" +#include "dp_test_lib_internal.h" #include "dp_test_lib_exp.h" -#include "dp_test_pktmbuf_lib.h" +#include "dp_test_pktmbuf_lib_internal.h" #include "dp_test_controller.h" DP_DECL_TEST_SUITE(ip_icmp_suite); @@ -56,7 +56,7 @@ DP_START_TEST(ip_icmp, df) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp3T3"), - neigh3_mac_str, ETHER_TYPE_IPv4); + neigh3_mac_str, RTE_ETHER_TYPE_IPV4); /* * Expected packet @@ -73,7 +73,7 @@ DP_START_TEST(ip_icmp, df) (void)dp_test_pktmbuf_eth_init(icmp_pak, neigh3_mac_str, dp_test_intf_name2mac_str("dp3T3"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); dp_test_set_pak_ip_field(ip, DP_TEST_SET_TOS, IPTOS_PREC_INTERNETCONTROL); @@ -146,7 +146,7 @@ DP_START_TEST(ip_icmp, df2) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp2T2"), - neigh2_mac_str, ETHER_TYPE_IPv4); + neigh2_mac_str, RTE_ETHER_TYPE_IPV4); /* * Expected packet @@ -163,7 +163,7 @@ DP_START_TEST(ip_icmp, df2) (void)dp_test_pktmbuf_eth_init(icmp_pak, neigh2_mac_str, dp_test_intf_name2mac_str("dp2T2"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); dp_test_set_pak_ip_field(ip, DP_TEST_SET_TOS, IPTOS_PREC_INTERNETCONTROL); @@ -244,7 +244,7 @@ DP_START_TEST(ip_icmp, df3) (void)dp_test_pktmbuf_eth_init(test_pak, macvlan_mac, - neigh1_mac_str, ETHER_TYPE_IPv4); + neigh1_mac_str, RTE_ETHER_TYPE_IPV4); /* * Expected packet @@ -261,7 +261,7 @@ DP_START_TEST(ip_icmp, df3) (void)dp_test_pktmbuf_eth_init(icmp_pak, neigh1_mac_str, macvlan_mac, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); dp_test_set_pak_ip_field(ip, DP_TEST_SET_TOS, IPTOS_PREC_INTERNETCONTROL); @@ -332,7 +332,7 @@ DP_START_TEST(ip_icmp, ttl) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), - NULL, ETHER_TYPE_IPv4); + NULL, RTE_ETHER_TYPE_IPV4); /* * Expected packet @@ -348,7 +348,7 @@ 
DP_START_TEST(ip_icmp, ttl) (void)dp_test_pktmbuf_eth_init(icmp_pak, nh_mac_str, dp_test_intf_name2mac_str("dp1T1"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); dp_test_set_pak_ip_field(ip, DP_TEST_SET_TOS, IPTOS_PREC_INTERNETCONTROL); @@ -377,7 +377,7 @@ DP_START_TEST(ip_icmp, ttl) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), - NULL, ETHER_TYPE_IPv4); + NULL, RTE_ETHER_TYPE_IPV4); exp = dp_test_exp_create(test_pak); dp_test_exp_set_fwd_status(exp, DP_TEST_FWD_LOCAL); @@ -419,7 +419,7 @@ DP_START_TEST(ip_icmp, noroute) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), - NULL, ETHER_TYPE_IPv4); + NULL, RTE_ETHER_TYPE_IPV4); /* * Expected packet @@ -435,7 +435,7 @@ DP_START_TEST(ip_icmp, noroute) (void)dp_test_pktmbuf_eth_init(icmp_pak, nh_mac_str, dp_test_intf_name2mac_str("dp1T1"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); dp_test_set_pak_ip_field(ip, DP_TEST_SET_TOS, IPTOS_PREC_INTERNETCONTROL); @@ -490,7 +490,7 @@ DP_START_TEST(ip_icmp, redirect) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), - NULL, ETHER_TYPE_IPv4); + NULL, RTE_ETHER_TYPE_IPV4); dp_test_addr_str_to_addr("1.1.1.3", &gw); payload = dp_test_cp_pak(test_pak); @@ -498,7 +498,7 @@ DP_START_TEST(ip_icmp, redirect) (void)dp_test_pktmbuf_eth_init(payload, nh_mac_str3, dp_test_intf_name2mac_str("dp1T1"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); /* * Expected packet @@ -514,7 +514,7 @@ DP_START_TEST(ip_icmp, redirect) (void)dp_test_pktmbuf_eth_init(icmp_pak, nh_mac_str2, dp_test_intf_name2mac_str("dp1T1"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); dp_test_set_pak_ip_field(ip, DP_TEST_SET_TOS, IPTOS_PREC_INTERNETCONTROL); @@ -586,7 +586,7 @@ DP_START_TEST(ip_icmp, noredirect) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), - NULL, ETHER_TYPE_IPv4); + NULL, RTE_ETHER_TYPE_IPV4); /* * Expected packet @@ -598,7 +598,7 @@ DP_START_TEST(ip_icmp, noredirect) 
(void)dp_test_pktmbuf_eth_init(dp_test_exp_get_pak(exp), nh_mac_str3, dp_test_intf_name2mac_str("dp1T1"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); dp_test_pak_receive(test_pak, "dp1T1", exp); @@ -643,7 +643,7 @@ DP_START_TEST(ip_icmp, rfc_redirect) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), - NULL, ETHER_TYPE_IPv4); + NULL, RTE_ETHER_TYPE_IPV4); /* * Expected packet @@ -655,7 +655,7 @@ DP_START_TEST(ip_icmp, rfc_redirect) (void)dp_test_pktmbuf_eth_init(dp_test_exp_get_pak(exp), nh_mac_str3, dp_test_intf_name2mac_str("dp1T1"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); dp_test_pak_receive(test_pak, "dp1T1", exp); diff --git a/tests/whole_dp/src/dp_test_ip_multicast.c b/tests/whole_dp/src/dp_test_ip_multicast.c index bad97684..aa1ced40 100644 --- a/tests/whole_dp/src/dp_test_ip_multicast.c +++ b/tests/whole_dp/src/dp_test_ip_multicast.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2018-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015-2016 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -11,9 +11,11 @@ #include "in_cksum.h" #include "dp_test_lib_exp.h" -#include "dp_test_macros.h" -#include "dp_test_netlink_state.h" -#include "dp_test_pktmbuf_lib.h" +#include "dp_test/dp_test_macros.h" +#include "dp_test_netlink_state_internal.h" +#include "dp_test_pktmbuf_lib_internal.h" +#include "dp_test_console.h" +#include "dp_test_cmd_check.h" DP_DECL_TEST_SUITE(ip_msuite); @@ -33,7 +35,7 @@ DP_START_TEST(ip_mfwd_1, local) test_pak = dp_test_create_ipv4_pak("10.73.1.1", multi_dest, 1, &len); dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T0"), - DP_TEST_INTF_DEF_SRC_MAC, ETHER_TYPE_IPv4); + DP_TEST_INTF_DEF_SRC_MAC, RTE_ETHER_TYPE_IPV4); /* Create pak we expect to receive on the tx ring */ exp = dp_test_exp_create(test_pak); @@ -62,7 +64,7 @@ DP_START_TEST(ip_mfwd_2, non_local) test_pak = dp_test_create_ipv4_pak("10.73.1.1", multi_dest, 1, &len); dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T0"), - DP_TEST_INTF_DEF_SRC_MAC, ETHER_TYPE_IPv4); + DP_TEST_INTF_DEF_SRC_MAC, RTE_ETHER_TYPE_IPV4); /* Create pak we expect to receive on the tx ring */ exp = dp_test_exp_create(test_pak); @@ -94,7 +96,7 @@ DP_START_TEST(ip_mfwd_3, limited_broadcast) test_pak = dp_test_create_ipv4_pak("10.73.1.1", multi_dest, 1, &len); dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T0"), - DP_TEST_INTF_DEF_SRC_MAC, ETHER_TYPE_IPv4); + DP_TEST_INTF_DEF_SRC_MAC, RTE_ETHER_TYPE_IPV4); /* Create pak we expect to receive on the tx ring */ exp = dp_test_exp_create(test_pak); @@ -106,3 +108,165 @@ DP_START_TEST(ip_mfwd_3, limited_broadcast) dp_test_nl_del_ip_addr_and_connected("dp1T0", "1.1.1.1/24"); dp_test_nl_del_ip_addr_and_connected("dp2T1", "2.2.2.2/24"); } DP_END_TEST; + +/* + * Multicast forwarding + * + * Enables multicast forwarding on three interfaces, adds a multicast route, + * injects one pkt to 224.0.1.1 and expects it to be forwarded on both output + * interfaces.
+ */ +DP_DECL_TEST_CASE(ip_msuite, ip_mfwd_4, NULL, NULL); +DP_START_TEST(ip_mfwd_4, dp_forwarding) +{ + const char *grp_dest = "224.0.1.1"; /* Not link local */ + const char *grp_mac = "01:00:5e:00:01:01"; + struct dp_test_expected *exp; + struct rte_mbuf *test_pak; + int len = 22; + + /* Set up the interface addresses */ + dp_test_nl_add_ip_addr_and_connected("dp1T0", "1.1.1.1/24"); + dp_test_netlink_netconf_mcast("dp1T0", AF_INET, true); + + dp_test_nl_add_ip_addr_and_connected("dp2T1", "2.2.2.2/24"); + dp_test_netlink_netconf_mcast("dp2T1", AF_INET, true); + + dp_test_nl_add_ip_addr_and_connected("dp2T2", "3.3.3.3/24"); + dp_test_netlink_netconf_mcast("dp2T2", AF_INET, true); + + /* Add multicast route */ + dp_test_mroute_nl(RTM_NEWROUTE, "10.73.1.1", "dp1T0", + "224.0.1.1/32 nh int:dp2T1 nh int:dp2T2"); + + dp_test_wait_for_mroute("10.73.1.1", "224.0.1.1", + "dpT10", "dpT21 dpT22", false); + + /* Create multicast pak */ + test_pak = dp_test_create_ipv4_pak("10.73.1.1", grp_dest, 1, &len); + dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T0"), + DP_TEST_INTF_DEF_SRC_MAC, RTE_ETHER_TYPE_IPV4); + + /* Create pak we expect to receive on the tx ring */ + exp = dp_test_exp_create_m(test_pak, 2); + dp_test_exp_set_oif_name_m(exp, 0, "dp2T1"); + dp_test_exp_set_oif_name_m(exp, 1, "dp2T2"); + + (void)dp_test_pktmbuf_eth_init(dp_test_exp_get_pak_m(exp, 0), + grp_mac, + dp_test_intf_name2mac_str("dp2T1"), + RTE_ETHER_TYPE_IPV4); + dp_test_ipv4_decrement_ttl(dp_test_exp_get_pak_m(exp, 0)); + + (void)dp_test_pktmbuf_eth_init(dp_test_exp_get_pak_m(exp, 1), + grp_mac, + dp_test_intf_name2mac_str("dp2T2"), + RTE_ETHER_TYPE_IPV4); + dp_test_ipv4_decrement_ttl(dp_test_exp_get_pak_m(exp, 1)); + + dp_test_pak_receive(test_pak, "dp1T0", exp); + + /* Clean Up */ + dp_test_mroute_nl(RTM_DELROUTE, "10.73.1.1", "dp1T0", + "224.0.1.1/32 nh int:dp2T1 nh int:dp2T2"); + + dp_test_wait_for_mroute("10.73.1.1", "224.0.1.1", + "dpT10", "dpT21 dpT22", true); + + 
dp_test_netlink_netconf_mcast("dp1T0", AF_INET, false); + dp_test_nl_del_ip_addr_and_connected("dp1T0", "1.1.1.1/24"); + + dp_test_netlink_netconf_mcast("dp2T1", AF_INET, false); + dp_test_nl_del_ip_addr_and_connected("dp2T1", "2.2.2.2/24"); + + dp_test_netlink_netconf_mcast("dp2T2", AF_INET, false); + dp_test_nl_del_ip_addr_and_connected("dp2T2", "3.3.3.3/24"); + +} DP_END_TEST; + +/* + * IPv6 multicast forwarding + */ +DP_DECL_TEST_CASE(ip_msuite, ip_mfwd_5, NULL, NULL); +DP_START_TEST(ip_mfwd_5, dp_forwarding) +{ + const char *grp_dest = "ff0e::1:1"; + const char *grp_mac = "33:33:00:01:00:01"; + struct dp_test_expected *exp; + struct rte_mbuf *test_pak; + int len = 22; + + dp_test_nl_add_ip_addr_and_connected("dp1T0", "2001:1:1::1/64"); + dp_test_nl_add_ip_addr_and_connected("dp2T1", "2002:2:2::1/64"); + dp_test_nl_add_ip_addr_and_connected("dp2T2", "2003:3:3::1/64"); + + dp_test_netlink_add_neigh("dp1T0", "2001:1:1::2", + "aa:bb:cc:dd:1:a1"); + dp_test_netlink_add_neigh("dp2T1", "2002:2:2::2", + "aa:bb:cc:dd:2:b2"); + dp_test_netlink_add_neigh("dp2T2", "2003:3:3::2", + "aa:bb:cc:dd:3:c3"); + + dp_test_netlink_netconf_mcast("dp1T0", AF_INET6, true); + dp_test_netlink_netconf_mcast("dp2T1", AF_INET6, true); + dp_test_netlink_netconf_mcast("dp2T2", AF_INET6, true); + + /* Add multicast route */ + dp_test_mroute_nl(RTM_NEWROUTE, "2001:1:1::2", "dp1T0", + "ff0e::1:1/128 nh int:dp2T1 nh int:dp2T2"); + + dp_test_wait_for_mroute("2001:1:1::2", "ff0e::1:1", + "dpT10", "dpT21 dpT22", false); + + /* Create multicast pak */ + test_pak = dp_test_create_ipv6_pak("2001:1:1::2", + grp_dest, + 1, &len); + + dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T0"), + DP_TEST_INTF_DEF_SRC_MAC, RTE_ETHER_TYPE_IPV6); + + /* Create pak we expect to receive on the tx ring */ + exp = dp_test_exp_create_m(test_pak, 2); + + dp_test_exp_set_oif_name_m(exp, 0, "dp2T1"); + dp_test_exp_set_oif_name_m(exp, 1, "dp2T2"); + + 
(void)dp_test_pktmbuf_eth_init(dp_test_exp_get_pak_m(exp, 0), + grp_mac, + dp_test_intf_name2mac_str("dp2T1"), + RTE_ETHER_TYPE_IPV6); + dp_test_ipv6_decrement_ttl(dp_test_exp_get_pak_m(exp, 0)); + + (void)dp_test_pktmbuf_eth_init(dp_test_exp_get_pak_m(exp, 1), + grp_mac, + dp_test_intf_name2mac_str("dp2T2"), + RTE_ETHER_TYPE_IPV6); + dp_test_ipv6_decrement_ttl(dp_test_exp_get_pak_m(exp, 1)); + + dp_test_pak_receive(test_pak, "dp1T0", exp); + + + /* Cleanup */ + dp_test_mroute_nl(RTM_DELROUTE, "2001:1:1::2", "dp1T0", + "ff0e::1:1/128 nh int:dp2T1 nh int:dp2T2"); + + dp_test_wait_for_mroute("2001:1:1::2", "ff0e::1:1", + "dpT10", "dpT21 dpT22", true); + + dp_test_netlink_netconf_mcast("dp1T0", AF_INET6, false); + dp_test_netlink_netconf_mcast("dp2T1", AF_INET6, false); + dp_test_netlink_netconf_mcast("dp2T2", AF_INET6, false); + + dp_test_netlink_del_neigh("dp1T0", "2001:1:1::2", + "aa:bb:cc:dd:1:a1"); + dp_test_netlink_del_neigh("dp2T1", "2002:2:2::2", + "aa:bb:cc:dd:2:b2"); + dp_test_netlink_del_neigh("dp2T2", "2003:3:3::2", + "aa:bb:cc:dd:3:c3"); + + dp_test_nl_del_ip_addr_and_connected("dp1T0", "2001:1:1::1/64"); + dp_test_nl_del_ip_addr_and_connected("dp2T1", "2002:2:2::1/64"); + dp_test_nl_del_ip_addr_and_connected("dp2T2", "2003:3:3::1/64"); + +} DP_END_TEST; diff --git a/tests/whole_dp/src/dp_test_ip_n.c b/tests/whole_dp/src/dp_test_ip_n.c index 2002342e..b5db8c05 100644 --- a/tests/whole_dp/src/dp_test_ip_n.c +++ b/tests/whole_dp/src/dp_test_ip_n.c @@ -1,4 +1,5 @@ /* + * Copyright (c) 2019-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -11,9 +12,9 @@ * single test. 
*/ #include "dp_test_lib_exp.h" -#include "dp_test_lib_intf.h" -#include "dp_test_macros.h" -#include "dp_test_netlink_state.h" +#include "dp_test_lib_intf_internal.h" +#include "dp_test/dp_test_macros.h" +#include "dp_test_netlink_state_internal.h" DP_DECL_TEST_SUITE(ip_suite_n); @@ -41,10 +42,10 @@ DP_START_TEST(ip_fwd_2, if_fwd_2) 1, &len); dp_test_pktmbuf_eth_init(rx_pak_n[0], dp_test_intf_name2mac_str("dp1T0"), - DP_TEST_INTF_DEF_SRC_MAC, ETHER_TYPE_IPv4); + DP_TEST_INTF_DEF_SRC_MAC, RTE_ETHER_TYPE_IPV4); dp_test_pktmbuf_eth_init(rx_pak_n[1], dp_test_intf_name2mac_str("dp1T0"), - DP_TEST_INTF_DEF_SRC_MAC, ETHER_TYPE_IPv4); + DP_TEST_INTF_DEF_SRC_MAC, RTE_ETHER_TYPE_IPV4); /* Create pak we expect to receive on the tx ring */ exp = dp_test_exp_create_m(rx_pak_n[0], 1); @@ -55,11 +56,11 @@ DP_START_TEST(ip_fwd_2, if_fwd_2) (void)dp_test_pktmbuf_eth_init(dp_test_exp_get_pak_m(exp, 0), nh_mac_str, dp_test_intf_name2mac_str("dp2T1"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); (void)dp_test_pktmbuf_eth_init(dp_test_exp_get_pak_m(exp, 1), nh_mac_str, dp_test_intf_name2mac_str("dp2T1"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); dp_test_ipv4_decrement_ttl(dp_test_exp_get_pak_m(exp, 0)); dp_test_ipv4_decrement_ttl(dp_test_exp_get_pak_m(exp, 1)); @@ -97,7 +98,7 @@ DP_START_TEST(ip_fwd_n, if_fwd_n) dp_test_pktmbuf_eth_init(rx_pak_n[i], dp_test_intf_name2mac_str("dp1T0"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); /* Create paks we expect to receive on the tx ring */ if (i == 0) @@ -108,7 +109,7 @@ DP_START_TEST(ip_fwd_n, if_fwd_n) dp_test_pktmbuf_eth_init(dp_test_exp_get_pak_m(exp, i), nh_mac_str, dp_test_intf_name2mac_str("dp2T1"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); dp_test_ipv4_decrement_ttl(dp_test_exp_get_pak_m(exp, i)); dp_test_exp_set_oif_name_m(exp, i, "dp2T1"); } diff --git a/tests/whole_dp/src/dp_test_ip_pic_edge.c b/tests/whole_dp/src/dp_test_ip_pic_edge.c new file mode 100644 index 00000000..1bb885b2 --- /dev/null +++ 
b/tests/whole_dp/src/dp_test_ip_pic_edge.c @@ -0,0 +1,912 @@ +/* + * Copyright (c) 2020, AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + * + * dataplane UT IP pic edge tests + */ + +#include +#include + +#include "ip_funcs.h" +#include "in_cksum.h" +#include "if_var.h" +#include "main.h" + +#include "dp_test.h" +#include "dp_test_controller.h" +#include "dp_test_netlink_state_internal.h" +#include "dp_test_lib_internal.h" +#include "dp_test_lib_intf_internal.h" +#include "dp_test_lib_exp.h" + +#include "dp_test_pktmbuf_lib_internal.h" + +static void dp_test_map_count_build_expected(char *expected, + int exp_size, + const char *addr, + int count, int list[]) +{ + int i; + int written; + + written = spush(expected, exp_size, + "\"nh_map_count\":%d," + "\"nh_map\":[", + count); + + for (i = 0; i < count; i++) { + written += spush(expected + written, + exp_size - written, + "%d%s", + list[i], + i == count - 1 ? "" : ","); + + } + + written += spush(expected + written, exp_size - written, + "],"); + + (void) written; +} + +static void _dp_test_verify_nh_map_count(const char *addr, + int count, int list[], + const char *file, + const char *func, int line) +{ + char cmd[100]; + char expected[DP_TEST_TMP_BUF]; + struct dp_test_addr addr_ptr; + + dp_test_assert_internal(dp_test_addr_str_to_addr(addr, &addr_ptr)); + if (addr_ptr.family == AF_INET) + snprintf(cmd, sizeof(cmd), "route lookup %s", addr); + else + snprintf(cmd, sizeof(cmd), "route6 lookup %s", addr); + dp_test_map_count_build_expected(expected, sizeof(expected), + addr, count, list); + + _dp_test_check_state_show(file, line, cmd, expected, false, + DP_TEST_CHECK_STR_SUBSET); +} + +static void _dp_test_verify_nh_map_count_one_of(const char *addr, + int count, int list_size, + int *lists[], + const char *file, + const char *func, int line) +{ + char cmd[100]; + char *expected[list_size]; + int i; + + snprintf(cmd, sizeof(cmd), "route lookup %s", addr); + + for (i = 
0; i < list_size; i++) { + expected[i] = malloc(DP_TEST_TMP_BUF); + assert(expected[i]); + + dp_test_map_count_build_expected(expected[i], + DP_TEST_TMP_BUF, + addr, count, lists[i]); + } + + _dp_test_check_state_show_one_of(file, line, cmd, list_size, + (const char **)&expected, false, + DP_TEST_CHECK_STR_SUBSET); + for (i = 0; i < list_size; i++) + free(expected[i]); +} + +#define dp_test_verify_nh_map_count(addr, count, list) \ + _dp_test_verify_nh_map_count(addr, count, list, \ + __FILE__, __func__, __LINE__) + +#define dp_test_verify_nh_map_count_one_of(addr, count, list_size, lists) \ + _dp_test_verify_nh_map_count_one_of(addr, count, list_size, lists, \ + __FILE__, __func__, __LINE__) + + +DP_DECL_TEST_SUITE(ip_pic_edge_suite); + +DP_DECL_TEST_CASE(ip_pic_edge_suite, ip_pic_edge1, NULL, NULL); +DP_START_TEST(ip_pic_edge1, ip_pic_edge1) +{ + int map_list1[] = { 0 }; + int map_list2[] = { 1 }; + + dp_test_nl_add_ip_addr_and_connected("dp1T1", "1.1.1.1/24"); + dp_test_nl_add_ip_addr_and_connected("dp2T1", "2.2.2.2/24"); + + dp_test_netlink_add_route( + "10.0.1.0/24 nh 1.1.1.2 int:dp1T1 nh 2.2.2.1 int:dp2T1 backup"); + dp_test_verify_nh_map_count("10.0.1.0", 1, map_list1); + dp_test_netlink_del_route( + "10.0.1.0/24 nh 1.1.1.2 int:dp1T1 nh 2.2.2.1 int:dp2T1 backup"); + + /* This is a full service dataplane - we support both orders! 
*/ + dp_test_netlink_add_route( + "10.0.1.0/24 nh 1.1.1.2 int:dp1T1 backup nh 2.2.2.1 int:dp2T1"); + dp_test_verify_nh_map_count("10.0.1.0", 1, map_list2); + dp_test_netlink_del_route( + "10.0.1.0/24 nh 1.1.1.2 int:dp1T1 backup nh 2.2.2.1 int:dp2T1"); + + /* Clean Up */ + dp_test_nl_del_ip_addr_and_connected("dp1T1", "1.1.1.1/24"); + dp_test_nl_del_ip_addr_and_connected("dp2T1", "2.2.2.2/24"); + +} DP_END_TEST; + +/* + * Check that the maps are updated correctly when 3 primary, 1 backup + */ +DP_DECL_TEST_CASE(ip_pic_edge_suite, ip_pic_edge2, NULL, NULL); +DP_START_TEST(ip_pic_edge2, ip_pic_edge2) +{ + int map_list1[] = { 0, 1, 2, 0, 1, 2 }; + int map_list1a[] = { 1, 0, 1, 0, 2, 2 }; + int map_list2[] = { 0, 0, 2, 0, 2, 2 }; + int map_list3[] = { 2, 2, 2, 2, 2, 2 }; + int map_list4[] = { 3, 3, 3, 3, 3, 3 }; + + dp_test_nl_add_ip_addr_and_connected("dp1T1", "1.1.1.1/24"); + dp_test_nl_add_ip_addr_and_connected("dp2T1", "2.2.2.2/24"); + dp_test_nl_add_ip_addr_and_connected("dp3T1", "3.3.3.3/24"); + dp_test_nl_add_ip_addr_and_connected("dp4T1", "4.4.4.4/24"); + + dp_test_netlink_add_route( + "10.0.1.0/24 " + "nh 1.1.1.2 int:dp1T1 " + "nh 2.2.2.1 int:dp2T1 " + "nh 3.3.3.1 int:dp3T1 " + "nh 4.4.4.1 int:dp4T1 backup"); + + dp_test_verify_nh_map_count("10.0.1.4", 6, map_list1); + + /* Make an intf/nh we are not using unusable - no map change */ + dp_test_make_nh_unusable("dp2T1", "2.2.2.3"); + dp_test_verify_nh_map_count("10.0.1.4", 6, map_list1); + + /* Make an intf we are not using unusable - no map change */ + dp_test_make_nh_unusable("dp2T2", NULL); + dp_test_verify_nh_map_count("10.0.1.4", 6, map_list1); + + /* Making it unusable should force a map rebuild */ + dp_test_make_nh_unusable("dp2T1", NULL); + dp_test_verify_nh_map_count("10.0.1.4", 6, map_list2); + + /* + * Make the nh usable again - does not change to the previous + * state - but goes to a new fair state.
+ */ + dp_test_make_nh_usable("dp2T1", NULL); + dp_test_verify_nh_map_count("10.0.1.4", 6, map_list1a); + + /* Making it unusable should force a map rebuild */ + dp_test_make_nh_unusable("dp2T1", NULL); + dp_test_verify_nh_map_count("10.0.1.4", 6, map_list2); + + /* Making it unusable should force a map rebuild */ + dp_test_make_nh_unusable("dp1T1", "1.1.1.2"); + dp_test_verify_nh_map_count("10.0.1.4", 6, map_list3); + + /* Making it unusable should force a map rebuild */ + dp_test_make_nh_unusable("dp3T1", "3.3.3.1"); + dp_test_verify_nh_map_count("10.0.1.4", 6, map_list4); + + /* Making it usable should force a map rebuild */ + dp_test_make_nh_usable("dp3T1", "3.3.3.1"); + dp_test_verify_nh_map_count("10.0.1.4", 6, map_list3); + + dp_test_netlink_del_route( + "10.0.1.0/24 " + "nh 1.1.1.2 int:dp1T1 " + "nh 2.2.2.1 int:dp2T1 " + "nh 3.3.3.1 int:dp3T1 " + "nh 4.4.4.1 int:dp4T1 backup"); + + /* Clean Up */ + dp_test_nl_del_ip_addr_and_connected("dp1T1", "1.1.1.1/24"); + dp_test_nl_del_ip_addr_and_connected("dp2T1", "2.2.2.2/24"); + dp_test_nl_del_ip_addr_and_connected("dp3T1", "3.3.3.3/24"); + dp_test_nl_del_ip_addr_and_connected("dp4T1", "4.4.4.4/24"); + dp_test_clear_path_unusable(); + +} DP_END_TEST; + +/* + * Check that the maps are updated correctly when 3 primary, 2 backup + */ +DP_DECL_TEST_CASE(ip_pic_edge_suite, ip_pic_edge3, NULL, NULL); +DP_START_TEST(ip_pic_edge3, ip_pic_edge3) +{ + int map_list1[] = { 0, 1, 2, 0, 1, 2 }; + int map_list2[] = { 0, 0, 2, 0, 2, 2 }; + int map_list3[] = { 2, 2, 2, 2, 2, 2 }; + int map_list4[] = { 3, 4, 3, 4, 3, 4 }; + + dp_test_nl_add_ip_addr_and_connected("dp1T1", "1.1.1.1/24"); + dp_test_nl_add_ip_addr_and_connected("dp2T1", "2.2.2.2/24"); + dp_test_nl_add_ip_addr_and_connected("dp3T1", "3.3.3.3/24"); + dp_test_nl_add_ip_addr_and_connected("dp4T1", "4.4.4.4/24"); + dp_test_nl_add_ip_addr_and_connected("dp4T2", "5.5.5.5/24"); + + dp_test_netlink_add_route( + "10.0.1.0/24 " + "nh 1.1.1.2 int:dp1T1 " + "nh 2.2.2.1 int:dp2T1 "
+ "nh 3.3.3.1 int:dp3T1 " + "nh 4.4.4.1 int:dp4T1 backup " + "nh 5.5.5.1 int:dp4T2 backup"); + + dp_test_verify_nh_map_count("10.0.1.4", 6, map_list1); + + /* Making it unusable should force a map rebuild */ + dp_test_make_nh_unusable("dp2T1", "2.2.2.1"); + dp_test_verify_nh_map_count("10.0.1.4", 6, map_list2); + + /* Making it unusable should force a map rebuild */ + dp_test_make_nh_unusable("dp1T1", "1.1.1.2"); + dp_test_verify_nh_map_count("10.0.1.4", 6, map_list3); + + /* Making it unusable should force a map rebuild */ + dp_test_make_nh_unusable("dp3T1", "3.3.3.1"); + dp_test_verify_nh_map_count("10.0.1.4", 6, map_list4); + + dp_test_netlink_del_route( + "10.0.1.0/24 " + "nh 1.1.1.2 int:dp1T1 " + "nh 2.2.2.1 int:dp2T1 " + "nh 3.3.3.1 int:dp3T1 " + "nh 4.4.4.1 int:dp4T1 backup " + "nh 5.5.5.1 int:dp4T2 backup"); + + /* Clean Up */ + dp_test_nl_del_ip_addr_and_connected("dp1T1", "1.1.1.1/24"); + dp_test_nl_del_ip_addr_and_connected("dp2T1", "2.2.2.2/24"); + dp_test_nl_del_ip_addr_and_connected("dp3T1", "3.3.3.3/24"); + dp_test_nl_del_ip_addr_and_connected("dp4T1", "4.4.4.4/24"); + dp_test_nl_del_ip_addr_and_connected("dp4T2", "5.5.5.5/24"); + dp_test_clear_path_unusable(); + +} DP_END_TEST; + +/* + * Create multiple NHs and check that all are marked unusable when + * the update signal arrives. 
+ */ +DP_DECL_TEST_CASE(ip_pic_edge_suite, ip_pic_edge4, NULL, NULL); +DP_START_TEST(ip_pic_edge4, ip_pic_edge4) +{ + int map_list1[] = { 0 }; /* expected map before any nh is made unusable (presumably entry 0 is the primary path) */ + int map_list2[] = { 1 }; /* expected map once the primary is unusable (presumably entry 1 is the backup path) */ + + dp_test_nl_add_ip_addr_and_connected("dp1T1", "1.1.1.1/24"); + dp_test_nl_add_ip_addr_and_connected("dp2T1", "2.2.2.2/24"); + dp_test_nl_add_ip_addr_and_connected("dp3T1", "3.3.3.3/24"); + + dp_test_netlink_add_route( + "10.0.1.0/24 " + "nh 1.1.1.2 int:dp1T1 " + "nh 2.2.2.1 int:dp2T1 backup"); + + dp_test_netlink_add_route( + "10.0.2.0/24 " + "nh 1.1.1.2 int:dp1T1 " + "nh 2.2.2.3 int:dp2T1 backup"); + + dp_test_verify_nh_map_count("10.0.1.0", 1, map_list1); + dp_test_verify_nh_map_count("10.0.2.0", 1, map_list1); + + /* Both routes use primary nh 1.1.1.2, so making it unusable should force a map rebuild of all users */ + dp_test_make_nh_unusable("dp1T1", "1.1.1.2"); + dp_test_verify_nh_map_count("10.0.1.4", 1, map_list2); + dp_test_verify_nh_map_count("10.0.2.4", 1, map_list2); + + dp_test_netlink_del_route( + "10.0.1.0/24 " + "nh 1.1.1.2 int:dp1T1 " + "nh 2.2.2.1 int:dp2T1 backup"); + + dp_test_netlink_del_route( + "10.0.2.0/24 " + "nh 1.1.1.2 int:dp1T1 " + "nh 2.2.2.3 int:dp2T1 backup"); + + /* Clean Up */ + dp_test_nl_del_ip_addr_and_connected("dp1T1", "1.1.1.1/24"); + dp_test_nl_del_ip_addr_and_connected("dp2T1", "2.2.2.2/24"); + dp_test_nl_del_ip_addr_and_connected("dp3T1", "3.3.3.3/24"); + dp_test_clear_path_unusable(); + +} DP_END_TEST; + +/* + * Create two routes whose primaries are different NHs on the same + * interface and check that making one NH unusable only changes the + * map of the route that uses it. 
+ */ +DP_DECL_TEST_CASE(ip_pic_edge_suite, ip_pic_edge5, NULL, NULL); +DP_START_TEST(ip_pic_edge5, ip_pic_edge5) +{ + int map_list1[] = { 0 }; /* expected map before any nh is made unusable (presumably entry 0 is the primary path) */ + int map_list2[] = { 1 }; /* expected map once the primary is unusable (presumably entry 1 is the backup path) */ + + dp_test_nl_add_ip_addr_and_connected("dp1T1", "1.1.1.1/24"); + dp_test_nl_add_ip_addr_and_connected("dp2T1", "2.2.2.2/24"); + dp_test_nl_add_ip_addr_and_connected("dp3T1", "3.3.3.3/24"); + + dp_test_netlink_add_route( + "10.0.1.0/24 " + "nh 1.1.1.2 int:dp1T1 " + "nh 2.2.2.1 int:dp2T1 backup"); + + dp_test_netlink_add_route( + "10.0.2.0/24 " + "nh 1.1.1.3 int:dp1T1 " + "nh 2.2.2.3 int:dp2T1 backup"); + + dp_test_verify_nh_map_count("10.0.1.0", 1, map_list1); + dp_test_verify_nh_map_count("10.0.2.0", 1, map_list1); + + /* Making nh 1.1.1.2 unusable should force a map rebuild for 10.0.1.0/24 */ + dp_test_make_nh_unusable("dp1T1", "1.1.1.2"); + dp_test_verify_nh_map_count("10.0.1.4", 1, map_list2); + /* The 10.0.2.4 route uses a different nh (1.1.1.3) so is not modified */ + dp_test_verify_nh_map_count("10.0.2.4", 1, map_list1); + + dp_test_netlink_del_route( + "10.0.1.0/24 " + "nh 1.1.1.2 int:dp1T1 " + "nh 2.2.2.1 int:dp2T1 backup"); + + dp_test_netlink_del_route( + "10.0.2.0/24 " + "nh 1.1.1.3 int:dp1T1 " + "nh 2.2.2.3 int:dp2T1 backup"); + + /* Clean Up */ + dp_test_nl_del_ip_addr_and_connected("dp1T1", "1.1.1.1/24"); + dp_test_nl_del_ip_addr_and_connected("dp2T1", "2.2.2.2/24"); + dp_test_nl_del_ip_addr_and_connected("dp3T1", "3.3.3.3/24"); + dp_test_clear_path_unusable(); + +} DP_END_TEST; + +/* + * Create two routes whose primaries are different NHs on the same + * interface and check that both maps change when the interface is + * made unusable without naming a specific NH. 
+ */ +DP_DECL_TEST_CASE(ip_pic_edge_suite, ip_pic_edge6, NULL, NULL); +DP_START_TEST(ip_pic_edge6, ip_pic_edge6) +{ + int map_list1[] = { 0 }; /* expected map before anything is made unusable (presumably entry 0 is the primary path) */ + int map_list2[] = { 1 }; /* expected map once the primary is unusable (presumably entry 1 is the backup path) */ + + dp_test_nl_add_ip_addr_and_connected("dp1T1", "1.1.1.1/24"); + dp_test_nl_add_ip_addr_and_connected("dp2T1", "2.2.2.2/24"); + dp_test_nl_add_ip_addr_and_connected("dp3T1", "3.3.3.3/24"); + + dp_test_netlink_add_route( + "10.0.1.0/24 " + "nh 1.1.1.2 int:dp1T1 " + "nh 2.2.2.1 int:dp2T1 backup"); + + dp_test_netlink_add_route( + "10.0.2.0/24 " + "nh 1.1.1.3 int:dp1T1 " + "nh 2.2.2.3 int:dp2T1 backup"); + + dp_test_verify_nh_map_count("10.0.1.0", 1, map_list1); + dp_test_verify_nh_map_count("10.0.2.0", 1, map_list1); + + /* Making dp1T1 unusable with a NULL nh should force a map rebuild for both routes */ + dp_test_make_nh_unusable("dp1T1", NULL); + dp_test_verify_nh_map_count("10.0.1.4", 1, map_list2); + dp_test_verify_nh_map_count("10.0.2.4", 1, map_list2); + + dp_test_netlink_del_route( + "10.0.1.0/24 " + "nh 1.1.1.2 int:dp1T1 " + "nh 2.2.2.1 int:dp2T1 backup"); + + dp_test_netlink_del_route( + "10.0.2.0/24 " + "nh 1.1.1.3 int:dp1T1 " + "nh 2.2.2.3 int:dp2T1 backup"); + + /* Clean Up */ + dp_test_nl_del_ip_addr_and_connected("dp1T1", "1.1.1.1/24"); + dp_test_nl_del_ip_addr_and_connected("dp2T1", "2.2.2.2/24"); + dp_test_nl_del_ip_addr_and_connected("dp3T1", "3.3.3.3/24"); + dp_test_clear_path_unusable(); + +} DP_END_TEST; + +/* + * Check that the maps are updated correctly and that traffic flows + * change correctly. 
+ */ +DP_DECL_TEST_CASE(ip_pic_edge_suite, ip_pic_edge7, NULL, NULL); +DP_START_TEST(ip_pic_edge7, ip_pic_edge7) +{ + struct dp_test_expected *exp; + struct rte_mbuf *test_pak; + const char *nh_mac_str1, *nh_mac_str2, *nh_mac_str3; + int len = 22; + int map_list1[] = { 0, 1, }; + int map_list2[] = { 1, 1, }; + int map_list3[] = { 2, 2, }; + + dp_test_nl_add_ip_addr_and_connected("dp1T1", "1.1.1.1/24"); + dp_test_nl_add_ip_addr_and_connected("dp2T1", "2.2.2.2/24"); + dp_test_nl_add_ip_addr_and_connected("dp3T1", "3.3.3.3/24"); + dp_test_nl_add_ip_addr_and_connected("dp4T1", "4.4.4.4/24"); + + dp_test_netlink_add_route( + "10.0.1.0/24 " + "nh 1.1.1.2 int:dp1T1 " + "nh 2.2.2.1 int:dp2T1 " + "nh 3.3.3.1 int:dp3T1 backup"); + + dp_test_verify_nh_map_count("10.0.1.2", 2, map_list1); + + nh_mac_str1 = "aa:bb:cc:dd:ee:ff"; + dp_test_netlink_add_neigh("dp1T1", "1.1.1.2", nh_mac_str1); + + dp_test_verify_nh_map_count("10.0.1.2", 2, map_list1); + + nh_mac_str2 = "11:22:33:44:55:66"; + dp_test_netlink_add_neigh("dp2T1", "2.2.2.1", nh_mac_str2); + + nh_mac_str3 = "22:33:44:55:66:77"; + dp_test_netlink_add_neigh("dp3T1", "3.3.3.1", nh_mac_str3); + + dp_test_verify_nh_map_count("10.0.1.2", 2, map_list1); + + /* + * Create pak to match the route added above, with ports + * carefully chosen to take path through first path. 
+ */ + test_pak = dp_test_create_udp_ipv4_pak("4.4.4.5", "10.0.1.2", + 1001, 1003, 1, &len); + (void)dp_test_pktmbuf_eth_init(test_pak, + dp_test_intf_name2mac_str("dp4T1"), + DP_TEST_INTF_DEF_SRC_MAC, + RTE_ETHER_TYPE_IPV4); + + exp = dp_test_exp_create(test_pak); + dp_test_exp_set_oif_name(exp, "dp1T1"); + + (void)dp_test_pktmbuf_eth_init(dp_test_exp_get_pak(exp), + nh_mac_str1, + dp_test_intf_name2mac_str("dp1T1"), + RTE_ETHER_TYPE_IPV4); + + dp_test_ipv4_decrement_ttl(dp_test_exp_get_pak(exp)); + dp_test_pak_receive(test_pak, "dp4T1", exp); + + /* + * Now bring down that path, and resend the packet - it should + * now use the remaining primary path + */ + dp_test_make_nh_unusable("dp1T1", NULL); + dp_test_verify_nh_map_count("10.0.1.4", 2, map_list2); + + test_pak = dp_test_create_udp_ipv4_pak("4.4.4.5", "10.0.1.2", + 1001, 1003, 1, &len); + (void)dp_test_pktmbuf_eth_init(test_pak, + dp_test_intf_name2mac_str("dp4T1"), + DP_TEST_INTF_DEF_SRC_MAC, + RTE_ETHER_TYPE_IPV4); + + exp = dp_test_exp_create(test_pak); + dp_test_exp_set_oif_name(exp, "dp2T1"); + + (void)dp_test_pktmbuf_eth_init(dp_test_exp_get_pak(exp), + nh_mac_str2, + dp_test_intf_name2mac_str("dp2T1"), + RTE_ETHER_TYPE_IPV4); + + dp_test_ipv4_decrement_ttl(dp_test_exp_get_pak(exp)); + dp_test_pak_receive(test_pak, "dp4T1", exp); + + /* + * Now bring down last primary, and resend the packet - it should + * now use the backup path + */ + dp_test_make_nh_unusable("dp2T1", NULL); + dp_test_verify_nh_map_count("10.0.1.4", 2, map_list3); + + test_pak = dp_test_create_udp_ipv4_pak("4.4.4.5", "10.0.1.2", + 1001, 1003, 1, &len); + (void)dp_test_pktmbuf_eth_init(test_pak, + dp_test_intf_name2mac_str("dp4T1"), + DP_TEST_INTF_DEF_SRC_MAC, + RTE_ETHER_TYPE_IPV4); + + exp = dp_test_exp_create(test_pak); + dp_test_exp_set_oif_name(exp, "dp3T1"); + + (void)dp_test_pktmbuf_eth_init(dp_test_exp_get_pak(exp), + nh_mac_str3, + dp_test_intf_name2mac_str("dp3T1"), + RTE_ETHER_TYPE_IPV4); + + 
dp_test_ipv4_decrement_ttl(dp_test_exp_get_pak(exp)); + dp_test_pak_receive(test_pak, "dp4T1", exp); + + /* Clean Up */ + dp_test_netlink_del_route( + "10.0.1.0/24 " + "nh 1.1.1.2 int:dp1T1 " + "nh 2.2.2.1 int:dp2T1 " + "nh 3.3.3.1 int:dp3T1 backup"); + + dp_test_netlink_del_neigh("dp1T1", "1.1.1.2", nh_mac_str1); + dp_test_netlink_del_neigh("dp2T1", "2.2.2.1", nh_mac_str2); + dp_test_netlink_del_neigh("dp3T1", "3.3.3.1", nh_mac_str3); + + dp_test_nl_del_ip_addr_and_connected("dp1T1", "1.1.1.1/24"); + dp_test_nl_del_ip_addr_and_connected("dp2T1", "2.2.2.2/24"); + dp_test_nl_del_ip_addr_and_connected("dp3T1", "3.3.3.3/24"); + dp_test_nl_del_ip_addr_and_connected("dp4T1", "4.4.4.4/24"); + dp_test_clear_path_unusable(); + +} DP_END_TEST; + +DP_DECL_TEST_CASE(ip_pic_edge_suite, ip_pic_edge8, NULL, NULL); +DP_START_TEST(ip_pic_edge8, ip_pic_edge8) +{ + pthread_t nh_unusable_thread1; + pthread_t nh_usable_thread1; + int map_list1[] = { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 }; + /* map_list2a is when 0 and 1 have been removed. */ + int map_list2a[] = { 2, 3, 2, 3, 2, 2, 2, 3, 3, 3, 2, 3 }; + /* map_list2b is when 1 and 0 have been removed. */ + int map_list2b[] = { 2, 3, 2, 3, 2, 2, 2, 3, 3, 3, 2, 3 }; + int map_list2c[] = { 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3 }; + + int *map_list2[] = { + map_list2a, + map_list2b, + map_list2c, + }; + + /* + * For map list 3, we have one of the 3 starting points from + * list 2. Each of them have 6 of 2 and 6 of 3. + * + * We are adding 0, 1 back, so there are 7 possibilities. + * For each of the 3 in list 2, we can do, 0 first, 1 first, + * or we can get a clash to give us the 7th possibility. 
+ */ + + /* based on list entry 2a, making 0 usable, then making 1 usable */ + int map_list3a[] = { 1, 0, 0, 0, 1, 2, 2, 1, 3, 3, 2, 3 }; + /* based on list entry 2a, making 1 usable, then making 0 usable */ + int map_list3b[] = { 0, 1, 1, 1, 0, 2, 2, 0, 3, 3, 2, 3 }; + /* based on list entry 2b, making 0 usable, then making 1 usable */ + int map_list3c[] = { 1, 0, 0, 0, 1, 2, 2, 1, 3, 3, 2, 3 }; + /* based on list entry 2b, making 1 usable, then making 0 usable */ + int map_list3d[] = { 0, 1, 1, 1, 0, 2, 2, 0, 3, 3, 2, 3 }; + /* based on list entry 2c, making 0 usable, then making 1 usable */ + int map_list3e[] = { 1, 0, 0, 0, 1, 1, 2, 3, 2, 3, 2, 3 }; + /* based on list entry 2c, making 1 usable, then making 0 usable */ + int map_list3f[] = { 0, 1, 1, 1, 0, 0, 2, 3, 2, 3, 2, 3 }; + /* A collision so refill from start */ + int map_list3g[] = { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 }; + + int *map_list3[] = { + map_list3a, + map_list3b, + map_list3c, + map_list3d, + map_list3e, + map_list3f, + map_list3g, + }; + + int map_list4[] = { 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 }; + int map_list5[] = { 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 }; + + dp_test_nl_add_ip_addr_and_connected("dp1T1", "1.1.1.1/24"); + dp_test_nl_add_ip_addr_and_connected("dp2T1", "2.2.2.2/24"); + dp_test_nl_add_ip_addr_and_connected("dp3T1", "3.3.3.3/24"); + dp_test_nl_add_ip_addr_and_connected("dp4T1", "4.4.4.4/24"); + dp_test_nl_add_ip_addr_and_connected("dp4T2", "5.5.5.5/24"); + + dp_test_netlink_add_route( + "10.0.1.0/24 " + "nh 1.1.1.2 int:dp1T1 " + "nh 2.2.2.1 int:dp2T1 " + "nh 3.3.3.1 int:dp3T1 " + "nh 4.4.4.1 int:dp4T1 " + "nh 5.5.5.1 int:dp4T2 backup"); + + dp_test_verify_nh_map_count("10.0.1.4", 12, map_list1); + + dp_test_make_nh_unusable_other_thread(&nh_unusable_thread1, + "dp2T1", "2.2.2.1"); + dp_test_make_nh_unusable("dp1T1", "1.1.1.2"); + pthread_join(nh_unusable_thread1, NULL); + dp_test_verify_nh_map_count_one_of("10.0.1.4", 12, + 3, map_list2); + + /* + * Back to all being used - one via 
a 2nd thread. + */ + dp_test_make_nh_usable_other_thread(&nh_usable_thread1, + "dp2T1", "2.2.2.1"); + dp_test_make_nh_usable("dp1T1", "1.1.1.2"); + pthread_join(nh_usable_thread1, NULL); + dp_test_verify_nh_map_count_one_of("10.0.1.4", 12, + 7, map_list3); + + /* Finally make everything unusable again */ + dp_test_make_nh_unusable("dp1T1", "1.1.1.2"); + dp_test_make_nh_unusable("dp2T1", "2.2.2.1"); + dp_test_make_nh_unusable("dp3T1", "3.3.3.1"); + dp_test_verify_nh_map_count("10.0.1.4", 12, map_list4); + dp_test_make_nh_unusable("dp4T1", "4.4.4.1"); + dp_test_verify_nh_map_count("10.0.1.4", 12, map_list5); + + dp_test_netlink_del_route( + "10.0.1.0/24 " + "nh 1.1.1.2 int:dp1T1 " + "nh 2.2.2.1 int:dp2T1 " + "nh 3.3.3.1 int:dp3T1 " + "nh 4.4.4.1 int:dp4T1 " + "nh 5.5.5.1 int:dp4T2 backup"); + + /* Clean Up */ + dp_test_nl_del_ip_addr_and_connected("dp1T1", "1.1.1.1/24"); + dp_test_nl_del_ip_addr_and_connected("dp2T1", "2.2.2.2/24"); + dp_test_nl_del_ip_addr_and_connected("dp3T1", "3.3.3.3/24"); + dp_test_nl_del_ip_addr_and_connected("dp4T1", "4.4.4.4/24"); + dp_test_nl_del_ip_addr_and_connected("dp4T2", "5.5.5.5/24"); + dp_test_clear_path_unusable(); + +} DP_END_TEST; + +DP_DECL_TEST_CASE(ip_pic_edge_suite, ip_pic_edge9, NULL, NULL); +DP_START_TEST(ip_pic_edge9, ip_pic_edge9) +{ + pthread_t nh_unusable_thread1; + pthread_t nh_unusable_thread2; + int map_list1[] = { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 }; + int map_list2[] = { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 }; + int map_list3[] = { 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 }; + int map_list4[] = { 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0 }; + + /* Starting point of 4, enabling path 2 */ + int map_list5a[] = { 2, 2, 1, 1, 1, 1, 2, 2, 0, 0, 0, 0 }; + /* Starting point of 5a, disabling path 1 */ + int map_list5b[] = { 2, 2, 0, 2, 0, 2, 2, 2, 0, 0, 0, 0 }; + + /* Starting point of 4, disabling path 1 */ + int map_list5c[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + /* Starting point of 4, enabling path 2 */ + int map_list5d[] = { 2, 
2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0 }; + /* Initial init to paths 0 and 2 */ + int map_list5e[] = { 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2 }; + + int *map_list5[] = { + map_list5a, + map_list5b, + map_list5c, + map_list5d, + map_list5e, + }; + + dp_test_nl_add_ip_addr_and_connected("dp1T1", "1.1.1.1/24"); + dp_test_nl_add_ip_addr_and_connected("dp2T1", "2.2.2.2/24"); + dp_test_nl_add_ip_addr_and_connected("dp3T1", "3.3.3.3/24"); + dp_test_nl_add_ip_addr_and_connected("dp4T1", "4.4.4.4/24"); + dp_test_nl_add_ip_addr_and_connected("dp4T2", "5.5.5.5/24"); + + dp_test_netlink_add_route( + "10.0.1.0/24 " + "nh 1.1.1.2 int:dp1T1 " + "nh 2.2.2.1 int:dp2T1 " + "nh 3.3.3.1 int:dp3T1 " + "nh 4.4.4.1 int:dp4T1 " + "nh 5.5.5.1 int:dp4T2 backup"); + + dp_test_verify_nh_map_count("10.0.1.4", 12, map_list1); + + /* Make a intf/nh we are not using unusable - no map change */ + dp_test_make_nh_unusable_other_thread(&nh_unusable_thread1, + "dp2T1", "2.2.2.1"); + dp_test_make_nh_unusable_other_thread(&nh_unusable_thread2, + "dp4T1", "4.4.4.1"); + dp_test_make_nh_unusable("dp1T1", "1.1.1.2"); + pthread_join(nh_unusable_thread1, NULL); + pthread_join(nh_unusable_thread2, NULL); + dp_test_verify_nh_map_count("10.0.1.4", 12, map_list2); + + /* Making it unusable should force a map rebuild to backup */ + dp_test_make_nh_unusable("dp3T1", "3.3.3.1"); + dp_test_verify_nh_map_count("10.0.1.4", 12, map_list3); + + /* Make paths 0 and 1 usable again */ + dp_test_make_nh_usable("dp1T1", "1.1.1.2"); + dp_test_make_nh_usable("dp2T1", "2.2.2.1"); + dp_test_verify_nh_map_count("10.0.1.4", 12, map_list4); + + /* Now add path 2 while removing path 1 */ + dp_test_make_nh_unusable_other_thread(&nh_unusable_thread1, + "dp2T1", "2.2.2.1"); + dp_test_make_nh_usable("dp3T1", "3.3.3.1"); + pthread_join(nh_unusable_thread1, NULL); + dp_test_verify_nh_map_count_one_of("10.0.1.4", 12, + 5, map_list5); + + dp_test_netlink_del_route( + "10.0.1.0/24 " + "nh 1.1.1.2 int:dp1T1 " + "nh 2.2.2.1 int:dp2T1 " + "nh 3.3.3.1 
int:dp3T1 " + "nh 4.4.4.1 int:dp4T1 " + "nh 5.5.5.1 int:dp4T2 backup"); + + /* Clean Up */ + dp_test_nl_del_ip_addr_and_connected("dp1T1", "1.1.1.1/24"); + dp_test_nl_del_ip_addr_and_connected("dp2T1", "2.2.2.2/24"); + dp_test_nl_del_ip_addr_and_connected("dp3T1", "3.3.3.3/24"); + dp_test_nl_del_ip_addr_and_connected("dp4T1", "4.4.4.4/24"); + dp_test_nl_del_ip_addr_and_connected("dp4T2", "5.5.5.5/24"); + dp_test_clear_path_unusable(); + +} DP_END_TEST; + +/* + * Create a route with a backup and mark the primary unusable before + * the route arrives. + */ +DP_DECL_TEST_CASE(ip_pic_edge_suite, ip_pic_edge10, NULL, NULL); +DP_START_TEST(ip_pic_edge10, ip_pic_edge10) +{ + int map_list1[] = { 1, 1 }; + + dp_test_nl_add_ip_addr_and_connected("dp1T1", "1.1.1.1/24"); + dp_test_nl_add_ip_addr_and_connected("dp2T1", "2.2.2.2/24"); + dp_test_nl_add_ip_addr_and_connected("dp3T1", "3.3.3.3/24"); + + /* Making nh unusable before the route is added */ + dp_test_make_nh_unusable("dp1T1", "1.1.1.2"); + + dp_test_netlink_add_route( + "10.0.1.0/24 " + "nh 1.1.1.2 int:dp1T1 " + "nh 2.2.2.1 int:dp2T1 " + "nh 3.3.3.1 int:dp3T1 backup"); + + dp_test_verify_nh_map_count("10.0.1.0", 2, map_list1); + + dp_test_netlink_del_route( + "10.0.1.0/24 " + "nh 1.1.1.2 int:dp1T1 " + "nh 2.2.2.1 int:dp2T1 " + "nh 3.3.3.1 int:dp3T1 backup"); + + /* Clean Up */ + dp_test_nl_del_ip_addr_and_connected("dp1T1", "1.1.1.1/24"); + dp_test_nl_del_ip_addr_and_connected("dp2T1", "2.2.2.2/24"); + dp_test_nl_del_ip_addr_and_connected("dp3T1", "3.3.3.3/24"); + dp_test_clear_path_unusable(); + +} DP_END_TEST; + + +/* + * Create multiple NHs and check that all are marked unusable when + * the update signal arrives. + * + * This is the IPv6 version of ip_pic_edge5. As almost all the code + * being tested here is AF independent we only need minimal + * tests to check the differences in the hashing. 
+ */ +DP_DECL_TEST_CASE(ip_pic_edge_suite, ip_pic_edge11, NULL, NULL); +DP_START_TEST(ip_pic_edge11, ip_pic_edge11) +{ + int map_list1[] = { 0 }; /* expected map before any nh is made unusable (presumably entry 0 is the primary path) */ + int map_list2[] = { 1 }; /* expected map once the primary is unusable (presumably entry 1 is the backup path) */ + + dp_test_nl_add_ip_addr_and_connected("dp1T1", "1::1/64"); + dp_test_nl_add_ip_addr_and_connected("dp2T1", "2::2/64"); + dp_test_nl_add_ip_addr_and_connected("dp3T1", "3::3/64"); + + dp_test_netlink_add_route( + "10::/64 " + "nh 1::2 int:dp1T1 " + "nh 2::1 int:dp2T1 backup"); + + dp_test_netlink_add_route( + "10:1::/64 " + "nh 1::3 int:dp1T1 " + "nh 2::3 int:dp2T1 backup"); + + dp_test_verify_nh_map_count("10::1", 1, map_list1); + dp_test_verify_nh_map_count("10:1::1", 1, map_list1); + + /* Making nh 1::2 unusable should force a map rebuild for 10::/64 */ + dp_test_make_nh_unusable("dp1T1", "1::2"); + dp_test_verify_nh_map_count("10::1", 1, map_list2); + /* The 2nd route uses a different nh (1::3) so is not modified */ + dp_test_verify_nh_map_count("10:1::1", 1, map_list1); + + dp_test_netlink_del_route( + "10::/64 " + "nh 1::2 int:dp1T1 " + "nh 2::1 int:dp2T1 backup"); + + dp_test_netlink_del_route( + "10:1::/64 " + "nh 1::3 int:dp1T1 " + "nh 2::3 int:dp2T1 backup"); + + /* Clean Up */ + + dp_test_nl_del_ip_addr_and_connected("dp1T1", "1::1/64"); + dp_test_nl_del_ip_addr_and_connected("dp2T1", "2::2/64"); + dp_test_nl_del_ip_addr_and_connected("dp3T1", "3::3/64"); + dp_test_clear_path_unusable(); + +} DP_END_TEST; + +/* + * Check that the next-hops are updated correctly when a neighbour + * arrives and the backup gateway of one route is the primary gateway + * of another. 
+ */ +DP_DECL_TEST_CASE(ip_pic_edge_suite, ip_pic_edge12, NULL, NULL); +DP_START_TEST(ip_pic_edge12, ip_pic_edge12) +{ + const char *nh_mac_str1, *nh_mac_str2; + int map_list1[] = { 0, }; + + dp_test_nl_add_ip_addr_and_connected("dp1T1", "1.1.1.1/24"); + dp_test_nl_add_ip_addr_and_connected("dp2T1", "2.2.2.2/24"); + + dp_test_netlink_add_route( + "10.0.1.0/24 " + "nh 1.1.1.2 int:dp1T1 " + "nh 2.2.2.1 int:dp2T1 backup"); + + dp_test_netlink_add_route( + "10.0.2.0/24 " + "nh 2.2.2.1 int:dp2T1 " + "nh 1.1.1.2 int:dp1T1 backup"); + + dp_test_verify_nh_map_count("10.0.1.2", 1, map_list1); + + nh_mac_str1 = "aa:bb:cc:dd:ee:ff"; + dp_test_netlink_add_neigh("dp1T1", "1.1.1.2", nh_mac_str1); + + dp_test_verify_nh_map_count("10.0.1.2", 1, map_list1); + + nh_mac_str2 = "11:22:33:44:55:66"; + dp_test_netlink_add_neigh("dp2T1", "2.2.2.1", nh_mac_str2); + + dp_test_verify_nh_map_count("10.0.1.2", 1, map_list1); + + /* Clean Up */ + dp_test_netlink_del_route( + "10.0.1.0/24 " + "nh 1.1.1.2 int:dp1T1 " + "nh 2.2.2.1 int:dp2T1 backup"); + + dp_test_netlink_del_route( + "10.0.2.0/24 " + "nh 2.2.2.1 int:dp2T1 " + "nh 1.1.1.2 int:dp1T1 backup"); + + dp_test_netlink_del_neigh("dp1T1", "1.1.1.2", nh_mac_str1); + dp_test_netlink_del_neigh("dp2T1", "2.2.2.1", nh_mac_str2); + + dp_test_nl_del_ip_addr_and_connected("dp1T1", "1.1.1.1/24"); + dp_test_nl_del_ip_addr_and_connected("dp2T1", "2.2.2.2/24"); + dp_test_clear_path_unusable(); + +} DP_END_TEST; diff --git a/tests/whole_dp/src/dp_test_json_utils.c b/tests/whole_dp/src/dp_test_json_utils.c index 9b9a08dc..f38ad7cf 100644 --- a/tests/whole_dp/src/dp_test_json_utils.c +++ b/tests/whole_dp/src/dp_test_json_utils.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015-2016 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -23,7 +23,7 @@ #include "main.h" #include "dp_test.h" -#include "dp_test_lib.h" +#include "dp_test_lib_internal.h" #include "dp_test_controller.h" #include "dp_test_console.h" @@ -455,9 +455,10 @@ json_val_subset(json_object *value1, json_object *value2, struct dp_test_json_mismatches *subobj_mismatch = NULL; enum json_type type1, type2; - if (!value1) { + if (!value1) return true; - } else if (!value2) { + + if (!value2) { dp_test_json_mismatch_record(mismatches, key, NULL, "-- missing"); return false; @@ -633,18 +634,18 @@ dp_test_json_match(json_object *obj1, json_object *obj2, } void -dp_test_json_filter(json_object *master, json_object *filter) +dp_test_json_filter(json_object *haystack, json_object *filter) { - json_object *master_value; + json_object *haystack_value; enum json_type type_filter; json_object_object_foreach(filter, key, filter_value) { - if (!json_object_object_get_ex(master, key, - &master_value)) + if (!json_object_object_get_ex(haystack, key, + &haystack_value)) continue; type_filter = json_object_get_type(filter_value); - if (type_filter != json_object_get_type(master_value)) + if (type_filter != json_object_get_type(haystack_value)) continue; switch (type_filter) { @@ -652,27 +653,28 @@ dp_test_json_filter(json_object *master, json_object *filter) case json_type_double: case json_type_int: case json_type_string: - json_object_object_del(master, key); + json_object_object_del(haystack, key); break; case json_type_object: if (json_object_object_length(filter_value) == 0) - json_object_object_del(master, key); + json_object_object_del(haystack, key); else - dp_test_json_filter(master_value, filter_value); + dp_test_json_filter(haystack_value, + filter_value); break; case json_type_array: { int subset_list_len = json_object_array_length( - master_value); - json_object *master_list_elem; + haystack_value); + json_object *haystack_list_elem; json_object *filter_list_elem; int i; filter_list_elem = json_object_array_get_idx( filter_value, 
0); for (i = 0; i < subset_list_len; i++) { - master_list_elem = json_object_array_get_idx( - master_value, i); - dp_test_json_filter(master_list_elem, + haystack_list_elem = json_object_array_get_idx( + haystack_value, i); + dp_test_json_filter(haystack_list_elem, filter_list_elem); } break; diff --git a/tests/whole_dp/src/dp_test_json_utils.h b/tests/whole_dp/src/dp_test_json_utils.h index 10e79131..72050231 100644 --- a/tests/whole_dp/src/dp_test_json_utils.h +++ b/tests/whole_dp/src/dp_test_json_utils.h @@ -1,3 +1,9 @@ +/* + * Copyright (c) 2018,2020, AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ + #ifndef _DP_TEST_JSON_UTILS_H_ #define _DP_TEST_JSON_UTILS_H_ @@ -12,11 +18,11 @@ json_object *parse_json(const char *response_str, char *err_str, json_object *dp_test_json_do_show_cmd(const char *request, - struct dp_test_json_mismatches **mismatches, + struct dp_test_json_mismatches **m_ret, bool print); bool dp_test_json_subset(json_object *obj1, json_object *obj2, - struct dp_test_json_mismatches **mismatches); + struct dp_test_json_mismatches **mm); bool dp_test_json_superset(json_object *obj1, json_object *obj2, struct dp_test_json_mismatches **mismatches); @@ -33,7 +39,7 @@ unsigned int dp_test_json_mismatch_print(struct dp_test_json_mismatches *m, json_object *dp_test_json_val_in_array(json_object *array, json_object *elem_subset); -void dp_test_json_filter(json_object *master, json_object *filter); +void dp_test_json_filter(json_object *haystack, json_object *filter); void dp_test_json_mismatch_record(struct dp_test_json_mismatches **m_ptr, diff --git a/tests/whole_dp/src/dp_test_lib.c b/tests/whole_dp/src/dp_test_lib.c index 24967ad5..ab3aa8f7 100644 --- a/tests/whole_dp/src/dp_test_lib.c +++ b/tests/whole_dp/src/dp_test_lib.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. 
* Copyright (c) 2015-2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -25,21 +26,23 @@ #include #include "main.h" -#include "master.h" +#include "controller.h" #include "if_var.h" +#include "ip_forward.h" #include "ip_funcs.h" #include "in_cksum.h" +#include "rcu.h" #include "vplane_debug.h" #include "crypto/crypto_main.h" #include "power.h" #include "mpls/mpls.h" #include "dp_test.h" -#include "dp_test_lib.h" #include "dp_test_lib_exp.h" -#include "dp_test_lib_intf.h" -#include "dp_test_pktmbuf_lib.h" -#include "dp_test_netlink_state.h" +#include "dp_test_lib_internal.h" +#include "dp_test_lib_intf_internal.h" +#include "dp_test_pktmbuf_lib_internal.h" +#include "dp_test_netlink_state_internal.h" static struct dp_read_pkt g_read_pkt; @@ -85,8 +88,8 @@ spush(char *s, size_t remaining, const char *format, ...) dp_test_assert_internal(full_size >= 0); if ((unsigned int)full_size + 1 < remaining) return full_size; - else - return remaining; + + return remaining; } /* @@ -176,7 +179,8 @@ dp_test_addr_str_to_addr(const char *addr_str, struct dp_test_addr *addr) if (inet_pton(AF_INET, buf, &addr->addr.ipv4) == 1) { addr->family = AF_INET; return true; - } else if (inet_pton(AF_INET6, buf, &addr->addr.ipv6) == 1) { + } + if (inet_pton(AF_INET6, buf, &addr->addr.ipv6) == 1) { addr->family = AF_INET6; return true; } @@ -294,7 +298,8 @@ dp_test_parse_dp_lbls(const char *lbl_string, struct dp_test_nh *nh) */ nh->num_labels = num_labels; return lbl_string + (lbl_str - buf); - } else if (strcmp(lbl_str, "imp-null") == 0) { + } + if (strcmp(lbl_str, "imp-null") == 0) { label = MPLS_IMPLICITNULL; } else { label = strtoul(lbl_str, &end, 0); @@ -363,7 +368,7 @@ dp_test_parse_dp_nh(const char *nh_string, struct dp_test_nh *nh) strncpy(buf, str, len); buf[len] = '\0'; if (!dp_test_addr_str_to_addr(buf, &nh->nh_addr)) - dp_test_assert_internal(0); + dp_test_abort_internal(); str = 
strchrnul(str, ' '); if (*str) { @@ -374,6 +379,12 @@ dp_test_parse_dp_nh(const char *nh_string, struct dp_test_nh *nh) } } + + if (strncmp(str, " backup", strlen(" backup")) == 0) { + str += strlen(" backup"); + nh->backup = true; + } + /* * Remove any trailing whitespace */ @@ -565,7 +576,7 @@ dp_test_parse_route(const char *route_string) /* Populate VRF id, if present in string. Otherwise assign default */ const char *end = dp_test_parse_dp_vrf(route_string, &route->vrf_id); - /* Populate tabel id, if present in string. Otherwise assign default */ + /* Populate table id, if present in string. Otherwise assign default */ end = dp_test_parse_dp_table(end, &route->tableid); /* Populate prefix */ @@ -579,6 +590,12 @@ dp_test_parse_route(const char *route_string) route->type = RTN_UNICAST; + if (route->prefix.addr.family == AF_INET || + route->prefix.addr.family == AF_INET6) { + if ((route->prefix.addr.addr.ipv6.s6_addr[0] & 0xE0) == 0xE0) + route->type = RTN_MULTICAST; + } + if (!strcmp(end, "unreachable")) route->type = RTN_UNREACHABLE; else if (!strcmp(end, "blackhole")) @@ -615,7 +632,8 @@ void dp_test_free_route(struct dp_test_route *route) uint16_t dp_test_calc_udptcp_chksum(struct rte_mbuf *m) { - const struct ipv4_hdr *ip = pktmbuf_mtol3(m, struct ipv4_hdr *); + const struct rte_ipv4_hdr *ip = + dp_pktmbuf_mtol3(m, struct rte_ipv4_hdr *); const struct tcphdr *tcp = (const struct tcphdr *)(ip + 1); uint16_t cksum; @@ -627,7 +645,7 @@ dp_test_calc_udptcp_chksum(struct rte_mbuf *m) void dp_test_set_tcphdr(struct rte_mbuf *m, uint16_t src_port, uint16_t dst_port) { - struct ether_hdr *eth = rte_pktmbuf_mtod(m, struct ether_hdr *); + struct rte_ether_hdr *eth = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); struct iphdr *ip = (struct iphdr *)(eth + 1); struct tcphdr *tcp = (struct tcphdr *)(ip + 1); @@ -646,11 +664,11 @@ dp_test_set_tcphdr(struct rte_mbuf *m, uint16_t src_port, uint16_t dst_port) void dp_test_set_iphdr(struct rte_mbuf *m, const char *src, const char 
*dst) { - struct ether_hdr *eth = rte_pktmbuf_mtod(m, struct ether_hdr *); + struct rte_ether_hdr *eth = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); struct iphdr *ip = (struct iphdr *)(eth + 1); uint32_t addr; - eth->ether_type = htons(ETHER_TYPE_IPv4); + eth->ether_type = htons(RTE_ETHER_TYPE_IPV4); ip->ihl = DP_TEST_PAK_DEFAULT_IHL; ip->version = 4; ip->tos = 0; @@ -967,6 +985,8 @@ dp_test_pak_verify(struct rte_mbuf *m, struct ifnet *ifp, sent_check_len + check_start, NULL, 0, 0); + (void) written; + if (fwd_result != expected->fwd_result[check]) { bool final_result = false; /* VR failure */ @@ -1030,7 +1050,7 @@ dp_test_pak_verify(struct rte_mbuf *m, struct ifnet *ifp, /* Verify that the mbuf L2 length is valid for Ethernet frames */ _dp_test_fail_unless( - m->l2_len >= ETHER_HDR_LEN, file, line, + m->l2_len >= RTE_ETHER_HDR_LEN, file, line, "(%d) Invalid mbuf L2 length for Ethernet pkt: %d", check, m->l2_len); @@ -1135,6 +1155,8 @@ dp_test_pak_verify(struct rte_mbuf *m, struct ifnet *ifp, diff_at, diff_cnt, pak_info); + (void) written; + _dp_test_fail(file, line, "%s", buf); } else { expected->pak_correct[check] = true; @@ -1178,6 +1200,20 @@ dp_test_intf_name2tx_ring(const char *if_name) return dp_test_intf_name2ring(if_name, DP_TEST_TX_RING_BASE_NAME); } +int dp_test_pak_get_from_ring(const char *if_name, + struct rte_mbuf **bufs, + int count) +{ + struct rte_ring *ring; + + ring = dp_test_intf_name2tx_ring(if_name); + count = rte_ring_mc_dequeue_burst(ring, + (void **)bufs, + count, + NULL); + return count; +} + /* * Loop over all the tx rings checking for packets. For any that are * received, run the verify cb and then free the mbuf. 
@@ -1188,20 +1224,20 @@ static void dp_test_verify_tx(bool wait_for_first) { int i, j, count; struct ifnet *ifp; - struct rte_ring *ring; struct rte_mbuf *bufs[64]; int timeout = USEC_PER_SEC; while (1) { for (i = 0; i < dp_test_intf_count_local(); i++) { ifp = ifnet_byport(i); - ring = dp_test_intf_name2tx_ring(ifp->if_name); - count = rte_ring_mc_dequeue_burst(ring, - (void **)bufs, - 64, - NULL); + count = dp_test_pak_get_from_ring( + ifp->if_name, + (struct rte_mbuf **)&bufs, + 64); if (count) { for (j = 0; j < count; j++) { + dp_test_assert_internal( + bufs[j] != NULL); (*dp_test_exp_get_validate_cb( dp_test_global_expected)) (bufs[j], ifp, @@ -1227,18 +1263,18 @@ dp_test_wait_until_tx_processed(void) * Ensure all packets have been dequeued from * the TX ring and processed. */ - synchronize_rcu(); + dp_rcu_synchronize(); /* * Ensure that if portmonitoring is enabled * that all packets have been dequeued from * the second TX ring and processed. */ - synchronize_rcu(); + dp_rcu_synchronize(); /* * Just in case QOS has been configured and is * using a transmit thread. */ - synchronize_rcu(); + dp_rcu_synchronize(); } /* @@ -1267,7 +1303,7 @@ void dp_test_intf_wait_until_processed(struct rte_ring *ring) * Ensure all packets have made it from an RX * thread to a TX ring, having drained the pkt-burst. */ - synchronize_rcu(); + dp_rcu_synchronize(); dp_test_wait_until_tx_processed(); return; } @@ -1296,6 +1332,25 @@ dp_test_wait_until_local_processed(struct dp_test_expected *expected, } } +void dp_test_pak_add_to_ring(const char *iif_name, + struct rte_mbuf **paks_to_send, + uint32_t num_paks, + bool wait_until_processed) +{ + struct rte_ring *ring; + + ring = dp_test_intf_name2rx_ring(iif_name); + dp_test_assert_internal(ring); + /* + * Enqueue onto the ring, which will then be picked up + * by the driver at the next poll. 
+ */ + rte_ring_mp_enqueue_burst(ring, (void **)paks_to_send, num_paks, NULL); + + if (wait_until_processed) + dp_test_intf_wait_until_processed(ring); +} + /* * Global expected pak that the wrapped end of the processing path can access * so that it can verify the contents. @@ -1330,16 +1385,7 @@ dp_test_pak_inject(struct rte_mbuf **paks_to_send, uint32_t num_paks, /* * Copy the mbufs into the ring for the interface. */ - struct rte_ring *ring; - - ring = dp_test_intf_name2rx_ring(iif_name); - dp_test_assert_internal(ring); - /* - * Enqueue onto the ring, which will then be picked up - * by the driver at the next poll. - */ - rte_ring_mp_enqueue_burst(ring, (void **)paks_to_send, num_paks, NULL); - dp_test_intf_wait_until_processed(ring); + dp_test_pak_add_to_ring(iif_name, paks_to_send, num_paks, true); if (local_paks) dp_test_wait_until_local_processed(expected, num_paks, local_paks); @@ -1433,6 +1479,8 @@ dp_test_pak_check_fwd_result(struct dp_test_expected *expected, "to be forwarded onto: %s", expected->oif_name[i]); + (void) written; + _dp_test_fail(file, line, "%s\nMissing packet:\n%s", expected->description, buf); } @@ -1482,7 +1530,7 @@ _dp_test_pak_receive_n(struct rte_mbuf **paks, uint32_t num_paks, if ((dp_test_intf_type(if_name) == DP_TEST_INTF_TYPE_SWITCH_PORT) && !dp_test_intf_switch_port_over_bkp(if_name)) { - uint32_t device, port; + uint32_t device, port, dpid; struct rte_ring *ring; char ring_name[32]; @@ -1491,7 +1539,8 @@ _dp_test_pak_receive_n(struct rte_mbuf **paks, uint32_t num_paks, * transferred from the RX queue to local Tx burst queue * and to the actual output queue. 
*/ - if (sscanf(if_name, "sw_port_%u_%u", &device, &port) != 2) + if (sscanf(if_name, "dp%usw_port_%u_%u", &dpid, &device, + &port) != 3) dp_test_assert_internal(false); snprintf(ring_name, 32, "net_sw_portsw%uport%u-rx-0", device, port); @@ -1702,14 +1751,6 @@ struct rte_mbuf *dp_test_get_read_pkt(void) return m; } -uint8_t dp_test_get_read_port(void) -{ - int port = g_read_pkt.port; - - g_read_pkt.port = 0; - return port; -} - uint16_t dp_test_get_read_meta_flags(void) { uint16_t flags = g_read_pkt.m.flags; @@ -1748,3 +1789,191 @@ void dp_test_disable_soft_tick_override(void) { disable_soft_clock_override(); } + +#define DP_TEST_MAX_UNUSABLE 100 +static struct dp_rt_path_unusable_key dp_test_unusable[DP_TEST_MAX_UNUSABLE]; +static int dp_test_current_unusable; + +static bool dp_test_paths_equal(const struct dp_rt_path_unusable_key *key1, + const struct dp_rt_path_unusable_key *key2) +{ + if (key1->type == key2->type && + key1->ifindex == key2->ifindex) { + + if (key1->type == DP_RT_PATH_UNUSABLE_KEY_INTF_NEXTHOP) { + if (dp_addr_eq(&key1->nexthop, + &key2->nexthop)) + return true; + } else { + return true; + } + } + return false; +} + +static enum dp_rt_path_state +dp_test_get_path_usable(const struct dp_rt_path_unusable_key *key) +{ + int i; + + for (i = 0; i < dp_test_current_unusable; i++) { + if (dp_test_paths_equal(key, &dp_test_unusable[i])) + return DP_RT_PATH_UNUSABLE; + } + + return DP_RT_PATH_UNKNOWN; +} + +void dp_test_clear_path_unusable(void) +{ + dp_test_current_unusable = 0; +} + +static void dp_test_set_nh_state(const char *interface, + const char *nexthop, + bool usable) +{ + static int registered_usable_cb; + struct dp_test_addr addr; + struct dp_rt_path_unusable_key key; + enum dp_rt_path_state state; + + if (usable) + state = DP_RT_PATH_USABLE; + else + state = DP_RT_PATH_UNUSABLE; + + if (!registered_usable_cb) { + dp_rt_register_path_state("test_infra", + dp_test_get_path_usable); + registered_usable_cb = true; + } + 
dp_test_assert_internal(dp_test_current_unusable < + DP_TEST_MAX_UNUSABLE); + + /* nexthop is allowed to be null */ + if (nexthop) { + if (!dp_test_addr_str_to_addr(nexthop, &addr)) + dp_test_assert_internal(false); + + dp_test_assert_internal(addr.family == AF_INET || + addr.family == AF_INET6); + + key.type = DP_RT_PATH_UNUSABLE_KEY_INTF_NEXTHOP; + key.nexthop.type = addr.family; + memcpy(&key.nexthop.address, &addr.addr, + sizeof(key.nexthop.address)); + } else { + key.type = DP_RT_PATH_UNUSABLE_KEY_INTF; + } + + key.ifindex = dp_test_intf_name2index(interface); + + /* Store for later use */ + dp_test_unusable[dp_test_current_unusable] = key; + dp_test_current_unusable++; + + dp_rt_signal_path_state("tests", state, &key); +} + +void dp_test_make_nh_unusable(const char *interface, + const char *nexthop) +{ + dp_test_set_nh_state(interface, nexthop, false); +} + +void dp_test_make_nh_usable(const char *interface, + const char *nexthop) +{ + dp_test_set_nh_state(interface, nexthop, true); +} + + +static void nh_set_state(struct dp_rt_path_unusable_key *key, + enum dp_rt_path_state state) +{ + dp_rcu_register_thread(); + dp_rcu_thread_online(); + + dp_rt_signal_path_state("tests", state, key); + + dp_rcu_thread_offline(); + dp_rcu_unregister_thread(); +} + +static void *nh_unusable(void *arg) +{ + struct dp_rt_path_unusable_key *key = arg; + + nh_set_state(key, DP_RT_PATH_UNUSABLE); + free(key); + return 0; +} + +static void *nh_usable(void *arg) +{ + struct dp_rt_path_unusable_key *key = arg; + + nh_set_state(key, DP_RT_PATH_USABLE); + free(key); + return 0; +} + +static struct dp_rt_path_unusable_key * +dp_test_nh_state_make_key(const char *interface, + const char *nexthop) +{ + struct dp_rt_path_unusable_key *key; + struct dp_test_addr addr; + + key = calloc(1, sizeof(*key)); + dp_test_assert_internal(key != NULL); + /* nexthop is allowed to be null */ + if (nexthop) { + if (!dp_test_addr_str_to_addr(nexthop, &addr)) + dp_test_assert_internal(false); + + 
dp_test_assert_internal(addr.family == AF_INET || + addr.family == AF_INET6); + + key->type = DP_RT_PATH_UNUSABLE_KEY_INTF_NEXTHOP; + key->nexthop.type = addr.family; + memcpy(&key->nexthop.address, &addr.addr, + sizeof(key->nexthop.address)); + } else { + key->type = DP_RT_PATH_UNUSABLE_KEY_INTF; + } + + key->ifindex = dp_test_intf_name2index(interface); + + return key; + +} + +void dp_test_make_nh_unusable_other_thread(pthread_t *nh_unusable_thread, + const char *interface, + const char *nexthop) +{ + struct dp_rt_path_unusable_key *key; + + key = dp_test_nh_state_make_key(interface, nexthop); + /* + * Spin up a thread to make the nh unusable + */ + if (pthread_create(nh_unusable_thread, NULL, nh_unusable, key) < 0) + dp_test_abort_internal(); +} + +void dp_test_make_nh_usable_other_thread(pthread_t *nh_unusable_thread, + const char *interface, + const char *nexthop) +{ + struct dp_rt_path_unusable_key *key; + + key = dp_test_nh_state_make_key(interface, nexthop); + /* + * Spin up a thread to make the nh unusable + */ + if (pthread_create(nh_unusable_thread, NULL, nh_usable, key) < 0) + dp_test_abort_internal(); +} diff --git a/tests/whole_dp/src/dp_test_lib_cmd.c b/tests/whole_dp/src/dp_test_lib_cmd.c deleted file mode 100644 index b97fe1a9..00000000 --- a/tests/whole_dp/src/dp_test_lib_cmd.c +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. - * Copyright (c) 2017 by Brocade Communications Systems, Inc. - * All rights reserved. - * - * SPDX-License-Identifier: LGPL-2.1-only - * - * dataplane UT console commands - */ -#include "dp_test_lib_cmd.h" - -#include "dp_test.h" -#include "dp_test_console.h" -#include "dp_test_lib_intf.h" - -/* reset connection to the main vplaned */ -void -_dp_test_cmd_reset(const char *file, const char *func, int line) -{ - const char *cmd = "reset", *check_cmd = "master state"; - json_object *expected; - static uint32_t ready_count = 1; /* No. 
times in ready state */ - - expected = dp_test_json_create("{ \"master_state\":" - " {" - " \"vplaned\":" - " { \"ready\": %u }," - " \"vplaned-local\":" - " { \"ready\": 1 }" - " }" - "}", - ready_count); - - /* We expect the dataplane to be in READY state before reset */ - _dp_test_check_json_state(check_cmd, expected, NULL, - DP_TEST_JSON_CHECK_EXACT, - false, - file, func, line); - json_object_put(expected); - - dp_test_console_request_reply(cmd, false); - - /* We expect the dataplane to come back to READY state after reset */ - ready_count++; - /* Reset is expected to take some time. On laptop it always completes - * in < 2s (even with 10 x UT running in parallel). Be conservative for - * slow / busy hosts and set to 5. - */ - dp_test_wait_set(5); - expected = dp_test_json_create("{ \"master_state\":" - " {" - " \"vplaned\":" - " { \"ready\": %u }," - " \"vplaned-local\":" - " { \"ready\": 1 }" - " }" - "}", - ready_count); - _dp_test_check_json_state(check_cmd, expected, NULL, - DP_TEST_JSON_CHECK_EXACT, - false, - file, func, line); - json_object_put(expected); - - /* Recreate the interfaces expected in 'clean' state. */ - dp_test_intf_create_default_set(NULL); -} diff --git a/tests/whole_dp/src/dp_test_lib_cmd.h b/tests/whole_dp/src/dp_test_lib_cmd.h deleted file mode 100644 index d6435ece..00000000 --- a/tests/whole_dp/src/dp_test_lib_cmd.h +++ /dev/null @@ -1,19 +0,0 @@ -/* - * Copyright (c) 2017, AT&T Intellectual Property. All rights reserved. - * Copyright (c) 2017 by Brocade Communications Systems, Inc. - * All rights reserved. 
- * - * SPDX-License-Identifier: LGPL-2.1-only - * - * dataplane UT console commands - */ - -#ifndef _DP_TEST_LIB_CMD_H_ -#define _DP_TEST_LIB_CMD_H_ - -void -_dp_test_cmd_reset(const char *file, const char *func, int line); -#define dp_test_cmd_reset(void) \ - _dp_test_cmd_reset(__FILE__, __func__, __LINE__) - -#endif /* _DP_TEST_LIB_CMD_H_ */ diff --git a/tests/whole_dp/src/dp_test_lib_exp.c b/tests/whole_dp/src/dp_test_lib_exp.c index a6299cb7..0eded7fb 100644 --- a/tests/whole_dp/src/dp_test_lib_exp.c +++ b/tests/whole_dp/src/dp_test_lib_exp.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015-2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -14,11 +14,11 @@ #include #include -#include "dp_test_lib.h" -#include "dp_test_lib_intf.h" -#include "dp_test_pktmbuf_lib.h" +#include "dp_test_lib_internal.h" +#include "dp_test_lib_intf_internal.h" +#include "dp_test_pktmbuf_lib_internal.h" #include "dp_test_lib_exp.h" -#include "dp_test_macros.h" +#include "dp_test/dp_test_macros.h" /* * struct dp_test_ctx_free_rec @@ -282,8 +282,8 @@ dp_test_exp_get_validate_cb(struct dp_test_expected *exp) { if (exp->validate_cb) return exp->validate_cb; - else - return dp_test_pak_verify; + + return dp_test_pak_verify; } validate_cb @@ -340,7 +340,7 @@ dp_test_exp_set_validate_ctx(struct dp_test_expected *exp, void *new_ctx, */ void dp_test_exp_set_dont_care(struct dp_test_expected *exp, unsigned int pak_i, - uint8_t *start, uint32_t len) + const uint8_t *start, uint32_t len) { struct dp_test_dont_care_range *range; struct rte_mbuf *exp_pak; diff --git a/tests/whole_dp/src/dp_test_lib_exp.h b/tests/whole_dp/src/dp_test_lib_exp.h index d5ed3355..2c1a6e28 100644 --- a/tests/whole_dp/src/dp_test_lib_exp.h +++ b/tests/whole_dp/src/dp_test_lib_exp.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2018, AT&T Intellectual Property. 
All rights reserved. + * Copyright (c) 2018-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015-2017 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -17,8 +17,8 @@ #include -#include "dp_test_lib.h" -#include "dp_test_lib_intf.h" +#include "dp_test_lib_internal.h" +#include "dp_test_lib_intf_internal.h" #define DP_TEST_MAX_EXPECTED_PAKS 10 #define DP_TEST_MAX_DONT_CARE 10 /* Number of dont care ranges */ @@ -86,8 +86,8 @@ struct dp_test_expected { }; void -dp_test_exp_set_dont_care(struct dp_test_expected *exp, unsigned int check, - uint8_t *start, uint32_t len); +dp_test_exp_set_dont_care(struct dp_test_expected *exp, unsigned int pak_i, + const uint8_t *start, uint32_t len); bool dp_test_exp_care(struct dp_test_expected *exp, unsigned int check, unsigned int offset); diff --git a/tests/whole_dp/src/dp_test_lib_internal.h b/tests/whole_dp/src/dp_test_lib_internal.h new file mode 100644 index 00000000..4a0fb706 --- /dev/null +++ b/tests/whole_dp/src/dp_test_lib_internal.h @@ -0,0 +1,148 @@ +/*- + * Copyright (c) 2020, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2015-2016 by Brocade Communications Systems, Inc. + * All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + * + * A library of useful functions for writing dataplane tests. + */ + +#ifndef _DP_TEST_LIB_INTERNAL_H_ +#define _DP_TEST_LIB_INTERNAL_H_ + +#define DP_TEST_MAX_PREFIX_STRING_LEN 100 +#define DP_TEST_MAX_ROUTE_STRING_LEN 2048 + +#include +#include + +#include "if_var.h" + +#include "dp_test/dp_test_lib.h" + +#include "dp_test_pktmbuf_lib_internal.h" + +extern int spath_pipefd[2]; +extern int shadow_pipefd[DATAPLANE_MAX_PORTS]; + +/* Packet for read/readv. 
This can contain the user provided iov's */ +struct dp_read_pkt { + struct rte_mbuf *pkt; + portid_t port; + struct meta { + uint32_t ifindex; + uint16_t flags; + } m; + struct pi { + uint16_t proto; + } p; +}; + + +struct dp_test_expected; + +typedef void (*validate_cb)(struct rte_mbuf *pak, + struct ifnet *ifp, + struct dp_test_expected *expected, + enum dp_test_fwd_result_e fwd_result); + +/* + * Helper function to allow an idiom where we keep extending a string + * into a fixed size buffer with printf style calls and keep a running + * total of the number of non-null chars written. + * + * We return the number of characters in the string that results from + * the printf unless the string with its null exactly fills the + * remaining space at which point were return the remaining space. So + * subsequent calls will be given remaining == 0. + */ +int spush(char *s, size_t remaining, const char *format, ...) + __attribute__ ((__format__(printf, 3, 4))); + +void +dp_test_str_trim(char *str, uint16_t start_trim, uint16_t end_trim); + +validate_cb dp_test_exp_get_validate_cb(struct dp_test_expected *); + +validate_cb dp_test_exp_set_validate_cb(struct dp_test_expected *, validate_cb); + +/* + * Simulate injection of packet into the dataplane from the kernel + */ + +void _dp_test_send_slowpath_pkt(struct rte_mbuf *pak, + struct dp_test_expected *expected, + const char *file, const char *func, int line); + +#define dp_test_send_slowpath_pkt(pak, expected) \ + _dp_test_send_slowpath_pkt(pak, expected, \ + __FILE__, __func__, __LINE__) + +/* Inject packet on .spath interface from kernel */ +void _dp_test_send_spath_pkt(struct rte_mbuf *pak, const char *virt_oif_name, + struct dp_test_expected *expected, + const char *file, const char *func, int line); + +#define dp_test_send_spath_pkt(pak, virt_oif_name, expected) \ + _dp_test_send_spath_pkt(pak, virt_oif_name, expected, \ + __FILE__, __func__, __LINE__) +struct ifnet; +void +dp_test_pak_verify(struct rte_mbuf *m, struct 
ifnet *ifp, + struct dp_test_expected *expected, + enum dp_test_fwd_result_e fwd_result); + +/* Read packet context processing functions */ +void dp_test_inject_pkt_slow_path(struct rte_mbuf *pkt, portid_t port, + uint32_t ifindex, uint16_t flags, uint16_t proto); +struct rte_mbuf *dp_test_get_read_pkt(void); +uint16_t dp_test_get_read_meta_flags(void); +uint32_t dp_test_get_read_meta_iif(void); +uint16_t dp_test_get_read_proto(void); +bool dp_test_read_pkt_available(void); + +/* + * Internal error in test framework - will crash notifying the line + * number that we are currently at. Do NOT use this for normal test + * conditions - for those use dp_fail_unless and other services in + * dp_test/dp_test_macros.h. This is solely for internal unrecoverable errors + * in the test infra. + */ +#define dp_test_assert_internal(expr) \ + ({ \ + if (!(expr)) { \ + printf("Internal error: %s:%d\n", \ + __func__, __LINE__); \ + } \ + assert(expr); \ + }) + +#define dp_test_abort_internal() \ + ({ \ + printf("Internal error: %s:%d\n", \ + __func__, __LINE__); \ + abort(); \ + }) + +/* override soft-ticks time for tests that want to do timer dependent stuff. 
*/ +void dp_test_enable_soft_tick_override(void); +void dp_test_disable_soft_tick_override(void); + +void dp_test_make_nh_unusable(const char *interface, + const char *nexthop); + +void dp_test_make_nh_usable(const char *interface, + const char *nexthop); + +void dp_test_clear_path_unusable(void); + +void dp_test_make_nh_unusable_other_thread(pthread_t *nh_unusable_thread, + const char *interface, + const char *nexthop); + +void dp_test_make_nh_usable_other_thread(pthread_t *nh_unusable_thread, + const char *interface, + const char *nexthop); + +#endif /*_DP_TEST_LIB_H_ */ diff --git a/tests/whole_dp/src/dp_test_lib_intf.c b/tests/whole_dp/src/dp_test_lib_intf.c index 4896c16e..07885841 100644 --- a/tests/whole_dp/src/dp_test_lib_intf.c +++ b/tests/whole_dp/src/dp_test_lib_intf.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015-2016 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -11,14 +11,14 @@ #include #include -#include "dp_test_lib_intf.h" +#include "dp_test_lib_intf_internal.h" #include "dp_test.h" #include "dp_test_console.h" #include "dp_test_controller.h" #include "dp_test_json_utils.h" -#include "dp_test_lib.h" -#include "dp_test_netlink_state.h" +#include "dp_test_lib_internal.h" +#include "dp_test_netlink_state_internal.h" #include "dp_test_str.h" /* @@ -39,15 +39,14 @@ dp_test_intf_type(const char *if_name) if (strncmp(if_name, "dp", 2) == 0) { if (strlen(if_name) > 4 && !strncmp(if_name + 3, "vrrp", 4)) return DP_TEST_INTF_TYPE_MACVLAN; + if (strlen(if_name) > 4 && !strncmp(if_name + 3, "sw_port", 7)) + return DP_TEST_INTF_TYPE_SWITCH_PORT; return DP_TEST_INTF_TYPE_DP; } if (strncmp(if_name, "vtun", 4) == 0) return DP_TEST_INTF_TYPE_NON_DP; - if (strncmp(if_name, "sw_port", 7) == 0) - return DP_TEST_INTF_TYPE_SWITCH_PORT; - if (strncmp(if_name, "br", 2) == 0) return DP_TEST_INTF_TYPE_BRIDGE; if (strncmp(if_name, "sw", 2) == 0) @@ -162,7 +161,7 @@ struct dp_test_intf { int ifindex; /* Interface index allocated by 'kernel' */ uint8_t state; /* Track interface programmed state */ char if_name[IFNAMSIZ]; - struct ether_addr mac; /* Interface mac address */ + struct rte_ether_addr mac; /* Interface mac address */ in_addr_t ip4[DP_TEST_INTF_ADDR_MAX]; struct in6_addr ip6[DP_TEST_INTF_ADDR_MAX]; uint8_t active; /* are there switch_port interface active */ @@ -180,131 +179,151 @@ static struct dp_test_intf dp_test_intf_default[] = { { 0, 0, 0, 0 }, { IN6ADDR_ANY_INIT, IN6ADDR_ANY_INIT, IN6ADDR_ANY_INIT, IN6ADDR_ANY_INIT }, + 0, 0 }, { 1, 1, 101, 0, "dp1T1", /* VR dpT11 */ { .addr_bytes = { 0x00, 0x00, 0xa4, 0x00, 0x00, 0x65 } }, { 0, 0, 0, 0 }, { IN6ADDR_ANY_INIT, IN6ADDR_ANY_INIT, IN6ADDR_ANY_INIT, IN6ADDR_ANY_INIT }, + 0, 0 }, { 2, 1, 102, 0, "dp1T2", { .addr_bytes = { 0x00, 0x00, 0xa4, 0x00, 0x00, 0x66 } }, { 0, 0, 0, 0 }, { IN6ADDR_ANY_INIT, IN6ADDR_ANY_INIT, IN6ADDR_ANY_INIT, IN6ADDR_ANY_INIT }, + 0, 0 }, { 3, 1, 103, 0, 
"dp1T3", { .addr_bytes = { 0x00, 0x00, 0xa4, 0x00, 0x00, 0x67 } }, { 0, 0, 0, 0 }, { IN6ADDR_ANY_INIT, IN6ADDR_ANY_INIT, IN6ADDR_ANY_INIT, IN6ADDR_ANY_INIT }, + 0, 0 }, { 4, 1, 104, 0, "dp1T4", { .addr_bytes = { 0x00, 0x00, 0xa4, 0x00, 0x00, 0x68 } }, { 0, 0, 0, 0 }, { IN6ADDR_ANY_INIT, IN6ADDR_ANY_INIT, IN6ADDR_ANY_INIT, IN6ADDR_ANY_INIT }, + 0, 0 }, { 5, 2, 105, 0, "dp2T0", /* VR dpT20 */ { .addr_bytes = { 0x00, 0x00, 0xa4, 0x00, 0x00, 0x69 } }, { 0, 0, 0, 0 }, { IN6ADDR_ANY_INIT, IN6ADDR_ANY_INIT, IN6ADDR_ANY_INIT, IN6ADDR_ANY_INIT }, + 0, 0 }, { 6, 2, 106, 0, "dp2T1", { .addr_bytes = { 0x00, 0x00, 0xa4, 0x00, 0x00, 0x6a } }, { 0, 0, 0, 0 }, { IN6ADDR_ANY_INIT, IN6ADDR_ANY_INIT, IN6ADDR_ANY_INIT, IN6ADDR_ANY_INIT }, + 0, 0 }, { 7, 2, 107, 0, "dp2T2", { .addr_bytes = { 0x00, 0x00, 0xa4, 0x00, 0x00, 0x6b } }, { 0, 0, 0, 0 }, { IN6ADDR_ANY_INIT, IN6ADDR_ANY_INIT, IN6ADDR_ANY_INIT, IN6ADDR_ANY_INIT }, + 0, 0 }, { 8, 2, 108, 0, "dp2T3", { .addr_bytes = { 0x00, 0x00, 0xa4, 0x00, 0x00, 0x6c } }, { 0, 0, 0, 0 }, { IN6ADDR_ANY_INIT, IN6ADDR_ANY_INIT, IN6ADDR_ANY_INIT, IN6ADDR_ANY_INIT }, + 0, 0 }, { 9, 2, 109, 0, "dp2T4", { .addr_bytes = { 0x00, 0x00, 0xa4, 0x00, 0x00, 0x6d } }, { 0, 0, 0, 0 }, { IN6ADDR_ANY_INIT, IN6ADDR_ANY_INIT, IN6ADDR_ANY_INIT, IN6ADDR_ANY_INIT }, + 0, 0 }, { 10, 3, 110, 0, "dp3T0", /* VR dpT30 */ { .addr_bytes = { 0x00, 0x00, 0xa4, 0x00, 0x00, 0x6e } }, { 0, 0, 0, 0 }, { IN6ADDR_ANY_INIT, IN6ADDR_ANY_INIT, IN6ADDR_ANY_INIT, IN6ADDR_ANY_INIT }, + 0, 0 }, { 11, 3, 111, 0, "dp3T1", { .addr_bytes = { 0x00, 0x00, 0xa4, 0x00, 0x00, 0x6f } }, { 0, 0, 0, 0 }, { IN6ADDR_ANY_INIT, IN6ADDR_ANY_INIT, IN6ADDR_ANY_INIT, IN6ADDR_ANY_INIT }, + 0, 0 }, { 12, 3, 112, 0, "dp3T2", { .addr_bytes = { 0x00, 0x00, 0xa4, 0x00, 0x00, 0x70 } }, { 0, 0, 0, 0 }, { IN6ADDR_ANY_INIT, IN6ADDR_ANY_INIT, IN6ADDR_ANY_INIT, IN6ADDR_ANY_INIT }, + 0, 0 }, { 13, 3, 113, 0, "dp3T3", { .addr_bytes = { 0x00, 0x00, 0xa4, 0x00, 0x00, 0x71 } }, { 0, 0, 0, 0 }, { IN6ADDR_ANY_INIT, 
IN6ADDR_ANY_INIT, IN6ADDR_ANY_INIT, IN6ADDR_ANY_INIT }, + 0, 0 }, { 14, 3, 114, 0, "dp3T4", { .addr_bytes = { 0x00, 0x00, 0xa4, 0x00, 0x00, 0x72 } }, { 0, 0, 0, 0 }, { IN6ADDR_ANY_INIT, IN6ADDR_ANY_INIT, IN6ADDR_ANY_INIT, IN6ADDR_ANY_INIT }, + 0, 0 }, { 15, 4, 115, 0, "dp4T0", /* VR dpT40 */ { .addr_bytes = { 0x00, 0x00, 0xa4, 0x00, 0x00, 0x73 } }, { 0, 0, 0, 0 }, { IN6ADDR_ANY_INIT, IN6ADDR_ANY_INIT, IN6ADDR_ANY_INIT, IN6ADDR_ANY_INIT }, + 0, 0 }, { 16, 4, 116, 0, "dp4T1", { .addr_bytes = { 0x00, 0x00, 0xa4, 0x00, 0x00, 0x74 } }, { 0, 0, 0, 0 }, { IN6ADDR_ANY_INIT, IN6ADDR_ANY_INIT, IN6ADDR_ANY_INIT, IN6ADDR_ANY_INIT }, + 0, 0 }, { 17, 4, 117, 0, "dp4T2", { .addr_bytes = { 0x00, 0x00, 0xa4, 0x00, 0x00, 0x75 } }, { 0, 0, 0, 0 }, { IN6ADDR_ANY_INIT, IN6ADDR_ANY_INIT, IN6ADDR_ANY_INIT, IN6ADDR_ANY_INIT }, + 0, 0 }, { 18, 4, 118, 0, "dp4T3", { .addr_bytes = { 0x00, 0x00, 0xa4, 0x00, 0x00, 0x76 } }, { 0, 0, 0, 0 }, { IN6ADDR_ANY_INIT, IN6ADDR_ANY_INIT, IN6ADDR_ANY_INIT, IN6ADDR_ANY_INIT }, + 0, 0 }, { 19, 4, 119, 0, "dp4T4", { .addr_bytes = { 0x00, 0x00, 0xa4, 0x00, 0x00, 0x77 } }, { 0, 0, 0, 0 }, { IN6ADDR_ANY_INIT, IN6ADDR_ANY_INIT, IN6ADDR_ANY_INIT, IN6ADDR_ANY_INIT }, + 0, 0 }, }; static struct dp_test_intf dp_test_intf_switch_port[] = { - { 20, 1, 120, 0, "sw_port_0_0", /* switch port interface */ + { 20, 1, 120, 0, "dp1sw_port_0_0", /* switch port interface */ { .addr_bytes = { 0x00, 0x00, 0xa4, 0xbe, 0xef, 0x88 } }, { 0, 0, 0, 0 }, { IN6ADDR_ANY_INIT, IN6ADDR_ANY_INIT, IN6ADDR_ANY_INIT, IN6ADDR_ANY_INIT}, false, true, }, - { 21, 1, 121, 0, "sw_port_0_7", /* switch port interface */ + { 21, 1, 121, 0, "dp1sw_port_0_7", /* switch port interface */ { .addr_bytes = { 0x00, 0x00, 0xa4, 0xbe, 0xef, 0x01 } }, { 0, 0, 0, 0 }, { IN6ADDR_ANY_INIT, IN6ADDR_ANY_INIT, IN6ADDR_ANY_INIT, @@ -338,10 +357,10 @@ void dp_test_intf_dpdk_init(void) snprintf(tx_ring_name, sizeof(tx_ring_name), DP_TEST_TX_RING_BASE_NAME "%c%c", loc, intf->if_name[4]); - rx_ring = 
rte_ring_create(rx_ring_name, 32, + rx_ring = rte_ring_create(rx_ring_name, 512, SOCKET_ID_ANY, RING_F_SC_DEQ); - tx_ring = rte_ring_create(tx_ring_name, 32, + tx_ring = rte_ring_create(tx_ring_name, 512, SOCKET_ID_ANY, RING_F_SP_ENQ); if (!rx_ring || !tx_ring) @@ -431,6 +450,25 @@ dp_test_intf_switch_port_activate(const char *real_if_name) } +void +dp_test_intf_switch_port_deactivate(const char *real_if_name) +{ + struct dp_test_intf *intf; + + intf = dp_test_intf_find_switch_port(real_if_name); + + if (!intf || !intf->active) + return; + + intf->active = false; + + json_object *intf_set; + + intf_set = dp_test_json_intf_set_create(); + dp_test_intf_create_default_set(intf_set); + +} + bool dp_test_intf_switch_port_over_bkp(const char *real_if_name) { struct dp_test_intf *intf; @@ -471,15 +509,14 @@ dp_test_intf_count_local(void) } /* - * Return count of all virtual, non-virtual and switch port interfaces - * expected in the test clean state. + * Return count of all virtual and non-virtual interfaces expected in + * the test clean state. */ uint8_t dp_test_intf_clean_count(void) { /* add one for loopback interface */ - return dp_test_intf_count() + - dp_test_intf_switch_port_count() + 1; + return dp_test_intf_count() + 1; } /* Generate real interfaces. 
@@ -504,7 +541,7 @@ void dp_test_intf_init(void) int dp_test_intf_virt_add(const char *if_name) { - struct ether_addr mac = { + struct rte_ether_addr mac = { .addr_bytes = { 0x00, 0x00, 0xa5, 0x00, 0x00, 0x00 } }; char real_if_name[IFNAMSIZ]; @@ -564,20 +601,6 @@ dp_test_intf_name2intf(const char *if_name) return target_intf; } -/* - * Convert if_name to oper state flags - */ -uint8_t -dp_test_intf_name2state(const char *if_name) -{ - struct dp_test_intf *intf; - - intf = dp_test_intf_name2intf(if_name); - dp_test_assert_internal(intf); - - return intf->state; -} - /* * Set op_state flags on if_name intf */ @@ -637,7 +660,7 @@ dp_test_intf_name2index(const char *if_name) return intf->ifindex; } -struct ether_addr * +struct rte_ether_addr * dp_test_intf_name2mac(const char *if_name) { struct dp_test_intf *intf; @@ -728,22 +751,6 @@ dp_test_intf_port2index(portid_t port_id) return intf->ifindex; } -/* - * - * Convert port_id to interface mac. - */ -struct ether_addr * -dp_test_intf_port2mac(portid_t port_id) -{ - struct dp_test_intf *intf; - - dp_test_assert_internal(port_id < dp_test_intf_count()); - intf = dp_test_intf_port2intf(port_id); - dp_test_assert_internal(intf); - - return &intf->mac; -} - static int dp_test_intf_ip4_find(in_addr_t ip4, struct dp_test_intf *intf) { @@ -757,7 +764,7 @@ dp_test_intf_ip4_find(in_addr_t ip4, struct dp_test_intf *intf) } static void -dp_test_intf_ip4(const char *if_name, in_addr_t *ip4, bool add) +dp_test_intf_ip4(const char *if_name, const in_addr_t *ip4, bool add) { struct dp_test_intf *intf; int i; @@ -836,52 +843,6 @@ dp_test_intf_del_addr(const char *if_name, struct dp_test_addr *addr) dp_test_intf_addr(if_name, addr, false); } -static bool -dp_test_intf_has_ip4(const char *if_name, const in_addr_t ip4) -{ - struct dp_test_intf *intf; - int i; - - intf = dp_test_intf_name2intf(if_name); - dp_test_assert_internal(intf); - - for (i = 0; i < DP_TEST_INTF_ADDR_MAX; i++) - if (intf->ip4[i] == ip4) - return true; - - return 
false; -} - -static bool -dp_test_intf_has_ip6(const char *if_name, const struct in6_addr *ip6) -{ - struct dp_test_intf *intf; - int i; - - intf = dp_test_intf_name2intf(if_name); - dp_test_assert_internal(intf); - - for (i = 0; i < DP_TEST_INTF_ADDR_MAX; i++) - if (IN6_ARE_ADDR_EQUAL(&intf->ip6[i], ip6)) - return true; - - return false; -} - -bool -dp_test_intf_has_addr(const char *if_name, const struct dp_test_addr *addr) -{ - switch (addr->family) { - case AF_INET: - return dp_test_intf_has_ip4(if_name, addr->addr.ipv4); - case AF_INET6: - return dp_test_intf_has_ip6(if_name, &addr->addr.ipv6); - default: - dp_test_assert_internal(false); - } - return false; -} - static void dp_test_intf_primary_ip4(const char *if_name, in_addr_t *primary) { @@ -1047,7 +1008,7 @@ static void dp_test_intf_get_stats_for_if(const char *ifname, { char real_ifname[IFNAMSIZ]; dp_test_intf_real(ifname, real_ifname); - struct ifnet *ifp = ifnet_byifname(real_ifname); + struct ifnet *ifp = dp_ifnet_byifname(real_ifname); if_stats(ifp, stats); } @@ -1458,20 +1419,6 @@ _dp_test_intf_loopback_delete(const char *name, dp_test_intf_virt_del(name); } -void _dp_test_intf_lord_create(const char *name, vrfid_t vrf_id, - const char *file, int line) -{ - dp_test_intf_virt_add(name); - _dp_test_netlink_create_lord(name, vrf_id, true, file, NULL, line); -} - -void _dp_test_intf_lord_delete(const char *name, vrfid_t vrf_id, - const char *file, int line) -{ - _dp_test_netlink_del_lord(name, vrf_id, true, file, NULL, line); - dp_test_intf_virt_del(name); -} - void _dp_test_intf_vfp_create(const char *name, vrfid_t vrf_id, bool verify, const char *file, const char *func, int line) { @@ -1486,70 +1433,20 @@ void _dp_test_intf_vfp_delete(const char *name, vrfid_t vrf_id, dp_test_intf_virt_del(name); } -void _dp_test_intf_vrf_master_create(const char *name, vrfid_t vrf_id, +void _dp_test_intf_vrf_if_create(const char *name, vrfid_t vrf_id, uint32_t tableid, const char *file, int line) { 
dp_test_intf_virt_add(name); - _dp_test_netlink_create_vrf_master(name, vrf_id, tableid, true, + _dp_test_netlink_create_vrf_if(name, vrf_id, tableid, true, file, NULL, line); } -void _dp_test_intf_vrf_master_delete(const char *name, vrfid_t vrf_id, +void _dp_test_intf_vrf_if_delete(const char *name, vrfid_t vrf_id, uint32_t tableid, const char *file, int line) { - _dp_test_netlink_del_vrf_master(name, vrf_id, tableid, true, + _dp_test_netlink_del_vrf_if(name, vrf_id, tableid, true, file, NULL, line); dp_test_intf_virt_del(name); } - -/* Take an interface name and find the vrfid using a show cmd */ -void -_dp_test_intf_name2vrfid(const char *if_name, uint32_t *vrf_id, - const char *file, const char *func, int line) -{ - char cmd[DP_TEST_TMP_BUF_SMALL]; - char real_if_name[IFNAMSIZ]; - json_object *j_resp; - char *response; - bool err; - - dp_test_intf_real(if_name, real_if_name); - snprintf(cmd, sizeof(cmd), "ifconfig %s", real_if_name); - response = dp_test_console_request_w_err(cmd, &err, false); - if (!response || err) - dp_test_assert_internal(false); - j_resp = parse_json(response, parse_err_str, sizeof(parse_err_str)); - free(response); - if (!j_resp) - dp_test_assert_internal(false); - - /* Get interfaces object */ - json_object *j_interfaces; - struct dp_test_json_find_key interfaces_key[] = { - { "interfaces", NULL }, - }; - - j_interfaces = dp_test_json_find(j_resp, interfaces_key, - ARRAY_SIZE(interfaces_key)); - _dp_test_fail_unless(j_interfaces, file, line, - "Can't find json interfaces obj\n"); - - /* Find vrf_id */ - struct dp_test_json_find_key vrf_id_key[] = { - {"vrf_id", NULL}, - }; - json_object *j_vrf_id; - - j_vrf_id = dp_test_json_find(j_interfaces, vrf_id_key, - ARRAY_SIZE(vrf_id_key)); - _dp_test_fail_unless(j_vrf_id, file, line, - "Can't find json vrf_id obj\n"); - *vrf_id = (uint32_t)json_object_get_int(j_vrf_id); - _dp_test_fail_unless(*vrf_id != 0, file, line, - "Can't find json vrf_id int\n"); - json_object_put(j_vrf_id); - 
json_object_put(j_interfaces); - json_object_put(j_resp); -} diff --git a/tests/whole_dp/src/dp_test_lib_intf.h b/tests/whole_dp/src/dp_test_lib_intf_internal.h similarity index 76% rename from tests/whole_dp/src/dp_test_lib_intf.h rename to tests/whole_dp/src/dp_test_lib_intf_internal.h index 7ff5befd..5cb39970 100644 --- a/tests/whole_dp/src/dp_test_lib_intf.h +++ b/tests/whole_dp/src/dp_test_lib_intf_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015-2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -8,21 +8,21 @@ * dataplane UT Interface helpers */ -#ifndef _DP_TEST_LIB_INTF_H_ -#define _DP_TEST_LIB_INTF_H_ +#ifndef _DP_TEST_LIB_INTF_INTERNAL_H_ +#define _DP_TEST_LIB_INTF_INTERNAL_H_ #include #include #include /* conflicts with linux/if_bridge.h */ #include +#include "if/bridge/bridge_port.h" #include "if_var.h" -#include "bridge_port.h" -#include "dp_test_lib.h" -#include "dp_test_json_utils.h" +#include "dp_test/dp_test_lib_intf.h" -#define DP_TEST_INTF_DEF_SRC_MAC "00:00:a6:00:00:01" +#include "dp_test_lib_internal.h" +#include "dp_test_json_utils.h" /* Needs to match IF_PORT_ID_INVALID */ #define DP_TEST_INTF_INVALID_PORT_ID UCHAR_MAX @@ -66,67 +66,24 @@ uint16_t dp_test_intf2default_dpid(const char *if_name); /* Get interface information */ enum dp_test_intf_loc_e dp_test_intf_loc(const char *if_name); -char *dp_test_intf_real(const char *test_name, char *real_name); -int dp_test_intf_name2index(const char *if_name); -uint8_t dp_test_intf_name2port(const char *if_name); unsigned int dp_test_cont_src_ifindex(unsigned int ifindex); -struct ether_addr *dp_test_intf_name2mac(const char *if_name); -char *dp_test_intf_name2mac_str(const char *if_name); -void dp_test_intf_name2addr(const char *if_name, struct dp_test_addr *addr); -void dp_test_intf_name2addr_str(const char *if_name, int 
family, - char *addr_str, int buf_len); +uint8_t dp_test_intf_name2port(const char *if_name); +struct rte_ether_addr *dp_test_intf_name2mac(const char *if_name); + #define DP_TEST_INTF_STATE_BRIDGE 0x01 #define DP_TEST_INTF_STATE_PBR 0x02 -uint8_t dp_test_intf_name2state(const char *if_name); void dp_test_intf_name_add_state(const char *if_name, uint8_t state); void dp_test_intf_name_del_state(const char *if_name, uint8_t state); -void _dp_test_intf_name2vrfid(const char *if_name, uint32_t *vrf_id, - const char *file, const char *func, int line); -#define dp_test_intf_name2vrfid(if_name, vrf_id) \ - _dp_test_intf_name2vrfid(if_name, vrf_id, \ - __FILE__, __func__, __LINE__) -struct ether_addr *dp_test_intf_port2mac(portid_t port_id); void dp_test_intf_port2name(portid_t port_id, char *if_name); int dp_test_intf_port2index(portid_t port_id); void dp_test_intf_add_addr(const char *if_name, struct dp_test_addr *addr); void dp_test_intf_del_addr(const char *if_name, struct dp_test_addr *addr); -bool dp_test_intf_has_addr(const char *if_name, - const struct dp_test_addr *addr); void dp_test_intf_initial_stats_for_if(const char *ifname, struct if_data *stats); void dp_test_intf_delta_stats_for_if(const char *ifname, const struct if_data *initial_stats, struct if_data *stats); -/* Create / Delete interfaces */ -void _dp_test_intf_bridge_create(const char *br_name, - const char *file, const char *func, - int line); -#define dp_test_intf_bridge_create(br_name) \ - _dp_test_intf_bridge_create(br_name, \ - __FILE__, __func__, __LINE__) - -void _dp_test_intf_bridge_del(const char *br_name, - const char *file, const char *func, - int line); -#define dp_test_intf_bridge_del(br_name) \ - _dp_test_intf_bridge_del(br_name, \ - __FILE__, __func__, __LINE__) - -void _dp_test_intf_bridge_add_port(const char *br_name, const char *if_name, - const char *file, const char *func, - int line); -#define dp_test_intf_bridge_add_port(br_name, if_name) \ - _dp_test_intf_bridge_add_port(br_name, 
if_name, \ - __FILE__, __func__, __LINE__) - -void _dp_test_intf_bridge_remove_port(const char *br_name, const char *if_name, - const char *file, const char *func, - int line); -#define dp_test_intf_bridge_remove_port(br_name, if_name) \ - _dp_test_intf_bridge_remove_port(br_name, if_name, \ - __FILE__, __func__, __LINE__) - void _dp_test_intf_bridge_enable_vlan_filter(const char *br_name, const char *file, const char *func, int line); @@ -262,16 +219,7 @@ void dp_test_intf_vti_delete(const char *vti_name, uint16_t mark, vrfid_t vrf_id); -void _dp_test_intf_lord_create(const char *name, vrfid_t vrf_id, - const char *file, int line); -void _dp_test_intf_lord_delete(const char *name, vrfid_t vrf_id, - const char *file, int line); -#define dp_test_intf_lord_create(name, vrf_id) \ - _dp_test_intf_lord_create(name, vrf_id, __FILE__, __LINE__) -#define dp_test_intf_lord_delete(name, vrf_id) \ - _dp_test_intf_lord_delete(name, vrf_id, __FILE__, __LINE__) - -void _dp_test_intf_vfp_create(const char *name, vrfid_t vrf_id, bool verfiy, +void _dp_test_intf_vfp_create(const char *name, vrfid_t vrf_id, bool verify, const char *file, const char *func, int line); #define dp_test_intf_vfp_create(name, vrf_id) \ _dp_test_intf_vfp_create(name, vrf_id, false, \ @@ -305,24 +253,28 @@ void dp_test_intf_ppp_delete(const char *intf_name, uint32_t vrf_id); uint8_t dp_test_intf_switch_port_count(void); bool dp_test_intf_switch_port_over_bkp(const char *real_if_name); void dp_test_intf_switch_port_activate(const char *real_if_name); +void dp_test_intf_switch_port_deactivate(const char *real_if_name); -void _dp_test_intf_vrf_master_create(const char *name, vrfid_t vrf_id, +void _dp_test_intf_vrf_if_create(const char *name, vrfid_t vrf_id, uint32_t tableid, const char *file, int line); -void _dp_test_intf_vrf_master_delete(const char *name, vrfid_t vrf_id, +void _dp_test_intf_vrf_if_delete(const char *name, vrfid_t vrf_id, uint32_t tableid, const char *file, int line); -vrfid_t 
_dp_test_translate_vrf_id(vrfid_t vrf_id, const char *file, - int line); - -#define dp_test_translate_vrf_id(vrf_id) \ - _dp_test_translate_vrf_id(vrf_id, __FILE__, __LINE__) - bool dp_test_upstream_vrf_lookup_db(uint32_t vrf_id, char *vrf_name, uint32_t *tableid); bool dp_test_upstream_vrf_add_db(uint32_t vrf_id, char *vrf_name, uint32_t *tableid); -#endif /* _DP_TEST_LIB_INTF_H_ */ +void dp_test_pak_add_to_ring(const char *iif_name, + struct rte_mbuf **paks_to_send, + uint32_t num_paks, + bool wait_until_processed); + +int dp_test_pak_get_from_ring(const char *if_name, + struct rte_mbuf **bufs, + int count); + +#endif /* _DP_TEST_LIB_INTF_INTERNAL_H_ */ diff --git a/tests/whole_dp/src/dp_test_lib_pb.c b/tests/whole_dp/src/dp_test_lib_pb.c new file mode 100644 index 00000000..3b0563fd --- /dev/null +++ b/tests/whole_dp/src/dp_test_lib_pb.c @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2019, AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ +#include "if_var.h" +#include "dp_test_controller.h" +#include "dp_test_lib_internal.h" + +#include "protobuf.h" +#include "protobuf_util.h" +#include "protobuf/DataplaneEnvelope.pb-c.h" +#include "protobuf/IPAddress.pb-c.h" + +void dp_test_lib_pb_set_ip_addr(IPAddress *addr, const char *str, void *data) +{ + struct dp_test_addr test_addr; + + if (!dp_test_addr_str_to_addr(str, &test_addr)) + dp_test_abort_internal(); + switch (test_addr.family) { + case AF_INET: + addr->address_oneof_case = + IPADDRESS__ADDRESS_ONEOF_IPV4_ADDR; + addr->ipv4_addr = test_addr.addr.ipv4; + break; + case AF_INET6: + addr->address_oneof_case = + IPADDRESS__ADDRESS_ONEOF_IPV6_ADDR; + + /* Use the data passed in */ + addr->ipv6_addr.data = data; + memcpy(addr->ipv6_addr.data, &test_addr.addr.ipv6, + 16); + addr->ipv6_addr.len = 16; + break; + default: + dp_test_abort_internal(); + } +} + + +void dp_test_lib_pb_wrap_and_send_pb(const char *str, + void *data, size_t data_len) +{ + DataplaneEnvelope msg = 
DATAPLANE_ENVELOPE__INIT; + void *buf; + size_t len; + size_t packed_len; + + msg.type = strdup(str); + dp_test_assert_internal(msg.type); + + msg.msg.data = data; + msg.msg.len = data_len; + + len = dataplane_envelope__get_packed_size(&msg); + + buf = malloc(len); + dp_test_assert_internal(buf); + + packed_len = dataplane_envelope__pack(&msg, buf); + dp_test_assert_internal(len == packed_len); + + dp_test_send_config_src_pb(dp_test_cont_src_get(), buf, len); + + free(msg.type); + free(buf); + free(data); +} diff --git a/tests/whole_dp/src/dp_test_lib_pkt.c b/tests/whole_dp/src/dp_test_lib_pkt.c index 9dc5add9..6b261527 100644 --- a/tests/whole_dp/src/dp_test_lib_pkt.c +++ b/tests/whole_dp/src/dp_test_lib_pkt.c @@ -5,7 +5,7 @@ * This contains library functions for creating test packets and test expect * object. * - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -22,10 +22,10 @@ #include "main.h" #include "dp_test.h" -#include "dp_test_lib.h" -#include "dp_test_lib_intf.h" -#include "dp_test_pktmbuf_lib.h" -#include "dp_test_netlink_state.h" +#include "dp_test_lib_internal.h" +#include "dp_test_lib_intf_internal.h" +#include "dp_test_pktmbuf_lib_internal.h" +#include "dp_test_netlink_state_internal.h" #include "dp_test_console.h" #include "dp_test_lib_exp.h" #include "dp_test_lib_pkt.h" @@ -237,10 +237,10 @@ _dp_test_pkt_from_desc(const struct dp_test_pkt_desc_t *pdesc, struct rte_mbuf *mbuf; switch (pdesc->ether_type) { - case ETHER_TYPE_IPv4: + case RTE_ETHER_TYPE_IPV4: mbuf = _dp_test_v4_pkt_from_desc(pdesc, file, line); break; - case ETHER_TYPE_IPv6: + case RTE_ETHER_TYPE_IPV6: mbuf = _dp_test_v6_pkt_from_desc(pdesc, file, line); break; default: @@ -414,10 +414,10 @@ _dp_test_exp_from_desc(struct rte_mbuf *mbuf, * Decrement TTL and recalc checksum for routed packets */ switch (pdesc->ether_type) { - case ETHER_TYPE_IPv4: + case RTE_ETHER_TYPE_IPV4: dp_test_ipv4_decrement_ttl(exp_mbuf); break; - case ETHER_TYPE_IPv6: + case RTE_ETHER_TYPE_IPV6: dp_test_ipv6_decrement_ttl(exp_mbuf); break; default: diff --git a/tests/whole_dp/src/dp_test_lib_pkt.h b/tests/whole_dp/src/dp_test_lib_pkt.h index 09633c0b..f880658a 100644 --- a/tests/whole_dp/src/dp_test_lib_pkt.h +++ b/tests/whole_dp/src/dp_test_lib_pkt.h @@ -1,6 +1,6 @@ /** * - * Copyright (c) 2017, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017,2019-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -18,7 +18,7 @@ * struct dp_test_pkt_desc_t v4_pkt_desc = { * .text = "TCP IPv4", * .len = 20, - * .ether_type = ETHER_TYPE_IPv4, + * .ether_type = RTE_ETHER_TYPE_IPV4, * .l3_src = "1.1.1.11", * .l2_src = "aa:bb:cc:dd:1:a1", * .l3_dst = "2.2.2.11", @@ -50,7 +50,7 @@ #include #include -#include "dp_test_lib_intf.h" +#include "dp_test_lib_intf_internal.h" /** * @brief Packet descriptor @@ -60,7 +60,7 @@ struct dp_test_pkt_desc_t { const char *text; /** Payload length */ int len; - /** Ethernet type. ETHER_TYPE_IPv4 or ETHER_TYPE_IPv6 */ + /** Ethernet type. RTE_ETHER_TYPE_IPV4 or RTE_ETHER_TYPE_IPV6 */ uint16_t ether_type; /** IPv4 or IPv6 source address */ const char *l3_src; diff --git a/tests/whole_dp/src/dp_test_lib_portmonitor.c b/tests/whole_dp/src/dp_test_lib_portmonitor.c index de21dbe4..d02216a2 100644 --- a/tests/whole_dp/src/dp_test_lib_portmonitor.c +++ b/tests/whole_dp/src/dp_test_lib_portmonitor.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2016 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -18,15 +18,15 @@ #include "dp_test.h" #include "dp_test_str.h" -#include "dp_test_lib.h" +#include "dp_test_lib_internal.h" #include "dp_test_lib_exp.h" -#include "dp_test_lib_intf.h" +#include "dp_test_lib_intf_internal.h" #include "dp_test_lib_pkt.h" #include "dp_test_lib_portmonitor.h" #include "dp_test_npf_fw_lib.h" #include "dp_test_npf_lib.h" -#include "dp_test_pktmbuf_lib.h" -#include "dp_test_netlink_state.h" +#include "dp_test_pktmbuf_lib_internal.h" +#include "dp_test_netlink_state_internal.h" #include "dp_test_console.h" #include "dp_test_json_utils.h" diff --git a/tests/whole_dp/src/dp_test_lib_portmonitor.h b/tests/whole_dp/src/dp_test_lib_portmonitor.h index 926a7d46..b471d7b2 100644 --- a/tests/whole_dp/src/dp_test_lib_portmonitor.h +++ b/tests/whole_dp/src/dp_test_lib_portmonitor.h @@ -1,4 +1,5 @@ /* + * Copyright (c) 2019, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015 by Brocade Communications Systems, Inc. * All rights reserved. * diff --git a/tests/whole_dp/src/dp_test_lib_tcp.c b/tests/whole_dp/src/dp_test_lib_tcp.c index 9d8c08f4..8803b241 100644 --- a/tests/whole_dp/src/dp_test_lib_tcp.c +++ b/tests/whole_dp/src/dp_test_lib_tcp.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2016 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -11,6 +11,7 @@ #include #include "ip_funcs.h" +#include "netinet6/ip6_funcs.h" #include "in_cksum.h" #include "if_var.h" #include "main.h" @@ -18,14 +19,14 @@ #include "dp_test.h" #include "dp_test_controller.h" #include "dp_test_cmd_state.h" -#include "dp_test_netlink_state.h" -#include "dp_test_lib.h" +#include "dp_test_netlink_state_internal.h" +#include "dp_test_lib_internal.h" #include "dp_test_str.h" #include "dp_test_lib_exp.h" -#include "dp_test_lib_intf.h" +#include "dp_test_lib_intf_internal.h" #include "dp_test_lib_pkt.h" #include "dp_test_lib_tcp.h" -#include "dp_test_pktmbuf_lib.h" +#include "dp_test_pktmbuf_lib_internal.h" #include "dp_test_console.h" @@ -90,11 +91,78 @@ dp_test_tcp_flag2str(uint8_t flags, const char *delim) return str; } + +/*************************************************************************** + * TCP Flow Testing + **************************************************************************/ + +/* + * Create an IPv4 TCP or UDP packet descriptor + */ +struct dp_test_pkt_desc_t * +dpt_pdesc_v4_create(const char *text, uint8_t proto, + const char *l2_src, const char *l3_src, uint16_t sport, + const char *l2_dst, const char *l3_dst, uint16_t dport, + const char *rx_intf, const char *tx_intf) +{ + struct dp_test_pkt_desc_t *pkt; + + pkt = calloc(1, sizeof(*pkt)); + + pkt->text = text; + pkt->proto = proto; + pkt->ether_type = RTE_ETHER_TYPE_IPV4; + pkt->l2_src = l2_src; + pkt->l3_src = l3_src; + pkt->l4.tcp.sport = sport; + pkt->l4.tcp.dport = dport; + pkt->l2_dst = l2_dst; + pkt->l3_dst = l3_dst; + pkt->rx_intf = rx_intf; + pkt->tx_intf = tx_intf; + + if (proto == IPPROTO_TCP) + pkt->l4.tcp.win = 8192; + + return pkt; +} + +/* + * Create an IPv6 TCP or UDP packet descriptor + */ +struct dp_test_pkt_desc_t * +dpt_pdesc_v6_create(const char *text, uint8_t proto, + const char *l2_src, const char *l3_src, uint16_t sport, + const char *l2_dst, const char *l3_dst, uint16_t dport, + const char *rx_intf, const char *tx_intf) +{ + 
struct dp_test_pkt_desc_t *pkt; + + pkt = calloc(1, sizeof(*pkt)); + + pkt->text = text; + pkt->proto = proto; + pkt->ether_type = RTE_ETHER_TYPE_IPV6; + pkt->l2_src = l2_src; + pkt->l3_src = l3_src; + pkt->l4.tcp.sport = sport; + pkt->l4.tcp.dport = dport; + pkt->l2_dst = l2_dst; + pkt->l3_dst = l3_dst; + pkt->rx_intf = rx_intf; + pkt->tx_intf = tx_intf; + + if (proto == IPPROTO_TCP) + pkt->l4.tcp.win = 8192; + + return pkt; +} + /* * Write TCP payload, and re-calc checksums */ -void -dp_test_tcp_write_payload(struct rte_mbuf *m, uint plen, const char *payload) +static void +dpt_tcp_write_v4_payload(struct rte_mbuf *m, uint plen, const char *payload) { struct iphdr *ip; struct tcphdr *tcp; @@ -117,47 +185,106 @@ dp_test_tcp_write_payload(struct rte_mbuf *m, uint plen, const char *payload) } static void -dp_test_tcp_pak_receive(uint pktno, - struct dp_test_tcp_call *call, - enum dp_test_tcp_dir dir, uint8_t flags, - uint dlen, char *data, - void *ctx_ptr, uint ctx_uint) +dpt_tcp_write_v6_payload(struct rte_mbuf *m, uint plen, const char *payload) +{ + struct ip6_hdr *ip6; + struct tcphdr *tcp; + char *datap; + + if (!m || plen == 0 || !payload) + return; + + ip6 = ip6hdr(m); + tcp = (struct tcphdr *)(ip6 + 1); + tcp->check = 0; + + datap = (char *)tcp + (tcp->doff << 2); + memcpy(datap, payload, plen); + + tcp->check = dp_test_ipv6_udptcp_cksum(m, ip6, tcp); +} + +/* + * Setup and inject packet for a TCP flow + */ +static void dpt_tcp_pak_receive(uint pktno, struct dpt_tcp_flow *call, + struct dpt_tcp_flow_pkt *df, + void *ctx_ptr, uint ctx_uint) { struct dp_test_pkt_desc_t *pre; struct dp_test_pkt_desc_t *post; - enum dp_test_tcp_dir rev = DP_DIR_REVERSE(dir); + bool dir = df->forw; + bool rev = (dir == DPT_FORW) ? DPT_BACK : DPT_FORW; + uint8_t flags = df->flags; char str[80]; + bool is_v6; - spush(str, sizeof(str), - "%s, Pkt #%u %s, flags 0x%x", call->str, pktno, - dir == DP_DIR_FORW ? 
"FORW":"BACK", flags); + pre = call->desc[dir].pre; + post = call->desc[dir].pst; + + is_v6 = (pre->ether_type == RTE_ETHER_TYPE_IPV6); /* - * Make copies of the pre and post pkt descriptors in case test_cb - * wants to modify them. + * If data is a string, then dlen will be set to zero to indicate we + * need to call strlen for it. */ - struct dp_test_pkt_desc_t pre_copy = *call->desc[dir].pre; - struct dp_test_pkt_desc_t post_copy = *call->desc[dir].post; + if (df->pre_dlen == 0 && df->pre_data != NULL) { + df->pre_dlen = strnlen(df->pre_data, 2000); + dp_test_fail_unless(df->pre_dlen < 2000, + "Pre data is not a string"); + } - pre = &pre_copy; - post = &post_copy; + if (df->pst_dlen == 0 && df->pst_data != NULL) { + df->pst_dlen = strnlen(df->pst_data, 2000); + dp_test_fail_unless(df->pst_dlen < 2000, + "Pst data is not a string"); + } + const char *dir_str = (dir == DPT_FORW) ? "FORW":"BACK"; + + spush(str, sizeof(str), + "[%2u] %s %s, flags 0x%02x", pktno, call->text, + dir_str, flags); + + /* + * Adjust the pre and post pkt descriptors + */ pre->l4.tcp.flags = flags; post->l4.tcp.flags = flags; - pre->len = post->len = dlen; + + /* Post data is same as pre data unless otherwise specd */ + if (df->pst_dlen == 0 || !df->pst_data) { + df->pst_dlen = df->pre_dlen; + df->pst_data = df->pre_data; + } + + pre->len = df->pre_dlen; + post->len = df->pst_dlen; pre->l4.tcp.seq = call->seq[dir] + call->isn[dir]; post->l4.tcp.seq = call->seq[dir] + call->isn[dir]; + /* + * Pre ACK value is local ack number + * Post ACK value is remote seq number + */ pre->l4.tcp.ack = call->ack[dir]; - post->l4.tcp.ack = call->ack[dir]; + post->l4.tcp.ack = call->seq[rev]; - if (flags & (TH_FIN | TH_SYN)) + if (flags & (TH_FIN | TH_SYN)) { call->seq[dir] += 1; - else - call->seq[dir] += post->len; - - call->ack[rev] = call->seq[dir]; + call->ack[rev] += 1; + } else { + /* + * New local SEQ is SEQ + pre length + */ + call->seq[dir] += pre->len; + + /* + * New remote ACK is ACK + post 
length + */ + call->ack[rev] += post->len; + } /* * Callback may change the packet, result and/or next callback @@ -170,12 +297,32 @@ dp_test_tcp_pak_receive(uint pktno, struct rte_mbuf *pre_pak, *post_pak; struct dp_test_expected *test_exp; - pre_pak = dp_test_v4_pkt_from_desc(pre); - post_pak = dp_test_v4_pkt_from_desc(post); - - if (dlen > 0 && data) { - dp_test_tcp_write_payload(pre_pak, dlen, data); - dp_test_tcp_write_payload(post_pak, dlen, data); + if (!is_v6) { + pre_pak = dp_test_v4_pkt_from_desc(pre); + post_pak = dp_test_v4_pkt_from_desc(post); + + if (df->pre_dlen > 0 && df->pre_data) + dpt_tcp_write_v4_payload( + pre_pak, df->pre_dlen, + df->pre_data); + + if (df->pst_dlen > 0 && df->pst_data) + dpt_tcp_write_v4_payload( + post_pak, df->pst_dlen, + df->pst_data); + } else { + pre_pak = dp_test_v6_pkt_from_desc(pre); + post_pak = dp_test_v6_pkt_from_desc(post); + + if (df->pre_dlen > 0 && df->pre_data) + dpt_tcp_write_v6_payload( + pre_pak, df->pre_dlen, + df->pre_data); + + if (df->pst_dlen > 0 && df->pst_data) + dpt_tcp_write_v6_payload( + post_pak, df->pst_dlen, + df->pst_data); } test_exp = dp_test_exp_from_desc(post_pak, post); @@ -191,6 +338,7 @@ dp_test_tcp_pak_receive(uint pktno, if (call->post_cb) (*call->post_cb)(pktno, dir, flags, pre, post, str); + } /* @@ -199,41 +347,40 @@ dp_test_tcp_pak_receive(uint pktno, * call Packet descriptors for forw and back packets * df_array Array of direction, flags and pkt size tuples, * one for each packet to be sent + * df_array_size + * first Index of first pkt in df_array + * last Index of last pkt in df_array (if > 0 and < df_array_size) * ctx_ptr Pointer context to pass to test_cb * ctx_uint Uint context to pass to test_cb */ -void -dp_test_tcp_call(struct dp_test_tcp_call *call, - struct dp_test_tcp_flow_pkt *df_array, - size_t df_array_size, - void *ctx_ptr, uint ctx_uint) +void dpt_tcp_call(struct dpt_tcp_flow *call, struct dpt_tcp_flow_pkt *df_array, + size_t df_array_size, uint first, uint last, + 
void *ctx_ptr, uint ctx_uint) { - struct dp_test_tcp_desc *forw, *back; + struct dpt_tcp_flow_pkt_desc *forw, *back; uint pktno; - forw = &call->desc[DP_DIR_FORW]; - back = &call->desc[DP_DIR_BACK]; + forw = &call->desc[DPT_FORW]; + back = &call->desc[DPT_BACK]; - call->seq[DP_DIR_FORW] = 0; - call->seq[DP_DIR_BACK] = 0; - call->ack[DP_DIR_FORW] = 0; - call->ack[DP_DIR_BACK] = 0; + call->seq[DPT_FORW] = 0; + call->seq[DPT_BACK] = 0; + call->ack[DPT_FORW] = 0; + call->ack[DPT_BACK] = 0; forw->pre->l4.tcp.seq = 0; forw->pre->l4.tcp.ack = 0; - forw->post->l4.tcp.seq = 0; - forw->post->l4.tcp.ack = 0; + forw->pst->l4.tcp.seq = 0; + forw->pst->l4.tcp.ack = 0; back->pre->l4.tcp.seq = 0; back->pre->l4.tcp.ack = 0; - back->post->l4.tcp.seq = 0; - back->post->l4.tcp.ack = 0; - - for (pktno = 0; pktno < df_array_size; pktno++) { - dp_test_tcp_pak_receive(pktno, call, - df_array[pktno].dir, - df_array[pktno].flags, - df_array[pktno].dlen, - df_array[pktno].data, - ctx_ptr, ctx_uint); - } + back->pst->l4.tcp.seq = 0; + back->pst->l4.tcp.ack = 0; + + if (last == 0 || last >= df_array_size) + last = df_array_size - 1; + + for (pktno = first; pktno <= last; pktno++) + dpt_tcp_pak_receive(pktno, call, &df_array[pktno], + ctx_ptr, ctx_uint); } diff --git a/tests/whole_dp/src/dp_test_lib_tcp.h b/tests/whole_dp/src/dp_test_lib_tcp.h index 194e29b3..779e45ff 100644 --- a/tests/whole_dp/src/dp_test_lib_tcp.h +++ b/tests/whole_dp/src/dp_test_lib_tcp.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2016 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -15,7 +15,7 @@ #include #include "if_var.h" -#include "dp_test_pktmbuf_lib.h" +#include "dp_test_pktmbuf_lib_internal.h" /* * dp_test_tcp_flag2str @@ -28,110 +28,115 @@ char *dp_test_tcp_flag2str(uint8_t flags, const char *delim); + +/*************************************************************************** + * TCP Flow Testing + **************************************************************************/ + /* * Provides a mechanism to automatically generate a TCP call, i.e. a forwards * and backwards flow of packets, with use specified TCP flags and automatic * update of seq and ack. */ -enum dp_test_tcp_dir { - DP_DIR_BACK, - DP_DIR_FORW -}; +#define DPT_FORW true +#define DPT_BACK false -#define DP_DIR_REVERSE(x) (x == DP_DIR_FORW ? DP_DIR_BACK : DP_DIR_FORW) - -struct dp_test_tcp_flow_pkt { - enum dp_test_tcp_dir dir; - uint8_t flags; - uint dlen; - char *data; +/* + * TCP flow pkt flags and data + * + * data pointers are pointers to arrays or strings. If an array, then a dlen + * must be specified. If a string, then dlen may be specified. If not specd, + * then dpt_tcp_pak_receive will calculate it. + * + * If pst_data is NULL then the pre_data is used. Typically only ALG tests + * might use pst_data. + */ +struct dpt_tcp_flow_pkt { + bool forw; /* true for forw, false for back */ + uint8_t flags; /* TCP flags */ + uint pre_dlen; + char *pre_data; /* Pre data */ + uint pst_dlen; + char *pst_data; /* Post data */ }; -struct dp_test_tcp_desc { - struct dp_test_pkt_desc_t *pre; - struct dp_test_pkt_desc_t *post; -}; +#define DPT_TCP_CALL_TEXT_LEN 120 -#define DP_TEST_TCP_CALL_DESC_LEN 120 +/* + * TCP flow pkt descriptors + */ +struct dpt_tcp_flow_pkt_desc { + struct dp_test_pkt_desc_t *pre; + struct dp_test_pkt_desc_t *pst; +}; /* - * A TCP call comprises on a number of one or more packets in one or two - * directions. + * TCP flow. + * + * Keeps track of seq and ack numbers, and adds them to the test packets. 
*/ -struct dp_test_tcp_call { - char str[DP_TEST_TCP_CALL_DESC_LEN]; +struct dpt_tcp_flow { + char text[DPT_TCP_CALL_TEXT_LEN]; - /* Packet descriptors for forw and back pkts */ - struct dp_test_tcp_desc desc[2]; + /* Forw and back, pre and post pkt descriptors */ + struct dpt_tcp_flow_pkt_desc desc[2]; /* Initial sequence number */ - uint32_t isn[2]; + uint32_t isn[2]; /* seq and ack; start at zero */ - uint32_t seq[2]; - uint32_t ack[2]; + uint32_t seq[2]; + uint32_t ack[2]; void (*test_cb)(const char *desc, - uint, enum dp_test_tcp_dir, - uint8_t, - struct dp_test_pkt_desc_t *, - struct dp_test_pkt_desc_t *, - void *, uint); - void (*post_cb)(uint, enum dp_test_tcp_dir, uint8_t, - struct dp_test_pkt_desc_t *, - struct dp_test_pkt_desc_t *, - const char *); + uint pktno, bool forw, + uint8_t flags, + struct dp_test_pkt_desc_t *pre, + struct dp_test_pkt_desc_t *post, + void *data, uint index); + void (*post_cb)(uint pktno, bool forw, uint8_t flags, + struct dp_test_pkt_desc_t *pre, + struct dp_test_pkt_desc_t *post, + const char *desc); }; -/* - * Example / template: - * - * struct dp_test_tcp_call tcp_call = { - * .str[0] = '\0', - * .initial_seq = 0, - * .desc[DP_DIR_FORW] = { - * .pre = &ins_pre, - * .post = &ins_post, - * }, - * .desc[DP_DIR_BACK] = { - * .pre = &outs_pre, - * .post = &outs_post, - * }, - * .test_cb = NULL, - * .post_cb = NULL, - * }; - * - * spush(tcp_call.desc, sizeof(tcp_call.desc), "npf TCP strict Test 1"); - * - * struct dp_test_tcp_flow_pkt tcp_pkt[] = { - * // 3-way setup handshake - * {DP_DIR_FORW, TH_SYN, 0}, - * {DP_DIR_BACK, TH_SYN | TH_ACK, 0}, - * {DP_DIR_FORW, TH_ACK, 0}, - * // Data transfer - * {DP_DIR_FORW, TH_ACK, 40}, - * {DP_DIR_BACK, TH_ACK, 100}, - * {DP_DIR_FORW, TH_ACK, 30}, - * // 4-way termination handshake - * {DP_DIR_FORW, TH_FIN}, - * {DP_DIR_BACK, TH_ACK, 0}, - * {DP_DIR_BACK, TH_FIN, 0}, - * {DP_DIR_FORW, TH_ACK, 0}, - * }; - * - * dp_test_tcp_call(&tcp_call, tcp_pkt, ARRAY_SIZE(tcp_pkt)); - */ -void 
-dp_test_tcp_call(struct dp_test_tcp_call *call, - struct dp_test_tcp_flow_pkt *df_array, - size_t df_array_size, - void *ctx_ptr, uint ctx_uint); +struct dp_test_pkt_desc_t *dpt_pdesc_v4_create(const char *text, + uint8_t proto, + const char *l2_src, + const char *l3_src, + uint16_t sport, + const char *l2_dst, + const char *l3_dst, + uint16_t dport, + const char *rx_intf, + const char *tx_intf); + +struct dp_test_pkt_desc_t *dpt_pdesc_v6_create(const char *text, + uint8_t proto, + const char *l2_src, + const char *l3_src, + uint16_t sport, + const char *l2_dst, + const char *l3_dst, + uint16_t dport, + const char *rx_intf, + const char *tx_intf); /* - * Write TCP payload, and re-calc checksums + * TCP call + * + * call Packet descriptors for forw and back packets + * df_array Array of direction, flags and pkt size tuples, + * one for each packet to be sent + * df_array_size + * first Index of first pkt in df_array + * last Index of last pkt in df_array (if > 0 and < df_array_size) + * ctx_ptr Pointer context to pass to test_cb + * ctx_uint Uint context to pass to test_cb */ -void dp_test_tcp_write_payload(struct rte_mbuf *m, uint plen, - const char *payload); +void dpt_tcp_call(struct dpt_tcp_flow *call, struct dpt_tcp_flow_pkt *df_array, + size_t df_array_size, uint first, uint last, + void *ctx_ptr, uint ctx_uint); #endif /* _DP_TEST_LIB_TCP_H_ */ diff --git a/tests/whole_dp/src/dp_test_mac_limit.c b/tests/whole_dp/src/dp_test_mac_limit.c new file mode 100644 index 00000000..596a79f8 --- /dev/null +++ b/tests/whole_dp/src/dp_test_mac_limit.c @@ -0,0 +1,380 @@ +/* + * Copyright (c) 2020, AT&T Intellectual Property. + * All rights reserved. 
+ * + * SPDX-License-Identifier: LGPL-2.1-only + */ + +#include +#include "dp_test/dp_test_macros.h" +#include "util.h" +#include "dp_test.h" +#include "dp_test_controller.h" +#include "dp_test_cmd_state.h" +#include "dp_test_console.h" +#include "dp_test_lib_intf_internal.h" +#include "bridge_vlan_set.h" +#include "fal_plugin_test.h" +#include "protobuf/MacLimitConfig.pb-c.h" + +#define INT1 "dpT10" +#define INT2 "dpT11" + +#define ALL "all" +#define NONE "none" + +#define LOG(l, t, ...) \ + rte_log(RTE_LOG_ ## l, \ + RTE_LOGTYPE_USER1, # t ": " __VA_ARGS__) + +#define DEBUG(...) \ + do { \ + if (dp_test_debug_get() == 2) \ + LOG(DEBUG, MAC_LIMIT, __VA_ARGS__); \ + } while (0) +/* Check "mac-limit dump <intf> <vlan> <profile>" against expected JSON */ +static void _show_mac_limit_info(const char *intf, uint16_t vlan, + const char *profile, uint32_t limit, + bool present, const char *file, int line) +{ + json_object *jexp; + char cmd_str[50]; + + snprintf(cmd_str, sizeof(cmd_str), "mac-limit dump %s %d %s", + intf, vlan, profile); + + /* + * Expected JSON depends on whether an intf is specified + * and if so, whether it is assigned a profile.
+ */ + if (present) { + if (strcmp(intf, NONE) != 0) { + jexp = dp_test_json_create( + "{ " + "\"mac-limit\": " + "{ " + "\"instance\": " + "[ { " + "\"interface\": \"%s\", " + "\"vlan\": %d, " + "\"profile\": \"%s\" " + "} ], " + "\"profile\":" + "[ { " + "\"name\": \"%s\"," + "\"limit\":%d" + "} ] " + "} " "} ", intf, vlan, profile, profile, limit); + } else { + jexp = dp_test_json_create( + "{ " + "\"mac-limit\": " + "{ " + "\"profile\":" + "[ { " + "\"name\": \"%s\"," + "\"limit\":%d" + "} ] " + "} " "} ", profile, limit); + } + } else { + jexp = dp_test_json_create( + "{ " + "\"mac-limit\": " + "{ } }"); + } + + _dp_test_check_json_poll_state(cmd_str, jexp, NULL, + DP_TEST_JSON_CHECK_SUBSET, + false, 0, file, + "", line); + json_object_put(jexp); +} + +#define show_mac_limit_info(intf, vlan, profile, limit, present) \ + _show_mac_limit_info(intf, vlan, profile, limit, present, \ + __FILE__, __LINE__) +/* Expect "mac-limit show status" to report limit == explimit, count == 0 */ +static void _verify_plugin_limit(const char *intf, uint16_t vlan, + uint32_t explimit, + const char *file, int line) +{ + json_object *jexp; + char cmd_str[50]; + + snprintf(cmd_str, sizeof(cmd_str), "mac-limit show status %s %d", + intf, vlan); + jexp = dp_test_json_create( + "{ " + "\"statistics\": " + "{ " + "\"limit\":%d," + "\"count\":0" + "} " "} ", explimit); + + _dp_test_check_json_poll_state(cmd_str, jexp, NULL, + DP_TEST_JSON_CHECK_SUBSET, + false, 0, file, + "", line); + json_object_put(jexp); +} + +#define verify_plugin_limit(intf, vlan, explimit) \ + _verify_plugin_limit(intf, vlan, explimit, __FILE__, __LINE__) + +static void mac_limit_wrap_and_send(MacLimitConfig *ml_cfg) +{ + void *buf; + int len; + + len = mac_limit_config__get_packed_size(ml_cfg); + + buf = malloc(len); + dp_test_assert_internal(buf); + + mac_limit_config__pack(ml_cfg, buf); + + dp_test_lib_pb_wrap_and_send_pb("vyatta:maclimit", buf, len); +} + +static void set_profile(const char *profile_name, int limit) +{ + MacLimitConfig__MacLimitProfileConfig profile_cfg = +
MAC_LIMIT_CONFIG__MAC_LIMIT_PROFILE_CONFIG__INIT; + MacLimitConfig ml_cfg = MAC_LIMIT_CONFIG__INIT; + + DEBUG("Set profile %s limit %d\n", + profile_name, limit); + + ml_cfg.mtype_case = MAC_LIMIT_CONFIG__MTYPE_PROFILE; + ml_cfg.profile = &profile_cfg; + profile_cfg.has_action = true; + profile_cfg.action = MAC_LIMIT_CONFIG__ACTION__SET; + profile_cfg.profile = (char *)profile_name; + profile_cfg.has_limit = true; + profile_cfg.limit = limit; + + mac_limit_wrap_and_send(&ml_cfg); +} + +static void del_profile(const char *profile_name) +{ + MacLimitConfig__MacLimitProfileConfig profile_cfg = + MAC_LIMIT_CONFIG__MAC_LIMIT_PROFILE_CONFIG__INIT; + MacLimitConfig ml_cfg = MAC_LIMIT_CONFIG__INIT; + + DEBUG("Delete profile %s limit\n", + profile_name); + + ml_cfg.mtype_case = MAC_LIMIT_CONFIG__MTYPE_PROFILE; + ml_cfg.profile = &profile_cfg; + profile_cfg.has_action = true; + profile_cfg.action = MAC_LIMIT_CONFIG__ACTION__DELETE; + profile_cfg.profile = (char *)profile_name; + + mac_limit_wrap_and_send(&ml_cfg); +} + +static void assign_profile(const char *profile_name, uint16_t vlan, + const char *intf) +{ + MacLimitConfig__MacLimitIfVLANConfig ifvlan_cfg = + MAC_LIMIT_CONFIG__MAC_LIMIT_IF_VLANCONFIG__INIT; + MacLimitConfig ml_cfg = MAC_LIMIT_CONFIG__INIT; + + DEBUG("Assign profile %s to interface %s vlan %d\n", + profile_name, intf, vlan); + + ml_cfg.mtype_case = MAC_LIMIT_CONFIG__MTYPE_IFVLAN; + ml_cfg.ifvlan = &ifvlan_cfg; + ifvlan_cfg.has_action = true; + ifvlan_cfg.action = MAC_LIMIT_CONFIG__ACTION__SET; + ifvlan_cfg.ifname = (char *)intf; + ifvlan_cfg.has_vlan = true; + ifvlan_cfg.vlan = vlan; + ifvlan_cfg.profile = (char *)profile_name; + + mac_limit_wrap_and_send(&ml_cfg); +} + +static void unassign_profile(const char *profile_name, uint16_t vlan, + const char *intf) +{ + MacLimitConfig__MacLimitIfVLANConfig ifvlan_cfg = + MAC_LIMIT_CONFIG__MAC_LIMIT_IF_VLANCONFIG__INIT; + MacLimitConfig ml_cfg = MAC_LIMIT_CONFIG__INIT; + + DEBUG("Unassign profile %s from 
interface %s\n", + profile_name, intf); + + ml_cfg.mtype_case = MAC_LIMIT_CONFIG__MTYPE_IFVLAN; + ml_cfg.ifvlan = &ifvlan_cfg; + ifvlan_cfg.has_action = true; + ifvlan_cfg.action = MAC_LIMIT_CONFIG__ACTION__DELETE; + ifvlan_cfg.ifname = (char *)intf; + ifvlan_cfg.has_vlan = true; + ifvlan_cfg.vlan = vlan; + + mac_limit_wrap_and_send(&ml_cfg); +} + +DP_DECL_TEST_SUITE(mac_limit); + +DP_DECL_TEST_CASE(mac_limit, limit, NULL, NULL); + +DP_START_TEST(limit, test1) +{ + uint32_t lim1 = 1, lim2 = 2, lim3 = 3, lim4 = 4; + + dp_test_send_config_src(dp_test_cont_src_get(), + "switchport dpT10 hw-switching enable"); + dp_test_send_config_src(dp_test_cont_src_get(), + "switchport dpT11 hw-switching enable"); + + struct bridge_vlan_set *allowed_vlans = bridge_vlan_set_create(); + + /* + * Set up some profiles. + */ + set_profile("p1", lim1); + show_mac_limit_info(NONE, 0, "p1", lim1, true); + + set_profile("p2", lim2); + show_mac_limit_info(NONE, 0, "p2", lim2, true); + + set_profile("p3", lim3); + show_mac_limit_info(NONE, 0, "p3", lim3, true); + + set_profile("p4", lim4); + show_mac_limit_info(NONE, 0, "p4", lim4, true); + + /* + * Assign the profile to a port+vlan before the vlan + * has been created. + */ + assign_profile("p1", 1, INT1); + show_mac_limit_info(INT1, 1, "p1", lim1, true); + + bridge_vlan_set_add(allowed_vlans, 1); + bridge_vlan_set_add(allowed_vlans, 2); + + dp_test_intf_switch_create("switch0"); + dp_test_intf_bridge_enable_vlan_filter("switch0"); + + dp_test_intf_switch_add_port("switch0", INT1); + dp_test_intf_bridge_port_set_vlans("switch0", INT1, + 0, allowed_vlans, NULL); + + dp_test_intf_switch_add_port("switch0", INT2); + dp_test_intf_bridge_port_set_vlans("switch0", INT2, + 0, allowed_vlans, NULL); + + /* + * Vlan created. Verify limit was applied. 
+ */ + verify_plugin_limit(INT1, 1, lim1); + + /* + * TODO: Delete the limit from the profile while it is still + * assigned (no test steps exist for this case yet) + */ + + /* + * Assign same profile to another interface + */ + assign_profile("p1", 1, INT2); + verify_plugin_limit(INT2, 1, lim1); + + /* + * Increase the profile limit and verify that all + * instances to which it is assigned are updated. + */ + lim1 = 120; + set_profile("p1", lim1); + show_mac_limit_info(INT1, 1, "p1", lim1, true); + show_mac_limit_info(INT2, 1, "p1", lim1, true); + verify_plugin_limit(INT1, 1, lim1); + verify_plugin_limit(INT2, 1, lim1); + + /* + * Set different limits on same port and different vlan. + */ + assign_profile("p3", 2, INT1); + assign_profile("p4", 2, INT2); + show_mac_limit_info(INT1, 2, "p3", lim3, true); + show_mac_limit_info(INT2, 2, "p4", lim4, true); + verify_plugin_limit(INT1, 2, lim3); + verify_plugin_limit(INT2, 2, lim4); + + /* + * Recheck vlan 1 to ensure unaffected. + * All checks are for "lim1" as INT1 and INT2 vlan 1 + * are using the same profile. + */ + show_mac_limit_info(INT1, 1, "p1", lim1, true); + show_mac_limit_info(INT2, 1, "p1", lim1, true); + verify_plugin_limit(INT1, 1, lim1); + verify_plugin_limit(INT2, 1, lim1); + + unassign_profile("p1", 1, INT2); + unassign_profile("p3", 2, INT1); + unassign_profile("p4", 2, INT2); + + /* + * Verify no active instances for these. + */ + show_mac_limit_info(INT2, 1, "none", 0, false); + show_mac_limit_info(INT1, 2, "none", 0, false); + show_mac_limit_info(INT2, 2, "none", 0, false); + + /* + * INT1+vlan1 has profile "p1". Assign another profile + * without unassigning the first and verify update. + */ + set_profile("p2", lim2); + assign_profile("p2", 1, INT1); + show_mac_limit_info(INT1, 1, "p2", lim2, true); + verify_plugin_limit(INT1, 1, lim2); + + unassign_profile("p2", 1, INT1); + + /* + * There should be no active instances now. + */ + show_mac_limit_info(INT1, 1, "none", 0, false); + + /* + * Verify all profiles still exist although unassigned.
+ */ + show_mac_limit_info(NONE, 0, "p1", lim1, true); + show_mac_limit_info(NONE, 0, "p2", lim2, true); + show_mac_limit_info(NONE, 0, "p3", lim3, true); + show_mac_limit_info(NONE, 0, "p4", lim4, true); + + DEBUG("Delete profiles\n"); + del_profile("p1"); + del_profile("p2"); + del_profile("p3"); + del_profile("p4"); + + /* + * Verify profiles no longer exist + */ + show_mac_limit_info(NONE, 0, "p1", lim1, false); + show_mac_limit_info(NONE, 0, "p2", lim2, false); + show_mac_limit_info(NONE, 0, "p3", lim3, false); + show_mac_limit_info(NONE, 0, "p4", lim4, false); + + DEBUG("MAC_LIMIT: End\n"); + + dp_test_intf_switch_remove_port("switch0", INT1); + dp_test_intf_switch_remove_port("switch0", INT2); + dp_test_intf_switch_del("switch0"); + bridge_vlan_set_free(allowed_vlans); + + dp_test_send_config_src(dp_test_cont_src_get(), + "switchport dpT10 hw-switching disable"); + dp_test_send_config_src(dp_test_cont_src_get(), + "switchport dpT11 hw-switching disable"); +} DP_END_TEST; + diff --git a/tests/whole_dp/src/dp_test_missed_netlink.c b/tests/whole_dp/src/dp_test_missed_netlink.c deleted file mode 100644 index 19e7aeb1..00000000 --- a/tests/whole_dp/src/dp_test_missed_netlink.c +++ /dev/null @@ -1,249 +0,0 @@ -/*- - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. - * Copyright (c) 2017 by Brocade Communications Systems, Inc. - * All rights reserved. 
- * - * SPDX-License-Identifier: LGPL-2.1-only - * - * Missed netlink unit tests - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "main.h" -#include "if_var.h" -#include "vrf.h" - -#include "compat.h" - -#include "dp_test_cmd_check.h" -#include "dp_test_console.h" -#include "dp_test_lib.h" -#include "dp_test_lib_intf.h" -#include "dp_test_netlink_state.h" -#include "dp_test_str.h" -#include "dp_test.h" - -DP_DECL_TEST_SUITE(missed_netlink); - -#define dp_test_wait_for_missed_count(added, updated, deleted) \ - _dp_test_wait_for_missed_count(added, updated, deleted, \ - __FILE__, __func__, __LINE__) - -static void _dp_test_wait_for_missed_count(int added, int updated, int deleted, - const char *file, - const char *func, int line) -{ - json_object *expected; - - expected = dp_test_json_create("{ \"incomplete\":" - " {" - " \"missed_add\": %d," - " \"missed_update\": %d," - " \"missed_del\": %d," - " }" - "}", added, updated, deleted); - _dp_test_check_json_poll_state("incomplete", - expected, NULL, - DP_TEST_JSON_CHECK_SUBSET, - false, 0, file, func, line); - json_object_put(expected); -} - -#define dp_test_verify_missed_nl_counts(replayed, added, updated, deleted) \ - _dp_test_verify_missed_nl_counts(replayed, added, updated, deleted, \ - __FILE__, __func__, __LINE__) - -static void _dp_test_verify_missed_nl_counts(int replayed, - int added, - int updated, - int deleted, - const char *file, - const char *func, - int line) -{ - json_object *expected; - - expected = dp_test_json_create("{ \"incomplete\":" - " {" - " \"missed_replayed\": %d," - " \"missed_add\": %d," - " \"missed_update\": %d," - " \"missed_del\": %d," - " }" - "}", replayed, added, updated, deleted); - _dp_test_check_json_state("incomplete", - expected, NULL, - DP_TEST_JSON_CHECK_SUBSET, - false, file, func, line); - json_object_put(expected); -} - -static void dp_test_get_missed_nl_counts(unsigned int *replayed, - unsigned 
int *added, - unsigned int *updated, - unsigned int *deleted) -{ - const char *cmd = "incomplete"; - bool err; - json_object *jresp; - json_object *jrule; - char *response; - struct dp_test_json_find_key incomplete_key[] = { - { "incomplete", NULL }, - }; - - response = dp_test_console_request_w_err(cmd, &err, false); - if (!response || err) - dp_test_fail("no response from dataplane"); - - jresp = parse_json(response, parse_err_str, sizeof(parse_err_str)); - free(response); - - jrule = dp_test_json_find(jresp, incomplete_key, - ARRAY_SIZE(incomplete_key)); - - if (!dp_test_json_int_field_from_obj(jrule, "missed_replayed", - (int *)replayed)) - dp_test_fail("Could not get missed_replayed from dataplane"); - - if (!dp_test_json_int_field_from_obj(jrule, "missed_add", - (int *)added)) - dp_test_fail("Could not get missed_add from dataplane"); - - if (!dp_test_json_int_field_from_obj(jrule, "missed_update", - (int *)updated)) - dp_test_fail("Could not get missed_update from dataplane"); - - if (!dp_test_json_int_field_from_obj(jrule, "missed_del", - (int *)deleted)) - dp_test_fail("Could not get missed_del from dataplane"); - json_object_put(jrule); - json_object_put(jresp); -} - -static void dp_test_hide_interface(const char *ifname, - struct ifnet **ifp, - char *saved_ifname, - unsigned int *saved_ifindex) -{ - portid_t portid; - - dp_test_intf_real(ifname, saved_ifname); - *ifp = ifnet_byifname(saved_ifname); - dp_test_assert_internal(*ifp != NULL); - *saved_ifindex = (*ifp)->if_index; - - /* hide the interface from netlink */ - if_unset_ifindex(*ifp); - portid = dp_test_intf_name2port(saved_ifname); - snprintf((*ifp)->if_name, IFNAMSIZ, "port%d", portid); -} - -static void dp_test_restore_interface(struct ifnet *ifp, - const char *ifname, - unsigned int ifindex) -{ - dp_test_assert_internal(ifp != NULL); - snprintf(ifp->if_name, IFNAMSIZ, "%s", ifname); - if_set_ifindex(ifp, ifindex); -} - -DP_DECL_TEST_CASE(missed_netlink, basic_operation, NULL, NULL); 
-DP_START_TEST(basic_operation, basic_operation) -{ - struct ifnet *ifp = NULL; - unsigned int added = 0, updated = 0, deleted = 0, replayed = 0; - char saved_ifname[IFNAMSIZ]; - unsigned int saved_ifindex; - - dp_test_get_missed_nl_counts(&replayed, &added, &updated, &deleted); - dp_test_verify_missed_nl_counts(replayed, added, updated, deleted); - - /* simple adds and replay */ - dp_test_hide_interface("dp1T1", &ifp, saved_ifname, &saved_ifindex); - dp_test_netlink_set_interface_l2_noverify("dp1T1"); - dp_test_netlink_add_ip_address_noverify("dp1T1", "1.1.1.1/24"); - added += 3; - replayed += 3; - dp_test_wait_for_missed_count(added, updated, deleted); - dp_test_restore_interface(ifp, saved_ifname, saved_ifindex); - missed_netlink_replay(saved_ifindex); - dp_test_verify_missed_nl_counts(replayed, added, updated, deleted); - /* second replay shouldn't change counters */ - missed_netlink_replay(saved_ifindex); - dp_test_verify_missed_nl_counts(replayed, added, updated, deleted); - - /* adds, updates and deletes -- no replays */ - dp_test_hide_interface("dp1T1", &ifp, saved_ifname, &saved_ifindex); - dp_test_netlink_set_interface_l2_noverify("dp1T1"); - dp_test_netlink_add_ip_address_noverify("dp1T1", "1.1.1.1/24"); - dp_test_netlink_del_ip_address_noverify("dp1T1", "1.1.1.1/24"); - dp_test_netlink_del_interface_l2_noverify("dp1T1"); - added += 3; - updated += 1; - deleted += 3; - dp_test_wait_for_missed_count(added, updated, deleted); - dp_test_restore_interface(ifp, saved_ifname, saved_ifindex); - missed_netlink_replay(saved_ifindex); - dp_test_verify_missed_nl_counts(replayed, added, updated, deleted); - /* second replay shouldn't change counters */ - missed_netlink_replay(saved_ifindex); - dp_test_verify_missed_nl_counts(replayed, added, updated, deleted); - - /* add and update -- one replay each */ - dp_test_hide_interface("dp1T1", &ifp, saved_ifname, &saved_ifindex); - dp_test_netlink_set_interface_l2_noverify("dp1T1"); - 
dp_test_netlink_set_interface_l2_noverify("dp1T1"); - dp_test_netlink_add_ip_address_noverify("dp1T1", "1.1.1.1/24"); - dp_test_netlink_add_ip_address_noverify("dp1T1", "1.1.1.1/24"); - added += 3; - updated += 3; - replayed += 3; - dp_test_wait_for_missed_count(added, updated, deleted); - dp_test_restore_interface(ifp, saved_ifname, saved_ifindex); - missed_netlink_replay(saved_ifindex); - dp_test_verify_missed_nl_counts(replayed, added, updated, deleted); - /* second replay shouldn't change counters */ - missed_netlink_replay(saved_ifindex); - dp_test_verify_missed_nl_counts(replayed, added, updated, deleted); - - /* multiple adds, updates, deletes -- no replays */ - dp_test_hide_interface("dp1T1", &ifp, saved_ifname, &saved_ifindex); - dp_test_netlink_set_interface_l2_noverify("dp1T1"); - dp_test_netlink_add_ip_address_noverify("dp1T1", "1.1.1.1/24"); - dp_test_netlink_del_ip_address_noverify("dp1T1", "1.1.1.1/24"); - dp_test_netlink_del_interface_l2_noverify("dp1T1"); - dp_test_netlink_set_interface_l2_noverify("dp1T1"); - dp_test_netlink_add_ip_address_noverify("dp1T1", "1.1.1.1/24"); - dp_test_netlink_del_ip_address_noverify("dp1T1", "1.1.1.1/24"); - dp_test_netlink_del_interface_l2_noverify("dp1T1"); - dp_test_netlink_set_interface_l2_noverify("dp1T1"); - dp_test_netlink_add_ip_address_noverify("dp1T1", "1.1.1.1/24"); - dp_test_netlink_del_ip_address_noverify("dp1T1", "1.1.1.1/24"); - dp_test_netlink_del_interface_l2_noverify("dp1T1"); - added += 9; - updated += 3; - deleted += 9; - dp_test_wait_for_missed_count(added, updated, deleted); - dp_test_restore_interface(ifp, saved_ifname, saved_ifindex); - missed_netlink_replay(saved_ifindex); - dp_test_verify_missed_nl_counts(replayed, added, updated, deleted); - /* second replay shouldn't change counters */ - missed_netlink_replay(saved_ifindex); - dp_test_verify_missed_nl_counts(replayed, added, updated, deleted); - - /* set known state and clean up */ - dp_test_netlink_add_ip_address_noverify("dp1T1", 
"1.1.1.1/24"); - dp_test_netlink_add_route("vrf:1 1.1.1.0/24 scope:253 nh int:dp1T1"); - dp_test_nl_del_ip_addr_and_connected("dp1T1", "1.1.1.1/24"); -} DP_END_TEST; diff --git a/tests/whole_dp/src/dp_test_mpls.c b/tests/whole_dp/src/dp_test_mpls.c index 63419c73..7d8e3d68 100644 --- a/tests/whole_dp/src/dp_test_mpls.c +++ b/tests/whole_dp/src/dp_test_mpls.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2021, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015-2017 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -8,7 +8,6 @@ * Dataplane MPLS unit tests */ - #include "ip_funcs.h" #include "ip6_funcs.h" #include "in_cksum.h" @@ -16,12 +15,12 @@ #include "ecmp.h" #include "commands.h" -#include "dp_test_macros.h" +#include "dp_test/dp_test_macros.h" #include "dp_test_console.h" -#include "dp_test_netlink_state.h" -#include "dp_test_lib.h" -#include "dp_test_lib_intf.h" -#include "dp_test_pktmbuf_lib.h" +#include "dp_test_netlink_state_internal.h" +#include "dp_test_lib_internal.h" +#include "dp_test_lib_intf_internal.h" +#include "dp_test_pktmbuf_lib_internal.h" #include "dp_test_lib_exp.h" #include "dp_test_npf_fw_lib.h" @@ -81,6 +80,9 @@ DP_START_TEST(mpls_config, lswap_route_add_del) /* Add an labelled route entry */ dp_test_netlink_add_route("222 mpt:ipv4 nh 2.2.2.1 int:dp2T2 lbls 122"); + /* Re-add it without verifier to make sure redundant updates are ok */ + dp_test_netlink_add_route_nv( + "222 mpt:ipv4 nh 2.2.2.1 int:dp2T2 lbls 122"); /* Remove it */ dp_test_netlink_del_route("222 mpt:ipv4 nh 2.2.2.1 int:dp2T2 lbls 122"); /* Re-add but modified */ @@ -179,7 +181,7 @@ DP_START_TEST(lswap_fwd_simple, simple) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), NULL, - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); /* * Expected packet @@ -193,7 +195,7 @@ DP_START_TEST(lswap_fwd_simple, simple) 
(void)dp_test_pktmbuf_eth_init(expected_pak, nh_mac_str, dp_test_intf_name2mac_str("dp2T2"), - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); exp = dp_test_exp_create(expected_pak); rte_pktmbuf_free(expected_pak); @@ -249,7 +251,7 @@ DP_START_TEST(lswap_fwd_simple, multilabel) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), NULL, - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); /* * Expected packet @@ -262,7 +264,7 @@ DP_START_TEST(lswap_fwd_simple, multilabel) (void)dp_test_pktmbuf_eth_init(expected_pak, nh_mac_str, dp_test_intf_name2mac_str("dp2T2"), - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); exp = dp_test_exp_create(expected_pak); rte_pktmbuf_free(expected_pak); @@ -314,7 +316,7 @@ DP_START_TEST(lswap_fwd_simple, nondp_intf) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), NULL, - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); exp = dp_test_exp_create(test_pak); dp_test_exp_set_fwd_status(exp, DP_TEST_FWD_LOCAL); @@ -354,7 +356,7 @@ DP_START_TEST(lswap_fwd_simple, noroute) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), NULL, - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); exp = dp_test_exp_create(test_pak); dp_test_exp_set_fwd_status(exp, DP_TEST_FWD_DROPPED); @@ -392,7 +394,7 @@ DP_START_TEST(lswap_fwd_simple, fwding_disabled) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), NULL, - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); exp = dp_test_exp_create(test_pak); dp_test_exp_set_fwd_status(exp, DP_TEST_FWD_DROPPED); @@ -441,7 +443,7 @@ DP_START_TEST(lswap_fwd_simple, simple6) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), NULL, - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); /* * Expected packet @@ -455,7 +457,7 @@ DP_START_TEST(lswap_fwd_simple, simple6) (void)dp_test_pktmbuf_eth_init(expected_pak, nh_mac_str, dp_test_intf_name2mac_str("dp2T2"), - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); exp = dp_test_exp_create(expected_pak); 
rte_pktmbuf_free(expected_pak); @@ -512,7 +514,7 @@ DP_START_TEST(lswap_fwd_expnull, simple) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), NULL, - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); /* * Expected packet @@ -526,7 +528,7 @@ DP_START_TEST(lswap_fwd_expnull, simple) (void)dp_test_pktmbuf_eth_init(expected_pak, nh_mac_str, dp_test_intf_name2mac_str("dp2T2"), - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); exp = dp_test_exp_create(expected_pak); rte_pktmbuf_free(expected_pak); @@ -583,7 +585,7 @@ DP_START_TEST(pop_lbl_fwd, simple) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), NULL, - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); /* * Expected packet @@ -597,7 +599,7 @@ DP_START_TEST(pop_lbl_fwd, simple) (void)dp_test_pktmbuf_eth_init(expected_pak, nh_mac_str, dp_test_intf_name2mac_str("dp2T2"), - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); exp = dp_test_exp_create(expected_pak); rte_pktmbuf_free(payload_pak); @@ -652,7 +654,7 @@ DP_START_TEST(pop_lbl_fwd, unlabeled_nh) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), NULL, - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); exp = dp_test_exp_create(test_pak); @@ -698,7 +700,7 @@ DP_START_TEST(imp_fwd_simple, simple) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), NULL, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); /* * Expected packet @@ -711,7 +713,7 @@ DP_START_TEST(imp_fwd_simple, simple) (void)dp_test_pktmbuf_eth_init(expected_pak, nh_mac_str, dp_test_intf_name2mac_str("dp2T2"), - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); ip = dp_test_get_mpls_pak_payload(expected_pak); dp_test_set_pak_ip_field(ip, DP_TEST_SET_TTL, @@ -764,7 +766,7 @@ DP_START_TEST(imp_fwd_simple, twolabels) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), NULL, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); /* * Expected packet @@ -778,7 +780,7 @@ DP_START_TEST(imp_fwd_simple, twolabels) 
(void)dp_test_pktmbuf_eth_init(expected_pak, nh_mac_str, dp_test_intf_name2mac_str("dp2T2"), - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); ip = dp_test_get_mpls_pak_payload(expected_pak); dp_test_set_pak_ip_field(ip, DP_TEST_SET_TTL, @@ -833,7 +835,7 @@ DP_START_TEST(imp_fwd_simple, threelabels) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), NULL, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); /* * Expected packet @@ -849,7 +851,7 @@ DP_START_TEST(imp_fwd_simple, threelabels) (void)dp_test_pktmbuf_eth_init(expected_pak, nh_mac_str, dp_test_intf_name2mac_str("dp2T2"), - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); ip = dp_test_get_mpls_pak_payload(expected_pak); dp_test_set_pak_ip_field(ip, DP_TEST_SET_TTL, @@ -918,7 +920,7 @@ DP_START_TEST(imp_fwd_simple, nlabels) (void)dp_test_pktmbuf_eth_init( test_pak, dp_test_intf_name2mac_str("dp1T1"), - NULL, ETHER_TYPE_IPv4); + NULL, RTE_ETHER_TYPE_IPV4); /* * Expected packet @@ -929,7 +931,7 @@ DP_START_TEST(imp_fwd_simple, nlabels) expected_pak, nh_mac_str, dp_test_intf_name2mac_str("dp2T2"), - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); ip = dp_test_get_mpls_pak_payload(expected_pak); dp_test_set_pak_ip_field(ip, DP_TEST_SET_TTL, @@ -982,7 +984,7 @@ DP_START_TEST(imp_fwd_simple, unlabeled) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), NULL, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); exp = dp_test_exp_create(test_pak); dp_test_exp_set_oif_name(exp, "dp2T2"); @@ -990,7 +992,7 @@ DP_START_TEST(imp_fwd_simple, unlabeled) (void)dp_test_pktmbuf_eth_init(dp_test_exp_get_pak(exp), nh_mac_str, dp_test_intf_name2mac_str("dp2T2"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); dp_test_ipv4_decrement_ttl(dp_test_exp_get_pak(exp)); dp_test_pak_receive(test_pak, "dp1T1", exp); @@ -1034,7 +1036,7 @@ DP_START_TEST(imp_fwd_simple, nondp_intf) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), NULL, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); exp = 
dp_test_exp_create(test_pak); dp_test_exp_set_fwd_status(exp, DP_TEST_FWD_LOCAL); @@ -1081,7 +1083,7 @@ DP_START_TEST(imp_fwd_simple, drop) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), NULL, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); exp = dp_test_exp_create(test_pak); dp_test_exp_set_fwd_status(exp, DP_TEST_FWD_DROPPED); @@ -1097,7 +1099,7 @@ DP_START_TEST(imp_fwd_simple, drop) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), NULL, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); exp = dp_test_exp_create(test_pak); dp_test_exp_set_fwd_status(exp, DP_TEST_FWD_DROPPED); @@ -1142,7 +1144,7 @@ DP_START_TEST(imp_ipv6_fwd_simple, simple) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV6); /* * Expected packet */ @@ -1157,7 +1159,7 @@ DP_START_TEST(imp_ipv6_fwd_simple, simple) (void)dp_test_pktmbuf_eth_init(expected_pak, nh_mac_str, dp_test_intf_name2mac_str("dp1T2"), - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); exp = dp_test_exp_create(expected_pak); rte_pktmbuf_free(payload_pak); @@ -1205,7 +1207,7 @@ DP_START_TEST(imp_ipv6_fwd_simple, twolabels) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV6); /* * Expected packet */ @@ -1221,7 +1223,7 @@ DP_START_TEST(imp_ipv6_fwd_simple, twolabels) (void)dp_test_pktmbuf_eth_init(expected_pak, nh_mac_str, dp_test_intf_name2mac_str("dp1T2"), - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); exp = dp_test_exp_create(expected_pak); rte_pktmbuf_free(payload_pak); @@ -1270,7 +1272,7 @@ DP_START_TEST(imp_ipv6_fwd_simple, threelabels) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV6); /* * Expected packet */ @@ -1288,7 +1290,7 @@ DP_START_TEST(imp_ipv6_fwd_simple, threelabels) 
(void)dp_test_pktmbuf_eth_init(expected_pak, nh_mac_str, dp_test_intf_name2mac_str("dp1T2"), - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); exp = dp_test_exp_create(expected_pak); rte_pktmbuf_free(payload_pak); @@ -1352,7 +1354,7 @@ DP_START_TEST(imp_ipv6_fwd_simple, nlabels) test_pak, dp_test_intf_name2mac_str("dp1T1"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV6); /* * Expected packet @@ -1367,7 +1369,7 @@ DP_START_TEST(imp_ipv6_fwd_simple, nlabels) expected_pak, nh_mac_str, dp_test_intf_name2mac_str("dp1T2"), - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); exp = dp_test_exp_create(expected_pak); rte_pktmbuf_free(payload_pak); @@ -1414,7 +1416,7 @@ DP_START_TEST(imp_ipv6_fwd_simple, unlabeled) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV6); /* * Expected packet */ @@ -1425,7 +1427,7 @@ DP_START_TEST(imp_ipv6_fwd_simple, unlabeled) (void)dp_test_pktmbuf_eth_init(dp_test_exp_get_pak(exp), nh_mac_str, dp_test_intf_name2mac_str("dp1T2"), - ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV6); /* now send test pak and check we get expected back */ dp_test_pak_receive(test_pak, "dp1T1", exp); @@ -1466,7 +1468,7 @@ DP_START_TEST(imp_ipv6_fwd_simple, nondp_intf) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV6); /* * Expected packet */ @@ -1546,7 +1548,7 @@ DP_START_TEST(disp_fwd_expnull, simple) 1, &len); dp_test_pktmbuf_eth_init(payload_pak, dp_test_intf_name2mac_str("dp1T1"), - NULL, ETHER_TYPE_IPv4); + NULL, RTE_ETHER_TYPE_IPV4); /* * Test that the firewall rule is working @@ -1567,7 +1569,7 @@ DP_START_TEST(disp_fwd_expnull, simple) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), NULL, - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); /* encapsulated ip packet will be same as payload packet except * TTL will have been decremented and it has an 
ether header @@ -1579,7 +1581,7 @@ DP_START_TEST(disp_fwd_expnull, simple) (void)dp_test_pktmbuf_eth_init(expected_pak, nh_mac_str, dp_test_intf_name2mac_str("dp2T2"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); exp = dp_test_exp_create(expected_pak); rte_pktmbuf_free(expected_pak); @@ -1605,6 +1607,7 @@ DP_START_TEST(disp_fwd_expnull, invalid_paks) const char *nh_mac_str; struct iphdr *ip; int len = 22; + int newlen; /* * Set up the input interface address - currently @@ -1636,7 +1639,7 @@ DP_START_TEST(disp_fwd_expnull, invalid_paks) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), NULL, - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); /* * Test 1 - check that the payload packet without errors is @@ -1650,7 +1653,7 @@ DP_START_TEST(disp_fwd_expnull, invalid_paks) (void)dp_test_pktmbuf_eth_init(expected_pak, nh_mac_str, dp_test_intf_name2mac_str("dp2T2"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); exp = dp_test_exp_create(expected_pak); rte_pktmbuf_free(expected_pak); @@ -1669,12 +1672,12 @@ DP_START_TEST(disp_fwd_expnull, invalid_paks) * set the packet length so that it only includes 1 byte of * payload IP packet */ - rte_pktmbuf_data_len(test_pak) = (char *)ip - - rte_pktmbuf_mtod(test_pak, char *) + 1; + newlen = (char *)ip - rte_pktmbuf_mtod(test_pak, char *) + 1; + rte_pktmbuf_trim(test_pak, test_pak->pkt_len - newlen); (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), NULL, - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); exp = dp_test_exp_create(test_pak); dp_test_exp_set_fwd_status(exp, DP_TEST_FWD_DROPPED); @@ -1689,11 +1692,11 @@ DP_START_TEST(disp_fwd_expnull, invalid_paks) ip = dp_test_get_mpls_pak_payload(test_pak); ip->ihl = DP_TEST_PAK_DEFAULT_IHL - 1; ip->check = 0; - ip->check = in_cksum_hdr(ip); + ip->check = dp_in_cksum_hdr(ip); (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), NULL, - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); exp = dp_test_exp_create(test_pak); 
dp_test_exp_set_fwd_status(exp, DP_TEST_FWD_DROPPED); @@ -1710,7 +1713,7 @@ DP_START_TEST(disp_fwd_expnull, invalid_paks) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), NULL, - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); exp = dp_test_exp_create(test_pak); dp_test_exp_set_fwd_status(exp, DP_TEST_FWD_DROPPED); @@ -1725,11 +1728,11 @@ DP_START_TEST(disp_fwd_expnull, invalid_paks) ip = dp_test_get_mpls_pak_payload(test_pak); ip->tot_len = htons(2000); ip->check = 0; - ip->check = in_cksum_hdr(ip); + ip->check = dp_in_cksum_hdr(ip); (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), NULL, - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); exp = dp_test_exp_create(test_pak); dp_test_exp_set_fwd_status(exp, DP_TEST_FWD_DROPPED); @@ -1745,11 +1748,11 @@ DP_START_TEST(disp_fwd_expnull, invalid_paks) ip = dp_test_get_mpls_pak_payload(test_pak); ip->tot_len = htons(sizeof(struct iphdr) - 1); ip->check = 0; - ip->check = in_cksum_hdr(ip); + ip->check = dp_in_cksum_hdr(ip); (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), NULL, - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); exp = dp_test_exp_create(test_pak); dp_test_exp_set_fwd_status(exp, DP_TEST_FWD_DROPPED); @@ -1766,11 +1769,11 @@ DP_START_TEST(disp_fwd_expnull, invalid_paks) "Couldn't parse ip address"); ip->tot_len = htons(sizeof(struct iphdr) - 1); ip->check = 0; - ip->check = in_cksum_hdr(ip); + ip->check = dp_in_cksum_hdr(ip); (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), NULL, - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); exp = dp_test_exp_create(test_pak); dp_test_exp_set_fwd_status(exp, DP_TEST_FWD_DROPPED); @@ -1818,7 +1821,7 @@ DP_START_TEST(disp_fwd_expnull, local) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), NULL, - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); /* * Decapsulated ip packet will be same as payload packet - @@ -1832,7 +1835,7 @@ DP_START_TEST(disp_fwd_expnull, local) 
(void)dp_test_pktmbuf_eth_init(expected_pak, dp_test_intf_name2mac_str("dp1T1"), NULL, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); /* This should be a no-op */ dp_test_netlink_set_mpls_forwarding("dp2T2", false); @@ -1884,7 +1887,7 @@ DP_START_TEST(disp_fwd_ipv4, simple) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), NULL, - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); /* encapsulated ip packet will be same as test packet except * TTL will have been decremented. @@ -1896,7 +1899,7 @@ DP_START_TEST(disp_fwd_ipv4, simple) (void)dp_test_pktmbuf_eth_init(expected_pak, nh_mac_str, dp_test_intf_name2mac_str("dp2T2"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); exp = dp_test_exp_create(expected_pak); rte_pktmbuf_free(expected_pak); @@ -1921,6 +1924,7 @@ DP_START_TEST(disp_fwd_ipv4, invalid_pak) const char *nh_mac_str; struct iphdr *ip; int len = 22; + int newlen; /* Set up the interface addresses */ dp_test_nl_add_ip_addr_and_connected("dp1T1", "1.1.1.1/24"); @@ -1946,7 +1950,7 @@ DP_START_TEST(disp_fwd_ipv4, invalid_pak) (void)dp_test_pktmbuf_eth_init(payload_pak, nh_mac_str, dp_test_intf_name2mac_str("dp2T2"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); test_pak = dp_test_create_mpls_pak(1, (label_t []){52}, (uint8_t[]){DP_TEST_PAK_DEFAULT_TTL}, @@ -1954,15 +1958,15 @@ DP_START_TEST(disp_fwd_ipv4, invalid_pak) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), NULL, - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); ip = dp_test_get_mpls_pak_payload(test_pak); /* * set the packet length so that it only includes 1 byte of * payload IP packet */ - rte_pktmbuf_data_len(test_pak) = (char *)ip - - rte_pktmbuf_mtod(test_pak, char *) + 1; + newlen = (char *)ip - rte_pktmbuf_mtod(test_pak, char *) + 1; + rte_pktmbuf_trim(test_pak, test_pak->pkt_len - newlen); exp = dp_test_exp_create(payload_pak); rte_pktmbuf_free(payload_pak); @@ -2011,7 +2015,7 @@ DP_START_TEST(disp_fwd_ipv4, deag) 1, &len); dp_test_pktmbuf_eth_init(payload_pak, 
dp_test_intf_name2mac_str("dp1T1"), - NULL, ETHER_TYPE_IPv4); + NULL, RTE_ETHER_TYPE_IPV4); /* test packet is payload encapsulated with local label */ labels[0] = 122; @@ -2023,7 +2027,7 @@ DP_START_TEST(disp_fwd_ipv4, deag) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), NULL, - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); /* encapsulated ip packet will be same as payload packet except * TTL will have been decremented and it has an ether header @@ -2035,7 +2039,7 @@ DP_START_TEST(disp_fwd_ipv4, deag) (void)dp_test_pktmbuf_eth_init(expected_pak, nh_mac_str, dp_test_intf_name2mac_str("dp2T2"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); exp = dp_test_exp_create(expected_pak); rte_pktmbuf_free(expected_pak); @@ -2092,7 +2096,7 @@ DP_START_TEST(disp_fwd_ipv4, forus) 1, &len); dp_test_pktmbuf_eth_init(payload_pak, dp_test_intf_name2mac_str("dp1T1"), - NULL, ETHER_TYPE_IPv4); + NULL, RTE_ETHER_TYPE_IPV4); /* Test packet is payload encapsulated with local label */ labels[0] = 122; @@ -2103,7 +2107,7 @@ DP_START_TEST(disp_fwd_ipv4, forus) dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), NULL, - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); expected_pak = payload_pak; if (test_data[i].ipttlpropagate) { @@ -2167,7 +2171,7 @@ DP_START_TEST(disp_fwd_ipv4, no_payload_type) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), NULL, - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); /* encapsulated ip packet will be same as test packet except * TTL will have been decremented. 
@@ -2179,7 +2183,7 @@ DP_START_TEST(disp_fwd_ipv4, no_payload_type) (void)dp_test_pktmbuf_eth_init(expected_pak, nh_mac_str, dp_test_intf_name2mac_str("dp2T2"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); exp = dp_test_exp_create(expected_pak); rte_pktmbuf_free(expected_pak); @@ -2236,7 +2240,7 @@ DP_START_TEST(disp_fwd_vpnv4, simple) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), NULL, - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); /* encapsulated ip packet will be same as test packet except * TTL will have been decremented. @@ -2248,7 +2252,7 @@ DP_START_TEST(disp_fwd_vpnv4, simple) (void)dp_test_pktmbuf_eth_init(expected_pak, nh_mac_str, dp_test_intf_name2mac_str("dp1T0"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); exp = dp_test_exp_create(expected_pak); rte_pktmbuf_free(expected_pak); @@ -2310,7 +2314,7 @@ DP_START_TEST(disp_fwd_vpnv4, deag) 1, &len); dp_test_pktmbuf_eth_init(payload_pak, dp_test_intf_name2mac_str("dp1T1"), - NULL, ETHER_TYPE_IPv4); + NULL, RTE_ETHER_TYPE_IPV4); /* test packet is payload encapsulated with local label */ labels[0] = 122; @@ -2322,7 +2326,7 @@ DP_START_TEST(disp_fwd_vpnv4, deag) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), NULL, - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); /* encapsulated ip packet will be same as payload packet except * TTL will have been decremented and it has an ether header @@ -2334,7 +2338,7 @@ DP_START_TEST(disp_fwd_vpnv4, deag) (void)dp_test_pktmbuf_eth_init(expected_pak, nh_mac_str, dp_test_intf_name2mac_str("dp1T0"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); exp = dp_test_exp_create(expected_pak); rte_pktmbuf_free(expected_pak); @@ -2384,7 +2388,7 @@ DP_START_TEST(disp_fwd_vpnv4, forus) 1, &len); dp_test_pktmbuf_eth_init(payload_pak, dp_test_intf_name2mac_str("dp1T1"), - NULL, ETHER_TYPE_IPv4); + NULL, RTE_ETHER_TYPE_IPV4); /* Test packet is payload encapsulated with local label */ labels[0] = 122; @@ -2397,7 +2401,7 @@ 
DP_START_TEST(disp_fwd_vpnv4, forus) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), NULL, - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); /* Expect test packet to be punted to kernel */ expected_pak = test_pak; @@ -2450,7 +2454,7 @@ DP_START_TEST(disp_fwd_ipv6, simple) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), NULL, - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); /* encapsulated ip packet will be same as test packet except * TTL will have been decremented. @@ -2462,7 +2466,7 @@ DP_START_TEST(disp_fwd_ipv6, simple) (void)dp_test_pktmbuf_eth_init(expected_pak, nh_mac_str, dp_test_intf_name2mac_str("dp2T2"), - ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV6); exp = dp_test_exp_create(expected_pak); rte_pktmbuf_free(expected_pak); @@ -2486,6 +2490,7 @@ DP_START_TEST(disp_fwd_ipv6, invalid_pak) const char *nh_mac_str; struct ip6_hdr *ip6; int len = 22; + int newlen; /* Set up the interface addresses */ dp_test_nl_add_ip_addr_and_connected("dp1T1", "2001:1:1::1/64"); @@ -2510,7 +2515,7 @@ DP_START_TEST(disp_fwd_ipv6, invalid_pak) (void)dp_test_pktmbuf_eth_init(payload_pak, nh_mac_str, dp_test_intf_name2mac_str("dp2T2"), - ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV6); test_pak = dp_test_create_mpls_pak(1, (label_t []){52}, (uint8_t[]){DP_TEST_PAK_DEFAULT_TTL}, @@ -2518,15 +2523,15 @@ DP_START_TEST(disp_fwd_ipv6, invalid_pak) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), NULL, - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); ip6 = dp_test_get_mpls_pak_payload(test_pak); /* * set the packet length so that it only includes 1 byte of * payload IPv6 packet */ - rte_pktmbuf_data_len(test_pak) = (char *)ip6 - - rte_pktmbuf_mtod(test_pak, char *) + 1; + newlen = (char *)ip6 - rte_pktmbuf_mtod(test_pak, char *) + 1; + rte_pktmbuf_trim(test_pak, test_pak->pkt_len - newlen); exp = dp_test_exp_create(payload_pak); rte_pktmbuf_free(payload_pak); @@ -2576,7 +2581,7 @@ DP_START_TEST(disp_fwd_ipv6, no_payload_type) 
(void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), NULL, - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); /* encapsulated ip packet will be same as test packet except * TTL will have been decremented. @@ -2588,7 +2593,7 @@ DP_START_TEST(disp_fwd_ipv6, no_payload_type) (void)dp_test_pktmbuf_eth_init(expected_pak, nh_mac_str, dp_test_intf_name2mac_str("dp2T2"), - ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV6); exp = dp_test_exp_create(expected_pak); rte_pktmbuf_free(expected_pak); @@ -2646,7 +2651,7 @@ DP_START_TEST(disp_fwd_vpnv6, simple) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), NULL, - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); /* encapsulated ip packet will be same as test packet except * TTL will have been decremented. @@ -2658,7 +2663,7 @@ DP_START_TEST(disp_fwd_vpnv6, simple) (void)dp_test_pktmbuf_eth_init(expected_pak, nh_mac_str, dp_test_intf_name2mac_str("dp1T0"), - ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV6); exp = dp_test_exp_create(expected_pak); rte_pktmbuf_free(expected_pak); @@ -2719,7 +2724,7 @@ DP_START_TEST(disp_fwd_vpnv6, deag) 1, &len); dp_test_pktmbuf_eth_init(payload_pak, dp_test_intf_name2mac_str("dp1T1"), - NULL, ETHER_TYPE_IPv6); + NULL, RTE_ETHER_TYPE_IPV6); /* test packet is payload encapsulated with local label */ labels[0] = 122; @@ -2731,7 +2736,7 @@ DP_START_TEST(disp_fwd_vpnv6, deag) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), NULL, - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); /* encapsulated ip packet will be same as payload packet except * TTL will have been decremented and it has an ether header @@ -2743,7 +2748,7 @@ DP_START_TEST(disp_fwd_vpnv6, deag) (void)dp_test_pktmbuf_eth_init(expected_pak, nh_mac_str, dp_test_intf_name2mac_str("dp1T0"), - ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV6); exp = dp_test_exp_create(expected_pak); rte_pktmbuf_free(expected_pak); @@ -2794,7 +2799,7 @@ DP_START_TEST(disp_fwd_vpnv6, forus) 1, &len); 
dp_test_pktmbuf_eth_init(payload_pak, dp_test_intf_name2mac_str("dp1T1"), - NULL, ETHER_TYPE_IPv6); + NULL, RTE_ETHER_TYPE_IPV6); /* Test packet is payload encapsulated with local label */ labels[0] = 122; @@ -2807,7 +2812,7 @@ DP_START_TEST(disp_fwd_vpnv6, forus) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), NULL, - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); /* Expect test packet to be punted to kernel */ expected_pak = test_pak; @@ -2864,7 +2869,7 @@ DP_START_TEST(imp_fwd_ecmp_simple, ecmp) (void)dp_test_pktmbuf_eth_init(test_pak1, dp_test_intf_name2mac_str("dp1T1"), NULL, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); /* * Expected packet1 @@ -2877,7 +2882,7 @@ DP_START_TEST(imp_fwd_ecmp_simple, ecmp) (void)dp_test_pktmbuf_eth_init(expected_pak1, nh_mac_str1, dp_test_intf_name2mac_str("dp2T2"), - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); ip = dp_test_get_mpls_pak_payload(expected_pak1); dp_test_set_pak_ip_field(ip, DP_TEST_SET_TTL, @@ -2897,7 +2902,7 @@ DP_START_TEST(imp_fwd_ecmp_simple, ecmp) (void)dp_test_pktmbuf_eth_init(test_pak2, dp_test_intf_name2mac_str("dp1T1"), NULL, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); /* * Expected packet2 */ @@ -2909,7 +2914,7 @@ DP_START_TEST(imp_fwd_ecmp_simple, ecmp) (void)dp_test_pktmbuf_eth_init(expected_pak2, nh_mac_str2, dp_test_intf_name2mac_str("dp3T3"), - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); ip = dp_test_get_mpls_pak_payload(expected_pak2); dp_test_set_pak_ip_field(ip, DP_TEST_SET_TTL, @@ -2975,7 +2980,7 @@ DP_START_TEST(imp_fwd_ecmp_simple, ecmp_ipv6) (void)dp_test_pktmbuf_eth_init(test_pak1, dp_test_intf_name2mac_str("dp1T1"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV6); /* * Expected packet1 @@ -2991,7 +2996,7 @@ DP_START_TEST(imp_fwd_ecmp_simple, ecmp_ipv6) (void)dp_test_pktmbuf_eth_init(expected_pak1, nh_mac_str1, dp_test_intf_name2mac_str("dp2T2"), - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); exp1 = dp_test_exp_create(expected_pak1); 
rte_pktmbuf_free(payload_pak1); @@ -3008,7 +3013,7 @@ DP_START_TEST(imp_fwd_ecmp_simple, ecmp_ipv6) (void)dp_test_pktmbuf_eth_init(test_pak2, dp_test_intf_name2mac_str("dp1T1"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV6); /* * Expected packet2 */ @@ -3023,7 +3028,7 @@ DP_START_TEST(imp_fwd_ecmp_simple, ecmp_ipv6) (void)dp_test_pktmbuf_eth_init(expected_pak2, nh_mac_str2, dp_test_intf_name2mac_str("dp3T3"), - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); exp2 = dp_test_exp_create(expected_pak2); rte_pktmbuf_free(payload_pak2); @@ -3094,11 +3099,11 @@ DP_START_TEST(lswap_fwd_ecmp_simple, ecmp) (void)dp_test_pktmbuf_eth_init(test_pak1, dp_test_intf_name2mac_str("dp1T1"), NULL, - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); (void)dp_test_pktmbuf_eth_init(test_pak2, dp_test_intf_name2mac_str("dp1T1"), NULL, - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); /* * expected paks */ @@ -3110,7 +3115,7 @@ DP_START_TEST(lswap_fwd_ecmp_simple, ecmp) (void)dp_test_pktmbuf_eth_init(expected_pak1, nh_mac_str1, dp_test_intf_name2mac_str("dp2T2"), - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); expected_labels[0] = 33; expected_pak2 = dp_test_create_mpls_pak( 1, expected_labels, @@ -3119,7 +3124,7 @@ DP_START_TEST(lswap_fwd_ecmp_simple, ecmp) (void)dp_test_pktmbuf_eth_init(expected_pak2, nh_mac_str2, dp_test_intf_name2mac_str("dp3T3"), - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); exp1 = dp_test_exp_create(expected_pak1); dp_test_exp_set_oif_name(exp1, "dp2T2"); @@ -3256,6 +3261,7 @@ get_next_flow_field_combo(struct flow_fields *value) ": %s, %s: %d,%d", value->ip_src_str, value->ip_dst_str, value->udp_src, value->udp_dst); + (void) written; return true; } @@ -3313,7 +3319,7 @@ label_swap_monitor(const struct rte_mbuf *mpls_pak, expected_pak, nh_mac_str[nh_idx], dp_test_intf_name2mac_str(lswap->nh[nh_idx].nh_int), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); } else if ((num_labels > 1) && (!lswap->nh[nh_idx].num_labels || @@ -3341,7 +3347,7 @@ label_swap_monitor(const struct 
rte_mbuf *mpls_pak, expected_pak, nh_mac_str[nh_idx], dp_test_intf_name2mac_str(lswap->nh[nh_idx].nh_int), - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); } else if (num_labels > 1 || lswap->nh[nh_idx].num_labels) { /* label swap - top label is outlabel for nh */ @@ -3375,7 +3381,7 @@ label_swap_monitor(const struct rte_mbuf *mpls_pak, expected_pak, nh_mac_str[nh_idx], dp_test_intf_name2mac_str(lswap->nh[nh_idx].nh_int), - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); } else { /* Unsupported combination */ assert(0); @@ -3411,7 +3417,7 @@ label_imp_monitor(const struct rte_mbuf *ip_pak, * various conditions cause the ip packet to * be dropped instead. */ - ip = pktmbuf_mtol3(ip_pak, struct iphdr *); + ip = dp_pktmbuf_mtol3(ip_pak, struct iphdr *); if (IN_LOOPBACK(ntohl(ip->daddr)) || IN_LOOPBACK(ntohl(ip->saddr))) { struct dp_test_expected *exp; @@ -3437,7 +3443,7 @@ label_imp_monitor(const struct rte_mbuf *ip_pak, /* * hash the mpls packet */ - hash_val = ecmp_mbuf_hash(payload_pak, ETHER_TYPE_IPv4); + hash_val = ecmp_mbuf_hash(payload_pak, RTE_ETHER_TYPE_IPV4); nh_idx = ecmp_lookup(lswap->nh_cnt, hash_val); expected_oif = lswap->nh[nh_idx].nh_int; @@ -3468,7 +3474,7 @@ label_imp_monitor(const struct rte_mbuf *ip_pak, expected_pak, nh_mac_str[nh_idx], dp_test_intf_name2mac_str(lswap->nh[nh_idx].nh_int), - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); } else { /* Unsupported combination */ assert(0); @@ -3518,11 +3524,11 @@ dp_test_mpls_test_pkt(struct flow_fields *flow, (void)dp_test_pktmbuf_eth_init(payload_pak, dp_test_intf_name2mac_str("dp1T1"), NULL, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); /* * also set the l4 payload */ - udp = pktmbuf_mtol4(payload_pak, struct udphdr *); + udp = dp_pktmbuf_mtol4(payload_pak, struct udphdr *); data = (uint32_t *)(udp + 1); *data = non_flow_variation; @@ -3544,7 +3550,7 @@ dp_test_mpls_test_pkt(struct flow_fields *flow, (void)dp_test_pktmbuf_eth_init(mpls_pak, dp_test_intf_name2mac_str("dp1T1"), NULL, - ETHER_TYPE_MPLS); + 
RTE_ETHER_TYPE_MPLS); return mpls_pak; } @@ -3576,11 +3582,11 @@ dp_test_mpls_test_ip_pkt(struct flow_fields *flow, (void)dp_test_pktmbuf_eth_init(ip_pak, dp_test_intf_name2mac_str("dp1T1"), NULL, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); /* * also set the l4 payload */ - udp = pktmbuf_mtol4(ip_pak, struct udphdr *); + udp = dp_pktmbuf_mtol4(ip_pak, struct udphdr *); data = (uint32_t *)(udp + 1); *data = non_flow_variation; @@ -3590,7 +3596,7 @@ dp_test_mpls_test_ip_pkt(struct flow_fields *flow, DP_DECL_TEST_CASE(mpls, imp_fwd_ecmp, NULL, NULL); -DP_START_TEST(imp_fwd_ecmp, payloadv4) +DP_START_TEST_FULL_RUN(imp_fwd_ecmp, payloadv4) { /* * Create an ECP label swap @@ -3706,6 +3712,8 @@ DP_START_TEST(imp_fwd_ecmp, payloadv4) for (i = 0; i < lswap->nh_cnt; i++) total_pkts += nh_pkts[i]; + assert(lswap->nh_cnt > 0); + assert(total_pkts > 0); unsigned int upper_bound = (total_pkts * 13) / (lswap->nh_cnt * 10); unsigned int lower_bound = (total_pkts * 7) / (lswap->nh_cnt * 10); unsigned int ave = (total_pkts * 10) / (lswap->nh_cnt * 10); @@ -3739,7 +3747,7 @@ DP_START_TEST(imp_fwd_ecmp, payloadv4) DP_DECL_TEST_CASE(mpls, disp_fwd_ecmp, NULL, NULL); -DP_START_TEST(disp_fwd_ecmp, payloadv4) +DP_START_TEST_FULL_RUN(disp_fwd_ecmp, payloadv4) { /* * Create an ECP label swap @@ -3822,7 +3830,8 @@ DP_START_TEST(disp_fwd_ecmp, payloadv4) struct dp_test_expected *exp; exp = label_swap_monitor(test_pak, flow.num_labels+1, - labels, ttls, ETHER_TYPE_IPv4, + labels, ttls, + RTE_ETHER_TYPE_IPV4, payload_pak, lswap, nh_mac_str, &hash_val, &nh_idx); dp_test_pak_rx_for(test_pak, "dp1T1", exp, @@ -3845,6 +3854,8 @@ DP_START_TEST(disp_fwd_ecmp, payloadv4) for (i = 0; i < lswap->nh_cnt; i++) total_pkts += nh_pkts[i]; + assert(lswap->nh_cnt > 0); + assert(total_pkts > 0); unsigned int upper_bound = (total_pkts * 12) / (lswap->nh_cnt * 10); unsigned int lower_bound = (total_pkts * 8) / (lswap->nh_cnt * 10); unsigned int ave = (total_pkts * 10) / (lswap->nh_cnt * 10); @@ -3875,7 +3886,7 @@ 
DP_START_TEST(disp_fwd_ecmp, payloadv4) } DP_END_TEST; DP_DECL_TEST_CASE(mpls, lswap_fwd_ecmp, NULL, NULL); -DP_START_TEST(lswap_fwd_ecmp, payloadv4) +DP_START_TEST_FULL_RUN(lswap_fwd_ecmp, payloadv4) { /* * Create an ECP label swap @@ -3957,7 +3968,8 @@ DP_START_TEST(lswap_fwd_ecmp, payloadv4) struct dp_test_expected *exp; exp = label_swap_monitor(test_pak, flow.num_labels+1, - labels, ttls, ETHER_TYPE_IPv4, + labels, ttls, + RTE_ETHER_TYPE_IPV4, payload_pak, lswap, nh_mac_str, &hash_val, &nh_idx); dp_test_pak_receive(test_pak, "dp1T1", exp); @@ -3976,6 +3988,8 @@ DP_START_TEST(lswap_fwd_ecmp, payloadv4) for (i = 0; i < lswap->nh_cnt; i++) total_pkts += nh_pkts[i]; + assert(lswap->nh_cnt > 0); + assert(total_pkts > 0); unsigned int upper_bound = (total_pkts * 12) / (lswap->nh_cnt * 10); unsigned int lower_bound = (total_pkts * 8) / (lswap->nh_cnt * 10); unsigned int ave = (total_pkts * 10) / (lswap->nh_cnt * 10); @@ -4044,7 +4058,7 @@ DP_START_TEST(rx_router_alert, simple) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), NULL, - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); /* * Expected packet - looks *exactly* the same as test packet * including ttls. 
@@ -4171,7 +4185,7 @@ DP_START_TEST(mpls_ttl, imposition) 1, &len); dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), - NULL, ETHER_TYPE_IPv4); + NULL, RTE_ETHER_TYPE_IPV4); /* * Expected packet @@ -4182,12 +4196,12 @@ DP_START_TEST(mpls_ttl, imposition) dp_test_pktmbuf_eth_init(expected_pak, nh_mac_str, dp_test_intf_name2mac_str("dp2T2"), - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); ip = dp_test_get_mpls_pak_payload(expected_pak); ip->ttl = DP_TEST_PAK_DEFAULT_TTL - 1; ip->check = 0; - ip->check = in_cksum_hdr(ip); + ip->check = dp_in_cksum_hdr(ip); exp = dp_test_exp_create(expected_pak); rte_pktmbuf_free(expected_pak); @@ -4270,7 +4284,7 @@ DP_START_TEST(mpls_ttl, pop) (void)dp_test_pktmbuf_eth_init( test_pak, dp_test_intf_name2mac_str("dp1T1"), - NULL, ETHER_TYPE_MPLS); + NULL, RTE_ETHER_TYPE_MPLS); /* * Expected packet @@ -4281,7 +4295,7 @@ DP_START_TEST(mpls_ttl, pop) (void)dp_test_pktmbuf_eth_init( expected_pak, nh_mac_str, dp_test_intf_name2mac_str("dp2T2"), - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); exp = dp_test_exp_create(expected_pak); rte_pktmbuf_free(expected_pak); @@ -4391,7 +4405,7 @@ DP_START_TEST(mpls_ttl, v4_disposition) (void)dp_test_pktmbuf_eth_init( test_pak, dp_test_intf_name2mac_str("dp1T1"), - NULL, ETHER_TYPE_MPLS); + NULL, RTE_ETHER_TYPE_MPLS); /* * Expected packet @@ -4407,7 +4421,7 @@ DP_START_TEST(mpls_ttl, v4_disposition) (void)dp_test_pktmbuf_eth_init( expected_pak, nh_mac_str1, dp_test_intf_name2mac_str(test_data[i].oif), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); dp_test_set_pak_ip_field(ip, DP_TEST_SET_TOS, IPTOS_PREC_INTERNETCONTROL); exp = dp_test_exp_create(expected_pak); @@ -4420,7 +4434,7 @@ DP_START_TEST(mpls_ttl, v4_disposition) (void)dp_test_pktmbuf_eth_init( expected_pak, nh_mac_str2, dp_test_intf_name2mac_str(test_data[i].oif), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); exp = dp_test_exp_create(expected_pak); } rte_pktmbuf_free(expected_pak); @@ -4535,7 +4549,7 @@ DP_START_TEST(mpls_ttl, 
v6_disposition) (void)dp_test_pktmbuf_eth_init( test_pak, dp_test_intf_name2mac_str("dp1T1"), - NULL, ETHER_TYPE_MPLS); + NULL, RTE_ETHER_TYPE_MPLS); /* * Expected packet @@ -4550,7 +4564,7 @@ DP_START_TEST(mpls_ttl, v6_disposition) (void)dp_test_pktmbuf_eth_init( expected_pak, nh_mac_str1, dp_test_intf_name2mac_str(test_data[i].oif), - ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV6); exp = dp_test_exp_create(expected_pak); rte_pktmbuf_free(payload_pak); } else { @@ -4560,7 +4574,7 @@ DP_START_TEST(mpls_ttl, v6_disposition) (void)dp_test_pktmbuf_eth_init( expected_pak, nh_mac_str2, dp_test_intf_name2mac_str(test_data[i].oif), - ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV6); exp = dp_test_exp_create(expected_pak); } rte_pktmbuf_free(expected_pak); @@ -4692,9 +4706,9 @@ DP_START_TEST(mpls_icmp, ttl_v4) payload_pak); dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), - NULL, ETHER_TYPE_MPLS); + NULL, RTE_ETHER_TYPE_MPLS); - copy_from = pktmbuf_mtol3(payload_pak, struct iphdr *); + copy_from = dp_pktmbuf_mtol3(payload_pak, struct iphdr *); /* * Create expected icmp packet @@ -4738,7 +4752,7 @@ DP_START_TEST(mpls_icmp, ttl_v4) cp[7] = 1; /* ieo_ctype=ICMP_EXT_MPLS_INCOMING */ /* The incoming label stack */ - memcpy(&cp[8], pktmbuf_mtol3(test_pak, char *), + memcpy(&cp[8], dp_pktmbuf_mtol3(test_pak, char *), test_data[i].nlabels * 4); /* Finally the ICMP checksum fields */ @@ -4757,14 +4771,14 @@ DP_START_TEST(mpls_icmp, ttl_v4) expected_pak, nh_mac_str, dp_test_intf_name2mac_str("dp2T2"), - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); } else { expected_pak = icmp_pak; dp_test_pktmbuf_eth_init( expected_pak, nh_mac_str, dp_test_intf_name2mac_str("dp2T2"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); } exp = dp_test_exp_create(expected_pak); @@ -4831,7 +4845,7 @@ DP_START_TEST(mpls_icmp, invalid_paks) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), NULL, - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); exp = dp_test_exp_create(test_pak); 
dp_test_exp_set_fwd_status(exp, DP_TEST_FWD_DROPPED); @@ -4843,10 +4857,10 @@ DP_START_TEST(mpls_icmp, invalid_paks) len = 64; payload_pak = dp_test_create_ipv4_pak("10.73.0.0", "10.73.2.0", 1, &len); - ip = pktmbuf_mtol3(payload_pak, struct iphdr *); + ip = dp_pktmbuf_mtol3(payload_pak, struct iphdr *); ip->tot_len = htons(sizeof(struct iphdr) + 1500); ip->check = 0; - ip->check = in_cksum_hdr(ip); + ip->check = dp_in_cksum_hdr(ip); test_pak = dp_test_create_mpls_pak(1, (label_t []){144}, (uint8_t []){1}, @@ -4855,7 +4869,7 @@ DP_START_TEST(mpls_icmp, invalid_paks) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), NULL, - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); exp = dp_test_exp_create(test_pak); dp_test_exp_set_fwd_status(exp, DP_TEST_FWD_DROPPED); @@ -4867,10 +4881,10 @@ DP_START_TEST(mpls_icmp, invalid_paks) len = 64; payload_pak = dp_test_create_ipv4_pak("10.73.0.0", "10.73.2.0", 1, &len); - ip = pktmbuf_mtol3(payload_pak, struct iphdr *); + ip = dp_pktmbuf_mtol3(payload_pak, struct iphdr *); ip->ihl = DP_TEST_PAK_DEFAULT_IHL - 1; ip->check = 0; - ip->check = in_cksum_hdr(ip); + ip->check = dp_in_cksum_hdr(ip); test_pak = dp_test_create_mpls_pak(1, (label_t []){144}, (uint8_t []){1}, @@ -4879,7 +4893,7 @@ DP_START_TEST(mpls_icmp, invalid_paks) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), NULL, - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); exp = dp_test_exp_create(test_pak); dp_test_exp_set_fwd_status(exp, DP_TEST_FWD_DROPPED); @@ -4891,7 +4905,7 @@ DP_START_TEST(mpls_icmp, invalid_paks) len = 64; payload_pak = dp_test_create_ipv4_pak("10.73.0.0", "10.73.2.0", 1, &len); - ip = pktmbuf_mtol3(payload_pak, struct iphdr *); + ip = dp_pktmbuf_mtol3(payload_pak, struct iphdr *); ip->check = htons(0xdead); test_pak = dp_test_create_mpls_pak(1, (label_t []){144}, @@ -4901,7 +4915,7 @@ DP_START_TEST(mpls_icmp, invalid_paks) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), NULL, - 
ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); exp = dp_test_exp_create(test_pak); dp_test_exp_set_fwd_status(exp, DP_TEST_FWD_DROPPED); @@ -4982,9 +4996,9 @@ DP_START_TEST(mpls_icmpv6, ttl_v6) payload_pak); dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), - NULL, ETHER_TYPE_MPLS); + NULL, RTE_ETHER_TYPE_MPLS); - copy_from = pktmbuf_mtol3(payload_pak, struct ip6_hdr *); + copy_from = dp_pktmbuf_mtol3(payload_pak, struct ip6_hdr *); /* * Create expected icmp packet @@ -5026,7 +5040,7 @@ DP_START_TEST(mpls_icmpv6, ttl_v6) cp[7] = 1; /* ieo_ctype=ICMP_EXT_MPLS_INCOMING */ /* The incoming label stack */ - memcpy(&cp[8], pktmbuf_mtol3(test_pak, char *), + memcpy(&cp[8], dp_pktmbuf_mtol3(test_pak, char *), test_data[i].nlabels * 4); /* Checksum */ @@ -5047,14 +5061,14 @@ DP_START_TEST(mpls_icmpv6, ttl_v6) expected_pak, nh_mac_str, dp_test_intf_name2mac_str("dp2T2"), - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); } else { expected_pak = icmp6_pak; dp_test_pktmbuf_eth_init( expected_pak, nh_mac_str, dp_test_intf_name2mac_str("dp2T2"), - ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV6); } exp = dp_test_exp_create(expected_pak); @@ -5128,7 +5142,7 @@ DP_START_TEST(mpls_fragment, ip_imposition) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); /* * Expected packet @@ -5167,7 +5181,7 @@ DP_START_TEST(mpls_fragment, ip_imposition) (uint8_t []){DP_TEST_PAK_DEFAULT_TTL - 1}, m); dp_test_pktmbuf_eth_init(expected_pak, nh_mac_str, dp_test_intf_name2mac_str("dp1T0"), - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); rte_pktmbuf_free(m); exp->exp_pak[0] = expected_pak; @@ -5188,7 +5202,7 @@ DP_START_TEST(mpls_fragment, ip_imposition) rte_pktmbuf_free(m); dp_test_pktmbuf_eth_init(expected_pak, nh_mac_str, dp_test_intf_name2mac_str("dp1T0"), - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); exp->exp_pak[1] = expected_pak; /* now send test pak and check we get expected back */ @@ -5247,7 +5261,7 
@@ DP_START_TEST(mpls_fragment, fragmentv4) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), - NULL, ETHER_TYPE_MPLS); + NULL, RTE_ETHER_TYPE_MPLS); /* * Expected packet @@ -5284,7 +5298,7 @@ DP_START_TEST(mpls_fragment, fragmentv4) (uint8_t []){DP_TEST_PAK_DEFAULT_TTL - 1}, m); dp_test_pktmbuf_eth_init(expected_pak, nh_mac_str, dp_test_intf_name2mac_str("dp1T0"), - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); rte_pktmbuf_free(m); exp->exp_pak[0] = expected_pak; @@ -5304,7 +5318,7 @@ DP_START_TEST(mpls_fragment, fragmentv4) rte_pktmbuf_free(m); dp_test_pktmbuf_eth_init(expected_pak, nh_mac_str, dp_test_intf_name2mac_str("dp1T0"), - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); exp->exp_pak[1] = expected_pak; /* now send test pak and check we get expected back */ @@ -5362,7 +5376,7 @@ DP_START_TEST(mpls_fragment, lswitch_three_labels) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), - NULL, ETHER_TYPE_MPLS); + NULL, RTE_ETHER_TYPE_MPLS); /* * Expected packet @@ -5400,7 +5414,7 @@ DP_START_TEST(mpls_fragment, lswitch_three_labels) expected_pak = dp_test_create_mpls_pak(2, expected_labels, ttls, m); dp_test_pktmbuf_eth_init(expected_pak, nh_mac_str, dp_test_intf_name2mac_str("dp1T0"), - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); rte_pktmbuf_free(m); exp->exp_pak[0] = expected_pak; @@ -5418,7 +5432,7 @@ DP_START_TEST(mpls_fragment, lswitch_three_labels) rte_pktmbuf_free(m); dp_test_pktmbuf_eth_init(expected_pak, nh_mac_str, dp_test_intf_name2mac_str("dp1T0"), - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); exp->exp_pak[1] = expected_pak; /* now send test pak and check we get expected back */ @@ -5478,7 +5492,7 @@ DP_START_TEST(mpls_fragment, lswitch_four_labels) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), - NULL, ETHER_TYPE_MPLS); + NULL, RTE_ETHER_TYPE_MPLS); /* * Expected packet @@ -5518,7 +5532,7 @@ DP_START_TEST(mpls_fragment, lswitch_four_labels) expected_pak = dp_test_create_mpls_pak(3, 
expected_labels, ttls, m); dp_test_pktmbuf_eth_init(expected_pak, nh_mac_str, dp_test_intf_name2mac_str("dp1T0"), - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); rte_pktmbuf_free(m); exp->exp_pak[0] = expected_pak; @@ -5536,7 +5550,7 @@ DP_START_TEST(mpls_fragment, lswitch_four_labels) rte_pktmbuf_free(m); dp_test_pktmbuf_eth_init(expected_pak, nh_mac_str, dp_test_intf_name2mac_str("dp1T0"), - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); exp->exp_pak[1] = expected_pak; /* now send test pak and check we get expected back */ @@ -5598,7 +5612,7 @@ DP_START_TEST(mpls_icmp, frag_needed_v4_lswitch) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), - NULL, ETHER_TYPE_MPLS); + NULL, RTE_ETHER_TYPE_MPLS); /* * Expected packet @@ -5624,7 +5638,7 @@ DP_START_TEST(mpls_icmp, frag_needed_v4_lswitch) icph->un.frag.mtu = htons(1400); /* Truncated original packet goes in next */ - copy_from = pktmbuf_mtol3(payload_pak, struct iphdr *); + copy_from = dp_pktmbuf_mtol3(payload_pak, struct iphdr *); memcpy(icph + 1, copy_from, icmplen - icmpextlen); /* Now the MPLS extended header */ @@ -5638,7 +5652,7 @@ DP_START_TEST(mpls_icmp, frag_needed_v4_lswitch) cp[7] = 1; /* ieo_ctype=ICMP_EXT_MPLS_INCOMING */ /* The incoming label stack */ - memcpy(&cp[8], pktmbuf_mtol3(test_pak, char *), + memcpy(&cp[8], dp_pktmbuf_mtol3(test_pak, char *), 2 * 4); /* Finally the ICMP checksum fields */ @@ -5656,7 +5670,7 @@ DP_START_TEST(mpls_icmp, frag_needed_v4_lswitch) (void)dp_test_pktmbuf_eth_init(expected_pak, nh_mac_str, dp_test_intf_name2mac_str("dp1T0"), - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); exp = dp_test_exp_create(expected_pak); rte_pktmbuf_free(expected_pak); @@ -5712,7 +5726,7 @@ DP_START_TEST(mpls_icmp, frag_needed_v4_imp) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), - NULL, ETHER_TYPE_IPv4); + NULL, RTE_ETHER_TYPE_IPV4); /* * Expected packet @@ -5753,7 +5767,7 @@ DP_START_TEST(mpls_icmp, frag_needed_v4_imp) 
(void)dp_test_pktmbuf_eth_init(icmp_pak, nh_mac_str, dp_test_intf_name2mac_str("dp1T0"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); exp = dp_test_exp_create(icmp_pak); rte_pktmbuf_free(icmp_pak); @@ -5781,6 +5795,7 @@ static void mpls_fragment_v4_invalid_paks(bool df) struct iphdr *ip; const char *nh_mac_str; int len = 1472; + int newlen; label_t labels[2]; uint8_t ttls[2]; @@ -5817,12 +5832,12 @@ static void mpls_fragment_v4_invalid_paks(bool df) * set the packet length so that it only includes 1 byte of * payload IP packet */ - rte_pktmbuf_data_len(test_pak) = (char *)ip - - rte_pktmbuf_mtod(test_pak, char *) + 1; + newlen = (char *)ip - rte_pktmbuf_mtod(test_pak, char *) + 1; + rte_pktmbuf_trim(test_pak, test_pak->pkt_len - newlen); (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), - NULL, ETHER_TYPE_MPLS); + NULL, RTE_ETHER_TYPE_MPLS); exp = dp_test_exp_create(test_pak); dp_test_exp_set_fwd_status(exp, DP_TEST_FWD_DROPPED); @@ -5835,11 +5850,11 @@ static void mpls_fragment_v4_invalid_paks(bool df) ip = dp_test_get_mpls_pak_payload(test_pak); ip->ihl = DP_TEST_PAK_DEFAULT_IHL - 1; ip->check = 0; - ip->check = in_cksum_hdr(ip); + ip->check = dp_in_cksum_hdr(ip); (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), NULL, - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); exp = dp_test_exp_create(test_pak); dp_test_exp_set_fwd_status(exp, DP_TEST_FWD_DROPPED); @@ -5854,7 +5869,7 @@ static void mpls_fragment_v4_invalid_paks(bool df) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), NULL, - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); exp = dp_test_exp_create(test_pak); dp_test_exp_set_fwd_status(exp, DP_TEST_FWD_DROPPED); @@ -5867,11 +5882,11 @@ static void mpls_fragment_v4_invalid_paks(bool df) ip = dp_test_get_mpls_pak_payload(test_pak); ip->tot_len = htons(2000); ip->check = 0; - ip->check = in_cksum_hdr(ip); + ip->check = dp_in_cksum_hdr(ip); (void)dp_test_pktmbuf_eth_init(test_pak, 
dp_test_intf_name2mac_str("dp1T1"), NULL, - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); exp = dp_test_exp_create(test_pak); dp_test_exp_set_fwd_status(exp, DP_TEST_FWD_DROPPED); @@ -5885,11 +5900,11 @@ static void mpls_fragment_v4_invalid_paks(bool df) ip = dp_test_get_mpls_pak_payload(test_pak); ip->tot_len = htons(sizeof(struct iphdr) - 1); ip->check = 0; - ip->check = in_cksum_hdr(ip); + ip->check = dp_in_cksum_hdr(ip); (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), NULL, - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); exp = dp_test_exp_create(test_pak); dp_test_exp_set_fwd_status(exp, DP_TEST_FWD_DROPPED); @@ -5904,11 +5919,11 @@ static void mpls_fragment_v4_invalid_paks(bool df) "Couldn't parse ip address"); ip->tot_len = htons(sizeof(struct iphdr) - 1); ip->check = 0; - ip->check = in_cksum_hdr(ip); + ip->check = dp_in_cksum_hdr(ip); (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), NULL, - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); exp = dp_test_exp_create(test_pak); dp_test_exp_set_fwd_status(exp, DP_TEST_FWD_DROPPED); @@ -6035,7 +6050,7 @@ DP_START_TEST(mpls_oam, v4_ecmp) dp_test_netlink_set_mpls_forwarding("dp1T1", false); } DP_END_TEST; -DP_START_TEST(mpls_oam, v4_ecmp_eight_paths) +DP_START_TEST_FULL_RUN(mpls_oam, v4_ecmp_eight_paths) { const char *cmd_string = "mpls oam --labelspace=0 " "--source_ip=1.1.1.1 --dest_ip=127.0.1.0 " @@ -6177,7 +6192,7 @@ DP_START_TEST(mpls_oam, v4_ecmp_eight_paths) dp_test_netlink_set_mpls_forwarding("dp1T1", false); } DP_END_TEST; -DP_START_TEST(mpls_oam, v4_ecmp_lswitch) +DP_START_TEST_FULL_RUN(mpls_oam, v4_ecmp_lswitch) { const char *nh_mac_str1, *nh_mac_str2; struct rte_mbuf *expected_pak; @@ -6265,7 +6280,7 @@ DP_START_TEST(mpls_oam, v4_ecmp_lswitch) payload_pak); (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), - NULL, ETHER_TYPE_MPLS); + NULL, RTE_ETHER_TYPE_MPLS); first_path = (bitmask & ((uint64_t)1 << j)); @@ -6280,7 +6295,7 @@ 
DP_START_TEST(mpls_oam, v4_ecmp_lswitch) (void)dp_test_pktmbuf_eth_init(expected_pak, nh_mac_str1, dp_test_intf_name2mac_str("dp2T2"), - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); exp = dp_test_exp_create(expected_pak); dp_test_exp_set_oif_name(exp, "dp2T2"); @@ -6288,7 +6303,7 @@ DP_START_TEST(mpls_oam, v4_ecmp_lswitch) (void)dp_test_pktmbuf_eth_init(expected_pak, nh_mac_str2, dp_test_intf_name2mac_str("dp3T3"), - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); exp = dp_test_exp_create(expected_pak); dp_test_exp_set_oif_name(exp, "dp3T3"); @@ -6313,7 +6328,7 @@ DP_START_TEST(mpls_oam, v4_ecmp_lswitch) dp_test_netlink_set_mpls_forwarding("dp1T1", false); } DP_END_TEST; -DP_START_TEST(mpls_oam, v4_ecmp_lswitch_two_labels) +DP_START_TEST_FULL_RUN(mpls_oam, v4_ecmp_lswitch_two_labels) { const char *nh_mac_str1, *nh_mac_str2; struct rte_mbuf *expected_pak; @@ -6403,7 +6418,7 @@ DP_START_TEST(mpls_oam, v4_ecmp_lswitch_two_labels) payload_pak); (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), - NULL, ETHER_TYPE_MPLS); + NULL, RTE_ETHER_TYPE_MPLS); first_path = (bitmask & ((uint64_t)1 << j)); @@ -6420,7 +6435,7 @@ DP_START_TEST(mpls_oam, v4_ecmp_lswitch_two_labels) (void)dp_test_pktmbuf_eth_init(expected_pak, nh_mac_str1, dp_test_intf_name2mac_str("dp2T2"), - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); exp = dp_test_exp_create(expected_pak); dp_test_exp_set_oif_name(exp, "dp2T2"); @@ -6428,7 +6443,7 @@ DP_START_TEST(mpls_oam, v4_ecmp_lswitch_two_labels) (void)dp_test_pktmbuf_eth_init(expected_pak, nh_mac_str2, dp_test_intf_name2mac_str("dp3T3"), - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); exp = dp_test_exp_create(expected_pak); dp_test_exp_set_oif_name(exp, "dp3T3"); @@ -6545,7 +6560,7 @@ DP_START_TEST(mpls_oam, v4_ecmp_lswitch_three_labels) payload_pak); (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), - NULL, ETHER_TYPE_MPLS); + NULL, RTE_ETHER_TYPE_MPLS); first_path = (bitmask & ((uint64_t)1 << j)); @@ -6564,7 +6579,7 @@ 
DP_START_TEST(mpls_oam, v4_ecmp_lswitch_three_labels) (void)dp_test_pktmbuf_eth_init(expected_pak, nh_mac_str1, dp_test_intf_name2mac_str("dp2T2"), - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); exp = dp_test_exp_create(expected_pak); dp_test_exp_set_oif_name(exp, "dp2T2"); @@ -6572,7 +6587,7 @@ DP_START_TEST(mpls_oam, v4_ecmp_lswitch_three_labels) (void)dp_test_pktmbuf_eth_init(expected_pak, nh_mac_str2, dp_test_intf_name2mac_str("dp3T3"), - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); exp = dp_test_exp_create(expected_pak); dp_test_exp_set_oif_name(exp, "dp3T3"); @@ -6644,7 +6659,7 @@ DP_START_TEST(imp_fwd_outlabels, v4_single) (void)dp_test_pktmbuf_eth_init( test_pak, dp_test_intf_name2mac_str("dp1T1"), - NULL, ETHER_TYPE_IPv4); + NULL, RTE_ETHER_TYPE_IPV4); /* * Expected packet @@ -6655,7 +6670,7 @@ DP_START_TEST(imp_fwd_outlabels, v4_single) expected_pak, nh_mac_str, dp_test_intf_name2mac_str("dp2T2"), - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); ip = dp_test_get_mpls_pak_payload(expected_pak); dp_test_set_pak_ip_field(ip, DP_TEST_SET_TTL, @@ -6714,7 +6729,7 @@ DP_START_TEST(imp_fwd_outlabels, v4_ecmp) (void)dp_test_pktmbuf_eth_init(test_pak1, dp_test_intf_name2mac_str("dp1T1"), NULL, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); /* * Expected packet1 @@ -6727,7 +6742,7 @@ DP_START_TEST(imp_fwd_outlabels, v4_ecmp) (void)dp_test_pktmbuf_eth_init(expected_pak1, nh_mac_str1, dp_test_intf_name2mac_str("dp2T2"), - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); ip = dp_test_get_mpls_pak_payload(expected_pak1); dp_test_set_pak_ip_field(ip, DP_TEST_SET_TTL, @@ -6747,7 +6762,7 @@ DP_START_TEST(imp_fwd_outlabels, v4_ecmp) (void)dp_test_pktmbuf_eth_init(test_pak2, dp_test_intf_name2mac_str("dp1T1"), NULL, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); /* * Expected packet2 */ @@ -6759,7 +6774,7 @@ DP_START_TEST(imp_fwd_outlabels, v4_ecmp) (void)dp_test_pktmbuf_eth_init(expected_pak2, nh_mac_str2, dp_test_intf_name2mac_str("dp3T3"), - ETHER_TYPE_MPLS); + RTE_ETHER_TYPE_MPLS); ip = 
dp_test_get_mpls_pak_payload(expected_pak2); dp_test_set_pak_ip_field(ip, DP_TEST_SET_TTL, @@ -6783,3 +6798,82 @@ DP_START_TEST(imp_fwd_outlabels, v4_ecmp) dp_test_netlink_set_mpls_forwarding("dp1T1", false); } DP_END_TEST; + +DP_START_TEST(imp_fwd_outlabels, v6_single) +{ + struct ip6_hdr *ip6; + struct dp_test_expected *exp; + struct rte_mbuf *expected_pak; + label_t expected_labels[DP_TEST_MAX_LBLS]; + struct rte_mbuf *test_pak; + const char *nh_mac_str; + uint8_t ttls[DP_TEST_MAX_LBLS]; + int i, nlbls, len = 22; + + /* Set up the interface addresses */ + dp_test_nl_add_ip_addr_and_connected("dp1T1", "2001:1:1::1/64"); + dp_test_nl_add_ip_addr_and_connected("dp2T2", "2.2.2.2/24"); + dp_test_netlink_set_mpls_forwarding("dp1T2", true); + + nh_mac_str = "aa:bb:cc:dd:ee:ff"; + dp_test_netlink_add_neigh("dp2T2", "2.2.2.1", nh_mac_str); + + for (nlbls = 1; nlbls <= DP_TEST_MAX_LBLS; nlbls++) { + char lstack_str[TEST_MAX_CMD_LEN + 1] = {'\0'}; + char label_str[TEST_MAX_CMD_LEN + 1]; + char route1[TEST_MAX_CMD_LEN + 1]; + + for (i = 0; i < nlbls; i++) { + snprintf(label_str, sizeof(label_str), " %d", 122 + i); + strncat(lstack_str, label_str, TEST_MAX_CMD_LEN); + expected_labels[i] = 122 + i; + ttls[i] = DP_TEST_PAK_DEFAULT_TTL - 1; + } + + /* Add the route / nh arp we want the packet to follow */ + snprintf(route1, TEST_MAX_CMD_LEN, + "2010:73:2::0/64 nh 2.2.2.1 int:dp2T2 lbls %s", + lstack_str); + dp_test_netlink_add_route(route1); + + /* + * Test packet + */ + test_pak = dp_test_create_ipv6_pak("2010:73:0::", "2010:73:2::", + 1, &len); + (void)dp_test_pktmbuf_eth_init( + test_pak, + dp_test_intf_name2mac_str("dp1T1"), + NULL, RTE_ETHER_TYPE_IPV6); + + /* + * Expected packet + */ + expected_pak = dp_test_create_mpls_pak(nlbls, expected_labels, + ttls, test_pak); + (void)dp_test_pktmbuf_eth_init( + expected_pak, + nh_mac_str, + dp_test_intf_name2mac_str("dp2T2"), + RTE_ETHER_TYPE_MPLS); + + ip6 = dp_test_get_mpls_pak_payload(expected_pak); + ip6->ip6_hlim = 
DP_TEST_PAK_DEFAULT_TTL - 1; + + exp = dp_test_exp_create(expected_pak); + rte_pktmbuf_free(expected_pak); + dp_test_exp_set_oif_name(exp, "dp2T2"); + + dp_test_pak_receive(test_pak, "dp1T1", exp); + + /* Clean up */ + dp_test_netlink_del_route(route1); + } + + /* Clean up */ + dp_test_netlink_set_mpls_forwarding("dp1T2", false); + dp_test_netlink_del_neigh("dp2T2", "2.2.2.1", nh_mac_str); + dp_test_nl_del_ip_addr_and_connected("dp1T1", "2001:1:1::1/64"); + dp_test_nl_del_ip_addr_and_connected("dp2T2", "2.2.2.2/24"); + +} DP_END_TEST; diff --git a/tests/whole_dp/src/dp_test_mstp_cmds.c b/tests/whole_dp/src/dp_test_mstp_cmds.c index 7b84547f..787b3afa 100644 --- a/tests/whole_dp/src/dp_test_mstp_cmds.c +++ b/tests/whole_dp/src/dp_test_mstp_cmds.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, AT&T Intellectual Property. + * Copyright (c) 2018-2020, AT&T Intellectual Property. * All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only @@ -8,10 +8,10 @@ #include #include "dp_test.h" -#include "dp_test_lib.h" -#include "dp_test_cmd_check.h" +#include "dp_test_lib_internal.h" +#include "dp_test/dp_test_cmd_check.h" #include "dp_test_console.h" -#include "dp_test_lib_intf.h" +#include "dp_test_lib_intf_internal.h" #include "dp_test_json_utils.h" #define POLL_CNT 1 diff --git a/tests/whole_dp/src/dp_test_mstp_fwd.c b/tests/whole_dp/src/dp_test_mstp_fwd.c index 2ea3a669..5c341379 100644 --- a/tests/whole_dp/src/dp_test_mstp_fwd.c +++ b/tests/whole_dp/src/dp_test_mstp_fwd.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019, AT&T Intellectual Property. + * Copyright (c) 2018-2020, AT&T Intellectual Property. * All rights reserved. 
* * SPDX-License-Identifier: LGPL-2.1-only @@ -7,27 +7,27 @@ #include -#include "bridge.h" +#include "if/bridge/bridge.h" #include // conflicts with netinet/in.h #include "dp_test.h" -#include "dp_test_lib.h" -#include "dp_test_cmd_check.h" +#include "dp_test_lib_internal.h" +#include "dp_test/dp_test_cmd_check.h" #include "dp_test_console.h" #include "dp_test_controller.h" -#include "dp_test_lib_intf.h" +#include "dp_test_lib_intf_internal.h" #include "dp_test_json_utils.h" -#include "dp_test_netlink_state.h" +#include "dp_test_netlink_state_internal.h" #define MSTI_CHECK_VLAN_STATE(_expst, _port, _vlan) \ do {enum bridge_ifstate state; \ state = bridge_port_get_state_vlan( \ - _port->if_brport, _vlan); \ + (_port)->if_brport, (_vlan)); \ dp_test_fail_unless( \ - state == _expst, \ + state == (_expst), \ "bridge_port_get_state_vlan(%s %d) failed: %s", \ - _port->if_name, _vlan, \ + (_port)->if_name, (_vlan), \ bridge_get_ifstate_string(state)); \ } while (0) @@ -157,8 +157,8 @@ DP_START_TEST(mstp_fwd_1, mstp_fwd_vlan_state) dp_test_intf_real(p1, port1_name); dp_test_intf_real(p2, port2_name); - struct ifnet *port1 = ifnet_byifname(port1_name); - struct ifnet *port2 = ifnet_byifname(port2_name); + struct ifnet *port1 = dp_ifnet_byifname(port1_name); + struct ifnet *port2 = dp_ifnet_byifname(port2_name); mstp_msti_add(sw, msti, "10:1000"); @@ -242,8 +242,8 @@ DP_START_TEST(mstp_fwd_1, mstp_fwd_vlan_drop) dp_test_intf_real(p1, port1_name); dp_test_intf_real(p2, port2_name); - struct ifnet *port1 = ifnet_byifname(port1_name); - struct ifnet *port2 = ifnet_byifname(port2_name); + struct ifnet *port1 = dp_ifnet_byifname(port1_name); + struct ifnet *port2 = dp_ifnet_byifname(port2_name); mstp_msti_add(sw, msti, "10"); MSTP_MSTI_SET_STATE(sw, BR_STATE_LEARNING, port1, msti); @@ -347,9 +347,9 @@ DP_START_TEST(mstp_fwd_1, mstp_fwd_vlan) dp_test_intf_real(p1, port1_name); dp_test_intf_real(p2, port2_name); dp_test_intf_real(p3, port3_name); - struct ifnet *port1 = 
ifnet_byifname(port1_name); - struct ifnet *port2 = ifnet_byifname(port2_name); - struct ifnet *port3 = ifnet_byifname(port3_name); + struct ifnet *port1 = dp_ifnet_byifname(port1_name); + struct ifnet *port2 = dp_ifnet_byifname(port2_name); + struct ifnet *port3 = dp_ifnet_byifname(port3_name); mstp_msti_add(sw, msti, "10"); MSTP_MSTI_SET_STATE(sw, BR_STATE_FORWARDING, port1, msti); @@ -436,8 +436,8 @@ DP_START_TEST(mstp_fwd_1, mstp_fwd_vlan_flush) dp_test_intf_real(p1, port1_name); dp_test_intf_real(p2, port2_name); - struct ifnet *port1 = ifnet_byifname(port1_name); - struct ifnet *port2 = ifnet_byifname(port2_name); + struct ifnet *port1 = dp_ifnet_byifname(port1_name); + struct ifnet *port2 = dp_ifnet_byifname(port2_name); mstp_msti_add(sw, msti, "10:1000"); MSTP_MSTI_SET_STATE(sw, BR_STATE_FORWARDING, port1, msti); diff --git a/tests/whole_dp/src/dp_test_nat.c b/tests/whole_dp/src/dp_test_nat.c index b8741e64..affaa9e3 100644 --- a/tests/whole_dp/src/dp_test_nat.c +++ b/tests/whole_dp/src/dp_test_nat.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015-2016 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -18,11 +18,11 @@ #include "dp_test.h" #include "dp_test_controller.h" #include "dp_test_cmd_state.h" -#include "dp_test_netlink_state.h" -#include "dp_test_lib.h" +#include "dp_test_netlink_state_internal.h" +#include "dp_test_lib_internal.h" #include "dp_test_lib_exp.h" -#include "dp_test_lib_intf.h" -#include "dp_test_pktmbuf_lib.h" +#include "dp_test_lib_intf_internal.h" +#include "dp_test_pktmbuf_lib_internal.h" #include "dp_test_npf_sess_lib.h" DP_DECL_TEST_SUITE(nat); @@ -77,7 +77,7 @@ DP_START_TEST(dnat, test_dnat) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T0"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); /* Create pak we expect to receive on the tx ring */ exp = dp_test_exp_create(test_pak); @@ -85,7 +85,7 @@ DP_START_TEST(dnat, test_dnat) (void)dp_test_pktmbuf_eth_init(dp_test_exp_get_pak(exp), nh_mac_str_2, dp_test_intf_name2mac_str("dp2T1"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); /* Expect the DNAT to have occurred */ dp_test_set_iphdr(dp_test_exp_get_pak(exp), client_ip, server_ip); @@ -107,7 +107,7 @@ DP_START_TEST(dnat, test_dnat) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp2T1"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); /* Create pak we expect to receive on the tx ring */ exp = dp_test_exp_create(test_pak); @@ -116,7 +116,7 @@ DP_START_TEST(dnat, test_dnat) (void)dp_test_pktmbuf_eth_init(dp_test_exp_get_pak(exp), nh_mac_str_1, dp_test_intf_name2mac_str("dp1T0"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); /* Expect the reverse DNAT to have occurred */ dp_test_set_iphdr(dp_test_exp_get_pak(exp), local_dnat_ip, client_ip); @@ -233,7 +233,7 @@ DP_START_TEST(snat, test_snat) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T0"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); /* Create pak we expect to receive on the tx ring */ exp = dp_test_exp_create(test_pak); @@ -242,7 +242,7 @@ 
DP_START_TEST(snat, test_snat) (void)dp_test_pktmbuf_eth_init(dp_test_exp_get_pak(exp), nh_mac_str_2, dp_test_intf_name2mac_str("dp2T1"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); /* Expect the SNAT to have occurred */ dp_test_set_iphdr(dp_test_exp_get_pak(exp), local_snat_ip, server_ip); @@ -277,14 +277,14 @@ DP_START_TEST(snat, test_snat) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp2T1"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); /* Create pak we expect to receive on the tx ring */ exp = dp_test_exp_create(test_pak); dp_test_exp_set_oif_name(exp, "dp1T0"); (void)dp_test_pktmbuf_eth_init(dp_test_exp_get_pak(exp), nh_mac_str_1, dp_test_intf_name2mac_str("dp1T0"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); /* Expect the SNAT to have been undone */ dp_test_set_iphdr(dp_test_exp_get_pak(exp), server_ip, client_ip); @@ -307,3 +307,141 @@ DP_START_TEST(snat, test_snat) dp_test_nl_del_ip_addr_and_connected("dp1T0", "1.1.1.1/24"); dp_test_nl_del_ip_addr_and_connected("dp2T1", "2.2.2.2/24"); } DP_END_TEST; + +/* + * Dest Nat and Source Nat test + * + * TCP port 80 client 10.0.100.100 SNAT to web server DNAT 10.0.10.10 + * + * 10.0.100.100 -> dp1T0 -> DNAT 10.0.10.10 SNAT 100.0.10.10 -> dp2T1 -> + * 1.1.1.1 2.2.2.2 + * <- <- <- <- reply + * + * Client to Server initial packet (Inbound flow) + * Original Client IP packet: [ 10.0.100.100 | 1.1.1.1 ] + * After DNAT: [ 10.0.100.100 | 10.0.10.10 ] + * After SNAT: [ 2.2.2.2 | 10.0.10.10 ] + * + * Server to client packet reply (Outbound flow) + * Original Server IP packet [ 10.0.10.10 | 2.2.2.2 ] + * After SNAT: [ 10.0.10.10 | 10.0.100.100 ] <- DST rewrite + * After DNAT: [ 1.1.1.1 | 10.0.100.100 ] <- SRC rewrite + */ + +DP_DECL_TEST_CASE(nat, dnat_snat, NULL, NULL); +DP_START_TEST_DONT_RUN(dnat_snat, test_dnat_snat) +{ + const char *client_ip, *server_ip, *local_snat_ip, *local_dnat_ip; + struct dp_test_expected *exp; + struct rte_mbuf *test_pak; + const char *nh_mac_str_1; 
/* nh on dp1T0 (towards client) */ + const char *nh_mac_str_2; /* nh on dp2T1 (towards server) */ + int len = 20; + + client_ip = "10.0.100.100"; + server_ip = "10.0.10.10"; + local_dnat_ip = "1.1.1.1"; /* DNAT is applied inbound */ + local_snat_ip = "2.2.2.2"; /* SNAT is applied outbound */ + + /* Set up the interface addresses */ + dp_test_nl_add_ip_addr_and_connected("dp1T0", "1.1.1.1/24"); + dp_test_nl_add_ip_addr_and_connected("dp2T1", "2.2.2.2/24"); + /* Add the route / nh arp for the client to server flow */ + dp_test_netlink_add_route("10.0.10.0/24 nh 2.2.2.1 int:dp2T1"); + nh_mac_str_2 = "aa:bb:cc:dd:ee:a2"; + dp_test_netlink_add_neigh("dp2T1", "2.2.2.1", nh_mac_str_2); + + /* Add the route / nh arp for the server to client flow */ + dp_test_netlink_add_route("10.0.100.0/24 nh 1.1.1.2 int:dp1T0"); + nh_mac_str_1 = "aa:bb:cc:dd:ee:a1"; + dp_test_netlink_add_neigh("dp1T0", "1.1.1.2", nh_mac_str_1); + + /* + * Test initial packet flow, outside to inside + */ + /* Create pak to match the client connection to server */ +#define SOURCE_PORT_NUMBER 49152 + test_pak = dp_test_create_tcp_ipv4_pak(client_ip, local_dnat_ip, + SOURCE_PORT_NUMBER, 80, + TH_SYN, 0, 0, 5840, + NULL, 1, &len); + (void)dp_test_pktmbuf_eth_init(test_pak, + dp_test_intf_name2mac_str("dp1T0"), + DP_TEST_INTF_DEF_SRC_MAC, + RTE_ETHER_TYPE_IPV4); + + /* Create pak we expect to receive on the tx ring */ + exp = dp_test_exp_create(test_pak); + dp_test_exp_set_oif_name(exp, "dp2T1"); + (void)dp_test_pktmbuf_eth_init(dp_test_exp_get_pak(exp), + nh_mac_str_2, + dp_test_intf_name2mac_str("dp2T1"), + RTE_ETHER_TYPE_IPV4); + + /* Expect the DNAT and SNAT to have occurred */ + dp_test_set_iphdr(dp_test_exp_get_pak(exp), local_snat_ip, server_ip); + dp_test_set_tcphdr(dp_test_exp_get_pak(exp), SOURCE_PORT_NUMBER, 80); + + dp_test_ipv4_decrement_ttl(dp_test_exp_get_pak(exp)); + + /* Add the dnat rule */ + dp_test_cmd_replace_dnat(10, "dp1T0", client_ip, server_ip, IPPROTO_TCP, + 80); + + /* Add the snat
rule */ + dp_test_cmd_replace_snat(20, "dp2T1", client_ip, local_snat_ip, NULL); + + /* setup params for handling varying pak */ + struct dp_test_variable_snat_params params = { .range = NULL }; + + params.saved_cb = + dp_test_exp_set_validate_cb(exp, + dp_test_variable_snat_port); + dp_test_exp_set_validate_ctx(exp, &params, false); + + /* Run the test */ + dp_test_pak_receive(test_pak, "dp1T0", exp); + + /* + * Test reply packet flow, inside to outside + */ + /* Create pak to match the server reply to the client */ + test_pak = dp_test_create_tcp_ipv4_pak(server_ip, local_snat_ip, + 80, params.port_used, + TH_SYN | TH_ACK, 0, 1, 5840, + NULL, 1, &len); + (void)dp_test_pktmbuf_eth_init(test_pak, + dp_test_intf_name2mac_str("dp2T1"), + DP_TEST_INTF_DEF_SRC_MAC, + RTE_ETHER_TYPE_IPV4); + + /* Create pak we expect to receive on the tx ring */ + exp = dp_test_exp_create(test_pak); + dp_test_exp_set_oif_name(exp, "dp1T0"); + (void)dp_test_pktmbuf_eth_init(dp_test_exp_get_pak(exp), + nh_mac_str_1, + dp_test_intf_name2mac_str("dp1T0"), + RTE_ETHER_TYPE_IPV4); + + /* Expect the reverse SNAT and DNAT to have occurred */ + dp_test_set_iphdr(dp_test_exp_get_pak(exp), local_dnat_ip, client_ip); + dp_test_set_tcphdr(dp_test_exp_get_pak(exp), 80, SOURCE_PORT_NUMBER); +#undef SOURCE_PORT_NUMBER + + dp_test_ipv4_decrement_ttl(dp_test_exp_get_pak(exp)); + + /* Run the test */ + dp_test_pak_receive(test_pak, "dp2T1", exp); + + /* Cleanup */ + dp_test_cmd_delete_dnat(10, "dp1T0", client_ip, IPPROTO_TCP); + dp_test_cmd_delete_snat(20, "dp2T1", client_ip); + dp_test_npf_cleanup(); + + dp_test_netlink_del_neigh("dp2T1", "2.2.2.1", nh_mac_str_2); + dp_test_netlink_del_neigh("dp1T0", "1.1.1.2", nh_mac_str_1); + dp_test_netlink_del_route("10.0.100.0/24 nh 1.1.1.2 int:dp1T0"); + dp_test_netlink_del_route("10.0.10.0/24 nh 2.2.2.1 int:dp2T1"); + dp_test_nl_del_ip_addr_and_connected("dp1T0", "1.1.1.1/24"); + dp_test_nl_del_ip_addr_and_connected("dp2T1", "2.2.2.2/24"); +} DP_END_TEST; diff --git
a/tests/whole_dp/src/dp_test_netlink_state.c b/tests/whole_dp/src/dp_test_netlink_state.c index 4547914d..4dccf114 100644 --- a/tests/whole_dp/src/dp_test_netlink_state.c +++ b/tests/whole_dp/src/dp_test_netlink_state.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015-2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -8,7 +8,7 @@ * Routines to generate/send netlink state, from test controller to * dataplane over ZMQ. */ -#include "dp_test_netlink_state.h" +#include "dp_test_netlink_state_internal.h" #include #include @@ -25,18 +25,21 @@ #include #include "main.h" +#include "if/bridge/bridge.h" #include "if_var.h" -#include "vxlan.h" -#include "vrf.h" -#include "bridge.h" +#include "if/vxlan.h" +#include "protobuf/RibUpdate.pb-c.h" +#include "vrf_internal.h" #include "dp_test_controller.h" -#include "dp_test_cmd_check.h" -#include "dp_test_lib.h" -#include "dp_test_lib_intf.h" +#include "dp_test/dp_test_cmd_check.h" +#include "dp_test_lib_internal.h" +#include "dp_test_lib_intf_internal.h" +#include "dp_test_route_broker.h" #include "dp_test_str.h" #include "dp_test.h" #include "dp_test_crypto_lib.h" +#include "dp_test_xfrm_server.h" struct rtvia_v6 { __kernel_sa_family_t rtvia_family; @@ -144,7 +147,7 @@ vrfid_t _dp_test_translate_vrf_id(vrfid_t vrf_id, const char *file, ret = dp_test_upstream_vrf_lookup_db( vrf_id, vrf_name, NULL); _dp_test_fail_unless(ret, file, line, - "unable to find vrf master for vrf %u\n", + "unable to find vrf interface for vrf %u\n", vrf_id); return dp_test_intf_name2index(vrf_name); } @@ -175,7 +178,10 @@ dp_test_netlink_interface_l2_all(const char *ifname, int mtu, char real_ifname[IFNAMSIZ]; dp_test_intf_real(ifname, real_ifname); - dp_test_intf_switch_port_activate(real_ifname); + if (nlmsg_type == RTM_NEWLINK) + dp_test_intf_switch_port_activate(real_ifname); + else 
+ dp_test_intf_switch_port_deactivate(real_ifname); memset(buf, 0, sizeof(buf)); nlh = mnl_nlmsg_put_header(buf); @@ -248,10 +254,10 @@ dp_test_netlink_interface_l2_all(const char *ifname, int mtu, mnl_attr_nest_end(nlh, linkinfo); if (nl_generate_topic(nlh, topic, sizeof(topic)) < 0) - dp_test_assert_internal(0); + dp_test_abort_internal(); if (verify) { - struct ether_addr *mac_addr; + struct rte_ether_addr *mac_addr; char cmd[TEST_MAX_CMD_LEN]; json_object *expected; char ebuf[32]; @@ -474,7 +480,7 @@ dp_test_netlink_tunnel(const char *tun_name, struct nlattr *gre_link, *gre_data; - if (v6 && memcmp(&remote6, &ip6_zero, sizeof(remote6))) + if (v6 && memcmp(&remote6, &ip6_zero, sizeof(remote6)) != 0) ifi->ifi_flags |= IFF_POINTOPOINT|IFF_NOARP; else if (!v6 && remote4 != INADDR_ANY) ifi->ifi_flags |= IFF_POINTOPOINT|IFF_NOARP; @@ -540,12 +546,12 @@ dp_test_netlink_tunnel(const char *tun_name, mnl_attr_put_u32(nlh, IFLA_MTU, 1476); break; } - mnl_attr_put(nlh, IFLA_ADDRESS, sizeof(struct ether_addr), + mnl_attr_put(nlh, IFLA_ADDRESS, sizeof(struct rte_ether_addr), dp_test_intf_name2mac(tun_name)); mnl_attr_put_strz(nlh, IFLA_IFNAME, tun_name); if (nl_generate_topic(nlh, topic, sizeof(topic)) < 0) - dp_test_assert_internal(0); + dp_test_abort_internal(); if (verify) { char cmd[TEST_MAX_CMD_LEN]; @@ -698,7 +704,7 @@ dp_test_netlink_ppp(const char *intf_name, mnl_attr_put_strz(nlh, IFLA_IFNAME, intf_name); if (nl_generate_topic(nlh, topic, sizeof(topic)) < 0) - dp_test_assert_internal(0); + dp_test_abort_internal(); if (verify) { char cmd[TEST_MAX_CMD_LEN]; @@ -777,7 +783,7 @@ _dp_test_verify_neigh(const char *ifname, const char *ipaddr, bool ipv4_neigh = false; struct dp_test_addr addr; uint32_t v4_addr; - struct ether_addr mac; + struct rte_ether_addr mac; char real_ifname[IFNAMSIZ]; dp_test_intf_real(ifname, real_ifname); @@ -878,7 +884,7 @@ dp_test_netlink_neighbour(const char *ifname, const char *nh_addr_str, char buf[MNL_SOCKET_BUFFER_SIZE]; struct nlmsghdr 
*nlh; char topic[DP_TEST_TMP_BUF]; - struct ether_addr mac; + struct rte_ether_addr mac; char real_ifname[IFNAMSIZ]; bool ipv4_neigh = false; uint32_t v4_addr; @@ -1081,8 +1087,8 @@ dp_test_netlink_ip_address(const char *ifname, const char *prefix_str, * Attributes are: * * IFA_UNSPEC, - * IFA_ADDRESS: Host Address - * ---- IFA_LOCAL, + * ---- IFA_ADDRESS: Subnet (or Peer) Address + * IFA_LOCAL: Host Address * IFA_LABEL, * IFA_BROADCAST: Subnet Broadcast * ---- IFA_ANYCAST, @@ -1090,7 +1096,7 @@ dp_test_netlink_ip_address(const char *ifname, const char *prefix_str, * ---- IFA_MULTICAST, * ---- IFA_FLAGS, */ - mnl_attr_put(nlh, IFA_ADDRESS, dp_test_addr_size(&prefix.addr), + mnl_attr_put(nlh, IFA_LOCAL, dp_test_addr_size(&prefix.addr), &prefix.addr.addr); mnl_attr_put_strz(nlh, IFA_LABEL, real_ifname); if (prefix.addr.family == AF_INET) @@ -1100,7 +1106,7 @@ dp_test_netlink_ip_address(const char *ifname, const char *prefix_str, prefix.len)); if (nl_generate_topic(nlh, topic, sizeof(topic)) < 0) - dp_test_assert_internal(0); + dp_test_abort_internal(); if (verify) { /* @@ -1335,20 +1341,10 @@ _dp_test_netlink_set_mpls_forwarding(const char *ifname, bool enable, json_object_put(expected); } -/* - * Add/delete a route, if verify is set then block until oper-state reflects - * the requested state. - * - * incomplete implies no verify as the route will not be installed in a way - * that lets the show command verify it. The user can do further verification - * once it becomes complete. 
- */ static void -dp_test_netlink_route(const char *route_string, uint16_t nl_type, - bool replace, bool verify, bool incomplete, - const char *file, const char *func, int line) +dp_test_netlink_route_nl(struct dp_test_route *route, uint16_t nl_type, + bool replace) { - struct dp_test_route *route = dp_test_parse_route(route_string); struct rtmsg *rtm; char topic[DP_TEST_TMP_BUF]; char buf[MNL_SOCKET_BUFFER_SIZE]; @@ -1359,20 +1355,6 @@ dp_test_netlink_route(const char *route_string, uint16_t nl_type, unsigned int route_idx; struct nlattr *pl_start; - if (verify) { - if (route->tableid == RT_TABLE_LOCAL) - _dp_test_wait_for_local_addr( - route_string, route->vrf_id, - nl_type == RTM_DELROUTE || replace, - file, func, line); - else if (nl_type == RTM_DELROUTE || replace) - _dp_test_wait_for_route(route_string, !replace, false, - file, func, line); - else - dp_test_wait_for_route_gone(route_string, false, - file, func, line); - } - if (route->prefix.addr.family == AF_INET6) route_cnt = route->nh_cnt ? 
route->nh_cnt : 1; else @@ -1475,6 +1457,24 @@ dp_test_netlink_route(const char *route_string, uint16_t nl_type, mnl_attr_put(nlh, RTA_GATEWAY, addr_size, &nh->nh_addr.addr); + } else if (route->prefix.addr.family == + AF_INET6 && + nh->nh_addr.family == + AF_INET) { + struct in6_addr v6addr; +#define IN6_SET_ADDR_V4MAPPED(a6, a4) { \ + (a6)->s6_addr32[0] = 0; \ + (a6)->s6_addr32[1] = 0; \ + (a6)->s6_addr32[2] = htonl(0xffff); \ + (a6)->s6_addr32[3] = (a4); \ + } + IN6_SET_ADDR_V4MAPPED( + &v6addr, + nh->nh_addr.addr.ipv4); + mnl_attr_put(nlh, + RTA_GATEWAY, + sizeof(v6addr), + &v6addr); } else { via.rtvia_family = nh->nh_addr.family; @@ -1568,6 +1568,18 @@ dp_test_netlink_route(const char *route_string, uint16_t nl_type, mnl_attr_put(nlh, RTA_GATEWAY, addr_size, &nh->nh_addr.addr); + } else if (route->prefix.addr.family == + AF_INET6 && + nh->nh_addr.family == + AF_INET) { + struct in6_addr v6addr; + IN6_SET_ADDR_V4MAPPED( + &v6addr, + nh->nh_addr.addr.ipv4); + mnl_attr_put(nlh, + RTA_GATEWAY, + sizeof(v6addr), + &v6addr); } else { struct rtvia_v6 { __kernel_sa_family_t rtvia_family; @@ -1662,11 +1674,247 @@ dp_test_netlink_route(const char *route_string, uint16_t nl_type, } if (nl_generate_topic(nlh, topic, sizeof(topic)) < 0) - dp_test_assert_internal(0); + dp_test_abort_internal(); + + nl_propagate_broker(topic, nlh, nlh->nlmsg_len); + } +} + +static void +dp_test_netlink_route_pb(struct dp_test_route *route, uint16_t nl_type) +{ + IPAddressOrLabel prefix = IPADDRESS_OR_LABEL__INIT; + RibUpdate rtupdate = RIB_UPDATE__INIT; + uint32_t tableid = route->tableid; + Route pbroute = ROUTE__INIT; + struct dp_test_nh *nh; + IPAddress *gateway; + Path **paths; + Path *path; + uint32_t i; + size_t len; + + switch (nl_type) { + case RTM_NEWROUTE: + /* leave as default */ + break; + case RTM_DELROUTE: + rtupdate.action = RIB_UPDATE__ACTION__DELETE; + rtupdate.has_action = true; + break; + default: + dp_test_assert_internal(false); + break; + } + + rtupdate.route = 
&pbroute; + + pbroute.prefix = &prefix; - nl_propagate_broker(topic, nlh); + switch (route->prefix.addr.family) { + case AF_INET: + prefix.address_oneof_case = + IPADDRESS_OR_LABEL__ADDRESS_ONEOF_IPV4_ADDR; + prefix.ipv4_addr = route->prefix.addr.addr.ipv4; + break; + case AF_INET6: + prefix.address_oneof_case = + IPADDRESS_OR_LABEL__ADDRESS_ONEOF_IPV6_ADDR; + prefix.ipv6_addr.data = + (uint8_t *)&route->prefix.addr.addr.ipv6; + prefix.ipv6_addr.len = sizeof(route->prefix.addr.addr.ipv6); + break; + case AF_MPLS: + prefix.address_oneof_case = + IPADDRESS_OR_LABEL__ADDRESS_ONEOF_MPLS_LABEL; + prefix.mpls_label = ntohl(route->prefix.addr.addr.mpls) >> + MPLS_LS_LABEL_SHIFT; + break; + default: + dp_test_assert_internal(false); + break; } + pbroute.has_prefix_length = true; + pbroute.prefix_length = route->prefix.len; + + if (route->vrf_id != VRF_DEFAULT_ID && + route->vrf_id != VRF_UPLINK_ID && + (route->tableid == RT_TABLE_MAIN || + route->tableid == RT_TABLE_LOCAL)) { + bool ret; + + ret = dp_test_upstream_vrf_lookup_db( + route->vrf_id, NULL, &tableid); + assert(ret); + } + + if (tableid != RT_TABLE_MAIN) { + pbroute.has_table_id = true; + pbroute.table_id = tableid; + } + + pbroute.has_scope = true; + pbroute.scope = route->scope; + + switch (route->mpls_payload_type) { + case RTMPT_IP: + /* default, so leave as-is */ + break; + case RTMPT_IPV4: + pbroute.has_payload_type = true; + pbroute.payload_type = ROUTE__PAYLOAD_TYPE__IPV4; + break; + case RTMPT_IPV6: + pbroute.has_payload_type = true; + pbroute.payload_type = ROUTE__PAYLOAD_TYPE__IPV6; + break; + default: + dp_test_assert_internal(false); + break; + } + + if (route->type == RTN_BLACKHOLE || + route->type == RTN_UNREACHABLE || + route->type == RTN_LOCAL) { + paths = calloc(1, sizeof(*paths)); + dp_test_assert_internal(paths); + + pbroute.paths = paths; + pbroute.n_paths = 1; + + path = calloc(1, sizeof(*path)); + paths[0] = path; + dp_test_assert_internal(path); + + path__init(path); + path->has_type = 
true; + switch (route->type) { + case RTN_BLACKHOLE: + path->type = PATH__PATH_TYPE__BLACKHOLE; + break; + case RTN_UNREACHABLE: + path->type = PATH__PATH_TYPE__UNREACHABLE; + break; + case RTN_LOCAL: + path->type = PATH__PATH_TYPE__LOCAL; + break; + } + } else { + paths = calloc(route->nh_cnt, sizeof(*paths)); + dp_test_assert_internal(paths); + + pbroute.paths = paths; + pbroute.n_paths = route->nh_cnt; + + for (i = 0; i < route->nh_cnt; i++) { + path = calloc(1, sizeof(*path) + sizeof(*gateway)); + paths[i] = path; + gateway = (IPAddress *)(path + 1); + nh = &route->nh[i]; + dp_test_assert_internal(path); + + path__init(path); + ipaddress__init(gateway); + + if (route->tableid == RT_TABLE_LOCAL) { + path->has_type = true; + path->type = PATH__PATH_TYPE__LOCAL; + } + + path->has_ifindex = true; + path->ifindex = dp_test_intf_name2index(nh->nh_int); + + path->has_backup = true; + path->backup = nh->backup; + + switch (nh->nh_addr.family) { + case AF_INET: + path->nexthop = gateway; + gateway->address_oneof_case = + IPADDRESS__ADDRESS_ONEOF_IPV4_ADDR; + gateway->ipv4_addr = nh->nh_addr.addr.ipv4; + break; + case AF_INET6: + path->nexthop = gateway; + gateway->address_oneof_case = + IPADDRESS__ADDRESS_ONEOF_IPV6_ADDR; + gateway->ipv6_addr.data = + (uint8_t *)&nh->nh_addr.addr.ipv6; + gateway->ipv6_addr.len = + sizeof(nh->nh_addr.addr.ipv6); + break; + case AF_UNSPEC: + break; + } + + if (nh->num_labels == 1 && + nh->labels[0] == MPLS_LABEL_IMPLNULL) { + /* Nothing to do */ + } else if (nh->num_labels > 0) { + path->mpls_labels = nh->labels; + path->n_mpls_labels = nh->num_labels; + } else if (route->prefix.addr.family == AF_MPLS) { + path->has_mpls_bos_only = true; + path->mpls_bos_only = true; + } + } + } + + len = rib_update__get_packed_size(&rtupdate); + void *buf = malloc(len); + dp_test_assert_internal(buf); + + rib_update__pack(&rtupdate, buf); + + nl_propagate_broker(NULL, buf, len); + + if (route->type == RTN_BLACKHOLE || + route->type == RTN_UNREACHABLE || 
+ route->type == RTN_LOCAL) + free(paths[0]); + else { + for (i = 0; i < route->nh_cnt; i++) + free(paths[i]); + } + free(paths); +} + +/* + * Add/delete a route, if verify is set then block until oper-state reflects + * the requested state. + * + * incomplete implies no verify as the route will not be installed in a way + * that lets the show command verify it. The user can do further verification + * once it becomes complete. + */ +static void +dp_test_netlink_route(const char *route_string, uint16_t nl_type, + bool replace, bool verify, bool incomplete, + const char *file, const char *func, int line) +{ + struct dp_test_route *route = dp_test_parse_route(route_string); + + if (verify) { + if (route->tableid == RT_TABLE_LOCAL) + _dp_test_wait_for_local_addr( + route_string, route->vrf_id, + nl_type == RTM_DELROUTE || replace, + file, func, line); + else if (nl_type == RTM_DELROUTE || replace) + _dp_test_wait_for_route(route_string, !replace, false, + file, func, line); + else + dp_test_wait_for_route_gone(route_string, false, + file, func, line); + } + + if (dp_test_cont_src_get() == CONT_SRC_MAIN && + dp_test_route_broker_protobuf) + dp_test_netlink_route_pb(route, nl_type); + else + dp_test_netlink_route_nl(route, nl_type, replace); + if (verify) { if (route->tableid == RT_TABLE_LOCAL) _dp_test_wait_for_local_addr( @@ -1717,23 +1965,6 @@ _dp_test_netlink_replace_route(const char *route_str, bool verify, true, verify, incomplete, file, func, line); } -void -_dp_test_netlink_replace_route_fmt(bool verify, bool incomplete, - const char *file, - const char *func, int line, - const char *format, ...) 
-{ - char cmd[DP_TEST_TMP_BUF]; - va_list ap; - - va_start(ap, format); - vsnprintf(cmd, sizeof(cmd), format, ap); - va_end(ap); - - dp_test_netlink_route(cmd, RTM_NEWROUTE, true, verify, incomplete, - file, func, line); -} - void _dp_test_netlink_del_route(const char *route_str, bool verify, const char *file, const char *func, @@ -1778,6 +2009,184 @@ _dp_test_netlink_del_route_fmt(bool verify, const char *file, file, func, line); } +/* + * Add or delete a multicast route + * + * nlmsg_type: RTM_NEWROUTE or RTM_DELROUTE + * src: Source address of multicast stream + * sintf: Interface multicast stream expected on + * route_string: "224.0.1.1/32 nh int:dp2T1 nh int:dp2T2" + */ +void _dp_test_mroute_nl(uint16_t nlmsg_type, const char *src, + const char *sintf, const char *route_string, + const char *file, const char *func, int line) +{ + char topic[DP_TEST_TMP_BUF]; + char buf[MNL_SOCKET_BUFFER_SIZE]; + char real_eth_name[IFNAMSIZ]; + struct dp_test_route *route; + struct nlmsghdr *nlh; + struct rtmsg *rtm; + int iif; + int af, alen; + + route = dp_test_parse_route(route_string); + dp_test_fail_unless(route->type == RTN_MULTICAST, "Not multicast"); + + af = route->prefix.addr.family; + + dp_test_fail_unless(af == AF_INET || af == AF_INET6, + "Unknown address family"); + dp_test_fail_unless(route->prefix.len == 32 || + route->prefix.len == 128, + "Multicast address is not host address"); + dp_test_fail_unless(route->nh_cnt >= 1, + "Expect 1 or more nh interfaces"); + + alen = (af == AF_INET) ? 4 : 16; + + dp_test_intf_real(sintf, real_eth_name); + iif = dp_test_intf_name2index(real_eth_name); + + struct in6_addr src_addr; + inet_pton(af, src, &src_addr); + + memset(buf, 0, sizeof(buf)); + nlh = mnl_nlmsg_put_header(buf); + + nlh->nlmsg_type = nlmsg_type; + nlh->nlmsg_flags = NLM_F_ACK; + + rtm = mnl_nlmsg_put_extra_header(nlh, sizeof(struct rtmsg)); + rtm->rtm_family = ((af == AF_INET) ? 
+ RTNL_FAMILY_IPMR : RTNL_FAMILY_IP6MR); + rtm->rtm_type = RTN_MULTICAST; + rtm->rtm_dst_len = route->prefix.len; + rtm->rtm_src_len = route->prefix.len; + rtm->rtm_tos = 0; + rtm->rtm_table = RT_TABLE_DEFAULT; + rtm->rtm_protocol = RTPROT_UNSPEC; + rtm->rtm_scope = 0; + rtm->rtm_flags = 0; + + mnl_attr_put_u32(nlh, RTA_TABLE, RT_TABLE_DEFAULT); + mnl_attr_put(nlh, RTA_SRC, alen, src_addr.s6_addr); + mnl_attr_put(nlh, RTA_DST, alen, &route->prefix.addr.addr); + mnl_attr_put_u32(nlh, RTA_IIF, iif); + + /* + * Add one or more output interfaces + */ + struct nlattr *mpath_start; + uint i; + + mpath_start = mnl_attr_nest_start(nlh, RTA_MULTIPATH); + + for (i = 0; i < route->nh_cnt; i++) { + struct rtnexthop *rtnh; + + dp_test_intf_real(route->nh[i].nh_int, real_eth_name); + + rtnh = (struct rtnexthop *)mnl_nlmsg_get_payload_tail(nlh); + nlh->nlmsg_len += MNL_ALIGN(sizeof(*rtnh)); + + memset(rtnh, 0, sizeof(*rtnh)); + rtnh->rtnh_ifindex = dp_test_intf_name2index(real_eth_name); + rtnh->rtnh_len = sizeof(*rtnh); + } + + mnl_attr_nest_end(nlh, mpath_start); + dp_test_free_route(route); + + if (nl_generate_topic(nlh, topic, sizeof(topic)) < 0) + dp_test_abort_internal(); + + nl_propagate(topic, nlh); +} + +/* + * Verify an IPv4 or IPv6 multicast route + */ +void +_dp_test_wait_for_mroute(const char *source, const char *group, + const char *input, const char *output, + bool gone, const char *file, const char *func, + int line) +{ + json_object *expected_json; + bool v6 = strchr(source, ':') != NULL; + char cmd[22]; + + expected_json = dp_test_json_create( + "{" + " \"%s\":[" + " {" + " \"source\":\"%s\"," + " \"group\":\"%s\"," + " \"input\":\"%s\"," + " \"output(s)\":\"%s\"," + " \"forwarding\":\"fast\\/dataplane\"" + " }" + " ]" + "}", + v6 ? "route6" : "route", + source, group, input, output); + + snprintf(cmd, sizeof(cmd), "multicast %s", + v6 ? 
"route6" : "route"); + + _dp_test_check_json_state(cmd, expected_json, NULL, + DP_TEST_JSON_CHECK_SUBSET, gone, + file, func, line); + json_object_put(expected_json); +} + +/* + * Enable or disable multicast on an interface. + */ +void _dp_test_netlink_netconf_mcast(const char *ifname, int af, bool enable, + const char *file, const char *func, + int line) +{ + struct netconfmsg *ncm; + char topic[DP_TEST_TMP_BUF]; + char buf[MNL_SOCKET_BUFFER_SIZE]; + struct nlmsghdr *nlh; + char real_ifname[IFNAMSIZ]; + + dp_test_intf_real(ifname, real_ifname); + + memset(buf, 0, sizeof(buf)); + nlh = mnl_nlmsg_put_header(buf); + nlh->nlmsg_type = RTM_NEWNETCONF; + nlh->nlmsg_flags = NLM_F_ACK; + + ncm = mnl_nlmsg_put_extra_header(nlh, sizeof(struct netconfmsg)); + ncm->ncm_family = af; + + /* + * Attributes are: + * + * NETCONFA_UNSPEC, + * NETCONFA_IFINDEX, + * NETCONFA_FORWARDING, + * NETCONFA_RP_FILTER, + * NETCONFA_MC_FORWARDING, + * NETCONFA_PROXY_NEIGH, + */ + mnl_attr_put_u32(nlh, NETCONFA_IFINDEX, + dp_test_intf_name2index(real_ifname)); + mnl_attr_put_u32(nlh, NETCONFA_FORWARDING, false); + mnl_attr_put_u32(nlh, NETCONFA_RP_FILTER, 0); + mnl_attr_put_u32(nlh, NETCONFA_MC_FORWARDING, enable); + mnl_attr_put_u32(nlh, NETCONFA_PROXY_NEIGH, 0); + + if (nl_generate_topic(nlh, topic, sizeof(topic)) < 0) + dp_test_abort_internal(); + + nl_propagate(topic, nlh); +} + /* * * RFC 2863 operational status * * @@ -1841,7 +2250,7 @@ dp_test_netlink_bridge(const char *br_name, uint16_t nlmsg_type, bool verify, * IFLA_PROTINFO */ mnl_attr_put_strz(nlh, IFLA_IFNAME, real_br_name); - mnl_attr_put(nlh, IFLA_ADDRESS, sizeof(struct ether_addr), + mnl_attr_put(nlh, IFLA_ADDRESS, sizeof(struct rte_ether_addr), dp_test_intf_name2mac(real_br_name)); if (nlmsg_type == RTM_NEWLINK) mnl_attr_put_u8(nlh, IFLA_OPERSTATE, DP_TEST_IF_OPER_UP); @@ -1853,7 +2262,7 @@ dp_test_netlink_bridge(const char *br_name, uint16_t nlmsg_type, bool verify, if (nl_generate_topic(nlh, topic, sizeof(topic)) < 0) - 
dp_test_assert_internal(0); + dp_test_abort_internal(); if (verify) { char cmd[TEST_MAX_CMD_LEN]; @@ -1945,7 +2354,7 @@ dp_test_netlink_bridge_port_state(const char *br_name, const char *eth_name, mnl_attr_nest_end(nlh, br_proto_info); if (nl_generate_topic(nlh, topic, sizeof(topic)) < 0) - dp_test_assert_internal(0); + dp_test_abort_internal(); /* * And now test it is there. Display bridge and look for port. @@ -2111,7 +2520,7 @@ _dp_test_netlink_bridge_port_set(const char *br_name, mnl_attr_nest_end(nlh, br_af_spec); } if (nl_generate_topic(nlh, topic, sizeof(topic)) < 0) - dp_test_assert_internal(0); + dp_test_abort_internal(); /*We need to generate the expected vlan_filtering object*/ char *vlanbuf = NULL; @@ -2214,7 +2623,7 @@ void _dp_test_netlink_set_bridge_vlan_filter(const char *br_name, bool verify, * IFLA_BR_VLAN_FILTERING: u8 */ mnl_attr_put_strz(nlh, IFLA_IFNAME, real_br_name); - mnl_attr_put(nlh, IFLA_ADDRESS, sizeof(struct ether_addr), + mnl_attr_put(nlh, IFLA_ADDRESS, sizeof(struct rte_ether_addr), dp_test_intf_name2mac(real_br_name)); if (nlmsg_type == RTM_NEWLINK) mnl_attr_put_u8(nlh, IFLA_OPERSTATE, DP_TEST_IF_OPER_UP); @@ -2229,7 +2638,7 @@ void _dp_test_netlink_set_bridge_vlan_filter(const char *br_name, bool verify, if (nl_generate_topic(nlh, topic, sizeof(topic)) < 0) - dp_test_assert_internal(0); + dp_test_abort_internal(); if (verify) { char cmd[TEST_MAX_CMD_LEN]; @@ -2242,7 +2651,7 @@ void _dp_test_netlink_set_bridge_vlan_filter(const char *br_name, bool verify, " {" " \"name\": \"%s\"," " \"type\": \"%s\"," - " \"bridge_master\": {" + " \"bridge_interface\": {" " \"vlan_filtering\": true" " }" " }" @@ -2395,7 +2804,7 @@ dp_test_netlink_vxlan(const char *vxlan_name, uint16_t nlmsg_type, * IFLA_OPERSTATE */ mnl_attr_put_strz(nlh, IFLA_IFNAME, vxlan_name); - mnl_attr_put(nlh, IFLA_ADDRESS, sizeof(struct ether_addr), + mnl_attr_put(nlh, IFLA_ADDRESS, sizeof(struct rte_ether_addr), dp_test_intf_name2mac(vxlan_name)); if (nlmsg_type == 
RTM_NEWLINK) mnl_attr_put_u8(nlh, IFLA_OPERSTATE, DP_TEST_IF_OPER_UP); @@ -2432,7 +2841,7 @@ dp_test_netlink_vxlan(const char *vxlan_name, uint16_t nlmsg_type, mnl_attr_nest_end(nlh, vxlan_info); if (nl_generate_topic(nlh, topic, sizeof(topic)) < 0) - dp_test_assert_internal(0); + dp_test_abort_internal(); if (verify) { char cmd[TEST_MAX_CMD_LEN]; @@ -2549,7 +2958,7 @@ dp_test_netlink_vlan(const char *vif_name, uint16_t nlmsg_type, */ mnl_attr_put_strz(nlh, IFLA_IFNAME, real_vif_name); if (parent_name) - mnl_attr_put(nlh, IFLA_ADDRESS, sizeof(struct ether_addr), + mnl_attr_put(nlh, IFLA_ADDRESS, sizeof(struct rte_ether_addr), dp_test_intf_name2mac(real_parent_name)); if (nlmsg_type == RTM_NEWLINK) mnl_attr_put_u8(nlh, IFLA_OPERSTATE, DP_TEST_IF_OPER_UP); @@ -2579,7 +2988,7 @@ dp_test_netlink_vlan(const char *vif_name, uint16_t nlmsg_type, dp_test_intf_name2index(real_parent_name)); if (nl_generate_topic(nlh, topic, sizeof(topic)) < 0) - dp_test_assert_internal(0); + dp_test_abort_internal(); if (verify) { char cmd[TEST_MAX_CMD_LEN]; @@ -2656,7 +3065,7 @@ dp_test_netlink_macvlan(const char *vif_name, uint16_t nlmsg_type, struct nlattr *vlan_info; char topic[DP_TEST_TMP_BUF]; char real_parent_name[IFNAMSIZ]; - struct ether_addr mac; + struct rte_ether_addr mac; struct ifinfomsg *ifi; struct nlmsghdr *nlh; int if_index = 0; @@ -2714,7 +3123,7 @@ dp_test_netlink_macvlan(const char *vif_name, uint16_t nlmsg_type, dp_test_intf_name2index(real_parent_name)); if (nl_generate_topic(nlh, topic, sizeof(topic)) < 0) - dp_test_assert_internal(0); + dp_test_abort_internal(); if (verify) { char cmd[TEST_MAX_CMD_LEN]; @@ -2841,12 +3250,12 @@ dp_test_netlink_vti(const char *tun_name, mnl_attr_put_u32(nlh, IFLA_MASTER, vrf_id); } mnl_attr_put_u32(nlh, IFLA_MTU, 1428); - mnl_attr_put(nlh, IFLA_ADDRESS, sizeof(struct ether_addr), + mnl_attr_put(nlh, IFLA_ADDRESS, sizeof(struct rte_ether_addr), dp_test_intf_name2mac(tun_name)); mnl_attr_put_strz(nlh, IFLA_IFNAME, tun_name); if 
(nl_generate_topic(nlh, topic, sizeof(topic)) < 0) - dp_test_assert_internal(0); + dp_test_abort_internal(); nl_propagate(topic, nlh); @@ -2898,7 +3307,7 @@ dp_test_netlink_lo_or_vfp(const char *name, bool verify, uint16_t nl_type, int if_index = dp_test_intf_name2index(name); char buf[MNL_SOCKET_BUFFER_SIZE]; struct nlmsghdr *nlh; - struct ether_addr addr; + struct rte_ether_addr addr; memset(&addr, 0, sizeof(addr)); @@ -2938,7 +3347,7 @@ dp_test_netlink_lo_or_vfp(const char *name, bool verify, uint16_t nl_type, mnl_attr_put_u32(nlh, IFLA_GROUP, 0); if (nl_generate_topic(nlh, topic, sizeof(topic)) < 0) - dp_test_assert_internal(0); + dp_test_abort_internal(); if (verify) { char cmd[TEST_MAX_CMD_LEN]; @@ -2988,22 +3397,6 @@ _dp_test_netlink_del_lo(const char *name, bool verify, file, func, line, false); } -void -_dp_test_netlink_create_lord(const char *name, vrfid_t vrf_id, bool verify, - const char *file, const char *func, int line) -{ - dp_test_netlink_lo_or_vfp(name, verify, RTM_NEWLINK, vrf_id, - file, func, line, false); -} - -void -_dp_test_netlink_del_lord(const char *name, vrfid_t vrf_id, bool verify, - const char *file, const char *func, int line) -{ - dp_test_netlink_lo_or_vfp(name, verify, RTM_DELLINK, vrf_id, - file, func, line, false); -} - void _dp_test_netlink_create_vfp(const char *name, vrfid_t vrf_id, bool verify, const char *file, const char *func, int line) @@ -3021,7 +3414,7 @@ _dp_test_netlink_del_vfp(const char *name, vrfid_t vrf_id, bool verify, } static void -dp_test_netlink_vrf_master(const char *name, bool verify, uint16_t nl_type, +dp_test_netlink_vrf_if(const char *name, bool verify, uint16_t nl_type, vrfid_t vrf_id, uint32_t tableid, const char *file, const char *func, int line) { @@ -3030,7 +3423,7 @@ dp_test_netlink_vrf_master(const char *name, bool verify, uint16_t nl_type, int if_index = dp_test_intf_name2index(name); char buf[MNL_SOCKET_BUFFER_SIZE]; struct nlmsghdr *nlh; - struct ether_addr addr; + struct rte_ether_addr addr; 
memset(&addr, 0, sizeof(addr)); @@ -3063,7 +3456,7 @@ dp_test_netlink_vrf_master(const char *name, bool verify, uint16_t nl_type, mnl_attr_put_u32(nlh, IFLA_GROUP, 0); if (nl_generate_topic(nlh, topic, sizeof(topic)) < 0) - dp_test_assert_internal(0); + dp_test_abort_internal(); if (verify) { char cmd[TEST_MAX_CMD_LEN]; @@ -3098,22 +3491,22 @@ dp_test_netlink_vrf_master(const char *name, bool verify, uint16_t nl_type, } void -_dp_test_netlink_create_vrf_master(const char *name, vrfid_t vrf_id, +_dp_test_netlink_create_vrf_if(const char *name, vrfid_t vrf_id, uint32_t tableid, bool verify, const char *file, const char *func, int line) { - dp_test_netlink_vrf_master(name, verify, RTM_NEWLINK, vrf_id, tableid, + dp_test_netlink_vrf_if(name, verify, RTM_NEWLINK, vrf_id, tableid, file, func, line); } void -_dp_test_netlink_del_vrf_master(const char *name, vrfid_t vrf_id, +_dp_test_netlink_del_vrf_if(const char *name, vrfid_t vrf_id, uint32_t tableid, bool verify, const char *file, const char *func, int line) { - dp_test_netlink_vrf_master(name, verify, RTM_DELLINK, vrf_id, tableid, + dp_test_netlink_vrf_if(name, verify, RTM_DELLINK, vrf_id, tableid, file, func, line); } @@ -3134,6 +3527,7 @@ static int xfrm_attr(const struct nlattr *attr, void *data) return MNL_CB_OK; } + static void xfrm_nl_policy_decode(const struct nlmsghdr *nlh, const struct xfrm_userpolicy_info **info, const struct xfrm_userpolicy_id **id) @@ -3320,6 +3714,14 @@ nl_generate_topic_xfrm(const struct nlmsghdr *nlh, char *buf, size_t buflen) return -1; } +/* + * Each netlink xfrm messages is required to have a unique + * sequence number that is returned back to the xfrm source + * via an ack message to indicate the successful processing + * of the message in the dataplane. 
+ */ +uint32_t xfrm_seq; + void _dp_test_netlink_xfrm_policy(uint16_t nlmsg_type, const struct xfrm_selector *sel, const xfrm_address_t *dst, @@ -3332,18 +3734,20 @@ void _dp_test_netlink_xfrm_policy(uint16_t nlmsg_type, uint8_t action, vrfid_t vrfid, bool passthrough, + uint32_t rule_no, const char *file, int line) { - char buf[MNL_SOCKET_BUFFER_SIZE]; struct xfrm_userpolicy_info *userpolicy_info_p = NULL; struct xfrm_userpolicy_id *userpolicy_id; struct nlmsghdr *nlh; + char *buf = malloc(MNL_SOCKET_BUFFER_SIZE); - memset(buf, 0, sizeof(buf)); + memset(buf, 0, MNL_SOCKET_BUFFER_SIZE); nlh = mnl_nlmsg_put_header(buf); nlh->nlmsg_type = nlmsg_type; nlh->nlmsg_flags = NLM_F_ACK; + nlh->nlmsg_seq = ++xfrm_seq; switch (nlmsg_type) { case XFRM_MSG_NEWPOLICY: @@ -3351,6 +3755,7 @@ void _dp_test_netlink_xfrm_policy(uint16_t nlmsg_type, userpolicy_info_p = mnl_nlmsg_put_extra_header(nlh, sizeof(*userpolicy_info_p)); userpolicy_info_p->priority = priority; userpolicy_info_p->dir = dir; + userpolicy_info_p->index = rule_no; userpolicy_info_p->action = action; memcpy(&userpolicy_info_p->sel, sel, sizeof(userpolicy_info_p->sel)); break; @@ -3358,6 +3763,7 @@ void _dp_test_netlink_xfrm_policy(uint16_t nlmsg_type, userpolicy_id = mnl_nlmsg_put_extra_header(nlh, sizeof(*userpolicy_id)); userpolicy_id->dir = dir; + userpolicy_id->index = rule_no; memcpy(&userpolicy_id->sel, sel, sizeof(userpolicy_id->sel)); break; default: @@ -3403,9 +3809,9 @@ void _dp_test_netlink_xfrm_policy(uint16_t nlmsg_type, char topic[DP_TEST_TMP_BUF]; if (nl_generate_topic_xfrm(nlh, topic, sizeof(topic)) < 0) - dp_test_assert_internal(0); - - nl_propagate(topic, nlh); + dp_test_abort_internal(); + /* Signal an end of batch. 
This is a single msg batch */ + nl_propagate_xfrm(xfrm_server_push_sock, nlh, nlh->nlmsg_len, "END"); } void _dp_test_netlink_xfrm_newsa(uint32_t spi, /* Network byte order */ @@ -3428,21 +3834,23 @@ void _dp_test_netlink_xfrm_newsa(uint32_t spi, /* Network byte order */ const char *func, int line) { - char buf[MNL_SOCKET_BUFFER_SIZE]; + char topic[DP_TEST_TMP_BUF]; unsigned int key_len; struct nlmsghdr *nlh; struct xfrm_usersa_info *sa_info; + char *buf = malloc(MNL_SOCKET_BUFFER_SIZE); - memset(buf, 0, sizeof(buf)); + memset(buf, 0, MNL_SOCKET_BUFFER_SIZE); nlh = mnl_nlmsg_put_header(buf); nlh->nlmsg_type = XFRM_MSG_NEWSA; nlh->nlmsg_flags = NLM_F_ACK; + nlh->nlmsg_seq = ++xfrm_seq; sa_info = mnl_nlmsg_put_extra_header(nlh, sizeof(*sa_info)); if (dp_test_setup_xfrm_usersa_info(sa_info, dst, src, spi, family, mode, reqid, flags)) - dp_test_assert_internal(0); + dp_test_abort_internal(); if (crypto_algo) { key_len = crypto_algo->alg_key_len / 8; @@ -3484,9 +3892,9 @@ void _dp_test_netlink_xfrm_newsa(uint32_t spi, /* Network byte order */ } if (nl_generate_topic_xfrm(nlh, topic, sizeof(topic)) < 0) - dp_test_assert_internal(0); - - nl_propagate(topic, nlh); + dp_test_abort_internal(); + /* Signal an end of batch. 
This is a single msg batch */ + nl_propagate_xfrm(xfrm_server_push_sock, nlh, nlh->nlmsg_len, "END"); } void dp_test_netlink_xfrm_delsa(uint32_t spi, /* Network byte order */ @@ -3499,27 +3907,28 @@ void dp_test_netlink_xfrm_delsa(uint32_t spi, /* Network byte order */ { struct xfrm_usersa_info usersa_info; struct xfrm_usersa_id *usersa_id; - char buf[MNL_SOCKET_BUFFER_SIZE]; char topic[DP_TEST_TMP_BUF]; struct nlmsghdr *nlh; xfrm_address_t daddr; + char *buf = malloc(MNL_SOCKET_BUFFER_SIZE); - memset(buf, 0, sizeof(buf)); + memset(buf, 0, MNL_SOCKET_BUFFER_SIZE); nlh = mnl_nlmsg_put_header(buf); nlh->nlmsg_type = XFRM_MSG_DELSA; nlh->nlmsg_flags = NLM_F_ACK; + nlh->nlmsg_seq = ++xfrm_seq; usersa_id = mnl_nlmsg_put_extra_header(nlh, sizeof(*usersa_id)); usersa_id->family = family; if (dp_test_prefix_str_to_xfrm_addr(dst, &daddr, NULL, family)) - dp_test_assert_internal(0); + dp_test_abort_internal(); memcpy(&usersa_id->daddr, &daddr, sizeof(usersa_id->daddr)); usersa_id->spi = spi; if (dp_test_setup_xfrm_usersa_info(&usersa_info, dst, src, spi, family, mode, reqid, 0)) - dp_test_assert_internal(0); + dp_test_abort_internal(); if (vrfid != VRF_DEFAULT_ID && vrfid != VRF_UPLINK_ID) { usersa_info.sel.ifindex = dp_test_translate_vrf_id(vrfid); @@ -3528,9 +3937,54 @@ void dp_test_netlink_xfrm_delsa(uint32_t spi, /* Network byte order */ mnl_attr_put(nlh, XFRMA_SA, sizeof(usersa_info), &usersa_info); if (nl_generate_topic_xfrm(nlh, topic, sizeof(topic)) < 0) - dp_test_assert_internal(0); + dp_test_abort_internal(); + /* Signal an end of batch. 
This is a single msg batch */ + nl_propagate_xfrm(xfrm_server_push_sock, nlh, nlh->nlmsg_len, "END"); +} - nl_propagate(topic, nlh); +void dp_test_netlink_xfrm_getsa(uint32_t spi, /* Network byte order */ + const char *dst, + const char *src, + uint16_t family, + uint8_t mode, + uint32_t reqid, + vrfid_t vrfid) +{ + struct xfrm_usersa_id *usersa_id; + struct nlmsghdr *nlh; + xfrm_address_t daddr; + char *buf = malloc(MNL_SOCKET_BUFFER_SIZE); + uint32_t ifindex, mark_val = 1; + + memset(buf, 0, MNL_SOCKET_BUFFER_SIZE); + nlh = mnl_nlmsg_put_header(buf); + nlh->nlmsg_type = XFRM_MSG_GETSA; + nlh->nlmsg_flags = NLM_F_REQUEST; + nlh->nlmsg_seq = ++xfrm_seq; + + usersa_id = mnl_nlmsg_put_extra_header(nlh, sizeof(*usersa_id)); + usersa_id->family = family; + + if (dp_test_prefix_str_to_xfrm_addr(dst, &daddr, NULL, family)) + dp_test_abort_internal(); + memcpy(&usersa_id->daddr, &daddr, sizeof(usersa_id->daddr)); + usersa_id->spi = spi; + + if (mark_val) { + struct xfrm_mark mark = { + .v = htonl(mark_val), + .m = 0xffffffff}; + mnl_attr_put(nlh, XFRMA_MARK, sizeof(struct xfrm_mark), &mark); + } + + if (vrfid != VRF_DEFAULT_ID && vrfid != VRF_UPLINK_ID) + ifindex = dp_test_translate_vrf_id(vrfid); + else + ifindex = 0; + + mnl_attr_put(nlh, XFRMA_IF_ID, sizeof(ifindex), &ifindex); + + nl_propagate_xfrm(xfrm_server_push_sock, nlh, nlh->nlmsg_len, "STATS"); } void dp_test_netlink_xfrm_expire(uint32_t spi, /* Network byte order */ @@ -3542,30 +3996,31 @@ void dp_test_netlink_xfrm_expire(uint32_t spi, /* Network byte order */ bool expire_hard, vrfid_t vrfid) { - char buf[MNL_SOCKET_BUFFER_SIZE]; struct xfrm_user_expire *expire; char topic[DP_TEST_TMP_BUF]; struct nlmsghdr *nlh; + char *buf = malloc(MNL_SOCKET_BUFFER_SIZE); - memset(buf, 0, sizeof(buf)); + memset(buf, 0, MNL_SOCKET_BUFFER_SIZE); nlh = mnl_nlmsg_put_header(buf); nlh->nlmsg_type = XFRM_MSG_EXPIRE; nlh->nlmsg_flags = NLM_F_ACK; + nlh->nlmsg_seq = ++xfrm_seq; expire = mnl_nlmsg_put_extra_header(nlh, 
sizeof(*expire)); expire->hard = expire_hard; if (dp_test_setup_xfrm_usersa_info(&expire->state, dst, src, spi, family, mode, reqid, 0)) - dp_test_assert_internal(0); + dp_test_abort_internal(); if (vrfid != VRF_DEFAULT_ID && vrfid != VRF_UPLINK_ID) expire->state.sel.ifindex = dp_test_translate_vrf_id(vrfid); if (nl_generate_topic_xfrm(nlh, topic, sizeof(topic)) < 0) - dp_test_assert_internal(0); - - nl_propagate(topic, nlh); + dp_test_abort_internal(); + /* Signal an end of batch. This is a single msg batch */ + nl_propagate_xfrm(xfrm_server_push_sock, nlh, nlh->nlmsg_len, "END"); } void @@ -3591,7 +4046,7 @@ _dp_test_netlink_add_vrf(uint32_t vrf_id, uint32_t expected_ref_cnt, ret = dp_test_upstream_vrf_add_db(vrf_id, vrf_name, &tableid); _dp_test_fail_unless(ret, file, line, "maximum vrf limit reached\n"); - _dp_test_intf_vrf_master_create(vrf_name, vrf_id, tableid, file, line); + _dp_test_intf_vrf_if_create(vrf_name, vrf_id, tableid, file, line); } void @@ -3604,9 +4059,9 @@ _dp_test_netlink_del_vrf(uint32_t vrf_id, uint32_t expected_ref_cnt, ret = dp_test_upstream_vrf_del_db(vrf_id, vrf_name, &tableid); _dp_test_fail_unless(ret, file, line, - "unable to find vrf master device for %u\n", + "unable to find vrf interface for %u\n", vrf_id); - _dp_test_intf_vrf_master_delete(vrf_name, vrf_id, tableid, file, line); + _dp_test_intf_vrf_if_delete(vrf_name, vrf_id, tableid, file, line); } /* diff --git a/tests/whole_dp/src/dp_test_netlink_state.h b/tests/whole_dp/src/dp_test_netlink_state_internal.h similarity index 75% rename from tests/whole_dp/src/dp_test_netlink_state.h rename to tests/whole_dp/src/dp_test_netlink_state_internal.h index 1ced83dc..8dfe6a33 100644 --- a/tests/whole_dp/src/dp_test_netlink_state.h +++ b/tests/whole_dp/src/dp_test_netlink_state_internal.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. 
* Copyright (c) 2015-2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -11,8 +11,8 @@ * queried. */ -#ifndef _DP_TEST_NETLINK_STATE_H_ -#define _DP_TEST_NETLINK_STATE_H_ +#ifndef _DP_TEST_NETLINK_STATE_INTERNAL_H_ +#define _DP_TEST_NETLINK_STATE_INTERNAL_H_ #include #include @@ -21,10 +21,11 @@ #include #include -#include "vrf.h" -#include "bridge_port.h" +#include "vrf_internal.h" +#include "if/bridge/bridge_port.h" -#include "dp_test_lib_intf.h" +#include "dp_test_lib_intf_internal.h" +#include "dp_test/dp_test_netlink_state.h" void _dp_test_netlink_set_interface_l2(const char *name, bool verify, const char *file, const char *func, @@ -55,16 +56,6 @@ void _dp_test_netlink_set_interface_mtu(const char *name, int mtu, bool verify, #define dp_test_netlink_set_interface_mtu(name, mtu) \ _dp_test_netlink_set_interface_mtu(name, mtu, true, \ __FILE__, __func__, __LINE__) -void _dp_test_netlink_set_interface_vrf(const char *name, uint32_t vrf_id, - bool verify, - const char *file, const char *func, - int line); -/* - * Bind interface to VRF (note sets MTU to default value). 
- */ -#define dp_test_netlink_set_interface_vrf(name, vrf_id) \ - _dp_test_netlink_set_interface_vrf(name, vrf_id, true, \ - __FILE__, __func__, __LINE__) void _dp_test_netlink_set_interface_admin_status( const char *name, bool admin_up, bool verify, @@ -176,14 +167,6 @@ _dp_test_netlink_set_mpls_forwarding(const char *ifname, bool enable, #define dp_test_netlink_set_mpls_forwarding(ifname, enable) \ _dp_test_netlink_set_mpls_forwarding(ifname, enable, __FILE__, \ __func__, __LINE__) - -void _dp_test_netlink_add_route(const char *route_string, bool verify, - bool incomplete, - const char *file, const char *func, - int line); -#define dp_test_netlink_add_route(route_string) \ - _dp_test_netlink_add_route(route_string, true, false, \ - __FILE__, __func__, __LINE__) #define dp_test_netlink_add_route_nv(route_string) \ _dp_test_netlink_add_route(route_string, false, false, \ __FILE__, __func__, __LINE__) @@ -210,6 +193,10 @@ _dp_test_netlink_add_route_fmt(bool verify, bool incomplete, _dp_test_netlink_add_route_fmt(verify, false, __FILE__, __func__, \ __LINE__, fmt_str, ##__VA_ARGS__) +#define dp_test_nl_add_route_incomplete_fmt(fmt_str, ...) \ + _dp_test_netlink_add_route_fmt(false, true, __FILE__, __func__, \ + __LINE__, fmt_str, ##__VA_ARGS__) + void _dp_test_netlink_replace_route(const char *route_string, bool verify, bool incomplete, const char *file, const char *func, @@ -217,24 +204,10 @@ void _dp_test_netlink_replace_route(const char *route_string, bool verify, #define dp_test_netlink_replace_route(route_string) \ _dp_test_netlink_replace_route(route_string, true, false, \ __FILE__, __func__, __LINE__) - -void -_dp_test_netlink_replace_route_fmt(bool verify, bool incomplete, - const char *file, const char *func, - int line, const char *format, ...) - __attribute__((__format__(printf, 6, 7))); - -#define dp_test_nl_replace_route_fmt(verify, fmt_str, ...) 
\ - _dp_test_netlink_replace_route_fmt(verify, __FILE__, __func__, \ - __LINE__, fmt_str, \ - ##__VA_ARGS__) - -void _dp_test_netlink_del_route(const char *route_str, bool verify, - const char *file, const char *func, - int line); -#define dp_test_netlink_del_route(route_str) \ - _dp_test_netlink_del_route(route_str, true, \ +#define dp_test_netlink_replace_route_nv(route_string) \ + _dp_test_netlink_replace_route(route_string, false, false, \ __FILE__, __func__, __LINE__) + #define dp_test_netlink_del_route_nv(route_str) \ _dp_test_netlink_del_route(route_str, false, \ __FILE__, __func__, __LINE__) @@ -249,6 +222,10 @@ _dp_test_netlink_del_route_fmt(bool verify, const char *file, _dp_test_netlink_del_route_fmt(verify, __FILE__, __func__, \ __LINE__, fmt_str, ##__VA_ARGS__) +#define dp_test_nl_del_route_incomplete_fmt(fmt_str, ...) \ + _dp_test_netlink_del_route_fmt(false, __FILE__, __func__, \ + __LINE__, fmt_str, ##__VA_ARGS__) + /* Verify route in a VRF */ void _dp_test_verify_add_route(const char *route_string, bool match_nh, bool all, const char *file, const char *func, @@ -262,7 +239,6 @@ void _dp_test_verify_add_route(const char *route_string, bool match_nh, _dp_test_verify_add_route(route_string, match_nh, true, \ __FILE__, __func__, __LINE__) - void _dp_test_verify_del_route(const char *route_string, bool match_nh, const char *file, const char *func, int line); @@ -270,45 +246,34 @@ void _dp_test_verify_del_route(const char *route_string, bool match_nh, _dp_test_verify_del_route(route_string, match_nh, \ __FILE__, __func__, __LINE__) -void _dp_test_netlink_add_neigh(const char *ifname, - const char *nh_addr_str, - const char *mac_str, - bool verify, - const char *file, const char *func, - int line); -#define dp_test_netlink_add_neigh(ifname, nh_addr_str, mac_str) \ - _dp_test_netlink_add_neigh(ifname, nh_addr_str, mac_str, \ - true, \ - __FILE__, __func__, __LINE__) +/* Add or delete a multicast route */ +void _dp_test_mroute_nl(uint16_t nlmsg_type, const 
char *src, + const char *sintf, const char *route_string, + const char *file, const char *func, int line); -void _dp_test_netlink_del_neigh(const char *ifname, - const char *nh_addr_str, - const char *mac_str, bool verify, - const char *file, const char *func, - int line); -#define dp_test_netlink_del_neigh(ifname, nh_addr_str, mac_str) \ - _dp_test_netlink_del_neigh(ifname, nh_addr_str, mac_str, \ - true, \ - __FILE__, __func__, __LINE__) +#define dp_test_mroute_nl(nlmsg_type, src, sintf, route_string) \ + _dp_test_mroute_nl(nlmsg_type, src, sintf, route_string, \ + __FILE__, __func__, __LINE__) -void _dp_test_verify_neigh(const char *ifname, - const char *ipaddr, - const char *mac_str, - bool negate_match, - const char *file, const char *func, - int line); -#define dp_test_verify_neigh(ifname, nh_addr_str, mac_str, negate_match) \ - _dp_test_verify_neigh(ifname, nh_addr_str, mac_str, \ - negate_match, \ - __FILE__, __func__, __LINE__) - -/* VRF creation / deletion macros */ -void _dp_test_netlink_add_vrf(uint32_t vrf_id, uint32_t expected_ref_cnt, - const char *file, int line); - -#define dp_test_netlink_add_vrf(vrf_id, expected_ref_cnt) \ - _dp_test_netlink_add_vrf(vrf_id, expected_ref_cnt, \ - __FILE__, __LINE__) +/* Verify an IPv4 or IPv6 multicast route */ +void +_dp_test_wait_for_mroute(const char *source, const char *group, + const char *input, const char *output, + bool gone, const char *file, const char *func, + int line); + +#define dp_test_wait_for_mroute(source, group, input, output, gone) \ + _dp_test_wait_for_mroute(source, group, input, output, gone, \ + __FILE__, __func__, __LINE__) + +/* Enable or disable multicast on an interface */ +void _dp_test_netlink_netconf_mcast(const char *ifname, int af, + bool enable, const char *file, + const char *func, int line); + +#define dp_test_netlink_netconf_mcast(ifname, af, enable) \ + _dp_test_netlink_netconf_mcast(ifname, af, enable, \ + __FILE__, __func__, __LINE__) void 
_dp_test_netlink_add_vrf_incmpl(uint32_t vrf_id, uint32_t expected_ref_cnt, @@ -321,13 +286,6 @@ void _dp_test_netlink_add_vrf_incmpl(uint32_t vrf_id, _dp_test_netlink_add_vrf(vrf_id, expected_ref_cnt, \ __FILE__, __LINE__) -void _dp_test_netlink_del_vrf(uint32_t vrf_id, uint32_t expected_ref_cnt, - const char *file, int line); - -#define dp_test_netlink_del_vrf(vrf_id, expected_ref_cnt) \ - _dp_test_netlink_del_vrf(vrf_id, expected_ref_cnt, \ - __FILE__, __LINE__) - void _dp_test_netlink_create_bridge(const char *br_name, bool verify, const char *file, const char *func, @@ -457,18 +415,6 @@ void _dp_test_netlink_del_lo(const char *name, bool verify, const char *file, #define dp_test_netlink_del_lo(name, verify) \ _dp_test_netlink_del_lo(name, verify, __FILE__, __func__, __LINE__) -void _dp_test_netlink_create_lord(const char *name, vrfid_t vrf_id, bool verify, - const char *file, const char *func, int line); - -void _dp_test_netlink_del_lord(const char *name, vrfid_t vrf_id, bool verify, - const char *file, const char *func, int line); -#define dp_test_netlink_create_lord(name, vrf_id, verify) \ - _dp_test_netlink_create_lord(name, vrf_id, verify, \ - __FILE__, __func__, __LINE__) -#define dp_test_netlink_del_lord(name, vrf_id, verify) \ - _dp_test_netlink_del_lord(name, vrf_id, verify, \ - __FILE__, __func__, __LINE__) - void _dp_test_netlink_create_vfp(const char *name, vrfid_t vrf_id, bool verify, const char *file, const char *func, int line); @@ -482,12 +428,12 @@ void _dp_test_netlink_del_vfp(const char *name, vrfid_t vrf_id, bool verify, __FILE__, __func__, __LINE__) void -_dp_test_netlink_create_vrf_master(const char *name, vrfid_t vrf_id, +_dp_test_netlink_create_vrf_if(const char *name, vrfid_t vrf_id, uint32_t tableid, bool verify, const char *file, const char *func, int line); void -_dp_test_netlink_del_vrf_master(const char *name, vrfid_t vrf_id, +_dp_test_netlink_del_vrf_if(const char *name, vrfid_t vrf_id, uint32_t tableid, bool verify, const char 
*file, const char *func, int line); @@ -522,14 +468,16 @@ void _dp_test_netlink_xfrm_policy(uint16_t nlmsg_type, uint8_t action, vrfid_t vrfid, bool passthrough, + uint32_t rule_no, const char *file, int line); #define dp_test_netlink_xfrm_policy(nlmsg_type, sel, dst, dst_family, dir, \ priority, reqid, mark_val, action, vrfid, \ - passthrough) \ + passthrough, rule_no) \ _dp_test_netlink_xfrm_policy(nlmsg_type, sel, dst, dst_family, dir, \ priority, reqid, mark_val, action, \ - vrfid, passthrough, __FILE__, __LINE__) + vrfid, passthrough, rule_no, __FILE__, \ + __LINE__) void _dp_test_netlink_xfrm_newsa(uint32_t spi, /* Network byte order */ const char *dst, @@ -578,48 +526,12 @@ void dp_test_netlink_xfrm_expire(uint32_t spi, /* Network byte order */ bool expire_hard, vrfid_t vrfid); -/** - * @brief Adds L3 address and adds route for the attached network - * - * @param [in] intf Name of the interface - * @param [in] addr IPv4 or IPv6 address string for the interface, of the - * form "addr/prefix" - */ -#define dp_test_nl_add_ip_addr_and_connected(intf, addr) \ - _dp_test_nl_add_ip_addr_and_connected(intf, addr, \ - VRF_DEFAULT_ID, __FILE__, \ - __func__, __LINE__) - -#define dp_test_nl_add_ip_addr_and_connected_vrf(intf, addr, vrf_id) \ - _dp_test_nl_add_ip_addr_and_connected(intf, addr, vrf_id, \ - __FILE__, __func__, \ - __LINE__) - -void -_dp_test_nl_add_ip_addr_and_connected(const char *intf, const char *addr, - vrfid_t vrf_id, const char *file, - const char *func, int line); - -/** - * @brief Remove interface address and attached network route - * - * @param [in] intf Name of the interface - * @param [in] addr IPv4 or IPv6 address string for the interface, of the - * form "addr/prefix" - */ -#define dp_test_nl_del_ip_addr_and_connected(intf, addr) \ - _dp_test_nl_del_ip_addr_and_connected(intf, addr, \ - VRF_DEFAULT_ID, __FILE__, \ - __func__, __LINE__) - -#define dp_test_nl_del_ip_addr_and_connected_vrf(intf, addr, vrf_id) \ - 
_dp_test_nl_del_ip_addr_and_connected(intf, addr, vrf_id, \ - __FILE__, __func__, \ - __LINE__) - -void -_dp_test_nl_del_ip_addr_and_connected(const char *intf, const char *addr, - vrfid_t vrf_id, const char *file, - const char *func, int line); +void dp_test_netlink_xfrm_getsa(uint32_t spi, /* Network byte order */ + const char *dst, + const char *src, + uint16_t family, + uint8_t mode, + uint32_t reqid, + vrfid_t vrfid); -#endif /* _DP_TEST_NETLINK_STATE_H_ */ +#endif /* _DP_TEST_NETLINK_STATE_INTERNAL_H_ */ diff --git a/tests/whole_dp/src/dp_test_npf_acl.c b/tests/whole_dp/src/dp_test_npf_acl.c new file mode 100644 index 00000000..dbe1049d --- /dev/null +++ b/tests/whole_dp/src/dp_test_npf_acl.c @@ -0,0 +1,1407 @@ +/* + * Copyright (c) 2020, AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + * + */ +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include "ip_funcs.h" +#include "ip6_funcs.h" +#include "in_cksum.h" +#include "if_var.h" +#include "main.h" + +#include "dp_test.h" +#include "dp_test_controller.h" +#include "dp_test_console.h" +#include "dp_test_netlink_state_internal.h" +#include "dp_test/dp_test_cmd_check.h" +#include "dp_test_lib_internal.h" +#include "dp_test_lib_intf_internal.h" +#include "dp_test_pktmbuf_lib_internal.h" +#include "dp_test_lib_exp.h" +#include "dp_test_lib_pkt.h" +#include "dp_test_npf_lib.h" +#include "dp_test_npf_fw_lib.h" +#include "dp_test_npf_nat_lib.h" +#include "dp_test_npf_sess_lib.h" +#include "dp_test_ppp.h" +#include "dp_test_gre.h" + +DP_DECL_TEST_SUITE(npf_acl); + +static void acl_setup(void) +{ + /* Set up v4 addresses (plus connected routes) and neighbours: dp1T0 in 10.0.1.0/24, dp2T1 in 20.0.2.0/24 */ + dp_test_nl_add_ip_addr_and_connected("dp1T0", "10.0.1.1/24"); + dp_test_nl_add_ip_addr_and_connected("dp2T1", "20.0.2.1/24"); + + dp_test_netlink_add_neigh("dp1T0", "10.0.1.2", "aa:bb:cc:dd:1:a1"); + dp_test_netlink_add_neigh("dp2T1", "20.0.2.2", "aa:bb:cc:dd:2:b1"); + 
dp_test_netlink_add_neigh("dp2T1", "20.0.2.3", "aa:bb:cc:dd:2:b3"); + + /* Set up v6 addresses (plus connected routes) and neighbours: dp1T0 in 2001:1:1::/64, dp2T1 in 2002:2:2::/64 (local address is ::2, neighbours are ::1 and ::3) */ + dp_test_nl_add_ip_addr_and_connected("dp1T0", "2001:1:1::1/64"); + dp_test_nl_add_ip_addr_and_connected("dp2T1", "2002:2:2::2/64"); + + dp_test_netlink_add_neigh("dp1T0", "2001:1:1::2", + "aa:bb:cc:dd:1:a1"); + dp_test_netlink_add_neigh("dp2T1", "2002:2:2::1", + "aa:bb:cc:dd:2:b1"); + dp_test_netlink_add_neigh("dp2T1", "2002:2:2::3", + "aa:bb:cc:dd:2:b3"); + +} + +static void acl_teardown(void) +{ + dp_test_netlink_del_neigh("dp1T0", "10.0.1.2", "aa:bb:cc:dd:1:a1"); + dp_test_netlink_del_neigh("dp2T1", "20.0.2.2", "aa:bb:cc:dd:2:b1"); + dp_test_netlink_del_neigh("dp2T1", "20.0.2.3", "aa:bb:cc:dd:2:b3"); + + dp_test_nl_del_ip_addr_and_connected("dp1T0", "10.0.1.1/24"); + dp_test_nl_del_ip_addr_and_connected("dp2T1", "20.0.2.1/24"); + + dp_test_nl_del_ip_addr_and_connected("dp1T0", "2001:1:1::1/64"); + dp_test_nl_del_ip_addr_and_connected("dp2T1", "2002:2:2::2/64"); + + dp_test_netlink_del_neigh("dp1T0", "2001:1:1::2", + "aa:bb:cc:dd:1:a1"); + dp_test_netlink_del_neigh("dp2T1", "2002:2:2::1", + "aa:bb:cc:dd:2:b1"); + dp_test_netlink_del_neigh("dp2T1", "2002:2:2::3", + "aa:bb:cc:dd:2:b3"); + +} + +static void _dpt_icmp6(uint8_t icmp_type, const char *rx_intf, + const char *pre_smac, const char *saddr, uint16_t icmpid, + const char *daddr, const char *post_dmac, + const char *tx_intf, int status, + const char *file, const char *func, int line) +{ + struct dp_test_expected *test_exp; + struct rte_mbuf *test_pak, *exp_pak; + + struct dp_test_pkt_desc_t v6_pkt = { + .text = "ICMP IPv6", + .len = 20, + .ether_type = RTE_ETHER_TYPE_IPV6, + .l3_src = saddr, + .l2_src = pre_smac, + .l3_dst = daddr, + .l2_dst = post_dmac, + .proto = IPPROTO_ICMPV6, + .l4 = { + .icmp = { + .type = icmp_type, + .code = 0, + { + .dpt_icmp_id = icmpid, + .dpt_icmp_seq = 0, + } + } + }, + .rx_intf = rx_intf, + .tx_intf = tx_intf + }; + + test_pak = dp_test_v6_pkt_from_desc(&v6_pkt); + 
+ exp_pak = dp_test_v6_pkt_from_desc(&v6_pkt); + test_exp = dp_test_exp_from_desc(exp_pak, &v6_pkt); + rte_pktmbuf_free(exp_pak); + + dp_test_exp_set_fwd_status(test_exp, status); + + _dp_test_pak_receive(test_pak, rx_intf, test_exp, + file, func, line); +} + +#define dpt_icmp6(_a, _b, _c, _d, _e, _f, _g, _h, _i) \ + _dpt_icmp6(_a, _b, _c, _d, _e, _f, _g, _h, _i, \ + __FILE__, __func__, __LINE__) + +/* + * acl1 - IPv4 acl on input + * + * acl:v4test drops ICMP, UDP (10000 -> 20000) and TCP (32878 -> 80) from + * 10.0.1.2 to 20.0.2.2 and is attached as acl-in on interface:dpT10. + * Packets that miss every rule, and the ICMP6 packet, are expected to be + * forwarded. + */ +DP_DECL_TEST_CASE(npf_acl, acl1, acl_setup, acl_teardown); +DP_START_TEST(acl1, test) +{ + dp_test_npf_cmd("npf-ut add acl:v4test 0 family=inet", false); + + /* Drop ICMP (proto 1) from 10.0.1.2 to 20.0.2.2 */ + dp_test_npf_cmd("npf-ut add acl:v4test 10 " + "src-addr=10.0.1.2 " + "dst-addr=20.0.2.2 " + "proto-base=1 " + "action=drop", false); + + /* Drop UDP (proto 17), ports 10000 -> 20000 */ + dp_test_npf_cmd("npf-ut add acl:v4test 20 " + "src-addr=10.0.1.2 src-port=10000 " + "dst-addr=20.0.2.2 dst-port=20000 " + "proto-base=17 " + "action=drop", false); + + /* Drop TCP (proto 6), ports 32878 -> 80 */ + dp_test_npf_cmd("npf-ut add acl:v4test 30 " + "src-addr=10.0.1.2 src-port=32878 " + "dst-addr=20.0.2.2 dst-port=80 " + "proto-base=6 " + "action=drop", false); + + dp_test_npf_cmd("npf-ut attach interface:dpT10 acl-in acl:v4test", + false); + + dp_test_npf_cmd("npf-ut commit", false); + + /* ICMP, no acl match (source 10.0.1.3) */ + dpt_icmp(ICMP_ECHO, + "dp1T0", "aa:bb:cc:dd:1:a1", + "10.0.1.3", 1234, "20.0.2.2", + "10.0.1.3", 1234, "20.0.2.2", + "aa:bb:cc:dd:2:b1", "dp2T1", + DP_TEST_FWD_FORWARDED); + + /* ICMP, acl match */ + dpt_icmp(ICMP_ECHO, + "dp1T0", "aa:bb:cc:dd:1:a1", + "10.0.1.2", 10000, "20.0.2.2", + "10.0.1.2", 10000, "20.0.2.2", + "aa:bb:cc:dd:2:b1", "dp2T1", + DP_TEST_FWD_DROPPED); + + /* ICMP6, expected to be forwarded (ruleset is family=inet) */ + dpt_icmp6(ICMP6_ECHO_REQUEST, + "dp1T0", "aa:bb:cc:dd:1:a1", + "2001:1:1::2", 10000, "2002:2:2::1", + "aa:bb:cc:dd:2:b1", "dp2T1", + DP_TEST_FWD_FORWARDED); + + /* UDP, no acl match (dst 20.0.2.3, port 30000) */ + dpt_udp("dp1T0", "aa:bb:cc:dd:1:a1", + "10.0.1.2", 10000, "20.0.2.3", 30000, + "10.0.1.2", 10000, "20.0.2.3", 30000, + "aa:bb:cc:dd:2:b3", "dp2T1", + 
DP_TEST_FWD_FORWARDED); + + /* UDP, acl match */ + dpt_udp("dp1T0", "aa:bb:cc:dd:1:a1", + "10.0.1.2", 10000, "20.0.2.2", 20000, + "10.0.1.2", 10000, "20.0.2.2", 20000, + "aa:bb:cc:dd:2:b1", "dp2T1", + DP_TEST_FWD_DROPPED); + + /* TCP, no acl match (dst port 1024) */ + dpt_tcp(TH_SYN, "dp1T0", "aa:bb:cc:dd:1:a1", + "10.0.1.2", 32878, "20.0.2.2", 1024, + "10.0.1.2", 32878, "20.0.2.2", 1024, + "aa:bb:cc:dd:2:b1", "dp2T1", + DP_TEST_FWD_FORWARDED); + + /* TCP, acl match */ + dpt_tcp(TH_SYN, "dp1T0", "aa:bb:cc:dd:1:a1", + "10.0.1.2", 32878, "20.0.2.2", 80, + "10.0.1.2", 32878, "20.0.2.2", 80, + "aa:bb:cc:dd:2:b1", "dp2T1", + DP_TEST_FWD_DROPPED); + + /***************************************************************** + * Unconfig + */ + dp_test_npf_cmd("npf-ut detach interface:dpT10 acl-in acl:v4test", + false); + dp_test_npf_cmd("npf-ut delete acl:v4test", false); + dp_test_npf_cmd("npf-ut commit", false); + +} DP_END_TEST; + +/* + * acl2 - v6 drop acl on input + * + * acl:v6test drops ICMPv6, UDP and TCP (dst port 80) from 2001:1:1::2 to + * 2002:2:2::1 and is attached as acl-in on interface:dpT10. The IPv4 ICMP + * packet sent first is expected to be forwarded. + */ +DP_DECL_TEST_CASE(npf_acl, acl2, acl_setup, acl_teardown); +DP_START_TEST(acl2, test) +{ + dp_test_npf_cmd("npf-ut add acl:v6test 0 family=inet6", false); + + /* Drop ICMPv6 (proto 58) */ + dp_test_npf_cmd("npf-ut add acl:v6test 10 " + "src-addr=2001:1:1::2 " + "dst-addr=2002:2:2::1 " + "proto-base=58 " + "action=drop", false); + + /* Drop UDP (proto 17), any port */ + dp_test_npf_cmd("npf-ut add acl:v6test 20 " + "src-addr=2001:1:1::2 " + "dst-addr=2002:2:2::1 " + "proto-base=17 " + "action=drop", false); + + /* Drop TCP (proto 6) to dst port 80 */ + dp_test_npf_cmd("npf-ut add acl:v6test 30 " + "src-addr=2001:1:1::2 " + "dst-addr=2002:2:2::1 dst-port=80 " + "proto-base=6 " + "action=drop", false); + + dp_test_npf_cmd("npf-ut attach interface:dpT10 acl-in acl:v6test", + false); + + dp_test_npf_cmd("npf-ut commit", false); + + dpt_icmp(ICMP_ECHO, + "dp1T0", "aa:bb:cc:dd:1:a1", + "10.0.1.2", 10000, "20.0.2.2", + "10.0.1.2", 10000, "20.0.2.2", + "aa:bb:cc:dd:2:b1", "dp2T1", + DP_TEST_FWD_FORWARDED); + + /* ICMP6, no acl match (source 2001:1:1::3) */ + dpt_icmp6(ICMP6_ECHO_REQUEST, + "dp1T0", 
"aa:bb:cc:dd:1:a1", + "2001:1:1::3", 10000, "2002:2:2::1", + "aa:bb:cc:dd:2:b1", "dp2T1", + DP_TEST_FWD_FORWARDED); + + /* ICMP6, acl match */ + dpt_icmp6(ICMP6_ECHO_REQUEST, + "dp1T0", "aa:bb:cc:dd:1:a1", + "2001:1:1::2", 10000, "2002:2:2::1", + "aa:bb:cc:dd:2:b1", "dp2T1", + DP_TEST_FWD_DROPPED); + + /* UDP, no acl match (source 2001:1:1::3) */ + dpt_udp("dp1T0", "aa:bb:cc:dd:1:a1", + "2001:1:1::3", 4321, "2002:2:2::1", 1024, + "2001:1:1::3", 4321, "2002:2:2::1", 1024, + "aa:bb:cc:dd:2:b1", "dp2T1", + DP_TEST_FWD_FORWARDED); + + /* UDP, acl match */ + dpt_udp("dp1T0", "aa:bb:cc:dd:1:a1", + "2001:1:1::2", 1234, "2002:2:2::1", 1024, + "2001:1:1::2", 1234, "2002:2:2::1", 1024, + "aa:bb:cc:dd:2:b1", "dp2T1", + DP_TEST_FWD_DROPPED); + + /* TCP, no acl match (source 2001:1:1::3) */ + dpt_tcp(TH_SYN, "dp1T0", "aa:bb:cc:dd:1:a1", + "2001:1:1::3", 30123, "2002:2:2::1", 80, + "2001:1:1::3", 30123, "2002:2:2::1", 80, + "aa:bb:cc:dd:2:b1", "dp2T1", + DP_TEST_FWD_FORWARDED); + + /* TCP, acl match */ + dpt_tcp(TH_SYN, "dp1T0", "aa:bb:cc:dd:1:a1", + "2001:1:1::2", 2121, "2002:2:2::1", 80, + "2001:1:1::2", 2121, "2002:2:2::1", 80, + "aa:bb:cc:dd:2:b1", "dp2T1", + DP_TEST_FWD_DROPPED); + + /***************************************************************** + * Unconfig + */ + dp_test_npf_cmd("npf-ut detach interface:dpT10 acl-in acl:v6test", + false); + dp_test_npf_cmd("npf-ut delete acl:v6test", false); + dp_test_npf_cmd("npf-ut commit", false); + +} DP_END_TEST; + + +/* + * acl3 - IPv4 acl on output + * + * acl:v4test drops UDP from 10.0.1.2 to 20.0.2.2 and is attached as + * acl-out on interface:dpT21. + */ +DP_DECL_TEST_CASE(npf_acl, acl3, acl_setup, acl_teardown); +DP_START_TEST(acl3, test) +{ + dp_test_npf_cmd("npf-ut add acl:v4test 0 family=inet", false); + + /* Drop UDP (proto 17), any port */ + dp_test_npf_cmd("npf-ut add acl:v4test 20 " + "src-addr=10.0.1.2 " + "dst-addr=20.0.2.2 " + "proto-base=17 " + "action=drop", false); + + dp_test_npf_cmd("npf-ut attach interface:dpT21 acl-out acl:v4test", + false); + + dp_test_npf_cmd("npf-ut commit", false); + + /* UDP, no acl match (source 10.0.1.3) */ + dpt_udp("dp1T0", "aa:bb:cc:dd:1:a1", + "10.0.1.3", 10000, 
"20.0.2.2", 30000, + "10.0.1.3", 10000, "20.0.2.2", 30000, + "aa:bb:cc:dd:2:b1", "dp2T1", + DP_TEST_FWD_FORWARDED); + + /* UDP, acl match */ + dpt_udp("dp1T0", "aa:bb:cc:dd:1:a1", + "10.0.1.2", 10000, "20.0.2.2", 20000, + "10.0.1.2", 10000, "20.0.2.2", 20000, + "aa:bb:cc:dd:2:b1", "dp2T1", + DP_TEST_FWD_DROPPED); + + /***************************************************************** + * Unconfig + */ + dp_test_npf_cmd("npf-ut detach interface:dpT21 acl-out acl:v4test", + false); + dp_test_npf_cmd("npf-ut delete acl:v4test", false); + dp_test_npf_cmd("npf-ut commit", false); + +} DP_END_TEST; + +/* + * acl4 - v6 drop acl on output + * + * acl:v6test drops TCP to port 80 from 2001:1:1::2 to 2002:2:2::1 and is + * attached as acl-out on interface:dpT21. + */ +DP_DECL_TEST_CASE(npf_acl, acl4, acl_setup, acl_teardown); +DP_START_TEST(acl4, test) +{ + dp_test_npf_cmd("npf-ut add acl:v6test 0 family=inet6", false); + + /* Drop TCP (proto 6) to dst port 80 */ + dp_test_npf_cmd("npf-ut add acl:v6test 30 " + "src-addr=2001:1:1::2 " + "dst-addr=2002:2:2::1 dst-port=80 " + "proto-base=6 " + "action=drop", false); + + dp_test_npf_cmd("npf-ut attach interface:dpT21 acl-out acl:v6test", + false); + + dp_test_npf_cmd("npf-ut commit", false); + + /* TCP, no acl match (source 2001:1:1::3) */ + dpt_tcp(TH_SYN, "dp1T0", "aa:bb:cc:dd:1:a1", + "2001:1:1::3", 30123, "2002:2:2::1", 80, + "2001:1:1::3", 30123, "2002:2:2::1", 80, + "aa:bb:cc:dd:2:b1", "dp2T1", + DP_TEST_FWD_FORWARDED); + + /* TCP, acl match */ + dpt_tcp(TH_SYN, "dp1T0", "aa:bb:cc:dd:1:a1", + "2001:1:1::2", 2121, "2002:2:2::1", 80, + "2001:1:1::2", 2121, "2002:2:2::1", 80, + "aa:bb:cc:dd:2:b1", "dp2T1", + DP_TEST_FWD_DROPPED); + + /***************************************************************** + * Unconfig + */ + dp_test_npf_cmd("npf-ut detach interface:dpT21 acl-out acl:v6test", + false); + dp_test_npf_cmd("npf-ut delete acl:v6test", false); + dp_test_npf_cmd("npf-ut commit", false); + +} DP_END_TEST; + +/* + * dpt_gre_spath - build an Ethernet-framed IPv4 payload (10.73.0.0 -> + * 10.73.2.0), wrap a copy of it in a GRE (ETH_P_TEB) packet from gre_local + * to gre_remote with DF set, and use that as the expected output on tx_intf + * with forwarding status "status". The payload itself is injected with + * dp_test_send_spath_pkt() on interface gre_name. + */ +static void dpt_gre_spath(const char *gre_name, + const char *gre_local, const char *gre_remote, + const char *tx_intf, const char *nh_mac_str, + int status) +{ + struct 
dp_test_expected *exp; + struct rte_mbuf *test_pak, *payload_pak; + int len = 64; + int gre_pl_len; + void *gre_payload; + + payload_pak = dp_test_create_ipv4_pak("10.73.0.0", "10.73.2.0", + 1, &len); + dp_test_pktmbuf_eth_init(payload_pak, nh_mac_str, + dp_test_intf_name2mac_str(tx_intf), + RTE_ETHER_TYPE_IPV4); + + gre_pl_len = rte_pktmbuf_data_len(payload_pak); + + test_pak = dp_test_create_gre_ipv4_pak( + gre_local, gre_remote, 1, &gre_pl_len, ETH_P_TEB, 0, 0, + &gre_payload); + memcpy(gre_payload, rte_pktmbuf_mtod(payload_pak, + const struct rte_ether_hdr *), gre_pl_len); + dp_test_set_pak_ip_field(iphdr(test_pak), DP_TEST_SET_DF, 1); + dp_test_pktmbuf_eth_init(test_pak, + nh_mac_str, + dp_test_intf_name2mac_str(tx_intf), + RTE_ETHER_TYPE_IPV4); + + exp = dp_test_exp_create(test_pak); + rte_pktmbuf_free(test_pak); + dp_test_exp_set_oif_name(exp, tx_intf); + dp_test_exp_set_fwd_status(exp, status); + dp_test_send_spath_pkt(payload_pak, gre_name, exp); +} + +/* + * acl5. Tests ACL egress on the ip_lookup_and_originate output path using a + * GRE tunneled pkt. 
+ */ +DP_DECL_TEST_CASE(npf_acl, acl5, NULL, NULL); +DP_START_TEST(acl5, test) +{ + const char *nh_mac_str2, *nh_mac_str3; + + dp_test_npf_cmd("npf-ut add acl:v4test 0 family=inet", false); + dp_test_npf_cmd("npf-ut add acl:v4test 10 " + "dst-addr=1.1.2.3 action=drop", false); + dp_test_npf_cmd("npf-ut attach interface:dpT11 acl-out acl:v4test", + false); + dp_test_npf_cmd("npf-ut commit", false); + + nh_mac_str2 = "aa:bb:cc:dd:ee:f2"; + nh_mac_str3 = "aa:bb:cc:dd:ee:f3"; + + dp_test_nl_add_ip_addr_and_connected("dp1T1", "1.1.2.1/24"); + dp_test_netlink_add_neigh("dp1T1", "1.1.2.2", nh_mac_str2); + dp_test_netlink_add_neigh("dp1T1", "1.1.2.3", nh_mac_str3); + + /* + * No acl match: tunnel remote 1.1.2.2 is not the dropped dst-addr + */ + dp_test_intf_gre_l2_create("tun1", "1.1.2.1", "1.1.2.2", 0); + dpt_gre_spath("tun1", "1.1.2.1", "1.1.2.2", "dp1T1", nh_mac_str2, + DP_TEST_FWD_FORWARDED); + dp_test_intf_gre_l2_delete("tun1", "1.1.2.1", "1.1.2.2", 0); + + /* + * acl match: tunnel remote 1.1.2.3 equals the dropped dst-addr + */ + dp_test_intf_gre_l2_create("tun2", "1.1.2.1", "1.1.2.3", 0); + dpt_gre_spath("tun2", "1.1.2.1", "1.1.2.3", "dp1T1", nh_mac_str3, + DP_TEST_FWD_DROPPED); + dp_test_intf_gre_l2_delete("tun2", "1.1.2.1", "1.1.2.3", 0); + + + /* + * Clean up: remove the ACL first, then the neighbours and the address + */ + dp_test_npf_cmd("npf-ut detach interface:dpT11 acl-out acl:v4test", + false); + dp_test_npf_cmd("npf-ut delete acl:v4test", false); + dp_test_npf_cmd("npf-ut commit", false); + + dp_test_netlink_del_neigh("dp1T1", "1.1.2.2", nh_mac_str2); + dp_test_netlink_del_neigh("dp1T1", "1.1.2.3", nh_mac_str3); + dp_test_nl_del_ip_addr_and_connected("dp1T1", "1.1.2.1/24"); + +} DP_END_TEST; + +/* + * acl6. 
Tests ACL egress on IPv4 spath output + */ +DP_DECL_TEST_CASE(npf_acl, acl6, acl_setup, acl_teardown); +DP_START_TEST(acl6, test) +{ + dp_test_npf_cmd("npf-ut add acl:v4test 0 family=inet", false); + + /* Drop UDP (proto 17) to 20.0.2.2 */ + dp_test_npf_cmd("npf-ut add acl:v4test 20 " + "dst-addr=20.0.2.2 " + "proto-base=17 " + "action=drop", false); + + dp_test_npf_cmd("npf-ut attach interface:dpT21 acl-out acl:v4test", + false); + + dp_test_npf_cmd("npf-ut commit", false); + + /* UDP, no acl match (dst 20.0.2.3); rx interface is NULL and the source is 20.0.2.1, the address acl_setup gives dp2T1 */ + dpt_udp(NULL, "aa:bb:cc:dd:1:a1", + "20.0.2.1", 10000, "20.0.2.3", 30000, + "20.0.2.1", 10000, "20.0.2.3", 30000, + "aa:bb:cc:dd:2:b3", "dp2T1", + DP_TEST_FWD_FORWARDED); + + /* UDP, acl match (dst 20.0.2.2) */ + dpt_udp(NULL, "aa:bb:cc:dd:1:a1", + "20.0.2.1", 10000, "20.0.2.2", 20000, + "20.0.2.1", 10000, "20.0.2.2", 20000, + "aa:bb:cc:dd:2:b1", "dp2T1", + DP_TEST_FWD_DROPPED); + + /***************************************************************** + * Unconfig + */ + dp_test_npf_cmd("npf-ut detach interface:dpT21 acl-out acl:v4test", + false); + dp_test_npf_cmd("npf-ut delete acl:v4test", false); + dp_test_npf_cmd("npf-ut commit", false); + +} DP_END_TEST; + +/* + * acl7. 
Tests ACL egress on IPv6 spath output + */ +DP_DECL_TEST_CASE(npf_acl, acl7, acl_setup, acl_teardown); +DP_START_TEST(acl7, test) +{ + + dp_test_npf_cmd("npf-ut add acl:v6test 0 family=inet6", false); + + /* Drop UDP (proto 17) to 2002:2:2::1 */ + dp_test_npf_cmd("npf-ut add acl:v6test 30 " + "dst-addr=2002:2:2::1 " + "proto-base=17 " + "action=drop", false); + + dp_test_npf_cmd("npf-ut attach interface:dpT21 acl-out acl:v6test", + false); + + dp_test_npf_cmd("npf-ut commit", false); + + /* UDP, no acl match (dst 2002:2:2::3). NOTE(review): expected dmac is aa:bb:cc:dd:2:b1 although acl_setup maps 2002:2:2::3 to aa:bb:cc:dd:2:b3 - confirm this is intended */ + dpt_udp(NULL, "aa:bb:cc:dd:1:a1", + "2002:2:2::2", 4321, "2002:2:2::3", 1024, + "2002:2:2::2", 4321, "2002:2:2::3", 1024, + "aa:bb:cc:dd:2:b1", "dp2T1", + DP_TEST_FWD_FORWARDED); + + /* UDP, acl match (dst 2002:2:2::1) */ + dpt_udp(NULL, "aa:bb:cc:dd:1:a1", + "2002:2:2::2", 1234, "2002:2:2::1", 1024, + "2002:2:2::2", 1234, "2002:2:2::1", 1024, + "aa:bb:cc:dd:2:b1", "dp2T1", + DP_TEST_FWD_DROPPED); + + /***************************************************************** + * Unconfig + */ + dp_test_npf_cmd("npf-ut detach interface:dpT21 acl-out acl:v6test", + false); + dp_test_npf_cmd("npf-ut delete acl:v6test", false); + dp_test_npf_cmd("npf-ut commit", false); + +} DP_END_TEST; + +/* + * acl8 - IPv4 drop acl on output, fragmented packet + * + * The rule uses fragment=y. Each UDP packet is split into three fragments + * which are injected one at a time. + */ +DP_DECL_TEST_CASE(npf_acl, acl8, acl_setup, acl_teardown); +DP_START_TEST(acl8, test) +{ + dp_test_npf_cmd("npf-ut add acl:v4test 0 family=inet", false); + + /* Drop fragmented UDP (fragment=y) from 10.0.1.2 to 20.0.2.2 */ + dp_test_npf_cmd("npf-ut add acl:v4test 10 " + "fragment=y " + "src-addr=10.0.1.2 " + "dst-addr=20.0.2.2 " + "proto-base=17 " + "action=drop", false); + + dp_test_npf_cmd("npf-ut attach interface:dpT21 acl-out acl:v4test", + false); + + dp_test_npf_cmd("npf-ut commit", false); + + struct dp_test_expected *exp; + struct rte_mbuf *test_pak; + int len = 800; + + struct dp_test_pkt_desc_t pkt_UDP = { + .text = "IPv4 UDP", + .len = len, + .ether_type = RTE_ETHER_TYPE_IPV4, + .l3_src = "10.0.1.3", + .l2_src = "aa:bb:cc:dd:1:a1", + .l3_dst = "20.0.2.2", + .l2_dst = "aa:bb:cc:dd:2:b1", + .proto = 
IPPROTO_UDP, + .l4 = { + .udp = { + .sport = 10000, + .dport = 30000 + } + }, + .rx_intf = "dp1T0", + .tx_intf = "dp2T1" + }; + + /* + * 1st fragmented packet. Source address does not match drop ACL so + * packet should be forwarded. + * + * First fragment is last in the array. Array indices to get an + * in-order packet are: 2, 0, 1 + */ + struct rte_mbuf *frag_pkts[3] = { 0 }; + uint16_t frag_sizes[3] = { 400, 400, 8 }; + int rc; + + pkt_UDP.l3_src = "10.0.1.3"; + test_pak = dp_test_v4_pkt_from_desc(&pkt_UDP); + + rc = dp_test_ipv4_fragment_packet(test_pak, frag_pkts, + ARRAY_SIZE(frag_pkts), + frag_sizes, 0); + rte_pktmbuf_free(test_pak); + + dp_test_fail_unless(rc == ARRAY_SIZE(frag_pkts), + "dp_test_ipv4_fragment_packet failed: %d", rc); + + /* frag_pkts[2]: first fragment, expected out of dp2T1 */ + exp = dp_test_exp_create_m(NULL, 1); + dp_test_exp_set_pak_m(exp, 0, frag_pkts[2]); + dp_test_exp_set_oif_name_m(exp, 0, "dp2T1"); + dp_test_pak_receive(frag_pkts[2], "dp1T0", exp); + + /* frag_pkts[0]: second in-order fragment, expected out of dp2T1 */ + exp = dp_test_exp_create_m(NULL, 1); + dp_test_exp_set_pak_m(exp, 0, frag_pkts[0]); + dp_test_exp_set_oif_name_m(exp, 0, "dp2T1"); + dp_test_pak_receive(frag_pkts[0], "dp1T0", exp); + + /* frag_pkts[1]: last in-order fragment, expected out of dp2T1 */ + exp = dp_test_exp_create_m(NULL, 1); + dp_test_exp_set_pak_m(exp, 0, frag_pkts[1]); + dp_test_exp_set_oif_name_m(exp, 0, "dp2T1"); + dp_test_pak_receive(frag_pkts[1], "dp1T0", exp); + + /* + * 2nd fragmented packet. Source address does match drop ACL so + * packet should be dropped. 
+ */ + pkt_UDP.l3_src = "10.0.1.2"; + test_pak = dp_test_v4_pkt_from_desc(&pkt_UDP); + + rc = dp_test_ipv4_fragment_packet(test_pak, frag_pkts, + ARRAY_SIZE(frag_pkts), + frag_sizes, 0); + rte_pktmbuf_free(test_pak); + + dp_test_fail_unless(rc == ARRAY_SIZE(frag_pkts), + "dp_test_ipv4_fragment_packet failed: %d", rc); + + /* frag_pkts[2]: first fragment, expected to be dropped */ + exp = dp_test_exp_create_m(NULL, 1); + dp_test_exp_set_pak_m(exp, 0, frag_pkts[2]); + dp_test_exp_set_oif_name_m(exp, 0, "dp2T1"); + dp_test_exp_set_fwd_status(exp, DP_TEST_FWD_DROPPED); + dp_test_pak_receive(frag_pkts[2], "dp1T0", exp); + + /* frag_pkts[0]: second in-order fragment, expected to be dropped */ + exp = dp_test_exp_create_m(NULL, 1); + dp_test_exp_set_pak_m(exp, 0, frag_pkts[0]); + dp_test_exp_set_oif_name_m(exp, 0, "dp2T1"); + dp_test_exp_set_fwd_status(exp, DP_TEST_FWD_DROPPED); + dp_test_pak_receive(frag_pkts[0], "dp1T0", exp); + + /* frag_pkts[1]: last in-order fragment, expected to be dropped */ + exp = dp_test_exp_create_m(NULL, 1); + dp_test_exp_set_pak_m(exp, 0, frag_pkts[1]); + dp_test_exp_set_oif_name_m(exp, 0, "dp2T1"); + dp_test_exp_set_fwd_status(exp, DP_TEST_FWD_DROPPED); + dp_test_pak_receive(frag_pkts[1], "dp1T0", exp); + + /***************************************************************** + * Unconfig + */ + dp_test_npf_cmd("npf-ut detach interface:dpT21 acl-out acl:v4test", + false); + dp_test_npf_cmd("npf-ut delete acl:v4test", false); + dp_test_npf_cmd("npf-ut commit", false); + +} DP_END_TEST; + +/* + * acl9 - v6 drop acl on output, fragmented packet + * + * The rule uses fragment=y. Each UDP packet is split into three fragments + * which are injected one at a time. + */ +DP_DECL_TEST_CASE(npf_acl, acl9, acl_setup, acl_teardown); +DP_START_TEST(acl9, test) +{ + dp_test_npf_cmd("npf-ut add acl:v6test 0 family=inet6", false); + + /* Drop fragmented UDP: proto-base 44 is the IPv6 fragment header, proto-final 17 is UDP */ + dp_test_npf_cmd("npf-ut add acl:v6test 30 " + "fragment=y " + "src-addr=2001:1:1::2 " + "dst-addr=2002:2:2::1 " + "proto-base=44 " + "proto-final=17 " + "action=drop", false); + + dp_test_npf_cmd("npf-ut attach interface:dpT21 acl-out acl:v6test", + false); + + dp_test_npf_cmd("npf-ut commit", false); + + struct dp_test_expected *exp; + struct rte_mbuf *test_pak; 
+ int len = 800; + + struct dp_test_pkt_desc_t pkt_UDP = { + .text = "IPv6 UDP", + .len = len, + .ether_type = RTE_ETHER_TYPE_IPV6, + .l3_src = "2001:1:1::3", + .l2_src = "aa:bb:cc:dd:1:a1", + .l3_dst = "2002:2:2::1", + .l2_dst = "aa:bb:cc:dd:2:b1", + .proto = IPPROTO_UDP, + .l4 = { + .udp = { + .sport = 30123, + .dport = 22143 + } + }, + .rx_intf = "dp1T0", + .tx_intf = "dp2T1" + }; + + /* + * 1st fragmented packet. Source address does not match drop ACL so + * packet should be forwarded. + * + * First fragment is last in the array. Array indices to get an + * in-order packet are: 2, 0, 1 + */ + struct rte_mbuf *frag_pkts[3] = { 0 }; + uint16_t frag_sizes[3] = { 400, 400, 8 }; + int rc; + + pkt_UDP.l3_src = "2001:1:1::3"; + test_pak = dp_test_v6_pkt_from_desc(&pkt_UDP); + + rc = dp_test_ipv6_fragment_packet(test_pak, frag_pkts, + ARRAY_SIZE(frag_pkts), + frag_sizes, 0); + rte_pktmbuf_free(test_pak); + + dp_test_fail_unless(rc == ARRAY_SIZE(frag_pkts), + "dp_test_ipv6_fragment_packet failed: %d", rc); + + /* frag_pkts[2]: first fragment, expected out of dp2T1 */ + exp = dp_test_exp_create_m(NULL, 1); + dp_test_exp_set_pak_m(exp, 0, frag_pkts[2]); + dp_test_exp_set_oif_name_m(exp, 0, "dp2T1"); + dp_test_pak_receive(frag_pkts[2], "dp1T0", exp); + + /* frag_pkts[0]: second in-order fragment, expected out of dp2T1 */ + exp = dp_test_exp_create_m(NULL, 1); + dp_test_exp_set_pak_m(exp, 0, frag_pkts[0]); + dp_test_exp_set_oif_name_m(exp, 0, "dp2T1"); + dp_test_pak_receive(frag_pkts[0], "dp1T0", exp); + + /* frag_pkts[1]: last in-order fragment, expected out of dp2T1 */ + exp = dp_test_exp_create_m(NULL, 1); + dp_test_exp_set_pak_m(exp, 0, frag_pkts[1]); + dp_test_exp_set_oif_name_m(exp, 0, "dp2T1"); + dp_test_pak_receive(frag_pkts[1], "dp1T0", exp); + + /* + * 2nd fragmented packet. Source address does match drop ACL so + * packet should be dropped. 
+ */ + pkt_UDP.l3_src = "2001:1:1::2"; + test_pak = dp_test_v6_pkt_from_desc(&pkt_UDP); + + rc = dp_test_ipv6_fragment_packet(test_pak, frag_pkts, + ARRAY_SIZE(frag_pkts), + frag_sizes, 0); + rte_pktmbuf_free(test_pak); + + dp_test_fail_unless(rc == ARRAY_SIZE(frag_pkts), + "dp_test_ipv6_fragment_packet failed: %d", rc); + + /* frag_pkts[2]: first fragment, expected to be dropped */ + exp = dp_test_exp_create_m(NULL, 1); + dp_test_exp_set_pak_m(exp, 0, frag_pkts[2]); + dp_test_exp_set_oif_name_m(exp, 0, "dp2T1"); + dp_test_exp_set_fwd_status(exp, DP_TEST_FWD_DROPPED); + dp_test_pak_receive(frag_pkts[2], "dp1T0", exp); + + /* frag_pkts[0]: second in-order fragment, expected to be dropped */ + exp = dp_test_exp_create_m(NULL, 1); + dp_test_exp_set_pak_m(exp, 0, frag_pkts[0]); + dp_test_exp_set_oif_name_m(exp, 0, "dp2T1"); + dp_test_exp_set_fwd_status(exp, DP_TEST_FWD_DROPPED); + dp_test_pak_receive(frag_pkts[0], "dp1T0", exp); + + /* frag_pkts[1]: last in-order fragment, expected to be dropped */ + exp = dp_test_exp_create_m(NULL, 1); + dp_test_exp_set_pak_m(exp, 0, frag_pkts[1]); + dp_test_exp_set_oif_name_m(exp, 0, "dp2T1"); + dp_test_exp_set_fwd_status(exp, DP_TEST_FWD_DROPPED); + dp_test_pak_receive(frag_pkts[1], "dp1T0", exp); + + /***************************************************************** + * Unconfig + */ + dp_test_npf_cmd("npf-ut detach interface:dpT21 acl-out acl:v6test", + false); + dp_test_npf_cmd("npf-ut delete acl:v6test", false); + dp_test_npf_cmd("npf-ut commit", false); + +} DP_END_TEST; + + +/* + * acl10 - IPv4 egress ACL on a pppoe interface. + * + * Two packets are sent over a pppoe session. First one is permitted by the + * ACL, and second one is blocked. 
+ * + * (based on dp_test_ppp.c, TEST(ppp_traffic, ppp_traffic_1)) + */ + +static struct rte_mbuf * +npf_acl_pppoe_pkt(int len, const char *d_mac, const char *s_mac, + uint16_t ether_type) +{ + struct rte_mbuf *test_pak; + + test_pak = dp_test_create_ipv4_pak("1.1.1.1", "10.73.2.1", + 1, &len); + /* Add the Ethernet header for ingress on dp1T0; the caller supplies both MACs and the ether type */ + (void)dp_test_pktmbuf_eth_init(test_pak, d_mac, s_mac, ether_type); + + return test_pak; +} + +static struct dp_test_expected * +npf_acl_pppoe_exp(int len, struct rte_mbuf *test_pak, const char *oif, + const char *dst_mac, uint16_t session_id) +{ + struct dp_test_expected *exp; + struct pppoe_packet *ppp_hdr; + + /* Create pak we expect to receive on the tx ring: built from test_pak, TTL decremented, ppp header for session_id prepended. NOTE(review): "len + 20 + 8" presumably adds the IPv4 and UDP header sizes - confirm */ + exp = dp_test_exp_create(test_pak); + + dp_test_exp_set_oif_name(exp, oif); + dp_test_ipv4_decrement_ttl(dp_test_exp_get_pak(exp)); + + ppp_hdr = dp_test_ipv4_pktmbuf_ppp_prepend( + dp_test_exp_get_pak(exp), + dst_mac, + dp_test_intf_name2mac_str(oif), + len + 20 + 8, + session_id); + dp_test_fail_unless(ppp_hdr, "Could not prepend ppp header"); + + return exp; +} + +DP_DECL_TEST_CASE(npf_acl, acl10, NULL, NULL); +DP_START_TEST(acl10, test) +{ + struct dp_test_expected *exp; + struct rte_mbuf *test_pak; + int len = 22; + const char *dst_mac = "aa:bb:cc:dd:ee:ff"; + uint16_t session_id = 3; + + dp_test_nl_add_ip_addr_and_connected("dp1T0", "1.1.1.1/24"); + dp_test_nl_add_ip_addr_and_connected("dp2T1", "2.2.2.2/24"); + + dp_test_intf_ppp_create("pppoe0", VRF_DEFAULT_ID); + dp_test_create_pppoe_session("pppoe0", "dp2T1", session_id, + dp_test_intf_name2mac_str("dp2T1"), + dst_mac); + + dp_test_netlink_add_route("10.73.2.0/24 nh int:pppoe0"); + + /* + * Add egress ACL to *allow* traffic on pppoe interface + */ + dp_test_npf_cmd("npf-ut add acl:v4test 10 " + "src-addr=1.1.1.1 " + "proto-base=17 " + "action=accept", false); + dp_test_npf_cmd("npf-ut attach interface:pppoe0 acl-out acl:v4test", + false); + dp_test_npf_cmd("npf-ut commit", false); + + /* Ingress dp1T0 */ + test_pak = 
npf_acl_pppoe_pkt(len, dp_test_intf_name2mac_str("dp1T0"), + DP_TEST_INTF_DEF_SRC_MAC, + RTE_ETHER_TYPE_IPV4); + + /* Create pak we expect to receive on the tx ring */ + exp = npf_acl_pppoe_exp(len, test_pak, "dp2T1", dst_mac, session_id); + + dp_test_pak_receive(test_pak, "dp1T0", exp); + + /* + * Change egress ACL to *block* traffic on pppoe interface + */ + dp_test_npf_cmd("npf-ut add acl:v4test 10 " + "src-addr=1.1.1.1 " + "proto-base=17 " + "action=drop", false); + dp_test_npf_cmd("npf-ut commit", false); + + test_pak = npf_acl_pppoe_pkt(len, dp_test_intf_name2mac_str("dp1T0"), + DP_TEST_INTF_DEF_SRC_MAC, + RTE_ETHER_TYPE_IPV4); + + /* Create pak we do *not* expect to receive on the tx ring */ + exp = npf_acl_pppoe_exp(len, test_pak, "dp2T1", dst_mac, session_id); + dp_test_exp_set_fwd_status(exp, DP_TEST_FWD_DROPPED); + + dp_test_pak_receive(test_pak, "dp1T0", exp); + + /* Cleanup */ + dp_test_npf_cmd("npf-ut detach interface:pppoe0 acl-out acl:v4test", + false); + dp_test_npf_cmd("npf-ut delete acl:v4test", false); + dp_test_npf_cmd("npf-ut commit", false); + + dp_test_netlink_del_route("10.73.2.0/24 nh int:pppoe0"); + dp_test_intf_ppp_delete("pppoe0", VRF_DEFAULT_ID); + dp_test_nl_del_ip_addr_and_connected("dp1T0", "1.1.1.1/24"); + dp_test_nl_del_ip_addr_and_connected("dp2T1", "2.2.2.2/24"); + +} DP_END_TEST; + +/* + * acl11 - IPv4 egress ACL on a bridge interface. 
+ */ +DP_DECL_TEST_CASE(npf_acl, acl11, NULL, NULL); +DP_START_TEST(acl11, test) +{ + struct rte_mbuf *test_pak; + struct dp_test_expected *exp; + int len = 20; + + dp_test_intf_bridge_create("br1"); + dp_test_intf_bridge_add_port("br1", "dp2T1"); + + /* Setup v4 interfaces and neighbours */ + dp_test_nl_add_ip_addr_and_connected("dp1T0", "10.0.1.1/24"); + dp_test_nl_add_ip_addr_and_connected("br1", "20.0.2.1/24"); + + dp_test_netlink_add_neigh("dp1T0", "10.0.1.2", "aa:bb:cc:dd:1:a1"); + dp_test_netlink_add_neigh("br1", "20.0.2.2", "aa:bb:cc:dd:2:b1"); + + /* + * Add egress ACL to br1 interface + */ + dp_test_npf_cmd("npf-ut add acl:v4test 10 " + "src-addr=10.0.1.2 " + "proto-base=17 " + "action=drop", false); + dp_test_npf_cmd("npf-ut attach interface:br1 acl-out acl:v4test", + false); + dp_test_npf_cmd("npf-ut commit", false); + + /* + * Packet #1. Does not match 'drop' ACL, and is forwarded + */ + test_pak = dp_test_create_ipv4_pak("10.0.1.3", "20.0.2.2", + 1, &len); + dp_test_pktmbuf_eth_init(test_pak, + dp_test_intf_name2mac_str("dp1T0"), + "aa:bb:cc:dd:1:a1", RTE_ETHER_TYPE_IPV4); + + exp = dp_test_exp_create(test_pak); + dp_test_exp_set_oif_name(exp, "dp2T1"); + dp_test_ipv4_decrement_ttl(dp_test_exp_get_pak(exp)); + + dp_test_pktmbuf_eth_init(dp_test_exp_get_pak(exp), + "aa:bb:cc:dd:2:b1", + dp_test_intf_name2mac_str("br1"), + RTE_ETHER_TYPE_IPV4); + + dp_test_pak_receive(test_pak, "dp1T0", exp); + + + /* + * Packet #2. 
Matches 'drop' ACL, and is dropped + */ + test_pak = dp_test_create_ipv4_pak("10.0.1.2", "20.0.2.2", + 1, &len); + dp_test_pktmbuf_eth_init(test_pak, + dp_test_intf_name2mac_str("dp1T0"), + "aa:bb:cc:dd:1:a1", RTE_ETHER_TYPE_IPV4); + + exp = dp_test_exp_create(test_pak); + dp_test_exp_set_oif_name(exp, "dp2T1"); + dp_test_ipv4_decrement_ttl(dp_test_exp_get_pak(exp)); + + dp_test_pktmbuf_eth_init(dp_test_exp_get_pak(exp), + "aa:bb:cc:dd:2:b1", + dp_test_intf_name2mac_str("br1"), + RTE_ETHER_TYPE_IPV4); + dp_test_exp_set_fwd_status(exp, DP_TEST_FWD_DROPPED); + + dp_test_pak_receive(test_pak, "dp1T0", exp); + + + /* Cleanup */ + dp_test_npf_cmd("npf-ut detach interface:br1 acl-out acl:v4test", + false); + dp_test_npf_cmd("npf-ut delete acl:v4test", false); + dp_test_npf_cmd("npf-ut commit", false); + + dp_test_netlink_del_neigh("dp1T0", "10.0.1.2", "aa:bb:cc:dd:1:a1"); + dp_test_netlink_del_neigh("br1", "20.0.2.2", "aa:bb:cc:dd:2:b1"); + + dp_test_nl_del_ip_addr_and_connected("dp1T0", "10.0.1.1/24"); + dp_test_nl_del_ip_addr_and_connected("br1", "20.0.2.1/24"); + + dp_test_intf_bridge_remove_port("br1", "dp2T1"); + dp_test_intf_bridge_del("br1"); + +} DP_END_TEST; + +/* + * IPv4 multicast forwarding and egress ACL. + * + * Based on test case ip_mfwd_4. An egress ACL drops the packet on one of the + * two output interfaces. 
+ */ +DP_DECL_TEST_CASE(npf_acl, acl12, NULL, NULL); +DP_START_TEST(acl12, test) +{ + const char *grp_dest = "224.0.1.1"; /* Not link local */ + const char *grp_mac = "01:00:5e:00:01:01"; + struct dp_test_expected *exp; + struct rte_mbuf *test_pak; + int len = 22; + + /* Set up the interface addresses */ + dp_test_nl_add_ip_addr_and_connected("dp1T0", "1.1.1.1/24"); + dp_test_netlink_netconf_mcast("dp1T0", AF_INET, true); + + dp_test_nl_add_ip_addr_and_connected("dp2T1", "2.2.2.2/24"); + dp_test_netlink_netconf_mcast("dp2T1", AF_INET, true); + + dp_test_nl_add_ip_addr_and_connected("dp2T2", "3.3.3.3/24"); + dp_test_netlink_netconf_mcast("dp2T2", AF_INET, true); + + dp_test_mroute_nl(RTM_NEWROUTE, "10.73.1.1", "dp1T0", + "224.0.1.1/32 nh int:dp2T1 nh int:dp2T2"); + + dp_test_wait_for_mroute("10.73.1.1", "224.0.1.1", + "dpT10", "dpT21 dpT22", false); + + dp_test_npf_cmd("npf-ut add acl:v4test 0 family=inet", false); + + dp_test_npf_cmd("npf-ut add acl:v4test 20 " + "src-addr=10.73.1.1 " + "proto-base=17 " + "action=drop", false); + dp_test_npf_cmd("npf-ut attach interface:dpT22 acl-out acl:v4test", + false); + dp_test_npf_cmd("npf-ut commit", false); + + /* Create multicast pak */ + test_pak = dp_test_create_ipv4_pak("10.73.1.1", grp_dest, 1, &len); + dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T0"), + DP_TEST_INTF_DEF_SRC_MAC, RTE_ETHER_TYPE_IPV4); + + /* Create pak we expect to receive on the tx ring */ + exp = dp_test_exp_create_m(test_pak, 2); + dp_test_exp_set_oif_name_m(exp, 0, "dp2T1"); + dp_test_exp_set_oif_name_m(exp, 1, "dp2T2"); + + dp_test_exp_set_fwd_status_m(exp, 0, DP_TEST_FWD_FORWARDED); + dp_test_exp_set_fwd_status_m(exp, 1, DP_TEST_FWD_DROPPED); + + (void)dp_test_pktmbuf_eth_init(dp_test_exp_get_pak_m(exp, 0), + grp_mac, + dp_test_intf_name2mac_str("dp2T1"), + RTE_ETHER_TYPE_IPV4); + dp_test_ipv4_decrement_ttl(dp_test_exp_get_pak_m(exp, 0)); + + (void)dp_test_pktmbuf_eth_init(dp_test_exp_get_pak_m(exp, 1), + grp_mac, + 
dp_test_intf_name2mac_str("dp2T2"), + RTE_ETHER_TYPE_IPV4); + dp_test_ipv4_decrement_ttl(dp_test_exp_get_pak_m(exp, 1)); + + dp_test_pak_receive(test_pak, "dp1T0", exp); + + + /* Clean Up */ + + dp_test_npf_cmd("npf-ut detach interface:dpT22 acl-out acl:v4test", + false); + dp_test_npf_cmd("npf-ut delete acl:v4test", false); + dp_test_npf_cmd("npf-ut commit", false); + + dp_test_mroute_nl(RTM_DELROUTE, "10.73.1.1", "dp1T0", + "224.0.1.1/32 nh int:dp2T1 nh int:dp2T2"); + + dp_test_wait_for_mroute("10.73.1.1", "224.0.1.1", + "dpT10", "dpT21 dpT22", true); + + dp_test_netlink_netconf_mcast("dp1T0", AF_INET, false); + dp_test_nl_del_ip_addr_and_connected("dp1T0", "1.1.1.1/24"); + + dp_test_netlink_netconf_mcast("dp2T1", AF_INET, false); + dp_test_nl_del_ip_addr_and_connected("dp2T1", "2.2.2.2/24"); + + dp_test_netlink_netconf_mcast("dp2T2", AF_INET, false); + dp_test_nl_del_ip_addr_and_connected("dp2T2", "3.3.3.3/24"); + +} DP_END_TEST; + +/* + * IPv6 multicast forwarding and egress ACL. + * + * Based on test case ip_mfwd_5. An egress ACL drops the packet on one of the + * two output interfaces. 
+ */ +DP_DECL_TEST_CASE(npf_acl, acl13, NULL, NULL); +DP_START_TEST(acl13, test) +{ + const char *grp_dest = "ff0e::1:1"; + const char *grp_mac = "33:33:00:01:00:01"; + struct dp_test_expected *exp; + struct rte_mbuf *test_pak; + int len = 22; + + dp_test_nl_add_ip_addr_and_connected("dp1T0", "2001:1:1::1/64"); + dp_test_nl_add_ip_addr_and_connected("dp2T1", "2002:2:2::1/64"); + dp_test_nl_add_ip_addr_and_connected("dp2T2", "2003:3:3::1/64"); + + dp_test_netlink_add_neigh("dp1T0", "2001:1:1::2", + "aa:bb:cc:dd:1:a1"); + dp_test_netlink_add_neigh("dp2T1", "2002:2:2::2", + "aa:bb:cc:dd:2:b2"); + dp_test_netlink_add_neigh("dp2T2", "2003:3:3::2", + "aa:bb:cc:dd:3:c3"); + + dp_test_netlink_netconf_mcast("dp1T0", AF_INET6, true); + dp_test_netlink_netconf_mcast("dp2T1", AF_INET6, true); + dp_test_netlink_netconf_mcast("dp2T2", AF_INET6, true); + + /* Add multicast route */ + dp_test_mroute_nl(RTM_NEWROUTE, "2001:1:1::2", "dp1T0", + "ff0e::1:1/128 nh int:dp2T1 nh int:dp2T2"); + + dp_test_wait_for_mroute("2001:1:1::2", "ff0e::1:1", + "dpT10", "dpT21 dpT22", false); + + dp_test_npf_cmd("npf-ut add acl:v6test 0 family=inet6", false); + + dp_test_npf_cmd("npf-ut add acl:v6test 20 " + "src-addr=2001:1:1::2 " + "action=drop", false); + dp_test_npf_cmd("npf-ut attach interface:dpT22 acl-out acl:v6test", + false); + dp_test_npf_cmd("npf-ut commit", false); + + + /* Create multicast pak */ + test_pak = dp_test_create_ipv6_pak("2001:1:1::2", + grp_dest, + 1, &len); + + dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T0"), + DP_TEST_INTF_DEF_SRC_MAC, RTE_ETHER_TYPE_IPV6); + + /* Create pak we expect to receive on the tx ring */ + exp = dp_test_exp_create_m(test_pak, 2); + + dp_test_exp_set_oif_name_m(exp, 0, "dp2T1"); + dp_test_exp_set_oif_name_m(exp, 1, "dp2T2"); + + dp_test_exp_set_fwd_status_m(exp, 0, DP_TEST_FWD_FORWARDED); + dp_test_exp_set_fwd_status_m(exp, 1, DP_TEST_FWD_DROPPED); + + (void)dp_test_pktmbuf_eth_init(dp_test_exp_get_pak_m(exp, 0), + grp_mac, + 
dp_test_intf_name2mac_str("dp2T1"), + RTE_ETHER_TYPE_IPV6); + dp_test_ipv6_decrement_ttl(dp_test_exp_get_pak_m(exp, 0)); + + (void)dp_test_pktmbuf_eth_init(dp_test_exp_get_pak_m(exp, 1), + grp_mac, + dp_test_intf_name2mac_str("dp2T2"), + RTE_ETHER_TYPE_IPV6); + dp_test_ipv6_decrement_ttl(dp_test_exp_get_pak_m(exp, 1)); + + dp_test_pak_receive(test_pak, "dp1T0", exp); + + + /* Cleanup */ + dp_test_npf_cmd("npf-ut detach interface:dpT22 acl-out acl:v6test", + false); + dp_test_npf_cmd("npf-ut delete acl:v6test", false); + dp_test_npf_cmd("npf-ut commit", false); + + dp_test_mroute_nl(RTM_DELROUTE, "2001:1:1::2", "dp1T0", + "ff0e::1:1/128 nh int:dp2T1 nh int:dp2T2"); + + dp_test_wait_for_mroute("2001:1:1::2", "ff0e::1:1", + "dpT10", "dpT21 dpT22", true); + + dp_test_netlink_netconf_mcast("dp1T0", AF_INET6, false); + dp_test_netlink_netconf_mcast("dp2T1", AF_INET6, false); + dp_test_netlink_netconf_mcast("dp2T2", AF_INET6, false); + + dp_test_netlink_del_neigh("dp1T0", "2001:1:1::2", + "aa:bb:cc:dd:1:a1"); + dp_test_netlink_del_neigh("dp2T1", "2002:2:2::2", + "aa:bb:cc:dd:2:b2"); + dp_test_netlink_del_neigh("dp2T2", "2003:3:3::2", + "aa:bb:cc:dd:3:c3"); + + dp_test_nl_del_ip_addr_and_connected("dp1T0", "2001:1:1::1/64"); + dp_test_nl_del_ip_addr_and_connected("dp2T1", "2002:2:2::1/64"); + dp_test_nl_del_ip_addr_and_connected("dp2T2", "2003:3:3::1/64"); + +} DP_END_TEST; + +/* + * IPv4 Egress ACL at GRE tunnel start-point + */ +DP_DECL_TEST_CASE(npf_acl, acl14, NULL, NULL); +DP_START_TEST(acl14, test) +{ + struct rte_mbuf *m; + struct dp_test_expected *exp; + struct iphdr *inner_ip; + struct iphdr *exp_ip_outer[DP_TEST_MAX_EXPECTED_PAKS]; + int len = 32; + + dp_test_gre_setup_tunnel(VRF_DEFAULT_ID, "1.1.2.1", "1.1.2.2"); + + /* + * Egress ACL matches on UDP protocol for first packet (which is + * expected to be forwarded), and GRE protocol for second packet + * (dropped) + */ + dp_test_npf_cmd("npf-ut add acl:v4test 0 family=inet", false); + dp_test_npf_cmd("npf-ut add 
acl:v4test 10 " + "proto-base=17 " + "action=drop", false); + dp_test_npf_cmd("npf-ut attach interface:dpT22 acl-out acl:v4test", + false); + dp_test_npf_cmd("npf-ut commit", false); + + /* + * Packet 1. Egress ACL on output interface does not match + * encapsulation IP header. + */ + m = dp_test_create_ipv4_pak("1.1.1.2", "10.0.0.1", + 1, &len); + (void)dp_test_pktmbuf_eth_init(m, dp_test_intf_name2mac_str("dp1T1"), + DP_TEST_INTF_DEF_SRC_MAC, + RTE_ETHER_TYPE_IPV4); + inner_ip = iphdr(m); + dp_test_set_pak_ip_field(inner_ip, DP_TEST_SET_IP_ECN, + IPTOS_ECN_NOT_ECT); + gre_test_build_expected_pak(&exp, &inner_ip, + exp_ip_outer, 1); + dp_test_set_pak_ip_field(exp_ip_outer[0], + DP_TEST_SET_IP_ECN, IPTOS_ECN_NOT_ECT); + dp_test_pak_receive(m, "dp1T1", exp); + + /* + * Packet 2. egress ACL on output interface drops packet. + */ + dp_test_npf_cmd("npf-ut add acl:v4test 10 " + "proto-base=47 " + "action=drop", false); + dp_test_npf_cmd("npf-ut commit", false); + + m = dp_test_create_ipv4_pak("1.1.1.2", "10.0.0.1", + 1, &len); + (void)dp_test_pktmbuf_eth_init(m, dp_test_intf_name2mac_str("dp1T1"), + DP_TEST_INTF_DEF_SRC_MAC, + RTE_ETHER_TYPE_IPV4); + inner_ip = iphdr(m); + dp_test_set_pak_ip_field(inner_ip, DP_TEST_SET_IP_ECN, + IPTOS_ECN_NOT_ECT); + gre_test_build_expected_pak(&exp, &inner_ip, + exp_ip_outer, 1); + dp_test_set_pak_ip_field(exp_ip_outer[0], + DP_TEST_SET_IP_ECN, IPTOS_ECN_NOT_ECT); + dp_test_exp_set_fwd_status_m(exp, 0, DP_TEST_FWD_DROPPED); + + dp_test_pak_receive(m, "dp1T1", exp); + + /* And now clean up all the state we added. 
*/ + dp_test_npf_cmd("npf-ut detach interface:dpT22 acl-out acl:v4test", + false); + dp_test_npf_cmd("npf-ut delete acl:v4test", false); + dp_test_npf_cmd("npf-ut commit", false); + + dp_test_gre_teardown_tunnel(VRF_DEFAULT_ID, "1.1.2.1", "1.1.2.2"); +} DP_END_TEST; + +/* + * IPv6 Egress ACL at GRE tunnel start-point + */ +DP_DECL_TEST_CASE(npf_acl, acl15, NULL, NULL); +DP_START_TEST(acl15, test) +{ + struct rte_mbuf *m; + struct dp_test_expected *exp; + struct ip6_hdr *inner_ip; + struct ip6_hdr *exp_ip_outer = NULL; + int len = 32; + + dp_test_gre6_setup_tunnel(VRF_DEFAULT_ID, "1:1:2::1", "1:1:2::2"); + + /* + * Egress ACL matches on UDP protocol for first packet (which is + * expected to be forwarded), and GRE protocol for second packet + * (dropped) + */ + dp_test_npf_cmd("npf-ut add acl:v6test 0 family=inet6", false); + dp_test_npf_cmd("npf-ut add acl:v6test 10 " + "proto-base=17 " + "action=drop", false); + dp_test_npf_cmd("npf-ut attach interface:dpT22 acl-out acl:v6test", + false); + dp_test_npf_cmd("npf-ut commit", false); + + /* + * Packet 1. + */ + m = dp_test_create_ipv6_pak("1:1:1::2", "10:0:0::1", + 1, &len); + (void)dp_test_pktmbuf_eth_init(m, dp_test_intf_name2mac_str("dp1T1"), + DP_TEST_INTF_DEF_SRC_MAC, + RTE_ETHER_TYPE_IPV6); + inner_ip = ip6hdr(m); + gre6_test_build_expected_pak(&exp, inner_ip, exp_ip_outer); + dp_test_pak_receive(m, "dp1T1", exp); + + /* + * Packet 2. 
+ */ + dp_test_npf_cmd("npf-ut add acl:v6test 10 " + "proto-base=47 " + "action=drop", false); + dp_test_npf_cmd("npf-ut commit", false); + + m = dp_test_create_ipv6_pak("1:1:1::2", "10:0:0::1", + 1, &len); + (void)dp_test_pktmbuf_eth_init(m, dp_test_intf_name2mac_str("dp1T1"), + DP_TEST_INTF_DEF_SRC_MAC, + RTE_ETHER_TYPE_IPV6); + inner_ip = ip6hdr(m); + gre6_test_build_expected_pak(&exp, inner_ip, exp_ip_outer); + dp_test_exp_set_fwd_status_m(exp, 0, DP_TEST_FWD_DROPPED); + dp_test_pak_receive(m, "dp1T1", exp); + + /* Cleanup */ + dp_test_npf_cmd("npf-ut detach interface:dpT22 acl-out acl:v6test", + false); + dp_test_npf_cmd("npf-ut delete acl:v6test", false); + dp_test_npf_cmd("npf-ut commit", false); + + dp_test_gre6_teardown_tunnel(VRF_DEFAULT_ID, "1:1:2::1", "1:1:2::2"); +} DP_END_TEST; diff --git a/tests/whole_dp/src/dp_test_npf_addrgrp.c b/tests/whole_dp/src/dp_test_npf_addrgrp.c index 1af468bc..600a14ac 100644 --- a/tests/whole_dp/src/dp_test_npf_addrgrp.c +++ b/tests/whole_dp/src/dp_test_npf_addrgrp.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only */ @@ -33,11 +33,11 @@ #include "dp_test_console.h" #include "dp_test_json_utils.h" #include "dp_test_controller.h" -#include "dp_test_netlink_state.h" -#include "dp_test_lib.h" -#include "dp_test_lib_intf.h" +#include "dp_test_netlink_state_internal.h" +#include "dp_test_lib_internal.h" +#include "dp_test_lib_intf_internal.h" #include "dp_test_lib_exp.h" -#include "dp_test_pktmbuf_lib.h" +#include "dp_test_pktmbuf_lib_internal.h" #include "dp_test_npf_fw_lib.h" static bool print_tbls; @@ -445,7 +445,7 @@ static int addr_incr(uint8_t *addr, int alen) * Similar logic to memcmp. Return -1 if a1 < a2, +1 id a1 > a2, 0 id a1 == * a2. Addresses are in network byte order. 
*/ -static int addr_cmp(uint8_t *a1, uint8_t *a2, int alen) +static int addr_cmp(const uint8_t *a1, const uint8_t *a2, int alen) { int i; @@ -453,7 +453,7 @@ static int addr_cmp(uint8_t *a1, uint8_t *a2, int alen) for (i = 0; i < alen; i++) { if (a1[i] < a2[i]) return -1; - else if (a1[i] > a2[i]) + if (a1[i] > a2[i]) return 1; } return 0; @@ -465,18 +465,13 @@ static int addr_cmp(uint8_t *a1, uint8_t *a2, int alen) static bool dp_test_addrgrp_tree_lookup(const char *group, const char *addr_str) { - struct npf_addrgrp *ag; uint8_t klen, af, mask; npf_addr_t addr; - int tid; + uint32_t tid; int rc; - ag = npf_addrgrp_lookup_name(group); - if (!ag) - return false; - - tid = npf_addrgrp_get_tid(ag); - if (tid < 0) + rc = npf_addrgrp_name2tid(group, &tid); + if (rc < 0 || !npf_addrgrp_tid_valid(tid)) return false; klen = dp_test_string2key(addr_str, addr.s6_addr, &af, &mask); @@ -487,7 +482,7 @@ dp_test_addrgrp_tree_lookup(const char *group, const char *addr_str) * This is the function called from the forwarding-threads. * It does a shortest match lookup to verify address-group membership. */ - rc = npf_addrgrp_lookup((klen == 4) ? AG_IPv4 : AG_IPv6, ag, &addr); + rc = npf_addrgrp_lookup((klen == 4) ? 
AG_IPv4 : AG_IPv6, tid, &addr); return rc == 0; } @@ -947,7 +942,7 @@ DP_START_TEST(npf_addrgrp2, test1) dp_test_addrgrp_prefix_add("ADDRGRP2", "10.0.0.25/32", true); - naddrs = npf_addrgrp_naddrs(AG_IPv4, tid); + naddrs = npf_addrgrp_naddrs(AG_IPv4, tid, false); dp_test_fail_unless(naddrs == 1, "%s contains %lu addresses, expected 1", "ADDRGRP2", naddrs); @@ -955,7 +950,7 @@ DP_START_TEST(npf_addrgrp2, test1) dp_test_addrgrp_range_add("ADDRGRP2", "10.0.0.10", "10.0.0.15", true, true); - naddrs = npf_addrgrp_naddrs(AG_IPv4, tid); + naddrs = npf_addrgrp_naddrs(AG_IPv4, tid, false); dp_test_fail_unless(naddrs == 7, "%s contains %lu addresses, expected 7", "ADDRGRP2", naddrs); @@ -1155,7 +1150,7 @@ DP_START_TEST(npf_addrgrp5, test1) .rule = "10", .pass = PASS, .stateful = STATELESS, - .npf = "proto=17 dst-addr-group=ADDR_GRP0" + .npf = "proto-final=17 dst-addr-group=ADDR_GRP0" }, RULE_DEF_BLOCK, NULL_RULE @@ -1175,7 +1170,7 @@ DP_START_TEST(npf_addrgrp5, test1) /* Get interface */ dp_test_intf_real("dp1T0", real_ifname); - ifp = ifnet_byifname(real_ifname); + ifp = dp_ifnet_byifname(real_ifname); dp_test_fail_unless(ifp, "ifp for %s", real_ifname); dp_test_npf_fw_addr_group_add("ADDR_GRP0"); @@ -1218,7 +1213,7 @@ DP_START_TEST(npf_addrgrp5, test1) dp_test_fail_unless(pkt4, "IPv4 packet create\n"); dp_test_pktmbuf_eth_init(pkt4, "00:00:00:00:00:02", "00:00:00:00:00:01", - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); uint16_t exp_npc4 = NPC_GROUPER | NPC_L4PORTS | NPC_IP4; uint i; @@ -1241,7 +1236,7 @@ DP_START_TEST(npf_addrgrp5, test1) { "1.1.1.6", NPF_DECISION_PASS }, { "1.1.1.7", NPF_DECISION_BLOCK }, }; - struct iphdr *ip = pktmbuf_mtol3(pkt4, struct iphdr *); + struct iphdr *ip = dp_pktmbuf_mtol3(pkt4, struct iphdr *); /* * Lookup addresses in test_arr1 and verify decision @@ -1374,7 +1369,7 @@ DP_START_TEST(npf_addrgrp7, test1) /* Get interface */ dp_test_intf_real("dp1T0", real_ifname); - ifp = ifnet_byifname(real_ifname); + ifp = dp_ifnet_byifname(real_ifname); 
dp_test_fail_unless(ifp, "ifp for %s", real_ifname); dp_test_npf_fw_addr_group_add("ADDR_GRP0"); @@ -1400,7 +1395,7 @@ DP_START_TEST(npf_addrgrp7, test1) dp_test_fail_unless(pkt4, "IPv4 packet create\n"); dp_test_pktmbuf_eth_init(pkt4, "00:00:00:00:00:02", "00:00:00:00:00:01", - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); uint16_t exp_npc4 = NPC_GROUPER | NPC_L4PORTS | NPC_IP4; uint32_t addr; @@ -1487,7 +1482,7 @@ DP_START_TEST(npf_addrgrp8, test1) /* Get interface */ dp_test_intf_real("dp1T0", real_ifname); - ifp = ifnet_byifname(real_ifname); + ifp = dp_ifnet_byifname(real_ifname); dp_test_fail_unless(ifp, "ifp for %s", real_ifname); dp_test_npf_fw_addr_group_add("ADDR_GRP0"); @@ -1513,7 +1508,7 @@ DP_START_TEST(npf_addrgrp8, test1) dp_test_fail_unless(pkt6, "IPv6 packet create\n"); dp_test_pktmbuf_eth_init(pkt6, "00:00:00:00:00:02", "00:00:00:00:00:01", - ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV6); uint16_t exp_npc6 = NPC_GROUPER | NPC_L4PORTS | NPC_IP6; npf_addr_t addr; @@ -1584,7 +1579,7 @@ DP_START_TEST(npf_addrgrp9, test1) dp_test_addrgrp_prefix_add("ADDRGRP9", "10.0.0.25", true); /* Expect 1 entry */ - naddrs = npf_addrgrp_naddrs(AG_IPv4, tid); + naddrs = npf_addrgrp_naddrs(AG_IPv4, tid, false); dp_test_fail_unless(naddrs == 1, "expected 1, got %lu", naddrs); /* Is 10.0.0.25 in ptree? */ @@ -1595,14 +1590,14 @@ DP_START_TEST(npf_addrgrp9, test1) dp_test_addrgrp_prefix_add("ADDRGRP9", "10.0.0.25/32", true); /* Still expect 1 entry */ - naddrs = npf_addrgrp_naddrs(AG_IPv4, tid); + naddrs = npf_addrgrp_naddrs(AG_IPv4, tid, false); dp_test_fail_unless(naddrs == 1, "expected 1, got %lu", naddrs); /* Remove 10.0.0.25/32 */ dp_test_addrgrp_prefix_remove("ADDRGRP9", "10.0.0.25/32", true, true); /* Still expect 1 entry */ - naddrs = npf_addrgrp_naddrs(AG_IPv4, tid); + naddrs = npf_addrgrp_naddrs(AG_IPv4, tid, false); dp_test_fail_unless(naddrs == 1, "expected 1, got %lu", naddrs); /* Is 10.0.0.25 in ptree? 
*/ @@ -1613,7 +1608,7 @@ DP_START_TEST(npf_addrgrp9, test1) dp_test_addrgrp_prefix_remove("ADDRGRP9", "10.0.0.25", true, false); /* Expect 0 entries */ - naddrs = npf_addrgrp_naddrs(AG_IPv4, tid); + naddrs = npf_addrgrp_naddrs(AG_IPv4, tid, false); dp_test_fail_unless(naddrs == 0, "expected 0, got %lu", naddrs); /* Is 10.0.0.25 in ptree? */ @@ -1628,7 +1623,7 @@ DP_START_TEST(npf_addrgrp9, test1) dp_test_addrgrp_prefix_add("ADDRGRP9", "10.0.0.25/32", true); /* Expect 1 entry */ - naddrs = npf_addrgrp_naddrs(AG_IPv4, tid); + naddrs = npf_addrgrp_naddrs(AG_IPv4, tid, false); dp_test_fail_unless(naddrs == 1, "expected 1, got %lu", naddrs); /* Is 10.0.0.25 in ptree? */ @@ -1639,14 +1634,14 @@ DP_START_TEST(npf_addrgrp9, test1) dp_test_addrgrp_prefix_add("ADDRGRP9", "10.0.0.25", true); /* Still expect 1 entry */ - naddrs = npf_addrgrp_naddrs(AG_IPv4, tid); + naddrs = npf_addrgrp_naddrs(AG_IPv4, tid, false); dp_test_fail_unless(naddrs == 1, "expected 1, got %lu", naddrs); /* Remove 10.0.0.25 */ dp_test_addrgrp_prefix_remove("ADDRGRP9", "10.0.0.25", true, true); /* Still expect 1 entry */ - naddrs = npf_addrgrp_naddrs(AG_IPv4, tid); + naddrs = npf_addrgrp_naddrs(AG_IPv4, tid, false); dp_test_fail_unless(naddrs == 1, "expected 1, got %lu", naddrs); /* Is 10.0.0.25 in ptree? */ @@ -1657,7 +1652,7 @@ DP_START_TEST(npf_addrgrp9, test1) dp_test_addrgrp_prefix_remove("ADDRGRP9", "10.0.0.25/32", true, false); /* Expect 0 entries */ - naddrs = npf_addrgrp_naddrs(AG_IPv4, tid); + naddrs = npf_addrgrp_naddrs(AG_IPv4, tid, false); dp_test_fail_unless(naddrs == 0, "expected 0, got %lu", naddrs); /* Is 10.0.0.25 in ptree? 
*/ @@ -1670,3 +1665,85 @@ DP_START_TEST(npf_addrgrp9, test1) "ADDRGRP9 not empty"); dp_test_addrgrp_destroy("ADDRGRP9"); } DP_END_TEST; + + +/* + * npf_addrgrp10 + */ +DP_DECL_TEST_CASE(npf_addrgrp, npf_addrgrp10, NULL, NULL); +DP_START_TEST(npf_addrgrp10, test1) +{ + struct npf_addrgrp *ag, *tmp; + uint32_t tid; + int rc; + + dp_test_addrgrp_create("ADDRGRP10"); + dp_test_addrgrp_prefix_add("ADDRGRP10", "10.0.0.0/24", true); + + rc = npf_addrgrp_name2tid("ADDRGRP10", &tid); + dp_test_fail_unless(rc == 0, "npf_addrgrp_name2tid"); + + ag = npf_addrgrp_tid2handle(tid); + dp_test_fail_unless(ag, "npf_addrgrp_tid2handle"); + + /* Take reference of address-group */ + npf_addrgrp_get(ag); + + /* + * Lookup using npf_addrgrp_lookup_v4_by_handle + */ + uint32_t ipaddr; + inet_pton(AF_INET, "10.0.0.1", &ipaddr); + + rc = npf_addrgrp_lookup_v4_by_handle(ag, ipaddr); + dp_test_fail_unless(rc == 0, "Lookup by handle failed"); + + dp_test_addrgrp_prefix_remove("ADDRGRP10", "10.0.0.0/24", true, false); + + /* Unconfigure address-group */ + dp_test_addrgrp_destroy("ADDRGRP10"); + + /* + * Addr-group should no longer be findable since we have deleted it + * from the tableset. + */ + tmp = npf_addrgrp_tid2handle(tid); + dp_test_fail_unless(tmp == NULL, "Addr-group still found"); + + /* Lookup of address should fail (but not crash) */ + rc = npf_addrgrp_lookup_v4_by_handle(ag, ipaddr); + dp_test_fail_unless(rc != 0, "Lookup by handle succeeded"); + + /* Release reference on address-group */ + npf_addrgrp_put(ag); + +} DP_END_TEST; + + +/* + * npf_addrgrp11 - Test that a host address and address range with contiguous + * addresses can be configured. 
+ */ +DP_DECL_TEST_CASE(npf_addrgrp, npf_addrgrp11, NULL, NULL); +DP_START_TEST(npf_addrgrp11, test1) +{ + dp_test_addrgrp_create("ADDRGRP11"); + + dp_test_addrgrp_prefix_add("ADDRGRP11", "10.136.166.206", true); + dp_test_addrgrp_range_add("ADDRGRP11", + "10.136.166.207", "10.136.166.208", + true, true); + dp_test_addrgrp_prefix_add("ADDRGRP11", "10.136.166.209", true); + + dp_test_addrgrp_prefix_remove("ADDRGRP11", "10.136.166.206", + true, false); + dp_test_addrgrp_range_remove("ADDRGRP11", + "10.136.166.207", "10.136.166.208", + true, false); + dp_test_addrgrp_prefix_remove("ADDRGRP11", "10.136.166.209", + true, false); + + /* Unconfigure address-group */ + dp_test_addrgrp_destroy("ADDRGRP11"); + +} DP_END_TEST; diff --git a/tests/whole_dp/src/dp_test_npf_alg_ftp.c b/tests/whole_dp/src/dp_test_npf_alg_ftp.c index aaa93c9a..e2ce9abc 100644 --- a/tests/whole_dp/src/dp_test_npf_alg_ftp.c +++ b/tests/whole_dp/src/dp_test_npf_alg_ftp.c @@ -1,731 +1,962 @@ /* - * Copyright (c) 2018-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2018-2021, AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only * * Whole dataplane npf alg ftp tests. - * - * alg_ftp1 - is a plain vanilla ftp call. No firewall or nat. Passive ftp. - * - * alg_ftp2 - adds DNAT in the forwards direction (client-to-server). There - * is one ftp payload from the server (a 227 response) that gets reverse NAT'd - * by the ftp alg. - * - * alg_ftp3 - is same as npf_alg_ftp2, but adds both interfaces to a vrf, and - * then deletes the vrf while the ftp control and data sessions are still in - * existence. - * - * alg_ftp4 - NATing from smaller prefix 10.25.1.0/24 to larger address - * 159.8.106.21 - * - * alg_ftp5 - NATing from larger prefix 10.250.100.0/24 to smaller address - * 15.8.6.1 - * - * alg_ftp7 - SNAT from client to server, Active ftp - * - * alg_ftp8 - SNAT from client to server, Active ftp. Deleting vrf. 
- * - * alg_ftp9 - Same as ftp_alg2, except no parenthesis around 227 msg. - * - * - * To run each test in the chroot setup: - * - * make -j4 dataplane_test_run CK_RUN_CASE=alg_ftp1 - * make -j4 dataplane_test_run CK_RUN_CASE=alg_ftp2 - * make -j4 dataplane_test_run CK_RUN_CASE=alg_ftp3 - * make -j4 dataplane_test_run CK_RUN_CASE=alg_ftp4 - * make -j4 dataplane_test_run CK_RUN_CASE=alg_ftp5 - * - * To run all the tests: - * - * make -j4 dataplane_test_run CK_RUN_SUITE=dp_test_npf_alg_ftp.c */ -#include -#include "ip_funcs.h" #include "in_cksum.h" -#include "if_var.h" -#include "main.h" -#include "npf/npf_state.h" + +#include "npf/alg/alg.h" #include "dp_test.h" #include "dp_test_controller.h" #include "dp_test_cmd_state.h" -#include "dp_test_netlink_state.h" -#include "dp_test_lib.h" +#include "dp_test_netlink_state_internal.h" +#include "dp_test_lib_internal.h" #include "dp_test_str.h" #include "dp_test_lib_exp.h" -#include "dp_test_lib_intf.h" +#include "dp_test_lib_intf_internal.h" #include "dp_test_lib_pkt.h" #include "dp_test_lib_tcp.h" -#include "dp_test_pktmbuf_lib.h" +#include "dp_test_pktmbuf_lib_internal.h" #include "dp_test_console.h" #include "dp_test_json_utils.h" -#include "dp_test_npf_sess_lib.h" -#include "dp_test_npf_fw_lib.h" #include "dp_test_npf_nat_lib.h" +#include "dp_test_npf_sess_lib.h" #include "dp_test_npf_alg_lib.h" -DP_DECL_TEST_SUITE(npf_alg_ftp); - -/*************************************************************************** - * alg_ftp1 +/* + * In the SNAT tests, client is on the inside and server on the outside. + * + * With Passive FTP and SNAT (ftp4), the packet containing the data address + * and port is from the server, so does not need translated. Initial data pkt + * is *from* the client. * - * Simulates an ftp call via two TCP calls - one for control channel and one - * for data channel. + * With Active FTP and SNAT (ftp7), the packet containing the data address and + * port is from the client, so *does* need translated. 
And may result in + * change of length of payload. Initial data pkt is *from* the server. * - * No firewall or NAT. + * With Passive FTP, the control and data flows both start in the same + * direction (i.e. forwards). * - ***************************************************************************/ - -static struct dp_test_pkt_desc_t ftp1_fwd_in = { - .text = "ftp data Forwards In", - .len = 0, - .ether_type = ETHER_TYPE_IPv4, - .l3_src = "1.1.1.11", - .l2_src = "aa:bb:cc:dd:1:11", - .l3_dst = "2.2.2.11", - .l2_dst = "00:00:a4:00:00:64", - .proto = IPPROTO_TCP, - .l4 = { - .tcp = { - .sport = 46682, - .dport = 21, - .flags = 0, - .seq = 0, - .ack = 0, - .win = 8192, - .opts = NULL - } - }, - .rx_intf = "dp1T0", - .tx_intf = "dp1T1" -}; - -static struct dp_test_pkt_desc_t ftp1_fwd_out = { - .text = "ftp data Forwards Out", - .len = 0, - .ether_type = ETHER_TYPE_IPv4, - .l3_src = "1.1.1.11", - .l2_src = "00:00:a4:00:00:64", - .l3_dst = "2.2.2.11", - .l2_dst = "aa:bb:cc:dd:2:11", - .proto = IPPROTO_TCP, - .l4 = { - .tcp = { - .sport = 46682, - .dport = 21, - .flags = 0, - .seq = 0, - .ack = 0, - .win = 8192, - .opts = NULL - } - }, - .rx_intf = "dp1T0", - .tx_intf = "dp1T1" -}; - -static struct dp_test_pkt_desc_t ftp1_rev_in = { - .text = "ftp data Reverse In", - .len = 0, - .ether_type = ETHER_TYPE_IPv4, - .l3_src = "2.2.2.11", - .l2_src = "aa:bb:cc:dd:2:11", - .l3_dst = "1.1.1.11", - .l2_dst = "00:00:a4:00:00:64", - .proto = IPPROTO_TCP, - .l4 = { - .tcp = { - .sport = 21, - .dport = 46682, - .flags = 0, - .seq = 0, - .ack = 0, - .win = 8192, - .opts = NULL - } - }, - .rx_intf = "dp1T1", - .tx_intf = "dp1T0" -}; - -static struct dp_test_pkt_desc_t ftp1_rev_out = { - .text = "ftp data Reverse Out", - .len = 0, - .ether_type = ETHER_TYPE_IPv4, - .l3_src = "2.2.2.11", - .l2_src = "00:00:a4:00:00:64", - .l3_dst = "1.1.1.11", - .l2_dst = "aa:bb:cc:dd:1:11", - .proto = IPPROTO_TCP, - .l4 = { - .tcp = { - .sport = 21, - .dport = 46682, - .flags = 0, - .seq = 0, - .ack = 0, 
- .win = 8192, - .opts = NULL - } - }, - .rx_intf = "dp1T1", - .tx_intf = "dp1T0" -}; + * With Active FTP, the data flow starts in the reverse direction. + * + * With SNAT and Active FTP, the difference in size between the source address + * string and the translation address string will alter the length of the TCP + * data, and hence cause a difference in TCP seq and ack between sender and + * receiver. Tests ftp8 and ftp9 test that the FTP ALG adjusts the TCP header + * accordingly. + * + * ftp1 ~ No NAT Passive ftp. '227' back. + * ftp2 ~ No NAT Passive ftp. Stateful firewall. IPv4. '227' back. + * ftp3 ~ No NAT Passive ftp. Stateful firewall. IPv6. '227' back. + * ftp4 - SNAT Passive ftp. '227' back. + * ftp5 - DNAT Passive ftp. '227' back. + * ftp6 ~ DNAT Passive ftp. No parenthesis in 227 message. + * ftp7 ~ SNAT Active ftp. 'PORT' forw. + * ftp8 - SNAT Active ftp. Translating to a larger address. 'PORT' forw. + * ftp9 - SNAT Active ftp. Translating to a smaller address. 'PORT' forw. + * ftp10 - DNAT Active ftp. 'PORT' forw. + * ftp11 - SNAT Ext. Passive ftp. Src port is outside of trans port range. + * ftp12 - SNAT Active ftp. Src port is outside of trans port range. + */ + +static void dpt_alg_ftp_setup(void); +static void dpt_alg_ftp_teardown(void); + + +DP_DECL_TEST_SUITE(npf_alg_ftp); /* - * This is used for all tests. + * ftp1 - Passive ftp. No firewall or NAT. 
*/ -static void ftp_data_call1(void) +DP_DECL_TEST_CASE(npf_alg_ftp, ftp1, dpt_alg_ftp_setup, dpt_alg_ftp_teardown); +DP_START_TEST_FULL_RUN(ftp1, test) { - uint16_t fwd_in_sport = ftp1_fwd_in.l4.tcp.sport; - uint16_t fwd_in_dport = ftp1_fwd_in.l4.tcp.dport; - uint16_t fwd_out_sport = ftp1_fwd_out.l4.tcp.sport; - uint16_t fwd_out_dport = ftp1_fwd_out.l4.tcp.dport; - - uint16_t rev_in_sport = ftp1_rev_in.l4.tcp.sport; - uint16_t rev_in_dport = ftp1_rev_in.l4.tcp.dport; - uint16_t rev_out_sport = ftp1_rev_out.l4.tcp.sport; - uint16_t rev_out_dport = ftp1_rev_out.l4.tcp.dport; - - ftp1_fwd_in.l4.tcp.sport = 49888; - ftp1_fwd_in.l4.tcp.dport = 9819; - - ftp1_fwd_out.l4.tcp.sport = 49888; - ftp1_fwd_out.l4.tcp.dport = 9819; - - ftp1_rev_in.l4.tcp.sport = 9819; - ftp1_rev_in.l4.tcp.dport = 49888; - - ftp1_rev_out.l4.tcp.sport = 9819; - ftp1_rev_out.l4.tcp.dport = 49888; - - struct dp_test_tcp_call tcp_call = { - .str[0] = '\0', - .isn = {0, 0}, - .desc[DP_DIR_FORW] = { - .pre = &ftp1_fwd_in, - .post = &ftp1_fwd_out, + struct dp_test_pkt_desc_t *ctrl_fw_pre, *ctrl_fw_pst; + struct dp_test_pkt_desc_t *ctrl_bk_pre, *ctrl_bk_pst; + struct dp_test_pkt_desc_t *data_fw_pre, *data_fw_pst; + struct dp_test_pkt_desc_t *data_bk_pre, *data_bk_pst; + + /* ftp port appears as 2 numbers in the string */ + uint8_t data_port_upr = 38; + uint8_t data_port_lwr = 91; + uint16_t data_port = (data_port_upr * 256) + data_port_lwr; + + /* + * ftp control flow packets + */ + ctrl_fw_pre = dpt_pdesc_v4_create( + "ctrl_fw_pre", IPPROTO_TCP, + "aa:bb:cc:dd:1:11", "1.1.1.11", 46682, + "aa:bb:cc:dd:2:11", "2.2.2.11", 21, + "dp1T0", "dp2T1"); + + ctrl_fw_pst = dpt_pdesc_v4_create( + "ctrl_fw_pst", IPPROTO_TCP, + "aa:bb:cc:dd:1:11", "1.1.1.11", 46682, + "aa:bb:cc:dd:2:11", "2.2.2.11", 21, + "dp1T0", "dp2T1"); + + ctrl_bk_pre = dpt_pdesc_v4_create( + "ctrl_bk_pre", IPPROTO_TCP, + "aa:bb:cc:dd:2:11", "2.2.2.11", 21, + "aa:bb:cc:dd:1:11", "1.1.1.11", 46682, + "dp2T1", "dp1T0"); + + ctrl_bk_pst = 
dpt_pdesc_v4_create( + "ctrl_bk_pst", IPPROTO_TCP, + "aa:bb:cc:dd:2:11", "2.2.2.11", 21, + "aa:bb:cc:dd:1:11", "1.1.1.11", 46682, + "dp2T1", "dp1T0"); + + /* + * ftp data flow packets + */ + data_fw_pre = dpt_pdesc_v4_create( + "data_fw_pre", IPPROTO_TCP, + "aa:bb:cc:dd:1:11", "1.1.1.11", 49888, + "aa:bb:cc:dd:2:11", "2.2.2.11", data_port, + "dp1T0", "dp2T1"); + + data_fw_pst = dpt_pdesc_v4_create( + "data_fw_pst", IPPROTO_TCP, + "aa:bb:cc:dd:1:11", "1.1.1.11", 49888, + "aa:bb:cc:dd:2:11", "2.2.2.11", data_port, + "dp1T0", "dp2T1"); + + data_bk_pre = dpt_pdesc_v4_create( + "data_bk_pre", IPPROTO_TCP, + "aa:bb:cc:dd:2:11", "2.2.2.11", data_port, + "aa:bb:cc:dd:1:11", "1.1.1.11", 49888, + "dp2T1", "dp1T0"); + + data_bk_pst = dpt_pdesc_v4_create( + "data_bk_pst", IPPROTO_TCP, + "aa:bb:cc:dd:2:11", "2.2.2.11", data_port, + "aa:bb:cc:dd:1:11", "1.1.1.11", 49888, + "dp2T1", "dp1T0"); + + /* + * Packet descriptors for ftp ctrl flow + */ + struct dpt_tcp_flow ftp_ctrl_call = { + .text[0] = '\0', /* description */ + .isn = {0, 0}, /* initial seq no */ + .desc[DPT_FORW] = { /* Forw pkt descriptors */ + .pre = ctrl_fw_pre, + .pst = ctrl_fw_pst, }, - .desc[DP_DIR_BACK] = { - .pre = &ftp1_rev_in, - .post = &ftp1_rev_out, + .desc[DPT_BACK] = { /* Back pkt descriptors */ + .pre = ctrl_bk_pre, + .pst = ctrl_bk_pst, }, - .test_cb = NULL, - .post_cb = NULL, + .test_cb = NULL, /* Prep and send pkt */ + .post_cb = NULL, /* Fixup pkt exp */ }; + snprintf(ftp_ctrl_call.text, sizeof(ftp_ctrl_call), "Ctrl"); - struct dp_test_tcp_flow_pkt ftp_data_pkt1[] = { - {DP_DIR_FORW, TH_SYN, 0, NULL}, - {DP_DIR_BACK, TH_SYN | TH_ACK, 0, NULL}, - {DP_DIR_FORW, TH_ACK, 0, NULL}, + /* + * Per-packet flags and data for ftp ctrl flow + */ + struct dpt_tcp_flow_pkt ftp_ctrl_pkts[] = { + { DPT_FORW, TH_SYN, 0, NULL, 0, NULL }, + { DPT_BACK, TH_SYN | TH_ACK, 0, NULL, 0, NULL }, + { DPT_FORW, TH_ACK, 0, NULL, 0, NULL }, - {DP_DIR_BACK, TH_ACK, 100, NULL}, - {DP_DIR_FORW, TH_ACK, 0, NULL}, + /* session 
established */ + { DPT_FORW, TH_ACK, 0, + (char *)"SYST\x0d\x0a", 0, NULL }, - {DP_DIR_FORW, TH_ACK | TH_FIN, 0, NULL}, - {DP_DIR_BACK, TH_ACK | TH_FIN, 0, NULL}, - {DP_DIR_FORW, TH_ACK, 0, NULL}, - }; + { DPT_BACK, TH_ACK, 0, + (char *)"215 UNIX Type: L8\x0d\x0a", 0, NULL }, - /* Simulate the TCP call */ - dp_test_tcp_call(&tcp_call, ftp_data_pkt1, - ARRAY_SIZE(ftp_data_pkt1), - NULL, 0); + { DPT_FORW, TH_ACK, 0, + (char *)"TYPE I\x0d\x0a", 0, NULL }, - ftp1_fwd_in.l4.tcp.sport = fwd_in_sport; - ftp1_fwd_in.l4.tcp.dport = fwd_in_dport; - ftp1_fwd_out.l4.tcp.sport = fwd_out_sport; - ftp1_fwd_out.l4.tcp.dport = fwd_out_dport; + { DPT_BACK, TH_ACK, 0, + (char *)"200 Switching to Binary mode.\x0d\x0a", + 0, NULL }, - ftp1_rev_in.l4.tcp.sport = rev_in_sport; - ftp1_rev_in.l4.tcp.dport = rev_in_dport; - ftp1_rev_out.l4.tcp.sport = rev_out_sport; - ftp1_rev_out.l4.tcp.dport = rev_out_dport; -} + { DPT_FORW, TH_ACK, 0, + (char *)"PASV\x0d\x0a", 0, NULL }, -/* 'data' context for callback */ -struct ftp_ctx { - const char **payload; - uint payload_len; - bool do_data_call; -}; + /* + * #8. Response: 227. Server telling client which address and + * port to use for data channel. Address is 2.2.2.11, port is + * 9819. (38 == 0x26, 91 == 0x5B, 0x265B == 9819) + */ + { DPT_BACK, TH_ACK, 0, + (char *)"227 Entering Passive Mode (2,2,2,11,38,91).\r\n", + 0, NULL }, -/* - * Callback function for TCP call simulator for ftp control channel. - * - * This prepares the packet, including adding the payload, and then sends the - * packet. - * - * This is used for all tests. 
- */ -static void tcp_ftp_control_cb1(const char *str, - uint pktno, enum dp_test_tcp_dir dir, - uint8_t flags, - struct dp_test_pkt_desc_t *pre, - struct dp_test_pkt_desc_t *post, - void *data, uint index) -{ - struct rte_mbuf *pre_pak, *post_pak; - struct dp_test_expected *test_exp; - struct ftp_ctx *ctx = data; - const char **ftp = ctx->payload; + { DPT_FORW, TH_ACK | TH_FIN, 0, NULL, 0, NULL }, + { DPT_BACK, TH_ACK | TH_FIN, 0, NULL, 0, NULL }, + { DPT_FORW, TH_ACK, 0, NULL, 0, NULL }, + }; + + /* + * Packet descriptors for ftp data flow + */ + struct dpt_tcp_flow ftp_data_call = { + .text[0] = '\0', /* description */ + .isn = {0, 0}, /* initial seq no */ + .desc[DPT_FORW] = { /* Forw pkt descriptors */ + .pre = data_fw_pre, + .pst = data_fw_pst, + }, + .desc[DPT_BACK] = { /* Back pkt descriptors */ + .pre = data_bk_pre, + .pst = data_bk_pst, + }, + .test_cb = NULL, /* Prep and send pkt */ + .post_cb = NULL, /* Fixup pkt exp */ + }; + snprintf(ftp_data_call.text, sizeof(ftp_data_call), "Data"); - pre_pak = dp_test_v4_pkt_from_desc(pre); - post_pak = dp_test_v4_pkt_from_desc(post); + struct dpt_tcp_flow_pkt ftp_data_pkts[] = { + { DPT_FORW, TH_SYN, 0, NULL, 0, NULL }, + { DPT_BACK, TH_SYN | TH_ACK, 0, NULL, 0, NULL }, + { DPT_FORW, TH_ACK, 0, NULL, 0, NULL}, - /* - * Add ftp payload - */ - if (ftp[pktno]) { - dp_test_tcp_write_payload(pre_pak, strlen(ftp[pktno]), - ftp[pktno]); + { DPT_BACK, TH_ACK, 100, NULL, 0, NULL }, + { DPT_FORW, TH_ACK, 0, NULL, 0, NULL }, + + { DPT_FORW, TH_ACK | TH_FIN, 0, NULL, 0, NULL }, + { DPT_BACK, TH_ACK | TH_FIN, 0, NULL, 0, NULL }, + { DPT_FORW, TH_ACK, 0, NULL, 0, NULL }, + }; - dp_test_tcp_write_payload(post_pak, strlen(ftp[pktno]), - ftp[pktno]); - } + /* Start of ftp ctrl flow (pkts 0 - 8) */ + dpt_tcp_call(&ftp_ctrl_call, ftp_ctrl_pkts, ARRAY_SIZE(ftp_ctrl_pkts), + 0, 8, NULL, 0); - test_exp = dp_test_exp_from_desc(post_pak, post); - rte_pktmbuf_free(post_pak); - dp_test_exp_set_fwd_status(test_exp, DP_TEST_FWD_FORWARDED); + 
/* ftp data flow */ + dpt_tcp_call(&ftp_data_call, ftp_data_pkts, ARRAY_SIZE(ftp_data_pkts), + 0, 0, NULL, 0); - spush(test_exp->description, sizeof(test_exp->description), - "%s", str); + /* End of ftp ctrl flow (pkts 9 - end) */ + dpt_tcp_call(&ftp_ctrl_call, ftp_ctrl_pkts, ARRAY_SIZE(ftp_ctrl_pkts), + 9, 0, NULL, 0); - /* Send the packet */ - dp_test_pak_receive(pre_pak, pre->rx_intf, test_exp); + free(ctrl_fw_pre); + free(ctrl_fw_pst); + free(ctrl_bk_pre); + free(ctrl_bk_pst); + + free(data_fw_pre); + free(data_fw_pst); + free(data_bk_pre); + free(data_bk_pst); + +} DP_END_TEST; - /* - * 227 is a Response from the server that contains the data channel - * address and port. So we can startup the data channel tcp session - * here. - */ - if (ctx->do_data_call && ftp[pktno] && !strncmp(ftp[pktno], "227", 3)) - ftp_data_call1(); -} /* - * alg_ftp1 + * ftp2 - Passive ftp. Stateful firewall on output interface. */ -DP_DECL_TEST_CASE(npf_alg_ftp, alg_ftp1, NULL, NULL); -DP_START_TEST(alg_ftp1, test) +DP_DECL_TEST_CASE(npf_alg_ftp, ftp2, dpt_alg_ftp_setup, dpt_alg_ftp_teardown); +DP_START_TEST_FULL_RUN(ftp2, test) { - uint vrfid = VRF_DEFAULT_ID; + struct dp_test_pkt_desc_t *ctrl_fw_pre, *ctrl_fw_pst; + struct dp_test_pkt_desc_t *ctrl_bk_pre, *ctrl_bk_pst; + struct dp_test_pkt_desc_t *data_fw_pre, *data_fw_pst; + struct dp_test_pkt_desc_t *data_bk_pre, *data_bk_pst; - if (vrfid != VRF_DEFAULT_ID) - dp_test_netlink_add_vrf(vrfid, 1); + /* + * Stateful firewall rule to match on TCP pkts to port 21. This + * matches the ctrl flow but not the data flow. The data flow only + * gets through because of the alg child session. 
+ */ + struct dp_test_npf_rule_t rset[] = { + { + .rule = "10", + .pass = PASS, + .stateful = true, + .npf = "proto-final=6 dst-port=21" + }, + RULE_DEF_BLOCK, + NULL_RULE + }; - /* Setup interfaces and neighbours */ - dp_test_nl_add_ip_addr_and_connected_vrf("dp1T0", "1.1.1.1/24", - vrfid); - dp_test_nl_add_ip_addr_and_connected_vrf("dp1T1", "2.2.2.2/24", - vrfid); - - dp_test_netlink_add_neigh("dp1T0", "1.1.1.11", - "aa:bb:cc:dd:1:11"); - dp_test_netlink_add_neigh("dp1T0", "1.1.1.12", - "aa:bb:cc:dd:1:12"); - dp_test_netlink_add_neigh("dp1T1", "2.2.2.11", - "aa:bb:cc:dd:2:11"); - dp_test_netlink_add_neigh("dp1T1", "2.2.2.12", - "aa:bb:cc:dd:2:12"); + struct dp_test_npf_ruleset_t fw = { + .rstype = "fw-out", + .name = "OUT_FW", + .enable = 1, + .attach_point = "dp2T1", + .fwd = FWD, + .dir = "out", + .rules = rset + }; + + dp_test_npf_fw_add(&fw, false); + + /* ftp port appears as 2 numbers in the string */ + uint8_t data_port_upr = 38; + uint8_t data_port_lwr = 91; + uint16_t data_port = (data_port_upr * 256) + data_port_lwr; + + /* + * ftp control flow packets + */ + ctrl_fw_pre = dpt_pdesc_v4_create( + "ctrl_fw_pre", IPPROTO_TCP, + "aa:bb:cc:dd:1:11", "1.1.1.11", 46682, + "aa:bb:cc:dd:2:11", "2.2.2.11", 21, + "dp1T0", "dp2T1"); + + ctrl_fw_pst = dpt_pdesc_v4_create( + "ctrl_fw_pst", IPPROTO_TCP, + "aa:bb:cc:dd:1:11", "1.1.1.11", 46682, + "aa:bb:cc:dd:2:11", "2.2.2.11", 21, + "dp1T0", "dp2T1"); + + ctrl_bk_pre = dpt_pdesc_v4_create( + "ctrl_bk_pre", IPPROTO_TCP, + "aa:bb:cc:dd:2:11", "2.2.2.11", 21, + "aa:bb:cc:dd:1:11", "1.1.1.11", 46682, + "dp2T1", "dp1T0"); + + ctrl_bk_pst = dpt_pdesc_v4_create( + "ctrl_bk_pst", IPPROTO_TCP, + "aa:bb:cc:dd:2:11", "2.2.2.11", 21, + "aa:bb:cc:dd:1:11", "1.1.1.11", 46682, + "dp2T1", "dp1T0"); /* - * Packet descriptors of forw and back pre and post packets. 
+ * ftp data flow packets */ - struct dp_test_tcp_call tcp_call = { - .str[0] = '\0', /* description */ + data_fw_pre = dpt_pdesc_v4_create( + "data_fw_pre", IPPROTO_TCP, + "aa:bb:cc:dd:1:11", "1.1.1.11", 49888, + "aa:bb:cc:dd:2:11", "2.2.2.11", data_port, + "dp1T0", "dp2T1"); + + data_fw_pst = dpt_pdesc_v4_create( + "data_fw_pst", IPPROTO_TCP, + "aa:bb:cc:dd:1:11", "1.1.1.11", 49888, + "aa:bb:cc:dd:2:11", "2.2.2.11", data_port, + "dp1T0", "dp2T1"); + + data_bk_pre = dpt_pdesc_v4_create( + "data_bk_pre", IPPROTO_TCP, + "aa:bb:cc:dd:2:11", "2.2.2.11", data_port, + "aa:bb:cc:dd:1:11", "1.1.1.11", 49888, + "dp2T1", "dp1T0"); + + data_bk_pst = dpt_pdesc_v4_create( + "data_bk_pst", IPPROTO_TCP, + "aa:bb:cc:dd:2:11", "2.2.2.11", data_port, + "aa:bb:cc:dd:1:11", "1.1.1.11", 49888, + "dp2T1", "dp1T0"); + + /* + * Packet descriptors for ftp ctrl flow + */ + struct dpt_tcp_flow ftp_ctrl_call = { + .text[0] = '\0', /* description */ .isn = {0, 0}, /* initial seq no */ - .desc[DP_DIR_FORW] = { /* Forw pkt descriptors */ - .pre = &ftp1_fwd_in, - .post = &ftp1_fwd_out, + .desc[DPT_FORW] = { /* Forw pkt descriptors */ + .pre = ctrl_fw_pre, + .pst = ctrl_fw_pst, }, - .desc[DP_DIR_BACK] = { /* Back pkt descriptors */ - .pre = &ftp1_rev_in, - .post = &ftp1_rev_out, + .desc[DPT_BACK] = { /* Back pkt descriptors */ + .pre = ctrl_bk_pre, + .pst = ctrl_bk_pst, }, - .test_cb = tcp_ftp_control_cb1, /* Prep and send pkt */ + .test_cb = NULL, /* Prep and send pkt */ .post_cb = NULL, /* Fixup pkt exp */ }; + snprintf(ftp_ctrl_call.text, sizeof(ftp_ctrl_call), "Ctrl"); /* - * Payloads for TCP call + * Per-packet flags and data for ftp ctrl flow */ - const char *ftp[] = { - [0] = NULL, - [1] = NULL, - [2] = NULL, + struct dpt_tcp_flow_pkt ftp_ctrl_pkts[] = { + { DPT_FORW, TH_SYN, 0, NULL, 0, NULL }, + { DPT_BACK, TH_SYN | TH_ACK, 0, NULL, 0, NULL }, + { DPT_FORW, TH_ACK, 0, NULL, 0, NULL }, + + /* session established */ + { DPT_FORW, TH_ACK, 0, + (char *)"SYST\x0d\x0a", 0, NULL }, - [3] = 
"SYST\x0d\x0a", - [4] = "215 UNIX Type: L8\x0d\x0a", + { DPT_BACK, TH_ACK, 0, + (char *)"215 UNIX Type: L8\x0d\x0a", 0, NULL }, - [5] = "TYPE I\x0d\x0a", - [6] = "200 Switching to Binary mode.\x0d\x0a", + { DPT_FORW, TH_ACK, 0, + (char *)"TYPE I\x0d\x0a", 0, NULL }, - [7] = "PASV\x0d\x0a", + { DPT_BACK, TH_ACK, 0, + (char *)"200 Switching to Binary mode.\x0d\x0a", + 0, NULL }, + + { DPT_FORW, TH_ACK, 0, + (char *)"PASV\x0d\x0a", 0, NULL }, /* - * Response: 227. Server telling client which address and + * #8. Response: 227. Server telling client which address and * port to use for data channel. Address is 2.2.2.11, port is * 9819. (38 == 0x26, 91 == 0x5B, 0x265B == 9819) */ - [8] = "227 Entering Passive Mode (2,2,2,11,38,91).\r\n", - - /* - * Here we get a new TCP call opened for the data channel, - * from 1.1.1.11:46682 to 2.2.2.11:9819 - */ + { DPT_BACK, TH_ACK, 0, + (char *)"227 Entering Passive Mode (2,2,2,11,38,91).\r\n", + 0, NULL }, - [9] = NULL, - [10] = NULL, - [11] = NULL, + { DPT_FORW, TH_ACK | TH_FIN, 0, NULL, 0, NULL }, + { DPT_BACK, TH_ACK | TH_FIN, 0, NULL, 0, NULL }, + { DPT_FORW, TH_ACK, 0, NULL, 0, NULL }, }; - /* - * TCP call packet direction, flags, payload length. 
- */ - struct dp_test_tcp_flow_pkt tcp_pkt1[] = { - {DP_DIR_FORW, TH_SYN, 0, NULL}, - {DP_DIR_BACK, TH_SYN | TH_ACK, 0, NULL}, - {DP_DIR_FORW, TH_ACK, 0, NULL}, - - /* session established */ - {DP_DIR_FORW, TH_ACK, strlen(ftp[3]), NULL}, - {DP_DIR_BACK, TH_ACK, strlen(ftp[4]), NULL}, + /* + * Packet descriptors for ftp data flow + */ + struct dpt_tcp_flow ftp_data_call = { + .text[0] = '\0', /* description */ + .isn = {0, 0}, /* initial seq no */ + .desc[DPT_FORW] = { /* Forw pkt descriptors */ + .pre = data_fw_pre, + .pst = data_fw_pst, + }, + .desc[DPT_BACK] = { /* Back pkt descriptors */ + .pre = data_bk_pre, + .pst = data_bk_pst, + }, + .test_cb = NULL, /* Prep and send pkt */ + .post_cb = NULL, /* Fixup pkt exp */ + }; + snprintf(ftp_data_call.text, sizeof(ftp_data_call), "Data"); - {DP_DIR_FORW, TH_ACK, strlen(ftp[5]), NULL}, - {DP_DIR_BACK, TH_ACK, strlen(ftp[6]), NULL}, + struct dpt_tcp_flow_pkt ftp_data_pkts[] = { + { DPT_FORW, TH_SYN, 0, NULL, 0, NULL }, + { DPT_BACK, TH_SYN | TH_ACK, 0, NULL, 0, NULL }, + { DPT_FORW, TH_ACK, 0, NULL, 0, NULL}, - {DP_DIR_FORW, TH_ACK, strlen(ftp[7]), NULL}, - {DP_DIR_BACK, TH_ACK, strlen(ftp[8]), NULL}, + { DPT_BACK, TH_ACK, 100, NULL, 0, NULL }, + { DPT_FORW, TH_ACK, 0, NULL, 0, NULL }, - {DP_DIR_FORW, TH_ACK | TH_FIN, 0, NULL}, - {DP_DIR_BACK, TH_ACK | TH_FIN, 0, NULL}, - {DP_DIR_FORW, TH_ACK, 0, NULL}, + { DPT_FORW, TH_ACK | TH_FIN, 0, NULL, 0, NULL }, + { DPT_BACK, TH_ACK | TH_FIN, 0, NULL, 0, NULL }, + { DPT_FORW, TH_ACK, 0, NULL, 0, NULL }, }; - assert(ARRAY_SIZE(ftp) == ARRAY_SIZE(tcp_pkt1)); - struct ftp_ctx ftp_ctx = { - .payload = ftp, - .payload_len = ARRAY_SIZE(ftp), - .do_data_call = true, /* Add data call */ - }; + /* Start of ftp ctrl flow (pkts 0 - 8) */ + dpt_tcp_call(&ftp_ctrl_call, ftp_ctrl_pkts, ARRAY_SIZE(ftp_ctrl_pkts), + 0, 8, NULL, 0); - /* - * Simulate the TCP call - */ - dp_test_tcp_call(&tcp_call, /* Call context */ - tcp_pkt1, /* Per pkt context */ - ARRAY_SIZE(tcp_pkt1), /* number of pkts */ - 
&ftp_ctx, 0); + /* ftp data flow */ + dpt_tcp_call(&ftp_data_call, ftp_data_pkts, ARRAY_SIZE(ftp_data_pkts), + 0, 0, NULL, 0); - /* Cleanup */ - dp_test_npf_cleanup(); + /* End of ftp ctrl flow (pkts 9 - end) */ + dpt_tcp_call(&ftp_ctrl_call, ftp_ctrl_pkts, ARRAY_SIZE(ftp_ctrl_pkts), + 9, 0, NULL, 0); - dp_test_nl_del_ip_addr_and_connected_vrf("dp1T0", "1.1.1.1/24", - vrfid); - dp_test_nl_del_ip_addr_and_connected_vrf("dp1T1", "2.2.2.2/24", - vrfid); + free(ctrl_fw_pre); + free(ctrl_fw_pst); + free(ctrl_bk_pre); + free(ctrl_bk_pst); - dp_test_netlink_del_neigh("dp1T0", "1.1.1.11", - "aa:bb:cc:dd:1:11"); - dp_test_netlink_del_neigh("dp1T0", "1.1.1.12", - "aa:bb:cc:dd:1:12"); - dp_test_netlink_del_neigh("dp1T1", "2.2.2.11", - "aa:bb:cc:dd:2:11"); - dp_test_netlink_del_neigh("dp1T1", "2.2.2.12", - "aa:bb:cc:dd:2:12"); + free(data_fw_pre); + free(data_fw_pst); + free(data_bk_pre); + free(data_bk_pst); - if (vrfid != VRF_DEFAULT_ID) - dp_test_netlink_del_vrf(vrfid, 0); + dp_test_npf_fw_del(&fw, false); -} DP_END_TEST; + dp_test_npf_cleanup(); +} DP_END_TEST; -/*************************************************************************** - * alg_ftp2 - * - * Simulates an ftp call via two TCP calls - one for control channel and one - * for data channel. - * - * dnat is configured. Destination address 2.2.2.12 is translated to - * 2.2.2.11 for client-to-server traffic. - * - * The server includes the address 2.2.2.11 in its 227 Response packet, which - * the ftp alg translates to 2.2.2.12. - * - ***************************************************************************/ /* - * Callback function for TCP call simulator. ftp control channel. + * ftp3 - Passive ftp. Stateful firewall. IPv6. 
*/ -static void tcp_ftp_control_cb2(const char *str, - uint pktno, enum dp_test_tcp_dir dir, - uint8_t flags, - struct dp_test_pkt_desc_t *pre, - struct dp_test_pkt_desc_t *post, - void *data, uint index) +DP_DECL_TEST_CASE(npf_alg_ftp, ftp3, NULL, NULL); +DP_START_TEST_FULL_RUN(ftp3, test) { - struct rte_mbuf *pre_pak, *post_pak; - struct dp_test_expected *test_exp; - struct ftp_ctx *ctx = data; - const char **ftp = ctx->payload; + struct dp_test_pkt_desc_t *ctrl_fw_pre, *ctrl_fw_pst; + struct dp_test_pkt_desc_t *ctrl_bk_pre, *ctrl_bk_pst; + struct dp_test_pkt_desc_t *data_fw_pre, *data_fw_pst; + struct dp_test_pkt_desc_t *data_bk_pre, *data_bk_pst; - pre_pak = dp_test_v4_pkt_from_desc(pre); - post_pak = dp_test_v4_pkt_from_desc(post); + char *dp1T0_mac = dp_test_intf_name2mac_str("dp1T0"); + char *dp2T1_mac = dp_test_intf_name2mac_str("dp2T1"); + + /* Setup interfaces and neighbours */ + dp_test_nl_add_ip_addr_and_connected("dp1T0", "2001:1:1::1/64"); + dp_test_nl_add_ip_addr_and_connected("dp2T1", "2002:2:2::2/64"); + + dp_test_netlink_add_neigh("dp1T0", "2001:1:1::2", + "aa:bb:cc:dd:1:a1"); + dp_test_netlink_add_neigh("dp2T1", "2002:2:2::1", + "aa:bb:cc:dd:2:b1"); /* - * Add ftp payload + * Stateful firewall rule to match on TCP pkts to port 21. This + * matches the ctrl flow but not the data flow. The data flow only + * gets through because of the alg child session. 
*/ - if (ftp[pktno]) { - const char *pre_ftp = ftp[pktno]; - const char *post_ftp = ftp[pktno]; - char rnatd[50]; + struct dp_test_npf_rule_t rset[] = { + { + .rule = "10", + .pass = PASS, + .stateful = true, + .npf = "proto-final=6 dst-port=21" + }, + RULE_DEF_BLOCK, + NULL_RULE + }; - /* Reverse dNAT the ftp payload for 227 Response pkt */ - if (!strncmp("227 ", post_ftp, 4)) { - snprintf(rnatd, sizeof(rnatd), - "227 Entering Passive Mode " - "(2,2,2,12,38,91).\r\n"); - post_ftp = rnatd; - } + struct dp_test_npf_ruleset_t fw = { + .rstype = "fw-out", + .name = "OUT_FW", + .enable = 1, + .attach_point = "dp2T1", + .fwd = FWD, + .dir = "out", + .rules = rset + }; - dp_test_tcp_write_payload(pre_pak, strlen(pre_ftp), pre_ftp); - dp_test_tcp_write_payload(post_pak, strlen(post_ftp), post_ftp); - } + dp_test_npf_fw_add(&fw, false); - test_exp = dp_test_exp_from_desc(post_pak, post); - rte_pktmbuf_free(post_pak); - dp_test_exp_set_fwd_status(test_exp, DP_TEST_FWD_FORWARDED); + /* ftp port appears as 2 numbers in the string */ + uint8_t data_port_upr = 38; + uint8_t data_port_lwr = 91; + uint16_t data_port = (data_port_upr * 256) + data_port_lwr; - spush(test_exp->description, sizeof(test_exp->description), - "%s", str); + /* + * ftp control flow packets + */ + ctrl_fw_pre = dpt_pdesc_v6_create( + "ctrl_fw_pre", IPPROTO_TCP, + "aa:bb:cc:dd:1:a1", "2001:1:1::2", 46682, + dp1T0_mac, "2002:2:2::1", 21, + "dp1T0", "dp2T1"); + + ctrl_fw_pst = dpt_pdesc_v6_create( + "ctrl_fw_pst", IPPROTO_TCP, + dp2T1_mac, "2001:1:1::2", 46682, + "aa:bb:cc:dd:2:b1", "2002:2:2::1", 21, + "dp1T0", "dp2T1"); + + ctrl_bk_pre = dpt_pdesc_v6_create( + "ctrl_bk_pre", IPPROTO_TCP, + "aa:bb:cc:dd:2:b1", "2002:2:2::1", 21, + dp2T1_mac, "2001:1:1::2", 46682, + "dp2T1", "dp1T0"); + + ctrl_bk_pst = dpt_pdesc_v6_create( + "ctrl_bk_pst", IPPROTO_TCP, + dp1T0_mac, "2002:2:2::1", 21, + "aa:bb:cc:dd:1:a1", "2001:1:1::2", 46682, + "dp2T1", "dp1T0"); - /* Send the packet */ - dp_test_pak_receive(pre_pak, 
pre->rx_intf, test_exp); + /* + * ftp data flow packets + */ + data_fw_pre = dpt_pdesc_v6_create( + "data_fw_pre", IPPROTO_TCP, + "aa:bb:cc:dd:1:a1", "2001:1:1::2", 46682, + dp1T0_mac, "2002:2:2::1", data_port, + "dp1T0", "dp2T1"); + + data_fw_pst = dpt_pdesc_v6_create( + "data_fw_pst", IPPROTO_TCP, + dp2T1_mac, "2001:1:1::2", 46682, + "aa:bb:cc:dd:2:b1", "2002:2:2::1", data_port, + "dp1T0", "dp2T1"); + + data_bk_pre = dpt_pdesc_v6_create( + "data_bk_pre", IPPROTO_TCP, + "aa:bb:cc:dd:2:b1", "2002:2:2::1", data_port, + dp2T1_mac, "2001:1:1::2", 46682, + "dp2T1", "dp1T0"); + + data_bk_pst = dpt_pdesc_v6_create( + "data_bk_pst", IPPROTO_TCP, + dp1T0_mac, "2002:2:2::1", data_port, + "aa:bb:cc:dd:1:a1", "2001:1:1::2", 46682, + "dp2T1", "dp1T0"); + + /* + * Packet descriptors for ftp ctrl flow + */ + struct dpt_tcp_flow ftp_ctrl_call = { + .text[0] = '\0', /* description */ + .isn = {0, 0}, /* initial seq no */ + .desc[DPT_FORW] = { /* Forw pkt descriptors */ + .pre = ctrl_fw_pre, + .pst = ctrl_fw_pst, + }, + .desc[DPT_BACK] = { /* Back pkt descriptors */ + .pre = ctrl_bk_pre, + .pst = ctrl_bk_pst, + }, + .test_cb = NULL, /* Prep and send pkt */ + .post_cb = NULL, /* Fixup pkt exp */ + }; + snprintf(ftp_ctrl_call.text, sizeof(ftp_ctrl_call), "Ctrl"); /* - * 227 is a Response from the server that contains the data channel - * address and port. So we can startup the data channel tcp session - * here. 
+ * Per-packet flags and data for ftp ctrl flow */ - if (ctx->do_data_call && ftp[pktno] && !strncmp(ftp[pktno], "227", 3)) - ftp_data_call1(); -} + struct dpt_tcp_flow_pkt ftp_ctrl_pkts[] = { + { DPT_FORW, TH_SYN, 0, NULL, 0, NULL }, + { DPT_BACK, TH_SYN | TH_ACK, 0, NULL, 0, NULL }, + { DPT_FORW, TH_ACK, 0, NULL, 0, NULL }, -/* - * alg_ftp2 - */ -DP_DECL_TEST_CASE(npf_alg_ftp, alg_ftp2, NULL, NULL); -DP_START_TEST(alg_ftp2, test) -{ - uint vrfid = VRF_DEFAULT_ID; + /* session established */ + { DPT_FORW, TH_ACK, 0, + (char *)"SYST\x0d\x0a", 0, NULL }, + + { DPT_BACK, TH_ACK, 0, + (char *)"215 UNIX Type: L8\x0d\x0a", 0, NULL }, + + { DPT_FORW, TH_ACK, 0, + (char *)"TYPE I\x0d\x0a", 0, NULL }, + + { DPT_BACK, TH_ACK, 0, + (char *)"200 Switching to Binary mode.\x0d\x0a", + 0, NULL }, + + { DPT_FORW, TH_ACK, 0, + (char *)"PASV\x0d\x0a", 0, NULL }, - if (vrfid != VRF_DEFAULT_ID) - dp_test_netlink_add_vrf(vrfid, 1); + /* + * #8. Response: 227. Server telling client which address and + * port to use for data channel. Address is 2.2.2.11, port is + * 9819. 
(38 == 0x26, 91 == 0x5B, 0x265B == 9819) + */ + { DPT_BACK, TH_ACK, 0, + (char *)"227 Entering Passive Mode (2,2,2,11,38,91).\r\n", + 0, NULL }, + + { DPT_FORW, TH_ACK | TH_FIN, 0, NULL, 0, NULL }, + { DPT_BACK, TH_ACK | TH_FIN, 0, NULL, 0, NULL }, + { DPT_FORW, TH_ACK, 0, NULL, 0, NULL }, + }; + + /* + * Packet descriptors for ftp data flow + */ + struct dpt_tcp_flow ftp_data_call = { + .text[0] = '\0', /* description */ + .isn = {0, 0}, /* initial seq no */ + .desc[DPT_FORW] = { /* Forw pkt descriptors */ + .pre = data_fw_pre, + .pst = data_fw_pst, + }, + .desc[DPT_BACK] = { /* Back pkt descriptors */ + .pre = data_bk_pre, + .pst = data_bk_pst, + }, + .test_cb = NULL, /* Prep and send pkt */ + .post_cb = NULL, /* Fixup pkt exp */ + }; + snprintf(ftp_data_call.text, sizeof(ftp_data_call), "Data"); + + struct dpt_tcp_flow_pkt ftp_data_pkts[] = { + { DPT_FORW, TH_SYN, 0, NULL, 0, NULL }, + { DPT_BACK, TH_SYN | TH_ACK, 0, NULL, 0, NULL }, + { DPT_FORW, TH_ACK, 0, NULL, 0, NULL}, + + { DPT_BACK, TH_ACK, 100, NULL, 0, NULL }, + { DPT_FORW, TH_ACK, 0, NULL, 0, NULL }, + + { DPT_FORW, TH_ACK | TH_FIN, 0, NULL, 0, NULL }, + { DPT_BACK, TH_ACK | TH_FIN, 0, NULL, 0, NULL }, + { DPT_FORW, TH_ACK, 0, NULL, 0, NULL }, + }; + + /* Start of ftp ctrl flow (pkts 0 - 8) */ + dpt_tcp_call(&ftp_ctrl_call, ftp_ctrl_pkts, ARRAY_SIZE(ftp_ctrl_pkts), + 0, 8, NULL, 0); + + /* ftp data flow */ + dpt_tcp_call(&ftp_data_call, ftp_data_pkts, ARRAY_SIZE(ftp_data_pkts), + 0, 0, NULL, 0); + + /* End of ftp ctrl flow (pkts 9 - end) */ + dpt_tcp_call(&ftp_ctrl_call, ftp_ctrl_pkts, ARRAY_SIZE(ftp_ctrl_pkts), + 9, 0, NULL, 0); + + free(ctrl_fw_pre); + free(ctrl_fw_pst); + free(ctrl_bk_pre); + free(ctrl_bk_pst); + + free(data_fw_pre); + free(data_fw_pst); + free(data_bk_pre); + free(data_bk_pst); + + dp_test_npf_fw_del(&fw, false); + + dp_test_npf_cleanup(); + + dp_test_netlink_del_neigh("dp1T0", "2001:1:1::2", + "aa:bb:cc:dd:1:a1"); + dp_test_netlink_del_neigh("dp2T1", "2002:2:2::1", + 
"aa:bb:cc:dd:2:b1"); /* Setup interfaces and neighbours */ - dp_test_nl_add_ip_addr_and_connected_vrf("dp1T0", "1.1.1.1/24", - vrfid); - dp_test_nl_add_ip_addr_and_connected_vrf("dp1T1", "2.2.2.2/24", - vrfid); - - dp_test_netlink_add_neigh("dp1T0", "1.1.1.11", - "aa:bb:cc:dd:1:11"); - dp_test_netlink_add_neigh("dp1T0", "1.1.1.12", - "aa:bb:cc:dd:1:12"); - dp_test_netlink_add_neigh("dp1T1", "2.2.2.11", - "aa:bb:cc:dd:2:11"); - dp_test_netlink_add_neigh("dp1T1", "2.2.2.12", - "aa:bb:cc:dd:2:12"); + dp_test_nl_del_ip_addr_and_connected("dp1T0", "2001:1:1::1/64"); + dp_test_nl_del_ip_addr_and_connected("dp2T1", "2002:2:2::2/64"); + +} DP_END_TEST; + + +/* + * ftp4 - SNAT, Passive ftp. + * + * The packet containing the data address and port is from the server, so does + * not need translated. Initial data pkt is *from* the client. + */ +DP_DECL_TEST_CASE(npf_alg_ftp, ftp4, dpt_alg_ftp_setup, dpt_alg_ftp_teardown); +DP_START_TEST(ftp4, test) +{ + struct dp_test_pkt_desc_t *ctrl_fw_pre, *ctrl_fw_pst; + struct dp_test_pkt_desc_t *ctrl_bk_pre, *ctrl_bk_pst; + struct dp_test_pkt_desc_t *data_fw_pre, *data_fw_pst; + struct dp_test_pkt_desc_t *data_bk_pre, *data_bk_pst; /* - * Add DNAT rule. + * Add SNAT rule. 
*/ - struct dp_test_npf_nat_rule_t dnat = { - .desc = "dnat rule", + struct dp_test_npf_nat_rule_t snat = { + .desc = "snat rule", .rule = "10", - .ifname = "dp1T0", + .ifname = "dp2T1", .proto = IPPROTO_TCP, .map = "dynamic", - .from_addr = NULL, + .port_alloc = NULL, + .from_addr = "1.1.1.11", .from_port = NULL, - .to_addr = "2.2.2.12", + .to_addr = NULL, .to_port = NULL, - .trans_addr = "2.2.2.11", + .trans_addr = "2.2.2.20", .trans_port = NULL }; - dp_test_npf_dnat_add(&dnat, true); - - ftp1_fwd_in.l4.tcp.sport = 46682; - ftp1_fwd_in.l4.tcp.dport = 21; + dp_test_npf_snat_add(&snat, true); - ftp1_fwd_out.l4.tcp.sport = 46682; - ftp1_fwd_out.l4.tcp.dport = 21; - ftp1_rev_in.l4.tcp.sport = 21; - ftp1_rev_in.l4.tcp.dport = 46682; + /* ftp port appears as 2 numbers in the string */ + uint16_t data_port_upr; + uint16_t data_port_lwr; + uint16_t data_port; - ftp1_rev_out.l4.tcp.sport = 21; - ftp1_rev_out.l4.tcp.dport = 46682; + /* Data port is 2559 */ + data_port_upr = 9; + data_port_lwr = 255; + data_port = (data_port_upr * 256) + data_port_lwr; - ftp1_fwd_in.l3_dst = "2.2.2.12"; - ftp1_fwd_out.l3_dst = "2.2.2.11"; + /* + * ftp control flow packets + */ + ctrl_fw_pre = dpt_pdesc_v4_create( + "ctrl_fw_pre", IPPROTO_TCP, + "aa:bb:cc:dd:1:11", "1.1.1.11", 46682, + "aa:bb:cc:dd:2:11", "2.2.2.11", 21, + "dp1T0", "dp2T1"); + + ctrl_fw_pst = dpt_pdesc_v4_create( + "ctrl_fw_pst", IPPROTO_TCP, + "aa:bb:cc:dd:1:11", "2.2.2.20", 46682, + "aa:bb:cc:dd:2:11", "2.2.2.11", 21, + "dp1T0", "dp2T1"); + + ctrl_bk_pre = dpt_pdesc_v4_create( + "ctrl_bk_pre", IPPROTO_TCP, + "aa:bb:cc:dd:2:11", "2.2.2.11", 21, + "aa:bb:cc:dd:1:11", "2.2.2.20", 46682, + "dp2T1", "dp1T0"); + + ctrl_bk_pst = dpt_pdesc_v4_create( + "ctrl_bk_pst", IPPROTO_TCP, + "aa:bb:cc:dd:2:11", "2.2.2.11", 21, + "aa:bb:cc:dd:1:11", "1.1.1.11", 46682, + "dp2T1", "dp1T0"); - ftp1_rev_in.l3_src = "2.2.2.11"; - ftp1_rev_out.l3_src = "2.2.2.12"; + /* + * ftp data flow packets + */ + data_fw_pre = dpt_pdesc_v4_create( + 
"data_fw_pre", IPPROTO_TCP, + "aa:bb:cc:dd:1:11", "1.1.1.11", 49888, + "aa:bb:cc:dd:2:11", "2.2.2.11", data_port, + "dp1T0", "dp2T1"); + + data_fw_pst = dpt_pdesc_v4_create( + "data_fw_pst", IPPROTO_TCP, + "aa:bb:cc:dd:1:11", "2.2.2.20", 49888, + "aa:bb:cc:dd:2:11", "2.2.2.11", data_port, + "dp1T0", "dp2T1"); + + data_bk_pre = dpt_pdesc_v4_create( + "data_bk_pre", IPPROTO_TCP, + "aa:bb:cc:dd:2:11", "2.2.2.11", data_port, + "aa:bb:cc:dd:1:11", "2.2.2.20", 49888, + "dp2T1", "dp1T0"); + + data_bk_pst = dpt_pdesc_v4_create( + "data_bk_pst", IPPROTO_TCP, + "aa:bb:cc:dd:2:11", "2.2.2.11", data_port, + "aa:bb:cc:dd:1:11", "1.1.1.11", 49888, + "dp2T1", "dp1T0"); - struct dp_test_tcp_call tcp_call = { - .str[0] = '\0', - .isn = {0, 0}, - .desc[DP_DIR_FORW] = { - .pre = &ftp1_fwd_in, - .post = &ftp1_fwd_out, + /* + * Packet descriptors for ftp ctrl flow + */ + struct dpt_tcp_flow ftp_ctrl_call = { + .text[0] = '\0', /* description */ + .isn = {0, 0}, /* initial seq no */ + .desc[DPT_FORW] = { /* Forw pkt descriptors */ + .pre = ctrl_fw_pre, + .pst = ctrl_fw_pst, }, - .desc[DP_DIR_BACK] = { - .pre = &ftp1_rev_in, - .post = &ftp1_rev_out, + .desc[DPT_BACK] = { /* Back pkt descriptors */ + .pre = ctrl_bk_pre, + .pst = ctrl_bk_pst, }, - .test_cb = tcp_ftp_control_cb2, - .post_cb = NULL, + .test_cb = NULL, /* Prep and send pkt */ + .post_cb = NULL, /* Fixup pkt exp */ }; + snprintf(ftp_ctrl_call.text, sizeof(ftp_ctrl_call), + "Ctrl"); - const char *ftp[] = { - [0] = NULL, - [1] = NULL, - [2] = NULL, + /* + * Per-packet flags and data for ftp ctrl flow + */ + struct dpt_tcp_flow_pkt ftp_ctrl_pkts[] = { + { DPT_FORW, TH_SYN, 0, NULL, 0, NULL }, + { DPT_BACK, TH_SYN | TH_ACK, 0, NULL, 0, NULL }, + { DPT_FORW, TH_ACK, 0, NULL, 0, NULL }, - [3] = "SYST\x0d\x0a", - [4] = "215 UNIX Type: L8\x0d\x0a", + /* session established */ + { DPT_FORW, TH_ACK, + 0, (char *)"SYST\x0d\x0a", 0, NULL }, - [5] = "TYPE I\x0d\x0a", - [6] = "200 Switching to Binary mode.\x0d\x0a", + { DPT_BACK, TH_ACK, + 
0, (char *)"215 UNIX Type: L8\x0d\x0a", 0, NULL }, - [7] = "PASV\x0d\x0a", + { DPT_FORW, TH_ACK, + 0, (char *)"TYPE I\x0d\x0a", 0, NULL }, - /* - * Response: 227. Server telling client which address and - * port to use for data channel. Address is 2.2.2.11, port is - * 9819. (38 == 0x26, 91 == 0x5B, 0x265B == 9819) - */ - [8] = "227 Entering Passive Mode (2,2,2,11,38,91).\x0d\x0a", + { DPT_BACK, TH_ACK, + 0, + (char *)"200 Switching to Binary mode.\x0d\x0a", + 0, NULL }, + + { DPT_FORW, TH_ACK, + 0, (char *)"PASV\x0d\x0a", 0, NULL }, /* - * Here we get a new TCP call opened for the data channel, - * from 1.1.1.11:46682 to 2.2.2.11:9819 + * #8. Response: 227. Server telling client which + * address and port to use for data channel. Address + * is 2.2.2.11, port is 9819 */ + { DPT_BACK, TH_ACK, 0, + (char *)"227 Entering Passive Mode (2,2,2,11,9,255).\r\n", + 0, NULL }, - [9] = NULL, - [10] = NULL, - [11] = NULL, + { DPT_FORW, TH_ACK | TH_FIN, 0, NULL, 0, NULL }, + { DPT_BACK, TH_ACK | TH_FIN, 0, NULL, 0, NULL }, + { DPT_FORW, TH_ACK, 0, NULL, 0, NULL }, }; + /* + * Packet descriptors for ftp data flow + */ + struct dpt_tcp_flow ftp_data_call = { + .text[0] = '\0', /* description */ + .isn = {0, 0}, /* initial seq no */ + .desc[DPT_FORW] = { /* Forw pkt descriptors */ + .pre = data_fw_pre, + .pst = data_fw_pst, + }, + .desc[DPT_BACK] = { /* Back pkt descriptors */ + .pre = data_bk_pre, + .pst = data_bk_pst, + }, + .test_cb = NULL, /* Prep and send pkt */ + .post_cb = NULL, /* Fixup pkt exp */ + }; - struct dp_test_tcp_flow_pkt tcp_pkt1[] = { - {DP_DIR_FORW, TH_SYN, 0, NULL}, - {DP_DIR_BACK, TH_SYN | TH_ACK, 0, NULL}, - {DP_DIR_FORW, TH_ACK, 0, NULL}, - - /* session established */ - {DP_DIR_FORW, TH_ACK, strlen(ftp[3]), NULL}, - {DP_DIR_BACK, TH_ACK, strlen(ftp[4]), NULL}, - - {DP_DIR_FORW, TH_ACK, strlen(ftp[5]), NULL}, - {DP_DIR_BACK, TH_ACK, strlen(ftp[6]), NULL}, + snprintf(ftp_data_call.text, sizeof(ftp_data_call), + "Data, port %u", data_port); - {DP_DIR_FORW, 
TH_ACK, strlen(ftp[7]), NULL}, - {DP_DIR_BACK, TH_ACK, strlen(ftp[8]), NULL}, + struct dpt_tcp_flow_pkt ftp_data_pkts[] = { + { DPT_FORW, TH_SYN, 0, NULL, 0, NULL }, + { DPT_BACK, TH_SYN | TH_ACK, 0, NULL, 0, NULL }, + { DPT_FORW, TH_ACK, 0, NULL, 0, NULL }, - {DP_DIR_FORW, TH_ACK | TH_FIN, 0, NULL}, - {DP_DIR_BACK, TH_ACK | TH_FIN, 0, NULL}, - {DP_DIR_FORW, TH_ACK, 0, NULL}, - }; - assert(ARRAY_SIZE(ftp) == ARRAY_SIZE(tcp_pkt1)); + { DPT_BACK, TH_ACK, 100, NULL, 0, NULL }, + { DPT_FORW, TH_ACK, 0, NULL, 0, NULL }, - struct ftp_ctx ftp_ctx = { - .payload = ftp, - .payload_len = ARRAY_SIZE(ftp), - .do_data_call = true, /* Add data call */ + { DPT_FORW, TH_ACK | TH_FIN, 0, NULL, 0, NULL }, + { DPT_BACK, TH_ACK | TH_FIN, 0, NULL, 0, NULL }, + { DPT_FORW, TH_ACK, 0, NULL, 0, NULL }, }; - /* Simulate the TCP call */ - dp_test_tcp_call(&tcp_call, tcp_pkt1, ARRAY_SIZE(tcp_pkt1), - &ftp_ctx, 0); + /* Start of ftp ctrl flow (pkts 0 - 8) */ + dpt_tcp_call(&ftp_ctrl_call, ftp_ctrl_pkts, + ARRAY_SIZE(ftp_ctrl_pkts), + 0, 8, NULL, 0); - /* Cleanup */ + /* ftp data flow */ + dpt_tcp_call(&ftp_data_call, ftp_data_pkts, + ARRAY_SIZE(ftp_data_pkts), + 0, 0, NULL, 0); - dp_test_npf_dnat_del(dnat.ifname, dnat.rule, true); + /* End of ftp ctrl flow (pkts 9 - end) */ + dpt_tcp_call(&ftp_ctrl_call, ftp_ctrl_pkts, + ARRAY_SIZE(ftp_ctrl_pkts), + 9, 0, NULL, 0); - dp_test_npf_cleanup(); + free(ctrl_fw_pre); + free(ctrl_fw_pst); + free(ctrl_bk_pre); + free(ctrl_bk_pst); - dp_test_nl_del_ip_addr_and_connected_vrf("dp1T0", "1.1.1.1/24", - vrfid); - dp_test_nl_del_ip_addr_and_connected_vrf("dp1T1", "2.2.2.2/24", - vrfid); + free(data_fw_pre); + free(data_fw_pst); + free(data_bk_pre); + free(data_bk_pst); - dp_test_netlink_del_neigh("dp1T0", "1.1.1.11", - "aa:bb:cc:dd:1:11"); - dp_test_netlink_del_neigh("dp1T0", "1.1.1.12", - "aa:bb:cc:dd:1:12"); - dp_test_netlink_del_neigh("dp1T1", "2.2.2.11", - "aa:bb:cc:dd:2:11"); - dp_test_netlink_del_neigh("dp1T1", "2.2.2.12", - "aa:bb:cc:dd:2:12"); + 
dp_test_npf_snat_del(snat.ifname, snat.rule, true); - if (vrfid != VRF_DEFAULT_ID) - dp_test_netlink_del_vrf(vrfid, 0); + dp_test_npf_cleanup(); } DP_END_TEST; -/*************************************************************************** - * alg_ftp3 - * - * Simulates an ftp call via two TCP calls - one for control channel and one - * for data channel. - * - * dnat is configured. Destination address 2.2.2.12 is translated to - * 2.2.2.11 for client-to-server traffic. - * - * The server includes the address 2.2.2.11 in its 227 Response packet, which - * the ftp alg translates to 2.2.2.12. - * - * Input and output interface are in a non-default VRF. - * - ***************************************************************************/ - /* - * alg_ftp3 + * ftp5 - DNAT, Passive ftp. + * + * With Passive FTP, the control and data flows both start in the same + * direction (i.e. forwards). */ -DP_DECL_TEST_CASE(npf_alg_ftp, alg_ftp3, NULL, NULL); -DP_START_TEST(alg_ftp3, test) +DP_DECL_TEST_CASE(npf_alg_ftp, ftp5, dpt_alg_ftp_setup, dpt_alg_ftp_teardown); +DP_START_TEST(ftp5, test) { - uint vrfid = 69; - - if (vrfid != VRF_DEFAULT_ID) - dp_test_netlink_add_vrf(vrfid, 1); + struct dp_test_pkt_desc_t *ctrl_fw_pre, *ctrl_fw_pst; + struct dp_test_pkt_desc_t *ctrl_bk_pre, *ctrl_bk_pst; + struct dp_test_pkt_desc_t *data_fw_pre, *data_fw_pst; + struct dp_test_pkt_desc_t *data_bk_pre, *data_bk_pst; - /* Setup interfaces and neighbours */ - dp_test_nl_add_ip_addr_and_connected_vrf("dp1T0", "1.1.1.1/24", - vrfid); - dp_test_nl_add_ip_addr_and_connected_vrf("dp1T1", "2.2.2.2/24", - vrfid); - - dp_test_netlink_add_neigh("dp1T0", "1.1.1.11", - "aa:bb:cc:dd:1:11"); - dp_test_netlink_add_neigh("dp1T0", "1.1.1.12", - "aa:bb:cc:dd:1:12"); - dp_test_netlink_add_neigh("dp1T1", "2.2.2.11", - "aa:bb:cc:dd:2:11"); - dp_test_netlink_add_neigh("dp1T1", "2.2.2.12", - "aa:bb:cc:dd:2:12"); + /* ftp port appears as 2 numbers in the string */ + uint8_t data_port_upr = 38; + uint8_t data_port_lwr = 91; 
+ uint16_t data_port = (data_port_upr * 256) + data_port_lwr; /* - * Add DNAT rule. + * Add SNAT rule. */ struct dp_test_npf_nat_rule_t dnat = { .desc = "dnat rule", @@ -733,9 +964,10 @@ DP_START_TEST(alg_ftp3, test) .ifname = "dp1T0", .proto = IPPROTO_TCP, .map = "dynamic", + .port_alloc = NULL, .from_addr = NULL, .from_port = NULL, - .to_addr = "2.2.2.12", + .to_addr = "2.2.2.20", .to_port = NULL, .trans_addr = "2.2.2.11", .trans_port = NULL @@ -743,346 +975,420 @@ DP_START_TEST(alg_ftp3, test) dp_test_npf_dnat_add(&dnat, true); - ftp1_fwd_in.l4.tcp.sport = 46682; - ftp1_fwd_in.l4.tcp.dport = 21; - - ftp1_fwd_out.l4.tcp.sport = 46682; - ftp1_fwd_out.l4.tcp.dport = 21; - - ftp1_rev_in.l4.tcp.sport = 21; - ftp1_rev_in.l4.tcp.dport = 46682; - - ftp1_rev_out.l4.tcp.sport = 21; - ftp1_rev_out.l4.tcp.dport = 46682; - - ftp1_fwd_in.l3_dst = "2.2.2.12"; - ftp1_fwd_out.l3_dst = "2.2.2.11"; - - ftp1_rev_in.l3_src = "2.2.2.11"; - ftp1_rev_out.l3_src = "2.2.2.12"; + /* + * ftp control flow packets + */ + ctrl_fw_pre = dpt_pdesc_v4_create( + "ctrl_fw_pre", IPPROTO_TCP, + "aa:bb:cc:dd:1:11", "1.1.1.11", 46682, + "aa:bb:cc:dd:2:11", "2.2.2.20", 21, + "dp1T0", "dp2T1"); + + ctrl_fw_pst = dpt_pdesc_v4_create( + "ctrl_fw_pst", IPPROTO_TCP, + "aa:bb:cc:dd:1:11", "1.1.1.11", 46682, + "aa:bb:cc:dd:2:11", "2.2.2.11", 21, + "dp1T0", "dp2T1"); + + ctrl_bk_pre = dpt_pdesc_v4_create( + "ctrl_bk_pre", IPPROTO_TCP, + "aa:bb:cc:dd:2:11", "2.2.2.11", 21, + "aa:bb:cc:dd:1:11", "1.1.1.11", 46682, + "dp2T1", "dp1T0"); + + ctrl_bk_pst = dpt_pdesc_v4_create( + "ctrl_bk_pst", IPPROTO_TCP, + "aa:bb:cc:dd:2:11", "2.2.2.20", 21, + "aa:bb:cc:dd:1:11", "1.1.1.11", 46682, + "dp2T1", "dp1T0"); - struct dp_test_tcp_call tcp_call = { - .str[0] = '\0', - .isn = {0, 0}, - .desc[DP_DIR_FORW] = { - .pre = &ftp1_fwd_in, - .post = &ftp1_fwd_out, + /* + * ftp data flow packets + */ + data_fw_pre = dpt_pdesc_v4_create( + "data_fw_pre", IPPROTO_TCP, + "aa:bb:cc:dd:1:11", "1.1.1.11", 49888, + "aa:bb:cc:dd:2:11", 
"2.2.2.20", data_port, + "dp1T0", "dp2T1"); + + data_fw_pst = dpt_pdesc_v4_create( + "data_fw_pst", IPPROTO_TCP, + "aa:bb:cc:dd:1:11", "1.1.1.11", 49888, + "aa:bb:cc:dd:2:11", "2.2.2.11", data_port, + "dp1T0", "dp2T1"); + + data_bk_pre = dpt_pdesc_v4_create( + "data_bk_pre", IPPROTO_TCP, + "aa:bb:cc:dd:2:11", "2.2.2.11", data_port, + "aa:bb:cc:dd:1:11", "1.1.1.11", 49888, + "dp2T1", "dp1T0"); + + data_bk_pst = dpt_pdesc_v4_create( + "data_bk_pst", IPPROTO_TCP, + "aa:bb:cc:dd:2:11", "2.2.2.20", data_port, + "aa:bb:cc:dd:1:11", "1.1.1.11", 49888, + "dp2T1", "dp1T0"); + + /* + * Packet descriptors for ftp ctrl flow + */ + struct dpt_tcp_flow ftp_ctrl_call = { + .text[0] = '\0', /* description */ + .isn = {0, 0}, /* initial seq no */ + .desc[DPT_FORW] = { /* Forw pkt descriptors */ + .pre = ctrl_fw_pre, + .pst = ctrl_fw_pst, }, - .desc[DP_DIR_BACK] = { - .pre = &ftp1_rev_in, - .post = &ftp1_rev_out, + .desc[DPT_BACK] = { /* Back pkt descriptors */ + .pre = ctrl_bk_pre, + .pst = ctrl_bk_pst, }, - .test_cb = tcp_ftp_control_cb2, - .post_cb = NULL, + .test_cb = NULL, /* Prep and send pkt */ + .post_cb = NULL, /* Fixup pkt exp */ }; + snprintf(ftp_ctrl_call.text, sizeof(ftp_ctrl_call), "Ctrl"); + + /* + * Per-packet flags and data for ftp ctrl flow + */ + struct dpt_tcp_flow_pkt ftp_ctrl_pkts[] = { + { DPT_FORW, TH_SYN, 0, NULL, 0, NULL }, + { DPT_BACK, TH_SYN | TH_ACK, 0, NULL, 0, NULL }, + { DPT_FORW, TH_ACK, 0, NULL, 0, NULL }, - const char *ftp[] = { - [0] = NULL, - [1] = NULL, - [2] = NULL, + /* session established */ + { DPT_FORW, TH_ACK, + 0, (char *)"SYST\x0d\x0a", 0, NULL }, - [3] = "SYST\x0d\x0a", - [4] = "215 UNIX Type: L8\x0d\x0a", + { DPT_BACK, TH_ACK, + 0, (char *)"215 UNIX Type: L8\x0d\x0a", 0, NULL }, - [5] = "TYPE I\x0d\x0a", - [6] = "200 Switching to Binary mode.\x0d\x0a", + { DPT_FORW, TH_ACK, + 0, (char *)"TYPE I\x0d\x0a", 0, NULL }, - [7] = "PASV\x0d\x0a", + { DPT_BACK, TH_ACK, + 0, (char *)"200 Switching to Binary mode.\x0d\x0a", 0, NULL }, - /* - * 
Response: 227. Server telling client which address and - * port to use for data channel. Address is 2.2.2.11, port is - * 9819. (38 == 0x26, 91 == 0x5B, 0x265B == 9819) - */ - [8] = "227 Entering Passive Mode (2,2,2,11,38,91).\x0d\x0a", + { DPT_FORW, TH_ACK, + 0, (char *)"PASV\x0d\x0a", 0, NULL }, /* - * Here we get a new TCP call opened for the data channel, - * from 1.1.1.11:46682 to 2.2.2.11:9819 + * #8. Response: 227. Server telling client which address and + * port to use for data channel. Address is 2.2.2.11, which + * is reverse-dnatd to 2.2.2.20. Port is 9819. (38 == 0x26, + * 91 == 0x5B, 0x265B == 9819). + * + * The alg creates a tuple: "TCP 1.1.1.1:any -> 2.2.2.20:9819" + * in order to detect the data flow which start in the reverse + * direction. */ - }; + { DPT_BACK, TH_ACK, 0, + (char *)"227 Entering Passive Mode (2,2,2,11,38,91).\r\n", + 0, + (char *)"227 Entering Passive Mode (2,2,2,20,38,91).\r\n" }, - struct dp_test_tcp_flow_pkt tcp_pkt1[] = { - {DP_DIR_FORW, TH_SYN, 0, NULL}, - {DP_DIR_BACK, TH_SYN | TH_ACK, 0, NULL}, - {DP_DIR_FORW, TH_ACK, 0, NULL}, + /* Data call is done at this point */ - /* session established */ - {DP_DIR_FORW, TH_ACK, strlen(ftp[3]), NULL}, - {DP_DIR_BACK, TH_ACK, strlen(ftp[4]), NULL}, + { DPT_FORW, TH_ACK | TH_FIN, 0, NULL, 0, NULL }, + { DPT_BACK, TH_ACK | TH_FIN, 0, NULL, 0, NULL }, + { DPT_FORW, TH_ACK, 0, NULL, 0, NULL }, + }; - {DP_DIR_FORW, TH_ACK, strlen(ftp[5]), NULL}, - {DP_DIR_BACK, TH_ACK, strlen(ftp[6]), NULL}, + /* + * Packet descriptors for ftp data flow + */ + struct dpt_tcp_flow ftp_data_call = { + .text[0] = '\0', /* description */ + .isn = {0, 0}, /* initial seq no */ + .desc[DPT_FORW] = { /* Forw pkt descriptors */ + .pre = data_fw_pre, + .pst = data_fw_pst, + }, + .desc[DPT_BACK] = { /* Back pkt descriptors */ + .pre = data_bk_pre, + .pst = data_bk_pst, + }, + .test_cb = NULL, /* Prep and send pkt */ + .post_cb = NULL, /* Fixup pkt exp */ + }; + snprintf(ftp_data_call.text, sizeof(ftp_data_call), 
"Data"); - {DP_DIR_FORW, TH_ACK, strlen(ftp[7]), NULL}, - {DP_DIR_BACK, TH_ACK, strlen(ftp[8]), NULL}, + struct dpt_tcp_flow_pkt ftp_data_pkts[] = { + { DPT_FORW, TH_SYN, 0, NULL, 0, NULL }, + { DPT_BACK, TH_SYN | TH_ACK, 0, NULL, 0, NULL }, + { DPT_FORW, TH_ACK, 0, NULL, 0, NULL }, - /* call not completed */ - }; - assert(ARRAY_SIZE(ftp) == ARRAY_SIZE(tcp_pkt1)); + { DPT_BACK, TH_ACK, 100, NULL, 0, NULL }, + { DPT_FORW, TH_ACK, 0, NULL, 0, NULL }, - struct ftp_ctx ftp_ctx = { - .payload = ftp, - .payload_len = ARRAY_SIZE(ftp), - .do_data_call = true, /* Add data call */ + { DPT_FORW, TH_ACK | TH_FIN, 0, NULL, 0, NULL }, + { DPT_BACK, TH_ACK | TH_FIN, 0, NULL, 0, NULL }, + { DPT_FORW, TH_ACK, 0, NULL, 0, NULL }, }; - /* Simulate the TCP call */ - dp_test_tcp_call(&tcp_call, tcp_pkt1, ARRAY_SIZE(tcp_pkt1), - &ftp_ctx, 0); - - /* - * ftp control and data channels established - */ -#if 0 - dp_test_npf_print_session_table(false); -#endif + /* Start of ftp ctrl flow (pkts 0 - 8) */ + dpt_tcp_call(&ftp_ctrl_call, ftp_ctrl_pkts, ARRAY_SIZE(ftp_ctrl_pkts), + 0, 8, NULL, 0); - /* - * Set true to delete vrf before sessions are expired. 
- */ - bool delete_vrf = true; + /* ftp data flow */ + dpt_tcp_call(&ftp_data_call, ftp_data_pkts, ARRAY_SIZE(ftp_data_pkts), + 0, 0, NULL, 0); - if (delete_vrf && vrfid != VRF_DEFAULT_ID) { - /* - * Delete vrf while there are ALG sessions - */ - dp_test_nl_del_ip_addr_and_connected_vrf("dp1T0", "1.1.1.1/24", - vrfid); - dp_test_nl_del_ip_addr_and_connected_vrf("dp1T1", "2.2.2.2/24", - vrfid); - dp_test_netlink_set_interface_vrf("dp1T0", VRF_DEFAULT_ID); - dp_test_netlink_set_interface_vrf("dp1T1", VRF_DEFAULT_ID); - dp_test_netlink_del_vrf(vrfid, 0); - dp_test_npf_clear_sessions(); - } - - /* Cleanup */ + /* End of ftp ctrl flow (pkts 9 - end) */ + dpt_tcp_call(&ftp_ctrl_call, ftp_ctrl_pkts, ARRAY_SIZE(ftp_ctrl_pkts), + 9, 0, NULL, 0); dp_test_npf_dnat_del(dnat.ifname, dnat.rule, true); - dp_test_netlink_del_neigh("dp1T0", "1.1.1.11", - "aa:bb:cc:dd:1:11"); - dp_test_netlink_del_neigh("dp1T0", "1.1.1.12", - "aa:bb:cc:dd:1:12"); - dp_test_netlink_del_neigh("dp1T1", "2.2.2.11", - "aa:bb:cc:dd:2:11"); - dp_test_netlink_del_neigh("dp1T1", "2.2.2.12", - "aa:bb:cc:dd:2:12"); - - dp_test_npf_cleanup(); + free(ctrl_fw_pre); + free(ctrl_fw_pst); + free(ctrl_bk_pre); + free(ctrl_bk_pst); - if (!delete_vrf || vrfid == VRF_DEFAULT_ID) { - /* - * Normal test cleanup. - */ - dp_test_nl_del_ip_addr_and_connected_vrf("dp1T0", "1.1.1.1/24", - vrfid); - dp_test_nl_del_ip_addr_and_connected_vrf("dp1T1", "2.2.2.2/24", - vrfid); + free(data_fw_pre); + free(data_fw_pst); + free(data_bk_pre); + free(data_bk_pst); - if (vrfid != VRF_DEFAULT_ID) - dp_test_netlink_del_vrf(vrfid, 0); - } + dp_test_npf_cleanup(); } DP_END_TEST; -/****************************************************************** - * - * alg_ftp4 - * - * This tests the ftp ALG and NAT where the ftp payload *increases* in size - * due to NATed embedded address strings. - * - * alg_ftp5 - * - * This tests the ftp ALG and NAT where the ftp payload *decreases* in size - * due to NATed embedded address strings. 
- * - *****************************************************************/ - /* - * The core of each test is an ftp call that repeats this sequence 'n' times: + * ftp6 - DNAT, Passive ftp. No parenthesis in 227 message. * - * Fwd: PORT addr, port - * Back: 200 ... - * Fwd: LIST ... - * Back: 150 ... - * Back: 226 ... - * Fwd: ack - * - * The 'n' repeat count is specified by these defines. The port number is - * incremented each repeat. + * With Passive FTP, the control and data flows both start in the same + * direction (i.e. forwards). */ -#define NPF_ALG_FTP5_REPEATS 10 -#define NPF_ALG_FTP6_REPEATS 10 - -static void -npf_alg_ftp_rx(struct dp_test_pkt_desc_t *pre, - const char *pre_pload, uint pre_plen, - struct dp_test_pkt_desc_t *post, - const char *post_pload, uint post_plen, - bool fwd) +DP_DECL_TEST_CASE(npf_alg_ftp, ftp6, dpt_alg_ftp_setup, dpt_alg_ftp_teardown); +DP_START_TEST_FULL_RUN(ftp6, test) { - struct rte_mbuf *pre_pak, *post_pak; - struct dp_test_expected *test_exp; - - pre->len = pre_plen; - post->len = post_plen; + struct dp_test_pkt_desc_t *ctrl_fw_pre, *ctrl_fw_pst; + struct dp_test_pkt_desc_t *ctrl_bk_pre, *ctrl_bk_pst; + struct dp_test_pkt_desc_t *data_fw_pre, *data_fw_pst; + struct dp_test_pkt_desc_t *data_bk_pre, *data_bk_pst; - pre_pak = dp_test_v4_pkt_from_desc(pre); - post_pak = dp_test_v4_pkt_from_desc(post); + /* ftp port appears as 2 numbers in the string */ + uint8_t data_port_upr = 38; + uint8_t data_port_lwr = 91; + uint16_t data_port = (data_port_upr * 256) + data_port_lwr; - if (pre_pload) - dp_test_tcp_write_payload(pre_pak, pre_plen, pre_pload); - - if (post_pload) - dp_test_tcp_write_payload(post_pak, post_plen, post_pload); + /* + * Add SNAT rule. 
+ */ + struct dp_test_npf_nat_rule_t dnat = { + .desc = "dnat rule", + .rule = "10", + .ifname = "dp1T0", + .proto = IPPROTO_TCP, + .map = "dynamic", + .port_alloc = NULL, + .from_addr = NULL, + .from_port = NULL, + .to_addr = "2.2.2.20", + .to_port = NULL, + .trans_addr = "2.2.2.11", + .trans_port = NULL + }; - test_exp = dp_test_exp_from_desc(post_pak, post); - rte_pktmbuf_free(post_pak); - dp_test_exp_set_fwd_status(test_exp, DP_TEST_FWD_FORWARDED); + dp_test_npf_dnat_add(&dnat, true); - /* Run the test */ - dp_test_pak_receive(pre_pak, pre->rx_intf, test_exp); -} + /* + * ftp control flow packets + */ + ctrl_fw_pre = dpt_pdesc_v4_create( + "ctrl_fw_pre", IPPROTO_TCP, + "aa:bb:cc:dd:1:11", "1.1.1.11", 46682, + "aa:bb:cc:dd:2:11", "2.2.2.20", 21, + "dp1T0", "dp2T1"); + + ctrl_fw_pst = dpt_pdesc_v4_create( + "ctrl_fw_pst", IPPROTO_TCP, + "aa:bb:cc:dd:1:11", "1.1.1.11", 46682, + "aa:bb:cc:dd:2:11", "2.2.2.11", 21, + "dp1T0", "dp2T1"); + + ctrl_bk_pre = dpt_pdesc_v4_create( + "ctrl_bk_pre", IPPROTO_TCP, + "aa:bb:cc:dd:2:11", "2.2.2.11", 21, + "aa:bb:cc:dd:1:11", "1.1.1.11", 46682, + "dp2T1", "dp1T0"); + + ctrl_bk_pst = dpt_pdesc_v4_create( + "ctrl_bk_pst", IPPROTO_TCP, + "aa:bb:cc:dd:2:11", "2.2.2.20", 21, + "aa:bb:cc:dd:1:11", "1.1.1.11", 46682, + "dp2T1", "dp1T0"); + /* + * ftp data flow packets + */ + data_fw_pre = dpt_pdesc_v4_create( + "data_fw_pre", IPPROTO_TCP, + "aa:bb:cc:dd:1:11", "1.1.1.11", 49888, + "aa:bb:cc:dd:2:11", "2.2.2.20", data_port, + "dp1T0", "dp2T1"); + + data_fw_pst = dpt_pdesc_v4_create( + "data_fw_pst", IPPROTO_TCP, + "aa:bb:cc:dd:1:11", "1.1.1.11", 49888, + "aa:bb:cc:dd:2:11", "2.2.2.11", data_port, + "dp1T0", "dp2T1"); + + data_bk_pre = dpt_pdesc_v4_create( + "data_bk_pre", IPPROTO_TCP, + "aa:bb:cc:dd:2:11", "2.2.2.11", data_port, + "aa:bb:cc:dd:1:11", "1.1.1.11", 49888, + "dp2T1", "dp1T0"); + + data_bk_pst = dpt_pdesc_v4_create( + "data_bk_pst", IPPROTO_TCP, + "aa:bb:cc:dd:2:11", "2.2.2.20", data_port, + "aa:bb:cc:dd:1:11", "1.1.1.11", 
49888, + "dp2T1", "dp1T0"); + + /* + * Packet descriptors for ftp ctrl flow + */ + struct dpt_tcp_flow ftp_ctrl_call = { + .text[0] = '\0', /* description */ + .isn = {0, 0}, /* initial seq no */ + .desc[DPT_FORW] = { /* Forw pkt descriptors */ + .pre = ctrl_fw_pre, + .pst = ctrl_fw_pst, + }, + .desc[DPT_BACK] = { /* Back pkt descriptors */ + .pre = ctrl_bk_pre, + .pst = ctrl_bk_pst, + }, + .test_cb = NULL, /* Prep and send pkt */ + .post_cb = NULL, /* Fixup pkt exp */ + }; + snprintf(ftp_ctrl_call.text, sizeof(ftp_ctrl_call), "Ctrl"); -struct test_tcp { - struct dp_test_pkt_desc_t *e2w_pre; - struct dp_test_pkt_desc_t *e2w_post; - struct dp_test_pkt_desc_t *w2e_pre; - struct dp_test_pkt_desc_t *w2e_post; - int east_seq; - int east_ack; - int west_seq; - int west_ack; - uint8_t *tcp_opts; /* - * seq/ack cumulative diff for east to west packets. If > 0 then - * packets on west are larger than pkts on east. + * Per-packet flags and data for ftp ctrl flow */ - int e2w_diff; - int w2e_diff; -}; + struct dpt_tcp_flow_pkt ftp_ctrl_pkts[] = { + { DPT_FORW, TH_SYN, 0, NULL, 0, NULL }, + { DPT_BACK, TH_SYN | TH_ACK, 0, NULL, 0, NULL }, + { DPT_FORW, TH_ACK, 0, NULL, 0, NULL }, -/* - * TCP packet, East to West - */ -static void -tcp_pak_rx_e2w(const char *desc, struct test_tcp *tcp, uint16_t tcp_flags, - const char *pre_pload, int pre_plen, - const char *post_pload, int post_plen) -{ - struct dp_test_pkt_desc_t *pre = tcp->e2w_pre; - struct dp_test_pkt_desc_t *post = tcp->e2w_post; - - pre->l4.tcp.flags = tcp_flags; - post->l4.tcp.flags = tcp_flags; - - /* Options are only added to the SYN or SYN|ACK */ - if (tcp_flags & TH_SYN) { - pre->l4.tcp.opts = tcp->tcp_opts; - post->l4.tcp.opts = tcp->tcp_opts; - } else { - pre->l4.tcp.opts = NULL; - post->l4.tcp.opts = NULL; - } - pre->l4.tcp.seq = tcp->east_seq; - pre->l4.tcp.ack = tcp->east_ack; - - post->l4.tcp.seq = tcp->east_seq + tcp->e2w_diff; - post->l4.tcp.ack = tcp->east_ack - tcp->w2e_diff; - - if (!pre_pload || 
!post_pload) - tcp->east_seq += 1; - else - tcp->east_seq += pre_plen; - - npf_alg_ftp_rx(pre, pre_pload, (uint)pre_plen, - post, post_pload, (uint)post_plen, true); - - if (!pre_pload || !post_pload) - tcp->west_ack += 1; - else - tcp->west_ack += post_plen; - - if (pre_plen != post_plen) - tcp->e2w_diff += post_plen - pre_plen; + /* session established */ + { DPT_FORW, TH_ACK, + 0, (char *)"SYST\x0d\x0a", 0, NULL }, -} + { DPT_BACK, TH_ACK, + 0, (char *)"215 UNIX Type: L8\x0d\x0a", 0, NULL }, -/* - * TCP packet, West to East - */ -static void -tcp_pak_rx_w2e(const char *desc, struct test_tcp *tcp, uint16_t tcp_flags, - const char *pre_pload, int pre_plen, - const char *post_pload, int post_plen) -{ - struct dp_test_pkt_desc_t *pre = tcp->w2e_pre; - struct dp_test_pkt_desc_t *post = tcp->w2e_post; + { DPT_FORW, TH_ACK, + 0, (char *)"TYPE I\x0d\x0a", 0, NULL }, - pre->l4.tcp.flags = tcp_flags; - post->l4.tcp.flags = tcp_flags; + { DPT_BACK, TH_ACK, + 0, (char *)"200 Switching to Binary mode.\x0d\x0a", 0, NULL }, - if (tcp_flags & TH_SYN) { - pre->l4.tcp.opts = tcp->tcp_opts; - post->l4.tcp.opts = tcp->tcp_opts; - } else { - pre->l4.tcp.opts = NULL; - post->l4.tcp.opts = NULL; - } - pre->l4.tcp.seq = tcp->west_seq; - pre->l4.tcp.ack = tcp->west_ack; + { DPT_FORW, TH_ACK, + 0, (char *)"PASV\x0d\x0a", 0, NULL }, - post->l4.tcp.seq = tcp->west_seq + tcp->w2e_diff; - post->l4.tcp.ack = tcp->west_ack - tcp->e2w_diff; + /* + * #8. Response: 227. Server telling client which address and + * port to use for data channel. Address is 2.2.2.11, which + * is reverse-dnatd to 2.2.2.20. Port is 9819. (38 == 0x26, + * 91 == 0x5B, 0x265B == 9819). + * + * The alg creates a tuple: "TCP 1.1.1.1:any -> 2.2.2.20:9819" + * in order to detect the data flow which start in the reverse + * direction. 
+ */ + { DPT_BACK, TH_ACK, 0, + (char *)"227 Entering Passive Mode 2,2,2,11,38,91\r\n", + 0, + (char *)"227 Entering Passive Mode 2,2,2,20,38,91\r\n" }, - if (!pre_pload || !post_pload) - tcp->west_seq += 1; - else - tcp->west_seq += pre_plen; + /* Data call is done at this point */ - npf_alg_ftp_rx(pre, pre_pload, (uint)pre_plen, - post, post_pload, (uint)post_plen, false); + { DPT_FORW, TH_ACK | TH_FIN, 0, NULL, 0, NULL }, + { DPT_BACK, TH_ACK | TH_FIN, 0, NULL, 0, NULL }, + { DPT_FORW, TH_ACK, 0, NULL, 0, NULL }, + }; - if (!pre_pload || !post_pload) - tcp->east_ack += 1; - else - tcp->east_ack += post_plen; + /* + * Packet descriptors for ftp data flow + */ + struct dpt_tcp_flow ftp_data_call = { + .text[0] = '\0', /* description */ + .isn = {0, 0}, /* initial seq no */ + .desc[DPT_FORW] = { /* Forw pkt descriptors */ + .pre = data_fw_pre, + .pst = data_fw_pst, + }, + .desc[DPT_BACK] = { /* Back pkt descriptors */ + .pre = data_bk_pre, + .pst = data_bk_pst, + }, + .test_cb = NULL, /* Prep and send pkt */ + .post_cb = NULL, /* Fixup pkt exp */ + }; + snprintf(ftp_data_call.text, sizeof(ftp_data_call), "Data"); - if (pre_plen != post_plen) - tcp->w2e_diff += post_plen - pre_plen; + struct dpt_tcp_flow_pkt ftp_data_pkts[] = { + { DPT_FORW, TH_SYN, 0, NULL, 0, NULL }, + { DPT_BACK, TH_SYN | TH_ACK, 0, NULL, 0, NULL }, + { DPT_FORW, TH_ACK, 0, NULL, 0, NULL }, -} + { DPT_BACK, TH_ACK, 100, NULL, 0, NULL }, + { DPT_FORW, TH_ACK, 0, NULL, 0, NULL }, -/* - * NATing from smaller prefix 10.25.1.0/24 to larger address 159.8.106.21 - */ -DP_DECL_TEST_CASE(npf_alg_ftp, alg_ftp4, NULL, NULL); -DP_START_TEST(alg_ftp4, test) -{ - uint vrfid = VRF_DEFAULT_ID; + { DPT_FORW, TH_ACK | TH_FIN, 0, NULL, 0, NULL }, + { DPT_BACK, TH_ACK | TH_FIN, 0, NULL, 0, NULL }, + { DPT_FORW, TH_ACK, 0, NULL, 0, NULL }, + }; - if (vrfid != VRF_DEFAULT_ID) - dp_test_netlink_add_vrf(vrfid, 1); + /* Start of ftp ctrl flow (pkts 0 - 8) */ + dpt_tcp_call(&ftp_ctrl_call, ftp_ctrl_pkts, 
ARRAY_SIZE(ftp_ctrl_pkts), + 0, 8, NULL, 0); - /* Setup interfaces and neighbours */ - dp_test_nl_add_ip_addr_and_connected_vrf("dp1T0", "10.25.1.1/24", - vrfid); + /* ftp data flow */ + dpt_tcp_call(&ftp_data_call, ftp_data_pkts, ARRAY_SIZE(ftp_data_pkts), + 0, 0, NULL, 0); - /* prefix 159.8.106.16/28 */ - dp_test_nl_add_ip_addr_and_connected_vrf("dp1T1", "159.8.106.21/28", - vrfid); + /* End of ftp ctrl flow (pkts 9 - end) */ + dpt_tcp_call(&ftp_ctrl_call, ftp_ctrl_pkts, ARRAY_SIZE(ftp_ctrl_pkts), + 9, 0, NULL, 0); - dp_test_netlink_add_neigh("dp1T0", "10.25.1.20", "0:50:56:ac:ab:30"); + dp_test_npf_dnat_del(dnat.ifname, dnat.rule, true); + + free(ctrl_fw_pre); + free(ctrl_fw_pst); + free(ctrl_bk_pre); + free(ctrl_bk_pst); - dp_test_netlink_add_neigh("dp1T1", "159.8.106.17", - "aa:bb:cc:dd:2:11"); + free(data_fw_pre); + free(data_fw_pst); + free(data_bk_pre); + free(data_bk_pst); - dp_test_netlink_add_route("134.158.69.0/24 nh 159.8.106.17 int:dp1T1"); + dp_test_npf_cleanup(); + +} DP_END_TEST; + + +/* + * ftp7 - SNAT, Active ftp. + */ +DP_DECL_TEST_CASE(npf_alg_ftp, ftp7, dpt_alg_ftp_setup, dpt_alg_ftp_teardown); +DP_START_TEST_FULL_RUN(ftp7, test) +{ + struct dp_test_pkt_desc_t *ctrl_fw_pre, *ctrl_fw_pst; + struct dp_test_pkt_desc_t *ctrl_bk_pre, *ctrl_bk_pst; + struct dp_test_pkt_desc_t *data_fw_pre, *data_fw_pst; + struct dp_test_pkt_desc_t *data_bk_pre, *data_bk_pst; + + /* ftp port appears as 2 numbers in the string */ + uint8_t data_port_upr = 38; + uint8_t data_port_lwr = 91; + uint16_t data_port = (data_port_upr * 256) + data_port_lwr; /* * Add SNAT rule. 
@@ -1090,302 +1396,196 @@ DP_START_TEST(alg_ftp4, test) struct dp_test_npf_nat_rule_t snat = { .desc = "snat rule", .rule = "10", - .ifname = "dp1T1", + .ifname = "dp2T1", .proto = IPPROTO_TCP, .map = "dynamic", - .from_addr = "10.25.1.0/24", + .port_alloc = NULL, + .from_addr = "1.1.1.11", .from_port = NULL, .to_addr = NULL, .to_port = NULL, - .trans_addr = "masquerade", + .trans_addr = "2.2.2.20", .trans_port = NULL }; dp_test_npf_snat_add(&snat, true); /* - * TCP packet + * ftp control flow packets + */ + ctrl_fw_pre = dpt_pdesc_v4_create( + "ctrl_fw_pre", IPPROTO_TCP, + "aa:bb:cc:dd:1:11", "1.1.1.11", 46682, + "aa:bb:cc:dd:2:11", "2.2.2.11", 21, + "dp1T0", "dp2T1"); + + ctrl_fw_pst = dpt_pdesc_v4_create( + "ctrl_fw_pst", IPPROTO_TCP, + "aa:bb:cc:dd:1:11", "2.2.2.20", 46682, + "aa:bb:cc:dd:2:11", "2.2.2.11", 21, + "dp1T0", "dp2T1"); + + ctrl_bk_pre = dpt_pdesc_v4_create( + "ctrl_bk_pre", IPPROTO_TCP, + "aa:bb:cc:dd:2:11", "2.2.2.11", 21, + "aa:bb:cc:dd:1:11", "2.2.2.20", 46682, + "dp2T1", "dp1T0"); + + ctrl_bk_pst = dpt_pdesc_v4_create( + "ctrl_bk_pst", IPPROTO_TCP, + "aa:bb:cc:dd:2:11", "2.2.2.11", 21, + "aa:bb:cc:dd:1:11", "1.1.1.11", 46682, + "dp2T1", "dp1T0"); + + /* + * ftp data flow packets */ - struct dp_test_pkt_desc_t fwd_in = { - .text = "TCP Forwards In", - .len = 0, - .ether_type = ETHER_TYPE_IPv4, - .l3_src = "10.25.1.20", - .l2_src = "00:50:56:ac:ab:30", - .l3_dst = "134.158.69.171", - .l2_dst = "00:00:a4:00:00:64", - .proto = IPPROTO_TCP, - .l4 = { - .tcp = { - .sport = 58047, - .dport = 21, - .flags = 0, - .seq = 0, - .ack = 0, - .win = 14600, - .opts = NULL - } + data_fw_pre = dpt_pdesc_v4_create( + "data_fw_pre", IPPROTO_TCP, + "aa:bb:cc:dd:1:11", "1.1.1.11", data_port, + "aa:bb:cc:dd:2:11", "2.2.2.11", 20, + "dp1T0", "dp2T1"); + + data_fw_pst = dpt_pdesc_v4_create( + "data_fw_pst", IPPROTO_TCP, + "aa:bb:cc:dd:1:11", "2.2.2.20", data_port, + "aa:bb:cc:dd:2:11", "2.2.2.11", 20, + "dp1T0", "dp2T1"); + + data_bk_pre = dpt_pdesc_v4_create( + 
"data_bk_pre", IPPROTO_TCP, + "aa:bb:cc:dd:2:11", "2.2.2.11", 20, + "aa:bb:cc:dd:1:11", "2.2.2.20", data_port, + "dp2T1", "dp1T0"); + + data_bk_pst = dpt_pdesc_v4_create( + "data_bk_pst", IPPROTO_TCP, + "aa:bb:cc:dd:2:11", "2.2.2.11", 20, + "aa:bb:cc:dd:1:11", "1.1.1.11", data_port, + "dp2T1", "dp1T0"); + + /* + * Packet descriptors for ftp ctrl flow + */ + struct dpt_tcp_flow ftp_ctrl_call = { + .text[0] = '\0', /* description */ + .isn = {0, 0}, /* initial seq no */ + .desc[DPT_FORW] = { /* Forw pkt descriptors */ + .pre = ctrl_fw_pre, + .pst = ctrl_fw_pst, }, - .rx_intf = "dp1T0", /* East */ - .tx_intf = "dp1T1" /* West */ - }; - - struct dp_test_pkt_desc_t rev_in = { - .text = "TCP Reverse In", - .len = 0, - .ether_type = ETHER_TYPE_IPv4, - .l3_src = "134.158.69.171", - .l2_src = "aa:bb:cc:dd:2:11", - .l3_dst = "159.8.106.21", - .l2_dst = "00:00:a4:00:00:64", - .proto = IPPROTO_TCP, - .l4 = { - .tcp = { - .sport = 21, - .dport = 58047, - .flags = 0, - .seq = 0, - .ack = 0, - .win = 14480, - .opts = NULL - } + .desc[DPT_BACK] = { /* Back pkt descriptors */ + .pre = ctrl_bk_pre, + .pst = ctrl_bk_pst, }, - .rx_intf = "dp1T1", - .tx_intf = "dp1T0" + .test_cb = NULL, /* Prep and send pkt */ + .post_cb = NULL, /* Fixup pkt exp */ }; + snprintf(ftp_ctrl_call.text, sizeof(ftp_ctrl_call), "Ctrl"); - struct dp_test_pkt_desc_t fwd_out, rev_out; + /* + * Per-packet flags and data for ftp ctrl flow + */ + struct dpt_tcp_flow_pkt ftp_ctrl_pkts[] = { + { DPT_FORW, TH_SYN, 0, NULL, 0, NULL }, + { DPT_BACK, TH_SYN | TH_ACK, 0, NULL, 0, NULL }, + { DPT_FORW, TH_ACK, 0, NULL, 0, NULL }, - /* East to west l2 and l3 changes at output */ - fwd_out = fwd_in; - fwd_out.l3_src = "159.8.106.21"; - fwd_out.l2_src = "00:00:a4:00:00:64"; - fwd_out.l2_dst = "aa:bb:cc:dd:2:11"; + /* session established */ - /* West to east l2 and l3 changes at output */ - rev_out = rev_in; - rev_out.l3_dst = "10.25.1.20"; - rev_out.l2_src = "00:00:a4:00:00:64"; - rev_out.l2_dst = "00:50:56:ac:ab:30"; + /* + 
* PORT command. Client specifies which client-side port that + * the server should use for data flow. Port is + * 9819. (38 == 0x26, 91 == 0x5B, 0x265B == 9819) + */ + { DPT_FORW, TH_ACK, 0, + (char *)"PORT 1,1,1,11,38,91\x0d\x0a", + 0, + (char *)"PORT 2,2,2,20,38,91\x0d\x0a" }, - uint8_t tcp_opts[] = { - 2, 4, 5, 180, /* MSS 1460 bytes */ - 4, 2, /* SACK permitted */ - 1, /* NOOP */ - 1, /* NOOP */ - 3, 3, 4, /* Window scale 4 (x16) */ - 1, /* NOOP */ - 0 /* marks end of opts */ - }; + { DPT_BACK, TH_ACK, 0, NULL, 0, NULL }, - /* - * TCP control structure - */ - struct test_tcp ttcp = { - .e2w_pre = &fwd_in, - .e2w_post = &fwd_out, - .w2e_pre = &rev_in, - .w2e_post = &rev_out, - .east_seq = 0, - .east_ack = 0, - .west_seq = 0, - .west_ack = 0, - .tcp_opts = tcp_opts, - .e2w_diff = 0, - .w2e_diff = 0, - }; - uint i; - -#define FWD true -#define REV false - - /* Packet flow data structure */ - struct ftp_call { - bool fwd; - const char *desc; - uint16_t flags; - const char *prepl; - const char *pstpl; + { DPT_FORW, TH_ACK | TH_FIN, 0, NULL, 0, NULL }, + { DPT_BACK, TH_ACK | TH_FIN, 0, NULL, 0, NULL }, + { DPT_FORW, TH_ACK, 0, NULL, 0, NULL }, }; - /* - * ftp call. 
- */ - struct ftp_call ftp_call_start[] = { - {FWD, NULL, TH_SYN, NULL, NULL}, - {REV, NULL, TH_SYN|TH_ACK, NULL, NULL}, - {FWD, NULL, TH_ACK, NULL, NULL} + /* + * Packet descriptors for ftp data flow + */ + struct dpt_tcp_flow ftp_data_call = { + .text[0] = '\0', /* description */ + .isn = {0, 0}, /* initial seq no */ + .desc[DPT_FORW] = { /* Forw pkt descriptors */ + .pre = data_fw_pre, + .pst = data_fw_pst, + }, + .desc[DPT_BACK] = { /* Back pkt descriptors */ + .pre = data_bk_pre, + .pst = data_bk_pst, + }, + .test_cb = NULL, /* Prep and send pkt */ + .post_cb = NULL, /* Fixup pkt exp */ }; + snprintf(ftp_data_call.text, sizeof(ftp_data_call), "Data"); - struct ftp_call ftp_call_end[] = { - {FWD, NULL, TH_FIN|TH_ACK, NULL, NULL}, - {REV, NULL, TH_FIN|TH_ACK, NULL, NULL}, - {FWD, NULL, TH_ACK, NULL, NULL}, - }; + struct dpt_tcp_flow_pkt ftp_data_pkts[] = { + { DPT_BACK, TH_SYN, 0, NULL, 0, NULL }, + { DPT_FORW, TH_SYN | TH_ACK, 0, NULL, 0, NULL }, + { DPT_BACK, TH_ACK, 0, NULL, 0, NULL }, + + { DPT_BACK, TH_ACK, 100, NULL, 0, NULL }, + { DPT_FORW, TH_ACK, 0, NULL, 0, NULL }, - struct ftp_call ftp_call[] = { - {FWD, "1", TH_ACK, "PORT"}, - {REV, "2", TH_ACK|TH_PUSH, - "200 POST command successful. 
Consider using PASV.\x0d\x0a", - NULL}, - {FWD, "3", TH_ACK|TH_PUSH, "LIST\x0d\x0a", NULL}, - {REV, "4", TH_ACK|TH_PUSH, - "150 Here comes the directory listing.\x0d\x0a", NULL}, - {REV, "5", TH_ACK|TH_PUSH, - "226 Directory send OK.\x0d\x0a", NULL}, - {FWD, "6", TH_ACK, NULL, NULL}, + { DPT_BACK, TH_ACK | TH_FIN, 0, NULL, 0, NULL }, + { DPT_FORW, TH_ACK | TH_FIN, 0, NULL, 0, NULL }, + { DPT_BACK, TH_ACK, 0, NULL, 0, NULL }, }; - /* - * TCP Call setup - */ - for (i = 0; i < ARRAY_SIZE(ftp_call_start); i++) { - struct ftp_call *call = ftp_call_start; - int pre_plen = 0, pst_plen = 0; - - if (call[i].prepl) - pre_plen = strlen(call[i].prepl); - if (call[i].pstpl) - pst_plen = strlen(call[i].pstpl); - - if (call[i].fwd) { - tcp_pak_rx_e2w(call[i].desc, &ttcp, call[i].flags, - call[i].prepl, pre_plen, - call[i].pstpl, pst_plen); - } else { - tcp_pak_rx_w2e(call[i].desc, &ttcp, call[i].flags, - call[i].prepl, pre_plen, - call[i].pstpl, pst_plen); - } - } + /* Start of ftp ctrl flow (pkts 0 - 4) */ + dpt_tcp_call(&ftp_ctrl_call, ftp_ctrl_pkts, ARRAY_SIZE(ftp_ctrl_pkts), + 0, 4, NULL, 0); - /* - * FTP exchange: PORT, LIST, 150, 226 - */ - uint16_t port = 51712; /* "202,0" */ - uint rpt, repeats = NPF_ALG_FTP5_REPEATS; - struct test_tcp ttcp_copy = ttcp; + /* ftp data flow */ + dpt_tcp_call(&ftp_data_call, ftp_data_pkts, ARRAY_SIZE(ftp_data_pkts), + 0, 0, NULL, 0); - for (rpt = 0; rpt < repeats; rpt++) { - /* - * This simulates a retransmission of the previous set of - * packets. - */ - if (rpt == 6) - ttcp = ttcp_copy; - - ttcp_copy = ttcp; - - for (i = 0; i < ARRAY_SIZE(ftp_call); i++) { - struct ftp_call *call = ftp_call; - int pre_plen = 0, pst_plen = 0; - const char *prepl = call[i].prepl; - const char *pstpl = - call[i].pstpl ? 
call[i].pstpl : prepl; - char pre_pload[100], pst_pload[100]; - - if (prepl && !strcmp(prepl, "PORT")) { - uint16_t p_msb = ((port+rpt) >> 8) & 0xFF; - uint16_t p_lsb = (port+rpt) & 0xFF; - - snprintf(pre_pload, sizeof(pre_pload), - "PORT 10,25,1,20,%u,%u\x0d\x0a", - p_msb, p_lsb); - snprintf(pst_pload, sizeof(pst_pload), - "PORT 159,8,106,21,%u,%u\x0d\x0a", - p_msb, p_lsb); - - prepl = pre_pload; - pstpl = pst_pload; - } - - if (prepl) - pre_plen = strlen(prepl); - if (pstpl) - pst_plen = strlen(pstpl); - - if (call[i].fwd) { - tcp_pak_rx_e2w(call[i].desc, &ttcp, - call[i].flags, prepl, pre_plen, - pstpl, pst_plen); - } else { - tcp_pak_rx_w2e(call[i].desc, &ttcp, - call[i].flags, prepl, pre_plen, - pstpl, pst_plen); - } - } - } + /* End of ftp ctrl flow (pkts 5 - end) */ + dpt_tcp_call(&ftp_ctrl_call, ftp_ctrl_pkts, ARRAY_SIZE(ftp_ctrl_pkts), + 5, 0, NULL, 0); - /* - * TCP Call finish - */ - for (i = 0; i < ARRAY_SIZE(ftp_call_end); i++) { - struct ftp_call *call = ftp_call_end; - int pre_plen = 0, pst_plen = 0; - - if (call[i].prepl) - pre_plen = strlen(call[i].prepl); - if (call[i].pstpl) - pst_plen = strlen(call[i].pstpl); - - if (call[i].fwd) { - tcp_pak_rx_e2w(call[i].desc, &ttcp, call[i].flags, - call[i].prepl, pre_plen, - call[i].pstpl, pst_plen); - } else { - tcp_pak_rx_w2e(call[i].desc, &ttcp, call[i].flags, - call[i].prepl, pre_plen, - call[i].pstpl, pst_plen); - } - } - - /* Cleanup */ - - dp_test_netlink_del_route("134.158.69.0/24 nh 159.8.106.17 int:dp1T1"); dp_test_npf_snat_del(snat.ifname, snat.rule, true); - dp_test_npf_cleanup(); - - dp_test_nl_del_ip_addr_and_connected_vrf("dp1T0", "10.25.1.1/24", - vrfid); - dp_test_nl_del_ip_addr_and_connected_vrf("dp1T1", "159.8.106.21/28", - vrfid); + free(ctrl_fw_pre); + free(ctrl_fw_pst); + free(ctrl_bk_pre); + free(ctrl_bk_pst); - dp_test_netlink_del_neigh("dp1T0", "10.25.1.20", - "0:50:56:ac:ab:30"); - dp_test_netlink_del_neigh("dp1T1", "159.8.106.17", - "aa:bb:cc:dd:2:11"); + free(data_fw_pre); + 
free(data_fw_pst); + free(data_bk_pre); + free(data_bk_pst); - if (vrfid != VRF_DEFAULT_ID) - dp_test_netlink_del_vrf(vrfid, 0); + dp_test_npf_cleanup(); } DP_END_TEST; + /* - * NATing from larger prefix 10.250.100.0/24 to smaller address 15.8.6.1 + * ftp9 - SNAT, Active ftp. Translation address is larger. */ -DP_DECL_TEST_CASE(npf_alg_ftp, alg_ftp5, NULL, NULL); -DP_START_TEST(alg_ftp5, test) +DP_DECL_TEST_CASE(npf_alg_ftp, ftp8, dpt_alg_ftp_setup, dpt_alg_ftp_teardown); +DP_START_TEST(ftp8, test) { - uint vrfid = VRF_DEFAULT_ID; - - if (vrfid != VRF_DEFAULT_ID) - dp_test_netlink_add_vrf(vrfid, 1); - - /* Setup interfaces and neighbours */ - dp_test_nl_add_ip_addr_and_connected_vrf("dp1T0", "10.250.100.1/24", - vrfid); - - /* prefix 15.8.6.16/28 */ - dp_test_nl_add_ip_addr_and_connected_vrf("dp1T1", "15.8.6.1/28", - vrfid); - - dp_test_netlink_add_neigh("dp1T0", "10.250.100.20", "0:50:56:ac:ab:30"); - - dp_test_netlink_add_neigh("dp1T1", "15.8.6.2", - "aa:bb:cc:dd:2:11"); + struct dp_test_pkt_desc_t *ctrl_fw_pre, *ctrl_fw_pst; + struct dp_test_pkt_desc_t *ctrl_bk_pre, *ctrl_bk_pst; + struct dp_test_pkt_desc_t *data_fw_pre, *data_fw_pst; + struct dp_test_pkt_desc_t *data_bk_pre, *data_bk_pst; - dp_test_netlink_add_route("134.158.69.0/24 nh 15.8.6.2 int:dp1T1"); + /* ftp port appears as 2 numbers in the string */ + uint8_t data_port_upr = 38; + uint8_t data_port_lwr = 91; + uint16_t data_port = (data_port_upr * 256) + data_port_lwr; /* * Add SNAT rule. 
@@ -1393,739 +1593,592 @@ DP_START_TEST(alg_ftp5, test) struct dp_test_npf_nat_rule_t snat = { .desc = "snat rule", .rule = "10", - .ifname = "dp1T1", + .ifname = "dp2T1", .proto = IPPROTO_TCP, .map = "dynamic", - .from_addr = "10.250.100.0/24", + .port_alloc = NULL, + .from_addr = "1.1.1.11", .from_port = NULL, .to_addr = NULL, .to_port = NULL, - .trans_addr = "masquerade", + .trans_addr = "2.2.2.200", .trans_port = NULL }; dp_test_npf_snat_add(&snat, true); /* - * TCP packet + * ftp control flow packets */ - struct dp_test_pkt_desc_t fwd_in = { - .text = "TCP Forwards In", - .len = 0, - .ether_type = ETHER_TYPE_IPv4, - .l3_src = "10.250.100.20", - .l2_src = "00:50:56:ac:ab:30", - .l3_dst = "134.158.69.171", - .l2_dst = "00:00:a4:00:00:64", - .proto = IPPROTO_TCP, - .l4 = { - .tcp = { - .sport = 58047, - .dport = 21, - .flags = 0, - .seq = 0, - .ack = 0, - .win = 14600, - .opts = NULL - } + ctrl_fw_pre = dpt_pdesc_v4_create( + "ctrl_fw_pre", IPPROTO_TCP, + "aa:bb:cc:dd:1:11", "1.1.1.11", 46682, + "aa:bb:cc:dd:2:11", "2.2.2.11", 21, + "dp1T0", "dp2T1"); + + ctrl_fw_pst = dpt_pdesc_v4_create( + "ctrl_fw_pst", IPPROTO_TCP, + "aa:bb:cc:dd:1:11", "2.2.2.200", 46682, + "aa:bb:cc:dd:2:11", "2.2.2.11", 21, + "dp1T0", "dp2T1"); + + ctrl_bk_pre = dpt_pdesc_v4_create( + "ctrl_bk_pre", IPPROTO_TCP, + "aa:bb:cc:dd:2:11", "2.2.2.11", 21, + "aa:bb:cc:dd:1:11", "2.2.2.200", 46682, + "dp2T1", "dp1T0"); + + ctrl_bk_pst = dpt_pdesc_v4_create( + "ctrl_bk_pst", IPPROTO_TCP, + "aa:bb:cc:dd:2:11", "2.2.2.11", 21, + "aa:bb:cc:dd:1:11", "1.1.1.11", 46682, + "dp2T1", "dp1T0"); + + /* + * ftp data flow packets + */ + data_fw_pre = dpt_pdesc_v4_create( + "data_fw_pre", IPPROTO_TCP, + "aa:bb:cc:dd:1:11", "1.1.1.11", data_port, + "aa:bb:cc:dd:2:11", "2.2.2.11", 20, + "dp1T0", "dp2T1"); + + data_fw_pst = dpt_pdesc_v4_create( + "data_fw_pst", IPPROTO_TCP, + "aa:bb:cc:dd:1:11", "2.2.2.200", data_port, + "aa:bb:cc:dd:2:11", "2.2.2.11", 20, + "dp1T0", "dp2T1"); + + data_bk_pre = 
dpt_pdesc_v4_create( + "data_bk_pre", IPPROTO_TCP, + "aa:bb:cc:dd:2:11", "2.2.2.11", 20, + "aa:bb:cc:dd:1:11", "2.2.2.200", data_port, + "dp2T1", "dp1T0"); + + data_bk_pst = dpt_pdesc_v4_create( + "data_bk_pst", IPPROTO_TCP, + "aa:bb:cc:dd:2:11", "2.2.2.11", 20, + "aa:bb:cc:dd:1:11", "1.1.1.11", data_port, + "dp2T1", "dp1T0"); + + /* + * Packet descriptors for ftp ctrl flow + */ + struct dpt_tcp_flow ftp_ctrl_call = { + .text[0] = '\0', /* description */ + .isn = {0, 0}, /* initial seq no */ + .desc[DPT_FORW] = { /* Forw pkt descriptors */ + .pre = ctrl_fw_pre, + .pst = ctrl_fw_pst, }, - .rx_intf = "dp1T0", /* East */ - .tx_intf = "dp1T1" /* West */ - }; - - struct dp_test_pkt_desc_t rev_in = { - .text = "TCP Reverse In", - .len = 0, - .ether_type = ETHER_TYPE_IPv4, - .l3_src = "134.158.69.171", - .l2_src = "aa:bb:cc:dd:2:11", - .l3_dst = "15.8.6.1", - .l2_dst = "00:00:a4:00:00:64", - .proto = IPPROTO_TCP, - .l4 = { - .tcp = { - .sport = 21, - .dport = 58047, - .flags = 0, - .seq = 0, - .ack = 0, - .win = 14480, - .opts = NULL - } + .desc[DPT_BACK] = { /* Back pkt descriptors */ + .pre = ctrl_bk_pre, + .pst = ctrl_bk_pst, }, - .rx_intf = "dp1T1", - .tx_intf = "dp1T0" + .test_cb = NULL, /* Prep and send pkt */ + .post_cb = NULL, /* Fixup pkt exp */ }; + snprintf(ftp_ctrl_call.text, sizeof(ftp_ctrl_call), "Ctrl"); - struct dp_test_pkt_desc_t fwd_out, rev_out; + /* + * Per-packet flags and data for ftp ctrl flow + */ + struct dpt_tcp_flow_pkt ftp_ctrl_pkts[] = { + { DPT_FORW, TH_SYN, 0, NULL, 0, NULL }, + { DPT_BACK, TH_SYN | TH_ACK, 0, NULL, 0, NULL }, + { DPT_FORW, TH_ACK, 0, NULL, 0, NULL }, - /* East to west l2 and l3 changes at output */ - fwd_out = fwd_in; - fwd_out.l3_src = "15.8.6.1"; - fwd_out.l2_src = "00:00:a4:00:00:64"; - fwd_out.l2_dst = "aa:bb:cc:dd:2:11"; + /* session established */ - /* West to east l2 and l3 changes at output */ - rev_out = rev_in; - rev_out.l3_dst = "10.250.100.20"; - rev_out.l2_src = "00:00:a4:00:00:64"; - rev_out.l2_dst = 
"00:50:56:ac:ab:30"; + /* + * PORT command. Client specifies which client-side port that + * the server should use for data flow. Port is + * 9819. (38 == 0x26, 91 == 0x5B, 0x265B == 9819) + */ + { DPT_FORW, TH_ACK, 0, + (char *)"PORT 1,1,1,11,38,91\x0d\x0a", + 0, + (char *)"PORT 2,2,2,200,38,91\x0d\x0a" }, - uint8_t tcp_opts[] = { - 2, 4, 5, 180, /* MSS 1460 bytes */ - 4, 2, /* SACK permitted */ - 1, /* NOOP */ - 1, /* NOOP */ - 3, 3, 4, /* Window scale 4 (x16) */ - 1, /* NOOP */ - 0 /* marks end of opts */ - }; + { DPT_BACK, TH_ACK, 0, NULL, 0, NULL }, - /* - * TCP control structure - */ - struct test_tcp ttcp = { - .e2w_pre = &fwd_in, - .e2w_post = &fwd_out, - .w2e_pre = &rev_in, - .w2e_post = &rev_out, - .east_seq = 0, - .east_ack = 0, - .west_seq = 0, - .west_ack = 0, - .tcp_opts = tcp_opts, - .e2w_diff = 0, - .w2e_diff = 0, - }; - uint i; - -#define FWD true -#define REV false - - /* Packet flow data structure */ - struct ftp_call { - bool fwd; - const char *desc; - uint16_t flags; - const char *prepl; - const char *pstpl; + { DPT_FORW, TH_ACK | TH_FIN, 0, NULL, 0, NULL }, + { DPT_BACK, TH_ACK | TH_FIN, 0, NULL, 0, NULL }, + { DPT_FORW, TH_ACK, 0, NULL, 0, NULL }, }; - /* - * ftp call. 
- */ - struct ftp_call ftp_call_start[] = { - {FWD, NULL, TH_SYN, NULL, NULL}, - {REV, NULL, TH_SYN|TH_ACK, NULL, NULL}, - {FWD, NULL, TH_ACK, NULL, NULL} + /* + * Packet descriptors for ftp data flow + */ + struct dpt_tcp_flow ftp_data_call = { + .text[0] = '\0', /* description */ + .isn = {0, 0}, /* initial seq no */ + .desc[DPT_FORW] = { /* Forw pkt descriptors */ + .pre = data_fw_pre, + .pst = data_fw_pst, + }, + .desc[DPT_BACK] = { /* Back pkt descriptors */ + .pre = data_bk_pre, + .pst = data_bk_pst, + }, + .test_cb = NULL, /* Prep and send pkt */ + .post_cb = NULL, /* Fixup pkt exp */ }; + snprintf(ftp_data_call.text, sizeof(ftp_data_call), "Data"); - struct ftp_call ftp_call_end[] = { - {FWD, NULL, TH_FIN|TH_ACK, NULL, NULL}, - {REV, NULL, TH_FIN|TH_ACK, NULL, NULL}, - {FWD, NULL, TH_ACK, NULL, NULL}, - }; + struct dpt_tcp_flow_pkt ftp_data_pkts[] = { + { DPT_BACK, TH_SYN, 0, NULL, 0, NULL }, + { DPT_FORW, TH_SYN | TH_ACK, 0, NULL, 0, NULL }, + { DPT_BACK, TH_ACK, 0, NULL, 0, NULL }, - struct ftp_call ftp_call[] = { - {FWD, "1", TH_ACK, "PORT"}, - {REV, "2", TH_ACK|TH_PUSH, - "200 POST command successful. 
Consider using PASV.\x0d\x0a", - NULL}, + { DPT_BACK, TH_ACK, 100, NULL, 0, NULL }, + { DPT_FORW, TH_ACK, 0, NULL, 0, NULL }, - {FWD, "3", TH_ACK|TH_PUSH, - "LIST\x0d\x0a", NULL}, - {REV, "4", TH_ACK|TH_PUSH, - "150 Here comes the directory listing.\x0d\x0a", NULL}, - {REV, "5", TH_ACK|TH_PUSH, - "226 Directory send OK.\x0d\x0a", NULL}, - {FWD, "6", TH_ACK, NULL, NULL}, + { DPT_BACK, TH_ACK | TH_FIN, 0, NULL, 0, NULL }, + { DPT_FORW, TH_ACK | TH_FIN, 0, NULL, 0, NULL }, + { DPT_BACK, TH_ACK, 0, NULL, 0, NULL }, }; - /* - * TCP Call setup - */ - for (i = 0; i < ARRAY_SIZE(ftp_call_start); i++) { - struct ftp_call *call = ftp_call_start; - int pre_plen = 0, pst_plen = 0; - - if (call[i].prepl) - pre_plen = strlen(call[i].prepl); - if (call[i].pstpl) - pst_plen = strlen(call[i].pstpl); - - if (call[i].fwd) { - tcp_pak_rx_e2w(call[i].desc, &ttcp, call[i].flags, - call[i].prepl, pre_plen, - call[i].pstpl, pst_plen); - } else { - tcp_pak_rx_w2e(call[i].desc, &ttcp, call[i].flags, - call[i].prepl, pre_plen, - call[i].pstpl, pst_plen); - } - } + /* Start of ftp ctrl flow (pkts 0 - 4) */ + dpt_tcp_call(&ftp_ctrl_call, ftp_ctrl_pkts, ARRAY_SIZE(ftp_ctrl_pkts), + 0, 4, NULL, 0); - /* - * FTP exchange: PORT, LIST, 150, 226 - */ - uint16_t port = 51712; /* "202,0" */ - uint rpt, repeats = NPF_ALG_FTP6_REPEATS; - struct test_tcp ttcp_copy = ttcp; + /* ftp data flow */ + dpt_tcp_call(&ftp_data_call, ftp_data_pkts, ARRAY_SIZE(ftp_data_pkts), + 0, 0, NULL, 0); - for (rpt = 0; rpt < repeats; rpt++) { - /* - * This simulates a retransmission of the previous set of - * packets. - */ - if (rpt == 6) - ttcp = ttcp_copy; - - ttcp_copy = ttcp; - - for (i = 0; i < ARRAY_SIZE(ftp_call); i++) { - struct ftp_call *call = ftp_call; - int pre_plen = 0, pst_plen = 0; - const char *prepl = call[i].prepl; - const char *pstpl = - call[i].pstpl ? 
call[i].pstpl : prepl; - char pre_pload[100], pst_pload[100]; - - if (prepl && !strcmp(prepl, "PORT")) { - uint16_t p_msb = ((port+rpt) >> 8) & 0xFF; - uint16_t p_lsb = (port+rpt) & 0xFF; - - snprintf(pre_pload, sizeof(pre_pload), - "PORT 10,250,100,20,%u,%u\x0d\x0a", - p_msb, p_lsb); - snprintf(pst_pload, sizeof(pst_pload), - "PORT 15,8,6,1,%u,%u\x0d\x0a", - p_msb, p_lsb); - - prepl = pre_pload; - pstpl = pst_pload; - } - - if (prepl) - pre_plen = strlen(prepl); - if (pstpl) - pst_plen = strlen(pstpl); - - if (call[i].fwd) { - tcp_pak_rx_e2w(call[i].desc, &ttcp, - call[i].flags, prepl, pre_plen, - pstpl, pst_plen); - } else { - tcp_pak_rx_w2e(call[i].desc, &ttcp, - call[i].flags, prepl, pre_plen, - pstpl, pst_plen); - } - } - } + /* End of ftp ctrl flow (pkts 5 - end) */ + dpt_tcp_call(&ftp_ctrl_call, ftp_ctrl_pkts, ARRAY_SIZE(ftp_ctrl_pkts), + 5, 0, NULL, 0); - /* - * TCP Call finish - */ - for (i = 0; i < ARRAY_SIZE(ftp_call_end); i++) { - struct ftp_call *call = ftp_call_end; - int pre_plen = 0, pst_plen = 0; - - if (call[i].prepl) - pre_plen = strlen(call[i].prepl); - if (call[i].pstpl) - pst_plen = strlen(call[i].pstpl); - - if (call[i].fwd) { - tcp_pak_rx_e2w(call[i].desc, &ttcp, call[i].flags, - call[i].prepl, pre_plen, - call[i].pstpl, pst_plen); - } else { - tcp_pak_rx_w2e(call[i].desc, &ttcp, call[i].flags, - call[i].prepl, pre_plen, - call[i].pstpl, pst_plen); - } - } - - /* Cleanup */ - - dp_test_netlink_del_route("134.158.69.0/24 nh 15.8.6.2 int:dp1T1"); dp_test_npf_snat_del(snat.ifname, snat.rule, true); - dp_test_npf_cleanup(); + free(ctrl_fw_pre); + free(ctrl_fw_pst); + free(ctrl_bk_pre); + free(ctrl_bk_pst); - dp_test_nl_del_ip_addr_and_connected_vrf("dp1T0", "10.250.100.1/24", - vrfid); - dp_test_nl_del_ip_addr_and_connected_vrf("dp1T1", "15.8.6.1/28", - vrfid); + free(data_fw_pre); + free(data_fw_pst); + free(data_bk_pre); + free(data_bk_pst); - dp_test_netlink_del_neigh("dp1T0", "10.250.100.20", - "0:50:56:ac:ab:30"); - 
dp_test_netlink_del_neigh("dp1T1", "15.8.6.2", - "aa:bb:cc:dd:2:11"); - - if (vrfid != VRF_DEFAULT_ID) - dp_test_netlink_del_vrf(vrfid, 0); + dp_test_npf_cleanup(); } DP_END_TEST; -/********************************************************************* - * alg_ftp6 - * - * Same as alg_ftp3, except we stop all packets and delete the vrf just after - * the secondary tuple has been created. +/* + * ftp9 - SNAT, Active ftp. Translation address is smaller. */ - -DP_DECL_TEST_CASE(npf_alg_ftp, alg_ftp6, NULL, NULL); -DP_START_TEST(alg_ftp6, test) +DP_DECL_TEST_CASE(npf_alg_ftp, ftp9, dpt_alg_ftp_setup, dpt_alg_ftp_teardown); +DP_START_TEST(ftp9, test) { - uint vrfid = 69; - - if (vrfid != VRF_DEFAULT_ID) - dp_test_netlink_add_vrf(vrfid, 1); + struct dp_test_pkt_desc_t *ctrl_fw_pre, *ctrl_fw_pst; + struct dp_test_pkt_desc_t *ctrl_bk_pre, *ctrl_bk_pst; + struct dp_test_pkt_desc_t *data_fw_pre, *data_fw_pst; + struct dp_test_pkt_desc_t *data_bk_pre, *data_bk_pst; - /* Setup interfaces and neighbours */ - dp_test_nl_add_ip_addr_and_connected_vrf("dp1T0", "1.1.1.1/24", - vrfid); - dp_test_nl_add_ip_addr_and_connected_vrf("dp1T1", "2.2.2.2/24", - vrfid); - - dp_test_netlink_add_neigh("dp1T0", "1.1.1.11", - "aa:bb:cc:dd:1:11"); - dp_test_netlink_add_neigh("dp1T0", "1.1.1.12", - "aa:bb:cc:dd:1:12"); - dp_test_netlink_add_neigh("dp1T1", "2.2.2.11", - "aa:bb:cc:dd:2:11"); - dp_test_netlink_add_neigh("dp1T1", "2.2.2.12", - "aa:bb:cc:dd:2:12"); + /* ftp port appears as 2 numbers in the string */ + uint8_t data_port_upr = 38; + uint8_t data_port_lwr = 91; + uint16_t data_port = (data_port_upr * 256) + data_port_lwr; /* - * Add DNAT rule. + * Add SNAT rule. 
*/ - struct dp_test_npf_nat_rule_t dnat = { - .desc = "dnat rule", + struct dp_test_npf_nat_rule_t snat = { + .desc = "snat rule", .rule = "10", - .ifname = "dp1T0", + .ifname = "dp2T1", .proto = IPPROTO_TCP, .map = "dynamic", - .from_addr = NULL, + .port_alloc = NULL, + .from_addr = "1.1.1.11", .from_port = NULL, - .to_addr = "2.2.2.12", + .to_addr = NULL, .to_port = NULL, - .trans_addr = "2.2.2.11", + .trans_addr = "2.2.2.5", .trans_port = NULL }; - dp_test_npf_dnat_add(&dnat, true); + dp_test_npf_snat_add(&snat, true); + + /* + * ftp control flow packets + */ + ctrl_fw_pre = dpt_pdesc_v4_create( + "ctrl_fw_pre", IPPROTO_TCP, + "aa:bb:cc:dd:1:11", "1.1.1.11", 46682, + "aa:bb:cc:dd:2:11", "2.2.2.11", 21, + "dp1T0", "dp2T1"); + + ctrl_fw_pst = dpt_pdesc_v4_create( + "ctrl_fw_pst", IPPROTO_TCP, + "aa:bb:cc:dd:1:11", "2.2.2.5", 46682, + "aa:bb:cc:dd:2:11", "2.2.2.11", 21, + "dp1T0", "dp2T1"); + + ctrl_bk_pre = dpt_pdesc_v4_create( + "ctrl_bk_pre", IPPROTO_TCP, + "aa:bb:cc:dd:2:11", "2.2.2.11", 21, + "aa:bb:cc:dd:1:11", "2.2.2.5", 46682, + "dp2T1", "dp1T0"); + + ctrl_bk_pst = dpt_pdesc_v4_create( + "ctrl_bk_pst", IPPROTO_TCP, + "aa:bb:cc:dd:2:11", "2.2.2.11", 21, + "aa:bb:cc:dd:1:11", "1.1.1.11", 46682, + "dp2T1", "dp1T0"); - ftp1_fwd_in.l4.tcp.sport = 46682; - ftp1_fwd_in.l4.tcp.dport = 21; + /* + * ftp data flow packets + */ + data_fw_pre = dpt_pdesc_v4_create( + "data_fw_pre", IPPROTO_TCP, + "aa:bb:cc:dd:1:11", "1.1.1.11", data_port, + "aa:bb:cc:dd:2:11", "2.2.2.11", 20, + "dp1T0", "dp2T1"); + + data_fw_pst = dpt_pdesc_v4_create( + "data_fw_pst", IPPROTO_TCP, + "aa:bb:cc:dd:1:11", "2.2.2.5", data_port, + "aa:bb:cc:dd:2:11", "2.2.2.11", 20, + "dp1T0", "dp2T1"); + + data_bk_pre = dpt_pdesc_v4_create( + "data_bk_pre", IPPROTO_TCP, + "aa:bb:cc:dd:2:11", "2.2.2.11", 20, + "aa:bb:cc:dd:1:11", "2.2.2.5", data_port, + "dp2T1", "dp1T0"); + + data_bk_pst = dpt_pdesc_v4_create( + "data_bk_pst", IPPROTO_TCP, + "aa:bb:cc:dd:2:11", "2.2.2.11", 20, + "aa:bb:cc:dd:1:11", 
"1.1.1.11", data_port, + "dp2T1", "dp1T0"); + + /* + * Packet descriptors for ftp ctrl flow + */ + struct dpt_tcp_flow ftp_ctrl_call = { + .text[0] = '\0', /* description */ + .isn = {0, 0}, /* initial seq no */ + .desc[DPT_FORW] = { /* Forw pkt descriptors */ + .pre = ctrl_fw_pre, + .pst = ctrl_fw_pst, + }, + .desc[DPT_BACK] = { /* Back pkt descriptors */ + .pre = ctrl_bk_pre, + .pst = ctrl_bk_pst, + }, + .test_cb = NULL, /* Prep and send pkt */ + .post_cb = NULL, /* Fixup pkt exp */ + }; + snprintf(ftp_ctrl_call.text, sizeof(ftp_ctrl_call), "Ctrl"); - ftp1_fwd_out.l4.tcp.sport = 46682; - ftp1_fwd_out.l4.tcp.dport = 21; + /* + * Per-packet flags and data for ftp ctrl flow + */ + struct dpt_tcp_flow_pkt ftp_ctrl_pkts[] = { + { DPT_FORW, TH_SYN, 0, NULL, 0, NULL }, + { DPT_BACK, TH_SYN | TH_ACK, 0, NULL, 0, NULL }, + { DPT_FORW, TH_ACK, 0, NULL, 0, NULL }, - ftp1_rev_in.l4.tcp.sport = 21; - ftp1_rev_in.l4.tcp.dport = 46682; + /* session established */ - ftp1_rev_out.l4.tcp.sport = 21; - ftp1_rev_out.l4.tcp.dport = 46682; + /* + * PORT command. Client specifies which client-side port that + * the server should use for data flow. Port is + * 9819. 
(38 == 0x26, 91 == 0x5B, 0x265B == 9819) + */ + { DPT_FORW, TH_ACK, 0, + (char *)"PORT 1,1,1,11,38,91\x0d\x0a", + 0, + (char *)"PORT 2,2,2,5,38,91\x0d\x0a" }, - ftp1_fwd_in.l3_dst = "2.2.2.12"; - ftp1_fwd_out.l3_dst = "2.2.2.11"; + { DPT_BACK, TH_ACK, 0, NULL, 0, NULL }, - ftp1_rev_in.l3_src = "2.2.2.11"; - ftp1_rev_out.l3_src = "2.2.2.12"; + { DPT_FORW, TH_ACK | TH_FIN, 0, NULL, 0, NULL }, + { DPT_BACK, TH_ACK | TH_FIN, 0, NULL, 0, NULL }, + { DPT_FORW, TH_ACK, 0, NULL, 0, NULL }, + }; - struct dp_test_tcp_call tcp_call = { - .str[0] = '\0', - .isn = {0, 0}, - .desc[DP_DIR_FORW] = { - .pre = &ftp1_fwd_in, - .post = &ftp1_fwd_out, + /* + * Packet descriptors for ftp data flow + */ + struct dpt_tcp_flow ftp_data_call = { + .text[0] = '\0', /* description */ + .isn = {0, 0}, /* initial seq no */ + .desc[DPT_FORW] = { /* Forw pkt descriptors */ + .pre = data_fw_pre, + .pst = data_fw_pst, }, - .desc[DP_DIR_BACK] = { - .pre = &ftp1_rev_in, - .post = &ftp1_rev_out, + .desc[DPT_BACK] = { /* Back pkt descriptors */ + .pre = data_bk_pre, + .pst = data_bk_pst, }, - .test_cb = tcp_ftp_control_cb2, - .post_cb = NULL, + .test_cb = NULL, /* Prep and send pkt */ + .post_cb = NULL, /* Fixup pkt exp */ }; + snprintf(ftp_data_call.text, sizeof(ftp_data_call), "Data"); - const char *ftp[] = { - [0] = NULL, - [1] = NULL, - [2] = NULL, + struct dpt_tcp_flow_pkt ftp_data_pkts[] = { + { DPT_BACK, TH_SYN, 0, NULL, 0, NULL }, + { DPT_FORW, TH_SYN | TH_ACK, 0, NULL, 0, NULL }, + { DPT_BACK, TH_ACK, 0, NULL, 0, NULL }, - [3] = "SYST\x0d\x0a", - [4] = "215 UNIX Type: L8\x0d\x0a", + { DPT_BACK, TH_ACK, 100, NULL, 0, NULL }, + { DPT_FORW, TH_ACK, 0, NULL, 0, NULL }, - [5] = "TYPE I\x0d\x0a", - [6] = "200 Switching to Binary mode.\x0d\x0a", + { DPT_BACK, TH_ACK | TH_FIN, 0, NULL, 0, NULL }, + { DPT_FORW, TH_ACK | TH_FIN, 0, NULL, 0, NULL }, + { DPT_BACK, TH_ACK, 0, NULL, 0, NULL }, + }; - [7] = "PASV\x0d\x0a", + /* Start of ftp ctrl flow (pkts 0 - 4) */ + dpt_tcp_call(&ftp_ctrl_call, 
ftp_ctrl_pkts, ARRAY_SIZE(ftp_ctrl_pkts), + 0, 4, NULL, 0); - /* - * Response: 227. Server telling client which address and - * port to use for data channel. Address is 2.2.2.11, port is - * 9819. (38 == 0x26, 91 == 0x5B, 0x265B == 9819) - */ - [8] = "227 Entering Passive Mode (2,2,2,11,38,91).\x0d\x0a", + /* ftp data flow */ + dpt_tcp_call(&ftp_data_call, ftp_data_pkts, ARRAY_SIZE(ftp_data_pkts), + 0, 0, NULL, 0); - /* - * Here we delete the vrf - */ - }; + /* End of ftp ctrl flow (pkts 5 - end) */ + dpt_tcp_call(&ftp_ctrl_call, ftp_ctrl_pkts, ARRAY_SIZE(ftp_ctrl_pkts), + 5, 0, NULL, 0); + dp_test_npf_snat_del(snat.ifname, snat.rule, true); - struct dp_test_tcp_flow_pkt tcp_pkt1[] = { - {DP_DIR_FORW, TH_SYN, 0, NULL}, - {DP_DIR_BACK, TH_SYN | TH_ACK, 0, NULL}, - {DP_DIR_FORW, TH_ACK, 0, NULL}, + free(ctrl_fw_pre); + free(ctrl_fw_pst); + free(ctrl_bk_pre); + free(ctrl_bk_pst); - /* session established */ - {DP_DIR_FORW, TH_ACK, strlen(ftp[3]), NULL}, - {DP_DIR_BACK, TH_ACK, strlen(ftp[4]), NULL}, + free(data_fw_pre); + free(data_fw_pst); + free(data_bk_pre); + free(data_bk_pst); - {DP_DIR_FORW, TH_ACK, strlen(ftp[5]), NULL}, - {DP_DIR_BACK, TH_ACK, strlen(ftp[6]), NULL}, + dp_test_npf_cleanup(); - {DP_DIR_FORW, TH_ACK, strlen(ftp[7]), NULL}, - {DP_DIR_BACK, TH_ACK, strlen(ftp[8]), NULL}, +} DP_END_TEST; - /* call not completed */ - }; - assert(ARRAY_SIZE(ftp) == ARRAY_SIZE(tcp_pkt1)); - struct ftp_ctx ftp_ctx = { - .payload = ftp, - .payload_len = ARRAY_SIZE(ftp), - .do_data_call = false, - }; +/* + * ftp10 - DNAT, Active ftp. 
+ */ +DP_DECL_TEST_CASE(npf_alg_ftp, ftp10, dpt_alg_ftp_setup, dpt_alg_ftp_teardown); +DP_START_TEST(ftp10, test) +{ + struct dp_test_pkt_desc_t *ctrl_fw_pre, *ctrl_fw_pst; + struct dp_test_pkt_desc_t *ctrl_bk_pre, *ctrl_bk_pst; + struct dp_test_pkt_desc_t *data_fw_pre, *data_fw_pst; + struct dp_test_pkt_desc_t *data_bk_pre, *data_bk_pst; - /* Simulate the partial ftp flow */ - dp_test_tcp_call(&tcp_call, tcp_pkt1, ARRAY_SIZE(tcp_pkt1), - &ftp_ctx, 0); + /* ftp port appears as 2 numbers in the string */ + uint8_t data_port_upr = 38; + uint8_t data_port_lwr = 91; + uint16_t data_port = (data_port_upr * 256) + data_port_lwr; /* - * ftp control and data channels established + * Add DNAT rule. */ + struct dp_test_npf_nat_rule_t dnat = { + .desc = "dnat rule", + .rule = "10", + .ifname = "dp1T0", + .proto = IPPROTO_TCP, + .map = "dynamic", + .port_alloc = NULL, + .from_addr = NULL, + .from_port = NULL, + .to_addr = "2.2.2.20", + .to_port = NULL, + .trans_addr = "2.2.2.11", + .trans_port = NULL + }; + + dp_test_npf_dnat_add(&dnat, true); /* - * Set true to delete vrf before sessions are expired.
+ * ftp control flow packets */ - bool delete_vrf = true; - - if (delete_vrf && vrfid != VRF_DEFAULT_ID) { - /* - * Delete vrf while there are ALG sessions - */ - dp_test_nl_del_ip_addr_and_connected_vrf("dp1T0", "1.1.1.1/24", - vrfid); - dp_test_nl_del_ip_addr_and_connected_vrf("dp1T1", "2.2.2.2/24", - vrfid); - dp_test_netlink_set_interface_vrf("dp1T0", VRF_DEFAULT_ID); - dp_test_netlink_set_interface_vrf("dp1T1", VRF_DEFAULT_ID); - dp_test_netlink_del_vrf(vrfid, 0); - dp_test_npf_clear_sessions(); - } - - /* Cleanup */ + ctrl_fw_pre = dpt_pdesc_v4_create( + "ctrl_fw_pre", IPPROTO_TCP, + "aa:bb:cc:dd:1:11", "1.1.1.11", 46682, + "aa:bb:cc:dd:2:11", "2.2.2.20", 21, + "dp1T0", "dp2T1"); + + ctrl_fw_pst = dpt_pdesc_v4_create( + "ctrl_fw_pst", IPPROTO_TCP, + "aa:bb:cc:dd:1:11", "1.1.1.11", 46682, + "aa:bb:cc:dd:2:11", "2.2.2.11", 21, + "dp1T0", "dp2T1"); + + ctrl_bk_pre = dpt_pdesc_v4_create( + "ctrl_bk_pre", IPPROTO_TCP, + "aa:bb:cc:dd:2:11", "2.2.2.11", 21, + "aa:bb:cc:dd:1:11", "1.1.1.11", 46682, + "dp2T1", "dp1T0"); + + ctrl_bk_pst = dpt_pdesc_v4_create( + "ctrl_bk_pst", IPPROTO_TCP, + "aa:bb:cc:dd:2:11", "2.2.2.20", 21, + "aa:bb:cc:dd:1:11", "1.1.1.11", 46682, + "dp2T1", "dp1T0"); - dp_test_npf_dnat_del(dnat.ifname, dnat.rule, true); - - dp_test_netlink_del_neigh("dp1T0", "1.1.1.11", - "aa:bb:cc:dd:1:11"); - dp_test_netlink_del_neigh("dp1T0", "1.1.1.12", - "aa:bb:cc:dd:1:12"); - dp_test_netlink_del_neigh("dp1T1", "2.2.2.11", - "aa:bb:cc:dd:2:11"); - dp_test_netlink_del_neigh("dp1T1", "2.2.2.12", - "aa:bb:cc:dd:2:12"); + /* + * ftp data flow packets + */ + data_fw_pre = dpt_pdesc_v4_create( + "data_fw_pre", IPPROTO_TCP, + "aa:bb:cc:dd:1:11", "1.1.1.11", data_port, + "aa:bb:cc:dd:2:11", "2.2.2.20", 20, + "dp1T0", "dp2T1"); + + data_fw_pst = dpt_pdesc_v4_create( + "data_fw_pst", IPPROTO_TCP, + "aa:bb:cc:dd:1:11", "1.1.1.11", data_port, + "aa:bb:cc:dd:2:11", "2.2.2.11", 20, + "dp1T0", "dp2T1"); + + data_bk_pre = dpt_pdesc_v4_create( + "data_bk_pre", IPPROTO_TCP, + 
"aa:bb:cc:dd:2:11", "2.2.2.11", 20, + "aa:bb:cc:dd:1:11", "1.1.1.11", data_port, + "dp2T1", "dp1T0"); + + data_bk_pst = dpt_pdesc_v4_create( + "data_bk_pst", IPPROTO_TCP, + "aa:bb:cc:dd:2:11", "2.2.2.20", 20, + "aa:bb:cc:dd:1:11", "1.1.1.11", data_port, + "dp2T1", "dp1T0"); + + /* + * Packet descriptors for ftp ctrl flow + */ + struct dpt_tcp_flow ftp_ctrl_call = { + .text[0] = '\0', /* description */ + .isn = {0, 0}, /* initial seq no */ + .desc[DPT_FORW] = { /* Forw pkt descriptors */ + .pre = ctrl_fw_pre, + .pst = ctrl_fw_pst, + }, + .desc[DPT_BACK] = { /* Back pkt descriptors */ + .pre = ctrl_bk_pre, + .pst = ctrl_bk_pst, + }, + .test_cb = NULL, /* Prep and send pkt */ + .post_cb = NULL, /* Fixup pkt exp */ + }; + snprintf(ftp_ctrl_call.text, sizeof(ftp_ctrl_call), "Ctrl"); - dp_test_npf_cleanup(); + /* + * Per-packet flags and data for ftp ctrl flow + */ + struct dpt_tcp_flow_pkt ftp_ctrl_pkts[] = { + { DPT_FORW, TH_SYN, 0, NULL, 0, NULL }, + { DPT_BACK, TH_SYN | TH_ACK, 0, NULL, 0, NULL }, + { DPT_FORW, TH_ACK, 0, NULL, 0, NULL }, - if (!delete_vrf || vrfid == VRF_DEFAULT_ID) { + /* session established */ /* - * Normal test cleanup. + * PORT command. Client specifies which client-side port that + * the server should use for data flow. Port is + * 9819. (38 == 0x26, 91 == 0x5B, 0x265B == 9819) */ - dp_test_nl_del_ip_addr_and_connected_vrf("dp1T0", "1.1.1.1/24", - vrfid); - dp_test_nl_del_ip_addr_and_connected_vrf("dp1T1", "2.2.2.2/24", - vrfid); - - if (vrfid != VRF_DEFAULT_ID) - dp_test_netlink_del_vrf(vrfid, 0); - } + { DPT_FORW, TH_ACK, 0, + (char *)"PORT 1,1,1,11,38,91\x0d\x0a", + 0, NULL }, -} DP_END_TEST; + { DPT_BACK, TH_ACK, 0, NULL, 0, NULL }, + /* Data call is done at this point */ -/*************************************************************************** - * alg_ftp7 -- SNAT from client to server, Active ftp - * - * dp1T0 dp1T1 - * 1.1.1.11 2.2.2.11 (src, pre) - * 2.2.2.12 (src, post - * Server Client - * 1. 21 <----- 1026 "PORT 1027" - * 2. 
21 -----> 1026 ack - * 3. 20 -----> 1027 - * 4. 20 <----- 1027 ack - * - * Simulates an ftp call via two TCP calls - one for control channel and one - * for data channel. - * - * snat is configured. Source address 2.2.2.11 is translated to 2.2.2.12 for - * client-to-server traffic. - * - * Input and output interface are in default VRF. - * - ***************************************************************************/ - -/* - * TCP packet - */ -static struct dp_test_pkt_desc_t ftp7_fwd_in = { - .text = "TCP Forwards In", - .len = 0, - .ether_type = ETHER_TYPE_IPv4, - .l3_src = "2.2.2.11", - .l2_src = "aa:bb:cc:dd:2:11", - .l3_dst = "1.1.1.11", - .l2_dst = "00:00:a4:00:00:64", - .proto = IPPROTO_TCP, - .l4 = { - .tcp = { - .sport = 1026, - .dport = 21, - .flags = 0, - .seq = 0, - .ack = 0, - .win = 8192, - .opts = NULL - } - }, - .rx_intf = "dp1T1", - .tx_intf = "dp1T0" -}; - -static struct dp_test_pkt_desc_t ftp7_fwd_out = { - .text = "TCP Forwards Out", - .len = 0, - .ether_type = ETHER_TYPE_IPv4, - .l3_src = "2.2.2.12", - .l2_src = "00:00:a4:00:00:64", - .l3_dst = "1.1.1.11", - .l2_dst = "aa:bb:cc:dd:1:11", - .proto = IPPROTO_TCP, - .l4 = { - .tcp = { - .sport = 1026, - .dport = 21, - .flags = 0, - .seq = 0, - .ack = 0, - .win = 8192, - .opts = NULL - } - }, - .rx_intf = "dp1T1", - .tx_intf = "dp1T0" -}; - -static struct dp_test_pkt_desc_t ftp7_rev_in = { - .text = "TCP Reverse In", - .len = 0, - .ether_type = ETHER_TYPE_IPv4, - .l3_src = "1.1.1.11", - .l2_src = "aa:bb:cc:dd:1:11", - .l3_dst = "2.2.2.12", - .l2_dst = "00:00:a4:00:00:64", - .proto = IPPROTO_TCP, - .l4 = { - .tcp = { - .sport = 21, - .dport = 1026, - .flags = 0, - .seq = 0, - .ack = 0, - .win = 8192, - .opts = NULL - } - }, - .rx_intf = "dp1T0", - .tx_intf = "dp1T1" -}; - -static struct dp_test_pkt_desc_t ftp7_rev_out = { - .text = "TCP Reverse Out", - .len = 0, - .ether_type = ETHER_TYPE_IPv4, - .l3_src = "1.1.1.11", - .l2_src = "00:00:a4:00:00:64", - .l3_dst = "2.2.2.11", - .l2_dst = 
"aa:bb:cc:dd:2:11", - .proto = IPPROTO_TCP, - .l4 = { - .tcp = { - .sport = 21, - .dport = 1026, - .flags = 0, - .seq = 0, - .ack = 0, - .win = 8192, - .opts = NULL - } - }, - .rx_intf = "dp1T0", - .tx_intf = "dp1T1" -}; + { DPT_FORW, TH_ACK | TH_FIN, 0, NULL, 0, NULL }, + { DPT_BACK, TH_ACK | TH_FIN, 0, NULL, 0, NULL }, + { DPT_FORW, TH_ACK, 0, NULL, 0, NULL }, + }; -/* - * Active ftp data flow starts in the reverse direction - */ -static void ftp_data_call7(void) -{ - uint16_t fwd_in_sport = ftp7_fwd_in.l4.tcp.sport; - uint16_t fwd_in_dport = ftp7_fwd_in.l4.tcp.dport; - uint16_t fwd_out_sport = ftp7_fwd_out.l4.tcp.sport; - uint16_t fwd_out_dport = ftp7_fwd_out.l4.tcp.dport; - - uint16_t rev_in_sport = ftp7_rev_in.l4.tcp.sport; - uint16_t rev_in_dport = ftp7_rev_in.l4.tcp.dport; - uint16_t rev_out_sport = ftp7_rev_out.l4.tcp.sport; - uint16_t rev_out_dport = ftp7_rev_out.l4.tcp.dport; - - ftp7_rev_in.l4.tcp.sport = 20; - ftp7_rev_in.l4.tcp.dport = 1027; - - ftp7_rev_out.l4.tcp.sport = 20; - ftp7_rev_out.l4.tcp.dport = 1027; - - ftp7_fwd_in.l4.tcp.sport = 1027; - ftp7_fwd_in.l4.tcp.dport = 20; - - ftp7_fwd_out.l4.tcp.sport = 1027; - ftp7_fwd_out.l4.tcp.dport = 20; - - struct dp_test_tcp_call tcp_call = { - .str[0] = '\0', - .isn = {0, 0}, - .desc[DP_DIR_FORW] = { - .pre = &ftp7_rev_in, - .post = &ftp7_rev_out, + /* + * Packet descriptors for ftp data flow + */ + struct dpt_tcp_flow ftp_data_call = { + .text[0] = '\0', /* description */ + .isn = {0, 0}, /* initial seq no */ + .desc[DPT_FORW] = { /* Forw pkt descriptors */ + .pre = data_fw_pre, + .pst = data_fw_pst, }, - .desc[DP_DIR_BACK] = { - .pre = &ftp7_fwd_in, - .post = &ftp7_fwd_out, + .desc[DPT_BACK] = { /* Back pkt descriptors */ + .pre = data_bk_pre, + .pst = data_bk_pst, }, - .test_cb = NULL, - .post_cb = NULL, + .test_cb = NULL, /* Prep and send pkt */ + .post_cb = NULL, /* Fixup pkt exp */ }; + snprintf(ftp_data_call.text, sizeof(ftp_data_call), "Data"); - struct dp_test_tcp_flow_pkt ftp_data_pkt1[] = { 
- {DP_DIR_FORW, TH_SYN, 0, NULL}, - {DP_DIR_BACK, TH_SYN | TH_ACK, 0, NULL}, - {DP_DIR_FORW, TH_ACK, 0, NULL}, + struct dpt_tcp_flow_pkt ftp_data_pkts[] = { + { DPT_BACK, TH_SYN, 0, NULL, 0, NULL }, + { DPT_FORW, TH_SYN | TH_ACK, 0, NULL, 0, NULL }, + { DPT_BACK, TH_ACK, 0, NULL, 0, NULL }, - /* call not completed */ + { DPT_BACK, TH_ACK, 100, NULL, 0, NULL }, + { DPT_FORW, TH_ACK, 0, NULL, 0, NULL }, + + { DPT_BACK, TH_ACK | TH_FIN, 0, NULL, 0, NULL }, + { DPT_FORW, TH_ACK | TH_FIN, 0, NULL, 0, NULL }, + { DPT_BACK, TH_ACK, 0, NULL, 0, NULL }, }; - /* Simulate the TCP call */ - dp_test_tcp_call(&tcp_call, ftp_data_pkt1, - ARRAY_SIZE(ftp_data_pkt1), - NULL, 0); + /* Start of ftp ctrl flow (pkts 0 - 4) */ + dpt_tcp_call(&ftp_ctrl_call, ftp_ctrl_pkts, ARRAY_SIZE(ftp_ctrl_pkts), + 0, 4, NULL, 0); - ftp7_fwd_in.l4.tcp.sport = fwd_in_sport; - ftp7_fwd_in.l4.tcp.dport = fwd_in_dport; - ftp7_fwd_out.l4.tcp.sport = fwd_out_sport; - ftp7_fwd_out.l4.tcp.dport = fwd_out_dport; + /* ftp data flow */ + dpt_tcp_call(&ftp_data_call, ftp_data_pkts, ARRAY_SIZE(ftp_data_pkts), + 0, 0, NULL, 0); - ftp7_rev_in.l4.tcp.sport = rev_in_sport; - ftp7_rev_in.l4.tcp.dport = rev_in_dport; - ftp7_rev_out.l4.tcp.sport = rev_out_sport; - ftp7_rev_out.l4.tcp.dport = rev_out_dport; -} + /* End of ftp ctrl flow (pkts 5 - end) */ + dpt_tcp_call(&ftp_ctrl_call, ftp_ctrl_pkts, ARRAY_SIZE(ftp_ctrl_pkts), + 5, 0, NULL, 0); -/* - * Callback function for TCP call simulator. ftp control channel. 
- */ -static void tcp_ftp_control_cb7(const char *str, - uint pktno, enum dp_test_tcp_dir dir, - uint8_t flags, - struct dp_test_pkt_desc_t *pre, - struct dp_test_pkt_desc_t *post, - void *data, uint index) -{ - struct rte_mbuf *pre_pak, *post_pak; - struct dp_test_expected *test_exp; - struct ftp_ctx *ctx = data; - const char **ftp = ctx->payload; - - pre_pak = dp_test_v4_pkt_from_desc(pre); - post_pak = dp_test_v4_pkt_from_desc(post); - - /* - * Add ftp payload - */ - if (ftp[pktno]) { - const char *pre_ftp = ftp[pktno]; - const char *post_ftp = ftp[pktno]; - char rnatd[50]; - - /* Reverse SNAT the ftp payload for 'PORT' pkt */ - if (!strncmp("PORT ", post_ftp, 4)) { - snprintf(rnatd, sizeof(rnatd), - "PORT 2,2,2,12,4,3\x0d\x0a"); - post_ftp = rnatd; - } + dp_test_npf_dnat_del(dnat.ifname, dnat.rule, true); - dp_test_tcp_write_payload(pre_pak, strlen(pre_ftp), pre_ftp); - dp_test_tcp_write_payload(post_pak, strlen(post_ftp), post_ftp); - } + free(ctrl_fw_pre); + free(ctrl_fw_pst); + free(ctrl_bk_pre); + free(ctrl_bk_pst); - test_exp = dp_test_exp_from_desc(post_pak, post); - rte_pktmbuf_free(post_pak); - dp_test_exp_set_fwd_status(test_exp, DP_TEST_FWD_FORWARDED); + free(data_fw_pre); + free(data_fw_pst); + free(data_bk_pre); + free(data_bk_pst); - spush(test_exp->description, sizeof(test_exp->description), - "%s", str); + dp_test_npf_cleanup(); - /* Send the packet */ - dp_test_pak_receive(pre_pak, pre->rx_intf, test_exp); +} DP_END_TEST; - /* - * Detect the ACK just after the "PORT" command. We can startup the - * data channel tcp session here. - */ - if (ctx->do_data_call && pktno > 1 && ftp[pktno - 1] && - !strncmp(ftp[pktno - 1], "PORT", 4)) - ftp_data_call7(); -} /* - * alg_ftp7 + * ftp11 - SNAT, Extended Passive ftp. Source port is outside of trans port + * range. 
+ * + * Earlier SNAT tests relied on 'port preservation', in that the inside port + * was within the translation port range and hence the same port number + * could be used (provided it was available). + * + * This test forces the SNAT to change the port number during the translation. */ -DP_DECL_TEST_CASE(npf_alg_ftp, alg_ftp7, NULL, NULL); -DP_START_TEST(alg_ftp7, test) +DP_DECL_TEST_CASE(npf_alg_ftp, ftp11, dpt_alg_ftp_setup, dpt_alg_ftp_teardown); +DP_START_TEST(ftp11, test) { - uint vrfid = VRF_DEFAULT_ID; - - if (vrfid != VRF_DEFAULT_ID) - dp_test_netlink_add_vrf(vrfid, 1); - - /* Setup interfaces and neighbours */ - dp_test_nl_add_ip_addr_and_connected_vrf("dp1T0", "1.1.1.1/24", - vrfid); - dp_test_nl_add_ip_addr_and_connected_vrf("dp1T1", "2.2.2.2/24", - vrfid); - - dp_test_netlink_add_neigh("dp1T0", "1.1.1.11", - "aa:bb:cc:dd:1:11"); - dp_test_netlink_add_neigh("dp1T0", "1.1.1.12", - "aa:bb:cc:dd:1:12"); - dp_test_netlink_add_neigh("dp1T1", "2.2.2.11", - "aa:bb:cc:dd:2:11"); - dp_test_netlink_add_neigh("dp1T1", "2.2.2.12", - "aa:bb:cc:dd:2:12"); + struct dp_test_pkt_desc_t *ctrl_fw_pre, *ctrl_fw_pst; + struct dp_test_pkt_desc_t *ctrl_bk_pre, *ctrl_bk_pst; + struct dp_test_pkt_desc_t *data_fw_pre, *data_fw_pst; + struct dp_test_pkt_desc_t *data_bk_pre, *data_bk_pst; /* * Add SNAT rule.
@@ -2133,174 +2186,218 @@ DP_START_TEST(alg_ftp7, test) struct dp_test_npf_nat_rule_t snat = { .desc = "snat rule", .rule = "10", - .ifname = "dp1T0", + .ifname = "dp2T1", .proto = IPPROTO_TCP, .map = "dynamic", - .from_addr = "2.2.2.11", + .port_alloc = "sequential", + .from_addr = "1.1.1.11", .from_port = NULL, .to_addr = NULL, .to_port = NULL, - .trans_addr = "2.2.2.12", - .trans_port = NULL + .trans_addr = "2.2.2.20", + .trans_port = "10000-20000" }; dp_test_npf_snat_add(&snat, true); - struct dp_test_tcp_call tcp_call = { - .str[0] = '\0', - .isn = {0, 0}, - .desc[DP_DIR_FORW] = { - .pre = &ftp7_fwd_in, - .post = &ftp7_fwd_out, + /* Data port is 2559 */ + uint16_t data_port = 2559; + + /* + * ftp control flow packets + */ + ctrl_fw_pre = dpt_pdesc_v4_create( + "ctrl_fw_pre", IPPROTO_TCP, + "aa:bb:cc:dd:1:11", "1.1.1.11", 46682, + "aa:bb:cc:dd:2:11", "2.2.2.11", 21, + "dp1T0", "dp2T1"); + + ctrl_fw_pst = dpt_pdesc_v4_create( + "ctrl_fw_pst", IPPROTO_TCP, + "aa:bb:cc:dd:1:11", "2.2.2.20", 10000, + "aa:bb:cc:dd:2:11", "2.2.2.11", 21, + "dp1T0", "dp2T1"); + + ctrl_bk_pre = dpt_pdesc_v4_create( + "ctrl_bk_pre", IPPROTO_TCP, + "aa:bb:cc:dd:2:11", "2.2.2.11", 21, + "aa:bb:cc:dd:1:11", "2.2.2.20", 10000, + "dp2T1", "dp1T0"); + + ctrl_bk_pst = dpt_pdesc_v4_create( + "ctrl_bk_pst", IPPROTO_TCP, + "aa:bb:cc:dd:2:11", "2.2.2.11", 21, + "aa:bb:cc:dd:1:11", "1.1.1.11", 46682, + "dp2T1", "dp1T0"); + + /* + * ftp data flow packets + */ + data_fw_pre = dpt_pdesc_v4_create( + "data_fw_pre", IPPROTO_TCP, + "aa:bb:cc:dd:1:11", "1.1.1.11", 49888, + "aa:bb:cc:dd:2:11", "2.2.2.11", data_port, + "dp1T0", "dp2T1"); + + data_fw_pst = dpt_pdesc_v4_create( + "data_fw_pst", IPPROTO_TCP, + "aa:bb:cc:dd:1:11", "2.2.2.20", 10001, + "aa:bb:cc:dd:2:11", "2.2.2.11", data_port, + "dp1T0", "dp2T1"); + + data_bk_pre = dpt_pdesc_v4_create( + "data_bk_pre", IPPROTO_TCP, + "aa:bb:cc:dd:2:11", "2.2.2.11", data_port, + "aa:bb:cc:dd:1:11", "2.2.2.20", 10001, + "dp2T1", "dp1T0"); + + data_bk_pst = 
dpt_pdesc_v4_create( + "data_bk_pst", IPPROTO_TCP, + "aa:bb:cc:dd:2:11", "2.2.2.11", data_port, + "aa:bb:cc:dd:1:11", "1.1.1.11", 49888, + "dp2T1", "dp1T0"); + + /* + * Packet descriptors for ftp ctrl flow + */ + struct dpt_tcp_flow ftp_ctrl_call = { + .text[0] = '\0', /* description */ + .isn = {0, 0}, /* initial seq no */ + .desc[DPT_FORW] = { /* Forw pkt descriptors */ + .pre = ctrl_fw_pre, + .pst = ctrl_fw_pst, }, - .desc[DP_DIR_BACK] = { - .pre = &ftp7_rev_in, - .post = &ftp7_rev_out, + .desc[DPT_BACK] = { /* Back pkt descriptors */ + .pre = ctrl_bk_pre, + .pst = ctrl_bk_pst, }, - .test_cb = tcp_ftp_control_cb7, - .post_cb = NULL, + .test_cb = NULL, /* Prep and send pkt */ + .post_cb = NULL, /* Fixup pkt exp */ }; + snprintf(ftp_ctrl_call.text, sizeof(ftp_ctrl_call), + "Ctrl"); + + /* + * Per-packet flags and data for ftp ctrl flow + */ + struct dpt_tcp_flow_pkt ftp_ctrl_pkts[] = { + { DPT_FORW, TH_SYN, 0, NULL, 0, NULL }, + { DPT_BACK, TH_SYN | TH_ACK, 0, NULL, 0, NULL }, + { DPT_FORW, TH_ACK, 0, NULL, 0, NULL }, + + /* session established */ + { DPT_FORW, TH_ACK, + 0, (char *)"SYST\x0d\x0a", 0, NULL }, + + { DPT_BACK, TH_ACK, + 0, (char *)"215 UNIX Type: L8\x0d\x0a", 0, NULL }, - const char *ftp[] = { - [0] = NULL, - [1] = NULL, - [2] = NULL, + { DPT_FORW, TH_ACK, + 0, (char *)"TYPE I\x0d\x0a", 0, NULL }, - /* Port: 1027 = 4*256 + 3 */ - [3] = "PORT 2,2,2,11,4,3\x0d\x0a", - [4] = NULL, + { DPT_BACK, TH_ACK, + 0, + (char *)"200 Switching to Binary mode.\x0d\x0a", + 0, NULL }, + + { DPT_FORW, TH_ACK, + 0, (char *)"EPSV\x0d\x0a", 0, NULL }, /* - * Here we get a new TCP call opened for the data channel, - * from 1.1.1.11:46682 to 2.2.2.11:9819 + * #8. Response: 229. Server telling client which + * address and port to use for data channel. port is 2559. 
*/ + { DPT_BACK, TH_ACK, 0, + (char *)"229 Entering Extended Passive Mode (|||2559|)\r\n", + 0, + (char *)"229 Entering Extended Passive Mode (|||2559|)\r\n" }, + + { DPT_FORW, TH_ACK | TH_FIN, 0, NULL, 0, NULL }, + { DPT_BACK, TH_ACK | TH_FIN, 0, NULL, 0, NULL }, + { DPT_FORW, TH_ACK, 0, NULL, 0, NULL }, }; + /* + * Packet descriptors for ftp data flow + */ + struct dpt_tcp_flow ftp_data_call = { + .text[0] = '\0', /* description */ + .isn = {0, 0}, /* initial seq no */ + .desc[DPT_FORW] = { /* Forw pkt descriptors */ + .pre = data_fw_pre, + .pst = data_fw_pst, + }, + .desc[DPT_BACK] = { /* Back pkt descriptors */ + .pre = data_bk_pre, + .pst = data_bk_pst, + }, + .test_cb = NULL, /* Prep and send pkt */ + .post_cb = NULL, /* Fixup pkt exp */ + }; - struct dp_test_tcp_flow_pkt tcp_pkt1[] = { - {DP_DIR_FORW, TH_SYN, 0, NULL}, - {DP_DIR_BACK, TH_SYN | TH_ACK, 0, NULL}, - {DP_DIR_FORW, TH_ACK, 0, NULL}, + snprintf(ftp_data_call.text, sizeof(ftp_data_call), + "Data, port %u", data_port); - /* session established */ - {DP_DIR_FORW, TH_ACK, strlen(ftp[3]), NULL}, - {DP_DIR_BACK, TH_ACK, 0, NULL}, + struct dpt_tcp_flow_pkt ftp_data_pkts[] = { + { DPT_FORW, TH_SYN, 0, NULL, 0, NULL }, + { DPT_BACK, TH_SYN | TH_ACK, 0, NULL, 0, NULL }, + { DPT_FORW, TH_ACK, 0, NULL, 0, NULL }, - /* call not completed */ - }; - assert(ARRAY_SIZE(ftp) == ARRAY_SIZE(tcp_pkt1)); + { DPT_BACK, TH_ACK, 100, NULL, 0, NULL }, + { DPT_FORW, TH_ACK, 0, NULL, 0, NULL }, - struct ftp_ctx ftp_ctx = { - .payload = ftp, - .payload_len = ARRAY_SIZE(ftp), - .do_data_call = true, /* Add data call */ + { DPT_FORW, TH_ACK | TH_FIN, 0, NULL, 0, NULL }, + { DPT_BACK, TH_ACK | TH_FIN, 0, NULL, 0, NULL }, + { DPT_FORW, TH_ACK, 0, NULL, 0, NULL }, }; - /* Simulate the TCP call */ - dp_test_tcp_call(&tcp_call, tcp_pkt1, ARRAY_SIZE(tcp_pkt1), - &ftp_ctx, 0); + /* Start of ftp ctrl flow (pkts 0 - 8) */ + dpt_tcp_call(&ftp_ctrl_call, ftp_ctrl_pkts, + ARRAY_SIZE(ftp_ctrl_pkts), + 0, 8, NULL, 0); - /* - * ftp control 
and data channels established - */ -#if 0 - dp_test_npf_print_session_table(false); -#endif + /* ftp data flow */ + dpt_tcp_call(&ftp_data_call, ftp_data_pkts, + ARRAY_SIZE(ftp_data_pkts), + 0, 0, NULL, 0); - /* - * Set true to delete vrf before sessions are expired. - */ - bool delete_vrf = false; + /* End of ftp ctrl flow (pkts 9 - end) */ + dpt_tcp_call(&ftp_ctrl_call, ftp_ctrl_pkts, + ARRAY_SIZE(ftp_ctrl_pkts), + 9, 0, NULL, 0); - if (delete_vrf && vrfid != VRF_DEFAULT_ID) { - /* - * Delete vrf while there are ALG sessions - */ - dp_test_nl_del_ip_addr_and_connected_vrf("dp1T0", "1.1.1.1/24", - vrfid); - dp_test_nl_del_ip_addr_and_connected_vrf("dp1T1", "2.2.2.2/24", - vrfid); - dp_test_netlink_set_interface_vrf("dp1T0", VRF_DEFAULT_ID); - dp_test_netlink_set_interface_vrf("dp1T1", VRF_DEFAULT_ID); - dp_test_netlink_del_vrf(vrfid, 0); - dp_test_npf_clear_sessions(); - } - - /* Cleanup */ + free(ctrl_fw_pre); + free(ctrl_fw_pst); + free(ctrl_bk_pre); + free(ctrl_bk_pst); - dp_test_npf_snat_del(snat.ifname, snat.rule, true); + free(data_fw_pre); + free(data_fw_pst); + free(data_bk_pre); + free(data_bk_pst); - dp_test_netlink_del_neigh("dp1T0", "1.1.1.11", - "aa:bb:cc:dd:1:11"); - dp_test_netlink_del_neigh("dp1T0", "1.1.1.12", - "aa:bb:cc:dd:1:12"); - dp_test_netlink_del_neigh("dp1T1", "2.2.2.11", - "aa:bb:cc:dd:2:11"); - dp_test_netlink_del_neigh("dp1T1", "2.2.2.12", - "aa:bb:cc:dd:2:12"); + dp_test_npf_snat_del(snat.ifname, snat.rule, true); dp_test_npf_cleanup(); - if (!delete_vrf || vrfid == VRF_DEFAULT_ID) { - /* - * Normal test cleanup. - */ - dp_test_nl_del_ip_addr_and_connected_vrf("dp1T0", "1.1.1.1/24", - vrfid); - dp_test_nl_del_ip_addr_and_connected_vrf("dp1T1", "2.2.2.2/24", - vrfid); - - if (vrfid != VRF_DEFAULT_ID) - dp_test_netlink_del_vrf(vrfid, 0); - } - } DP_END_TEST; -/*************************************************************************** - * alg_ftp8 -- SNAT from client to server, Active ftp. Deleting vrf. 
- * - * dp1T0 dp1T1 - * 1.1.1.11 2.2.2.11 (src, pre) - * 2.2.2.12 (src, post - * Server Client - * 1. 21 <----- 1026 "PORT 1027" - * 2. 21 -----> 1026 ack - * 3. 20 -----> 1027 - * 4. 20 <----- 1027 ack - * - * Simulates an ftp call via two TCP calls - one for control channel and one - * for data channel. - * - * snat is configured. Source address 2.2.2.11 is translated to 2.2.2.12 for - * client-to-server traffic. - * - * Input and output interface are in a non-default VRF. - * - ***************************************************************************/ - -DP_DECL_TEST_CASE(npf_alg_ftp, alg_ftp8, NULL, NULL); -DP_START_TEST(alg_ftp8, test) +/* + * ftp12 - SNAT, Active ftp. Source port is outside of trans port range. + */ +DP_DECL_TEST_CASE(npf_alg_ftp, ftp12, dpt_alg_ftp_setup, dpt_alg_ftp_teardown); +DP_START_TEST(ftp12, test) { - uint vrfid = 69; + struct dp_test_pkt_desc_t *ctrl_fw_pre, *ctrl_fw_pst; + struct dp_test_pkt_desc_t *ctrl_bk_pre, *ctrl_bk_pst; + struct dp_test_pkt_desc_t *data_fw_pre, *data_fw_pst; + struct dp_test_pkt_desc_t *data_bk_pre, *data_bk_pst; - if (vrfid != VRF_DEFAULT_ID) - dp_test_netlink_add_vrf(vrfid, 1); - - /* Setup interfaces and neighbours */ - dp_test_nl_add_ip_addr_and_connected_vrf("dp1T0", "1.1.1.1/24", - vrfid); - dp_test_nl_add_ip_addr_and_connected_vrf("dp1T1", "2.2.2.2/24", - vrfid); - - dp_test_netlink_add_neigh("dp1T0", "1.1.1.11", - "aa:bb:cc:dd:1:11"); - dp_test_netlink_add_neigh("dp1T0", "1.1.1.12", - "aa:bb:cc:dd:1:12"); - dp_test_netlink_add_neigh("dp1T1", "2.2.2.11", - "aa:bb:cc:dd:2:11"); - dp_test_netlink_add_neigh("dp1T1", "2.2.2.12", - "aa:bb:cc:dd:2:12"); + /* ftp port appears as 2 numbers in the string */ + uint8_t data_port_upr = 38; + uint8_t data_port_lwr = 91; + uint16_t data_port = (data_port_upr * 256) + data_port_lwr; /* * Add SNAT rule. 
@@ -2308,351 +2405,333 @@ DP_START_TEST(alg_ftp8, test) struct dp_test_npf_nat_rule_t snat = { .desc = "snat rule", .rule = "10", - .ifname = "dp1T0", + .ifname = "dp2T1", .proto = IPPROTO_TCP, .map = "dynamic", - .from_addr = "2.2.2.11", + .port_alloc = "sequential", + .from_addr = "1.1.1.11", .from_port = NULL, .to_addr = NULL, .to_port = NULL, - .trans_addr = "2.2.2.12", - .trans_port = NULL + .trans_addr = "2.2.2.20", + .trans_port = "1024-1030" }; dp_test_npf_snat_add(&snat, true); - struct dp_test_tcp_call tcp_call = { - .str[0] = '\0', - .isn = {0, 0}, - .desc[DP_DIR_FORW] = { - .pre = &ftp7_fwd_in, - .post = &ftp7_fwd_out, + /* + * ftp control flow packets + */ + ctrl_fw_pre = dpt_pdesc_v4_create( + "ctrl_fw_pre", IPPROTO_TCP, + "aa:bb:cc:dd:1:11", "1.1.1.11", 46682, + "aa:bb:cc:dd:2:11", "2.2.2.11", 21, + "dp1T0", "dp2T1"); + + ctrl_fw_pst = dpt_pdesc_v4_create( + "ctrl_fw_pst", IPPROTO_TCP, + "aa:bb:cc:dd:1:11", "2.2.2.20", 1024, + "aa:bb:cc:dd:2:11", "2.2.2.11", 21, + "dp1T0", "dp2T1"); + + ctrl_bk_pre = dpt_pdesc_v4_create( + "ctrl_bk_pre", IPPROTO_TCP, + "aa:bb:cc:dd:2:11", "2.2.2.11", 21, + "aa:bb:cc:dd:1:11", "2.2.2.20", 1024, + "dp2T1", "dp1T0"); + + ctrl_bk_pst = dpt_pdesc_v4_create( + "ctrl_bk_pst", IPPROTO_TCP, + "aa:bb:cc:dd:2:11", "2.2.2.11", 21, + "aa:bb:cc:dd:1:11", "1.1.1.11", 46682, + "dp2T1", "dp1T0"); + + /* + * ftp data flow packets + */ + data_fw_pre = dpt_pdesc_v4_create( + "data_fw_pre", IPPROTO_TCP, + "aa:bb:cc:dd:1:11", "1.1.1.11", data_port, + "aa:bb:cc:dd:2:11", "2.2.2.11", 20, + "dp1T0", "dp2T1"); + + data_fw_pst = dpt_pdesc_v4_create( + "data_fw_pst", IPPROTO_TCP, + "aa:bb:cc:dd:1:11", "2.2.2.20", 1025, + "aa:bb:cc:dd:2:11", "2.2.2.11", 20, + "dp1T0", "dp2T1"); + + data_bk_pre = dpt_pdesc_v4_create( + "data_bk_pre", IPPROTO_TCP, + "aa:bb:cc:dd:2:11", "2.2.2.11", 20, + "aa:bb:cc:dd:1:11", "2.2.2.20", 1025, + "dp2T1", "dp1T0"); + + data_bk_pst = dpt_pdesc_v4_create( + "data_bk_pst", IPPROTO_TCP, + "aa:bb:cc:dd:2:11", "2.2.2.11", 
20, + "aa:bb:cc:dd:1:11", "1.1.1.11", data_port, + "dp2T1", "dp1T0"); + + /* + * Packet descriptors for ftp ctrl flow + */ + struct dpt_tcp_flow ftp_ctrl_call = { + .text[0] = '\0', /* description */ + .isn = {0, 0}, /* initial seq no */ + .desc[DPT_FORW] = { /* Forw pkt descriptors */ + .pre = ctrl_fw_pre, + .pst = ctrl_fw_pst, }, - .desc[DP_DIR_BACK] = { - .pre = &ftp7_rev_in, - .post = &ftp7_rev_out, + .desc[DPT_BACK] = { /* Back pkt descriptors */ + .pre = ctrl_bk_pre, + .pst = ctrl_bk_pst, }, - .test_cb = tcp_ftp_control_cb7, - .post_cb = NULL, - }; - - const char *ftp[] = { - [0] = NULL, - [1] = NULL, - [2] = NULL, - - /* Port: 1027 = 4*256 + 3 */ - [3] = "PORT 2,2,2,11,4,3\x0d\x0a", - [4] = NULL, - }; - - - struct dp_test_tcp_flow_pkt tcp_pkt1[] = { - {DP_DIR_FORW, TH_SYN, 0, NULL}, - {DP_DIR_BACK, TH_SYN | TH_ACK, 0, NULL}, - {DP_DIR_FORW, TH_ACK, 0, NULL}, - - /* session established */ - {DP_DIR_FORW, TH_ACK, strlen(ftp[3]), NULL}, - {DP_DIR_BACK, TH_ACK, 0, NULL}, - - /* call not completed */ - }; - assert(ARRAY_SIZE(ftp) == ARRAY_SIZE(tcp_pkt1)); - - struct ftp_ctx ftp_ctx = { - .payload = ftp, - .payload_len = ARRAY_SIZE(ftp), - .do_data_call = false, + .test_cb = NULL, /* Prep and send pkt */ + .post_cb = NULL, /* Fixup pkt exp */ }; - - /* Simulate the TCP call */ - dp_test_tcp_call(&tcp_call, tcp_pkt1, ARRAY_SIZE(tcp_pkt1), - &ftp_ctx, 0); + snprintf(ftp_ctrl_call.text, sizeof(ftp_ctrl_call), "Ctrl"); /* - * ftp control and data channels established + * Per-packet flags and data for ftp ctrl flow */ -#if 0 - dp_test_npf_print_session_table(false); -#endif + struct dpt_tcp_flow_pkt ftp_ctrl_pkts[] = { + { DPT_FORW, TH_SYN, 0, NULL, 0, NULL }, + { DPT_BACK, TH_SYN | TH_ACK, 0, NULL, 0, NULL }, + { DPT_FORW, TH_ACK, 0, NULL, 0, NULL }, - /* - * Set true to delete vrf before sessions are expired. 
- */ - bool delete_vrf = true; + /* session established */ - if (delete_vrf && vrfid != VRF_DEFAULT_ID) { /* - * Delete vrf while there are ALG sessions and tuples + * PORT command. Client specifies which client-side port that + * the server should use for data flow. Port is + * 9819. (38 == 0x26, 91 == 0x5B, 0x265B == 9819). + * Trans port is 1025 (4 * 256 + 1) */ - dp_test_nl_del_ip_addr_and_connected_vrf("dp1T0", "1.1.1.1/24", - vrfid); - dp_test_nl_del_ip_addr_and_connected_vrf("dp1T1", "2.2.2.2/24", - vrfid); - dp_test_netlink_set_interface_vrf("dp1T0", VRF_DEFAULT_ID); - dp_test_netlink_set_interface_vrf("dp1T1", VRF_DEFAULT_ID); - dp_test_netlink_del_vrf(vrfid, 0); - dp_test_npf_clear_sessions(); - } - - /* Cleanup */ + { DPT_FORW, TH_ACK, 0, + (char *)"PORT 1,1,1,11,38,91\x0d\x0a", + 0, + (char *)"PORT 2,2,2,20,4,1\x0d\x0a" }, - dp_test_npf_snat_del(snat.ifname, snat.rule, true); - - dp_test_netlink_del_neigh("dp1T0", "1.1.1.11", - "aa:bb:cc:dd:1:11"); - dp_test_netlink_del_neigh("dp1T0", "1.1.1.12", - "aa:bb:cc:dd:1:12"); - dp_test_netlink_del_neigh("dp1T1", "2.2.2.11", - "aa:bb:cc:dd:2:11"); - dp_test_netlink_del_neigh("dp1T1", "2.2.2.12", - "aa:bb:cc:dd:2:12"); - - dp_test_npf_cleanup(); + { DPT_BACK, TH_ACK, 0, NULL, 0, NULL }, - if (!delete_vrf || vrfid == VRF_DEFAULT_ID) { - /* - * Normal test cleanup. 
- */ - dp_test_nl_del_ip_addr_and_connected_vrf("dp1T0", "1.1.1.1/24", - vrfid); - dp_test_nl_del_ip_addr_and_connected_vrf("dp1T1", "2.2.2.2/24", - vrfid); + { DPT_FORW, TH_ACK | TH_FIN, 0, NULL, 0, NULL }, + { DPT_BACK, TH_ACK | TH_FIN, 0, NULL, 0, NULL }, + { DPT_FORW, TH_ACK, 0, NULL, 0, NULL }, + }; - if (vrfid != VRF_DEFAULT_ID) - dp_test_netlink_del_vrf(vrfid, 0); - } + /* + * Packet descriptors for ftp data flow + */ + struct dpt_tcp_flow ftp_data_call = { + .text[0] = '\0', /* description */ + .isn = {0, 0}, /* initial seq no */ + .desc[DPT_FORW] = { /* Forw pkt descriptors */ + .pre = data_fw_pre, + .pst = data_fw_pst, + }, + .desc[DPT_BACK] = { /* Back pkt descriptors */ + .pre = data_bk_pre, + .pst = data_bk_pst, + }, + .test_cb = NULL, /* Prep and send pkt */ + .post_cb = NULL, /* Fixup pkt exp */ + }; + snprintf(ftp_data_call.text, sizeof(ftp_data_call), "Data"); -} DP_END_TEST; + struct dpt_tcp_flow_pkt ftp_data_pkts[] = { + { DPT_BACK, TH_SYN, 0, NULL, 0, NULL }, + { DPT_FORW, TH_SYN | TH_ACK, 0, NULL, 0, NULL }, + { DPT_BACK, TH_ACK, 0, NULL, 0, NULL }, + { DPT_BACK, TH_ACK, 100, NULL, 0, NULL }, + { DPT_FORW, TH_ACK, 0, NULL, 0, NULL }, -/*************************************************************************** - * alg_ftp9 - * - * Same as alg_ftp2, except there are no parenthesis around the address and - * port in the 227 message. - * - ***************************************************************************/ + { DPT_BACK, TH_ACK | TH_FIN, 0, NULL, 0, NULL }, + { DPT_FORW, TH_ACK | TH_FIN, 0, NULL, 0, NULL }, + { DPT_BACK, TH_ACK, 0, NULL, 0, NULL }, + }; -/* - * Callback function for TCP call simulator. ftp control channel. 
- */ -static void tcp_ftp_control_cb9(const char *str, - uint pktno, enum dp_test_tcp_dir dir, - uint8_t flags, - struct dp_test_pkt_desc_t *pre, - struct dp_test_pkt_desc_t *post, - void *data, uint index) -{ - struct rte_mbuf *pre_pak, *post_pak; - struct dp_test_expected *test_exp; - struct ftp_ctx *ctx = data; - const char **ftp = ctx->payload; + /* Start of ftp ctrl flow (pkts 0 - 4) */ + dpt_tcp_call(&ftp_ctrl_call, ftp_ctrl_pkts, ARRAY_SIZE(ftp_ctrl_pkts), + 0, 4, NULL, 0); - pre_pak = dp_test_v4_pkt_from_desc(pre); - post_pak = dp_test_v4_pkt_from_desc(post); + /* ftp data flow */ + dpt_tcp_call(&ftp_data_call, ftp_data_pkts, ARRAY_SIZE(ftp_data_pkts), + 0, 0, NULL, 0); - /* - * Add ftp payload - */ - if (ftp[pktno]) { - const char *pre_ftp = ftp[pktno]; - const char *post_ftp = ftp[pktno]; - char rnatd[50]; + /* End of ftp ctrl flow (pkts 5 - end) */ + dpt_tcp_call(&ftp_ctrl_call, ftp_ctrl_pkts, ARRAY_SIZE(ftp_ctrl_pkts), + 5, 0, NULL, 0); - /* Reverse dNAT the ftp payload for 227 Response pkt */ - if (!strncmp("227 ", post_ftp, 4)) { - snprintf(rnatd, sizeof(rnatd), - "227 Entering Passive Mode " - "2,2,2,12,38,91\r\n"); - post_ftp = rnatd; - } + dp_test_npf_snat_del(snat.ifname, snat.rule, true); - dp_test_tcp_write_payload(pre_pak, strlen(pre_ftp), pre_ftp); - dp_test_tcp_write_payload(post_pak, strlen(post_ftp), post_ftp); - } + free(ctrl_fw_pre); + free(ctrl_fw_pst); + free(ctrl_bk_pre); + free(ctrl_bk_pst); - test_exp = dp_test_exp_from_desc(post_pak, post); - rte_pktmbuf_free(post_pak); - dp_test_exp_set_fwd_status(test_exp, DP_TEST_FWD_FORWARDED); + free(data_fw_pre); + free(data_fw_pst); + free(data_bk_pre); + free(data_bk_pst); - spush(test_exp->description, sizeof(test_exp->description), - "%s", str); + dp_test_npf_cleanup(); - /* Send the packet */ - dp_test_pak_receive(pre_pak, pre->rx_intf, test_exp); +} DP_END_TEST; - /* - * 227 is a Response from the server that contains the data channel - * address and port. 
So we can startup the data channel tcp session - * here. - */ - if (ctx->do_data_call && ftp[pktno] && !strncmp(ftp[pktno], "227", 3)) - ftp_data_call1(); -} /* - * alg_ftp9 + * ftp13 - SNAT, Active ftp. Tests when a session is destroyed before the + * tuple that is pointing to the session. + * + * 1. create SNAT ALG session + * 2. create ALG ftp tuple/pinhole + * 3. destroy sessions + * 4. destroy tuples */ -DP_DECL_TEST_CASE(npf_alg_ftp, alg_ftp9, NULL, NULL); -DP_START_TEST(alg_ftp9, test) +DP_DECL_TEST_CASE(npf_alg_ftp, ftp13, dpt_alg_ftp_setup, dpt_alg_ftp_teardown); +DP_START_TEST(ftp13, test) { - uint vrfid = VRF_DEFAULT_ID; - - if (vrfid != VRF_DEFAULT_ID) - dp_test_netlink_add_vrf(vrfid, 1); - - /* Setup interfaces and neighbours */ - dp_test_nl_add_ip_addr_and_connected_vrf("dp1T0", "1.1.1.1/24", - vrfid); - dp_test_nl_add_ip_addr_and_connected_vrf("dp1T1", "2.2.2.2/24", - vrfid); - - dp_test_netlink_add_neigh("dp1T0", "1.1.1.11", - "aa:bb:cc:dd:1:11"); - dp_test_netlink_add_neigh("dp1T0", "1.1.1.12", - "aa:bb:cc:dd:1:12"); - dp_test_netlink_add_neigh("dp1T1", "2.2.2.11", - "aa:bb:cc:dd:2:11"); - dp_test_netlink_add_neigh("dp1T1", "2.2.2.12", - "aa:bb:cc:dd:2:12"); + struct dp_test_pkt_desc_t *ctrl_fw_pre, *ctrl_fw_pst; + struct dp_test_pkt_desc_t *ctrl_bk_pre, *ctrl_bk_pst; /* - * Add DNAT rule. + * Add SNAT rule. 
*/ - struct dp_test_npf_nat_rule_t dnat = { - .desc = "dnat rule", + struct dp_test_npf_nat_rule_t snat = { + .desc = "snat rule", .rule = "10", - .ifname = "dp1T0", + .ifname = "dp2T1", .proto = IPPROTO_TCP, .map = "dynamic", - .from_addr = NULL, + .port_alloc = NULL, + .from_addr = "1.1.1.11", .from_port = NULL, - .to_addr = "2.2.2.12", + .to_addr = NULL, .to_port = NULL, - .trans_addr = "2.2.2.11", + .trans_addr = "2.2.2.20", .trans_port = NULL }; - dp_test_npf_dnat_add(&dnat, true); - - ftp1_fwd_in.l4.tcp.sport = 46682; - ftp1_fwd_in.l4.tcp.dport = 21; - - ftp1_fwd_out.l4.tcp.sport = 46682; - ftp1_fwd_out.l4.tcp.dport = 21; - - ftp1_rev_in.l4.tcp.sport = 21; - ftp1_rev_in.l4.tcp.dport = 46682; - - ftp1_rev_out.l4.tcp.sport = 21; - ftp1_rev_out.l4.tcp.dport = 46682; - - ftp1_fwd_in.l3_dst = "2.2.2.12"; - ftp1_fwd_out.l3_dst = "2.2.2.11"; - - ftp1_rev_in.l3_src = "2.2.2.11"; - ftp1_rev_out.l3_src = "2.2.2.12"; + dp_test_npf_snat_add(&snat, true); - struct dp_test_tcp_call tcp_call = { - .str[0] = '\0', - .isn = {0, 0}, - .desc[DP_DIR_FORW] = { - .pre = &ftp1_fwd_in, - .post = &ftp1_fwd_out, + /* + * ftp control flow packets + */ + ctrl_fw_pre = dpt_pdesc_v4_create( + "ctrl_fw_pre", IPPROTO_TCP, + "aa:bb:cc:dd:1:11", "1.1.1.11", 46682, + "aa:bb:cc:dd:2:11", "2.2.2.11", 21, + "dp1T0", "dp2T1"); + + ctrl_fw_pst = dpt_pdesc_v4_create( + "ctrl_fw_pst", IPPROTO_TCP, + "aa:bb:cc:dd:1:11", "2.2.2.20", 46682, + "aa:bb:cc:dd:2:11", "2.2.2.11", 21, + "dp1T0", "dp2T1"); + + ctrl_bk_pre = dpt_pdesc_v4_create( + "ctrl_bk_pre", IPPROTO_TCP, + "aa:bb:cc:dd:2:11", "2.2.2.11", 21, + "aa:bb:cc:dd:1:11", "2.2.2.20", 46682, + "dp2T1", "dp1T0"); + + ctrl_bk_pst = dpt_pdesc_v4_create( + "ctrl_bk_pst", IPPROTO_TCP, + "aa:bb:cc:dd:2:11", "2.2.2.11", 21, + "aa:bb:cc:dd:1:11", "1.1.1.11", 46682, + "dp2T1", "dp1T0"); + + /* + * Packet descriptors for ftp ctrl flow + */ + struct dpt_tcp_flow ftp_ctrl_call = { + .text[0] = '\0', /* description */ + .isn = {0, 0}, /* initial seq no */ + 
.desc[DPT_FORW] = { /* Forw pkt descriptors */ + .pre = ctrl_fw_pre, + .pst = ctrl_fw_pst, }, - .desc[DP_DIR_BACK] = { - .pre = &ftp1_rev_in, - .post = &ftp1_rev_out, + .desc[DPT_BACK] = { /* Back pkt descriptors */ + .pre = ctrl_bk_pre, + .pst = ctrl_bk_pst, }, - .test_cb = tcp_ftp_control_cb9, - .post_cb = NULL, + .test_cb = NULL, /* Prep and send pkt */ + .post_cb = NULL, /* Fixup pkt exp */ }; + snprintf(ftp_ctrl_call.text, sizeof(ftp_ctrl_call), "Ctrl"); - const char *ftp[] = { - [0] = NULL, - [1] = NULL, - [2] = NULL, - - [3] = "SYST\x0d\x0a", - [4] = "215 UNIX Type: L8\x0d\x0a", - - [5] = "TYPE I\x0d\x0a", - [6] = "200 Switching to Binary mode.\x0d\x0a", + /* + * Per-packet flags and data for ftp ctrl flow + */ + struct dpt_tcp_flow_pkt ftp_ctrl_pkts[] = { + { DPT_FORW, TH_SYN, 0, NULL, 0, NULL }, + { DPT_BACK, TH_SYN | TH_ACK, 0, NULL, 0, NULL }, + { DPT_FORW, TH_ACK, 0, NULL, 0, NULL }, - [7] = "PASV\x0d\x0a", + /* session established */ /* - * Response: 227. Server telling client which address and - * port to use for data channel. Address is 2.2.2.11, port is + * PORT command. Client specifies which client-side port that + * the server should use for data flow. Port is * 9819. (38 == 0x26, 91 == 0x5B, 0x265B == 9819) */ - [8] = "227 Entering Passive Mode 2,2,2,11,38,91\x0d\x0a", + { DPT_FORW, TH_ACK, 0, + (char *)"PORT 1,1,1,11,38,91\x0d\x0a", + 0, + (char *)"PORT 2,2,2,20,38,91\x0d\x0a" }, - /* - * Here we get a new TCP call opened for the data channel, - * from 1.1.1.11:46682 to 2.2.2.11:9819 - */ + { DPT_BACK, TH_ACK, 0, NULL, 0, NULL }, - [9] = NULL, - [10] = NULL, - [11] = NULL, + { DPT_FORW, TH_ACK | TH_FIN, 0, NULL, 0, NULL }, + { DPT_BACK, TH_ACK | TH_FIN, 0, NULL, 0, NULL }, + { DPT_FORW, TH_ACK, 0, NULL, 0, NULL }, }; + /* + * Start of ftp ctrl flow (pkts 0 - 4). Just enough to create a tuple. 
+ */ + dpt_tcp_call(&ftp_ctrl_call, ftp_ctrl_pkts, ARRAY_SIZE(ftp_ctrl_pkts), + 0, 4, NULL, 0); - struct dp_test_tcp_flow_pkt tcp_pkt1[] = { - {DP_DIR_FORW, TH_SYN, 0, NULL}, - {DP_DIR_BACK, TH_SYN | TH_ACK, 0, NULL}, - {DP_DIR_FORW, TH_ACK, 0, NULL}, - - /* session established */ - {DP_DIR_FORW, TH_ACK, strlen(ftp[3]), NULL}, - {DP_DIR_BACK, TH_ACK, strlen(ftp[4]), NULL}, - - {DP_DIR_FORW, TH_ACK, strlen(ftp[5]), NULL}, - {DP_DIR_BACK, TH_ACK, strlen(ftp[6]), NULL}, - - {DP_DIR_FORW, TH_ACK, strlen(ftp[7]), NULL}, - {DP_DIR_BACK, TH_ACK, strlen(ftp[8]), NULL}, - - {DP_DIR_FORW, TH_ACK | TH_FIN, 0, NULL}, - {DP_DIR_BACK, TH_ACK | TH_FIN, 0, NULL}, - {DP_DIR_FORW, TH_ACK, 0, NULL}, - }; - assert(ARRAY_SIZE(ftp) == ARRAY_SIZE(tcp_pkt1)); + /* + * Clear session before tuples + */ + dp_test_npf_clear_sessions(); + npf_alg_flush_all(); - struct ftp_ctx ftp_ctx = { - .payload = ftp, - .payload_len = ARRAY_SIZE(ftp), - .do_data_call = true, /* Add data call */ - }; + dp_test_npf_snat_del(snat.ifname, snat.rule, true); - /* Simulate the TCP call */ - dp_test_tcp_call(&tcp_call, tcp_pkt1, ARRAY_SIZE(tcp_pkt1), - &ftp_ctx, 0); + free(ctrl_fw_pre); + free(ctrl_fw_pst); + free(ctrl_bk_pre); + free(ctrl_bk_pst); - /* Cleanup */ + dp_test_npf_cleanup(); - dp_test_npf_dnat_del(dnat.ifname, dnat.rule, true); +} DP_END_TEST; - dp_test_npf_cleanup(); - dp_test_nl_del_ip_addr_and_connected_vrf("dp1T0", "1.1.1.1/24", - vrfid); - dp_test_nl_del_ip_addr_and_connected_vrf("dp1T1", "2.2.2.2/24", - vrfid); +static void dpt_alg_ftp_setup(void) +{ + /* Setup interfaces and neighbours */ + dp_test_nl_add_ip_addr_and_connected("dp1T0", "1.1.1.1/24"); + dp_test_nl_add_ip_addr_and_connected("dp2T1", "2.2.2.2/24"); - dp_test_netlink_del_neigh("dp1T0", "1.1.1.11", - "aa:bb:cc:dd:1:11"); - dp_test_netlink_del_neigh("dp1T0", "1.1.1.12", - "aa:bb:cc:dd:1:12"); - dp_test_netlink_del_neigh("dp1T1", "2.2.2.11", - "aa:bb:cc:dd:2:11"); - dp_test_netlink_del_neigh("dp1T1", "2.2.2.12", - 
"aa:bb:cc:dd:2:12"); + dp_test_netlink_add_neigh("dp1T0", "1.1.1.11", "aa:bb:cc:dd:1:11"); + dp_test_netlink_add_neigh("dp1T0", "1.1.1.12", "aa:bb:cc:dd:1:12"); + dp_test_netlink_add_neigh("dp2T1", "2.2.2.11", "aa:bb:cc:dd:2:11"); + dp_test_netlink_add_neigh("dp2T1", "2.2.2.12", "aa:bb:cc:dd:2:12"); +} - if (vrfid != VRF_DEFAULT_ID) - dp_test_netlink_del_vrf(vrfid, 0); +static void dpt_alg_ftp_teardown(void) +{ + dp_test_nl_del_ip_addr_and_connected("dp1T0", "1.1.1.1/24"); + dp_test_nl_del_ip_addr_and_connected("dp2T1", "2.2.2.2/24"); -} DP_END_TEST; + dp_test_netlink_del_neigh("dp1T0", "1.1.1.11", "aa:bb:cc:dd:1:11"); + dp_test_netlink_del_neigh("dp1T0", "1.1.1.12", "aa:bb:cc:dd:1:12"); + dp_test_netlink_del_neigh("dp2T1", "2.2.2.11", "aa:bb:cc:dd:2:11"); + dp_test_netlink_del_neigh("dp2T1", "2.2.2.12", "aa:bb:cc:dd:2:12"); +} diff --git a/tests/whole_dp/src/dp_test_npf_alg_lib.c b/tests/whole_dp/src/dp_test_npf_alg_lib.c index bb38ca9d..288fed0f 100644 --- a/tests/whole_dp/src/dp_test_npf_alg_lib.c +++ b/tests/whole_dp/src/dp_test_npf_alg_lib.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -16,47 +16,16 @@ #include "main.h" #include "dp_test.h" -#include "dp_test_lib.h" -#include "dp_test_lib_intf.h" -#include "dp_test_pktmbuf_lib.h" -#include "dp_test_netlink_state.h" +#include "dp_test_lib_internal.h" +#include "dp_test_lib_intf_internal.h" +#include "dp_test_pktmbuf_lib_internal.h" +#include "dp_test_netlink_state_internal.h" #include "dp_test_console.h" #include "dp_test_json_utils.h" #include "dp_test_npf_sess_lib.h" #include "dp_test_npf_lib.h" #include "dp_test_npf_alg_lib.h" -/* - * dp_test_npf_set_alg_port(1, "sip", 5090) - */ -void -_dp_test_npf_set_alg_port(uint iid, const char *name, uint16_t port, - const char *file, int line) -{ - char cmd[80]; - - spush(cmd, sizeof(cmd), "npf-ut fw alg %u set %s port %u", - iid, name, port); - _dp_test_npf_cmd(cmd, false, file, line); -} - -/* - * dp_test_npf_delete_alg_port(1, "sip", 5090) - * - * Deleting a non-default port will cause the default port to be added back - * for that ALG. - */ -void -_dp_test_npf_delete_alg_port(uint iid, const char *name, uint16_t port, - const char *file, int line) -{ - char cmd[80]; - - spush(cmd, sizeof(cmd), "npf-ut fw alg %u delete %s port %u", - iid, name, port); - _dp_test_npf_cmd(cmd, false, file, line); -} - /* * *The output of "npf-op fw dump-alg" looks like: @@ -335,6 +304,7 @@ _dp_test_npf_alg_tuple_verify(uint npf_id, const char *alg, uint8_t proto, l += spush(str + l, sizeof(str) - l, ", srcip: %s", srcip); if (dstip) l += spush(str + l, sizeof(str) - l, ", dstip: %s", dstip); + (void) l; jobj = dp_test_npf_json_get_alg_tuple(npf_id, alg, proto, dport, sport, dstip, srcip); diff --git a/tests/whole_dp/src/dp_test_npf_alg_lib.h b/tests/whole_dp/src/dp_test_npf_alg_lib.h index 3423d218..46f8dff0 100644 --- a/tests/whole_dp/src/dp_test_npf_alg_lib.h +++ b/tests/whole_dp/src/dp_test_npf_alg_lib.h @@ -1,4 +1,5 @@ /* + * Copyright (c) 2019, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015 by Brocade Communications Systems, Inc. 
* All rights reserved. * @@ -15,22 +16,6 @@ #include "dp_test_json_utils.h" -/* - * Add or delete an ALG port - */ -void -_dp_test_npf_set_alg_port(uint iid, const char *name, uint16_t port, - const char *file, int line); - -#define dp_test_npf_set_alg_port(iid, name, port) \ - _dp_test_npf_set_alg_port(iid, name, port, __FILE__, __LINE__) - -void -_dp_test_npf_delete_alg_port(uint iid, const char *name, uint16_t port, - const char *file, int line); - -#define dp_test_npf_delete_alg_port(iid, name, port) \ - _dp_test_npf_delete_alg_port(iid, name, port, __FILE__, __LINE__) /* * Verify that an ALF tuple exists diff --git a/tests/whole_dp/src/dp_test_npf_alg_rpc.c b/tests/whole_dp/src/dp_test_npf_alg_rpc.c index f7010e2c..f7d3b77d 100644 --- a/tests/whole_dp/src/dp_test_npf_alg_rpc.c +++ b/tests/whole_dp/src/dp_test_npf_alg_rpc.c @@ -1,9 +1,9 @@ /* - * Copyright (c) 2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2019-2021, AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only * - * Whole dataplane npf alg rpc tests. + * Sun rpc alg tests. * */ @@ -18,14 +18,14 @@ #include "dp_test.h" #include "dp_test_controller.h" #include "dp_test_cmd_state.h" -#include "dp_test_netlink_state.h" -#include "dp_test_lib.h" +#include "dp_test_netlink_state_internal.h" +#include "dp_test_lib_internal.h" #include "dp_test_str.h" #include "dp_test_lib_exp.h" -#include "dp_test_lib_intf.h" +#include "dp_test_lib_intf_internal.h" #include "dp_test_lib_pkt.h" #include "dp_test_lib_tcp.h" -#include "dp_test_pktmbuf_lib.h" +#include "dp_test_pktmbuf_lib_internal.h" #include "dp_test_console.h" #include "dp_test_json_utils.h" #include "dp_test_npf_sess_lib.h" @@ -33,6 +33,13 @@ #include "dp_test_npf_nat_lib.h" #include "dp_test_npf_alg_lib.h" +/* + * alg_rpc1b -- Stateful firewall and RPC Portmapper + * alg_rpc2 -- SNAT and RPC Portmapper + * alg_rpc3 -- SNAT and RPC Portmapper on non-default vrf. 
+ * alg_rpc4 -- DNAT and RPC Portmapper + * alg_rpc5 -- SNAT and RPC Portmapper, with CGNAT also cfgd + */ struct nat_ctx { bool do_check; @@ -53,12 +60,19 @@ _pak_rcv_nat_udp(const char *rx_intf, const char *pre_smac, int pre_vlan, const char *post_daddr, uint16_t post_dport, const char *post_dmac, int post_vlan, const char *tx_intf, int status, char *payload, uint payload_len, + uint16_t eth_type, const char *file, const char *func, int line); #define pak_rcv_nat_udp(_a, _b, _c, _d, _e, _f, _g, _h, \ _i, _j, _k, _l, _m, _n, _o, _p, _q) \ _pak_rcv_nat_udp(_a, _b, _c, _d, _e, _f, _g, _h, \ _i, _j, _k, _l, _m, _n, _o, _p, _q, \ - __FILE__, __func__, __LINE__) + RTE_ETHER_TYPE_IPV4, __FILE__, __func__, __LINE__) + +#define pak_rcv_nat_udp_v6(_a, _b, _c, _d, _e, _f, _g, _h, \ + _i, _j, _k, _l, _m, _n, _o, _p, _q) \ + _pak_rcv_nat_udp(_a, _b, _c, _d, _e, _f, _g, _h, \ + _i, _j, _k, _l, _m, _n, _o, _p, _q, \ + RTE_ETHER_TYPE_IPV6, __FILE__, __func__, __LINE__) /* * The rpc tuple is setup in the same direction as the SNAT. Therefore the @@ -79,7 +93,7 @@ npf_rpc_out_fw(bool enable) .rule = "10", .pass = PASS, .stateful = false, - .npf = "proto=17 dst-port=111" + .npf = "proto-final=17 dst-port=111" }, RULE_DEF_BLOCK, NULL_RULE @@ -101,35 +115,260 @@ npf_rpc_out_fw(bool enable) dp_test_npf_fw_del(&fw, false); } +static void dpt_alg_rpc_setup(void); +static void dpt_alg_rpc_teardown(void); + DP_DECL_TEST_SUITE(npf_alg_rpc); /* - * alg_rpc1 -- Tests RPC Portmapper + * alg_rpc1b -- Stateful firewall and RPC Portmapper */ -DP_DECL_TEST_CASE(npf_alg_rpc, alg_rpc1, NULL, NULL); -DP_START_TEST(alg_rpc1, test) +DP_DECL_TEST_CASE(npf_alg_rpc, alg_rpc1b, dpt_alg_rpc_setup, + dpt_alg_rpc_teardown); +DP_START_TEST(alg_rpc1b, test) { - dp_test_nl_add_ip_addr_and_connected("dp1T0", "1.1.1.1/24"); - dp_test_nl_add_ip_addr_and_connected("dp2T1", "2.2.2.1/24"); + /* + * Stateful firewall rule to match on UDP pkts to port 111. This + * matches the ctrl flow but not the data flow. 
The data flow only + * gets through because of the alg child session. + */ + struct dp_test_npf_rule_t rset[] = { + { + .rule = "10", + .pass = PASS, + .stateful = true, + .npf = "proto-final=17 dst-port=111" + }, + RULE_DEF_BLOCK, + NULL_RULE + }; + + struct dp_test_npf_ruleset_t fw = { + .rstype = "fw-out", + .name = "OUT_FW", + .enable = 1, + .attach_point = "dp2T1", + .fwd = FWD, + .dir = "out", + .rules = rset + }; + + dp_test_npf_fw_add(&fw, false); /* - * Inside + * RPC Call */ - dp_test_netlink_add_neigh("dp1T0", "1.1.1.2", "aa:bb:cc:dd:1:a2"); - dp_test_netlink_add_neigh("dp1T0", "1.1.1.3", "aa:bb:cc:dd:1:a3"); + char rpc_call[] = {0x01, 0x02, 0x03, 0x04, /* xid (host order) */ + 0x00, 0x00, 0x00, 0x00, /* type (0=call) */ + 0x00, 0x00, 0x00, 0x02, /* RPC version */ + 0x00, 0x01, 0x86, 0xa0, /* Program (100000) */ + 0x00, 0x00, 0x00, 0x00, /* Program version */ + 0x00, 0x00, 0x00, 0x03, /* Procedure (3=getport) */ + 0x00, 0x00, 0x00, 0x00, /* Auth flavor */ + 0x00, 0x00, 0x00, 0x00, /* Auth length */ + 0x00, 0x00, 0x00, 0x00, /* Verifier flavor */ + 0x00, 0x00, 0x00, 0x00, /* Verifier flavor length */ + 0x00, 0x01, 0x86, 0xa0, /* Pmap Program (100000) */ + }; + + pak_rcv_nat_udp("dp1T0", "aa:bb:cc:dd:1:a2", 0, + "1.1.1.2", 50618, "2.2.2.2", 111, + "1.1.1.2", 50618, "2.2.2.2", 111, + "aa:bb:cc:dd:2:b2", 0, "dp2T1", + DP_TEST_FWD_FORWARDED, rpc_call, sizeof(rpc_call)); + /* - * Outside + * RPC Reply + * + * Will create tuple: + * proto 17, dport 1025 Src 1.1.1.2, Dst 2.2.2.2, [MATCH_ANY_SPORT] */ - dp_test_netlink_add_neigh("dp2T1", "2.2.2.2", "aa:bb:cc:dd:2:b2"); - dp_test_netlink_add_neigh("dp2T1", "2.2.2.3", "aa:bb:cc:dd:2:b3"); + char rpc_reply[] = {0x01, 0x02, 0x03, 0x04, /* xid (host order) */ + 0x00, 0x00, 0x00, 0x01, /* type (1=reply) */ + 0x00, 0x00, 0x00, 0x00, /* Reply st (0=accepted) */ + 0x00, 0x00, 0x00, 0x00, /* Auth */ + 0x00, 0x00, 0x00, 0x00, /* Auth length */ + 0x00, 0x00, 0x00, 0x00, /* Accept st (0=success) */ + 0x00, 0x00, 0x04, 0x01, 
/* Port = 1025 */ + }; + + pak_rcv_nat_udp("dp2T1", "aa:bb:cc:dd:2:b2", 0, + "2.2.2.2", 111, "1.1.1.2", 50618, + "2.2.2.2", 111, "1.1.1.2", 50618, + "aa:bb:cc:dd:1:a2", 0, "dp1T0", + DP_TEST_FWD_FORWARDED, rpc_reply, sizeof(rpc_reply)); + + + /* + * RPC Data + * + * Finds the above tuple, creates a child session, expires the tuple + */ + char rpc_data[] = {0x01, 0x02, 0x03, 0x04 }; + + pak_rcv_nat_udp("dp1T0", "aa:bb:cc:dd:1:a2", 0, + "1.1.1.2", 30123, "2.2.2.2", 1025, + "1.1.1.2", 30123, "2.2.2.2", 1025, + "aa:bb:cc:dd:2:b2", 0, "dp2T1", + DP_TEST_FWD_FORWARDED, + rpc_data, sizeof(rpc_data)); + + + pak_rcv_nat_udp("dp2T1", "aa:bb:cc:dd:2:b2", 0, + "2.2.2.2", 1025, "1.1.1.2", 30123, + "2.2.2.2", 1025, "1.1.1.2", 30123, + "aa:bb:cc:dd:1:a2", 0, "dp1T0", + DP_TEST_FWD_FORWARDED, + rpc_data, sizeof(rpc_data)); + + dp_test_npf_fw_del(&fw, false); + +} DP_END_TEST; + + +/* + * alg_rpc1c -- Stateful firewall and RPC Portmapper. IPv6. + */ +DP_DECL_TEST_CASE(npf_alg_rpc, alg_rpc1c, NULL, NULL); +DP_START_TEST(alg_rpc1c, test) +{ + /* Setup interfaces and neighbours */ + dp_test_nl_add_ip_addr_and_connected("dp1T0", "2001:1:1::1/64"); + dp_test_nl_add_ip_addr_and_connected("dp2T1", "2002:2:2::2/64"); + + dp_test_netlink_add_neigh("dp1T0", "2001:1:1::2", + "aa:bb:cc:dd:1:a1"); + dp_test_netlink_add_neigh("dp2T1", "2002:2:2::1", + "aa:bb:cc:dd:2:b1"); + /* + * Stateful firewall rule to match on UDP pkts to port 111. This + * matches the ctrl flow but not the data flow. The data flow only + * gets through because of the alg child session.
+ */ + struct dp_test_npf_rule_t rset[] = { + { + .rule = "10", + .pass = PASS, + .stateful = true, + .npf = "proto-final=17 dst-port=111" + }, + RULE_DEF_BLOCK, + NULL_RULE + }; + + struct dp_test_npf_ruleset_t fw = { + .rstype = "fw-out", + .name = "OUT_FW", + .enable = 1, + .attach_point = "dp2T1", + .fwd = FWD, + .dir = "out", + .rules = rset + }; + + dp_test_npf_fw_add(&fw, false); + + /* + * RPC Call + */ + char rpc_call[] = {0x01, 0x02, 0x03, 0x04, /* xid (host order) */ + 0x00, 0x00, 0x00, 0x00, /* type (0=call) */ + 0x00, 0x00, 0x00, 0x02, /* RPC version */ + 0x00, 0x01, 0x86, 0xa0, /* Program (100000) */ + 0x00, 0x00, 0x00, 0x00, /* Program version */ + 0x00, 0x00, 0x00, 0x03, /* Procedure (3=getport) */ + 0x00, 0x00, 0x00, 0x00, /* Auth flavor */ + 0x00, 0x00, 0x00, 0x00, /* Auth length */ + 0x00, 0x00, 0x00, 0x00, /* Verifier flavor */ + 0x00, 0x00, 0x00, 0x00, /* Verifier flavor length */ + 0x00, 0x01, 0x86, 0xa0, /* Pmap Program (100000) */ + }; + + pak_rcv_nat_udp_v6("dp1T0", "aa:bb:cc:dd:1:a1", 0, + "2001:1:1::2", 50618, "2002:2:2::1", 111, + "2001:1:1::2", 50618, "2002:2:2::1", 111, + "aa:bb:cc:dd:2:b1", 0, "dp2T1", + DP_TEST_FWD_FORWARDED, + rpc_call, sizeof(rpc_call)); + + + /* + * RPC Reply + * + * Will create tuple: + * proto 17 dport 1025 Src 2001:1:1::2 Dst 2002:2:2::1 [MATCH_ANY_SPORT] + */ + char rpc_reply[] = {0x01, 0x02, 0x03, 0x04, /* xid (host order) */ + 0x00, 0x00, 0x00, 0x01, /* type (1=reply) */ + 0x00, 0x00, 0x00, 0x00, /* Reply st (0=accepted) */ + 0x00, 0x00, 0x00, 0x00, /* Auth */ + 0x00, 0x00, 0x00, 0x00, /* Auth length */ + 0x00, 0x00, 0x00, 0x00, /* Accept st (0=success) */ + 0x00, 0x00, 0x04, 0x01, /* Port = 1025 */ + }; + + pak_rcv_nat_udp_v6("dp2T1", "aa:bb:cc:dd:2:b1", 0, + "2002:2:2::1", 111, "2001:1:1::2", 50618, + "2002:2:2::1", 111, "2001:1:1::2", 50618, + "aa:bb:cc:dd:1:a1", 0, "dp1T0", + DP_TEST_FWD_FORWARDED, + rpc_reply, sizeof(rpc_reply)); + + + /* + * RPC Data + * + * Finds the above tuple, creates a child session,
expired the tuple + */ + char rpc_data[] = {0x01, 0x02, 0x03, 0x04 }; + + pak_rcv_nat_udp_v6("dp1T0", "aa:bb:cc:dd:1:a1", 0, + "2001:1:1::2", 30123, "2002:2:2::1", 1025, + "2001:1:1::2", 30123, "2002:2:2::1", 1025, + "aa:bb:cc:dd:2:b1", 0, "dp2T1", + DP_TEST_FWD_FORWARDED, + rpc_data, sizeof(rpc_data)); + + + pak_rcv_nat_udp_v6("dp2T1", "aa:bb:cc:dd:2:b1", 0, + "2002:2:2::1", 1025, "2001:1:1::2", 30123, + "2002:2:2::1", 1025, "2001:1:1::2", 30123, + "aa:bb:cc:dd:1:a1", 0, "dp1T0", + DP_TEST_FWD_FORWARDED, + rpc_data, sizeof(rpc_data)); + + dp_test_npf_fw_del(&fw, false); + + dp_test_npf_cleanup(); + + dp_test_netlink_del_neigh("dp1T0", "2001:1:1::2", + "aa:bb:cc:dd:1:a1"); + dp_test_netlink_del_neigh("dp2T1", "2002:2:2::1", + "aa:bb:cc:dd:2:b1"); + + /* Remove the interface addresses added at the start of the test */ + dp_test_nl_del_ip_addr_and_connected("dp1T0", "2001:1:1::1/64"); + dp_test_nl_del_ip_addr_and_connected("dp2T1", "2002:2:2::2/64"); + +} DP_END_TEST; + + +/* + * alg_rpc2 -- SNAT and RPC Portmapper + */ +DP_DECL_TEST_CASE(npf_alg_rpc, alg_rpc2, dpt_alg_rpc_setup, + dpt_alg_rpc_teardown); +DP_START_TEST(alg_rpc2, test) +{ struct dp_test_npf_nat_rule_t snat = { .desc = "snat rule", .rule = "10", .ifname = "dp2T1", .proto = IPPROTO_UDP, .map = "dynamic", + .port_alloc = NULL, .from_addr = "1.1.1.0/24", .from_port = NULL, .to_addr = NULL, @@ -202,6 +441,13 @@ DP_START_TEST(alg_rpc1, test) rpc_data, sizeof(rpc_data)); + pak_rcv_nat_udp("dp2T1", "aa:bb:cc:dd:2:b2", 0, + "2.2.2.2", 1025, "2.2.2.254", 30123, + "2.2.2.2", 1025, "1.1.1.2", 30123, + "aa:bb:cc:dd:1:a2", 0, "dp1T0", + DP_TEST_FWD_FORWARDED, + rpc_data, sizeof(rpc_data)); + if (0) { dp_test_npf_print_session_table(false); dp_test_npf_print_nat_sessions(""); @@ -209,29 +455,15 @@ DP_START_TEST(alg_rpc1, test) dp_test_npf_snat_del(snat.ifname, snat.rule, true); npf_rpc_out_fw(false); - dp_test_npf_cleanup(); - - /* Cleanup */ - dp_test_netlink_del_neigh("dp1T0", "1.1.1.2", "aa:bb:cc:dd:1:a2"); -
dp_test_netlink_del_neigh("dp1T0", "1.1.1.3", "aa:bb:cc:dd:1:a3"); - - dp_test_netlink_del_neigh("dp2T1", "2.2.2.2", "aa:bb:cc:dd:2:b2"); - dp_test_netlink_del_neigh("dp2T1", "2.2.2.3", "aa:bb:cc:dd:2:b3"); - - dp_test_nl_del_ip_addr_and_connected("dp1T0", "1.1.1.1/24"); - dp_test_nl_del_ip_addr_and_connected("dp2T1", "2.2.2.1/24"); - - dp_test_npf_cleanup(); } DP_END_TEST; /* - * alg_rpc2 -- Tests RPC Portmapper. vrf is deleted while there is a non-keep - * tuple. + * alg_rpc3 -- SNAT and RPC Portmapper on non-default vrf. */ -DP_DECL_TEST_CASE(npf_alg_rpc, alg_rpc2, NULL, NULL); -DP_START_TEST(alg_rpc2, test) +DP_DECL_TEST_CASE(npf_alg_rpc, alg_rpc3, NULL, NULL); +DP_START_TEST(alg_rpc3, test) { uint vrfid = 69; @@ -261,6 +493,7 @@ DP_START_TEST(alg_rpc2, test) .ifname = "dp2T1", .proto = IPPROTO_UDP, .map = "dynamic", + .port_alloc = NULL, .from_addr = "1.1.1.0/24", .from_port = NULL, .to_addr = NULL, @@ -297,7 +530,7 @@ DP_START_TEST(alg_rpc2, test) * RPC Reply * * Will create tuple: - * proto 17, dport 1025 Src 1.1.1.2, Dst 2.2.2.2, [MATCH_ANY_SPORT] + * proto 17, dport 1025 Src 1.1.1.2, Dst 2.2.2.254, [MATCH_ANY_SPORT] */ char rpc_reply[] = {0x01, 0x02, 0x03, 0x04, /* xid (host order) */ 0x00, 0x00, 0x00, 0x01, /* type (1=reply) */ @@ -364,6 +597,244 @@ DP_START_TEST(alg_rpc2, test) } DP_END_TEST; +/* + * alg_rpc4 -- DNAT and RPC Portmapper + */ +DP_DECL_TEST_CASE(npf_alg_rpc, alg_rpc4, dpt_alg_rpc_setup, + dpt_alg_rpc_teardown); +DP_START_TEST(alg_rpc4, test) +{ + struct dp_test_npf_nat_rule_t dnat = { + .desc = "dnat rule", + .rule = "10", + .ifname = "dp1T0", + .proto = IPPROTO_UDP, + .map = "dynamic", + .port_alloc = NULL, + .from_addr = NULL, + .from_port = NULL, + .to_addr = "2.2.2.254", + .to_port = NULL, + .trans_addr = "2.2.2.2", + .trans_port = NULL + }; + + dp_test_npf_dnat_add(&dnat, true); + + /* + * RPC Call + */ + char rpc_call[] = {0x01, 0x02, 0x03, 0x04, /* xid (host order) */ + 0x00, 0x00, 0x00, 0x00, /* type (0=call) */ + 0x00, 0x00, 0x00, 
0x02, /* RPC version */ + 0x00, 0x01, 0x86, 0xa0, /* Program (100000) */ + 0x00, 0x00, 0x00, 0x00, /* Program version */ + 0x00, 0x00, 0x00, 0x03, /* Procedure (3=getport) */ + 0x00, 0x00, 0x00, 0x00, /* Auth flavor */ + 0x00, 0x00, 0x00, 0x00, /* Auth length */ + 0x00, 0x00, 0x00, 0x00, /* Verifier flavor */ + 0x00, 0x00, 0x00, 0x00, /* Verifier flavor length */ + 0x00, 0x01, 0x86, 0xa0, /* Pmap Program (100000) */ + }; + + pak_rcv_nat_udp("dp1T0", "aa:bb:cc:dd:1:a2", 0, + "1.1.1.2", 50618, "2.2.2.254", 111, + "1.1.1.2", 50618, "2.2.2.2", 111, + "aa:bb:cc:dd:2:b2", 0, "dp2T1", + DP_TEST_FWD_FORWARDED, rpc_call, sizeof(rpc_call)); + + + /* + * RPC Reply + * + * Will create tuple: + * proto 17, dport 1025 Src 1.1.1.2, Dst 2.2.2.254, [MATCH_ANY_SPORT] + */ + char rpc_reply[] = {0x01, 0x02, 0x03, 0x04, /* xid (host order) */ + 0x00, 0x00, 0x00, 0x01, /* type (1=reply) */ + 0x00, 0x00, 0x00, 0x00, /* Reply st (0=accepted) */ + 0x00, 0x00, 0x00, 0x00, /* Auth */ + 0x00, 0x00, 0x00, 0x00, /* Auth length */ + 0x00, 0x00, 0x00, 0x00, /* Accept st (0=success) */ + 0x00, 0x00, 0x04, 0x01, /* Port = 1025 */ + }; + + pak_rcv_nat_udp("dp2T1", "aa:bb:cc:dd:2:b2", 0, + "2.2.2.2", 111, "1.1.1.2", 50618, + "2.2.2.254", 111, "1.1.1.2", 50618, + "aa:bb:cc:dd:1:a2", 0, "dp1T0", + DP_TEST_FWD_FORWARDED, rpc_reply, sizeof(rpc_reply)); + + + /* + * RPC Data + * + * Finds the above tuple, creates a child session, expired the tuple + */ + char rpc_data[] = {0x01, 0x02, 0x03, 0x04 }; + + pak_rcv_nat_udp("dp1T0", "aa:bb:cc:dd:1:a2", 0, + "1.1.1.2", 30123, "2.2.2.254", 1025, + "1.1.1.2", 30123, "2.2.2.2", 1025, + "aa:bb:cc:dd:2:b2", 0, "dp2T1", + DP_TEST_FWD_FORWARDED, + rpc_data, sizeof(rpc_data)); + + pak_rcv_nat_udp("dp2T1", "aa:bb:cc:dd:2:b2", 0, + "2.2.2.2", 1025, "1.1.1.2", 30123, + "2.2.2.254", 1025, "1.1.1.2", 30123, + "aa:bb:cc:dd:1:a2", 0, "dp1T0", + DP_TEST_FWD_FORWARDED, + rpc_data, sizeof(rpc_data)); + + if (0) { + dp_test_npf_print_session_table(false); + 
dp_test_npf_print_nat_sessions(""); + } + + dp_test_npf_dnat_del(dnat.ifname, dnat.rule, true); + +} DP_END_TEST; + + +/* + * alg_rpc5 -- SNAT and RPC Portmapper, with CGNAT also cfgd + */ +DP_DECL_TEST_CASE(npf_alg_rpc, alg_rpc5, dpt_alg_rpc_setup, + dpt_alg_rpc_teardown); +DP_START_TEST(alg_rpc5, test) +{ + struct dp_test_npf_nat_rule_t snat = { + .desc = "snat rule", + .rule = "10", + .ifname = "dp2T1", + .proto = IPPROTO_UDP, + .map = "dynamic", + .port_alloc = NULL, + .from_addr = "1.1.1.0/24", + .from_port = NULL, + .to_addr = NULL, + .to_port = NULL, + .trans_addr = "2.2.2.254", + .trans_port = NULL + }; + + dp_test_npf_snat_add(&snat, true); + + npf_rpc_out_fw(true); + + /* + * Add CGNAT config. Matches on same source addresses as SNAT, but + * maps to different addresses. + */ + dp_test_npf_cmd_fmt(false, "nat-ut pool add POOL1 type=cgnat " + "address-range=RANGE1/2.2.2.100-2.2.2.199"); + cgnat_policy_add2("POLICY1", 10, "1.1.1.0/24", "POOL1", + "dp2T1", NULL); + + dp_test_npf_cmd_fmt(false, "cgn-ut snat-alg-bypass on"); + + /* + * RPC Call + */ + char rpc_call[] = {0x01, 0x02, 0x03, 0x04, /* xid (host order) */ + 0x00, 0x00, 0x00, 0x00, /* type (0=call) */ + 0x00, 0x00, 0x00, 0x02, /* RPC version */ + 0x00, 0x01, 0x86, 0xa0, /* Program (100000) */ + 0x00, 0x00, 0x00, 0x00, /* Program version */ + 0x00, 0x00, 0x00, 0x03, /* Procedure (3=getport) */ + 0x00, 0x00, 0x00, 0x00, /* Auth flavor */ + 0x00, 0x00, 0x00, 0x00, /* Auth length */ + 0x00, 0x00, 0x00, 0x00, /* Verifier flavor */ + 0x00, 0x00, 0x00, 0x00, /* Verifier flavor length */ + 0x00, 0x01, 0x86, 0xa0, /* Pmap Program (100000) */ + }; + + pak_rcv_nat_udp("dp1T0", "aa:bb:cc:dd:1:a2", 0, + "1.1.1.2", 50618, "2.2.2.2", 111, + "2.2.2.254", 50618, "2.2.2.2", 111, + "aa:bb:cc:dd:2:b2", 0, "dp2T1", + DP_TEST_FWD_FORWARDED, rpc_call, sizeof(rpc_call)); + + + /* + * RPC Reply + * + * Will create tuple: + * proto 17, dport 1025 Src 1.1.1.2, Dst 2.2.2.2, [MATCH_ANY_SPORT] + */ + char rpc_reply[] = {0x01, 
0x02, 0x03, 0x04, /* xid (host order) */ + 0x00, 0x00, 0x00, 0x01, /* type (1=reply) */ + 0x00, 0x00, 0x00, 0x00, /* Reply st (0=accepted) */ + 0x00, 0x00, 0x00, 0x00, /* Auth */ + 0x00, 0x00, 0x00, 0x00, /* Auth length */ + 0x00, 0x00, 0x00, 0x00, /* Accept st (0=success) */ + 0x00, 0x00, 0x04, 0x01, /* Port = 1025 */ + }; + + pak_rcv_nat_udp("dp2T1", "aa:bb:cc:dd:2:b2", 0, + "2.2.2.2", 111, "2.2.2.254", 50618, + "2.2.2.2", 111, "1.1.1.2", 50618, + "aa:bb:cc:dd:1:a2", 0, "dp1T0", + DP_TEST_FWD_FORWARDED, rpc_reply, sizeof(rpc_reply)); + + + /* + * RPC Data + * + * Finds the above tuple, creates a child session, expired the tuple + */ + char rpc_data[] = {0x01, 0x02, 0x03, 0x04 }; + + pak_rcv_nat_udp("dp1T0", "aa:bb:cc:dd:1:a2", 0, + "1.1.1.2", 30123, "2.2.2.2", 1025, + "2.2.2.254", 30123, "2.2.2.2", 1025, + "aa:bb:cc:dd:2:b2", 0, "dp2T1", + DP_TEST_FWD_FORWARDED, + rpc_data, sizeof(rpc_data)); + + + if (0) { + dp_test_npf_print_session_table(false); + dp_test_npf_print_nat_sessions(""); + } + + dp_test_npf_cmd_fmt(false, "cgn-ut snat-alg-bypass off"); + + cgnat_policy_del("POLICY1", 10, "dp2T1"); + dp_test_npf_cmd_fmt(false, "nat-ut pool delete POOL1"); + + dp_test_npf_snat_del(snat.ifname, snat.rule, true); + npf_rpc_out_fw(false); + +} DP_END_TEST; + +static void dpt_alg_rpc_setup(void) +{ + dp_test_nl_add_ip_addr_and_connected("dp1T0", "1.1.1.1/24"); + dp_test_nl_add_ip_addr_and_connected("dp2T1", "2.2.2.1/24"); + + dp_test_netlink_add_neigh("dp1T0", "1.1.1.2", "aa:bb:cc:dd:1:a2"); + dp_test_netlink_add_neigh("dp1T0", "1.1.1.3", "aa:bb:cc:dd:1:a3"); + dp_test_netlink_add_neigh("dp2T1", "2.2.2.2", "aa:bb:cc:dd:2:b2"); + dp_test_netlink_add_neigh("dp2T1", "2.2.2.3", "aa:bb:cc:dd:2:b3"); +} + +static void dpt_alg_rpc_teardown(void) +{ + dp_test_netlink_del_neigh("dp1T0", "1.1.1.2", "aa:bb:cc:dd:1:a2"); + dp_test_netlink_del_neigh("dp1T0", "1.1.1.3", "aa:bb:cc:dd:1:a3"); + + dp_test_netlink_del_neigh("dp2T1", "2.2.2.2", "aa:bb:cc:dd:2:b2"); + 
dp_test_netlink_del_neigh("dp2T1", "2.2.2.3", "aa:bb:cc:dd:2:b3"); + + dp_test_nl_del_ip_addr_and_connected("dp1T0", "1.1.1.1/24"); + dp_test_nl_del_ip_addr_and_connected("dp2T1", "2.2.2.1/24"); + + dp_test_npf_cleanup(); +} + + /* * This is called *after* the packet has been modified, but *before* the pkt * queued on the tx ring is checked. @@ -421,16 +892,17 @@ _pak_rcv_nat_udp(const char *rx_intf, const char *pre_smac, int pre_vlan, const char *post_daddr, uint16_t post_dport, const char *post_dmac, int post_vlan, const char *tx_intf, int status, char *payload, uint payload_len, + uint16_t eth_type, const char *file, const char *func, int line) { struct dp_test_expected *test_exp; struct rte_mbuf *test_pak, *exp_pak; - /* Pre IPv4 UDP packet */ + /* Pre UDP packet */ struct dp_test_pkt_desc_t pre_pkt_UDP = { - .text = "IPv4 UDP", + .text = "UDP", .len = payload_len, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = eth_type, .l3_src = pre_saddr, .l2_src = pre_smac, .l3_dst = pre_daddr, @@ -446,11 +918,11 @@ _pak_rcv_nat_udp(const char *rx_intf, const char *pre_smac, int pre_vlan, .tx_intf = tx_intf }; - /* Post IPv4 UDP packet */ + /* Post UDP packet */ struct dp_test_pkt_desc_t post_pkt_UDP = { - .text = "IPv4 UDP", + .text = "UDP", .len = payload_len, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = eth_type, .l3_src = post_saddr, .l2_src = "aa:bb:cc:dd:2:b1", .l3_dst = post_daddr, @@ -466,11 +938,18 @@ _pak_rcv_nat_udp(const char *rx_intf, const char *pre_smac, int pre_vlan, .tx_intf = tx_intf }; - test_pak = dp_test_v4_pkt_from_desc(&pre_pkt_UDP); + if (eth_type == RTE_ETHER_TYPE_IPV4) + test_pak = dp_test_v4_pkt_from_desc(&pre_pkt_UDP); + else + test_pak = dp_test_v6_pkt_from_desc(&pre_pkt_UDP); udp_payload_init(test_pak, &pre_pkt_UDP, payload, payload_len); - exp_pak = dp_test_v4_pkt_from_desc(&post_pkt_UDP); + if (eth_type == RTE_ETHER_TYPE_IPV4) + exp_pak = dp_test_v4_pkt_from_desc(&post_pkt_UDP); + else + exp_pak = 
dp_test_v6_pkt_from_desc(&post_pkt_UDP); + test_exp = dp_test_exp_from_desc(exp_pak, &post_pkt_UDP); rte_pktmbuf_free(exp_pak); @@ -488,7 +967,7 @@ _pak_rcv_nat_udp(const char *rx_intf, const char *pre_smac, int pre_vlan, dp_test_exp_get_pak(test_exp), post_dmac, dp_test_intf_name2mac_str(tx_intf), - ETHER_TYPE_IPv4); + eth_type); } dp_test_exp_set_fwd_status(test_exp, status); @@ -499,3 +978,4 @@ _pak_rcv_nat_udp(const char *rx_intf, const char *pre_smac, int pre_vlan, _dp_test_pak_receive(test_pak, rx_intf, test_exp, file, func, line); } + diff --git a/tests/whole_dp/src/dp_test_npf_alg_sip_data.c b/tests/whole_dp/src/dp_test_npf_alg_sip_data.c new file mode 100644 index 00000000..48e7574a --- /dev/null +++ b/tests/whole_dp/src/dp_test_npf_alg_sip_data.c @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2021, AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ + +/* Functions for operating on SIP data sets */ + +#include +#include +#include +#include "dp_test_npf_alg_sip_data.h" + +/* + * Create a description string from the first line of a SIP packet + * payload. This is useful in identifying packets where the same function is + * being used in a loop to send packets. + */ +char *sipd_descr(uint index, bool forw, const char *pload) +{ + static char str[100]; + uint i; + uint len; + + len = snprintf(str, sizeof(str), "[%u:%s] ", + index, forw ? "FORW":"BACK"); + + for (i = 0; i + len < sizeof(str) - 1; i++) { + if (pload[i] == '\r' || pload[i] == '\0') + break; + str[i + len] = pload[i]; + } + + str[i + len] = '\0'; + return str; +} + +/* + * Extract content-length value from SIP message header, and measure actual + * content-length. Return false if both can be determined but are *not* + * equal.
+ */ +bool sipd_check_content_length(const char *pload, uint *hdr_clen, + uint *body_clen) +{ + char str[30]; + char *p; + uint i, hdr_cl, body_cl; + int rc; + + if (hdr_clen) + *hdr_clen = 0; + if (body_clen) + *body_clen = 0; + + /* + * Copy the "Content-Length" line from the payload into a local buffer + */ + p = strcasestr(pload, "Content-Length"); + if (!p) + return true; + + /* End of the line is denoted by "\r\n" */ + for (i = 0; p[i] && p[i] != '\r' && i < sizeof(str) - 1; i++) + str[i] = p[i]; + str[i] = '\0'; + + /* Look for end of SIP msg headers */ + p = strstr(p, "\r\n\r\n"); + if (!p) + return true; + + /* + * Move 'p' to start of SIP msg body (or end of string if there is no + * body) + */ + p += strlen("\r\n\r\n"); + if (p[0] == '\0') + return true; + + /* Determine length of SIP msg body (i.e. the 'content-length') */ + body_cl = strlen(p); + + /* Get Content-length value in the header line */ + rc = 0; + for (p = str; p[0]; p++) { + if (isdigit(p[0])) { + rc = sscanf(p, "%u", &hdr_cl); + break; + } + } + if (rc != 1) + hdr_cl = 0; + + if (hdr_clen) + *hdr_clen = hdr_cl; + if (body_clen) + *body_clen = body_cl; + + return hdr_cl == body_cl; +} diff --git a/tests/whole_dp/src/dp_test_npf_alg_sip_data.h b/tests/whole_dp/src/dp_test_npf_alg_sip_data.h new file mode 100644 index 00000000..17c7692c --- /dev/null +++ b/tests/whole_dp/src/dp_test_npf_alg_sip_data.h @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2021, AT&T Intellectual Property. All rights reserved.
+ * + * SPDX-License-Identifier: LGPL-2.1-only + */ + +#ifndef __DP_TEST_NPF_ALG_SIP_DATA_H__ +#define __DP_TEST_NPF_ALG_SIP_DATA_H__ + +#include +#include +#include + +#define SIP_FORW true +#define SIP_BACK false + +/* + * SIP Data Set #1 + */ +#define SIPD1_SZ 6 +extern const bool sipd1_dir[SIPD1_SZ]; +extern const uint sipd1_rtp_index; +extern const char *sipd1[SIPD1_SZ]; +extern const char *sipd1_pre_snat[SIPD1_SZ]; +extern const char *sipd1_post_snat[SIPD1_SZ]; +extern const char *sipd1_pre_dnat[SIPD1_SZ]; +extern const char *sipd1_post_dnat[SIPD1_SZ]; + +/* + * SIP Data Set #2 + */ +#define SIPD2_SZ 6 +extern const bool sipd2_dir[SIPD2_SZ]; +extern const uint sipd2_rtp_index; +extern const char *sipd2[SIPD2_SZ]; +extern const char *sipd2_pre_snat[SIPD2_SZ]; +extern const char *sipd2_post_snat[SIPD2_SZ]; + +/* + * SIP Data Set #3 + */ +#define SIPD3_SZ 8 +extern const bool sipd3_dir[SIPD3_SZ]; +extern const uint sipd3_rtp_early_media_index; +extern const uint sipd3_rtp_media_index; +extern const char *sipd3_pre_snat[SIPD3_SZ]; +extern const char *sipd3_post_snat[SIPD3_SZ]; + + +/* + * SIP Data Set #4 + */ +#define SIPD4_SZ 7 +extern const bool sipd4_dir[SIPD4_SZ]; +extern const char *sipd4_pre_dnat[SIPD4_SZ]; +extern const char *sipd4_post_dnat[SIPD4_SZ]; + + +char *sipd_descr(uint index, bool forw, const char *pload); +bool sipd_check_content_length(const char *pload, uint *hdr_clen, + uint *body_clen); + +#endif /* __DP_TEST_NPF_ALG_SIP_DATA_H__ */ diff --git a/tests/whole_dp/src/dp_test_npf_alg_sip_data1.c b/tests/whole_dp/src/dp_test_npf_alg_sip_data1.c new file mode 100644 index 00000000..acda5a99 --- /dev/null +++ b/tests/whole_dp/src/dp_test_npf_alg_sip_data1.c @@ -0,0 +1,566 @@ +/* + * Copyright (c) 2021, AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ + +#include "dp_test_npf_alg_sip_data.h" + +/* + * SIP Data Set #1. 
+ * + * Simple SIP Session + * + * Client Rcvr + * + * 1.1.1.2 22.22.22.2 + * sip:workman@home.com B.Boss@work.co.uk + * (snat trans addr 30.30.30.2) + * + * | INVITE | + * |--------------------------------------------->| + * | 180 Ringing | + * |<---------------------------------------------| + * | 200 OK | + * |<---------------------------------------------| + * | ACK | + * |--------------------------------------------->| + * | Media Session | + * |<============================================>| + * | BYE | + * |<---------------------------------------------| + * | 200 OK | + * |--------------------------------------------->| + * | | + * + * Notes: + * + * The 'From' and 'To' headers are always the same as the INVITE or BYE + * request that started the msg exchange. They do *not* always reflect who + * sent any given message. + */ + +/* + * These data set arrays are purposefully kept very simple. asserts in the + * test code ensure they are the same size. + */ +const bool sipd1_dir[SIPD1_SZ] = { + SIP_FORW, SIP_BACK, SIP_BACK, SIP_FORW, SIP_BACK, SIP_FORW, +}; + +/* + * Indicates after which SIP message the data stream should occur (first msg + * is index 0). + */ +const uint sipd1_rtp_index = 3; + +/* + * SIP messages without any NAT + */ +const char *sipd1[SIPD1_SZ] = { + /* + * 0. Forward. INVITE + */ + "INVITE sip:B.Boss@22.22.22.2 SIP/2.0\r\n" + "Via: SIP/2.0/UDP 1.1.1.2:5060;branch=z9hG4bKfw19b\r\n" + "Record-Route: \r\n" + "From: A. Workman ;tag=76341\r\n" + "To: B.Boss \r\n" + "Call-ID: j2qu348ek2328ws\r\n" + "CSeq: 1 INVITE\r\n" + "Contact: \r\n" + "Content-Type: application/sdp\r\n" + "P-asserted-identity: \"Workman\" \r\n" + "Max-forwards: 70\r\n" + "Subject: About That Power Outage...\r\n" + "Content-Length: 135\r\n" + "\r\n" + "v=0\r\n" + "o=Doe 2890844526 2890844526 IN IP4 1.1.1.2\r\n" + "s=Phone Call\r\n" + "c=IN IP4 1.1.1.2\r\n" + "t=0 0\r\n" + "m=audio 10000 RTP/AVP 0\r\n" + "a=rtpmap:0 PCMU/8000\r\n", + + /* + * 1. Back. 
180 Ringing + */ + "SIP/2.0 180 Ringing\r\n" + "Via: SIP/2.0/UDP 1.1.1.2:5060;branch=z9hG4bKfw19b\r\n" + "Record-Route: \r\n" + "From: A. Workman ;tag=76341\r\n" + "To: B.Boss ;tag=a53e42\r\n" + "Call-ID: j2qu348ek2328ws\r\n" + "CSeq: 1 INVITE\r\n" + "Contact: \r\n" + "Content-Length: 0\r\n" + "\r\n", + + /* + * 2. Back. 200 OK + */ + "SIP/2.0 200 OK\r\n" + "Via: SIP/2.0/UDP 1.1.1.2:5060;branch=z9hG4bKfw19b\r\n" + "From: A. Workman ;tag=76341\r\n" + "To: B.Boss ;tag=a53e42\r\n" + "Call-ID: j2qu348ek2328ws\r\n" + "CSeq: 1 INVITE\r\n" + "Contact: \r\n" + "Content-Type: application/sdp\r\n" + "Content-Length: 144\r\n" + "\r\n" + "v=0\r\n" + "o=B.Boss 2890844528 2890844528 IN IP4 22.22.22.2\r\n" + "s=Phone Call\r\n" + "c=IN IP4 22.22.22.2\r\n" + "t=0 0\r\n" + "m=audio 60000 RTP/AVP 0\r\n" + "a=rtpmap:0 PCMU/8000\r\n", + + /* + * 3. Forward. ACK + */ + "ACK sip:boss@22.22.22.2 SIP/2.0\r\n" + "Via: SIP/2.0/UDP 1.1.1.2:5060;branch=z9hG4bK321g\r\n" + "From: A. Workman ;tag=76341\r\n" + "To: B.Boss ;tag=a53e42\r\n" + "Call-ID: j2qu348ek2328ws\r\n" + "CSeq: 1 ACK\r\n" + "Max-forwards: 70\r\n" + "Content-Length: 0\r\n" + "\r\n", + + /* + * 4. Back: BYE + */ + "BYE sip:workman@1.1.1.2 SIP/2.0\r\n" + "Via: SIP/2.0/UDP 22.22.22.2:5060;branch=z9hG4bK392kf\r\n" + "From: B.Boss ;tag=a53e42\r\n" + "To: A. Workman ;tag=76341\r\n" + "Call-ID: j2qu348ek2328ws\r\n" + "CSeq: 1392 BYE\r\n" + "Max-forwards: 70\r\n" + "Content-Length: 0\r\n" + "\r\n", + + /* + * 5. Forward. 200 OK + * + * Record-route + */ + "SIP/2.0 200 OK\r\n" + "Via: SIP/2.0/UDP 22.22.22.2:5060;branch=z9hG4bK392kf\r\n" + "From: B.Boss ;tag=a53e42\r\n" + "To: A. Workman ;tag=76341\r\n" + "Call-ID: j2qu348ek2328ws\r\n" + "CSeq: 1392 BYE\r\n" + "Content-Length: 0\r\n" + "\r\n", +}; + +/* + * SNAT + */ +const char *sipd1_pre_snat[SIPD1_SZ] = { + /* + * 0. INVITE. Forward (inside) + */ + "INVITE sip:B.Boss@22.22.22.2 SIP/2.0\r\n" + "Via: SIP/2.0/UDP 1.1.1.2:5060;branch=z9hG4bKfw19b\r\n" + "Record-Route: \r\n" + "From: A. 
Workman ;tag=76341\r\n" + "To: B.Boss \r\n" + "Call-ID: j2qu348ek2328ws\r\n" + "CSeq: 1 INVITE\r\n" + "Contact: \r\n" + "Content-Type: application/sdp\r\n" + "P-asserted-identity: \"Workman\" \r\n" + "Max-forwards: 70\r\n" + "Subject: About That Power Outage...\r\n" + "Content-Length: 139\r\n" + "\r\n" + "v=0\r\n" + "o=Workman 2890844526 2890844526 IN IP4 1.1.1.2\r\n" + "s=Phone Call\r\n" + "c=IN IP4 1.1.1.2\r\n" + "t=0 0\r\n" + "m=audio 10000 RTP/AVP 0\r\n" + "a=rtpmap:0 PCMU/8000\r\n", + + /* + * 1. 180 Ringing. Back (outside) + */ + "SIP/2.0 180 Ringing\r\n" + "Via: SIP/2.0/UDP 30.30.30.2:1024;branch=z9hG4bKfw19b\r\n" + "Record-Route: \r\n" + "From: A. Workman ;tag=76341\r\n" + "To: B.Boss ;tag=a53e42\r\n" + "Call-ID: j2qu348ek2328ws\r\n" + "CSeq: 1 INVITE\r\n" + "Contact: \r\n" + "Content-Length: 0\r\n" + "\r\n", + + /* + * 2. 200 OK. Back + */ + "SIP/2.0 200 OK\r\n" + "Via: SIP/2.0/UDP 30.30.30.2:1024;branch=z9hG4bKfw19b\r\n" + "From: A. Workman ;tag=76341\r\n" + "To: B.Boss ;tag=a53e42\r\n" + "Call-ID: j2qu348ek2328ws\r\n" + "CSeq: 1 INVITE\r\n" + "Contact: \r\n" + "Content-Type: application/sdp\r\n" + "Content-Length: 144\r\n" + "\r\n" + "v=0\r\n" + "o=B.Boss 2890844528 2890844528 IN IP4 22.22.22.2\r\n" + "s=Phone Call\r\n" + "c=IN IP4 22.22.22.2\r\n" + "t=0 0\r\n" + "m=audio 60000 RTP/AVP 0\r\n" + "a=rtpmap:0 PCMU/8000\r\n", + + /* + * 3. ACK. Forward + */ + "ACK sip:boss@22.22.22.2 SIP/2.0\r\n" + "Via: SIP/2.0/UDP 1.1.1.2:5060;branch=z9hG4bK321g\r\n" + "From: A. Workman ;tag=76341\r\n" + "To: B.Boss ;tag=a53e42\r\n" + "Call-ID: j2qu348ek2328ws\r\n" + "CSeq: 1 ACK\r\n" + "Max-forwards: 70\r\n" + "Content-Length: 0\r\n" + "\r\n", + + /* + * 4. BYE. Back + */ + "BYE sip:workman@30.30.30.2 SIP/2.0\r\n" + "Via: SIP/2.0/UDP 22.22.22.2:5060;branch=z9hG4bK392kf\r\n" + "From: B.Boss ;tag=a53e42\r\n" + "To: A. Workman ;tag=76341\r\n" + "Call-ID: j2qu348ek2328ws\r\n" + "CSeq: 1392 BYE\r\n" + "Max-forwards: 70\r\n" + "Content-Length: 0\r\n" + "\r\n", + /* + * 5. 
Forward + */ + "SIP/2.0 200 OK\r\n" + "Via: SIP/2.0/UDP 22.22.22.2:5060;branch=z9hG4bK392kf\r\n" + "From: B.Boss ;tag=a53e42\r\n" + "To: A. Workman ;tag=76341\r\n" + "Call-ID: j2qu348ek2328ws\r\n" + "CSeq: 1392 BYE\r\n" + "Content-Length: 0\r\n" + "\r\n", +}; + +const char *sipd1_post_snat[SIPD1_SZ] = { + /* + * 0. INVITE. Forward (outside) + */ + "INVITE sip:B.Boss@22.22.22.2 SIP/2.0\r\n" + "Via: SIP/2.0/UDP 30.30.30.2:1024;branch=z9hG4bKfw19b\r\n" + "Record-Route: \r\n" + "From: A. Workman ;tag=76341\r\n" + "To: B.Boss \r\n" + "Call-ID: j2qu348ek2328ws\r\n" + "CSeq: 1 INVITE\r\n" + "Contact: \r\n" + "Content-Type: application/sdp\r\n" + "P-asserted-identity: \"Workman\" \r\n" + "Max-forwards: 70\r\n" + "Subject: About That Power Outage...\r\n" + "Content-Length: 144\r\n" + "\r\n" + "v=0\r\n" + "o=Workman 2890844526 2890844526 IN IP4 30.30.30.2\r\n" + "s=Phone Call\r\n" + "c=IN IP4 30.30.30.2\r\n" + "t=0 0\r\n" + "m=audio 1026 RTP/AVP 0\r\n" + "a=rtpmap:0 PCMU/8000\r\n", + + /* + * 1. 180 Ringing. Back (inside) + */ + "SIP/2.0 180 Ringing\r\n" + "Via: SIP/2.0/UDP 1.1.1.2:5060;branch=z9hG4bKfw19b\r\n" + "Record-Route: \r\n" + "From: A. Workman ;tag=76341\r\n" + "To: B.Boss ;tag=a53e42\r\n" + "Call-ID: j2qu348ek2328ws\r\n" + "CSeq: 1 INVITE\r\n" + "Contact: \r\n" + "Content-Length: 0\r\n" + "\r\n", + + /* + * 2. 200 OK. Back + */ + "SIP/2.0 200 OK\r\n" + "Via: SIP/2.0/UDP 1.1.1.2:5060;branch=z9hG4bKfw19b\r\n" + "From: A. Workman ;tag=76341\r\n" + "To: B.Boss ;tag=a53e42\r\n" + "Call-ID: j2qu348ek2328ws\r\n" + "CSeq: 1 INVITE\r\n" + "Contact: \r\n" + "Content-Type: application/sdp\r\n" + "Content-Length: 144\r\n" + "\r\n" + "v=0\r\n" + "o=B.Boss 2890844528 2890844528 IN IP4 22.22.22.2\r\n" + "s=Phone Call\r\n" + "c=IN IP4 22.22.22.2\r\n" + "t=0 0\r\n" + "m=audio 60000 RTP/AVP 0\r\n" + "a=rtpmap:0 PCMU/8000\r\n", + + /* + * 3. ACK. Forward + */ + "ACK sip:boss@22.22.22.2 SIP/2.0\r\n" + "Via: SIP/2.0/UDP 30.30.30.2:1024;branch=z9hG4bK321g\r\n" + "From: A. 
Workman ;tag=76341\r\n" + "To: B.Boss ;tag=a53e42\r\n" + "Call-ID: j2qu348ek2328ws\r\n" + "CSeq: 1 ACK\r\n" + "Max-forwards: 70\r\n" + "Content-Length: 0\r\n" + "\r\n", + + /* + * 4. BYE. Back + */ + "BYE sip:workman@1.1.1.2 SIP/2.0\r\n" + "Via: SIP/2.0/UDP 22.22.22.2:5060;branch=z9hG4bK392kf\r\n" + "From: B.Boss ;tag=a53e42\r\n" + "To: A. Workman ;tag=76341\r\n" + "Call-ID: j2qu348ek2328ws\r\n" + "CSeq: 1392 BYE\r\n" + "Max-forwards: 70\r\n" + "Content-Length: 0\r\n" + "\r\n", + + /* + * 5. 200 OK. Forward + */ + "SIP/2.0 200 OK\r\n" + "Via: SIP/2.0/UDP 22.22.22.2:5060;branch=z9hG4bK392kf\r\n" + "From: B.Boss ;tag=a53e42\r\n" + "To: A. Workman ;tag=76341\r\n" + "Call-ID: j2qu348ek2328ws\r\n" + "CSeq: 1392 BYE\r\n" + "Content-Length: 0\r\n" + "\r\n", +}; + +/* + * DNAT + * + * Forw: Dest 1.1.1.22 translated to 22.22.22.2 + * Back: Src 22.22.22.2 translated to 1.1.1.22 + */ +const char *sipd1_pre_dnat[SIPD1_SZ] = { + /* + * 0. Forward. INVITE + */ + "INVITE sip:B.Boss@1.1.1.22 SIP/2.0\r\n" + "Via: SIP/2.0/UDP 1.1.1.2:5060;branch=z9hG4bKfw19b\r\n" + "Record-Route: \r\n" + "From: A. Workman ;tag=76341\r\n" + "To: B.Boss \r\n" + "Call-ID: j2qu348ek2328ws\r\n" + "CSeq: 1 INVITE\r\n" + "Contact: \r\n" + "Content-Type: application/sdp\r\n" + "Max-forwards: 70\r\n" + "Subject: About That Power Outage...\r\n" + "Content-Length: 139\r\n" + "\r\n" + "v=0\r\n" + "o=Workman 2890844526 2890844526 IN IP4 1.1.1.2\r\n" + "s=Phone Call\r\n" + "c=IN IP4 1.1.1.2\r\n" + "t=0 0\r\n" + "m=audio 10000 RTP/AVP 0\r\n" + "a=rtpmap:0 PCMU/8000\r\n", + + /* + * 1. Back. 180 Ringing + */ + "SIP/2.0 180 Ringing\r\n" + "Via: SIP/2.0/UDP 1.1.1.2:5060;branch=z9hG4bKfw19b\r\n" + "Record-Route: \r\n" + "From: A. Workman ;tag=76341\r\n" + "To: B.Boss ;tag=a53e42\r\n" + "Call-ID: j2qu348ek2328ws\r\n" + "CSeq: 1 INVITE\r\n" + "Contact: \r\n" + "Content-Length: 0\r\n" + "\r\n", + + /* + * 2. Back. 200 OK + */ + "SIP/2.0 200 OK\r\n" + "Via: SIP/2.0/UDP 1.1.1.2:5060;branch=z9hG4bKfw19b\r\n" + "From: A. 
Workman ;tag=76341\r\n" + "To: B.Boss ;tag=a53e42\r\n" + "Call-ID: j2qu348ek2328ws\r\n" + "CSeq: 1 INVITE\r\n" + "Contact: \r\n" + "Content-Type: application/sdp\r\n" + "Content-Length: 144\r\n" + "\r\n" + "v=0\r\n" + "o=B.Boss 2890844528 2890844528 IN IP4 22.22.22.2\r\n" + "s=Phone Call\r\n" + "c=IN IP4 22.22.22.2\r\n" + "t=0 0\r\n" + "m=audio 60000 RTP/AVP 0\r\n" + "a=rtpmap:0 PCMU/8000\r\n", + + /* + * 3. Forward. ACK + */ + "ACK sip:boss@1.1.1.22 SIP/2.0\r\n" + "Via: SIP/2.0/UDP 1.1.1.2:5060;branch=z9hG4bK321g\r\n" + "From: A. Workman ;tag=76341\r\n" + "To: B.Boss ;tag=a53e42\r\n" + "Call-ID: j2qu348ek2328ws\r\n" + "CSeq: 1 ACK\r\n" + "Max-forwards: 70\r\n" + "Content-Length: 0\r\n" + "\r\n", + + /* + * 4. Back: BYE + */ + "BYE sip:workman@1.1.1.2 SIP/2.0\r\n" + "Via: SIP/2.0/UDP 22.22.22.2:5060;branch=z9hG4bK392kf\r\n" + "From: B.Boss ;tag=a53e42\r\n" + "To: A. Workman ;tag=76341\r\n" + "Call-ID: j2qu348ek2328ws\r\n" + "CSeq: 1392 BYE\r\n" + "Max-forwards: 70\r\n" + "Content-Length: 0\r\n" + "\r\n", + + /* + * 5. Forward. 200 OK + */ + "SIP/2.0 200 OK\r\n" + "Via: SIP/2.0/UDP 22.22.22.2:5060;branch=z9hG4bK392kf\r\n" + "From: B.Boss ;tag=a53e42\r\n" + "To: A. Workman ;tag=76341\r\n" + "Call-ID: j2qu348ek2328ws\r\n" + "CSeq: 1392 BYE\r\n" + "Content-Length: 0\r\n" + "\r\n", +}; + +const char *sipd1_post_dnat[SIPD1_SZ] = { + /* + * 0. Forward. INVITE + */ + "INVITE sip:B.Boss@22.22.22.2 SIP/2.0\r\n" + "Via: SIP/2.0/UDP 1.1.1.2:5060;branch=z9hG4bKfw19b\r\n" + "Record-Route: \r\n" + "From: A. Workman ;tag=76341\r\n" + "To: B.Boss \r\n" + "Call-ID: j2qu348ek2328ws\r\n" + "CSeq: 1 INVITE\r\n" + "Contact: \r\n" + "Content-Type: application/sdp\r\n" + "Max-forwards: 70\r\n" + "Subject: About That Power Outage...\r\n" + "Content-Length: 139\r\n" + "\r\n" + "v=0\r\n" + "o=Workman 2890844526 2890844526 IN IP4 1.1.1.2\r\n" + "s=Phone Call\r\n" + "c=IN IP4 1.1.1.2\r\n" + "t=0 0\r\n" + "m=audio 10000 RTP/AVP 0\r\n" + "a=rtpmap:0 PCMU/8000\r\n", + + /* + * 1. Back. 
180 Ringing + */ + "SIP/2.0 180 Ringing\r\n" + "Via: SIP/2.0/UDP 1.1.1.2:5060;branch=z9hG4bKfw19b\r\n" + "Record-Route: \r\n" + "From: A. Workman ;tag=76341\r\n" + "To: B.Boss ;tag=a53e42\r\n" + "Call-ID: j2qu348ek2328ws\r\n" + "CSeq: 1 INVITE\r\n" + "Contact: \r\n" + "Content-Length: 0\r\n" + "\r\n", + + /* + * 2. Back. 200 OK. + */ + "SIP/2.0 200 OK\r\n" + "Via: SIP/2.0/UDP 1.1.1.2:5060;branch=z9hG4bKfw19b\r\n" + "From: A. Workman ;tag=76341\r\n" + "To: B.Boss ;tag=a53e42\r\n" + "Call-ID: j2qu348ek2328ws\r\n" + "CSeq: 1 INVITE\r\n" + "Contact: \r\n" + "Content-Type: application/sdp\r\n" + "Content-Length: 142\r\n" + "\r\n" + "v=0\r\n" + "o=B.Boss 2890844528 2890844528 IN IP4 22.22.22.2\r\n" + "s=Phone Call\r\n" + "c=IN IP4 1.1.1.22\r\n" + "t=0 0\r\n" + "m=audio 60000 RTP/AVP 0\r\n" + "a=rtpmap:0 PCMU/8000\r\n", + + /* + * 3. Forward. ACK + */ + "ACK sip:boss@22.22.22.2 SIP/2.0\r\n" + "Via: SIP/2.0/UDP 1.1.1.2:5060;branch=z9hG4bK321g\r\n" + "From: A. Workman ;tag=76341\r\n" + "To: B.Boss ;tag=a53e42\r\n" + "Call-ID: j2qu348ek2328ws\r\n" + "CSeq: 1 ACK\r\n" + "Max-forwards: 70\r\n" + "Content-Length: 0\r\n" + "\r\n", + + /* + * 4. Back: BYE + */ + "BYE sip:workman@1.1.1.2 SIP/2.0\r\n" + "Via: SIP/2.0/UDP 22.22.22.2:5060;branch=z9hG4bK392kf\r\n" + "From: B.Boss ;tag=a53e42\r\n" + "To: A. Workman ;tag=76341\r\n" + "Call-ID: j2qu348ek2328ws\r\n" + "CSeq: 1392 BYE\r\n" + "Max-forwards: 70\r\n" + "Content-Length: 0\r\n" + "\r\n", + + /* + * 5. Forward. 200 OK + */ + "SIP/2.0 200 OK\r\n" + "Via: SIP/2.0/UDP 22.22.22.2:5060;branch=z9hG4bK392kf\r\n" + "From: B.Boss ;tag=a53e42\r\n" + "To: A. 
Workman ;tag=76341\r\n" + "Call-ID: j2qu348ek2328ws\r\n" + "CSeq: 1392 BYE\r\n" + "Content-Length: 0\r\n" + "\r\n", +}; diff --git a/tests/whole_dp/src/dp_test_npf_alg_sip_data2.c b/tests/whole_dp/src/dp_test_npf_alg_sip_data2.c new file mode 100644 index 00000000..d01bdb44 --- /dev/null +++ b/tests/whole_dp/src/dp_test_npf_alg_sip_data2.c @@ -0,0 +1,368 @@ +/* + * Copyright (c) 2021, AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ + +#include "dp_test_npf_alg_sip_data.h" + +/* + * SIP Data Set #2. (uut is between Caller and Proxy server) + * + * + * Caller Proxy Server Callee + * 100.101.102.103 200.201.202.205 200.201.202.203 + * + * | INVITE (M1) | + * |----[uut]---------------------->| INVITE (M2) + * | |-------------------------------->| + * | | 180 Ringing (M3) | + * | 180 Ringing (M4) |<--------------------------------| + * |<---[uut]-----------------------| | + * | | 200 OK (M5) | + * | 200 OK (M6) |<--------------------------------| + * |<---[uut]-----------------------| | + * | | + * | ACK (M7) | + * |----[uut]-------------------------------------------------------->| + * | Media Session | + * |<===[uut]========================================================>| + * | BYE (M8) | + * |<---[uut]---------------------------------------------------------| + * | 200 OK (M9) | + * |----[uut]-------------------------------------------------------->| + * | | + * + * Note: with SNAT, ACK (M7) will create a new session. + */ + +/* + * These data set arrays are purposefully kept very simple. asserts in the + * test code ensure they are the same size. + */ +const bool sipd2_dir[SIPD2_SZ] = { + SIP_FORW, SIP_BACK, SIP_BACK, SIP_FORW, SIP_BACK, SIP_FORW +}; + +/* + * Indicates after which SIP message the data stream should occur (first msg + * is index 0). + */ +const uint sipd2_rtp_index = 3; + +/* + * SIP messages without any NAT + */ +const char *sipd2[SIPD2_SZ] = { + /* + * 0. Forward. 
INVITE (M1) + */ + "INVITE sip:joe.bloggs@200.201.202.203 SIP/2.0\r\n" + "Via: SIP/2.0/UDP 100.101.102.103:5060;branch=z9hG4bKmp17a\r\n" + "From: J. Doe ;tag=42\r\n" + "To: Bloggs \r\n" + "Call-ID: 4827311-391-32934\r\n" + "CSeq: 1 INVITE\r\n" + "Contact: \r\n" + "Content-Type: application/sdp\r\n" + "Max-forwards: 70\r\n" + "Subject: Where are you exactly?\r\n" + "Content-Length: 154\r\n" + "\r\n" + "v=0\r\n" + "o=doe123 2890844526 2890844526 IN IP4 100.101.102.103\r\n" + "s=Phone Call\r\n" + "c=IN IP4 100.101.102.103\r\n" + "t=0 0\r\n" + "m=audio 10000 RTP/AVP 0\r\n" + "a=rtpmap:0 PCMU/8000\r\n", + + /* + * 1. Back. 180 RINGING (M4) + */ + "SIP/2.0 180 Ringing\r\n" + "Via: SIP/2.0/UDP 100.101.102.103:5060;branch=z9hG4bKmp17a\r\n" + "From: J. Doe ;tag=42\r\n" + "To: Bloggs ;" + "tag=314159\r\n" + "Call-ID: 4827311-391-32934\r\n" + "CSeq: 1 INVITE\r\n" + "Contact: \r\n" + "Content-Length: 0\r\n" + "\r\n", + + /* + * 2. Back. 200 OK (M6) + */ + "SIP/2.0 200 OK\r\n" + "Via: SIP/2.0/UDP 100.101.102.103:5060;branch=z9hG4bKmp17a\r\n" + "From: J. Doe ;tag=42\r\n" + "To: Bloggs ;" + "tag=314159\r\n" + "Call-ID: 4827311-391-32934\r\n" + "CSeq: 1 INVITE\r\n" + "Contact: \r\n" + "Content-Type: application/sdp\r\n" + "Content-Length: 154\r\n" + "\r\n" + "v=0\r\n" + "o=bloggs 2890844526 2890844526 IN IP4 200.201.202.203\r\n" + "s=phone call\r\n" + "c=IN IP4 200.201.202.203\r\n" + "t=0 0\r\n" + "m=audio 60000 RTP/AVP 0\r\n" + "a=rtpmap:0 PCMU/8000\r\n", + + /* + * 3. Forw. ACK (M7). Caller to Callee. + */ + "ACK sip:joe.bloggs@200.201.202.203 SIP/2.0\r\n" + "Via: SIP/2.0/UDP 100.101.102.103:5060;branch=z9hG4bKka42\r\n" + "From: J. Doe ;tag=42\r\n" + "To: Bloggs ;" + "tag=314159\r\n" + "Call-ID: 4827311-391-32934\r\n" + "CSeq: 1 ACK\r\n" + "Max-forwards: 70\r\n" + "Content-Length: 0\r\n" + "\r\n", + + /* Data flow occurs here */ + + /* + * 4. Back. 
BYE (M8) + */ + "BYE sip:doe123@pc33.example.com SIP/2.0\r\n" + "Via: SIP/2.0/UDP 200.201.202.203:5060;branch=z9hG4bK4332\r\n" + "From: Bloggs ;" + "tag=314159\r\n" + "To: J. Doe ;tag=42\r\n" + "Call-ID: 4827311-391-32934\r\n" + "CSeq: 2000 BYE\r\n" + "Max-forwards: 70\r\n" + "Content-Length: 0\r\n" + "\r\n", + + /* + * 5. Forw. 200 OK (M9) + */ + "SIP/2.0 200 OK\r\n" + "Via: SIP/2.0/UDP 200.201.202.203:5060;branch=z9hG4bK4332\r\n" + "From: Bloggs ;" + "tag=314159\r\n" + "To: J. Doe ;tag=42\r\n" + "Call-ID: 4827311-391-32934\r\n" + "CSeq: 2000 BYE\r\n" + "Content-Length: 0\r\n" + "\r\n", +}; + +const char *sipd2_pre_snat[SIPD2_SZ] = { + /* + * 0. Forward. INVITE (M1) + */ + "INVITE sip:joe.bloggs@200.201.202.203 SIP/2.0\r\n" + "Via: SIP/2.0/UDP 100.101.102.103:5060;branch=z9hG4bKmp17a\r\n" + "From: J. Doe ;tag=42\r\n" + "To: Bloggs \r\n" + "Call-ID: 4827311-391-32934\r\n" + "CSeq: 1 INVITE\r\n" + "Contact: \r\n" + "Content-Type: application/sdp\r\n" + "Max-forwards: 70\r\n" + "Subject: Where are you exactly?\r\n" + "Content-Length: 154\r\n" + "\r\n" + "v=0\r\n" + "o=doe123 2890844526 2890844526 IN IP4 100.101.102.103\r\n" + "s=Phone Call\r\n" + "c=IN IP4 100.101.102.103\r\n" + "t=0 0\r\n" + "m=audio 10000 RTP/AVP 0\r\n" + "a=rtpmap:0 PCMU/8000\r\n", + + /* + * 1. Back. 180 RINGING (M4) + */ + "SIP/2.0 180 Ringing\r\n" + "Via: SIP/2.0/UDP 200.201.202.1:5060;branch=z9hG4bKmp17a\r\n" + "From: J. Doe ;tag=42\r\n" + "To: Bloggs ;" + "tag=314159\r\n" + "Call-ID: 4827311-391-32934\r\n" + "CSeq: 1 INVITE\r\n" + "Contact: \r\n" + "Content-Length: 0\r\n" + "\r\n", + + /* + * 2. Back. 200 OK (M6) + */ + "SIP/2.0 200 OK\r\n" + "Via: SIP/2.0/UDP 200.201.202.1:5060;branch=z9hG4bKmp17a\r\n" + "From: J.
Doe ;tag=42\r\n" + "To: Bloggs ;" + "tag=314159\r\n" + "Call-ID: 4827311-391-32934\r\n" + "CSeq: 1 INVITE\r\n" + "Contact: \r\n" + "Content-Type: application/sdp\r\n" + "Content-Length: 154\r\n" + "\r\n" + "v=0\r\n" + "o=bloggs 2890844526 2890844526 IN IP4 200.201.202.203\r\n" + "s=phone call\r\n" + "c=IN IP4 200.201.202.203\r\n" + "t=0 0\r\n" + "m=audio 60000 RTP/AVP 0\r\n" + "a=rtpmap:0 PCMU/8000\r\n", + + /* + * 3. Forw. ACK (M7). Caller to Callee. + */ + "ACK sip:joe.bloggs@200.201.202.203 SIP/2.0\r\n" + "Via: SIP/2.0/UDP 100.101.102.103:5060;branch=z9hG4bKka42\r\n" + "From: J. Doe ;tag=42\r\n" + "To: Bloggs ;" + "tag=314159\r\n" + "Call-ID: 4827311-391-32934\r\n" + "CSeq: 1 ACK\r\n" + "Max-forwards: 70\r\n" + "Content-Length: 0\r\n" + "\r\n", + + /* Data flow occurs here */ + + /* + * 4. Back. BYE (M8) + */ + "BYE sip:doe123@pc33.example.com SIP/2.0\r\n" + "Via: SIP/2.0/UDP 200.201.202.203:5060;branch=z9hG4bK4332\r\n" + "From: Bloggs ;" + "tag=314159\r\n" + "To: J. Doe ;tag=42\r\n" + "Call-ID: 4827311-391-32934\r\n" + "CSeq: 2000 BYE\r\n" + "Max-forwards: 70\r\n" + "Content-Length: 0\r\n" + "\r\n", + + /* + * 5. Forw. 200 OK (M9) + */ + "SIP/2.0 200 OK\r\n" + "Via: SIP/2.0/UDP 200.201.202.203:5060;branch=z9hG4bK4332\r\n" + "From: Bloggs ;" + "tag=314159\r\n" + "To: J. Doe ;tag=42\r\n" + "Call-ID: 4827311-391-32934\r\n" + "CSeq: 2000 BYE\r\n" + "Content-Length: 0\r\n" + "\r\n", +}; + +const char *sipd2_post_snat[SIPD2_SZ] = { + /* + * 0. Forward. INVITE (M1) + */ + "INVITE sip:joe.bloggs@200.201.202.203 SIP/2.0\r\n" + "Via: SIP/2.0/UDP 200.201.202.1:5060;branch=z9hG4bKmp17a\r\n" + "From: J.
Doe ;tag=42\r\n" + "To: Bloggs \r\n" + "Call-ID: 4827311-391-32934\r\n" + "CSeq: 1 INVITE\r\n" + "Contact: \r\n" + "Content-Type: application/sdp\r\n" + "Max-forwards: 70\r\n" + "Subject: Where are you exactly?\r\n" + "Content-Length: 150\r\n" + "\r\n" + "v=0\r\n" + "o=doe123 2890844526 2890844526 IN IP4 200.201.202.1\r\n" + "s=Phone Call\r\n" + "c=IN IP4 200.201.202.1\r\n" + "t=0 0\r\n" + "m=audio 10000 RTP/AVP 0\r\n" + "a=rtpmap:0 PCMU/8000\r\n", + + /* + * 1. Back. 180 RINGING (M4) + */ + "SIP/2.0 180 Ringing\r\n" + "Via: SIP/2.0/UDP 100.101.102.103:5060;branch=z9hG4bKmp17a\r\n" + "From: J. Doe ;tag=42\r\n" + "To: Bloggs ;" + "tag=314159\r\n" + "Call-ID: 4827311-391-32934\r\n" + "CSeq: 1 INVITE\r\n" + "Contact: \r\n" + "Content-Length: 0\r\n" + "\r\n", + + /* + * 2. Back. 200 OK (M6) + */ + "SIP/2.0 200 OK\r\n" + "Via: SIP/2.0/UDP 100.101.102.103:5060;branch=z9hG4bKmp17a\r\n" + "From: J. Doe ;tag=42\r\n" + "To: Bloggs ;" + "tag=314159\r\n" + "Call-ID: 4827311-391-32934\r\n" + "CSeq: 1 INVITE\r\n" + "Contact: \r\n" + "Content-Type: application/sdp\r\n" + "Content-Length: 154\r\n" + "\r\n" + "v=0\r\n" + "o=bloggs 2890844526 2890844526 IN IP4 200.201.202.203\r\n" + "s=phone call\r\n" + "c=IN IP4 200.201.202.203\r\n" + "t=0 0\r\n" + "m=audio 60000 RTP/AVP 0\r\n" + "a=rtpmap:0 PCMU/8000\r\n", + + /* + * 3. Forw. ACK (M7). Caller to Callee. New session created, with src + * port 5061. + */ + "ACK sip:joe.bloggs@200.201.202.203 SIP/2.0\r\n" + "Via: SIP/2.0/UDP 200.201.202.1:5061;branch=z9hG4bKka42\r\n" + "From: J. Doe ;tag=42\r\n" + "To: Bloggs ;" + "tag=314159\r\n" + "Call-ID: 4827311-391-32934\r\n" + "CSeq: 1 ACK\r\n" + "Max-forwards: 70\r\n" + "Content-Length: 0\r\n" + "\r\n", + + /* Data flow occurs here */ + + /* + * 4. Back. BYE (M8) + */ + "BYE sip:doe123@pc33.example.com SIP/2.0\r\n" + "Via: SIP/2.0/UDP 200.201.202.203:5060;branch=z9hG4bK4332\r\n" + "From: Bloggs ;" + "tag=314159\r\n" + "To: J. 
Doe ;tag=42\r\n" + "Call-ID: 4827311-391-32934\r\n" + "CSeq: 2000 BYE\r\n" + "Max-forwards: 70\r\n" + "Content-Length: 0\r\n" + "\r\n", + + /* + * 5. Forw. 200 OK (M9) + */ + "SIP/2.0 200 OK\r\n" + "Via: SIP/2.0/UDP 200.201.202.203:5060;branch=z9hG4bK4332\r\n" + "From: Bloggs ;" + "tag=314159\r\n" + "To: J. Doe ;tag=42\r\n" + "Call-ID: 4827311-391-32934\r\n" + "CSeq: 2000 BYE\r\n" + "Content-Length: 0\r\n" + "\r\n", +}; + diff --git a/tests/whole_dp/src/dp_test_npf_alg_sip_data3.c b/tests/whole_dp/src/dp_test_npf_alg_sip_data3.c new file mode 100644 index 00000000..513a5894 --- /dev/null +++ b/tests/whole_dp/src/dp_test_npf_alg_sip_data3.c @@ -0,0 +1,376 @@ +/* + * Copyright (c) 2021, AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ + +#include "dp_test_npf_alg_sip_data.h" + +/* + * SIP Data Set #3. SIP to PSTN Call Through Gateway + * + * (only Caller to GW messages are shown) + * + * SIP Caller SIP/PSTN Gateway + * 8.19.19.6 50.60.70.80 + * + * + * | INVITE (M1) | + * |------------------------------------------------->| IAM (M2) + * | |-----------> + * | | ACM (M3) + * | 183 Session Progress (M4) |<----------- + * |<-------------------------------------------------| + * | PRACK (M5) | + * |------------------------------------------------->| + * | 200 OK (M6) | + * |<-------------------------------------------------| ring tone + * | RTP Early Media (one way) |<------------ + * |<=================================================| ANM (M7) + * | 200 OK (M8) |<------------ + * |<-------------------------------------------------| + * | ACK (M9) | + * |------------------------------------------------->| + * | RTP Media (speech) | + * |<================================================>| + * | BYE (M10) | + * |------------------------------------------------->| REL (M11) + * | 200 OK (M12) |-----------> + * |<-------------------------------------------------| RLC (M13) + * | |<----------- + * | | + * + * + * Calls from 
SIP to the PSTN through a gateway often make use of early + * media. Early media is RTP media sent prior to the call being answered. + * + * In SIP, this means media sent prior to the 200 OK response. This is usually + * done in SIP by the gateway sending a 183 Session Progress response, which + * creates an early dialog. RTP media is then sent from the gateway to the + * UA. Often early media carries special ringback tones to recorded + * announcements and tones. + * + * The call completes when the called party answers the telephone, which + * causes the telephone switch to send an answer message (ANM) to the gateway. + * The gateway then cuts the PSTN audio connection through in both directions + * and sends a 200 OK response to the caller. Because the RTP media path is + * already established, the gateway echoes the SDP in the 183 but causes no + * changes to the RTP connection. + */ + +/* + * These data set arrays are purposefully kept very simple. asserts in the + * test code ensure they are the same size. + */ +const bool sipd3_dir[SIPD3_SZ] = { + SIP_FORW, SIP_BACK, SIP_FORW, SIP_BACK, SIP_BACK, SIP_FORW, + SIP_FORW, SIP_BACK +}; + +/* + * Indicates after which SIP message the data stream should occur (first msg + * is index 0). + */ +const uint sipd3_rtp_early_media_index = 3; +const uint sipd3_rtp_media_index = 5; + +/* + * SIP messages pre + */ +const char *sipd3_pre_snat[SIPD3_SZ] = { + /* + * 0. Forward. 
INVITE (M1) + */ + "INVITE sip:+12025551313@test.org;user=phone SIP/2.0\r\n" + "Via: SIP/2.0/UDP 8.19.19.6:5060;branch=z9hG4bK4545\r\n" + "From: ;tag=12\r\n" + "To: \r\n" + "Call-ID: 49235243082018498\r\n" + "CSeq: 1 INVITE\r\n" + "Contact: \r\n" + "Content-Type: application/sdp\r\n" + "Supported: 100rel\r\n" + "Max-forwards: 70\r\n" + "Content-Length: 153\r\n" + "\r\n" + "v=0\r\n" + "o=FF 2890844535 2890844535 IN IP4 8.19.19.6\r\n" + "s=-\r\n" + "c=IN IP4 8.19.19.6\r\n" + "t=0 0\r\n" + "m=audio 50004 RTP/AVP 0 8\r\n" + "a=rtpmap:0 PCMU/8000\r\n" + "a=rtpmap:8 PCMA/8000\r\n", + + /* + * 1. Back. 183 Session Progress (M4) + */ + "SIP/2.0 183 Session Progress\r\n" + "Via: SIP/2.0/UDP 50.60.70.1:5060;branch=z9hG4bK4545\r\n" + "From: ;tag=12\r\n" + "To: ;tag=37\r\n" + "Call-ID: 49235243082018498\r\n" + "CSeq: 1 INVITE\r\n" + "Contact: \r\n" + "Content-Type: application/sdp\r\n" + "Rseq: 08071\r\n" + "Content-Length: 139\r\n" + "\r\n" + "v=0\r\n" + "o=Port1723 2890844535 2890844535 IN IP4 50.60.70.80\r\n" + "s=-\r\n" + "c=IN IP4 50.60.70.80\r\n" + "t=0 0\r\n" + "m=audio 62002 RTP/AVP 0\r\n" + "a=rtpmap:0 PCMU/8000\r\n", + + /* + * 2. Forw. PRACK (M5) + */ + "PRACK sip:50.60.70.80 SIP/2.0\r\n" + "Via: SIP/2.0/UDP 8.19.19.6:5060;branch=z9hG4bK454\r\n" + "From: ;tag=37\r\n" + "To: ;tag=12\r\n" + "Call-ID: 49235243082018498\r\n" + "CSeq: 2 PRACK\r\n" + "Contact: \r\n" + "Max-forwards: 70\r\n" + "Rack: 08071 1 INVITE\r\n" + "Content-Length: 0\r\n" + "\r\n", + + /* + * 3. Back. 
200 OK (M6) + */ + "SIP/2.0 200 OK\r\n" + "Via: SIP/2.0/UDP 50.60.70.1:5060;branch=z9hG4bK4545\r\n" + "From: ;tag=12\r\n" + "To: ;tag=37\r\n" + "Call-ID: 49235243082018498\r\n" + "CSeq: 1 INVITE\r\n" + "Contact: \r\n" + "Content-Type: application/sdp\r\n" + "Content-Length: 139\r\n" + "\r\n" + "v=0\r\n" + "o=Port1723 2890844535 2890844535 IN IP4 50.60.70.80\r\n" + "s=-\r\n" + "c=IN IP4 50.60.70.80\r\n" + "t=0 0\r\n" + "m=audio 62002 RTP/AVP 0\r\n" + "a=rtpmap:0 PCMU/8000\r\n", + + /*===== RTP Early Media (one way) =====*/ + + /* + * 4. Back. 200 OK (M8) + */ + "SIP/2.0 200 OK\r\n" + "Via: SIP/2.0/UDP 50.60.70.1:5060;branch=z9hG4bK4545\r\n" + "From: ;tag=12\r\n" + "To: ;tag=37\r\n" + "Call-ID: 49235243082018498\r\n" + "CSeq: 1 INVITE\r\n" + "Contact: \r\n" + "Content-Type: application/sdp\r\n" + "Content-Length: 139\r\n" + "\r\n" + "v=0\r\n" + "o=Port1723 2890844535 2890844535 IN IP4 50.60.70.80\r\n" + "s=-\r\n" + "c=IN IP4 50.60.70.80\r\n" + "t=0 0\r\n" + "m=audio 62002 RTP/AVP 0\r\n" + "a=rtpmap:0 PCMU/8000\r\n", + + /* + * 5. Forw. ACK (M9) + */ + "ACK sip:50.60.70.80 SIP/2.0\r\n" + "Via: SIP/2.0/UDP 8.19.19.6:5060;branch=z9hG4bKfgrw\r\n" + "From: ;tag=12\r\n" + "To: ;tag=37\r\n" + "Call-ID: 49235243082018498\r\n" + "CSeq: 1 ACK\r\n" + "Max-forwards: 70\r\n" + "Content-Length: 0\r\n" + "\r\n", + + /*===== RTP Media (speech) =====*/ + + /* + * 6. Forw. BYE (M10) + */ + "BYE sip:50.60.70.80 SIP/2.0\r\n" + "Via: SIP/2.0/UDP 8.19.19.6:5060;branch=z9hG4bK321\r\n" + "From: ;tag=12\r\n" + "To: ;tag=37\r\n" + "Call-ID: 49235243082018498\r\n" + "CSeq: 3 BYE\r\n" + "Max-forwards: 70\r\n" + "Content-Length: 0\r\n" + "\r\n", + + /* + * 7. Back. 200 OK (M12) + */ + "SIP/2.0 200 OK\r\n" + "Via: SIP/2.0/UDP 50.60.70.1:5060;branch=z9hG4bK321\r\n" + "From: ;tag=12\r\n" + "To: ;tag=37\r\n" + "Call-ID: 49235243082018498\r\n" + "CSeq: 3 BYE\r\n" + "Content-Length: 0\r\n" + "\r\n", +}; + +/* + * SIP messages post + */ +const char *sipd3_post_snat[SIPD3_SZ] = { + /* + * 0. Forward. 
INVITE (M1) + */ + "INVITE sip:+12025551313@test.org;user=phone SIP/2.0\r\n" + "Via: SIP/2.0/UDP 50.60.70.1:5060;branch=z9hG4bK4545\r\n" + "From: ;tag=12\r\n" + "To: \r\n" + "Call-ID: 49235243082018498\r\n" + "CSeq: 1 INVITE\r\n" + "Contact: \r\n" + "Content-Type: application/sdp\r\n" + "Supported: 100rel\r\n" + "Max-forwards: 70\r\n" + "Content-Length: 155\r\n" + "\r\n" + "v=0\r\n" + "o=FF 2890844535 2890844535 IN IP4 50.60.70.1\r\n" + "s=-\r\n" + "c=IN IP4 50.60.70.1\r\n" + "t=0 0\r\n" + "m=audio 50004 RTP/AVP 0 8\r\n" + "a=rtpmap:0 PCMU/8000\r\n" + "a=rtpmap:8 PCMA/8000\r\n", + + /* + * 1. Back. 183 Session Progress (M4) + */ + "SIP/2.0 183 Session Progress\r\n" + "Via: SIP/2.0/UDP 8.19.19.6:5060;branch=z9hG4bK4545\r\n" + "From: ;tag=12\r\n" + "To: ;tag=37\r\n" + "Call-ID: 49235243082018498\r\n" + "CSeq: 1 INVITE\r\n" + "Contact: \r\n" + "Content-Type: application/sdp\r\n" + "Rseq: 08071\r\n" + "Content-Length: 139\r\n" + "\r\n" + "v=0\r\n" + "o=Port1723 2890844535 2890844535 IN IP4 50.60.70.80\r\n" + "s=-\r\n" + "c=IN IP4 50.60.70.80\r\n" + "t=0 0\r\n" + "m=audio 62002 RTP/AVP 0\r\n" + "a=rtpmap:0 PCMU/8000\r\n", + + /* + * 2. Forw. PRACK (M5) + */ + "PRACK sip:50.60.70.80 SIP/2.0\r\n" + "Via: SIP/2.0/UDP 50.60.70.1:5060;branch=z9hG4bK454\r\n" + "From: ;tag=37\r\n" + "To: ;tag=12\r\n" + "Call-ID: 49235243082018498\r\n" + "CSeq: 2 PRACK\r\n" + "Contact: \r\n" + "Max-forwards: 70\r\n" + "Rack: 08071 1 INVITE\r\n" + "Content-Length: 0\r\n" + "\r\n", + + /* + * 3. Back. 
200 OK (M6) + */ + "SIP/2.0 200 OK\r\n" + "Via: SIP/2.0/UDP 8.19.19.6:5060;branch=z9hG4bK4545\r\n" + "From: ;tag=12\r\n" + "To: ;tag=37\r\n" + "Call-ID: 49235243082018498\r\n" + "CSeq: 1 INVITE\r\n" + "Contact: \r\n" + "Content-Type: application/sdp\r\n" + "Content-Length: 139\r\n" + "\r\n" + "v=0\r\n" + "o=Port1723 2890844535 2890844535 IN IP4 50.60.70.80\r\n" + "s=-\r\n" + "c=IN IP4 50.60.70.80\r\n" + "t=0 0\r\n" + "m=audio 62002 RTP/AVP 0\r\n" + "a=rtpmap:0 PCMU/8000\r\n", + + /*===== RTP Early Media (one way) =====*/ + + /* + * 4. Back. 200 OK (M8) + */ + "SIP/2.0 200 OK\r\n" + "Via: SIP/2.0/UDP 8.19.19.6:5060;branch=z9hG4bK4545\r\n" + "From: ;tag=12\r\n" + "To: ;tag=37\r\n" + "Call-ID: 49235243082018498\r\n" + "CSeq: 1 INVITE\r\n" + "Contact: \r\n" + "Content-Type: application/sdp\r\n" + "Content-Length: 139\r\n" + "\r\n" + "v=0\r\n" + "o=Port1723 2890844535 2890844535 IN IP4 50.60.70.80\r\n" + "s=-\r\n" + "c=IN IP4 50.60.70.80\r\n" + "t=0 0\r\n" + "m=audio 62002 RTP/AVP 0\r\n" + "a=rtpmap:0 PCMU/8000\r\n", + + /* + * 5. Forw. ACK (M9) + */ + "ACK sip:50.60.70.80 SIP/2.0\r\n" + "Via: SIP/2.0/UDP 50.60.70.1:5060;branch=z9hG4bKfgrw\r\n" + "From: ;tag=12\r\n" + "To: ;tag=37\r\n" + "Call-ID: 49235243082018498\r\n" + "CSeq: 1 ACK\r\n" + "Max-forwards: 70\r\n" + "Content-Length: 0\r\n" + "\r\n", + + /*===== RTP Media (speech) =====*/ + + /* + * 6. Forw. BYE (M10) + */ + "BYE sip:50.60.70.80 SIP/2.0\r\n" + "Via: SIP/2.0/UDP 50.60.70.1:5060;branch=z9hG4bK321\r\n" + "From: ;tag=12\r\n" + "To: ;tag=37\r\n" + "Call-ID: 49235243082018498\r\n" + "CSeq: 3 BYE\r\n" + "Max-forwards: 70\r\n" + "Content-Length: 0\r\n" + "\r\n", + + /* + * 7. Back. 
200 OK (M12) + */ + "SIP/2.0 200 OK\r\n" + "Via: SIP/2.0/UDP 8.19.19.6:5060;branch=z9hG4bK321\r\n" + "From: ;tag=12\r\n" + "To: ;tag=37\r\n" + "Call-ID: 49235243082018498\r\n" + "CSeq: 3 BYE\r\n" + "Content-Length: 0\r\n" + "\r\n", +}; + diff --git a/tests/whole_dp/src/dp_test_npf_alg_sip_data4.c b/tests/whole_dp/src/dp_test_npf_alg_sip_data4.c new file mode 100644 index 00000000..25ef3dce --- /dev/null +++ b/tests/whole_dp/src/dp_test_npf_alg_sip_data4.c @@ -0,0 +1,361 @@ +/* + * Copyright (c) 2021, AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ + +#include "dp_test_npf_alg_sip_data.h" + +/* + * SIP Invite from 18.33.0.200 + * + * <===== FORWARDS DIR + * + * +-------+ + * 16.33.0.200 16.33.0.220 | | 18.33.0.220 18.33.0.200 + * -----------------------------+ +------------------------------- + * dp1T0 | | dp1T1 + * +-------+ + * <---- DNAT, 77.1.1.1 -> 16.33.0.200 + * 16.33.0.200 -> 77.1.1.1 + * + * SIP Call. This is taken directly from the pcap of an issue, however the + * order and format is changed to match the order that the osip library + * re-writes a SIP packet. + */ + +const bool sipd4_dir[SIPD4_SZ] = { + SIP_FORW, SIP_BACK, SIP_BACK, SIP_BACK, SIP_FORW, SIP_FORW, SIP_BACK +}; + +/* + * Indicates after which SIP message the data stream should occur (first msg + * is index 0). + */ +const uint sipd4_rtp_early_media_index = 3; +const uint sipd4_rtp_media_index = 5; + +/* + * SIP messages pre + */ +const char *sipd4_pre_dnat[SIPD4_SZ] = { + /* 0. Forward. 
INVITE (M1) */ + "INVITE sip:5000@77.1.1.1:5060 SIP/2.0\r\n" + "Via: SIP/2.0/UDP 18.33.0.200:5060;branch=z9hG4bK2567F\r\n" + "From: ;tag=1871EFF8-AE1\r\n" + "To: \r\n" + "Call-ID: 27DDA24D-F4A11E8-80CCFB84-B2765CC2@18.33.0.200\r\n" + "CSeq: 101 INVITE\r\n" + "Contact: \r\n" + "Content-Type: application/sdp\r\n" + "Supported: timer\r\n" + "Supported: resource-priority\r\n" + "Supported: replaces\r\n" + "Supported: sdp-anat\r\n" + "Min-se: 900\r\n" + "Cisco-guid: 0640716367-0256512488-2160589700-2994101442\r\n" + "User-agent: Cisco-SIPGateway/IOS-15.5.3.M1\r\n" + "Max-forwards: 70\r\n" + "Timestamp: 1518453580\r\n" + "Expires: 180\r\n" + "allow-events: telephone-event\r\n" + "Content-disposition: session;handling=required\r\n" + "Content-Length: 279\r\n" + "\r\n" + "v=0\r\n" + "o=CiscoSystemsSIP-GW-UserAgent 4600 9437 IN IP4 18.33.0.200\r\n" + "s=SIP Call\r\n" + "c=IN IP4 18.33.0.200\r\n" + "t=0 0\r\n" + "m=audio 16440 RTP/AVP 18 100\r\n" + "c=IN IP4 18.33.0.200\r\n" + "a=rtpmap:18 G729/8000\r\n" + "a=fmtp:18 annexb=no\r\n" + "a=rtpmap:100 telephone-event/8000\r\n" + "a=fmtp:100 0-16\r\n" + "a=ptime:30\r\n" + "a=ptime:30\r\n", + + /* 1. Back. 100 Trying */ + "SIP/2.0 100 Trying\r\n" + "Via: SIP/2.0/UDP 18.33.0.200:5060;branch=z9hG4bK2567F\r\n" + "From: ;tag=1871EFF8-AE1\r\n" + "To: \r\n" + "Call-ID: 27DDA24D-F4A11E8-80CCFB84-B2765CC2@18.33.0.200\r\n" + "CSeq: 101 INVITE\r\n" + "Timestamp: 1518453580\r\n" + "allow-events: telephone-event\r\n" + "Server: Cisco-SIPGateway/IOS-16.6.1\r\n" + "Session-id: 00000000000000000000000000000000;" + "remote=7121b823d210540b859a0d21af34a724\r\n" + "Content-Length: 0\r\n" + "\r\n", + + /* 2. Back. 
183 Session Progress */ + "SIP/2.0 183 Session Progress\r\n" + "Via: SIP/2.0/UDP 18.33.0.200:5060;branch=z9hG4bK2567F\r\n" + "From: ;tag=1871EFF8-AE1\r\n" + "To: ;tag=757787AF-E66\r\n" + "Call-ID: 27DDA24D-F4A11E8-80CCFB84-B2765CC2@18.33.0.200\r\n" + "CSeq: 101 INVITE\r\n" + "Contact: \r\n" + "Content-Type: application/sdp\r\n" + "allow-events: telephone-event\r\n" + "P-asserted-identity: \r\n" + "Supported: sdp-anat\r\n" + "Server: Cisco-SIPGateway/IOS-16.6.1\r\n" + "Session-id: a3c87797321e51d085c9b6346ba8c710;" + "remote=7121b823d210540b859a0d21af34a724\r\n" + "Content-disposition: session;handling=required\r\n" + "Content-Length: 253\r\n" + "\r\n" + "v=0\r\n" + "o=CiscoSystemsSIP-GW-UserAgent 4551 177 IN IP4 16.33.0.200\r\n" + "s=SIP Call\r\n" + "c=IN IP4 16.33.0.200\r\n" + "t=0 0\r\n" + "m=audio 8038 RTP/AVP 18 100\r\n" + "c=IN IP4 16.33.0.200\r\n" + "a=rtpmap:18 G729/8000\r\n" + "a=fmtp:18 annexb=no\r\n" + "a=rtpmap:100 telephone-event/8000\r\n" + "a=fmtp:100 0-16\r\n", + + /* 3. Back. 200 OK */ + "SIP/2.0 200 OK\r\n" + "Via: SIP/2.0/UDP 18.33.0.200:5060;branch=z9hG4bK2567F\r\n" + "From: ;tag=1871EFF8-AE1\r\n" + "To: ;tag=757787AF-E66\r\n" + "Call-ID: 27DDA24D-F4A11E8-80CCFB84-B2765CC2@18.33.0.200\r\n" + "CSeq: 101 INVITE\r\n" + "Contact: \r\n" + "Content-Type: application/sdp\r\n" + "allow-events: telephone-event\r\n" + /* Not translated, but not expected to be? */ + "P-asserted-identity: \r\n" + "Supported: replaces\r\n" + "Supported: sdp-anat\r\n" + "Server: Cisco-SIPGateway/IOS-16.6.1\r\n" + "Session-id: a3c87797321e51d085c9b6346ba8c710;" + "remote=7121b823d210540b859a0d21af34a724\r\n" + "Supported: timer\r\n" + "Content-disposition: session;handling=required\r\n" + "Content-Length: 253\r\n" + "\r\n" + "v=0\r\n" + /* Not translated, but not expected to be? 
*/ + "o=CiscoSystemsSIP-GW-UserAgent 4551 177 IN IP4 16.33.0.200\r\n" + "s=SIP Call\r\n" + "c=IN IP4 16.33.0.200\r\n" + "t=0 0\r\n" + "m=audio 8038 RTP/AVP 18 100\r\n" + "c=IN IP4 16.33.0.200\r\n" + "a=rtpmap:18 G729/8000\r\n" + "a=fmtp:18 annexb=no\r\n" + "a=rtpmap:100 telephone-event/8000\r\n" + "a=fmtp:100 0-16\r\n", + + /* 4. Forw. ACK */ + "ACK sip:5000@77.1.1.1:5060 SIP/2.0\r\n" + "Via: SIP/2.0/UDP 18.33.0.200:5060;branch=z9hG4bK26CF6\r\n" + "From: ;tag=1871EFF8-AE1\r\n" + "To: ;tag=757787AF-E66\r\n" + "Call-ID: 27DDA24D-F4A11E8-80CCFB84-B2765CC2@18.33.0.200\r\n" + "CSeq: 101 ACK\r\n" + "Max-forwards: 70\r\n" + "allow-events: telephone-event\r\n" + "Content-Length: 0\r\n" + "\r\n", + + /* 5. Forw. BYE. */ + "BYE sip:5000@77.1.1.1:5060 SIP/2.0\r\n" + "Via: SIP/2.0/UDP 18.33.0.200:5060;branch=z9hG4bK2740D\r\n" + "From: ;tag=1871EFF8-AE1\r\n" + "To: ;tag=757787AF-E66\r\n" + "Call-ID: 27DDA24D-F4A11E8-80CCFB84-B2765CC2@18.33.0.200\r\n" + "CSeq: 102 BYE\r\n" + "User-agent: Cisco-SIPGateway/IOS-15.5.3.M1\r\n" + "Max-forwards: 70\r\n" + "Timestamp: 1518453588\r\n" + "Reason: Q.850;cause=16\r\n" + "Content-Length: 0\r\n" + "\r\n", + + /* 6. Back. 200 OK */ + "SIP/2.0 200 OK\r\n" + "Via: SIP/2.0/UDP 18.33.0.200:5060;branch=z9hG4bK2740D\r\n" + "From: ;tag=1871EFF8-AE1\r\n" + "To: ;tag=757787AF-E66\r\n" + "Call-ID: 27DDA24D-F4A11E8-80CCFB84-B2765CC2@18.33.0.200\r\n" + "CSeq: 102 BYE\r\n" + "Server: Cisco-SIPGateway/IOS-16.6.1\r\n" + "Timestamp: 1518453588\r\n" + "Reason: Q.850;cause=16\r\n" + "Session-id: 7121b823d210540b859a0d21af34a724;" + "remote=a3c87797321e51d085c9b6346ba8c710\r\n" + "Content-Length: 0\r\n" + "\r\n", +}; + +/* + * SIP messages post + */ +const char *sipd4_post_dnat[SIPD4_SZ] = { + /* 0. Forward.
INVITE (M1) */ + "INVITE sip:5000@16.33.0.200:5060 SIP/2.0\r\n" + "Via: SIP/2.0/UDP 18.33.0.200:5060;branch=z9hG4bK2567F\r\n" + "From: ;tag=1871EFF8-AE1\r\n" + "To: \r\n" + "Call-ID: 27DDA24D-F4A11E8-80CCFB84-B2765CC2@18.33.0.200\r\n" + "CSeq: 101 INVITE\r\n" + "Contact: \r\n" + "Content-Type: application/sdp\r\n" + "Supported: timer\r\n" + "Supported: resource-priority\r\n" + "Supported: replaces\r\n" + "Supported: sdp-anat\r\n" + "Min-se: 900\r\n" + "Cisco-guid: 0640716367-0256512488-2160589700-2994101442\r\n" + "User-agent: Cisco-SIPGateway/IOS-15.5.3.M1\r\n" + "Max-forwards: 70\r\n" + "Timestamp: 1518453580\r\n" + "Expires: 180\r\n" + "allow-events: telephone-event\r\n" + "Content-disposition: session;handling=required\r\n" + "Content-Length: 279\r\n" + "\r\n" + "v=0\r\n" + "o=CiscoSystemsSIP-GW-UserAgent 4600 9437 IN IP4 18.33.0.200\r\n" + "s=SIP Call\r\n" + "c=IN IP4 18.33.0.200\r\n" + "t=0 0\r\n" + "m=audio 16440 RTP/AVP 18 100\r\n" + "c=IN IP4 18.33.0.200\r\n" + "a=rtpmap:18 G729/8000\r\n" + "a=fmtp:18 annexb=no\r\n" + "a=rtpmap:100 telephone-event/8000\r\n" + "a=fmtp:100 0-16\r\n" + "a=ptime:30\r\n" + "a=ptime:30\r\n", + + /* 1. Back. 100 Trying */ + "SIP/2.0 100 Trying\r\n" + "Via: SIP/2.0/UDP 18.33.0.200:5060;branch=z9hG4bK2567F\r\n" + "From: ;tag=1871EFF8-AE1\r\n" + "To: \r\n" + "Call-ID: 27DDA24D-F4A11E8-80CCFB84-B2765CC2@18.33.0.200\r\n" + "CSeq: 101 INVITE\r\n" + "Timestamp: 1518453580\r\n" + "allow-events: telephone-event\r\n" + "Server: Cisco-SIPGateway/IOS-16.6.1\r\n" + "Session-id: 00000000000000000000000000000000;" + "remote=7121b823d210540b859a0d21af34a724\r\n" + "Content-Length: 0\r\n" + "\r\n", + + /* 2. Back. 
183 Session Progress */ + "SIP/2.0 183 Session Progress\r\n" + "Via: SIP/2.0/UDP 18.33.0.200:5060;branch=z9hG4bK2567F\r\n" + "From: ;tag=1871EFF8-AE1\r\n" + "To: ;tag=757787AF-E66\r\n" + "Call-ID: 27DDA24D-F4A11E8-80CCFB84-B2765CC2@18.33.0.200\r\n" + "CSeq: 101 INVITE\r\n" + "Contact: \r\n" + "Content-Type: application/sdp\r\n" + "allow-events: telephone-event\r\n" + "P-asserted-identity: \r\n" + "Supported: sdp-anat\r\n" + "Server: Cisco-SIPGateway/IOS-16.6.1\r\n" + "Session-id: a3c87797321e51d085c9b6346ba8c710;" + "remote=7121b823d210540b859a0d21af34a724\r\n" + "Content-disposition: session;handling=required\r\n" + "Content-Length: 247\r\n" + "\r\n" + "v=0\r\n" + /* Not translated, but not expected to be? */ + "o=CiscoSystemsSIP-GW-UserAgent 4551 177 IN IP4 16.33.0.200\r\n" + "s=SIP Call\r\n" + "c=IN IP4 77.1.1.1\r\n" + "t=0 0\r\n" + "m=audio 8038 RTP/AVP 18 100\r\n" + "c=IN IP4 77.1.1.1\r\n" + "a=rtpmap:18 G729/8000\r\n" + "a=fmtp:18 annexb=no\r\n" + "a=rtpmap:100 telephone-event/8000\r\n" + "a=fmtp:100 0-16\r\n", + + /* 3. Back. 
200 OK */ + "SIP/2.0 200 OK\r\n" + "Via: SIP/2.0/UDP 18.33.0.200:5060;branch=z9hG4bK2567F\r\n" + "From: ;tag=1871EFF8-AE1\r\n" + "To: ;tag=757787AF-E66\r\n" + "Call-ID: 27DDA24D-F4A11E8-80CCFB84-B2765CC2@18.33.0.200\r\n" + "CSeq: 101 INVITE\r\n" + "Contact: \r\n" + "Content-Type: application/sdp\r\n" + "allow-events: telephone-event\r\n" + /* Not translated, but not expected to be */ + "P-asserted-identity: \r\n" + "Supported: replaces\r\n" + "Supported: sdp-anat\r\n" + "Server: Cisco-SIPGateway/IOS-16.6.1\r\n" + "Session-id: a3c87797321e51d085c9b6346ba8c710;" + "remote=7121b823d210540b859a0d21af34a724\r\n" + "Supported: timer\r\n" + "Content-disposition: session;handling=required\r\n" + "Content-Length: 247\r\n" + "\r\n" + "v=0\r\n" + /* Not translated, but not expected to be */ + "o=CiscoSystemsSIP-GW-UserAgent 4551 177 IN IP4 16.33.0.200\r\n" + "s=SIP Call\r\n" + "c=IN IP4 77.1.1.1\r\n" + "t=0 0\r\n" + "m=audio 8038 RTP/AVP 18 100\r\n" + "c=IN IP4 77.1.1.1\r\n" + "a=rtpmap:18 G729/8000\r\n" + "a=fmtp:18 annexb=no\r\n" + "a=rtpmap:100 telephone-event/8000\r\n" + "a=fmtp:100 0-16\r\n", + + /* 4. Forw. ACK */ + "ACK sip:5000@16.33.0.200:5060 SIP/2.0\r\n" + "Via: SIP/2.0/UDP 18.33.0.200:5060;branch=z9hG4bK26CF6\r\n" + "From: ;tag=1871EFF8-AE1\r\n" + "To: ;tag=757787AF-E66\r\n" + "Call-ID: 27DDA24D-F4A11E8-80CCFB84-B2765CC2@18.33.0.200\r\n" + "CSeq: 101 ACK\r\n" + "Max-forwards: 70\r\n" + "allow-events: telephone-event\r\n" + "Content-Length: 0\r\n" + "\r\n", + + /* 5. Forw. BYE. */ + "BYE sip:5000@16.33.0.200:5060 SIP/2.0\r\n" + "Via: SIP/2.0/UDP 18.33.0.200:5060;branch=z9hG4bK2740D\r\n" + "From: ;tag=1871EFF8-AE1\r\n" + "To: ;tag=757787AF-E66\r\n" + "Call-ID: 27DDA24D-F4A11E8-80CCFB84-B2765CC2@18.33.0.200\r\n" + "CSeq: 102 BYE\r\n" + "User-agent: Cisco-SIPGateway/IOS-15.5.3.M1\r\n" + "Max-forwards: 70\r\n" + "Timestamp: 1518453588\r\n" + "Reason: Q.850;cause=16\r\n" + "Content-Length: 0\r\n" + "\r\n", + + /* 6. Back. 
200 OK */ + "SIP/2.0 200 OK\r\n" + "Via: SIP/2.0/UDP 18.33.0.200:5060;branch=z9hG4bK2740D\r\n" + "From: ;tag=1871EFF8-AE1\r\n" + "To: ;tag=757787AF-E66\r\n" + "Call-ID: 27DDA24D-F4A11E8-80CCFB84-B2765CC2@18.33.0.200\r\n" + "CSeq: 102 BYE\r\n" + "Server: Cisco-SIPGateway/IOS-16.6.1\r\n" + "Timestamp: 1518453588\r\n" + "Reason: Q.850;cause=16\r\n" + "Session-id: 7121b823d210540b859a0d21af34a724;" + "remote=a3c87797321e51d085c9b6346ba8c710\r\n" + "Content-Length: 0\r\n" + "\r\n", +}; diff --git a/tests/whole_dp/src/dp_test_npf_alg_sip_nat.c b/tests/whole_dp/src/dp_test_npf_alg_sip_nat.c new file mode 100644 index 00000000..85f91dae --- /dev/null +++ b/tests/whole_dp/src/dp_test_npf_alg_sip_nat.c @@ -0,0 +1,821 @@ +/* + * Copyright (c) 2021, AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ + +#include + +#include "dp_test.h" +#include "dp_test_netlink_state.h" +#include "dp_test_npf_lib.h" +#include "dp_test_npf_fw_lib.h" +#include "dp_test_npf_nat_lib.h" + +#include "dp_test_npf_alg_sip_data.h" + +static void dpt_alg_sipd1_setup(void); +static void dpt_alg_sipd1_teardown(void); + +static void dpt_alg_sipd2_setup(void); +static void dpt_alg_sipd2_teardown(void); + +static void dpt_alg_sipd3_setup(void); +static void dpt_alg_sipd3_teardown(void); + +static void dpt_alg_sipd4_setup(void); +static void dpt_alg_sipd4_teardown(void); + +DP_DECL_TEST_SUITE(sip_nat); + +/* + * sip_nat10. Data set #1. No NAT. 
+ */
+DP_DECL_TEST_CASE(sip_nat, sip_nat10, dpt_alg_sipd1_setup,
+		  dpt_alg_sipd1_teardown);
+DP_START_TEST(sip_nat10, test)
+{
+	const char *desc, *pload;
+	uint hdr_clen, body_clen;	/* outputs of the length check */
+	bool forw, rv;
+	uint i;
+
+	static_assert(ARRAY_SIZE(sipd1) == ARRAY_SIZE(sipd1_dir),
+		      "sipd1 array size incorrect"); /* one direction per msg */
+
+	/*
+	 * For each SIP msg payload.
+	 *
+	 * No NAT is configured in this test, so the same payload string is
+	 * passed as both the injected and the expected payload, and the
+	 * expected addresses and ports equal the injected ones.
+	 */
+	for (i = 0; i < ARRAY_SIZE(sipd1); i++) {
+		pload = sipd1[i];
+		forw = (sipd1_dir[i] == SIP_FORW);
+		desc = sipd_descr(i, forw, pload);
+
+		/*
+		 * Check content-length value matches actual content-length.
+		 * This validates the test data itself before it is sent.
+		 */
+		rv = sipd_check_content_length(pload, &hdr_clen,
+					       &body_clen);
+		dp_test_fail_unless(rv, "[%s] hdr=%u, body=%u",
+				    desc, hdr_clen, body_clen);
+
+		if (forw) {
+			dpt_udp_pl("dp1T0", "aa:bb:cc:16:0:20",
+				   "1.1.1.2", 5060, "22.22.22.2", 5060, /* pre */
+				   "1.1.1.2", 5060, "22.22.22.2", 5060, /* post */
+				   "aa:bb:cc:18:0:1", "dp2T1",
+				   DP_TEST_FWD_FORWARDED,
+				   pload, strlen(pload),
+				   pload, strlen(pload), desc);
+		} else {
+			dpt_udp_pl("dp2T1", "aa:bb:cc:18:0:1",
+				   "22.22.22.2", 5060, "1.1.1.2", 5060, /* pre */
+				   "22.22.22.2", 5060, "1.1.1.2", 5060, /* post */
+				   "aa:bb:cc:16:0:20", "dp1T0",
+				   DP_TEST_FWD_FORWARDED,
+				   pload, strlen(pload),
+				   pload, strlen(pload), desc);
+		}
+	}
+
+} DP_END_TEST; /* sip_nat10 */
+
+/*
+ * sip_nat11. Data Set #1. SNAT.
+ *
+ * RTP flow started in forw direction. RTCP flow started in back direction.
+ */ +DP_DECL_TEST_CASE(sip_nat, sip_nat11, dpt_alg_sipd1_setup, + dpt_alg_sipd1_teardown); +DP_START_TEST(sip_nat11, test) +{ + const char *desc, *pre_pload, *pst_pload; + uint hdr_clen, body_clen; + bool forw, rv; + uint i; + + static_assert(ARRAY_SIZE(sipd1_pre_snat) == + ARRAY_SIZE(sipd1_post_snat), + "sipd pre and post array size don't match"); + static_assert(ARRAY_SIZE(sipd1_pre_snat) == ARRAY_SIZE(sipd1_dir), + "spid pre snat array size incorrect"); + + /* Configure SNAT with sequential port allocation */ + struct dp_test_npf_nat_rule_t snat = { + .desc = "snat rule", + .rule = "10", + .ifname = "dp2T1", + .proto = IPPROTO_UDP, + .map = "dynamic", + .port_alloc = "sequential", + .from_addr = "1.1.1.0/24", + .from_port = NULL, + .to_addr = NULL, + .to_port = NULL, + .trans_addr = "30.30.30.2", + .trans_port = "1024-2000", + }; + dp_test_npf_snat_add(&snat, true); + + /* For each SIP msg payload */ + for (i = 0; i < ARRAY_SIZE(sipd1_pre_snat); i++) { + pre_pload = sipd1_pre_snat[i]; + pst_pload = sipd1_post_snat[i]; + forw = (sipd1_dir[i] == SIP_FORW); + desc = sipd_descr(i, forw, pst_pload); + + /* Check content-length value matches actual content-length */ + rv = sipd_check_content_length(pre_pload, &hdr_clen, + &body_clen); + dp_test_fail_unless(rv, "[%s] Pre hdr=%u, body=%u", + desc, hdr_clen, body_clen); + + rv = sipd_check_content_length(pst_pload, &hdr_clen, + &body_clen); + dp_test_fail_unless(rv, "[%s] Post hdr=%u, body=%u", + desc, hdr_clen, body_clen); + + if (forw) { + dpt_udp_pl("dp1T0", "aa:bb:cc:16:0:20", + "1.1.1.2", 5060, "22.22.22.2", 5060, + "30.30.30.2", 1024, "22.22.22.2", 5060, + "aa:bb:cc:18:0:1", "dp2T1", + DP_TEST_FWD_FORWARDED, + pre_pload, strlen(pre_pload), + pst_pload, strlen(pst_pload), desc); + } else { + dpt_udp_pl("dp2T1", "aa:bb:cc:18:0:1", + "22.22.22.2", 5060, "30.30.30.2", 1024, + "22.22.22.2", 5060, "1.1.1.2", 5060, + "aa:bb:cc:16:0:20", "dp1T0", + DP_TEST_FWD_FORWARDED, + pre_pload, strlen(pre_pload), + pst_pload, 
strlen(pst_pload), desc); + } + + if (i == sipd1_rtp_index) { + /* RTP Forw (Initial) */ + dpt_udp("dp1T0", "aa:bb:cc:16:0:20", + "1.1.1.2", 10000, "22.22.22.2", 60000, + "30.30.30.2", 1026, "22.22.22.2", 60000, + "aa:bb:cc:18:0:1", "dp2T1", + DP_TEST_FWD_FORWARDED); + + /* RTP Back */ + dpt_udp("dp2T1", "aa:bb:cc:18:0:1", + "22.22.22.2", 60000, "30.30.30.2", 1026, + "22.22.22.2", 60000, "1.1.1.2", 10000, + "aa:bb:cc:16:0:20", "dp1T0", + DP_TEST_FWD_FORWARDED); + + /* RTCP Back (Initial) */ + dpt_udp("dp2T1", "aa:bb:cc:18:0:1", + "22.22.22.2", 60001, "30.30.30.2", 1027, + "22.22.22.2", 60001, "1.1.1.2", 10001, + "aa:bb:cc:16:0:20", "dp1T0", + DP_TEST_FWD_FORWARDED); + + /* RTCP Forw */ + dpt_udp("dp1T0", "aa:bb:cc:16:0:20", + "1.1.1.2", 10001, "22.22.22.2", 60001, + "30.30.30.2", 1027, "22.22.22.2", 60001, + "aa:bb:cc:18:0:1", "dp2T1", + DP_TEST_FWD_FORWARDED); + + } + } + + dp_test_npf_snat_del(snat.ifname, snat.rule, true); + +} DP_END_TEST; /* sip_nat11 */ + +/* + * sip_nat12. Data Set #1. DNAT. 
+ */ +DP_DECL_TEST_CASE(sip_nat, sip_nat12, dpt_alg_sipd1_setup, + dpt_alg_sipd1_teardown); +DP_START_TEST(sip_nat12, test) +{ + const char *desc, *pre_pload, *pst_pload; + uint hdr_clen, body_clen; + bool forw, rv; + uint i; + + static_assert(ARRAY_SIZE(sipd1_pre_dnat) == + ARRAY_SIZE(sipd1_post_dnat), + "sipd1 pre and post dnat array size don't match"); + static_assert(ARRAY_SIZE(sipd1_pre_dnat) == ARRAY_SIZE(sipd1_dir), + "spid pre dnat array size incorrect"); + + struct dp_test_npf_nat_rule_t dnat = { + .desc = "dnat rule", + .rule = "10", + .ifname = "dp1T0", + .proto = NAT_NULL_PROTO, + .map = "dynamic", + .from_addr = NULL, + .from_port = NULL, + .to_addr = "1.1.1.22/24", + .to_port = NULL, + .trans_addr = "22.22.22.2", + .trans_port = NULL, + .port_alloc = NULL, + }; + dp_test_npf_dnat_add(&dnat, true); + + /* For each SIP msg payload */ + for (i = 0; i < ARRAY_SIZE(sipd1_pre_dnat); i++) { + pre_pload = sipd1_pre_dnat[i]; + pst_pload = sipd1_post_dnat[i]; + forw = (sipd1_dir[i] == SIP_FORW); + desc = sipd_descr(i, forw, pst_pload); + + /* Check content-length value matches actual content-length */ + rv = sipd_check_content_length(pre_pload, &hdr_clen, + &body_clen); + dp_test_fail_unless(rv, "[%s] Pre hdr=%u, body=%u", + desc, hdr_clen, body_clen); + + rv = sipd_check_content_length(pst_pload, &hdr_clen, + &body_clen); + dp_test_fail_unless(rv, "[%s] Post hdr=%u, body=%u", + desc, hdr_clen, body_clen); + + if (forw) { + dpt_udp_pl("dp1T0", "aa:bb:cc:16:0:20", + "1.1.1.2", 5060, "1.1.1.22", 5060, + "1.1.1.2", 5060, "22.22.22.2", 5060, + "aa:bb:cc:18:0:1", "dp2T1", + DP_TEST_FWD_FORWARDED, + pre_pload, strlen(pre_pload), + pst_pload, strlen(pst_pload), desc); + } else { + dpt_udp_pl("dp2T1", "aa:bb:cc:18:0:1", + "22.22.22.2", 5060, "1.1.1.2", 5060, + "1.1.1.22", 5060, "1.1.1.2", 5060, + "aa:bb:cc:16:0:20", "dp1T0", + DP_TEST_FWD_FORWARDED, + pre_pload, strlen(pre_pload), + pst_pload, strlen(pst_pload), desc); + } + + if (i == sipd1_rtp_index) { + /* RTP Forw 
(Initial) */ + dpt_udp("dp1T0", "aa:bb:cc:16:0:20", + "1.1.1.2", 10000, "1.1.1.22", 60000, + "1.1.1.2", 10000, "22.22.22.2", 60000, + "aa:bb:cc:18:0:1", "dp2T1", + DP_TEST_FWD_FORWARDED); + + /* RTP Back */ + dpt_udp("dp2T1", "aa:bb:cc:18:0:1", + "22.22.22.2", 60000, "1.1.1.2", 10000, + "1.1.1.22", 60000, "1.1.1.2", 10000, + "aa:bb:cc:16:0:20", "dp1T0", + DP_TEST_FWD_FORWARDED); + + /* RTCP Back (Initial) */ + dpt_udp("dp2T1", "aa:bb:cc:18:0:1", + "22.22.22.2", 60001, "1.1.1.2", 10001, + "1.1.1.22", 60001, "1.1.1.2", 10001, + "aa:bb:cc:16:0:20", "dp1T0", + DP_TEST_FWD_FORWARDED); + + /* RTCP Forw */ + dpt_udp("dp1T0", "aa:bb:cc:16:0:20", + "1.1.1.2", 10001, "1.1.1.22", 60001, + "1.1.1.2", 10001, "22.22.22.2", 60001, + "aa:bb:cc:18:0:1", "dp2T1", + DP_TEST_FWD_FORWARDED); + + } + } + + dp_test_npf_dnat_del(dnat.ifname, dnat.rule, true); + +} DP_END_TEST; /* sip_nat12 */ + + +/* + * sip_nat20. Data set #2. No NAT. + */ +DP_DECL_TEST_CASE(sip_nat, sip_nat20, dpt_alg_sipd2_setup, + dpt_alg_sipd2_teardown); +DP_START_TEST(sip_nat20, test) +{ + const char *desc, *pload; + uint hdr_clen, body_clen; + bool forw, rv; + uint i; + + static_assert(ARRAY_SIZE(sipd2) == ARRAY_SIZE(sipd2_dir), + "sipd2 array size incorrect"); + + /* + * Caller to/from Proxy Server + */ +#define SIPD2_PROXY_INDEX 3 + for (i = 0; i < SIPD2_PROXY_INDEX; i++) { + pload = sipd2[i]; + forw = (sipd2_dir[i] == SIP_FORW); + desc = sipd_descr(i, forw, pload); + + /* Check content-length value matches actual content-length */ + rv = sipd_check_content_length(pload, &hdr_clen, + &body_clen); + dp_test_fail_unless(rv, "[%s] hdr=%u, body=%u", + desc, hdr_clen, body_clen); + + if (forw) { + dpt_udp_pl("dp1T0", "aa:bb:cc:16:0:20", + "100.101.102.103", 5060, /* pre src */ + "200.201.202.205", 5060, /* pre dst */ + "100.101.102.103", 5060, /* post src */ + "200.201.202.205", 5060, /* post dst */ + "aa:bb:cc:18:0:5", "dp2T1", + DP_TEST_FWD_FORWARDED, + pload, strlen(pload), + pload, strlen(pload), desc); + } else { 
+ dpt_udp_pl("dp2T1", "aa:bb:cc:18:0:5", + "200.201.202.205", 5060, /* pre src */ + "100.101.102.103", 5060, /* pre dst */ + "200.201.202.205", 5060, /* post src */ + "100.101.102.103", 5060, /* post dst */ + "aa:bb:cc:16:0:20", "dp1T0", + DP_TEST_FWD_FORWARDED, + pload, strlen(pload), + pload, strlen(pload), desc); + } + } + + /* + * Caller to/from Callee + */ + for (i = SIPD2_PROXY_INDEX; i < ARRAY_SIZE(sipd2); i++) { + pload = sipd2[i]; + forw = (sipd2_dir[i] == SIP_FORW); + desc = sipd_descr(i, forw, pload); + + /* Check content-length value matches actual content-length */ + rv = sipd_check_content_length(pload, &hdr_clen, + &body_clen); + dp_test_fail_unless(rv, "[%s] hdr=%u, body=%u", + desc, hdr_clen, body_clen); + + if (forw) { + dpt_udp_pl("dp1T0", "aa:bb:cc:16:0:20", + "100.101.102.103", 5060, /* pre src */ + "200.201.202.203", 5060, /* pre dst */ + "100.101.102.103", 5060, /* post src */ + "200.201.202.203", 5060, /* post dst */ + "aa:bb:cc:18:0:1", "dp2T1", + DP_TEST_FWD_FORWARDED, + pload, strlen(pload), + pload, strlen(pload), desc); + } else { + dpt_udp_pl("dp2T1", "aa:bb:cc:18:0:1", + "200.201.202.203", 5060, /* pre src */ + "100.101.102.103", 5060, /* pre dst */ + "200.201.202.203", 5060, /* post src */ + "100.101.102.103", 5060, /* post dst */ + "aa:bb:cc:16:0:20", "dp1T0", + DP_TEST_FWD_FORWARDED, + pload, strlen(pload), + pload, strlen(pload), desc); + } + } + +} DP_END_TEST; /* sip_nat20 */ + + +/* + * sip_nat21. Data set #2. SNAT. 
+ */ +DP_DECL_TEST_CASE(sip_nat, sip_nat21, dpt_alg_sipd2_setup, + dpt_alg_sipd2_teardown); +DP_START_TEST(sip_nat21, test) +{ + const char *desc, *pre_pload, *pst_pload; + uint hdr_clen, body_clen; + bool forw, rv; + uint i; + + static_assert(ARRAY_SIZE(sipd2_pre_snat) == + ARRAY_SIZE(sipd2_post_snat), + "sipd pre and post snat array size don't match"); + static_assert(ARRAY_SIZE(sipd2_pre_snat) == ARRAY_SIZE(sipd2_dir), + "spid2 pre snat array size incorrect"); + + struct dp_test_npf_nat_rule_t snat = { + .desc = "snat rule", + .rule = "10", + .ifname = "dp2T1", + .proto = IPPROTO_UDP, + .map = "dynamic", + .port_alloc = "sequential", + .from_addr = "100.101.102.0/24", + .from_port = NULL, + .to_addr = NULL, + .to_port = NULL, + .trans_addr = "masquerade", /* 200.201.202.1 */ + .trans_port = "1024-65535" + }; + dp_test_npf_snat_add(&snat, true); + + /* + * Caller to/from Proxy Server + */ +#define SIPD2_PROXY_INDEX 3 + for (i = 0; i < SIPD2_PROXY_INDEX; i++) { + pre_pload = sipd2_pre_snat[i]; + pst_pload = sipd2_post_snat[i]; + forw = (sipd2_dir[i] == SIP_FORW); + desc = sipd_descr(i, forw, pst_pload); + + /* Check content-length value matches actual content-length */ + rv = sipd_check_content_length(pre_pload, &hdr_clen, + &body_clen); + dp_test_fail_unless(rv, "[%s] hdr=%u, body=%u", + desc, hdr_clen, body_clen); + + rv = sipd_check_content_length(pst_pload, &hdr_clen, + &body_clen); + dp_test_fail_unless(rv, "[%s] hdr=%u, body=%u", + desc, hdr_clen, body_clen); + + if (forw) { + dpt_udp_pl("dp1T0", "aa:bb:cc:16:0:20", + "100.101.102.103", 5060, /* pre src */ + "200.201.202.205", 5060, /* pre dst */ + "200.201.202.1", 5060, /* post src */ + "200.201.202.205", 5060, /* post dst */ + "aa:bb:cc:18:0:5", "dp2T1", + DP_TEST_FWD_FORWARDED, + pre_pload, strlen(pre_pload), + pst_pload, strlen(pst_pload), desc); + } else { + dpt_udp_pl("dp2T1", "aa:bb:cc:18:0:5", + "200.201.202.205", 5060, /* pre src */ + "200.201.202.1", 5060, /* pre dst */ + "200.201.202.205", 5060, 
/* post src */ + "100.101.102.103", 5060, /* post dst */ + "aa:bb:cc:16:0:20", "dp1T0", + DP_TEST_FWD_FORWARDED, + pre_pload, strlen(pre_pload), + pst_pload, strlen(pst_pload), desc); + } + } + + /* + * Caller to/from Callee + */ + for (i = SIPD2_PROXY_INDEX; i < ARRAY_SIZE(sipd2); i++) { + pre_pload = sipd2_pre_snat[i]; + pst_pload = sipd2_post_snat[i]; + forw = (sipd2_dir[i] == SIP_FORW); + desc = sipd_descr(i, forw, pst_pload); + + /* Check content-length value matches actual content-length */ + rv = sipd_check_content_length(pre_pload, &hdr_clen, + &body_clen); + dp_test_fail_unless(rv, "[%s] hdr=%u, body=%u", + desc, hdr_clen, body_clen); + + rv = sipd_check_content_length(pst_pload, &hdr_clen, + &body_clen); + dp_test_fail_unless(rv, "[%s] hdr=%u, body=%u", + desc, hdr_clen, body_clen); + + if (forw) { + dpt_udp_pl("dp1T0", "aa:bb:cc:16:0:20", + "100.101.102.103", 5060, /* pre src */ + "200.201.202.203", 5060, /* pre dst */ + "200.201.202.1", 5061, /* post src */ + "200.201.202.203", 5060, /* post dst */ + "aa:bb:cc:18:0:1", "dp2T1", + DP_TEST_FWD_FORWARDED, + pre_pload, strlen(pre_pload), + pst_pload, strlen(pst_pload), desc); + } else { + dpt_udp_pl("dp2T1", "aa:bb:cc:18:0:1", + "200.201.202.203", 5060, /* pre src */ + "200.201.202.1", 5061, /* pre dst */ + "200.201.202.203", 5060, /* post src */ + "100.101.102.103", 5060, /* post dst */ + "aa:bb:cc:16:0:20", "dp1T0", + DP_TEST_FWD_FORWARDED, + pre_pload, strlen(pre_pload), + pst_pload, strlen(pst_pload), desc); + } + + /* RTP Data flow */ + if (i == sipd2_rtp_index) { + /* RTP Back (Initial) */ + dpt_udp("dp2T1", "aa:bb:cc:18:0:1", + "200.201.202.203", 60000, + "200.201.202.1", 10000, + "200.201.202.203", 60000, + "100.101.102.103", 10000, + "aa:bb:cc:16:0:20", "dp1T0", + DP_TEST_FWD_FORWARDED); + + /* RTP Forw */ + dpt_udp("dp1T0", "aa:bb:cc:16:0:20", + "100.101.102.103", 10000, + "200.201.202.203", 60000, + "200.201.202.1", 10000, + "200.201.202.203", 60000, + "aa:bb:cc:18:0:1", "dp2T1", + 
DP_TEST_FWD_FORWARDED); + + } + } + + dp_test_npf_snat_del(snat.ifname, snat.rule, true); + +} DP_END_TEST; /* sip_nat21 */ + +/* + * sip_nat30. Data set #3. SNAT. + */ +DP_DECL_TEST_CASE(sip_nat, sip_nat30, dpt_alg_sipd3_setup, + dpt_alg_sipd3_teardown); +DP_START_TEST(sip_nat30, test) +{ + const char *desc, *pre_pload, *pst_pload; + uint hdr_clen, body_clen; + bool forw, rv; + uint i; + + static_assert(ARRAY_SIZE(sipd3_pre_snat) == + ARRAY_SIZE(sipd3_post_snat), + "sipd3 pre and post array size don't match"); + static_assert(ARRAY_SIZE(sipd3_pre_snat) == ARRAY_SIZE(sipd3_dir), + "spid3 pre snat array size incorrect"); + + struct dp_test_npf_nat_rule_t snat = { + .desc = "snat rule", + .rule = "10", + .ifname = "dp2T1", + .proto = IPPROTO_UDP, + .map = "dynamic", + .port_alloc = "sequential", + .from_addr = "8.19.19.0/24", + .from_port = NULL, + .to_addr = NULL, + .to_port = NULL, + .trans_addr = "masquerade", /* 50.60.70.1 */ + .trans_port = "1024-65535" + }; + dp_test_npf_snat_add(&snat, true); + + for (i = 0; i < ARRAY_SIZE(sipd3_dir); i++) { + pre_pload = sipd3_pre_snat[i]; + pst_pload = sipd3_post_snat[i]; + forw = (sipd3_dir[i] == SIP_FORW); + desc = sipd_descr(i, forw, pst_pload); + + /* Check content-length value matches actual content-length */ + rv = sipd_check_content_length(pre_pload, &hdr_clen, + &body_clen); + dp_test_fail_unless(rv, "[%s] hdr=%u, body=%u", + desc, hdr_clen, body_clen); + + rv = sipd_check_content_length(pst_pload, &hdr_clen, + &body_clen); + dp_test_fail_unless(rv, "[%s] hdr=%u, body=%u", + desc, hdr_clen, body_clen); + + if (forw) { + dpt_udp_pl("dp1T0", "aa:bb:cc:16:0:20", + "8.19.19.6", 5060, /* pre src */ + "50.60.70.80", 5060, /* pre dst */ + "50.60.70.1", 5060, /* post src */ + "50.60.70.80", 5060, /* post dst */ + "aa:bb:cc:18:0:1", "dp2T1", + DP_TEST_FWD_FORWARDED, + pre_pload, strlen(pre_pload), + pst_pload, strlen(pst_pload), desc); + } else { + dpt_udp_pl("dp2T1", "aa:bb:cc:18:0:1", + "50.60.70.80", 5060, /* pre src */ + 
"50.60.70.1", 5060, /* pre dst */ + "50.60.70.80", 5060, /* post src */ + "8.19.19.6", 5060, /* post dst */ + "aa:bb:cc:16:0:20", "dp1T0", + DP_TEST_FWD_FORWARDED, + pre_pload, strlen(pre_pload), + pst_pload, strlen(pst_pload), desc); + } + + if (i == sipd3_rtp_early_media_index) { + /* Back. RTP Early media */ + dpt_udp("dp2T1", "aa:bb:cc:18:0:1", + "50.60.70.80", 62002, "50.60.70.1", 50004, + "50.60.70.80", 62002, "8.19.19.6", 50004, + "aa:bb:cc:16:0:20", "dp1T0", + DP_TEST_FWD_FORWARDED); + } + + if (i == sipd3_rtp_media_index) { + /* RTP Forw */ + dpt_udp("dp1T0", "aa:bb:cc:16:0:20", + "8.19.19.6", 50004, "50.60.70.80", 62002, + "50.60.70.1", 50004, "50.60.70.80", 62002, + "aa:bb:cc:18:0:1", "dp2T1", + DP_TEST_FWD_FORWARDED); + + /* RTP Back */ + dpt_udp("dp2T1", "aa:bb:cc:18:0:1", + "50.60.70.80", 62002, "50.60.70.1", 50004, + "50.60.70.80", 62002, "8.19.19.6", 50004, + "aa:bb:cc:16:0:20", "dp1T0", + DP_TEST_FWD_FORWARDED); + } + } + + dp_test_npf_snat_del(snat.ifname, snat.rule, true); + +} DP_END_TEST; /* sip_nat30 */ + + +/* + * sip_nat40. Data set #4. DNAT. 
+ */
+DP_DECL_TEST_CASE(sip_nat, sip_nat40, dpt_alg_sipd4_setup,
+		  dpt_alg_sipd4_teardown);
+DP_START_TEST(sip_nat40, test)
+{
+	const char *desc, *pre_pload, *pst_pload;
+	uint hdr_clen, body_clen;
+	bool forw, rv;
+	uint i;
+
+	/* Payload and direction tables are walked with a single index */
+	static_assert(ARRAY_SIZE(sipd4_pre_dnat) ==
+		      ARRAY_SIZE(sipd4_post_dnat),
+		      "sipd4 pre and post dnat array size don't match");
+	static_assert(ARRAY_SIZE(sipd4_pre_dnat) == ARRAY_SIZE(sipd4_dir),
+		      "sipd4 pre dnat array size incorrect");
+
+	/* DNAT on dp1T1: destination 77.1.1.1 is translated to 16.33.0.200 */
+	struct dp_test_npf_nat_rule_t dnat = {
+		.desc		= "DNAT rule",
+		.rule		= "1",
+		.ifname		= "dp1T1",
+		.proto		= IPPROTO_UDP,
+		.map		= "dynamic",
+		.from_addr	= NULL,
+		.from_port	= NULL,
+		.to_addr	= "77.1.1.1",
+		.to_port	= NULL,
+		.trans_addr	= "16.33.0.200",
+		.trans_port	= NULL
+	};
+	dp_test_npf_dnat_add(&dnat, true);
+
+	for (i = 0; i < ARRAY_SIZE(sipd4_dir); i++) {
+		pre_pload = sipd4_pre_dnat[i];
+		pst_pload = sipd4_post_dnat[i];
+		forw = (sipd4_dir[i] == SIP_FORW);
+		desc = sipd_descr(i, forw, pst_pload);
+
+		/* Check content-length value matches actual content-length */
+		rv = sipd_check_content_length(pre_pload, &hdr_clen,
+					       &body_clen);
+		dp_test_fail_unless(rv, "[%s] Pre hdr=%u, body=%u",
+				    desc, hdr_clen, body_clen);
+
+		rv = sipd_check_content_length(pst_pload, &hdr_clen,
+					       &body_clen);
+		dp_test_fail_unless(rv, "[%s] Post hdr=%u, body=%u",
+				    desc, hdr_clen, body_clen);
+
+		if (forw) {
+			dpt_udp_pl("dp1T1", "7c:69:f6:4a:3a:50",
+				   "18.33.0.200", 60673, "77.1.1.1", 5060,
+				   "18.33.0.200", 60673, "16.33.0.200", 5060,
+				   "70:6e:6d:88:55:80", "dp1T0",
+				   DP_TEST_FWD_FORWARDED,
+				   pre_pload, strlen(pre_pload),
+				   pst_pload, strlen(pst_pload), desc);
+		} else {
+			dpt_udp_pl("dp1T0", "70:6e:6d:88:55:80",
+				   "16.33.0.200", 5060, "18.33.0.200", 60673,
+				   "77.1.1.1", 5060, "18.33.0.200", 60673,
+				   "7c:69:f6:4a:3a:50", "dp1T1",
+				   DP_TEST_FWD_FORWARDED,
+				   pre_pload, strlen(pre_pload),
+				   pst_pload, strlen(pst_pload), desc);
+		}
+
+	}
+
+	dp_test_npf_dnat_del(dnat.ifname, dnat.rule, true);
+
+} DP_END_TEST; /* sip_nat40 */
+
+
+static void dpt_alg_sipd1_setup(void)
+{
+	/*
+	 * Setup interfaces and neighbours.
+	 *
+	 * Data set #1 topology: host 1.1.1.2 behind dp1T0 and host
+	 * 22.22.22.2 behind dp2T1.
+	 */
+	dp_test_nl_add_ip_addr_and_connected("dp1T0", "1.1.1.254/24");
+	dp_test_nl_add_ip_addr_and_connected("dp2T1", "22.22.22.254/24");
+
+	dp_test_netlink_add_neigh("dp1T0", "1.1.1.2",
+				  "aa:bb:cc:16:0:20");
+	dp_test_netlink_add_neigh("dp2T1", "22.22.22.2",
+				  "aa:bb:cc:18:0:1");
+}
+
+static void dpt_alg_sipd1_teardown(void)
+{
+	dp_test_npf_cleanup(); /* first; then undo dpt_alg_sipd1_setup() */
+
+	dp_test_netlink_del_neigh("dp1T0", "1.1.1.2",
+				  "aa:bb:cc:16:0:20");
+	dp_test_netlink_del_neigh("dp2T1", "22.22.22.2",
+				  "aa:bb:cc:18:0:1");
+
+	dp_test_nl_del_ip_addr_and_connected("dp1T0", "1.1.1.254/24");
+	dp_test_nl_del_ip_addr_and_connected("dp2T1", "22.22.22.254/24");
+}
+
+static void dpt_alg_sipd2_setup(void)
+{
+	/*
+	 * Setup interfaces and neighbours.
+	 *
+	 * Data set #2 topology: caller 100.101.102.103 behind dp1T0; callee
+	 * 200.201.202.203 and a proxy server 200.201.202.205 behind dp2T1.
+	 */
+	dp_test_nl_add_ip_addr_and_connected("dp1T0", "100.101.102.1/24");
+	dp_test_nl_add_ip_addr_and_connected("dp2T1", "200.201.202.1/24");
+
+	dp_test_netlink_add_neigh("dp1T0", "100.101.102.103",
+				  "aa:bb:cc:16:0:20");
+	dp_test_netlink_add_neigh("dp2T1", "200.201.202.203",
+				  "aa:bb:cc:18:0:1");
+
+	/* Proxy server */
+	dp_test_netlink_add_neigh("dp2T1", "200.201.202.205",
+				  "aa:bb:cc:18:0:5");
+}
+
+static void dpt_alg_sipd2_teardown(void)
+{
+	dp_test_npf_cleanup(); /* first; then undo dpt_alg_sipd2_setup() */
+
+	dp_test_netlink_del_neigh("dp1T0", "100.101.102.103",
+				  "aa:bb:cc:16:0:20");
+	dp_test_netlink_del_neigh("dp2T1", "200.201.202.203",
+				  "aa:bb:cc:18:0:1");
+
+	/* Proxy server */
+	dp_test_netlink_del_neigh("dp2T1", "200.201.202.205",
+				  "aa:bb:cc:18:0:5");
+
+	dp_test_nl_del_ip_addr_and_connected("dp1T0", "100.101.102.1/24");
+	dp_test_nl_del_ip_addr_and_connected("dp2T1", "200.201.202.1/24");
+}
+
+static void dpt_alg_sipd3_setup(void)
+{
+	/*
+	 * Setup interfaces and neighbours.
+	 *
+	 * Data set #3 topology: host 8.19.19.6 behind dp1T0 and host
+	 * 50.60.70.80 behind dp2T1.
+	 */
+	dp_test_nl_add_ip_addr_and_connected("dp1T0", "8.19.19.1/24");
+	dp_test_nl_add_ip_addr_and_connected("dp2T1", "50.60.70.1/24");
+
+	dp_test_netlink_add_neigh("dp1T0", "8.19.19.6",
+				  "aa:bb:cc:16:0:20");
+	dp_test_netlink_add_neigh("dp2T1", "50.60.70.80",
+				  "aa:bb:cc:18:0:1");
+}
+
+static void dpt_alg_sipd3_teardown(void)
+{
+	dp_test_npf_cleanup(); /* first; then undo dpt_alg_sipd3_setup() */
+
+	dp_test_netlink_del_neigh("dp1T0", "8.19.19.6",
+				  "aa:bb:cc:16:0:20");
+	dp_test_netlink_del_neigh("dp2T1", "50.60.70.80",
+				  "aa:bb:cc:18:0:1");
+
+	dp_test_nl_del_ip_addr_and_connected("dp1T0", "8.19.19.1/24");
+	dp_test_nl_del_ip_addr_and_connected("dp2T1", "50.60.70.1/24");
+}
+
+static void dpt_alg_sipd4_setup(void)
+{
+	/*
+	 * Setup interfaces and neighbours.
+	 *
+	 * Data set #4 topology: host 16.33.0.200 behind dp1T0 and host
+	 * 18.33.0.200 behind dp1T1.  Unlike data sets #1 to #3 this one
+	 * uses dp1T1, not dp2T1, as the second interface.
+	 */
+	dp_test_nl_add_ip_addr_and_connected("dp1T0", "16.33.0.220/24");
+	dp_test_nl_add_ip_addr_and_connected("dp1T1", "18.33.0.220/24");
+
+	dp_test_netlink_add_neigh("dp1T0", "16.33.0.200",
+				  "70:6e:6d:88:55:80");
+	dp_test_netlink_add_neigh("dp1T1", "18.33.0.200",
+				  "7c:69:f6:4a:3a:50");
+}
+
+static void dpt_alg_sipd4_teardown(void)
+{
+	dp_test_npf_cleanup(); /* first; then undo dpt_alg_sipd4_setup() */
+
+	dp_test_netlink_del_neigh("dp1T0", "16.33.0.200",
+				  "70:6e:6d:88:55:80");
+	dp_test_netlink_del_neigh("dp1T1", "18.33.0.200",
+				  "7c:69:f6:4a:3a:50");
+
+	dp_test_nl_del_ip_addr_and_connected("dp1T0", "16.33.0.220/24");
+	dp_test_nl_del_ip_addr_and_connected("dp1T1", "18.33.0.220/24");
+
+}
diff --git a/tests/whole_dp/src/dp_test_npf_alg_tftp.c b/tests/whole_dp/src/dp_test_npf_alg_tftp.c
index de122d9a..b676bec6 100644
--- a/tests/whole_dp/src/dp_test_npf_alg_tftp.c
+++ b/tests/whole_dp/src/dp_test_npf_alg_tftp.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019, AT&T Intellectual Property. All rights reserved.
+ * Copyright (c) 2019-2021, AT&T Intellectual Property. All rights reserved.
* * SPDX-License-Identifier: LGPL-2.1-only * @@ -25,14 +25,14 @@ #include "dp_test.h" #include "dp_test_controller.h" #include "dp_test_cmd_state.h" -#include "dp_test_netlink_state.h" -#include "dp_test_lib.h" +#include "dp_test_netlink_state_internal.h" +#include "dp_test_lib_internal.h" #include "dp_test_str.h" #include "dp_test_lib_exp.h" -#include "dp_test_lib_intf.h" +#include "dp_test_lib_intf_internal.h" #include "dp_test_lib_pkt.h" #include "dp_test_lib_tcp.h" -#include "dp_test_pktmbuf_lib.h" +#include "dp_test_pktmbuf_lib_internal.h" #include "dp_test_console.h" #include "dp_test_json_utils.h" #include "dp_test_npf_sess_lib.h" @@ -40,6 +40,14 @@ #include "dp_test_npf_nat_lib.h" #include "dp_test_npf_alg_lib.h" +/* + * alg_tftp1b -- Stateful firewall only. IPv4. + * alg_tftp1c -- Stateful firewall only. IPv6. + * alg_tftp2 -- SNAT + * alg_tftp3 -- SNAT and CGNAT + * alg_tftp4 -- DNAT + */ + static void tftp_setup(void); static void tftp_teardown(void); @@ -62,20 +70,210 @@ _pak_rcv_nat_udp(const char *rx_intf, const char *pre_smac, int pre_vlan, const char *post_daddr, uint16_t post_dport, const char *post_dmac, int post_vlan, const char *tx_intf, int status, char *payload, uint payload_len, + uint16_t eth_type, const char *file, const char *func, int line); + #define pak_rcv_nat_udp(_a, _b, _c, _d, _e, _f, _g, _h, \ _i, _j, _k, _l, _m, _n, _o, _p, _q) \ _pak_rcv_nat_udp(_a, _b, _c, _d, _e, _f, _g, _h, \ _i, _j, _k, _l, _m, _n, _o, _p, _q, \ - __FILE__, __func__, __LINE__) + RTE_ETHER_TYPE_IPV4, __FILE__, __func__, __LINE__) + +#define pak_rcv_nat_udp_v6(_a, _b, _c, _d, _e, _f, _g, _h, \ + _i, _j, _k, _l, _m, _n, _o, _p, _q) \ + _pak_rcv_nat_udp(_a, _b, _c, _d, _e, _f, _g, _h, \ + _i, _j, _k, _l, _m, _n, _o, _p, _q, \ + RTE_ETHER_TYPE_IPV6, __FILE__, __func__, __LINE__) + + + DP_DECL_TEST_SUITE(npf_alg_tftp); + +/* + * alg_tftp1b -- Stateful firewall only. IPv4. 
+ */
+DP_DECL_TEST_CASE(npf_alg_tftp, alg_tftp1b, tftp_setup, tftp_teardown);
+DP_START_TEST(alg_tftp1b, test)
+{
+	/*
+	 * Stateful firewall rule to match on UDP pkts to port 69.  This
+	 * matches the ctrl flow but not the data flow.  The data flow only
+	 * gets through because of the alg child session.
+	 */
+	struct dp_test_npf_rule_t rset[] = {
+		{
+			.rule     = "10",
+			.pass     = PASS,
+			.stateful = true,
+			.npf      = "proto-final=17 dst-port=69"
+		},
+		RULE_DEF_BLOCK,
+		NULL_RULE
+	};
+
+	struct dp_test_npf_ruleset_t fw = {
+		.rstype = "fw-out",
+		.name   = "OUT_FW",
+		.enable = 1,
+		.attach_point   = "dp2T1",
+		.fwd    = FWD,
+		.dir    = "out",
+		.rules  = rset
+	};
+
+	dp_test_npf_fw_add(&fw, false);
+
+	/*
+	 * TFTP Read Request.
+	 *
+	 * This sets up an inbound tuple listening for dest port 50618, and
+	 * any source port.
+	 *
+	 * Payload: opcode 1 (RRQ), filename "rfc1350.txt", mode "octet",
+	 * each string NUL terminated.
+	 */
+	char tftp_rreq[] = {0x00, 0x01, /* opcode */
+			    0x72, 0x66, 0x63, 0x31, 0x33, 0x35, /* "rfc135" */
+			    0x30, 0x2e, 0x74, 0x78, 0x74, 0x00, /* "0.txt" */
+			    0x6f, 0x63, 0x74, 0x65, 0x74, 0x00}; /* "octet" */
+
+	pak_rcv_nat_udp("dp1T0", "aa:bb:cc:dd:1:a2", 0,
+			"1.1.1.2", 50618, "2.2.2.2", 69,
+			"1.1.1.2", 50618, "2.2.2.2", 69,
+			"aa:bb:cc:dd:2:b2", 0, "dp2T1",
+			DP_TEST_FWD_FORWARDED, tftp_rreq, sizeof(tftp_rreq));
+
+	/*
+	 * TFTP Data.
+	 *
+	 * Payload: opcode 3 (DATA), block number 1, data "...".  It is sent
+	 * from server port 3445 rather than 69, so the firewall rule above
+	 * does not match it.
+	 */
+	char tftp_data1[] = {0x00, 0x03, 0x00, 0x01,
+			     0x2e, 0x2e, 0x2e};
+
+	pak_rcv_nat_udp("dp2T1", "aa:bb:cc:dd:2:b2", 0,
+			"2.2.2.2", 3445, "1.1.1.2", 50618,
+			"2.2.2.2", 3445, "1.1.1.2", 50618,
+			"aa:bb:cc:dd:1:a2", 0, "dp1T0",
+			DP_TEST_FWD_FORWARDED, tftp_data1, sizeof(tftp_data1));
+
+	/*
+	 * TFTP Ack
+	 *
+	 * Payload: opcode 4 (ACK), block number 1.
+	 */
+	char tftp_ack1[] = {0x00, 0x04, 0x00, 0x01};
+
+	pak_rcv_nat_udp("dp1T0", "aa:bb:cc:dd:1:a2", 0,
+			"1.1.1.2", 50618, "2.2.2.2", 3445,
+			"1.1.1.2", 50618, "2.2.2.2", 3445,
+			"aa:bb:cc:dd:2:b2", 0, "dp2T1",
+			DP_TEST_FWD_FORWARDED, tftp_ack1, sizeof(tftp_ack1));
+
+	dp_test_npf_fw_del(&fw, false);
+	dp_test_npf_cleanup();
+
+} DP_END_TEST;
 
-DP_DECL_TEST_SUITE(npf_alg_tftp);
 
 /*
- * alg_tftp1 -- SNAT
+ * alg_tftp1c -- Stateful firewall only. IPv6.
  */
-DP_DECL_TEST_CASE(npf_alg_tftp, alg_tftp1, tftp_setup, tftp_teardown);
-DP_START_TEST(alg_tftp1, test)
+DP_DECL_TEST_CASE(npf_alg_tftp, alg_tftp1c, NULL, NULL);
+DP_START_TEST(alg_tftp1c, test)
+{
+	/*
+	 * Setup interfaces and neighbours.  Done inline because this test
+	 * case is declared with no setup or teardown function.
+	 */
+	dp_test_nl_add_ip_addr_and_connected("dp1T0", "2001:1:1::1/64");
+	dp_test_nl_add_ip_addr_and_connected("dp2T1", "2002:2:2::2/64");
+
+	dp_test_netlink_add_neigh("dp1T0", "2001:1:1::2",
+				  "aa:bb:cc:dd:1:a1");
+	dp_test_netlink_add_neigh("dp2T1", "2002:2:2::1",
+				  "aa:bb:cc:dd:2:b1");
+
+	/*
+	 * Stateful firewall rule to match on UDP pkts to port 69.  This
+	 * matches the ctrl flow but not the data flow.  The data flow only
+	 * gets through because of the alg child session.
+	 */
+	struct dp_test_npf_rule_t rset[] = {
+		{
+			.rule     = "10",
+			.pass     = PASS,
+			.stateful = true,
+			.npf      = "proto-final=17 dst-port=69"
+		},
+		RULE_DEF_BLOCK,
+		NULL_RULE
+	};
+
+	struct dp_test_npf_ruleset_t fw = {
+		.rstype = "fw-out",
+		.name   = "OUT_FW",
+		.enable = 1,
+		.attach_point   = "dp2T1",
+		.fwd    = FWD,
+		.dir    = "out",
+		.rules  = rset
+	};
+
+	dp_test_npf_fw_add(&fw, false);
+
+	/*
+	 * TFTP Read Request.
+	 *
+	 * This sets up an inbound tuple listening for dest port 50618, and
+	 * any source port.
+	 *
+	 * Payload: opcode 1 (RRQ), filename "rfc1350.txt", mode "octet",
+	 * each string NUL terminated.
+	 */
+	char tftp_rreq[] = {0x00, 0x01,
+			    0x72, 0x66, 0x63, 0x31, 0x33, 0x35,
+			    0x30, 0x2e, 0x74, 0x78, 0x74, 0x00,
+			    0x6f, 0x63, 0x74, 0x65, 0x74, 0x00};
+
+	pak_rcv_nat_udp_v6("dp1T0", "aa:bb:cc:dd:1:a1", 0,
+			   "2001:1:1::2", 50618, "2002:2:2::1", 69,
+			   "2001:1:1::2", 50618, "2002:2:2::1", 69,
+			   "aa:bb:cc:dd:2:b1", 0, "dp2T1",
+			   DP_TEST_FWD_FORWARDED,
+			   tftp_rreq, sizeof(tftp_rreq));
+
+	/*
+	 * TFTP Data.
+	 */
+	char tftp_data1[] = {0x00, 0x03, 0x00, 0x01,
+			     0x2e, 0x2e, 0x2e}; /* DATA, block 1, "..." */
+
+	pak_rcv_nat_udp_v6("dp2T1", "aa:bb:cc:dd:2:b1", 0,
+			   "2002:2:2::1", 3445, "2001:1:1::2", 50618,
+			   "2002:2:2::1", 3445, "2001:1:1::2", 50618,
+			   "aa:bb:cc:dd:1:a1", 0, "dp1T0",
+			   DP_TEST_FWD_FORWARDED,
+			   tftp_data1, sizeof(tftp_data1));
+
+	/*
+	 * TFTP Ack
+	 */
+	char tftp_ack1[] = {0x00, 0x04, 0x00, 0x01}; /* ACK, block 1 */
+
+	pak_rcv_nat_udp_v6("dp1T0", "aa:bb:cc:dd:1:a1", 0,
+			   "2001:1:1::2", 50618, "2002:2:2::1", 3445,
+			   "2001:1:1::2", 50618, "2002:2:2::1", 3445,
+			   "aa:bb:cc:dd:2:b1", 0, "dp2T1",
+			   DP_TEST_FWD_FORWARDED,
+			   tftp_ack1, sizeof(tftp_ack1));
+
+	dp_test_npf_fw_del(&fw, false);
+	dp_test_npf_cleanup();
+
+	dp_test_netlink_del_neigh("dp1T0", "2001:1:1::2",
+				  "aa:bb:cc:dd:1:a1");
+	dp_test_netlink_del_neigh("dp2T1", "2002:2:2::1",
+				  "aa:bb:cc:dd:2:b1");
+
+	/* Remove the interface addresses added at the start of the test */
+	dp_test_nl_del_ip_addr_and_connected("dp1T0", "2001:1:1::1/64");
+	dp_test_nl_del_ip_addr_and_connected("dp2T1", "2002:2:2::2/64");
+
+} DP_END_TEST;
+
+
+/*
+ * alg_tftp2 -- SNAT
+ */
+DP_DECL_TEST_CASE(npf_alg_tftp, alg_tftp2, tftp_setup, tftp_teardown);
+DP_START_TEST(alg_tftp2, test)
 {
 	struct dp_test_npf_nat_rule_t snat = {
 		.desc		= "snat rule",
@@ -83,6 +281,7 @@ DP_START_TEST(alg_tftp1, test)
 		.ifname		= "dp2T1",
 		.proto		= IPPROTO_UDP,
 		.map		= "dynamic",
+		.port_alloc	= NULL,
 		.from_addr	= "1.1.1.0/24",
 		.from_port	= NULL,
 		.to_addr	= NULL,
@@ -148,10 +347,106 @@ DP_START_TEST(alg_tftp1, test)
 
 
 /*
- * alg_tftp2 -- DNAT
+ * alg_tftp3 -- SNAT and CGNAT
  */
-DP_DECL_TEST_CASE(npf_alg_tftp, alg_tftp2, tftp_setup, tftp_teardown);
-DP_START_TEST(alg_tftp2, test)
+DP_DECL_TEST_CASE(npf_alg_tftp, alg_tftp3, tftp_setup, tftp_teardown);
+DP_START_TEST(alg_tftp3, test)
+{
+	struct dp_test_npf_nat_rule_t snat = {
+		.desc		= "snat rule",
+		.rule		= "10",
+		.ifname		= "dp2T1",
+		.proto		= IPPROTO_UDP,
+		.map		= "dynamic",
+		.port_alloc	= NULL,
+		.from_addr	= "1.1.1.0/24",
+		.from_port	= NULL,
+		.to_addr	= NULL,
+		.to_port	= NULL,
+		.trans_addr	= "2.2.2.254",
+		.trans_port	= NULL
+	};
+
+	dp_test_npf_snat_add(&snat, true);
+
+	/*
+	 * Add CGNAT config.  Matches on same source addresses as SNAT, but
+	 * maps to different addresses.
+	 *
+	 * Every packet below is expected to carry the SNAT address
+	 * 2.2.2.254, never one from the CGNAT pool 2.2.2.100-2.2.2.199.
+	 */
+	dp_test_npf_cmd_fmt(false, "nat-ut pool add POOL1 type=cgnat "
+			    "address-range=RANGE1/2.2.2.100-2.2.2.199");
+	cgnat_policy_add2("POLICY1", 10, "1.1.1.0/24", "POOL1",
+			  "dp2T1", NULL);
+
+	dp_test_npf_cmd_fmt(false, "cgn-ut snat-alg-bypass on");
+
+	/*
+	 * TFTP Read Request.
+	 *
+	 * This sets up an inbound tuple listening for dest port 50618, and
+	 * any source port.
+	 */
+	char tftp_rreq[] = {0x00, 0x01,
+			    0x72, 0x66, 0x63, 0x31, 0x33, 0x35,
+			    0x30, 0x2e, 0x74, 0x78, 0x74, 0x00,
+			    0x6f, 0x63, 0x74, 0x65, 0x74, 0x00};
+
+	pak_rcv_nat_udp("dp1T0", "aa:bb:cc:dd:1:a2", 0,
+			"1.1.1.2", 50618, "2.2.2.2", 69,
+			"2.2.2.254", 50618, "2.2.2.2", 69,
+			"aa:bb:cc:dd:2:b2", 0, "dp2T1",
+			DP_TEST_FWD_FORWARDED, tftp_rreq, sizeof(tftp_rreq));
+
+	/*
+	 * TFTP Data.
+	 *
+	 * Matches the tuple setup by the Read Req, and sets up a child
+	 * session (2.2.2.2:3445 -> 2.2.2.254:50618).
+	 */
+	char tftp_data1[] = {0x00, 0x03, 0x00, 0x01,
+			     0x2e, 0x2e, 0x2e};
+
+	pak_rcv_nat_udp("dp2T1", "aa:bb:cc:dd:2:b2", 0,
+			"2.2.2.2", 3445, "2.2.2.254", 50618,
+			"2.2.2.2", 3445, "1.1.1.2", 50618,
+			"aa:bb:cc:dd:1:a2", 0, "dp1T0",
+			DP_TEST_FWD_FORWARDED, tftp_data1, sizeof(tftp_data1));
+
+	/*
+	 * TFTP Ack
+	 */
+	char tftp_ack1[] = {0x00, 0x04, 0x00, 0x01};
+
+	pak_rcv_nat_udp("dp1T0", "aa:bb:cc:dd:1:a2", 0,
+			"1.1.1.2", 50618, "2.2.2.2", 3445,
+			"2.2.2.254", 50618, "2.2.2.2", 3445,
+			"aa:bb:cc:dd:2:b2", 0, "dp2T1",
+			DP_TEST_FWD_FORWARDED, tftp_ack1, sizeof(tftp_ack1));
+
+	if (0) { /* disabled debug output */
+		dp_test_npf_print_session_table(false);
+		dp_test_npf_print_nat_sessions("");
+	}
+
+	/*
+	 * Delete CGNAT config
+	 */
+	dp_test_npf_cmd_fmt(false, "cgn-ut snat-alg-bypass off");
+
+	cgnat_policy_del("POLICY1", 10, "dp2T1");
+	dp_test_npf_cmd_fmt(false, "nat-ut pool delete POOL1");
+
+	dp_test_npf_snat_del(snat.ifname, snat.rule, true);
+	dp_test_npf_cleanup();
+
+} DP_END_TEST;
+
+
+/*
+ * alg_tftp4 -- DNAT
+ */
+DP_DECL_TEST_CASE(npf_alg_tftp, alg_tftp4, tftp_setup, tftp_teardown);
+DP_START_TEST(alg_tftp4, test)
 {
 	struct dp_test_npf_nat_rule_t dnat = {
 		.desc		= "dnat rule",
@@ -159,6 +454,7 @@ DP_START_TEST(alg_tftp2, test)
 		.ifname		= "dp1T0",
 		.proto		= IPPROTO_UDP,
 		.map		= "dynamic",
+		.port_alloc	= NULL,
 		.from_addr	= NULL,
 		.from_port	= NULL,
 		.to_addr	= "2.2.2.254",
@@ -314,16 +610,17 @@ _pak_rcv_nat_udp(const char *rx_intf, const char *pre_smac, int pre_vlan,
 		 const char *post_daddr, uint16_t post_dport,
 		 const char *post_dmac, int post_vlan, const char *tx_intf,
 		 int status, char *payload, uint payload_len,
+		 uint16_t eth_type,
 		 const char *file, const char *func, int line)
 {
 	struct dp_test_expected *test_exp;
 	struct rte_mbuf *test_pak, *exp_pak;
 
-	/* Pre IPv4 UDP packet */
+	/* Pre UDP packet */
 	struct dp_test_pkt_desc_t pre_pkt_UDP = {
-		.text       = "IPv4 UDP",
+		.text       = "UDP",
 		.len        = payload_len,
-		.ether_type = ETHER_TYPE_IPv4,
+		.ether_type = eth_type,
 		.l3_src     = pre_saddr,
 		.l2_src
= pre_smac, .l3_dst = pre_daddr, @@ -339,11 +636,11 @@ _pak_rcv_nat_udp(const char *rx_intf, const char *pre_smac, int pre_vlan, .tx_intf = tx_intf }; - /* Post IPv4 UDP packet */ + /* Post UDP packet */ struct dp_test_pkt_desc_t post_pkt_UDP = { - .text = "IPv4 UDP", + .text = "UDP", .len = payload_len, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = eth_type, .l3_src = post_saddr, .l2_src = "aa:bb:cc:dd:2:b1", .l3_dst = post_daddr, @@ -359,11 +656,18 @@ _pak_rcv_nat_udp(const char *rx_intf, const char *pre_smac, int pre_vlan, .tx_intf = tx_intf }; - test_pak = dp_test_v4_pkt_from_desc(&pre_pkt_UDP); + if (eth_type == RTE_ETHER_TYPE_IPV4) + test_pak = dp_test_v4_pkt_from_desc(&pre_pkt_UDP); + else + test_pak = dp_test_v6_pkt_from_desc(&pre_pkt_UDP); udp_payload_init(test_pak, &pre_pkt_UDP, payload, payload_len); - exp_pak = dp_test_v4_pkt_from_desc(&post_pkt_UDP); + if (eth_type == RTE_ETHER_TYPE_IPV4) + exp_pak = dp_test_v4_pkt_from_desc(&post_pkt_UDP); + else + exp_pak = dp_test_v6_pkt_from_desc(&post_pkt_UDP); + test_exp = dp_test_exp_from_desc(exp_pak, &post_pkt_UDP); rte_pktmbuf_free(exp_pak); @@ -381,7 +685,7 @@ _pak_rcv_nat_udp(const char *rx_intf, const char *pre_smac, int pre_vlan, dp_test_exp_get_pak(test_exp), post_dmac, dp_test_intf_name2mac_str(tx_intf), - ETHER_TYPE_IPv4); + eth_type); } dp_test_exp_set_fwd_status(test_exp, status); diff --git a/tests/whole_dp/src/dp_test_npf_bridge.c b/tests/whole_dp/src/dp_test_npf_bridge.c index 118b5869..62855030 100644 --- a/tests/whole_dp/src/dp_test_npf_bridge.c +++ b/tests/whole_dp/src/dp_test_npf_bridge.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -20,12 +20,12 @@ #include "dp_test.h" #include "dp_test_str.h" -#include "dp_test_lib.h" +#include "dp_test_lib_internal.h" #include "dp_test_lib_exp.h" #include "dp_test_lib_pkt.h" -#include "dp_test_lib_intf.h" -#include "dp_test_pktmbuf_lib.h" -#include "dp_test_netlink_state.h" +#include "dp_test_lib_intf_internal.h" +#include "dp_test_pktmbuf_lib_internal.h" +#include "dp_test_netlink_state_internal.h" #include "dp_test_console.h" #include "dp_test_json_utils.h" #include "dp_test_npf_lib.h" @@ -251,7 +251,7 @@ DP_START_TEST(bridge_local, to) * Bridge firewall, assigned on br1 */ spush(npf10, sizeof(npf10), - "ether-type=%u", ETHER_TYPE_IPv4); + "ether-type=%u", RTE_ETHER_TYPE_IPV4); spush(npf20, sizeof(npf20), "src-mac=%s", "aa:bb:cc:dd:01:a1"); @@ -301,7 +301,7 @@ DP_START_TEST(bridge_local, to) struct dp_test_pkt_desc_t v4_pktA = { .text = "Neighbour 1 -> Neighbour 2", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "10.0.1.2", .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = "10.0.1.3", @@ -347,7 +347,7 @@ DP_START_TEST(bridge_local, to) struct dp_test_pkt_desc_t v4_pktB = { .text = "Neighbour 1 -> Bridge IP address", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "10.0.1.2", .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = "10.0.1.1", @@ -443,7 +443,7 @@ DP_START_TEST(bridge_local, from) * Bridge firewall */ spush(npf10, sizeof(npf10), - "ether-type=%u", ETHER_TYPE_IPv4); + "ether-type=%u", RTE_ETHER_TYPE_IPV4); spush(npf20, sizeof(npf20), "src-mac=%s", "aa:bb:cc:dd:01:a1"); @@ -495,7 +495,7 @@ DP_START_TEST(bridge_local, from) struct dp_test_pkt_desc_t v4_pktA = { .text = "Neighbour 1 -> Neighbour 2", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "10.0.1.2", .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = "10.0.1.3", @@ -547,7 +547,7 @@ DP_START_TEST(bridge_local, from) struct dp_test_pkt_desc_t v4_pktC = { .text = "Neighbour 2 -> Neighbour 1", 
.len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "10.0.1.3", .l2_src = "aa:bb:cc:dd:2:a2", .l3_dst = "10.0.1.2", @@ -602,7 +602,7 @@ DP_START_TEST(bridge_local, from) test_pak = dp_test_create_l2_pak( "FF:FF:FF:FF:FF:FF", dp_test_intf_name2mac_str("br1"), - ETHER_TYPE_ARP, 1, &len); + RTE_ETHER_TYPE_ARP, 1, &len); dp_test_fail_unless(test_pak, "Failed to create l2 pak"); /* @@ -854,7 +854,7 @@ dp_test_npf_fw1(enum test_fw action, const struct dp_test_bridge_ctx *ctx) { char proto_str[40]; - snprintf(proto_str, sizeof(proto_str), "proto=%u", ctx->proto); + snprintf(proto_str, sizeof(proto_str), "proto-final=%u", ctx->proto); /* UDP */ struct dp_test_npf_rule_t rset[] = { @@ -913,7 +913,7 @@ static void dp_test_npf_1(const struct dp_test_bridge_ctx *ctx) dp_test_npf_fw1(TEST_FW_ADD, ctx); - struct ether_addr *br1_eth; + struct rte_ether_addr *br1_eth; char real_ifname[IFNAMSIZ]; char br1_eth_str[ETH_ADDR_STR_LEN]; @@ -927,7 +927,7 @@ static void dp_test_npf_1(const struct dp_test_bridge_ctx *ctx) struct dp_test_pkt_desc_t udp_pkt1 = { .text = "pkt1", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "1.1.1.11", .l2_src = "aa:bb:cc:dd:1:11", .l3_dst = "2.2.2.11", @@ -970,7 +970,7 @@ static void dp_test_npf_1(const struct dp_test_bridge_ctx *ctx) struct dp_test_pkt_desc_t udp_pkt2 = { .text = "pkt2", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "2.2.2.11", .l2_src = "aa:bb:cc:dd:2:11", .l3_dst = "1.1.1.11", @@ -1042,7 +1042,7 @@ static void dp_test_npf_2(const struct dp_test_bridge_ctx *ctx) dp_test_npf_fw1(TEST_FW_ADD, ctx); - struct ether_addr *br1_eth; + struct rte_ether_addr *br1_eth; char real_ifname[IFNAMSIZ]; char br1_eth_str[ETH_ADDR_STR_LEN]; @@ -1056,7 +1056,7 @@ static void dp_test_npf_2(const struct dp_test_bridge_ctx *ctx) struct dp_test_pkt_desc_t udp_pkt1 = { .text = "UDP pkt1", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + 
.ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "2.2.2.11", .l2_src = "aa:bb:cc:dd:2:11", .l3_dst = "1.1.1.11", @@ -1100,7 +1100,7 @@ static void dp_test_npf_2(const struct dp_test_bridge_ctx *ctx) struct dp_test_pkt_desc_t udp_pkt2 = { .text = "UDP pkt3", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "1.1.1.11", .l2_src = "aa:bb:cc:dd:1:11", .l3_dst = "2.2.2.11", diff --git a/tests/whole_dp/src/dp_test_npf_cgnat.c b/tests/whole_dp/src/dp_test_npf_cgnat.c index 6a4cf249..75cff124 100644 --- a/tests/whole_dp/src/dp_test_npf_cgnat.c +++ b/tests/whole_dp/src/dp_test_npf_cgnat.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2019-2021, AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only * @@ -12,6 +12,8 @@ #include #include #include +#include +#include #include "ip_funcs.h" #include "ip6_funcs.h" #include "in_cksum.h" @@ -21,11 +23,11 @@ #include "dp_test.h" #include "dp_test_controller.h" #include "dp_test_console.h" -#include "dp_test_netlink_state.h" -#include "dp_test_cmd_check.h" -#include "dp_test_lib.h" -#include "dp_test_pktmbuf_lib.h" -#include "dp_test_lib_intf.h" +#include "dp_test_netlink_state_internal.h" +#include "dp_test/dp_test_cmd_check.h" +#include "dp_test_lib_internal.h" +#include "dp_test_lib_intf_internal.h" +#include "dp_test_pktmbuf_lib_internal.h" #include "dp_test_lib_exp.h" #include "dp_test_lib_pkt.h" #include "dp_test_npf_lib.h" @@ -33,22 +35,25 @@ #include "dp_test_npf_sess_lib.h" #include "npf/nat/nat_pool_public.h" -#include "npf/cgnat/cgn.h" -#include "npf/apm/apm.h" #include "npf/cgnat/cgn_limits.h" -#include "npf/cgnat/cgn_policy.h" -#include "npf/cgnat/cgn_source.h" +#include "npf/cgnat/cgn_sess_state.h" #include "npf/cgnat/cgn_session.h" +#include "npf/cgnat/cgn_sess2.h" +#include "npf/cgnat/cgn_mbuf.h" +#include "npf/cgnat/cgn_log.h" +#include "npf/cgnat/cgn_if.h" +#include 
"npf/cgnat/cgn_test.h" DP_DECL_TEST_SUITE(npf_cgnat); /* + * cgnat_pre - Checks session structure size * cgnat1 - UDP 1 fwd pkt * cgnat2 - TCP 1 fwd pkt * cgnat3 - ICMP 1 fwd pkt * * cgnat4 - UDP 1 fwd pkt, 1 back pkt - * cgnat5 - TCP 1 fwd pkt, 1 back pkt + * cgnat5 - TCP 1 fwd pkt, 1 back pkt, 5-tuple session * cgnat6 - ICMP 1 fwd pkt, 1 back pkt * * cgnat7 - UDP 1 fwd pkt, 1 back pkt, 1 fwd pkt @@ -59,11 +64,10 @@ DP_DECL_TEST_SUITE(npf_cgnat); * cgnat11 - UDP, TCP, and ICMP 1 fwd pkt, 1 back pkt * cgnat12 - Hairpinning. * cgnat13 - CGNAT over VRF interface. - * cgnat14 - VRF interface created after CGNAT config. * cgnat15 - VRF interface deleted while CGNAT policy and sessions present * cgnat16 - Tests random port allocation within port block. * cgnat17 - Exercises op-mode show commands - * cgnat18 - Tests public address blacklist + * cgnat18 - Tests public address blocklist * * cgnat20 - UDP 129 pkts with different src addrs * @@ -75,8 +79,10 @@ DP_DECL_TEST_SUITE(npf_cgnat); * cgnat22 - Tests address-pool paired limit. * TCP 'n' pkts with same src addr, diff src ports. * Port range is limited to 256 ports. Block size is 128 and - * max-blocks-per-user is 4, so APP is the limiting factor. Sends - * 257 pkts. 256 ok, 1 fail. + * max-blocks-per-user is 4, so APP is the limiting factor. + * The public address we are paired with runs out of ports + * before the max-blocks-per-subscriber limit is reached. + * Sends 257 pkts. 256 ok, 1 fail. * * cgnat23 - Tests address-pool arbitrary. * TCP 'n' pkts with same src addr, diff src ports. @@ -97,10 +103,16 @@ DP_DECL_TEST_SUITE(npf_cgnat); * * cgnat26 - Tests max-blocks-per-subscriber limit, with random port-allocn. 
+ * cgnat27 - Tests destructive change to a nat pool + * * cgnat30 - CGNAT commands * cgnat31 - CGNAT commands * - * cgnat32 - Tests CGNAT and SNAT on same interface (partially disabled) + * cgnat32 - Tests CGNAT and SNAT on same interface + * + * cgnat32b - Tests CGNAT and Stateful Firewall on same interface + * + * cgnat32c - Tests inbound traffic whose dest is not in nat pool * * cgnat33 - Tests ICMP error messages with embedded UDP packets * (incl cksum 0) @@ -108,9 +120,49 @@ DP_DECL_TEST_SUITE(npf_cgnat); * cgnat34 - Tests ICMP error messages with embedded TCP packets (including * truncated) * + * cgnat35 - Tests generation of an ICMP error message *after* CGNAT + * translation of an outbound packet but before transmission. We + * undo the *source* CGNAT translation, and send an + * ICMP_DEST_UNREACH/FRAG_NEEDED message back to the sender. + * + * cgnat36 - As cgnat35, but for an inbound packet. + * + * cgnat37 - Test that inbound traffic that matches an snat session but + * not a cgnat session is *not* filtered by CGNAT. + * + * cgnat38 - 20 UDP forwards pkts, different source addrs and ports + * * cgnat39 - Packet reassembly before translation. * cgnat40 - Split TCP header over two chained mbufs * + * cgnat41 - Create multiple sessions and test show command + * + * cgnat42 - cgnat scale test (remove '_DONT_RUN' to run it) + * + * cgnat43 - cgnat scale test (remove '_DONT_RUN' to run it) + * + * cgnat44 - Tests cgnat exclude address group + * + * cgnat45 - Tests PCP/unit-test 'map' command + * + * cgnat46 - Verify CGNAT responds to echo request sent to CGNAT pool address + * on the outside interface. + * + * cgnat47 - Exercises threshold add/del code paths, and apm pb full code + * path. + * + * cgnat48 - Tests a policy being uncfgd and re-cfgd while a subscriber + * structure exists + * + * cgnat49 - Tests that two different subscribers may be allocated port + * blocks from the same public address. 
+ * + * cgnat52 - Test NAT pool lookup using the hidden NAT pool address-group. + * + * cgnat53 - Test timeout value for TCP 5-tuple session in different states + * + * cgnat54 - Tests interface failover + * * make -j4 dataplane_test_run CK_RUN_SUITE=dp_test_npf_cgnat.c * make -j4 dataplane_test_run CK_RUN_CASE=cgnat1 */ @@ -136,39 +188,15 @@ static struct cgn_ctx cgn_ctx = { .saved_cb = dp_test_pak_verify, }; -#define CGN_3TUPLE false -#define CGN_5TUPLE true - static void cgn_validate_cb(struct rte_mbuf *mbuf, struct ifnet *ifp, struct dp_test_expected *expected, enum dp_test_fwd_result_e fwd_result); -static void _cgnat_policy_add(const char *policy, uint pri, const char *src, - const char *pool, const char *intf, - enum cgn_map_type eim, enum cgn_fltr_type eif, - bool log_sess, bool check_feat, - const char *file, const char *func, int line); -#define cgnat_policy_add(_a, _b, _c, _d, _e, _f, _g, _h, _i) \ - _cgnat_policy_add(_a, _b, _c, _d, _e, _f, _g, _h, _i, \ - __FILE__, __func__, __LINE__) - -static void _cgnat_policy_add2(const char *policy, uint pri, const char *src, - const char *pool, const char *intf, - enum cgn_map_type eim, enum cgn_fltr_type eif, - const char *log_name, bool check_feat, - const char *file, const char *func, int line); -#define cgnat_policy_add2(_a, _b, _c, _d, _e, _f, _g, _h, _i) \ - _cgnat_policy_add2(_a, _b, _c, _d, _e, _f, _g, _h, _i, \ - __FILE__, __func__, __LINE__) - -static void _cgnat_policy_del(const char *policy, uint pri, const char *intf, - const char *file, const char *func, int line); -#define cgnat_policy_del(_a, _b, _c) \ - _cgnat_policy_del(_a, _b, _c, __FILE__, __func__, __LINE__) - static void cgnat_setup(void); static void cgnat_teardown(void); +static int dpt_cgn_show_session(const char *fltr, uint count, bool per_subs, + bool print, bool debug); static void _cgnat_udp(const char *rx_intf, const char *pre_smac, int pre_vlan, @@ -281,8 +309,62 @@ static uint32_t dpt_random_ipaddr(uint32_t addr, uint32_t mask, 
return addr_n; } -void dpt_cgn_print_json(char *cmd); -void dpt_cgn_print_json(char *cmd) +static void dpt_cgn_show_summary(bool print) +{ + json_object *jresp; + char *response; + bool err; + + response = dp_test_console_request_w_err( + "cgn-op show summary", &err, false); + if (!response || err) + return; + + jresp = parse_json(response, parse_err_str, sizeof(parse_err_str)); + free(response); + + if (!jresp) + return; + + const char *str; + + str = json_object_to_json_string_ext(jresp, JSON_C_TO_STRING_PRETTY); + if (str && print) + printf("%s\n", str); + + json_object_put(jresp); +} + + +static void dpt_cgn_show_error(bool print) +{ + json_object *jresp; + char *response; + bool err; + + response = dp_test_console_request_w_err( + "cgn-op show errors", &err, false); + if (!response || err) + return; + + jresp = parse_json(response, parse_err_str, sizeof(parse_err_str)); + free(response); + + if (!jresp) + return; + + const char *str; + + str = json_object_to_json_string_ext(jresp, JSON_C_TO_STRING_PRETTY); + if (str && print) + printf("%s\n", str); + + json_object_put(jresp); +} + + +void dpt_cgn_print_json(const char *cmd, bool print); +void dpt_cgn_print_json(const char *cmd, bool print) { json_object *jobj; const char *str; @@ -291,26 +373,28 @@ void dpt_cgn_print_json(char *cmd) response = dp_test_console_request_w_err(cmd, &err, false); if (!response || err) { - printf(" no response\n"); + if (print) + printf(" no response\n"); return; } jobj = parse_json(response, parse_err_str, sizeof(parse_err_str)); if (!jobj) { - printf(" failed to parse json\n"); - printf("%s\n", response); + if (print) { + printf(" failed to parse json\n"); + printf("%s\n", response); + } free(response); return; } free(response); str = json_object_to_json_string_ext(jobj, JSON_C_TO_STRING_PRETTY); - if (str) + if (str && print) printf("%s\n", str); json_object_put(jobj); } - /* * npf_cgnat_1 - 1 UDP forwards pkt * @@ -338,9 +422,13 @@ DP_START_TEST(cgnat1, test) "type=cgnat " 
"address-range=RANGE1/1.1.1.11-1.1.1.20 " "prefix=RANGE2/1.1.1.192/26 " + "prefix=RANGE3/204.112.12.224/28 " + "subnet=RANGE4/204.112.13.224/28 " "log-pba=yes " ""); + dpt_cgn_print_json("nat-op show pool", false); + cgnat_policy_add("POLICY1", 10, "100.64.0.0/12", "POOL1", "dp2T1", CGN_MAP_EIM, CGN_FLTR_EIF, CGN_3TUPLE, true); @@ -377,7 +465,7 @@ DP_START_TEST(cgnat2, test) "nat-ut pool add POOL1 " "type=cgnat " "max-blocks=2 " - "prefix=RANGE1/1.1.1.192/26 " + "subnet=RANGE1/1.1.1.192/26 " ""); cgnat_policy_add("POLICY1", 10, "100.64.0.0/12", "POOL1", "dp2T1", @@ -429,7 +517,7 @@ DP_START_TEST(cgnat3, test) dpt_cgn_cmd_fmt(false, true, "nat-ut pool add POOL2 " "type=cgnat " - "address-range=RANGE1/1.1.2.11-1.2.1.20 " + "address-range=RANGE1/1.1.2.11-1.1.2.20 " "prefix=RANGE2/1.1.2.192/26 " ""); @@ -440,8 +528,8 @@ DP_START_TEST(cgnat3, test) CGN_MAP_EIM, CGN_FLTR_EIF, CGN_3TUPLE, true); /* Change nat pool for POLICY1 */ - cgnat_policy_add("POLICY1", 10, "100.64.1.0/24", "POOL1", "dp2T1", - CGN_MAP_EIM, CGN_FLTR_EIF, CGN_3TUPLE, true); + cgnat_policy_change("POLICY1", 10, "100.64.1.0/24", "POOL1", "dp2T1", + CGN_MAP_EIM, CGN_FLTR_EIF, CGN_3TUPLE, true); /* * 100.64.0.1:49152 / 1.1.1.11:1024 --> dst 1.1.1.1:80 @@ -551,6 +639,9 @@ DP_START_TEST(cgnat5, test) "aa:bb:cc:dd:2:b1", "dp2T1", DP_TEST_FWD_FORWARDED); + + dpt_cgn_show_session(NULL, 1, false, false, false); + cgnat_policy_del("POLICY1", 10, "dp2T1"); dp_test_npf_cmd_fmt(false, "nat-ut pool delete POOL1"); @@ -620,7 +711,7 @@ DP_START_TEST(cgnat6, test) /* - * npf_cgnat_7 - 1 UDP forwards pkt, 1 backwards pkt + * npf_cgnat_7 - 1 fwd pkt, 1 back pkt, 1 fwd pkt * * Private Public * dp1T0 +---+ dp2T1 @@ -628,7 +719,7 @@ DP_START_TEST(cgnat6, test) * +---+ */ DP_DECL_TEST_CASE(npf_cgnat, cgnat7, cgnat_setup, cgnat_teardown); -DP_START_TEST(cgnat7, test) +DP_START_TEST_FULL_RUN(cgnat7, test) { /* * pool add POOL1 @@ -683,7 +774,7 @@ DP_START_TEST(cgnat7, test) /* - * npf_cgnat_8 - 1 TCP forwards pkt, 1 TCP 
backwards pkt + * npf_cgnat_8 - 1 fwd pkt, 1 back pkt, 1 fwd pkt * * Private Public * dp1T0 +---+ dp2T1 @@ -735,7 +826,7 @@ DP_START_TEST(cgnat8, test) /* - * npf_cgnat_9 - 1 ICMP echo req forwards pkt, 1 ICMP echo reply backwards pkt + * npf_cgnat_9 - ICMP 1 fwd pkt, 1 back pkt, 1 fwd pkt * * Private Public * dp1T0 +---+ dp2T1 @@ -743,7 +834,7 @@ DP_START_TEST(cgnat8, test) * +---+ */ DP_DECL_TEST_CASE(npf_cgnat, cgnat9, cgnat_setup, cgnat_teardown); -DP_START_TEST(cgnat9, test) +DP_START_TEST_FULL_RUN(cgnat9, test) { dpt_cgn_cmd_fmt(false, true, "nat-ut pool add POOL1 " @@ -794,7 +885,7 @@ DP_START_TEST(cgnat9, test) * +---+ */ DP_DECL_TEST_CASE(npf_cgnat, cgnat10, cgnat_setup, cgnat_teardown); -DP_START_TEST(cgnat10, test) +DP_START_TEST_FULL_RUN(cgnat10, test) { dpt_cgn_cmd_fmt(false, true, "nat-ut pool add POOL1 " @@ -925,7 +1016,7 @@ DP_START_TEST(cgnat11, test) * 4. Send packet out rx interface dp1T0 */ DP_DECL_TEST_CASE(npf_cgnat, cgnat12, cgnat_setup, cgnat_teardown); -DP_START_TEST(cgnat12, test) +DP_START_TEST_FULL_RUN(cgnat12, test) { dpt_cgn_cmd_fmt(false, true, "nat-ut pool add POOL1 " @@ -985,7 +1076,7 @@ DP_START_TEST(cgnat12, test) * +---+ */ DP_DECL_TEST_CASE(npf_cgnat, cgnat13, NULL, NULL); -DP_START_TEST(cgnat13, test) +DP_START_TEST_FULL_RUN(cgnat13, test) { dp_test_intf_vif_create("dp2T1.100", "dp2T1", 100); @@ -1056,100 +1147,6 @@ DP_START_TEST(cgnat13, test) } DP_END_TEST; -/* - * npf_cgnat_14 - CGNAT over a deferred VRF interface. - * - * Tests deferred interface command and command replay. 
- * - * Private Public - * dp1T0 +---+ dp2T1 - * 100.64.0.0/24 ----------| |--------------- 1.1.1.0/24 - * +---+ - */ -DP_DECL_TEST_CASE(npf_cgnat, cgnat14, NULL, NULL); -DP_START_TEST(cgnat14, test) -{ - dpt_cgn_cmd_fmt(false, true, - "nat-ut pool add POOL1 " - "type=cgnat " - "address-range=RANGE1/1.1.1.11-1.1.1.20 " - ""); - - cgnat_policy_add("POLICY1", 10, "100.64.0.0/24", "POOL1", - "dp2T1.100", CGN_MAP_EIM, CGN_FLTR_EIF, - CGN_3TUPLE, false); - - cgnat_policy_add("POLICY2", 10, "100.64.1.0/24", "POOL1", - "dp2T1.100", CGN_MAP_EIM, CGN_FLTR_EIF, - CGN_3TUPLE, false); - - dp_test_intf_vif_create("dp2T1.100", "dp2T1", 100); - - /* Check that CGNAT is enabled on VIF interface */ - dp_test_wait_for_pl_feat("dp2T1.100", "vyatta:ipv4-cgnat-in", - "ipv4-validate"); - dp_test_wait_for_pl_feat("dp2T1.100", "vyatta:ipv4-cgnat-out", - "ipv4-out"); - - dp_test_nl_add_ip_addr_and_connected("dp1T0", "100.64.0.254/24"); - dp_test_nl_add_ip_addr_and_connected("dp2T1.100", "1.1.1.254/24"); - - /* - * Inside - */ - dp_test_netlink_add_neigh("dp1T0", "100.64.0.1", - "aa:bb:cc:dd:1:a1"); - dp_test_netlink_add_neigh("dp1T0", "100.64.0.2", - "aa:bb:cc:dd:1:a2"); - - /* - * Outside - */ - dp_test_netlink_add_neigh("dp2T1.100", "1.1.1.1", - "aa:bb:cc:dd:2:b1"); - dp_test_netlink_add_neigh("dp2T1.100", "1.1.1.2", - "aa:bb:cc:dd:2:b2"); - - /* - * 100.64.0.1:49152 / 1.1.1.11:1024 --> dst 1.1.1.1:80 - */ - cgnat_udp("dp1T0", "aa:bb:cc:dd:1:a1", 0, - "100.64.0.1", 49152, "1.1.1.1", 80, - "1.1.1.11", 1024, "1.1.1.1", 80, - "aa:bb:cc:dd:2:b1", 100, "dp2T1", - DP_TEST_FWD_FORWARDED); - - /* - * 1.1.1.1:80 --> 1.1.1.11:1024 / 100.64.0.1:49152 - */ - cgnat_udp("dp2T1", "aa:bb:cc:dd:2:b1", 100, - "1.1.1.1", 80, "1.1.1.11", 1024, - "1.1.1.1", 80, "100.64.0.1", 49152, - "aa:bb:cc:dd:1:a1", 0, "dp1T0", - DP_TEST_FWD_FORWARDED); - - cgnat_policy_del("POLICY1", 10, "dp2T1.100"); - cgnat_policy_del("POLICY2", 10, "dp2T1.100"); - - dp_test_npf_cmd_fmt(false, "nat-ut pool delete POOL1"); - - /* 
Cleanup */ - dp_test_netlink_del_neigh("dp1T0", "100.64.0.1", "aa:bb:cc:dd:1:a1"); - dp_test_netlink_del_neigh("dp1T0", "100.64.0.2", "aa:bb:cc:dd:1:a2"); - - dp_test_netlink_del_neigh("dp2T1.100", "1.1.1.1", "aa:bb:cc:dd:2:b1"); - dp_test_netlink_del_neigh("dp2T1.100", "1.1.1.2", "aa:bb:cc:dd:2:b2"); - - dp_test_nl_del_ip_addr_and_connected("dp1T0", "100.64.0.254/24"); - dp_test_nl_del_ip_addr_and_connected("dp2T1.100", "1.1.1.254/24"); - - dp_test_intf_vif_del("dp2T1.100", 100); - - dp_test_npf_cleanup(); - -} DP_END_TEST; - - /* * npf_cgnat_15 - Deleting a VRF interface that has CGNAT * @@ -1159,7 +1156,7 @@ DP_START_TEST(cgnat14, test) * +---+ */ DP_DECL_TEST_CASE(npf_cgnat, cgnat15, NULL, NULL); -DP_START_TEST(cgnat15, test) +DP_START_TEST_FULL_RUN(cgnat15, test) { dp_test_intf_vif_create("dp2T1.100", "dp2T1", 100); @@ -1215,6 +1212,8 @@ DP_START_TEST(cgnat15, test) dp_test_nl_del_ip_addr_and_connected("dp2T1.100", "1.1.1.254/24"); dp_test_intf_vif_del("dp2T1.100", 100); + dpt_addr_grp_destroy("POLICY1_AG", "100.64.0.0/12"); + dp_test_npf_cmd_fmt(false, "nat-ut pool delete POOL1"); /* Cleanup */ @@ -1270,7 +1269,7 @@ cgnat16_cb(struct rte_mbuf *mbuf, struct ifnet *ifp, /* update tcp checksum */ udp->check = 0; - cksum = rte_ipv4_udptcp_cksum((const struct ipv4_hdr *)ip, + cksum = rte_ipv4_udptcp_cksum((const struct rte_ipv4_hdr *)ip, (const void *)udp); udp->check = (cksum == 0xffff) ? 
0000 : cksum; @@ -1293,7 +1292,7 @@ cgnat16_cb(struct rte_mbuf *mbuf, struct ifnet *ifp, * +---+ */ DP_DECL_TEST_CASE(npf_cgnat, cgnat16, cgnat_setup, cgnat_teardown); -DP_START_TEST(cgnat16, test) +DP_START_TEST_FULL_RUN(cgnat16, test) { dpt_cgn_cmd_fmt(false, true, "nat-ut pool add POOL1 " @@ -1336,7 +1335,7 @@ DP_START_TEST(cgnat16, test) } DP_END_TEST; -static void dpt_cgn_show_source_detail(void) +static void dpt_cgn_show_source(bool print) { json_object *jresp; char *response; @@ -1356,7 +1355,7 @@ static void dpt_cgn_show_source_detail(void) const char *str; str = json_object_to_json_string_ext(jresp, JSON_C_TO_STRING_PRETTY); - if (str) + if (str && print) printf("%s\n", str); json_object_put(jresp); @@ -1379,6 +1378,8 @@ static void dpt_cgn_show_subscriber_count(uint start, uint count, bool detail) l += snprintf(cmd + l, sizeof(cmd) - l, " start %u count %u", start, count); + (void) l; + response = dp_test_console_request_w_err(cmd, &err, false); if (!response || err) @@ -1399,14 +1400,14 @@ static void dpt_cgn_show_subscriber_count(uint start, uint count, bool detail) json_object_put(jresp); } -static void dpt_cgn_show_policy_detail(void) +static void dpt_cgn_show_policy(bool print) { json_object *jresp; char *response; bool err; response = dp_test_console_request_w_err( - "cgn-op show policy detail 10", &err, false); + "cgn-op show policy", &err, false); if (!response || err) return; @@ -1419,13 +1420,13 @@ static void dpt_cgn_show_policy_detail(void) const char *str; str = json_object_to_json_string_ext(jresp, JSON_C_TO_STRING_PRETTY); - if (str) + if (str && print) printf("%s\n", str); json_object_put(jresp); } -static void dpt_cgn_show_public(bool detail) +static void dpt_cgn_show_public(bool print, bool detail) { json_object *jresp; char *response; @@ -1450,7 +1451,7 @@ static void dpt_cgn_show_public(bool detail) const char *str; str = json_object_to_json_string_ext(jresp, JSON_C_TO_STRING_PRETTY); - if (str) + if (str && print) printf("%s\n", str); 
json_object_put(jresp); @@ -1473,6 +1474,8 @@ static void dpt_cgn_show_public_count(uint start, uint count, bool detail) l += snprintf(cmd + l, sizeof(cmd) - l, " start %u count %u", start, count); + (void) l; + response = dp_test_console_request_w_err(cmd, &err, false); if (!response || err) @@ -1493,130 +1496,188 @@ static void dpt_cgn_show_public_count(uint start, uint count, bool detail) json_object_put(jresp); } -struct cgn_target { - char addr[15]; - uint16_t port; - uint8_t proto; - char intf[IFNAMSIZ+1]; -}; +static int +dpt_cgn_show_session_one(json_object *joutr, bool print, bool debug) +{ + /* + * Format string with outer session info + */ + const char *subs_addr = NULL, *pub_addr = NULL, *intf = NULL; + int subs_port = 0, pub_port = 0, proto = 0, timeout = 0, index = 0; + char outr_str[120]; + + dp_test_json_string_field_from_obj(joutr, "subs_addr", &subs_addr); + dp_test_json_string_field_from_obj(joutr, "pub_addr", &pub_addr); + dp_test_json_string_field_from_obj(joutr, "intf", &intf); + dp_test_json_int_field_from_obj(joutr, "subs_port", &subs_port); + dp_test_json_int_field_from_obj(joutr, "pub_port", &pub_port); + dp_test_json_int_field_from_obj(joutr, "proto", &proto); + dp_test_json_int_field_from_obj(joutr, "cur_to", &timeout); + dp_test_json_int_field_from_obj(joutr, "index", &index); + + snprintf(outr_str, sizeof(outr_str), + "%6s %10d %5d %15s %5d %15s %5d %8d", + intf, index, proto, subs_addr, subs_port, + pub_addr, pub_port, timeout); + + json_object *jarray_inr; + + /* + * Get dest sessions array from joutr + */ + struct dp_test_json_find_key key[] = { + { "destinations", NULL }, + { "sessions", NULL } + }; + + /* Inner dest sessions array may not exist */ + jarray_inr = dp_test_json_find(joutr, key, ARRAY_SIZE(key)); + + if (!jarray_inr) { + if (print || debug) + printf("%s\n", outr_str); + return 1; + } + + int sess_count = 0; + uint i, arraylen_inr = 0; + + arraylen_inr = json_object_array_length(jarray_inr); + + for (i = 0; i < 
arraylen_inr; i++) { + json_object *jinr; + + /* Get the array element at position i */ + jinr = json_object_array_get_idx(jarray_inr, i); + if (!jinr) + break; + + /* + * Format string with inner session info + */ + const char *dst_addr = NULL; + int dst_port; + char inr_str[40]; + + dp_test_json_string_field_from_obj(jinr, "dst_addr", &dst_addr); + dp_test_json_int_field_from_obj(jinr, "dst_port", &dst_port); + + snprintf(inr_str, sizeof(inr_str), "%15s %5d", + dst_addr, dst_port); + + if (print || debug) + printf("%s %s\n", outr_str, inr_str); + + sess_count++; + } + + json_object_put(jarray_inr); + + if (i == 0) { + if (print || debug) + printf("%s\n", outr_str); + return 1; + } + + return sess_count; +} /* * Fetches 'count' sessions per call. Returns number of sessions found. */ static int -_dpt_cgn_show_session(char *_cmd, uint count, bool print, - struct cgn_target *tgt) +_dpt_cgn_show_session(char *_cmd, uint count, bool print, bool debug) { - struct dp_test_json_find_key key[] = { {"sessions", NULL} }; - const char *subs_addr = NULL, *pub_addr, *intf = NULL; - json_object *jresp, *jarray; - int subs_port, pub_port, proto; - uint i, arraylen; + int sess_count = 0; char *response; char cmd[120]; - bool err, debug = false; - int l = 0; - - l += snprintf(cmd + l, sizeof(cmd) - l, "%s count %u", _cmd, count); + bool err; + bool print_json = debug; - /* If tgt is specified, start with the session just after tgt */ - if (tgt && strlen(tgt->addr) > 0) - l += snprintf(cmd + l, sizeof(cmd) - l, - " tgt-addr %s tgt-port %u " - "tgt-proto %u tgt-intf %s", - tgt->addr, tgt->port, tgt->proto, tgt->intf); + /* + * Send command to dataplane + */ + snprintf(cmd, sizeof(cmd), "%s count %u", _cmd, count); + if (debug) + printf("Cmd: %s\n", cmd); response = dp_test_console_request_w_err(cmd, &err, false); - if (!response || err) + if (!response || err) { + if (debug) + printf("No response to command\n"); return 0; + } + + 
/***************************************************************** + * Parse response string to get json object + */ + json_object *jresp; jresp = parse_json(response, parse_err_str, sizeof(parse_err_str)); free(response); - - if (!jresp) + if (!jresp) { + if (debug) + printf("Failed to parse response\n"); return 0; + } - if (debug) { + if (print_json) { const char *str; - - str = json_object_to_json_string_ext( - jresp, JSON_C_TO_STRING_PRETTY); + str = json_object_to_json_string_ext(jresp, + JSON_C_TO_STRING_PRETTY); if (str) printf("%s\n", str); } - jarray = dp_test_json_find(jresp, key, ARRAY_SIZE(key)); - json_object_put(jresp); - - if (!jarray) - return 0; - - arraylen = json_object_array_length(jarray); - - for (i = 0; i < arraylen; i++) { - json_object *jvalue; - bool rv; - - /* Get the array element at position i */ - jvalue = json_object_array_get_idx(jarray, i); - if (!jvalue) - return 0; - - if (debug) { - const char *str; - - str = json_object_to_json_string_ext( - jvalue, JSON_C_TO_STRING_PRETTY); - if (str) - printf("%s\n", str); + /***************************************************************** + * Get the outer 3-tuple sessions json array + */ + struct dp_test_json_find_key key[] = { + { + "sessions", NULL } + }; + json_object *jarray_outr; + uint arraylen_outr; - rv = dp_test_json_string_field_from_obj( - jvalue, "subs_addr", &subs_addr); - if (!rv) - return 0; + jarray_outr = dp_test_json_find(jresp, key, ARRAY_SIZE(key)); - rv = dp_test_json_int_field_from_obj(jvalue, - "subs_port", &subs_port); - if (!rv) - return 0; + /* finished with jresp now */ + json_object_put(jresp); + jresp = NULL; - rv = dp_test_json_string_field_from_obj(jvalue, - "pub_addr", &pub_addr); - if (!rv) - return 0; + if (!jarray_outr) { + if (debug) + printf("Failed to get outer sessions array\n"); + return 0; + } + arraylen_outr = json_object_array_length(jarray_outr); - rv = dp_test_json_int_field_from_obj(jvalue, - "pub_port", &pub_port); - if (!rv) - return 0; + /* 
Print banner */ + if (print || debug) + printf("%6s %10s %5s %15s %5s %15s %5s %8s %15s %5s\n", + "Intf", "Index", "Proto", "Src addr", "Port", + "Trans Addr", "Port", "Timeout", "Dest Addr", "Port"); - rv = dp_test_json_int_field_from_obj(jvalue, - "proto", &proto); - if (!rv) - return 0; + /***************************************************************** + * Iterate over the 3-tuple session array + */ + uint i; - rv = dp_test_json_string_field_from_obj(jvalue, - "intf", &intf); - if (!rv) - return 0; + for (i = 0; i < arraylen_outr; i++) { + json_object *joutr; - if (print) - printf("%15s %5u %15s %5u %u %s\n", - subs_addr, subs_port, pub_addr, - pub_port, proto, intf); - } + /* Get the array element at position i */ + joutr = json_object_array_get_idx(jarray_outr, i); + if (!joutr) + break; - if (tgt && subs_addr && intf) { - tgt->port = subs_port; - tgt->proto = proto; - strncpy(tgt->addr, subs_addr, sizeof(tgt->addr)); - strncpy(tgt->intf, intf, sizeof(tgt->intf)); + sess_count += dpt_cgn_show_session_one(joutr, print, debug); } - json_object_put(jarray); + json_object_put(jarray_outr); - return arraylen; + return sess_count; } /* @@ -1627,12 +1688,11 @@ _dpt_cgn_show_session(char *_cmd, uint count, bool print, * the sessions for each subscriber. 
*/ static int -dpt_cgn_show_session(const char *fltr, uint count, bool per_subs, bool print) +dpt_cgn_show_session(const char *fltr, uint count, bool per_subs, bool print, + bool debug) { - struct cgn_target target = { 0 }; char cmd[120]; - int l = 0; - uint j; + int l; int rv, found = 0; if (per_subs) { @@ -1687,17 +1747,8 @@ dpt_cgn_show_session(const char *fltr, uint count, bool per_subs, bool print) snprintf(cmd, sizeof(cmd), "cgn-op show session subs-addr %s", subs_str); - memset(&target, 0, sizeof(target)); - - j = 0; - while (true) { - rv = _dpt_cgn_show_session(cmd, count, - print, &target); - - if (rv == 0 || ++j > 10000000) - break; - found += rv; - } + rv = _dpt_cgn_show_session(cmd, count, print, debug); + found += rv; } json_object_put(jarray); @@ -1707,27 +1758,192 @@ dpt_cgn_show_session(const char *fltr, uint count, bool per_subs, bool print) if (fltr) l += snprintf(cmd + l, sizeof(cmd) - l, " %s", fltr); - j = 0; - while (true) { - rv = _dpt_cgn_show_session(cmd, count, print, &target); + (void) l; - if (rv == 0 || ++j > 10000000) - break; - found += rv; - } + rv = _dpt_cgn_show_session(cmd, count, print, debug); + found += rv; } return found; } -static void dpt_cgn_show_pool_detail(void) +/* + * Get json object for a 3-tuple session + */ +static json_object *dpt_cgn_sess_json(const char *fltr, bool debug) +{ + char cmd[120]; + char *response; + bool err; + + dp_test_fail_unless(fltr, + "A filter identifying the session must be specd"); + + snprintf(cmd, sizeof(cmd), "cgn-op show session %s", fltr); + + /* + * Send command to dataplane + */ + response = dp_test_console_request_w_err(cmd, &err, false); + if (!response || err) { + if (debug) + printf("No response to command\n"); + return NULL; + } + + /***************************************************************** + * Parse response string to get json object + */ + json_object *jresp; + + jresp = parse_json(response, parse_err_str, sizeof(parse_err_str)); + free(response); + if (!jresp) { + if 
(debug) + printf("Failed to parse response\n"); + return NULL; + } + + if (debug) { + const char *str; + str = json_object_to_json_string_ext(jresp, + JSON_C_TO_STRING_PRETTY); + if (str) + printf("%s\n", str); + } + + /***************************************************************** + * Get the outer 3-tuple sessions json array + */ + struct dp_test_json_find_key key[] = { + { + "sessions", NULL + } + }; + json_object *jarray_outr; + uint arraylen_outr; + + jarray_outr = dp_test_json_find(jresp, key, ARRAY_SIZE(key)); + + /* finished with jresp now */ + json_object_put(jresp); + jresp = NULL; + + if (!jarray_outr) { + if (debug) + printf("Failed to get outer sessions array\n"); + return NULL; + } + arraylen_outr = json_object_array_length(jarray_outr); + + dp_test_fail_unless(arraylen_outr == 1, + "More than one outer session"); + + /***************************************************************** + * Get first session (should only be one) + */ + json_object *joutr; + + /* Get the array element at position 0 */ + joutr = json_object_array_get_idx(jarray_outr, 0); + if (!joutr) { + json_object_put(jarray_outr); + return NULL; + } + + joutr = json_object_get(joutr); + json_object_put(jarray_outr); + + return joutr; +} + +/* + * Get json object for a 2-tuple session + */ +static json_object *_dpt_cgn_inr_sess_json(json_object *joutr, bool debug, + const char *file, const char *func, + int line) +{ + json_object *jarray_inr; + + /* + * Get dest sessions array from joutr + */ + struct dp_test_json_find_key key[] = { + { "destinations", NULL }, + { "sessions", NULL } + }; + + /* Inner dest sessions array may not exist */ + jarray_inr = dp_test_json_find(joutr, key, ARRAY_SIZE(key)); + + if (!jarray_inr) + return NULL; + + uint arraylen_inr = 0; + + arraylen_inr = json_object_array_length(jarray_inr); + + _dp_test_fail_unless(arraylen_inr == 1, file, line, + "Zero or more than one inner session"); + + json_object *jinr; + + /* Get the array element at position 0 */ + 
jinr = json_object_array_get_idx(jarray_inr, 0); + + if (!jinr) + return NULL; + + jinr = json_object_get(jinr); + + json_object_put(jarray_inr); + return jinr; +} + +#define dpt_cgn_inr_sess_json(_a, _b) \ + _dpt_cgn_inr_sess_json(_a, _b, \ + __FILE__, __func__, __LINE__) + +static int _dpt_cgn_sess_get_timeout(const char *fltr, bool outer, + const char *file, const char *func, + int line) +{ + json_object *joutr, *jinr; + int timeout = 0; + + joutr = dpt_cgn_sess_json(fltr, false); + dp_test_fail_unless(joutr, "Failed to get json object for 3-tuple"); + + if (outer) { + dp_test_json_int_field_from_obj(joutr, "max_to", &timeout); + json_object_put(joutr); + return timeout; + } + + jinr = _dpt_cgn_inr_sess_json(joutr, false, file, func, line); + dp_test_fail_unless(jinr, "Failed to get json object for 2-tuple"); + + dp_test_json_int_field_from_obj(jinr, "max_to", &timeout); + + json_object_put(jinr); + json_object_put(joutr); + + return timeout; +} + +#define dpt_cgn_sess_get_timeout(_a, _b) \ + _dpt_cgn_sess_get_timeout(_a, _b, \ + __FILE__, __func__, __LINE__) + +static void dpt_cgn_show_pool(bool print) { json_object *jresp; char *response; bool err; response = dp_test_console_request_w_err( - "nat-op show pool detail 10", &err, false); + "nat-op show pool", &err, false); if (!response || err) return; @@ -1740,25 +1956,30 @@ static void dpt_cgn_show_pool_detail(void) const char *str; str = json_object_to_json_string_ext(jresp, JSON_C_TO_STRING_PRETTY); - if (str) + if (str && print) printf("%s\n", str); json_object_put(jresp); } /* - * Create a CGNAT mapping + * Create a CGNAT mapping using the vplsh command. + * + * If pub_addr and pub_port are non NULL, and *pub_port > 0, then these + * contain mappings to be requested. + * + * pub_addr and pub_port will contain the allocated mappings. 
*/ static int dpt_cgn_map(bool print, char *real_intf, uint timeout, uint8_t ipproto, char *subs_addr, uint16_t subs_port, - char *pub_addr, uint16_t *pub_port) + char *pub_addr, int *pub_port) { - json_object *jresp; + json_object *jresp, *jmap; const char *str; char *response; char cmd[240]; - bool err; + bool err, rv; int l; l = snprintf(cmd, sizeof(cmd), @@ -1766,10 +1987,15 @@ dpt_cgn_map(bool print, char *real_intf, uint timeout, uint8_t ipproto, "subs-addr %s subs-port %u", real_intf, timeout, ipproto, subs_addr, subs_port); - if (pub_addr && pub_port) + if (pub_addr && strlen(pub_addr) > 0) + l += snprintf(cmd + l, sizeof(cmd) - l, + " pub-addr %s", pub_addr); + + if (pub_port) l += snprintf(cmd + l, sizeof(cmd) - l, - "pub-addr %s pub-port %u", - pub_addr, *pub_port); + " pub-port %u", *pub_port); + + (void) l; response = dp_test_console_request_w_err(cmd, &err, false); @@ -1786,12 +2012,80 @@ dpt_cgn_map(bool print, char *real_intf, uint timeout, uint8_t ipproto, if (str && print) printf("%s\n", str); + struct dp_test_json_find_key key[] = { {"map", NULL} }; + const char *pub_addr_str = NULL; + + jmap = dp_test_json_find(jresp, key, ARRAY_SIZE(key)); json_object_put(jresp); + if (!jmap) + return -3; + + rv = dp_test_json_string_field_from_obj(jmap, "pub_addr", + &pub_addr_str); + if (!rv) { + json_object_put(jmap); + return -4; + } + if (pub_addr) + strcpy(pub_addr, pub_addr_str); + + if (pub_port) { + rv = dp_test_json_int_field_from_obj(jmap, "pub_port", + pub_port); + if (!rv) { + json_object_put(jmap); + return -5; + } + } + + json_object_put(jmap); return 0; } -static void dpt_cgn_list_subscribers(void) +/* + * Create a CGNAT mapping directly + */ +static int +dpt_cgn_map2(struct ifnet *ifp, uint timeout, uint8_t ipproto, + uint32_t subs_addr, uint16_t subs_port, + const uint32_t *pub_addr, const uint16_t *pub_port) +{ + struct cgn_packet cpk; + struct cgn_session *cse; + int error = 0; + + memset(&cpk, 0, sizeof(cpk)); + + cpk.cpk_saddr = subs_addr; 
+ cpk.cpk_sid = htons(subs_port); + cpk.cpk_daddr = 0; + cpk.cpk_did = 0; + cpk.cpk_ipproto = ipproto; + cpk.cpk_ifindex = ifp->if_index; + cpk.cpk_key.k_ifindex = cgn_if_key_index(ifp); + cpk.cpk_l4ports = true; + + cpk.cpk_proto = nat_proto_from_ipproto(ipproto); + cpk.cpk_vrfid = if_vrfid(ifp); + cpk.cpk_keepalive = true; + cpk.cpk_pkt_instd = false; + + cpk.cpk_key.k_expired = false; + + /* Setup direction dependent part of hash key */ + cgn_pkt_key_init(&cpk, CGN_DIR_OUT); + + cse = cgn_session_map(ifp, &cpk, + pub_addr ? *pub_addr : 0, + pub_port ? *pub_port : 0, &error); + if (!cse) + return -1; + + return error; +} + +static void dpt_cgn_list_subscribers(bool print) { json_object *jresp; char *response; @@ -1811,13 +2105,13 @@ static void dpt_cgn_list_subscribers(void) const char *str; str = json_object_to_json_string_ext(jresp, JSON_C_TO_STRING_PRETTY); - if (str) + if (str && print) printf("%s\n", str); json_object_put(jresp); } -static void dpt_cgn_list_public(void) +static void dpt_cgn_list_public(bool print) { json_object *jresp; char *response; @@ -1837,7 +2131,7 @@ static void dpt_cgn_list_public(void) const char *str; str = json_object_to_json_string_ext(jresp, JSON_C_TO_STRING_PRETTY); - if (str) + if (str && print) printf("%s\n", str); json_object_put(jresp); @@ -1852,7 +2146,7 @@ static void dpt_cgn_list_public(void) * +---+ */ DP_DECL_TEST_CASE(npf_cgnat, cgnat17, cgnat_setup, cgnat_teardown); -DP_START_TEST(cgnat17, test) +DP_START_TEST_FULL_RUN(cgnat17, test) { dpt_cgn_cmd_fmt(false, true, "nat-ut pool add POOL1 " @@ -1861,16 +2155,13 @@ DP_START_TEST(cgnat17, test) "prefix=RANGE2/1.1.1.192/26 " ""); - dp_test_npf_cmd_fmt(false, - "npf-ut fw table create %s", "AG1"); - dp_test_npf_cmd_fmt(false, - "npf-ut fw table add %s 100.64.1.0/24", "AG1"); + dpt_addr_grp_create("LOG_AG1", "100.64.1.0/24"); cgnat_policy_add("POLICY1", 10, "100.64.0.0/24", "POOL1", "dp2T1", CGN_MAP_EIM, CGN_FLTR_EIF, CGN_3TUPLE, true); cgnat_policy_add2("POLICY2", 20, 
"100.64.1.0/24", "POOL1", - "dp2T1", CGN_MAP_EIM, CGN_FLTR_EIF, "AG1", true); + "dp2T1", "log-sess-group=LOG_AG1"); cgnat_udp("dp1T0", "aa:bb:cc:dd:1:a1", 0, "100.64.0.1", 49152, "1.1.1.1", 80, @@ -1936,22 +2227,29 @@ DP_START_TEST(cgnat17, test) "aa:bb:cc:dd:2:b1", 0, "dp2T1", DP_TEST_FWD_FORWARDED); - if (0) { - dpt_cgn_show_policy_detail(); - dpt_cgn_show_pool_detail(); - dpt_cgn_show_public(true); - dpt_cgn_show_source_detail(); - dpt_cgn_list_subscribers(); - dpt_cgn_list_public(); - } + dp_test_npf_cmd_fmt(false, "cgn-op update session"); + dp_test_npf_cmd_fmt(false, "cgn-op update subscriber"); + + bool print = false; + + dpt_cgn_show_policy(print); + dpt_cgn_show_pool(print); + dpt_cgn_show_public(print, true); + dpt_cgn_show_source(print); + dpt_cgn_list_subscribers(print); + dpt_cgn_list_public(print); + dpt_cgn_show_summary(print); + dpt_cgn_show_error(print); + + dp_test_npf_cmd_fmt(false, "cgn-op clear session"); + dp_test_npf_cmd_fmt(false, "cgn-op clear subscriber"); + dp_test_npf_cmd_fmt(false, "cgn-op clear policy"); + dp_test_npf_cmd_fmt(false, "cgn-op clear errors"); cgnat_policy_del("POLICY1", 10, "dp2T1"); cgnat_policy_del("POLICY2", 20, "dp2T1"); - dp_test_npf_cmd_fmt(false, - "npf-ut fw table remove %s 100.64.1.0/24", "AG1"); - dp_test_npf_cmd_fmt(false, - "npf-ut fw table delete %s", "AG1"); + dpt_addr_grp_destroy("LOG_AG1", "100.64.1.0/24"); dp_test_npf_cmd_fmt(false, "nat-ut pool delete POOL1"); @@ -1959,7 +2257,7 @@ DP_START_TEST(cgnat17, test) /* - * npf_cgnat_18 - Tests blacklist + * npf_cgnat_18 - Tests blocklist * * Private Public * dp1T0 +---+ dp2T1 @@ -1969,16 +2267,14 @@ DP_START_TEST(cgnat17, test) DP_DECL_TEST_CASE(npf_cgnat, cgnat18, cgnat_setup, cgnat_teardown); DP_START_TEST(cgnat18, test) { + dpt_addr_grp_create("BLOCKLIST1", "1.1.1.11/32"); dp_test_npf_cmd_fmt(false, - "npf-ut fw table create %s", "BLACKLIST1"); - dp_test_npf_cmd_fmt(false, - "npf-ut fw table add %s 1.1.1.11/32", "BLACKLIST1"); + "npf-ut fw table add 
BLOCKLIST1 1.1.1.13/32"); dpt_cgn_cmd_fmt(false, true, "nat-ut pool add POOL1 " "type=cgnat " - "address-range=RANGE1/1.1.1.11-1.1.1.20 " - "prefix=RANGE2/1.1.1.192/26 " + "address-range=RANGE1/1.1.1.11-1.1.1.14 " ""); cgnat_policy_add("POLICY1", 10, "100.64.0.0/12", "POOL1", @@ -1994,12 +2290,15 @@ DP_START_TEST(cgnat18, test) DP_TEST_FWD_FORWARDED); /* - * Add blacklist to pool, and run GC to expire sessions using - * blacklisted addresses + * Add blocked address to pool, and run GC to expire sessions using + * blocked addresses. + * + * Blocklist addresses: 1.1.1.11, 1.1.1.13 + * Useable addresses: 1.1.1.12, 1.1.1.14 */ dpt_cgn_cmd_fmt(false, true, "nat-ut pool update POOL1 " - "blacklist=BLACKLIST1"); + "blacklist=BLOCKLIST1"); /* * We need to explicitly clear existing sessions @@ -2007,21 +2306,45 @@ DP_START_TEST(cgnat18, test) dp_test_npf_cmd_fmt(false, "cgn-op clear session pub-addr 1.1.1.11"); + /* + * Repeat earlier packet. Public address 1.1.1.12 is now used. + */ cgnat_udp("dp1T0", "aa:bb:cc:dd:1:a1", 0, "100.64.0.1", 49152, "1.1.1.1", 80, "1.1.1.12", 1024, "1.1.1.1", 80, "aa:bb:cc:dd:2:b1", 0, "dp2T1", DP_TEST_FWD_FORWARDED); + /* + * 100.64.0.2:1234 / 1.1.1.14:1024 --> dst 1.1.1.1:80 + */ + cgnat_udp("dp1T0", "aa:bb:cc:dd:1:a1", 0, + "100.64.0.2", 1234, "1.1.1.1", 80, + "1.1.1.14", 1024, "1.1.1.1", 80, + "aa:bb:cc:dd:2:b1", 0, "dp2T1", + DP_TEST_FWD_FORWARDED); + + /* + * There are now no unused public addresses. Another new subscriber + * should use an unused port-block on an already used public address. + * In this case it will use the second port-block on 1.1.1.12. New + * src port is 1536 (1024+512). 
+ * + * 100.64.0.3:2345 / 1.1.1.12:1536 --> dst 1.1.1.1:80 + */ + cgnat_udp("dp1T0", "aa:bb:cc:dd:1:a1", 0, + "100.64.0.3", 2345, "1.1.1.1", 80, + "1.1.1.12", 1536, "1.1.1.1", 80, + "aa:bb:cc:dd:2:b1", 0, "dp2T1", + DP_TEST_FWD_FORWARDED); + cgnat_policy_del("POLICY1", 10, "dp2T1"); dp_test_npf_cmd_fmt(false, "nat-ut pool delete POOL1"); dp_test_npf_cmd_fmt(false, - "npf-ut fw table remove %s 1.1.1.11/32", - "BLACKLIST1"); - dp_test_npf_cmd_fmt(false, - "npf-ut fw table delete %s", "BLACKLIST1"); + "npf-ut fw table remove BLOCKLIST1 1.1.1.13/32"); + dpt_addr_grp_destroy("BLOCKLIST1", "1.1.1.11/32"); } DP_END_TEST; @@ -2036,7 +2359,7 @@ DP_START_TEST(cgnat18, test) * +---+ */ DP_DECL_TEST_CASE(npf_cgnat, cgnat20, cgnat_setup, cgnat_teardown); -DP_START_TEST(cgnat20, test) +DP_START_TEST_FULL_RUN(cgnat20, test) { dpt_cgn_cmd_fmt(false, true, "nat-ut pool add POOL1 " @@ -2110,7 +2433,7 @@ DP_START_TEST(cgnat20, test) * +---+ */ DP_DECL_TEST_CASE(npf_cgnat, cgnat21, cgnat_setup, cgnat_teardown); -DP_START_TEST(cgnat21, test) +DP_START_TEST_FULL_RUN(cgnat21, test) { uint block_size = 128; uint mbpu = 2; @@ -2210,6 +2533,8 @@ DP_START_TEST(cgnat21, test) * Block size is 128 and max-blocks-per-user is 4, so APP is the limiting * factor. * + * The public address we are paired with runs out of ports before the + * max-blocks-per-subscriber limit is reached. 
* * Private Public * dp1T0 +---+ dp2T1 @@ -2217,7 +2542,7 @@ DP_START_TEST(cgnat21, test) * +---+ */ DP_DECL_TEST_CASE(npf_cgnat, cgnat22, cgnat_setup, cgnat_teardown); -DP_START_TEST(cgnat22, test) +DP_START_TEST_FULL_RUN(cgnat22, test) { uint16_t port_start = 4096, port_end = 4351; uint16_t nports = port_end - port_start + 1; @@ -2301,7 +2626,7 @@ DP_START_TEST(cgnat22, test) * +---+ */ DP_DECL_TEST_CASE(npf_cgnat, cgnat23, cgnat_setup, cgnat_teardown); -DP_START_TEST(cgnat23, test) +DP_START_TEST_FULL_RUN(cgnat23, test) { uint16_t port_start = 4096, port_end = 4351; uint16_t nports = port_end - port_start + 1; @@ -2382,7 +2707,7 @@ DP_START_TEST(cgnat23, test) * +---+ */ DP_DECL_TEST_CASE(npf_cgnat, cgnat24, cgnat_setup, cgnat_teardown); -DP_START_TEST(cgnat24, test) +DP_START_TEST_FULL_RUN(cgnat24, test) { /* * pool add POOL1 @@ -2449,8 +2774,9 @@ DP_START_TEST(cgnat24, test) /* - * npf_cgnat_25 - 'n' UDP forwards pkts, same src addr, diff dest addrs + * npf_cgnat_25 - Tests nested 2-tuple sessions. * + * 'n' UDP forwards pkts, same src addr, diff dest addrs * * Private Public * dp1T0 +---+ dp2T1 @@ -2458,11 +2784,11 @@ DP_START_TEST(cgnat24, test) * +---+ */ DP_DECL_TEST_CASE(npf_cgnat, cgnat25, cgnat_setup, cgnat_teardown); -DP_START_TEST(cgnat25, test) +DP_START_TEST_FULL_RUN(cgnat25, test) { uint block_size = 4096; uint mbpu = 16; - uint i, count = 1000; + uint i, count = 65; dpt_cgn_cmd_fmt(false, true, "nat-ut pool add POOL1 " @@ -2487,6 +2813,16 @@ DP_START_TEST(cgnat25, test) for (i = 0; i < count; i++) { + int status = DP_TEST_FWD_FORWARDED; + + /* + * 64 dests are allowed per 3-tuple session. Once that is + * reached, further flows will be dropped, and an ICMP error + * returned to the sender. 
+ */ + if (i == count - 1) + status = DP_TEST_FWD_DROPPED; + /* Alternate dest addr */ if ((i & 1) == 0) { daddr = "1.1.1.1"; @@ -2495,11 +2831,15 @@ DP_START_TEST(cgnat25, test) daddr = "1.1.1.2"; dmac = "aa:bb:cc:dd:2:b2"; } - cgnat_udp("dp1T0", "aa:bb:cc:dd:1:a1", 0, + + bool icmp_err = (status == DP_TEST_FWD_DROPPED); + + _cgnat_udp("dp1T0", "aa:bb:cc:dd:1:a1", 0, "100.64.0.1", 1000, daddr, dport, "1.1.1.11", 4096, daddr, dport, dmac, 0, "dp2T1", - DP_TEST_FWD_FORWARDED); + status, icmp_err, + __FILE__, __func__, __LINE__); dport++; } @@ -2524,7 +2864,7 @@ DP_START_TEST(cgnat25, test) * +---+ */ DP_DECL_TEST_CASE(npf_cgnat, cgnat26, cgnat_setup, cgnat_teardown); -DP_START_TEST(cgnat26, test) +DP_START_TEST_FULL_RUN(cgnat26, test) { uint block_size = 128; uint mbpu = 2; @@ -2620,7 +2960,7 @@ DP_START_TEST(cgnat26, test) * +---+ */ DP_DECL_TEST_CASE(npf_cgnat, cgnat27, cgnat_setup, cgnat_teardown); -DP_START_TEST(cgnat27, test) +DP_START_TEST_FULL_RUN(cgnat27, test) { /* * pool add POOL1 @@ -2727,7 +3067,7 @@ DP_START_TEST(cgnat27, test) * npf_cgnat_30 - CGNAT commands */ DP_DECL_TEST_CASE(npf_cgnat, cgnat30, NULL, NULL); -DP_START_TEST(cgnat30, test) +DP_START_TEST_FULL_RUN(cgnat30, test) { /* * Address pool with all options @@ -2902,7 +3242,7 @@ DP_START_TEST(cgnat30, test) * npf_cgnat_31 - CGNAT commands */ DP_DECL_TEST_CASE(npf_cgnat, cgnat31, NULL, NULL); -DP_START_TEST(cgnat31, test) +DP_START_TEST_FULL_RUN(cgnat31, test) { char real_ifname[IFNAMSIZ]; @@ -2914,17 +3254,21 @@ DP_START_TEST(cgnat31, test) dpt_cgn_cmd_fmt(false, true, "nat-ut pool add POOL1 type=cgnat " "address-range=RANGE1/1.1.1.11-1.1.1.20"); + dpt_addr_grp_create("MATCH1", "100.64.1.0/24"); dpt_cgn_cmd_fmt(false, true, "cgn-ut policy add POLICY1 " - "src-addr=100.64.1.0/12 pool=POOL1 priority=30"); + "match-ag=MATCH1 pool=POOL1 priority=30"); + dpt_addr_grp_create("MATCH2", "100.64.2.0/24"); dpt_cgn_cmd_fmt(false, true, "cgn-ut policy add POLICY2 " - "src-addr=100.64.2.0/12 pool=POOL1 
priority=10"); + "match-ag=MATCH2 pool=POOL1 priority=10"); + dpt_addr_grp_create("MATCH3", "100.64.3.0/24"); dpt_cgn_cmd_fmt(false, true, "cgn-ut policy add POLICY3 " - "src-addr=100.64.3.0/12 pool=POOL1 priority=20"); + "match-ag=MATCH3 pool=POOL1 priority=20"); + dpt_addr_grp_create("MATCH4", "100.64.4.0/24"); dpt_cgn_cmd_fmt(false, true, "cgn-ut policy add POLICY4 " - "src-addr=100.64.4.0/12 pool=POOL1 priority=40"); + "match-ag=MATCH4 pool=POOL1 priority=40"); /* First policy added to interface */ dpt_cgn_cmd_fmt(false, true, "cgn-ut policy attach intf=%s " @@ -2955,13 +3299,21 @@ DP_START_TEST(cgnat31, test) dpt_cgn_cmd_fmt(false, true, "cgn-ut max-sessions 1000000"); dpt_cgn_cmd_fmt(false, true, "cgn-ut max-dest-per-session 16"); dpt_cgn_cmd_fmt(false, true, "cgn-ut session-timeouts " - "tcp-opening=55 udp-estab=600"); + "tcp-opening 55 udp-estab 600"); + dpt_cgn_cmd_fmt(false, true, "cgn-ut session-timeouts " + "tcp-opening %u udp-estab %u", + CGN_DEF_ETIME_TCP_OPENING, + CGN_DEF_ETIME_TCP_ESTBD); dpt_cgn_cmd_fmt(false, true, "cgn-ut policy delete POLICY1"); dpt_cgn_cmd_fmt(false, true, "cgn-ut policy delete POLICY2"); dpt_cgn_cmd_fmt(false, true, "cgn-ut policy delete POLICY3"); dpt_cgn_cmd_fmt(false, true, "cgn-ut policy delete POLICY4"); dpt_cgn_cmd_fmt(false, true, "nat-ut pool delete POOL1"); + dpt_addr_grp_destroy("MATCH1", "100.64.1.0/24"); + dpt_addr_grp_destroy("MATCH2", "100.64.2.0/24"); + dpt_addr_grp_destroy("MATCH3", "100.64.3.0/24"); + dpt_addr_grp_destroy("MATCH4", "100.64.4.0/24"); } DP_END_TEST; @@ -2972,7 +3324,7 @@ DP_START_TEST(cgnat31, test) * Tests CGNAT and SNAT on same interface */ DP_DECL_TEST_CASE(npf_cgnat, cgnat32, cgnat_setup, cgnat_teardown); -DP_START_TEST(cgnat32, test) +DP_START_TEST_FULL_RUN(cgnat32, test) { /* * Add CGNAT config @@ -3016,6 +3368,7 @@ DP_START_TEST(cgnat32, test) .proto = IPPROTO_UDP, .map = "dynamic", .from_addr = "100.64.0.2", + .port_alloc = NULL, .from_port = NULL, .to_addr = NULL, .to_port = NULL, @@ 
-3082,7 +3435,7 @@ DP_START_TEST(cgnat32, test) * Tests CGNAT and Stateful Firewall on same interface */ DP_DECL_TEST_CASE(npf_cgnat, cgnat32b, cgnat_setup, cgnat_teardown); -DP_START_TEST(cgnat32b, test) +DP_START_TEST_FULL_RUN(cgnat32b, test) { /* * Add CGNAT config @@ -3124,13 +3477,13 @@ DP_START_TEST(cgnat32b, test) .rule = "10", .pass = PASS, .stateful = true, - .npf = "proto=6 dst-port=179" + .npf = "proto-final=6 dst-port=179" }, { .rule = "10", .pass = PASS, .stateful = true, - .npf = "proto=6 src-port=179" + .npf = "proto-final=6 src-port=179" }, NULL_RULE }; @@ -3206,7 +3559,7 @@ DP_START_TEST(cgnat32b, test) * destination address is *not* covered by the CGNAT address pool. */ DP_DECL_TEST_CASE(npf_cgnat, cgnat32c, cgnat_setup, cgnat_teardown); -DP_START_TEST(cgnat32c, test) +DP_START_TEST_FULL_RUN(cgnat32c, test) { /* * Outside to Inside packet before cfg CGNAT is configured, where dest @@ -3285,7 +3638,7 @@ DP_START_TEST(cgnat32c, test) * Tests ICMP error messages with embedded UDP packets (incl cksum 0) */ DP_DECL_TEST_CASE(npf_cgnat, cgnat33, cgnat_setup, cgnat_teardown); -DP_START_TEST(cgnat33, test) +DP_START_TEST_FULL_RUN(cgnat33, test) { struct dp_test_expected *test_exp; struct rte_mbuf *test_pak, *exp_pak; @@ -3307,7 +3660,7 @@ DP_START_TEST(cgnat33, test) struct dp_test_pkt_desc_t int_to_ext_pre = { .text = "IPv4 Inside to Outside UDP pre", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "100.64.0.1", .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = "1.1.1.1", @@ -3329,7 +3682,7 @@ DP_START_TEST(cgnat33, test) struct dp_test_pkt_desc_t int_to_ext_post = { .text = "IPv4 Outside to Inside UDP post", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "1.1.1.11", .l2_src = "aa:bb:cc:dd:2:b1", .l3_dst = "1.1.1.1", @@ -3360,7 +3713,7 @@ DP_START_TEST(cgnat33, test) struct dp_test_pkt_desc_t ext_to_int_pre = { .text = "IPv4 Outside to Inside UDP pre", .len = 20, - .ether_type = 
ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "1.1.1.1", .l2_src = "aa:bb:cc:dd:2:b1", .l3_dst = "1.1.1.11", @@ -3382,7 +3735,7 @@ DP_START_TEST(cgnat33, test) struct dp_test_pkt_desc_t ext_to_int_post = { .text = "IPv4 Outside to Inside UDP post", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "1.1.1.1", .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = "100.64.0.1", @@ -3442,7 +3795,7 @@ DP_START_TEST(cgnat33, test) (void)dp_test_pktmbuf_eth_init(icmp_pak, dp_test_intf_name2mac_str("dp1T0"), "aa:bb:cc:dd:1:a1", - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); /* Create expect */ payload_pak = dp_test_v4_pkt_from_desc(&ext_to_int_pre); @@ -3469,7 +3822,7 @@ DP_START_TEST(cgnat33, test) dp_test_ipv4_decrement_ttl(exp_pak); (void)dp_test_pktmbuf_eth_init(exp_pak, "aa:bb:cc:dd:2:b1", dp_test_intf_name2mac_str("dp2T1"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); /* Send ICMP error message */ dp_test_pak_receive(icmp_pak, "dp1T0", test_exp); @@ -3504,7 +3857,7 @@ DP_START_TEST(cgnat33, test) (void)dp_test_pktmbuf_eth_init(icmp_pak, dp_test_intf_name2mac_str("dp2T1"), "aa:bb:cc:dd:2:b1", - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); /* Create expect */ payload_pak = dp_test_v4_pkt_from_desc(&int_to_ext_pre); @@ -3531,7 +3884,7 @@ DP_START_TEST(cgnat33, test) dp_test_ipv4_decrement_ttl(exp_pak); (void)dp_test_pktmbuf_eth_init(exp_pak, "aa:bb:cc:dd:1:a1", dp_test_intf_name2mac_str("dp1T0"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); /* Send ICMP error message */ dp_test_pak_receive(icmp_pak, "dp2T1", test_exp); @@ -3547,7 +3900,7 @@ DP_START_TEST(cgnat33, test) /* Create packet to be embedded in ICMP error message */ payload_pak = dp_test_v4_pkt_from_desc(&ext_to_int_post); - udp = pktmbuf_mtol4(payload_pak, struct udphdr *); + udp = dp_pktmbuf_mtol4(payload_pak, struct udphdr *); udp->check = 0; /* Create ICMP error message */ @@ -3569,11 +3922,11 @@ DP_START_TEST(cgnat33, test) (void)dp_test_pktmbuf_eth_init(icmp_pak, 
dp_test_intf_name2mac_str("dp1T0"), "aa:bb:cc:dd:1:a1", - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); /* Create expect */ payload_pak = dp_test_v4_pkt_from_desc(&ext_to_int_pre); - udp = pktmbuf_mtol4(payload_pak, struct udphdr *); + udp = dp_pktmbuf_mtol4(payload_pak, struct udphdr *); udp->check = 0; icmplen = sizeof(struct iphdr) + sizeof(struct udphdr) + @@ -3598,7 +3951,7 @@ DP_START_TEST(cgnat33, test) dp_test_ipv4_decrement_ttl(exp_pak); (void)dp_test_pktmbuf_eth_init(exp_pak, "aa:bb:cc:dd:2:b1", dp_test_intf_name2mac_str("dp2T1"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); /* Send ICMP error message */ dp_test_pak_receive(icmp_pak, "dp1T0", test_exp); @@ -3621,7 +3974,7 @@ DP_START_TEST(cgnat33, test) * Tests ICMP error messages with embedded TCP packets (including truncated) */ DP_DECL_TEST_CASE(npf_cgnat, cgnat34, cgnat_setup, cgnat_teardown); -DP_START_TEST(cgnat34, test) +DP_START_TEST_FULL_RUN(cgnat34, test) { struct dp_test_expected *test_exp; struct rte_mbuf *test_pak, *exp_pak; @@ -3643,7 +3996,7 @@ DP_START_TEST(cgnat34, test) struct dp_test_pkt_desc_t int_to_ext_pre = { .text = "IPv4 Inside to Outside UDP pre", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "100.64.0.1", .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = "1.1.1.1", @@ -3670,7 +4023,7 @@ DP_START_TEST(cgnat34, test) struct dp_test_pkt_desc_t int_to_ext_post = { .text = "IPv4 Outside to Inside TCP post", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "1.1.1.11", .l2_src = "aa:bb:cc:dd:2:b1", .l3_dst = "1.1.1.1", @@ -3707,7 +4060,7 @@ DP_START_TEST(cgnat34, test) struct dp_test_pkt_desc_t ext_to_int_pre = { .text = "IPv4 Outside to Inside TCP pre", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "1.1.1.1", .l2_src = "aa:bb:cc:dd:2:b1", .l3_dst = "1.1.1.11", @@ -3734,7 +4087,7 @@ DP_START_TEST(cgnat34, test) struct dp_test_pkt_desc_t ext_to_int_post = { .text = "IPv4 
Outside to Inside TCP post", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "1.1.1.1", .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = "100.64.0.1", @@ -3798,7 +4151,7 @@ DP_START_TEST(cgnat34, test) (void)dp_test_pktmbuf_eth_init(icmp_pak, dp_test_intf_name2mac_str("dp1T0"), "aa:bb:cc:dd:1:a1", - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); /* Create expect */ payload_pak = dp_test_v4_pkt_from_desc(&ext_to_int_pre); @@ -3825,7 +4178,7 @@ DP_START_TEST(cgnat34, test) dp_test_ipv4_decrement_ttl(exp_pak); (void)dp_test_pktmbuf_eth_init(exp_pak, "aa:bb:cc:dd:2:b1", dp_test_intf_name2mac_str("dp2T1"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); /* Send ICMP error message */ dp_test_pak_receive(icmp_pak, "dp1T0", test_exp); @@ -3860,7 +4213,7 @@ DP_START_TEST(cgnat34, test) (void)dp_test_pktmbuf_eth_init(icmp_pak, dp_test_intf_name2mac_str("dp2T1"), "aa:bb:cc:dd:2:b1", - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); /* Create expect */ payload_pak = dp_test_v4_pkt_from_desc(&int_to_ext_pre); @@ -3887,7 +4240,7 @@ DP_START_TEST(cgnat34, test) dp_test_ipv4_decrement_ttl(exp_pak); (void)dp_test_pktmbuf_eth_init(exp_pak, "aa:bb:cc:dd:1:a1", dp_test_intf_name2mac_str("dp1T0"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); /* Send ICMP error message */ dp_test_pak_receive(icmp_pak, "dp2T1", test_exp); @@ -3920,7 +4273,7 @@ DP_START_TEST(cgnat34, test) (void)dp_test_pktmbuf_eth_init(icmp_pak, dp_test_intf_name2mac_str("dp1T0"), "aa:bb:cc:dd:1:a1", - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); /* Create expect */ payload_pak = dp_test_v4_pkt_from_desc(&ext_to_int_pre); @@ -3945,7 +4298,7 @@ DP_START_TEST(cgnat34, test) dp_test_ipv4_decrement_ttl(exp_pak); (void)dp_test_pktmbuf_eth_init(exp_pak, "aa:bb:cc:dd:2:b1", dp_test_intf_name2mac_str("dp2T1"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); /* Send ICMP error message */ dp_test_pak_receive(icmp_pak, "dp1T0", test_exp); @@ -3981,7 +4334,7 @@ DP_START_TEST(cgnat34, test) * */ 
DP_DECL_TEST_CASE(npf_cgnat, cgnat35, NULL, NULL); -DP_START_TEST(cgnat35, test) +DP_START_TEST_FULL_RUN(cgnat35, test) { struct dp_test_expected *exp; struct rte_mbuf *icmp_pak; @@ -4023,7 +4376,7 @@ DP_START_TEST(cgnat35, test) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp3T3"), - neigh3_mac_str, ETHER_TYPE_IPv4); + neigh3_mac_str, RTE_ETHER_TYPE_IPV4); /* * Expected packet @@ -4040,7 +4393,7 @@ DP_START_TEST(cgnat35, test) (void)dp_test_pktmbuf_eth_init(icmp_pak, neigh3_mac_str, dp_test_intf_name2mac_str("dp3T3"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); dp_test_set_pak_ip_field(ip, DP_TEST_SET_TOS, IPTOS_PREC_INTERNETCONTROL); @@ -4101,7 +4454,7 @@ DP_START_TEST(cgnat35, test) * >1400 ---------> */ DP_DECL_TEST_CASE(npf_cgnat, cgnat36, NULL, NULL); -DP_START_TEST(cgnat36, test) +DP_START_TEST_FULL_RUN(cgnat36, test) { struct dp_test_expected *exp; struct rte_mbuf *icmp_pak; @@ -4152,7 +4505,7 @@ DP_START_TEST(cgnat36, test) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp3T3"), - neigh3_mac_str, ETHER_TYPE_IPv4); + neigh3_mac_str, RTE_ETHER_TYPE_IPV4); /* * Expected packet @@ -4169,7 +4522,7 @@ DP_START_TEST(cgnat36, test) (void)dp_test_pktmbuf_eth_init(icmp_pak, neigh3_mac_str, dp_test_intf_name2mac_str("dp3T3"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); dp_test_set_pak_ip_field(ip, DP_TEST_SET_TOS, IPTOS_PREC_INTERNETCONTROL); @@ -4215,7 +4568,7 @@ DP_START_TEST(cgnat36, test) * not a cgnat session is *not* filtered by CGNAT. 
*/ DP_DECL_TEST_CASE(npf_cgnat, cgnat37, cgnat_setup, cgnat_teardown); -DP_START_TEST(cgnat37, test) +DP_START_TEST_FULL_RUN(cgnat37, test) { dpt_cgn_cmd_fmt(false, true, "nat-ut pool add POOL1 " @@ -4253,6 +4606,7 @@ DP_START_TEST(cgnat37, test) .ifname = "dp2T1", .proto = IPPROTO_UDP, .map = "dynamic", + .port_alloc = NULL, .from_addr = "100.64.0.2", .from_port = NULL, .to_addr = NULL, @@ -4329,7 +4683,7 @@ struct cgnat_test_vals { }; DP_DECL_TEST_CASE(npf_cgnat, cgnat38, cgnat_setup, cgnat_teardown); -DP_START_TEST(cgnat38, test) +DP_START_TEST_FULL_RUN(cgnat38, test) { dpt_cgn_cmd_fmt(false, true, "nat-ut pool add POOL1 " @@ -4395,7 +4749,7 @@ DP_START_TEST(cgnat38, test) * +---+ */ DP_DECL_TEST_CASE(npf_cgnat, cgnat39, cgnat_setup, cgnat_teardown); -DP_START_TEST(cgnat39, test) +DP_START_TEST_FULL_RUN(cgnat39, test) { /* * pool add POOL1 @@ -4443,7 +4797,7 @@ DP_START_TEST(cgnat39, test) struct dp_test_pkt_desc_t pre_pkt_UDP = { .text = "IPv4 UDP", .len = len, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = pre_saddr, .l2_src = pre_smac, .l3_dst = pre_daddr, @@ -4463,7 +4817,7 @@ DP_START_TEST(cgnat39, test) struct dp_test_pkt_desc_t post_pkt_UDP = { .text = "IPv4 UDP", .len = len, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = post_saddr, .l2_src = "aa:bb:cc:dd:2:b1", .l3_dst = post_daddr, @@ -4552,7 +4906,7 @@ DP_START_TEST(cgnat39, test) * second mbuf. 
*/ DP_DECL_TEST_CASE(npf_cgnat, cgnat40, cgnat_setup, cgnat_teardown); -DP_START_TEST(cgnat40, test) +DP_START_TEST_FULL_RUN(cgnat40, test) { struct rte_mbuf *test_pak, *exp_pak; @@ -4578,7 +4932,7 @@ DP_START_TEST(cgnat40, test) dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T0"), "aa:bb:cc:dd:1:a1", - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); exp_pak = dp_test_create_tcp_ipv4_pak( "1.1.1.13", "1.1.1.1", 1024, 80, TH_SYN, @@ -4587,7 +4941,7 @@ DP_START_TEST(cgnat40, test) dp_test_pktmbuf_eth_init(exp_pak, dp_test_intf_name2mac_str("dp1T0"), "aa:bb:cc:dd:1:a1", - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); exp = dp_test_exp_create(exp_pak); rte_pktmbuf_free(exp_pak); @@ -4597,7 +4951,7 @@ DP_START_TEST(cgnat40, test) (void)dp_test_pktmbuf_eth_init( dp_test_exp_get_pak(exp), "aa:bb:cc:dd:2:b1", dp_test_intf_name2mac_str("dp2T1"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); dp_test_ipv4_decrement_ttl(dp_test_exp_get_pak(exp)); @@ -4717,9 +5071,11 @@ DP_START_TEST(cgnat41, test) goto repeat; /* - * Fetch the sessions in batches of 4 at a time + * Fetch the sessions */ - count = dpt_cgn_show_session(NULL, 4, false, false); + count = dpt_cgn_show_session(NULL, + nsessions_per_repeat * repeat_count, + false, false, false); dp_test_fail_unless((uint)count == nsessions_per_repeat * repeat_count, "%u sessions in show output, %u expected", @@ -4728,7 +5084,7 @@ DP_START_TEST(cgnat41, test) /* * Fetch the sessions in per-subscriber batches */ - count = dpt_cgn_show_session(NULL, 1000, true, false); + count = dpt_cgn_show_session(NULL, 1000, true, false, false); dp_test_fail_unless((uint)count == nsessions_per_repeat * repeat_count, "%u sessions in show output, %u expected", @@ -4842,7 +5198,7 @@ DP_START_TEST_DONT_RUN(cgnat42, test) * Fetch the sessions in batches of 1000 at a time */ ms1 = time_ms(); - count = dpt_cgn_show_session(NULL, 1000, false, false); + count = dpt_cgn_show_session(NULL, 1000, false, false, false); ms2 = time_ms(); 
dp_test_fail_unless((uint)count == nsessions_per_repeat * repeat_count, @@ -4860,170 +5216,2035 @@ DP_START_TEST_DONT_RUN(cgnat42, test) } DP_END_TEST; - -/********************************************************************** - * Support Functions - *********************************************************************/ +static inline uint64_t cgn_time_nsecs(void) +{ + struct timespec ts; + clock_gettime(CLOCK_REALTIME, &ts); + return (ts.tv_sec * 1000000000) + ts.tv_nsec; +} /* - * Issue command to dataplane + * cgnat43 -- More cgnat scale tests */ -static void -_dpt_cgn_cmd(const char *cmd, bool print, bool exp, - const char *file, int line) +DP_DECL_TEST_CASE(npf_cgnat, cgnat43, cgnat_setup, cgnat_teardown); +DP_START_TEST_DONT_RUN(cgnat43, test) { - char *reply; - bool err; - - reply = dp_test_console_request_w_err(cmd, &err, print); + char real_ifname[IFNAMSIZ]; /* - * Returned string for npf commands is just an empty string, which is - * of no interest + * Setup */ - free(reply); - - _dp_test_fail_unless(err != exp, file, line, - "Expd %u, got %u: \"%s\"", exp, !err, cmd); -} - -#define CGN_MAX_CMD_LEN 5000 - -void -_dpt_cgn_cmd_fmt(bool print, bool exp, - const char *file, int line, const char *fmt_str, ...) -{ - char cmd[CGN_MAX_CMD_LEN]; - va_list ap; + dp_test_intf_real("dp2T1", real_ifname); + struct ifnet *ifp = dp_ifnet_byifname(real_ifname); - va_start(ap, fmt_str); - vsnprintf(cmd, CGN_MAX_CMD_LEN, fmt_str, ap); - _dpt_cgn_cmd(cmd, print, exp, file, line); - va_end(ap); -} + dpt_cgn_cmd_fmt(false, true, + "nat-ut pool add POOL1 " + "type=cgnat " + "prefix=RANGE1/1.1.1.192/26 " + "block-size=4096 " + "max-blocks=32 " + "addr-pooling=arbitrary " + "log-pba=no"); -/* - * This is called *after* the packet has been modified, but *before* the pkt - * queued on the tx ring is checked. 
- */ -static void -cgn_validate_cb(struct rte_mbuf *mbuf, struct ifnet *ifp, - struct dp_test_expected *expected, - enum dp_test_fwd_result_e fwd_result) -{ - struct cgn_ctx *ctx = dp_test_exp_get_validate_ctx(expected); + cgnat_policy_add("POLICY1", 10, "100.64.0.0/24", "POOL1", + "dp2T1", CGN_MAP_EIM, CGN_FLTR_EIF, CGN_3TUPLE, true); - /* call the saved check routine */ - if (ctx->do_check) { - (ctx->saved_cb)(mbuf, ifp, expected, fwd_result); - } else { - expected->pak_correct[0] = true; - expected->pak_checked[0] = true; - } -} + cgnat_policy_add("POLICY2", 20, "100.64.1.0/24", "POOL1", + "dp2T1", CGN_MAP_EIM, CGN_FLTR_EIF, CGN_5TUPLE, true); -/* - * policy add POLICY1 - * pri=10 - * src-addr=100.64.0.0/12 - * pool=POOL1 - * map-type=eim - * fltr-type=eif - * trans-type=napt44-dyn - * log-sess=yes - */ -void -_cgnat_policy_add(const char *policy, uint pri, const char *src, - const char *pool, const char *intf, - enum cgn_map_type eim, enum cgn_fltr_type eif, - bool log_sess, bool check_feat, - const char *file, const char *func, int line) -{ - char real_ifname[IFNAMSIZ]; - dp_test_intf_real(intf, real_ifname); - - /* Add cgnat policy */ - _dp_test_npf_cmd_fmt(false, file, line, - "cgn-ut policy add %s priority=%u " - "src-addr=%s pool=%s log-sess-all=%s", - policy, pri, src, pool, - log_sess ? 
"yes" : "no"); - - _dp_test_npf_cmd_fmt(false, file, line, - "cgn-ut policy attach name=%s intf=%s", - policy, real_ifname); - - /* Check cgnat feature is enabled */ - if (check_feat) { - dp_test_wait_for_pl_feat(intf, "vyatta:ipv4-cgnat-in", - "ipv4-validate"); - dp_test_wait_for_pl_feat(intf, "vyatta:ipv4-cgnat-out", - "ipv4-out"); - } -} + uint i, j, outer_count, inner_count; + uint64_t nsecs1, nsecs2, elapsed; /* nanosecs */ + uint64_t average; /* nanosecs */ + uint64_t overhead; -static void -_cgnat_policy_add2(const char *policy, uint pri, const char *src, - const char *pool, const char *intf, - enum cgn_map_type eim, enum cgn_fltr_type eif, - const char *log_group, bool check_feat, - const char *file, const char *func, int line) -{ - char real_ifname[IFNAMSIZ]; + static char subs_str[20]; + uint32_t subs_addr; + uint16_t subs_port; + int rc, error; + bool rv; + struct timespec ts; - dp_test_intf_real(intf, real_ifname); + /******************************************************************* + * Get execution time of gettimeofday() + */ - /* Add cgnat policy */ - _dp_test_npf_cmd_fmt(false, file, line, - "cgn-ut policy add %s priority=%u " - "src-addr=%s pool=%s log-sess-group=%s", - policy, pri, src, pool, log_group); + outer_count = 1; + inner_count = 50000; + overhead = 0; - _dp_test_npf_cmd_fmt(false, file, line, - "cgn-ut policy attach name=%s intf=%s", - policy, real_ifname); + printf("\n"); + printf("Test 1: (%u x %u) gettimeofday()\n", + outer_count, inner_count); - /* Check cgnat feature is enabled */ - if (check_feat) { - dp_test_wait_for_pl_feat(intf, "vyatta:ipv4-cgnat-in", - "ipv4-validate"); - dp_test_wait_for_pl_feat(intf, "vyatta:ipv4-cgnat-out", - "ipv4-out"); +loop1: + nsecs1 = cgn_time_nsecs(); + for (i = 0; i < outer_count; i++) { + for (j = 0; j < inner_count; j++) { + if (overhead > 0) + /* Do task */ + clock_gettime(CLOCK_REALTIME_COARSE, &ts); + } } -} - + nsecs2 = cgn_time_nsecs(); + elapsed = nsecs2 - nsecs1; + if (overhead == 0) { + 
overhead = elapsed; + goto loop1; + } + if (overhead <= elapsed) + elapsed -= overhead; + else + elapsed = 0; + average = elapsed / (i * j); + printf(" Time %lu nS, average %lu nS\n", elapsed, average); + + + /******************************************************************* + * Create 3-tuple session + */ + + outer_count = 1; + inner_count = 50000; + overhead = 0; + + subs_addr = dpt_init_ipaddr(subs_str, "100.64.0.1"); + subs_port = 1024; + + /* Initial session to create subscriber and apm structs */ + rc = dpt_cgn_map2(ifp, 12000, 17, subs_addr, subs_port++, NULL, NULL); + dp_test_fail_unless(rc == 0, "dpt_cgn_map2 failed"); + + printf("\n"); + printf("Test 2: (%u x %u) Create 3-tuple session\n", + outer_count, inner_count); + +loop2: + nsecs1 = cgn_time_nsecs(); + for (i = 0; i < outer_count; i++) { + for (j = 0; j < inner_count; j++) { + if (overhead > 0) { + /* Do task */ + rc = dpt_cgn_map2(ifp, 12000, 17, + subs_addr, subs_port, + NULL, NULL); + if (rc < 0) + goto end2; + + if (++subs_port == 65535) { + subs_port = 1024; + subs_addr = dpt_incr_ipaddr( + subs_addr, subs_str, + sizeof(subs_str)); + } + } + } + /* Change subs_addr if >1 */ + assert(outer_count == 1); + } +end2: + nsecs2 = cgn_time_nsecs(); + elapsed = nsecs2 - nsecs1; + if (overhead == 0) { + overhead = elapsed; + goto loop2; + } + if (overhead <= elapsed) + elapsed -= overhead; + else + elapsed = 0; + average = elapsed / (i * j); + printf(" Time %lu nS, average %lu nS\n", elapsed, average); + + cgn_session_cleanup(); + + + /******************************************************************* + * Create 5-tuple session + */ + + outer_count = 1; + inner_count = 50000; + overhead = 0; + + subs_addr = dpt_init_ipaddr(subs_str, "100.64.1.1"); + subs_port = 1024; + + /* Initial session to create subscriber and apm structs */ + rc = dpt_cgn_map2(ifp, 12000, 17, subs_addr, subs_port++, NULL, NULL); + dp_test_fail_unless(rc == 0, "dpt_cgn_map2 failed"); + + printf("\n"); + printf("Test 3: (%u x %u) 
Create 5-tuple session\n", + outer_count, inner_count); + +loop3: + nsecs1 = cgn_time_nsecs(); + for (i = 0; i < outer_count; i++) { + for (j = 0; j < inner_count; j++) { + if (overhead > 0) { + /* Do task */ + rc = dpt_cgn_map2(ifp, 12000, 17, + subs_addr, subs_port, + NULL, NULL); + if (rc < 0) + goto end3; + + if (++subs_port == 65535) { + subs_port = 1024; + subs_addr = dpt_incr_ipaddr( + subs_addr, subs_str, + sizeof(subs_str)); + } + } + } + /* Change subs_addr if >1 */ + assert(outer_count == 1); + } +end3: + nsecs2 = cgn_time_nsecs(); + elapsed = nsecs2 - nsecs1; + if (overhead == 0) { + overhead = elapsed; + goto loop3; + } + if (overhead <= elapsed) + elapsed -= overhead; + else + elapsed = 0; + average = elapsed / (i * j); + printf(" Time %lu nS, average %lu nS\n", elapsed, average); -void -_cgnat_policy_del(const char *policy, uint pri, const char *intf, - const char *file, const char *func, int line) -{ - char real_ifname[IFNAMSIZ]; + cgn_session_cleanup(); - dp_test_intf_real(intf, real_ifname); - _dp_test_npf_cmd_fmt(false, file, line, - "cgn-ut policy detach name=%s intf=%s", - policy, real_ifname); + /******************************************************************* + * Translate packet, 3-tuple session + */ - /* Delete cgnat policy */ - _dp_test_npf_cmd_fmt(false, file, line, - "cgn-ut policy delete %s", policy); -} + outer_count = 1; + inner_count = 50000; + overhead = 0; -static void cgnat_setup(void) -{ - dp_test_nl_add_ip_addr_and_connected("dp1T0", "2.2.2.254/24"); - dp_test_nl_add_ip_addr_and_connected("dp1T0", "100.64.0.254/16"); - dp_test_nl_add_ip_addr_and_connected("dp2T1", "1.1.1.254/24"); + subs_addr = dpt_init_ipaddr(subs_str, "100.64.0.1"); + subs_port = 1024; - /* - * Inside + struct dp_test_pkt_desc_t ins_pre = { + .text = "Inside pre", + .len = 20, + .ether_type = RTE_ETHER_TYPE_IPV4, + .l3_src = subs_str, + .l2_src = "aa:bb:cc:dd:1:a1", + .l3_dst = "1.1.1.1", + .l2_dst = "aa:bb:cc:dd:2:b1", + .proto = IPPROTO_TCP, + .l4 = { + 
.tcp = { + .sport = subs_port, + .dport = 80, + .flags = TH_ACK, + .seq = 0, + .ack = 0, + .win = 8192, + .opts = NULL + } + }, + .rx_intf = "dp1T0", + .tx_intf = "dp2T1" + }; + + /* Initial session to create subscriber and apm structs */ + dpt_cgn_map2(ifp, 12000, 6, subs_addr, subs_port, NULL, NULL); + + struct rte_mbuf *orig_mbuf = dp_test_v4_pkt_from_desc(&ins_pre); + + printf("\n"); + printf("Test 4: (%u x %u) Translate packet, 3-tuple session\n", + outer_count, inner_count); + +loop4: + nsecs1 = cgn_time_nsecs(); + for (i = 0; i < outer_count; i++) { + for (j = 0; j < inner_count; j++) { + struct rte_mbuf *mbuf; + + mbuf = pktmbuf_copy(orig_mbuf, orig_mbuf->pool); + rv = true; + error = 0; + + if (overhead > 0) + /* Do task */ + rv = ipv4_cgnat_test(&mbuf, ifp, + CGN_DIR_OUT, &error); + + rte_pktmbuf_free(mbuf); + if (!rv || error < 0) + goto end4; + } + } +end4: + nsecs2 = cgn_time_nsecs(); + elapsed = nsecs2 - nsecs1; + if (overhead == 0) { + overhead = elapsed; + goto loop4; + } + rte_pktmbuf_free(orig_mbuf); /* both passes copy it: free once */ + if (overhead <= elapsed) + elapsed -= overhead; + else + elapsed = 0; + average = elapsed / (i * j); + printf(" Time %lu nS, average %lu nS\n", elapsed, average); + + cgn_session_cleanup(); + + + /******************************************************************* + * Translate packet, 5-tuple session + */ + + outer_count = 1; + inner_count = 50000; + overhead = 0; + + subs_addr = dpt_init_ipaddr(subs_str, "100.64.1.1"); + subs_port = 1024; + ins_pre.l3_src = subs_str; + ins_pre.l4.tcp.sport = subs_port; + + /* Initial session to create subscriber and apm structs */ + dpt_cgn_map2(ifp, 12000, 6, subs_addr, subs_port, NULL, NULL); + + orig_mbuf = dp_test_v4_pkt_from_desc(&ins_pre); + + printf("\n"); + printf("Test 5: (%u x %u) Translate packet, 5-tuple session\n", + outer_count, inner_count); + +loop5: + nsecs1 = cgn_time_nsecs(); + for (i = 0; i < outer_count; i++) { + for (j = 0; j < inner_count; j++) { + struct rte_mbuf *mbuf; + + mbuf = 
pktmbuf_copy(orig_mbuf, orig_mbuf->pool); + rv = true; + error = 0; + + if (overhead > 0) + /* Do task */ + rv = ipv4_cgnat_test(&mbuf, ifp, + CGN_DIR_OUT, &error); + + rte_pktmbuf_free(mbuf); + if (!rv || error < 0) + goto end5; + } + } +end5: + nsecs2 = cgn_time_nsecs(); + elapsed = nsecs2 - nsecs1; + if (overhead == 0) { + overhead = elapsed; + goto loop5; + } + rte_pktmbuf_free(orig_mbuf); /* both passes copy it: free once */ + if (overhead <= elapsed) + elapsed -= overhead; + else + elapsed = 0; + average = elapsed / (i * j); + printf(" Time %lu nS, average %lu nS\n", elapsed, average); + + cgn_session_cleanup(); + + + /******************************************************************* + * Translate packet, new 3-tuple session per packet + */ + + outer_count = 1; + inner_count = 50000; + overhead = 0; + + subs_addr = dpt_init_ipaddr(subs_str, "100.64.0.1"); + subs_port = 1024; + ins_pre.l3_src = subs_str; + ins_pre.l4.tcp.sport = subs_port; + + /* Initial session to create subscriber and apm structs */ + dpt_cgn_map2(ifp, 12000, 6, subs_addr, subs_port++, NULL, NULL); + + printf("\n"); + printf("Test 6: (%u x %u) Translate packet, " + "new 3-tuple session per pkt\n", + outer_count, inner_count); + +loop6: + nsecs1 = cgn_time_nsecs(); + for (i = 0; i < outer_count; i++) { + for (j = 0; j < inner_count; j++) { + struct rte_mbuf *mbuf; + + ins_pre.l3_src = subs_str; + ins_pre.l4.tcp.sport = subs_port++; + mbuf = dp_test_v4_pkt_from_desc(&ins_pre); + + rv = true; + error = 0; + + if (overhead > 0) + /* Do task */ + rv = ipv4_cgnat_test(&mbuf, ifp, + CGN_DIR_OUT, &error); + + rte_pktmbuf_free(mbuf); + if (!rv || error < 0) + goto end6; + } + } +end6: + nsecs2 = cgn_time_nsecs(); + elapsed = nsecs2 - nsecs1; + if (overhead == 0) { + overhead = elapsed; + goto loop6; + } + if (overhead <= elapsed) + elapsed -= overhead; + else + elapsed = 0; + average = elapsed / (i * j); + printf(" Time %lu nS, average %lu nS\n", elapsed, average); + + cgn_session_cleanup(); + + 
/******************************************************************* + * Translate packet, new 5-tuple session per packet + */ + + outer_count = 1; + inner_count = 50000; + overhead = 0; + + subs_addr = dpt_init_ipaddr(subs_str, "100.64.1.1"); + subs_port = 1024; + ins_pre.l3_src = subs_str; + ins_pre.l4.tcp.sport = subs_port; + + /* Initial session to create subscriber and apm structs */ + dpt_cgn_map2(ifp, 12000, 6, subs_addr, subs_port++, NULL, NULL); + + printf("\n"); + printf("Test 7: (%u x %u) Translate packet, " + "new 5-tuple session per pkt\n", + outer_count, inner_count); + +loop7: + nsecs1 = cgn_time_nsecs(); + for (i = 0; i < outer_count; i++) { + for (j = 0; j < inner_count; j++) { + struct rte_mbuf *mbuf; + + ins_pre.l3_src = subs_str; + ins_pre.l4.tcp.sport = subs_port++; + mbuf = dp_test_v4_pkt_from_desc(&ins_pre); + + rv = true; + error = 0; + + if (overhead > 0) + /* Do task */ + rv = ipv4_cgnat_test(&mbuf, ifp, + CGN_DIR_OUT, &error); + + rte_pktmbuf_free(mbuf); + if (!rv || error < 0) + goto end7; + } + } +end7: + nsecs2 = cgn_time_nsecs(); + elapsed = nsecs2 - nsecs1; + if (overhead == 0) { + overhead = elapsed; + goto loop7; + } + if (overhead <= elapsed) + elapsed -= overhead; + else + elapsed = 0; + average = elapsed / (i * j); + printf(" Time %lu nS, average %lu nS\n", elapsed, average); + + cgn_session_cleanup(); + + /* + * Cleanup + */ + printf("\n"); + cgnat_policy_del("POLICY1", 10, "dp2T1"); + cgnat_policy_del("POLICY2", 20, "dp2T1"); + + dp_test_npf_cmd_fmt(false, "nat-ut pool delete POOL1"); + +} DP_END_TEST; + +/* + * cgnat45 -- cgnat map command (for pcp) + */ +DP_DECL_TEST_CASE(npf_cgnat, cgnat45, cgnat_setup, cgnat_teardown); +DP_START_TEST(cgnat45, test) +{ + char real_ifname[IFNAMSIZ]; + int rc; + uint i; + + dp_test_intf_real("dp2T1", real_ifname); + + dpt_cgn_cmd_fmt(false, true, + "nat-ut pool add POOL1 " + "type=cgnat " + "address-range=RANGE1/1.1.1.11-1.1.1.20 " + "block-size=4096 " + "max-blocks=32 " + 
"addr-pooling=arbitrary " + "log-pba=no"); + + cgnat_policy_add("POLICY1", 10, "100.64.0.0/12", "POOL1", + "dp2T1", CGN_MAP_EIM, CGN_FLTR_EIF, CGN_3TUPLE, true); + + + static char subs_str[20]; + uint32_t subs_addr; + uint16_t subs_port; + char pub_str[20]; + int pub_port = 0; + bool debug = false; + + /* Subscriber addr and port */ + subs_addr = dpt_init_ipaddr(subs_str, "100.64.0.1"); + subs_port = 1024; + dp_test_fail_unless(subs_addr, "subs_addr"); + + /************************************************************** + * Let dataplane assign mapping + */ + dpt_init_ipaddr(pub_str, "0.0.0.0"); + pub_port = 0; + + if (debug) + printf("Let dataplane assign mapping ...\n"); + + rc = dpt_cgn_map(debug, real_ifname, 12000, 17, subs_str, subs_port, + pub_str, &pub_port); + dp_test_fail_unless(rc == 0, "map command"); + + dp_test_fail_unless(pub_port == 1024, + "Public port %d, expected 1024", pub_port); + dp_test_fail_unless(!strcmp(pub_str, "1.1.1.11"), + "Public address %s, expected 1.1.1.11", pub_str); + + /************************************************************** + * Request a specific mapping + */ + subs_addr = dpt_init_ipaddr(subs_str, "100.64.0.2"); + subs_port = 1234; + dp_test_fail_unless(subs_addr, "subs_addr"); + + dpt_init_ipaddr(pub_str, "1.1.1.15"); + pub_port = 2000; + + if (debug) + printf("Request mapping %s port %d\n", pub_str, pub_port); + + rc = dpt_cgn_map(debug, real_ifname, 12000, 17, subs_str, subs_port, + pub_str, &pub_port); + dp_test_fail_unless(rc == 0, "map command rc=%d", -rc); + + dp_test_fail_unless(pub_port == 2000, + "Public port %d, expected 2000", pub_port); + dp_test_fail_unless(!strcmp(pub_str, "1.1.1.15"), + "Public address %s, expected 1.1.1.15", pub_str); + + + /************************************************************** + * Refresh an existing mapping + */ + subs_addr = dpt_init_ipaddr(subs_str, "100.64.0.2"); + subs_port = 1234; + dp_test_fail_unless(subs_addr, "subs_addr"); + + dpt_init_ipaddr(pub_str, "1.1.1.15"); + 
pub_port = 2000; + + if (debug) + printf("Refresh an existing mapping %s port %d\n", + pub_str, pub_port); + + rc = dpt_cgn_map(debug, real_ifname, 12000, 17, subs_str, subs_port, + pub_str, &pub_port); + dp_test_fail_unless(rc == 0, "map command rc=%d", -rc); + + dp_test_fail_unless(pub_port == 2000, + "Public port %d, expected 2000", pub_port); + dp_test_fail_unless(!strcmp(pub_str, "1.1.1.15"), + "Public address %s, expected 1.1.1.15", pub_str); + + /************************************************************** + * Change policy to be 5-tuple (log-all) then create a mapping + */ + cgnat_policy_change("POLICY1", 10, "100.64.0.0/12", "POOL1", "dp2T1", + CGN_MAP_EIM, CGN_FLTR_EIF, CGN_5TUPLE, true); + + subs_addr = dpt_init_ipaddr(subs_str, "100.64.0.3"); + subs_port = 22; + dp_test_fail_unless(subs_addr, "subs_addr"); + + dpt_init_ipaddr(pub_str, "1.1.1.16"); + pub_port = 1024; + + if (debug) + printf("Request mapping %s port %d\n", pub_str, pub_port); + + rc = dpt_cgn_map(false, real_ifname, 12000, 17, subs_str, subs_port, + pub_str, &pub_port); + + dp_test_fail_unless(rc == 0, "map command rc=%d", -rc); + + dp_test_fail_unless(pub_port == 1024, + "Public port %d, expected 1024", pub_port); + dp_test_fail_unless(!strcmp(pub_str, "1.1.1.16"), + "Public address %s, expected 1.1.1.16", pub_str); + + if (debug) { + dpt_cgn_show_session("subs-addr 100.64.0.3", 0, + false, true, true); + + dp_test_npf_cmd_fmt(false, "cgn-op update session"); + dp_test_npf_cmd_fmt(false, "cgn-op update subscriber"); + + for (i = 0; i < CGN_SESS_GC_COUNT + 1; i++) + dp_test_npf_cmd_fmt(false, "cgn-op ut gc"); + + dpt_cgn_show_session("subs-addr 100.64.0.3", 0, + false, true, true); + + dpt_cgn_show_summary(true); + } + + /************************************************************** + * Send a packet matching the above 3-tuple session + */ + cgnat_udp("dp1T0", "aa:bb:cc:dd:1:a1", 0, + "100.64.0.3", 22, "1.1.1.1", 38, + "1.1.1.16", 1024, "1.1.1.1", 38, + "aa:bb:cc:dd:2:b1", 0, "dp2T1", 
+ DP_TEST_FWD_FORWARDED); + + if (debug) { + dp_test_npf_cmd_fmt(false, "cgn-op update session"); + dp_test_npf_cmd_fmt(false, "cgn-op update subscriber"); + + dpt_cgn_show_session("subs-addr 100.64.0.3", 0, + false, true, true); + } + + /**************************************************************** + * Cleanup cgnat45 + */ + cgnat_policy_del("POLICY1", 10, "dp2T1"); + + dp_test_npf_cmd_fmt(false, "nat-ut pool delete POOL1"); + +} DP_END_TEST; /* cgnat45 */ + +/* + * cgnat46 - Verify CGNAT responds to echo request sent to CGNAT pool address + * on the outside interface. + */ +DP_DECL_TEST_CASE(npf_cgnat, cgnat46, cgnat_setup, cgnat_teardown); +DP_START_TEST(cgnat46, test) +{ + struct dp_test_expected *exp; + struct rte_mbuf *test_pak, *exp_pak; + int payload_len = 40; + + dpt_cgn_cmd_fmt(false, true, + "nat-ut pool add POOL1 " + "type=cgnat " + "address-range=RANGE1/1.1.1.11-1.1.1.20 " + "log-pba=yes " + ""); + + cgnat_policy_add("POLICY1", 10, "100.64.0.0/12", "POOL1", + "dp2T1", CGN_MAP_EIM, CGN_FLTR_EIF, CGN_3TUPLE, true); + + /* + * Send ICMP Echo Request to the outside interface dp2T1 with dest + * addr set to CGNAT pool address, and check for reply. 
+ */ + test_pak = dp_test_create_icmp_ipv4_pak("1.1.1.1", + "1.1.1.11", + ICMP_ECHO /* echo request */, + 0 /* no code */, + DPT_ICMP_ECHO_DATA(0, 0), + 1 /* one mbuf */, + &payload_len, + NULL, NULL, NULL); + + (void)dp_test_pktmbuf_eth_init(test_pak, + dp_test_intf_name2mac_str("dp2T1"), + "aa:bb:cc:dd:2:b1", + RTE_ETHER_TYPE_IPV4); + + exp_pak = dp_test_create_icmp_ipv4_pak("1.1.1.11", + "1.1.1.1", + ICMP_ECHOREPLY /* echo reply */, + 0 /* no code */, + DPT_ICMP_ECHO_DATA(0, 0), + 1 /* one mbuf */, + &payload_len, + NULL, NULL, NULL); + + (void)dp_test_pktmbuf_eth_init(exp_pak, + "aa:bb:cc:dd:2:b1", + dp_test_intf_name2mac_str("dp2T1"), + RTE_ETHER_TYPE_IPV4); + + /* Create pak we expect to see in local_packet */ + exp = dp_test_exp_create(exp_pak); + rte_pktmbuf_free(exp_pak); + dp_test_exp_set_oif_name(exp, "dp2T1"); + dp_test_exp_set_fwd_status(exp, DP_TEST_FWD_FORWARDED); + + dp_test_pak_receive(test_pak, "dp2T1", exp); + + /* Cleanup cgnat46 */ + cgnat_policy_del("POLICY1", 10, "dp2T1"); + + dp_test_npf_cmd_fmt(false, "nat-ut pool delete POOL1"); + +} DP_END_TEST; /* cgnat46 */ + +/* + * npf_cgnat_47 - Tests that two different subscribers may be allocated port + * blocks from the same public address. Note, address-pool-pairing must be + * disabled for this. 
+ * + * Private Public + * dp1T0 +---+ dp2T1 + * 100.64.0.0/24 ----------| |--------------- 1.1.1.0/24 + * +---+ + */ +DP_DECL_TEST_CASE(npf_cgnat, cgnat47, cgnat_setup, cgnat_teardown); +DP_START_TEST(cgnat47, test) +{ + /* Create address pool with just 2 addresses */ + dpt_cgn_cmd_fmt(false, true, + "nat-ut pool add POOL1 " + "type=cgnat " + "address-range=RANGE1/1.1.1.11-1.1.1.12,shared=yes " + "block-size=128 " + "max-blocks=4 " + ""); + + cgnat_policy_add("POLICY1", 10, "100.64.0.0/12", "POOL1", "dp2T1", + CGN_MAP_EIM, CGN_FLTR_EIF, CGN_3TUPLE, true); + + /* + * Packet #1 + * 100.64.0.1:49152 / 1.1.1.11:1024 --> dst 1.1.1.1:80 + */ + cgnat_tcp(TH_SYN, "dp1T0", "aa:bb:cc:dd:1:a1", + "100.64.0.1", 49152, "1.1.1.1", 80, + "1.1.1.11", 1024, "1.1.1.1", 80, + "aa:bb:cc:dd:2:b1", "dp2T1", + DP_TEST_FWD_FORWARDED); + + /* + * Packet #2 + * 100.64.0.2:2345 / 1.1.1.12:1024 --> dst 1.1.1.1:80 + */ + cgnat_tcp(TH_SYN, "dp1T0", "aa:bb:cc:dd:1:a2", + "100.64.0.2", 2345, "1.1.1.1", 80, + "1.1.1.12", 1024, "1.1.1.1", 80, + "aa:bb:cc:dd:2:b1", "dp2T1", + DP_TEST_FWD_FORWARDED); + + /* + * Packet #3 -- No more free public addrs, so should use second + * port-block from first public address. + * + * 100.64.0.3:4567 / 1.1.1.11:1152 --> dst 1.1.1.1:80 + */ + cgnat_tcp(TH_SYN, "dp1T0", "aa:bb:cc:dd:1:a4", + "100.64.0.3", 4567, "1.1.1.1", 80, + "1.1.1.11", 1024 + 128, "1.1.1.1", 80, + "aa:bb:cc:dd:2:b1", "dp2T1", + DP_TEST_FWD_FORWARDED); + + /* Cleanup cgnat47 */ + cgnat_policy_del("POLICY1", 10, "dp2T1"); + + dp_test_npf_cmd_fmt(false, "nat-ut pool delete POOL1"); + +} DP_END_TEST; /* cgnat47 */ + +/* + * cgnat48 -- Exercises threshold add/del code paths, and apm pb full code + * path. Does not verify log messages. 
+ */ +DP_DECL_TEST_CASE(npf_cgnat, cgnat48, cgnat_setup, cgnat_teardown); +DP_START_TEST(cgnat48, test) +{ + /* + * 10 public addresses, 4 blocks of 64 ports per address + */ + dpt_cgn_cmd_fmt(false, true, + "nat-ut pool add POOL1 " + "type=cgnat " + "address-range=RANGE1/1.1.1.11-1.1.1.20 " + "port-range=1024-1279 " + "block-size=64 " + "log-pba=yes " + ""); + + cgnat_policy_add("POLICY1", 10, "100.64.0.0/12", "POOL1", + "dp2T1", CGN_MAP_EIM, CGN_FLTR_EIF, CGN_5TUPLE, true); + + dpt_cgn_cmd_fmt(false, true, + "cgn-ut warning add mapping-table threshold 50"); + dpt_cgn_cmd_fmt(false, true, + "cgn-ut warning add session-table threshold 50"); + dpt_cgn_cmd_fmt(false, true, + "cgn-ut warning add subscriber-table threshold 50"); + dpt_cgn_cmd_fmt(false, true, + "cgn-ut warning add public-addresses threshold 50"); + + char real_ifname[IFNAMSIZ]; + + dp_test_intf_real("dp2T1", real_ifname); + + uint i, j, outer_count, inner_count; + static char subs_str[20]; + uint32_t subs_addr; + uint16_t subs_port; + + outer_count = 10; /* 10 subscribers == 10 public addrs */ + inner_count = (3 * 64) + 1; /* Causes 4 port-blocks to be allocd */ + + /* Initial addr and port */ + subs_addr = dpt_init_ipaddr(subs_str, "100.64.0.1"); + subs_port = 1024; + + for (i = 0; i < outer_count; i++) { + for (j = 0; j < inner_count; j++) { + /* Create session */ + dpt_cgn_map(false, real_ifname, 120, 17, + subs_str, subs_port, NULL, NULL); + subs_port++; + } + + /* New subscriber means new public addr is allocd */ + subs_port = 1024; + subs_addr = dpt_incr_ipaddr(subs_addr, subs_str, + sizeof(subs_str)); + } + + dpt_cgn_show_source(false); + + dp_test_npf_cmd_fmt(false, "cgn-op clear session"); + + for (i = 0; i < CGN_SESS_GC_COUNT + 1; i++) + dp_test_npf_cmd_fmt(false, "cgn-op ut gc"); + + dpt_cgn_cmd_fmt(false, true, + "cgn-ut warning del mapping-table threshold 50"); + dpt_cgn_cmd_fmt(false, true, + "cgn-ut warning del session-table threshold 50"); + dpt_cgn_cmd_fmt(false, true, + "cgn-ut 
warning del subscriber-table threshold 50"); + dpt_cgn_cmd_fmt(false, true, + "cgn-ut warning del public-addresses threshold 50"); + + /* Cleanup cgnat48 */ + cgnat_policy_del("POLICY1", 10, "dp2T1"); + + dp_test_npf_cmd_fmt(false, "nat-ut pool delete POOL1"); + +} DP_END_TEST; /* cgnat48 */ + +/* + * cgnat49 - Tests a policy being uncfgd and re-cfgd while a subscriber + * structure exists + */ +DP_DECL_TEST_CASE(npf_cgnat, cgnat49, cgnat_setup, cgnat_teardown); +DP_START_TEST(cgnat49, test) +{ + dpt_cgn_cmd_fmt(false, true, + "nat-ut pool add POOL1 " + "type=cgnat " + "max-blocks=2 " + "prefix=RANGE1/1.1.1.192/26 " + ""); + + cgnat_policy_add("POLICY1", 10, "100.64.0.0/12", "POOL1", "dp2T1", + CGN_MAP_EIM, CGN_FLTR_EIF, CGN_5TUPLE, true); + + /* + * 100.64.0.1:49152 / 1.1.1.192:1024 --> dst 1.1.1.1:80 + */ + cgnat_tcp(TH_SYN, "dp1T0", "aa:bb:cc:dd:1:a1", + "100.64.0.1", 49152, "1.1.1.1", 80, + "1.1.1.192", 1024, "1.1.1.1", 80, + "aa:bb:cc:dd:2:b1", "dp2T1", + DP_TEST_FWD_FORWARDED); + + /* Delete and re-add policy */ + cgnat_policy_del("POLICY1", 10, "dp2T1"); + cgnat_policy_add("POLICY1", 10, "100.64.0.0/12", "POOL1", "dp2T1", + CGN_MAP_EIM, CGN_FLTR_EIF, CGN_5TUPLE, true); + + /* + * Pkt #2. Same source addr, different source port. + * + * 100.64.0.1:3456 / 1.1.1.192:1025 --> dst 1.1.1.1:80 + */ + cgnat_tcp(TH_SYN, "dp1T0", "aa:bb:cc:dd:1:a1", + "100.64.0.1", 3456, "1.1.1.1", 80, + "1.1.1.192", 1025, "1.1.1.1", 80, + "aa:bb:cc:dd:2:b1", "dp2T1", + DP_TEST_FWD_FORWARDED); + + /* Cleanup cgnat49 */ + cgnat_policy_del("POLICY1", 10, "dp2T1"); + + dp_test_npf_cmd_fmt(false, "nat-ut pool delete POOL1"); + +} DP_END_TEST; /* cgnat49 */ + + +/* + * cgnat_log_methods -- Tests enabling/disabling of log methods. 
+ */ +DP_DECL_TEST_CASE(npf_cgnat, cgnat_log_methods, cgnat_setup, cgnat_teardown); +DP_START_TEST(cgnat_log_methods, test) +{ + int rc; + int rte_log_rc; + enum cgn_log_type ltype; + + /* test using an invalid log type */ + /* add unknown log handler */ + rc = cgn_log_enable_handler(CGN_LOG_TYPE_COUNT, "rte_log"); + dp_test_fail_unless(rc == -EINVAL, "enable logging for invalid " + "log type (ltype %d)", CGN_LOG_TYPE_COUNT); + + for (ltype = 0; ltype < CGN_LOG_TYPE_COUNT; ltype++) { + /* remove rte_log handler, in case it was already enabled */ + rte_log_rc = cgn_log_disable_handler(ltype, "rte_log"); + + /* add unknown log handler */ + rc = cgn_log_enable_handler(ltype, "unknown"); + dp_test_fail_unless(rc == -ENOENT, "enable unknown cgnat log " + "handler (ltype %d)", ltype); + + /* disable unknown log handler */ + rc = cgn_log_disable_handler(ltype, "unknown"); + dp_test_fail_unless(rc == -ENOENT, "disable unknown cgnat log " + "handler (ltype %d)", ltype); + + /* add the rte_log handler */ + rc = cgn_log_enable_handler(ltype, "rte_log"); + dp_test_fail_unless(rc == 0, "enable rte_log cgnat log " + "handler (ltype %d)", ltype); + + /* enable the rte_log handler a second time */ + rc = cgn_log_enable_handler(ltype, "rte_log"); + dp_test_fail_unless(rc == -EEXIST, "enable rte_log cgnat log " + "handler twice (ltype %d)", ltype); + + /* disable the rte_log handler */ + rc = cgn_log_disable_handler(ltype, "rte_log"); + dp_test_fail_unless(rc == 0, "disable rte_log cgnat log " + "handler (ltype %d)", ltype); + + /* disable the rte_log handler when not enabled */ + rc = cgn_log_disable_handler(ltype, "rte_log"); + dp_test_fail_unless(rc == -ENOENT, "disable rte_log cgnat log " + "handler when not enabled (ltype %d)", + ltype); + + /* + * If rte_log handler was initially enabled, then enable it + * again. 
+ */ + if (rte_log_rc == 0) { + rc = cgn_log_enable_handler(ltype, "rte_log"); + dp_test_fail_unless(rc == 0, "reenable rte_log cgnat " + "log handler (ltype %d)", ltype); + } + } +} DP_END_TEST; + + +/* + * npf_cgnat_50 - Tests policy address-group prefix matching + * + * Private Public + * dp1T0 +---+ dp2T1 + * 100.64.0.0/24 ----------| |--------------- 1.1.1.0/24 + * +---+ + */ +DP_DECL_TEST_CASE(npf_cgnat, cgnat50, cgnat_setup, cgnat_teardown); +DP_START_TEST(cgnat50, test) +{ + dpt_cgn_cmd_fmt(false, true, + "nat-ut pool add POOL1 " + "type=cgnat " + "address-range=RANGE1/1.1.1.11-1.1.1.20 " + "prefix=RANGE2/1.1.1.192/26 " + "log-pba=yes " + ""); + + /* + * Add policy prefix 100.64.0.128/30 and verify that it matches + * 100.64.0.128 - 100.64.0.131, and does *not* match 100.64.0.127 or + * 100.64.0.132 + */ + cgnat_policy_add("POLICY1", 10, "100.64.0.128/30", "POOL1", + "dp2T1", CGN_MAP_EIM, CGN_FLTR_EIF, CGN_3TUPLE, true); + + /* + * 100.64.0.129:1234 / 1.1.1.11:1024 --> dst 1.1.1.1:80 + */ + cgnat_udp("dp1T0", "aa:bb:cc:dd:1:a1", 0, + "100.64.0.129", 1234, "1.1.1.1", 80, + "1.1.1.11", 1024, "1.1.1.1", 80, + "aa:bb:cc:dd:2:b1", 0, "dp2T1", + DP_TEST_FWD_FORWARDED); + + /* + * 100.64.0.130:4321 / 1.1.1.12:1024 --> dst 1.1.1.1:80 + */ + cgnat_udp("dp1T0", "aa:bb:cc:dd:1:a1", 0, + "100.64.0.130", 4321, "1.1.1.1", 80, + "1.1.1.12", 1024, "1.1.1.1", 80, + "aa:bb:cc:dd:2:b1", 0, "dp2T1", + DP_TEST_FWD_FORWARDED); + + /* Src 100.64.0.127 - No translation */ + cgnat_udp("dp1T0", "aa:bb:cc:dd:1:a1", 0, + "100.64.0.127", 1234, "1.1.1.1", 80, + "100.64.0.127", 1234, "1.1.1.1", 80, + "aa:bb:cc:dd:2:b1", 0, "dp2T1", + DP_TEST_FWD_FORWARDED); + + /* Src 100.64.0.132 - No translation */ + cgnat_udp("dp1T0", "aa:bb:cc:dd:1:a1", 0, + "100.64.0.132", 1235, "1.1.1.1", 80, + "100.64.0.132", 1235, "1.1.1.1", 80, + "aa:bb:cc:dd:2:b1", 0, "dp2T1", + DP_TEST_FWD_FORWARDED); + + /* + * 100.64.0.128:1234 / 1.1.1.13:1024 --> dst 1.1.1.1:80 + */ + cgnat_udp("dp1T0", "aa:bb:cc:dd:1:a1", 
0, + "100.64.0.128", 1234, "1.1.1.1", 80, + "1.1.1.13", 1024, "1.1.1.1", 80, + "aa:bb:cc:dd:2:b1", 0, "dp2T1", + DP_TEST_FWD_FORWARDED); + + /* + * 100.64.0.131:3333 / 1.1.1.14:1024 --> dst 1.1.1.1:80 + */ + cgnat_udp("dp1T0", "aa:bb:cc:dd:1:a1", 0, + "100.64.0.131", 3333, "1.1.1.1", 80, + "1.1.1.14", 1024, "1.1.1.1", 80, + "aa:bb:cc:dd:2:b1", 0, "dp2T1", + DP_TEST_FWD_FORWARDED); + + /* + * Add prefix 100.64.0.0/30 to match address-group and test first and + * last address in the prefix + */ + dp_test_npf_cmd_fmt(false, + "npf-ut fw table add POLICY1_AG 100.64.0.0/30"); + + /* + * 100.64.0.1:1111 / 1.1.1.15:1024 --> dst 1.1.1.1:80 + */ + cgnat_udp("dp1T0", "aa:bb:cc:dd:1:a1", 0, + "100.64.0.1", 1111, "1.1.1.1", 80, + "1.1.1.15", 1024, "1.1.1.1", 80, + "aa:bb:cc:dd:2:b1", 0, "dp2T1", + DP_TEST_FWD_FORWARDED); + + /* + * 100.64.0.3:1111 / 1.1.1.16:1024 --> dst 1.1.1.1:80 + */ + cgnat_udp("dp1T0", "aa:bb:cc:dd:1:a1", 0, + "100.64.0.3", 1111, "1.1.1.1", 80, + "1.1.1.16", 1024, "1.1.1.1", 80, + "aa:bb:cc:dd:2:b1", 0, "dp2T1", + DP_TEST_FWD_FORWARDED); + + /* + * 100.64.0.0:2222 / 1.1.1.17:1024 --> dst 1.1.1.1:80 + */ + cgnat_udp("dp1T0", "aa:bb:cc:dd:1:a1", 0, + "100.64.0.0", 2222, "1.1.1.1", 80, + "1.1.1.17", 1024, "1.1.1.1", 80, + "aa:bb:cc:dd:2:b1", 0, "dp2T1", + DP_TEST_FWD_FORWARDED); + + /* Cleanup cgnat50 */ + cgnat_policy_del("POLICY1", 10, "dp2T1"); + + dp_test_npf_cmd_fmt(false, "nat-ut pool delete POOL1"); + +} DP_END_TEST; /* cgnat50 */ + +/* + * cgnat51 - Tests changing a policy match address-group on a live policy + */ +DP_DECL_TEST_CASE(npf_cgnat, cgnat51, cgnat_setup, cgnat_teardown); +DP_START_TEST(cgnat51, test) +{ + char real_ifname[IFNAMSIZ]; + + dp_test_intf_real("dp2T1", real_ifname); + + dpt_cgn_cmd_fmt(false, true, + "nat-ut pool add POOL1 " + "type=cgnat " + "max-blocks=2 " + "prefix=RANGE1/1.1.1.192/26 " + ""); + + dpt_addr_grp_create("MATCH_AG1", "100.64.0.0/12"); + dpt_addr_grp_create("MATCH_AG2", "100.64.0.0/12"); + + 
dp_test_npf_cmd_fmt(false, + "cgn-ut policy add POLICY1 priority=10 " + "match-ag=MATCH_AG1 pool=POOL1 log-sess-all=no"); + + dp_test_npf_cmd_fmt(false, + "cgn-ut policy attach name=POLICY1 intf=%s", + real_ifname); + + /* Change match address-group after attach */ + dp_test_npf_cmd_fmt(false, + "cgn-ut policy add POLICY1 priority=10 " + "match-ag=MATCH_AG2 pool=POOL1 log-sess-all=no"); + + /* + * 100.64.0.1:49152 / 1.1.1.192:1024 --> dst 1.1.1.1:80 + */ + cgnat_tcp(TH_SYN, "dp1T0", "aa:bb:cc:dd:1:a1", + "100.64.0.1", 49152, "1.1.1.1", 80, + "1.1.1.192", 1024, "1.1.1.1", 80, + "aa:bb:cc:dd:2:b1", "dp2T1", + DP_TEST_FWD_FORWARDED); + + /* + * Pkt #2. Same source addr, different source port. + * + * 100.64.0.1:3456 / 1.1.1.192:1025 --> dst 1.1.1.1:80 + */ + cgnat_tcp(TH_SYN, "dp1T0", "aa:bb:cc:dd:1:a1", + "100.64.0.1", 3456, "1.1.1.1", 80, + "1.1.1.192", 1025, "1.1.1.1", 80, + "aa:bb:cc:dd:2:b1", "dp2T1", + DP_TEST_FWD_FORWARDED); + + /* Cleanup cgnat51 */ + dp_test_npf_cmd_fmt(false, + "cgn-ut policy detach name=POLICY1 intf=%s", + real_ifname); + + dp_test_npf_cmd_fmt(false, "cgn-ut policy delete POLICY1"); + + dpt_addr_grp_destroy("MATCH_AG1", NULL); + dpt_addr_grp_destroy("MATCH_AG2", NULL); + + dp_test_npf_cmd_fmt(false, "nat-ut pool delete POOL1"); + +} DP_END_TEST; /* cgnat51 */ + + +/* + * cgnat52 - Test NAT pool lookup using the hidden NAT pool address-group. 
+ */ +DP_DECL_TEST_CASE(npf_cgnat, cgnat52, cgnat_setup, cgnat_teardown); +DP_START_TEST(cgnat52, test) +{ + struct nat_pool *np; + uint32_t haddr; + + dpt_cgn_cmd_fmt(false, true, + "nat-ut pool add POOL1 " + "type=cgnat " + "address-range=RANGE1/10.0.1.1-10.0.1.9 " + "prefix=RANGE2/10.0.2.0/24 " + "prefix=RANGE3/10.0.3.2/31 " + "prefix=RANGE4/10.0.4.3/32 " + "address-range=RANGE5/10.0.5.3-10.0.5.3 " + ); + + np = nat_pool_lookup("POOL1"); + dp_test_fail_unless(np, "np"); + + haddr = 0x0a000100; + dp_test_fail_unless(!nat_pool_is_pool_addr(np, htonl(haddr)), + "0x%08X in pool", haddr); + + haddr = 0x0a000101; + dp_test_fail_unless(nat_pool_is_pool_addr(np, htonl(haddr)), + "0x%08X not in pool", haddr); + + haddr = 0x0a000109; + dp_test_fail_unless(nat_pool_is_pool_addr(np, htonl(haddr)), + "0x%08X not in pool", haddr); + + haddr = 0x0a00010a; + dp_test_fail_unless(!nat_pool_is_pool_addr(np, htonl(haddr)), + "0x%08X in pool", haddr); + + haddr = 0x0a000201; + dp_test_fail_unless(nat_pool_is_pool_addr(np, htonl(haddr)), + "0x%08X not in pool", haddr); + + /* + * First and last addr of 10.0.2.0/24 should not be in address-group + */ + haddr = 0x0a000200; + dp_test_fail_unless(!nat_pool_is_pool_addr(np, htonl(haddr)), + "0x%08X in pool", haddr); + + haddr = 0x0a000201; + dp_test_fail_unless(nat_pool_is_pool_addr(np, htonl(haddr)), + "0x%08X not in pool", haddr); + + haddr = 0x0a0002fe; + dp_test_fail_unless(nat_pool_is_pool_addr(np, htonl(haddr)), + "0x%08X not in pool", haddr); + + haddr = 0x0a0002ff; + dp_test_fail_unless(!nat_pool_is_pool_addr(np, htonl(haddr)), + "0x%08X in pool", haddr); + + /* + * 10.0.3.2/31 is a special case. 
10.0.3.2 and 10.0.3.3 should be in + * addr-grp + */ + haddr = 0x0a000302; + dp_test_fail_unless(nat_pool_is_pool_addr(np, htonl(haddr)), + "0x%08X not in pool", haddr); + + haddr = 0x0a000303; + dp_test_fail_unless(nat_pool_is_pool_addr(np, htonl(haddr)), + "0x%08X not in pool", haddr); + + /* + * 10.0.4.3/32 + */ + haddr = 0x0a000403; + dp_test_fail_unless(nat_pool_is_pool_addr(np, htonl(haddr)), + "0x%08X not in pool", haddr); + + /* + * 10.0.5.3 - 10.0.5.3 + */ + haddr = 0x0a000503; + dp_test_fail_unless(nat_pool_is_pool_addr(np, htonl(haddr)), + "0x%08X not in pool", haddr); + + /* + * Add RANGE6. Only the NAT pool ranges data should be regenerated. + * The NAT pool pointer should remain valid. + */ + dpt_cgn_cmd_fmt(false, true, + "nat-ut pool add POOL1 " + "type=cgnat " + "address-range=RANGE1/10.0.1.1-10.0.1.9 " + "prefix=RANGE2/10.0.2.0/24 " + "prefix=RANGE3/10.0.3.2/31 " + "prefix=RANGE4/10.0.4.3/32 " + "address-range=RANGE5/10.0.5.3-10.0.5.3 " + "address-range=RANGE6/10.0.6.5-10.0.6.6 " + ); + + haddr = 0x0a000100; + dp_test_fail_unless(!nat_pool_is_pool_addr(np, htonl(haddr)), + "0x%08X in pool", haddr); + + /* + * 10.0.6.5 - 10.0.6.6 + */ + haddr = 0x0a000605; + dp_test_fail_unless(nat_pool_is_pool_addr(np, htonl(haddr)), + "0x%08X not in pool", haddr); + + haddr = 0x0a000606; + dp_test_fail_unless(nat_pool_is_pool_addr(np, htonl(haddr)), + "0x%08X not in pool", haddr); + + dp_test_npf_cmd_fmt(false, "nat-ut pool delete POOL1"); + +} DP_END_TEST; /* cgnat52 */ + + +/* + * npf_cgnat_53 - Checks TCP timeout values for a 5-tuple session + * + * Private Public + * dp1T0 +---+ dp2T1 + * 100.64.0.0/24 ----------| |--------------- 1.1.1.0/24 + * +---+ + */ +DP_DECL_TEST_CASE(npf_cgnat, cgnat53, cgnat_setup, cgnat_teardown); +DP_START_TEST(cgnat53, test) +{ + dpt_cgn_cmd_fmt(false, true, + "nat-ut pool add POOL1 " + "type=cgnat " + "address-range=RANGE1/1.1.1.11-1.1.1.11 " + "prefix=RANGE2/1.1.1.192/26 " + ""); + + cgnat_policy_add("POLICY1", 10, 
"100.64.0.0/12", "POOL1", "dp2T1", + CGN_MAP_EIM, CGN_FLTR_EIF, CGN_5TUPLE, true); + + char fltr[200]; + int timeout; + int exp_timeout = CGN_DEF_ETIME_TCP_ESTBD; + uint16_t subs_port = 4567; + uint16_t pub_port = 1024; + + /* + * 1st repeat: Default Established timer + * 2nd repeat: Set TCP Established port 80 time to 9 + * 3rd repeat: Set TCP Established port 80 time to 0 + */ + uint repeat = 2; + +repeat: + snprintf(fltr, sizeof(fltr), + "proto 6 subs-addr 100.64.0.1 subs-port %u " + "dst-addr 1.1.1.1 dst-port 80", subs_port); + + /* + * Check 5-tuple TCP timeout + */ + + /* Forw SYN */ + cgnat_tcp(TH_SYN, "dp1T0", "aa:bb:cc:dd:1:a1", + "100.64.0.1", subs_port, "1.1.1.1", 80, + "1.1.1.11", pub_port, "1.1.1.1", 80, + "aa:bb:cc:dd:2:b1", "dp2T1", + DP_TEST_FWD_FORWARDED); + + timeout = dpt_cgn_sess_get_timeout(fltr, false); + dp_test_fail_unless(timeout == CGN_DEF_ETIME_TCP_OPENING, + "Port %u, Timeout %d, expected %d", + subs_port, timeout, CGN_DEF_ETIME_TCP_OPENING); + + /* Back SYN */ + cgnat_tcp(TH_SYN | TH_ACK, "dp2T1", "aa:bb:cc:dd:2:b1", + "1.1.1.1", 80, "1.1.1.11", pub_port, + "1.1.1.1", 80, "100.64.0.1", subs_port, + "aa:bb:cc:dd:1:a1", "dp1T0", + DP_TEST_FWD_FORWARDED); + + /* Session will be in Established state now */ + timeout = dpt_cgn_sess_get_timeout(fltr, false); + dp_test_fail_unless(timeout == exp_timeout, + "Port %u, Timeout %d, expected %d", + subs_port, timeout, exp_timeout); + + /* Forw ACK */ + cgnat_tcp(TH_ACK, "dp1T0", "aa:bb:cc:dd:1:a1", + "100.64.0.1", subs_port, "1.1.1.1", 80, + "1.1.1.11", pub_port, "1.1.1.1", 80, + "aa:bb:cc:dd:2:b1", "dp2T1", + DP_TEST_FWD_FORWARDED); + + timeout = dpt_cgn_sess_get_timeout(fltr, false); + dp_test_fail_unless(timeout == exp_timeout, + "Port %u, Timeout %d, expected %d", + subs_port, timeout, exp_timeout); + + /* Forw FIN */ + cgnat_tcp(TH_FIN, "dp1T0", "aa:bb:cc:dd:1:a1", + "100.64.0.1", subs_port, "1.1.1.1", 80, + "1.1.1.11", pub_port, "1.1.1.1", 80, + "aa:bb:cc:dd:2:b1", "dp2T1", + 
DP_TEST_FWD_FORWARDED); + + timeout = dpt_cgn_sess_get_timeout(fltr, false); + dp_test_fail_unless(timeout == CGN_DEF_ETIME_TCP_CLOSING, + "Timeout %d, expected %d", + timeout, CGN_DEF_ETIME_TCP_CLOSING); + + /* Back FIN|ACK */ + cgnat_tcp(TH_FIN | TH_ACK, "dp2T1", "aa:bb:cc:dd:2:b1", + "1.1.1.1", 80, "1.1.1.11", pub_port, + "1.1.1.1", 80, "100.64.0.1", subs_port, + "aa:bb:cc:dd:1:a1", "dp1T0", + DP_TEST_FWD_FORWARDED); + + timeout = dpt_cgn_sess_get_timeout(fltr, false); + dp_test_fail_unless(timeout == CGN_DEF_ETIME_TCP_CLOSING, + "Timeout %d, expected %d", + timeout, CGN_DEF_ETIME_TCP_CLOSING); + + if (repeat >= 1 && repeat <= 2) { + char *resp = NULL; + bool err; + + subs_port++; + pub_port++; + + if (repeat == 2) { + /* First repeat */ + resp = dp_test_console_request_w_err( + "cgn-ut session-timeouts tcp-estab " + "port 80 timeout 9", + &err, false); + + /* Expected Estbd timeout is now 9 */ + exp_timeout = 9; + + } else if (repeat == 1) { + /* Second repeat */ + resp = dp_test_console_request_w_err( + "cgn-ut session-timeouts tcp-estab " + "port 80 timeout 0", + &err, false); + + /* Expected Estbd timeout is now back to default */ + exp_timeout = 7440; + } + + repeat--; + + if (!resp || err) + dp_test_fail("cgnat port timeout command failed"); + + free(resp); + goto repeat; + } + + /* Cleanup */ + cgnat_policy_del("POLICY1", 10, "dp2T1"); + + dp_test_npf_cmd_fmt(false, "nat-ut pool delete POOL1"); + +} DP_END_TEST; /* cgnat53 */ + + +/* + * cgnat_54 - + * + * Private Public + * dp1T0 +---+ dp2T1 + * 100.64.0.0/24 ----------| |--------------- 1.1.1.0/24 + * | | dp2T2 + * | |--------------- 2.2.2.0/24 + * +---+ + */ +DP_DECL_TEST_CASE(npf_cgnat, cgnat54, NULL, NULL); +DP_START_TEST(cgnat54, test) +{ + dp_test_nl_add_ip_addr_and_connected("dp1T0", "100.64.0.254/16"); + dp_test_nl_add_ip_addr_and_connected("dp2T1", "1.1.1.254/24"); + dp_test_nl_add_ip_addr_and_connected("dp2T2", "2.2.2.254/24"); + + dp_test_netlink_add_neigh("dp1T0", "100.64.0.1", 
"aa:bb:cc:dd:1:a1"); + dp_test_netlink_add_neigh("dp1T0", "100.64.0.2", "aa:bb:cc:dd:1:a2"); + + dp_test_netlink_add_neigh("dp2T1", "1.1.1.1", "aa:bb:cc:dd:2:b1"); + dp_test_netlink_add_neigh("dp2T1", "1.1.1.2", "aa:bb:cc:dd:2:b2"); + + dp_test_netlink_add_neigh("dp2T2", "2.2.2.1", "aa:bb:cc:dd:3:c1"); + dp_test_netlink_add_neigh("dp2T2", "2.2.2.2", "aa:bb:cc:dd:3:c2"); + + dpt_cgn_cmd_fmt(false, true, + "nat-ut pool add POOL1 " + "type=cgnat " + "prefix=RANGE1/10.0.1.0/24 " + ""); + + dpt_cgn_cmd_fmt(false, true, + "nat-ut pool add POOL2 " + "type=cgnat " + "prefix=RANGE1/10.0.2.0/24 " + ""); + + cgnat_policy_add("POLICY1", 10, "100.64.0.0/12", "POOL1", + "dp2T1", CGN_MAP_EIM, CGN_FLTR_EIF, CGN_3TUPLE, true); + + cgnat_policy_add("POLICY2", 10, "100.64.0.0/12", "POOL2", + "dp2T2", CGN_MAP_EIM, CGN_FLTR_EIF, CGN_3TUPLE, true); + + bool debug = false; + + dpt_cgn_print_json("cgn-op show interface", debug); + + /* + * Add routes: 3.3.3.0/24 -> dp2T1 + * 4.4.4.0/24 -> dp2T2 + */ + dp_test_netlink_add_route("3.3.3.0/24 nh 1.1.1.1 int:dp2T1"); + dp_test_netlink_add_route("4.4.4.0/24 nh 2.2.2.1 int:dp2T2"); + + /* + * 100.64.0.1 -> 3.3.3.3, routed out dp2T1 + */ + cgnat_udp("dp1T0", "aa:bb:cc:dd:1:a1", 0, + "100.64.0.1", 49152, "3.3.3.3", 80, + "10.0.1.1", 1024, "3.3.3.3", 80, + "aa:bb:cc:dd:2:b1", 0, "dp2T1", + DP_TEST_FWD_FORWARDED); + + cgnat_udp("dp2T1", "aa:bb:cc:dd:2:b1", 0, + "3.3.3.3", 80, "10.0.1.1", 1024, + "3.3.3.3", 80, "100.64.0.1", 49152, + "aa:bb:cc:dd:1:a1", 0, "dp1T0", + DP_TEST_FWD_FORWARDED); + + /* + * 100.64.0.2 -> 4.4.4.4, routed out dp2T2 + */ + cgnat_udp("dp1T0", "aa:bb:cc:dd:1:a2", 0, + "100.64.0.2", 20123, "4.4.4.4", 80, + "10.0.2.1", 1024, "4.4.4.4", 80, + "aa:bb:cc:dd:3:c1", 0, "dp2T2", + DP_TEST_FWD_FORWARDED); + + cgnat_udp("dp2T2", "aa:bb:cc:dd:3:c1", 0, + "4.4.4.4", 80, "10.0.2.1", 1024, + "4.4.4.4", 80, "100.64.0.2", 20123, + "aa:bb:cc:dd:1:a2", 0, "dp1T0", + DP_TEST_FWD_FORWARDED); + + dpt_cgn_show_session(NULL, 10, false, debug, false); 
+ + /* + * Change route: 3.3.3.0/24 -> dp2T2 + */ + dp_test_netlink_del_route("3.3.3.0/24 nh 1.1.1.1 int:dp2T1"); + dp_test_netlink_add_route("3.3.3.0/24 nh 2.2.2.1 int:dp2T2"); + + /* + * 100.64.0.1 -> 3.3.3.3, switched from dp2T1 to dp2T2 + */ + cgnat_udp("dp1T0", "aa:bb:cc:dd:1:a1", 0, + "100.64.0.1", 49152, "3.3.3.3", 80, + "10.0.1.1", 1024, "3.3.3.3", 80, + "aa:bb:cc:dd:3:c1", 0, "dp2T2", + DP_TEST_FWD_FORWARDED); + + cgnat_udp("dp2T2", "aa:bb:cc:dd:c:c1", 0, + "3.3.3.3", 80, "10.0.1.1", 1024, + "3.3.3.3", 80, "100.64.0.1", 49152, + "aa:bb:cc:dd:1:a1", 0, "dp1T0", + DP_TEST_FWD_FORWARDED); + + /* + * What happens to a new flow from 100.64.0.1 to 3.3.3.4 .. ? + * + * Address-pool pairing means it should use the same public address, + * 10.0.1.1. But this public address is on the policy on dp2T1. + */ + cgnat_udp("dp1T0", "aa:bb:cc:dd:1:a1", 0, + "100.64.0.1", 30001, "3.3.3.4", 80, + "10.0.1.1", 1025, "3.3.3.4", 80, + "aa:bb:cc:dd:3:c1", 0, "dp2T2", + DP_TEST_FWD_FORWARDED); + + dpt_cgn_print_json("cgn-op show interface", debug); + + dpt_cgn_show_session(NULL, 10, false, debug, false); + + dp_test_netlink_del_route("3.3.3.0/24 nh 2.2.2.1 int:dp2T2"); + dp_test_netlink_del_route("4.4.4.0/24 nh 2.2.2.1 int:dp2T2"); + + /* Unconfig */ + cgnat_policy_del("POLICY1", 10, "dp2T1"); + cgnat_policy_del("POLICY2", 10, "dp2T2"); + + dp_test_npf_cmd_fmt(false, "nat-ut pool delete POOL1"); + dp_test_npf_cmd_fmt(false, "nat-ut pool delete POOL2"); + + /* Cleanup */ + dp_test_netlink_del_neigh("dp1T0", "100.64.0.1", "aa:bb:cc:dd:1:a1"); + dp_test_netlink_del_neigh("dp1T0", "100.64.0.2", "aa:bb:cc:dd:1:a2"); + + dp_test_netlink_del_neigh("dp2T1", "1.1.1.1", "aa:bb:cc:dd:2:b1"); + dp_test_netlink_del_neigh("dp2T1", "1.1.1.2", "aa:bb:cc:dd:2:b2"); + + dp_test_netlink_del_neigh("dp2T2", "2.2.2.1", "aa:bb:cc:dd:3:c1"); + dp_test_netlink_del_neigh("dp2T2", "2.2.2.2", "aa:bb:cc:dd:3:c2"); + + dp_test_nl_del_ip_addr_and_connected("dp1T0", "100.64.0.254/16"); + 
dp_test_nl_del_ip_addr_and_connected("dp2T1", "1.1.1.254/24"); + dp_test_nl_del_ip_addr_and_connected("dp2T2", "2.2.2.254/24"); + +} DP_END_TEST; /* cgnat54 */ + + + + +#ifdef CGN_HASH_COMPARISON + +/****************************************************************** + * RTE hash v URCU hash + * + * Applies to tests cgnat100 and cgnat101 + */ +#define HASH_TEST_TABLE_SIZE (1024 * 128) +#define HASH_TEST_NENTRIES 100000 +#define HASH_TEST_NLOOKUPS 200000 + +/* Hash table key. Exactly 16 bytes */ +struct ipv4_3tuple { + uint32_t ifindex; + uint32_t addr; + uint16_t port; + uint16_t pad0; + uint8_t expired; + uint8_t ipproto; + uint16_t pad1; +} __attribute__((__packed__)); + +/* rte hash table entry */ +struct rte_hash_entry { + struct ipv4_3tuple key; +}; + +/* hash function used for rte and urcu */ +static uint32_t +rte_ipv4_hash(const void *data, __rte_unused uint32_t data_len, + uint32_t init_val) +{ + const struct ipv4_3tuple *k = data; + uint32_t rv; + + rv = rte_jhash_3words(k->port, k->addr, k->ipproto, k->ifindex); + return rv; +} + +/* + * match function for rte hash. Return 0 for match. + * + * Custom compare function is needed so we can ignore expired keys + */ +static int rte_hash_cmp_eq(const void *key1, const void *key2, size_t key_len) +{ + return memcmp(key1, key2, key_len); +} + +struct urcu_hash_entry { + struct cds_lfht_node node; + struct ipv4_3tuple key; +}; + +/* + * match function for urcu hash. Return 1 for match + */ +static int urcu_hash_match(struct cds_lfht_node *node, const void *void_key) +{ + struct urcu_hash_entry *ue1; + const struct ipv4_3tuple *key = void_key; + + ue1 = caa_container_of(node, struct urcu_hash_entry, node); + + int rc = memcmp(&ue1->key, key, sizeof(ue1->key)); + + return rc == 0 ? 
1 : 0; +} + +/* + * cgnat100 - URCU hash table + */ +DP_DECL_TEST_CASE(npf_cgnat, cgnat100, cgnat_setup, cgnat_teardown); +DP_START_TEST_DONT_RUN(cgnat100, test) +{ + struct urcu_hash_entry *ue, *lookup; + struct urcu_hash_entry base; + struct cds_lfht_iter iter; + struct cds_lfht_node *node; + struct cds_lfht *ht; + uint32_t hash; + uint i; + + /* + * URCU hash table is extendable, but we have omitted that here since + * the RTE hash table is *not* extendable. + */ + ht = cds_lfht_new(HASH_TEST_TABLE_SIZE, + HASH_TEST_TABLE_SIZE, + HASH_TEST_TABLE_SIZE, + 0, NULL); + dp_test_fail_unless(ht != NULL, "urcu hash table"); + + base.key.ifindex = 1; + base.key.addr = 0; + base.key.port = 1; + base.key.ipproto = 17; + base.key.expired = 0; + base.key.pad0 = 0; + base.key.pad1 = 0; + + /* + * Set count + */ + uint lookup_count = HASH_TEST_NLOOKUPS; + uint nentries = HASH_TEST_NENTRIES; + + printf("\n"); + printf("URCU Hash Test\n"); + + /* + * Populate table + */ + printf(" Populate table with %u entries, table size %u\n", + nentries, HASH_TEST_TABLE_SIZE); + + for (i = 0; i < nentries; i++) { + ue = zmalloc_aligned(sizeof(*ue)); + dp_test_fail_unless(ue != NULL, "malloc"); + + memcpy(&ue->key, &base.key, sizeof(ue->key)); + ue->key.addr = i; + + hash = rte_ipv4_hash(&ue->key, sizeof(ue->key), 0); + + /* Add */ + node = cds_lfht_add_unique(ht, hash, urcu_hash_match, + ue, &ue->node); + dp_test_fail_unless(node == &ue->node, "urcu add"); + + /* Lookup */ + cds_lfht_lookup(ht, hash, urcu_hash_match, &ue->key, &iter); + node = cds_lfht_iter_get_node(&iter); + lookup = caa_container_of(node, struct urcu_hash_entry, node); + dp_test_fail_unless(lookup == ue, "urcu lookup"); + } + + uint64_t nsecs1, nsecs2, elapsed; /* nanosecs */ + uint64_t average, overhead = 0; /* nanosecs */ + bool do_work = false; + struct ipv4_3tuple lookup_key; + + memcpy(&lookup_key, &base.key, sizeof(lookup_key)); + + /* + * First time around loop is to calculate the loop overhead. 
Second + * time does the work. We subtract the first loops time from the + * second loops time to get an approximate time for the thing we are + * interested in. + */ + printf(" Do %u table lookups\n", lookup_count); +loop1: + nsecs1 = cgn_time_nsecs(); + for (i = 0; i < lookup_count; i++) { + lookup_key.addr = i % nentries; + ue = NULL; + + if (do_work) { + hash = rte_ipv4_hash(&lookup_key, + sizeof(lookup_key), 0); + cds_lfht_lookup(ht, hash, urcu_hash_match, + &lookup_key, &iter); + node = cds_lfht_iter_get_node(&iter); + ue = caa_container_of(node, struct urcu_hash_entry, + node); + } + } + nsecs2 = cgn_time_nsecs(); + elapsed = nsecs2 - nsecs1; + + if (!do_work) { + overhead = elapsed; + do_work = true; + goto loop1; + } + + /* Subtract overhead from elapsed to get time taken by work */ + if (overhead <= elapsed) + elapsed -= overhead; + else + elapsed = 0; + + average = elapsed / i; + printf(" -------------------------------------------------\n"); + printf(" Time %lu nS, average %lu nS\n", elapsed, average); + printf(" -------------------------------------------------\n"); + + /* + * Empty table + */ + cds_lfht_for_each_entry(ht, &iter, ue, node) { + (void)cds_lfht_del(ht, &ue->node); + free(ue); + } + + cds_lfht_destroy(ht, NULL); + +} DP_END_TEST; /* cgnat100 */ + + +/* + * cgnat101 - RTE hash table + */ +DP_DECL_TEST_CASE(npf_cgnat, cgnat101, cgnat_setup, cgnat_teardown); +DP_START_TEST_DONT_RUN(cgnat101, test) +{ + struct rte_hash_entry *ue, *lookup; + struct rte_hash *ht; + uint i; + + struct rte_hash_parameters ipv4_l3fwd_hash_params = { + .name = "rte hash table", + .entries = HASH_TEST_TABLE_SIZE, + .key_len = sizeof(struct ipv4_3tuple), + .hash_func = rte_ipv4_hash, + .hash_func_init_val = 0, + .socket_id = 0, + .extra_flag = RTE_HASH_EXTRA_FLAGS_EXT_TABLE | + RTE_HASH_EXTRA_FLAGS_NO_FREE_ON_DEL, + }; + + /* + * LCORE_ID_ANY is -1. 
+ * + * If RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD flag is specified when the + * hash table is created then rte_hash_add_key_data uses the value + * from rte_lcore_id() to index an array *without* first checking the + * value returned. (it is storing the entry in per-core memory). + * + * The dataplane has its own version, dp_lcore_id, which returns 0 + * instead of LCORE_ID_ANY for non-dataplane threads. + */ + if (rte_lcore_id() != LCORE_ID_ANY) { + ipv4_l3fwd_hash_params.extra_flag |= + RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD; + ipv4_l3fwd_hash_params.extra_flag |= + RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY; + } + + ht = rte_hash_create(&ipv4_l3fwd_hash_params); + dp_test_fail_unless(ht != NULL, "rte_hash_create"); + + rte_hash_set_cmp_func(ht, rte_hash_cmp_eq); + + struct urcu_hash_entry base; + + base.key.ifindex = 1; + base.key.addr = 0; + base.key.port = 1; + base.key.ipproto = 17; + base.key.expired = 0; + base.key.pad0 = 0; + base.key.pad1 = 0; + + /* + * Set count + */ + uint lookup_count = HASH_TEST_NLOOKUPS; + uint nentries = HASH_TEST_NENTRIES; + int ret; + + printf("\n"); + printf("RTE Hash Test\n"); + + /* + * Populate table + */ + printf(" Populate table with %u entries\n", nentries); + + for (i = 0; i < nentries; i++) { + ue = zmalloc_aligned(sizeof(*ue)); + + dp_test_fail_unless(ue != NULL, "malloc"); + + memcpy(&ue->key, &base.key, sizeof(ue->key)); + ue->key.addr = i; + + /* Add */ + ret = rte_hash_add_key_data(ht, (void *)&ue->key, (void *)ue); + dp_test_fail_unless(ret >= 0, + "rte_hash_add_key returned %d for entry %u", + ret, i); + + /* Verify entry has been added */ + ret = rte_hash_lookup_data(ht, (const void *)&ue->key, + (void **)&lookup); + dp_test_fail_unless(ret >= 0, + "rte_hash_lookup_data returned %d", + ret); + dp_test_fail_unless(lookup == ue, "rte lookup"); + } + + uint64_t nsecs1, nsecs2, elapsed; /* nanosecs */ + uint64_t average, overhead = 0; /* nanosecs */ + bool do_work = false; + struct ipv4_3tuple lookup_key; + + 
memcpy(&lookup_key, &base.key, sizeof(lookup_key)); + + /* + * First time around loop is to calculate the loop overhead. Second + * time does the work. We subtract the first loops time from the + * second loops time to get an approximate time for the thing we are + * interested in. + */ + printf(" Do %u table lookups\n", lookup_count); +loop1: + nsecs1 = cgn_time_nsecs(); + for (i = 0; i < lookup_count; i++) { + lookup_key.addr = i % nentries; + ue = NULL; + + if (do_work) { + ret = rte_hash_lookup_data(ht, + (const void *)&lookup_key, + (void **)&ue); + } + } + nsecs2 = cgn_time_nsecs(); + elapsed = nsecs2 - nsecs1; + + if (!do_work) { + overhead = elapsed; + do_work = true; + goto loop1; + } + + /* Subtract overhead from elapsed to get time taken by work */ + if (overhead <= elapsed) + elapsed -= overhead; + else + elapsed = 0; + + average = elapsed / i; + printf(" -------------------------------------------------\n"); + printf(" Time %lu nS, average %lu nS\n", elapsed, average); + printf(" -------------------------------------------------\n"); + + /* + * Empty table + */ + uint32_t iterator = 0; + + while (true) { + const void *key; + void *data; + + ret = rte_hash_iterate(ht, &key, &data, &iterator); + if (ret < 0) + break; + + ret = rte_hash_del_key(ht, (void *)key); + dp_test_fail_unless(ret >= 0, "rte_hash_del_key returned %d", + ret); + + if (ret >= 0) { + rte_hash_free_key_with_position(ht, ret); + free(data); + } + } + + rte_hash_free(ht); + +} DP_END_TEST; /* cgnat101 */ + +#endif /* CGN_HASH_COMPARISON */ + + +/********************************************************************** + * Support Functions + *********************************************************************/ + +/* + * Issue command to dataplane + */ +static void +_dpt_cgn_cmd(const char *cmd, bool print, bool exp, + const char *file, int line) +{ + char *reply; + bool err; + + reply = dp_test_console_request_w_err(cmd, &err, print); + + /* + * Returned string for npf commands is just an 
empty string, which is + * of no interest + */ + free(reply); + + _dp_test_fail_unless(err != exp, file, line, + "Expd %u, got %u: \"%s\"", exp, !err, cmd); +} + +#define CGN_MAX_CMD_LEN 5000 + +void +_dpt_cgn_cmd_fmt(bool print, bool exp, + const char *file, int line, const char *fmt_str, ...) +{ + char cmd[CGN_MAX_CMD_LEN]; + va_list ap; + + va_start(ap, fmt_str); + vsnprintf(cmd, CGN_MAX_CMD_LEN, fmt_str, ap); + _dpt_cgn_cmd(cmd, print, exp, file, line); + va_end(ap); +} + +/* + * This is called *after* the packet has been modified, but *before* the pkt + * queued on the tx ring is checked. + */ +static void +cgn_validate_cb(struct rte_mbuf *mbuf, struct ifnet *ifp, + struct dp_test_expected *expected, + enum dp_test_fwd_result_e fwd_result) +{ + struct cgn_ctx *ctx = dp_test_exp_get_validate_ctx(expected); + + /* call the saved check routine */ + if (ctx->do_check) { + (ctx->saved_cb)(mbuf, ifp, expected, fwd_result); + } else { + expected->pak_correct[0] = true; + expected->pak_checked[0] = true; + } +} + +static void cgnat_setup(void) +{ + dp_test_nl_add_ip_addr_and_connected("dp1T0", "2.2.2.254/24"); + dp_test_nl_add_ip_addr_and_connected("dp1T0", "100.64.0.254/16"); + dp_test_nl_add_ip_addr_and_connected("dp2T1", "1.1.1.254/24"); + + /* + * Inside */ dp_test_netlink_add_neigh("dp1T0", "100.64.0.1", "aa:bb:cc:dd:1:a1"); dp_test_netlink_add_neigh("dp1T0", "100.64.0.2", "aa:bb:cc:dd:1:a2"); + dp_test_netlink_add_neigh("dp1T0", "100.64.0.3", + "aa:bb:cc:dd:1:a4"); dp_test_netlink_add_neigh("dp1T0", "100.64.1.1", "aa:bb:cc:dd:1:a3"); @@ -5051,6 +7272,7 @@ static void cgnat_teardown(void) /* Cleanup */ dp_test_netlink_del_neigh("dp1T0", "100.64.0.1", "aa:bb:cc:dd:1:a1"); dp_test_netlink_del_neigh("dp1T0", "100.64.0.2", "aa:bb:cc:dd:1:a2"); + dp_test_netlink_del_neigh("dp1T0", "100.64.0.3", "aa:bb:cc:dd:1:a4"); dp_test_netlink_del_neigh("dp1T0", "100.64.1.1", "aa:bb:cc:dd:1:a3"); dp_test_netlink_del_neigh("dp1T0", "2.2.2.1", "aa:bb:cc:dd:1:a4"); @@ -5085,7 +7307,7 
@@ _cgnat_udp(const char *rx_intf, const char *pre_smac, int pre_vlan, struct dp_test_pkt_desc_t pre_pkt_UDP = { .text = "IPv4 UDP", .len = len, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = pre_saddr, .l2_src = pre_smac, .l3_dst = pre_daddr, @@ -5105,7 +7327,7 @@ _cgnat_udp(const char *rx_intf, const char *pre_smac, int pre_vlan, struct dp_test_pkt_desc_t post_pkt_UDP = { .text = "IPv4 UDP", .len = len, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = post_saddr, .l2_src = "aa:bb:cc:dd:2:b1", .l3_dst = post_daddr, @@ -5167,12 +7389,12 @@ _cgnat_udp(const char *rx_intf, const char *pre_smac, int pre_vlan, /* Set TOS, then reset checksum */ ip->tos = 0xc0; ip->check = 0; - ip->check = rte_ipv4_cksum((const struct ipv4_hdr *)ip); + ip->check = rte_ipv4_cksum((const struct rte_ipv4_hdr *)ip); dp_test_pktmbuf_eth_init( exp_pak, pre_pkt_UDP.l2_src, dp_test_intf_name2mac_str(rx_intf), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); } else { exp_pak = dp_test_v4_pkt_from_desc(&post_pkt_UDP); @@ -5191,7 +7413,7 @@ _cgnat_udp(const char *rx_intf, const char *pre_smac, int pre_vlan, dp_test_exp_get_pak(test_exp), post_dmac, dp_test_intf_name2mac_str(tx_intf), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); } dp_test_exp_set_fwd_status(test_exp, status); @@ -5223,7 +7445,7 @@ _cgnat_tcp(uint8_t flags, const char *rx_intf, const char *pre_smac, struct dp_test_pkt_desc_t pre_pkt_TCP = { .text = "IPv4 TCP", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = pre_saddr, .l2_src = pre_smac, .l3_dst = pre_daddr, @@ -5248,7 +7470,7 @@ _cgnat_tcp(uint8_t flags, const char *rx_intf, const char *pre_smac, struct dp_test_pkt_desc_t post_pkt_TCP = { .text = "IPv4 TCP", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = post_saddr, .l2_src = "aa:bb:cc:dd:2:b1", .l3_dst = post_daddr, @@ -5315,12 +7537,12 @@ _cgnat_tcp(uint8_t flags, const char *rx_intf, 
const char *pre_smac, /* Set TOS, then reset checksum */ ip->tos = 0xc0; ip->check = 0; - ip->check = rte_ipv4_cksum((const struct ipv4_hdr *)ip); + ip->check = rte_ipv4_cksum((const struct rte_ipv4_hdr *)ip); dp_test_pktmbuf_eth_init( exp_pak, pre_pkt_TCP.l2_src, dp_test_intf_name2mac_str(rx_intf), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); } else { exp_pak = dp_test_v4_pkt_from_desc(&post_pkt_TCP); @@ -5358,7 +7580,7 @@ _cgnat_icmp(uint8_t icmp_type, struct dp_test_pkt_desc_t pre_pkt_ICMP = { .text = "IPv4 ICMP", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = pre_saddr, .l2_src = pre_smac, .l3_dst = pre_daddr, @@ -5382,7 +7604,7 @@ _cgnat_icmp(uint8_t icmp_type, struct dp_test_pkt_desc_t post_pkt_ICMP = { .text = "Packet A, IPv4 ICMP", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = post_saddr, .l2_src = "aa:bb:cc:dd:2:b1", .l3_dst = post_daddr, @@ -5416,4 +7638,3 @@ _cgnat_icmp(uint8_t icmp_type, _dp_test_pak_receive(test_pak, rx_intf, test_exp, file, func, line); } - diff --git a/tests/whole_dp/src/dp_test_npf_commands.c b/tests/whole_dp/src/dp_test_npf_commands.c index 8dd0c84e..d86d5bc0 100644 --- a/tests/whole_dp/src/dp_test_npf_commands.c +++ b/tests/whole_dp/src/dp_test_npf_commands.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2016 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -19,12 +19,12 @@ #include "dp_test.h" #include "dp_test_str.h" -#include "dp_test_lib.h" +#include "dp_test_lib_internal.h" #include "dp_test_lib_exp.h" -#include "dp_test_lib_intf.h" +#include "dp_test_lib_intf_internal.h" #include "dp_test_lib_pkt.h" -#include "dp_test_pktmbuf_lib.h" -#include "dp_test_netlink_state.h" +#include "dp_test_pktmbuf_lib_internal.h" +#include "dp_test_netlink_state_internal.h" #include "dp_test_console.h" #include "dp_test_json_utils.h" #include "dp_test_npf_lib.h" @@ -207,7 +207,7 @@ static const struct dp_test_command_t npf_cmd[] = { /* table doesn't exist */ "npf-ut fw table add ADDR_GRP2 12.0.0.1", EXP_EMPTY_STRING, - true, + false, false, }, { @@ -243,8 +243,8 @@ static const struct dp_test_command_t npf_cmd[] = { }, { "npf-ut fw table delete ADDR_GRP2", - EXP_EMPTY_STRING, - true, + "npf address-group ADDR_GRP2 not found", + false, false, }, /* cmd_npf_fw_session_log_add */ @@ -394,8 +394,22 @@ static const struct dp_test_command_t npf_cmd[] = { "{" " \"apm\":" " { \"section_size\": 512," - " \"mapping_count\": 0," - " }" + " \"protocols\":" + " [ " + " { " + " \"protocol\": \"tcp\", " + " \"mapping_count\": 0" + " }, " + " { " + " \"protocol\": \"udp\", " + " \"mapping_count\": 0" + " }, " + " { " + " \"protocol\": \"other\", " + " \"mapping_count\": 0" + " } " + " ] " + " } " "}", true, true, @@ -862,7 +876,7 @@ static const struct dp_test_command_t npf_cmd[] = { * clear the statistics, and dump the generated file */ { /* add rule to a rule group */ - "npf-ut add fw:FW1 10 action=accept proto=6 src-port=80", + "npf-ut add fw:FW1 10 action=accept proto-final=6 src-port=80", EXP_EMPTY_STRING, true, false, @@ -900,7 +914,7 @@ static const struct dp_test_command_t npf_cmd[] = { " \"bytes\":0," " \"packets\":0," " \"action\":\"pass \"," - " \"match\":\"proto 6 from any port 80 \"" + " \"match\":\"proto-final 6 from any port 80 \"" " }" " }" " }" diff --git a/tests/whole_dp/src/dp_test_npf_defrag.c 
b/tests/whole_dp/src/dp_test_npf_defrag.c index 31a73d5a..e3efdbd5 100644 --- a/tests/whole_dp/src/dp_test_npf_defrag.c +++ b/tests/whole_dp/src/dp_test_npf_defrag.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2019-2021, AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only * @@ -21,11 +21,11 @@ #include "dp_test.h" #include "dp_test_controller.h" #include "dp_test_console.h" -#include "dp_test_netlink_state.h" -#include "dp_test_cmd_check.h" -#include "dp_test_lib.h" -#include "dp_test_pktmbuf_lib.h" -#include "dp_test_lib_intf.h" +#include "dp_test_netlink_state_internal.h" +#include "dp_test/dp_test_cmd_check.h" +#include "dp_test_lib_internal.h" +#include "dp_test_lib_intf_internal.h" +#include "dp_test_pktmbuf_lib_internal.h" #include "dp_test_lib_exp.h" #include "dp_test_lib_pkt.h" #include "dp_test_npf_lib.h" @@ -68,6 +68,21 @@ _defrag_multi(const char *rx_intf, const char *pre_smac, int pre_vlan, _i, _j, _k, _l, _m, _n, _o, \ __FILE__, __func__, __LINE__) +static void +_defrag_duplicate(const char *rx_intf, const char *pre_smac, int pre_vlan, + const char *pre_saddr, uint16_t pre_sport, + const char *pre_daddr, uint16_t pre_dport, + const char *post_saddr, uint16_t post_sport, + const char *post_daddr, uint16_t post_dport, + const char *post_dmac, int post_vlan, const char *tx_intf, + int status, int duplicate_index, + const char *file, const char *func, int line); +#define defrag_duplicate(_a, _b, _c, _d, _e, _f, _g, _h, \ + _i, _j, _k, _l, _m, _n, _o, _p) \ + _defrag_duplicate(_a, _b, _c, _d, _e, _f, _g, _h, \ + _i, _j, _k, _l, _m, _n, _o, _p, \ + __FILE__, __func__, __LINE__) + /* * defrag1 - Tests defrag with SNAT and UDP. 
* @@ -87,6 +102,7 @@ DP_START_TEST(defrag1, test) .ifname = "dp2T1", .proto = IPPROTO_UDP, .map = "dynamic", + .port_alloc = NULL, .from_addr = "100.64.0.1", .from_port = NULL, .to_addr = NULL, @@ -139,7 +155,7 @@ DP_START_TEST(defrag2, test) .rule = "10", .pass = PASS, .stateful = false, - .npf = "proto=17" + .npf = "proto-final=17" }, RULE_DEF_BLOCK, NULL_RULE @@ -199,7 +215,7 @@ DP_START_TEST(defrag3, test) .rule = "10", .pass = PASS, .stateful = false, - .npf = "proto=17" + .npf = "proto-final=17" }, RULE_DEF_BLOCK, NULL_RULE @@ -260,13 +276,13 @@ DP_START_TEST(defrag4, test) .rule = "10", .pass = PASS, .stateful = false, - .npf = "proto=17" + .npf = "proto-final=17" }, { .rule = "20", .pass = PASS, .stateful = false, - .npf = "proto=6" + .npf = "proto-final=6" }, RULE_DEF_BLOCK, NULL_RULE @@ -307,6 +323,78 @@ DP_START_TEST(defrag4, test) } DP_END_TEST; +DP_DECL_TEST_CASE(npf_defrag, defrag5, defrag_setup, defrag_teardown); +DP_START_TEST(defrag5, test) +{ + struct dp_test_npf_rule_t rset[] = { + { + .rule = "10", + .pass = PASS, + .stateful = false, + .npf = "proto-final=17" + }, + { + .rule = "20", + .pass = PASS, + .stateful = false, + .npf = "proto-final=6" + }, + RULE_DEF_BLOCK, + NULL_RULE + }; + + struct dp_test_npf_ruleset_t fw = { + .rstype = "fw-in", + .name = "IN_FW", + .enable = 1, + .attach_point = "dp1T0", + .fwd = FWD, + .dir = "in", + .rules = rset + }; + + dp_test_npf_fw_add(&fw, false); + + /* Check defrag feature is enabled */ + dp_test_wait_for_pl_feat("dp1T0", "vyatta:ipv4-defrag-in", + "ipv4-validate"); + dp_test_wait_for_pl_feat("dp1T0", "vyatta:ipv4-defrag-out", + "ipv4-out"); + + defrag_duplicate("dp1T0", "aa:bb:cc:dd:1:a1", 0, + "100.64.0.1", 49152, "1.1.1.1", 80, + "100.64.0.1", 49152, "1.1.1.1", 80, + "aa:bb:cc:dd:2:b1", 0, "dp2T1", + DP_TEST_FWD_FORWARDED, 0); + + defrag_duplicate("dp1T0", "aa:bb:cc:dd:1:a1", 0, + "100.64.0.1", 49152, "1.1.1.1", 80, + "100.64.0.1", 49152, "1.1.1.1", 80, + "aa:bb:cc:dd:2:b1", 0, "dp2T1", + 
DP_TEST_FWD_FORWARDED, 1); + + defrag_duplicate("dp1T0", "aa:bb:cc:dd:1:a1", 0, + "100.64.0.1", 49152, "1.1.1.1", 80, + "100.64.0.1", 49152, "1.1.1.1", 80, + "aa:bb:cc:dd:2:b1", 0, "dp2T1", + DP_TEST_FWD_FORWARDED, 2); + + defrag_duplicate("dp1T0", "aa:bb:cc:dd:1:a1", 0, + "100.64.0.1", 49152, "1.1.1.1", 80, + "100.64.0.1", 49152, "1.1.1.1", 80, + "aa:bb:cc:dd:2:b1", 0, "dp2T1", + DP_TEST_FWD_FORWARDED, 3); + + /* Cleanup */ + dp_test_npf_fw_del(&fw, false); + + /* Check defrag feature is disabled */ + dp_test_wait_for_pl_feat_gone("dp1T0", "vyatta:ipv4-defrag-in", + "ipv4-validate"); + dp_test_wait_for_pl_feat_gone("dp1T0", "vyatta:ipv4-defrag-out", + "ipv4-out"); + +} DP_END_TEST; static void defrag_setup(void) { @@ -371,7 +459,7 @@ _defrag_udp(const char *rx_intf, const char *pre_smac, int pre_vlan, struct dp_test_pkt_desc_t pre_pkt_UDP = { .text = "IPv4 UDP", .len = len, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = pre_saddr, .l2_src = pre_smac, .l3_dst = pre_daddr, @@ -391,7 +479,7 @@ _defrag_udp(const char *rx_intf, const char *pre_smac, int pre_vlan, struct dp_test_pkt_desc_t post_pkt_UDP = { .text = "IPv4 UDP", .len = len, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = post_saddr, .l2_src = "aa:bb:cc:dd:2:b1", .l3_dst = post_daddr, @@ -514,7 +602,7 @@ _defrag_send_frag(struct rte_mbuf *frag, _dp_test_pak_receive(frag, pkt->rx_intf, test_exp, file, func, line); } -#define defrag_send_frag(_a, _b, _c) \ +#define defrag_send_frag(_a, _b, _c) \ _defrag_send_frag(_a, _b, _c, __FILE__, __func__, __LINE__) @@ -538,7 +626,7 @@ _defrag_multi(const char *rx_intf, const char *pre_smac, int pre_vlan, struct dp_test_pkt_desc_t pkt_UDP = { .text = "IPv4 UDP", .len = 1200, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = pre_saddr, .l2_src = pre_smac, .l3_dst = pre_daddr, @@ -558,7 +646,7 @@ _defrag_multi(const char *rx_intf, const char *pre_smac, int pre_vlan, struct 
dp_test_pkt_desc_t pkt_TCP = { .text = "IPv4 TCP", .len = 1204, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = pre_saddr, .l2_src = pre_smac, .l3_dst = pre_daddr, @@ -617,3 +705,79 @@ _defrag_multi(const char *rx_intf, const char *pre_smac, int pre_vlan, /* Last TCP fragment */ defrag_send_frag(frag_pkts2[2], &pkt_TCP, DP_TEST_FWD_FORWARDED); } + + + +/* + * defrag_duplicate + * + * Send the fragments of a single UDP packet, preceded by a duplicate of the + * fragment at index duplicate_index. + */ +static void +_defrag_duplicate(const char *rx_intf, const char *pre_smac, int pre_vlan, + const char *pre_saddr, uint16_t pre_sport, + const char *pre_daddr, uint16_t pre_dport, + const char *post_saddr, uint16_t post_sport, + const char *post_daddr, uint16_t post_dport, + const char *post_dmac, int post_vlan, const char *tx_intf, + int status, int duplicate_index, + const char *file, const char *func, int line) +{ + /* IPv4 UDP packet */ + struct dp_test_pkt_desc_t pkt_UDP = { + .text = "IPv4 UDP", + .len = 1200, + .ether_type = RTE_ETHER_TYPE_IPV4, + .l3_src = pre_saddr, + .l2_src = pre_smac, + .l3_dst = pre_daddr, + .l2_dst = "aa:bb:cc:dd:2:b1", + .proto = IPPROTO_UDP, + .l4 = { + .udp = { + .sport = pre_sport, + .dport = pre_dport + } + }, + .rx_intf = rx_intf, + .tx_intf = tx_intf + }; + enum dp_test_fwd_result_e fwd_status = DP_TEST_FWD_DROPPED; + + /* Fragment UDP test pak */ + struct rte_mbuf *frag_pkts1[4] = { 0 }; + uint16_t frag_sizes1[4] = { 400, 400, 400, 8 }; + + defrag_create_frags(&pkt_UDP, frag_pkts1, frag_sizes1, 4); + + /* Array indices to get an in order packet are: 3,0,1,2 */ + + /* Send the duplicate first */ + _defrag_send_frag(frag_pkts1[duplicate_index], + &pkt_UDP, fwd_status, + file, func, line); + + /* First packet - start of packet */ + _defrag_send_frag(frag_pkts1[3], &pkt_UDP, fwd_status, + file, func, line); + + /* 2nd UDP fragment */ + _defrag_send_frag(frag_pkts1[0], &pkt_UDP, fwd_status, + file, func, line); + +
/* 3rd UDP fragment */ + if (duplicate_index == 2) + fwd_status = DP_TEST_FWD_FORWARDED; + _defrag_send_frag(frag_pkts1[1], &pkt_UDP, fwd_status, + file, func, line); + + /* Last UDP fragment */ + if (duplicate_index == 2) + fwd_status = DP_TEST_FWD_DROPPED; + else + fwd_status = DP_TEST_FWD_FORWARDED; + _defrag_send_frag(frag_pkts1[2], &pkt_UDP, fwd_status, + file, func, line); +} + diff --git a/tests/whole_dp/src/dp_test_npf_dscp.c b/tests/whole_dp/src/dp_test_npf_dscp.c index cd9d4cba..b52bf65f 100644 --- a/tests/whole_dp/src/dp_test_npf_dscp.c +++ b/tests/whole_dp/src/dp_test_npf_dscp.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only * @@ -16,12 +16,12 @@ #include "dp_test.h" #include "dp_test_str.h" -#include "dp_test_lib.h" +#include "dp_test_lib_internal.h" #include "dp_test_lib_exp.h" #include "dp_test_lib_pkt.h" -#include "dp_test_lib_intf.h" -#include "dp_test_pktmbuf_lib.h" -#include "dp_test_netlink_state.h" +#include "dp_test_lib_intf_internal.h" +#include "dp_test_pktmbuf_lib_internal.h" +#include "dp_test_netlink_state_internal.h" #include "dp_test_console.h" #include "dp_test_json_utils.h" #include "dp_test_npf_lib.h" @@ -138,7 +138,7 @@ DP_START_TEST(dscp_ipv4, single_dscp) struct dp_test_pkt_desc_t v4_pkt = { .text = "DSCP IPv4 single", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "1.1.1.11", .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = "2.2.2.11", @@ -245,7 +245,7 @@ DP_START_TEST(dscp_ipv6, single_dscp) struct dp_test_pkt_desc_t v6_pkt = { .text = "ICMP IPv6", .len = 20, - .ether_type = ETHER_TYPE_IPv6, + .ether_type = RTE_ETHER_TYPE_IPV6, .l3_src = "2001:1:1::2", .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = "2002:2:2::1", @@ -464,7 +464,7 @@ DP_START_TEST(dscp_ipv4, group_dscp) struct dp_test_pkt_desc_t v4_pkt = { .text = "DSCP IPv4", .len = 
20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "1.1.1.11", .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = "2.2.2.11", @@ -584,7 +584,7 @@ DP_START_TEST(dscp_ipv6, group_dscp) struct dp_test_pkt_desc_t v6_pkt = { .text = "ICMP IPv6", .len = 20, - .ether_type = ETHER_TYPE_IPv6, + .ether_type = RTE_ETHER_TYPE_IPV6, .l3_src = "2001:1:1::2", .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = "2002:2:2::1", diff --git a/tests/whole_dp/src/dp_test_npf_feat.c b/tests/whole_dp/src/dp_test_npf_feat.c index 77a98e83..7635602f 100644 --- a/tests/whole_dp/src/dp_test_npf_feat.c +++ b/tests/whole_dp/src/dp_test_npf_feat.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2019-2020, AT&T Intellectual Property. All rights reserved. * All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only @@ -22,12 +22,13 @@ #include "dp_test.h" #include "dp_test_str.h" -#include "dp_test_lib.h" +#include "dp_test_lib_internal.h" #include "dp_test_lib_exp.h" #include "dp_test_lib_pkt.h" -#include "dp_test_lib_intf.h" -#include "dp_test_pktmbuf_lib.h" -#include "dp_test_netlink_state.h" +#include "dp_test_lib_intf_internal.h" +#include "dp_test_pktmbuf_lib_internal.h" +#include "dp_test_netlink_state_internal.h" + #include "dp_test_console.h" #include "dp_test_json_utils.h" #include "dp_test_npf_lib.h" @@ -143,6 +144,31 @@ _dp_test_wait_for_pl_fw(const char *ifname, bool exp_gone, __FILE__, __func__, __LINE__) \ +static void +_dp_test_wait_for_pl_nat64(const char *ifname, bool exp_gone, + const char *file, const char *func, int line) +{ + _dp_test_wait_for_pl_feat(ifname, "vyatta:ipv4-nat46-in", + "ipv4-validate", exp_gone, + file, func, line); + + _dp_test_wait_for_pl_feat(ifname, "vyatta:ipv6-nat64-in", + "ipv6-validate", exp_gone, + file, func, line); + + _dp_test_wait_for_pl_feat(ifname, "vyatta:ipv4-nat64-out", + "ipv4-out", exp_gone, + file, func, line); + + _dp_test_wait_for_pl_feat(ifname, 
"vyatta:ipv6-nat46-out", + "ipv6-out", exp_gone, + file, func, line); +} +#define dp_test_wait_for_pl_nat64(_intf, _gone) \ + _dp_test_wait_for_pl_nat64(_intf, _gone, \ + __FILE__, __func__, __LINE__) \ + + DP_DECL_TEST_SUITE(npf_feat); /* @@ -173,13 +199,64 @@ DP_START_TEST(npf_feat1, test) } DP_END_TEST; +/* + * npf_feat2 - zone ruleset + * + * make -j4 dataplane_test_run CK_RUN_CASE=npf_feat2 + */ +DP_DECL_TEST_CASE(npf_feat, npf_feat2, NULL, NULL); +DP_START_TEST(npf_feat2, test) +{ + dp_test_nl_add_ip_addr_and_connected("dp1T0", "1.1.1.1/24"); + dp_test_nl_add_ip_addr_and_connected("dp1T1", "2.2.2.2/24"); + dp_test_nl_add_ip_addr_and_connected("dp1T2", "3.3.3.3/24"); + + /* Add zone with member intfs dp1T0 and dp1T1 */ + struct dpt_zone_cfg cfg = { + .private = { + .name = "PRIVATE", + .intf = { "dp1T0", "dp1T1", NULL }, + .local = false, + }, + .public = { NULL, { NULL, NULL, NULL }, false }, + .local = { 0 }, + .pub_to_priv = { 0 }, + .priv_to_pub = { 0 }, + .local_to_priv = { 0 }, + .priv_to_local = { 0 }, + .local_to_pub = { 0 }, + .pub_to_local = { 0 }, + }; + + dpt_zone_cfg(&cfg, true, false); + + dp_test_wait_for_pl_defrag("dp1T0", EXP_PRESENT); + dp_test_wait_for_pl_fw("dp1T0", EXP_PRESENT); + + dp_test_wait_for_pl_defrag("dp1T2", EXP_PRESENT); + dp_test_wait_for_pl_fw("dp1T2", EXP_PRESENT); + + dpt_zone_cfg(&cfg, false, false); + + dp_test_wait_for_pl_defrag("dp1T0", EXP_GONE); + dp_test_wait_for_pl_fw("dp1T0", EXP_GONE); + + dp_test_wait_for_pl_defrag("dp1T2", EXP_GONE); + dp_test_wait_for_pl_fw("dp1T2", EXP_GONE); + + dp_test_nl_del_ip_addr_and_connected("dp1T0", "1.1.1.1/24"); + dp_test_nl_del_ip_addr_and_connected("dp1T1", "2.2.2.2/24"); + dp_test_nl_del_ip_addr_and_connected("dp1T2", "3.3.3.3/24"); + +} DP_END_TEST; + /* * npf_feat3 - nat64 ruleset * * make -j4 dataplane_test_run CK_RUN_CASE=npf_feat3 */ DP_DECL_TEST_CASE(npf_feat, npf_feat3, NULL, NULL); -DP_START_TEST(npf_feat3, test) +DP_START_TEST_FULL_RUN(npf_feat3, test) { 
dp_test_nl_add_ip_addr_and_connected("dp1T0", "2001:101:1::a0a:1fe/96"); dp_test_nl_add_ip_addr_and_connected("dp1T1", "2002:101:1::a0a:1fe/96"); @@ -188,18 +265,22 @@ DP_START_TEST(npf_feat3, test) npf_feat_nat64_ruleset("dp1T0", "N64_GROUP1", ACTION_ADD); dp_test_wait_for_pl_defrag("dp1T0", EXP_PRESENT); - dp_test_wait_for_pl_fw("dp1T0", EXP_PRESENT); + dp_test_wait_for_pl_nat64("dp1T0", EXP_PRESENT); dp_test_wait_for_pl_defrag("dp1T1", EXP_PRESENT); - dp_test_wait_for_pl_fw("dp1T1", EXP_PRESENT); + dp_test_wait_for_pl_nat64("dp1T1", EXP_PRESENT); + + /* Firewall should not be present */ + dp_test_wait_for_pl_fw("dp1T0", EXP_GONE); + dp_test_wait_for_pl_fw("dp1T1", EXP_GONE); npf_feat_nat64_ruleset("dp1T0", "N64_GROUP1", ACTION_DEL); dp_test_wait_for_pl_defrag("dp1T0", EXP_GONE); - dp_test_wait_for_pl_fw("dp1T0", EXP_GONE); + dp_test_wait_for_pl_nat64("dp1T0", EXP_GONE); dp_test_wait_for_pl_defrag("dp1T1", EXP_GONE); - dp_test_wait_for_pl_fw("dp1T1", EXP_GONE); + dp_test_wait_for_pl_nat64("dp1T1", EXP_GONE); dp_test_nl_del_ip_addr_and_connected("dp1T0", "2001:101:1::a0a:1fe/96"); dp_test_nl_del_ip_addr_and_connected("dp1T1", "2002:101:1::a0a:1fe/96"); @@ -207,12 +288,13 @@ DP_START_TEST(npf_feat3, test) } DP_END_TEST; /* - * npf_feat4 - firewall and nat64 rulesets + * npf_feat4a - firewall and nat64 rulesets. Remove nat64 first then check + * firewall still enabled. 
* * make -j4 dataplane_test_run CK_RUN_CASE=npf_feat4 */ -DP_DECL_TEST_CASE(npf_feat, npf_feat4, NULL, NULL); -DP_START_TEST(npf_feat4, test) +DP_DECL_TEST_CASE(npf_feat, npf_feat4a, NULL, NULL); +DP_START_TEST_FULL_RUN(npf_feat4a, test) { dp_test_nl_add_ip_addr_and_connected("dp1T0", "2001:101:1::a0a:1fe/96"); dp_test_nl_add_ip_addr_and_connected("dp1T1", "2002:101:1::a0a:1fe/96"); @@ -226,9 +308,11 @@ DP_START_TEST(npf_feat4, test) dp_test_wait_for_pl_defrag("dp1T0", EXP_PRESENT); dp_test_wait_for_pl_fw("dp1T0", EXP_PRESENT); + dp_test_wait_for_pl_nat64("dp1T0", EXP_PRESENT); dp_test_wait_for_pl_defrag("dp1T1", EXP_PRESENT); - dp_test_wait_for_pl_fw("dp1T1", EXP_PRESENT); + dp_test_wait_for_pl_fw("dp1T1", EXP_GONE); /* No fw on dp1T1 */ + dp_test_wait_for_pl_nat64("dp1T1", EXP_PRESENT); /* * Remove nat64 from dp1T0. defrag and fw features should still be @@ -238,9 +322,11 @@ DP_START_TEST(npf_feat4, test) dp_test_wait_for_pl_defrag("dp1T0", EXP_PRESENT); dp_test_wait_for_pl_fw("dp1T0", EXP_PRESENT); + dp_test_wait_for_pl_nat64("dp1T0", EXP_GONE); dp_test_wait_for_pl_defrag("dp1T1", EXP_GONE); dp_test_wait_for_pl_fw("dp1T1", EXP_GONE); + dp_test_wait_for_pl_nat64("dp1T1", EXP_GONE); /* * Remove firewall from dp1T0 @@ -258,13 +344,68 @@ DP_START_TEST(npf_feat4, test) dp_test_nl_del_ip_addr_and_connected("dp1T2", "3.3.3.3/24"); } DP_END_TEST; +/* + * npf_feat4b - firewall and nat64 rulesets. Remove firewall first then check + * nat64 still enabled.
+ * + * make -j4 dataplane_test_run CK_RUN_CASE=npf_feat4b + */ +DP_DECL_TEST_CASE(npf_feat, npf_feat4b, NULL, NULL); +DP_START_TEST_FULL_RUN(npf_feat4b, test) +{ + dp_test_nl_add_ip_addr_and_connected("dp1T0", "2001:101:1::a0a:1fe/96"); + dp_test_nl_add_ip_addr_and_connected("dp1T1", "2002:101:1::a0a:1fe/96"); + dp_test_nl_add_ip_addr_and_connected("dp1T2", "3.3.3.3/24"); + + /* + * Add nat64 and firewall to dp1T0 + */ + npf_feat_fw_ruleset("dp1T0", "FW_GROUP1", true, ACTION_ADD); + npf_feat_nat64_ruleset("dp1T0", "N64_GROUP1", ACTION_ADD); + + dp_test_wait_for_pl_defrag("dp1T0", EXP_PRESENT); + dp_test_wait_for_pl_fw("dp1T0", EXP_PRESENT); + dp_test_wait_for_pl_nat64("dp1T0", EXP_PRESENT); + + dp_test_wait_for_pl_defrag("dp1T1", EXP_PRESENT); + dp_test_wait_for_pl_fw("dp1T1", EXP_GONE); /* No fw on dp1T1 */ + dp_test_wait_for_pl_nat64("dp1T1", EXP_PRESENT); + + /* + * Remove firewall from dp1T0. defrag and nat64 features should still + * be present on dp1T0 + */ + npf_feat_fw_ruleset("dp1T0", "FW_GROUP1", true, ACTION_DEL); + + dp_test_wait_for_pl_defrag("dp1T0", EXP_PRESENT); + dp_test_wait_for_pl_fw("dp1T0", EXP_GONE); + dp_test_wait_for_pl_nat64("dp1T0", EXP_PRESENT); + + /* + * Remove nat64 from dp1T0.
+ */ + npf_feat_nat64_ruleset("dp1T0", "N64_GROUP1", ACTION_DEL); + + dp_test_wait_for_pl_defrag("dp1T0", EXP_GONE); + dp_test_wait_for_pl_fw("dp1T0", EXP_GONE); + dp_test_wait_for_pl_nat64("dp1T0", EXP_GONE); + + dp_test_wait_for_pl_defrag("dp1T1", EXP_GONE); + dp_test_wait_for_pl_fw("dp1T1", EXP_GONE); + dp_test_wait_for_pl_nat64("dp1T1", EXP_GONE); + + dp_test_nl_del_ip_addr_and_connected("dp1T0", "2001:101:1::a0a:1fe/96"); + dp_test_nl_del_ip_addr_and_connected("dp1T1", "2002:101:1::a0a:1fe/96"); + dp_test_nl_del_ip_addr_and_connected("dp1T2", "3.3.3.3/24"); +} DP_END_TEST; + /* * npf_feat5 - nat64 ruleset * * make -j4 dataplane_test_run CK_RUN_CASE=npf_feat5 */ DP_DECL_TEST_CASE(npf_feat, npf_feat5, NULL, NULL); -DP_START_TEST(npf_feat5, test) +DP_START_TEST_FULL_RUN(npf_feat5, test) { dp_test_nl_add_ip_addr_and_connected("dp1T0", "2001:101:1::a0a:1fe/96"); dp_test_nl_add_ip_addr_and_connected("dp1T1", "2002:101:1::a0a:1fe/96"); @@ -273,10 +414,10 @@ DP_START_TEST(npf_feat5, test) npf_feat_nat64_ruleset("dp1T0", "N64_GROUP1", ACTION_ADD); dp_test_wait_for_pl_defrag("dp1T0", EXP_PRESENT); - dp_test_wait_for_pl_fw("dp1T0", EXP_PRESENT); + dp_test_wait_for_pl_nat64("dp1T0", EXP_PRESENT); dp_test_wait_for_pl_defrag("dp1T1", EXP_PRESENT); - dp_test_wait_for_pl_fw("dp1T1", EXP_PRESENT); + dp_test_wait_for_pl_nat64("dp1T1", EXP_PRESENT); /* Create an incomplete vif interface */ dp_test_intf_vif_create_incmpl("dp1T1.100", "dp1T1", 100); @@ -287,7 +428,7 @@ DP_START_TEST(npf_feat5, test) /* Check features on vif interface */ dp_test_wait_for_pl_defrag("dp1T1.100", EXP_PRESENT); - dp_test_wait_for_pl_fw("dp1T1.100", EXP_PRESENT); + dp_test_wait_for_pl_nat64("dp1T1.100", EXP_PRESENT); /* Remove vif interface */ dp_test_nl_del_ip_addr_and_connected("dp1T1.100", "2.2.2.2/24"); @@ -296,12 +437,218 @@ DP_START_TEST(npf_feat5, test) npf_feat_nat64_ruleset("dp1T0", "N64_GROUP1", ACTION_DEL); + dp_test_wait_for_pl_defrag("dp1T0", EXP_GONE); + dp_test_wait_for_pl_nat64("dp1T0", 
EXP_GONE); + + dp_test_wait_for_pl_defrag("dp1T1", EXP_GONE); + dp_test_wait_for_pl_nat64("dp1T1", EXP_GONE); + + dp_test_nl_del_ip_addr_and_connected("dp1T0", "2001:101:1::a0a:1fe/96"); + dp_test_nl_del_ip_addr_and_connected("dp1T1", "2002:101:1::a0a:1fe/96"); + dp_test_nl_del_ip_addr_and_connected("dp1T2", "3.3.3.3/24"); +} DP_END_TEST; + +/* + * npf_feat6 - nat64 and zone rulesets + * + * make -j4 dataplane_test_run CK_RUN_CASE=npf_feat6 + */ +DP_DECL_TEST_CASE(npf_feat, npf_feat6, NULL, NULL); +DP_START_TEST_FULL_RUN(npf_feat6, test) +{ + dp_test_nl_add_ip_addr_and_connected("dp1T0", "2001:101:1::a0a:1fe/96"); + dp_test_nl_add_ip_addr_and_connected("dp1T1", "2002:101:1::a0a:1fe/96"); + dp_test_nl_add_ip_addr_and_connected("dp1T2", "3.3.3.3/24"); + + npf_feat_nat64_ruleset("dp1T0", "N64_GROUP1", ACTION_ADD); + + /* + * Add zone with member intfs dp1T0 and dp1T1 + */ + struct dpt_zone_cfg cfg = { + .private = { + .name = "PRIVATE", + .intf = { "dp1T0", "dp1T1", NULL }, + .local = false, + }, + .public = { NULL, { NULL, NULL, NULL }, false }, + .local = { 0 }, + .pub_to_priv = { 0 }, + .priv_to_pub = { 0 }, + .local_to_priv = { 0 }, + .priv_to_local = { 0 }, + .local_to_pub = { 0 }, + .pub_to_local = { 0 }, + }; + + dpt_zone_cfg(&cfg, true, false); + + /* + * Check fw and defrag are enabled on zone and non-zone interfaces. 
+ */ + dp_test_wait_for_pl_defrag("dp1T0", EXP_PRESENT); + dp_test_wait_for_pl_fw("dp1T0", EXP_PRESENT); + dp_test_wait_for_pl_nat64("dp1T0", EXP_PRESENT); + + dp_test_wait_for_pl_defrag("dp1T1", EXP_PRESENT); + dp_test_wait_for_pl_fw("dp1T1", EXP_PRESENT); + dp_test_wait_for_pl_nat64("dp1T1", EXP_PRESENT); + + dp_test_wait_for_pl_defrag("dp1T2", EXP_PRESENT); + dp_test_wait_for_pl_fw("dp1T2", EXP_PRESENT); + dp_test_wait_for_pl_nat64("dp1T2", EXP_PRESENT); + + /* + * Create vif interface + */ + dp_test_intf_vif_create_incmpl("dp1T1.100", "dp1T1", 100); + dp_test_intf_vif_create_incmpl_fin("dp1T1.100", "dp1T1", 100); + dp_test_nl_add_ip_addr_and_connected("dp1T1.100", "2.2.2.2/24"); + + /* + * Check fw and defrag features are enabled on vif interface + */ + dp_test_wait_for_pl_defrag("dp1T1.100", EXP_PRESENT); + dp_test_wait_for_pl_fw("dp1T1.100", EXP_PRESENT); + dp_test_wait_for_pl_nat64("dp1T1.100", EXP_PRESENT); + + /* + * Remove vif interface + */ + dp_test_nl_del_ip_addr_and_connected("dp1T1.100", "2.2.2.2/24"); + dp_test_intf_vif_del("dp1T1.100", 100); + + /* Remove nat64 ruleset */ + npf_feat_nat64_ruleset("dp1T0", "N64_GROUP1", ACTION_DEL); + + /* Check features still enabled */ + dp_test_wait_for_pl_defrag("dp1T0", EXP_PRESENT); + dp_test_wait_for_pl_fw("dp1T0", EXP_PRESENT); + dp_test_wait_for_pl_nat64("dp1T0", EXP_GONE); + + dp_test_wait_for_pl_defrag("dp1T1", EXP_PRESENT); + dp_test_wait_for_pl_fw("dp1T1", EXP_PRESENT); + dp_test_wait_for_pl_nat64("dp1T1", EXP_GONE); + + dp_test_wait_for_pl_defrag("dp1T2", EXP_PRESENT); + dp_test_wait_for_pl_fw("dp1T2", EXP_PRESENT); + dp_test_wait_for_pl_nat64("dp1T2", EXP_GONE); + + /* Remove zone */ + dpt_zone_cfg(&cfg, false, false); + + /* Check features are now gone */ + dp_test_wait_for_pl_defrag("dp1T0", EXP_GONE); + dp_test_wait_for_pl_fw("dp1T0", EXP_GONE); + + dp_test_wait_for_pl_defrag("dp1T1", EXP_GONE); + dp_test_wait_for_pl_fw("dp1T1", EXP_GONE); + + dp_test_wait_for_pl_defrag("dp1T2", EXP_GONE); + 
dp_test_wait_for_pl_fw("dp1T2", EXP_GONE); + + dp_test_nl_del_ip_addr_and_connected("dp1T0", "2001:101:1::a0a:1fe/96"); + dp_test_nl_del_ip_addr_and_connected("dp1T1", "2002:101:1::a0a:1fe/96"); + dp_test_nl_del_ip_addr_and_connected("dp1T2", "3.3.3.3/24"); +} DP_END_TEST; + +/* + * npf_feat7 - nat64 and zone rulesets + * + * make -j4 dataplane_test_run CK_RUN_CASE=npf_feat7 + */ +DP_DECL_TEST_CASE(npf_feat, npf_feat7, NULL, NULL); +DP_START_TEST_FULL_RUN(npf_feat7, test) +{ + dp_test_nl_add_ip_addr_and_connected("dp1T0", "2001:101:1::a0a:1fe/96"); + dp_test_nl_add_ip_addr_and_connected("dp1T1", "2002:101:1::a0a:1fe/96"); + dp_test_nl_add_ip_addr_and_connected("dp1T2", "3.3.3.3/24"); + + /* + * Create vif interface *before* nat64 and zone config + */ + dp_test_intf_vif_create_incmpl("dp1T1.100", "dp1T1", 100); + dp_test_intf_vif_create_incmpl_fin("dp1T1.100", "dp1T1", 100); + dp_test_nl_add_ip_addr_and_connected("dp1T1.100", "2.2.2.2/24"); + + /* Add nat64 config */ + npf_feat_nat64_ruleset("dp1T0", "N64_GROUP1", ACTION_ADD); + + /* + * Add zone with member intfs dp1T0 and dp1T1.100 + */ + struct dpt_zone_cfg cfg = { + .private = { + .name = "PRIVATE", + .intf = { "dp1T0", "dp1T1.100", NULL }, + .local = false, + }, + .public = { NULL, { NULL, NULL, NULL }, false }, + .local = { 0 }, + .pub_to_priv = { 0 }, + .priv_to_pub = { 0 }, + .local_to_priv = { 0 }, + .priv_to_local = { 0 }, + .local_to_pub = { 0 }, + .pub_to_local = { 0 }, + }; + + dpt_zone_cfg(&cfg, true, false); + + /* + * Check fw and defrag are enabled on zone and non-zone interfaces. 
+ */ + dp_test_wait_for_pl_defrag("dp1T0", EXP_PRESENT); + dp_test_wait_for_pl_fw("dp1T0", EXP_PRESENT); + dp_test_wait_for_pl_nat64("dp1T0", EXP_PRESENT); + + dp_test_wait_for_pl_defrag("dp1T1", EXP_PRESENT); + dp_test_wait_for_pl_fw("dp1T1", EXP_PRESENT); + dp_test_wait_for_pl_nat64("dp1T1", EXP_PRESENT); + + dp_test_wait_for_pl_defrag("dp1T2", EXP_PRESENT); + dp_test_wait_for_pl_fw("dp1T2", EXP_PRESENT); + dp_test_wait_for_pl_nat64("dp1T2", EXP_PRESENT); + + dp_test_wait_for_pl_defrag("dp1T1.100", EXP_PRESENT); + dp_test_wait_for_pl_fw("dp1T1.100", EXP_PRESENT); + dp_test_wait_for_pl_nat64("dp1T1.100", EXP_PRESENT); + + /* + * Remove vif interface + */ + dp_test_nl_del_ip_addr_and_connected("dp1T1.100", "2.2.2.2/24"); + dp_test_intf_vif_del("dp1T1.100", 100); + + /* Remove nat64 ruleset */ + npf_feat_nat64_ruleset("dp1T0", "N64_GROUP1", ACTION_DEL); + + /* Check features still enabled */ + dp_test_wait_for_pl_defrag("dp1T0", EXP_PRESENT); + dp_test_wait_for_pl_fw("dp1T0", EXP_PRESENT); + dp_test_wait_for_pl_nat64("dp1T0", EXP_GONE); + + dp_test_wait_for_pl_defrag("dp1T1", EXP_PRESENT); + dp_test_wait_for_pl_fw("dp1T1", EXP_PRESENT); + dp_test_wait_for_pl_nat64("dp1T1", EXP_GONE); + + dp_test_wait_for_pl_defrag("dp1T2", EXP_PRESENT); + dp_test_wait_for_pl_fw("dp1T2", EXP_PRESENT); + dp_test_wait_for_pl_nat64("dp1T2", EXP_GONE); + + /* Remove zone */ + dpt_zone_cfg(&cfg, false, false); + + /* Check features are now gone */ dp_test_wait_for_pl_defrag("dp1T0", EXP_GONE); dp_test_wait_for_pl_fw("dp1T0", EXP_GONE); dp_test_wait_for_pl_defrag("dp1T1", EXP_GONE); dp_test_wait_for_pl_fw("dp1T1", EXP_GONE); + dp_test_wait_for_pl_defrag("dp1T2", EXP_GONE); + dp_test_wait_for_pl_fw("dp1T2", EXP_GONE); + dp_test_nl_del_ip_addr_and_connected("dp1T0", "2001:101:1::a0a:1fe/96"); dp_test_nl_del_ip_addr_and_connected("dp1T1", "2002:101:1::a0a:1fe/96"); dp_test_nl_del_ip_addr_and_connected("dp1T2", "3.3.3.3/24"); @@ -313,7 +660,7 @@ DP_START_TEST(npf_feat5, test) * make -j4 
dataplane_test_run CK_RUN_CASE=npf_feat8 */ DP_DECL_TEST_CASE(npf_feat, npf_feat8, NULL, NULL); -DP_START_TEST(npf_feat8, test) +DP_START_TEST_FULL_RUN(npf_feat8, test) { uint vrfid = 69; @@ -339,13 +686,16 @@ DP_START_TEST(npf_feat8, test) dp_test_wait_for_pl_defrag("dp1T0", EXP_PRESENT); dp_test_wait_for_pl_fw("dp1T0", EXP_PRESENT); + dp_test_wait_for_pl_nat64("dp1T0", EXP_PRESENT); dp_test_wait_for_pl_defrag("dp1T2", EXP_PRESENT); - dp_test_wait_for_pl_fw("dp1T2", EXP_PRESENT); + dp_test_wait_for_pl_fw("dp1T2", EXP_GONE); + dp_test_wait_for_pl_nat64("dp1T2", EXP_PRESENT); /* Check features on interface in different vrf */ dp_test_wait_for_pl_defrag("dp1T1", EXP_PRESENT); - dp_test_wait_for_pl_fw("dp1T1", EXP_PRESENT); + dp_test_wait_for_pl_fw("dp1T1", EXP_GONE); + dp_test_wait_for_pl_nat64("dp1T1", EXP_PRESENT); /* * Remove nat64 from dp1T0. defrag and fw features should still be @@ -355,12 +705,15 @@ DP_START_TEST(npf_feat8, test) dp_test_wait_for_pl_defrag("dp1T0", EXP_PRESENT); dp_test_wait_for_pl_fw("dp1T0", EXP_PRESENT); + dp_test_wait_for_pl_nat64("dp1T0", EXP_GONE); dp_test_wait_for_pl_defrag("dp1T2", EXP_GONE); dp_test_wait_for_pl_fw("dp1T2", EXP_GONE); + dp_test_wait_for_pl_nat64("dp1T2", EXP_GONE); dp_test_wait_for_pl_defrag("dp1T1", EXP_GONE); dp_test_wait_for_pl_fw("dp1T1", EXP_GONE); + dp_test_wait_for_pl_nat64("dp1T1", EXP_GONE); /* * Remove firewall from dp1T0 @@ -391,7 +744,7 @@ DP_START_TEST(npf_feat8, test) * make -j4 dataplane_test_run CK_RUN_CASE=npf_feat9 */ DP_DECL_TEST_CASE(npf_feat, npf_feat9, NULL, NULL); -DP_START_TEST(npf_feat9, test) +DP_START_TEST_FULL_RUN(npf_feat9, test) { uint vrfid = 69; @@ -417,9 +770,10 @@ DP_START_TEST(npf_feat9, test) dp_test_wait_for_pl_defrag("dp1T0", EXP_PRESENT); dp_test_wait_for_pl_fw("dp1T0", EXP_PRESENT); + dp_test_wait_for_pl_nat64("dp1T0", EXP_PRESENT); dp_test_wait_for_pl_defrag("dp1T2", EXP_PRESENT); - dp_test_wait_for_pl_fw("dp1T2", EXP_PRESENT); + dp_test_wait_for_pl_nat64("dp1T2", EXP_PRESENT); 
/* * Delete vrf and set ints back to default @@ -434,9 +788,10 @@ DP_START_TEST(npf_feat9, test) dp_test_wait_for_pl_defrag("dp1T0", EXP_PRESENT); dp_test_wait_for_pl_fw("dp1T0", EXP_PRESENT); + dp_test_wait_for_pl_nat64("dp1T0", EXP_PRESENT); dp_test_wait_for_pl_defrag("dp1T2", EXP_PRESENT); - dp_test_wait_for_pl_fw("dp1T2", EXP_PRESENT); + dp_test_wait_for_pl_nat64("dp1T2", EXP_PRESENT); /* * Remove nat64 from dp1T0. defrag and fw features should still be @@ -446,9 +801,10 @@ DP_START_TEST(npf_feat9, test) dp_test_wait_for_pl_defrag("dp1T0", EXP_PRESENT); dp_test_wait_for_pl_fw("dp1T0", EXP_PRESENT); + dp_test_wait_for_pl_nat64("dp1T0", EXP_GONE); dp_test_wait_for_pl_defrag("dp1T2", EXP_GONE); - dp_test_wait_for_pl_fw("dp1T2", EXP_GONE); + dp_test_wait_for_pl_nat64("dp1T2", EXP_GONE); /* * Remove firewall from dp1T0 @@ -468,12 +824,12 @@ DP_START_TEST(npf_feat9, test) /* - * npf_feat10 - firewall and nat64 rulesets, VRF 69 + * npf_feat10 - firewall rulesets, VRF 69 * * make -j4 dataplane_test_run CK_RUN_CASE=npf_feat10 */ DP_DECL_TEST_CASE(npf_feat, npf_feat10, NULL, NULL); -DP_START_TEST(npf_feat10, test) +DP_START_TEST_FULL_RUN(npf_feat10, test) { uint vrfid = 69; @@ -531,7 +887,7 @@ DP_START_TEST(npf_feat10, test) * make -j4 dataplane_test_run CK_RUN_CASE=npf_feat11 */ DP_DECL_TEST_CASE(npf_feat, npf_feat11, NULL, NULL); -DP_START_TEST(npf_feat11, test) +DP_START_TEST_FULL_RUN(npf_feat11, test) { struct dp_test_expected *exp; struct rte_mbuf *test_pak; @@ -563,7 +919,7 @@ DP_START_TEST(npf_feat11, test) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T0"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); /* Create pak we expect to receive on the tx ring */ exp = dp_test_exp_create(test_pak); @@ -573,7 +929,7 @@ DP_START_TEST(npf_feat11, test) (void)dp_test_pktmbuf_eth_init(dp_test_exp_get_pak(exp), nh_mac_str1, dp_test_intf_name2mac_str("dp1T1"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); 
dp_test_ipv4_decrement_ttl(dp_test_exp_get_pak(exp)); @@ -587,7 +943,7 @@ DP_START_TEST(npf_feat11, test) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T0"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); /* Create pak we expect to receive on the tx ring */ exp = dp_test_exp_create(test_pak); @@ -597,7 +953,7 @@ DP_START_TEST(npf_feat11, test) (void)dp_test_pktmbuf_eth_init(dp_test_exp_get_pak(exp), nh_mac_str2, dp_test_intf_name2mac_str("dp1T1"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); dp_test_ipv4_decrement_ttl(dp_test_exp_get_pak(exp)); @@ -616,3 +972,138 @@ DP_START_TEST(npf_feat11, test) } DP_END_TEST; +/* + * Test that the ipv4-fw-orig feature is enabled on *all* interfaces when it + * is attached to "global:". + * + * This is what happens when an 'originate' ruleset is configured on a + * loopback interface. + */ +DP_DECL_TEST_CASE(npf_feat, npf_orig_feat, NULL, NULL); + +DP_START_TEST_FULL_RUN(npf_orig_feat, test1) +{ + bool rv, debug = false; + + /* Setup interfaces and neighbours */ + dp_test_nl_add_ip_addr_and_connected("dp1T0", "1.1.1.1/24"); + dp_test_netlink_add_neigh("dp1T0", "1.1.1.2", + "aa:bb:cc:dd:1:a1"); + + rv = dp_pipeline_is_feature_enabled_by_inst("vyatta:ipv4-fw-orig", + "dpT10"); + dp_test_fail_unless(!rv, "ipv4-fw-orig is enabled"); + + dp_test_npf_cmd_fmt(debug, "npf-ut add fw:FW_ORIG 1 action=accept"); + dp_test_npf_cmd_fmt(debug, + "npf-ut attach global: originate fw:FW_ORIG"); + dp_test_npf_commit(); + + /* Check ipv4-fw-orig enabled on dpT10 */ + rv = dp_pipeline_is_feature_enabled_by_inst("vyatta:ipv4-fw-orig", + "dpT10"); + dp_test_fail_unless(rv, "ipv4-fw-orig not enabled on dpT10 " + "when attached to \"global:\""); + + /* Check ipv4-fw-orig is enabled on dpT11 */ + rv = dp_pipeline_is_feature_enabled_by_inst("vyatta:ipv4-fw-orig", + "dpT11"); + dp_test_fail_unless(rv, "ipv4-fw-orig not enabled on dpT11 " + "when attached to \"global:\""); + + /* Check ipv4-defrag-out-spath is 
enabled on dpT10 */ + rv = dp_pipeline_is_feature_enabled_by_inst( + "vyatta:ipv4-defrag-out-spath", "dpT10"); + dp_test_fail_unless(rv, "ipv4-defrag-out-spath not enabled on dpT10 " + "when attached to \"global:\""); + + /* Check ipv4-defrag-out-spath is enabled on dpT11 */ + rv = dp_pipeline_is_feature_enabled_by_inst( + "vyatta:ipv4-defrag-out-spath", "dpT11"); + dp_test_fail_unless(rv, "ipv4-defrag-out-spath not enabled on dpT11 " + "when attached to \"global:\""); + + /* Check ipv4-defrag-out is NOT enabled on dpT10 */ + rv = dp_pipeline_is_feature_enabled_by_inst( + "vyatta:ipv4-defrag-out", "dpT10"); + dp_test_fail_unless(!rv, "ipv4-defrag-out enabled on dpT10 " + "when attached to \"global:\""); + + dp_test_npf_cmd_fmt(debug, + "npf-ut detach global: originate fw:FW_ORIG"); + dp_test_npf_cmd_fmt(debug, "npf-ut delete fw:FW_ORIG"); + dp_test_npf_commit(); + + rv = dp_pipeline_is_feature_enabled_by_inst("vyatta:ipv4-fw-orig", + "dpT10"); + dp_test_fail_unless(!rv, "ipv4-fw-orig is enabled"); + + dp_test_nl_del_ip_addr_and_connected("dp1T0", "1.1.1.1/24"); + dp_test_netlink_del_neigh("dp1T0", "1.1.1.2", + "aa:bb:cc:dd:1:a1"); + +} DP_END_TEST; + +/* + * Test that the ipv4-fw-orig feature is enabled only on one interface when it + * is attached to that interface. 
+ */ +DP_START_TEST_FULL_RUN(npf_orig_feat, test2) +{ + bool rv, debug = false; + + /* Setup interfaces and neighbours */ + dp_test_nl_add_ip_addr_and_connected("dp1T0", "1.1.1.1/24"); + dp_test_netlink_add_neigh("dp1T0", "1.1.1.2", + "aa:bb:cc:dd:1:a1"); + + rv = dp_pipeline_is_feature_enabled_by_inst("vyatta:ipv4-fw-orig", + "dpT10"); + dp_test_fail_unless(!rv, "ipv4-fw-orig is enabled"); + + dp_test_npf_cmd_fmt(debug, "npf-ut add fw:FW_ORIG 1 action=accept"); + dp_test_npf_cmd_fmt(debug, + "npf-ut attach interface:dpT10 " + "originate fw:FW_ORIG"); + dp_test_npf_commit(); + + /* Check ipv4-fw-orig is enabled on dpT10 */ + rv = dp_pipeline_is_feature_enabled_by_inst("vyatta:ipv4-fw-orig", + "dpT10"); + dp_test_fail_unless(rv, "ipv4-fw-orig not enabled on dpT10"); + + /* Check ipv4-fw-orig is NOT enabled on dpT11 */ + rv = dp_pipeline_is_feature_enabled_by_inst("vyatta:ipv4-fw-orig", + "dpT11"); + dp_test_fail_unless(!rv, "ipv4-fw-orig enabled on dpT11"); + + /* Check ipv4-defrag-out-spath is enabled on dpT10 */ + rv = dp_pipeline_is_feature_enabled_by_inst( + "vyatta:ipv4-defrag-out-spath", "dpT10"); + dp_test_fail_unless(rv, "ipv4-defrag-out-spath not enabled on dpT10"); + + /* Check ipv4-defrag-out-spath is NOT enabled on dpT11 */ + rv = dp_pipeline_is_feature_enabled_by_inst( + "vyatta:ipv4-defrag-out-spath", "dpT11"); + dp_test_fail_unless(!rv, "ipv4-defrag-out-spath not enabled on dpT11"); + + /* Check ipv4-defrag-out is NOT enabled on dpT10 */ + rv = dp_pipeline_is_feature_enabled_by_inst( + "vyatta:ipv4-defrag-out", "dpT10"); + dp_test_fail_unless(!rv, "ipv4-defrag-out enabled on dpT10"); + + dp_test_npf_cmd_fmt(debug, + "npf-ut detach interface:dpT10 " + "originate fw:FW_ORIG"); + dp_test_npf_cmd_fmt(debug, "npf-ut delete fw:FW_ORIG"); + dp_test_npf_commit(); + + rv = dp_pipeline_is_feature_enabled_by_inst("vyatta:ipv4-fw-orig", + "dpT10"); + dp_test_fail_unless(!rv, "ipv4-fw-orig is enabled"); + + dp_test_nl_del_ip_addr_and_connected("dp1T0", 
"1.1.1.1/24"); + dp_test_netlink_del_neigh("dp1T0", "1.1.1.2", + "aa:bb:cc:dd:1:a1"); + +} DP_END_TEST; diff --git a/tests/whole_dp/src/dp_test_npf_fw.c b/tests/whole_dp/src/dp_test_npf_fw.c index 8ebb82ee..4151f854 100644 --- a/tests/whole_dp/src/dp_test_npf_fw.c +++ b/tests/whole_dp/src/dp_test_npf_fw.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2018-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015-2017 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -20,12 +20,12 @@ #include "dp_test.h" #include "dp_test_str.h" -#include "dp_test_lib.h" +#include "dp_test_lib_internal.h" #include "dp_test_lib_exp.h" -#include "dp_test_lib_intf.h" +#include "dp_test_lib_intf_internal.h" #include "dp_test_lib_pkt.h" -#include "dp_test_pktmbuf_lib.h" -#include "dp_test_netlink_state.h" +#include "dp_test_pktmbuf_lib_internal.h" +#include "dp_test_netlink_state_internal.h" #include "dp_test_console.h" #include "dp_test_json_utils.h" #include "dp_test_npf_lib.h" @@ -460,7 +460,7 @@ DP_START_TEST(fw_ipv4, fwd) struct dp_test_pkt_desc_t v4_pktA = { .text = "A fwd IPv4 n1->n1", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "1.1.1.2", .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = "2.2.2.1", @@ -480,7 +480,7 @@ DP_START_TEST(fw_ipv4, fwd) struct dp_test_pkt_desc_t v4_pktB = { .text = "B rev IPv4 n1->n1", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "2.2.2.1", .l2_src = "aa:bb:cc:dd:2:b1", .l3_dst = "1.1.1.2", @@ -499,7 +499,7 @@ DP_START_TEST(fw_ipv4, fwd) struct dp_test_pkt_desc_t v4_pktD = { .text = "D rev IPv4 n2->n2", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "2.2.2.3", .l2_src = "aa:bb:cc:dd:2:b3", .l3_dst = "1.1.1.3", @@ -608,7 +608,7 @@ DP_START_TEST(fw_ipv6, fwd) struct dp_test_pkt_desc_t v6_pktA = { .text = "A fwd IPv6 n1->n1", .len = 
20, - .ether_type = ETHER_TYPE_IPv6, + .ether_type = RTE_ETHER_TYPE_IPV6, .l3_src = "2001:1:1::2", .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = "2002:2:2::1", @@ -628,7 +628,7 @@ DP_START_TEST(fw_ipv6, fwd) struct dp_test_pkt_desc_t v6_pktB = { .text = "B rev IPv6 n1->n1", .len = 20, - .ether_type = ETHER_TYPE_IPv6, + .ether_type = RTE_ETHER_TYPE_IPV6, .l3_src = "2002:2:2::1", .l2_src = "aa:bb:cc:dd:2:b1", .l3_dst = "2001:1:1::2", @@ -647,7 +647,7 @@ DP_START_TEST(fw_ipv6, fwd) struct dp_test_pkt_desc_t v6_pktD = { .text = "D fwd IPv6 n2->n2", .len = 20, - .ether_type = ETHER_TYPE_IPv6, + .ether_type = RTE_ETHER_TYPE_IPV6, .l3_src = "2002:2:2::3", .l2_src = "aa:bb:cc:dd:2:b3", .l3_dst = "2001:1:1::3", @@ -749,7 +749,7 @@ DP_START_TEST(fw_ipv4, matching) struct dp_test_pkt_desc_t v4_pkt = { .text = "IPv4 UDP", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "1.1.1.2", .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = "2.2.2.1", @@ -772,7 +772,7 @@ DP_START_TEST(fw_ipv4, matching) */ struct dp_test_npf_npf_t npf_ipv4_addr[] = { {DP_TEST_FWD_FORWARDED, ""}, - {DP_TEST_FWD_FORWARDED, "proto=17"}, + {DP_TEST_FWD_FORWARDED, "proto-final=17"}, {DP_TEST_FWD_FORWARDED, "src-addr=1.1.1.0/24"}, {DP_TEST_FWD_FORWARDED, "src-addr=!2.1.1.0/24"}, {DP_TEST_FWD_FORWARDED, "src-addr=1.1.1.2"}, @@ -783,17 +783,17 @@ DP_START_TEST(fw_ipv4, matching) {DP_TEST_FWD_FORWARDED, "src-addr=1.1.1.2 dst-addr=2.2.2.1"}, {DP_TEST_FWD_FORWARDED, "src-addr=!1.1.4.0/24"}, {DP_TEST_FWD_FORWARDED, "src-addr=!1.1.4.2"}, - {DP_TEST_FWD_FORWARDED, "proto=17 src-addr=1.1.1.2 " + {DP_TEST_FWD_FORWARDED, "proto-final=17 src-addr=1.1.1.2 " "src-port=41000 dst-addr=2.2.2.1 dst-port=1000"}, {DP_TEST_FWD_FORWARDED, "src-addr-group=ADDR_GRP0"}, {DP_TEST_FWD_FORWARDED, "dst-addr-group=ADDR_GRP1"}, - {DP_TEST_FWD_FORWARDED, "proto=17 src-port=41000"}, - {DP_TEST_FWD_FORWARDED, "proto=17 dst-port=1000"}, - {DP_TEST_FWD_FORWARDED, "proto=17 src-port-group=PG1"}, - {DP_TEST_FWD_FORWARDED, 
"proto=17 dst-port-group=PG2"}, - {DP_TEST_FWD_FORWARDED, "proto=17 dst-port-group=PG3"}, - {DP_TEST_FWD_FORWARDED, "proto=17 dst-port-group=PG5"}, - {DP_TEST_FWD_DROPPED, "proto=6"}, + {DP_TEST_FWD_FORWARDED, "proto-final=17 src-port=41000"}, + {DP_TEST_FWD_FORWARDED, "proto-final=17 dst-port=1000"}, + {DP_TEST_FWD_FORWARDED, "proto-final=17 src-port-group=PG1"}, + {DP_TEST_FWD_FORWARDED, "proto-final=17 dst-port-group=PG2"}, + {DP_TEST_FWD_FORWARDED, "proto-final=17 dst-port-group=PG3"}, + {DP_TEST_FWD_FORWARDED, "proto-final=17 dst-port-group=PG5"}, + {DP_TEST_FWD_DROPPED, "proto-final=6"}, {DP_TEST_FWD_DROPPED, "src-addr=1.1.2.0/24"}, {DP_TEST_FWD_DROPPED, "src-addr=!1.1.1.0/24"}, {DP_TEST_FWD_DROPPED, "src-addr=1.1.1.3"}, @@ -802,13 +802,13 @@ DP_START_TEST(fw_ipv4, matching) {DP_TEST_FWD_DROPPED, "src-addr=1.1.1.0/24 dst-addr=2.2.1.0/24"}, {DP_TEST_FWD_DROPPED, "src-addr=1.1.1.3 dst-addr=2.2.2.1"}, - {DP_TEST_FWD_DROPPED, "proto=17 src-addr=1.1.1.2 " + {DP_TEST_FWD_DROPPED, "proto-final=17 src-addr=1.1.1.2 " "src-port=41001 dst-addr=2.2.2.1 dst-port=1000"}, {DP_TEST_FWD_DROPPED, "src-addr-group=ADDR_GRP2"}, {DP_TEST_FWD_DROPPED, "dst-addr-group=ADDR_GRP3"}, - {DP_TEST_FWD_DROPPED, "proto=17 src-port=41001"}, - {DP_TEST_FWD_DROPPED, "proto=17 dst-port=1001"}, - {DP_TEST_FWD_DROPPED, "proto=17 dst-port-group=PG4"}, + {DP_TEST_FWD_DROPPED, "proto-final=17 src-port=41001"}, + {DP_TEST_FWD_DROPPED, "proto-final=17 dst-port=1001"}, + {DP_TEST_FWD_DROPPED, "proto-final=17 dst-port-group=PG4"}, }; struct dp_test_npf_rule_t rules[] = { @@ -937,7 +937,7 @@ DP_START_TEST(fw_ipv6, matching) struct dp_test_pkt_desc_t v6_pkt = { .text = "IPv6 UDP", .len = 20, - .ether_type = ETHER_TYPE_IPv6, + .ether_type = RTE_ETHER_TYPE_IPV6, .l3_src = "2001:1:1::2", .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = "2002:2:2::1", @@ -960,7 +960,7 @@ DP_START_TEST(fw_ipv6, matching) */ struct dp_test_npf_npf_t npf_ipv6_addr[] = { {DP_TEST_FWD_FORWARDED, ""}, - {DP_TEST_FWD_FORWARDED, "proto=17"}, 
+ {DP_TEST_FWD_FORWARDED, "proto-final=17"}, {DP_TEST_FWD_FORWARDED, "src-addr=2001:1:1::/64"}, {DP_TEST_FWD_FORWARDED, "src-addr=2001:1:1::2"}, {DP_TEST_FWD_FORWARDED, "src-addr=2001:1:1::/126"}, @@ -1020,18 +1020,18 @@ DP_START_TEST(fw_ipv6, matching) "src-addr=2001:1:1::2 dst-addr=2002:2:2::1"}, {DP_TEST_FWD_FORWARDED, "src-addr=!2001:1:4::/64"}, {DP_TEST_FWD_FORWARDED, - "proto=17 src-addr=2001:1:1::2 src-port=41000 " + "proto-final=17 src-addr=2001:1:1::2 src-port=41000 " "dst-addr=2002:2:2::1 dst-port=1000"}, {DP_TEST_FWD_FORWARDED, "src-addr-group=ADDR_GRP0"}, {DP_TEST_FWD_FORWARDED, "dst-addr-group=ADDR_GRP1"}, - {DP_TEST_FWD_FORWARDED, "proto=17 src-port=41000"}, - {DP_TEST_FWD_FORWARDED, "proto=17 dst-port=1000"}, - {DP_TEST_FWD_FORWARDED, "proto=17 src-port-group=PG1"}, - {DP_TEST_FWD_FORWARDED, "proto=17 dst-port-group=PG2"}, - {DP_TEST_FWD_FORWARDED, "proto=17 dst-port-group=PG3"}, - {DP_TEST_FWD_FORWARDED, "proto=17 dst-port-group=PG5"}, + {DP_TEST_FWD_FORWARDED, "proto-final=17 src-port=41000"}, + {DP_TEST_FWD_FORWARDED, "proto-final=17 dst-port=1000"}, + {DP_TEST_FWD_FORWARDED, "proto-final=17 src-port-group=PG1"}, + {DP_TEST_FWD_FORWARDED, "proto-final=17 dst-port-group=PG2"}, + {DP_TEST_FWD_FORWARDED, "proto-final=17 dst-port-group=PG3"}, + {DP_TEST_FWD_FORWARDED, "proto-final=17 dst-port-group=PG5"}, {DP_TEST_FWD_FORWARDED, "src-addr=!2001:1:1::3"}, - {DP_TEST_FWD_DROPPED, "proto=6"}, + {DP_TEST_FWD_DROPPED, "proto-final=6"}, {DP_TEST_FWD_DROPPED, "src-addr=2001:1:2::/64"}, {DP_TEST_FWD_DROPPED, "src-addr=2001:1:1::3"}, {DP_TEST_FWD_DROPPED, "dst-addr=2002:2:1::/64"}, @@ -1041,13 +1041,13 @@ DP_START_TEST(fw_ipv6, matching) {DP_TEST_FWD_DROPPED, "src-addr=2001:1:1::3 dst-addr=2002:2:2::1"}, {DP_TEST_FWD_DROPPED, - "proto=17 src-addr=2001:1:1::2 src-port=41001 " + "proto-final=17 src-addr=2001:1:1::2 src-port=41001 " "dst-addr=2002:2:2::1 dst-port=1000"}, {DP_TEST_FWD_DROPPED, "src-addr-group=ADDR_GRP2"}, {DP_TEST_FWD_DROPPED, 
"dst-addr-group=ADDR_GRP3"}, - {DP_TEST_FWD_DROPPED, "proto=17 src-port=41001"}, - {DP_TEST_FWD_DROPPED, "proto=17 dst-port=1001"}, - {DP_TEST_FWD_DROPPED, "proto=17 dst-port-group=PG4"}, + {DP_TEST_FWD_DROPPED, "proto-final=17 src-port=41001"}, + {DP_TEST_FWD_DROPPED, "proto-final=17 dst-port=1001"}, + {DP_TEST_FWD_DROPPED, "proto-final=17 dst-port-group=PG4"}, {DP_TEST_FWD_DROPPED, "src-addr=!2001:1:1::2"}, }; @@ -1211,7 +1211,7 @@ DP_START_TEST(fw_ipv4, stateful) struct dp_test_pkt_desc_t v4_pktA = { .text = "A fwd IPv4 n1->n1", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "1.1.1.2", .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = "2.2.2.1", @@ -1329,7 +1329,7 @@ DP_START_TEST(fw_ipv4, large_ruleset) struct dp_test_pkt_desc_t v4_pkt = { .text = "IPv4 UDP", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "1.1.1.2", .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = "2.2.2.1", @@ -1497,7 +1497,7 @@ DP_START_TEST(fw_ipv4, port_range1) struct dp_test_pkt_desc_t v4_pkt = { .text = "IPv4 UDP", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "1.1.1.2", .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = "2.2.2.1", @@ -1516,13 +1516,13 @@ DP_START_TEST(fw_ipv4, port_range1) struct dp_test_npf_rule_t ruleset1[] = { {"10", PASS, STATELESS, - "proto=17 dst-addr=2.2.2.1 dst-port-group=PG1"}, + "proto-final=17 dst-addr=2.2.2.1 dst-port-group=PG1"}, RULE_DEF_BLOCK, NULL_RULE }; struct dp_test_npf_rule_t ruleset2[] = { {"10", BLOCK, STATELESS, - "proto=17 dst-addr=2.2.2.1 dst-port-group=PG1"}, + "proto-final=17 dst-addr=2.2.2.1 dst-port-group=PG1"}, RULE_DEF_PASS, NULL_RULE }; @@ -1702,7 +1702,7 @@ DP_START_TEST(fw_ipv4, mrules) struct dp_test_pkt_desc_t v4_pkt1 = { .text = "IPv4 UDP", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "1.1.1.2", .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = "2.2.2.1", @@ -1721,7 +1721,7 @@ 
DP_START_TEST(fw_ipv4, mrules) struct dp_test_pkt_desc_t v4_pkt2 = { .text = "ICMP IPv4", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "1.1.1.2", .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = "2.2.2.3", @@ -1742,9 +1742,11 @@ DP_START_TEST(fw_ipv4, mrules) }; struct dp_test_npf_rule_t ruleset1[] = { - {"10", PASS, STATEFUL, "proto=17 dst-addr=2.2.2.1 dst-port=81"}, - {"20", PASS, STATEFUL, "proto=17 dst-addr=2.2.2.1 dst-port=80"}, - {"30", PASS, STATEFUL, "proto=1 dst-addr=2.2.2.3"}, + {"10", PASS, STATEFUL, + "proto-final=17 dst-addr=2.2.2.1 dst-port=81"}, + {"20", PASS, STATEFUL, + "proto-final=17 dst-addr=2.2.2.1 dst-port=80"}, + {"30", PASS, STATEFUL, "proto-final=1 dst-addr=2.2.2.3"}, RULE_DEF_BLOCK, NULL_RULE }; @@ -1780,7 +1782,8 @@ DP_START_TEST(fw_ipv4, mrules) /* Run the test */ dp_test_pak_receive(test_pak, pkt->rx_intf, test_exp); - dp_test_npf_verify_rule_pkt_count("to proto 17, dest 2.2.2.1 port 80", + dp_test_npf_verify_rule_pkt_count( + "to proto-final 17, dest 2.2.2.1 port 80", &fw, fw.rules[1].rule, 1); /* Verify a session was created */ @@ -1802,7 +1805,7 @@ DP_START_TEST(fw_ipv4, mrules) /* Run the test */ dp_test_pak_receive(test_pak, pkt->rx_intf, test_exp); - dp_test_npf_verify_rule_pkt_count("to proto 1, dest 2.2.2.3", + dp_test_npf_verify_rule_pkt_count("to proto-final 1, dest 2.2.2.3", &fw, fw.rules[2].rule, 1); /* Verify a session was created */ @@ -1824,7 +1827,7 @@ DP_START_TEST(fw_ipv4, mrules) /* Run the test */ dp_test_pak_receive(test_pak, pkt->rx_intf, test_exp); - dp_test_npf_verify_rule_pkt_count("to proto 1, dest 2.2.2.1", + dp_test_npf_verify_rule_pkt_count("to proto-final 1, dest 2.2.2.1", &fw, fw.rules[3].rule, 1); @@ -1856,7 +1859,7 @@ DP_START_TEST(fw_ipv6, mrules) struct dp_test_pkt_desc_t v6_pkt1 = { .text = "IPv6 UDP", .len = 20, - .ether_type = ETHER_TYPE_IPv6, + .ether_type = RTE_ETHER_TYPE_IPV6, .l3_src = "2001:1:1::2", .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = "2002:2:2::1", @@ 
-1875,7 +1878,7 @@ DP_START_TEST(fw_ipv6, mrules) struct dp_test_pkt_desc_t v6_pkt2 = { .text = "ICMP IPv6", .len = 20, - .ether_type = ETHER_TYPE_IPv6, + .ether_type = RTE_ETHER_TYPE_IPV6, .l3_src = "2001:1:1::2", .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = "2002:2:2::3", @@ -1894,10 +1897,10 @@ DP_START_TEST(fw_ipv6, mrules) struct dp_test_npf_rule_t ruleset1[] = { {"10", PASS, STATEFUL, - "proto=17 dst-addr=2002:2:2::1 dst-port=81"}, + "proto-final=17 dst-addr=2002:2:2::1 dst-port=81"}, {"20", PASS, STATEFUL, - "proto=17 dst-addr=2002:2:2::1 dst-port=80"}, - {"30", PASS, STATEFUL, "proto=58 dst-addr=2002:2:2::3"}, + "proto-final=17 dst-addr=2002:2:2::1 dst-port=80"}, + {"30", PASS, STATEFUL, "proto-final=58 dst-addr=2002:2:2::3"}, RULE_DEF_BLOCK, NULL_RULE }; @@ -1934,7 +1937,7 @@ DP_START_TEST(fw_ipv6, mrules) dp_test_pak_receive(test_pak, pkt->rx_intf, test_exp); dp_test_npf_verify_rule_pkt_count( - "to proto 17, dest 2002:2:2::1 port 80", + "to proto-final 17, dest 2002:2:2::1 port 80", &fw, fw.rules[1].rule, 1); /* Verify a session was created */ @@ -1956,7 +1959,7 @@ DP_START_TEST(fw_ipv6, mrules) /* Run the test */ dp_test_pak_receive(test_pak, pkt->rx_intf, test_exp); - dp_test_npf_verify_rule_pkt_count("to proto 58, dest 2002:2:2::3", + dp_test_npf_verify_rule_pkt_count("to proto-final 58, dest 2002:2:2::3", &fw, fw.rules[2].rule, 1); /* Verify a session was created */ @@ -1978,7 +1981,7 @@ DP_START_TEST(fw_ipv6, mrules) /* Run the test */ dp_test_pak_receive(test_pak, pkt->rx_intf, test_exp); - dp_test_npf_verify_rule_pkt_count("to proto 58, dest 2002:2:2::1", + dp_test_npf_verify_rule_pkt_count("to proto-final 58, dest 2002:2:2::1", &fw, fw.rules[3].rule, 1); @@ -2010,7 +2013,7 @@ DP_START_TEST(fw_ipv4, address_groups) struct dp_test_pkt_desc_t v4_pkt1 = { .text = "IPv4 UDP", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "1.1.1.2", .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = "2.2.2.1", @@ -2214,7 +2217,7 @@ 
DP_START_TEST(fw_ipv4, macvlan) dp_test_pktmbuf_eth_init(test_pak, macvlan_mac, DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); exp = dp_test_exp_create(test_pak); dp_test_exp_set_fwd_status(exp, DP_TEST_FWD_DROPPED); @@ -2232,13 +2235,13 @@ DP_START_TEST(fw_ipv4, macvlan) dp_test_pktmbuf_eth_init(test_pak, macvlan_mac, DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); exp = dp_test_exp_create(test_pak); dp_test_pktmbuf_eth_init(dp_test_exp_get_pak(exp), nh_mac_str, dp_test_intf_name2mac_str("dp2T1"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); dp_test_ipv4_decrement_ttl(dp_test_exp_get_pak(exp)); dp_test_exp_set_oif_name(exp, "dp2T1"); @@ -2303,7 +2306,7 @@ DP_START_TEST(fw_ipv4, macvlan_multicast) dp_test_pktmbuf_eth_init(test_pak, vrrp_multicast_mac, DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); exp = dp_test_exp_create(test_pak); dp_test_exp_set_fwd_status(exp, DP_TEST_FWD_DROPPED); @@ -2321,13 +2324,13 @@ DP_START_TEST(fw_ipv4, macvlan_multicast) dp_test_pktmbuf_eth_init(test_pak, vrrp_multicast_mac, DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); exp = dp_test_exp_create(test_pak); dp_test_pktmbuf_eth_init(dp_test_exp_get_pak(exp), nh_mac_str, dp_test_intf_name2mac_str("dp1T1"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); dp_test_ipv4_decrement_ttl(dp_test_exp_get_pak(exp)); dp_test_exp_set_oif_name(exp, "dp1T1"); diff --git a/tests/whole_dp/src/dp_test_npf_fw_ipv6.c b/tests/whole_dp/src/dp_test_npf_fw_ipv6.c index 6c55a2c3..276d2bec 100644 --- a/tests/whole_dp/src/dp_test_npf_fw_ipv6.c +++ b/tests/whole_dp/src/dp_test_npf_fw_ipv6.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2018-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2016 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -20,12 +20,12 @@ #include "dp_test.h" #include "dp_test_str.h" -#include "dp_test_lib.h" +#include "dp_test_lib_internal.h" #include "dp_test_lib_exp.h" -#include "dp_test_lib_intf.h" +#include "dp_test_lib_intf_internal.h" #include "dp_test_lib_pkt.h" -#include "dp_test_pktmbuf_lib.h" -#include "dp_test_netlink_state.h" +#include "dp_test_pktmbuf_lib_internal.h" +#include "dp_test_netlink_state_internal.h" #include "dp_test_npf_lib.h" #include "dp_test_npf_fw_lib.h" @@ -39,8 +39,8 @@ DP_DECL_TEST_CASE(npf_fw_ipv6, npf_ipv6, NULL, NULL); /* * This test checks that a packet with an ipv6 routing header matches * against a rule trying to match one. Note that the header is placed - * as a second extension header, to ensure it is only the first header - * that is looked at. + * as a second extension header, to ensure it is not only the first + * header that is looked at. */ DP_START_TEST(npf_ipv6, ipv6_routing_hdr) { @@ -62,10 +62,9 @@ DP_START_TEST(npf_ipv6, ipv6_routing_hdr) .pass = PASS, .stateful = STATELESS, /* - * 43 is the value of protocol ipv6-route, - * and 1 is the ipv6-route type + * 1 is the ipv6-route type in the ipv6 RH */ - .npf = "proto=43 ipv6-route=1" + .npf = "ipv6-route=1" }, RULE_DEF_BLOCK, NULL_RULE @@ -99,7 +98,7 @@ DP_START_TEST(npf_ipv6, ipv6_routing_hdr) rp = dp_test_pktmbuf_eth_init(pak, dp_test_intf_name2mac_str("dp1T0"), - "aa:bb:cc:dd:1:a1", ETHER_TYPE_IPv6); + "aa:bb:cc:dd:1:a1", RTE_ETHER_TYPE_IPV6); dp_test_assert_internal(rp != NULL); ip6 = dp_test_pktmbuf_ip6_init(pak, "2001:1:1::2", @@ -146,7 +145,7 @@ DP_START_TEST(npf_ipv6, ipv6_routing_hdr) (void)dp_test_pktmbuf_eth_init(dp_test_exp_get_pak(exp), "aa:bb:cc:dd:2:b1", dp_test_intf_name2mac_str("dp2T1"), - ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV6); dp_test_ipv6_decrement_ttl(dp_test_exp_get_pak(exp)); diff --git a/tests/whole_dp/src/dp_test_npf_fw_lib.c b/tests/whole_dp/src/dp_test_npf_fw_lib.c index 0c1e0abe..f1c79d42 100644 --- a/tests/whole_dp/src/dp_test_npf_fw_lib.c +++ 
b/tests/whole_dp/src/dp_test_npf_fw_lib.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2021, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -16,16 +16,228 @@ #include "main.h" #include "dp_test.h" -#include "dp_test_lib.h" -#include "dp_test_lib_intf.h" -#include "dp_test_pktmbuf_lib.h" -#include "dp_test_netlink_state.h" +#include "dp_test_lib_internal.h" +#include "dp_test_lib_intf_internal.h" +#include "dp_test_pktmbuf_lib_internal.h" +#include "dp_test_netlink_state_internal.h" #include "dp_test_console.h" #include "dp_test_json_utils.h" #include "dp_test_npf_lib.h" #include "dp_test_npf_fw_lib.h" +/* + * npf Zones + */ +static void dpt_zone_cfg_zone(struct dpt_zone *zn, + bool add, bool debug) +{ + char rname[IFNAMSIZ]; + uint i; + + if (!zn || !zn->name) + return; + + if (add) { + dp_test_npf_cmd_fmt(debug, "npf-ut zone add %s", zn->name); + if (zn->local) + dp_test_npf_cmd_fmt(debug, "npf-ut zone local %s set", + zn->name); + + /* Add interfaces to zone */ + for (i = 0; i < ARRAY_SIZE(zn->intf); i++) { + if (!zn->intf[i]) + continue; + + dp_test_npf_cmd_fmt(debug, + "npf-ut zone intf add %s %s", + zn->name, + dp_test_intf_real(zn->intf[i], + rname)); + } + } else { + for (i = 0; i < ARRAY_SIZE(zn->intf); i++) { + if (!zn->intf[i]) + continue; + + dp_test_npf_cmd_fmt(debug, + "npf-ut zone intf remove %s %s", + zn->name, + dp_test_intf_real(zn->intf[i], + rname)); + } + + if (zn->local) + dp_test_npf_cmd_fmt(debug, "npf-ut zone local %s clear", + zn->name); + dp_test_npf_cmd_fmt(debug, "npf-ut zone remove %s", zn->name); + } +} + +static void +dpt_zone_cfg_rule(struct dpt_zone *from, struct dpt_zone *to, + struct dpt_zone_rule *rl, bool add, bool debug) +{ + char attach_point[100]; + + if (!rl || !rl->name) + return; + + snprintf(attach_point, sizeof(attach_point), + "%s>%s", from->name, 
to->name); + + struct dp_test_npf_rule_t rule[] = { + { + .rule = "1", + .pass = rl->pass, + .stateful = rl->stateful, + .npf = rl->npf, + }, + NULL_RULE + }; + + struct dp_test_npf_ruleset_t rlset = { + .rstype = "zone", + .name = rl->name, + .enable = 1, + .attach_point = attach_point, + .fwd = 0, + .dir = "out", + .rules = rule + }; + + if (add) { + dp_test_npf_cmd_fmt(debug, "npf-ut zone policy add %s %s", + from->name, to->name); + + dp_test_npf_fw_add(&rlset, debug); + } else { + dp_test_npf_fw_del(&rlset, debug); + + dp_test_npf_cmd_fmt(debug, "npf-ut zone policy remove %s %s", + from->name, to->name); + } +} + +static void dpt_zone_cfg_rules(struct dpt_zone_cfg *cfg, bool add, bool debug) +{ + dpt_zone_cfg_rule(&cfg->public, &cfg->private, + &cfg->pub_to_priv, add, debug); + + dpt_zone_cfg_rule(&cfg->private, &cfg->public, + &cfg->priv_to_pub, add, debug); + + dpt_zone_cfg_rule(&cfg->local, &cfg->private, + &cfg->local_to_priv, add, debug); + + dpt_zone_cfg_rule(&cfg->private, &cfg->local, + &cfg->priv_to_local, add, debug); + + dpt_zone_cfg_rule(&cfg->local, &cfg->public, + &cfg->local_to_pub, add, debug); + + dpt_zone_cfg_rule(&cfg->public, &cfg->local, + &cfg->pub_to_local, add, debug); +} + +void dpt_zone_cfg(struct dpt_zone_cfg *cfg, bool add, bool debug) +{ + if (add) { + dpt_zone_cfg_zone(&cfg->private, add, debug); + dpt_zone_cfg_zone(&cfg->public, add, debug); + dpt_zone_cfg_zone(&cfg->local, add, debug); + + dpt_zone_cfg_rules(cfg, add, debug); + } else { + dpt_zone_cfg_rules(cfg, add, debug); + + dpt_zone_cfg_zone(&cfg->private, add, debug); + dpt_zone_cfg_zone(&cfg->public, add, debug); + dpt_zone_cfg_zone(&cfg->local, add, debug); + } +} + +void +_dp_test_zone_add(const char *zname, const char *file, int line) +{ + char cmd[TEST_MAX_CMD_LEN]; + + spush(cmd, sizeof(cmd), "npf-ut zone add %s", zname); + + _dp_test_npf_cmd(cmd, false, file, line); +} + +void +_dp_test_zone_remove(const char *zname, const char *file, int line) +{ + char 
cmd[TEST_MAX_CMD_LEN]; + + spush(cmd, sizeof(cmd), "npf-ut zone remove %s", zname); + + _dp_test_npf_cmd(cmd, false, file, line); +} + +void +_dp_test_zone_local(const char *zname, bool set, const char *file, int line) +{ + char cmd[TEST_MAX_CMD_LEN]; + + spush(cmd, sizeof(cmd), "npf-ut zone local %s %s", + zname, set ? "set" : "clear"); + + _dp_test_npf_cmd(cmd, false, file, line); +} + +void +_dp_test_zone_policy_add(const char *zone, const char *policy, + const char *file, int line) +{ + char cmd[TEST_MAX_CMD_LEN]; + + spush(cmd, sizeof(cmd), "npf-ut zone policy add %s %s", + zone, policy); + + _dp_test_npf_cmd(cmd, false, file, line); +} + +void +_dp_test_zone_policy_del(const char *zone, const char *policy, + const char *file, int line) +{ + char cmd[TEST_MAX_CMD_LEN]; + + spush(cmd, sizeof(cmd), "npf-ut zone policy remove %s %s", + zone, policy); + + _dp_test_npf_cmd(cmd, false, file, line); +} + +void +_dp_test_zone_intf_add(const char *zname, const char *ifname, + const char *file, int line) +{ + char cmd[TEST_MAX_CMD_LEN]; + char rname[IFNAMSIZ]; + + spush(cmd, sizeof(cmd), "npf-ut zone intf add %s %s", + zname, dp_test_intf_real(ifname, rname)); + + _dp_test_npf_cmd(cmd, false, file, line); +} + +void +_dp_test_zone_intf_del(const char *zname, const char *ifname, + const char *file, int line) +{ + char cmd[TEST_MAX_CMD_LEN]; + char rname[IFNAMSIZ]; + + spush(cmd, sizeof(cmd), "npf-ut zone intf remove %s %s", + zname, dp_test_intf_real(ifname, rname)); + + _dp_test_npf_cmd(cmd, false, file, line); +} + /* * Create an address-group. Table is a number string, e.g. "0" or "1". */ @@ -121,3 +333,409 @@ _dp_test_npf_fw_port_group_del(const char *name, _dp_test_npf_cmd_fmt(false, file, line, "npf-ut delete port-group:%s", name); } + + +/* + * Simple wrapper around receiving an IPv4 or IPv6 UDP packet. + * + * 'post' params are optional (use if NATing etc.). + * + * If rx_intf is NULL then packet is sent via the vRouter spath + * + * e.g. 
+ * dpt_udp("dp1T2", "aa:bb:cc:dd:3:a1", 0, + * "3.3.3.11", 41003, "4.4.4.11", 1004, + * NULL, 0, NULL, 0, + * "aa:bb:cc:dd:4:a1", 0, "dp1T3", + * DP_TEST_FWD_FORWARDED); + * + */ +void +_dpt_udp(const char *rx_intf, const char *pre_smac, + const char *pre_saddr, uint16_t pre_sport, + const char *pre_daddr, uint16_t pre_dport, + const char *post_saddr, uint16_t post_sport, + const char *post_daddr, uint16_t post_dport, + const char *post_dmac, const char *tx_intf, + int status, int pre_vlan, int post_vlan, + const char *pre_pl, int pre_len, + const char *post_pl, int post_len, + const char *file, const char *func, int line) +{ + struct dp_test_expected *test_exp; + struct rte_mbuf *test_pak, *exp_pak; + int len = 20; + bool v4; + uint16_t ether_type; + + if (strchr(pre_saddr, ':')) + v4 = false; + else + v4 = true; + + ether_type = v4 ? RTE_ETHER_TYPE_IPV4 : RTE_ETHER_TYPE_IPV6; + + /* + * If tx_intf is NULL then assume pkt is intf to local. + * If rx_intf is NULL then assume pkt is local to intf. + */ + if (!rx_intf && !tx_intf) + _dp_test_fail(file, line, + "Both rx_intf and tx_intf can be NULL"); + + /* Pre UDP packet */ + struct dp_test_pkt_desc_t pre_pkt_UDP = { + .text = "UDP pre", + .len = pre_len ? pre_len : len, + .ether_type = ether_type, + .l3_src = pre_saddr, + .l2_src = pre_smac, + .l3_dst = pre_daddr, + .l2_dst = post_dmac, + .proto = IPPROTO_UDP, + .l4 = { + .udp = { + .sport = pre_sport, + .dport = pre_dport + } + }, + .rx_intf = rx_intf ? rx_intf : tx_intf, + .tx_intf = tx_intf ? tx_intf : rx_intf, + }; + + /* If 'post' values NULL then use 'pre' values */ + bool use_pre = (post_saddr == NULL); + + /* Post UDP packet */ + struct dp_test_pkt_desc_t post_pkt_UDP = { + .text = "UDP post", + .len = post_len ? post_len : len, + .ether_type = ether_type, + .l3_src = use_pre ? pre_saddr : post_saddr, + .l2_src = pre_smac, + .l3_dst = use_pre ? pre_daddr : post_daddr, + .l2_dst = post_dmac, + .proto = IPPROTO_UDP, + .l4 = { + .udp = { + .sport = use_pre ? 
pre_sport : post_sport, + .dport = use_pre ? pre_dport : post_dport + } + }, + .rx_intf = pre_pkt_UDP.rx_intf, + .tx_intf = pre_pkt_UDP.tx_intf + }; + + /* + * If rx_intf is NULL then its local -> tx_intf + */ + if (v4) { + if (rx_intf) + test_pak = dp_test_v4_pkt_from_desc(&pre_pkt_UDP); + else + test_pak = dp_test_from_spath_pkt_from_desc( + &pre_pkt_UDP); + + exp_pak = dp_test_v4_pkt_from_desc(&post_pkt_UDP); + } else { + if (rx_intf) + test_pak = dp_test_v6_pkt_from_desc(&pre_pkt_UDP); + else + test_pak = dp_test_from_spath_pkt_from_desc( + &pre_pkt_UDP); + + exp_pak = dp_test_v6_pkt_from_desc(&post_pkt_UDP); + } + + if (pre_pl) { + struct rte_mbuf *m = test_pak; + struct iphdr *ip = iphdr(m); + struct udphdr *udp = (struct udphdr *)(ip + 1); + + memcpy((char *)(udp + 1), pre_pl, pre_len); + + ip->check = 0; + ip->check = ip_checksum(ip, ip->ihl*4); + + udp->check = 0; + udp->check = dp_test_ipv4_udptcp_cksum(m, ip, udp); + } + + if (post_pl) { + struct rte_mbuf *m = exp_pak; + struct iphdr *ip = iphdr(m); + struct udphdr *udp = (struct udphdr *)(ip + 1); + + memcpy((char *)(udp + 1), post_pl, post_len); + + ip->check = 0; + ip->check = ip_checksum(ip, ip->ihl*4); + + udp->check = 0; + udp->check = dp_test_ipv4_udptcp_cksum(m, ip, udp); + } + + test_exp = dp_test_exp_create(exp_pak); + + rte_pktmbuf_free(exp_pak); + exp_pak = dp_test_exp_get_pak(test_exp); + + if (rx_intf && tx_intf) { + /* intf -> intf */ + dp_test_exp_set_oif_name(test_exp, tx_intf); + + dp_test_pktmbuf_eth_init(exp_pak, post_dmac, + dp_test_intf_name2mac_str(tx_intf), + ether_type); + if (v4) + dp_test_ipv4_decrement_ttl(exp_pak); + else + dp_test_ipv6_decrement_ttl(exp_pak); + + } else if (!rx_intf && tx_intf) { + /* local -> intf */ + dp_test_exp_set_oif_name(test_exp, tx_intf); + + dp_test_pktmbuf_eth_init(test_pak, post_dmac, + dp_test_intf_name2mac_str(tx_intf), + ether_type); + + dp_test_pktmbuf_eth_init(exp_pak, post_dmac, + dp_test_intf_name2mac_str(tx_intf), + ether_type); + + } 
else if (rx_intf && !tx_intf) { + /* intf -> local */ + dp_test_pktmbuf_eth_init(exp_pak, + dp_test_intf_name2mac_str(rx_intf), + pre_smac, + ether_type); + + if (status == DP_TEST_FWD_FORWARDED) + status = DP_TEST_FWD_LOCAL; + } + + /* vlan */ + if (pre_vlan > 0) + dp_test_pktmbuf_vlan_init(test_pak, pre_vlan); + + if (post_vlan > 0) { + dp_test_exp_set_vlan_tci(test_exp, post_vlan); + + (void)dp_test_pktmbuf_eth_init( + dp_test_exp_get_pak(test_exp), + post_dmac, + dp_test_intf_name2mac_str(tx_intf), + ether_type); + } + + dp_test_exp_set_fwd_status(test_exp, status); + + /* Run the test */ + if (rx_intf) + /* intf -> intf or local */ + _dp_test_pak_receive(test_pak, rx_intf, test_exp, + file, func, line); + else + /* local -> intf */ + _dp_test_send_slowpath_pkt(test_pak, test_exp, + file, func, line); +} + +/* + * dpt_tcp + */ +void +_dpt_tcp(uint8_t flags, const char *rx_intf, const char *pre_smac, + const char *pre_saddr, uint16_t pre_sport, + const char *pre_daddr, uint16_t pre_dport, + const char *post_saddr, uint16_t post_sport, + const char *post_daddr, uint16_t post_dport, + const char *post_dmac, const char *tx_intf, + int status, int pre_vlan, int post_vlan, + const char *file, const char *func, int line) +{ + struct dp_test_expected *test_exp; + struct rte_mbuf *test_pak, *exp_pak; + bool v4; + uint16_t ether_type; + + if (strchr(pre_saddr, ':')) + v4 = false; + else + v4 = true; + + ether_type = v4 ? 
RTE_ETHER_TYPE_IPV4 : RTE_ETHER_TYPE_IPV6; + + /* Pre TCP packet */ + struct dp_test_pkt_desc_t pre_pkt_TCP = { + .text = "pre TCP", + .len = 20, + .ether_type = ether_type, + .l3_src = pre_saddr, + .l2_src = pre_smac, + .l3_dst = pre_daddr, + .l2_dst = "aa:bb:cc:dd:2:b1", + .proto = IPPROTO_TCP, + .l4 = { + .tcp = { + .sport = pre_sport, + .dport = pre_dport, + .flags = flags, + .seq = 0, + .ack = 0, + .win = 8192, + .opts = NULL + } + }, + .rx_intf = rx_intf, + .tx_intf = tx_intf + }; + + /* Post TCP packet */ + struct dp_test_pkt_desc_t post_pkt_TCP = { + .text = "post TCP", + .len = 20, + .ether_type = ether_type, + .l3_src = post_saddr, + .l2_src = "aa:bb:cc:dd:2:b1", + .l3_dst = post_daddr, + .l2_dst = post_dmac, + .proto = IPPROTO_TCP, + .l4 = { + .tcp = { + .sport = post_sport, + .dport = post_dport, + .flags = flags, + .seq = 0, + .ack = 0, + .win = 8192, + .opts = NULL + } + }, + .rx_intf = rx_intf, + .tx_intf = tx_intf + }; + + if (v4) { + test_pak = dp_test_v4_pkt_from_desc(&pre_pkt_TCP); + exp_pak = dp_test_v4_pkt_from_desc(&post_pkt_TCP); + } else { + test_pak = dp_test_v6_pkt_from_desc(&pre_pkt_TCP); + exp_pak = dp_test_v6_pkt_from_desc(&post_pkt_TCP); + } + + test_exp = dp_test_exp_from_desc(exp_pak, &post_pkt_TCP); + rte_pktmbuf_free(exp_pak); + + /* vlan */ + if (pre_vlan > 0) + dp_test_pktmbuf_vlan_init(test_pak, pre_vlan); + + if (post_vlan > 0) { + dp_test_exp_set_vlan_tci(test_exp, post_vlan); + + (void)dp_test_pktmbuf_eth_init( + dp_test_exp_get_pak(test_exp), + post_dmac, + dp_test_intf_name2mac_str(tx_intf), + ether_type); + } + + dp_test_exp_set_fwd_status(test_exp, status); + + _dp_test_pak_receive(test_pak, rx_intf, test_exp, + file, func, line); +} + + +/* + * dpt_icmp + */ +void +_dpt_icmp(uint8_t icmp_type, + const char *rx_intf, const char *pre_smac, + const char *pre_saddr, uint16_t pre_icmp_id, + const char *pre_daddr, + const char *post_saddr, uint16_t post_icmp_id, + const char *post_daddr, + const char *post_dmac, const char 
*tx_intf, + int status, int pre_vlan, int post_vlan, + const char *file, const char *func, int line) +{ + struct dp_test_expected *test_exp; + struct rte_mbuf *test_pak, *exp_pak; + + /* Pre IPv4 ICMP packet */ + struct dp_test_pkt_desc_t pre_pkt_ICMP = { + .text = "IPv4 ICMP", + .len = 20, + .ether_type = RTE_ETHER_TYPE_IPV4, + .l3_src = pre_saddr, + .l2_src = pre_smac, + .l3_dst = pre_daddr, + .l2_dst = "aa:bb:cc:dd:2:b1", + .proto = IPPROTO_ICMP, + .l4 = { + .icmp = { + .type = icmp_type, + .code = 0, + { + .dpt_icmp_id = pre_icmp_id, + .dpt_icmp_seq = 0, + }, + } + }, + .rx_intf = rx_intf, + .tx_intf = tx_intf + }; + + /* Post IPv4 ICMP packet */ + struct dp_test_pkt_desc_t post_pkt_ICMP = { + .text = "Packet A, IPv4 ICMP", + .len = 20, + .ether_type = RTE_ETHER_TYPE_IPV4, + .l3_src = post_saddr, + .l2_src = "aa:bb:cc:dd:2:b1", + .l3_dst = post_daddr, + .l2_dst = post_dmac, + .proto = IPPROTO_ICMP, + .l4 = { + .icmp = { + .type = icmp_type, + .code = 0, + { + .dpt_icmp_id = post_icmp_id, + .dpt_icmp_seq = 0, + }, + } + }, + .rx_intf = rx_intf, + .tx_intf = tx_intf + }; + + test_pak = dp_test_v4_pkt_from_desc(&pre_pkt_ICMP); + + exp_pak = dp_test_v4_pkt_from_desc(&post_pkt_ICMP); + test_exp = dp_test_exp_from_desc(exp_pak, &post_pkt_ICMP); + rte_pktmbuf_free(exp_pak); + + /* vlan */ + if (pre_vlan > 0) + dp_test_pktmbuf_vlan_init(test_pak, pre_vlan); + + if (post_vlan > 0) { + dp_test_exp_set_vlan_tci(test_exp, post_vlan); + + (void)dp_test_pktmbuf_eth_init( + dp_test_exp_get_pak(test_exp), + post_dmac, + dp_test_intf_name2mac_str(tx_intf), + RTE_ETHER_TYPE_IPV4); + } + + dp_test_exp_set_fwd_status(test_exp, status); + + _dp_test_pak_receive(test_pak, rx_intf, test_exp, + file, func, line); +} diff --git a/tests/whole_dp/src/dp_test_npf_fw_lib.h b/tests/whole_dp/src/dp_test_npf_fw_lib.h index 78421f9d..dd3a33a4 100644 --- a/tests/whole_dp/src/dp_test_npf_fw_lib.h +++ b/tests/whole_dp/src/dp_test_npf_fw_lib.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019, AT&T 
Intellectual Property. All rights reserved. + * Copyright (c) 2017-2021, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -15,10 +15,130 @@ #include #include +#include "dp_test_lib_pkt.h" #include "dp_test_npf_lib.h" -const char * -dp_test_npf_fw_str(struct dp_test_npf_ruleset_t *fw); +/* + * Zone test config structure + * + * Simple zone config for two interface zones and a local zone. Example: + * + * struct dpt_zone_cfg cfg = { + * .private = { + * .name = "PRIVATE", + * .intf = { "dp1T0", "dp1T1", NULL }, + * .local = false, + * }, + * .public = { + * .name = "PUBLIC", + * .intf = { "dp1T2", "dp1T3", NULL }, + * .local = false, + * }, + * .pub_to_priv = { + * .name = "PUB_TO_PRIV", + * .pass = BLOCK, + * .stateful = STATELESS, + * .npf = "", + * }, + * .local = { 0 }, + * .priv_to_pub = { + * .name = "PRIV_TO_PUB", + * .pass = PASS, + * .stateful = STATELESS, + * .npf = "", + * }, + * .local_to_priv = { 0 }, + * .priv_to_local = { 0 }, + * .local_to_pub = { 0 }, + * .pub_to_local = { 0 }, + * }; + * + * dpt_zone_cfg(&cfg, true, false); // enable + * dpt_zone_cfg(&cfg, false, false); // disable + * + */ +#define INTF_PER_ZONE 3 + +struct dpt_zone { + /* Zone name */ + const char *name; + + /* Zone member interfaces */ + const char *intf[INTF_PER_ZONE]; + + /* Local zone if true */ + bool local; +}; + +/* Zone ruleset and rule variables */ +struct dpt_zone_rule { + const char *name; /* No rule added if NULL */ + bool pass; /* BLOCK or PASS */ + bool stateful; /* STATELESS or STATEFUL */ + const char *npf; /* npf rule */ +}; + +struct dpt_zone_cfg { + struct dpt_zone private; + struct dpt_zone public; + struct dpt_zone local; + struct dpt_zone_rule pub_to_priv; + struct dpt_zone_rule priv_to_pub; + struct dpt_zone_rule local_to_priv; + struct dpt_zone_rule priv_to_local; + struct dpt_zone_rule local_to_pub; + struct dpt_zone_rule pub_to_local; +}; + +void dpt_zone_cfg(struct 
dpt_zone_cfg *cfg, bool add, bool debug); + +void _dp_test_zone_add(const char *zname, const char *file, int line); + +#define dp_test_zone_add(name) \ + _dp_test_zone_add(name, __FILE__, __LINE__) + + +void _dp_test_zone_remove(const char *zname, const char *file, int line); + +#define dp_test_zone_remove(name) \ + _dp_test_zone_remove(name, __FILE__, __LINE__) + +void _dp_test_zone_local(const char *zname, bool set, + const char *file, int line); + +#define dp_test_zone_local(name, set) \ + _dp_test_zone_local(name, set, __FILE__, __LINE__) + + +void _dp_test_zone_policy_add(const char *zone, const char *policy, + const char *file, int line); + +#define dp_test_zone_policy_add(zn, pl) \ + _dp_test_zone_policy_add(zn, pl, __FILE__, __LINE__) + + +void _dp_test_zone_policy_del(const char *zone, const char *policy, + const char *file, int line); + +#define dp_test_zone_policy_del(zn, pl) \ + _dp_test_zone_policy_del(zn, pl, __FILE__, __LINE__) + +/* + * Add a zone + */ +void _dp_test_zone_intf_add(const char *zname, const char *ifname, + const char *file, int line); + +#define dp_test_zone_intf_add(zn, ifn) \ + _dp_test_zone_intf_add(zn, ifn, __FILE__, __LINE__) + + +void _dp_test_zone_intf_del(const char *zname, const char *ifname, + const char *file, int line); + +#define dp_test_zone_intf_del(zn, ifn) \ + _dp_test_zone_intf_del(zn, ifn, __FILE__, __LINE__) + /* * Address group @@ -67,6 +187,12 @@ _dp_test_npf_fw_addr_group_addr_del(const char *table, const char *addr, #define dp_test_npf_fw_addr_group_addr_del(table, addr) \ _dp_test_npf_fw_addr_group_addr_del(table, addr, __FILE__, __LINE__) +#define NPF_ZONES_SHOW_INTFS 0x01 +#define NPF_ZONES_SHOW_POLS 0x02 +#define NPF_ZONES_SHOW_RSETS 0x04 +#define NPF_ZONES_SHOW_ALL (NPF_ZONES_SHOW_INTFS | NPF_ZONES_SHOW_POLS | \ + NPF_ZONES_SHOW_RSETS) + /* * Add a port group * @@ -91,4 +217,89 @@ _dp_test_npf_fw_port_group_del(const char *name, #define dp_test_npf_fw_port_group_del(name) \ 
_dp_test_npf_fw_port_group_del(name, __FILE__, __LINE__) +/* + * Utilities for parsing the json output of "npf fw list" + */ + +/* + * Return the json object for a specific firewall zone group + * + * The returned json object has its ref count incremented, so json_object_put + * should be called once the caller has finished with the object. + */ +json_object * +dp_test_npf_json_get_fw_zone(const char *name, const char *from_intf, + const char *to_intf); + +/* + * Wrapper around UDP test packet + */ +void +_dpt_udp(const char *rx_intf, const char *pre_smac, + const char *pre_saddr, uint16_t pre_sport, + const char *pre_daddr, uint16_t pre_dport, + const char *post_saddr, uint16_t post_sport, + const char *post_daddr, uint16_t post_dport, + const char *post_dmac, const char *tx_intf, + int status, int pre_vlan, int post_vlan, + const char *pre_pl, int pre_len, + const char *post_pl, int post_len, + const char *file, const char *func, int line); + +#define dpt_udp(_a, _b, _c, _d, _e, _f, _g, _h, \ + _i, _j, _k, _l, _m) \ + _dpt_udp(_a, _b, _c, _d, _e, _f, _g, _h, \ + _i, _j, _k, _l, _m, 0, 0, \ + NULL, 0, NULL, 0, \ + __FILE__, __func__, __LINE__) + +#define dpt_vlan_udp(_a, _b, _c, _d, _e, _f, _g, _h, \ + _i, _j, _k, _l, _m, _n, _o) \ + _dpt_udp(_a, _b, _c, _d, _e, _f, _g, _h, \ + _i, _j, _k, _l, _m, _n, _o, \ + NULL, 0, NULL, 0, \ + __FILE__, __func__, __LINE__) + +#define dpt_udp_pl(_a, _b, _c, _d, _e, _f, _g, _h, \ + _i, _j, _k, _l, _m, _n, _o, _p, _q, _r) \ + _dpt_udp(_a, _b, _c, _d, _e, _f, _g, _h, \ + _i, _j, _k, _l, _m, 0, 0, \ + _n, _o, _p, _q, \ + __FILE__, _r ? 
_r : __func__, __LINE__) + + +void +_dpt_tcp(uint8_t flags, + const char *rx_intf, const char *pre_smac, + const char *pre_saddr, uint16_t pre_sport, + const char *pre_daddr, uint16_t pre_dport, + const char *post_saddr, uint16_t post_sport, + const char *post_daddr, uint16_t post_dport, + const char *post_dmac, const char *tx_intf, + int status, int pre_vlan, int post_vlan, + const char *file, const char *func, int line); + +#define dpt_tcp(_a, _b, _c, _d, _e, _f, _g, _h, \ + _i, _j, _k, _l, _m, _n) \ + _dpt_tcp(_a, _b, _c, _d, _e, _f, _g, _h, \ + _i, _j, _k, _l, _m, _n, 0, 0, \ + __FILE__, __func__, __LINE__) + +void +_dpt_icmp(uint8_t icmp_type, + const char *rx_intf, const char *pre_smac, + const char *pre_saddr, uint16_t pre_icmp_id, + const char *pre_daddr, + const char *post_saddr, uint16_t post_icmp_id, + const char *post_daddr, + const char *post_dmac, const char *tx_intf, + int status, int pre_vlan, int post_vlan, + const char *file, const char *func, int line); + +#define dpt_icmp(_a, _b, _c, _d, _e, _f, _g, _h, \ + _i, _j, _k, _l) \ + _dpt_icmp(_a, _b, _c, _d, _e, _f, _g, \ + _h, _i, _j, _k, _l, 0, 0, \ + __FILE__, __func__, __LINE__) + #endif diff --git a/tests/whole_dp/src/dp_test_npf_golden.c b/tests/whole_dp/src/dp_test_npf_golden.c index 89bab4ae..966a2b46 100644 --- a/tests/whole_dp/src/dp_test_npf_golden.c +++ b/tests/whole_dp/src/dp_test_npf_golden.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2018-2021, AT&T Intellectual Property. All rights reserved. * All rights reserved. 
* * SPDX-License-Identifier: LGPL-2.1-only @@ -22,12 +22,12 @@ #include "dp_test.h" #include "dp_test_str.h" -#include "dp_test_lib.h" +#include "dp_test_lib_internal.h" #include "dp_test_lib_exp.h" #include "dp_test_lib_pkt.h" -#include "dp_test_lib_intf.h" -#include "dp_test_pktmbuf_lib.h" -#include "dp_test_netlink_state.h" +#include "dp_test_lib_intf_internal.h" +#include "dp_test_pktmbuf_lib_internal.h" +#include "dp_test_netlink_state_internal.h" #include "dp_test_console.h" #include "dp_test_json_utils.h" #include "dp_test_npf_lib.h" @@ -35,10 +35,6 @@ #include "dp_test_npf_sess_lib.h" #include "dp_test_npf_nat_lib.h" -/* - * Some tests do not work yet with the new session code - */ -#define DP_SESSIONS /* * There are 6 main tests, each with multiple sub-tests (49 total). Each test @@ -56,7 +52,7 @@ * make -j4 dataplane_test_run CK_RUN_CASE=npf_golden1a * * /------------- In -----------\ /------ Out ---------\ - * Test Case FW sfull DNAT NAT64 SNAT FW sfull + * Test Case FW/Zone sfull DNAT NAT64 SNAT FW/Zone sfull * npf_golden1 0 0 0 - 0 0 0 * npf_golden1a 1 0 0 - 0 0 0 * npf_golden1b 1 1 0 - 0 0 0 @@ -70,6 +66,17 @@ * npf_golden1j 0 1 1 - 1 1 1 * npf_golden1k 1 1 0 - 0 1 1 * + * npf_golden1l 0 0 0 - 0 Zn 0 + * npf_golden1m Zn 0 0 - 0 0 0 + * npf_golden1n Zn 0 0 - 0 Zn 0 + * npf_golden1o Zn 0 0 - 0 Zn 0 unm pkt + * npf_golden1p Zn 0 1 - 0 Zn 0 + * npf_golden1q Zn 0 0 - 1 Zn 0 + * npf_golden1r Zn 0 0 - 0 Zn 1 + * npf_golden1s Zn 0 1 - 0 Zn 1 + * npf_golden1t Zn 0 0 - 1 Zn 1 + * npf_golden1u Zn 0 1 - 1 Zn 1 + * * npf_golden2 0 0 - 0 - 0 0 * npf_golden2a 1 0 - 0 - 0 0 * npf_golden2b 1 1 - 0 - 0 0 @@ -81,6 +88,14 @@ * npf_golden2h 0 0 - 1 1 1 1 * npf_golden2i 1 1 - 1 1 1 1 * + * npf_golden2l 0 0 - 0 - Zn 0 + * npf_golden2m Zn 0 - 0 - 0 0 + * npf_golden2n Zn 0 - 0 - Zn 0 + * npf_golden2o Zn 0 - 1 0 Zn 0 + * npf_golden2p Zn 0 - 1 1 Zn 0 + * npf_golden2q Zn 0 - 0 - Zn 1 + * npf_golden2s Zn 0 - 1 1 Zn 1 + * * npf_golden3 - - - - 0 0 0 * npf_golden3a - - - - 0 1 0 * 
npf_golden3b - - - - 0 1 1 @@ -93,16 +108,38 @@ * npf_golden3l - - - - 0 1 0 unm pkt * npf_golden3m - - - - 0 1 1 unm pkt + * npf_golden3f - - - - 0 Zn 0 + * npf_golden3g - - - - 0 Zn 1 + * npf_golden3h - - - - 1 Zn 0 + * npf_golden3i - - - - 1 Zn 1 + * + * npf_golden3n local - - - 0 Zn 0 !zp pub-to-loc + * npf_golden3q local - - - 0 Zn 1 !zp pub-to-loc + * npf_golden3r local - - - 1 Zn 0 !zp pub-to-loc + * npf_golden3s local - - - 1 Zn 1 !zp pub-to-loc + * npf_golden3t local - - - 0 Zn 0 zp pub-to-loc + * npf_golden3u local - - - 0 Zn 1 zp pub-to-loc + * npf_golden3v local - - - 1 Zn 0 zp pub-to-loc + * npf_golden3w local - - - 1 Zn 1 zp pub-to-loc + * * npf_golden4 0 0 0 0 - - - * npf_golden4a 1 0 0 0 - - - * npf_golden4b 1 1 0 0 - - - * npf_golden4c 0 0 1 0 - - - * npf_golden4d 1 0 1 0 - - - * npf_golden4e 1 1 1 0 - - - + * npf_golden4f Zn 0 0 0 - - - + * npf_golden4g 0 0 0 0 - Zn - local zone + * npf_golden4h Zn 0 0 0 - Zn - priv to local + * npf_golden4i Zn 0 0 0 - Zn - pass rule + * npf_golden4j Zn 0 0 0 - Zn - drop rule + * npf_golden4k Zn 0 0 0 - Zn - unmatched + * npf_golden4l Zn 0 0 0 - Zn - stateful * * npf_golden5 - - - - - 0 0 * npf_golden5a - - - - - 1 0 * npf_golden5b - - - - - 1 1 + * npf_golden5f - - - - - Zn 0 * * npf_golden6 0 0 - 0 - - - * npf_golden6a 1 0 - 0 - - - @@ -142,8 +179,39 @@ enum dp_test_golden_flags { /* NAT64 on input on dp1T0 */ DPT_IN_NAT64 = 1 << 12, + /* Zone PRIVATE, dp1T0 */ + DPT_ZONE_PRIV = 1 << 16, + DPT_ZONE_PRIV_S = 1 << 17, + DPT_ZONE_PRIV_UNM = 1 << 18, + DPT_ZONE_PRIV_BLK = 1 << 19, + + /* Zone PUBLIC, dp1T1 */ + DPT_ZONE_PUB = 1 << 20, + DPT_ZONE_PUB_S = 1 << 21, + DPT_ZONE_PUB_UNM = 1 << 22, + DPT_ZONE_PUB_BLK = 1 << 23, + + /* Local zone */ + DPT_ZONE_LOCAL = 1 << 24, + DPT_ZP_PUB_TO_LOCAL = 1 << 25, + DPT_ZP_PUB_TO_LOCAL_S = 1 << 26, + DPT_ZP_PUB_TO_LOCAL_BLK = 1 << 27, + DPT_ZP_PRIV_TO_LOCAL = 1 << 28, + DPT_ZP_PRIV_TO_LOCAL_S = 1 << 29, + DPT_ZP_PRIV_TO_LOCAL_BLK = 1 << 30, + DPT_ZP_PRIV_TO_LOCAL_UNM = 1 << 31, 
}; +#define DPT_ZONE (DPT_ZONE_PRIV | DPT_ZONE_PUB | DPT_ZONE_LOCAL) + +#define DPT_DIFF_ZONES(ctx) (((ctx)->flags & DPT_ZONE) && \ + (!((ctx)->flags & DPT_ZONE_PUB) ^ \ + !((ctx)->flags & DPT_ZONE_PRIV))) + +/* Same or no zones */ +#define DPT_SAME_ZONE(ctx) !DPT_DIFF_ZONES(ctx) + + struct dp_test_golden_ctx { uint32_t flags; uint count; /* repeat count */ @@ -151,6 +219,7 @@ struct dp_test_golden_ctx { uint fw_out; /* expected pkt counts on rule */ int exp_fwd; /* Forwarding expect status for fwd pkt */ int exp_back;/* Forwarding expect status for fwd pkt */ + uint exp_session; }; enum test_fw { @@ -174,13 +243,13 @@ npf_golden_in_fw(enum test_fw action, struct dp_test_golden_ctx *ctx) .rule = "10", .pass = PASS, .stateful = (ctx->flags & DPT_IN_FW_S) != 0, - .npf = "proto=17 dst-port=48879" + .npf = "proto-final=17 dst-port=48879" }, { .rule = "20", .pass = BLOCK, .stateful = STATELESS, - .npf = "proto=17 dst-port=48878" + .npf = "proto-final=17 dst-port=48878" }, RULE_DEF_BLOCK, NULL_RULE @@ -229,13 +298,13 @@ npf_golden_out_fw(enum test_fw action, struct dp_test_golden_ctx *ctx) .rule = "10", .pass = PASS, .stateful = (ctx->flags & DPT_OUT_FW_S) != 0, - .npf = "proto=17 src-port=57005" + .npf = "proto-final=17 src-port=57005" }, { .rule = "20", .pass = BLOCK, .stateful = STATELESS, - .npf = "proto=17 src-port=57004" + .npf = "proto-final=17 src-port=57004" }, RULE_DEF_BLOCK, NULL_RULE @@ -295,6 +364,7 @@ npf_golden_in_dnat(enum test_fw action, struct dp_test_golden_ctx *ctx) .ifname = "dp1T0", .proto = IPPROTO_UDP, .map = "dynamic", + .port_alloc = NULL, .from_addr = NULL, .from_port = NULL, .to_addr = "2.2.2.12", @@ -325,6 +395,7 @@ npf_golden_in_dnat_local(enum test_fw action, struct dp_test_golden_ctx *ctx) .ifname = "dp1T0", .proto = IPPROTO_UDP, .map = "dynamic", + .port_alloc = NULL, .from_addr = NULL, .from_port = NULL, .to_addr = "1.1.1.2", @@ -355,6 +426,7 @@ npf_golden_out_snat(enum test_fw action, struct dp_test_golden_ctx *ctx) .ifname = "dp1T1", 
.proto = IPPROTO_UDP, .map = "dynamic", + .port_alloc = NULL, .from_addr = "1.1.1.11", .from_port = NULL, .to_addr = NULL, @@ -387,6 +459,7 @@ npf_golden_out_snat_local(enum test_fw action, struct dp_test_golden_ctx *ctx) .ifname = "dp1T1", .proto = IPPROTO_UDP, .map = "dynamic", + .port_alloc = NULL, .from_addr = "2.2.2.2", .from_port = NULL, .to_addr = NULL, @@ -430,217 +503,683 @@ npf_golden_in_nat64(enum test_fw action, struct dp_test_golden_ctx *ctx) } } -/* - * Simple custom timeout for UDP to exercise tag rproc and custom timeout - * ruleset - */ -static void npf_custom_timeout(bool enable) +/*********************************************************** + * Zone PUBLIC + ***********************************************************/ +static void +npf_golden_zone_public(enum test_fw action, struct dp_test_golden_ctx *ctx) { - if (enable) - dp_test_npf_cmd_fmt( - false, - "npf-ut add custom-timeout:1 1 proto=17 handle=tag(50)"); - else - dp_test_npf_cmd_fmt( - false, - "npf-ut delete custom-timeout:1 1"); - - dp_test_npf_commit(); -} + uint i; + const char *intf; + + /* List of interfaces in PUBLIC zone */ + const char * const intf_public[] = { + "dp1T1", + "dp1T2", + "dp2T2", + NULL + }; -/* - * IPv4 Tests, Forwards pkt from interface dp1T0 to dp1T1, then reverse - * packet. 
- */ -static void _dp_test_npf_golden_1(struct dp_test_golden_ctx *ctx, - const char *file, const char *func); -#define dp_test_npf_golden_1(ctx) \ - _dp_test_npf_golden_1(ctx, __FILE__, __func__) + if (action == TEST_FW_ADD) { -static void _dp_test_npf_golden_1(struct dp_test_golden_ctx *ctx, - const char *file, const char *func) -{ - /* Setup interfaces and neighbours */ - dp_test_nl_add_ip_addr_and_connected("dp1T0", "1.1.1.1/24"); - dp_test_nl_add_ip_addr_and_connected("dp1T1", "2.2.2.2/24"); - dp_test_nl_add_ip_addr_and_connected("dp1T2", "3.3.3.3/24"); + dp_test_zone_add("PUBLIC"); - dp_test_netlink_add_neigh("dp1T0", "1.1.1.11", - "aa:bb:cc:dd:1:11"); - dp_test_netlink_add_neigh("dp1T1", "2.2.2.11", - "aa:bb:cc:dd:2:11"); - dp_test_netlink_add_neigh("dp1T2", "3.3.3.11", - "aa:bb:cc:dd:3:11"); + /* Add interfaces to zone PUBLIC */ + for (i = 0; i < ARRAY_SIZE(intf_public); i++) { + intf = intf_public[i]; + if (intf) + dp_test_zone_intf_add("PUBLIC", intf); + } + } - /* Add one or more firewalls dependent on ctx flags */ - npf_golden_in_fw(TEST_FW_ADD, ctx); - npf_golden_in_dnat(TEST_FW_ADD, ctx); - npf_golden_out_snat(TEST_FW_ADD, ctx); - npf_golden_out_fw(TEST_FW_ADD, ctx); + if (action == TEST_FW_REMOVE) { - /* - * UDP packet - */ - struct dp_test_pkt_desc_t pre_pkt = { - .text = "Pre", - .len = 20, - .ether_type = ETHER_TYPE_IPv4, - .l3_src = "1.1.1.11", - .l2_src = "aa:bb:cc:dd:1:11", - .l3_dst = "2.2.2.11", - .l2_dst = "aa:bb:cc:dd:2:11", - .proto = IPPROTO_UDP, - .l4 = { - .udp = { - .sport = 0xDEAD, /* 57005 */ - .dport = 0xBEEF, /* 48879 */ - } - }, - .rx_intf = "dp1T0", - .tx_intf = "dp1T1" - }; + for (i = 0; i < ARRAY_SIZE(intf_public); i++) { + intf = intf_public[i]; + if (intf) + dp_test_zone_intf_del("PUBLIC", intf); + } - if ((ctx->flags & DPT_IN_FW_BLK) != 0) - pre_pkt.l4.udp.dport = 48878; - else if ((ctx->flags & DPT_IN_FW_UNM) != 0) - pre_pkt.l4.udp.dport = 48877; + dp_test_zone_remove("PUBLIC"); + } +} - if ((ctx->flags & DPT_OUT_FW_BLK) != 
0) - pre_pkt.l4.udp.sport = 57004; - else if ((ctx->flags & DPT_OUT_FW_UNM) != 0) - pre_pkt.l4.udp.sport = 57003; +/*********************************************************** + * Zone PRIVATE + ***********************************************************/ +static void +npf_golden_zone_private(enum test_fw action, struct dp_test_golden_ctx *ctx) +{ + uint i; + const char *intf; - struct dp_test_pkt_desc_t post_pkt; - struct dp_test_pkt_desc_t *pre_desc; - struct dp_test_pkt_desc_t *post_desc; - struct dp_test_expected *test_exp; - struct rte_mbuf *pre_pak, *post_pak; + /* List of interfaces in PRIVATE zone */ + const char * const intf_private[] = { + "dp1T0", + NULL + }; - uint repeat_count = ctx->count; - int exp_session = 0; - bool first_time = true; + if (action == TEST_FW_ADD) { + dp_test_zone_add("PRIVATE"); -repeat: - post_pkt = pre_pkt; - pre_desc = &pre_pkt; - post_desc = &post_pkt; + /* Add interfaces to zone PRIVATE */ + for (i = 0; i < ARRAY_SIZE(intf_private); i++) { + intf = intf_private[i]; + if (intf) + dp_test_zone_intf_add("PRIVATE", intf); + } + } - if ((ctx->flags & DPT_IN_DNAT) != 0) - /* DNAT 2.2.2.12 -> 2.2.2.11 */ - pre_desc->l3_dst = "2.2.2.12"; + if (action == TEST_FW_REMOVE) { - if ((ctx->flags & DPT_OUT_SNAT) != 0) - /* SNAT 1.1.1.11 -> 1.1.1.13 */ - post_desc->l3_src = "1.1.1.13"; + for (i = 0; i < ARRAY_SIZE(intf_private); i++) { + intf = intf_private[i]; + if (intf) + dp_test_zone_intf_del("PRIVATE", intf); + } - /* - * Forwards packet - */ - pre_pak = dp_test_v4_pkt_from_desc(pre_desc); - post_pak = dp_test_v4_pkt_from_desc(post_desc); + dp_test_zone_remove("PRIVATE"); + } +} - test_exp = dp_test_exp_from_desc(post_pak, post_desc); - rte_pktmbuf_free(post_pak); +/*********************************************************** + * Zone _local + ***********************************************************/ +static void +npf_golden_zone_local(enum test_fw action, struct dp_test_golden_ctx *ctx) +{ + if (action == TEST_FW_ADD) { + 
dp_test_zone_add("LOCAL"); + dp_test_zone_local("LOCAL", true); + } - /* If in-fw and dnat are cfgd, then they use the same session */ - if (first_time && ctx->exp_fwd == DP_TEST_FWD_FORWARDED) { - first_time = false; + if (action == TEST_FW_REMOVE) { + dp_test_zone_local("LOCAL", false); + dp_test_zone_remove("LOCAL"); + } +} - if ((ctx->flags & DPT_IN_FW_S) != 0 || - (ctx->flags & DPT_IN_DNAT) != 0) - exp_session++; +/* + * Zone policy for forwards pkts (PRIVATE to PUBLIC zones). + */ +static void +npf_golden_zone_policy_priv_to_pub(enum test_fw action, + struct dp_test_golden_ctx *ctx) +{ + struct dp_test_npf_rule_t rule_priv_to_pub[] = { + { + .rule = "10", + .pass = PASS, + .stateful = (ctx->flags & DPT_ZONE_PRIV_S) != 0, + .npf = "proto-final=17 src-port=57005" + }, + { + .rule = "20", + .pass = BLOCK, + .stateful = STATELESS, + .npf = "proto-final=17 src-port=57004" + }, + RULE_DEF_BLOCK, + NULL_RULE + }; - if ((ctx->flags & DPT_OUT_FW_S) != 0 || - (ctx->flags & DPT_OUT_SNAT) != 0) - exp_session++; + if (ctx->flags & DPT_ZONE_PRIV_UNM) { + rule_priv_to_pub[2].rule = NULL; + rule_priv_to_pub[2].npf = NULL; } - dp_test_exp_set_fwd_status(test_exp, ctx->exp_fwd); - - spush(test_exp->description, sizeof(test_exp->description), - "\nTest: \"%s\", Forwards, exp %s", func, - ctx->exp_fwd == DP_TEST_FWD_FORWARDED ? 
"FORW":"DROP"); + struct dp_test_npf_ruleset_t rlset_priv_to_pub = { + .rstype = "zone", + .name = "PRIV_TO_PUB", + .enable = 1, + .attach_point = "PRIVATE>PUBLIC", + .fwd = 0, + .dir = "out", + .rules = rule_priv_to_pub + }; - /* Run the test */ - _dp_test_pak_receive(pre_pak, pre_desc->rx_intf, test_exp, - file, func, __LINE__); + if (action == TEST_FW_ADD) { + dp_test_zone_policy_add("PRIVATE", "PUBLIC"); - /* Check fw counts for first pkt */ - if (repeat_count == ctx->count) { - npf_golden_in_fw(TEST_FW_VERIFY, ctx); - npf_golden_out_fw(TEST_FW_VERIFY, ctx); + /* Add ruleset, and attach to attach point */ + dp_test_npf_fw_add(&rlset_priv_to_pub, false); } - dp_test_npf_session_count_verify(exp_session); + if (action == TEST_FW_REMOVE) { + /* detach and delete ruleset */ + dp_test_npf_fw_del(&rlset_priv_to_pub, false); - if (exp_session) { - if ((ctx->flags & DPT_IN_FW_S) != 0) { - if ((ctx->flags & DPT_IN_DNAT) != 0) - dp_test_nat_session_verify_desc( - false, 0x0D, pre_desc, post_desc); - else - dp_test_npf_session_verify_desc( - NULL, pre_desc, pre_desc->rx_intf, - SE_ACTIVE, SE_FLAGS_AE, true); - } + dp_test_zone_policy_del("PRIVATE", "PUBLIC"); + } +} - if ((ctx->flags & DPT_IN_FW_S) != 0 || - (ctx->flags & DPT_IN_DNAT) != 0) { +/* + * Zone policy for reverse pkts (PUBLIC to PRIVATE zones). 
+ */ +static void +npf_golden_zone_policy_pub_to_priv(enum test_fw action, + struct dp_test_golden_ctx *ctx) +{ + struct dp_test_npf_rule_t rule_pub_to_priv[] = { + { + .rule = "1", + .pass = BLOCK, + .stateful = STATELESS, + .npf = "" + }, + NULL_RULE + }; - dp_test_npf_session_verify_desc( - NULL, pre_desc, pre_desc->rx_intf, - SE_ACTIVE, SE_FLAGS_AE, true); + struct dp_test_npf_ruleset_t rlset_pub_to_priv = { + .rstype = "zone", + .name = "PUB_TO_PRIV", + .enable = 1, + .attach_point = "PUBLIC>PRIVATE", + .fwd = 0, + .dir = "out", + .rules = rule_pub_to_priv + }; - } else if ((ctx->flags & DPT_OUT_FW_S) != 0 || - (ctx->flags & DPT_OUT_SNAT) != 0) { + if (action == TEST_FW_ADD) { - dp_test_npf_session_verify_desc( - NULL, pre_desc, pre_desc->tx_intf, - SE_ACTIVE, SE_FLAGS_AE, true); - } + dp_test_zone_policy_add("PUBLIC", "PRIVATE"); + + /* Add ruleset, and attach to attach point */ + dp_test_npf_fw_add(&rlset_pub_to_priv, false); } - /* - * Reverse packet - */ - pre_pak = dp_test_reverse_v4_pkt_from_desc(post_desc); - post_pak = dp_test_reverse_v4_pkt_from_desc(pre_desc); + if (action == TEST_FW_REMOVE) { - test_exp = dp_test_reverse_exp_from_desc(post_pak, pre_desc); - rte_pktmbuf_free(post_pak); + /* detach and delete ruleset */ + dp_test_npf_fw_del(&rlset_pub_to_priv, false); - dp_test_exp_set_fwd_status(test_exp, ctx->exp_back); + dp_test_zone_policy_del("PUBLIC", "PRIVATE"); + } +} - spush(test_exp->description, sizeof(test_exp->description), - "\nTest: \"%s\", Reverse, exp %s", func, - ctx->exp_back == DP_TEST_FWD_FORWARDED ? 
"FORW":"DROP"); +static void +npf_golden_zone_policy_local_to_pub(enum test_fw action, + struct dp_test_golden_ctx *ctx) +{ + struct dp_test_npf_rule_t rule_local_to_pub[] = { + { + .rule = "10", + .pass = PASS, + .stateful = (ctx->flags & DPT_ZONE_PUB_S) != 0, + .npf = "proto-final=17 src-port=57005" + }, + { + .rule = "20", + .pass = BLOCK, + .stateful = STATELESS, + .npf = "proto-final=17 src-port=57004" + }, + RULE_DEF_BLOCK, + NULL_RULE + }; - /* Run the test */ - _dp_test_pak_receive(pre_pak, pre_desc->tx_intf, test_exp, - file, func, __LINE__); + if (ctx->flags & DPT_ZONE_PUB_UNM) { + rule_local_to_pub[2].rule = NULL; + rule_local_to_pub[2].npf = NULL; + } - dp_test_npf_session_count_verify(exp_session); + struct dp_test_npf_ruleset_t rlset_local_to_pub = { + .rstype = "zone", + .name = "LOCAL_TO_PUB", + .enable = 1, + .attach_point = "LOCAL>PUBLIC", + .fwd = 0, + .dir = "out", + .rules = rule_local_to_pub + }; - if (ctx->count > 1) { - ctx->count--; - goto repeat; - } - /* Cleanup */ + if (action == TEST_FW_ADD) { + dp_test_zone_policy_add("LOCAL", "PUBLIC"); - npf_golden_in_fw(TEST_FW_REMOVE, ctx); - npf_golden_in_dnat(TEST_FW_REMOVE, ctx); - npf_golden_out_snat(TEST_FW_REMOVE, ctx); - npf_golden_out_fw(TEST_FW_REMOVE, ctx); + /* Add ruleset, and attach to attach point */ + dp_test_npf_fw_add(&rlset_local_to_pub, false); + } - dp_test_npf_cleanup(); + if (action == TEST_FW_REMOVE) { + /* detach and delete ruleset */ + dp_test_npf_fw_del(&rlset_local_to_pub, false); - dp_test_nl_del_ip_addr_and_connected("dp1T0", "1.1.1.1/24"); - dp_test_nl_del_ip_addr_and_connected("dp1T1", "2.2.2.2/24"); - dp_test_nl_del_ip_addr_and_connected("dp1T2", "3.3.3.3/24"); + dp_test_zone_policy_del("LOCAL", "PUBLIC"); + } +} - dp_test_netlink_del_neigh("dp1T0", "1.1.1.11", - "aa:bb:cc:dd:1:11"); +static void +npf_golden_zone_policy_pub_to_local(enum test_fw action, + struct dp_test_golden_ctx *ctx) +{ + struct dp_test_npf_rule_t rule_pub_to_local[] = { + { + .rule = "10", + 
.pass = PASS, + .stateful = (ctx->flags & DPT_ZP_PUB_TO_LOCAL_S) != 0, + .npf = "proto-final=17 src-port=48879" + }, + { + .rule = "20", + .pass = BLOCK, + .stateful = STATELESS, + .npf = "proto-final=17 src-port=48878" + }, + RULE_DEF_BLOCK, + NULL_RULE + }; + + struct dp_test_npf_ruleset_t rlset_pub_to_local = { + .rstype = "zone", + .name = "PUB_TO_LOCAL", + .enable = 1, + .attach_point = "PUBLIC>LOCAL", + .fwd = 0, + .dir = "out", + .rules = rule_pub_to_local + }; + + if (action == TEST_FW_ADD) { + dp_test_zone_policy_add("PUBLIC", "LOCAL"); + + /* Add ruleset, and attach to attach point */ + dp_test_npf_fw_add(&rlset_pub_to_local, false); + } + + if (action == TEST_FW_REMOVE) { + + /* detach and delete ruleset */ + dp_test_npf_fw_del(&rlset_pub_to_local, false); + + dp_test_zone_policy_del("PUBLIC", "LOCAL"); + } +} + +static void +npf_golden_zone_policy_local_to_priv(enum test_fw action, + struct dp_test_golden_ctx *ctx) +{ + struct dp_test_npf_rule_t rule_local_to_priv[] = { + { + .rule = "10", + .pass = PASS, + .stateful = (ctx->flags & DPT_ZONE_PRIV_S) != 0, + .npf = "proto-final=17 src-port=48879" + }, + { + .rule = "20", + .pass = BLOCK, + .stateful = STATELESS, + .npf = "proto-final=17 src-port=48878" + }, + RULE_DEF_BLOCK, + NULL_RULE + }; + + if (ctx->flags & DPT_ZONE_PRIV_UNM) { + rule_local_to_priv[2].rule = NULL; + rule_local_to_priv[2].npf = NULL; + } + + struct dp_test_npf_ruleset_t rlset_local_to_priv = { + .rstype = "zone", + .name = "LOCAL_TO_PRIV", + .enable = 1, + .attach_point = "LOCAL>PRIVATE", + .fwd = 0, + .dir = "out", + .rules = rule_local_to_priv + }; + + + if (action == TEST_FW_ADD) { + dp_test_zone_policy_add("LOCAL", "PRIVATE"); + + /* Add ruleset, and attach to attach point */ + dp_test_npf_fw_add(&rlset_local_to_priv, false); + } + + if (action == TEST_FW_REMOVE) { + /* detach and delete ruleset */ + dp_test_npf_fw_del(&rlset_local_to_priv, false); + + dp_test_zone_policy_del("LOCAL", "PRIVATE"); + } +} + +static void 
+npf_golden_zone_policy_priv_to_local(enum test_fw action, + struct dp_test_golden_ctx *ctx) +{ + struct dp_test_npf_rule_t rule_priv_to_local[] = { + { + .rule = "10", + .pass = PASS, + .stateful = (ctx->flags & DPT_ZP_PRIV_TO_LOCAL_S) != 0, + .npf = "proto-final=17 src-port=57005" + }, + { + .rule = "20", + .pass = BLOCK, + .stateful = STATELESS, + .npf = "proto-final=17 src-port=57004" + }, + RULE_DEF_BLOCK, + NULL_RULE + }; + + if (ctx->flags & DPT_ZP_PRIV_TO_LOCAL_UNM) { + rule_priv_to_local[2].rule = NULL; + rule_priv_to_local[2].npf = NULL; + } + + struct dp_test_npf_ruleset_t rlset_priv_to_local = { + .rstype = "zone", + .name = "PRIV_TO_LOCAL", + .enable = 1, + .attach_point = "PRIVATE>LOCAL", + .fwd = 0, + .dir = "out", + .rules = rule_priv_to_local + }; + + if (action == TEST_FW_ADD) { + dp_test_zone_policy_add("PRIVATE", "LOCAL"); + + /* Add ruleset, and attach to attach point */ + dp_test_npf_fw_add(&rlset_priv_to_local, false); + } + + if (action == TEST_FW_REMOVE) { + + /* detach and delete ruleset */ + dp_test_npf_fw_del(&rlset_priv_to_local, false); + + dp_test_zone_policy_del("PRIVATE", "LOCAL"); + } +} + +static void +npf_golden_zone(enum test_fw action, struct dp_test_golden_ctx *ctx) +{ + if ((ctx->flags & DPT_ZONE) == 0) + return; + + if ((ctx->flags & (DPT_IN_FW | DPT_OUT_FW))) + dp_test_fail("Cannot cfg zones and fw at same time"); + + if (action == TEST_FW_ADD) { + if (ctx->flags & DPT_ZONE_PUB) { + npf_golden_zone_public(action, ctx); + npf_golden_zone_policy_pub_to_priv(action, ctx); + + if (ctx->flags & DPT_ZP_PUB_TO_LOCAL) + npf_golden_zone_policy_pub_to_local(action, + ctx); + } + + if (ctx->flags & DPT_ZONE_PRIV) { + npf_golden_zone_private(action, ctx); + npf_golden_zone_policy_priv_to_pub(action, ctx); + + if (ctx->flags & DPT_ZP_PRIV_TO_LOCAL) + npf_golden_zone_policy_priv_to_local(action, + ctx); + } + + if (ctx->flags & DPT_ZONE_LOCAL) { + npf_golden_zone_local(action, ctx); + + if (ctx->flags & DPT_ZONE_PUB) + 
npf_golden_zone_policy_local_to_pub(action, + ctx); + if (ctx->flags & DPT_ZONE_PRIV) + npf_golden_zone_policy_local_to_priv(action, + ctx); + } + } + + if (action == TEST_FW_REMOVE) { + if (ctx->flags & DPT_ZONE_PUB) { + if (ctx->flags & DPT_ZP_PUB_TO_LOCAL) + npf_golden_zone_policy_pub_to_local(action, + ctx); + + npf_golden_zone_policy_pub_to_priv(action, ctx); + npf_golden_zone_public(action, ctx); + } + + if (ctx->flags & DPT_ZONE_PRIV) { + if (ctx->flags & DPT_ZP_PRIV_TO_LOCAL) + npf_golden_zone_policy_priv_to_local(action, + ctx); + + npf_golden_zone_policy_priv_to_pub(action, ctx); + npf_golden_zone_private(action, ctx); + } + + if (ctx->flags & DPT_ZONE_LOCAL) { + if (ctx->flags & DPT_ZONE_PUB) + npf_golden_zone_policy_local_to_pub(action, + ctx); + + if (ctx->flags & DPT_ZONE_PRIV) + npf_golden_zone_policy_local_to_priv(action, + ctx); + + npf_golden_zone_local(action, ctx); + } + } +} + +/* + * Simple custom timeout for UDP to exercise tag rproc and custom timeout + * ruleset + */ +static void npf_custom_timeout(bool enable) +{ + if (enable) + dp_test_npf_cmd_fmt( + false, + "npf-ut add custom-timeout:1 1 proto-final=17 handle=tag(50)"); + else + dp_test_npf_cmd_fmt( + false, + "npf-ut delete custom-timeout:1 1"); + + dp_test_npf_commit(); +} + +/* + * IPv4 Tests, Forwards pkt from interface dp1T0 to dp1T1, then reverse + * packet. 
+ */ +static void _dp_test_npf_golden_1(struct dp_test_golden_ctx *ctx, + const char *file, const char *func); +#define dp_test_npf_golden_1(ctx) \ + _dp_test_npf_golden_1(ctx, __FILE__, __func__) + +static void _dp_test_npf_golden_1(struct dp_test_golden_ctx *ctx, + const char *file, const char *func) +{ + /* Setup interfaces and neighbours */ + dp_test_nl_add_ip_addr_and_connected("dp1T0", "1.1.1.1/24"); + dp_test_nl_add_ip_addr_and_connected("dp1T1", "2.2.2.2/24"); + dp_test_nl_add_ip_addr_and_connected("dp1T2", "3.3.3.3/24"); + + dp_test_netlink_add_neigh("dp1T0", "1.1.1.11", + "aa:bb:cc:dd:1:11"); + dp_test_netlink_add_neigh("dp1T1", "2.2.2.11", + "aa:bb:cc:dd:2:11"); + dp_test_netlink_add_neigh("dp1T2", "3.3.3.11", + "aa:bb:cc:dd:3:11"); + + /* Add one or more firewalls dependent on ctx flags */ + npf_golden_in_fw(TEST_FW_ADD, ctx); + npf_golden_in_dnat(TEST_FW_ADD, ctx); + npf_golden_out_snat(TEST_FW_ADD, ctx); + npf_golden_out_fw(TEST_FW_ADD, ctx); + npf_golden_zone(TEST_FW_ADD, ctx); + + /* + * UDP packet + */ + struct dp_test_pkt_desc_t pre_pkt = { + .text = "Pre", + .len = 20, + .ether_type = RTE_ETHER_TYPE_IPV4, + .l3_src = "1.1.1.11", + .l2_src = "aa:bb:cc:dd:1:11", + .l3_dst = "2.2.2.11", + .l2_dst = "aa:bb:cc:dd:2:11", + .proto = IPPROTO_UDP, + .l4 = { + .udp = { + .sport = 0xDEAD, /* 57005 */ + .dport = 0xBEEF, /* 48879 */ + } + }, + .rx_intf = "dp1T0", + .tx_intf = "dp1T1" + }; + + if ((ctx->flags & DPT_IN_FW_BLK) != 0) + pre_pkt.l4.udp.dport = 48878; + else if ((ctx->flags & DPT_IN_FW_UNM) != 0) + pre_pkt.l4.udp.dport = 48877; + + if ((ctx->flags & DPT_OUT_FW_BLK) != 0) + pre_pkt.l4.udp.sport = 57004; + else if ((ctx->flags & DPT_OUT_FW_UNM) != 0 || + (ctx->flags & DPT_ZONE_PRIV_UNM) != 0) + pre_pkt.l4.udp.sport = 57003; + + struct dp_test_pkt_desc_t post_pkt; + struct dp_test_pkt_desc_t *pre_desc; + struct dp_test_pkt_desc_t *post_desc; + struct dp_test_expected *test_exp; + struct rte_mbuf *pre_pak, *post_pak; + + uint repeat_count = ctx->count; 
+ bool first_time = true; + +repeat: + post_pkt = pre_pkt; + pre_desc = &pre_pkt; + post_desc = &post_pkt; + + if ((ctx->flags & DPT_IN_DNAT) != 0) + /* DNAT 2.2.2.12 -> 2.2.2.11 */ + pre_desc->l3_dst = "2.2.2.12"; + + if ((ctx->flags & DPT_OUT_SNAT) != 0) + /* SNAT 1.1.1.11 -> 1.1.1.13 */ + post_desc->l3_src = "1.1.1.13"; + + /* + * Forwards packet + */ + pre_pak = dp_test_v4_pkt_from_desc(pre_desc); + post_pak = dp_test_v4_pkt_from_desc(post_desc); + + test_exp = dp_test_exp_from_desc(post_pak, post_desc); + rte_pktmbuf_free(post_pak); + + /* If in-fw and dnat are cfgd, then they use the same session */ + if (first_time && ctx->exp_fwd == DP_TEST_FWD_FORWARDED) + first_time = false; + + dp_test_exp_set_fwd_status(test_exp, ctx->exp_fwd); + + spush(test_exp->description, sizeof(test_exp->description), + "\nTest: \"%s\", Forwards, exp %s", func, + ctx->exp_fwd == DP_TEST_FWD_FORWARDED ? "FORW":"DROP"); + + /* Run the test */ + _dp_test_pak_receive(pre_pak, pre_desc->rx_intf, test_exp, + file, func, __LINE__); + + /* Check fw counts for first pkt */ + if (repeat_count == ctx->count) { + npf_golden_in_fw(TEST_FW_VERIFY, ctx); + npf_golden_out_fw(TEST_FW_VERIFY, ctx); + } + + _dp_test_npf_session_count_verify(ctx->exp_session, false, + file, func, __LINE__); + + if (ctx->exp_session) { + if ((ctx->flags & DPT_IN_FW_S) != 0) { + if ((ctx->flags & DPT_IN_DNAT) != 0) + dp_test_nat_session_verify_desc( + false, 0x0D, pre_desc, post_desc); + else + dp_test_npf_session_verify_desc( + NULL, pre_desc, pre_desc->rx_intf, + SE_ACTIVE, SE_FLAGS_AE, true); + } + + if ((ctx->flags & DPT_IN_FW_S) != 0 || + (ctx->flags & DPT_IN_DNAT) != 0) { + + dp_test_npf_session_verify_desc( + NULL, pre_desc, pre_desc->rx_intf, + SE_ACTIVE, SE_FLAGS_AE, true); + + } else if ((ctx->flags & DPT_OUT_FW_S) != 0 || + (ctx->flags & DPT_OUT_SNAT) != 0 || + (ctx->flags & DPT_ZONE_PRIV_S) != 0) { + + dp_test_npf_session_verify_desc( + NULL, pre_desc, pre_desc->tx_intf, + SE_ACTIVE, SE_FLAGS_AE, true); + } 
+ } + + /* + * Reverse packet + */ + pre_pak = dp_test_reverse_v4_pkt_from_desc(post_desc); + post_pak = dp_test_reverse_v4_pkt_from_desc(pre_desc); + + test_exp = dp_test_reverse_exp_from_desc(post_pak, pre_desc); + rte_pktmbuf_free(post_pak); + + dp_test_exp_set_fwd_status(test_exp, ctx->exp_back); + + spush(test_exp->description, sizeof(test_exp->description), + "\nTest: \"%s\", Reverse, exp %s", func, + ctx->exp_back == DP_TEST_FWD_FORWARDED ? "FORW":"DROP"); + + /* Run the test */ + _dp_test_pak_receive(pre_pak, pre_desc->tx_intf, test_exp, + file, func, __LINE__); + + _dp_test_npf_session_count_verify(ctx->exp_session, false, + file, func, __LINE__); + + if (ctx->count > 1) { + ctx->count--; + goto repeat; + } + + /* Cleanup */ + + npf_golden_in_fw(TEST_FW_REMOVE, ctx); + npf_golden_in_dnat(TEST_FW_REMOVE, ctx); + npf_golden_out_snat(TEST_FW_REMOVE, ctx); + npf_golden_out_fw(TEST_FW_REMOVE, ctx); + npf_golden_zone(TEST_FW_REMOVE, ctx); + + dp_test_npf_cleanup(); + + dp_test_nl_del_ip_addr_and_connected("dp1T0", "1.1.1.1/24"); + dp_test_nl_del_ip_addr_and_connected("dp1T1", "2.2.2.2/24"); + dp_test_nl_del_ip_addr_and_connected("dp1T2", "3.3.3.3/24"); + + dp_test_netlink_del_neigh("dp1T0", "1.1.1.11", + "aa:bb:cc:dd:1:11"); dp_test_netlink_del_neigh("dp1T1", "2.2.2.11", "aa:bb:cc:dd:2:11"); dp_test_netlink_del_neigh("dp1T2", "3.3.3.11", @@ -657,6 +1196,7 @@ DP_DECL_TEST_CASE(npf_golden, npf_golden1, NULL, NULL); DP_START_TEST(npf_golden1, test) { struct dp_test_golden_ctx ctx = { + .exp_session = 0, .flags = 0, .count = 1, .fw_in = 0, @@ -673,9 +1213,96 @@ DP_START_TEST(npf_golden1, test) * IPv4, In FW */ DP_DECL_TEST_CASE(npf_golden, npf_golden1a, NULL, NULL); -DP_START_TEST(npf_golden1a, test) +DP_START_TEST_FULL_RUN(npf_golden1a, test) +{ + struct dp_test_golden_ctx ctx = { + .exp_session = 0, + .flags = 0, + .count = 1, + .fw_in = 1, + .fw_out = 0, + .exp_fwd = DP_TEST_FWD_FORWARDED, + .exp_back = DP_TEST_FWD_FORWARDED, + }; + ctx.flags |= DPT_IN_FW; + + 
dp_test_npf_golden_1(&ctx); + +} DP_END_TEST; + +/* + * IPv4, In sFW + */ +DP_DECL_TEST_CASE(npf_golden, npf_golden1b, NULL, NULL); +DP_START_TEST_FULL_RUN(npf_golden1b, test) +{ + struct dp_test_golden_ctx ctx = { + .exp_session = 1, + .flags = 0, + .count = 1, + .fw_in = 1, + .fw_out = 0, + .exp_fwd = DP_TEST_FWD_FORWARDED, + .exp_back = DP_TEST_FWD_FORWARDED, + }; + ctx.flags |= DPT_IN_FW; + ctx.flags |= DPT_IN_FW_S; + + dp_test_npf_golden_1(&ctx); + +} DP_END_TEST; + +/* + * + */ +DP_DECL_TEST_CASE(npf_golden, npf_golden1c, NULL, NULL); +DP_START_TEST_FULL_RUN(npf_golden1c, test) +{ + struct dp_test_golden_ctx ctx = { + .exp_session = 1, + .flags = 0, + .count = 1, + .fw_in = 0, + .fw_out = 0, + .exp_fwd = DP_TEST_FWD_FORWARDED, + .exp_back = DP_TEST_FWD_FORWARDED, + }; + ctx.flags |= DPT_IN_DNAT; + + dp_test_npf_golden_1(&ctx); + +} DP_END_TEST; + +/* + * + */ +DP_DECL_TEST_CASE(npf_golden, npf_golden1d, NULL, NULL); +DP_START_TEST_FULL_RUN(npf_golden1d, test) +{ + struct dp_test_golden_ctx ctx = { + .exp_session = 1, + .flags = 0, + .count = 1, + .fw_in = 1, + .fw_out = 0, + .exp_fwd = DP_TEST_FWD_FORWARDED, + .exp_back = DP_TEST_FWD_FORWARDED, + }; + ctx.flags |= DPT_IN_FW; + ctx.flags |= DPT_IN_DNAT; + + dp_test_npf_golden_1(&ctx); + +} DP_END_TEST; + +/* + * + */ +DP_DECL_TEST_CASE(npf_golden, npf_golden1e, NULL, NULL); +DP_START_TEST_FULL_RUN(npf_golden1e, test) { struct dp_test_golden_ctx ctx = { + .exp_session = 1, .flags = 0, .count = 1, .fw_in = 1, @@ -684,227 +1311,340 @@ DP_START_TEST(npf_golden1a, test) .exp_back = DP_TEST_FWD_FORWARDED, }; ctx.flags |= DPT_IN_FW; + ctx.flags |= DPT_IN_FW_S; + ctx.flags |= DPT_IN_DNAT; + + npf_custom_timeout(true); + + dp_test_npf_golden_1(&ctx); + + npf_custom_timeout(false); + +} DP_END_TEST; + +/* + * + */ +DP_DECL_TEST_CASE(npf_golden, npf_golden1f, NULL, NULL); +DP_START_TEST_FULL_RUN(npf_golden1f, test) +{ + struct dp_test_golden_ctx ctx = { + .exp_session = 1, .flags = 0, + .count = 1, + .fw_in = 0, + 
.fw_out = 0, + .exp_fwd = DP_TEST_FWD_FORWARDED, + .exp_back = DP_TEST_FWD_FORWARDED, + }; + ctx.flags |= DPT_OUT_SNAT; + + dp_test_npf_golden_1(&ctx); + +} DP_END_TEST; + +/* + * + */ +DP_DECL_TEST_CASE(npf_golden, npf_golden1g, NULL, NULL); +DP_START_TEST_FULL_RUN(npf_golden1g, test) +{ + struct dp_test_golden_ctx ctx = { + .exp_session = 1, .flags = 0, + .count = 1, + .fw_in = 0, + .fw_out = 1, + .exp_fwd = DP_TEST_FWD_FORWARDED, + .exp_back = DP_TEST_FWD_FORWARDED, + }; + ctx.flags |= DPT_OUT_SNAT; + ctx.flags |= DPT_OUT_FW; + + dp_test_npf_golden_1(&ctx); + +} DP_END_TEST; + +/* + * + */ +DP_DECL_TEST_CASE(npf_golden, npf_golden1h, NULL, NULL); +DP_START_TEST_FULL_RUN(npf_golden1h, test) +{ + struct dp_test_golden_ctx ctx = { + .exp_session = 1, .flags = 0, + .count = 1, + .fw_in = 0, + .fw_out = 1, + .exp_fwd = DP_TEST_FWD_FORWARDED, + .exp_back = DP_TEST_FWD_FORWARDED, + }; + ctx.flags |= DPT_OUT_SNAT; + ctx.flags |= DPT_OUT_FW; + ctx.flags |= DPT_OUT_FW_S; + + dp_test_npf_golden_1(&ctx); + +} DP_END_TEST; + + +/* + * v4: In -> DNAT -> Out -> SNAT + */ +DP_DECL_TEST_CASE(npf_golden, npf_golden1i, NULL, NULL); +DP_START_TEST_FULL_RUN(npf_golden1i, test) +{ + struct dp_test_golden_ctx ctx = { + .exp_session = 2, .flags = 0, + .count = 1, + .fw_in = 0, + .fw_out = 0, + .exp_fwd = DP_TEST_FWD_FORWARDED, + .exp_back = DP_TEST_FWD_FORWARDED, + }; + ctx.flags |= DPT_IN_DNAT; + ctx.flags |= DPT_OUT_SNAT; + + dp_test_npf_golden_1(&ctx); + +} DP_END_TEST; + +/* + * v4: In -> sFW -> DNAT -> Out -> SNAT -> sFW + */ +DP_DECL_TEST_CASE(npf_golden, npf_golden1j, NULL, NULL); +DP_START_TEST_FULL_RUN(npf_golden1j, test) +{ + struct dp_test_golden_ctx ctx = { + .exp_session = 2, .flags = 0, + .count = 1, + .fw_in = 1, + .fw_out = 1, + .exp_fwd = DP_TEST_FWD_FORWARDED, + .exp_back = DP_TEST_FWD_FORWARDED, + }; + ctx.flags |= DPT_IN_FW; + ctx.flags |= DPT_IN_FW_S; + ctx.flags |= DPT_IN_DNAT; + ctx.flags |= DPT_OUT_SNAT; + ctx.flags |= DPT_OUT_FW; + ctx.flags |= DPT_OUT_FW_S; + 
+ dp_test_npf_golden_1(&ctx); + +} DP_END_TEST; + +/* + * v4: In -> sFW -> Out -> sFW + */ +DP_DECL_TEST_CASE(npf_golden, npf_golden1k, NULL, NULL); +DP_START_TEST_FULL_RUN(npf_golden1k, test) +{ + struct dp_test_golden_ctx ctx = { + .exp_session = 2, .flags = 0, + .count = 2, + .fw_in = 1, + .fw_out = 1, + .exp_fwd = DP_TEST_FWD_FORWARDED, + .exp_back = DP_TEST_FWD_FORWARDED, + }; + ctx.flags |= DPT_IN_FW; + ctx.flags |= DPT_IN_FW_S; + ctx.flags |= DPT_OUT_FW; + ctx.flags |= DPT_OUT_FW_S; dp_test_npf_golden_1(&ctx); } DP_END_TEST; /* - * IPv4, In sFW + * non-zone to zone */ -DP_DECL_TEST_CASE(npf_golden, npf_golden1b, NULL, NULL); -DP_START_TEST(npf_golden1b, test) +DP_DECL_TEST_CASE(npf_golden, npf_golden1l, NULL, NULL); +DP_START_TEST_FULL_RUN(npf_golden1l, test) { struct dp_test_golden_ctx ctx = { - .flags = 0, + .exp_session = 0, .flags = 0, .count = 1, - .fw_in = 1, + .fw_in = 0, .fw_out = 0, - .exp_fwd = DP_TEST_FWD_FORWARDED, - .exp_back = DP_TEST_FWD_FORWARDED, + .exp_fwd = DP_TEST_FWD_DROPPED, + .exp_back = DP_TEST_FWD_DROPPED, }; - ctx.flags |= DPT_IN_FW; - ctx.flags |= DPT_IN_FW_S; + ctx.flags |= DPT_ZONE_PRIV; dp_test_npf_golden_1(&ctx); } DP_END_TEST; /* - * + * Zone to non-zone */ -DP_DECL_TEST_CASE(npf_golden, npf_golden1c, NULL, NULL); -DP_START_TEST(npf_golden1c, test) +DP_DECL_TEST_CASE(npf_golden, npf_golden1m, NULL, NULL); +DP_START_TEST_FULL_RUN(npf_golden1m, test) { struct dp_test_golden_ctx ctx = { - .flags = 0, + .exp_session = 0, .flags = 0, .count = 1, .fw_in = 0, .fw_out = 0, - .exp_fwd = DP_TEST_FWD_FORWARDED, - .exp_back = DP_TEST_FWD_FORWARDED, + .exp_fwd = DP_TEST_FWD_DROPPED, + .exp_back = DP_TEST_FWD_DROPPED, }; - ctx.flags |= DPT_IN_DNAT; + ctx.flags |= DPT_ZONE_PUB; dp_test_npf_golden_1(&ctx); } DP_END_TEST; -/* - * - */ -DP_DECL_TEST_CASE(npf_golden, npf_golden1d, NULL, NULL); -DP_START_TEST(npf_golden1d, test) +DP_DECL_TEST_CASE(npf_golden, npf_golden1n, NULL, NULL); +DP_START_TEST_FULL_RUN(npf_golden1n, test) { struct 
dp_test_golden_ctx ctx = { - .flags = 0, + .exp_session = 0, .flags = 0, .count = 1, - .fw_in = 1, + .fw_in = 0, .fw_out = 0, .exp_fwd = DP_TEST_FWD_FORWARDED, - .exp_back = DP_TEST_FWD_FORWARDED, + .exp_back = DP_TEST_FWD_DROPPED, }; - ctx.flags |= DPT_IN_FW; - ctx.flags |= DPT_IN_DNAT; + ctx.flags |= DPT_ZONE_PUB; + ctx.flags |= DPT_ZONE_PRIV; dp_test_npf_golden_1(&ctx); } DP_END_TEST; -/* - * - */ -DP_DECL_TEST_CASE(npf_golden, npf_golden1e, NULL, NULL); -DP_START_TEST(npf_golden1e, test) +DP_DECL_TEST_CASE(npf_golden, npf_golden1o, NULL, NULL); +DP_START_TEST_FULL_RUN(npf_golden1o, test) { struct dp_test_golden_ctx ctx = { - .flags = 0, + .exp_session = 0, .flags = 0, .count = 1, - .fw_in = 1, + .fw_in = 0, .fw_out = 0, - .exp_fwd = DP_TEST_FWD_FORWARDED, - .exp_back = DP_TEST_FWD_FORWARDED, + .exp_fwd = DP_TEST_FWD_DROPPED, + .exp_back = DP_TEST_FWD_DROPPED, }; - ctx.flags |= DPT_IN_FW; - ctx.flags |= DPT_IN_FW_S; - ctx.flags |= DPT_IN_DNAT; - - npf_custom_timeout(true); + ctx.flags |= DPT_ZONE_PUB; + ctx.flags |= DPT_ZONE_PRIV; + ctx.flags |= DPT_ZONE_PRIV_UNM; dp_test_npf_golden_1(&ctx); - npf_custom_timeout(false); - } DP_END_TEST; -/* - * - */ -DP_DECL_TEST_CASE(npf_golden, npf_golden1f, NULL, NULL); -DP_START_TEST(npf_golden1f, test) +DP_DECL_TEST_CASE(npf_golden, npf_golden1p, NULL, NULL); +DP_START_TEST_FULL_RUN(npf_golden1p, test) { struct dp_test_golden_ctx ctx = { - .flags = 0, + .exp_session = 1, .flags = 0, .count = 1, .fw_in = 0, .fw_out = 0, .exp_fwd = DP_TEST_FWD_FORWARDED, .exp_back = DP_TEST_FWD_FORWARDED, }; - ctx.flags |= DPT_OUT_SNAT; + ctx.flags |= DPT_ZONE_PUB; + ctx.flags |= DPT_ZONE_PRIV; + ctx.flags |= DPT_IN_DNAT; dp_test_npf_golden_1(&ctx); } DP_END_TEST; -/* - * - */ -DP_DECL_TEST_CASE(npf_golden, npf_golden1g, NULL, NULL); -DP_START_TEST(npf_golden1g, test) +DP_DECL_TEST_CASE(npf_golden, npf_golden1q, NULL, NULL); +DP_START_TEST_FULL_RUN(npf_golden1q, test) { struct dp_test_golden_ctx ctx = { - .flags = 0, + .exp_session = 1, 
.flags = 0, .count = 1, .fw_in = 0, - .fw_out = 1, + .fw_out = 0, .exp_fwd = DP_TEST_FWD_FORWARDED, .exp_back = DP_TEST_FWD_FORWARDED, }; + ctx.flags |= DPT_ZONE_PUB; + ctx.flags |= DPT_ZONE_PRIV; ctx.flags |= DPT_OUT_SNAT; - ctx.flags |= DPT_OUT_FW; dp_test_npf_golden_1(&ctx); } DP_END_TEST; -/* - * - */ -DP_DECL_TEST_CASE(npf_golden, npf_golden1h, NULL, NULL); -DP_START_TEST(npf_golden1h, test) +DP_DECL_TEST_CASE(npf_golden, npf_golden1r, NULL, NULL); +DP_START_TEST_FULL_RUN(npf_golden1r, test) { struct dp_test_golden_ctx ctx = { - .flags = 0, + .exp_session = 1, .flags = 0, .count = 1, .fw_in = 0, - .fw_out = 1, + .fw_out = 0, .exp_fwd = DP_TEST_FWD_FORWARDED, .exp_back = DP_TEST_FWD_FORWARDED, }; - ctx.flags |= DPT_OUT_SNAT; - ctx.flags |= DPT_OUT_FW; - ctx.flags |= DPT_OUT_FW_S; + ctx.flags |= DPT_ZONE_PUB; + ctx.flags |= DPT_ZONE_PRIV; + ctx.flags |= DPT_ZONE_PRIV_S; dp_test_npf_golden_1(&ctx); } DP_END_TEST; - -/* - * v4: In -> DNAT -> Out -> SNAT - */ -DP_DECL_TEST_CASE(npf_golden, npf_golden1i, NULL, NULL); -DP_START_TEST(npf_golden1i, test) +DP_DECL_TEST_CASE(npf_golden, npf_golden1s, NULL, NULL); +DP_START_TEST_FULL_RUN(npf_golden1s, test) { struct dp_test_golden_ctx ctx = { - .flags = 0, + .exp_session = 2, .flags = 0, .count = 1, .fw_in = 0, .fw_out = 0, .exp_fwd = DP_TEST_FWD_FORWARDED, .exp_back = DP_TEST_FWD_FORWARDED, }; + ctx.flags |= DPT_ZONE_PUB; + ctx.flags |= DPT_ZONE_PRIV; + ctx.flags |= DPT_ZONE_PRIV_S; ctx.flags |= DPT_IN_DNAT; - ctx.flags |= DPT_OUT_SNAT; dp_test_npf_golden_1(&ctx); } DP_END_TEST; -/* - * v4: In -> sFW -> DNAT -> Out -> SNAT -> sFW - */ -DP_DECL_TEST_CASE(npf_golden, npf_golden1j, NULL, NULL); -DP_START_TEST(npf_golden1j, test) +DP_DECL_TEST_CASE(npf_golden, npf_golden1t, NULL, NULL); +DP_START_TEST_FULL_RUN(npf_golden1t, test) { struct dp_test_golden_ctx ctx = { - .flags = 0, + .exp_session = 1, .flags = 0, .count = 1, - .fw_in = 1, - .fw_out = 1, + .fw_in = 0, + .fw_out = 0, .exp_fwd = DP_TEST_FWD_FORWARDED, .exp_back = 
DP_TEST_FWD_FORWARDED, }; - ctx.flags |= DPT_IN_FW; - ctx.flags |= DPT_IN_FW_S; - ctx.flags |= DPT_IN_DNAT; + ctx.flags |= DPT_ZONE_PUB; + ctx.flags |= DPT_ZONE_PRIV; + ctx.flags |= DPT_ZONE_PRIV_S; ctx.flags |= DPT_OUT_SNAT; - ctx.flags |= DPT_OUT_FW; - ctx.flags |= DPT_OUT_FW_S; dp_test_npf_golden_1(&ctx); } DP_END_TEST; -/* - * v4: In -> sFW -> Out -> sFW - */ -DP_DECL_TEST_CASE(npf_golden, npf_golden1k, NULL, NULL); -DP_START_TEST(npf_golden1k, test) +DP_DECL_TEST_CASE(npf_golden, npf_golden1u, NULL, NULL); +DP_START_TEST_FULL_RUN(npf_golden1u, test) { struct dp_test_golden_ctx ctx = { - .flags = 0, - .count = 2, - .fw_in = 1, - .fw_out = 1, + .exp_session = 2, .flags = 0, + .count = 1, + .fw_in = 0, + .fw_out = 0, .exp_fwd = DP_TEST_FWD_FORWARDED, .exp_back = DP_TEST_FWD_FORWARDED, }; - ctx.flags |= DPT_IN_FW; - ctx.flags |= DPT_IN_FW_S; - ctx.flags |= DPT_OUT_FW; - ctx.flags |= DPT_OUT_FW_S; + ctx.flags |= DPT_ZONE_PUB; + ctx.flags |= DPT_ZONE_PRIV; + ctx.flags |= DPT_ZONE_PRIV_S; + ctx.flags |= DPT_IN_DNAT; + ctx.flags |= DPT_OUT_SNAT; dp_test_npf_golden_1(&ctx); @@ -949,13 +1689,15 @@ static void _dp_test_npf_golden_2(struct dp_test_golden_ctx *ctx, npf_golden_out_snat(TEST_FW_ADD, ctx); } + npf_golden_zone(TEST_FW_ADD, ctx); + /* * UDP packet */ struct dp_test_pkt_desc_t pre_pkt = { .text = "Pre", .len = 20, - .ether_type = ETHER_TYPE_IPv6, + .ether_type = RTE_ETHER_TYPE_IPV6, .l3_src = "2001:101:1::101:10b", .l2_src = "aa:bb:cc:dd:1:11", .l3_dst = "2001:101:2::202:20b", @@ -988,7 +1730,6 @@ static void _dp_test_npf_golden_2(struct dp_test_golden_ctx *ctx, struct rte_mbuf *pre_pak, *post_pak; uint repeat_count = ctx->count; - int exp_session = 0; bool first_time = true; repeat: @@ -997,7 +1738,7 @@ static void _dp_test_npf_golden_2(struct dp_test_golden_ctx *ctx, post_desc = &post_pkt; if ((ctx->flags & DPT_IN_NAT64) != 0) { - post_desc->ether_type = ETHER_TYPE_IPv4; + post_desc->ether_type = RTE_ETHER_TYPE_IPV4; if ((ctx->flags & DPT_OUT_SNAT) != 0) 
post_desc->l3_src = "1.1.1.13"; @@ -1017,22 +1758,9 @@ static void _dp_test_npf_golden_2(struct dp_test_golden_ctx *ctx, rte_pktmbuf_free(post_pak); /* If in-fw and dnat are cfgd, then they use the same session */ - if (first_time && ctx->exp_fwd == DP_TEST_FWD_FORWARDED) { + if (first_time && ctx->exp_fwd == DP_TEST_FWD_FORWARDED) first_time = false; - if ((ctx->flags & DPT_IN_FW_S) != 0) - exp_session++; - - if ((ctx->flags & DPT_OUT_FW_S) != 0 || - (ctx->flags & DPT_OUT_SNAT) != 0 || - (ctx->flags & DPT_IN_NAT64) != 0) - exp_session++; - - /* Nat64 created two sessions per flow */ - if ((ctx->flags & DPT_IN_NAT64) != 0) - exp_session++; - } - dp_test_exp_set_fwd_status(test_exp, ctx->exp_fwd); spush(test_exp->description, sizeof(test_exp->description), @@ -1051,9 +1779,11 @@ static void _dp_test_npf_golden_2(struct dp_test_golden_ctx *ctx, npf_golden_out_fw(TEST_FW_VERIFY, ctx); } - dp_test_npf_session_count_verify(exp_session); + // dp_test_npf_print_session_table(true); + _dp_test_npf_session_count_verify(ctx->exp_session, false, + file, func, __LINE__); - if (exp_session) { + if (ctx->exp_session) { if ((ctx->flags & DPT_IN_NAT64) == 0) { /* Temporarily ignore sessions if nat64 cfgd */ if ((ctx->flags & DPT_IN_FW_S) != 0) { @@ -1062,7 +1792,8 @@ static void _dp_test_npf_golden_2(struct dp_test_golden_ctx *ctx, NULL, pre_desc, pre_desc->rx_intf, SE_ACTIVE, SE_FLAGS_AE, true); - } else if ((ctx->flags & DPT_OUT_FW_S) != 0) { + } else if ((ctx->flags & DPT_OUT_FW_S) != 0 || + (ctx->flags & DPT_ZONE_PRIV_S) != 0) { dp_test_npf_session_verify_desc( NULL, post_desc, post_desc->tx_intf, @@ -1089,7 +1820,8 @@ static void _dp_test_npf_golden_2(struct dp_test_golden_ctx *ctx, /* Run the test */ dp_test_pak_receive(pre_pak, pre_desc->tx_intf, test_exp); - dp_test_npf_session_count_verify(exp_session); + _dp_test_npf_session_count_verify(ctx->exp_session, false, + file, func, __LINE__); if (ctx->count > 1) { ctx->count--; @@ -1108,6 +1840,8 @@ static void 
_dp_test_npf_golden_2(struct dp_test_golden_ctx *ctx, npf_golden_out_snat(TEST_FW_REMOVE, ctx); } + npf_golden_zone(TEST_FW_REMOVE, ctx); + dp_test_npf_cleanup(); dp_test_netlink_del_neigh("dp1T0", "2001:101:1::101:10b", @@ -1125,6 +1859,7 @@ static void _dp_test_npf_golden_2(struct dp_test_golden_ctx *ctx, dp_test_nl_del_ip_addr_and_connected("dp1T0", "1.1.1.1/24"); dp_test_nl_del_ip_addr_and_connected("dp1T1", "2.2.2.2/24"); + } /* @@ -1134,7 +1869,7 @@ DP_DECL_TEST_CASE(npf_golden, npf_golden2, NULL, NULL); DP_START_TEST(npf_golden2, test) { struct dp_test_golden_ctx ctx = { - .flags = 0, + .exp_session = 0, .flags = 0, .count = 1, .fw_in = 0, .fw_out = 0, .exp_fwd = DP_TEST_FWD_FORWARDED, @@ -1150,10 +1885,10 @@ DP_START_TEST(npf_golden2, test) * IPv6 */ DP_DECL_TEST_CASE(npf_golden, npf_golden2a, NULL, NULL); -DP_START_TEST(npf_golden2a, test) +DP_START_TEST_FULL_RUN(npf_golden2a, test) { struct dp_test_golden_ctx ctx = { - .flags = 0, .count = 1, + .exp_session = 0, .flags = 0, .count = 1, .fw_in = 1, .fw_out = 0, .exp_fwd = DP_TEST_FWD_FORWARDED, .exp_back = DP_TEST_FWD_FORWARDED, @@ -1168,9 +1903,10 @@ DP_START_TEST(npf_golden2a, test) * IPv6 */ DP_DECL_TEST_CASE(npf_golden, npf_golden2b, NULL, NULL); -DP_START_TEST(npf_golden2b, test) +DP_START_TEST_FULL_RUN(npf_golden2b, test) { struct dp_test_golden_ctx ctx = { + .exp_session = 1, .flags = 0, .count = 1, .fw_in = 1, .fw_out = 0, .exp_fwd = DP_TEST_FWD_FORWARDED, @@ -1187,9 +1923,10 @@ DP_START_TEST(npf_golden2b, test) * IPv6 */ DP_DECL_TEST_CASE(npf_golden, npf_golden2c, NULL, NULL); -DP_START_TEST(npf_golden2c, test) +DP_START_TEST_FULL_RUN(npf_golden2c, test) { struct dp_test_golden_ctx ctx = { + .exp_session = 1, .flags = 0, .count = 1, .fw_in = 0, .fw_out = 1, .exp_fwd = DP_TEST_FWD_FORWARDED, @@ -1206,13 +1943,14 @@ DP_START_TEST(npf_golden2c, test) * IPv6 */ DP_DECL_TEST_CASE(npf_golden, npf_golden2d, NULL, NULL); -DP_START_TEST(npf_golden2d, test) +DP_START_TEST_FULL_RUN(npf_golden2d, test) { 
struct dp_test_golden_ctx ctx = { .flags = 0, .count = 1, .fw_in = 0, .fw_out = 0, .exp_fwd = DP_TEST_FWD_FORWARDED, .exp_back = DP_TEST_FWD_FORWARDED, + .exp_session = 2, }; ctx.flags |= DPT_IN_NAT64; @@ -1224,13 +1962,14 @@ DP_START_TEST(npf_golden2d, test) * IPv6 */ DP_DECL_TEST_CASE(npf_golden, npf_golden2e, NULL, NULL); -DP_START_TEST(npf_golden2e, test) +DP_START_TEST_FULL_RUN(npf_golden2e, test) { struct dp_test_golden_ctx ctx = { .flags = 0, .count = 1, .fw_in = 1, .fw_out = 0, .exp_fwd = DP_TEST_FWD_FORWARDED, .exp_back = DP_TEST_FWD_FORWARDED, + .exp_session = 2, }; ctx.flags |= DPT_IN_FW; ctx.flags |= DPT_IN_NAT64; @@ -1243,19 +1982,19 @@ DP_START_TEST(npf_golden2e, test) * IPv6 */ DP_DECL_TEST_CASE(npf_golden, npf_golden2f, NULL, NULL); -DP_START_TEST(npf_golden2f, test) +DP_START_TEST_FULL_RUN(npf_golden2f, test) { /* * Not working yet with the new session code. If an input fw session * exists and nat64 is enabled, then the return pkt fails to find the * nat64 session. */ -#ifndef DP_SESSIONS struct dp_test_golden_ctx ctx = { .flags = 0, .count = 1, .fw_in = 1, .fw_out = 0, .exp_fwd = DP_TEST_FWD_FORWARDED, .exp_back = DP_TEST_FWD_FORWARDED, + .exp_session = 2, }; ctx.flags |= DPT_IN_FW; ctx.flags |= DPT_IN_FW_S; @@ -1263,31 +2002,215 @@ DP_START_TEST(npf_golden2f, test) dp_test_npf_golden_2(&ctx); -#endif } DP_END_TEST; /* * IPv6 */ DP_DECL_TEST_CASE(npf_golden, npf_golden2g, NULL, NULL); -DP_START_TEST(npf_golden2g, test) +DP_START_TEST_FULL_RUN(npf_golden2g, test) { struct dp_test_golden_ctx ctx = { + .exp_session = 2, .flags = 0, .count = 1, .fw_in = 0, .fw_out = 1, .exp_fwd = DP_TEST_FWD_FORWARDED, .exp_back = DP_TEST_FWD_FORWARDED, }; ctx.flags |= DPT_IN_NAT64; - ctx.flags |= DPT_OUT_FW; - ctx.flags |= DPT_OUT_FW_S; + ctx.flags |= DPT_OUT_FW; + ctx.flags |= DPT_OUT_FW_S; + + dp_test_npf_golden_2(&ctx); + +} DP_END_TEST; + +/* + * IPv6 + */ +DP_DECL_TEST_CASE(npf_golden, npf_golden2h, NULL, NULL); +DP_START_TEST_FULL_RUN(npf_golden2h, test) +{ + 
struct dp_test_golden_ctx ctx = { + .exp_session = 2, .flags = 0, .count = 1, + .fw_in = 0, .fw_out = 0, + .exp_fwd = DP_TEST_FWD_FORWARDED, + .exp_back = DP_TEST_FWD_FORWARDED, + }; + ctx.flags |= DPT_IN_NAT64; + ctx.flags |= DPT_OUT_SNAT; + + dp_test_npf_golden_2(&ctx); + +} DP_END_TEST; + +/* + * IPv6 + */ +DP_DECL_TEST_CASE(npf_golden, npf_golden2i, NULL, NULL); +DP_START_TEST_FULL_RUN(npf_golden2i, test) +{ + struct dp_test_golden_ctx ctx = { + .exp_session = 2, .flags = 0, .count = 1, + .fw_in = 1, .fw_out = 1, + .exp_fwd = DP_TEST_FWD_FORWARDED, + .exp_back = DP_TEST_FWD_FORWARDED, + }; + ctx.flags |= DPT_IN_FW; + ctx.flags |= DPT_IN_FW_S; + ctx.flags |= DPT_IN_NAT64; + ctx.flags |= DPT_OUT_FW; + ctx.flags |= DPT_OUT_FW_S; + ctx.flags |= DPT_OUT_SNAT; + + dp_test_npf_golden_2(&ctx); + +} DP_END_TEST; + +/* + * non-zone to zone + */ +DP_DECL_TEST_CASE(npf_golden, npf_golden2l, NULL, NULL); +DP_START_TEST_FULL_RUN(npf_golden2l, test) +{ + struct dp_test_golden_ctx ctx = { + .exp_session = 0, .flags = 0, + .count = 1, + .fw_in = 0, + .fw_out = 0, + .exp_fwd = DP_TEST_FWD_DROPPED, + .exp_back = DP_TEST_FWD_DROPPED, + }; + ctx.flags |= DPT_ZONE_PRIV; + + dp_test_npf_golden_2(&ctx); + +} DP_END_TEST; + +/* + * Zone to non-zone + */ +DP_DECL_TEST_CASE(npf_golden, npf_golden2m, NULL, NULL); +DP_START_TEST_FULL_RUN(npf_golden2m, test) +{ + struct dp_test_golden_ctx ctx = { + .exp_session = 0, .flags = 0, + .count = 1, + .fw_in = 0, + .fw_out = 0, + .exp_fwd = DP_TEST_FWD_DROPPED, + .exp_back = DP_TEST_FWD_DROPPED, + }; + ctx.flags |= DPT_ZONE_PUB; + + dp_test_npf_golden_2(&ctx); + +} DP_END_TEST; + +DP_DECL_TEST_CASE(npf_golden, npf_golden2n, NULL, NULL); +DP_START_TEST_FULL_RUN(npf_golden2n, test) +{ + struct dp_test_golden_ctx ctx = { + .exp_session = 0, .flags = 0, + .count = 1, + .fw_in = 0, + .fw_out = 0, + .exp_fwd = DP_TEST_FWD_FORWARDED, + .exp_back = DP_TEST_FWD_DROPPED, + }; + ctx.flags |= DPT_ZONE_PUB; + ctx.flags |= DPT_ZONE_PRIV; + + 
dp_test_npf_golden_2(&ctx); + +} DP_END_TEST; + +DP_DECL_TEST_CASE(npf_golden, npf_golden2o, NULL, NULL); +DP_START_TEST_FULL_RUN(npf_golden2o, test) +{ + struct dp_test_golden_ctx ctx = { + .exp_session = 2, .flags = 0, + .count = 1, + .fw_in = 0, + .fw_out = 0, + .exp_fwd = DP_TEST_FWD_FORWARDED, + .exp_back = DP_TEST_FWD_DROPPED, + }; + ctx.flags |= DPT_ZONE_PUB; + ctx.flags |= DPT_ZONE_PRIV; + ctx.flags |= DPT_IN_NAT64; + + dp_test_npf_golden_2(&ctx); + +} DP_END_TEST; + +DP_DECL_TEST_CASE(npf_golden, npf_golden2p, NULL, NULL); +DP_START_TEST_FULL_RUN(npf_golden2p, test) +{ + struct dp_test_golden_ctx ctx = { + .exp_session = 2, .flags = 0, + .count = 1, + .fw_in = 0, + .fw_out = 0, + .exp_fwd = DP_TEST_FWD_FORWARDED, + .exp_back = DP_TEST_FWD_FORWARDED, + }; + ctx.flags |= DPT_ZONE_PUB; + ctx.flags |= DPT_ZONE_PRIV; + ctx.flags |= DPT_IN_NAT64; + ctx.flags |= DPT_OUT_SNAT; + + dp_test_npf_golden_2(&ctx); + +} DP_END_TEST; + +DP_DECL_TEST_CASE(npf_golden, npf_golden2q, NULL, NULL); +DP_START_TEST_FULL_RUN(npf_golden2q, test) +{ + struct dp_test_golden_ctx ctx = { + .exp_session = 1, .flags = 0, + .count = 1, + .fw_in = 0, + .fw_out = 0, + .exp_fwd = DP_TEST_FWD_FORWARDED, + .exp_back = DP_TEST_FWD_FORWARDED, + }; + ctx.flags |= DPT_ZONE_PUB; + ctx.flags |= DPT_ZONE_PRIV; + ctx.flags |= DPT_ZONE_PRIV_S; + + dp_test_npf_golden_2(&ctx); + +} DP_END_TEST; + +DP_DECL_TEST_CASE(npf_golden, npf_golden2s, NULL, NULL); +DP_START_TEST_FULL_RUN(npf_golden2s, test) +{ + /* + * Reverse pkt is blocked due to the PUB to PRIV zone rule. session + * inspect does not find a session, so there is no automatic pass. 
+ */ + struct dp_test_golden_ctx ctx = { + .exp_session = 2, .flags = 0, + .count = 1, + .fw_in = 0, + .fw_out = 0, + .exp_fwd = DP_TEST_FWD_FORWARDED, + .exp_back = DP_TEST_FWD_FORWARDED, + }; + ctx.flags |= DPT_ZONE_PUB; + ctx.flags |= DPT_ZONE_PRIV; + ctx.flags |= DPT_ZONE_PRIV_S; + ctx.flags |= DPT_IN_NAT64; + ctx.flags |= DPT_OUT_SNAT; dp_test_npf_golden_2(&ctx); } DP_END_TEST; + /* - * IPv4, local to net + * IPv4, local to net (dp1T1, zone PUBLIC) */ static void _dp_test_npf_golden_3(struct dp_test_golden_ctx *ctx, const char *file, const char *func); @@ -1308,6 +2231,7 @@ static void _dp_test_npf_golden_3(struct dp_test_golden_ctx *ctx, npf_golden_out_snat_local(TEST_FW_ADD, ctx); npf_golden_out_fw(TEST_FW_ADD, ctx); + npf_golden_zone(TEST_FW_ADD, ctx); /* * UDP packet. Local to net @@ -1315,7 +2239,7 @@ static void _dp_test_npf_golden_3(struct dp_test_golden_ctx *ctx, struct dp_test_pkt_desc_t pre_pkt = { .text = "Pre", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "2.2.2.2", .l2_src = "0:0:a4:0:0:65", .l3_dst = "2.2.2.11", @@ -1336,9 +2260,11 @@ static void _dp_test_npf_golden_3(struct dp_test_golden_ctx *ctx, else if ((ctx->flags & DPT_IN_FW_UNM) != 0) pre_pkt.l4.udp.dport = 48877; - if ((ctx->flags & DPT_OUT_FW_BLK) != 0) + if ((ctx->flags & DPT_OUT_FW_BLK) != 0 || + (ctx->flags & DPT_ZONE_PUB_BLK) != 0) pre_pkt.l4.udp.sport = 57004; - else if ((ctx->flags & DPT_OUT_FW_UNM) != 0) + else if ((ctx->flags & DPT_OUT_FW_UNM) != 0 || + (ctx->flags & DPT_ZONE_PUB_UNM) != 0) pre_pkt.l4.udp.sport = 57003; struct dp_test_pkt_desc_t post_pkt; @@ -1377,16 +2303,10 @@ static void _dp_test_npf_golden_3(struct dp_test_golden_ctx *ctx, if ((ctx->flags & DPT_OUT_FW) != 0) npf_golden_out_fw(TEST_FW_VERIFY, ctx); - uint sess_out_exp = 0; - - if (((ctx->flags & DPT_OUT_FW_S) != 0 && - (ctx->flags & (DPT_OUT_FW_BLK | DPT_OUT_FW_UNM)) == 0) || - (ctx->flags & DPT_OUT_SNAT_LOCAL) != 0) - sess_out_exp++; - - 
dp_test_npf_session_count_verify(sess_out_exp); + _dp_test_npf_session_count_verify(ctx->exp_session, false, + file, func, __LINE__); - if (sess_out_exp) { + if (ctx->exp_session) { dp_test_npf_session_verify_desc(NULL, pre_desc, pre_desc->tx_intf, SE_ACTIVE, @@ -1408,7 +2328,8 @@ static void _dp_test_npf_golden_3(struct dp_test_golden_ctx *ctx, /* Run the test */ dp_test_pak_receive(pre_pak, pre_desc->tx_intf, test_exp); - dp_test_npf_session_count_verify(sess_out_exp); + _dp_test_npf_session_count_verify(ctx->exp_session, false, + file, func, __LINE__); if (ctx->count > 1) { ctx->count--; @@ -1419,6 +2340,7 @@ static void _dp_test_npf_golden_3(struct dp_test_golden_ctx *ctx, npf_golden_out_snat_local(TEST_FW_REMOVE, ctx); npf_golden_out_fw(TEST_FW_REMOVE, ctx); + npf_golden_zone(TEST_FW_REMOVE, ctx); dp_test_npf_cleanup(); @@ -1429,6 +2351,7 @@ static void _dp_test_npf_golden_3(struct dp_test_golden_ctx *ctx, "aa:bb:cc:dd:1:11"); dp_test_netlink_del_neigh("dp1T1", "2.2.2.11", "aa:bb:cc:dd:2:11"); + } /* @@ -1438,13 +2361,12 @@ DP_DECL_TEST_CASE(npf_golden, npf_golden3, NULL, NULL); DP_START_TEST(npf_golden3, test) { struct dp_test_golden_ctx ctx = { - .flags = 0, .count = 1, + .exp_session = 0, .flags = 0, .count = 1, .fw_in = 0, .fw_out = 0, .exp_fwd = DP_TEST_FWD_FORWARDED, .exp_back = DP_TEST_FWD_LOCAL, }; - dp_test_npf_golden_3(&ctx); } DP_END_TEST; @@ -1453,10 +2375,12 @@ DP_START_TEST(npf_golden3, test) * IPv4, local, Out FW */ DP_DECL_TEST_CASE(npf_golden, npf_golden3a, NULL, NULL); -DP_START_TEST(npf_golden3a, test) +DP_START_TEST_FULL_RUN(npf_golden3a, test) { struct dp_test_golden_ctx ctx = { - .flags = 0, .count = 1, .fw_in = 0, .fw_out = 1, + .exp_session = 0, + .flags = 0, .count = 1, + .fw_in = 0, .fw_out = 1, .exp_fwd = DP_TEST_FWD_FORWARDED, .exp_back = DP_TEST_FWD_LOCAL, }; @@ -1470,10 +2394,12 @@ DP_START_TEST(npf_golden3a, test) * IPv4, local, Out sFW */ DP_DECL_TEST_CASE(npf_golden, npf_golden3b, NULL, NULL); -DP_START_TEST(npf_golden3b, test) 
+DP_START_TEST_FULL_RUN(npf_golden3b, test) { struct dp_test_golden_ctx ctx = { - .flags = 0, .count = 1, .fw_in = 0, .fw_out = 1, + .exp_session = 1, + .flags = 0, .count = 1, + .fw_in = 0, .fw_out = 1, .exp_fwd = DP_TEST_FWD_FORWARDED, .exp_back = DP_TEST_FWD_LOCAL, }; @@ -1488,10 +2414,12 @@ DP_START_TEST(npf_golden3b, test) * IPv4, local, Out SNAT */ DP_DECL_TEST_CASE(npf_golden, npf_golden3c, NULL, NULL); -DP_START_TEST(npf_golden3c, test) +DP_START_TEST_FULL_RUN(npf_golden3c, test) { struct dp_test_golden_ctx ctx = { - .flags = 0, .count = 1, .fw_in = 0, .fw_out = 0, + .exp_session = 1, + .flags = 0, .count = 1, + .fw_in = 0, .fw_out = 0, .exp_fwd = DP_TEST_FWD_FORWARDED, .exp_back = DP_TEST_FWD_LOCAL, }; @@ -1505,10 +2433,12 @@ DP_START_TEST(npf_golden3c, test) * IPv4, local, Out SNAT -> FW */ DP_DECL_TEST_CASE(npf_golden, npf_golden3d, NULL, NULL); -DP_START_TEST(npf_golden3d, test) +DP_START_TEST_FULL_RUN(npf_golden3d, test) { struct dp_test_golden_ctx ctx = { - .flags = 0, .count = 1, .fw_in = 0, .fw_out = 1, + .exp_session = 1, + .flags = 0, .count = 1, + .fw_in = 0, .fw_out = 1, .exp_fwd = DP_TEST_FWD_FORWARDED, .exp_back = DP_TEST_FWD_LOCAL, }; @@ -1523,10 +2453,12 @@ DP_START_TEST(npf_golden3d, test) * IPv4, local, Out SNAT -> FW */ DP_DECL_TEST_CASE(npf_golden, npf_golden3e, NULL, NULL); -DP_START_TEST(npf_golden3e, test) +DP_START_TEST_FULL_RUN(npf_golden3e, test) { struct dp_test_golden_ctx ctx = { - .flags = 0, .count = 1, .fw_in = 0, .fw_out = 1, + .exp_session = 1, .flags = 0, + .count = 1, + .fw_in = 0, .fw_out = 1, .exp_fwd = DP_TEST_FWD_FORWARDED, .exp_back = DP_TEST_FWD_LOCAL, }; @@ -1538,16 +2470,100 @@ DP_START_TEST(npf_golden3e, test) } DP_END_TEST; +/* + * IPv4, local, Out Zone + */ +DP_DECL_TEST_CASE(npf_golden, npf_golden3f, NULL, NULL); +DP_START_TEST_FULL_RUN(npf_golden3f, test) +{ + struct dp_test_golden_ctx ctx = { + .exp_session = 0, + .flags = 0, + .count = 1, .fw_in = 0, .fw_out = 0, + .exp_fwd = DP_TEST_FWD_FORWARDED, + 
.exp_back = DP_TEST_FWD_LOCAL, + }; + ctx.flags |= DPT_ZONE_PUB; + + dp_test_npf_golden_3(&ctx); + +} DP_END_TEST; + +/* + * IPv4, local, Out Zone + */ +DP_DECL_TEST_CASE(npf_golden, npf_golden3g, NULL, NULL); +DP_START_TEST_FULL_RUN(npf_golden3g, test) +{ + /* no session is created for local to network traffic */ + struct dp_test_golden_ctx ctx = { + .exp_session = 0, + .flags = 0, .count = 1, + .fw_in = 0, .fw_out = 0, + .exp_fwd = DP_TEST_FWD_FORWARDED, + .exp_back = DP_TEST_FWD_LOCAL, + }; + ctx.flags |= DPT_ZONE_PUB; + ctx.flags |= DPT_ZONE_PUB_S; + + dp_test_npf_golden_3(&ctx); + +} DP_END_TEST; + +/* + * IPv4, local, Out SNAT -> Zone + */ +DP_DECL_TEST_CASE(npf_golden, npf_golden3h, NULL, NULL); +DP_START_TEST_FULL_RUN(npf_golden3h, test) +{ + struct dp_test_golden_ctx ctx = { + .exp_session = 1, + .flags = 0, .count = 1, + .fw_in = 0, .fw_out = 0, + .exp_fwd = DP_TEST_FWD_FORWARDED, + .exp_back = DP_TEST_FWD_LOCAL, + }; + ctx.flags |= DPT_ZONE_PUB; + ctx.flags |= DPT_OUT_SNAT_LOCAL; + + dp_test_npf_golden_3(&ctx); + +} DP_END_TEST; + +/* + * IPv4, local, Out SNAT -> Zone, stateful + */ +DP_DECL_TEST_CASE(npf_golden, npf_golden3i, NULL, NULL); +DP_START_TEST_FULL_RUN(npf_golden3i, test) +{ + struct dp_test_golden_ctx ctx = { + .exp_session = 1, + .flags = 0, .count = 1, + .fw_in = 0, .fw_out = 0, + .exp_fwd = DP_TEST_FWD_FORWARDED, + .exp_back = DP_TEST_FWD_LOCAL, + }; + ctx.flags |= DPT_ZONE_PUB; + ctx.flags |= DPT_ZONE_PUB_S; + ctx.flags |= DPT_OUT_SNAT_LOCAL; + + dp_test_npf_golden_3(&ctx); + +} DP_END_TEST; + + /* * 3j: IPv4, local, Out sFW, pkt matching block rule * * Packet will be sent, but no firewall rule stats will be incremented */ DP_DECL_TEST_CASE(npf_golden, npf_golden3j, NULL, NULL); -DP_START_TEST(npf_golden3j, test) +DP_START_TEST_FULL_RUN(npf_golden3j, test) { struct dp_test_golden_ctx ctx = { - .flags = 0, .count = 0, .fw_in = 0, .fw_out = 0, + .exp_session = 0, + .flags = 0, .count = 0, + .fw_in = 0, .fw_out = 0, .exp_fwd = 
DP_TEST_FWD_FORWARDED, .exp_back = DP_TEST_FWD_LOCAL, }; @@ -1562,10 +2578,12 @@ DP_START_TEST(npf_golden3j, test) * 3k: IPv4, local, Out sFW, pkt matching block rule */ DP_DECL_TEST_CASE(npf_golden, npf_golden3k, NULL, NULL); -DP_START_TEST(npf_golden3k, test) +DP_START_TEST_FULL_RUN(npf_golden3k, test) { struct dp_test_golden_ctx ctx = { - .flags = 0, .count = 1, .fw_in = 0, .fw_out = 0, + .exp_session = 0, + .flags = 0, .count = 1, + .fw_in = 0, .fw_out = 0, .exp_fwd = DP_TEST_FWD_FORWARDED, .exp_back = DP_TEST_FWD_DROPPED, }; @@ -1581,10 +2599,12 @@ DP_START_TEST(npf_golden3k, test) * 3l: IPv4, local, Out sFW, pkt matching no rule */ DP_DECL_TEST_CASE(npf_golden, npf_golden3l, NULL, NULL); -DP_START_TEST(npf_golden3l, test) +DP_START_TEST_FULL_RUN(npf_golden3l, test) { struct dp_test_golden_ctx ctx = { - .flags = 0, .count = 1, .fw_in = 0, .fw_out = 1, + .exp_session = 0, + .flags = 0, .count = 1, + .fw_in = 0, .fw_out = 1, .exp_fwd = DP_TEST_FWD_FORWARDED, .exp_back = DP_TEST_FWD_LOCAL, }; @@ -1599,16 +2619,252 @@ DP_START_TEST(npf_golden3l, test) * 3m: IPv4, local, Out sFW, pkt matching no rule */ DP_DECL_TEST_CASE(npf_golden, npf_golden3m, NULL, NULL); -DP_START_TEST(npf_golden3m, test) +DP_START_TEST_FULL_RUN(npf_golden3m, test) +{ + struct dp_test_golden_ctx ctx = { + .exp_session = 0, + .flags = 0, .count = 1, + .fw_in = 0, .fw_out = 1, + .exp_fwd = DP_TEST_FWD_FORWARDED, + .exp_back = DP_TEST_FWD_DROPPED, + }; + ctx.flags |= DPT_OUT_FW; + ctx.flags |= DPT_OUT_FW_S; + ctx.flags |= DPT_OUT_FW_UNM; + + dp_test_npf_golden_3(&ctx); + +} DP_END_TEST; + +/* + * IPv4, local zone to Public Zone. There is no zone policy for PUB to local, + * so the reverse packet is dropped. 
+ */ +DP_DECL_TEST_CASE(npf_golden, npf_golden3n, NULL, NULL); +DP_START_TEST_FULL_RUN(npf_golden3n, test) +{ + struct dp_test_golden_ctx ctx = { + .exp_session = 0, + .flags = 0, .count = 1, + .fw_in = 0, .fw_out = 0, + .exp_fwd = DP_TEST_FWD_FORWARDED, + .exp_back = DP_TEST_FWD_DROPPED, + }; + ctx.flags |= DPT_ZONE_PUB; + ctx.flags |= DPT_ZONE_LOCAL; + + dp_test_npf_golden_3(&ctx); + +} DP_END_TEST; + +/* + * IPv4, local zone to Public Zone, unmatched + */ +DP_DECL_TEST_CASE(npf_golden, npf_golden3o, NULL, NULL); +DP_START_TEST_FULL_RUN(npf_golden3o, test) +{ + struct dp_test_golden_ctx ctx = { + .exp_session = 0, + .flags = 0, .count = 1, + .fw_in = 0, .fw_out = 0, + .exp_fwd = DP_TEST_FWD_FORWARDED, + .exp_back = DP_TEST_FWD_DROPPED, + }; + ctx.flags |= DPT_ZONE_PUB; + ctx.flags |= DPT_ZONE_PUB_UNM; + ctx.flags |= DPT_ZONE_LOCAL; + + dp_test_npf_golden_3(&ctx); + +} DP_END_TEST; + +/* + * IPv4, local zone to Public zone, block + */ +DP_DECL_TEST_CASE(npf_golden, npf_golden3p, NULL, NULL); +DP_START_TEST_FULL_RUN(npf_golden3p, test) +{ + struct dp_test_golden_ctx ctx = { + .exp_session = 0, + .flags = 0, .count = 1, + .fw_in = 0, .fw_out = 0, + .exp_fwd = DP_TEST_FWD_FORWARDED, + .exp_back = DP_TEST_FWD_DROPPED, + }; + ctx.flags |= DPT_ZONE_PUB; + ctx.flags |= DPT_ZONE_PUB_BLK; + ctx.flags |= DPT_ZONE_LOCAL; + + dp_test_npf_golden_3(&ctx); + +} DP_END_TEST; + +/* + * IPv4, local zone to Public Zone (stateful) + */ +DP_DECL_TEST_CASE(npf_golden, npf_golden3q, NULL, NULL); +DP_START_TEST_FULL_RUN(npf_golden3q, test) +{ + struct dp_test_golden_ctx ctx = { + .exp_session = 1, + .flags = 0, .count = 1, + .fw_in = 0, .fw_out = 0, + .exp_fwd = DP_TEST_FWD_FORWARDED, + .exp_back = DP_TEST_FWD_LOCAL, + }; + ctx.flags |= DPT_ZONE_PUB_S; + ctx.flags |= DPT_ZONE_PUB; + ctx.flags |= DPT_ZONE_LOCAL; + + dp_test_npf_golden_3(&ctx); + +} DP_END_TEST; + +/* + * IPv4, local zone, Out SNAT -> Zone + */ +DP_DECL_TEST_CASE(npf_golden, npf_golden3r, NULL, NULL); 
+DP_START_TEST_FULL_RUN(npf_golden3r, test) +{ + struct dp_test_golden_ctx ctx = { + .exp_session = 1, + .flags = 0, .count = 1, + .fw_in = 0, .fw_out = 0, + .exp_fwd = DP_TEST_FWD_FORWARDED, + .exp_back = DP_TEST_FWD_LOCAL, + }; + ctx.flags |= DPT_ZONE_PUB; + ctx.flags |= DPT_OUT_SNAT_LOCAL; + ctx.flags |= DPT_ZONE_LOCAL; + + dp_test_npf_golden_3(&ctx); + +} DP_END_TEST; + +/* + * IPv4, local zone, Out SNAT -> Zone + */ +DP_DECL_TEST_CASE(npf_golden, npf_golden3s, NULL, NULL); +DP_START_TEST_FULL_RUN(npf_golden3s, test) +{ + struct dp_test_golden_ctx ctx = { + .exp_session = 1, + .flags = 0, .count = 1, + .fw_in = 0, .fw_out = 0, + .exp_fwd = DP_TEST_FWD_FORWARDED, + .exp_back = DP_TEST_FWD_LOCAL, + }; + ctx.flags |= DPT_ZONE_PUB; + ctx.flags |= DPT_ZONE_PUB_S; + ctx.flags |= DPT_OUT_SNAT_LOCAL; + ctx.flags |= DPT_ZONE_LOCAL; + + dp_test_npf_golden_3(&ctx); + +} DP_END_TEST; + +/* + * IPv4, local zone to Public Zone + */ +DP_DECL_TEST_CASE(npf_golden, npf_golden3t, NULL, NULL); +DP_START_TEST_FULL_RUN(npf_golden3t, test) +{ + struct dp_test_golden_ctx ctx = { + .exp_session = 0, + .flags = 0, .count = 1, + .fw_in = 0, .fw_out = 0, + .exp_fwd = DP_TEST_FWD_FORWARDED, + .exp_back = DP_TEST_FWD_LOCAL, + }; + ctx.flags |= DPT_ZONE_PUB; + ctx.flags |= DPT_ZONE_LOCAL; + ctx.flags |= DPT_ZP_PUB_TO_LOCAL; + + dp_test_npf_golden_3(&ctx); + +} DP_END_TEST; + +/* + * IPv4, local zone to Public Zone (stateful) + */ +DP_DECL_TEST_CASE(npf_golden, npf_golden3u, NULL, NULL); +DP_START_TEST_FULL_RUN(npf_golden3u, test) +{ + struct dp_test_golden_ctx ctx = { + .exp_session = 1, + .flags = 0, .count = 1, + .fw_in = 0, .fw_out = 0, + .exp_fwd = DP_TEST_FWD_FORWARDED, + .exp_back = DP_TEST_FWD_LOCAL, + }; + ctx.flags |= DPT_ZONE_PUB_S; + ctx.flags |= DPT_ZONE_PUB; + ctx.flags |= DPT_ZONE_LOCAL; + ctx.flags |= DPT_ZP_PUB_TO_LOCAL; + + dp_test_npf_golden_3(&ctx); + +} DP_END_TEST; + +/* + * IPv4, local zone, Out SNAT -> Zone + */ +DP_DECL_TEST_CASE(npf_golden, npf_golden3v, NULL, 
NULL); +DP_START_TEST_FULL_RUN(npf_golden3v, test) { struct dp_test_golden_ctx ctx = { - .flags = 0, .count = 1, .fw_in = 0, .fw_out = 1, + .exp_session = 1, + .flags = 0, .count = 1, + .fw_in = 0, .fw_out = 0, + .exp_fwd = DP_TEST_FWD_FORWARDED, + .exp_back = DP_TEST_FWD_LOCAL, + }; + ctx.flags |= DPT_ZONE_PUB; + ctx.flags |= DPT_OUT_SNAT_LOCAL; + ctx.flags |= DPT_ZONE_LOCAL; + ctx.flags |= DPT_ZP_PUB_TO_LOCAL; + + dp_test_npf_golden_3(&ctx); + +} DP_END_TEST; + +/* + * IPv4, local zone, Out SNAT -> Zone + */ +DP_DECL_TEST_CASE(npf_golden, npf_golden3w, NULL, NULL); +DP_START_TEST_FULL_RUN(npf_golden3w, test) +{ + struct dp_test_golden_ctx ctx = { + .exp_session = 1, + .flags = 0, .count = 1, + .fw_in = 0, .fw_out = 0, + .exp_fwd = DP_TEST_FWD_FORWARDED, + .exp_back = DP_TEST_FWD_LOCAL, + }; + ctx.flags |= DPT_ZONE_PUB; + ctx.flags |= DPT_ZONE_PUB_S; + ctx.flags |= DPT_OUT_SNAT_LOCAL; + ctx.flags |= DPT_ZONE_LOCAL; + ctx.flags |= DPT_ZP_PUB_TO_LOCAL; + + dp_test_npf_golden_3(&ctx); + +} DP_END_TEST; + +/* + * IPv4, local zone to non-zone + */ +DP_DECL_TEST_CASE(npf_golden, npf_golden3x, NULL, NULL); +DP_START_TEST_FULL_RUN(npf_golden3x, test) +{ + struct dp_test_golden_ctx ctx = { + .exp_session = 0, + .flags = 0, .count = 1, + .fw_in = 0, .fw_out = 0, .exp_fwd = DP_TEST_FWD_FORWARDED, - .exp_back = DP_TEST_FWD_DROPPED, + .exp_back = DP_TEST_FWD_LOCAL, }; - ctx.flags |= DPT_OUT_FW; - ctx.flags |= DPT_OUT_FW_S; - ctx.flags |= DPT_OUT_FW_UNM; + ctx.flags |= DPT_ZONE_LOCAL; dp_test_npf_golden_3(&ctx); @@ -1616,7 +2872,7 @@ DP_START_TEST(npf_golden3m, test) /* - * IPv4, net to local + * IPv4, net (dp1T0, zone PRIVATE) to local */ static void _dp_test_npf_golden_4(struct dp_test_golden_ctx *ctx, const char *file, const char *func); @@ -1637,6 +2893,7 @@ static void _dp_test_npf_golden_4(struct dp_test_golden_ctx *ctx, npf_golden_in_fw(TEST_FW_ADD, ctx); npf_golden_in_dnat_local(TEST_FW_ADD, ctx); + npf_golden_zone(TEST_FW_ADD, ctx); /* * UDP packet. 
Net to local @@ -1644,7 +2901,7 @@ static void _dp_test_npf_golden_4(struct dp_test_golden_ctx *ctx, struct dp_test_pkt_desc_t pre_pkt = { .text = "Pre", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "1.1.1.11", .l2_src = "aa:bb:cc:dd:1:11", .l3_dst = "1.1.1.1", @@ -1665,6 +2922,11 @@ static void _dp_test_npf_golden_4(struct dp_test_golden_ctx *ctx, else if ((ctx->flags & DPT_IN_FW_UNM) != 0) pre_pkt.l4.udp.dport = 48877; + if ((ctx->flags & DPT_ZP_PRIV_TO_LOCAL_BLK) != 0) + pre_pkt.l4.udp.sport = 57004; + else if ((ctx->flags & DPT_ZP_PRIV_TO_LOCAL_UNM) != 0) + pre_pkt.l4.udp.sport = 57003; + struct dp_test_pkt_desc_t post_pkt; struct dp_test_pkt_desc_t *pre_desc; struct dp_test_pkt_desc_t *post_desc; @@ -1701,16 +2963,18 @@ static void _dp_test_npf_golden_4(struct dp_test_golden_ctx *ctx, if ((ctx->flags & DPT_OUT_FW) != 0) npf_golden_out_fw(TEST_FW_VERIFY, ctx); - uint sess_in_exp = 0; + bool exp_zone_sess = false; - /* If in-fw and dnat are cfgd, then they use the same session */ - if ((ctx->flags & DPT_IN_FW_S) != 0 || - (ctx->flags & DPT_IN_DNAT_LOCAL) != 0) - sess_in_exp++; + if ((ctx->flags & DPT_ZONE_PRIV) != 0 && + (ctx->flags & DPT_ZONE_LOCAL) != 0 && + (ctx->flags & DPT_ZP_PRIV_TO_LOCAL) != 0 && + (ctx->flags & DPT_ZP_PRIV_TO_LOCAL_S) != 0) + exp_zone_sess = true; - dp_test_npf_session_count_verify(sess_in_exp); + _dp_test_npf_session_count_verify(ctx->exp_session, false, + file, func, __LINE__); - if ((ctx->flags & DPT_IN_FW_S) != 0) { + if ((ctx->flags & DPT_IN_FW_S) != 0 || exp_zone_sess) { if ((ctx->flags & DPT_IN_DNAT_LOCAL) != 0) dp_test_nat_session_verify_desc(false, 0x0D, pre_desc, post_desc); @@ -1729,7 +2993,8 @@ static void _dp_test_npf_golden_4(struct dp_test_golden_ctx *ctx, SE_ACTIVE, SE_FLAGS_AE, true); } else if ((ctx->flags & DPT_OUT_FW_S) != 0 || - (ctx->flags & DPT_OUT_SNAT) != 0) { + (ctx->flags & DPT_OUT_SNAT) != 0 || + (ctx->flags & DPT_ZONE_PRIV_S) != 0) { 
dp_test_npf_session_verify_desc(NULL, pre_desc, pre_desc->tx_intf, SE_ACTIVE, @@ -1742,7 +3007,7 @@ static void _dp_test_npf_golden_4(struct dp_test_golden_ctx *ctx, struct dp_test_pkt_desc_t pre_pkt2 = { .text = "Pre", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "1.1.1.1", .l2_src = "0:0:a4:0:0:65", .l3_dst = "1.1.1.11", @@ -1790,6 +3055,7 @@ static void _dp_test_npf_golden_4(struct dp_test_golden_ctx *ctx, npf_golden_in_fw(TEST_FW_REMOVE, ctx); npf_golden_in_dnat_local(TEST_FW_REMOVE, ctx); + npf_golden_zone(TEST_FW_REMOVE, ctx); dp_test_npf_cleanup(); @@ -1810,7 +3076,9 @@ DP_DECL_TEST_CASE(npf_golden, npf_golden4, NULL, NULL); DP_START_TEST(npf_golden4, test) { struct dp_test_golden_ctx ctx = { - .flags = 0, .count = 1, .fw_in = 0, .fw_out = 0, + .exp_session = 0, + .flags = 0, .count = 1, + .fw_in = 0, .fw_out = 0, .exp_fwd = DP_TEST_FWD_LOCAL, .exp_back = DP_TEST_FWD_FORWARDED, }; @@ -1822,10 +3090,11 @@ DP_START_TEST(npf_golden4, test) * IPv4, Net to local, In FW */ DP_DECL_TEST_CASE(npf_golden, npf_golden4a, NULL, NULL); -DP_START_TEST(npf_golden4a, test) +DP_START_TEST_FULL_RUN(npf_golden4a, test) { struct dp_test_golden_ctx ctx = { - .flags = 0, .count = 1, .fw_in = 1, .fw_out = 0, + .exp_session = 0, .flags = 0, .count = 1, + .fw_in = 1, .fw_out = 0, .exp_fwd = DP_TEST_FWD_LOCAL, .exp_back = DP_TEST_FWD_FORWARDED, }; @@ -1838,10 +3107,12 @@ DP_START_TEST(npf_golden4a, test) * IPv4, Net to local, In sFW */ DP_DECL_TEST_CASE(npf_golden, npf_golden4b, NULL, NULL); -DP_START_TEST(npf_golden4b, test) +DP_START_TEST_FULL_RUN(npf_golden4b, test) { struct dp_test_golden_ctx ctx = { - .flags = 0, .count = 1, .fw_in = 1, .fw_out = 0, + .exp_session = 1, + .flags = 0, .count = 1, + .fw_in = 1, .fw_out = 0, .exp_fwd = DP_TEST_FWD_LOCAL, .exp_back = DP_TEST_FWD_FORWARDED, }; @@ -1855,10 +3126,12 @@ DP_START_TEST(npf_golden4b, test) * IPv4, Net to local, In DNAT */ DP_DECL_TEST_CASE(npf_golden, npf_golden4c, NULL, NULL); 
-DP_START_TEST(npf_golden4c, test) +DP_START_TEST_FULL_RUN(npf_golden4c, test) { struct dp_test_golden_ctx ctx = { - .flags = 0, .count = 1, .fw_in = 0, .fw_out = 0, + .exp_session = 1, + .flags = 0, .count = 1, + .fw_in = 0, .fw_out = 0, .exp_fwd = DP_TEST_FWD_LOCAL, .exp_back = DP_TEST_FWD_FORWARDED, }; @@ -1871,10 +3144,12 @@ DP_START_TEST(npf_golden4c, test) * IPv4, Net to local, In FW -> DNAT */ DP_DECL_TEST_CASE(npf_golden, npf_golden4d, NULL, NULL); -DP_START_TEST(npf_golden4d, test) +DP_START_TEST_FULL_RUN(npf_golden4d, test) { struct dp_test_golden_ctx ctx = { - .flags = 0, .count = 1, .fw_in = 1, .fw_out = 0, + .exp_session = 1, + .flags = 0, .count = 1, + .fw_in = 1, .fw_out = 0, .exp_fwd = DP_TEST_FWD_LOCAL, .exp_back = DP_TEST_FWD_FORWARDED, }; @@ -1888,10 +3163,12 @@ DP_START_TEST(npf_golden4d, test) * IPv4, Net to local, In sFW -> DNAT */ DP_DECL_TEST_CASE(npf_golden, npf_golden4e, NULL, NULL); -DP_START_TEST(npf_golden4e, test) +DP_START_TEST_FULL_RUN(npf_golden4e, test) { struct dp_test_golden_ctx ctx = { - .flags = 0, .count = 1, .fw_in = 1, .fw_out = 0, + .exp_session = 1, + .flags = 0, .count = 1, + .fw_in = 1, .fw_out = 0, .exp_fwd = DP_TEST_FWD_LOCAL, .exp_back = DP_TEST_FWD_FORWARDED, }; @@ -1902,6 +3179,144 @@ DP_START_TEST(npf_golden4e, test) dp_test_npf_golden_4(&ctx); } DP_END_TEST; +/* + * IPv4, Net to local, In zone to non-zone + */ +DP_DECL_TEST_CASE(npf_golden, npf_golden4f, NULL, NULL); +DP_START_TEST_FULL_RUN(npf_golden4f, test) +{ + struct dp_test_golden_ctx ctx = { + .exp_session = 0, + .flags = 0, .count = 1, + .fw_in = 1, .fw_out = 0, + .exp_fwd = DP_TEST_FWD_LOCAL, + .exp_back = DP_TEST_FWD_FORWARDED, + }; + ctx.flags |= DPT_ZONE_PRIV; + + dp_test_npf_golden_4(&ctx); +} DP_END_TEST; + +/* + * IPv4, Net to local, non-zone to zone + */ +DP_DECL_TEST_CASE(npf_golden, npf_golden4g, NULL, NULL); +DP_START_TEST_FULL_RUN(npf_golden4g, test) +{ + struct dp_test_golden_ctx ctx = { + .exp_session = 0, + .flags = 0, .count = 1, + .fw_in = 
1, .fw_out = 0, + .exp_fwd = DP_TEST_FWD_LOCAL, + .exp_back = DP_TEST_FWD_FORWARDED, + }; + ctx.flags |= DPT_ZONE_LOCAL; + + dp_test_npf_golden_4(&ctx); +} DP_END_TEST; + +/* + * IPv4, Net to local, zone to zone, no ruleset + */ +DP_DECL_TEST_CASE(npf_golden, npf_golden4h, NULL, NULL); +DP_START_TEST_FULL_RUN(npf_golden4h, test) +{ + struct dp_test_golden_ctx ctx = { + .exp_session = 0, + .flags = 0, .count = 1, + .fw_in = 1, .fw_out = 0, + .exp_fwd = DP_TEST_FWD_DROPPED, + .exp_back = DP_TEST_FWD_FORWARDED, + }; + ctx.flags |= DPT_ZONE_PRIV; + ctx.flags |= DPT_ZONE_LOCAL; + + dp_test_npf_golden_4(&ctx); +} DP_END_TEST; + +/* + * IPv4, Net to local, zone to zone, matching pass rule + */ +DP_DECL_TEST_CASE(npf_golden, npf_golden4i, NULL, NULL); +DP_START_TEST_FULL_RUN(npf_golden4i, test) +{ + struct dp_test_golden_ctx ctx = { + .exp_session = 0, + .flags = 0, .count = 1, + .fw_in = 1, .fw_out = 0, + .exp_fwd = DP_TEST_FWD_LOCAL, + .exp_back = DP_TEST_FWD_FORWARDED, + }; + ctx.flags |= DPT_ZONE_PRIV; + ctx.flags |= DPT_ZONE_LOCAL; + ctx.flags |= DPT_ZP_PRIV_TO_LOCAL; + + dp_test_npf_golden_4(&ctx); +} DP_END_TEST; + +/* + * IPv4, Net to local, zone to zone, matching drop rule + */ +DP_DECL_TEST_CASE(npf_golden, npf_golden4j, NULL, NULL); +DP_START_TEST_FULL_RUN(npf_golden4j, test) +{ + struct dp_test_golden_ctx ctx = { + .exp_session = 0, + .flags = 0, .count = 1, + .fw_in = 1, .fw_out = 0, + .exp_fwd = DP_TEST_FWD_DROPPED, + .exp_back = DP_TEST_FWD_FORWARDED, + }; + ctx.flags |= DPT_ZONE_PRIV; + ctx.flags |= DPT_ZONE_LOCAL; + ctx.flags |= DPT_ZP_PRIV_TO_LOCAL; + ctx.flags |= DPT_ZP_PRIV_TO_LOCAL_BLK; + + dp_test_npf_golden_4(&ctx); +} DP_END_TEST; + +/* + * IPv4, Net to local, zone to zone, no matching rule + */ +DP_DECL_TEST_CASE(npf_golden, npf_golden4k, NULL, NULL); +DP_START_TEST_FULL_RUN(npf_golden4k, test) +{ + struct dp_test_golden_ctx ctx = { + .exp_session = 0, + .flags = 0, .count = 1, + .fw_in = 1, .fw_out = 0, + .exp_fwd = DP_TEST_FWD_DROPPED, + 
.exp_back = DP_TEST_FWD_FORWARDED, + }; + ctx.flags |= DPT_ZONE_PRIV; + ctx.flags |= DPT_ZONE_LOCAL; + ctx.flags |= DPT_ZP_PRIV_TO_LOCAL; + ctx.flags |= DPT_ZP_PRIV_TO_LOCAL_UNM; + + dp_test_npf_golden_4(&ctx); +} DP_END_TEST; + +/* + * IPv4, Net to local, zone to zone, matching stateful rule + */ +DP_DECL_TEST_CASE(npf_golden, npf_golden4l, NULL, NULL); +DP_START_TEST_FULL_RUN(npf_golden4l, test) +{ + struct dp_test_golden_ctx ctx = { + .exp_session = 1, + .flags = 0, .count = 1, + .fw_in = 1, .fw_out = 0, + .exp_fwd = DP_TEST_FWD_LOCAL, + .exp_back = DP_TEST_FWD_FORWARDED, + }; + ctx.flags |= DPT_ZONE_PRIV; + ctx.flags |= DPT_ZONE_LOCAL; + ctx.flags |= DPT_ZP_PRIV_TO_LOCAL; + ctx.flags |= DPT_ZP_PRIV_TO_LOCAL_S; + + dp_test_npf_golden_4(&ctx); +} DP_END_TEST; + /* * IPv6, local to net @@ -1932,6 +3347,7 @@ static void _dp_test_npf_golden_5(struct dp_test_golden_ctx *ctx, "aa:bb:cc:dd:2:11"); npf_golden_out_fw(TEST_FW_ADD, ctx); + npf_golden_zone(TEST_FW_ADD, ctx); /* @@ -1940,7 +3356,7 @@ static void _dp_test_npf_golden_5(struct dp_test_golden_ctx *ctx, struct dp_test_pkt_desc_t pre_pkt = { .text = "Pre", .len = 20, - .ether_type = ETHER_TYPE_IPv6, + .ether_type = RTE_ETHER_TYPE_IPV6, .l3_src = "2001:101:2::202:202", .l2_src = "0:0:a4:0:0:65", .l3_dst = "2001:101:2::202:20b", @@ -1961,9 +3377,11 @@ static void _dp_test_npf_golden_5(struct dp_test_golden_ctx *ctx, else if ((ctx->flags & DPT_IN_FW_UNM) != 0) pre_pkt.l4.udp.dport = 48877; - if ((ctx->flags & DPT_OUT_FW_BLK) != 0) + if ((ctx->flags & DPT_OUT_FW_BLK) != 0 || + (ctx->flags & DPT_ZONE_PUB_BLK) != 0) pre_pkt.l4.udp.sport = 57004; - else if ((ctx->flags & DPT_OUT_FW_UNM) != 0) + else if ((ctx->flags & DPT_OUT_FW_UNM) != 0 || + (ctx->flags & DPT_ZONE_PUB_UNM) != 0) pre_pkt.l4.udp.sport = 57003; struct dp_test_pkt_desc_t post_pkt; @@ -2001,11 +3419,14 @@ static void _dp_test_npf_golden_5(struct dp_test_golden_ctx *ctx, if (((ctx->flags & DPT_OUT_FW_S) != 0 && (ctx->flags & (DPT_OUT_FW_BLK | DPT_OUT_FW_UNM)) 
== 0) || - (ctx->flags & DPT_OUT_SNAT_LOCAL) != 0) { + (ctx->flags & DPT_OUT_SNAT_LOCAL) != 0 || + ((ctx->flags & DPT_ZONE_PUB_S) != 0 && + (ctx->flags & DPT_ZONE_LOCAL) != 0)) { sess_out_exp++; } - dp_test_npf_session_count_verify(sess_out_exp); + _dp_test_npf_session_count_verify(ctx->exp_session, false, + file, func, __LINE__); if (sess_out_exp) dp_test_npf_session_verify_desc(NULL, pre_desc, @@ -2028,7 +3449,8 @@ static void _dp_test_npf_golden_5(struct dp_test_golden_ctx *ctx, /* Run the test */ dp_test_pak_receive(pre_pak, pre_desc->tx_intf, test_exp); - dp_test_npf_session_count_verify(sess_out_exp); + _dp_test_npf_session_count_verify(ctx->exp_session, false, + file, func, __LINE__); if (ctx->count > 1) { ctx->count--; @@ -2038,6 +3460,7 @@ static void _dp_test_npf_golden_5(struct dp_test_golden_ctx *ctx, /* Cleanup */ npf_golden_out_fw(TEST_FW_REMOVE, ctx); + npf_golden_zone(TEST_FW_REMOVE, ctx); dp_test_npf_cleanup(); @@ -2051,129 +3474,323 @@ static void _dp_test_npf_golden_5(struct dp_test_golden_ctx *ctx, dp_test_netlink_del_neigh("dp1T1", "2.2.2.11", "aa:bb:cc:dd:2:11"); - dp_test_nl_del_ip_addr_and_connected("dp1T0", "2001:101:1::101:101/96"); - dp_test_nl_del_ip_addr_and_connected("dp1T1", "2001:101:2::202:202/96"); + dp_test_nl_del_ip_addr_and_connected("dp1T0", "2001:101:1::101:101/96"); + dp_test_nl_del_ip_addr_and_connected("dp1T1", "2001:101:2::202:202/96"); + + dp_test_nl_del_ip_addr_and_connected("dp1T0", "1.1.1.1/24"); + dp_test_nl_del_ip_addr_and_connected("dp1T1", "2.2.2.2/24"); +} + +/* + * IPv6, local, no npf + */ +DP_DECL_TEST_CASE(npf_golden, npf_golden5, NULL, NULL); +DP_START_TEST(npf_golden5, test) +{ + struct dp_test_golden_ctx ctx = { + .exp_session = 0, .flags = 0, .count = 1, + .fw_in = 0, .fw_out = 0, + .exp_fwd = DP_TEST_FWD_FORWARDED, + .exp_back = DP_TEST_FWD_LOCAL, + }; + + dp_test_npf_golden_5(&ctx); +} DP_END_TEST; + +/* + * IPv6, local, Out FW + */ +DP_DECL_TEST_CASE(npf_golden, npf_golden5a, NULL, NULL); 
+DP_START_TEST_FULL_RUN(npf_golden5a, test) +{ + struct dp_test_golden_ctx ctx = { + .exp_session = 0, .flags = 0, .count = 1, + .fw_in = 0, .fw_out = 1, + .exp_fwd = DP_TEST_FWD_FORWARDED, + .exp_back = DP_TEST_FWD_LOCAL, + }; + ctx.flags |= DPT_OUT_FW; + + dp_test_npf_golden_5(&ctx); +} DP_END_TEST; + +/* + * IPv6, local, Out sFW + */ +DP_DECL_TEST_CASE(npf_golden, npf_golden5b, NULL, NULL); +DP_START_TEST_FULL_RUN(npf_golden5b, test) +{ + struct dp_test_golden_ctx ctx = { + .exp_session = 1, + .flags = 0, .count = 1, + .fw_in = 0, .fw_out = 1, + .exp_fwd = DP_TEST_FWD_FORWARDED, + .exp_back = DP_TEST_FWD_LOCAL, + }; + ctx.flags |= DPT_OUT_FW; + ctx.flags |= DPT_OUT_FW_S; + + dp_test_npf_golden_5(&ctx); +} DP_END_TEST; + +/* + * IPv6, local, Out Zone + */ +DP_DECL_TEST_CASE(npf_golden, npf_golden5f, NULL, NULL); +DP_START_TEST_FULL_RUN(npf_golden5f, test) +{ + struct dp_test_golden_ctx ctx = { + .exp_session = 0, + .flags = 0, .count = 1, + .fw_in = 0, .fw_out = 0, + .exp_fwd = DP_TEST_FWD_FORWARDED, + .exp_back = DP_TEST_FWD_LOCAL, + }; + ctx.flags |= DPT_ZONE_PUB; + + dp_test_npf_golden_5(&ctx); +} DP_END_TEST; + +/* + * IPv6, local, Out Zone + */ +DP_DECL_TEST_CASE(npf_golden, npf_golden5g, NULL, NULL); +DP_START_TEST_FULL_RUN(npf_golden5g, test) +{ + struct dp_test_golden_ctx ctx = { + .exp_session = 0, + .flags = 0, .count = 1, + .fw_in = 0, .fw_out = 0, + .exp_fwd = DP_TEST_FWD_FORWARDED, + .exp_back = DP_TEST_FWD_LOCAL, + }; + ctx.flags |= DPT_ZONE_PUB; + ctx.flags |= DPT_ZONE_PUB_S; + + dp_test_npf_golden_5(&ctx); +} DP_END_TEST; + +/* + * 5j: IPv6, local, Out sFW, pkt matching block rule + * + * Packet will be sent, but no firewall rule stats will be incremented + */ +DP_DECL_TEST_CASE(npf_golden, npf_golden5j, NULL, NULL); +DP_START_TEST_FULL_RUN(npf_golden5j, test) +{ + struct dp_test_golden_ctx ctx = { + .exp_session = 0, + .flags = 0, .count = 0, + .fw_in = 0, .fw_out = 0, + .exp_fwd = DP_TEST_FWD_FORWARDED, + .exp_back = DP_TEST_FWD_LOCAL, + }; + 
ctx.flags |= DPT_OUT_FW; + ctx.flags |= DPT_OUT_FW_BLK; + + dp_test_npf_golden_5(&ctx); + +} DP_END_TEST; + +DP_DECL_TEST_CASE(npf_golden, npf_golden5k, NULL, NULL); +DP_START_TEST_FULL_RUN(npf_golden5k, test) +{ + struct dp_test_golden_ctx ctx = { + .exp_session = 0, + .flags = 0, .count = 0, + .fw_in = 0, .fw_out = 0, + .exp_fwd = DP_TEST_FWD_FORWARDED, + .exp_back = DP_TEST_FWD_DROPPED, + }; + ctx.flags |= DPT_OUT_FW; + ctx.flags |= DPT_OUT_FW_S; + ctx.flags |= DPT_OUT_FW_BLK; + + dp_test_npf_golden_5(&ctx); + +} DP_END_TEST; + +/* + * 5l: IPv4, local, Out sFW, pkt matching no rule + */ +DP_DECL_TEST_CASE(npf_golden, npf_golden5l, NULL, NULL); +DP_START_TEST_FULL_RUN(npf_golden5l, test) +{ + struct dp_test_golden_ctx ctx = { + .exp_session = 0, + .flags = 0, .count = 1, + .fw_in = 0, .fw_out = 1, + .exp_fwd = DP_TEST_FWD_FORWARDED, + .exp_back = DP_TEST_FWD_LOCAL, + }; + ctx.flags |= DPT_OUT_FW; + ctx.flags |= DPT_OUT_FW_UNM; + + dp_test_npf_golden_5(&ctx); + +} DP_END_TEST; + +/* + * 5m: IPv6, local, Out sFW, pkt matching no rule + */ +DP_DECL_TEST_CASE(npf_golden, npf_golden5m, NULL, NULL); +DP_START_TEST_FULL_RUN(npf_golden5m, test) +{ + struct dp_test_golden_ctx ctx = { + .exp_session = 0, + .flags = 0, .count = 1, + .fw_in = 0, .fw_out = 1, + .exp_fwd = DP_TEST_FWD_FORWARDED, + .exp_back = DP_TEST_FWD_DROPPED, + }; + ctx.flags |= DPT_OUT_FW; + ctx.flags |= DPT_OUT_FW_S; + ctx.flags |= DPT_OUT_FW_UNM; + + dp_test_npf_golden_5(&ctx); - dp_test_nl_del_ip_addr_and_connected("dp1T0", "1.1.1.1/24"); - dp_test_nl_del_ip_addr_and_connected("dp1T1", "2.2.2.2/24"); -} +} DP_END_TEST; /* - * IPv6, local, no npf + * IPv6, local zone to Public Zone */ -DP_DECL_TEST_CASE(npf_golden, npf_golden5, NULL, NULL); -DP_START_TEST(npf_golden5, test) +DP_DECL_TEST_CASE(npf_golden, npf_golden5n, NULL, NULL); +DP_START_TEST_FULL_RUN(npf_golden5n, test) { struct dp_test_golden_ctx ctx = { - .flags = 0, .count = 1, .fw_in = 0, .fw_out = 0, + .exp_session = 0, + .flags = 0, .count = 
1, + .fw_in = 0, .fw_out = 0, .exp_fwd = DP_TEST_FWD_FORWARDED, - .exp_back = DP_TEST_FWD_LOCAL, + .exp_back = DP_TEST_FWD_DROPPED, }; + ctx.flags |= DPT_ZONE_PUB; + ctx.flags |= DPT_ZONE_LOCAL; dp_test_npf_golden_5(&ctx); + } DP_END_TEST; /* - * IPv6, local, Out FW + * IPv6, local zone to Public Zone, unmatched */ -DP_DECL_TEST_CASE(npf_golden, npf_golden5a, NULL, NULL); -DP_START_TEST(npf_golden5a, test) +DP_DECL_TEST_CASE(npf_golden, npf_golden5o, NULL, NULL); +DP_START_TEST_FULL_RUN(npf_golden5o, test) { struct dp_test_golden_ctx ctx = { - .flags = 0, .count = 1, .fw_in = 0, .fw_out = 1, + .exp_session = 0, + .flags = 0, .count = 1, + .fw_in = 0, .fw_out = 0, .exp_fwd = DP_TEST_FWD_FORWARDED, - .exp_back = DP_TEST_FWD_LOCAL, + .exp_back = DP_TEST_FWD_DROPPED, }; - ctx.flags |= DPT_OUT_FW; + ctx.flags |= DPT_ZONE_PUB; + ctx.flags |= DPT_ZONE_PUB_UNM; + ctx.flags |= DPT_ZONE_LOCAL; dp_test_npf_golden_5(&ctx); + } DP_END_TEST; /* - * IPv6, local, Out sFW + * IPv6, local zone to Public zone, block */ -DP_DECL_TEST_CASE(npf_golden, npf_golden5b, NULL, NULL); -DP_START_TEST(npf_golden5b, test) +DP_DECL_TEST_CASE(npf_golden, npf_golden5p, NULL, NULL); +DP_START_TEST_FULL_RUN(npf_golden5p, test) { struct dp_test_golden_ctx ctx = { - .flags = 0, .count = 1, .fw_in = 0, .fw_out = 1, + .exp_session = 0, + .flags = 0, .count = 1, + .fw_in = 0, .fw_out = 0, .exp_fwd = DP_TEST_FWD_FORWARDED, - .exp_back = DP_TEST_FWD_LOCAL, + .exp_back = DP_TEST_FWD_DROPPED, }; - ctx.flags |= DPT_OUT_FW; - ctx.flags |= DPT_OUT_FW_S; + ctx.flags |= DPT_ZONE_PUB; + ctx.flags |= DPT_ZONE_PUB_BLK; + ctx.flags |= DPT_ZONE_LOCAL; dp_test_npf_golden_5(&ctx); + } DP_END_TEST; /* - * 5j: IPv6, local, Out sFW, pkt matching block rule - * - * Packet will be sent, but no firewall rule stats will be incremented + * IPv6, local zone to Public Zone (stateful) */ -DP_DECL_TEST_CASE(npf_golden, npf_golden5j, NULL, NULL); -DP_START_TEST(npf_golden5j, test) +DP_DECL_TEST_CASE(npf_golden, npf_golden5q, NULL, 
NULL); +DP_START_TEST_FULL_RUN(npf_golden5q, test) { struct dp_test_golden_ctx ctx = { - .flags = 0, .count = 0, .fw_in = 0, .fw_out = 0, + .exp_session = 1, + .flags = 0, .count = 1, + .fw_in = 0, .fw_out = 0, .exp_fwd = DP_TEST_FWD_FORWARDED, .exp_back = DP_TEST_FWD_LOCAL, }; - ctx.flags |= DPT_OUT_FW; - ctx.flags |= DPT_OUT_FW_BLK; + ctx.flags |= DPT_ZONE_PUB_S; + ctx.flags |= DPT_ZONE_PUB; + ctx.flags |= DPT_ZONE_LOCAL; dp_test_npf_golden_5(&ctx); } DP_END_TEST; -DP_DECL_TEST_CASE(npf_golden, npf_golden5k, NULL, NULL); -DP_START_TEST(npf_golden5k, test) +/* + * IPv6, local zone to Public Zone + */ +DP_DECL_TEST_CASE(npf_golden, npf_golden5t, NULL, NULL); +DP_START_TEST_FULL_RUN(npf_golden5t, test) { struct dp_test_golden_ctx ctx = { - .flags = 0, .count = 0, .fw_in = 0, .fw_out = 0, + .exp_session = 0, + .flags = 0, .count = 1, + .fw_in = 0, .fw_out = 0, .exp_fwd = DP_TEST_FWD_FORWARDED, - .exp_back = DP_TEST_FWD_DROPPED, + .exp_back = DP_TEST_FWD_LOCAL, }; - ctx.flags |= DPT_OUT_FW; - ctx.flags |= DPT_OUT_FW_S; - ctx.flags |= DPT_OUT_FW_BLK; + ctx.flags |= DPT_ZONE_PUB; + ctx.flags |= DPT_ZONE_LOCAL; + ctx.flags |= DPT_ZP_PUB_TO_LOCAL; dp_test_npf_golden_5(&ctx); } DP_END_TEST; /* - * 5l: IPv4, local, Out sFW, pkt matching no rule + * IPv6, local zone to Public Zone (stateful) */ -DP_DECL_TEST_CASE(npf_golden, npf_golden5l, NULL, NULL); -DP_START_TEST(npf_golden5l, test) +DP_DECL_TEST_CASE(npf_golden, npf_golden5u, NULL, NULL); +DP_START_TEST_FULL_RUN(npf_golden5u, test) { struct dp_test_golden_ctx ctx = { - .flags = 0, .count = 1, .fw_in = 0, .fw_out = 1, + .exp_session = 1, + .flags = 0, .count = 1, + .fw_in = 0, .fw_out = 0, .exp_fwd = DP_TEST_FWD_FORWARDED, .exp_back = DP_TEST_FWD_LOCAL, }; - ctx.flags |= DPT_OUT_FW; - ctx.flags |= DPT_OUT_FW_UNM; + ctx.flags |= DPT_ZONE_PUB_S; + ctx.flags |= DPT_ZONE_PUB; + ctx.flags |= DPT_ZONE_LOCAL; + ctx.flags |= DPT_ZP_PUB_TO_LOCAL; dp_test_npf_golden_5(&ctx); } DP_END_TEST; /* - * 5m: IPv6, local, Out sFW, pkt 
matching no rule + * IPv6, local zone to non-zone */ -DP_DECL_TEST_CASE(npf_golden, npf_golden5m, NULL, NULL); -DP_START_TEST(npf_golden5m, test) +DP_DECL_TEST_CASE(npf_golden, npf_golden5x, NULL, NULL); +DP_START_TEST_FULL_RUN(npf_golden5x, test) { struct dp_test_golden_ctx ctx = { - .flags = 0, .count = 1, .fw_in = 0, .fw_out = 1, + .exp_session = 0, + .flags = 0, .count = 1, + .fw_in = 0, .fw_out = 0, .exp_fwd = DP_TEST_FWD_FORWARDED, - .exp_back = DP_TEST_FWD_DROPPED, + .exp_back = DP_TEST_FWD_LOCAL, }; - ctx.flags |= DPT_OUT_FW; - ctx.flags |= DPT_OUT_FW_S; - ctx.flags |= DPT_OUT_FW_UNM; + ctx.flags |= DPT_ZONE_LOCAL; dp_test_npf_golden_5(&ctx); @@ -2209,6 +3826,7 @@ static void _dp_test_npf_golden_6(struct dp_test_golden_ctx *ctx, "aa:bb:cc:dd:2:11"); npf_golden_in_fw(TEST_FW_ADD, ctx); + npf_golden_zone(TEST_FW_ADD, ctx); /* @@ -2217,7 +3835,7 @@ static void _dp_test_npf_golden_6(struct dp_test_golden_ctx *ctx, struct dp_test_pkt_desc_t pre_pkt = { .text = "Pre", .len = 20, - .ether_type = ETHER_TYPE_IPv6, + .ether_type = RTE_ETHER_TYPE_IPV6, .l3_src = "2001:101:2::202:20b", .l2_src = "aa:bb:cc:dd:2:11", .l3_dst = "2001:101:2::202:202", @@ -2275,14 +3893,8 @@ static void _dp_test_npf_golden_6(struct dp_test_golden_ctx *ctx, if ((ctx->flags & DPT_OUT_FW) != 0) npf_golden_out_fw(TEST_FW_VERIFY, ctx); - uint sess_in_exp = 0; - - /* If in-fw and dnat are cfgd, then they use the same session */ - if ((ctx->flags & DPT_IN_FW_S) != 0 || - (ctx->flags & DPT_IN_DNAT) != 0) - sess_in_exp++; - - dp_test_npf_session_count_verify(sess_in_exp); + _dp_test_npf_session_count_verify(ctx->exp_session, false, + file, func, __LINE__); if ((ctx->flags & DPT_IN_FW_S) != 0) { dp_test_npf_session_verify_desc(NULL, pre_desc, @@ -2299,7 +3911,8 @@ static void _dp_test_npf_golden_6(struct dp_test_golden_ctx *ctx, SE_ACTIVE, SE_FLAGS_AE, true); } else if ((ctx->flags & DPT_OUT_FW_S) != 0 || - (ctx->flags & DPT_OUT_SNAT) != 0) { + (ctx->flags & DPT_OUT_SNAT) != 0 || + (ctx->flags & 
DPT_ZONE_PRIV_S) != 0) { dp_test_npf_session_verify_desc(NULL, pre_desc, pre_desc->tx_intf, SE_ACTIVE, @@ -2312,7 +3925,7 @@ static void _dp_test_npf_golden_6(struct dp_test_golden_ctx *ctx, struct dp_test_pkt_desc_t pre_pkt2 = { .text = "Pre", .len = 20, - .ether_type = ETHER_TYPE_IPv6, + .ether_type = RTE_ETHER_TYPE_IPV6, .l3_src = "2001:101:2::202:202", .l2_src = "0:0:a4:0:0:65", .l3_dst = "2001:101:2::202:20b", @@ -2354,6 +3967,7 @@ static void _dp_test_npf_golden_6(struct dp_test_golden_ctx *ctx, /* Cleanup */ npf_golden_in_fw(TEST_FW_REMOVE, ctx); + npf_golden_zone(TEST_FW_REMOVE, ctx); dp_test_npf_cleanup(); @@ -2382,7 +3996,8 @@ DP_DECL_TEST_CASE(npf_golden, npf_golden6, NULL, NULL); DP_START_TEST(npf_golden6, test) { struct dp_test_golden_ctx ctx = { - .flags = 0, .count = 1, .fw_in = 0, .fw_out = 0, + .exp_session = 0, .flags = 0, .count = 1, + .fw_in = 0, .fw_out = 0, .exp_fwd = DP_TEST_FWD_LOCAL, .exp_back = DP_TEST_FWD_FORWARDED, }; @@ -2394,10 +4009,11 @@ DP_START_TEST(npf_golden6, test) * IPv6, Net to local, In FW */ DP_DECL_TEST_CASE(npf_golden, npf_golden6a, NULL, NULL); -DP_START_TEST(npf_golden6a, test) +DP_START_TEST_FULL_RUN(npf_golden6a, test) { struct dp_test_golden_ctx ctx = { - .flags = 0, .count = 1, .fw_in = 1, .fw_out = 0, + .exp_session = 0, .flags = 0, .count = 1, + .fw_in = 1, .fw_out = 0, .exp_fwd = DP_TEST_FWD_LOCAL, .exp_back = DP_TEST_FWD_FORWARDED, }; @@ -2410,10 +4026,12 @@ DP_START_TEST(npf_golden6a, test) * IPv6, Net to local, In sFW */ DP_DECL_TEST_CASE(npf_golden, npf_golden6b, NULL, NULL); -DP_START_TEST(npf_golden6b, test) +DP_START_TEST_FULL_RUN(npf_golden6b, test) { struct dp_test_golden_ctx ctx = { - .flags = 0, .count = 1, .fw_in = 1, .fw_out = 0, + .exp_session = 1, + .flags = 0, .count = 1, + .fw_in = 1, .fw_out = 0, .exp_fwd = DP_TEST_FWD_LOCAL, .exp_back = DP_TEST_FWD_FORWARDED, }; @@ -2444,6 +4062,7 @@ static void _dp_test_npf_golden_7(struct dp_test_golden_ctx *ctx, "aa:bb:cc:dd:2:11"); 
npf_golden_out_fw(TEST_FW_ADD, ctx); + npf_golden_zone(TEST_FW_ADD, ctx); /* @@ -2452,7 +4071,7 @@ static void _dp_test_npf_golden_7(struct dp_test_golden_ctx *ctx, struct dp_test_pkt_desc_t pre_pkt = { .text = "Pre", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "1.1.1.11", .l2_src = "aa:bb:cc:dd:1:11", .l3_dst = "2.2.2.11", @@ -2510,7 +4129,8 @@ static void _dp_test_npf_golden_7(struct dp_test_golden_ctx *ctx, sess_out_exp++; } - dp_test_npf_session_count_verify(sess_out_exp); + _dp_test_npf_session_count_verify(ctx->exp_session, false, + file, func, __LINE__); if (sess_out_exp) { dp_test_npf_session_verify_desc(NULL, pre_desc, @@ -2527,6 +4147,7 @@ static void _dp_test_npf_golden_7(struct dp_test_golden_ctx *ctx, /* Cleanup */ npf_golden_out_fw(TEST_FW_REMOVE, ctx); + npf_golden_zone(TEST_FW_REMOVE, ctx); dp_test_npf_cleanup(); @@ -2547,7 +4168,7 @@ DP_DECL_TEST_CASE(npf_golden, npf_golden7, NULL, NULL); DP_START_TEST(npf_golden7, test) { struct dp_test_golden_ctx ctx = { - .flags = 0, .count = 1, + .exp_session = 0, .flags = 0, .count = 1, .fw_in = 0, .fw_out = 0, .exp_fwd = DP_TEST_FWD_FORWARDED, }; @@ -2561,10 +4182,10 @@ DP_START_TEST(npf_golden7, test) * IPv4, Local (kernel forwarded) to Net */ DP_DECL_TEST_CASE(npf_golden, npf_golden7a, NULL, NULL); -DP_START_TEST(npf_golden7a, test) +DP_START_TEST_FULL_RUN(npf_golden7a, test) { struct dp_test_golden_ctx ctx = { - .flags = 0, .count = 1, + .exp_session = 0, .flags = 0, .count = 1, .fw_in = 0, .fw_out = 1, .exp_fwd = DP_TEST_FWD_FORWARDED, }; @@ -2579,9 +4200,10 @@ DP_START_TEST(npf_golden7a, test) * IPv4, Local (kernel forwarded) to Net */ DP_DECL_TEST_CASE(npf_golden, npf_golden7b, NULL, NULL); -DP_START_TEST(npf_golden7b, test) +DP_START_TEST_FULL_RUN(npf_golden7b, test) { struct dp_test_golden_ctx ctx = { + .exp_session = 0, .flags = 0, .count = 1, .fw_in = 0, .fw_out = 1, .exp_fwd = DP_TEST_FWD_DROPPED, @@ -2598,10 +4220,10 @@ DP_START_TEST(npf_golden7b, 
test) * IPv4, Local (kernel forwarded) to Net */ DP_DECL_TEST_CASE(npf_golden, npf_golden7c, NULL, NULL); -DP_START_TEST(npf_golden7c, test) +DP_START_TEST_FULL_RUN(npf_golden7c, test) { struct dp_test_golden_ctx ctx = { - .flags = 0, .count = 1, + .exp_session = 0, .flags = 0, .count = 1, .fw_in = 0, .fw_out = 1, .exp_fwd = DP_TEST_FWD_DROPPED, }; @@ -2613,6 +4235,44 @@ DP_START_TEST(npf_golden7c, test) } DP_END_TEST; +/* + * IPv4, Local (kernel forwarded) to Net + */ +DP_DECL_TEST_CASE(npf_golden, npf_golden7d, NULL, NULL); +DP_START_TEST_FULL_RUN(npf_golden7d, test) +{ + struct dp_test_golden_ctx ctx = { + .exp_session = 0, .flags = 0, .count = 1, + .fw_in = 0, .fw_out = 0, + .exp_fwd = DP_TEST_FWD_DROPPED, + }; + + ctx.flags |= DPT_ZONE_PUB; + + dp_test_npf_golden_7(&ctx); + +} DP_END_TEST; + +/* + * IPv4, Local (kernel forwarded) to Net. This qualifies as non-zone to zone, + * so is dropped. + */ +DP_DECL_TEST_CASE(npf_golden, npf_golden7e, NULL, NULL); +DP_START_TEST_FULL_RUN(npf_golden7e, test) +{ + struct dp_test_golden_ctx ctx = { + .exp_session = 0, .flags = 0, .count = 1, + .fw_in = 0, .fw_out = 0, + .exp_fwd = DP_TEST_FWD_DROPPED, + }; + + ctx.flags |= DPT_ZONE_PUB; + ctx.flags |= DPT_ZONE_LOCAL; + + dp_test_npf_golden_7(&ctx); + +} DP_END_TEST; + /* * IPv6, Local (kernel forwarded) to Net @@ -2643,6 +4303,7 @@ static void _dp_test_npf_golden_8(struct dp_test_golden_ctx *ctx, "aa:bb:cc:dd:2:11"); npf_golden_out_fw(TEST_FW_ADD, ctx); + npf_golden_zone(TEST_FW_ADD, ctx); /* @@ -2651,7 +4312,7 @@ static void _dp_test_npf_golden_8(struct dp_test_golden_ctx *ctx, struct dp_test_pkt_desc_t pre_pkt = { .text = "Pre", .len = 20, - .ether_type = ETHER_TYPE_IPv6, + .ether_type = RTE_ETHER_TYPE_IPV6, .l3_src = "2001:101:1::101:10b", .l2_src = "aa:bb:cc:dd:1:11", .l3_dst = "2001:101:2::202:20b", @@ -2709,7 +4370,8 @@ static void _dp_test_npf_golden_8(struct dp_test_golden_ctx *ctx, sess_out_exp++; } - dp_test_npf_session_count_verify(sess_out_exp); +
_dp_test_npf_session_count_verify(ctx->exp_session, false, + file, func, __LINE__); if (sess_out_exp) { dp_test_npf_session_verify_desc(NULL, pre_desc, @@ -2726,6 +4388,7 @@ static void _dp_test_npf_golden_8(struct dp_test_golden_ctx *ctx, /* Cleanup */ npf_golden_out_fw(TEST_FW_REMOVE, ctx); + npf_golden_zone(TEST_FW_REMOVE, ctx); dp_test_npf_cleanup(); @@ -2753,7 +4416,7 @@ DP_DECL_TEST_CASE(npf_golden, npf_golden8, NULL, NULL); DP_START_TEST(npf_golden8, test) { struct dp_test_golden_ctx ctx = { - .flags = 0, .count = 1, + .exp_session = 0, .flags = 0, .count = 1, .fw_in = 0, .fw_out = 0, .exp_fwd = DP_TEST_FWD_FORWARDED, }; @@ -2767,10 +4430,10 @@ DP_START_TEST(npf_golden8, test) * IPv6, Local (kernel forwarded) to Net */ DP_DECL_TEST_CASE(npf_golden, npf_golden8a, NULL, NULL); -DP_START_TEST(npf_golden8a, test) +DP_START_TEST_FULL_RUN(npf_golden8a, test) { struct dp_test_golden_ctx ctx = { - .flags = 0, .count = 1, + .exp_session = 0, .flags = 0, .count = 1, .fw_in = 0, .fw_out = 1, .exp_fwd = DP_TEST_FWD_FORWARDED, }; @@ -2785,10 +4448,10 @@ DP_START_TEST(npf_golden8a, test) * IPv6, Local (kernel forwarded) to Net */ DP_DECL_TEST_CASE(npf_golden, npf_golden8b, NULL, NULL); -DP_START_TEST(npf_golden8b, test) +DP_START_TEST_FULL_RUN(npf_golden8b, test) { struct dp_test_golden_ctx ctx = { - .flags = 0, .count = 1, + .exp_session = 0, .flags = 0, .count = 1, .fw_in = 0, .fw_out = 1, .exp_fwd = DP_TEST_FWD_DROPPED, }; @@ -2804,10 +4467,10 @@ DP_START_TEST(npf_golden8b, test) * IPv6, Local (kernel forwarded) to Net */ DP_DECL_TEST_CASE(npf_golden, npf_golden8c, NULL, NULL); -DP_START_TEST(npf_golden8c, test) +DP_START_TEST_FULL_RUN(npf_golden8c, test) { struct dp_test_golden_ctx ctx = { - .flags = 0, .count = 1, + .exp_session = 0, .flags = 0, .count = 1, .fw_in = 0, .fw_out = 1, .exp_fwd = DP_TEST_FWD_DROPPED, }; @@ -2818,3 +4481,186 @@ DP_START_TEST(npf_golden8c, test) dp_test_npf_golden_8(&ctx); } DP_END_TEST; + +/* + * IPv6, Local (kernel forwarded) to Net + */ 
+DP_DECL_TEST_CASE(npf_golden, npf_golden8d, NULL, NULL); +DP_START_TEST_FULL_RUN(npf_golden8d, test) +{ + struct dp_test_golden_ctx ctx = { + .exp_session = 0, .flags = 0, .count = 1, + .fw_in = 0, .fw_out = 0, + .exp_fwd = DP_TEST_FWD_DROPPED, + }; + + ctx.flags |= DPT_ZONE_PUB; + + dp_test_npf_golden_8(&ctx); + +} DP_END_TEST; + +/* + * IPv6, Local (kernel forwarded) to Net. Non-zone to zone. + */ +DP_DECL_TEST_CASE(npf_golden, npf_golden8e, NULL, NULL); +DP_START_TEST_FULL_RUN(npf_golden8e, test) +{ + struct dp_test_golden_ctx ctx = { + .exp_session = 0, .flags = 0, .count = 1, + .fw_in = 0, .fw_out = 0, + .exp_fwd = DP_TEST_FWD_DROPPED, + }; + + ctx.flags |= DPT_ZONE_PUB; + ctx.flags |= DPT_ZONE_LOCAL; + + dp_test_npf_golden_8(&ctx); + +} DP_END_TEST; + +/* + * This tests that an SNATd packet from the router creates a NAT pinhole for + * return traffic that would otherwise be blocked by the local zone firewall. + */ +DP_DECL_TEST_CASE(npf_golden, npf_golden9, NULL, NULL); +DP_START_TEST(npf_golden9, test) +{ + /* Setup interfaces and neighbours */ + dp_test_nl_add_ip_addr_and_connected("dp1T0", "1.1.1.1/24"); + dp_test_nl_add_ip_addr_and_connected("dp1T1", "2.2.2.2/24"); + + dp_test_netlink_add_neigh("dp1T0", "1.1.1.11", + "aa:bb:cc:dd:1:11"); + dp_test_netlink_add_neigh("dp1T1", "2.2.2.11", + "aa:bb:cc:dd:2:11"); + + struct dp_test_golden_ctx ctx = { + .exp_session = 0, + .flags = 0, .count = 1, + .fw_in = 0, .fw_out = 0, + .exp_fwd = DP_TEST_FWD_FORWARDED, + .exp_back = DP_TEST_FWD_LOCAL, + }; + + /* + * Change source addr from 2.2.2.2 to 2.2.2.3 for traffic out dp1T1 + */ + ctx.flags |= DPT_OUT_SNAT_LOCAL; + npf_golden_out_snat_local(TEST_FW_ADD, &ctx); + + /* + * Zone fw. + * Local to PUBLIC - PASS for src-port 57005, BLOCK for 57004. + * PUBLIC to local - PASS for src-port 48879, BLOCK for 48878. 
+ */ + ctx.flags |= DPT_ZONE_PUB; + ctx.flags |= DPT_ZP_PUB_TO_LOCAL; + + npf_golden_zone_public(TEST_FW_ADD, &ctx); + npf_golden_zone_local(TEST_FW_ADD, &ctx); + npf_golden_zone_policy_pub_to_local(TEST_FW_ADD, &ctx); + npf_golden_zone_policy_local_to_pub(TEST_FW_ADD, &ctx); + + + /* + * UDP packet. Local to net + */ + struct dp_test_pkt_desc_t pre_pkt = { + .text = "Pre", + .len = 20, + .ether_type = RTE_ETHER_TYPE_IPV4, + .l3_src = "2.2.2.2", + .l2_src = "0:0:a4:0:0:65", + .l3_dst = "2.2.2.11", + .l2_dst = "aa:bb:cc:dd:2:11", + .proto = IPPROTO_UDP, + .l4 = { + .udp = { + .sport = 0xDEAD, /* 57005 */ + .dport = 0xBEEF, /* 48879 */ + } + }, + .rx_intf = "dp1T1", + .tx_intf = "dp1T1" + }; + + pre_pkt.l4.udp.dport = 48878; + + struct dp_test_pkt_desc_t post_pkt = pre_pkt; + struct dp_test_pkt_desc_t *pre_desc = &pre_pkt; + struct dp_test_pkt_desc_t *post_desc = &post_pkt; + struct dp_test_expected *test_exp; + struct rte_mbuf *pre_pak, *post_pak; + + if ((ctx.flags & DPT_OUT_SNAT_LOCAL) != 0) + /* SNAT 2.2.2.2 to 2.2.2.3*/ + post_desc->l3_src = "2.2.2.3"; + +repeat: + pre_pak = dp_test_from_spath_v4_pkt_from_desc(pre_desc); + post_pak = dp_test_from_spath_v4_pkt_from_desc(post_desc); + + test_exp = dp_test_exp_create(post_pak); + rte_pktmbuf_free(post_pak); + + dp_test_exp_set_fwd_status(test_exp, ctx.exp_fwd); + dp_test_exp_set_oif_name(test_exp, pre_desc->tx_intf); + + spush(test_exp->description, sizeof(test_exp->description), + "\nTest: \"%s\", Forwards", __func__); + + /* Run the test */ + dp_test_send_slowpath_pkt(pre_pak, test_exp); + + uint sess_out_exp = 1; + + dp_test_npf_session_count_verify(sess_out_exp); + + if (sess_out_exp) { + dp_test_npf_session_verify_desc(NULL, pre_desc, + pre_desc->tx_intf, + SE_ACTIVE, + SE_FLAGS_AE, true); + } + + pre_pak = dp_test_reverse_v4_pkt_from_desc(post_desc); + post_pak = dp_test_reverse_v4_pkt_from_desc(pre_desc); + + test_exp = dp_test_exp_create(post_pak); + rte_pktmbuf_free(post_pak); + + 
dp_test_exp_set_fwd_status(test_exp, ctx.exp_back); + dp_test_exp_set_oif_name(test_exp, pre_desc->tx_intf); + + spush(test_exp->description, sizeof(test_exp->description), + "\nTest: \"%s\", Reverse", __func__); + + /* Run the test */ + dp_test_pak_receive(pre_pak, pre_desc->tx_intf, test_exp); + + dp_test_npf_session_count_verify(sess_out_exp); + + if (ctx.count > 1) { + ctx.count--; + goto repeat; + } + + /* Cleanup */ + + npf_golden_zone_policy_pub_to_local(TEST_FW_REMOVE, &ctx); + npf_golden_zone_policy_local_to_pub(TEST_FW_REMOVE, &ctx); + npf_golden_zone_public(TEST_FW_REMOVE, &ctx); + npf_golden_zone_local(TEST_FW_REMOVE, &ctx); + npf_golden_out_snat_local(TEST_FW_REMOVE, &ctx); + + dp_test_npf_cleanup(); + + dp_test_nl_del_ip_addr_and_connected("dp1T0", "1.1.1.1/24"); + dp_test_nl_del_ip_addr_and_connected("dp1T1", "2.2.2.2/24"); + + dp_test_netlink_del_neigh("dp1T0", "1.1.1.11", + "aa:bb:cc:dd:1:11"); + dp_test_netlink_del_neigh("dp1T1", "2.2.2.11", + "aa:bb:cc:dd:2:11"); +} DP_END_TEST; diff --git a/tests/whole_dp/src/dp_test_npf_hairpin.c b/tests/whole_dp/src/dp_test_npf_hairpin.c index 4dc93346..6f46561e 100644 --- a/tests/whole_dp/src/dp_test_npf_hairpin.c +++ b/tests/whole_dp/src/dp_test_npf_hairpin.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2018-2020, AT&T Intellectual Property. All rights reserved. * All rights reserved. 
* * SPDX-License-Identifier: LGPL-2.1-only @@ -52,14 +52,14 @@ #include "dp_test.h" #include "dp_test_controller.h" #include "dp_test_cmd_state.h" -#include "dp_test_netlink_state.h" -#include "dp_test_lib.h" +#include "dp_test_netlink_state_internal.h" +#include "dp_test_lib_internal.h" #include "dp_test_str.h" #include "dp_test_lib_exp.h" -#include "dp_test_lib_intf.h" +#include "dp_test_lib_intf_internal.h" #include "dp_test_lib_pkt.h" #include "dp_test_lib_tcp.h" -#include "dp_test_pktmbuf_lib.h" +#include "dp_test_pktmbuf_lib_internal.h" #include "dp_test_console.h" #include "dp_test_json_utils.h" #include "dp_test_npf_sess_lib.h" @@ -177,7 +177,7 @@ DP_START_TEST(npf_hairpin_udp1, test) struct dp_test_pkt_desc_t forw_in = { .text = "UDP Forwards In", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "4.4.4.4", .l2_src = "aa:bb:cc:dd:1:11", .l3_dst = "3.3.3.3", @@ -199,7 +199,7 @@ DP_START_TEST(npf_hairpin_udp1, test) struct dp_test_pkt_desc_t rev_in = { .text = "UDP Forwards In", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "3.3.3.3", .l2_src = "aa:bb:cc:dd:1:12", .l3_dst = "4.4.4.4", @@ -342,7 +342,7 @@ DP_START_TEST(npf_hairpin_udp2, test) struct dp_test_pkt_desc_t forw_in = { .text = "UDP Forwards In", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "4.4.4.4", .l2_src = "aa:bb:cc:dd:1:11", .l3_dst = "3.3.3.3", @@ -364,7 +364,7 @@ DP_START_TEST(npf_hairpin_udp2, test) struct dp_test_pkt_desc_t rev_in = { .text = "UDP Forwards In", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "3.3.3.3", .l2_src = "aa:bb:cc:dd:1:12", .l3_dst = "4.4.4.4", @@ -467,36 +467,9 @@ DP_START_TEST(npf_hairpin_udp2, test) } DP_END_TEST; -/* - * Callback function for TCP call simulator - */ -static void tcp_test_cb1(const char *str, - uint pktno, enum dp_test_tcp_dir dir, - uint8_t flags, - struct 
dp_test_pkt_desc_t *pre, - struct dp_test_pkt_desc_t *post, - void *data, uint index) -{ - struct rte_mbuf *pre_pak, *post_pak; - struct dp_test_expected *test_exp; - - pre_pak = dp_test_v4_pkt_from_desc(pre); - post_pak = dp_test_v4_pkt_from_desc(post); - - test_exp = dp_test_exp_from_desc(post_pak, post); - rte_pktmbuf_free(post_pak); - dp_test_exp_set_fwd_status(test_exp, DP_TEST_FWD_FORWARDED); - - spush(test_exp->description, sizeof(test_exp->description), - "%s", str); - - /* Run the test */ - dp_test_pak_receive(pre_pak, pre->rx_intf, test_exp); -} - /* * Routes a TCP call out the same interface it came in on. Stateless firewall - * only condigured. + * only configured. * * | * 4.4.4.4 ----- 1.1.1.11 -----+ @@ -533,134 +506,70 @@ DP_START_TEST(npf_hairpin_tcp1, test) /* * TCP packet */ - struct dp_test_pkt_desc_t fwd_in = { - .text = "TCP Forwards In", - .len = 0, - .ether_type = ETHER_TYPE_IPv4, - .l3_src = "4.4.4.4", - .l2_src = "aa:bb:cc:dd:1:11", - .l3_dst = "3.3.3.3", - .l2_dst = "00:00:a4:00:00:64", - .proto = IPPROTO_TCP, - .l4 = { - .tcp = { - .sport = 1000, - .dport = 80, - .flags = 0, - .seq = 0, - .ack = 0, - .win = 8192, - .opts = NULL - } - }, - .rx_intf = "dp1T0", - .tx_intf = "dp1T0" - }; - - struct dp_test_pkt_desc_t fwd_out = { - .text = "TCP Forwards Out", - .len = 0, - .ether_type = ETHER_TYPE_IPv4, - .l3_src = "4.4.4.4", - .l2_src = "00:00:a4:00:00:64", - .l3_dst = "3.3.3.3", - .l2_dst = "aa:bb:cc:dd:1:12", - .proto = IPPROTO_TCP, - .l4 = { - .tcp = { - .sport = 1000, - .dport = 80, - .flags = 0, - .seq = 0, - .ack = 0, - .win = 8192, - .opts = NULL - } - }, - .rx_intf = "dp1T0", - .tx_intf = "dp1T0" - }; - - struct dp_test_pkt_desc_t rev_in = { - .text = "TCP Reverse In", - .len = 0, - .ether_type = ETHER_TYPE_IPv4, - .l3_src = "3.3.3.3", - .l2_src = "aa:bb:cc:dd:1:12", - .l3_dst = "4.4.4.4", - .l2_dst = "00:00:a4:00:00:64", - .proto = IPPROTO_TCP, - .l4 = { - .tcp = { - .sport = 80, - .dport = 1000, - .flags = 0, - .seq = 0, - .ack = 0, 
- .win = 8192, - .opts = NULL - } + struct dp_test_pkt_desc_t *fwd_in, *fwd_out; + struct dp_test_pkt_desc_t *rev_in, *rev_out; + + fwd_in = dpt_pdesc_v4_create( + "TCP Forwards In", IPPROTO_TCP, + "aa:bb:cc:dd:1:11", "4.4.4.4", 1000, + "00:00:a4:00:00:64", "3.3.3.3", 80, + "dp1T0", "dp1T0"); + + fwd_out = dpt_pdesc_v4_create( + "TCP Forwards Out", IPPROTO_TCP, + "00:00:a4:00:00:64", "4.4.4.4", 1000, + "aa:bb:cc:dd:1:12", "3.3.3.3", 80, + "dp1T0", "dp1T0"); + + rev_in = dpt_pdesc_v4_create( + "TCP Reverse In", IPPROTO_TCP, + "aa:bb:cc:dd:1:12", "3.3.3.3", 80, + "00:00:a4:00:00:64", "4.4.4.4", 1000, + "dp1T0", "dp1T0"); + + rev_out = dpt_pdesc_v4_create( + "TCP Reverse Out", IPPROTO_TCP, + "00:00:a4:00:00:64", "3.3.3.3", 80, + "aa:bb:cc:dd:1:11", "4.4.4.4", 1000, + "dp1T0", "dp1T0"); + + struct dpt_tcp_flow tcp_call = { + .text[0] = '\0', /* description */ + .isn = {0, 0}, /* initial seq no */ + .desc[DPT_FORW] = { /* Forw pkt descriptors */ + .pre = fwd_in, + .pst = fwd_out, }, - .rx_intf = "dp1T0", - .tx_intf = "dp1T0" - }; - - struct dp_test_pkt_desc_t rev_out = { - .text = "TCP Reverse Out", - .len = 0, - .ether_type = ETHER_TYPE_IPv4, - .l3_src = "3.3.3.3", - .l2_src = "00:00:a4:00:00:64", - .l3_dst = "4.4.4.4", - .l2_dst = "aa:bb:cc:dd:1:11", - .proto = IPPROTO_TCP, - .l4 = { - .tcp = { - .sport = 80, - .dport = 1000, - .flags = 0, - .seq = 0, - .ack = 0, - .win = 8192, - .opts = NULL - } + .desc[DPT_BACK] = { /* Back pkt descriptors */ + .pre = rev_in, + .pst = rev_out, }, - .rx_intf = "dp1T0", - .tx_intf = "dp1T0" - }; - - struct dp_test_tcp_call tcp_call = { - .str[0] = '\0', - .isn = {0, 0}, - .desc[DP_DIR_FORW] = { - .pre = &fwd_in, - .post = &fwd_out, - }, - .desc[DP_DIR_BACK] = { - .pre = &rev_in, - .post = &rev_out, - }, - .test_cb = tcp_test_cb1, - .post_cb = NULL, + .test_cb = NULL, /* Prep and send pkt */ + .post_cb = NULL, /* Fixup pkt exp */ }; - struct dp_test_tcp_flow_pkt tcp_pkt1[] = { - {DP_DIR_FORW, TH_SYN, 0, NULL}, - {DP_DIR_BACK, TH_SYN | 
TH_ACK, 0, NULL}, - {DP_DIR_FORW, TH_ACK, 0, NULL}, - {DP_DIR_BACK, TH_ACK, 20, NULL}, - {DP_DIR_FORW, TH_ACK, 50, NULL}, - {DP_DIR_FORW, TH_ACK | TH_FIN, 10, NULL}, - {DP_DIR_FORW, TH_ACK, 0, NULL}, - {DP_DIR_FORW, TH_ACK | TH_FIN, 0, NULL}, - {DP_DIR_BACK, TH_ACK, 0, NULL}, - {DP_DIR_FORW, TH_ACK, 0, NULL}, - {DP_DIR_BACK, TH_ACK | TH_FIN, 0, NULL}, - {DP_DIR_FORW, TH_ACK, 0, NULL}, + struct dpt_tcp_flow_pkt tcp_pkt1[] = { + { DPT_FORW, TH_SYN, 0, NULL, 0, NULL }, + { DPT_BACK, TH_SYN | TH_ACK, 0, NULL, 0, NULL }, + { DPT_FORW, TH_ACK, 0, NULL, 0, NULL }, + { DPT_BACK, TH_ACK, 20, NULL, 0, NULL }, + { DPT_FORW, TH_ACK, 50, NULL, 0, NULL }, + { DPT_FORW, TH_ACK | TH_FIN, 10, NULL, 0, NULL }, + { DPT_FORW, TH_ACK, 0, NULL, 0, NULL }, + { DPT_FORW, TH_ACK | TH_FIN, 0, NULL, 0, NULL }, + { DPT_BACK, TH_ACK, 0, NULL, 0, NULL }, + { DPT_FORW, TH_ACK, 0, NULL, 0, NULL }, + { DPT_BACK, TH_ACK | TH_FIN, 0, NULL, 0, NULL }, + { DPT_FORW, TH_ACK, 0, NULL, 0, NULL }, }; /* Simulate the TCP call */ - dp_test_tcp_call(&tcp_call, tcp_pkt1, ARRAY_SIZE(tcp_pkt1), NULL, 0); + dpt_tcp_call(&tcp_call, tcp_pkt1, ARRAY_SIZE(tcp_pkt1), 0, 0, NULL, 0); + + free(fwd_in); + free(fwd_out); + free(rev_in); + free(rev_out); /* Cleanup */ dp_test_fw_cfg(false, DPT_FW_IN | DPT_FW_OUT); @@ -681,34 +590,6 @@ DP_START_TEST(npf_hairpin_tcp1, test) } DP_END_TEST; -/* - * Callback function for TCP call simulator for stateful fw - */ -static void tcp_test_cb2(const char *str, - uint pktno, enum dp_test_tcp_dir dir, - uint8_t flags, - struct dp_test_pkt_desc_t *pre, - struct dp_test_pkt_desc_t *post, - void *data, uint index) -{ - struct rte_mbuf *pre_pak, *post_pak; - struct dp_test_expected *test_exp; - - pre_pak = dp_test_v4_pkt_from_desc(pre); - post_pak = dp_test_v4_pkt_from_desc(post); - - test_exp = dp_test_exp_from_desc(post_pak, post); - rte_pktmbuf_free(post_pak); - - dp_test_exp_set_fwd_status(test_exp, DP_TEST_FWD_FORWARDED); - - spush(test_exp->description, sizeof(test_exp->description), 
- "%s", str); - - /* Run the test */ - dp_test_pak_receive(pre_pak, pre->rx_intf, test_exp); -} - /* * Routes a TCP call out the same interface it came in on. * @@ -749,135 +630,71 @@ DP_START_TEST(npf_hairpin_tcp2, test) /* * TCP packet */ - struct dp_test_pkt_desc_t fwd_in = { - .text = "TCP Forwards In", - .len = 0, - .ether_type = ETHER_TYPE_IPv4, - .l3_src = "4.4.4.4", - .l2_src = "aa:bb:cc:dd:1:11", - .l3_dst = "3.3.3.3", - .l2_dst = "00:00:a4:00:00:64", - .proto = IPPROTO_TCP, - .l4 = { - .tcp = { - .sport = 1000, - .dport = 80, - .flags = 0, - .seq = 0, - .ack = 0, - .win = 8192, - .opts = NULL - } + struct dp_test_pkt_desc_t *fwd_in, *fwd_out; + struct dp_test_pkt_desc_t *rev_in, *rev_out; + + fwd_in = dpt_pdesc_v4_create( + "TCP Forwards In", IPPROTO_TCP, + "aa:bb:cc:dd:1:11", "4.4.4.4", 1000, + "00:00:a4:00:00:64", "3.3.3.3", 80, + "dp1T0", "dp1T0"); + + fwd_out = dpt_pdesc_v4_create( + "TCP Forwards Out", IPPROTO_TCP, + "00:00:a4:00:00:64", "4.4.4.4", 1000, + "aa:bb:cc:dd:1:12", "3.3.3.3", 80, + "dp1T0", "dp1T0"); + + rev_in = dpt_pdesc_v4_create( + "TCP Reverse In", IPPROTO_TCP, + "aa:bb:cc:dd:1:12", "3.3.3.3", 80, + "00:00:a4:00:00:64", "4.4.4.4", 1000, + "dp1T0", "dp1T0"); + + rev_out = dpt_pdesc_v4_create( + "TCP Reverse Out", IPPROTO_TCP, + "00:00:a4:00:00:64", "3.3.3.3", 80, + "aa:bb:cc:dd:1:11", "4.4.4.4", 1000, + "dp1T0", "dp1T0"); + + + struct dpt_tcp_flow tcp_call = { + .text[0] = '\0', /* description */ + .isn = {0, 0}, /* initial seq no */ + .desc[DPT_FORW] = { /* Forw pkt descriptors */ + .pre = fwd_in, + .pst = fwd_out, }, - .rx_intf = "dp1T0", - .tx_intf = "dp1T0" - }; - - struct dp_test_pkt_desc_t fwd_out = { - .text = "TCP Forwards Out", - .len = 0, - .ether_type = ETHER_TYPE_IPv4, - .l3_src = "4.4.4.4", - .l2_src = "00:00:a4:00:00:64", - .l3_dst = "3.3.3.3", - .l2_dst = "aa:bb:cc:dd:1:12", - .proto = IPPROTO_TCP, - .l4 = { - .tcp = { - .sport = 1000, - .dport = 80, - .flags = 0, - .seq = 0, - .ack = 0, - .win = 8192, - .opts = NULL - } 
+ .desc[DPT_BACK] = { /* Back pkt descriptors */ + .pre = rev_in, + .pst = rev_out, }, - .rx_intf = "dp1T0", - .tx_intf = "dp1T0" + .test_cb = NULL, /* Prep and send pkt */ + .post_cb = NULL, /* Fixup pkt exp */ }; - struct dp_test_pkt_desc_t rev_in = { - .text = "TCP Reverse In", - .len = 0, - .ether_type = ETHER_TYPE_IPv4, - .l3_src = "3.3.3.3", - .l2_src = "aa:bb:cc:dd:1:12", - .l3_dst = "4.4.4.4", - .l2_dst = "00:00:a4:00:00:64", - .proto = IPPROTO_TCP, - .l4 = { - .tcp = { - .sport = 80, - .dport = 1000, - .flags = 0, - .seq = 0, - .ack = 0, - .win = 8192, - .opts = NULL - } - }, - .rx_intf = "dp1T0", - .tx_intf = "dp1T0" - }; - - struct dp_test_pkt_desc_t rev_out = { - .text = "TCP Reverse Out", - .len = 0, - .ether_type = ETHER_TYPE_IPv4, - .l3_src = "3.3.3.3", - .l2_src = "00:00:a4:00:00:64", - .l3_dst = "4.4.4.4", - .l2_dst = "aa:bb:cc:dd:1:11", - .proto = IPPROTO_TCP, - .l4 = { - .tcp = { - .sport = 80, - .dport = 1000, - .flags = 0, - .seq = 0, - .ack = 0, - .win = 8192, - .opts = NULL - } - }, - .rx_intf = "dp1T0", - .tx_intf = "dp1T0" - }; - - struct dp_test_tcp_call tcp_call = { - .str[0] = '\0', - .isn = {0, 0}, - .desc[DP_DIR_FORW] = { - .pre = &fwd_in, - .post = &fwd_out, - }, - .desc[DP_DIR_BACK] = { - .pre = &rev_in, - .post = &rev_out, - }, - .test_cb = tcp_test_cb2, - .post_cb = NULL, - }; - - /* Comment is new npf_tcp_fsm state */ - struct dp_test_tcp_flow_pkt tcp_pkt1[] = { - {DP_DIR_FORW, TH_SYN, 0, NULL}, - {DP_DIR_BACK, TH_SYN | TH_ACK, 0, NULL}, - {DP_DIR_FORW, TH_ACK, 0, NULL}, - {DP_DIR_BACK, TH_ACK, 20, NULL}, - {DP_DIR_FORW, TH_ACK, 50, NULL}, - {DP_DIR_FORW, TH_ACK | TH_FIN, 10, NULL}, - {DP_DIR_FORW, TH_ACK, 0, NULL}, - {DP_DIR_FORW, TH_ACK | TH_FIN, 0, NULL}, - {DP_DIR_BACK, TH_ACK, 0, NULL}, - {DP_DIR_FORW, TH_ACK, 0, NULL}, - {DP_DIR_BACK, TH_ACK | TH_FIN, 0, NULL}, - {DP_DIR_FORW, TH_ACK, 0, NULL}, + struct dpt_tcp_flow_pkt tcp_pkt1[] = { + { DPT_FORW, TH_SYN, 0, NULL, 0, NULL }, + { DPT_BACK, TH_SYN | TH_ACK, 0, NULL, 0, NULL 
}, + { DPT_FORW, TH_ACK, 0, NULL, 0, NULL }, + { DPT_BACK, TH_ACK, 20, NULL, 0, NULL }, + { DPT_FORW, TH_ACK, 50, NULL, 0, NULL }, + { DPT_FORW, TH_ACK | TH_FIN, 10, NULL, 0, NULL }, + { DPT_FORW, TH_ACK, 0, NULL, 0, NULL }, + { DPT_FORW, TH_ACK | TH_FIN, 0, NULL, 0, NULL }, + { DPT_BACK, TH_ACK, 0, NULL, 0, NULL }, + { DPT_FORW, TH_ACK, 0, NULL, 0, NULL }, + { DPT_BACK, TH_ACK | TH_FIN, 0, NULL, 0, NULL }, + { DPT_FORW, TH_ACK, 0, NULL, 0, NULL }, }; /* Simulate the TCP call */ - dp_test_tcp_call(&tcp_call, tcp_pkt1, ARRAY_SIZE(tcp_pkt1), NULL, 0); + dpt_tcp_call(&tcp_call, tcp_pkt1, ARRAY_SIZE(tcp_pkt1), 0, 0, NULL, 0); + + free(fwd_in); + free(fwd_out); + free(rev_in); + free(rev_out); /* Cleanup */ dp_test_fw_cfg(false, DPT_FW_IN | DPT_FW_IN_STATEFUL | DPT_FW_OUT); @@ -898,34 +715,6 @@ DP_START_TEST(npf_hairpin_tcp2, test) } DP_END_TEST; -/* - * Callback function for TCP call simulator for stateful fw with tcp-strict - */ -static void tcp_test_cb3(const char *str, - uint pktno, enum dp_test_tcp_dir dir, - uint8_t flags, - struct dp_test_pkt_desc_t *pre, - struct dp_test_pkt_desc_t *post, - void *data, uint index) -{ - struct rte_mbuf *pre_pak, *post_pak; - struct dp_test_expected *test_exp; - - pre_pak = dp_test_v4_pkt_from_desc(pre); - post_pak = dp_test_v4_pkt_from_desc(post); - - test_exp = dp_test_exp_from_desc(post_pak, post); - rte_pktmbuf_free(post_pak); - - dp_test_exp_set_fwd_status(test_exp, DP_TEST_FWD_FORWARDED); - - spush(test_exp->description, sizeof(test_exp->description), - "%s", str); - - /* Run the test */ - dp_test_pak_receive(pre_pak, pre->rx_intf, test_exp); -} - /* * Routes a TCP call out the same interface it came in on. 
* @@ -969,135 +758,70 @@ DP_START_TEST(npf_hairpin_tcp3, test) /* * TCP packet */ - struct dp_test_pkt_desc_t fwd_in = { - .text = "TCP Forwards In", - .len = 0, - .ether_type = ETHER_TYPE_IPv4, - .l3_src = "4.4.4.4", - .l2_src = "aa:bb:cc:dd:1:11", - .l3_dst = "3.3.3.3", - .l2_dst = "00:00:a4:00:00:64", - .proto = IPPROTO_TCP, - .l4 = { - .tcp = { - .sport = 1000, - .dport = 80, - .flags = 0, - .seq = 0, - .ack = 0, - .win = 8192, - .opts = NULL - } + struct dp_test_pkt_desc_t *fwd_in, *fwd_out; + struct dp_test_pkt_desc_t *rev_in, *rev_out; + + fwd_in = dpt_pdesc_v4_create( + "TCP Forwards In", IPPROTO_TCP, + "aa:bb:cc:dd:1:11", "4.4.4.4", 1000, + "00:00:a4:00:00:64", "3.3.3.3", 80, + "dp1T0", "dp1T0"); + + fwd_out = dpt_pdesc_v4_create( + "TCP Forwards Out", IPPROTO_TCP, + "00:00:a4:00:00:64", "4.4.4.4", 1000, + "aa:bb:cc:dd:1:12", "3.3.3.3", 80, + "dp1T0", "dp1T0"); + + rev_in = dpt_pdesc_v4_create( + "TCP Reverse In", IPPROTO_TCP, + "aa:bb:cc:dd:1:12", "3.3.3.3", 80, + "00:00:a4:00:00:64", "4.4.4.4", 1000, + "dp1T0", "dp1T0"); + + rev_out = dpt_pdesc_v4_create( + "TCP Reverse Out", IPPROTO_TCP, + "00:00:a4:00:00:64", "3.3.3.3", 80, + "aa:bb:cc:dd:1:11", "4.4.4.4", 1000, + "dp1T0", "dp1T0"); + + struct dpt_tcp_flow tcp_call = { + .text[0] = '\0', /* description */ + .isn = {0, 0}, /* initial seq no */ + .desc[DPT_FORW] = { /* Forw pkt descriptors */ + .pre = fwd_in, + .pst = fwd_out, }, - .rx_intf = "dp1T0", - .tx_intf = "dp1T0" - }; - - struct dp_test_pkt_desc_t fwd_out = { - .text = "TCP Forwards Out", - .len = 0, - .ether_type = ETHER_TYPE_IPv4, - .l3_src = "4.4.4.4", - .l2_src = "00:00:a4:00:00:64", - .l3_dst = "3.3.3.3", - .l2_dst = "aa:bb:cc:dd:1:12", - .proto = IPPROTO_TCP, - .l4 = { - .tcp = { - .sport = 1000, - .dport = 80, - .flags = 0, - .seq = 0, - .ack = 0, - .win = 8192, - .opts = NULL - } + .desc[DPT_BACK] = { /* Back pkt descriptors */ + .pre = rev_in, + .pst = rev_out, }, - .rx_intf = "dp1T0", - .tx_intf = "dp1T0" + .test_cb = NULL, /* Prep and 
send pkt */ + .post_cb = NULL, /* Fixup pkt exp */ }; - struct dp_test_pkt_desc_t rev_in = { - .text = "TCP Reverse In", - .len = 0, - .ether_type = ETHER_TYPE_IPv4, - .l3_src = "3.3.3.3", - .l2_src = "aa:bb:cc:dd:1:12", - .l3_dst = "4.4.4.4", - .l2_dst = "00:00:a4:00:00:64", - .proto = IPPROTO_TCP, - .l4 = { - .tcp = { - .sport = 80, - .dport = 1000, - .flags = 0, - .seq = 0, - .ack = 0, - .win = 8192, - .opts = NULL - } - }, - .rx_intf = "dp1T0", - .tx_intf = "dp1T0" - }; - - struct dp_test_pkt_desc_t rev_out = { - .text = "TCP Reverse Out", - .len = 0, - .ether_type = ETHER_TYPE_IPv4, - .l3_src = "3.3.3.3", - .l2_src = "00:00:a4:00:00:64", - .l3_dst = "4.4.4.4", - .l2_dst = "aa:bb:cc:dd:1:11", - .proto = IPPROTO_TCP, - .l4 = { - .tcp = { - .sport = 80, - .dport = 1000, - .flags = 0, - .seq = 0, - .ack = 0, - .win = 8192, - .opts = NULL - } - }, - .rx_intf = "dp1T0", - .tx_intf = "dp1T0" - }; - - struct dp_test_tcp_call tcp_call = { - .str[0] = '\0', - .isn = {0, 0}, - .desc[DP_DIR_FORW] = { - .pre = &fwd_in, - .post = &fwd_out, - }, - .desc[DP_DIR_BACK] = { - .pre = &rev_in, - .post = &rev_out, - }, - .test_cb = tcp_test_cb3, - .post_cb = NULL, - }; - - /* Comment is new npf_tcp_fsm state */ - struct dp_test_tcp_flow_pkt tcp_pkt1[] = { - {DP_DIR_FORW, TH_SYN, 0, NULL}, - {DP_DIR_BACK, TH_SYN | TH_ACK, 0, NULL}, - {DP_DIR_FORW, TH_ACK, 0, NULL}, - {DP_DIR_BACK, TH_ACK, 20, NULL}, - {DP_DIR_FORW, TH_ACK, 50, NULL}, - {DP_DIR_FORW, TH_ACK | TH_FIN, 10, NULL}, - {DP_DIR_FORW, TH_ACK, 0, NULL}, - {DP_DIR_FORW, TH_ACK | TH_FIN, 0, NULL}, - {DP_DIR_BACK, TH_ACK, 0, NULL}, - {DP_DIR_FORW, TH_ACK, 0, NULL}, - {DP_DIR_BACK, TH_ACK | TH_FIN, 0, NULL}, - {DP_DIR_FORW, TH_ACK, 0, NULL}, + struct dpt_tcp_flow_pkt tcp_pkt1[] = { + { DPT_FORW, TH_SYN, 0, NULL, 0, NULL }, + { DPT_BACK, TH_SYN | TH_ACK, 0, NULL, 0, NULL }, + { DPT_FORW, TH_ACK, 0, NULL, 0, NULL }, + { DPT_BACK, TH_ACK, 20, NULL, 0, NULL }, + { DPT_FORW, TH_ACK, 50, NULL, 0, NULL }, + { DPT_FORW, TH_ACK | TH_FIN, 
10, NULL, 0, NULL }, + { DPT_FORW, TH_ACK, 0, NULL, 0, NULL }, + { DPT_FORW, TH_ACK | TH_FIN, 0, NULL, 0, NULL }, + { DPT_BACK, TH_ACK, 0, NULL, 0, NULL }, + { DPT_FORW, TH_ACK, 0, NULL, 0, NULL }, + { DPT_BACK, TH_ACK | TH_FIN, 0, NULL, 0, NULL }, + { DPT_FORW, TH_ACK, 0, NULL, 0, NULL }, }; /* Simulate the TCP call */ - dp_test_tcp_call(&tcp_call, tcp_pkt1, ARRAY_SIZE(tcp_pkt1), NULL, 0); + dpt_tcp_call(&tcp_call, tcp_pkt1, ARRAY_SIZE(tcp_pkt1), 0, 0, NULL, 0); + + free(fwd_in); + free(fwd_out); + free(rev_in); + free(rev_out); /* Cleanup */ dp_test_npf_cmd("npf-ut fw global tcp-strict disable", false); @@ -1164,125 +888,62 @@ DP_START_TEST(npf_hairpin_tcp4, test) /* * TCP packet */ - struct dp_test_pkt_desc_t fwd_in = { - .text = "TCP Forwards In", - .len = 0, - .ether_type = ETHER_TYPE_IPv4, - .l3_src = "4.4.4.4", - .l2_src = "aa:bb:cc:dd:1:11", - .l3_dst = "3.3.3.3", - .l2_dst = "00:00:a4:00:00:64", - .proto = IPPROTO_TCP, - .l4 = { - .tcp = { - .sport = 1000, - .dport = 80, - .flags = 0, - .seq = 0, - .ack = 0, - .win = 8192, - .opts = NULL - } - }, - .rx_intf = "dp1T0", - .tx_intf = "dp1T0" - }; - - struct dp_test_pkt_desc_t fwd_out = { - .text = "TCP Forwards Out", - .len = 0, - .ether_type = ETHER_TYPE_IPv4, - .l3_src = "4.4.4.4", - .l2_src = "00:00:a4:00:00:64", - .l3_dst = "3.3.3.3", - .l2_dst = "aa:bb:cc:dd:1:12", - .proto = IPPROTO_TCP, - .l4 = { - .tcp = { - .sport = 1000, - .dport = 80, - .flags = 0, - .seq = 0, - .ack = 0, - .win = 8192, - .opts = NULL - } - }, - .rx_intf = "dp1T0", - .tx_intf = "dp1T0" - }; - - struct dp_test_pkt_desc_t rev_in = { - .text = "TCP Reverse In", - .len = 0, - .ether_type = ETHER_TYPE_IPv4, - .l3_src = "3.3.3.3", - .l2_src = "aa:bb:cc:dd:1:12", - .l3_dst = "4.4.4.4", - .l2_dst = "00:00:a4:00:00:64", - .proto = IPPROTO_TCP, - .l4 = { - .tcp = { - .sport = 80, - .dport = 1000, - .flags = 0, - .seq = 0, - .ack = 0, - .win = 8192, - .opts = NULL - } + struct dp_test_pkt_desc_t *fwd_in, *fwd_out; + struct dp_test_pkt_desc_t 
*rev_in, *rev_out; + + fwd_in = dpt_pdesc_v4_create( + "TCP Forwards In", IPPROTO_TCP, + "aa:bb:cc:dd:1:11", "4.4.4.4", 1000, + "00:00:a4:00:00:64", "3.3.3.3", 80, + "dp1T0", "dp1T0"); + + fwd_out = dpt_pdesc_v4_create( + "TCP Forwards Out", IPPROTO_TCP, + "00:00:a4:00:00:64", "4.4.4.4", 1000, + "aa:bb:cc:dd:1:12", "3.3.3.3", 80, + "dp1T0", "dp1T0"); + + rev_in = dpt_pdesc_v4_create( + "TCP Reverse In", IPPROTO_TCP, + "aa:bb:cc:dd:1:12", "3.3.3.3", 80, + "00:00:a4:00:00:64", "4.4.4.4", 1000, + "dp1T0", "dp1T0"); + + rev_out = dpt_pdesc_v4_create( + "TCP Reverse Out", IPPROTO_TCP, + "00:00:a4:00:00:64", "3.3.3.3", 80, + "aa:bb:cc:dd:1:11", "4.4.4.4", 1000, + "dp1T0", "dp1T0"); + + struct dpt_tcp_flow tcp_call = { + .text[0] = '\0', /* description */ + .isn = {0, 0}, /* initial seq no */ + .desc[DPT_FORW] = { /* Forw pkt descriptors */ + .pre = fwd_in, + .pst = fwd_out, }, - .rx_intf = "dp1T0", - .tx_intf = "dp1T0" - }; - - struct dp_test_pkt_desc_t rev_out = { - .text = "TCP Reverse Out", - .len = 0, - .ether_type = ETHER_TYPE_IPv4, - .l3_src = "3.3.3.3", - .l2_src = "00:00:a4:00:00:64", - .l3_dst = "4.4.4.4", - .l2_dst = "aa:bb:cc:dd:1:11", - .proto = IPPROTO_TCP, - .l4 = { - .tcp = { - .sport = 80, - .dport = 1000, - .flags = 0, - .seq = 0, - .ack = 0, - .win = 8192, - .opts = NULL - } + .desc[DPT_BACK] = { /* Back pkt descriptors */ + .pre = rev_in, + .pst = rev_out, }, - .rx_intf = "dp1T0", - .tx_intf = "dp1T0" + .test_cb = NULL, /* Prep and send pkt */ + .post_cb = NULL, /* Fixup pkt exp */ }; - struct dp_test_tcp_call tcp_call = { - .str[0] = '\0', - .isn = {0, 0}, - .desc[DP_DIR_FORW] = { - .pre = &fwd_in, - .post = &fwd_out, - }, - .desc[DP_DIR_BACK] = { - .pre = &rev_in, - .post = &rev_out, - }, - .test_cb = tcp_test_cb3, - .post_cb = NULL, - }; - - /* Comment is new npf_tcp_fsm state */ - struct dp_test_tcp_flow_pkt tcp_pkt1[] = { - {DP_DIR_FORW, TH_SYN, 0}, /* NPF_TCPS_SYN_SENT */ - {DP_DIR_BACK, TH_RST | TH_ACK, 0}, /* NPF_TCPS_CLOSED */ + struct 
dpt_tcp_flow_pkt tcp_pkt1[] = { + { DPT_FORW, TH_SYN, 0, NULL, /* NPF_TCPS_SYN_SENT */ + 0, NULL }, + { DPT_BACK, TH_RST | TH_ACK, 0, NULL, /* NPF_TCPS_CLOSED */ + 0, NULL }, }; /* Simulate the TCP call */ - dp_test_tcp_call(&tcp_call, tcp_pkt1, ARRAY_SIZE(tcp_pkt1), NULL, 0); + dpt_tcp_call(&tcp_call, tcp_pkt1, ARRAY_SIZE(tcp_pkt1), 0, 0, NULL, 0); + + free(fwd_in); + free(fwd_out); + free(rev_in); + free(rev_out); /* Cleanup */ dp_test_npf_cmd("npf-ut fw global tcp-strict disable", false); @@ -1349,125 +1010,62 @@ DP_START_TEST(npf_hairpin_tcp5, test) /* * TCP packet */ - struct dp_test_pkt_desc_t fwd_in = { - .text = "TCP Forwards In", - .len = 0, - .ether_type = ETHER_TYPE_IPv4, - .l3_src = "4.4.4.4", - .l2_src = "aa:bb:cc:dd:1:11", - .l3_dst = "3.3.3.3", - .l2_dst = "00:00:a4:00:00:64", - .proto = IPPROTO_TCP, - .l4 = { - .tcp = { - .sport = 1000, - .dport = 80, - .flags = 0, - .seq = 0, - .ack = 0, - .win = 8192, - .opts = NULL - } + struct dp_test_pkt_desc_t *fwd_in, *fwd_out; + struct dp_test_pkt_desc_t *rev_in, *rev_out; + + fwd_in = dpt_pdesc_v4_create( + "TCP Forwards In", IPPROTO_TCP, + "aa:bb:cc:dd:1:11", "4.4.4.4", 1000, + "00:00:a4:00:00:64", "3.3.3.3", 80, + "dp1T0", "dp1T0"); + + fwd_out = dpt_pdesc_v4_create( + "TCP Forwards Out", IPPROTO_TCP, + "00:00:a4:00:00:64", "4.4.4.4", 1000, + "aa:bb:cc:dd:1:12", "3.3.3.3", 80, + "dp1T0", "dp1T0"); + + rev_in = dpt_pdesc_v4_create( + "TCP Reverse In", IPPROTO_TCP, + "aa:bb:cc:dd:1:12", "3.3.3.3", 80, + "00:00:a4:00:00:64", "4.4.4.4", 1000, + "dp1T0", "dp1T0"); + + rev_out = dpt_pdesc_v4_create( + "TCP Reverse Out", IPPROTO_TCP, + "00:00:a4:00:00:64", "3.3.3.3", 80, + "aa:bb:cc:dd:1:11", "4.4.4.4", 1000, + "dp1T0", "dp1T0"); + + struct dpt_tcp_flow tcp_call = { + .text[0] = '\0', /* description */ + .isn = {0, 0}, /* initial seq no */ + .desc[DPT_FORW] = { /* Forw pkt descriptors */ + .pre = fwd_in, + .pst = fwd_out, }, - .rx_intf = "dp1T0", - .tx_intf = "dp1T0" - }; - - struct dp_test_pkt_desc_t fwd_out = 
{ - .text = "TCP Forwards Out", - .len = 0, - .ether_type = ETHER_TYPE_IPv4, - .l3_src = "4.4.4.4", - .l2_src = "00:00:a4:00:00:64", - .l3_dst = "3.3.3.3", - .l2_dst = "aa:bb:cc:dd:1:12", - .proto = IPPROTO_TCP, - .l4 = { - .tcp = { - .sport = 1000, - .dport = 80, - .flags = 0, - .seq = 0, - .ack = 0, - .win = 8192, - .opts = NULL - } - }, - .rx_intf = "dp1T0", - .tx_intf = "dp1T0" - }; - - struct dp_test_pkt_desc_t rev_in = { - .text = "TCP Reverse In", - .len = 0, - .ether_type = ETHER_TYPE_IPv4, - .l3_src = "3.3.3.3", - .l2_src = "aa:bb:cc:dd:1:12", - .l3_dst = "4.4.4.4", - .l2_dst = "00:00:a4:00:00:64", - .proto = IPPROTO_TCP, - .l4 = { - .tcp = { - .sport = 80, - .dport = 1000, - .flags = 0, - .seq = 0, - .ack = 0, - .win = 8192, - .opts = NULL - } - }, - .rx_intf = "dp1T0", - .tx_intf = "dp1T0" - }; - - struct dp_test_pkt_desc_t rev_out = { - .text = "TCP Reverse Out", - .len = 0, - .ether_type = ETHER_TYPE_IPv4, - .l3_src = "3.3.3.3", - .l2_src = "00:00:a4:00:00:64", - .l3_dst = "4.4.4.4", - .l2_dst = "aa:bb:cc:dd:1:11", - .proto = IPPROTO_TCP, - .l4 = { - .tcp = { - .sport = 80, - .dport = 1000, - .flags = 0, - .seq = 0, - .ack = 0, - .win = 8192, - .opts = NULL - } + .desc[DPT_BACK] = { /* Back pkt descriptors */ + .pre = rev_in, + .pst = rev_out, }, - .rx_intf = "dp1T0", - .tx_intf = "dp1T0" + .test_cb = NULL, /* Prep and send pkt */ + .post_cb = NULL, /* Fixup pkt exp */ }; - struct dp_test_tcp_call tcp_call = { - .str[0] = '\0', - .isn = {0, 0}, - .desc[DP_DIR_FORW] = { - .pre = &fwd_in, - .post = &fwd_out, - }, - .desc[DP_DIR_BACK] = { - .pre = &rev_in, - .post = &rev_out, - }, - .test_cb = tcp_test_cb3, - .post_cb = NULL, - }; - - /* Comment is new npf_tcp_fsm state */ - struct dp_test_tcp_flow_pkt tcp_pkt1[] = { - {DP_DIR_FORW, TH_SYN, 0}, /* NPF_TCPS_SYN_SENT */ - {DP_DIR_BACK, TH_RST | TH_ACK, 0}, /* NPF_TCPS_CLOSED */ + struct dpt_tcp_flow_pkt tcp_pkt1[] = { + { DPT_FORW, TH_SYN, 0, NULL, /* NPF_TCPS_SYN_SENT */ + 0, NULL }, + { DPT_BACK, TH_RST | 
TH_ACK, 0, NULL, /* NPF_TCPS_CLOSED */ + 0, NULL }, }; /* Simulate the TCP call */ - dp_test_tcp_call(&tcp_call, tcp_pkt1, ARRAY_SIZE(tcp_pkt1), NULL, 0); + dpt_tcp_call(&tcp_call, tcp_pkt1, ARRAY_SIZE(tcp_pkt1), 0, 0, NULL, 0); + + free(fwd_in); + free(fwd_out); + free(rev_in); + free(rev_out); /* Cleanup */ dp_test_npf_cmd("npf-ut fw global tcp-strict disable", false); @@ -1535,7 +1133,7 @@ DP_START_TEST(npf_hairpin_icmp1, test) struct dp_test_pkt_desc_t forw_in = { .text = "ICMP Forwards In", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "4.4.4.4", .l2_src = "aa:bb:cc:dd:1:11", .l3_dst = "3.3.3.3", @@ -1558,7 +1156,7 @@ DP_START_TEST(npf_hairpin_icmp1, test) struct dp_test_pkt_desc_t rev_in = { .text = "ICMP Forwards In", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "3.3.3.3", .l2_src = "aa:bb:cc:dd:1:12", .l3_dst = "4.4.4.4", @@ -1702,7 +1300,7 @@ DP_START_TEST(npf_hairpin_icmp2, test) struct dp_test_pkt_desc_t forw_in = { .text = "ICMP Forwards In", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "4.4.4.4", .l2_src = "aa:bb:cc:dd:1:11", .l3_dst = "3.3.3.3", @@ -1725,7 +1323,7 @@ DP_START_TEST(npf_hairpin_icmp2, test) struct dp_test_pkt_desc_t rev_in = { .text = "ICMP Forwards In", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "3.3.3.3", .l2_src = "aa:bb:cc:dd:1:12", .l3_dst = "4.4.4.4", diff --git a/tests/whole_dp/src/dp_test_npf_icmp.c b/tests/whole_dp/src/dp_test_npf_icmp.c index f5991050..a6bccef5 100644 --- a/tests/whole_dp/src/dp_test_npf_icmp.c +++ b/tests/whole_dp/src/dp_test_npf_icmp.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2021, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -20,12 +20,12 @@ #include "dp_test.h" #include "dp_test_str.h" -#include "dp_test_lib.h" +#include "dp_test_lib_internal.h" #include "dp_test_lib_exp.h" #include "dp_test_lib_pkt.h" -#include "dp_test_lib_intf.h" -#include "dp_test_pktmbuf_lib.h" -#include "dp_test_netlink_state.h" +#include "dp_test_lib_intf_internal.h" +#include "dp_test_pktmbuf_lib_internal.h" +#include "dp_test_netlink_state_internal.h" #include "dp_test_console.h" #include "dp_test_json_utils.h" #include "dp_test_npf_lib.h" @@ -33,6 +33,19 @@ #include "dp_test_npf_sess_lib.h" #include "dp_test_npf_nat_lib.h" +/* + * icmpv4_1 Match on ICMP type and code + * icmpv4_2 ICMP groups, accept packets specified by the firewall + * icmpv4_3 ICMP groups, drop packets specified by the firewall + * icmpv6_1 Match on ICMP type and code + * icmpv6_2 ICMP groups, accept packets specified by the firewall + * icmpv6_3 ICMP groups, drop packets specified by the firewall + * icmpv4_4 ICMP echo request and reply with a stateful firewall rule + * icmpv6_4 ICMPv6 echo request and reply with a stateful firewall rule + * icmpv4_5 Strict ICMP echo request/response sessions + * icmpv6_5 Strict ICMP echo request/response sessions + * icmpv4_6 ICMP echo request and reply with SNAT + */ struct dp_test_npf_icmp_t { /* DP_TEST_FWD_FORWARDED or DP_TEST_FWD_DROPPED */ @@ -53,8 +66,6 @@ struct dp_test_npf_icmp_t { DP_DECL_TEST_SUITE(npf_icmp); -DP_DECL_TEST_CASE(npf_icmp, icmp_ipv4, NULL, NULL); - /* * Match on ICMP type and code * @@ -64,7 +75,8 @@ DP_DECL_TEST_CASE(npf_icmp, icmp_ipv4, NULL, NULL); * dp1T0 | | dp2T1 * intf1 +-----+ intf2 */ -DP_START_TEST(icmp_ipv4, test1) +DP_DECL_TEST_CASE(npf_icmp, icmpv4_1, NULL, NULL); +DP_START_TEST(icmpv4_1, test) { struct dp_test_pkt_desc_t *pkt; struct dp_test_expected *test_exp; @@ -83,7 +95,7 @@ DP_START_TEST(icmp_ipv4, test1) struct dp_test_pkt_desc_t v4_pkt = { .text = "ICMP IPv4", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = 
"1.1.1.11", .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = "2.2.2.11", @@ -127,7 +139,7 @@ DP_START_TEST(icmp_ipv4, test1) .dpt_icmp_id = 0, .dpt_icmp_seq = 0, }, - .npf = "proto=1 icmpv4=8" + .npf = "proto-final=1 icmpv4=8" }, { .fwd_status = DP_TEST_FWD_FORWARDED, @@ -137,7 +149,7 @@ DP_START_TEST(icmp_ipv4, test1) .dpt_icmp_id = 0, .dpt_icmp_seq = 0, }, - .npf = "proto=1 icmpv4=echo-request" + .npf = "proto-final=1 icmpv4=echo-request" }, { .fwd_status = DP_TEST_FWD_FORWARDED, @@ -146,7 +158,7 @@ DP_START_TEST(icmp_ipv4, test1) { .udata32 = 0 }, - "proto=1 icmpv4=3:3" + "proto-final=1 icmpv4=3:3" }, { .fwd_status = DP_TEST_FWD_FORWARDED, @@ -155,7 +167,7 @@ DP_START_TEST(icmp_ipv4, test1) { .udata32 = 0 }, - "proto=1 icmpv4=port-unreachable" + "proto-final=1 icmpv4=port-unreachable" }, { .fwd_status = DP_TEST_FWD_DROPPED, @@ -164,7 +176,7 @@ DP_START_TEST(icmp_ipv4, test1) { .udata32 = 0 }, - "proto=1 icmpv4=3:3" + "proto-final=1 icmpv4=3:3" }, { .fwd_status = DP_TEST_FWD_DROPPED, @@ -173,7 +185,7 @@ DP_START_TEST(icmp_ipv4, test1) { .udata32 = 0 }, - "proto=1 icmpv4=port-unreachable" + "proto-final=1 icmpv4=port-unreachable" }, }; @@ -244,7 +256,8 @@ DP_START_TEST(icmp_ipv4, test1) /* * ICMP groups, accept packets specified by the firewall */ -DP_START_TEST(icmp_ipv4, test2) +DP_DECL_TEST_CASE(npf_icmp, icmpv4_2, NULL, NULL); +DP_START_TEST(icmpv4_2, test) { struct dp_test_pkt_desc_t *pkt; struct dp_test_expected *test_exp; @@ -272,7 +285,7 @@ DP_START_TEST(icmp_ipv4, test2) .rule = "10", .pass = PASS, .stateful = STATELESS, - .npf = "proto=1 icmpv4-group=ICMP1"}, + .npf = "proto-final=1 icmpv4-group=ICMP1"}, RULE_DEF_BLOCK, NULL_RULE }; @@ -291,7 +304,7 @@ DP_START_TEST(icmp_ipv4, test2) struct dp_test_pkt_desc_t v4_pkt = { .text = "ICMP IPv4", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "1.1.1.11", .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = "2.2.2.11", @@ -371,7 +384,8 @@ DP_START_TEST(icmp_ipv4, test2) /* * ICMP groups, 
drop packets specified by the firewall */ -DP_START_TEST(icmp_ipv4, test3) +DP_DECL_TEST_CASE(npf_icmp, icmpv4_3, NULL, NULL); +DP_START_TEST(icmpv4_3, test) { struct dp_test_pkt_desc_t *pkt; struct dp_test_expected *test_exp; @@ -399,7 +413,7 @@ DP_START_TEST(icmp_ipv4, test3) .rule = "10", .pass = BLOCK, .stateful = STATELESS, - .npf = "proto=1 icmpv4-group=ICMP1"}, + .npf = "proto-final=1 icmpv4-group=ICMP1"}, RULE_DEF_PASS, NULL_RULE }; @@ -418,7 +432,7 @@ DP_START_TEST(icmp_ipv4, test3) struct dp_test_pkt_desc_t v4_pkt = { .text = "ICMP IPv4", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "1.1.1.11", .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = "2.2.2.11", @@ -496,7 +510,6 @@ DP_START_TEST(icmp_ipv4, test3) } DP_END_TEST; -DP_DECL_TEST_CASE(npf_icmp, icmp_ipv6, NULL, NULL); /* * Match on ICMP type and code @@ -508,7 +521,8 @@ DP_DECL_TEST_CASE(npf_icmp, icmp_ipv6, NULL, NULL); * intf1 +-----+ intf2 * */ -DP_START_TEST(icmp_ipv6, test1) +DP_DECL_TEST_CASE(npf_icmp, icmpv6_1, NULL, NULL); +DP_START_TEST(icmpv6_1, test) { struct dp_test_pkt_desc_t *pkt; struct dp_test_expected *test_exp; @@ -527,7 +541,7 @@ DP_START_TEST(icmp_ipv6, test1) struct dp_test_pkt_desc_t v6_pkt = { .text = "ICMP IPv6", .len = 20, - .ether_type = ETHER_TYPE_IPv6, + .ether_type = RTE_ETHER_TYPE_IPV6, .l3_src = "2001:1:1::2", .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = "2002:2:2::1", @@ -561,7 +575,7 @@ DP_START_TEST(icmp_ipv6, test1) .dpt_icmp_id = 0, .dpt_icmp_seq = 0, }, - .npf = "proto=58 icmpv6=128" + .npf = "proto-final=58 icmpv6=128" }, { .fwd_status = DP_TEST_FWD_FORWARDED, @@ -571,7 +585,7 @@ DP_START_TEST(icmp_ipv6, test1) .dpt_icmp_id = 0, .dpt_icmp_seq = 0, }, - .npf = "proto=58 icmpv6=echo-request" + .npf = "proto-final=58 icmpv6=echo-request" }, { .fwd_status = DP_TEST_FWD_FORWARDED, @@ -580,7 +594,7 @@ DP_START_TEST(icmp_ipv6, test1) { .udata32 = 0 }, - .npf = "proto=58 icmpv6=1:4" + .npf = "proto-final=58 icmpv6=1:4" }, { .fwd_status = 
DP_TEST_FWD_FORWARDED, @@ -589,7 +603,7 @@ DP_START_TEST(icmp_ipv6, test1) { .udata32 = 0 }, - .npf = "proto=58 icmpv6=port-unreachable" + .npf = "proto-final=58 icmpv6=port-unreachable" }, { .fwd_status = DP_TEST_FWD_DROPPED, @@ -598,7 +612,7 @@ DP_START_TEST(icmp_ipv6, test1) { .udata32 = 0 }, - .npf = "proto=58 icmpv6=1:4" + .npf = "proto-final=58 icmpv6=1:4" }, { .fwd_status = DP_TEST_FWD_DROPPED, @@ -607,7 +621,7 @@ DP_START_TEST(icmp_ipv6, test1) { .udata32 = 0 }, - .npf = "proto=58 icmpv6=port-unreachable" + .npf = "proto-final=58 icmpv6=port-unreachable" }, }; @@ -673,7 +687,8 @@ DP_START_TEST(icmp_ipv6, test1) /* * ICMP groups, accept packets specified by the firewall */ -DP_START_TEST(icmp_ipv6, test2) +DP_DECL_TEST_CASE(npf_icmp, icmpv6_2, NULL, NULL); +DP_START_TEST(icmpv6_2, test) { struct dp_test_pkt_desc_t *pkt; struct dp_test_expected *test_exp; @@ -699,7 +714,7 @@ DP_START_TEST(icmp_ipv6, test2) .rule = "10", .pass = PASS, .stateful = STATELESS, - .npf = "proto=58 icmpv6-group=ICMP1"}, + .npf = "proto-final=58 icmpv6-group=ICMP1"}, RULE_DEF_BLOCK, NULL_RULE }; @@ -718,7 +733,7 @@ DP_START_TEST(icmp_ipv6, test2) struct dp_test_pkt_desc_t v6_pkt = { .text = "ICMP IPv6", .len = 20, - .ether_type = ETHER_TYPE_IPv6, + .ether_type = RTE_ETHER_TYPE_IPV6, .l3_src = "2001:1:1::2", .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = "2002:2:2::1", @@ -799,7 +814,8 @@ DP_START_TEST(icmp_ipv6, test2) /* * ICMP groups, drop packets specified by the firewall */ -DP_START_TEST(icmp_ipv6, test3) +DP_DECL_TEST_CASE(npf_icmp, icmpv6_3, NULL, NULL); +DP_START_TEST(icmpv6_3, test) { struct dp_test_pkt_desc_t *pkt; struct dp_test_expected *test_exp; @@ -825,7 +841,7 @@ DP_START_TEST(icmp_ipv6, test3) .rule = "10", .pass = BLOCK, .stateful = STATELESS, - .npf = "proto=58 icmpv6-group=ICMP1"}, + .npf = "proto-final=58 icmpv6-group=ICMP1"}, RULE_DEF_PASS, NULL_RULE }; @@ -844,7 +860,7 @@ DP_START_TEST(icmp_ipv6, test3) struct dp_test_pkt_desc_t v6_pkt = { .text = "ICMP IPv6", .len = 
20, - .ether_type = ETHER_TYPE_IPv6, + .ether_type = RTE_ETHER_TYPE_IPV6, .l3_src = "2001:1:1::2", .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = "2002:2:2::1", @@ -925,7 +941,8 @@ DP_START_TEST(icmp_ipv6, test3) /* * ICMP echo request and reply with a stateful firewall rule */ -DP_START_TEST(icmp_ipv4, test4) +DP_DECL_TEST_CASE(npf_icmp, icmpv4_4, NULL, NULL); +DP_START_TEST(icmpv4_4, test) { char *dp1T0_mac = dp_test_intf_name2mac_str("dp1T0"); char *dp2T1_mac = dp_test_intf_name2mac_str("dp2T1"); @@ -945,7 +962,7 @@ DP_START_TEST(icmp_ipv4, test4) struct dp_test_pkt_desc_t ins_pre = { .text = "Inside pre", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "100.101.102.103", .l2_src = "aa:bb:cc:16:0:20", .l3_dst = "200.201.202.203", @@ -968,7 +985,7 @@ DP_START_TEST(icmp_ipv4, test4) struct dp_test_pkt_desc_t ins_post = { .text = "Inside post", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "100.101.102.103", .l2_src = dp2T1_mac, .l3_dst = "200.201.202.203", @@ -991,7 +1008,7 @@ DP_START_TEST(icmp_ipv4, test4) struct dp_test_pkt_desc_t outs_pre = { .text = "Outside pre", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "200.201.202.203", .l2_src = "aa:bb:cc:18:0:1", .l3_dst = "100.101.102.103", @@ -1014,7 +1031,7 @@ DP_START_TEST(icmp_ipv4, test4) struct dp_test_pkt_desc_t outs_post = { .text = "Outside post", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "200.201.202.203", .l2_src = dp1T0_mac, .l3_dst = "100.101.102.103", @@ -1124,7 +1141,8 @@ DP_START_TEST(icmp_ipv4, test4) /* * ICMPv6 echo request and reply with a stateful firewall rule */ -DP_START_TEST(icmp_ipv6, test4) +DP_DECL_TEST_CASE(npf_icmp, icmpv6_4, NULL, NULL); +DP_START_TEST(icmpv6_4, test) { /* Setup interfaces and neighbours */ dp_test_nl_add_ip_addr_and_connected("dp1T0", "2001:1:1::1/64"); @@ -1136,7 +1154,7 @@ 
DP_START_TEST(icmp_ipv6, test4) struct dp_test_pkt_desc_t ins = { .text = "ICMP IPv6", .len = 20, - .ether_type = ETHER_TYPE_IPv6, + .ether_type = RTE_ETHER_TYPE_IPV6, .l3_src = "2001:1:1::2", .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = "2002:2:2::1", @@ -1156,7 +1174,7 @@ DP_START_TEST(icmp_ipv6, test4) struct dp_test_pkt_desc_t outs = { .text = "ICMP IPv6", .len = 20, - .ether_type = ETHER_TYPE_IPv6, + .ether_type = RTE_ETHER_TYPE_IPV6, .l3_src = "2002:2:2::1", .l2_src = "aa:bb:cc:dd:2:b1", .l3_dst = "2001:1:1::2", @@ -1258,7 +1276,8 @@ DP_START_TEST(icmp_ipv6, test4) * Then enforce within that session that the forward packets must be echo * requests, and the reverse packets echo replies. */ -DP_START_TEST(icmp_ipv4, test5) +DP_DECL_TEST_CASE(npf_icmp, icmpv4_5, NULL, NULL); +DP_START_TEST(icmpv4_5, test) { /* Setup interfaces and neighbours */ dp_test_nl_add_ip_addr_and_connected("dp1T0", "100.101.102.1/24"); @@ -1275,7 +1294,7 @@ DP_START_TEST(icmp_ipv4, test5) struct dp_test_pkt_desc_t ins = { .text = "Inside", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "100.101.102.103", .l2_src = "aa:bb:cc:16:0:20", .l3_dst = "200.201.202.203", @@ -1298,7 +1317,7 @@ DP_START_TEST(icmp_ipv4, test5) struct dp_test_pkt_desc_t outs = { .text = "Outside", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "200.201.202.203", .l2_src = "aa:bb:cc:18:0:1", .l3_dst = "100.101.102.103", @@ -1466,7 +1485,8 @@ DP_START_TEST(icmp_ipv4, test5) * Then enforce within that session that the forward packets must be echo * requests, and the reverse packets echo replies. 
*/ -DP_START_TEST(icmp_ipv6, test5) +DP_DECL_TEST_CASE(npf_icmp, icmpv6_5, NULL, NULL); +DP_START_TEST(icmpv6_5, test) { /* Setup interfaces and neighbours */ dp_test_nl_add_ip_addr_and_connected("dp1T0", "2001:1:1::1/64"); @@ -1479,7 +1499,7 @@ DP_START_TEST(icmp_ipv6, test5) struct dp_test_pkt_desc_t ins = { .text = "ICMP IPv6", .len = 20, - .ether_type = ETHER_TYPE_IPv6, + .ether_type = RTE_ETHER_TYPE_IPV6, .l3_src = "2001:1:1::2", .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = "2002:2:2::1", @@ -1499,7 +1519,7 @@ DP_START_TEST(icmp_ipv6, test5) struct dp_test_pkt_desc_t outs = { .text = "ICMP IPv6", .len = 20, - .ether_type = ETHER_TYPE_IPv6, + .ether_type = RTE_ETHER_TYPE_IPV6, .l3_src = "2002:2:2::1", .l2_src = "aa:bb:cc:dd:2:b1", .l3_dst = "2001:1:1::2", @@ -1652,7 +1672,8 @@ DP_START_TEST(icmp_ipv6, test5) /* * ICMP echo request and reply with SNAT */ -DP_START_TEST(icmp_ipv4, test6) +DP_DECL_TEST_CASE(npf_icmp, icmpv4_6, NULL, NULL); +DP_START_TEST(icmpv4_6, test) { char *dp1T0_mac = dp_test_intf_name2mac_str("dp1T0"); char *dp2T1_mac = dp_test_intf_name2mac_str("dp2T1"); @@ -1672,7 +1693,7 @@ DP_START_TEST(icmp_ipv4, test6) struct dp_test_pkt_desc_t ins_pre = { .text = "Inside pre", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "100.101.102.103", .l2_src = "aa:bb:cc:16:0:20", .l3_dst = "200.201.202.203", @@ -1695,7 +1716,7 @@ DP_START_TEST(icmp_ipv4, test6) struct dp_test_pkt_desc_t ins_post = { .text = "Inside post", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "200.201.202.2", .l2_src = dp2T1_mac, .l3_dst = "200.201.202.203", @@ -1718,7 +1739,7 @@ DP_START_TEST(icmp_ipv4, test6) struct dp_test_pkt_desc_t outs_pre = { .text = "Outside pre", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "200.201.202.203", .l2_src = "aa:bb:cc:18:0:1", .l3_dst = "200.201.202.2", @@ -1741,7 +1762,7 @@ DP_START_TEST(icmp_ipv4, test6) struct 
dp_test_pkt_desc_t outs_post = { .text = "Outside post", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "200.201.202.203", .l2_src = dp1T0_mac, .l3_dst = "100.101.102.103", @@ -1770,6 +1791,7 @@ DP_START_TEST(icmp_ipv4, test6) .ifname = "dp2T1", .proto = IPPROTO_ICMP, .map = "dynamic", + .port_alloc = NULL, .from_addr = "100.101.102.103", .from_port = NULL, .to_addr = NULL, @@ -1860,3 +1882,130 @@ DP_START_TEST(icmp_ipv4, test6) dp_test_nl_del_ip_addr_and_connected("dp2T1", "200.201.202.1/24"); } DP_END_TEST; + +/* + * Create an ICMP unreachable packet with an embedded packet in the payload + */ +static void +gen_icmp_unreach(struct rte_mbuf **rv_pak, struct dp_test_expected **rv_exp, + const void *payload, int payload_len) +{ + struct rte_mbuf *test_pak; + struct iphdr *ip; + struct dp_test_expected *exp; + + test_pak = dp_test_create_icmp_ipv4_pak("11.0.0.1", + "21.0.0.1", + ICMP_DEST_UNREACH, + ICMP_NET_UNREACH, + DPT_ICMP_UNREACH_DATA(0), + 1, /* one mbuf please */ + &payload_len, payload, + &ip, NULL); + + dp_test_set_pak_ip_field(ip, DP_TEST_SET_TOS, + IPTOS_PREC_INTERNETCONTROL); + + (void)dp_test_pktmbuf_eth_init(test_pak, + dp_test_intf_name2mac_str("dp1T0"), + NULL, RTE_ETHER_TYPE_IPV4); + + exp = dp_test_exp_create(test_pak); + + (void)dp_test_pktmbuf_eth_init(dp_test_exp_get_pak(exp), + "aa:bb:cc:dd:21:1", + dp_test_intf_name2mac_str("dp2T1"), + RTE_ETHER_TYPE_IPV4); + dp_test_ipv4_decrement_ttl(dp_test_exp_get_pak(exp)); + + dp_test_exp_set_oif_name(exp, "dp2T1"); + + *rv_pak = test_pak; + *rv_exp = exp; +} + +/* + * ICMP error message with corrupted embedded packet. + * + * If the embedded packet IP or IPv6 header is corrupted such that + * npf_ipv4_valid or npf_ipv6_valid fails then the packet cache of the + * embedded packet will not have been fully setup, including the pointers to + * the IP/IPv6 addresses. + * + * Any attempt to subsequently access the addresses will fail. 
This may occur + * with NAT or a firewall rule with logging enabled. + */ +DP_DECL_TEST_CASE(npf_icmp, icmpv4_7, NULL, NULL); +DP_START_TEST(icmpv4_7, test) +{ + struct rte_mbuf *test_pak, *embd_pak; + struct dp_test_expected *exp; + struct iphdr *embd_ip; + int len = 20; + int embd_len; + + /* Setup */ + dp_test_nl_add_ip_addr_and_connected("dp1T0", "10.0.0.1/24"); + dp_test_nl_add_ip_addr_and_connected("dp2T1", "20.0.0.1/24"); + + dp_test_netlink_add_neigh("dp1T0", "11.0.0.1", "aa:bb:cc:dd:11:1"); + dp_test_netlink_add_neigh("dp2T1", "21.0.0.1", "aa:bb:cc:dd:21:1"); + + dp_test_netlink_add_route("0.0.0.0/0 nh 21.0.0.1 int:dp2T1"); + + /* + * Add firewall rule with logging enabled + */ + dp_test_npf_cmd_fmt(false, "npf-ut add fw:FW_OUT 10 " + "action=accept to=any rproc=log"); + dp_test_npf_cmd_fmt(false, + "npf-ut attach interface:%s fw-out fw:FW_OUT", + dp_test_intf_real_buf("dp2T1")); + dp_test_npf_commit(); + + /* + * Create UDP pkt to be embedded within ICMP error packet + */ + embd_pak = dp_test_create_ipv4_pak("21.0.0.1", "11.0.0.1", + 1, &len); + embd_ip = iphdr(embd_pak); + dp_test_set_pak_ip_field(embd_ip, DP_TEST_SET_DF, 1); + embd_len = sizeof(struct iphdr) + sizeof(struct udphdr) + len; + + /* + * Send an ICMP unreachable with the above good pkt embedded within it + */ + gen_icmp_unreach(&test_pak, &exp, embd_ip, embd_len); + dp_test_pak_receive(test_pak, "dp1T0", exp); + + /* + * Repeat test. But this time corrupt the embedded packet IP header + * version field. This will cause the caching of the embedded packet + * to fail. 
+ */ + dp_test_set_pak_ip_field(embd_ip, DP_TEST_SET_VERSION, 5); + gen_icmp_unreach(&test_pak, &exp, embd_ip, embd_len); + dp_test_pak_receive(test_pak, "dp1T0", exp); + + /* Delete firewall rule */ + dp_test_npf_cmd_fmt(false, + "npf-ut detach interface:%s fw-out fw:FW_OUT", + dp_test_intf_real_buf("dp2T1")); + dp_test_npf_cmd_fmt(false, "npf-ut delete fw:FW_OUT"); + dp_test_npf_commit(); + + /* + * Cleanup + */ + rte_pktmbuf_free(embd_pak); + dp_test_netlink_del_route("0.0.0.0/0 nh 21.0.0.1 int:dp2T1"); + + dp_test_netlink_del_neigh("dp1T0", "11.0.0.1", "aa:bb:cc:dd:11:1"); + dp_test_netlink_del_neigh("dp2T1", "21.0.0.1", "aa:bb:cc:dd:21:1"); + + dp_test_nl_del_ip_addr_and_connected("dp1T0", "10.0.0.1/24"); + dp_test_nl_del_ip_addr_and_connected("dp2T1", "20.0.0.1/24"); + + dp_test_npf_cleanup(); + +} DP_END_TEST; diff --git a/tests/whole_dp/src/dp_test_npf_lib.c b/tests/whole_dp/src/dp_test_npf_lib.c index 77750ef5..4e29c515 100644 --- a/tests/whole_dp/src/dp_test_npf_lib.c +++ b/tests/whole_dp/src/dp_test_npf_lib.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2021, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -15,18 +15,21 @@ #include "if_var.h" #include "main.h" +#include "npf/npf_rc.h" #include "npf/npf_state.h" #include "npf/npf_timeouts.h" -#include "npf/alg/npf_alg_public.h" -#include "npf/alg/npf_alg_private.h" +#include "npf/alg/alg_npf.h" +#include "npf/alg/alg.h" +#include "npf/cgnat/cgn_test.h" #include "dp_test.h" -#include "dp_test_lib.h" -#include "dp_test_lib_intf.h" -#include "dp_test_pktmbuf_lib.h" -#include "dp_test_netlink_state.h" +#include "dp_test_lib_internal.h" +#include "dp_test_lib_intf_internal.h" +#include "dp_test_pktmbuf_lib_internal.h" +#include "dp_test_netlink_state_internal.h" #include "dp_test_console.h" #include "dp_test_json_utils.h" +#include "dp_test_npf_lib.h" #include "dp_test_npf_fw_lib.h" #include "dp_test_npf_sess_lib.h" #include "dp_test_npf_lib.h" @@ -34,9 +37,9 @@ /* * Returns "action=accept" or "action=drop" */ -const char *npf_action_string(bool pass) +const char *npf_action_string(bool accept) { - return pass ? "action=accept" : "action=drop"; + return accept ? "action=accept" : "action=drop"; } /* @@ -76,19 +79,6 @@ struct dp_test_npf_rule_t rule_10_block_udp[] = { RULE_DEF_PASS, NULL_RULE }; - -/* - * Enable/disable npf debugging in the dataplane - */ -void dp_test_npf_debug(bool enable) -{ - char cmd[TEST_MAX_CMD_LEN]; - - snprintf(cmd, TEST_MAX_CMD_LEN, "debug %snpf", enable ? 
"":"- "); - dp_test_console_request_reply(cmd, false); -} - - void _dp_test_npf_commit(const char *file, int line) { @@ -135,6 +125,133 @@ _dp_test_npf_cmd_fmt(bool print, const char *file, int line, va_end(ap); } +/* + * Create an address group and (optionally) add one address or prefix + */ +void _dpt_addr_grp_create(const char *name, const char *addr, + const char *file, int line) +{ + _dp_test_npf_cmd_fmt(false, file, line, + "npf-ut fw table create %s", name); + + if (addr) + _dp_test_npf_cmd_fmt(false, file, line, + "npf-ut fw table add %s %s", + name, addr); +} + +/* + * Destroy an address group + */ +void _dpt_addr_grp_destroy(const char *name, const char *addr, + const char *file, int line) +{ + if (addr) + _dp_test_npf_cmd_fmt(false, file, line, + "npf-ut fw table remove %s %s", + name, addr); + + _dp_test_npf_cmd_fmt(false, file, line, + "npf-ut fw table delete %s", name); +} + + +/* + * Create and attach a CGNAT policy. e.g. + * + * cgnat_policy_add("POLICY1", 10, "100.64.0.0/12", "POOL1", + * "dp2T1", CGN_MAP_EIM, CGN_FLTR_EIF, CGN_3TUPLE, true); + * + * Note that this creates a match address-group, e.g. "POLICY1_AG" + */ +void +_cgnat_policy_add(const char *policy, uint pri, const char *src, + const char *pool, const char *intf, + enum cgn_map_type eim, enum cgn_fltr_type eif, + bool log_sess, bool check_feat, + bool add_or_change, + const char *file, const char *func, int line) +{ + char real_ifname[IFNAMSIZ]; + char addr_grp[60]; + + dp_test_intf_real(intf, real_ifname); + + snprintf(addr_grp, sizeof(addr_grp), "%s_AG", policy); + + /* Add match address-group */ + if (add_or_change) + _dpt_addr_grp_create(addr_grp, src, file, line); + + /* Add cgnat policy */ + _dp_test_npf_cmd_fmt(false, file, line, + "cgn-ut policy add %s priority=%u " + "match-ag=%s pool=%s log-sess-all=%s", + policy, pri, addr_grp, pool, + log_sess ? 
"yes" : "no"); + + _dp_test_npf_cmd_fmt(false, file, line, + "cgn-ut policy attach name=%s intf=%s", + policy, real_ifname); + + /* Check cgnat feature is enabled */ + if (check_feat) { + dp_test_wait_for_pl_feat(intf, "vyatta:ipv4-cgnat-in", + "ipv4-validate"); + dp_test_wait_for_pl_feat(intf, "vyatta:ipv4-cgnat-out", + "ipv4-out"); + } +} + +void +_cgnat_policy_add2(const char *policy, uint pri, const char *src, + const char *pool, const char *intf, + const char *other, + const char *file, const char *func, int line) +{ + char real_ifname[IFNAMSIZ]; + char addr_grp[60]; + + dp_test_intf_real(intf, real_ifname); + + snprintf(addr_grp, sizeof(addr_grp), "%s_AG", policy); + + /* Add match address-group */ + _dpt_addr_grp_create(addr_grp, src, file, line); + + /* Add cgnat policy */ + _dp_test_npf_cmd_fmt(false, file, line, + "cgn-ut policy add %s priority=%u " + "match-ag=%s pool=%s %s", + policy, pri, addr_grp, pool, + other ? other : ""); + + _dp_test_npf_cmd_fmt(false, file, line, + "cgn-ut policy attach name=%s intf=%s", + policy, real_ifname); +} + +void +_cgnat_policy_del(const char *policy, uint pri, const char *intf, + const char *file, const char *func, int line) +{ + char real_ifname[IFNAMSIZ]; + char addr_grp[60]; + + dp_test_intf_real(intf, real_ifname); + + _dp_test_npf_cmd_fmt(false, file, line, + "cgn-ut policy detach name=%s intf=%s", + policy, real_ifname); + + /* Delete cgnat policy */ + _dp_test_npf_cmd_fmt(false, file, line, + "cgn-ut policy delete %s", policy); + + snprintf(addr_grp, sizeof(addr_grp), "%s_AG", policy); + _dpt_addr_grp_destroy(addr_grp, NULL, file, line); +} + /* * Clear npf counters for one or more or all npf ruleset types/classes @@ -283,6 +400,7 @@ dp_test_npf_ruleset_attach_type(const char *rstype) case NPF_RS_DNAT: case NPF_RS_SNAT: case NPF_RS_LOCAL: + case NPF_RS_ORIGINATE: case NPF_RS_SESSION_RPROC: case NPF_RS_BRIDGE: case NPF_RS_PBR: @@ -291,6 +409,8 @@ dp_test_npf_ruleset_attach_type(const char *rstype) case 
NPF_RS_NAT64: case NPF_RS_NAT46: return "interface"; + case NPF_RS_ZONE: + return "zone"; case NPF_RS_IPSEC: /* TBD */ break; @@ -334,6 +454,7 @@ dp_test_npf_ruleset_attach_point(struct dp_test_npf_ruleset_t *rset) case NPF_RS_DNAT: case NPF_RS_SNAT: case NPF_RS_LOCAL: + case NPF_RS_ORIGINATE: case NPF_RS_SESSION_RPROC: case NPF_RS_BRIDGE: case NPF_RS_PBR: @@ -343,6 +464,8 @@ dp_test_npf_ruleset_attach_point(struct dp_test_npf_ruleset_t *rset) case NPF_RS_NAT46: /* Attach-point is interface name */ return dp_test_intf_real_buf(rset->attach_point); + case NPF_RS_ZONE: + return rset->attach_point; case NPF_RS_IPSEC: /* TBD */ return NULL; @@ -378,7 +501,6 @@ _dp_test_npf_ruleset_attach(struct dp_test_npf_ruleset_t *rset, { char cmd[TEST_MAX_CMD_LEN]; const char *attach_type, *attach_point; - int l = 0; attach_type = dp_test_npf_ruleset_attach_type(rset->rstype); _dp_test_fail_unless(attach_type, file, line, @@ -389,9 +511,9 @@ _dp_test_npf_ruleset_attach(struct dp_test_npf_ruleset_t *rset, _dp_test_fail_unless(attach_point, file, line, "Failed to determine attach point"); - l += spush(cmd + l, sizeof(cmd) - l, - "npf-ut attach %s:%s %s %s:%s", - attach_type, attach_point, rset->rstype, class, rset->name); + spush(cmd, sizeof(cmd), + "npf-ut attach %s:%s %s %s:%s", + attach_type, attach_point, rset->rstype, class, rset->name); _dp_test_npf_cmd(cmd, debug, file, line); @@ -408,7 +530,6 @@ _dp_test_npf_ruleset_detach(struct dp_test_npf_ruleset_t *rset, { char cmd[TEST_MAX_CMD_LEN]; const char *attach_type, *attach_point; - int l = 0; attach_type = dp_test_npf_ruleset_attach_type(rset->rstype); _dp_test_fail_unless(attach_type, file, line, @@ -419,9 +540,9 @@ _dp_test_npf_ruleset_detach(struct dp_test_npf_ruleset_t *rset, _dp_test_fail_unless(attach_point, file, line, "Failed to determine attach point"); - l += spush(cmd + l, sizeof(cmd) - l, - "npf-ut detach %s:%s %s %s:%s", - attach_type, attach_point, rset->rstype, class, rset->name); + spush(cmd, sizeof(cmd), + 
"npf-ut detach %s:%s %s %s:%s", + attach_type, attach_point, rset->rstype, class, rset->name); _dp_test_npf_cmd(cmd, debug, file, line); @@ -433,11 +554,12 @@ _dp_test_npf_ruleset_detach(struct dp_test_npf_ruleset_t *rset, /* * Add an npf ruleset. Attach to attach_point if rset->attach_point is set + * (not zones). * * Example useage: * * struct dp_test_npf_rule_t rules[] = { - * {"10", PASS, STATELESS, "proto=6"}, + * {"10", PASS, STATELESS, "proto-final=6"}, * RULE_DEF_BLOCK, * NULL_RULE }; * @@ -452,11 +574,11 @@ _dp_test_npf_ruleset_detach(struct dp_test_npf_ruleset_t *rset, * }; * dp_test_npf_fw_add(&rset, false); * - * The ruleset class name is one of: fw, fw-internal, pbr, qos, ipsec, - * custom-timeout, session-limiter, app-firewall. + * The ruleset class name is one of: fw, fw-internal (intra zone class), pbr, + * qos, ipsec, custom-timeout, session-limiter, app-firewall. */ void -_dp_test_npf_ruleset_add(struct dp_test_npf_ruleset_t *rset, +_dp_test_fw_ruleset_add(struct dp_test_fw_ruleset_t *rset, const char *class, bool debug, bool verify, const char *file, int line) { @@ -469,7 +591,7 @@ _dp_test_npf_ruleset_add(struct dp_test_npf_ruleset_t *rset, * Add ruleset rules */ for (rule = rset->rules; rule && rule->rule; rule++) { - char *str = strstr(rule->npf, "proto="); + char *str = strstr(rule->npf, "proto-final="); /* * If the npf rule has a protocol specified, it must be a @@ -479,7 +601,7 @@ _dp_test_npf_ruleset_add(struct dp_test_npf_ruleset_t *rset, char *endp; ulong proto; - str += 6; + str += 12; proto = strtoul(str, &endp, 10); if (endp == str || proto > 255) _dp_test_fail( @@ -491,7 +613,7 @@ _dp_test_npf_ruleset_add(struct dp_test_npf_ruleset_t *rset, * * npf-ut add : action=accept|drop * [stateful=y] - * [proto=] + * [proto-final=] * [src-addr=[/]] * [src-port=] * [dst-addr=[/]] @@ -519,7 +641,7 @@ _dp_test_npf_ruleset_add(struct dp_test_npf_ruleset_t *rset, * ruleset */ void -_dp_test_npf_ruleset_del(struct dp_test_npf_ruleset_t *rset, 
+_dp_test_fw_ruleset_del(struct dp_test_fw_ruleset_t *rset, const char *class, bool debug, bool verify, const char *file, int line) { @@ -555,9 +677,8 @@ dp_test_npf_json_get_rs_groups(const char *rstype, char cmd[TEST_MAX_CMD_LEN]; char *response; bool err; - int l = 0; - l += snprintf(cmd+l, sizeof(cmd)-l, "npf-op show all: %s", rstype); + snprintf(cmd, sizeof(cmd), "npf-op show all: %s", rstype); response = dp_test_console_request_w_err(cmd, &err, false); if (!response || err) { @@ -624,6 +745,10 @@ dp_test_npf_json_get_rs_groups(const char *rstype, * } *] * + * For zones, there is an attach point for each interface in the zone. The + * attach point is the receive interface. The groups array in that ruleset + * contains an "out" entry for every other interface in the zone. + * * For custom-timeout, the attach_point is the VRF ID, i.e. "1" for default * VRF. */ @@ -698,6 +823,7 @@ _dp_test_npf_json_get_rs(const char *rstype, const char *attach_point, case NPF_RS_DNAT: case NPF_RS_SNAT: case NPF_RS_LOCAL: + case NPF_RS_ORIGINATE: case NPF_RS_SESSION_RPROC: case NPF_RS_BRIDGE: case NPF_RS_PBR: @@ -711,6 +837,10 @@ _dp_test_npf_json_get_rs(const char *rstype, const char *attach_point, key[KEY_INDEX_ATTACH_TYPE].val = "interface"; key[KEY_INDEX_ATTACH_POINT].val = real_ifname; break; + case NPF_RS_ZONE: + key[KEY_INDEX_ATTACH_TYPE].val = "zone"; + key[KEY_INDEX_ATTACH_POINT].val = attach_point; + break; case NPF_RS_IPSEC: /* TBD */ break; @@ -819,65 +949,9 @@ _dp_test_npf_json_get_rs_name(json_object *jarray, const char *name, return jobj; } -static bool -json_match_intf(json_object *jobj, void *arg) -{ - char *intf = arg; - const char *ointf; - - if (!dp_test_json_string_field_from_obj(jobj, "interface", &ointf)) - return false; - - return strcmp(ointf, intf) == 0; -} - -/* - * Get json object in ruleset groups array with specific interface - * - * Returns json object. json_object_put should be called once the caller has - * finished with the object. 
- * - * Example useage: - * - * jarray = dp_test_npf_json_get_rs("fw-in", "dp1T0", "in"); - * jobj = dp_test_npf_json_get_rs_intf(jarray, "dp2T1"); - * ... - * json_object_put(jarray); - * json_object_put(jobj); - */ -json_object * -_dp_test_npf_json_get_rs_intf(json_object *jarray, const char *intf, - const char *file, int line) -{ - json_object *jobj; - void *arg = (void *)intf; - - if (!jarray || !intf) - _dp_test_fail(file, line, "%s bad params", __func__); - - jobj = dp_test_json_array_iterate(jarray, &json_match_intf, arg); - - if (!jobj) - return NULL; - - jobj = json_object_get(jobj); - if (!jobj) - _dp_test_fail(file, line, - "%s json_object_get failed", __func__); - -#if 0 - const char *str = json_object_to_json_string_ext( - jobj, JSON_C_TO_STRING_PRETTY); - if (str) - printf("%s\n", str); -#endif - - return jobj; -} - /* * Get a specific rule from a json ruleset. The ruleset is typically what is - * returned by dp_test_npf_json_get_rs_name or dp_test_npf_json_get_rs_intf. + * returned by dp_test_npf_json_get_rs_name. * * Returns json object. json_object_put should be called once the caller has * finished with the object. @@ -918,6 +992,7 @@ dp_test_npf_json_get_rs_rule(json_object *jrset, const char *rule) * jrset = dp_test_npf_json_get_ruleset("dnat", "dp1T0", "in", NULL); * jrset = dp_test_npf_json_get_ruleset("snat", "dp1T0", "out", NULL); * jrset = dp_test_npf_json_get_ruleset("nat64", "dp1T0", "in", "NAT64_1"); + * jrset = dp_test_npf_json_get_ruleset("zone", "dp1T0", "out", "dp2T1"); */ json_object * _dp_test_npf_json_get_ruleset(const char *rstype, const char *attach_point, @@ -948,6 +1023,7 @@ _dp_test_npf_json_get_ruleset(const char *rstype, const char *attach_point, /* * For nat64 and nat46, the attach point is an interface. + * For zone, the ruleset name is the 'to' interface. 
*/ if (t == NPF_RS_NAT64 || t == NPF_RS_NAT46) dp_test_intf_real(attach_point, real_ifname); @@ -991,6 +1067,7 @@ _dp_test_npf_json_get_ruleset(const char *rstype, const char *attach_point, * jrule = dp_test_npf_json_get_rule("dnat", "dp1T0", "in", NULL, "10"); * jrule = dp_test_npf_json_get_rule("snat", "dp1T0", "out", NULL, "10"); * jrule = dp_test_npf_json_get_rule("nat64", NULL, "in", "dp1T0", "1"); + * jrule = dp_test_npf_json_get_rule("zone", "dp1T0", "out", "dp2T1", "1"); */ json_object * _dp_test_npf_json_get_rule(const char *rstype, const char *attach_point, @@ -1020,78 +1097,6 @@ _dp_test_npf_json_get_rule(const char *rstype, const char *attach_point, return jrule; } -/* - * Get the packet count for all rules in a ruleset - */ -bool -_dp_test_npf_ruleset_pkt_count(struct dp_test_npf_ruleset_t *rset, - uint *packets, bool debug, - const char *file, int line) -{ - struct dp_test_npf_rule_t *rule; - json_object *jrule; - json_object *jgrp; - uint rpkts; - - *packets = 0; - - jgrp = _dp_test_npf_json_get_ruleset(rset->rstype, rset->attach_point, - rset->dir, rset->name, - debug, file, line); - _dp_test_fail_unless(jgrp, file, line, - "Failed to find ruleset %s %s", - rset->rstype, rset->name); - - for (rule = rset->rules; rule && rule->rule != NULL; rule++) { - jrule = dp_test_npf_json_get_rs_rule(jgrp, rule->rule); - _dp_test_fail_unless(jrule, file, line, - "Failed to find rule %s %s %s", - rset->rstype, rset->name, rule->rule); - - if (!dp_test_json_int_field_from_obj(jrule, "packets", - (int *)&rpkts)) { - _dp_test_fail(file, line, - "Failed to get pkts count " - "from rule %s %s %s", - rset->rstype, rset->name, rule->rule); - return false; - } - json_object_put(jrule); - - *packets += rpkts; - } - json_object_put(jgrp); - return true; -} - -/* - * Get the packet count for one rule in a ruleset - */ -bool -_dp_test_npf_rule_pkt_count(struct dp_test_npf_ruleset_t *rset, - const char *rule, uint *packets, - bool debug, const char *file, int line) -{ - 
json_object *jrule; - bool rv; - - jrule = dp_test_npf_json_get_rule(rset->rstype, rset->attach_point, - rset->dir, rset->name, rule); - _dp_test_fail_unless(jrule, file, line, - "Failed to find rule %s %s %s", - rset->rstype, rset->name, rule); - - rv = dp_test_json_int_field_from_obj(jrule, "packets", - (int *)packets); - _dp_test_fail_unless(rv, file, line, - "Failed to get pkts count " - "from rule %s %s %s", - rset->rstype, rset->name, rule); - - json_object_put(jrule); - return rv; -} - /* * Verify the packet count of an npf rule. * @@ -1101,6 +1106,7 @@ _dp_test_npf_rule_pkt_count(struct dp_test_npf_ruleset_t *rset, * _dp_test_npf_verify_pkt_count(NULL, "dnat", "dp1T0", "in", NULL, "10", 0); * _dp_test_npf_verify_pkt_count(NULL, "snat", "dp1T0", "out", NULL, "10", 2); * _dp_test_npf_verify_pkt_count(NULL, "nat64", NULL, "in", "dp1T0", "1", 1); + * _dp_test_npf_verify_pkt_count(NULL, "zone", "dp1T0", "out", "dp2T1", "1", 2); */ void _dp_test_npf_verify_pkt_count(const char *desc, @@ -1162,34 +1168,6 @@ dp_test_npf_print_sessions(const char *desc) json_object_put(jresp); } -void -dp_test_npf_print_sessions_summary(const char *desc) -{ - json_object *jresp; - const char *str; - char *response; - bool err; - - if (desc) - printf("%s\n", desc); - - response = dp_test_console_request_w_err( - "session-op show sessions summary", - &err, true); - if (!response || err) - return; - - jresp = parse_json(response, parse_err_str, sizeof(parse_err_str)); - free(response); - if (!jresp) - return; - - str = json_object_to_json_string_ext(jresp, JSON_C_TO_STRING_PRETTY); - if (str) - printf("%s\n", str); - json_object_put(jresp); -} - void dp_test_npf_print_nat_sessions(const char *desc) { @@ -1232,8 +1210,6 @@ dp_test_npf_flush_rulesets(void) dp_test_console_request_reply("npf-op flush", false); } -void dp_test_npf_clear_cgnat(void); - void dp_test_npf_cleanup(void) { @@ -1249,6 +1225,9 @@ dp_test_npf_cleanup(void) /* Clear sessions */ dp_test_npf_clear_sessions(); + /* 
Reset session ID to 0 */ + dp_test_npf_reset_session_id(); + /* Clear portmaps */ dp_test_npf_flush_portmap(); @@ -1301,26 +1280,26 @@ _dp_test_npf_raw(int index, struct rte_mbuf *pkt, uint16_t exp_etype; npf_rule_t *rule; int exp_alen; - bool rv; + int rc; /* * Use the expected cache info to determine IPv4 or IPv6 */ if (exp_npc & NPC_IP6) { exp_alen = 16; - exp_etype = ETHER_TYPE_IPv6; + exp_etype = RTE_ETHER_TYPE_IPV6; } else { exp_alen = 4; - exp_etype = ETHER_TYPE_IPv4; + exp_etype = RTE_ETHER_TYPE_IPV4; } /* * Cache packet and verify cache */ npf_cache_init(npc); - rv = npf_cache_all(npc, pkt, htons(exp_etype)); + rc = npf_cache_all(npc, pkt, htons(exp_etype)); - _dp_test_fail_unless(rv, file, line, + _dp_test_fail_unless(rc == 0, file, line, "packet cache [%d]\n", index); _dp_test_fail_unless(npc->npc_alen, file, line, diff --git a/tests/whole_dp/src/dp_test_npf_lib.h b/tests/whole_dp/src/dp_test_npf_lib.h index 83861622..971404aa 100644 --- a/tests/whole_dp/src/dp_test_npf_lib.h +++ b/tests/whole_dp/src/dp_test_npf_lib.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -14,108 +14,21 @@ #include #include #include "npf/npf.h" +#include "npf/npf_rc.h" #include "npf/npf_ruleset.h" #include "npf/config/npf_ruleset_type.h" -/* - * A firewall comprises a single firewall group structure and one or - * more firewall rule structures. - * - * First step is to create a firewall rules array, terminated with NULL_RULE: - * - * struct dp_test_npf_rule_t rules[] = { - * { - * .rule = "10", - * .pass = PASS, - * .stateful = STATELESS, - * .npf = "pass proto 6"}, - * RULE_DEF_BLOCK, - * NULL_RULE }; - * - * There are some predefined rules below, e.g. 
RULE_DEF_BLOCK is the same as - * sonfiguring the default action to 'block' - * - * Second step is to create the firewall group: - * - * struct dp_test_npf_ruleset_t fw = { - * .rstype = "fw-in", - * .name = "FW1_IN", - * .enable = 1, - * .intf = "dp2T1", - * .fwd = FWD, - * .dir = "in", - * .rules = rules - * }; - * - * The firewall group is added to the dataplane and assigned to an interface - * by calling: - * - * dp_test_npf_fw_add(&fw, false) - */ +#include "dp_test/dp_test_firewall_lib.h" /* - * Simple, *short* definitions that make the dp_test_npf_fw.c test matrix - * more readable. + * Defines for backward compatibility. */ -#define STATELESS false -#define STATEFUL true - -#define BLOCK false -#define PASS true - -#define ASSIGN true -#define REMOVE false +#define dp_test_npf_rule_t dp_test_fw_rule_t +#define dp_test_npf_ruleset_t dp_test_fw_ruleset_t -#define FORWARDS false -#define REVERSE true - -/* - * Firewall rule - */ -struct dp_test_npf_rule_t { - const char *rule; /* Rule number e.g. "10" */ - bool pass; /* BLOCK or PASS */ - bool stateful; /* STATELESS or STATEFUL */ - const char *npf; /* Actual rule e.g. "pass proto 6" */ -}; - -/* - * npf ruleset - * - * If 'attach_point' is non-NULL then the ruleset is attached to that - * attach_point when dp_test_npf_ruleset_add is called. - * - * 'fwd' is a convenience variable to describe if the ruleset is used in the - * forwards or reverse packet flow for a particular test. It is not used by - * any library code. Currently it is only used by the test arrays in - * dp_test_npf_fw.c. Other users may ignore it. - */ -struct dp_test_npf_ruleset_t { - const char *rstype; /* Feature name e.g. "fw-in" */ - const char *name; /* Ruleset name e.g. "FW1" */ - bool enable; - const char *attach_point; /* Attach point e.g. 
interface name */ - bool fwd; /* true for forwards direction */ - const char *dir; /* "in" or "out" */ - /* - * Array of rules, terminated by a rule with NULL_RULE - */ - struct dp_test_npf_rule_t *rules; -}; - -/* - * Note, the dataplane has changed to only accept protocol numbers. and - * not strings - */ -#define NPF_PROTO_TCP "proto=6" -#define NPF_PROTO_UDP "proto=17" +#define NPF_PROTO_TCP FW_PROTO_TCP +#define NPF_PROTO_UDP FW_PROTO_UDP -/* - * npf rule (struct dp_test_npf_rule_t) templates - */ -#define NULL_RULE {NULL, BLOCK, STATELESS, NULL} -#define RULE_DEF_PASS {"10000", PASS, STATELESS, ""} -#define RULE_DEF_BLOCK {"10000", BLOCK, STATELESS, ""} #define RULE_1_PASS {"1", PASS, STATELESS, ""} #define RULE_10_PASS_TO_ANY {"10", PASS, STATELESS, ""} #define RULE_10_PASS_FM_ANY {"10", PASS, STATELESS, ""} @@ -137,23 +50,11 @@ extern struct dp_test_npf_rule_t rule_10_pass_udp[]; extern struct dp_test_npf_rule_t rule_10_pass_udp_sf[]; extern struct dp_test_npf_rule_t rule_10_block_udp[]; -/* dp_test_npf_ruleset_t 'fwd' field */ -#define FWD true -#define REV false - -/* struct dp_test_npf_ruleset_t templates */ -#define NULL_FW {NULL, NULL, 0, NULL, 0, "-", NULL} - /* * Returns "action=accept" or "action=drop" */ const char *npf_action_string(bool accept); -/* - * Enable/disable npf debugging in the dataplane - */ -void dp_test_npf_debug(bool enable); - /* * Get the real interface name. Return in a temporary buffer from a circular * array. @@ -180,6 +81,68 @@ _dp_test_npf_cmd_fmt(bool print, const char *file, int line, _dp_test_npf_cmd_fmt(print, __FILE__, __LINE__, \ fmt_str, ##__VA_ARGS__) +/* + * Simple config to create an address-group and (optionally) add one prefix or + * address. 
+ */ +void _dpt_addr_grp_create(const char *name, const char *addr, + const char *file, int line); + +#define dpt_addr_grp_create(a, b) \ + _dpt_addr_grp_create(a, b, __FILE__, __LINE__) + +void _dpt_addr_grp_destroy(const char *name, const char *addr, + const char *file, int line); + +#define dpt_addr_grp_destroy(a, b) \ + _dpt_addr_grp_destroy(a, b, __FILE__, __LINE__) + +/* + * Create and attach a CGNAT policy, e.g. + * + * cgnat_policy_add("POLICY1", 10, "100.64.0.0/12", "POOL1", + * "dp2T1", CGN_MAP_EIM, CGN_FLTR_EIF, CGN_3TUPLE, true); + * + * Note that this creates a match address-group, e.g. "POLICY1_AG" + */ +#include "npf/cgnat/cgn_policy.h" +#define CGN_3TUPLE false +#define CGN_5TUPLE true + +void _cgnat_policy_add(const char *policy, uint pri, const char *src, + const char *pool, const char *intf, + enum cgn_map_type eim, enum cgn_fltr_type eif, + bool log_sess, bool check_feat, + bool add_or_change, + const char *file, const char *func, int line); + +#define cgnat_policy_add(_a, _b, _c, _d, _e, _f, _g, _h, _i) \ + _cgnat_policy_add(_a, _b, _c, _d, _e, _f, _g, _h, _i, true, \ + __FILE__, __func__, __LINE__) + +#define cgnat_policy_change(_a, _b, _c, _d, _e, _f, _g, _h, _i) \ + _cgnat_policy_add(_a, _b, _c, _d, _e, _f, _g, _h, _i, \ + false, __FILE__, __func__, __LINE__) + +void _cgnat_policy_add2(const char *policy, uint pri, const char *src, + const char *pool, const char *intf, + const char *other, + const char *file, const char *func, int line); +#define cgnat_policy_add2(_a, _b, _c, _d, _e, _f) \ + _cgnat_policy_add2(_a, _b, _c, _d, _e, _f, \ + __FILE__, __func__, __LINE__) + + +/* + * cgnat_policy_del("POLICY1", 10, "dp2T1"); + */ +void _cgnat_policy_del(const char *policy, uint pri, const char *intf, + const char *file, const char *func, int line); + +#define cgnat_policy_del(_a, _b, _c) \ + _cgnat_policy_del(_a, _b, _c, __FILE__, __func__, __LINE__) + + /* * Clear npf counters for one or more or all npf ruleset types/classes * @@ -199,10 +162,6 
@@ _dp_test_npf_commit(const char *file, int line); void dp_test_npf_print_sessions(const char *desc); -/* Pretty prints "npf fw list sessions summary" */ -void -dp_test_npf_print_sessions_summary(const char *desc); - /* Pretty prints "npf fw list sessions nat" */ void dp_test_npf_print_nat_sessions(const char *desc); @@ -219,45 +178,24 @@ void dp_test_npf_cleanup(void); -/* - * Add an npf ruleset. Attach to attach_point if rset->attach_point is set. - * - * If 'verify' is set we check the ruleset has been added to the dataplane. - */ -void -_dp_test_npf_ruleset_add(struct dp_test_npf_ruleset_t *ruleset, - const char *class, bool debug, bool verify, - const char *file, int line); - #define dp_test_npf_ruleset_add(rs, class, debug) \ - _dp_test_npf_ruleset_add(rs, class, debug, \ + _dp_test_fw_ruleset_add(rs, class, debug, \ true, __FILE__, __LINE__) #define dp_test_npf_fw_add(rs, debug) \ - _dp_test_npf_ruleset_add(rs, "fw", debug, \ + _dp_test_fw_ruleset_add(rs, "fw", debug, \ true, __FILE__, __LINE__) #define dp_test_npf_fw_intnl_add(rs, debug) \ - _dp_test_npf_ruleset_add(rs, "fw-internal", debug, \ + _dp_test_fw_ruleset_add(rs, "fw-internal", debug, \ true, __FILE__, __LINE__) -/* - * Remove an npf ruleset from and interface and delete it - * - * If 'verify' is set we check the ruleset has been removed from the - * dataplane. - */ -void -_dp_test_npf_ruleset_del(struct dp_test_npf_ruleset_t *ruleset, - const char *class, bool debug, bool verify, - const char *file, int line); - #define dp_test_npf_fw_del(fw, debug) \ - _dp_test_npf_ruleset_del(fw, "fw", debug, \ + _dp_test_fw_ruleset_del(fw, "fw", debug, \ true, __FILE__, __LINE__) #define dp_test_npf_fw_intnl_del(fw, debug) \ - _dp_test_npf_ruleset_del(fw, "fw-internal", debug, \ + _dp_test_fw_ruleset_del(fw, "fw-internal", debug, \ true, __FILE__, __LINE__) /* @@ -272,12 +210,17 @@ _dp_test_npf_ruleset_del(struct dp_test_npf_ruleset_t *ruleset, * finished with the object. 
* * For most rulesets (fw-in, fw-out, dnat, snat, local, session-rproc, bridge, - * pbr), the attach_point is an interface. + * pbr, zone), the attach_point is an interface. + * + * For zones, there is an attach point for each interface in the zone. The + * attach point is the receive interface. The groups array in that ruleset + * contains an "out" entry for every other interface in the zone. * * For custom-timeout, the attach_point is the VRF ID, i.e. "1" for default * VRF. */ -json_object *_dp_test_npf_json_get_rs(const char *rsname, const char *ifname, +json_object *_dp_test_npf_json_get_rs(const char *rstype, + const char *attach_point, const char *dir, bool debug, const char *file, int line); @@ -306,30 +249,9 @@ json_object *_dp_test_npf_json_get_rs_name(json_object *jarray, #define dp_test_npf_json_get_rs_name(jarray, name) \ _dp_test_npf_json_get_rs_name(jarray, name, __FILE__, __LINE__) -/* - * Get json object in ruleset groups array with specific interface - * - * Returns json object. json_object_put should be called once the caller has - * finished with the object. - * - * Example useage: - * - * jarray = dp_test_npf_json_get_rs("fw-in", "dp1T0", "in"); - * jobj = dp_test_npf_json_get_rs_intf(jarray, "dp2T1"); - * ... - * json_object_put(jarray); - * json_object_put(jobj); - */ -json_object *_dp_test_npf_json_get_rs_intf(json_object *jarray, - const char *intf, - const char *file, int line); - -#define dp_test_npf_json_get_rs_intf(jarray, intf) \ - _dp_test_npf_json_get_rs_intf(jarray, intf, __FILE__, __LINE__) - /* * Get a specific rule from a json ruleset. The ruleset is typically what is - * returned by dp_test_npf_json_get_rs_name or dp_test_npf_json_get_rs_intf. + * returned by dp_test_npf_json_get_rs_name. * * Returns json object. json_object_put should be called once the caller has * finished with the object. 
@@ -360,6 +282,7 @@ json_object *dp_test_npf_json_get_rs_rule(json_object *jrset, * jrset = dp_test_npf_json_get_ruleset("dnat", "dp1T0", "in", NULL); * jrset = dp_test_npf_json_get_ruleset("snat", "dp1T0", "out", NULL); * jrset = dp_test_npf_json_get_ruleset("nat64", NULL, "in", "dp1T0"); + * jrset = dp_test_npf_json_get_ruleset("zone", "dp1T0", "out", "dp2T1"); */ json_object * _dp_test_npf_json_get_ruleset(const char *rstype, const char *attach_point, @@ -382,6 +305,7 @@ _dp_test_npf_json_get_ruleset(const char *rstype, const char *attach_point, * jrule = dp_test_npf_json_get_rule("dnat", "dp1T0", "in", NULL, "10"); * jrule = dp_test_npf_json_get_rule("snat", "dp1T0", "out", NULL, "10"); * jrule = dp_test_npf_json_get_rule("nat64", NULL, "in", "dp1T0", "1"); + * jrule = dp_test_npf_json_get_rule("zone", "dp1T0", "out", "dp2T1", "1"); */ json_object * _dp_test_npf_json_get_rule(const char *rstype, const char *attach_point, @@ -393,31 +317,6 @@ _dp_test_npf_json_get_rule(const char *rstype, const char *attach_point, _dp_test_npf_json_get_rule(rstype, ap, dir, rsname, rule, \ false, __FILE__, __LINE__) -/* - * Get the packet count for all rules in a ruleset - */ -bool -_dp_test_npf_ruleset_pkt_count(struct dp_test_npf_ruleset_t *rset, - uint *packets, bool debug, - const char *file, int line); - -#define dp_test_npf_ruleset_pkt_count(rset, pkts) \ - _dp_test_npf_ruleset_pkt_count(rset, pkts, false, \ - __FILE__, __LINE__) - -/* - * Get the packet count for one rule in a ruleset - */ -bool -_dp_test_npf_rule_pkt_count(struct dp_test_npf_ruleset_t *rset, - const char *rule, - uint *packets, bool debug, - const char *file, int line); - -#define dp_test_npf_rule_pkt_count(rset, rule, pkts) \ - _dp_test_npf_rule_pkt_count(rset, rule, pkts, false, \ - __FILE__, __LINE__) - /* * Verify the packet count of an npf rule. 
* @@ -427,6 +326,7 @@ _dp_test_npf_rule_pkt_count(struct dp_test_npf_ruleset_t *rset, * _dp_test_npf_verify_pkt_count(NULL, "dnat", "dp1T0", "in", NULL, "10", 0); * _dp_test_npf_verify_pkt_count(NULL, "snat", "dp1T0", "out", NULL, "10", 2); * _dp_test_npf_verify_pkt_count(NULL, "nat64", NULL, "in", "dp1T0", "1", 1); + * _dp_test_npf_verify_pkt_count(NULL, "zone", "dp1T0", "out", "dp2T1", "1", 2); */ void _dp_test_npf_verify_pkt_count(const char *desc, @@ -475,7 +375,7 @@ __dp_test_npf_verify_rule_pkt_count(const char *desc, * uint16_t exp_npc4; * * dp_test_intf_real("dp1T0", real_ifname); - * ifp = ifnet_byifname(real_ifname); + * ifp = dp_ifnet_byifname(real_ifname); * nif = rcu_dereference(ifp->if_npf); * npf_config = npf_if_conf(nif); * rlset = npf_get_ruleset(npf_config, NPF_RS_FW_IN); diff --git a/tests/whole_dp/src/dp_test_npf_local.c b/tests/whole_dp/src/dp_test_npf_local.c index d7ad3071..dfc821a5 100644 --- a/tests/whole_dp/src/dp_test_npf_local.c +++ b/tests/whole_dp/src/dp_test_npf_local.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2018-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -19,12 +19,12 @@ #include "dp_test.h" #include "dp_test_str.h" -#include "dp_test_lib.h" +#include "dp_test_lib_internal.h" #include "dp_test_lib_exp.h" -#include "dp_test_lib_intf.h" +#include "dp_test_lib_intf_internal.h" #include "dp_test_lib_pkt.h" -#include "dp_test_pktmbuf_lib.h" -#include "dp_test_netlink_state.h" +#include "dp_test_pktmbuf_lib_internal.h" +#include "dp_test_netlink_state_internal.h" #include "dp_test_console.h" #include "dp_test_json_utils.h" #include "dp_test_npf_lib.h" @@ -57,7 +57,7 @@ DP_START_TEST(ipv4, spath) struct dp_test_pkt_desc_t v4_pktA = { .text = "Packet A, Local -> Neighbour 1", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "1.1.1.1", .l2_src = "0:0:0:0:0:0", .l3_dst = "1.1.1.2", @@ -90,7 +90,7 @@ DP_START_TEST(ipv4, spath) struct dp_test_pkt_desc_t v4_pktB = { .text = "Packet B, Neighbour 1 -> Local", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "1.1.1.2", .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = "1.1.1.1", @@ -162,7 +162,7 @@ DP_START_TEST(ipv4, kernel_forwarded) struct dp_test_pkt_desc_t v4_pktA = { .text = "Packet B, Non-local -> Neighbour 1", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "2.2.2.2", .l2_src = "aa:bb:cc:dd:2:a2", .l3_dst = "1.1.1.2", @@ -200,3 +200,2112 @@ DP_START_TEST(ipv4, kernel_forwarded) "aa:bb:cc:dd:1:a1"); } DP_END_TEST; + +/* + * Test creates ipv4 tcp packet and send it to shadow interface. + * Originate firewall is configured in the interface to verify + * dscp mark function and action drop. 
+ * + * | + * | + * v + * +-----+ 1.1.1.1 + * | | + * | uut |---------------host 1.1.1.2 + * | | dp1T0 + * +-----+ intf1 + * + * --> Forwards (on output) + * Source 1.1.1.1 Destination 1.1.1.2 + * + */ +DP_DECL_TEST_SUITE(npf_orig); + +DP_DECL_TEST_CASE(npf_orig, ipv4_tcp_shadow, NULL, NULL); + +static void npf_orig_ipv4_tcp_shadow_setup( + struct dp_test_expected **test_exp, + struct rte_mbuf **test_pak) +{ + struct dp_test_pkt_desc_t *pkt; + struct rte_mbuf *exp_pak; + struct iphdr *ip; + + /* Setup interfaces and neighbours */ + dp_test_nl_add_ip_addr_and_connected("dp1T0", "1.1.1.1/24"); + dp_test_netlink_add_neigh("dp1T0", "1.1.1.2", + "aa:bb:cc:dd:1:a1"); + + /* + * Simulate pkt from kernel to be tx on intf1 + */ + struct dp_test_pkt_desc_t v4_pktA = { + .text = "Packet A, Local -> Neighbour 1", + .len = 20, + .ether_type = RTE_ETHER_TYPE_IPV4, + .l3_src = "1.1.1.1", + .l2_src = "0:0:0:0:0:0", + .l3_dst = "1.1.1.2", + .l2_dst = "aa:bb:cc:dd:1:a1", + .proto = IPPROTO_TCP, + .l4 = { + .tcp = { + .sport = 41000, + .dport = 1000, + .flags = 0 + } + }, + .rx_intf = "dp1T0", + .tx_intf = "dp1T0" + }; + pkt = &v4_pktA; + + *test_pak = dp_test_from_spath_v4_pkt_from_desc(pkt); + + *test_exp = dp_test_exp_create(*test_pak); + exp_pak = dp_test_exp_get_pak_m(*test_exp, 0); + ip = iphdr(exp_pak); + dp_test_set_pak_ip_field(ip, DP_TEST_SET_TOS, IPTOS_DSCP_AF12); + dp_test_exp_set_dont_care(*test_exp, 0, (uint8_t *)&ip->check, 2); +} + +DP_START_TEST(ipv4_tcp_shadow, accpet_and_dscp_remark) +{ + struct dp_test_expected *test_exp = NULL; + struct rte_mbuf *test_pak = NULL; + + struct dp_test_npf_rule_t rules[] = { + { + .rule = "1", + .pass = PASS, + .stateful = STATELESS, + .npf = "proto-final=6 src-port=41000" + " rproc=markdscp(12)"}, + RULE_DEF_BLOCK, + NULL_RULE }; + + struct dp_test_npf_ruleset_t fw = { + .rstype = "originate", + .name = "FW_TCP_ORIG", + .enable = 1, + .attach_point = "dp1T0", + .fwd = FWD, + .dir = "out", + .rules = rules + }; + 
dp_test_npf_fw_add(&fw, false); + + npf_orig_ipv4_tcp_shadow_setup(&test_exp, &test_pak); + + dp_test_exp_set_oif_name(test_exp, "dp1T0"); + dp_test_exp_set_fwd_status(test_exp, DP_TEST_FWD_FORWARDED); + + /* Run the test. kernel -> intf1 -> n1 */ + dp_test_send_slowpath_pkt(test_pak, test_exp); + + /* Verify firewall packet count */ + dp_test_npf_verify_rule_pkt_count(NULL, &fw, fw.rules[0].rule, 1); + + /* Cleanup */ + dp_test_npf_fw_del(&fw, false); + + dp_test_nl_del_ip_addr_and_connected("dp1T0", "1.1.1.1/24"); + dp_test_netlink_del_neigh("dp1T0", "1.1.1.2", + "aa:bb:cc:dd:1:a1"); + +} DP_END_TEST; + +DP_START_TEST(ipv4_tcp_shadow, drop) +{ + struct dp_test_expected *test_exp = NULL; + struct rte_mbuf *test_pak = NULL; + + struct dp_test_npf_rule_t rules[] = { + { + .rule = "1", + .pass = BLOCK, + .stateful = STATELESS, + .npf = "proto-final=6 src-port=41000" + " rproc=markdscp(12)"}, + RULE_DEF_PASS, + NULL_RULE }; + + struct dp_test_npf_ruleset_t fw = { + .rstype = "originate", + .name = "FW_TCP_ORIG", + .enable = 1, + .attach_point = "dp1T0", + .fwd = FWD, + .dir = "out", + .rules = rules + }; + dp_test_npf_fw_add(&fw, false); + + npf_orig_ipv4_tcp_shadow_setup(&test_exp, &test_pak); + + dp_test_exp_set_oif_name(test_exp, "dp1T0"); + dp_test_exp_set_fwd_status(test_exp, DP_TEST_FWD_DROPPED); + + /* Run the test. kernel -> intf1 -> n1 */ + dp_test_send_slowpath_pkt(test_pak, test_exp); + + /* Verify firewall packet count */ + dp_test_npf_verify_rule_pkt_count(NULL, &fw, fw.rules[0].rule, 1); + + /* Cleanup */ + dp_test_npf_fw_del(&fw, false); + + dp_test_nl_del_ip_addr_and_connected("dp1T0", "1.1.1.1/24"); + dp_test_netlink_del_neigh("dp1T0", "1.1.1.2", + "aa:bb:cc:dd:1:a1"); + +} DP_END_TEST; + +/* + * Test creates ipv6 tcp packet and send it to shadow interface. + * Originate firewall is configured in the interface to verify + * dscp mark function and action drop. 
+ * + * | + * | + * v + * +-----+ 2001::1/64 + * | | + * | uut |---------------host 2001::2 + * | | dp1T0 + * +-----+ intf1 + * + * --> Forwards (on output) + * Source 2001::1 Destination 2001::2 + * + */ +DP_DECL_TEST_CASE(npf_orig, ipv6_tcp_shadow, NULL, NULL); + +static void npf_orig_ipv6_tcp_shadow_setup( + struct dp_test_expected **test_exp, + struct rte_mbuf **test_pak) +{ + struct dp_test_pkt_desc_t *pkt; + struct rte_mbuf *exp_pak; + struct ip6_hdr *ip6; + + /* Setup interfaces and neighbors */ + dp_test_nl_add_ip_addr_and_connected("dp1T0", "2001::1/64"); + + /* + * Simulate pkt from kernel to be tx on intf1 + */ + struct dp_test_pkt_desc_t v4_pktA = { + .text = "Packet A, Local -> Neighbour 1", + .len = 20, + .ether_type = RTE_ETHER_TYPE_IPV6, + .l3_src = "2001::1", + .l2_src = "0:0:0:0:0:0", + .l3_dst = "2001::2", + .l2_dst = "aa:bb:cc:dd:1:a1", + .proto = IPPROTO_TCP, + .l4 = { + .tcp = { + .sport = 41000, + .dport = 1000, + .flags = 0 + } + }, + .rx_intf = "dp1T0", + .tx_intf = "dp1T0" + }; + pkt = &v4_pktA; + + *test_pak = dp_test_from_spath_pkt_from_desc(pkt); + + *test_exp = dp_test_exp_create(*test_pak); + exp_pak = dp_test_exp_get_pak_m(*test_exp, 0); + ip6 = ip6hdr(exp_pak); + dp_test_set_pak_ip6_field(ip6, DP_TEST_SET_TOS, IPTOS_DSCP_AF12); +} + +DP_START_TEST(ipv6_tcp_shadow, dscp_remark) +{ + struct dp_test_expected *test_exp; + struct rte_mbuf *test_pak; + + npf_orig_ipv6_tcp_shadow_setup(&test_exp, &test_pak); + + struct dp_test_npf_rule_t rules[] = { + { + .rule = "1", + .pass = PASS, + .stateful = STATELESS, + .npf = "proto-final=6 src-port=41000" + " rproc=markdscp(12)"}, + RULE_DEF_BLOCK, + NULL_RULE }; + + struct dp_test_npf_ruleset_t fw = { + .rstype = "originate", + .name = "FW_TCP_ORIG", + .enable = 1, + .attach_point = "dp1T0", + .fwd = FWD, + .dir = "out", + .rules = rules + }; + dp_test_npf_fw_add(&fw, false); + + dp_test_exp_set_oif_name(test_exp, "dp1T0"); + dp_test_exp_set_fwd_status(test_exp, DP_TEST_FWD_FORWARDED); + + /* Run 
the test. kernel -> intf1 -> n1 */ + dp_test_send_slowpath_pkt(test_pak, test_exp); + + /* Verify firewall packet count */ + dp_test_npf_verify_rule_pkt_count(NULL, &fw, fw.rules[0].rule, 1); + + /* Cleanup */ + dp_test_npf_fw_del(&fw, false); + + dp_test_nl_del_ip_addr_and_connected("dp1T0", "2001::1/64"); +} DP_END_TEST; + +DP_START_TEST(ipv6_tcp_shadow, drop) +{ + struct dp_test_expected *test_exp; + struct rte_mbuf *test_pak; + + npf_orig_ipv6_tcp_shadow_setup(&test_exp, &test_pak); + + struct dp_test_npf_rule_t rules[] = { + { + .rule = "1", + .pass = BLOCK, + .stateful = STATELESS, + .npf = "proto-final=6 src-port=41000" + " rproc=markdscp(12)"}, + RULE_DEF_PASS, + NULL_RULE }; + + struct dp_test_npf_ruleset_t fw = { + .rstype = "originate", + .name = "FW_TCP_ORIG", + .enable = 1, + .attach_point = "dp1T0", + .fwd = FWD, + .dir = "out", + .rules = rules + }; + dp_test_npf_fw_add(&fw, false); + + dp_test_exp_set_oif_name(test_exp, "dp1T0"); + dp_test_exp_set_fwd_status(test_exp, DP_TEST_FWD_DROPPED); + + /* Run the test. 
kernel -> intf1 -> n1 */ + dp_test_send_slowpath_pkt(test_pak, test_exp); + + /* Verify firewall packet count */ + dp_test_npf_verify_rule_pkt_count(NULL, &fw, fw.rules[0].rule, 1); + + /* Cleanup */ + dp_test_npf_fw_del(&fw, false); + + dp_test_nl_del_ip_addr_and_connected("dp1T0", "2001::1/64"); +} DP_END_TEST; + +DP_DECL_TEST_CASE(npf_orig, ipv4_icmp_transit, NULL, NULL); + +/* + * Match on ICMP type and code + * Test generates an ICMP message upon packet too big with don't fragment + * flag set + * + * 2.2.2.2 +-----+ 1.1.1.1 + * | | + * host 2.2.2.1 ------------| uut |---------------host 1.1.1.2 + * dp3T3 | | dp1T1 (mtu 1400) + * intf1 +-----+ intf2 + * + * + * --> Forwards (on output) + * Source 2.2.2.1 Destination 1.1.1.2 (length 1472, DSCP 0) + * + * <-- Back ICMP + * Source 2.2.2.2 Destination 2.2.2.1 + */ +static void npf_orig_ipv4_icmp_transit_setup( + struct dp_test_expected **exp, + struct rte_mbuf **test_pak) +{ + struct rte_mbuf *icmp_pak; + const char *neigh3_mac_str = "aa:bb:cc:dd:ee:ff"; + const char *neigh1_mac_str = "bb:aa:cc:ee:dd:ff"; + struct iphdr *ip_inner; + struct icmphdr *icph; + struct iphdr *ip; + int len = 1472; + int icmplen; + + /* Set up the interface addresses */ + dp_test_nl_add_ip_addr_and_connected("dp1T1", "1.1.1.1/24"); + dp_test_nl_add_ip_addr_and_connected("dp3T3", "2.2.2.2/24"); + + dp_test_netlink_set_interface_mtu("dp1T1", 1400); + + /* Add the nh arp we want the packet to follow */ + dp_test_netlink_add_neigh("dp3T3", "2.2.2.1", neigh3_mac_str); + dp_test_netlink_add_neigh("dp1T1", "1.1.1.2", neigh1_mac_str); + + /* Create pak to match the route added above */ + *test_pak = dp_test_create_ipv4_pak("2.2.2.1", "1.1.1.2", + 1, &len); + ip = iphdr(*test_pak); + dp_test_set_pak_ip_field(ip, DP_TEST_SET_DF, 1); + + (void)dp_test_pktmbuf_eth_init(*test_pak, + dp_test_intf_name2mac_str("dp3T3"), + neigh3_mac_str, RTE_ETHER_TYPE_IPV4); + + /* + * Expected packet + */ + /* Create expected icmp packet */ + icmplen = sizeof(struct
iphdr) + 576; + icmp_pak = dp_test_create_icmp_ipv4_pak("2.2.2.2", "2.2.2.1", + ICMP_DEST_UNREACH, + ICMP_FRAG_NEEDED, + DPT_ICMP_FRAG_DATA(1400), + 1, &icmplen, + iphdr(*test_pak), + &ip, &icph); + + (void)dp_test_pktmbuf_eth_init(icmp_pak, + neigh3_mac_str, + dp_test_intf_name2mac_str("dp3T3"), + RTE_ETHER_TYPE_IPV4); + + dp_test_set_pak_ip_field(ip, DP_TEST_SET_TOS, + IPTOS_PREC_INTERNETCONTROL); + + ip_inner = (struct iphdr *)(icph + 1); + /* + * The TTL allowed to be changed from the original. From RFC + * 1812 s4.3.2.3: + * The returned IP header (and user data) MUST be identical to + * that which was received, except that the router is not + * required to undo any modifications to the IP header that are + * normally performed in forwarding that were performed before + * the error was detected (e.g., decrementing the TTL, or + * updating options) + */ + dp_test_set_pak_ip_field(ip_inner, DP_TEST_SET_TTL, + DP_TEST_PAK_DEFAULT_TTL - 1); + + ip = iphdr(icmp_pak); + dp_test_set_pak_ip_field(ip, DP_TEST_SET_TOS, IPTOS_DSCP_AF12); + + *exp = dp_test_exp_create(icmp_pak); + rte_pktmbuf_free(icmp_pak); +} + +DP_START_TEST(ipv4_icmp_transit, packet_to_big_dscp_remark) +{ + struct dp_test_expected *exp; + struct rte_mbuf *test_pak; + + npf_orig_ipv4_icmp_transit_setup(&exp, &test_pak); + + struct dp_test_npf_rule_t rules[] = { + { + .rule = "1", + .pass = PASS, + .stateful = STATELESS, + .npf = "proto-final=1 rproc=markdscp(12)"}, + RULE_DEF_BLOCK, + NULL_RULE }; + + struct dp_test_npf_ruleset_t fw = { + .rstype = "originate", + .name = "FW_ICMPv4_ORIG", + .enable = 1, + .attach_point = "dp3T3", + .fwd = FWD, + .dir = "out", + .rules = rules + }; + dp_test_npf_fw_add(&fw, false); + + dp_test_exp_set_oif_name(exp, "dp3T3"); + + /* now send test pak and check we get expected back */ + dp_test_pak_receive(test_pak, "dp3T3", exp); + + /* After test validations */ + dp_test_npf_verify_rule_pkt_count(NULL, &fw, fw.rules[0].rule, 1); + + /* Clean Up */ + 
dp_test_npf_fw_del(&fw, false); + + dp_test_netlink_del_neigh("dp3T3", "2.2.2.1", "aa:bb:cc:dd:ee:ff"); + dp_test_netlink_del_neigh("dp1T1", "1.1.1.2", "bb:aa:cc:ee:dd:ff"); + dp_test_nl_del_ip_addr_and_connected("dp1T1", "1.1.1.1/24"); + dp_test_nl_del_ip_addr_and_connected("dp3T3", "2.2.2.2/24"); + + dp_test_netlink_set_interface_mtu("dp1T1", 1500); +} DP_END_TEST; + +DP_START_TEST(ipv4_icmp_transit, drop) +{ + struct dp_test_expected *exp; + struct rte_mbuf *test_pak; + + npf_orig_ipv4_icmp_transit_setup(&exp, &test_pak); + + struct dp_test_npf_rule_t rules[] = { + { + .rule = "1", + .pass = BLOCK, + .stateful = STATELESS, + .npf = "proto-final=1 rproc=markdscp(12)"}, + RULE_DEF_PASS, + NULL_RULE }; + + struct dp_test_npf_ruleset_t fw = { + .rstype = "originate", + .name = "FW_ICMPv4_ORIG", + .enable = 1, + .attach_point = "dp3T3", + .fwd = FWD, + .dir = "out", + .rules = rules + }; + dp_test_npf_fw_add(&fw, false); + + dp_test_exp_set_oif_name(exp, "dp3T3"); + dp_test_exp_set_fwd_status(exp, DP_TEST_FWD_DROPPED); + + /* Run test */ + dp_test_pak_receive(test_pak, "dp3T3", exp); + + /* After test validations */ + dp_test_npf_verify_rule_pkt_count(NULL, &fw, fw.rules[0].rule, 1); + + /* Clean Up */ + dp_test_npf_fw_del(&fw, false); + + dp_test_netlink_del_neigh("dp3T3", "2.2.2.1", "aa:bb:cc:dd:ee:ff"); + dp_test_netlink_del_neigh("dp1T1", "1.1.1.2", "bb:aa:cc:ee:dd:ff"); + dp_test_nl_del_ip_addr_and_connected("dp1T1", "1.1.1.1/24"); + dp_test_nl_del_ip_addr_and_connected("dp3T3", "2.2.2.2/24"); + + dp_test_netlink_set_interface_mtu("dp1T1", 1500); +} DP_END_TEST; + +DP_DECL_TEST_CASE(npf_orig, ipv6_icmp_transit, NULL, NULL); +/* + * Test creates ipv6 icmp packet with DF bit set and routes it + * to dataplane interface that has mtu less than the packet size. + * Router creates and sends ipv6 icmp packet too big message back. + * Originate firewall is configured in the output interface to verify + * dscp mark function and action drop.
+ * + * 2001:1:1::1/64 +-----+ 2002:2:2::2/64 + * | | + * host 2001:1:1::2 --------| uut |---------------host 2002:2:2::1 + * dp1T0 | | dp2T1 (mtu 1500) + * intf1 +-----+ intf2 + * Route: + * * + * + * --> Forwards + * Source 2001:1:1::2 Destination 2002:2:2::1 (length 1572, DSCP 0) + * + * <-- Back ICMP + * Source 2001:1:1::1 Destination 2001:1:1::2 (DSCP AF12) + */ +static void npf_orig_ipv6_icmp_transit_setup( + struct dp_test_expected **exp, + struct rte_mbuf **test_pak) +{ + struct rte_mbuf *icmp_pak; + const char *neigh1_mac_str = "aa:bb:cc:dd:ee:10"; + const char *neigh2_mac_str = "bb:aa:cc:ee:dd:21"; + int len = 1572; + int icmplen; + struct ip6_hdr *ip6, *in6_inner; + struct icmp6_hdr *icmp6; + + /* Set up the interface addresses */ + dp_test_nl_add_ip_addr_and_connected("dp1T0", "2001:1:1::1/64"); + dp_test_nl_add_ip_addr_and_connected("dp2T1", "2002:2:2::2/64"); + + /* Add the route / nh neighbour we want the packet to follow */ + dp_test_netlink_add_neigh("dp2T1", "2002:2:2::1", neigh2_mac_str); + + /* And the neighbour for the return icmp packet */ + dp_test_netlink_add_neigh("dp1T0", "2001:1:1::2", neigh1_mac_str); + /* Create pak to match the route added above */ + *test_pak = dp_test_create_ipv6_pak("2001:1:1::2", "2002:2:2::1", + 1, &len); + dp_test_pktmbuf_eth_init(*test_pak, + dp_test_intf_name2mac_str("dp1T0"), + neigh1_mac_str, RTE_ETHER_TYPE_IPV6); + + /* + * Expected packet + */ + icmplen = 1280 - sizeof(struct ip6_hdr) - sizeof(struct icmp6_hdr); + icmp_pak = dp_test_create_icmp_ipv6_pak("2001:1:1::1", "2001:1:1::2", + ICMP6_PACKET_TOO_BIG, + 0, /* code */ + 1500 /* mtu */, + 1, &icmplen, + ip6hdr(*test_pak), + &ip6, &icmp6); + (void)dp_test_pktmbuf_eth_init(icmp_pak, + neigh1_mac_str, + dp_test_intf_name2mac_str("dp1T0"), + RTE_ETHER_TYPE_IPV6); + + /* Forwarding code will have already decremented hop limit */ + in6_inner = (struct ip6_hdr *)(icmp6 + 1); + in6_inner->ip6_hlim--; + dp_test_set_pak_ip6_field(ip6, DP_TEST_SET_TOS,
IPTOS_DSCP_AF12); + + icmp6->icmp6_cksum = 0; + icmp6->icmp6_cksum = dp_test_ipv6_icmp_cksum(icmp_pak, ip6, icmp6); + + *exp = dp_test_exp_create(icmp_pak); + rte_pktmbuf_free(icmp_pak); +} + +DP_START_TEST(ipv6_icmp_transit, packet_to_big_dscp_remark) +{ + struct dp_test_expected *exp = NULL; + struct rte_mbuf *test_pak = NULL; + + npf_orig_ipv6_icmp_transit_setup(&exp, &test_pak); + + struct dp_test_npf_rule_t rules[] = { + { + .rule = "1", + .pass = PASS, + .stateful = STATELESS, + .npf = "proto-final=58 rproc=markdscp(12)"}, + RULE_DEF_BLOCK, + NULL_RULE }; + + struct dp_test_npf_ruleset_t fw = { + .rstype = "originate", + .name = "FW_ICMPv6_ORIG", + .enable = 1, + .attach_point = "dp1T0", + .fwd = FWD, + .dir = "out", + .rules = rules + }; + dp_test_npf_fw_add(&fw, false); + + /* Set test expectations */ + dp_test_exp_set_oif_name(exp, "dp1T0"); + + /* Run test */ + dp_test_pak_receive(test_pak, "dp1T0", exp); + + /* After test validations */ + dp_test_npf_verify_rule_pkt_count(NULL, &fw, fw.rules[0].rule, 1); + + /* Clean Up */ + dp_test_npf_fw_del(&fw, false); + + dp_test_netlink_del_neigh("dp2T1", "2002:2:2::1", "bb:aa:cc:ee:dd:21"); + dp_test_netlink_del_neigh("dp1T0", "2001:1:1::2", "aa:bb:cc:dd:ee:10"); + + dp_test_nl_del_ip_addr_and_connected("dp1T0", "2001:1:1::1/64"); + dp_test_nl_del_ip_addr_and_connected("dp2T1", "2002:2:2::2/64"); + +} DP_END_TEST; + +DP_START_TEST(ipv6_icmp_transit, drop) +{ + struct dp_test_expected *exp = NULL; + struct rte_mbuf *test_pak = NULL; + + npf_orig_ipv6_icmp_transit_setup(&exp, &test_pak); + + struct dp_test_npf_rule_t rules[] = { + { + .rule = "1", + .pass = BLOCK, + .stateful = STATELESS, + .npf = "proto-final=58 rproc=markdscp(12)"}, + RULE_DEF_PASS, + NULL_RULE }; + + struct dp_test_npf_ruleset_t fw = { + .rstype = "originate", + .name = "FW_ICMPv6_ORIG", + .enable = 1, + .attach_point = "dp1T0", + .fwd = FWD, + .dir = "out", + .rules = rules + }; + dp_test_npf_fw_add(&fw, false); + + /* Set test expectations */ + 
dp_test_exp_set_oif_name(exp, "dp1T0"); + dp_test_exp_set_fwd_status(exp, DP_TEST_FWD_DROPPED); + + /* Run test */ + dp_test_pak_receive(test_pak, "dp1T0", exp); + + /* After test validations */ + dp_test_npf_verify_rule_pkt_count(NULL, &fw, fw.rules[0].rule, 1); + + /* Clean Up */ + dp_test_npf_fw_del(&fw, false); + + dp_test_netlink_del_neigh("dp2T1", "2002:2:2::1", "bb:aa:cc:ee:dd:21"); + dp_test_netlink_del_neigh("dp1T0", "2001:1:1::2", "aa:bb:cc:dd:ee:10"); + + dp_test_nl_del_ip_addr_and_connected("dp1T0", "2001:1:1::1/64"); + dp_test_nl_del_ip_addr_and_connected("dp2T1", "2002:2:2::2/64"); + +} DP_END_TEST; + + +DP_DECL_TEST_CASE(npf_orig, ipv6_nd_na, NULL, NULL); +/* + * Test generates ND Solicitation message and sends to Router. + * Router replies with ND Advertisement. + * Originate firewall is configured in the output interface to verify + * dscp mark function and action drop. + * + * fe80::5054:ff:fe79:3f5/64 + * 2001:1:1::1/64 +-----+ + * | | + * host fe80::409f:1ff:fee8:101 ----| uut | + * dp1T0 | | + * intf1 +-----+ + * + * --> Forwards NS + * Source fe80::409f:1ff:fee8:101 Destination ff02::1:ff00:2 + * + * <-- Back NA + * Source fe80::5054:ff:fe79:3f5 Destination fe80::409f:1ff:fee8:101 + */ +static struct rte_mbuf *dp_test_create_na_pak(const char *saddr, + const char *daddr, uint16_t tos, const char *smac, + const char *dmac, const char *target) +{ + struct rte_mbuf *na_pak = NULL; + struct nd_neighbor_advert *nd_na = NULL; + struct ip6_hdr *ip6 = NULL; + struct nd_opt_hdr *nd_opt = NULL; + struct icmp6_hdr *icmp6 = NULL; + struct in6_addr addr6; + + int optlen = (sizeof(struct nd_opt_hdr) + + RTE_ETHER_ADDR_LEN + 7) & ~7; + int icmplen = sizeof(struct nd_neighbor_solicit) - + sizeof(struct icmp6_hdr) + optlen; + na_pak = dp_test_create_icmp_ipv6_pak(saddr, + daddr, + ND_NEIGHBOR_ADVERT, + 0, /* code */ + 0, + 1, &icmplen, + NULL, + &ip6, &icmp6); + + ip6->ip6_hlim = 255; + dp_test_set_pak_ip6_field(ip6, DP_TEST_SET_TOS, tos); + + nd_na = (struct
nd_neighbor_advert *)icmp6; + + if (inet_pton(AF_INET6, target, &addr6) != 1) + dp_test_fail("Couldn't create ipv6 address"); + + memcpy(nd_na->nd_na_target.s6_addr, addr6.s6_addr, 16); + nd_na->nd_na_flags_reserved = ND_NA_FLAG_ROUTER + | ND_NA_FLAG_SOLICITED | ND_NA_FLAG_OVERRIDE; + + nd_opt = (struct nd_opt_hdr *)(nd_na + 1); + memset((void *)nd_opt, 0, optlen); + nd_opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; + nd_opt->nd_opt_len = optlen >> 3; + + dp_test_pktmbuf_eth_init(na_pak, dmac, smac, RTE_ETHER_TYPE_IPV6); + + rte_ether_addr_copy(&rte_pktmbuf_mtod(na_pak, + struct rte_ether_hdr *)->s_addr, + (struct rte_ether_addr *)(nd_opt + 1)); + + icmp6->icmp6_cksum = 0; + icmp6->icmp6_cksum = dp_test_ipv6_icmp_cksum(na_pak, + ip6hdr(na_pak), icmp6); + + return na_pak; +} + +static struct rte_mbuf *dp_test_create_ns_pak(const char *saddr, + const char *daddr, uint16_t tos, const char *smac, + const char *dmac, const char *target) +{ + struct rte_mbuf *ns_pak = NULL; + struct nd_neighbor_solicit *nd_ns = NULL; + struct ip6_hdr *ip6 = NULL; + struct nd_opt_hdr *nd_opt = NULL; + struct icmp6_hdr *icmp6 = NULL; + struct in6_addr addr6; + + int optlen = (sizeof(struct nd_opt_hdr) + RTE_ETHER_ADDR_LEN + 7) & ~7; + int icmplen = sizeof(struct nd_neighbor_solicit) - + sizeof(struct icmp6_hdr) + optlen; + ns_pak = dp_test_create_icmp_ipv6_pak(saddr, + daddr, + ND_NEIGHBOR_SOLICIT, + 0, /* code */ + 0, + 1, &icmplen, + NULL, + &ip6, &icmp6); + + ip6->ip6_hlim = 255; + dp_test_set_pak_ip6_field(ip6, DP_TEST_SET_TOS, tos); + + nd_ns = (struct nd_neighbor_solicit *)icmp6; + + if (inet_pton(AF_INET6, target, &addr6) != 1) + dp_test_fail("Couldn't create ipv6 address"); + + memcpy(nd_ns->nd_ns_target.s6_addr, addr6.s6_addr, 16); + + nd_opt = (struct nd_opt_hdr *)(nd_ns + 1); + memset((void *)nd_opt, 0, optlen); + nd_opt->nd_opt_type = ND_OPT_SOURCE_LINKADDR; + nd_opt->nd_opt_len = optlen >> 3; + + dp_test_pktmbuf_eth_init(ns_pak, dmac, smac, RTE_ETHER_TYPE_IPV6); + + 
rte_ether_addr_copy(&rte_pktmbuf_mtod(ns_pak, + struct rte_ether_hdr *)->s_addr, + (struct rte_ether_addr *)(nd_opt + 1)); + + icmp6->icmp6_cksum = 0; + icmp6->icmp6_cksum = + dp_test_ipv6_icmp_cksum(ns_pak, ip6hdr(ns_pak), icmp6); + + return ns_pak; +} + +DP_START_TEST(ipv6_nd_na, packet_na_dscp_remark) +{ + struct dp_test_expected *exp; + struct rte_mbuf *ns_pak; + struct rte_mbuf *exp_na_pak; + const char *neigh1_mac_str = "aa:bb:cc:dd:ee:10"; + const char *host_ll_ip = "fe80::409f:1ff:fee8:101"; + const char *router_ll_ip = "fe80::5054:ff:fe79:3f5"; + const char *router_ll_ip_subnet = "fe80::5054:ff:fe79:3f5/64"; + + /* Set up the interface addresses */ + dp_test_netlink_add_ip_address("dp1T0", router_ll_ip_subnet); + dp_test_nl_add_ip_addr_and_connected("dp1T0", "2001:1:1::1/64"); + + /* And the neighbour for the return icmp packet */ + dp_test_netlink_add_neigh("dp1T0", host_ll_ip, neigh1_mac_str); + + struct dp_test_npf_rule_t rules[] = { + { + .rule = "1", + .pass = PASS, + .stateful = STATELESS, + .npf = "proto-final=58 rproc=markdscp(12)"}, + RULE_DEF_BLOCK, + NULL_RULE }; + + struct dp_test_npf_ruleset_t fw = { + .rstype = "originate", + .name = "FW_ICMPv6_ORIG", + .enable = 1, + .attach_point = "dp1T0", + .fwd = FWD, + .dir = "out", + .rules = rules + }; + dp_test_npf_fw_add(&fw, false); + + /* + * Test packet + */ + ns_pak = dp_test_create_ns_pak(host_ll_ip, "ff02::1:ff00:2", + IPTOS_CLASS_CS5, neigh1_mac_str, "33:33:ff:00:00:02", + "2001:1:1::1"); + + /* + * Expected packet + */ + exp_na_pak = dp_test_create_na_pak(router_ll_ip, host_ll_ip, + IPTOS_DSCP_AF12, dp_test_intf_name2mac_str("dp1T0"), + neigh1_mac_str, "2001:1:1::1"); + + exp = dp_test_exp_create(exp_na_pak); + rte_pktmbuf_free(exp_na_pak); + + /* Set test expectations */ + dp_test_exp_set_oif_name(exp, "dp1T0"); + + /* Run test */ + dp_test_pak_receive(ns_pak, "dp1T0", exp); + + /* After test validations */ + dp_test_npf_verify_rule_pkt_count(NULL, &fw, fw.rules[0].rule, 1); + + /* Clean Up 
*/ + dp_test_npf_fw_del(&fw, false); + + dp_test_netlink_del_neigh("dp1T0", host_ll_ip, neigh1_mac_str); + dp_test_nl_del_ip_addr_and_connected("dp1T0", "2001:1:1::1/64"); + dp_test_netlink_del_ip_address("dp1T0", router_ll_ip_subnet); +} DP_END_TEST; + +DP_START_TEST(ipv6_nd_na, drop) +{ + struct dp_test_expected *exp; + struct rte_mbuf *ns_pak; + struct rte_mbuf *exp_na_pak; + const char *neigh1_mac_str = "aa:bb:cc:dd:ee:10"; + const char *host_ll_ip = "fe80::409f:1ff:fee8:101"; + const char *router_ll_ip = "fe80::5054:ff:fe79:3f5"; + const char *router_ll_ip_subnet = "fe80::5054:ff:fe79:3f5/64"; + + /* Set up the interface addresses */ + dp_test_netlink_add_ip_address("dp1T0", router_ll_ip_subnet); + dp_test_nl_add_ip_addr_and_connected("dp1T0", "2001:1:1::1/64"); + + /* And the neighbour for the return icmp packet */ + dp_test_netlink_add_neigh("dp1T0", host_ll_ip, neigh1_mac_str); + + struct dp_test_npf_rule_t rules[] = { + { + .rule = "1", + .pass = BLOCK, + .stateful = STATELESS, + .npf = "proto-final=58 rproc=markdscp(12)"}, + RULE_DEF_PASS, + NULL_RULE }; + + struct dp_test_npf_ruleset_t fw = { + .rstype = "originate", + .name = "FW_ICMPv6_ORIG", + .enable = 1, + .attach_point = "dp1T0", + .fwd = FWD, + .dir = "out", + .rules = rules + }; + dp_test_npf_fw_add(&fw, false); + + /* + * Test packet + */ + ns_pak = dp_test_create_ns_pak(host_ll_ip, "ff02::1:ff00:2", + IPTOS_CLASS_CS5, neigh1_mac_str, "33:33:ff:00:00:02", + "2001:1:1::1"); + + /* + * Expected packet + */ + exp_na_pak = dp_test_create_na_pak(router_ll_ip, host_ll_ip, + IPTOS_DSCP_AF12, dp_test_intf_name2mac_str("dp1T0"), + neigh1_mac_str, "2001:1:1::1"); + + exp = dp_test_exp_create(exp_na_pak); + rte_pktmbuf_free(exp_na_pak); + + /* Set test expectations */ + dp_test_exp_set_oif_name(exp, "dp1T0"); + dp_test_exp_set_fwd_status(exp, DP_TEST_FWD_DROPPED); + + /* Run test */ + dp_test_pak_receive(ns_pak, "dp1T0", exp); + + /* After test validations */ + dp_test_npf_verify_rule_pkt_count(NULL, &fw, 
fw.rules[0].rule, 1); + + /* Clean Up */ + dp_test_npf_fw_del(&fw, false); + + dp_test_netlink_del_neigh("dp1T0", host_ll_ip, neigh1_mac_str); + dp_test_nl_del_ip_addr_and_connected("dp1T0", "2001:1:1::1/64"); + dp_test_netlink_del_ip_address("dp1T0", router_ll_ip_subnet); +} DP_END_TEST; + +DP_DECL_TEST_CASE(npf_local, cgnat_icmpv4, NULL, NULL); +/* + * Test sends ipv4 icmp echo request message to router cgnat public pool. + * Router cgnat generates and sends ipv4 icmp echo reply. + * Originate firewall is configured in the output interface to verify + * dscp mark function and action drop. + * + * + * Private Public + * 1.1.1.254/24 + * +-----+ + * | |--------------- 1.1.1.1/24 + * | dut | + * | | pool 1.1.1.11/32 + * | | + * +-----+ dp2T1 + * + * <--- ICMP Echo Req Source 1.1.1.1 Destination 1.1.1.11 + * ---> ICMP Echo Reply Source 1.1.1.11 Destination 1.1.1.1 + */ +static void cgnat_icmpv4_setup(struct dp_test_expected **test_exp, + struct rte_mbuf **test_pak) +{ + struct rte_mbuf *exp_pak = NULL; + struct iphdr *ip = NULL; + + dp_test_nl_add_ip_addr_and_connected("dp2T1", "1.1.1.254/24"); + dp_test_netlink_add_neigh("dp2T1", "1.1.1.1", "aa:bb:cc:dd:2:b1"); + + /* Pre IPv4 ICMP packet */ + struct dp_test_pkt_desc_t test_pak_ICMP = { + .text = "IPv4 ICMP req", + .len = 20, + .ether_type = RTE_ETHER_TYPE_IPV4, + .l3_src = "1.1.1.1", + .l2_src = "aa:bb:cc:dd:2:b1", + .l3_dst = "1.1.1.11", + .l2_dst = dp_test_intf_name2mac_str("dp2T1"), + .proto = IPPROTO_ICMP, + .l4 = { + .icmp = { + .type = ICMP_ECHO, + .code = 0, + { + .dpt_icmp_id = 1024, + .dpt_icmp_seq = 0, + }, + } + }, + .rx_intf = "dp2T1", + .tx_intf = "dp2T1" + }; + + /* Post IPv4 ICMP packet */ + struct dp_test_pkt_desc_t exp_pkt_ICMP = { + .text = "IPv4 ICMP echo reply from NAT", + .len = 20, + .ether_type = RTE_ETHER_TYPE_IPV4, + .l3_src = "1.1.1.11", + .l2_src = dp_test_intf_name2mac_str("dp2T1"), + .l3_dst = "1.1.1.1", + .l2_dst = "aa:bb:cc:dd:2:b1", + .proto = IPPROTO_ICMP, + .l4 = { + .icmp = { +
.type = ICMP_ECHOREPLY, + .code = 0, + { + .dpt_icmp_id = 1024, + .dpt_icmp_seq = 0, + }, + } + }, + .rx_intf = "dp2T1", + .tx_intf = "dp2T1" + }; + + *test_pak = dp_test_v4_pkt_from_desc(&test_pak_ICMP); + exp_pak = dp_test_v4_pkt_from_desc(&exp_pkt_ICMP); + ip = iphdr(exp_pak); + dp_test_set_pak_ip_field(ip, DP_TEST_SET_TOS, IPTOS_DSCP_AF12); + dp_test_pktmbuf_eth_init(exp_pak, exp_pkt_ICMP.l2_dst, + exp_pkt_ICMP.l2_src, exp_pkt_ICMP.ether_type); + + *test_exp = dp_test_exp_create(exp_pak); + rte_pktmbuf_free(exp_pak); +} + +static void cgnat_icmpv4_teardown(void) +{ + /* Check cgnat feature is disabled */ + dp_test_wait_for_pl_feat_gone("dp2T1", "vyatta:ipv4-cgnat-in", + "ipv4-validate"); + dp_test_wait_for_pl_feat_gone("dp2T1", "vyatta:ipv4-cgnat-out", + "ipv4-out"); + + /* Cleanup */ + dp_test_netlink_del_neigh("dp2T1", "1.1.1.1", "aa:bb:cc:dd:2:b1"); + dp_test_nl_del_ip_addr_and_connected("dp2T1", "1.1.1.254/24"); + dp_test_npf_cleanup(); +} + +#define cgnat_policy_add(_a, _b, _c, _d, _e, _f, _g, _h, _i) \ + _cgnat_policy_add(_a, _b, _c, _d, _e, _f, _g, _h, _i, true, \ + __FILE__, __func__, __LINE__) + +DP_START_TEST(cgnat_icmpv4, packet_dscp_remark) +{ + struct dp_test_expected *test_exp = NULL; + struct rte_mbuf *test_pak = NULL; + + cgnat_icmpv4_setup(&test_exp, &test_pak); + + dp_test_npf_cmd("nat-ut pool add POOL1 " + "type=cgnat " + "prefix=RANGE2/1.1.1.11/32", false); + + cgnat_policy_add("POLICY1", 10, "100.64.0.0/12", "POOL1", "dp2T1", + CGN_MAP_EIM, CGN_FLTR_EIF, CGN_3TUPLE, true); + + struct dp_test_npf_rule_t rules[] = { + { + .rule = "1", + .pass = PASS, + .stateful = STATELESS, + .npf = "proto-final=1 rproc=markdscp(12)"}, + RULE_DEF_BLOCK, + NULL_RULE }; + + struct dp_test_npf_ruleset_t fw = { + .rstype = "originate", + .name = "FW_ICMPv4_ORIG", + .enable = 1, + .attach_point = "dp2T1", + .fwd = FWD, + .dir = "out", + .rules = rules + }; + dp_test_npf_fw_add(&fw, false); + + /* Set test expectations */ + + dp_test_exp_set_oif_name(test_exp, 
"dp2T1"); + dp_test_exp_set_fwd_status(test_exp, DP_TEST_FWD_FORWARDED); + + /* Run test */ + dp_test_pak_receive(test_pak, "dp2T1", test_exp); + + /* After test validations */ + dp_test_npf_verify_rule_pkt_count(NULL, &fw, fw.rules[0].rule, 1); + + /* Clean Up */ + dp_test_npf_fw_del(&fw, false); + + cgnat_policy_del("POLICY1", 10, "dp2T1"); + dp_test_npf_cmd_fmt(false, "nat-ut pool delete POOL1"); + + cgnat_icmpv4_teardown(); +} DP_END_TEST; + +DP_START_TEST(cgnat_icmpv4, drop) +{ + struct dp_test_expected *test_exp = NULL; + struct rte_mbuf *test_pak = NULL; + + cgnat_icmpv4_setup(&test_exp, &test_pak); + + dp_test_npf_cmd("nat-ut pool add POOL1 " + "type=cgnat " + "prefix=RANGE2/1.1.1.11/32", false); + + cgnat_policy_add("POLICY1", 10, "100.64.0.0/12", "POOL1", "dp2T1", + CGN_MAP_EIM, CGN_FLTR_EIF, CGN_3TUPLE, true); + + struct dp_test_npf_rule_t rules[] = { + { + .rule = "1", + .pass = BLOCK, + .stateful = STATELESS, + .npf = "proto-final=1 rproc=markdscp(12)"}, + RULE_DEF_BLOCK, + NULL_RULE }; + + struct dp_test_npf_ruleset_t fw = { + .rstype = "originate", + .name = "FW_ICMPv4_ORIG", + .enable = 1, + .attach_point = "dp2T1", + .fwd = FWD, + .dir = "out", + .rules = rules + }; + dp_test_npf_fw_add(&fw, false); + + /* Set test expectations */ + + dp_test_exp_set_oif_name(test_exp, "dp2T1"); + dp_test_exp_set_fwd_status(test_exp, DP_TEST_FWD_DROPPED); + + /* Run test */ + dp_test_pak_receive(test_pak, "dp2T1", test_exp); + + /* After test validations */ + dp_test_npf_verify_rule_pkt_count(NULL, &fw, fw.rules[0].rule, 1); + + /* Clean Up */ + dp_test_npf_fw_del(&fw, false); + + cgnat_policy_del("POLICY1", 10, "dp2T1"); + dp_test_npf_cmd_fmt(false, "nat-ut pool delete POOL1"); + + cgnat_icmpv4_teardown(); +} DP_END_TEST; + +/* + * IPv6 ND and originate firewall + * + * Inject an IPv6 Neighbor Solicitation pkt in order to generate a Neighbor + * Advertisement pkt. 
+ */ +DP_DECL_TEST_CASE(npf_local, npf_v6nbr1, NULL, NULL); +DP_START_TEST(npf_v6nbr1, test) +{ + struct dp_test_expected *exp; + struct rte_mbuf *ns_pak; + struct rte_mbuf *exp_na_pak; + const char *neigh1_mac_str = "aa:bb:cc:dd:ee:10"; + const char *host_ll_ip = "fe80::409f:1ff:fee8:101"; + const char *router_ll_ip = "fe80::5054:ff:fe79:3f5"; + const char *router_ll_ip_subnet = "fe80::5054:ff:fe79:3f5/64"; + + /* Set up the interface addresses */ + dp_test_netlink_add_ip_address("dp1T0", router_ll_ip_subnet); + dp_test_nl_add_ip_addr_and_connected("dp1T0", "2001:1:1::1/64"); + + /* And the neighbour for the return icmp packet */ + dp_test_netlink_add_neigh("dp1T0", host_ll_ip, neigh1_mac_str); + + struct dp_test_npf_rule_t rules[] = { + { + /* Router Solicitation */ + .rule = "10", + .pass = PASS, + .stateful = STATELESS, + .npf = "proto=58 icmpv6=133" + }, + { + /* Router Advertisement */ + .rule = "20", + .pass = PASS, + .stateful = STATELESS, + .npf = "proto=58 icmpv6=134" + }, + { + /* Neighbor Solicitation */ + .rule = "30", + .pass = PASS, + .stateful = STATELESS, + .npf = "proto=58 icmpv6=135" + }, + { + /* Neighbor Advertisement */ +#define NA_RULE_INDEX 3 + .rule = "40", + .pass = PASS, + .stateful = STATELESS, + .npf = "proto=58 icmpv6=136" + }, + RULE_DEF_BLOCK, + NULL_RULE + }; + + struct dp_test_npf_ruleset_t fw = { + .rstype = "originate", + .name = "FW_ORIG", + .enable = 1, + .attach_point = "dp1T0", + .fwd = FWD, + .dir = "out", + .rules = rules + }; + dp_test_npf_fw_add(&fw, false); + + /* + * Test packet + */ + ns_pak = dp_test_create_ns_pak(host_ll_ip, "ff02::1:ff00:2", + IPTOS_CLASS_CS6, neigh1_mac_str, "33:33:ff:00:00:02", + "2001:1:1::1"); + + /* + * Expected packet + */ + exp_na_pak = dp_test_create_na_pak(router_ll_ip, host_ll_ip, + IPTOS_CLASS_CS6, dp_test_intf_name2mac_str("dp1T0"), + neigh1_mac_str, "2001:1:1::1"); + + exp = dp_test_exp_create(exp_na_pak); + rte_pktmbuf_free(exp_na_pak); + + /* Set test expectations */ + 
dp_test_exp_set_oif_name(exp, "dp1T0"); + dp_test_exp_set_fwd_status(exp, DP_TEST_FWD_FORWARDED); + + /* Run test */ + dp_test_pak_receive(ns_pak, "dp1T0", exp); + + /* + * Change firewall to drop NA packet + */ + dp_test_npf_fw_del(&fw, false); + rules[NA_RULE_INDEX].pass = BLOCK; + dp_test_npf_fw_add(&fw, false); + + /* + * Test packet + */ + ns_pak = dp_test_create_ns_pak(host_ll_ip, "ff02::1:ff00:2", + IPTOS_CLASS_CS6, neigh1_mac_str, "33:33:ff:00:00:02", + "2001:1:1::1"); + + /* + * Expected packet + */ + exp_na_pak = dp_test_create_na_pak(router_ll_ip, host_ll_ip, + IPTOS_CLASS_CS6, dp_test_intf_name2mac_str("dp1T0"), + neigh1_mac_str, "2001:1:1::1"); + + exp = dp_test_exp_create(exp_na_pak); + rte_pktmbuf_free(exp_na_pak); + + /* Set test expectations */ + dp_test_exp_set_oif_name(exp, "dp1T0"); + dp_test_exp_set_fwd_status(exp, DP_TEST_FWD_DROPPED); + + /* Run test */ + dp_test_pak_receive(ns_pak, "dp1T0", exp); + + /* Clean Up */ + dp_test_npf_fw_del(&fw, false); + + dp_test_netlink_del_neigh("dp1T0", host_ll_ip, neigh1_mac_str); + dp_test_nl_del_ip_addr_and_connected("dp1T0", "2001:1:1::1/64"); + dp_test_netlink_del_ip_address("dp1T0", router_ll_ip_subnet); +} DP_END_TEST; + +/* + * IPv6 ND, zone on interface, no local zone. + * + * Inject an IPv6 Neighbor Solicitation pkt in order to generate a Neighbor + * Advertisement pkt. + * + * The ND packet is forwarded. Normally a "no-zone to zone" transition would + * be blocked. However an exception is made because the flag NPF_FLAG_FROM_US + * is set. 
+ */ +DP_DECL_TEST_CASE(npf_local, npf_v6nbr2, NULL, NULL); +DP_START_TEST(npf_v6nbr2, test) +{ + struct dp_test_expected *exp; + struct rte_mbuf *ns_pak; + struct rte_mbuf *exp_na_pak; + const char *neigh1_mac_str = "aa:bb:cc:dd:ee:10"; + const char *host_ll_ip = "fe80::409f:1ff:fee8:101"; + const char *router_ll_ip = "fe80::5054:ff:fe79:3f5"; + const char *router_ll_ip_subnet = "fe80::5054:ff:fe79:3f5/64"; + bool debug = false; + + /* Set up the interface addresses */ + dp_test_netlink_add_ip_address("dp1T0", router_ll_ip_subnet); + dp_test_nl_add_ip_addr_and_connected("dp1T0", "2001:1:1::1/64"); + + /* And the neighbour for the return icmp packet */ + dp_test_netlink_add_neigh("dp1T0", host_ll_ip, neigh1_mac_str); + + struct dpt_zone_cfg cfg = { + .private = { + .name = "PRIVATE", + .intf = { "dp1T0", NULL }, + .local = false, + }, + .public = { + .name = "PUBLIC", + .intf = { "dp2T1", NULL }, + .local = false, + }, + .local = { 0 }, + .pub_to_priv = { + .name = "PUB_TO_PRIV", + .pass = BLOCK, + .stateful = STATELESS, + .npf = "", + }, + .priv_to_pub = { + .name = "PRIV_TO_PUB", + .pass = PASS, + .stateful = STATELESS, + .npf = "", + }, + .local_to_priv = { 0 }, + .priv_to_local = {0 }, + .local_to_pub = { 0 }, + .pub_to_local = { 0 }, + }; + + dpt_zone_cfg(&cfg, true, debug); + + /* + * Test packet + */ + ns_pak = dp_test_create_ns_pak(host_ll_ip, "ff02::1:ff00:2", + IPTOS_CLASS_CS6, neigh1_mac_str, "33:33:ff:00:00:02", + "2001:1:1::1"); + + /* + * Expected packet + */ + exp_na_pak = dp_test_create_na_pak(router_ll_ip, host_ll_ip, + IPTOS_CLASS_CS6, dp_test_intf_name2mac_str("dp1T0"), + neigh1_mac_str, "2001:1:1::1"); + + exp = dp_test_exp_create(exp_na_pak); + rte_pktmbuf_free(exp_na_pak); + + /* Set test expectations */ + dp_test_exp_set_oif_name(exp, "dp1T0"); + dp_test_exp_set_fwd_status(exp, DP_TEST_FWD_FORWARDED); + + /* Run test */ + dp_test_pak_receive(ns_pak, "dp1T0", exp); + + /* Clean Up */ + dpt_zone_cfg(&cfg, false, debug); + + 
dp_test_netlink_del_neigh("dp1T0", host_ll_ip, neigh1_mac_str); + dp_test_nl_del_ip_addr_and_connected("dp1T0", "2001:1:1::1/64"); + dp_test_netlink_del_ip_address("dp1T0", router_ll_ip_subnet); +} DP_END_TEST; + +/* + * IPv6 ND, zone on interface, local zone. Pass rule in LOCAL_TO_PRIV + * ruleset. + * + * Inject an IPv6 Neighbor Solicitation pkt in order to generate a Neighbor + * Advertisement pkt. + */ +DP_DECL_TEST_CASE(npf_local, npf_v6nbr3, NULL, NULL); +DP_START_TEST(npf_v6nbr3, test) +{ + struct dp_test_expected *exp; + struct rte_mbuf *ns_pak; + struct rte_mbuf *exp_na_pak; + const char *neigh1_mac_str = "aa:bb:cc:dd:ee:10"; + const char *host_ll_ip = "fe80::409f:1ff:fee8:101"; + const char *router_ll_ip = "fe80::5054:ff:fe79:3f5"; + const char *router_ll_ip_subnet = "fe80::5054:ff:fe79:3f5/64"; + bool debug = false; + + /* Set up the interface addresses */ + dp_test_netlink_add_ip_address("dp1T0", router_ll_ip_subnet); + dp_test_nl_add_ip_addr_and_connected("dp1T0", "2001:1:1::1/64"); + + /* And the neighbour for the return icmp packet */ + dp_test_netlink_add_neigh("dp1T0", host_ll_ip, neigh1_mac_str); + + struct dpt_zone_cfg cfg = { + .private = { + .name = "PRIVATE", + .intf = { "dp1T0", NULL }, + .local = false, + }, + .public = { + .name = "PUBLIC", + .intf = { "dp2T1", NULL }, + .local = false, + }, + .local = { + .name = "LOCAL", + .intf = { NULL }, + .local = true, + }, + .pub_to_priv = { + .name = "PUB_TO_PRIV", + .pass = BLOCK, + .stateful = STATELESS, + .npf = "", + }, + .priv_to_pub = { + .name = "PRIV_TO_PUB", + .pass = PASS, + .stateful = STATELESS, + .npf = "", + }, + .local_to_priv = { + .name = "LOCAL_TO_PRIV", + .pass = PASS, + .stateful = STATELESS, + .npf = "", + }, + .priv_to_local = { + .name = "PRIV_TO_LOCAL", + .pass = PASS, + .stateful = STATELESS, + .npf = "", + }, + .local_to_pub = { 0 }, + .pub_to_local = { 0 }, + }; + + dpt_zone_cfg(&cfg, true, debug); + + /* + * Test packet + */ + ns_pak = dp_test_create_ns_pak(host_ll_ip, 
"ff02::1:ff00:2", + IPTOS_CLASS_CS6, neigh1_mac_str, "33:33:ff:00:00:02", + "2001:1:1::1"); + + /* + * Expected packet + */ + exp_na_pak = dp_test_create_na_pak(router_ll_ip, host_ll_ip, + IPTOS_CLASS_CS6, dp_test_intf_name2mac_str("dp1T0"), + neigh1_mac_str, "2001:1:1::1"); + + exp = dp_test_exp_create(exp_na_pak); + rte_pktmbuf_free(exp_na_pak); + + /* Set test expectations */ + dp_test_exp_set_oif_name(exp, "dp1T0"); + dp_test_exp_set_fwd_status(exp, DP_TEST_FWD_FORWARDED); + + /* Run test */ + dp_test_pak_receive(ns_pak, "dp1T0", exp); + + /* Clean Up */ + dpt_zone_cfg(&cfg, false, debug); + + dp_test_netlink_del_neigh("dp1T0", host_ll_ip, neigh1_mac_str); + dp_test_nl_del_ip_addr_and_connected("dp1T0", "2001:1:1::1/64"); + dp_test_netlink_del_ip_address("dp1T0", router_ll_ip_subnet); +} DP_END_TEST; + +/* + * IPv6 ND, zone on interface, local zone. Pass rule in LOCAL_TO_PRIV + * ruleset, but packet does not match. The UNMATCHED decision is overridden + * in npf_apply_firewall. + * + * Inject an IPv6 Neighbor Solicitation pkt in order to generate a Neighbor + * Advertisement pkt. 
+ */ +DP_DECL_TEST_CASE(npf_local, npf_v6nbr4, NULL, NULL); +DP_START_TEST(npf_v6nbr4, test) +{ + struct dp_test_expected *exp; + struct rte_mbuf *ns_pak; + struct rte_mbuf *exp_na_pak; + const char *neigh1_mac_str = "aa:bb:cc:dd:ee:10"; + const char *host_ll_ip = "fe80::409f:1ff:fee8:101"; + const char *router_ll_ip = "fe80::5054:ff:fe79:3f5"; + const char *router_ll_ip_subnet = "fe80::5054:ff:fe79:3f5/64"; + bool debug = false; + + /* Set up the interface addresses */ + dp_test_netlink_add_ip_address("dp1T0", router_ll_ip_subnet); + dp_test_nl_add_ip_addr_and_connected("dp1T0", "2001:1:1::1/64"); + + /* And the neighbour for the return icmp packet */ + dp_test_netlink_add_neigh("dp1T0", host_ll_ip, neigh1_mac_str); + + struct dpt_zone_cfg cfg = { + .private = { + .name = "PRIVATE", + .intf = { "dp1T0", NULL }, + .local = false, + }, + .public = { + .name = "PUBLIC", + .intf = { "dp2T1", NULL }, + .local = false, + }, + .local = { + .name = "LOCAL", + .intf = { NULL }, + .local = true, + }, + .pub_to_priv = { + .name = "PUB_TO_PRIV", + .pass = BLOCK, + .stateful = STATELESS, + .npf = "", + }, + .priv_to_pub = { + .name = "PRIV_TO_PUB", + .pass = PASS, + .stateful = STATELESS, + .npf = "", + }, + .local_to_priv = { + .name = "LOCAL_TO_PRIV", + .pass = PASS, + .stateful = STATELESS, + .npf = "proto-final=1", + }, + .priv_to_local = { + .name = "PRIV_TO_LOCAL", + .pass = PASS, + .stateful = STATELESS, + .npf = "", + }, + .local_to_pub = { 0 }, + .pub_to_local = { 0 }, + }; + + dpt_zone_cfg(&cfg, true, debug); + + /* + * Test packet + */ + ns_pak = dp_test_create_ns_pak(host_ll_ip, "ff02::1:ff00:2", + IPTOS_CLASS_CS6, neigh1_mac_str, "33:33:ff:00:00:02", + "2001:1:1::1"); + + /* + * Expected packet + */ + exp_na_pak = dp_test_create_na_pak(router_ll_ip, host_ll_ip, + IPTOS_CLASS_CS6, dp_test_intf_name2mac_str("dp1T0"), + neigh1_mac_str, "2001:1:1::1"); + + exp = dp_test_exp_create(exp_na_pak); + rte_pktmbuf_free(exp_na_pak); + + /* Set test expectations */ +
dp_test_exp_set_oif_name(exp, "dp1T0"); + dp_test_exp_set_fwd_status(exp, DP_TEST_FWD_FORWARDED); + + /* Run test */ + dp_test_pak_receive(ns_pak, "dp1T0", exp); + + /* Clean Up */ + dpt_zone_cfg(&cfg, false, debug); + + dp_test_netlink_del_neigh("dp1T0", host_ll_ip, neigh1_mac_str); + dp_test_nl_del_ip_addr_and_connected("dp1T0", "2001:1:1::1/64"); + dp_test_netlink_del_ip_address("dp1T0", router_ll_ip_subnet); +} DP_END_TEST; + +/* + * IPv6 ND, zone on interface, local zone. Block rule in LOCAL_TO_PRIV + * ruleset. However the block rule is overridden in npf_apply_firewall. + * + * Inject an IPv6 Neighbor Solicitation pkt in order to generate a Neighbor + * Advertisement pkt. + */ +DP_DECL_TEST_CASE(npf_local, npf_v6nbr5, NULL, NULL); +DP_START_TEST(npf_v6nbr5, test) +{ + struct dp_test_expected *exp; + struct rte_mbuf *ns_pak; + struct rte_mbuf *exp_na_pak; + const char *neigh1_mac_str = "aa:bb:cc:dd:ee:10"; + const char *host_ll_ip = "fe80::409f:1ff:fee8:101"; + const char *router_ll_ip = "fe80::5054:ff:fe79:3f5"; + const char *router_ll_ip_subnet = "fe80::5054:ff:fe79:3f5/64"; + bool debug = false; + + /* Set up the interface addresses */ + dp_test_netlink_add_ip_address("dp1T0", router_ll_ip_subnet); + dp_test_nl_add_ip_addr_and_connected("dp1T0", "2001:1:1::1/64"); + + /* And the neighbour for the return icmp packet */ + dp_test_netlink_add_neigh("dp1T0", host_ll_ip, neigh1_mac_str); + + struct dpt_zone_cfg cfg = { + .private = { + .name = "PRIVATE", + .intf = { "dp1T0", NULL }, + .local = false, + }, + .public = { + .name = "PUBLIC", + .intf = { "dp2T1", NULL }, + .local = false, + }, + .local = { + .name = "LOCAL", + .intf = { NULL }, + .local = true, + }, + .pub_to_priv = { + .name = "PUB_TO_PRIV", + .pass = BLOCK, + .stateful = STATELESS, + .npf = "", + }, + .priv_to_pub = { + .name = "PRIV_TO_PUB", + .pass = PASS, + .stateful = STATELESS, + .npf = "", + }, + .local_to_priv = { + .name = "LOCAL_TO_PRIV", + .pass = BLOCK, + .stateful = STATELESS, + .npf =
"", + }, + .priv_to_local = { + .name = "PRIV_TO_LOCAL", + .pass = PASS, + .stateful = STATELESS, + .npf = "", + }, + .local_to_pub = { 0 }, + .pub_to_local = { 0 }, + }; + + dpt_zone_cfg(&cfg, true, debug); + + /* + * Test packet + * + * In this case the NPF_FLAG_FROM_US flag does *not* cause the BLOCK + * decision to be overridden in npf_apply_firewall. The + * NPF_FLAG_FROM_LOCAL and NPF_FLAG_FROM_ZONE take priority, and the + * BLOCK decision is adhered to. NOTE(review): test expects FORWARDED. + */ + ns_pak = dp_test_create_ns_pak(host_ll_ip, "ff02::1:ff00:2", + IPTOS_CLASS_CS6, neigh1_mac_str, "33:33:ff:00:00:02", + "2001:1:1::1"); + + /* + * Expected packet + */ + exp_na_pak = dp_test_create_na_pak(router_ll_ip, host_ll_ip, + IPTOS_CLASS_CS6, dp_test_intf_name2mac_str("dp1T0"), + neigh1_mac_str, "2001:1:1::1"); + + exp = dp_test_exp_create(exp_na_pak); + rte_pktmbuf_free(exp_na_pak); + + /* Set test expectations */ + dp_test_exp_set_oif_name(exp, "dp1T0"); + dp_test_exp_set_fwd_status(exp, DP_TEST_FWD_FORWARDED); + + /* Run test */ + dp_test_pak_receive(ns_pak, "dp1T0", exp); + + /* Clean Up */ + dpt_zone_cfg(&cfg, false, debug); + + dp_test_netlink_del_neigh("dp1T0", host_ll_ip, neigh1_mac_str); + dp_test_nl_del_ip_addr_and_connected("dp1T0", "2001:1:1::1/64"); + dp_test_netlink_del_ip_address("dp1T0", router_ll_ip_subnet); +} DP_END_TEST; + +/* + * IPv6 ND, zone on interface, local zone. No LOCAL_TO_PRIV ruleset. The + * implicit BLOCK is overridden in npf_get_zone_config. + * + * Inject an IPv6 Neighbor Solicitation pkt in order to generate a Neighbor + * Advertisement pkt.
+ */ +DP_DECL_TEST_CASE(npf_local, npf_v6nbr6, NULL, NULL); +DP_START_TEST(npf_v6nbr6, test) +{ + struct dp_test_expected *exp; + struct rte_mbuf *ns_pak; + struct rte_mbuf *exp_na_pak; + const char *neigh1_mac_str = "aa:bb:cc:dd:ee:10"; + const char *host_ll_ip = "fe80::409f:1ff:fee8:101"; + const char *router_ll_ip = "fe80::5054:ff:fe79:3f5"; + const char *router_ll_ip_subnet = "fe80::5054:ff:fe79:3f5/64"; + bool debug = false; + + /* Set up the interface addresses */ + dp_test_netlink_add_ip_address("dp1T0", router_ll_ip_subnet); + dp_test_nl_add_ip_addr_and_connected("dp1T0", "2001:1:1::1/64"); + + /* And the neighbour for the return icmp packet */ + dp_test_netlink_add_neigh("dp1T0", host_ll_ip, neigh1_mac_str); + + struct dpt_zone_cfg cfg = { + .private = { + .name = "PRIVATE", + .intf = { "dp1T0", NULL }, + .local = false, + }, + .public = { + .name = "PUBLIC", + .intf = { "dp2T1", NULL }, + .local = false, + }, + .local = { + .name = "LOCAL", + .intf = { NULL }, + .local = true, + }, + .pub_to_priv = { + .name = "PUB_TO_PRIV", + .pass = BLOCK, + .stateful = STATELESS, + .npf = "", + }, + .priv_to_pub = { + .name = "PRIV_TO_PUB", + .pass = PASS, + .stateful = STATELESS, + .npf = "", + }, + .local_to_priv = { 0 }, + .priv_to_local = { + .name = "PRIV_TO_LOCAL", + .pass = PASS, + .stateful = STATELESS, + .npf = "", + }, + .local_to_pub = { 0 }, + .pub_to_local = { 0 }, + }; + + dpt_zone_cfg(&cfg, true, debug); + + /* + * Test packet + */ + ns_pak = dp_test_create_ns_pak(host_ll_ip, "ff02::1:ff00:2", + IPTOS_CLASS_CS6, neigh1_mac_str, "33:33:ff:00:00:02", + "2001:1:1::1"); + + /* + * Expected packet + */ + exp_na_pak = dp_test_create_na_pak(router_ll_ip, host_ll_ip, + IPTOS_CLASS_CS6, dp_test_intf_name2mac_str("dp1T0"), + neigh1_mac_str, "2001:1:1::1"); + + exp = dp_test_exp_create(exp_na_pak); + rte_pktmbuf_free(exp_na_pak); + + /* Set test expectations */ + dp_test_exp_set_oif_name(exp, "dp1T0"); + dp_test_exp_set_fwd_status(exp, DP_TEST_FWD_FORWARDED); + + 
/* Run test */ + dp_test_pak_receive(ns_pak, "dp1T0", exp); + + /* Clean Up */ + dpt_zone_cfg(&cfg, false, debug); + + dp_test_netlink_del_neigh("dp1T0", host_ll_ip, neigh1_mac_str); + dp_test_nl_del_ip_addr_and_connected("dp1T0", "2001:1:1::1/64"); + dp_test_netlink_del_ip_address("dp1T0", router_ll_ip_subnet); +} DP_END_TEST; + +/* + * ICMP originated packets with the originate firewall. + * + * Test generate ICMP message upon packet too big with don't fragment + * flag set (from dp_test_ip_icmp.c, DP_START_TEST(ip_icmp, df)) + * + * Two ICMP packets are generated. First one is allowed out via a PASS rule. + * Second one is dropped by a BLOCK rule. + * + * Exercises the ip_output code path. + */ +DP_DECL_TEST_CASE(npf_local, npf_icmp_orig1, NULL, NULL); +DP_START_TEST(npf_icmp_orig1, test) +{ + struct dp_test_expected *exp; + struct rte_mbuf *icmp_pak; + struct rte_mbuf *test_pak; + const char *neigh3_mac_str = "aa:bb:cc:dd:ee:ff"; + const char *neigh1_mac_str = "bb:aa:cc:ee:dd:ff"; + struct iphdr *ip_inner; + struct icmphdr *icph; + struct iphdr *ip; + int len = 1472; + int icmplen; + + /* Set up the interface addresses */ + dp_test_nl_add_ip_addr_and_connected("dp1T1", "1.1.1.1/24"); + dp_test_nl_add_ip_addr_and_connected("dp3T3", "2.2.2.2/24"); + + dp_test_netlink_set_interface_mtu("dp1T1", 1400); + + /* Add the nh arp we want the packet to follow */ + dp_test_netlink_add_neigh("dp3T3", "2.2.2.1", neigh3_mac_str); + dp_test_netlink_add_neigh("dp1T1", "1.1.1.2", neigh1_mac_str); + + /* Add originate firewall rule */ + struct dp_test_npf_rule_t rules[] = { + { + .rule = "1", + .pass = PASS, + .stateful = STATELESS, + .npf = "proto-base=1", + }, + RULE_DEF_BLOCK, + NULL_RULE + }; + + struct dp_test_npf_ruleset_t fw = { + .rstype = "originate", + .name = "FW_ORIG", + .enable = 1, + .attach_point = "dp3T3", + .fwd = FWD, + .dir = "out", + .rules = rules + }; + dp_test_npf_fw_add(&fw, false); + + /* Create pak to match the route added above */ + test_pak = 
dp_test_create_ipv4_pak("2.2.2.1", "1.1.1.2", 1, &len); + ip = iphdr(test_pak); + dp_test_set_pak_ip_field(ip, DP_TEST_SET_DF, 1); + + (void)dp_test_pktmbuf_eth_init(test_pak, + dp_test_intf_name2mac_str("dp3T3"), + neigh3_mac_str, RTE_ETHER_TYPE_IPV4); + + /* + * Expected packet + */ + /* Create expected icmp packet */ + icmplen = sizeof(struct iphdr) + 576; + icmp_pak = dp_test_create_icmp_ipv4_pak("2.2.2.2", "2.2.2.1", + ICMP_DEST_UNREACH, + ICMP_FRAG_NEEDED, + DPT_ICMP_FRAG_DATA(1400), + 1, &icmplen, iphdr(test_pak), + &ip, &icph); + (void)dp_test_pktmbuf_eth_init(icmp_pak, + neigh3_mac_str, + dp_test_intf_name2mac_str("dp3T3"), + RTE_ETHER_TYPE_IPV4); + + dp_test_set_pak_ip_field(ip, DP_TEST_SET_TOS, + IPTOS_PREC_INTERNETCONTROL); + + ip_inner = (struct iphdr *)(icph + 1); + + /* The TTL allowed to be changed from the original */ + dp_test_set_pak_ip_field(ip_inner, DP_TEST_SET_TTL, + DP_TEST_PAK_DEFAULT_TTL - 1); + + exp = dp_test_exp_create(icmp_pak); + rte_pktmbuf_free(icmp_pak); + + dp_test_exp_set_oif_name(exp, "dp3T3"); + + /* now send test pak and check we get expected back */ + dp_test_pak_receive(test_pak, "dp3T3", exp); + + /* + * Repeat + */ + dp_test_npf_fw_del(&fw, false); + rules[0].pass = BLOCK; + dp_test_npf_fw_add(&fw, false); + + /* Create pak to match the route added above */ + test_pak = dp_test_create_ipv4_pak("2.2.2.1", "1.1.1.2", 1, &len); + ip = iphdr(test_pak); + dp_test_set_pak_ip_field(ip, DP_TEST_SET_DF, 1); + + (void)dp_test_pktmbuf_eth_init(test_pak, + dp_test_intf_name2mac_str("dp3T3"), + neigh3_mac_str, RTE_ETHER_TYPE_IPV4); + + /* + * Expected packet + */ + /* Create expected icmp packet */ + icmplen = sizeof(struct iphdr) + 576; + icmp_pak = dp_test_create_icmp_ipv4_pak("2.2.2.2", "2.2.2.1", + ICMP_DEST_UNREACH, + ICMP_FRAG_NEEDED, + DPT_ICMP_FRAG_DATA(1400), + 1, &icmplen, iphdr(test_pak), + &ip, &icph); + (void)dp_test_pktmbuf_eth_init(icmp_pak, + neigh3_mac_str, + dp_test_intf_name2mac_str("dp3T3"), + RTE_ETHER_TYPE_IPV4); 
+ + dp_test_set_pak_ip_field(ip, DP_TEST_SET_TOS, + IPTOS_PREC_INTERNETCONTROL); + + ip_inner = (struct iphdr *)(icph + 1); + + /* The TTL allowed to be changed from the original */ + dp_test_set_pak_ip_field(ip_inner, DP_TEST_SET_TTL, + DP_TEST_PAK_DEFAULT_TTL - 1); + + exp = dp_test_exp_create(icmp_pak); + rte_pktmbuf_free(icmp_pak); + + dp_test_exp_set_oif_name(exp, "dp3T3"); + dp_test_exp_set_fwd_status(exp, DP_TEST_FWD_DROPPED); + + /* now send test pak and check we get expected back */ + dp_test_pak_receive(test_pak, "dp3T3", exp); + + + /* Clean Up */ + dp_test_npf_fw_del(&fw, false); + + dp_test_netlink_del_neigh("dp3T3", "2.2.2.1", neigh3_mac_str); + dp_test_netlink_del_neigh("dp1T1", "1.1.1.2", neigh1_mac_str); + dp_test_nl_del_ip_addr_and_connected("dp1T1", "1.1.1.1/24"); + dp_test_nl_del_ip_addr_and_connected("dp3T3", "2.2.2.2/24"); + + dp_test_netlink_set_interface_mtu("dp1T1", 1500); +} DP_END_TEST; + +/* + * ICMP originated packets with egress ACLs. + * + * Test generate ICMP message upon packet too big with don't fragment + * flag set (from dp_test_ip_icmp.c, DP_START_TEST(ip_icmp, df)) + * + * Two ICMP packets are generated. First one is allowed out via a PASS rule. + * Second one is dropped by a BLOCK rule. + * + * Exercises the ip_output code path. 
+ */ +DP_DECL_TEST_CASE(npf_local, npf_icmp_orig2, NULL, NULL); +DP_START_TEST(npf_icmp_orig2, test) +{ + struct dp_test_expected *exp; + struct rte_mbuf *icmp_pak; + struct rte_mbuf *test_pak; + const char *neigh3_mac_str = "aa:bb:cc:dd:ee:ff"; + const char *neigh1_mac_str = "bb:aa:cc:ee:dd:ff"; + struct iphdr *ip_inner; + struct icmphdr *icph; + struct iphdr *ip; + int len = 1472; + int icmplen; + + /* Set up the interface addresses */ + dp_test_nl_add_ip_addr_and_connected("dp1T1", "1.1.1.1/24"); + dp_test_nl_add_ip_addr_and_connected("dp3T3", "2.2.2.2/24"); + + dp_test_netlink_set_interface_mtu("dp1T1", 1400); + + /* Add the nh arp we want the packet to follow */ + dp_test_netlink_add_neigh("dp3T3", "2.2.2.1", neigh3_mac_str); + dp_test_netlink_add_neigh("dp1T1", "1.1.1.2", neigh1_mac_str); + + /* Add egress ACL */ + dp_test_npf_cmd("npf-ut add acl:v4test 0 family=inet", false); + dp_test_npf_cmd("npf-ut add acl:v4test 10 " + "proto-base=1 " + "action=accept", false); + dp_test_npf_cmd("npf-ut attach interface:dpT33 acl-out acl:v4test", + false); + dp_test_npf_cmd("npf-ut commit", false); + + /* Create pak to match the route added above */ + test_pak = dp_test_create_ipv4_pak("2.2.2.1", "1.1.1.2", 1, &len); + ip = iphdr(test_pak); + dp_test_set_pak_ip_field(ip, DP_TEST_SET_DF, 1); + + (void)dp_test_pktmbuf_eth_init(test_pak, + dp_test_intf_name2mac_str("dp3T3"), + neigh3_mac_str, RTE_ETHER_TYPE_IPV4); + + /* + * Expected packet + */ + /* Create expected icmp packet */ + icmplen = sizeof(struct iphdr) + 576; + icmp_pak = dp_test_create_icmp_ipv4_pak("2.2.2.2", "2.2.2.1", + ICMP_DEST_UNREACH, + ICMP_FRAG_NEEDED, + DPT_ICMP_FRAG_DATA(1400), + 1, &icmplen, iphdr(test_pak), + &ip, &icph); + (void)dp_test_pktmbuf_eth_init(icmp_pak, + neigh3_mac_str, + dp_test_intf_name2mac_str("dp3T3"), + RTE_ETHER_TYPE_IPV4); + + dp_test_set_pak_ip_field(ip, DP_TEST_SET_TOS, + IPTOS_PREC_INTERNETCONTROL); + + ip_inner = (struct iphdr *)(icph + 1); + + /* The TTL allowed to be 
changed from the original */ + dp_test_set_pak_ip_field(ip_inner, DP_TEST_SET_TTL, + DP_TEST_PAK_DEFAULT_TTL - 1); + + exp = dp_test_exp_create(icmp_pak); + rte_pktmbuf_free(icmp_pak); + + dp_test_exp_set_oif_name(exp, "dp3T3"); + + /* now send test pak and check we get expected back */ + dp_test_pak_receive(test_pak, "dp3T3", exp); + + /* + * Repeat + */ + dp_test_npf_cmd("npf-ut add acl:v4test 10 " + "proto-base=1 " + "action=drop", false); + dp_test_npf_cmd("npf-ut commit", false); + + /* Create pak to match the route added above */ + test_pak = dp_test_create_ipv4_pak("2.2.2.1", "1.1.1.2", 1, &len); + ip = iphdr(test_pak); + dp_test_set_pak_ip_field(ip, DP_TEST_SET_DF, 1); + + (void)dp_test_pktmbuf_eth_init(test_pak, + dp_test_intf_name2mac_str("dp3T3"), + neigh3_mac_str, RTE_ETHER_TYPE_IPV4); + + /* + * Expected packet + */ + /* Create expected icmp packet */ + icmplen = sizeof(struct iphdr) + 576; + icmp_pak = dp_test_create_icmp_ipv4_pak("2.2.2.2", "2.2.2.1", + ICMP_DEST_UNREACH, + ICMP_FRAG_NEEDED, + DPT_ICMP_FRAG_DATA(1400), + 1, &icmplen, iphdr(test_pak), + &ip, &icph); + (void)dp_test_pktmbuf_eth_init(icmp_pak, + neigh3_mac_str, + dp_test_intf_name2mac_str("dp3T3"), + RTE_ETHER_TYPE_IPV4); + + dp_test_set_pak_ip_field(ip, DP_TEST_SET_TOS, + IPTOS_PREC_INTERNETCONTROL); + + ip_inner = (struct iphdr *)(icph + 1); + + /* The TTL allowed to be changed from the original */ + dp_test_set_pak_ip_field(ip_inner, DP_TEST_SET_TTL, + DP_TEST_PAK_DEFAULT_TTL - 1); + + exp = dp_test_exp_create(icmp_pak); + rte_pktmbuf_free(icmp_pak); + + dp_test_exp_set_oif_name(exp, "dp3T3"); + dp_test_exp_set_fwd_status(exp, DP_TEST_FWD_DROPPED); + + /* now send test pak and check we get expected back */ + dp_test_pak_receive(test_pak, "dp3T3", exp); + + + /* Clean Up */ + dp_test_npf_cmd("npf-ut detach interface:dpT33 acl-out acl:v4test", + false); + dp_test_npf_cmd("npf-ut delete acl:v4test", false); + dp_test_npf_cmd("npf-ut commit", false); + + 
dp_test_netlink_del_neigh("dp3T3", "2.2.2.1", neigh3_mac_str); + dp_test_netlink_del_neigh("dp1T1", "1.1.1.2", neigh1_mac_str); + dp_test_nl_del_ip_addr_and_connected("dp1T1", "1.1.1.1/24"); + dp_test_nl_del_ip_addr_and_connected("dp3T3", "2.2.2.2/24"); + + dp_test_netlink_set_interface_mtu("dp1T1", 1500); +} DP_END_TEST; diff --git a/tests/whole_dp/src/dp_test_npf_mbuf.c b/tests/whole_dp/src/dp_test_npf_mbuf.c index c661e301..96843bb8 100644 --- a/tests/whole_dp/src/dp_test_npf_mbuf.c +++ b/tests/whole_dp/src/dp_test_npf_mbuf.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2019-2021, AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only * @@ -21,11 +21,11 @@ #include "dp_test.h" #include "dp_test_controller.h" #include "dp_test_console.h" -#include "dp_test_netlink_state.h" -#include "dp_test_cmd_check.h" -#include "dp_test_lib.h" -#include "dp_test_pktmbuf_lib.h" -#include "dp_test_lib_intf.h" +#include "dp_test_netlink_state_internal.h" +#include "dp_test/dp_test_cmd_check.h" +#include "dp_test_lib_internal.h" +#include "dp_test_lib_intf_internal.h" +#include "dp_test_pktmbuf_lib_internal.h" #include "dp_test_lib_exp.h" #include "dp_test_lib_pkt.h" #include "dp_test_npf_lib.h" @@ -34,6 +34,7 @@ #include "npf/nat/nat_pool_public.h" #include "npf/cgnat/cgn.h" +#include "npf/cgnat/cgn_test.h" #include "npf/apm/apm.h" #include "npf/cgnat/cgn_limits.h" #include "npf/cgnat/cgn_policy.h" @@ -82,7 +83,7 @@ DP_START_TEST(npf_mbuf1, test) dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T0"), "aa:bb:cc:dd:1:a1", - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); exp = dp_test_exp_create(test_pak); @@ -91,7 +92,7 @@ DP_START_TEST(npf_mbuf1, test) (void)dp_test_pktmbuf_eth_init( dp_test_exp_get_pak(exp), "aa:bb:cc:dd:2:b1", dp_test_intf_name2mac_str("dp2T1"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); dp_test_ipv4_decrement_ttl(dp_test_exp_get_pak(exp)); @@ -139,7 
+140,7 @@ DP_START_TEST(npf_mbuf2, test) .rule = "10", .pass = PASS, .stateful = false, - .npf = "proto=6 src-port=49152 dst-port=80 " + .npf = "proto-final=6 src-port=49152 dst-port=80 " "src-addr=100.64.0.1 dst-addr=1.1.1.1" }, RULE_DEF_BLOCK, @@ -167,7 +168,7 @@ DP_START_TEST(npf_mbuf2, test) dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T0"), "aa:bb:cc:dd:1:a1", - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); exp = dp_test_exp_create(test_pak); @@ -176,7 +177,7 @@ DP_START_TEST(npf_mbuf2, test) (void)dp_test_pktmbuf_eth_init( dp_test_exp_get_pak(exp), "aa:bb:cc:dd:2:b1", dp_test_intf_name2mac_str("dp2T1"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); dp_test_ipv4_decrement_ttl(dp_test_exp_get_pak(exp)); @@ -227,6 +228,7 @@ DP_START_TEST(npf_mbuf3, test) .ifname = "dp2T1", .proto = IPPROTO_TCP, .map = "dynamic", + .port_alloc = NULL, .from_addr = "100.64.0.1", .from_port = NULL, .to_addr = NULL, @@ -246,7 +248,7 @@ DP_START_TEST(npf_mbuf3, test) dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T0"), "aa:bb:cc:dd:1:a1", - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); exp_pak = dp_test_create_tcp_ipv4_pak( "1.1.1.13", "1.1.1.1", 0x412, 80, TH_SYN, @@ -255,7 +257,7 @@ DP_START_TEST(npf_mbuf3, test) dp_test_pktmbuf_eth_init(exp_pak, dp_test_intf_name2mac_str("dp1T0"), "aa:bb:cc:dd:1:a1", - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); exp = dp_test_exp_create(exp_pak); rte_pktmbuf_free(exp_pak); @@ -265,7 +267,7 @@ DP_START_TEST(npf_mbuf3, test) (void)dp_test_pktmbuf_eth_init( dp_test_exp_get_pak(exp), "aa:bb:cc:dd:2:b1", dp_test_intf_name2mac_str("dp2T1"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); dp_test_ipv4_decrement_ttl(dp_test_exp_get_pak(exp)); @@ -297,52 +299,6 @@ DP_START_TEST(npf_mbuf3, test) } DP_END_TEST; -static void -cgnat_policy_add(const char *policy, uint pri, const char *src, - const char *pool, const char *intf, - enum cgn_map_type eim, enum cgn_fltr_type eif, - bool log_sess, bool check_feat) -{ - char 
real_ifname[IFNAMSIZ]; - - dp_test_intf_real(intf, real_ifname); - - /* Add cgnat policy */ - dp_test_npf_cmd_fmt(false, - "cgn-ut policy add %s priority=%u " - "src-addr=%s pool=%s log-sess-all=%s", - policy, pri, src, pool, - log_sess ? "yes" : "no"); - - dp_test_npf_cmd_fmt(false, - "cgn-ut policy attach name=%s intf=%s", - policy, real_ifname); - - /* Check cgnat feature is enabled */ - if (check_feat) { - dp_test_wait_for_pl_feat(intf, "vyatta:ipv4-cgnat-in", - "ipv4-validate"); - dp_test_wait_for_pl_feat(intf, "vyatta:ipv4-cgnat-out", - "ipv4-out"); - } -} - -static void -cgnat_policy_del(const char *policy, uint pri, const char *intf) -{ - char real_ifname[IFNAMSIZ]; - - dp_test_intf_real(intf, real_ifname); - - dp_test_npf_cmd_fmt(false, - "cgn-ut policy detach name=%s intf=%s", - policy, real_ifname); - - /* Delete cgnat policy */ - dp_test_npf_cmd_fmt(false, - "cgn-ut policy delete %s", policy); -} - /* * npf_mbuf4 -- CGNAT */ @@ -362,7 +318,8 @@ DP_START_TEST(npf_mbuf4, test) "address-range=RANGE1/1.1.1.13-1.1.1.13"); cgnat_policy_add("POLICY1", 10, "100.64.0.0/24", "POOL1", - "dp2T1", CGN_MAP_EIM, CGN_FLTR_EIF, false, true); + "dp2T1", CGN_MAP_EIM, CGN_FLTR_EIF, + CGN_3TUPLE, true); for (copy_bytes = 0; copy_bytes < copy_max; copy_bytes++) { @@ -373,7 +330,7 @@ DP_START_TEST(npf_mbuf4, test) dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T0"), "aa:bb:cc:dd:1:a1", - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); exp_pak = dp_test_create_tcp_ipv4_pak( "1.1.1.13", "1.1.1.1", 1024, 80, TH_SYN, @@ -382,7 +339,7 @@ DP_START_TEST(npf_mbuf4, test) dp_test_pktmbuf_eth_init(exp_pak, dp_test_intf_name2mac_str("dp1T0"), "aa:bb:cc:dd:1:a1", - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); exp = dp_test_exp_create(exp_pak); rte_pktmbuf_free(exp_pak); @@ -392,7 +349,7 @@ DP_START_TEST(npf_mbuf4, test) (void)dp_test_pktmbuf_eth_init( dp_test_exp_get_pak(exp), "aa:bb:cc:dd:2:b1", dp_test_intf_name2mac_str("dp2T1"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); 
dp_test_ipv4_decrement_ttl(dp_test_exp_get_pak(exp)); diff --git a/tests/whole_dp/src/dp_test_npf_nat.c b/tests/whole_dp/src/dp_test_npf_nat.c index 4a817b4f..4ac76ec9 100644 --- a/tests/whole_dp/src/dp_test_npf_nat.c +++ b/tests/whole_dp/src/dp_test_npf_nat.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2021, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -18,13 +18,13 @@ #include "dp_test.h" #include "dp_test_controller.h" #include "dp_test_cmd_state.h" -#include "dp_test_netlink_state.h" -#include "dp_test_lib.h" +#include "dp_test_netlink_state_internal.h" +#include "dp_test_lib_internal.h" #include "dp_test_str.h" #include "dp_test_lib_exp.h" -#include "dp_test_lib_intf.h" +#include "dp_test_lib_intf_internal.h" #include "dp_test_lib_pkt.h" -#include "dp_test_pktmbuf_lib.h" +#include "dp_test_pktmbuf_lib_internal.h" #include "dp_test_console.h" #include "dp_test_json_utils.h" #include "dp_test_npf_sess_lib.h" @@ -74,9 +74,9 @@ DP_DECL_TEST_SUITE(npf_nat); * Dest 172.0.2.1 changed to 10.0.1.1 * */ -DP_DECL_TEST_CASE(npf_nat, npf_snat, NULL, NULL); +DP_DECL_TEST_CASE(npf_nat, snat1, NULL, NULL); -DP_START_TEST(npf_snat, test1) +DP_START_TEST(snat1, test1) { struct dp_test_pkt_desc_t *pre, *post; struct rte_mbuf *pre_pak, *post_pak; @@ -100,6 +100,7 @@ DP_START_TEST(npf_snat, test1) .ifname = "dp2T1", .proto = IPPROTO_TCP, .map = "dynamic", + .port_alloc = NULL, .from_addr = "10.0.1.1", .from_port = NULL, .to_addr = NULL, @@ -132,7 +133,7 @@ DP_START_TEST(npf_snat, test1) struct dp_test_pkt_desc_t v4_pktA_pre = { .text = "Forw, Host1 Ins -> Host3 Outs, pre-NAT", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "10.0.1.1", .l2_src = "aa:bb:cc:dd:2:b1", .l3_dst = "172.0.2.3", @@ -156,7 +157,7 @@ DP_START_TEST(npf_snat, test1) struct dp_test_pkt_desc_t 
v4_pktA_post = { .text = "Forw, Host1 Ins -> Host3 Outs, post-NAT", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "172.0.2.1", .l2_src = dp_test_intf_name2mac_str("dp2T1"), .l3_dst = "172.0.2.3", @@ -198,7 +199,7 @@ DP_START_TEST(npf_snat, test1) /* Verify pkt count */ dp_test_npf_snat_verify_pkts(snat.ifname, snat.rule, 1); - dp_test_npf_portmap_port_verify("172.0.2.1", pre->l4.tcp.sport); + dp_test_npf_portmap_port_verify("tcp", "172.0.2.1", pre->l4.tcp.sport); /***************************************************************** @@ -208,7 +209,7 @@ DP_START_TEST(npf_snat, test1) struct dp_test_pkt_desc_t v4_pktB_pre = { .text = "Back, Host3 Outs -> Host1 Ins, pre-NAT", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "172.0.2.3", .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = "172.0.2.1", @@ -232,7 +233,7 @@ DP_START_TEST(npf_snat, test1) struct dp_test_pkt_desc_t v4_pktB_post = { .text = "Back, Host3 Outs -> Host1 Ins, post-NAT", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "172.0.2.3", .l2_src = dp_test_intf_name2mac_str("dp1T0"), .l3_dst = "10.0.1.1", @@ -331,9 +332,9 @@ DP_START_TEST(npf_snat, test1) * Dest 172.0.2.254, port y changed to 10.0.1.x * */ -DP_DECL_TEST_CASE(npf_nat, npf_snat_masquerade, NULL, NULL); +DP_DECL_TEST_CASE(npf_nat, snat2, NULL, NULL); -DP_START_TEST(npf_snat_masquerade, test1) +DP_START_TEST(snat2, test1) { struct dp_test_pkt_desc_t *pre, *post; struct rte_mbuf *pre_pak, *post_pak; @@ -358,6 +359,7 @@ DP_START_TEST(npf_snat_masquerade, test1) .ifname = "dp2T1", .proto = NAT_NULL_PROTO, .map = "dynamic", + .port_alloc = NULL, .from_addr = "10.0.1.0/24", .from_port = NULL, .to_addr = NULL, @@ -393,7 +395,7 @@ DP_START_TEST(npf_snat_masquerade, test1) struct dp_test_pkt_desc_t v4_pkt1_pre = { .text = "Forw, Ins host1 -> Outs host3, Pre-NAT", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = 
RTE_ETHER_TYPE_IPV4, .l3_src = "10.0.1.1", .l2_src = "aa:bb:cc:dd:2:b1", .l3_dst = "172.0.2.3", @@ -417,7 +419,7 @@ DP_START_TEST(npf_snat_masquerade, test1) struct dp_test_pkt_desc_t v4_pkt1_post = { .text = "Forw, Ins host1 -> Outs host3, Post-NAT", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "172.0.2.254", .l2_src = dp_test_intf_name2mac_str("dp2T1"), .l3_dst = "172.0.2.3", @@ -459,8 +461,9 @@ DP_START_TEST(npf_snat_masquerade, test1) /* Verify pkt count */ dp_test_npf_snat_verify_pkts(snat.ifname, snat.rule, 1); - dp_test_npf_portmap_verify("172.0.2.254", "ACTIVE", 1); - dp_test_npf_portmap_port_verify("172.0.2.254", pre->l4.tcp.sport); + dp_test_npf_portmap_verify("tcp", "172.0.2.254", "ACTIVE", 1); + dp_test_npf_portmap_port_verify("tcp", "172.0.2.254", + pre->l4.tcp.sport); /******************************************************************* @@ -472,7 +475,7 @@ DP_START_TEST(npf_snat_masquerade, test1) struct dp_test_pkt_desc_t v4_pkt2_pre = { .text = "Back, Outs host3 -> Outs UUT intf, Pre-NAT", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "172.0.2.3", .l2_src = "aa:bb:cc:dd:1:a3", .l3_dst = "172.0.2.254", @@ -496,7 +499,7 @@ DP_START_TEST(npf_snat_masquerade, test1) struct dp_test_pkt_desc_t v4_pkt2_post = { .text = "Back, Outs host -> Outs UUT intf, Post-NAT", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "172.0.2.3", .l2_src = dp_test_intf_name2mac_str("dp1T0"), .l3_dst = "10.0.1.1", @@ -581,7 +584,7 @@ DP_START_TEST(npf_snat_masquerade, test1) struct dp_test_pkt_desc_t v4_pkt4_pre = { .text = "Forw, Ins host2 p1000 -> Outs host3, Pre-NAT", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "10.0.1.2", .l2_src = "aa:bb:cc:dd:2:b2", .l3_dst = "172.0.2.3", @@ -605,7 +608,7 @@ DP_START_TEST(npf_snat_masquerade, test1) struct dp_test_pkt_desc_t v4_pkt4_post = { .text = "Forw, Ins 
host2 p1000 -> Outs host3, Post-NAT", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "172.0.2.254", .l2_src = dp_test_intf_name2mac_str("dp2T1"), .l3_dst = "172.0.2.3", @@ -647,8 +650,9 @@ DP_START_TEST(npf_snat_masquerade, test1) /* Verify pkt count */ dp_test_npf_snat_verify_pkts(snat.ifname, snat.rule, 4); - dp_test_npf_portmap_verify("172.0.2.254", "ACTIVE", 2); - dp_test_npf_portmap_port_verify("172.0.2.254", post->l4.tcp.sport); + dp_test_npf_portmap_verify("tcp", "172.0.2.254", "ACTIVE", 2); + dp_test_npf_portmap_port_verify("tcp", "172.0.2.254", + post->l4.tcp.sport); /******************************************************************* @@ -667,7 +671,7 @@ DP_START_TEST(npf_snat_masquerade, test1) struct dp_test_pkt_desc_t v4_pkt5_pre = { .text = "Back, Outs host3 -> Outs UUT p1001, Pre-NAT", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "172.0.2.3", .l2_src = "aa:bb:cc:dd:1:a3", .l3_dst = "172.0.2.254", @@ -691,7 +695,7 @@ DP_START_TEST(npf_snat_masquerade, test1) struct dp_test_pkt_desc_t v4_pkt5_post = { .text = "Back, Outs n1 -> Outs UUT p1001, Post-NAT", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "172.0.2.3", .l2_src = dp_test_intf_name2mac_str("dp1T0"), .l3_dst = "10.0.1.2", @@ -761,9 +765,9 @@ DP_START_TEST(npf_snat_masquerade, test1) * Source 10.0.1.1 changed to 172.0.2.1 * */ -DP_DECL_TEST_CASE(npf_nat, npf_dnat, NULL, NULL); +DP_DECL_TEST_CASE(npf_nat, dnat1, NULL, NULL); -DP_START_TEST(npf_dnat, test1) +DP_START_TEST(dnat1, test1) { struct dp_test_pkt_desc_t *pre, *post; struct rte_mbuf *pre_pak, *post_pak; @@ -786,6 +790,7 @@ DP_START_TEST(npf_dnat, test1) .ifname = "dp2T1", .proto = IPPROTO_TCP, .map = "dynamic", + .port_alloc = NULL, .from_addr = NULL, .from_port = NULL, .to_addr = "172.0.2.1", @@ -819,7 +824,7 @@ DP_START_TEST(npf_dnat, test1) struct dp_test_pkt_desc_t v4_pktA_pre = { .text = "Forw, host3 
outs -> host1 ins, pre-NAT", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "172.0.2.3", .l2_src = "aa:bb:cc:dd:1:a3", .l3_dst = "172.0.2.1", @@ -843,7 +848,7 @@ DP_START_TEST(npf_dnat, test1) struct dp_test_pkt_desc_t v4_pktA_post = { .text = "Forw, host3 outs -> host1 ins, post-NAT", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "172.0.2.3", .l2_src = dp_test_intf_name2mac_str("dp1T0"), .l3_dst = "10.0.1.1", @@ -893,7 +898,7 @@ DP_START_TEST(npf_dnat, test1) struct dp_test_pkt_desc_t v4_pktB_pre = { .text = "Back, host1 ins -> host3 outs, pre-NAT", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "10.0.1.1", .l2_src = "aa:bb:cc:dd:2:b1", .l3_dst = "172.0.2.3", @@ -917,7 +922,7 @@ DP_START_TEST(npf_dnat, test1) struct dp_test_pkt_desc_t v4_pktB_post = { .text = "Back, host1 ins -> host3 outs, post-NAT", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "172.0.2.1", .l2_src = dp_test_intf_name2mac_str("dp2T1"), .l3_dst = "172.0.2.3", @@ -996,6 +1001,703 @@ DP_START_TEST(npf_dnat, test1) } DP_END_TEST; +/* + * Bidirectional NAT + * + * inside outside + * +-----+ + * hosts1 10.0.1.254 | | 172.0.2.254 host3 + * 10.0.1.1 -----------------| uut |--------------- 172.0.2.3 + * host2 dp1T0 | | dp2T1 + * 10.0.1.2 +-----+ + * snat <-- <-- dnat + * + * <-- Forwards (on Input) + * Dest 172.0.2.1 changed to 10.0.1.1 + * + * --> Back (on Output) + * Source 10.0.1.1 changed to 172.0.2.1 + * + * <-- Forwards (on Output) + * Source 172.0.2.3 changed to 10.0.1.3 + * + * --> Back (on Input) + * Dest 10.0.1.3 changed to 172.0.2.3 + * + */ +DP_DECL_TEST_CASE(npf_nat, bi_nat1, NULL, NULL); + +DP_START_TEST_DONT_RUN(bi_nat1, test1) +{ + struct dp_test_pkt_desc_t *pre, *post; + struct rte_mbuf *pre_pak, *post_pak; + struct dp_test_expected *test_exp; + + /* Setup interfaces and neighbours */ + 
dp_test_nl_add_ip_addr_and_connected("dp2T1", "172.0.2.254/24"); + dp_test_nl_add_ip_addr_and_connected("dp1T0", "10.0.1.254/24"); + + dp_test_netlink_add_neigh("dp1T0", "10.0.1.1", "aa:bb:cc:dd:2:b1"); + dp_test_netlink_add_neigh("dp1T0", "10.0.1.2", "aa:bb:cc:dd:2:b2"); + dp_test_netlink_add_neigh("dp2T1", "172.0.2.3", "aa:bb:cc:dd:1:a3"); + + /* + * Add DNAT rule. + */ + struct dp_test_npf_nat_rule_t dnat = { + .desc = "dnat rule", + .rule = "10", + .ifname = "dp2T1", + .proto = IPPROTO_TCP, + .map = "dynamic", + .port_alloc = NULL, + .from_addr = NULL, + .from_port = NULL, + .to_addr = "172.0.2.1", + .to_port = "80", + .trans_addr = "10.0.1.1", + .trans_port = NULL + }; + + dp_test_npf_dnat_add(&dnat, true); + + /* + * Add SNAT rule. + */ + struct dp_test_npf_nat_rule_t snat = { + .desc = "snat rule", + .rule = "10", + .ifname = "dp1T0", + .proto = IPPROTO_TCP, + .map = "dynamic", + .port_alloc = NULL, + .from_addr = "172.0.2.3", + .from_port = NULL, + .to_addr = NULL, + .to_port = NULL, + .trans_addr = "10.0.1.3", + .trans_port = NULL + }; + + dp_test_npf_snat_add(&snat, true); + + /* + * Validation context. This validates the NAT session is correct + * *before* it checks the packet. 
+ */ + struct dp_test_nat_ctx dnat_context; + struct dp_test_nat_ctx *dnat_ctx = &dnat_context; + struct dp_test_nat_ctx snat_context; + struct dp_test_nat_ctx *snat_ctx = &snat_context; + + static struct dp_test_nat_cb_ctx cb_ctx = { + .snat = NULL, + .dnat = NULL, + .saved_cb = dp_test_pak_verify + }; + cb_ctx.snat = snat_ctx; + cb_ctx.dnat = dnat_ctx; + memset(snat_ctx, 0, sizeof(*snat_ctx)); + memset(dnat_ctx, 0, sizeof(*dnat_ctx)); + + + /******************************************************************* + * Pkt1: Forwards direction, Outside host 3 -> Inside host 1 + *******************************************************************/ + + struct dp_test_pkt_desc_t v4_pktA_pre = { + .text = "Fwd, host3 outs -> host1 ins, pre-NAT", + .len = 20, + .ether_type = RTE_ETHER_TYPE_IPV4, + .l3_src = "172.0.2.3", + .l2_src = "aa:bb:cc:dd:1:a3", + .l3_dst = "172.0.2.1", + .l2_dst = dp_test_intf_name2mac_str("dp2T1"), + .proto = IPPROTO_TCP, + .l4 = { + .tcp = { + .sport = 49152, + .dport = 80, + .flags = TH_SYN, + .seq = 0, + .ack = 0, + .win = 5840, + .opts = NULL + } + }, + .rx_intf = "dp2T1", + .tx_intf = "dp1T0" + }; + + struct dp_test_pkt_desc_t v4_pktA_post = { + .text = "Fwd, host3 outs -> host1 ins, post-NAT", + .len = 20, + .ether_type = RTE_ETHER_TYPE_IPV4, + .l3_src = "10.0.1.3", + .l2_src = dp_test_intf_name2mac_str("dp1T0"), + .l3_dst = "10.0.1.1", + .l2_dst = "aa:bb:cc:dd:2:b1", + .proto = IPPROTO_TCP, + .l4 = { + .tcp = { + .sport = 49152, + .dport = 80, + .flags = TH_SYN, + .seq = 0, + .ack = 0, + .win = 5840, + .opts = NULL + } + }, + .rx_intf = "dp2T1", + .tx_intf = "dp1T0" + }; + + pre = &v4_pktA_pre; + post = &v4_pktA_post; + + pre_pak = dp_test_v4_pkt_from_desc(pre); + post_pak = dp_test_v4_pkt_from_desc(post); + test_exp = dp_test_exp_from_desc(post_pak, post); + rte_pktmbuf_free(post_pak); + + dp_test_exp_set_fwd_status(test_exp, DP_TEST_FWD_FORWARDED); + + /* Setup NAT validation context */ + dp_test_nat_set_ctx(snat_ctx, DP_TEST_NAT_DIR_FORW, 
DP_TEST_TRANS_SNAT, + pre, post, true); + dp_test_nat_set_ctx(dnat_ctx, DP_TEST_NAT_DIR_FORW, DP_TEST_TRANS_DNAT, + pre, post, true); + dp_test_nat_set_validation(&cb_ctx, test_exp); + + /* Run the test */ + dp_test_pak_receive(pre_pak, pre->rx_intf, test_exp); + + /* Verify pkt count and npf session */ + dp_test_npf_dnat_verify_pkts(dnat.ifname, dnat.rule, 1); + dp_test_npf_snat_verify_pkts(snat.ifname, snat.rule, 1); + + dp_test_npf_portmap_port_verify("tcp", "10.0.1.3", pre->l4.tcp.sport); + + + /******************************************************************* + * Pkt2: Backwards direction, Inside host 1 -> Outside host 3 + *******************************************************************/ + + struct dp_test_pkt_desc_t v4_pktB_pre = { + .text = "Back, host 1 ins -> host3 outs, pre-NAT", + .len = 20, + .ether_type = RTE_ETHER_TYPE_IPV4, + .l3_src = "10.0.1.1", + .l2_src = "aa:bb:cc:dd:2:b1", + .l3_dst = "10.0.1.3", + .l2_dst = dp_test_intf_name2mac_str("dp1T0"), + .proto = IPPROTO_TCP, + .l4 = { + .tcp = { + .sport = 80, + .dport = 49152, + .flags = TH_SYN | TH_ACK, + .seq = 0, + .ack = 1, + .win = 5840, + .opts = NULL + } + }, + .rx_intf = "dp1T0", + .tx_intf = "dp2T1" + }; + + struct dp_test_pkt_desc_t v4_pktB_post = { + .text = "Back, host 1 ins -> host3 outs, post-NAT", + .len = 20, + .ether_type = RTE_ETHER_TYPE_IPV4, + .l3_src = "172.0.2.1", + .l2_src = dp_test_intf_name2mac_str("dp2T1"), + .l3_dst = "172.0.2.3", + .l2_dst = "aa:bb:cc:dd:1:a3", + .proto = IPPROTO_TCP, + .l4 = { + .tcp = { + .sport = 80, + .dport = 49152, + .flags = TH_SYN | TH_ACK, + .seq = 0, + .ack = 1, + .win = 5840, + .opts = NULL + } + }, + .rx_intf = "dp1T0", + .tx_intf = "dp2T1" + }; + + pre = &v4_pktB_pre; + post = &v4_pktB_post; + + pre_pak = dp_test_v4_pkt_from_desc(pre); + post_pak = dp_test_v4_pkt_from_desc(post); + test_exp = dp_test_exp_from_desc(post_pak, post); + rte_pktmbuf_free(post_pak); + + dp_test_exp_set_fwd_status(test_exp, DP_TEST_FWD_FORWARDED); + + /* Setup 
NAT validation context */ + dp_test_nat_set_ctx(snat_ctx, DP_TEST_NAT_DIR_BACK, DP_TEST_TRANS_SNAT, + pre, post, false); + dp_test_nat_set_ctx(dnat_ctx, DP_TEST_NAT_DIR_BACK, DP_TEST_TRANS_DNAT, + pre, post, false); + dp_test_nat_set_validation(&cb_ctx, test_exp); + + /* Run the test */ + dp_test_pak_receive(pre_pak, pre->rx_intf, test_exp); + + /* Verify pkt count and npf session */ + dp_test_npf_dnat_verify_pkts(dnat.ifname, dnat.rule, 2); + dp_test_npf_snat_verify_pkts(snat.ifname, snat.rule, 2); + + + /******************************************************************* + * Pkt3: Forwards direction, Outside host 3 -> Inside host 1 + * + * Repeat initial packet + *******************************************************************/ + + pre = &v4_pktA_pre; + post = &v4_pktA_post; + + pre->l4.tcp.flags = post->l4.tcp.flags = TH_ACK; + pre->l4.tcp.seq = post->l4.tcp.seq = 1; + pre->l4.tcp.ack = post->l4.tcp.ack = 1; + + pre_pak = dp_test_v4_pkt_from_desc(pre); + post_pak = dp_test_v4_pkt_from_desc(post); + test_exp = dp_test_exp_from_desc(post_pak, post); + rte_pktmbuf_free(post_pak); + + dp_test_exp_set_fwd_status(test_exp, DP_TEST_FWD_FORWARDED); + + /* Setup NAT validation context */ + dp_test_nat_set_ctx(snat_ctx, DP_TEST_NAT_DIR_FORW, DP_TEST_TRANS_SNAT, + pre, post, true); + dp_test_nat_set_ctx(dnat_ctx, DP_TEST_NAT_DIR_FORW, DP_TEST_TRANS_DNAT, + pre, post, true); + dp_test_nat_set_validation(&cb_ctx, test_exp); + + /* Run the test */ + dp_test_pak_receive(pre_pak, pre->rx_intf, test_exp); + + /* Verify pkt count and npf session */ + dp_test_npf_dnat_verify_pkts(dnat.ifname, dnat.rule, 3); + dp_test_npf_snat_verify_pkts(snat.ifname, snat.rule, 3); + + + /* Cleanup */ + dp_test_npf_dnat_del(dnat.ifname, dnat.rule, true); + dp_test_npf_snat_del(snat.ifname, snat.rule, true); + dp_test_npf_cleanup(); + + dp_test_netlink_del_neigh("dp1T0", "10.0.1.1", "aa:bb:cc:dd:2:b1"); + dp_test_netlink_del_neigh("dp1T0", "10.0.1.2", "aa:bb:cc:dd:2:b2"); + 
dp_test_netlink_del_neigh("dp2T1", "172.0.2.3", "aa:bb:cc:dd:1:a3"); + + dp_test_nl_del_ip_addr_and_connected("dp2T1", "172.0.2.254/24"); + dp_test_nl_del_ip_addr_and_connected("dp1T0", "10.0.1.254/24"); + +} DP_END_TEST; + + +/* + * Bidirectional NAT (same interface) + * + * inside outside + * +-----+ + * hosts1 10.0.1.254 | | 172.0.2.254 host3 + * 10.0.1.1 -----------------| uut |--------------- 172.0.2.3 + * host2 dp1T0 | | dp2T1 + * 10.0.1.2 +-----+ + * <-- dnat + * + * <-- Forwards (on Input) + * Dest 172.0.2.1 changed to 10.0.1.1 + * + * --> Back (on Output) + * Source 10.0.1.1 changed to 172.0.2.1 + * + * --> snat + * + * --> Forwards (on Output) + * Source 10.0.1.2 changed to 172.0.2.2 + * + * <-- Back (on Input) + * Dest 172.0.2.2 changed to 10.0.1.2 + * + * The backwards packet hits the reverse DNAT session created by packet 1, and + * so never hits the SNAT rule. + */ +DP_DECL_TEST_CASE(npf_nat, bi_nat2, NULL, NULL); + +DP_START_TEST_DONT_RUN(bi_nat2, test1) +{ + struct dp_test_pkt_desc_t *pre, *post; + struct rte_mbuf *pre_pak, *post_pak; + struct dp_test_expected *test_exp; + + /* Setup interfaces and neighbours */ + dp_test_nl_add_ip_addr_and_connected("dp2T1", "172.0.2.254/24"); + dp_test_nl_add_ip_addr_and_connected("dp1T0", "10.0.1.254/24"); + + dp_test_netlink_add_neigh("dp1T0", "10.0.1.1", "aa:bb:cc:dd:2:b1"); + dp_test_netlink_add_neigh("dp1T0", "10.0.1.2", "aa:bb:cc:dd:2:b2"); + dp_test_netlink_add_neigh("dp2T1", "172.0.2.3", "aa:bb:cc:dd:1:a3"); + + /* + * Add DNAT rule. + */ + struct dp_test_npf_nat_rule_t dnat = { + .desc = "dnat rule", + .rule = "10", + .ifname = "dp2T1", + .proto = IPPROTO_TCP, + .map = "dynamic", + .port_alloc = NULL, + .from_addr = NULL, + .from_port = NULL, + .to_addr = "172.0.2.1", + .to_port = "80", + .trans_addr = "10.0.1.1", + .trans_port = NULL + }; + + dp_test_npf_dnat_add(&dnat, true); + + /* + * Add SNAT rule.
+ */ + struct dp_test_npf_nat_rule_t snat = { + .desc = "snat rule", + .rule = "10", + .ifname = "dp2T1", + .proto = IPPROTO_TCP, + .map = "dynamic", + .port_alloc = NULL, + .from_addr = "10.0.1.2", + .from_port = NULL, + .to_addr = NULL, + .to_port = NULL, + .trans_addr = "172.0.2.2", + .trans_port = NULL + }; + + dp_test_npf_snat_add(&snat, true); + + /* + * Validation context. This validates the NAT session is correct + * *before* it checks the packet. + */ + struct dp_test_nat_ctx dnat_context; + struct dp_test_nat_ctx *dnat_ctx = &dnat_context; + struct dp_test_nat_ctx snat_context; + struct dp_test_nat_ctx *snat_ctx = &snat_context; + + static struct dp_test_nat_cb_ctx cb_ctx = { + .snat = NULL, + .dnat = NULL, + .saved_cb = dp_test_pak_verify + }; + cb_ctx.snat = snat_ctx; + cb_ctx.dnat = dnat_ctx; + memset(snat_ctx, 0, sizeof(*snat_ctx)); + memset(dnat_ctx, 0, sizeof(*dnat_ctx)); + + + /******************************************************************* + * Pkt1: DNAT Forwards direction, Outside host 3 -> Inside host 1 + *******************************************************************/ + + struct dp_test_pkt_desc_t v4_pktA_pre = { + .text = "Fwd, host3 outs -> host1 ins, pre-NAT", + .len = 20, + .ether_type = RTE_ETHER_TYPE_IPV4, + .l3_src = "172.0.2.3", + .l2_src = "aa:bb:cc:dd:1:a3", + .l3_dst = "172.0.2.1", + .l2_dst = dp_test_intf_name2mac_str("dp2T1"), + .proto = IPPROTO_TCP, + .l4 = { + .tcp = { + .sport = 49152, + .dport = 80, + .flags = TH_SYN, + .seq = 0, + .ack = 0, + .win = 5840, + .opts = NULL + } + }, + .rx_intf = "dp2T1", + .tx_intf = "dp1T0" + }; + + struct dp_test_pkt_desc_t v4_pktA_post = { + .text = "Fwd, host3 outs -> host1 ins, post-NAT", + .len = 20, + .ether_type = RTE_ETHER_TYPE_IPV4, + .l3_src = "172.0.2.3", + .l2_src = dp_test_intf_name2mac_str("dp1T0"), + .l3_dst = "10.0.1.1", + .l2_dst = "aa:bb:cc:dd:2:b1", + .proto = IPPROTO_TCP, + .l4 = { + .tcp = { + .sport = 49152, + .dport = 80, + .flags = TH_SYN, + .seq = 0, + .ack = 0, + 
.win = 5840, + .opts = NULL + } + }, + .rx_intf = "dp2T1", + .tx_intf = "dp1T0" + }; + + pre = &v4_pktA_pre; + post = &v4_pktA_post; + + pre_pak = dp_test_v4_pkt_from_desc(pre); + post_pak = dp_test_v4_pkt_from_desc(post); + test_exp = dp_test_exp_from_desc(post_pak, post); + rte_pktmbuf_free(post_pak); + + dp_test_exp_set_fwd_status(test_exp, DP_TEST_FWD_FORWARDED); + + /* Setup NAT validation context */ + dp_test_nat_set_ctx(dnat_ctx, DP_TEST_NAT_DIR_FORW, DP_TEST_TRANS_DNAT, + pre, post, true); + dp_test_nat_set_validation(&cb_ctx, test_exp); + cb_ctx.snat = NULL; /* No snat */ + + /* Run the test */ + dp_test_pak_receive(pre_pak, pre->rx_intf, test_exp); + + /* Verify pkt count */ + dp_test_npf_dnat_verify_pkts(dnat.ifname, dnat.rule, 1); + dp_test_npf_snat_verify_pkts(snat.ifname, snat.rule, 0); + + dp_test_npf_session_count_verify(1); + dp_test_npf_nat_session_count_verify(1); + + + /******************************************************************* + * Pkt2: DNAT Backwards direction, Inside host 1 -> Outside host 3 + *******************************************************************/ + + struct dp_test_pkt_desc_t v4_pktB_pre = { + .text = "Rev, host 1 ins -> host3 outs, pre-NAT", + .len = 20, + .ether_type = RTE_ETHER_TYPE_IPV4, + .l3_src = "10.0.1.1", + .l2_src = "aa:bb:cc:dd:2:b1", + .l3_dst = "172.0.2.3", + .l2_dst = dp_test_intf_name2mac_str("dp1T0"), + .proto = IPPROTO_TCP, + .l4 = { + .tcp = { + .sport = 80, + .dport = 49152, + .flags = TH_SYN | TH_ACK, + .seq = 0, + .ack = 1, + .win = 5840, + .opts = NULL + } + }, + .rx_intf = "dp1T0", + .tx_intf = "dp2T1" + }; + + struct dp_test_pkt_desc_t v4_pktB_post = { + .text = "Rev, host 1 ins -> host3 outs, post-NAT", + .len = 20, + .ether_type = RTE_ETHER_TYPE_IPV4, + .l3_src = "172.0.2.1", + .l2_src = dp_test_intf_name2mac_str("dp2T1"), + .l3_dst = "172.0.2.3", + .l2_dst = "aa:bb:cc:dd:1:a3", + .proto = IPPROTO_TCP, + .l4 = { + .tcp = { + .sport = 80, + .dport = 49152, + .flags = TH_SYN | TH_ACK, + .seq = 
0, + .ack = 1, + .win = 5840, + .opts = NULL + } + }, + .rx_intf = "dp1T0", + .tx_intf = "dp2T1" + }; + + pre = &v4_pktB_pre; + post = &v4_pktB_post; + + pre_pak = dp_test_v4_pkt_from_desc(pre); + post_pak = dp_test_v4_pkt_from_desc(post); + test_exp = dp_test_exp_from_desc(post_pak, post); + rte_pktmbuf_free(post_pak); + + dp_test_exp_set_fwd_status(test_exp, DP_TEST_FWD_FORWARDED); + + /* Run the test */ + dp_test_pak_receive(pre_pak, pre->rx_intf, test_exp); + + /* Verify pkt count */ + dp_test_npf_dnat_verify_pkts(dnat.ifname, dnat.rule, 2); + dp_test_npf_snat_verify_pkts(snat.ifname, snat.rule, 0); + + dp_test_npf_session_count_verify(1); + dp_test_npf_nat_session_count_verify(1); + + + /******************************************************************* + * Pkt3: DNAT Forwards direction, Outside host 3 -> Inside host 1 + * + * Repeat packet 1 + *******************************************************************/ + + pre = &v4_pktA_pre; + post = &v4_pktA_post; + + pre->l4.tcp.flags = post->l4.tcp.flags = TH_ACK; + pre->l4.tcp.seq = post->l4.tcp.seq = 1; + pre->l4.tcp.ack = post->l4.tcp.ack = 1; + + pre_pak = dp_test_v4_pkt_from_desc(pre); + post_pak = dp_test_v4_pkt_from_desc(post); + test_exp = dp_test_exp_from_desc(post_pak, post); + rte_pktmbuf_free(post_pak); + + dp_test_exp_set_fwd_status(test_exp, DP_TEST_FWD_FORWARDED); + + /* Setup NAT validation context */ + dp_test_nat_set_ctx(dnat_ctx, DP_TEST_NAT_DIR_FORW, DP_TEST_TRANS_DNAT, + pre, post, true); + dp_test_nat_set_validation(&cb_ctx, test_exp); + cb_ctx.snat = NULL; /* No snat */ + + /* Run the test */ + dp_test_pak_receive(pre_pak, pre->rx_intf, test_exp); + + /* Verify pkt count */ + dp_test_npf_dnat_verify_pkts(dnat.ifname, dnat.rule, 3); + dp_test_npf_snat_verify_pkts(snat.ifname, snat.rule, 0); + + dp_test_npf_session_count_verify(1); + dp_test_npf_nat_session_count_verify(1); + + + /******************************************************************* + * Pkt4: SNAT Forwards direction, Inside 
host 2 -> Outside host 3 + * + * This will hit the SNAT rule only + * + *******************************************************************/ + + struct dp_test_pkt_desc_t v4_pktC_pre = { + .text = "Fwd, host2 ins -> host3 outs, pre-NAT", + .len = 20, + .ether_type = RTE_ETHER_TYPE_IPV4, + .l3_src = "10.0.1.2", + .l2_src = "aa:bb:cc:dd:2:b2", + .l3_dst = "172.0.2.3", + .l2_dst = dp_test_intf_name2mac_str("dp1T0"), + .proto = IPPROTO_TCP, + .l4 = { + .tcp = { + .sport = 49101, + .dport = 1000, + .flags = TH_SYN, + .seq = 0, + .ack = 0, + .win = 5840, + .opts = NULL + } + }, + .rx_intf = "dp1T0", + .tx_intf = "dp2T1" + }; + + struct dp_test_pkt_desc_t v4_pktC_post = { + .text = "Fwd, host2 ins -> host3 outs, pre-NAT", + .len = 20, + .ether_type = RTE_ETHER_TYPE_IPV4, + .l3_src = "172.0.2.2", + .l2_src = dp_test_intf_name2mac_str("dp2T1"), + .l3_dst = "172.0.2.3", + .l2_dst = "aa:bb:cc:dd:1:a3", + .proto = IPPROTO_TCP, + .l4 = { + .tcp = { + .sport = 49101, + .dport = 1000, + .flags = TH_SYN, + .seq = 0, + .ack = 0, + .win = 5840, + .opts = NULL + } + }, + .rx_intf = "dp1T0", + .tx_intf = "dp2T1" + }; + + pre = &v4_pktC_pre; + post = &v4_pktC_post; + + pre_pak = dp_test_v4_pkt_from_desc(pre); + post_pak = dp_test_v4_pkt_from_desc(post); + test_exp = dp_test_exp_from_desc(post_pak, post); + rte_pktmbuf_free(post_pak); + + dp_test_exp_set_fwd_status(test_exp, DP_TEST_FWD_FORWARDED); + + /* Setup NAT validation context */ + dp_test_nat_set_ctx(snat_ctx, DP_TEST_NAT_DIR_FORW, DP_TEST_TRANS_SNAT, + pre, post, true); + dp_test_nat_set_validation(&cb_ctx, test_exp); + cb_ctx.dnat = NULL; /* No dnat */ + + /* Run the test */ + dp_test_pak_receive(pre_pak, pre->rx_intf, test_exp); + + /* Verify pkt count */ + dp_test_npf_dnat_verify_pkts(dnat.ifname, dnat.rule, 3); + dp_test_npf_snat_verify_pkts(snat.ifname, snat.rule, 1); + + dp_test_npf_session_count_verify(2); + dp_test_npf_nat_session_count_verify(2); + + + /* Cleanup */ + dp_test_npf_dnat_del(dnat.ifname, dnat.rule, true); 
+ dp_test_npf_snat_del(snat.ifname, snat.rule, true); + dp_test_npf_cleanup(); + + dp_test_netlink_del_neigh("dp1T0", "10.0.1.1", "aa:bb:cc:dd:2:b1"); + dp_test_netlink_del_neigh("dp1T0", "10.0.1.2", "aa:bb:cc:dd:2:b2"); + dp_test_netlink_del_neigh("dp2T1", "172.0.2.3", "aa:bb:cc:dd:1:a3"); + + dp_test_nl_del_ip_addr_and_connected("dp2T1", "172.0.2.254/24"); + dp_test_nl_del_ip_addr_and_connected("dp1T0", "10.0.1.254/24"); + +} DP_END_TEST; + + + /* * Mapping of address ranges (SNAT) * @@ -1017,7 +1719,9 @@ DP_START_TEST(npf_dnat, test1) * */ -DP_START_TEST(npf_snat, addr_ranges) +DP_DECL_TEST_CASE(npf_nat, snat3, NULL, NULL); + +DP_START_TEST(snat3, addr_ranges) { struct dp_test_pkt_desc_t *pre, *post; struct rte_mbuf *pre_pak, *post_pak; @@ -1040,6 +1744,7 @@ DP_START_TEST(npf_snat, addr_ranges) .ifname = "dp2T1", .proto = IPPROTO_TCP, .map = "dynamic", + .port_alloc = NULL, .from_addr = "10.0.1.0/24", .from_port = NULL, .to_addr = NULL, @@ -1072,7 +1777,7 @@ DP_START_TEST(npf_snat, addr_ranges) struct dp_test_pkt_desc_t v4_pktA_pre = { .text = "Forw, Host1 Ins -> Host3 Outs, pre-NAT", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "10.0.1.1", .l2_src = "aa:bb:cc:dd:2:b1", .l3_dst = "172.0.2.3", @@ -1096,7 +1801,7 @@ DP_START_TEST(npf_snat, addr_ranges) struct dp_test_pkt_desc_t v4_pktA_post = { .text = "Forw, Host1 Ins -> Host3 Outs, pre-NAT", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "172.0.2.18", .l2_src = "aa:bb:cc:dd:2:b1", .l3_dst = "172.0.2.3", @@ -1138,7 +1843,7 @@ DP_START_TEST(npf_snat, addr_ranges) /* Verify pkt count */ dp_test_npf_snat_verify_pkts(snat.ifname, snat.rule, 1); - dp_test_npf_portmap_port_verify("172.0.2.18", pre->l4.tcp.sport); + dp_test_npf_portmap_port_verify("tcp", "172.0.2.18", pre->l4.tcp.sport); /***************************************************************** @@ -1148,7 +1853,7 @@ DP_START_TEST(npf_snat, addr_ranges) struct 
dp_test_pkt_desc_t v4_pktB_pre = { .text = "Forw, Host2 Ins -> Host3 Outs, pre-NAT", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "10.0.1.2", .l2_src = "aa:bb:cc:dd:2:b2", .l3_dst = "172.0.2.3", @@ -1172,7 +1877,7 @@ DP_START_TEST(npf_snat, addr_ranges) struct dp_test_pkt_desc_t v4_pktB_post = { .text = "Forw, Host2 Ins -> Host3 Outs, post-NAT", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "172.0.2.19", .l2_src = "aa:bb:cc:dd:2:b2", .l3_dst = "172.0.2.3", @@ -1217,7 +1922,7 @@ DP_START_TEST(npf_snat, addr_ranges) dp_test_npf_session_count_verify(2); dp_test_npf_nat_session_count_verify(2); - dp_test_npf_portmap_port_verify("172.0.2.19", pre->l4.tcp.sport); + dp_test_npf_portmap_port_verify("tcp", "172.0.2.19", pre->l4.tcp.sport); /* Cleanup */ @@ -1254,9 +1959,9 @@ DP_START_TEST(npf_snat, addr_ranges) * Dest 172.0.2.254, port y changed to 10.0.1.x * */ -DP_DECL_TEST_CASE(npf_nat, npf_snat_exclude, NULL, NULL); +DP_DECL_TEST_CASE(npf_nat, snat4, NULL, NULL); -DP_START_TEST(npf_snat_exclude, test1) +DP_START_TEST(snat4, test1) { struct dp_test_pkt_desc_t *pre, *post; struct rte_mbuf *pre_pak, *post_pak; @@ -1281,6 +1986,7 @@ DP_START_TEST(npf_snat_exclude, test1) .ifname = "dp2T1", .proto = NAT_NULL_PROTO, .map = "exclude", + .port_alloc = NULL, .from_addr = "10.0.1.2", .from_port = NULL, .to_addr = NULL, @@ -1295,6 +2001,7 @@ DP_START_TEST(npf_snat_exclude, test1) .ifname = "dp2T1", .proto = NAT_NULL_PROTO, .map = "dynamic", + .port_alloc = NULL, .from_addr = "10.0.1.0/24", .from_port = NULL, .to_addr = NULL, @@ -1329,7 +2036,7 @@ DP_START_TEST(npf_snat_exclude, test1) struct dp_test_pkt_desc_t v4_pkt1_pre = { .text = "Fwd, Ins host1 -> Outs host3, pre-NAT", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "10.0.1.1", .l2_src = "aa:bb:cc:dd:2:b1", .l3_dst = "172.0.2.3", @@ -1353,7 +2060,7 @@ DP_START_TEST(npf_snat_exclude, test1) 
struct dp_test_pkt_desc_t v4_pkt1_post = { .text = "Fwd, Ins host1 -> Outs host3, post-NAT", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "172.0.2.254", .l2_src = "aa:bb:cc:dd:2:b1", .l3_dst = "172.0.2.3", @@ -1397,7 +2104,8 @@ DP_START_TEST(npf_snat_exclude, test1) dp_test_npf_snat_verify_pkts(snat_10.ifname, snat_10.rule, 0); dp_test_npf_snat_verify_pkts(snat_20.ifname, snat_20.rule, 1); - dp_test_npf_portmap_port_verify("172.0.2.254", pre->l4.tcp.sport); + dp_test_npf_portmap_port_verify("tcp", "172.0.2.254", + pre->l4.tcp.sport); /******************************************************************* @@ -1409,7 +2117,7 @@ DP_START_TEST(npf_snat_exclude, test1) struct dp_test_pkt_desc_t v4_pkt2_pre = { .text = "Rev, Outs host3 -> Outs UUT intf, pre-NAT", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "172.0.2.3", .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = "172.0.2.254", @@ -1433,7 +2141,7 @@ DP_START_TEST(npf_snat_exclude, test1) struct dp_test_pkt_desc_t v4_pkt2_post = { .text = "Rev, Outs host3 -> Outs UUT intf, post-NAT", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "172.0.2.3", .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = "10.0.1.1", @@ -1489,7 +2197,7 @@ DP_START_TEST(npf_snat_exclude, test1) struct dp_test_pkt_desc_t v4_pkt3_pre = { .text = "Fwd, Ins host2 p1000 -> Outs host3, pre-NAT", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "10.0.1.2", .l2_src = "aa:bb:cc:dd:2:b2", .l3_dst = "172.0.2.3", @@ -1556,9 +2264,9 @@ DP_START_TEST(npf_snat_exclude, test1) * +-----+ * snat --> */ -DP_DECL_TEST_CASE(npf_nat, npf_snat_port_range, NULL, NULL); +DP_DECL_TEST_CASE(npf_nat, snat5, NULL, NULL); -DP_START_TEST(npf_snat_port_range, test1) +DP_START_TEST(snat5, test1) { struct dp_test_pkt_desc_t *pre, *post; @@ -1581,6 +2289,7 @@ DP_START_TEST(npf_snat_port_range, test1) .ifname = "dp2T1", .proto = 
IPPROTO_TCP, .map = "dynamic", + .port_alloc = NULL, .from_addr = "10.0.1.1", .from_port = NULL, .to_addr = NULL, @@ -1614,7 +2323,7 @@ DP_START_TEST(npf_snat_port_range, test1) struct dp_test_pkt_desc_t v4_pktA_pre = { .text = "Forw, Host1 Ins -> Host3 Outs, pre-NAT", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "10.0.1.1", .l2_src = "aa:bb:cc:dd:2:b1", .l3_dst = "172.0.2.3", @@ -1638,7 +2347,7 @@ DP_START_TEST(npf_snat_port_range, test1) struct dp_test_pkt_desc_t v4_pktA_post = { .text = "Forw, Host1 Ins -> Host3 Outs, post-NAT", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "172.0.2.18", .l2_src = dp_test_intf_name2mac_str("dp2T1"), .l3_dst = "172.0.2.3", @@ -1686,7 +2395,7 @@ DP_START_TEST(npf_snat_port_range, test1) * The validation callback should have set nat_ctx->eport to the value * chosen by the NAT translation. */ - dp_test_npf_portmap_port_verify("172.0.2.18", nat_ctx->eport); + dp_test_npf_portmap_port_verify("tcp", "172.0.2.18", nat_ctx->eport); dp_test_npf_nat_session_verify(NULL, pre->l3_src, pre->l4.tcp.sport, @@ -1713,7 +2422,7 @@ DP_START_TEST(npf_snat_port_range, test1) struct dp_test_pkt_desc_t v4_pktB_pre = { .text = "Back, Host3 Outs -> Host1 Ints, pre-NAT", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "172.0.2.3", .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = "172.0.2.18", @@ -1737,7 +2446,7 @@ DP_START_TEST(npf_snat_port_range, test1) struct dp_test_pkt_desc_t v4_pktB_post = { .text = "Back, Host3 Outs -> Host1 Ints, post-NAT", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "172.0.2.3", .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = "10.0.1.1", @@ -1802,7 +2511,7 @@ DP_START_TEST(npf_snat_port_range, test1) struct dp_test_pkt_desc_t v4_pktC_pre = { .text = "Forw, Host1 Ins -> Host3 Outs, pre-NAT", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = 
RTE_ETHER_TYPE_IPV4, .l3_src = "10.0.1.1", .l2_src = "aa:bb:cc:dd:2:b1", .l3_dst = "172.0.2.3", @@ -1826,7 +2535,7 @@ DP_START_TEST(npf_snat_port_range, test1) struct dp_test_pkt_desc_t v4_pktC_post = { .text = "Forw, Host1 Ins -> Host3 Outs, post-NAT", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "172.0.2.18", .l2_src = "aa:bb:cc:dd:2:b1", .l3_dst = "172.0.2.3", @@ -1876,7 +2585,7 @@ DP_START_TEST(npf_snat_port_range, test1) */ post->l4.tcp.sport = nat_ctx->eport; - dp_test_npf_portmap_port_verify("172.0.2.18", nat_ctx->eport); + dp_test_npf_portmap_port_verify("tcp", "172.0.2.18", nat_ctx->eport); dp_test_npf_nat_session_verify(NULL, pre->l3_src, pre->l4.tcp.sport, @@ -1928,9 +2637,9 @@ DP_START_TEST(npf_snat_port_range, test1) * Dest 172.0.2.254, port y changed to 10.0.1.x * */ -DP_DECL_TEST_CASE(npf_nat, npf_snat_groups, NULL, NULL); +DP_DECL_TEST_CASE(npf_nat, snat6, NULL, NULL); -DP_START_TEST(npf_snat_groups, test1) +DP_START_TEST(snat6, test1) { struct dp_test_pkt_desc_t *pre, *post; struct rte_mbuf *pre_pak, *post_pak; @@ -1964,6 +2673,7 @@ DP_START_TEST(npf_snat_groups, test1) .ifname = "dp2T1", .proto = IPPROTO_TCP, .map = "dynamic", + .port_alloc = NULL, .from_addr = "ADDR_GRP0", .from_port = "PORT_GRP", .to_addr = NULL, @@ -1997,7 +2707,7 @@ DP_START_TEST(npf_snat_groups, test1) struct dp_test_pkt_desc_t v4_pkt1_pre = { .text = "Fwd, Ins host1 -> Outs host3, pre-NAT", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "10.0.1.1", .l2_src = "aa:bb:cc:dd:2:b1", .l3_dst = "172.0.2.3", @@ -2021,7 +2731,7 @@ DP_START_TEST(npf_snat_groups, test1) struct dp_test_pkt_desc_t v4_pkt1_post = { .text = "Fwd, Ins host1 -> Outs host3, post-NAT", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "172.0.2.254", .l2_src = "aa:bb:cc:dd:2:b1", .l3_dst = "172.0.2.3", @@ -2063,8 +2773,9 @@ DP_START_TEST(npf_snat_groups, test1) /* Verify pkt count 
*/ dp_test_npf_snat_verify_pkts(snat.ifname, snat.rule, 1); - dp_test_npf_portmap_verify("172.0.2.254", "ACTIVE", 1); - dp_test_npf_portmap_port_verify("172.0.2.254", pre->l4.tcp.sport); + dp_test_npf_portmap_verify("tcp", "172.0.2.254", "ACTIVE", 1); + dp_test_npf_portmap_port_verify("tcp", "172.0.2.254", + pre->l4.tcp.sport); /* Cleanup */ @@ -2083,3 +2794,88 @@ DP_START_TEST(npf_snat_groups, test1) dp_test_nl_del_ip_addr_and_connected("dp1T0", "10.0.1.254/24"); } DP_END_TEST; + + +/* + * Tests SNAT where same source address and source port are presented to SNAT + * with different protocols. + * + * The second session will have the same trans port. + */ +DP_DECL_TEST_CASE(npf_nat, snat7, NULL, NULL); + +DP_START_TEST(snat7, test1) +{ + /* Setup interfaces and neighbours */ + dp_test_nl_add_ip_addr_and_connected("dp1T0", "192.0.2.1/24"); + dp_test_nl_add_ip_addr_and_connected("dp2T1", "203.0.113.1/24"); + dp_test_nl_add_ip_addr_and_connected("dp2T1", "203.0.114.1/24"); + + dp_test_netlink_add_neigh("dp1T0", "192.0.2.103", + "aa:bb:cc:16:0:20"); + dp_test_netlink_add_neigh("dp2T1", "203.0.113.203", + "aa:bb:cc:18:0:1"); + dp_test_netlink_add_neigh("dp2T1", "203.0.114.203", + "aa:bb:cc:18:0:1"); + + struct dp_test_npf_nat_rule_t snat = { + .desc = "snat rule", + .rule = "10", + .ifname = "dp2T1", + .proto = NAT_NULL_PROTO, + .map = "dynamic", + .port_alloc = NULL, + .from_addr = "192.0.2.0/24", + .from_port = NULL, + .to_addr = NULL, + .to_port = NULL, + .trans_addr = "masquerade", /* 203.0.113.1 */ + .trans_port = NULL + }; + + dp_test_npf_snat_add(&snat, true); + + /* UDP Forwards */ + dpt_udp("dp1T0", "aa:bb:cc:16:0:20", + "192.0.2.103", 10000, "203.0.113.203", 60000, + "203.0.113.1", 10000, "203.0.113.203", 60000, + "aa:bb:cc:18:0:1", "dp2T1", + DP_TEST_FWD_FORWARDED); + + /* UDP Back */ + dpt_udp("dp2T1", "aa:bb:cc:18:0:1", + "203.0.113.203", 60000, "203.0.113.1", 10000, + "203.0.113.203", 60000, "192.0.2.103", 10000, + "aa:bb:cc:16:0:20", "dp1T0", + 
DP_TEST_FWD_FORWARDED); + + /* TCP Forwards */ + dpt_tcp(TH_SYN, "dp1T0", "aa:bb:cc:16:0:20", + "192.0.2.103", 10000, "203.0.113.203", 60001, + "203.0.113.1", 10000, "203.0.113.203", 60001, + "aa:bb:cc:18:0:1", "dp2T1", + DP_TEST_FWD_FORWARDED); + + /* TCP Back */ + dpt_tcp(TH_SYN | TH_ACK, "dp2T1", "aa:bb:cc:18:0:1", + "203.0.113.203", 60001, "203.0.113.1", 10000, + "203.0.113.203", 60001, "192.0.2.103", 10000, + "aa:bb:cc:16:0:20", "dp1T0", + DP_TEST_FWD_FORWARDED); + + dp_test_npf_snat_del(snat.ifname, snat.rule, true); + + dp_test_npf_cleanup(); + + dp_test_netlink_del_neigh("dp1T0", "192.0.2.103", + "aa:bb:cc:16:0:20"); + dp_test_netlink_del_neigh("dp2T1", "203.0.113.203", + "aa:bb:cc:18:0:1"); + dp_test_netlink_del_neigh("dp2T1", "203.0.114.203", + "aa:bb:cc:18:0:1"); + + dp_test_nl_del_ip_addr_and_connected("dp1T0", "192.0.2.1/24"); + dp_test_nl_del_ip_addr_and_connected("dp2T1", "203.0.113.1/24"); + dp_test_nl_del_ip_addr_and_connected("dp2T1", "203.0.114.1/24"); + +} DP_END_TEST; diff --git a/tests/whole_dp/src/dp_test_npf_nat64.c b/tests/whole_dp/src/dp_test_npf_nat64.c index edbffbe7..51005eb8 100644 --- a/tests/whole_dp/src/dp_test_npf_nat64.c +++ b/tests/whole_dp/src/dp_test_npf_nat64.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2021, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -26,12 +26,12 @@ #include "dp_test.h" #include "dp_test_controller.h" #include "dp_test_cmd_state.h" -#include "dp_test_netlink_state.h" -#include "dp_test_lib.h" +#include "dp_test_netlink_state_internal.h" +#include "dp_test_lib_internal.h" #include "dp_test_lib_exp.h" -#include "dp_test_lib_intf.h" +#include "dp_test_lib_intf_internal.h" #include "dp_test_lib_pkt.h" -#include "dp_test_pktmbuf_lib.h" +#include "dp_test_pktmbuf_lib_internal.h" #include "dp_test_console.h" #include "dp_test_json_utils.h" #include "dp_test_npf_sess_lib.h" @@ -130,18 +130,17 @@ nat64_nat64_hook_v6_in(const char *ifname, struct dp_test_pkt_desc_t *pdesc, struct npf_config *npf_config; struct pktmbuf_mdata *mdata; char real_ifname[IFNAMSIZ]; - npf_decision_t decision; + nat64_decision_t decision; struct rte_mbuf *mbuf; - npf_action_t action; npf_session_t *se6; struct npf_if *nif; struct ifnet *ifp; - bool rv, intl_hpin = false; - int error = 0; + bool intl_hpin = false; + int rc, error = 0; /* Get interface pointers */ dp_test_intf_real(ifname, real_ifname); - ifp = ifnet_byifname(real_ifname); + ifp = dp_ifnet_byifname(real_ifname); dp_test_fail_unless(ifp, "ifp for %s", ifname); /* Get npf config pointers */ @@ -155,8 +154,8 @@ nat64_nat64_hook_v6_in(const char *ifname, struct dp_test_pkt_desc_t *pdesc, /* Cache IPv6 packet */ npf_cache_init(npc); - rv = npf_cache_all(npc, mbuf, htons(ETHER_TYPE_IPv6)); - dp_test_fail_unless(rv, "packet cache"); + rc = npf_cache_all(npc, mbuf, htons(RTE_ETHER_TYPE_IPV6)); + dp_test_fail_unless(rc == 0, "packet cache"); dp_test_fail_unless((npc->npc_info & NPC_IP6) != 0, "packet cache info %x", npc->npc_info); @@ -175,17 +174,21 @@ nat64_nat64_hook_v6_in(const char *ifname, struct dp_test_pkt_desc_t *pdesc, dp_test_fail_unless(se6, "An IPv6 session should already exist"); } - decision = nat64_hook(&action, npf_config, &se6, ifp, npc, &mbuf, - PFIL_IN, npf_flags); + decision = npf_nat64_6to4_in(npf_config, &se6, ifp, npc, &mbuf, + npf_flags, 
&error); /* * Verify outcome of nat64_hook */ - /* expect a pass */ - dp_test_fail_unless(decision == NPF_DECISION_PASS, - "Expected PASS, got %s", - npf_decision_str(decision)); + /* From IPv6 ... */ + dp_test_fail_unless((*npf_flags & NPF_FLAG_FROM_IPV6) != 0, + "npf_flags 0x%x", *npf_flags); + + /* ... going to IPv4 */ + dp_test_fail_unless(decision == NAT64_DECISION_TO_V4, + "Expected TO_V4, got %s", + nat64_decision_str(decision)); /* * If nat64 creates a session, check it is not passed back to @@ -196,11 +199,6 @@ nat64_nat64_hook_v6_in(const char *ifname, struct dp_test_pkt_desc_t *pdesc, !se6, "nat64_hook should not pass session back" " to hook_track"); - dp_test_fail_unless((*npf_flags & NPF_FLAG_FROM_IPV6) != 0, - "npf_flags 0x%x", *npf_flags); - - dp_test_fail_unless(action == NPF_ACTION_TO_V4, "action %u", action); - /* * Do we expect pkt metadata? */ @@ -245,17 +243,16 @@ nat64_nat64_hook_v4_out(const char *ifname, struct rte_mbuf *mbuf, npf_cache_t npc_cache, *npc = &npc_cache; struct npf_config *npf_config; char real_ifname[IFNAMSIZ]; - npf_decision_t decision; - npf_action_t action; + nat64_decision_t decision; npf_session_t *se4; struct npf_if *nif; struct ifnet *ifp; - bool rv, intl_hpin = false; - int error = 0; + bool intl_hpin = false; + int rc, error = 0; /* Get interface pointers */ dp_test_intf_real(ifname, real_ifname); - ifp = ifnet_byifname(real_ifname); + ifp = dp_ifnet_byifname(real_ifname); dp_test_fail_unless(ifp, "ifp for %s", ifname); /* Get npf config pointers */ @@ -265,8 +262,8 @@ nat64_nat64_hook_v4_out(const char *ifname, struct rte_mbuf *mbuf, /* Cache IPv4 packet */ npf_cache_init(npc); - rv = npf_cache_all(npc, mbuf, htons(ETHER_TYPE_IPv4)); - dp_test_fail_unless(rv, "packet cache"); + rc = npf_cache_all(npc, mbuf, htons(RTE_ETHER_TYPE_IPV4)); + dp_test_fail_unless(rc == 0, "packet cache"); dp_test_fail_unless((npc->npc_info & NPC_IP4) != 0, "packet cache info %x", npc->npc_info); @@ -283,18 +280,17 @@ 
nat64_nat64_hook_v4_out(const char *ifname, struct rte_mbuf *mbuf, dp_test_fail_unless(se4, "An IPv4 session should already exist"); - decision = nat64_hook(&action, npf_config, &se4, ifp, npc, &mbuf, - PFIL_OUT, npf_flags); + decision = npf_nat64_6to4_out(&se4, ifp, npc, &mbuf, npf_flags, &error); - dp_test_fail_unless(decision == NPF_DECISION_PASS, + dp_test_fail_unless(decision == NAT64_DECISION_PASS, "Expected PASS, got %s", - npf_decision_str(decision)); + nat64_decision_str(decision)); /* IPv4 session should be created by nat64_hook if necessary */ dp_test_fail_unless(se4, "session is NULL"); /* simulate end of npf_hook_track */ - if (decision != NPF_DECISION_BLOCK && se4) { + if (decision != NAT64_DECISION_DROP && se4) { /* This is a noop if already active */ error = npf_session_activate(se4, ifp, npc, mbuf); dp_test_fail_unless(error == 0, @@ -316,18 +312,17 @@ nat64_nat64_hook_v4_in(const char *ifname, struct dp_test_pkt_desc_t *pdesc, struct npf_config *npf_config; struct pktmbuf_mdata *mdata; char real_ifname[IFNAMSIZ]; - npf_decision_t decision; + nat64_decision_t decision; struct rte_mbuf *mbuf; - npf_action_t action; npf_session_t *se4; struct npf_if *nif; struct ifnet *ifp; - bool rv, intl_hpin = false; - int error = 0; + bool intl_hpin = false; + int rc, error = 0; /* Get interface pointers */ dp_test_intf_real(ifname, real_ifname); - ifp = ifnet_byifname(real_ifname); + ifp = dp_ifnet_byifname(real_ifname); dp_test_fail_unless(ifp, "ifp for %s", ifname); /* Get npf config pointers */ @@ -341,8 +336,8 @@ nat64_nat64_hook_v4_in(const char *ifname, struct dp_test_pkt_desc_t *pdesc, /* Cache IPv4 packet */ npf_cache_init(npc); - rv = npf_cache_all(npc, mbuf, htons(ETHER_TYPE_IPv4)); - dp_test_fail_unless(rv, "packet cache"); + rc = npf_cache_all(npc, mbuf, htons(RTE_ETHER_TYPE_IPV4)); + dp_test_fail_unless(rc == 0, "packet cache"); dp_test_fail_unless((npc->npc_info & NPC_IP4) != 0, "packet cache info %x", npc->npc_info); @@ -361,17 +356,21 @@ 
nat64_nat64_hook_v4_in(const char *ifname, struct dp_test_pkt_desc_t *pdesc, dp_test_fail_unless(se4, "An IPv4 session should already exist"); } - decision = nat64_hook(&action, npf_config, &se4, ifp, npc, &mbuf, - PFIL_IN, npf_flags); + decision = npf_nat64_4to6_in(npf_config, &se4, ifp, npc, &mbuf, + npf_flags, &error); /* * Verify outcome of nat64_hook */ - /* expect a pass */ - dp_test_fail_unless(decision == NPF_DECISION_PASS, - "Expected PASS, got %s", - npf_decision_str(decision)); + /* From IPv4 ... */ + dp_test_fail_unless((*npf_flags & NPF_FLAG_FROM_IPV4) != 0, + "npf_flags 0x%x", *npf_flags); + + /* ... going to IPv6 */ + dp_test_fail_unless(decision == NAT64_DECISION_TO_V6, + "Expected TO_V6, got %s", + nat64_decision_str(decision)); /* * If nat64 creates a session, check it is not passed back to @@ -382,11 +381,6 @@ nat64_nat64_hook_v4_in(const char *ifname, struct dp_test_pkt_desc_t *pdesc, !se4, "nat64_hook should not pass session back" " to hook_track"); - dp_test_fail_unless((*npf_flags & NPF_FLAG_FROM_IPV4) != 0, - "npf_flags 0x%x", *npf_flags); - - dp_test_fail_unless(action == NPF_ACTION_TO_V6, "action %u", action); - /* * Do we expect pkt metadata? 
*/ @@ -424,17 +418,16 @@ nat64_nat64_hook_v6_out(const char *ifname, struct rte_mbuf *mbuf, npf_cache_t npc_cache, *npc = &npc_cache; struct npf_config *npf_config; char real_ifname[IFNAMSIZ]; - npf_decision_t decision; - npf_action_t action; + nat64_decision_t decision; npf_session_t *se6; struct npf_if *nif; struct ifnet *ifp; - bool rv, intl_hpin = false; - int error = 0; + bool intl_hpin = false; + int rc, error = 0; /* Get interface pointers */ dp_test_intf_real(ifname, real_ifname); - ifp = ifnet_byifname(real_ifname); + ifp = dp_ifnet_byifname(real_ifname); dp_test_fail_unless(ifp, "ifp for %s", ifname); /* Get npf config pointers */ @@ -444,8 +437,8 @@ nat64_nat64_hook_v6_out(const char *ifname, struct rte_mbuf *mbuf, /* Cache IPv6 packet */ npf_cache_init(npc); - rv = npf_cache_all(npc, mbuf, htons(ETHER_TYPE_IPv6)); - dp_test_fail_unless(rv, "packet cache"); + rc = npf_cache_all(npc, mbuf, htons(RTE_ETHER_TYPE_IPV6)); + dp_test_fail_unless(rc == 0, "packet cache"); dp_test_fail_unless((npc->npc_info & NPC_IP6) != 0, "packet cache info %x", npc->npc_info); @@ -462,18 +455,17 @@ nat64_nat64_hook_v6_out(const char *ifname, struct rte_mbuf *mbuf, dp_test_fail_unless(se6, "An IPv6 session should already exist"); - decision = nat64_hook(&action, npf_config, &se6, ifp, npc, &mbuf, - PFIL_OUT, npf_flags); + decision = npf_nat64_4to6_out(&se6, ifp, npc, &mbuf, npf_flags, &error); - dp_test_fail_unless(decision == NPF_DECISION_PASS, + dp_test_fail_unless(decision == NAT64_DECISION_PASS, "Expected PASS, got %s", - npf_decision_str(decision)); + nat64_decision_str(decision)); /* IPv6 session should be created by nat64_hook */ dp_test_fail_unless(se6, "session is NULL"); /* simulate end of npf_hook_track */ - if (decision != NPF_DECISION_BLOCK && se6) { + if (decision != NAT64_DECISION_DROP && se6) { error = npf_session_activate(se6, ifp, npc, mbuf); dp_test_fail_unless(error == 0, "error activating output session"); @@ -528,7 +520,7 @@ DP_START_TEST(nat64_a1, test1) 
struct dp_test_pkt_desc_t v6_pktA_UDP = { .text = "IPv6 UDP", .len = 20, - .ether_type = ETHER_TYPE_IPv6, + .ether_type = RTE_ETHER_TYPE_IPV6, .l3_src = "2001:101:1::a0a:101", .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = "2001:101:2::a0a:201", @@ -672,7 +664,7 @@ DP_START_TEST(nat64_a2, test) struct dp_test_pkt_desc_t v4_pktA_UDP = { .text = "Packet A, IPv4 UDP", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "10.10.2.1", .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = "10.10.1.1", @@ -838,7 +830,7 @@ DP_START_TEST(nat64_a3, test) struct dp_test_pkt_desc_t v6_pktA_UDP = { .text = "IPv6 UDP", .len = 20, - .ether_type = ETHER_TYPE_IPv6, + .ether_type = RTE_ETHER_TYPE_IPV6, .l3_src = "2001:101:1::a0a:101", .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = "2001:101:2::a0a:201", @@ -858,7 +850,7 @@ DP_START_TEST(nat64_a3, test) struct dp_test_pkt_desc_t v4_pktA_UDP = { .text = "Packet A, IPv4 UDP", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "10.10.2.1", .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = "10.10.1.1", @@ -1022,7 +1014,7 @@ DP_START_TEST(nat64_a4, test1) struct dp_test_pkt_desc_t v6_pktA_UDP = { .text = "IPv6 UDP", .len = 20, - .ether_type = ETHER_TYPE_IPv6, + .ether_type = RTE_ETHER_TYPE_IPV6, .l3_src = "2001:101:1::a0a:101", .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = "2001:101:2::a0a:201", @@ -1138,7 +1130,7 @@ static void _nat64_v6_to_v4_udp(uint16_t pre_sport, uint16_t pre_dport, struct dp_test_pkt_desc_t v6_pktA_UDP = { .text = "Packet A, IPv6 UDP", .len = 20, - .ether_type = ETHER_TYPE_IPv6, + .ether_type = RTE_ETHER_TYPE_IPV6, .l3_src = v6_saddr, .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = v6_daddr, @@ -1158,7 +1150,7 @@ static void _nat64_v6_to_v4_udp(uint16_t pre_sport, uint16_t pre_dport, struct dp_test_pkt_desc_t v4_pktA_UDP = { .text = "Packet A, IPv4 UDP", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = v4_saddr, .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst 
= v4_daddr, @@ -1207,7 +1199,7 @@ static void _nat64_v4_to_v6_udp(uint16_t pre_sport, uint16_t pre_dport, struct dp_test_pkt_desc_t v4_pktA_UDP = { .text = "Packet A, IPv4 UDP", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = v4_saddr, .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = v4_daddr, @@ -1227,7 +1219,7 @@ static void _nat64_v4_to_v6_udp(uint16_t pre_sport, uint16_t pre_dport, struct dp_test_pkt_desc_t v6_pktA_UDP = { .text = "Packet A, IPv6 UDP", .len = 20, - .ether_type = ETHER_TYPE_IPv6, + .ether_type = RTE_ETHER_TYPE_IPV6, .l3_src = v6_saddr, .l2_src = "aa:bb:cc:dd:2:b1", .l3_dst = v6_daddr, @@ -1281,7 +1273,7 @@ static void _nat64_v6_to_v4_icmp(uint8_t icmp_type, struct dp_test_pkt_desc_t v6_pktA_ICMP = { .text = "Packet A, IPv6 ICMP", .len = 20, - .ether_type = ETHER_TYPE_IPv6, + .ether_type = RTE_ETHER_TYPE_IPV6, .l3_src = v6_saddr, .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = v6_daddr, @@ -1305,7 +1297,7 @@ static void _nat64_v6_to_v4_icmp(uint8_t icmp_type, struct dp_test_pkt_desc_t v4_pktA_ICMP = { .text = "Packet A, IPv4 ICMP", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = v4_saddr, .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = v4_daddr, @@ -1362,7 +1354,7 @@ static void _nat64_v4_to_v6_icmp(uint8_t icmp_type, struct dp_test_pkt_desc_t v4_pktA_ICMP = { .text = "Packet A, IPv4 ICMP", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = v4_saddr, .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = v4_daddr, @@ -1386,7 +1378,7 @@ static void _nat64_v4_to_v6_icmp(uint8_t icmp_type, struct dp_test_pkt_desc_t v6_pktA_ICMP = { .text = "Packet A, IPv6 ICMP", .len = 20, - .ether_type = ETHER_TYPE_IPv6, + .ether_type = RTE_ETHER_TYPE_IPV6, .l3_src = v6_saddr, .l2_src = "aa:bb:cc:dd:2:b1", .l3_dst = v6_daddr, @@ -1678,7 +1670,7 @@ DP_START_TEST(nat64_b3, test1) "npf-ut add nat64:NAT64_GRP1 10 action=accept " "src-addr=2001:101:1::/96 dst-addr=2001:101:2::/96 " 
"handle=nat64(" - "stype=overload,srange=10.10.1.1-10.10.1.8," + "stype=overload,saddr=10.10.1.0/28," "dtype=one2one,daddr=10.10.2.1/32)"); dp_test_npf_commit(); @@ -2041,6 +2033,7 @@ DP_START_TEST(nat64_b6, test1) .ifname = "dp2T1", .proto = NAT_NULL_PROTO, .map = "dynamic", + .port_alloc = NULL, .from_addr = "10.10.1.1", .from_port = NULL, .to_addr = NULL, @@ -2865,6 +2858,7 @@ DP_START_TEST(nat64_c4, test1) .ifname = "dp2T1", .proto = NAT_NULL_PROTO, .map = "dynamic", + .port_alloc = NULL, .from_addr = NULL, .from_port = NULL, .to_addr = "10.10.1.254", @@ -3183,7 +3177,7 @@ DP_START_TEST(nat64_96, test1) struct dp_test_pkt_desc_t v6_pktA_UDP = { .text = "Packet A, IPv6 UDP", .len = 20, - .ether_type = ETHER_TYPE_IPv6, + .ether_type = RTE_ETHER_TYPE_IPV6, .l3_src = "2001:101:1::a0a:101", .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = "2001:101:2::a0a:201", @@ -3203,7 +3197,7 @@ DP_START_TEST(nat64_96, test1) struct dp_test_pkt_desc_t v4_pktA_UDP = { .text = "Packet A, IPv4 UDP", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "10.10.1.1", .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = "10.10.2.1", @@ -3223,7 +3217,7 @@ DP_START_TEST(nat64_96, test1) struct dp_test_pkt_desc_t v6_pktA_PING = { .text = "Packet A, IPv6 PING", .len = 20, - .ether_type = ETHER_TYPE_IPv6, + .ether_type = RTE_ETHER_TYPE_IPV6, .l3_src = "2001:101:1::a0a:101", .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = "2001:101:2::a0a:201", @@ -3247,7 +3241,7 @@ DP_START_TEST(nat64_96, test1) struct dp_test_pkt_desc_t v4_pktA_PING = { .text = "Packet A, IPv4 PING", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "10.10.1.1", .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = "10.10.2.1", @@ -3532,7 +3526,7 @@ DP_START_TEST(nat64_64, test1) struct dp_test_pkt_desc_t v6_pktA_UDP = { .text = "Packet A, IPv6 UDP", .len = 20, - .ether_type = ETHER_TYPE_IPv6, + .ether_type = RTE_ETHER_TYPE_IPV6, .l3_src = "2001:101:1:0:a:a01:100:1", .l2_src = 
"aa:bb:cc:dd:1:a1", .l3_dst = "2001:101:2:0:a:a02:100:1", @@ -3552,7 +3546,7 @@ DP_START_TEST(nat64_64, test1) struct dp_test_pkt_desc_t v4_pktA_UDP = { .text = "Packet A, IPv4 UDP", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "10.10.1.1", .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = "10.10.2.1", @@ -3572,7 +3566,7 @@ DP_START_TEST(nat64_64, test1) struct dp_test_pkt_desc_t v6_pktA_PING = { .text = "Packet A, IPv6 PING", .len = 20, - .ether_type = ETHER_TYPE_IPv6, + .ether_type = RTE_ETHER_TYPE_IPV6, .l3_src = "2001:101:1:0:a:a01:100:1", .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = "2001:101:2:0:a:a02:100:1", @@ -3596,7 +3590,7 @@ DP_START_TEST(nat64_64, test1) struct dp_test_pkt_desc_t v4_pktA_PING = { .text = "Packet A, IPv4 PING", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "10.10.1.1", .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = "10.10.2.1", @@ -3787,7 +3781,7 @@ DP_START_TEST(nat64_56, test1) struct dp_test_pkt_desc_t v6_pktA_UDP = { .text = "Packet A, IPv6 UDP", .len = 20, - .ether_type = ETHER_TYPE_IPv6, + .ether_type = RTE_ETHER_TYPE_IPV6, .l3_src = "2001:101:1:a:a:101:0:1", .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = "2001:101:2:a:a:201:0:1", @@ -3807,7 +3801,7 @@ DP_START_TEST(nat64_56, test1) struct dp_test_pkt_desc_t v4_pktA_UDP = { .text = "Packet A, IPv4 UDP", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "10.10.1.1", .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = "10.10.2.1", @@ -3827,7 +3821,7 @@ DP_START_TEST(nat64_56, test1) struct dp_test_pkt_desc_t v6_pktA_PING = { .text = "Packet A, IPv6 PING", .len = 20, - .ether_type = ETHER_TYPE_IPv6, + .ether_type = RTE_ETHER_TYPE_IPV6, .l3_src = "2001:101:1:a:a:101:0:1", .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = "2001:101:2:a:a:201:0:1", @@ -3851,7 +3845,7 @@ DP_START_TEST(nat64_56, test1) struct dp_test_pkt_desc_t v4_pktA_PING = { .text = "Packet A, IPv4 PING", .len = 20, - .ether_type = 
ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "10.10.1.1", .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = "10.10.2.1", @@ -4045,7 +4039,7 @@ DP_START_TEST(nat64_48, test1) struct dp_test_pkt_desc_t v6_pktA_UDP = { .text = "Packet A, IPv6 UDP", .len = 20, - .ether_type = ETHER_TYPE_IPv6, + .ether_type = RTE_ETHER_TYPE_IPV6, .l3_src = "2001:101:1:a0a:1:100:0:1", .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = "2001:101:2:a0a:2:100:0:1", @@ -4065,7 +4059,7 @@ DP_START_TEST(nat64_48, test1) struct dp_test_pkt_desc_t v4_pktA_UDP = { .text = "Packet A, IPv4 UDP", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "10.10.1.1", .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = "10.10.2.1", @@ -4085,7 +4079,7 @@ DP_START_TEST(nat64_48, test1) struct dp_test_pkt_desc_t v6_pktA_PING = { .text = "Packet A, IPv6 PING", .len = 20, - .ether_type = ETHER_TYPE_IPv6, + .ether_type = RTE_ETHER_TYPE_IPV6, .l3_src = "2001:101:1:a0a:1:100:0:1", .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = "2001:101:2:a0a:2:100:0:1", @@ -4109,7 +4103,7 @@ DP_START_TEST(nat64_48, test1) struct dp_test_pkt_desc_t v4_pktA_PING = { .text = "Packet A, IPv4 PING", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "10.10.1.1", .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = "10.10.2.1", diff --git a/tests/whole_dp/src/dp_test_npf_nat_lib.c b/tests/whole_dp/src/dp_test_npf_nat_lib.c index 1142d4be..f8d5f5da 100644 --- a/tests/whole_dp/src/dp_test_npf_nat_lib.c +++ b/tests/whole_dp/src/dp_test_npf_nat_lib.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2021, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -17,11 +17,11 @@ #include "dp_test.h" #include "dp_test_str.h" -#include "dp_test_lib.h" -#include "dp_test_lib_intf.h" +#include "dp_test_lib_internal.h" +#include "dp_test_lib_intf_internal.h" #include "dp_test_lib_exp.h" -#include "dp_test_pktmbuf_lib.h" -#include "dp_test_netlink_state.h" +#include "dp_test_pktmbuf_lib_internal.h" +#include "dp_test_netlink_state_internal.h" #include "dp_test_console.h" #include "dp_test_json_utils.h" #include "dp_test_npf_lib.h" @@ -35,29 +35,19 @@ * * Source NAT example: * npf-ut add snat: nat-type=snat [trans-addr=] - * [trans-port ] action=accept [proto=] + * [trans-port ] action=accept [proto-final=] * [src-addr=
[/]] [src-port=] * [dst-addr=
[/]] [dst-port=] * * Destination NAT example: * npf-ut add dnat: nat-type=dnat [trans-addr=] - * [trans-port ] action=accept [proto=] + * [trans-port ] action=accept [proto-final=] * [src-addr=
[/]] [src-port=] * [dst-addr=
[/]] [dst-port=] */ static bool dp_test_npf_nat_debug; -void dp_test_npf_nat_set_debug(bool on) -{ - dp_test_npf_nat_debug = on; -} - -bool dp_test_npf_nat_get_debug(void) -{ - return dp_test_npf_nat_debug; -} - static const char *str_or_any(const char *str) { return str ?: "any"; @@ -119,6 +109,15 @@ dp_test_npf_nat_map_cmd(char *str, size_t len, } else l += spush(str+l, len-l, "trans-port=%s ", nat->trans_port); + if (nat->port_alloc) { + if (strcmp(nat->port_alloc, "sequential") == 0) + l += spush(str+l, len-l, + "trans-port-alloc=sequential "); + else if (strcmp(nat->port_alloc, "random") == 0) + l += spush(str+l, len-l, + "trans-port-alloc=random "); + } + return l; } @@ -145,7 +144,7 @@ dp_test_npf_nat_match_cmd(char *str, size_t len, */ if (nat->proto != NAT_NULL_PROTO) - l += spush(str+l, len-l, "proto=%d ", nat->proto); + l += spush(str+l, len-l, "proto-final=%d ", nat->proto); if (from_addr != NULL) { if (strchr(from_addr, ':') || strchr(from_addr, '.')) @@ -193,7 +192,7 @@ _dp_test_npf_nat_add(const struct dp_test_npf_nat_rule_t *nat, bool snat, { char rifname[IFNAMSIZ]; char match[100]; - char map[100]; + char map[140]; uint l; dp_test_intf_real(nat->ifname, rifname); @@ -279,6 +278,62 @@ _dp_test_npf_nat_del(const char *ifname, const char *rule, bool snat, } } + +/* + * Simple SNAT rule config + */ +void dpt_snat_cfg(const char *intf, uint8_t ipproto, + const char *from_addr, const char *trans_addr, + bool add) +{ + struct dp_test_npf_nat_rule_t snat = { + .desc = "snat rule", + .rule = "10", + .ifname = intf, + .proto = ipproto, + .map = "dynamic", + .port_alloc = NULL, + .from_addr = from_addr, + .from_port = NULL, + .to_addr = NULL, + .to_port = NULL, + .trans_addr = trans_addr, + .trans_port = NULL, + }; + + if (add) + dp_test_npf_snat_add(&snat, true); + else + dp_test_npf_snat_del(snat.ifname, snat.rule, true); +} +/* + * Simple DNAT rule config + */ +void dpt_dnat_cfg(const char *intf, uint8_t ipproto, + const char *to_addr, const char 
*trans_addr, + bool add) +{ + struct dp_test_npf_nat_rule_t dnat = { + .desc = "dnat rule", + .rule = "10", + .ifname = intf, + .proto = ipproto, + .map = "dynamic", + .port_alloc = NULL, + .from_addr = NULL, + .from_port = NULL, + .to_addr = to_addr, + .to_port = NULL, + .trans_addr = trans_addr, + .trans_port = NULL, + }; + + if (add) + dp_test_npf_dnat_add(&dnat, true); + else + dp_test_npf_dnat_del(dnat.ifname, dnat.rule, true); +} + /* * Determine what the npf rule "match" field should be in the "npf-op show" * request json reply. @@ -300,7 +355,7 @@ dp_test_npf_nat_match_list(char *str, size_t len, * Note the trailing space at the end of the string. */ if (nat->proto != NAT_NULL_PROTO) - l += spush(str+l, len-l, "proto %d ", nat->proto); + l += spush(str+l, len-l, "proto-final %d ", nat->proto); if (nat->from_addr || nat->from_port) { l += spush(str+l, len-l, "from %s ", @@ -579,49 +634,6 @@ _dp_test_npf_nat64_del(const struct dp_test_npf_nat64_rule_t *rule, * NAT validation context helper functions */ -static void -dp_test_npf_nat_ctx_set_desc(struct dp_test_nat_ctx *ctx) -{ - spush(ctx->desc, sizeof(ctx->desc), "%sNAT %s", - ctx->dnat ? "D" : "S", - ctx->dir == DP_TEST_NAT_DIR_FORW ? 
"Forw" : "Back"); -} - -/* - * - */ -void -dp_test_npf_nat_ctx_set_dnat(struct dp_test_nat_ctx *ctx) -{ - ctx->dnat = true; - dp_test_npf_nat_ctx_set_desc(ctx); -} - -void -dp_test_npf_nat_ctx_set_snat(struct dp_test_nat_ctx *ctx) -{ - ctx->dnat = false; - dp_test_npf_nat_ctx_set_desc(ctx); -} - -void -dp_test_npf_nat_ctx_set_dir(struct dp_test_nat_ctx *ctx, - enum dp_test_nat_dir dir) -{ - ctx->dir = dir; - dp_test_npf_nat_ctx_set_desc(ctx); -} - -void -dp_test_npf_nat_ctx_set_oaddr(struct dp_test_nat_ctx *ctx, uint32_t oaddr) -{ - ctx->oaddr = oaddr; - - if (!inet_ntop(AF_INET, &oaddr, ctx->oaddr_str, - sizeof(ctx->oaddr_str))) - spush(ctx->oaddr_str, sizeof(ctx->oaddr_str), "0x%X", oaddr); -} - void dp_test_npf_nat_ctx_set_oaddr_str(struct dp_test_nat_ctx *ctx, const char *oaddr_str) @@ -631,16 +643,6 @@ dp_test_npf_nat_ctx_set_oaddr_str(struct dp_test_nat_ctx *ctx, inet_pton(AF_INET, oaddr_str, &ctx->oaddr); } -void -dp_test_npf_nat_ctx_set_taddr(struct dp_test_nat_ctx *ctx, uint32_t taddr) -{ - ctx->taddr = taddr; - - if (!inet_ntop(AF_INET, &taddr, ctx->taddr_str, - sizeof(ctx->taddr_str))) - spush(ctx->taddr_str, sizeof(ctx->taddr_str), "0x%X", taddr); -} - void dp_test_npf_nat_ctx_set_taddr_str(struct dp_test_nat_ctx *ctx, const char *taddr_str) @@ -784,10 +786,11 @@ dp_test_nat_validate(struct rte_mbuf *mbuf, struct ifnet *ifp, * incorrect. */ if (nat->verify_session) { - const char *trans_addr, *src_addr, *dst_addr; - uint16_t trans_port, src_id, dst_id; - int trans_type; - const char *intf; + const char *trans_addr = NULL; + const char *src_addr = NULL, *dst_addr = NULL; + uint16_t trans_port = 0, src_id = 0, dst_id = 0; + int trans_type = 0; + const char *intf = NULL; /* * First determine what addresses and ports we expect to see @@ -850,10 +853,10 @@ dp_test_nat_validate(struct rte_mbuf *mbuf, struct ifnet *ifp, * When this is the case then the dataplane will send an ARP request. * Catch this here and format a suitable error message. 
*/ - struct ether_hdr *eth; + struct rte_ether_hdr *eth; - eth = rte_pktmbuf_mtod(mbuf, struct ether_hdr *); - if (eth->ether_type == htons(ETHER_TYPE_ARP)) { + eth = rte_pktmbuf_mtod(mbuf, struct rte_ether_hdr *); + if (eth->ether_type == htons(RTE_ETHER_TYPE_ARP)) { char *tpa = (char *)(eth + 1) + 24; spush(str, len, @@ -880,7 +883,7 @@ dp_test_nat_validate(struct rte_mbuf *mbuf, struct ifnet *ifp, * Check destination address for DNAT forwards or SNAT backwards */ if ((nat->dnat && forw) || (!nat->dnat && !forw)) { - if (strcmp(daddr_str, post->l3_dst)) { + if (strcmp(daddr_str, post->l3_dst) != 0) { spush(str, len, "%s, dst IP %s, expd %s", nat->desc, daddr_str, post->l3_dst); return false; @@ -891,7 +894,7 @@ dp_test_nat_validate(struct rte_mbuf *mbuf, struct ifnet *ifp, * Check source address for SNAT forwards or DNAT back */ if ((!nat->dnat && forw) || (nat->dnat && !forw)) { - if (strcmp(saddr_str, post->l3_src)) { + if (strcmp(saddr_str, post->l3_src) != 0) { spush(str, len, "%s, src IP %s, expd %s", nat->desc, saddr_str, post->l3_src); return false; @@ -1106,129 +1109,3 @@ dp_test_npf_json_get_nat_rule(const char *real_ifname, const char *rule, return jrule; } - -/* - * Pretty print NAT firewall rules - */ -void -dp_test_npf_print_nat(const char *desc) -{ - json_object *jnat; - const char *str; - - if (desc) - printf("%s\n", desc); - - jnat = dp_test_npf_json_nat(NULL, NULL); - if (!jnat) - return; - - str = json_object_to_json_string_ext(jnat, JSON_C_TO_STRING_PRETTY); - if (str) - printf("%s\n", str); - - json_object_put(jnat); -} - -/* - * Wrapper around dp_test_pak_receive to create, send, and verify NAT'd - * packets. 
- * - * descr Description of the packet being sent - * pre pre-NAT packet descriptor - * post post-NAT packet descriptor (with translations) - * dir Direction of the packet relative to the NAT session - * (DP_TEST_NAT_DIR_FORW or DP_TEST_NAT_DIR_BACK) - * ttype DP_TEST_TRANS_SNAT, DP_TEST_TRANS_DNAT or DP_TEST_TRANS_NONE - * verify_sess Verify the NAT session exists during packet validation callback - * count Number of packets to send - * delay Delay in seconds between packets - * - * Note, the delay is for use when sending multiple packets, however this - * should *only* be used in a private build, i.e. dont commit test code with a - * non-zero delay. - */ -void -_dp_test_npf_nat_pak_receive(const char *descr, - struct dp_test_pkt_desc_t *pre, - struct dp_test_pkt_desc_t *post, - enum dp_test_nat_dir dir, - enum dp_test_trans_type ttype, - bool verify_sess, - uint count, uint delay, - const char *file, int line) -{ - struct rte_mbuf *pre_pak, *post_pak; - struct dp_test_expected *test_exp; - struct dp_test_nat_ctx nat_context; - struct dp_test_nat_ctx *nctx = &nat_context; - uint i; - - if (!descr || !pre || !post) - _dp_test_fail(file, line, "EINVAL"); - - memset(nctx, 0, sizeof(*nctx)); - - /* - * The NAT packet verification is a wrapper around the dp-test packet - * verification. It checks the IP header has been correctly NAT'd - * *before* the packet comparison is done. - */ - struct dp_test_nat_cb_ctx nat_ctx = { - .snat = NULL, - .dnat = NULL, - .saved_cb = dp_test_pak_verify - }; - - if (ttype == DP_TEST_TRANS_SNAT) - nat_ctx.snat = nctx; - else if (ttype == DP_TEST_TRANS_DNAT) - nat_ctx.dnat = nctx; - - /* - * For each packet ... 
- */ - for (i = 0; i < count; i++) { - /* - * Get pre-nat and post nat packets - */ - pre_pak = dp_test_v4_pkt_from_desc(pre); - post_pak = dp_test_v4_pkt_from_desc(post); - test_exp = _dp_test_exp_from_desc(post_pak, post, NULL, 0, - false, file, line); - rte_pktmbuf_free(post_pak); - - if (count == 1) - spush(test_exp->description, - sizeof(test_exp->description), - "%s", descr); - else - spush(test_exp->description, - sizeof(test_exp->description), - "[%u] %s", i + 1, descr); - - - dp_test_exp_set_fwd_status(test_exp, DP_TEST_FWD_FORWARDED); - - /* - * Setup the NAT context struct, 'nctx', from the ttype, pre - * pkt descriptor and post pkt descriptor. We also derive the - * NAT config from these. - */ - dp_test_nat_set_ctx(nctx, dir, ttype, pre, post, - i == 0 ? verify_sess : false); - - /* - * Setup the pkt validation callback function to - * dp_test_nat_validate_cb - */ - _dp_test_nat_set_validation(&nat_ctx, test_exp, file, line); - - /* Run the test */ - _dp_test_pak_receive(pre_pak, pre->rx_intf, test_exp, - file, __func__, line); - - if (count > 1 && delay) - sleep(delay); - } -} diff --git a/tests/whole_dp/src/dp_test_npf_nat_lib.h b/tests/whole_dp/src/dp_test_npf_nat_lib.h index 90a44583..fd444e6d 100644 --- a/tests/whole_dp/src/dp_test_npf_nat_lib.h +++ b/tests/whole_dp/src/dp_test_npf_nat_lib.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2019-2021, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -34,16 +34,17 @@ struct dp_test_npf_nat_rule_t { const char *to_port; const char *trans_addr; const char *trans_port; + const char *port_alloc; }; -/* - * Enable/disable NAT debugging - * - * 1. Prints the npf string during _dp_test_npf_nat_add - * 2. 
Printd the json object for the nat rule during _dp_test_npf_nat_verify - */ -void dp_test_npf_nat_set_debug(bool on); -bool dp_test_npf_nat_get_debug(void); +/* Simple SNAT and DNAT config */ +void dpt_snat_cfg(const char *intf, uint8_t ipproto, + const char *from_addr, const char *trans_addr, + bool add); + +void dpt_dnat_cfg(const char *intf, uint8_t ipproto, + const char *to_addr, const char *trans_addr, + bool add); /* * Add a NAT rule @@ -154,42 +155,6 @@ enum dp_test_nat_dir { DP_TEST_NAT_DIR_BACK, }; -/* - * Wrapper around dp_test_pak_receive to create, send, and verify NAT'd - * packets. - * - * descr Description of the packet being sent - * pre pre-NAT packet descriptor - * post post-NAT packet descriptor - * dir Direction of the packet relative to the NAT session - * ttype SNAT or DNAT - * verify_sess Verify the NAT session exists during packet validation callback - * count Number of packets to send - * delay Delay in seconds between packets - * - * Note, the delay is for use when sending mutliple packets, however this - * should *only* be used in a private build, i.e. dont commit test code with a - * non-zero delay. - */ -void -_dp_test_npf_nat_pak_receive(const char *descr, - struct dp_test_pkt_desc_t *pre, - struct dp_test_pkt_desc_t *post, - enum dp_test_nat_dir dir, - enum dp_test_trans_type ttype, - bool verify_sess, - uint count, uint delay, - const char *file, int line); - -#define dp_test_npf_nat_pak_receive(descr, pre, post, dir, ttype, vs) \ - _dp_test_npf_nat_pak_receive(descr, pre, post, dir, ttype, \ - vs, 1, 0, __FILE__, __LINE__) - -#define dp_test_npf_nat_pak_receive_n(descr, pre, post, dir, ttype, vs, \ - count, dly) \ - _dp_test_npf_nat_pak_receive(descr, pre, post, dir, ttype, vs, \ - count, dly, __FILE__, __LINE__) - /* * NAT validation context. 
Expectation is as follows: * @@ -247,26 +212,10 @@ struct dp_test_nat_cb_ctx { validate_cb saved_cb; }; -void -dp_test_npf_nat_ctx_set_dnat(struct dp_test_nat_ctx *ctx); - -void -dp_test_npf_nat_ctx_set_snat(struct dp_test_nat_ctx *ctx); - -void -dp_test_npf_nat_ctx_set_dir(struct dp_test_nat_ctx *ctx, - enum dp_test_nat_dir dir); - -void -dp_test_npf_nat_ctx_set_oaddr(struct dp_test_nat_ctx *ctx, uint32_t oaddr); - void dp_test_npf_nat_ctx_set_oaddr_str(struct dp_test_nat_ctx *ctx, const char *oaddr_str); -void -dp_test_npf_nat_ctx_set_taddr(struct dp_test_nat_ctx *ctx, uint32_t taddr); - void dp_test_npf_nat_ctx_set_taddr_str(struct dp_test_nat_ctx *ctx, const char *taddr_str); @@ -307,12 +256,7 @@ _dp_test_nat_set_validation(struct dp_test_nat_cb_ctx *ctx, * should be called once the caller has finished with the object. */ json_object * -dp_test_npf_json_get_nat_rule(const char *ifname, const char *num, bool snat); - -/* - * Pretty print NAT firewall rules - */ -void -dp_test_npf_print_nat(const char *desc); +dp_test_npf_json_get_nat_rule(const char *real_ifname, + const char *rule, bool snat); #endif diff --git a/tests/whole_dp/src/dp_test_npf_nptv6.c b/tests/whole_dp/src/dp_test_npf_nptv6.c index 58c43f78..3d6cebcc 100644 --- a/tests/whole_dp/src/dp_test_npf_nptv6.c +++ b/tests/whole_dp/src/dp_test_npf_nptv6.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2018-2020, AT&T Intellectual Property. All rights reserved. 
* * SPDX-License-Identifier: LGPL-2.1-only * @@ -18,13 +18,13 @@ #include "dp_test.h" #include "dp_test_controller.h" #include "dp_test_cmd_state.h" -#include "dp_test_netlink_state.h" -#include "dp_test_lib.h" +#include "dp_test_netlink_state_internal.h" +#include "dp_test_lib_internal.h" #include "dp_test_str.h" #include "dp_test_lib_exp.h" -#include "dp_test_lib_intf.h" +#include "dp_test_lib_intf_internal.h" #include "dp_test_lib_pkt.h" -#include "dp_test_pktmbuf_lib.h" +#include "dp_test_pktmbuf_lib_internal.h" #include "dp_test_console.h" #include "dp_test_json_utils.h" #include "dp_test_npf_sess_lib.h" @@ -869,9 +869,8 @@ dp_test_npf_show_rules(const char *rstype) char cmd[TEST_MAX_CMD_LEN]; char *response; bool err; - int l = 0; - l += snprintf(cmd+l, sizeof(cmd)-l, "npf-op show all: %s", rstype); + snprintf(cmd, sizeof(cmd), "npf-op show all: %s", rstype); response = dp_test_console_request_w_err(cmd, &err, false); if (!response || err) { @@ -960,7 +959,7 @@ DP_START_TEST(nptv6_4, test) struct dp_test_pkt_desc_t pkt1_pre = { .text = "Internal to external, pre", .len = 20, - .ether_type = ETHER_TYPE_IPv6, + .ether_type = RTE_ETHER_TYPE_IPV6, .l3_src = int_addr, /* <--- Orig */ .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = "2001:DB8:1::2", @@ -979,7 +978,7 @@ DP_START_TEST(nptv6_4, test) struct dp_test_pkt_desc_t pkt1_post = { .text = "Internal to external, post", .len = 20, - .ether_type = ETHER_TYPE_IPv6, + .ether_type = RTE_ETHER_TYPE_IPV6, .l3_src = ext_addr, /* <--- Translated */ .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = "2001:DB8:1::2", @@ -998,7 +997,7 @@ DP_START_TEST(nptv6_4, test) struct dp_test_pkt_desc_t pkt2_pre = { .text = "External to internal, pre", .len = 20, - .ether_type = ETHER_TYPE_IPv6, + .ether_type = RTE_ETHER_TYPE_IPV6, .l3_src = "2001:DB8:1::2", .l2_src = "aa:bb:cc:dd:2:b1", .l3_dst = ext_addr, /* Before nptv6 */ @@ -1017,7 +1016,7 @@ DP_START_TEST(nptv6_4, test) struct dp_test_pkt_desc_t pkt2_post = { .text = "External to internal, post", 
.len = 20, - .ether_type = ETHER_TYPE_IPv6, + .ether_type = RTE_ETHER_TYPE_IPV6, .l3_src = "2001:DB8:1::2", .l2_src = "aa:bb:cc:dd:2:b1", .l3_dst = int_addr, /* After nptv6 */ @@ -1138,7 +1137,7 @@ DP_START_TEST(nptv6_5, test) struct dp_test_pkt_desc_t pkt1_pre = { .text = "Internal to external, pre", .len = 20, - .ether_type = ETHER_TYPE_IPv6, + .ether_type = RTE_ETHER_TYPE_IPV6, .l3_src = "FD01:203:405:1::2", /* <--- Orig */ .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = "2001:DB8:1::2", @@ -1157,7 +1156,7 @@ DP_START_TEST(nptv6_5, test) struct dp_test_pkt_desc_t pkt1_post = { .text = "Internal to external, post", .len = 20, - .ether_type = ETHER_TYPE_IPv6, + .ether_type = RTE_ETHER_TYPE_IPV6, .l3_src = "2001:DB8:1:d550::2", /* <--- Translated */ .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = "2001:DB8:1::2", @@ -1176,7 +1175,7 @@ DP_START_TEST(nptv6_5, test) struct dp_test_pkt_desc_t pkt2_pre = { .text = "External to internal, pre", .len = 20, - .ether_type = ETHER_TYPE_IPv6, + .ether_type = RTE_ETHER_TYPE_IPV6, .l3_src = "2001:DB8:1::2", .l2_src = "aa:bb:cc:dd:2:b1", .l3_dst = "2001:DB8:1:d550::2", /* Before nptv6 */ @@ -1195,7 +1194,7 @@ DP_START_TEST(nptv6_5, test) struct dp_test_pkt_desc_t pkt2_post = { .text = "External to internal, post", .len = 20, - .ether_type = ETHER_TYPE_IPv6, + .ether_type = RTE_ETHER_TYPE_IPV6, .l3_src = "2001:DB8:1::2", .l2_src = "aa:bb:cc:dd:2:b1", .l3_dst = "FD01:203:405:1::2", /* After nptv6 */ @@ -1315,7 +1314,7 @@ DP_START_TEST(nptv6_6, test) struct dp_test_pkt_desc_t pkt1_pre = { .text = "Internal to external, pre", .len = 20, - .ether_type = ETHER_TYPE_IPv6, + .ether_type = RTE_ETHER_TYPE_IPV6, .l3_src = "FD01:203:405::2", /* <--- Orig */ .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = "2001:DB8:1::2", @@ -1334,7 +1333,7 @@ DP_START_TEST(nptv6_6, test) struct dp_test_pkt_desc_t pkt1_post = { .text = "Internal to external, post", .len = 20, - .ether_type = ETHER_TYPE_IPv6, + .ether_type = RTE_ETHER_TYPE_IPV6, .l3_src = "2001:DB8:1:0:d54f::2", /* 
<--- Translated */ .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = "2001:DB8:1::2", @@ -1353,7 +1352,7 @@ DP_START_TEST(nptv6_6, test) struct dp_test_pkt_desc_t pkt2_pre = { .text = "External to internal, pre", .len = 20, - .ether_type = ETHER_TYPE_IPv6, + .ether_type = RTE_ETHER_TYPE_IPV6, .l3_src = "2001:DB8:1::2", .l2_src = "aa:bb:cc:dd:2:b1", .l3_dst = "2001:DB8:1:0:d54f::2", /* Before nptv6 */ @@ -1372,7 +1371,7 @@ DP_START_TEST(nptv6_6, test) struct dp_test_pkt_desc_t pkt2_post = { .text = "External to internal, post", .len = 20, - .ether_type = ETHER_TYPE_IPv6, + .ether_type = RTE_ETHER_TYPE_IPV6, .l3_src = "2001:DB8:1::2", .l2_src = "aa:bb:cc:dd:2:b1", .l3_dst = "FD01:203:405::2", /* After nptv6 */ @@ -1533,7 +1532,7 @@ DP_START_TEST(nptv6_9, test) struct dp_test_pkt_desc_t pkt1_pre = { .text = "Internal to external, pre", .len = 20, - .ether_type = ETHER_TYPE_IPv6, + .ether_type = RTE_ETHER_TYPE_IPV6, .l3_src = "fd01:203:405:1:ffff:ffff:ffff:ffff", .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = "2001:DB8:1::2", @@ -1584,7 +1583,7 @@ DP_START_TEST(nptv6_9, test) icmp6); dp_test_pktmbuf_eth_init(test_exp->exp_pak[0], "aa:bb:cc:dd:1:a1", dp_test_intf_name2mac_str("dp1T0"), - ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV6); dp_test_exp_set_oif_name(test_exp, "dp1T0"); dp_test_exp_set_fwd_status(test_exp, DP_TEST_FWD_FORWARDED); @@ -1656,7 +1655,7 @@ DP_START_TEST(nptv6_9b, test) struct dp_test_pkt_desc_t pkt1_pre = { .text = "Internal to external, pre", .len = 20, - .ether_type = ETHER_TYPE_IPv6, + .ether_type = RTE_ETHER_TYPE_IPV6, .l3_src = "fd01:203:405:1:ffff:ffff:ffff:ffff", .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = "2001:DB8:1::2", @@ -1707,7 +1706,7 @@ DP_START_TEST(nptv6_9b, test) icmp6); dp_test_pktmbuf_eth_init(test_exp->exp_pak[0], "aa:bb:cc:dd:1:a1", dp_test_intf_name2mac_str("dp1T0"), - ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV6); dp_test_exp_set_oif_name(test_exp, "dp1T0"); dp_test_exp_set_fwd_status(test_exp, DP_TEST_FWD_DROPPED); @@ -1800,7 +1799,7 @@ 
DP_START_TEST(nptv6_10, test) struct dp_test_pkt_desc_t pkt1_pre = { .text = "Internal to external, pre", .len = 20, - .ether_type = ETHER_TYPE_IPv6, + .ether_type = RTE_ETHER_TYPE_IPV6, .l3_src = "fd01:203:405:1::2", /* <--- Orig */ .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = "2001:DB8:1::2", @@ -1819,7 +1818,7 @@ DP_START_TEST(nptv6_10, test) struct dp_test_pkt_desc_t pkt1_post = { .text = "Internal to external, post", .len = 20, - .ether_type = ETHER_TYPE_IPv6, + .ether_type = RTE_ETHER_TYPE_IPV6, .l3_src = "2001:DB8:1:0:d550::2", /* <--- Translated */ .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = "2001:DB8:1::2", @@ -1838,7 +1837,7 @@ DP_START_TEST(nptv6_10, test) struct dp_test_pkt_desc_t pkt2_pre = { .text = "External to internal, pre", .len = 20, - .ether_type = ETHER_TYPE_IPv6, + .ether_type = RTE_ETHER_TYPE_IPV6, .l3_src = "2001:DB8:1::2", .l2_src = "aa:bb:cc:dd:2:b1", .l3_dst = "2001:DB8:1:0:d550::2", /* Before nptv6 */ @@ -1857,7 +1856,7 @@ DP_START_TEST(nptv6_10, test) struct dp_test_pkt_desc_t pkt2_post = { .text = "External to internal, post", .len = 20, - .ether_type = ETHER_TYPE_IPv6, + .ether_type = RTE_ETHER_TYPE_IPV6, .l3_src = "2001:DB8:1::2", .l2_src = "aa:bb:cc:dd:2:b1", .l3_dst = "FD01:203:405:1::2", /* After nptv6 */ @@ -1912,7 +1911,7 @@ DP_START_TEST(nptv6_10, test) dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), pkt2_pre.l2_src, - ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV6); /* * ICMPv6 packet after translator @@ -1937,7 +1936,7 @@ DP_START_TEST(nptv6_10, test) dp_test_pktmbuf_eth_init(exp_pak, pkt2_post.l2_dst, dp_test_intf_name2mac_str("dp1T0"), - ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV6); test_exp = dp_test_exp_create_m(NULL, 1); dp_test_exp_set_pak_m(test_exp, 0, exp_pak); @@ -2031,7 +2030,7 @@ DP_START_TEST(nptv6_11, test) struct dp_test_pkt_desc_t pkt1_pre = { .text = "Internal to external, pre", .len = 20, - .ether_type = ETHER_TYPE_IPv6, + .ether_type = RTE_ETHER_TYPE_IPV6, .l3_src = "fd01:203:405:1::2", /* <--- Orig */ 
.l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = "2001:DB8:1::2", @@ -2050,7 +2049,7 @@ DP_START_TEST(nptv6_11, test) struct dp_test_pkt_desc_t pkt1_post = { .text = "Internal to external, post", .len = 20, - .ether_type = ETHER_TYPE_IPv6, + .ether_type = RTE_ETHER_TYPE_IPV6, .l3_src = "2001:DB8:1:0:d550::2", /* <--- Translated */ .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = "2001:DB8:1::2", @@ -2069,7 +2068,7 @@ DP_START_TEST(nptv6_11, test) struct dp_test_pkt_desc_t pkt2_pre = { .text = "External to internal, pre", .len = 20, - .ether_type = ETHER_TYPE_IPv6, + .ether_type = RTE_ETHER_TYPE_IPV6, .l3_src = "2001:DB8:1::2", .l2_src = "aa:bb:cc:dd:2:b1", .l3_dst = "2001:DB8:1:0:d550::2", /* Before nptv6 */ @@ -2088,7 +2087,7 @@ DP_START_TEST(nptv6_11, test) struct dp_test_pkt_desc_t pkt2_post = { .text = "External to internal, post", .len = 20, - .ether_type = ETHER_TYPE_IPv6, + .ether_type = RTE_ETHER_TYPE_IPV6, .l3_src = "2001:DB8:1::2", .l2_src = "aa:bb:cc:dd:2:b1", .l3_dst = "FD01:203:405:1::2", /* After nptv6 */ @@ -2142,7 +2141,7 @@ DP_START_TEST(nptv6_11, test) dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T0"), pkt1_pre.l2_src, - ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV6); /* * ICMPv6 packet after translator @@ -2167,7 +2166,7 @@ DP_START_TEST(nptv6_11, test) dp_test_pktmbuf_eth_init(exp_pak, pkt1_post.l2_dst, dp_test_intf_name2mac_str("dp1T1"), - ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV6); test_exp = dp_test_exp_create_m(NULL, 1); dp_test_exp_set_pak_m(test_exp, 0, exp_pak); @@ -2258,7 +2257,7 @@ DP_START_TEST(nptv6_12, test) struct dp_test_pkt_desc_t pkt1_pre = { .text = "Internal to external, pre", .len = 20, - .ether_type = ETHER_TYPE_IPv6, + .ether_type = RTE_ETHER_TYPE_IPV6, .l3_src = "fd01:203:405:1:2aaf::", .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = "2001:DB8:1::2", @@ -2309,7 +2308,7 @@ DP_START_TEST(nptv6_12, test) icmp6); dp_test_pktmbuf_eth_init(test_exp->exp_pak[0], "aa:bb:cc:dd:1:a1", dp_test_intf_name2mac_str("dp1T0"), - ETHER_TYPE_IPv6); + 
RTE_ETHER_TYPE_IPV6); dp_test_exp_set_oif_name(test_exp, "dp1T0"); dp_test_exp_set_fwd_status(test_exp, DP_TEST_FWD_FORWARDED); @@ -2381,7 +2380,7 @@ DP_START_TEST(nptv6_13, test) struct dp_test_pkt_desc_t pkt1_pre = { .text = "Internal to external, pre", .len = 20, - .ether_type = ETHER_TYPE_IPv6, + .ether_type = RTE_ETHER_TYPE_IPV6, .l3_src = "fd01:203:405:1::2", .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = "2001:DB8:1::2", @@ -2432,7 +2431,7 @@ DP_START_TEST(nptv6_13, test) icmp6); dp_test_pktmbuf_eth_init(test_exp->exp_pak[0], "aa:bb:cc:dd:1:a1", dp_test_intf_name2mac_str("dp1T0"), - ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV6); dp_test_exp_set_oif_name(test_exp, "dp1T0"); dp_test_exp_set_fwd_status(test_exp, DP_TEST_FWD_FORWARDED); @@ -2512,7 +2511,7 @@ DP_START_TEST(nptv6_14, test) struct dp_test_pkt_desc_t pkt1_pre = { .text = "Internal to external, pre", .len = 20, - .ether_type = ETHER_TYPE_IPv6, + .ether_type = RTE_ETHER_TYPE_IPV6, .l3_src = "FD01:203:405::2", /* <--- Orig */ .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = "2001:DB8:1::2", @@ -2531,7 +2530,7 @@ DP_START_TEST(nptv6_14, test) struct dp_test_pkt_desc_t pkt1_post = { .text = "Internal to external, post", .len = 20, - .ether_type = ETHER_TYPE_IPv6, + .ether_type = RTE_ETHER_TYPE_IPV6, .l3_src = "2001:DB8:1:0:d54f::2", /* <--- Translated */ .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = "2001:DB8:1::2", @@ -2550,7 +2549,7 @@ DP_START_TEST(nptv6_14, test) struct dp_test_pkt_desc_t pkt2_pre = { .text = "External to internal, pre", .len = 20, - .ether_type = ETHER_TYPE_IPv6, + .ether_type = RTE_ETHER_TYPE_IPV6, .l3_src = "2001:DB8:1::2", .l2_src = "aa:bb:cc:dd:2:b1", .l3_dst = "2001:DB8:1:0:d54f::2", /* Before nptv6 */ @@ -2569,7 +2568,7 @@ DP_START_TEST(nptv6_14, test) struct dp_test_pkt_desc_t pkt2_post = { .text = "External to internal, post", .len = 20, - .ether_type = ETHER_TYPE_IPv6, + .ether_type = RTE_ETHER_TYPE_IPV6, .l3_src = "2001:DB8:1::2", .l2_src = "aa:bb:cc:dd:2:b1", .l3_dst = "FD01:203:405::2", /* After 
nptv6 */ diff --git a/tests/whole_dp/src/dp_test_npf_portmap_lib.c b/tests/whole_dp/src/dp_test_npf_portmap_lib.c index 33976be5..50ca3eb5 100644 --- a/tests/whole_dp/src/dp_test_npf_portmap_lib.c +++ b/tests/whole_dp/src/dp_test_npf_portmap_lib.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -16,10 +16,10 @@ #include "main.h" #include "dp_test.h" -#include "dp_test_lib.h" -#include "dp_test_lib_intf.h" -#include "dp_test_pktmbuf_lib.h" -#include "dp_test_netlink_state.h" +#include "dp_test_lib_internal.h" +#include "dp_test_lib_intf_internal.h" +#include "dp_test_pktmbuf_lib_internal.h" +#include "dp_test_netlink_state_internal.h" #include "dp_test_console.h" #include "dp_test_json_utils.h" #include "dp_test_npf_lib.h" @@ -82,7 +82,7 @@ dp_test_npf_json_portmap_match(json_object *jobj, void *arg) if (vals->state) { if (!dp_test_json_string_field_from_obj(jobj, "state", &str)) return false; - if (strcmp(vals->state, str)) + if (strcmp(vals->state, str) != 0) return false; } @@ -119,66 +119,33 @@ dp_test_npf_json_get_portmap(const char *addr, const char *state) return jret; } -/* - * Returns true if the portmap "state" string is retrieved ok - */ -bool -dp_test_npf_json_get_portmap_state(const char *addr, char **state) -{ - json_object *jmap; - bool rv; - const char *str; - - jmap = dp_test_npf_json_get_portmap(addr, NULL); - if (!jmap) - return false; - - rv = dp_test_json_string_field_from_obj(jmap, "state", &str); - if (rv) - *state = strdup(str); - - json_object_put(jmap); - return rv; -} - -/* - * Returns true if the portmap "used" count is retrieved ok - */ -bool -dp_test_npf_json_get_portmap_used(const char *addr, uint *used) -{ - json_object *jmap; - bool rv; - - jmap = dp_test_npf_json_get_portmap(addr, NULL); - if (!jmap) - return false; - - 
rv = dp_test_json_int_field_from_obj(jmap, "used", (int *)used); - - json_object_put(jmap); - return rv; -} - /* * Returns true if the given port is in the portmap ports list. Only * considers "ACTIVE" portmaps. */ static bool -dp_test_npf_json_get_portmap_port(const char *addr, uint16_t port) +dp_test_npf_json_get_portmap_port(const char *prot, const char *addr, + uint16_t port) { + bool rv = false; json_object *jmap, *jarray; + struct dp_test_json_find_key key[] = { {"protocols", NULL}, + {"protocol", prot }, + {"ports", NULL } }; jmap = dp_test_npf_json_get_portmap(addr, "ACTIVE"); if (!jmap) return false; - /* Get ports array */ - if (!json_object_object_get_ex(jmap, "ports", &jarray)) + jarray = dp_test_json_find(jmap, key, ARRAY_SIZE(key)); + + if (!jarray) { + json_object_put(jmap); return false; + } if (json_object_get_type(jarray) != json_type_array) - return false; + goto cleanup; uint arraylen, i; json_object *jvalue; @@ -192,41 +159,65 @@ dp_test_npf_json_get_portmap_port(const char *addr, uint16_t port) continue; if (json_object_get_int(jvalue) == (int)port) { - json_object_put(jmap); - return true; + rv = true; + goto cleanup; } } +cleanup: + json_object_put(jarray); json_object_put(jmap); - return false; + return rv; } /* * Port-map is of the form: * * { - * "apm":{ - * "section_size":512, - * "hash_memory":696, - * "instances":[ - * { - * "npf_id":1, - * "portmaps":[ - * { - * "address":"172.0.2.1", - * "state":"ACTIVE", - * "used":1, - * "ports":[ - * 80 - * ] - * } - * ] - * } - * ] - * } + * "apm": { + * "section_size": 512, + * "hash_memory": 3304, + * "portmaps": [ + * { + * "address": "172.0.2.1", + * "state": "ACTIVE", + * "protocols": [ + * { + * "protocol": "tcp", + * "ports_used": 1, + * "ports": [ + * 80 + * ] + * }, + * { + * "protocol": "udp", + * "ports_used": 0 + * }, + * { + * "protocol": "other", + * "ports_used": 0 + * } + * ] + * } + * ], + * "protocols": [ + * { + * "protocol": "tcp", + * "mapping_count": 1 + * }, + * { + * 
"protocol": "udp", + * "mapping_count": 0 + * }, + * { + * "protocol": "other", + * "mapping_count": 0 + * } + * ] + * } * } - * */ + void dp_test_npf_print_portmap(void) { @@ -255,23 +246,36 @@ dp_test_npf_print_portmap(void) * Verify portmap state and/or used count */ void -_dp_test_npf_portmap_verify(const char *addr, const char *state, uint used, +_dp_test_npf_portmap_verify(const char *prot, const char *addr, + const char *state, uint used, const char *file, int line) { - json_object *jmap; + json_object *jmap, *jprot; bool rv; uint ival = 0; const char *sval = NULL; + struct dp_test_json_find_key key[] = { {"protocols", NULL}, + {"protocol", prot } }; jmap = dp_test_npf_json_get_portmap(addr, NULL); if (!jmap) _dp_test_fail(file, line, "\nFailed to get portmap for %s\n", addr); - rv = dp_test_json_int_field_from_obj(jmap, "used", (int *)&ival); + jprot = dp_test_json_find(jmap, key, ARRAY_SIZE(key)); + + if (!jprot) { + _dp_test_fail(file, line, + "\nFailed to get protocol info" + " for %s\n", addr); + json_object_put(jmap); + return; + } + + rv = dp_test_json_int_field_from_obj(jprot, "ports_used", (int *)&ival); if (!rv) _dp_test_fail(file, line, - "\nFailed to get portmap \"used\"" + "\nFailed to get portmap \"ports_used\"" " field for %s\n", addr); rv = dp_test_json_string_field_from_obj(jmap, "state", &sval); @@ -292,6 +296,7 @@ _dp_test_npf_portmap_verify(const char *addr, const char *state, uint used, " actual count %d\n", addr, used, ival); + json_object_put(jprot); json_object_put(jmap); } @@ -299,12 +304,13 @@ _dp_test_npf_portmap_verify(const char *addr, const char *state, uint used, * Verify portmap port */ void -_dp_test_npf_portmap_port_verify(const char *addr, uint16_t port, - bool expected, const char *file, int line) +_dp_test_npf_portmap_port_verify(const char *prot, const char *addr, + uint16_t port, bool expected, + const char *file, int line) { bool rv; - rv = dp_test_npf_json_get_portmap_port(addr, port); + rv = 
dp_test_npf_json_get_portmap_port(prot, addr, port); if (expected != rv) dp_test_npf_print_portmap(); diff --git a/tests/whole_dp/src/dp_test_npf_portmap_lib.h b/tests/whole_dp/src/dp_test_npf_portmap_lib.h index 68faadae..fc794805 100644 --- a/tests/whole_dp/src/dp_test_npf_portmap_lib.h +++ b/tests/whole_dp/src/dp_test_npf_portmap_lib.h @@ -1,4 +1,5 @@ /* + * Copyright (c) 2019, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -16,18 +17,6 @@ #include "dp_test_npf_lib.h" -/* - * Returns true successfully if the portmap "state" string is retrieved ok - */ -bool -dp_test_npf_json_get_portmap_state(const char *addr, char **state); - -/* - * Returns true if the portmap "used" count as retrieved ok - */ -bool -dp_test_npf_json_get_portmap_used(const char *addr, uint *used); - void dp_test_npf_print_portmap(void); @@ -35,24 +24,27 @@ dp_test_npf_print_portmap(void); * Verify portmap state and/or used count */ void -_dp_test_npf_portmap_verify(const char *addr, const char *state, uint used, +_dp_test_npf_portmap_verify(const char *prot, const char *addr, + const char *state, uint used, const char *file, int line); -#define dp_test_npf_portmap_verify(addr, state, used) \ - _dp_test_npf_portmap_verify(addr, state, used, __FILE__, __LINE__) +#define dp_test_npf_portmap_verify(prot, addr, state, used) \ + _dp_test_npf_portmap_verify(prot, addr, state, used, __FILE__, __LINE__) /* * Verify portmap port */ void -_dp_test_npf_portmap_port_verify(const char *addr, uint16_t port, - bool expected, +_dp_test_npf_portmap_port_verify(const char *prot, const char *addr, + uint16_t port, bool expected, const char *file, int line); -#define dp_test_npf_portmap_port_verify(addr, port) \ - _dp_test_npf_portmap_port_verify(addr, port, true, __FILE__, __LINE__) +#define dp_test_npf_portmap_port_verify(prot, addr, port) \ + _dp_test_npf_portmap_port_verify(prot, addr, port, true, __FILE__, \ + __LINE__) 
-#define dp_test_npf_portmap_port_free_verify(addr, port) \ - _dp_test_npf_portmap_port_verify(addr, port, false, __FILE__, __LINE__) +#define dp_test_npf_portmap_port_free_verify(prot, addr, port) \ + _dp_test_npf_portmap_port_verify(prot, addr, port, false, __FILE__, \ + __LINE__) #endif /* __DP_TEST_NPF_PORTMAP_LIB_H__ */ diff --git a/tests/whole_dp/src/dp_test_npf_prot_group.c b/tests/whole_dp/src/dp_test_npf_prot_group.c index c1c8c46b..0b4f4102 100644 --- a/tests/whole_dp/src/dp_test_npf_prot_group.c +++ b/tests/whole_dp/src/dp_test_npf_prot_group.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only * @@ -16,12 +16,12 @@ #include "dp_test.h" #include "dp_test_str.h" -#include "dp_test_lib.h" +#include "dp_test_lib_internal.h" #include "dp_test_lib_exp.h" #include "dp_test_lib_pkt.h" -#include "dp_test_lib_intf.h" -#include "dp_test_pktmbuf_lib.h" -#include "dp_test_netlink_state.h" +#include "dp_test_lib_intf_internal.h" +#include "dp_test_pktmbuf_lib_internal.h" +#include "dp_test_netlink_state_internal.h" #include "dp_test_console.h" #include "dp_test_json_utils.h" #include "dp_test_npf_lib.h" @@ -156,7 +156,7 @@ DP_START_TEST(proto_grp_ipv4, group_proto_grp) struct dp_test_pkt_desc_t v4_icmp_pkt = { .text = "Prot-group IPv4 ICMP", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "1.1.1.11", .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = "2.2.2.11", @@ -179,7 +179,7 @@ DP_START_TEST(proto_grp_ipv4, group_proto_grp) struct dp_test_pkt_desc_t v4_tcp_pkt = { .text = "Prot-group IPv4 TCP", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "1.1.1.11", .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = "2.2.2.11", @@ -199,7 +199,7 @@ DP_START_TEST(proto_grp_ipv4, group_proto_grp) struct dp_test_pkt_desc_t v4_udp_pkt = { .text = 
"Prot-group IPv4 UDP", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "1.1.1.11", .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = "2.2.2.11", @@ -218,7 +218,7 @@ DP_START_TEST(proto_grp_ipv4, group_proto_grp) struct dp_test_pkt_desc_t v4_raw_pkt = { .text = "Prot-group IPv4 raw", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "1.1.1.11", .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = "2.2.2.11", @@ -337,7 +337,7 @@ DP_START_TEST(proto_grp_ipv6, group_proto_grp) struct dp_test_pkt_desc_t v6_icmp_pkt = { .text = "Prot-group IPv6 ICMP", .len = 20, - .ether_type = ETHER_TYPE_IPv6, + .ether_type = RTE_ETHER_TYPE_IPV6, .l3_src = "2001:1:1::2", .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = "2002:2:2::1", @@ -360,7 +360,7 @@ DP_START_TEST(proto_grp_ipv6, group_proto_grp) struct dp_test_pkt_desc_t v6_tcp_pkt = { .text = "Prot-group IPv6 TCP", .len = 20, - .ether_type = ETHER_TYPE_IPv6, + .ether_type = RTE_ETHER_TYPE_IPV6, .l3_src = "2001:1:1::2", .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = "2002:2:2::1", @@ -380,7 +380,7 @@ DP_START_TEST(proto_grp_ipv6, group_proto_grp) struct dp_test_pkt_desc_t v6_udp_pkt = { .text = "Prot-group IPv6 UDP", .len = 20, - .ether_type = ETHER_TYPE_IPv6, + .ether_type = RTE_ETHER_TYPE_IPV6, .l3_src = "2001:1:1::2", .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = "2002:2:2::1", @@ -399,7 +399,7 @@ DP_START_TEST(proto_grp_ipv6, group_proto_grp) struct dp_test_pkt_desc_t v6_raw_pkt = { .text = "Prot-group IPv6 raw", .len = 20, - .ether_type = ETHER_TYPE_IPv6, + .ether_type = RTE_ETHER_TYPE_IPV6, .l3_src = "2001:1:1::2", .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = "2002:2:2::1", diff --git a/tests/whole_dp/src/dp_test_npf_ptree.c b/tests/whole_dp/src/dp_test_npf_ptree.c index 7ff88ca9..fcc1f9e9 100644 --- a/tests/whole_dp/src/dp_test_npf_ptree.c +++ b/tests/whole_dp/src/dp_test_npf_ptree.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. 
+ * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2017 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -28,11 +28,11 @@ #include "dp_test.h" #include "dp_test_controller.h" -#include "dp_test_netlink_state.h" -#include "dp_test_lib.h" -#include "dp_test_lib_intf.h" +#include "dp_test_netlink_state_internal.h" +#include "dp_test_lib_internal.h" +#include "dp_test_lib_intf_internal.h" #include "dp_test_lib_exp.h" -#include "dp_test_pktmbuf_lib.h" +#include "dp_test_pktmbuf_lib_internal.h" #include "dp_test_npf_fw_lib.h" /* Forward reference */ @@ -267,6 +267,10 @@ ptree_string2key(const char *string, uint8_t *key, uint8_t *af, snprintf(s, sizeof(s), "%s", string); + *af = 0; + *mask = 0; + *key = 0; + if (strchr(s, '.')) { *af = AF_INET; *mask = 32; diff --git a/tests/whole_dp/src/dp_test_npf_qos.c b/tests/whole_dp/src/dp_test_npf_qos.c index 00feb032..0fde94e4 100644 --- a/tests/whole_dp/src/dp_test_npf_qos.c +++ b/tests/whole_dp/src/dp_test_npf_qos.c @@ -1,5 +1,5 @@ /** - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -19,12 +19,12 @@ #include "dp_test.h" #include "dp_test_str.h" -#include "dp_test_lib.h" +#include "dp_test_lib_internal.h" #include "dp_test_lib_exp.h" #include "dp_test_lib_pkt.h" -#include "dp_test_lib_intf.h" -#include "dp_test_pktmbuf_lib.h" -#include "dp_test_netlink_state.h" +#include "dp_test_lib_intf_internal.h" +#include "dp_test_pktmbuf_lib_internal.h" +#include "dp_test_netlink_state_internal.h" #include "dp_test_console.h" #include "dp_test_json_utils.h" #include "dp_test_npf_lib.h" @@ -63,7 +63,7 @@ DP_START_TEST(qos_ipv4, test1) struct dp_test_pkt_desc_t v4_pkt_desc = { .text = "TCP IPv4", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "1.1.1.11", .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = "2.2.2.11", diff --git a/tests/whole_dp/src/dp_test_npf_rldb.c b/tests/whole_dp/src/dp_test_npf_rldb.c new file mode 100644 index 00000000..a66c8657 --- /dev/null +++ b/tests/whole_dp/src/dp_test_npf_rldb.c @@ -0,0 +1,353 @@ +/* + * Copyright (c) 2020, AT&T Intellectual Property. All rights reserved. 
+ * + * SPDX-License-Identifier: LGPL-2.1-only + * + */ +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include "ip_funcs.h" +#include "ip6_funcs.h" +#include "in_cksum.h" +#include "if_var.h" +#include "main.h" + +#include "rldb.h" + +#include "dp_test.h" + +#define ANY_PROTO 0 + +static struct rldb_db_handle *dh4, *dh6; + +static int _add_rule(uint32_t rule_no, uint32_t prio, uint8_t proto, + const char *saddr, uint8_t smasklen, + const char *daddr, uint8_t dmasklen, + uint16_t sloport, uint16_t shiport, + uint16_t dloport, uint16_t dhiport, + struct rldb_rule_handle **rule_handle) +{ + + uint8_t abytes[16]; + struct rldb_rule_spec rule = { 0 }; + uint8_t af = AF_INET6; + char *colon; + struct rldb_db_handle *dh; + + if (!saddr || !daddr || rule_no == 0 || !rule_handle) + return -EINVAL; + + colon = strchr(saddr, ':'); + if (!colon) + af = AF_INET; + + switch (af) { + case AF_INET: + dh = dh4; + rule.rldb_flags |= NPFRL_FLAG_V4_PFX; + break; + case AF_INET6: + dh = dh6; + rule.rldb_flags |= NPFRL_FLAG_V6_PFX; + break; + default: + ck_assert_msg(false, "Unexpected AF"); + return -EAFNOSUPPORT; + } + + rule.rldb_priority = prio; + + if (proto) { + rule.rldb_flags |= NPFRL_FLAG_PROTO; + rule.rldb_proto.npfrl_proto = proto; + } + + /* src */ + rule.rldb_flags |= NPFRL_FLAG_SRC_PFX; + ck_assert(inet_pton(af, saddr, abytes) == 1); + + if (af == AF_INET6) { + struct rldb_v6_prefix *pfx = &rule.rldb_src_addr.v6_pfx; + memcpy(pfx->npfrl_bytes, abytes, + sizeof(pfx->npfrl_bytes)); + pfx->npfrl_plen = smasklen; + + } else if (af == AF_INET) { + struct rldb_v4_prefix *pfx = &rule.rldb_src_addr.v4_pfx; + memcpy(pfx->npfrl_bytes, (void *)abytes, + sizeof(pfx->npfrl_bytes)); + pfx->npfrl_plen = smasklen; + } else { + ck_assert_msg(false, "Unexpected AF"); + return -EAFNOSUPPORT; + } + + /* dst */ + rule.rldb_flags |= NPFRL_FLAG_DST_PFX; + ck_assert(inet_pton(af, daddr, abytes) == 1); + + if (af == AF_INET6) { + struct 
rldb_v6_prefix *pfx = &rule.rldb_dst_addr.v6_pfx; + memcpy(pfx->npfrl_bytes, abytes, + sizeof(pfx->npfrl_bytes)); + pfx->npfrl_plen = dmasklen; + } else if (af == AF_INET) { + struct rldb_v4_prefix *pfx = &rule.rldb_dst_addr.v4_pfx; + memcpy(pfx->npfrl_bytes, (void *)abytes, + sizeof(pfx->npfrl_bytes)); + pfx->npfrl_plen = dmasklen; + } else { + ck_assert_msg(false, "Unexpected AF"); + return -EAFNOSUPPORT; + } + + if (sloport || shiport) { + rule.rldb_flags |= NPFRL_FLAG_SRC_PORT_RANGE; + rule.rldb_src_port_range.npfrl_loport = sloport; + rule.rldb_src_port_range.npfrl_hiport = shiport; + } + + if (dloport || dhiport) { + rule.rldb_flags |= NPFRL_FLAG_DST_PORT_RANGE; + rule.rldb_dst_port_range.npfrl_loport = dloport; + rule.rldb_dst_port_range.npfrl_hiport = dhiport; + } + + return rldb_add_rule(dh, rule_no, &rule, rule_handle); +} + +static int match_packet4(const char *saddr_pkt, const char *daddr_pkt, + uint16_t sport_pkt, uint16_t dport_pkt) +{ + const int len = 22; + struct rldb_result results[1]; + struct rte_mbuf *pkt; + struct rte_mbuf *m[1]; + int rc; + + pkt = + dp_test_create_udp_ipv4_pak(saddr_pkt, daddr_pkt, sport_pkt, + dport_pkt, 1, &len); + m[0] = pkt; + rc = rldb_match(dh4, m, 1, results); + rte_pktmbuf_free(pkt); + if (rc && rc != -ENOENT) + return rc; + + return results[0].rldb_rule_no; + +} + +static int match_packet6(const char *saddr_pkt, const char *daddr_pkt, + uint16_t sport_pkt, uint16_t dport_pkt) +{ + const int len = 22; + struct rldb_result results[1]; + struct rte_mbuf *pkt; + struct rte_mbuf *m[1]; + int rc; + + pkt = + dp_test_create_udp_ipv6_pak(saddr_pkt, daddr_pkt, sport_pkt, + dport_pkt, 1, &len); + m[0] = pkt; + rc = rldb_match(dh6, m, 1, results); + rte_pktmbuf_free(pkt); + if (rc && rc != -ENOENT) + return rc; + + return results[0].rldb_rule_no; + +} + +static int match_packet_tcp4(const char *saddr_pkt, const char *daddr_pkt, + uint16_t sport_pkt, uint16_t dport_pkt) +{ + int rc; + const int len = 22; + struct rldb_result 
results; + struct rte_mbuf *pkt; + struct rte_mbuf *m[1]; + + pkt = + dp_test_create_tcp_ipv4_pak(saddr_pkt, daddr_pkt, sport_pkt, + dport_pkt, 0, 1, 0, 0, NULL, 1, &len); + m[0] = pkt; + rc = rldb_match(dh4, m, 1, &results); + rte_pktmbuf_free(pkt); + return rc; +} + +static int add_rule(uint32_t rule_no, uint32_t prio, uint8_t proto, + const char *saddr, uint8_t smasklen, + const char *daddr, uint8_t dmasklen, + uint16_t sloport, uint16_t shiport, + uint16_t dloport, uint16_t dhiport) +{ + struct rldb_rule_handle *rule_handle; + int rc; + + char *colon; + struct rldb_db_handle *dh = dh6; + + colon = strchr(saddr, ':'); + if (!colon) + dh = dh4; + + rc = rldb_start_transaction(dh); + ck_assert_msg(rc == 0, "Failed to start transaction"); + + rc = _add_rule(rule_no, prio, proto, + saddr, smasklen, + daddr, dmasklen, + sloport, shiport, dloport, dhiport, &rule_handle); + ck_assert_msg(rc == 0, "Failed to add IPv4 rule"); + + rc = rldb_commit_transaction(dh); + ck_assert_msg(rc == 0, "Failed to commit transaction"); + + return rc; +} + +static void rldb_setup(void) +{ + int rc; + + rc = rldb_create("test4", NPFRL_FLAG_V4_PFX, &dh4); + ck_assert(rc == 0); + + rc = rldb_create("test6", NPFRL_FLAG_V6_PFX, &dh6); + ck_assert(rc == 0); +} + +static void rldb_teardown(void) +{ + rldb_destroy(dh4); + dh4 = NULL; + rldb_destroy(dh6); + dh6 = NULL; +} + +DP_DECL_TEST_SUITE(rldb_suite); +DP_DECL_TEST_CASE(rldb_suite, rldb_rule, rldb_setup, rldb_teardown); + +DP_START_TEST(rldb_rule, delete) +{ + int rc; + int rule_no = 42; + struct rldb_rule_handle *rule_handle, *rule_handle_unused; + struct rldb_db_handle *dh = dh4; + + rc = rldb_start_transaction(dh); + ck_assert_msg(rc == 0, "Failed to start transaction"); + + for (int i = 0; i < 100; i++) + _add_rule(1000 + i, 123, 0, "41.0.0.0", 24, "30.0.0.0", 24, 0, + 0, 0, 0, &rule_handle_unused); + + rc = _add_rule(rule_no, 123, 0, "40.0.0.0", 24, "30.0.0.0", 24, 0, 0, 0, + 0, &rule_handle); + ck_assert_msg(rc == 0, "Failed to add 
IPv4 rule"); + + for (int i = 0; i < 100; i++) + _add_rule(2000 + i, 123, 0, "42.0.0.0", 24, "30.0.0.0", 24, 0, + 0, 0, 0, &rule_handle_unused); + + rc = rldb_commit_transaction(dh); + ck_assert_msg(rc == 0, "Failed to commit transaction"); + + ck_assert_msg(match_packet4("40.0.0.0", "30.0.0.0", 8888, 8888) == + rule_no, "Verify rule installation by matching"); + + printf("BEFORE DELETE:\n"); + rte_acl_list_dump(); + printf("\n"); + + rc = rldb_start_transaction(dh); + ck_assert_msg(rc == 0, "Failed to start transaction #2"); + + rc = rldb_del_rule(dh, rule_handle); + ck_assert_msg(rc == 0, "Failed delete rule"); + + rc = rldb_commit_transaction(dh); + ck_assert_msg(rc == 0, "Failed to commit transaction #2"); + + + printf("AFTER DELETE:\n"); + rte_acl_list_dump(); + printf("\n"); + + ck_assert_msg(match_packet4("40.0.0.0", "30.0.0.0", 8888, 8888) != + rule_no, "Verify rule removal by negative matching"); +} DP_END_TEST; + +DP_START_TEST(rldb_rule, match_ipv6) +{ + add_rule(6, 1000, ANY_PROTO, "30::0", 24, "40::0", 24, 0, 0, 0, 0); + ck_assert_msg(match_packet6("30::1", "40::1", 8888, 8888) == 6, + "Addresses-only policy"); + ck_assert_msg(match_packet6("30::1:1", "40::0:1", 8888, 8888) != 0, + "Negative addresses-only policy"); + ck_assert_msg(match_packet6("30::0:1", "40::1:1", 8888, 8888) != 0, + "Negative addresses-only policy"); +} DP_END_TEST; + +DP_START_TEST(rldb_rule, match_ipv4) +{ + add_rule(6, 1000, ANY_PROTO, "30.0.0.0", 24, "40.0.0.0", 24, 0, 0, 0, + 0); + ck_assert_msg(match_packet4("30.0.0.1", "40.0.0.1", 8888, 8888) == 6, + "Addresses-only policy"); + ck_assert_msg(match_packet4("30.0.1.1", "40.0.0.1", 8888, 8888) != 0, + "Negative addresses-only policy"); + ck_assert_msg(match_packet4("30.0.0.1", "40.0.1.1", 8888, 8888) != 0, + "Negative addresses-only policy"); + add_rule(7, 1000, IPPROTO_UDP, "31.0.0.0", 24, "41.0.0.0", 24, 0, 0, 0, + 0); + ck_assert_msg(match_packet4("31.0.0.1", "41.0.0.1", 8888, 8888) == 7, + "Protocol-only policy"); + 
ck_assert_msg(match_packet_tcp4("31.0.0.1", "41.0.0.1", 8888, 8888) != + 0, "Negative protocol-only policy"); + + add_rule(8, 1000, ANY_PROTO, "32.0.0.1", 32, "42.2.0.1", 32, 0, 0, 0, + 0); + ck_assert_msg(match_packet4("32.0.0.1", "42.2.0.1", 8888, 8888) == 8, + "Host-to-host policy"); + ck_assert_msg(match_packet4("32.0.0.2", "42.2.0.1", 8888, 8888) != 0, + "Negative host-policy source"); + ck_assert_msg(match_packet4("32.0.0.1", "42.2.0.2", 8888, 8888) != 0, + "Negative host-policy destination"); + + add_rule(9, 1000, ANY_PROTO, "33.0.0.0", 24, "0.0.0.0", 0, 0, 0, 0, 0); + ck_assert_msg(match_packet4("33.0.0.1", "123.0.0.1", 8888, 8888) == 9, + "Anycast destination"); + ck_assert_msg(match_packet4("33.3.3.1", "123.0.0.1", 8888, 8888) != 0, + "Negative anycast destination"); + + add_rule(10, 1000, IPPROTO_UDP, "33.0.1.0", 24, "0.0.0.0", 0, 8888, + 8888, 8888, 8888); + ck_assert_msg(match_packet4("33.0.1.1", "123.0.0.1", 8888, 8888) == 10, + "Anycast destination & port"); + ck_assert_msg(match_packet4("33.0.1.1", "123.0.0.1", 8887, 8888) != 0, + "Negative anycast destination & port"); + + add_rule(11, 1000, IPPROTO_UDP, "34.0.0.0", 24, "44.0.0.0", 24, 8888, + 8888, 8888, 8888); + ck_assert_msg(match_packet4("34.0.0.1", "44.0.0.1", 8888, 8888) == 11, + "Protocol & port"); + ck_assert_msg(match_packet4("34.0.0.1", "44.0.0.1", 40, 40) != 0, + "Negative protocol & port"); + + add_rule(1, 1, ANY_PROTO, "0.0.0.0", 0, "0.0.0.0", 0, 0, 0, 0, 0); + ck_assert_msg(match_packet4("34.0.0.1", "44.0.0.1", 40, 40) == 1, + "Catch all rule"); +} DP_END_TEST; diff --git a/tests/whole_dp/src/dp_test_npf_ruleset_state.c b/tests/whole_dp/src/dp_test_npf_ruleset_state.c index f4ccc049..28beb613 100644 --- a/tests/whole_dp/src/dp_test_npf_ruleset_state.c +++ b/tests/whole_dp/src/dp_test_npf_ruleset_state.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2018-2021, AT&T Intellectual Property. All rights reserved. 
* * SPDX-License-Identifier: LGPL-2.1-only * @@ -17,12 +17,12 @@ #include "dp_test.h" #include "dp_test_str.h" -#include "dp_test_lib.h" +#include "dp_test_lib_internal.h" #include "dp_test_lib_exp.h" -#include "dp_test_lib_intf.h" +#include "dp_test_lib_intf_internal.h" #include "dp_test_lib_pkt.h" -#include "dp_test_pktmbuf_lib.h" -#include "dp_test_netlink_state.h" +#include "dp_test_pktmbuf_lib_internal.h" +#include "dp_test_netlink_state_internal.h" #include "dp_test_console.h" #include "dp_test_json_utils.h" #include "dp_test_npf_lib.h" @@ -43,13 +43,13 @@ dpt_fw(const char *if_name, bool in, const char *fw_name, bool add) .rule = "10", .pass = PASS, .stateful = STATELESS, - .npf = "proto=17 dst-port=48879" + .npf = "proto-final=17 dst-port=48879" }, { .rule = "20", .pass = BLOCK, .stateful = STATELESS, - .npf = "proto=17 dst-port=48878" + .npf = "proto-final=17 dst-port=48878" }, RULE_DEF_BLOCK, NULL_RULE @@ -83,6 +83,7 @@ dpt_dnat(const char *if_name, const char *rule, bool add) .ifname = if_name, .proto = IPPROTO_UDP, .map = "dynamic", + .port_alloc = NULL, .from_addr = NULL, .from_port = NULL, .to_addr = "2.2.2.12", @@ -109,6 +110,7 @@ dpt_snat(const char *if_name, const char *rule, bool add) .ifname = if_name, .proto = IPPROTO_UDP, .map = "dynamic", + .port_alloc = NULL, .from_addr = "1.1.1.11", .from_port = NULL, .to_addr = NULL, @@ -168,6 +170,124 @@ dpt_nat64(const char *if_name, bool add) dp_test_npf_cmd_fmt(false, "npf-ut commit"); } +static void +dpt_zone(bool add) +{ + if (add) { + dp_test_npf_cmd_fmt( + false, + "npf-ut zone add ZONE1"); + dp_test_npf_cmd_fmt( + false, + "npf-ut zone intf add ZONE1 dpT10"); + dp_test_npf_cmd_fmt( + false, + "npf-ut zone intf add ZONE1 dpT11"); + + dp_test_npf_cmd_fmt( + false, + "npf-ut zone add ZONE2"); + dp_test_npf_cmd_fmt( + false, + "npf-ut zone intf add ZONE2 dpT12"); + + dp_test_npf_cmd_fmt( + false, + "npf-ut zone add ZONE3"); + dp_test_npf_cmd_fmt( + false, + "npf-ut zone intf add ZONE3 dpT13"); + + 
dp_test_npf_cmd_fmt( + false, + "npf-ut zone policy add ZONE1 ZONE2"); + dp_test_npf_cmd_fmt( + false, + "npf-ut add fw:ZFW1 1 action=accept"); + dp_test_npf_cmd_fmt( + false, + "npf-ut attach zone:ZONE1>ZONE2 zone fw:ZFW1"); + + dp_test_npf_cmd_fmt( + false, + "npf-ut zone policy add ZONE1 ZONE3"); + dp_test_npf_cmd_fmt( + false, + "npf-ut add fw:ZFW3 1 action=accept"); + dp_test_npf_cmd_fmt( + false, + "npf-ut attach zone:ZONE1>ZONE3 zone fw:ZFW3"); + + dp_test_npf_cmd_fmt( + false, + "npf-ut zone policy add ZONE2 ZONE1"); + dp_test_npf_cmd_fmt( + false, + "npf-ut add fw:ZFW2 10 action=accept"); + dp_test_npf_cmd_fmt( + false, + "npf-ut attach zone:ZONE2>ZONE1 zone fw:ZFW2"); + + dp_test_npf_cmd_fmt(false, "npf-ut commit"); + } else { + dp_test_npf_cmd_fmt( + false, + "npf-ut detach zone:ZONE1>ZONE2 zone fw:ZFW1"); + dp_test_npf_cmd_fmt( + false, + "npf-ut delete fw:ZFW1"); + dp_test_npf_cmd_fmt( + false, + "npf-ut zone policy remove ZONE1 ZONE2"); + + dp_test_npf_cmd_fmt( + false, + "npf-ut detach zone:ZONE1>ZONE3 zone fw:ZFW3"); + dp_test_npf_cmd_fmt( + false, + "npf-ut delete fw:ZFW3"); + dp_test_npf_cmd_fmt( + false, + "npf-ut zone policy remove ZONE1 ZONE3"); + + dp_test_npf_cmd_fmt( + false, + "npf-ut detach zone:ZONE2>ZONE1 zone fw:ZFW2"); + dp_test_npf_cmd_fmt( + false, + "npf-ut delete fw:ZFW2"); + dp_test_npf_cmd_fmt( + false, + "npf-ut zone policy remove ZONE2 ZONE1"); + + dp_test_npf_cmd_fmt( + false, + "npf-ut zone intf remove ZONE1 dpT10"); + dp_test_npf_cmd_fmt( + false, + "npf-ut zone intf remove ZONE1 dpT11"); + dp_test_npf_cmd_fmt( + false, + "npf-ut zone remove ZONE1"); + + dp_test_npf_cmd_fmt( + false, + "npf-ut zone intf remove ZONE2 dpT12"); + dp_test_npf_cmd_fmt( + false, + "npf-ut zone remove ZONE2"); + + dp_test_npf_cmd_fmt( + false, + "npf-ut zone intf remove ZONE3 dpT13"); + dp_test_npf_cmd_fmt( + false, + "npf-ut zone remove ZONE3"); + + dp_test_npf_cmd_fmt(false, "npf-ut commit"); + } +} + static void dpt_show_rulesets(const char 
*rs_name, const char *if_name) { @@ -176,7 +296,6 @@ dpt_show_rulesets(const char *rs_name, const char *if_name) char intf_str[50]; char *response; bool err; - int l = 0; if (if_name) { char real_if[IFNAMSIZ]; @@ -185,8 +304,8 @@ dpt_show_rulesets(const char *rs_name, const char *if_name) } else snprintf(intf_str, sizeof(intf_str), "all:"); - l += snprintf(cmd+l, sizeof(cmd)-l, "npf-op show %s %s", - intf_str, rs_name); + snprintf(cmd, sizeof(cmd), "npf-op show %s %s", + intf_str, rs_name); response = dp_test_console_request_w_err(cmd, &err, false); if (!response || err) { @@ -231,6 +350,8 @@ dpt_show_ruleset_state(const char *rs_name, const char *if_name) l += snprintf(cmd+l, sizeof(cmd)-l, "npf-op state %s %s", intf_str, rs_name); + (void) l; + response = dp_test_console_request_w_err(cmd, &err, false); if (!response || err) { dp_test_fail("no response from dataplane"); @@ -535,3 +656,109 @@ DP_START_TEST(npf_get_state3, test1) dp_test_nl_del_ip_addr_and_connected("dp1T2", "3.3.3.3/24"); } DP_END_TEST; + +/* + * make -j4 dataplane_test_run CK_RUN_CASE=npf_get_state4 + */ +static void dpt_get_state4_verify_zone(void) +{ + json_object *jexp; + char cmd_str[30]; + + snprintf(cmd_str, sizeof(cmd_str), + "npf-op state all: zone"); + + jexp = dp_test_json_create( + "{" + " \"zone\":[" + " {" + " \"input-zone-name\":\"ZONE2\"," + " \"to\":[" + " {" + " \"output-zone-name\":\"ZONE1\"," + " \"name\":[" + " {" + " \"group-name\":\"ZFW2\"," + " \"rule\":[" + " {" + " \"rule-number\":10," + " \"bytes\":0," + " \"packets\":0" + " }" + " ]" + " }" + " ]" + " }" + " ]" + " }," + " {" + " \"input-zone-name\":\"ZONE1\"," + " \"to\":[" + " {" + " \"output-zone-name\":\"ZONE2\"," + " \"name\":[" + " {" + " \"group-name\":\"ZFW1\"," + " \"rule\":[" + " {" + " \"rule-number\":1," + " \"bytes\":0," + " \"packets\":0" + " }" + " ]" + " }" + " ]" + " }," + " {" + " \"output-zone-name\":\"ZONE3\"," + " \"name\":[" + " {" + " \"group-name\":\"ZFW3\"," + " \"rule\":[" + " {" + " 
\"rule-number\":1," + " \"bytes\":0," + " \"packets\":0" + " }" + " ]" + " }" + " ]" + " }" + " ]" + " }" + " ]" + "}" + ); + + dp_test_check_json_poll_state(cmd_str, jexp, + DP_TEST_JSON_CHECK_SUBSET, + false, 0); + json_object_put(jexp); +} + +DP_DECL_TEST_CASE(npf_ruleset_state, npf_get_state4, NULL, NULL); +DP_START_TEST(npf_get_state4, test1) +{ + + /* Setup interfaces and neighbours */ + dp_test_nl_add_ip_addr_and_connected("dp1T0", "1.1.1.1/24"); + dp_test_nl_add_ip_addr_and_connected("dp1T1", "2.2.2.2/24"); + dp_test_nl_add_ip_addr_and_connected("dp1T2", "3.3.3.3/24"); + + dpt_zone(true); + + if (0) + dpt_show_rulesets("zone", NULL); + + if (0) + dpt_show_ruleset_state("zone", NULL); + + dpt_get_state4_verify_zone(); + + dpt_zone(false); + + dp_test_nl_del_ip_addr_and_connected("dp1T0", "1.1.1.1/24"); + dp_test_nl_del_ip_addr_and_connected("dp1T1", "2.2.2.2/24"); + dp_test_nl_del_ip_addr_and_connected("dp1T2", "3.3.3.3/24"); + +} DP_END_TEST; diff --git a/tests/whole_dp/src/dp_test_npf_sess_lib.c b/tests/whole_dp/src/dp_test_npf_sess_lib.c index fdcb6e9b..ec8c81d3 100644 --- a/tests/whole_dp/src/dp_test_npf_sess_lib.c +++ b/tests/whole_dp/src/dp_test_npf_sess_lib.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2021, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -19,16 +19,17 @@ #include "session/session.h" #include "dp_test.h" -#include "dp_test_lib.h" -#include "dp_test_lib_intf.h" -#include "dp_test_pktmbuf_lib.h" -#include "dp_test_netlink_state.h" +#include "dp_test_lib_internal.h" +#include "dp_test_lib_intf_internal.h" +#include "dp_test_pktmbuf_lib_internal.h" +#include "dp_test_netlink_state_internal.h" +#include "dp_test/dp_test_cmd_check.h" #include "dp_test_console.h" #include "dp_test_json_utils.h" #include "dp_test_npf_lib.h" #include "dp_test_npf_nat_lib.h" #include "dp_test_npf_sess_lib.h" -#include "dp_test_session_lib.h" +#include "dp_test_session_internal_lib.h" /* * Parameters required to identify a session @@ -53,27 +54,29 @@ struct dp_test_npf_json_session_match_t { * Verify the npf global session count */ void -_dp_test_npf_session_count_verify(uint exp_count, bool warn, - const char *file, int line) +_dp_test_session_count_verify(uint exp_count, bool warn, + const char *file, const char *func, int line) { uint count = 0; bool rv; rv = dp_test_npf_session_count(&count); - _dp_test_fail_unless(rv, file, line, "Failed to get session count\n"); + _dp_test_fail_unless(rv, file, line, + "Failed to get session count (%s)\n", func); if (count != exp_count) { char str[80]; snprintf(str, sizeof(str), - "FW session count expected %d, actual %d", - exp_count, count); + "FW session count expected %d, actual %d (%s)", + exp_count, count, func); if (warn) - printf("\nWarning: %s %d %s\n", file, line, str); + printf("\nWarning: %s %s %d %s\n", + file, func, line, str); else - _dp_test_fail(file, line, "\n%s\n", str); + _dp_test_fail(file, line, "\n%s (%s)\n", str, func); } } @@ -110,8 +113,8 @@ _dp_test_npf_tcp_session_count_verify(uint exp_count, bool warn, * Verify the npf global UDP session count */ void -_dp_test_npf_udp_session_count_verify(uint exp_count, bool warn, - const char *file, int line) +_dp_test_session_udp_count_verify(uint exp_count, bool warn, + const char *file, int line) { uint count = 0; 
bool rv; @@ -177,7 +180,7 @@ dp_test_npf_expire_sessions(void) * Clear all npf sessions */ void -dp_test_npf_clear_sessions(void) +dp_test_sessions_clear(void) { /* @@ -265,7 +268,7 @@ dp_test_npf_session_state_from_json(json_object *jobj) * @return true if found **/ bool -_dp_test_npf_session_verify(char *desc, +_dp_test_session_verify(char *desc, const char *saddr, uint16_t src_id, const char *daddr, uint16_t dst_id, uint8_t proto, const char *intf, @@ -297,6 +300,8 @@ _dp_test_npf_session_verify(char *desc, " Src [%s, %d] Dst [%s, %d] proto %d %s", saddr, src_id, daddr, dst_id, proto, intf); + (void) l; + if (!ok) { printf("not ok\n"); spush(err_str, sizeof(err_str), @@ -321,6 +326,155 @@ _dp_test_npf_session_verify(char *desc, return false; } +struct dp_test_npf_poll_cmd { + int poll_cnt; + bool result; + json_object *response; + /* fields to match on */ + const char *saddr; + uint16_t src_id; + const char *daddr; + uint16_t dst_id; + uint8_t proto; + const char *intf; + uint32_t exp_flags; + uint32_t flags_mask; + int pkts_in; + int bytes_in; + int pkts_out; + int bytes_out; +}; + +static int +_dp_test_npf_session_verify_count_internal(zloop_t *loop, int poller, + void *arg) +{ + struct dp_test_npf_poll_cmd *cmd = arg; + char real_ifname[IFNAMSIZ]; + json_object *jobj; + json_object *counter_obj; + unsigned int index = 0; + int found_pkts_in; + int found_bytes_in; + int found_pkts_out; + int found_bytes_out; + + --(cmd->poll_cnt); + + dp_test_intf_real(cmd->intf, real_ifname); + + jobj = dp_test_npf_json_get_session(cmd->saddr, cmd->src_id, + cmd->daddr, cmd->dst_id, + cmd->proto, real_ifname, + cmd->exp_flags, cmd->flags_mask, + &index); + + if (cmd->response) + json_object_put(cmd->response); + cmd->response = jobj; + if (jobj) { + if (!json_object_object_get_ex(jobj, "counters", &counter_obj)) + goto done; + + if (!dp_test_json_int_field_from_obj(counter_obj, "packets_in", + &found_pkts_in)) + goto done; + if (!dp_test_json_int_field_from_obj(counter_obj, 
"bytes_in", + &found_bytes_in)) + goto done; + if (!dp_test_json_int_field_from_obj(counter_obj, "packets_out", + &found_pkts_out)) + goto done; + if (!dp_test_json_int_field_from_obj(counter_obj, "bytes_out", + &found_bytes_out)) + goto done; + /* We have values for all of them */ + if (cmd->pkts_in == found_pkts_in && + cmd->bytes_in == found_bytes_in && + cmd->pkts_out == found_pkts_out && + cmd->bytes_out == found_bytes_out) { + cmd->result = true; + return -1; + } + } +done: + cmd->result = false; + if (cmd->poll_cnt == 0) + return -1; + return 0; +} + +/* + * Verify the presence/absence of an npf session. the counts must match as + * well as the values identifying the session. Poll for a matching session + * for the standard poll delay and record a test failure if not found. + * + * @param desc [in] Optional text to be prepended to any error message + * @param saddr [in] Source address string + * @param src_id [in] Source ID in host order (TCP port, ICMP id) + * @param daddr [in] Dest address string + * @param dst_id [in] Dest ID in host order (TCP port, ICMP id) + * @param proto [in] IP protocol + * @param intf [in] Interface string, e.g. "dp2T1" + * @param exp_flags [in] Expected flags, e.g. SE_ACTIVE | SE_PASS + * @param flags_mask [in] Flags mask, e.g. 
SE_FLAGS_MASK + * @param pkts_in [in] expected count + * @param bytes_in [in] expected count + * @param pkts_out [in] expected count + * @param bytes_out [in] expected count + * + * @return true if found + */ +void +_dp_test_session_verify_count(char *desc, + const char *saddr, uint16_t src_id, + const char *daddr, uint16_t dst_id, + uint8_t proto, const char *intf, + uint32_t exp_flags, + uint32_t flags_mask, + int pkts_in, int bytes_in, + int pkts_out, + int bytes_out, const char *file, + int line) +{ + zloop_t *loop = zloop_new(); + int timer; + struct dp_test_npf_poll_cmd cmd = { + .poll_cnt = DP_TEST_POLL_COUNT, + .saddr = saddr, + .src_id = src_id, + .daddr = daddr, + .dst_id = dst_id, + .proto = proto, + .intf = intf, + .exp_flags = exp_flags, + .flags_mask = flags_mask, + .pkts_in = pkts_in, + .bytes_in = bytes_in, + .pkts_out = pkts_out, + .bytes_out = bytes_out, + }; + const char *str; + + timer = zloop_timer(loop, dp_test_wait_sec, 0, + _dp_test_npf_session_verify_count_internal, &cmd); + dp_test_assert_internal(timer >= 0); + + zloop_start(loop); + zloop_destroy(&loop); + + if (cmd.result) { + json_object_put(cmd.response); + return; + } + + str = json_object_to_json_string_ext(cmd.response, + JSON_C_TO_STRING_PRETTY); + _dp_test_fail(file, line, "Did not find the expected counts:\n%s\n", + str ? str : ""); +} + + /** * Verify the presence/absence of an npf session. The 5-tuple is derived from * a packet descriptor. 
@@ -345,7 +499,7 @@ _dp_test_npf_session_verify_desc(char *text, struct dp_test_pkt_desc_t *pkt, dp_test_npf_extract_ids_from_pkt_desc(pkt, &src_id, &dst_id); - return _dp_test_npf_session_verify(text, + return _dp_test_session_verify(text, pkt->l3_src, src_id, pkt->l3_dst, dst_id, pkt->proto, intf, @@ -436,6 +590,7 @@ _dp_test_npf_nat_session_verify(char *desc, l += spush(sess_str+l, sizeof(sess_str)-l, " proto %d %s", proto, intf); l += spush(sess_str+l, sizeof(sess_str)-l, " ttype %d", trans_type); + (void) l; if (!ok) { spush(err_str, sizeof(err_str), @@ -626,8 +781,6 @@ dp_test_npf_sess_match_flags(json_object *jnew, json_object *jcur, uint32_t flags_new = dp_test_npf_session_flags_from_json(jnew); uint32_t flags_cur = dp_test_npf_session_flags_from_json(jcur); - return true; - /* flags preference, most preferred to least preferred */ uint32_t pref[] = { SE_ACTIVE, @@ -686,6 +839,14 @@ typedef bool (*dp_test_npf_json_session_cb)(json_object *jvalue, void *arg); static uint first_session_id; static uint first_nat_session_id; +/* Reset dataplane session ID to 0 */ +void dp_test_npf_reset_session_id(void) +{ + dp_test_session_reset_session_id(); + first_session_id = 0; + first_nat_session_id = 0; +} + json_object * dp_test_npf_json_fw_session_iterate(dp_test_npf_json_session_cb cb, void *arg, unsigned int *index) @@ -1273,9 +1434,9 @@ const char * dp_test_npf_sess_state_str(uint8_t proto, uint state) { if (proto == IPPROTO_TCP) - return npf_state_get_state_name(state, NPF_PROTO_IDX_TCP); + return npf_state_get_tcp_name(state); - return npf_state_get_state_name(state, NPF_PROTO_IDX_OTHER); + return dp_session_state_name(state, false); } void @@ -1342,3 +1503,190 @@ dp_test_npf_print_session_table(bool nat) json_object_put(jobj); } + +/* + * Return counters for one session. Session filter should be fully specified, + * e.g. 
+ * + * uint32_t pkts_in = 0, pkts_out = 0; + * uint32_t bytes_in = 0, bytes_out = 0; + * uint32_t sess_id = 0; + * + * dp_test_session_counters("start 0 count 1 " + * "src-addr 192.0.2.103 src-port 10000 " + * "dst-addr 203.0.113.203 dst-port 60000 " + * "proto 17 dir out intf dpT21", + * &pkts_in, &pkts_out, &bytes_in, &bytes_out, + * &sess_id); + */ +int dp_test_session_counters(const char *options, + uint32_t *pkts_in, uint32_t *pkts_out, + uint32_t *bytes_in, uint32_t *bytes_out, + uint32_t *sess_id) +{ + json_object *jresp; + const char *str; + char *response; + bool err; + char cmd[1000]; + int l; + int rc = 0; + + l = snprintf(cmd, sizeof(cmd), "session-op show dataplane sessions"); + if (options) + l += snprintf(cmd+l, sizeof(cmd)-l, " %s", options); + (void) l; + + response = dp_test_console_request_w_err(cmd, &err, false); + if (!response || err) + return -1; + + jresp = parse_json(response, parse_err_str, sizeof(parse_err_str)); + free(response); + if (!jresp) + return -1; + + /* For debug */ + if (0) { + str = json_object_to_json_string_ext(jresp, + JSON_C_TO_STRING_PRETTY); + if (str) + printf("%s\n", str); + } + + json_object *jarray; + struct dp_test_json_find_key ip_keys[] = { + {"sessions", NULL} + }; + + jarray = dp_test_json_find(jresp, ip_keys, ARRAY_SIZE(ip_keys)); + if (!jarray) { + json_object_put(jresp); + return -1; + } + + int len = json_object_array_length(jarray); + json_object *jobj, *counters_json; + + if (len > 1) { + rc = -1; + goto cleanup; + } + + jobj = json_object_array_get_idx(jarray, 0); + + if (!json_object_object_get_ex(jobj, "counters", &counters_json)) { + rc = -1; + goto cleanup; + } + + if (!dp_test_json_int_field_from_obj(counters_json, + "packets_in", (int *)pkts_in)) { + rc = -1; + goto cleanup; + } + + if (!dp_test_json_int_field_from_obj(counters_json, + "packets_out", (int *)pkts_out)) { + rc = -1; + goto cleanup; + } + + if (!dp_test_json_int_field_from_obj(counters_json, + "bytes_in", (int *)bytes_in)) { + rc = 
-1; + goto cleanup; + } + + if (!dp_test_json_int_field_from_obj(counters_json, + "bytes_out", (int *)bytes_out)) { + rc = -1; + goto cleanup; + } + + if (sess_id) { + if (!dp_test_json_int_field_from_obj(jobj, "id", + (int *)sess_id)) { + rc = -1; + goto cleanup; + } + } + +cleanup: + json_object_put(jarray); + json_object_put(jresp); + return rc; +} + +/* + * Uses the newer "session-op show dataplane sessions" command. + * + * A simple example is as follows: + * + * dpt_show_sessions2("start 0 count 10"); + */ +void dpt_show_sessions2(const char *options) +{ + json_object *jresp; + const char *str; + char *response; + bool err; + char cmd[1000]; + int l; + + l = snprintf(cmd, sizeof(cmd), "session-op show dataplane sessions"); + if (options) + l += snprintf(cmd+l, sizeof(cmd)-l, " %s", options); + (void) l; + + response = dp_test_console_request_w_err(cmd, &err, false); + if (!response || err) + return; + + jresp = parse_json(response, parse_err_str, sizeof(parse_err_str)); + free(response); + if (!jresp) + return; + + if (1) { + str = json_object_to_json_string_ext(jresp, + JSON_C_TO_STRING_PRETTY); + if (str) + printf("%s\n", str); + } + + json_object *jarray; + struct dp_test_json_find_key ip_keys[] = { + {"sessions", NULL} + }; + + jarray = dp_test_json_find(jresp, ip_keys, ARRAY_SIZE(ip_keys)); + assert(jarray); + + if (jarray) { + int len = json_object_array_length(jarray); + json_object *jobj; + const char *saddr, *daddr; + int i; + bool rv; + + for (i = 0; i < len; ++i) { + jobj = json_object_array_get_idx(jarray, i); + + rv = dp_test_json_string_field_from_obj(jobj, + "src_addr", + &saddr); + if (!rv) + break; + rv = dp_test_json_string_field_from_obj(jobj, + "dst_addr", + &daddr); + if (!rv) + break; + + printf("%-15s %-15s\n", saddr, daddr); + } + } + + json_object_put(jresp); +} diff --git a/tests/whole_dp/src/dp_test_npf_sess_lib.h b/tests/whole_dp/src/dp_test_npf_sess_lib.h index 06390058..27dcc621 100644 --- 
a/tests/whole_dp/src/dp_test_npf_sess_lib.h +++ b/tests/whole_dp/src/dp_test_npf_sess_lib.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2021, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -17,19 +17,12 @@ #include "dp_test_lib_pkt.h" #include "dp_test_npf_lib.h" -#define SC_WARN_ONLY true -#define SC_FAIL false +#include "dp_test/dp_test_session_lib.h" -/* - * Verify the npf global session count - */ -void -_dp_test_npf_session_count_verify(uint exp_count, bool warn, - const char *file, int line); - -#define dp_test_npf_session_count_verify(count) \ - _dp_test_npf_session_count_verify(count, SC_FAIL, \ - __FILE__, __LINE__) +#define _dp_test_npf_session_count_verify _dp_test_session_count_verify +#define dp_test_npf_session_count_verify(count) \ + _dp_test_session_count_verify(count, SC_FAIL, \ + __FILE__, __func__, __LINE__) /* * Verify the npf global TCP session count @@ -45,12 +38,10 @@ _dp_test_npf_tcp_session_count_verify(uint exp_count, bool warn, /* * Verify the npf global UDP session count */ -void -_dp_test_npf_udp_session_count_verify(uint exp_count, bool warn, - const char *file, int line); +#define _dp_test_npf_udp_session_count_verify _dp_test_session_udp_count_verify #define dp_test_npf_udp_session_count_verify(count) \ - _dp_test_npf_udp_session_count_verify(count, SC_FAIL, \ + _dp_test_session_udp_count_verify(count, SC_FAIL, \ __FILE__, __LINE__) /* @@ -60,18 +51,18 @@ void _dp_test_npf_nat_session_count_verify(uint exp_count, bool warn, const char *file, int line); -#define dp_test_npf_nat_session_count_verify(count) \ +#define dp_test_npf_nat_session_count_verify(count) \ _dp_test_npf_nat_session_count_verify(count, SC_FAIL, \ __FILE__, __LINE__) -/* Clear all npf sessions. 
*/ -void -dp_test_npf_clear_sessions(void); +#define dp_test_npf_clear_sessions dp_test_sessions_clear /* Expire active sessions */ void dp_test_npf_expire_sessions(void); +/* Reset session ID to 0 */ +void dp_test_npf_reset_session_id(void); /** * Extract source and destination IDs from a packet descriptor. e.g. for TCP @@ -85,48 +76,20 @@ void dp_test_npf_extract_ids_from_pkt_desc(struct dp_test_pkt_desc_t *pkt, uint16_t *src_id, uint16_t *dst_id); -/* - * Verify the presence/absence of an npf session. Source/dest addresses, ports - * and protocol are taken from a packet template - */ -#define SE_ACTIVE 0x0004 -#define SE_PASS 0x0008 -#define SE_EXPIRE 0x0010 -#define SE_GC_PASS_TWO 0x0020 -#define SE_BYPASS 0x0040 - -#define SE_FLAGS_MASK (SE_ACTIVE | SE_PASS | SE_EXPIRE | SE_BYPASS) -#define SE_FLAGS_AE (SE_ACTIVE | SE_EXPIRE) - -/** - * Verify the presence/absence of an npf session - * - * @param desc [in] Optional text to be prepended to any error message - * @param saddr [in] Source address string - * @param src_id [in] Source ID in host order (TCP port, ICMP id) - * @param daddr [in] Dest address string - * @param dst_id [in] Dest ID in host order (TCP port, ICMP id) - * @param proto [in] IP protocol - * @param intf [in] Interface string, e.g. "dp2T1" - * @param exp_flags [in] Expected flags, e.g. SE_ACTIVE | SE_PASS - * @param flags_mask [in] Flags mask, e.g. 
SE_FLAGS_MASK - * @param state [in] true if we expect to find the session - * - * @return true if found - **/ -bool _dp_test_npf_session_verify(char *desc, - const char *saddr, uint16_t src_id, - const char *daddr, uint16_t dst_id, - uint8_t proto, - const char *intf, - uint32_t exp_flags, uint32_t flags_mask, - bool exists, const char *file, int line); - #define dp_test_npf_session_verify(desc, saddr, src_id, daddr, dst_id, proto, \ intf, flgs, msk, exists) \ - _dp_test_npf_session_verify(desc, saddr, src_id, daddr, dst_id, \ - proto, intf, flgs, msk, exists, \ - __FILE__, __LINE__) + _dp_test_session_verify(desc, saddr, src_id, daddr, dst_id, \ + proto, intf, flgs, msk, exists, \ + __FILE__, __LINE__) + +#define dp_test_npf_session_verify_count(desc, saddr, src_id, daddr, dst_id, \ + proto, intf, flgs, msk, \ + pkts_in, bytes_in, pkts_out, \ + bytes_out) \ + _dp_test_session_verify_count(desc, saddr, src_id, daddr, dst_id, \ + proto, intf, flgs, msk, \ + pkts_in, bytes_in, pkts_out, \ + bytes_out, __FILE__, __LINE__) /** * Verify the presence/absence of an npf session. The 5-tuple is derived from @@ -342,4 +305,33 @@ void dp_test_npf_print_session(const char *saddr, uint16_t src_id, */ void dp_test_npf_print_session_table(bool nat); +/* + * Return counters for one session. Session filter should be fully specified, + * e.g. + * + * uint32_t pkts_in = 0, pkts_out = 0; + * uint32_t bytes_in = 0, bytes_out = 0; + * uint32_t sess_id = 0; + * + * dpt_session_counters("start 0 count 1 " + * "src-addr 192.0.2.103 src-port 10000 " + * "dst-addr 203.0.113.203 dst-port 60000 " + * "proto 17 dir out intf dpT21", + * &pkts_in, &pkts_out, &bytes_in, &bytes_out, + * &sess_id); + */ +int dpt_session_counters(const char *options, + uint32_t *pkts_in, uint32_t *pkts_out, + uint32_t *bytes_in, uint32_t *bytes_out, + uint32_t *sess_id); + +/* + * Uses the newer "session-op show dataplane sessions" command. 
+ * + * A simple example is as follows: + * + * dpt_show_sessions2("start 0 count 10"); + */ +void dpt_show_sessions2(const char *options); + #endif diff --git a/tests/whole_dp/src/dp_test_npf_session_limit.c b/tests/whole_dp/src/dp_test_npf_session_limit.c index 8221a552..8eb1e0b5 100644 --- a/tests/whole_dp/src/dp_test_npf_session_limit.c +++ b/tests/whole_dp/src/dp_test_npf_session_limit.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2017 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -16,16 +16,17 @@ #include "in_cksum.h" #include "if_var.h" #include "main.h" +#include "npf/npf_state.h" #include "dp_test.h" #include "dp_test_str.h" -#include "dp_test_lib.h" +#include "dp_test_lib_internal.h" #include "dp_test_lib_exp.h" #include "dp_test_lib_pkt.h" -#include "dp_test_lib_intf.h" +#include "dp_test_lib_intf_internal.h" #include "dp_test_lib_tcp.h" -#include "dp_test_pktmbuf_lib.h" -#include "dp_test_netlink_state.h" +#include "dp_test_pktmbuf_lib_internal.h" +#include "dp_test_netlink_state_internal.h" #include "dp_test_console.h" #include "dp_test_json_utils.h" #include "dp_test_npf_lib.h" @@ -219,7 +220,7 @@ print_sess_limiter(void) * Callback from dp_test_tcp_pak_receive */ static void forwarded_cb(const char *str, - uint pktno, enum dp_test_tcp_dir dir, + uint pktno, bool forw, uint8_t flags, struct dp_test_pkt_desc_t *pre, struct dp_test_pkt_desc_t *post, @@ -234,7 +235,7 @@ static void forwarded_cb(const char *str, /* * Fixup MAC header */ - if (dir == FWD) { + if (forw) { post_copy.l2_src = dp_test_intf_name2mac_str("dp2T1"); post_copy.l2_dst = "aa:bb:cc:18:0:1"; } else { @@ -261,7 +262,7 @@ static void forwarded_cb(const char *str, * Callback from dp_test_tcp_pak_receive */ static void dropped_cb(const char *str, - uint pktno, enum dp_test_tcp_dir dir, + uint pktno, bool forw, uint8_t 
flags, struct dp_test_pkt_desc_t *pre, struct dp_test_pkt_desc_t *post, @@ -276,7 +277,7 @@ static void dropped_cb(const char *str, /* * Fixup MAC header */ - if (dir == FWD) { + if (forw) { post_copy.l2_src = dp_test_intf_name2mac_str("dp2T1"); post_copy.l2_dst = "aa:bb:cc:18:0:1"; } else { @@ -319,67 +320,33 @@ _dp_test_create_tcp_sessions(uint nsessions, const char *exp_state, /* * TCP packet */ - struct dp_test_pkt_desc_t fwd_pkt = { - .text = "Fwd", - .len = 0, - .ether_type = ETHER_TYPE_IPv4, - .l3_src = "100.101.102.103", - .l2_src = "aa:bb:cc:16:0:20", - .l3_dst = "200.201.202.203", - .l2_dst = dp1T0_mac, - .proto = IPPROTO_TCP, - .l4 = { - .tcp = { - .sport = 49152, - .dport = dport, - .flags = 0, - .seq = 0, - .ack = 0, - .win = 8192, - .opts = NULL - } - }, - .rx_intf = "dp1T0", - .tx_intf = "dp2T1" - }; - - struct dp_test_pkt_desc_t back_pkt = { - .text = "Back", - .len = 0, - .ether_type = ETHER_TYPE_IPv4, - .l3_src = "200.201.202.203", - .l2_src = "aa:bb:cc:18:0:1", - .l3_dst = "100.101.102.103", - .l2_dst = dp2T1_mac, - .proto = IPPROTO_TCP, - .l4 = { - .tcp = { - .sport = dport, - .dport = 49152, - .flags = 0, - .seq = 0, - .ack = 0, - .win = 8192, - .opts = NULL - } - }, - .rx_intf = "dp2T1", - .tx_intf = "dp1T0" - }; - - struct dp_test_tcp_call tcp_call = { - .str[0] = '\0', - .isn = {0, 0}, - .desc[DP_DIR_FORW] = { - .pre = &fwd_pkt, - .post = &fwd_pkt, + struct dp_test_pkt_desc_t *fwd_pkt, *back_pkt; + + fwd_pkt = dpt_pdesc_v4_create( + "Fwd", IPPROTO_TCP, + "aa:bb:cc:16:0:20", "100.101.102.103", 49152, + dp1T0_mac, "200.201.202.203", dport, + "dp1T0", "dp2T1"); + + back_pkt = dpt_pdesc_v4_create( + "Back", IPPROTO_TCP, + "aa:bb:cc:18:0:1", "200.201.202.203", dport, + dp2T1_mac, "100.101.102.103", 49152, + "dp2T1", "dp1T0"); + + struct dpt_tcp_flow tcp_call = { + .text[0] = '\0', /* description */ + .isn = {0, 0}, /* initial seq no */ + .desc[DPT_FORW] = { /* Forw pkt descriptors */ + .pre = fwd_pkt, + .pst = fwd_pkt, }, - .desc[DP_DIR_BACK] = { 
- .pre = &back_pkt, - .post = &back_pkt, + .desc[DPT_BACK] = { /* Back pkt descriptors */ + .pre = back_pkt, + .pst = back_pkt, }, - .test_cb = forwarded_cb, - .post_cb = NULL, + .test_cb = forwarded_cb, /* Prep and send pkt */ + .post_cb = NULL, /* Fixup pkt exp */ }; /* @@ -403,7 +370,7 @@ _dp_test_create_tcp_sessions(uint nsessions, const char *exp_state, }; /* - * How many pakts do we need to sent to get session is required state? + * How many pkts do we need to send to get session in required state? */ for (i = 0; i < ARRAY_SIZE(tcp_pkt1_state); i++) { if (!strcmp(exp_state, tcp_pkt1_state[i])) { @@ -422,19 +389,19 @@ _dp_test_create_tcp_sessions(uint nsessions, const char *exp_state, /* * TCP call packets */ - struct dp_test_tcp_flow_pkt tcp_pkt1[] = { - {DP_DIR_FORW, TH_SYN, 0, NULL}, - {DP_DIR_BACK, TH_SYN | TH_ACK, 0, NULL}, - {DP_DIR_FORW, TH_ACK, 0, NULL}, /* NPF_TCPS_ESTABLISHED */ - {DP_DIR_BACK, TH_ACK, 20, NULL}, - {DP_DIR_FORW, TH_ACK, 50, NULL}, - {DP_DIR_FORW, TH_ACK | TH_FIN, 10, NULL}, - {DP_DIR_FORW, TH_ACK, 0, NULL}, - {DP_DIR_FORW, TH_ACK | TH_FIN, 0, NULL}, - {DP_DIR_BACK, TH_ACK, 0, NULL}, - {DP_DIR_FORW, TH_ACK, 0, NULL}, - {DP_DIR_BACK, TH_ACK | TH_FIN, 0, NULL}, - {DP_DIR_FORW, TH_ACK, 0, NULL}, + struct dpt_tcp_flow_pkt tcp_pkt1[] = { + {DPT_FORW, TH_SYN, 0, NULL, 0, NULL }, + {DPT_BACK, TH_SYN | TH_ACK, 0, NULL, 0, NULL }, + {DPT_FORW, TH_ACK, 0, NULL, 0, NULL }, /* ESTABLISHED */ + {DPT_BACK, TH_ACK, 20, NULL, 0, NULL }, + {DPT_FORW, TH_ACK, 50, NULL, 0, NULL }, + {DPT_FORW, TH_ACK | TH_FIN, 10, NULL, 0, NULL }, + {DPT_FORW, TH_ACK, 0, NULL, 0, NULL }, + {DPT_FORW, TH_ACK | TH_FIN, 0, NULL, 0, NULL }, + {DPT_BACK, TH_ACK, 0, NULL, 0, NULL }, + {DPT_FORW, TH_ACK, 0, NULL, 0, NULL }, + {DPT_BACK, TH_ACK | TH_FIN, 0, NULL, 0, NULL }, + {DPT_FORW, TH_ACK, 0, NULL, 0, NULL }, }; dp_test_fail_unless( @@ -447,8 +414,8 @@ _dp_test_create_tcp_sessions(uint nsessions, const char *exp_state, for (i = 0; i < nsessions; i++) { bool exp_created = i 
< exp_sessions; - fwd_pkt.l4.tcp.sport = src_port++; - back_pkt.l4.tcp.dport = fwd_pkt.l4.tcp.sport; + fwd_pkt->l4.tcp.sport = src_port++; + back_pkt->l4.tcp.dport = fwd_pkt->l4.tcp.sport; /* Do we expect this session to be created or dropped? */ if (exp_created) @@ -456,31 +423,31 @@ _dp_test_create_tcp_sessions(uint nsessions, const char *exp_state, else tcp_call.test_cb = dropped_cb; - spush(tcp_call.str, sizeof(tcp_call.str), - "%s %d: TCP Sess %u, port %u, exp to be %s (%s)", - basename(file), line, i+1, fwd_pkt.l4.tcp.sport, - exp_created ? "created" : "dropped", exp_state); + spush(tcp_call.text, sizeof(tcp_call.text), + "%s %d: TCP Sess %u, port %u, exp to be %s (%s), " + "npkts %u", + basename(file), line, i+1, fwd_pkt->l4.tcp.sport, + exp_created ? "created" : "dropped", exp_state, npkts); /* Create the session */ - dp_test_tcp_call(&tcp_call, tcp_pkt1, npkts, NULL, 0); + dpt_tcp_call(&tcp_call, tcp_pkt1, npkts, 0, 0, NULL, 0); /* Do we expect session to be created? */ if (exp_created) { /* Verify the session exists and is active */ - dp_test_npf_session_verify_desc(NULL, &fwd_pkt, - fwd_pkt.rx_intf, + dp_test_npf_session_verify_desc(NULL, fwd_pkt, + fwd_pkt->rx_intf, SE_ACTIVE, SE_FLAGS_AE, true); /* Verify the session state */ - dp_test_npf_session_state(fwd_pkt.l3_src, - fwd_pkt.l4.tcp.sport, - fwd_pkt.l3_dst, - fwd_pkt.l4.tcp.dport, + dp_test_npf_session_state(fwd_pkt->l3_src, + fwd_pkt->l4.tcp.sport, + fwd_pkt->l3_dst, + fwd_pkt->l4.tcp.dport, IPPROTO_TCP, - fwd_pkt.rx_intf, &state); + fwd_pkt->rx_intf, &state); - state_str = dp_test_npf_sess_state_str(IPPROTO_TCP, - state); + state_str = npf_state_get_tcp_name(state); dp_test_fail_unless(!strcmp(state_str, tcp_pkt1_state[npkts-1]), @@ -488,8 +455,8 @@ _dp_test_create_tcp_sessions(uint nsessions, const char *exp_state, tcp_pkt1_state[npkts-1]); } else { /* Verify the session does *not* exist */ - dp_test_npf_session_verify_desc(NULL, &fwd_pkt, - fwd_pkt.rx_intf, + 
dp_test_npf_session_verify_desc(NULL, fwd_pkt, + fwd_pkt->rx_intf, 0, 0, false); } @@ -497,6 +464,8 @@ _dp_test_create_tcp_sessions(uint nsessions, const char *exp_state, print_sess_limiter(); } + free(fwd_pkt); + free(back_pkt); } #define dp_test_create_tcp_sessions(n, h, exp, dport) \ @@ -527,7 +496,7 @@ _dp_test_create_udp_sessions(uint nsessions, bool halfopen, struct dp_test_pkt_desc_t fwd_pkt = { .text = "Fwd", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "100.101.102.103", .l2_src = "aa:bb:cc:16:0:20", .l3_dst = "200.201.202.203", @@ -546,7 +515,7 @@ _dp_test_create_udp_sessions(uint nsessions, bool halfopen, struct dp_test_pkt_desc_t back_pkt = { .text = "Back", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "200.201.202.203", .l2_src = "aa:bb:cc:18:0:1", .l3_dst = "100.101.102.103", @@ -689,7 +658,7 @@ DP_START_TEST(sess_limit_tcp, max_halfopen) false, "npf-ut add session-limiter:GROUP1 10 " "action=accept " - "proto=6 dst-addr=200.201.202.203 dst-port=80 " + "proto-final=6 dst-addr=200.201.202.203 dst-port=80 " "handle=session-limiter(parameter=PARAM1)"); dp_test_npf_cmd_fmt( @@ -899,7 +868,7 @@ DP_START_TEST(sess_limit_tcp, max_rate) false, "npf-ut add session-limiter:GROUP1 10 " "action=accept " - "proto=6 dst-addr=200.201.202.203 dst-port=80 " + "proto-final=6 dst-addr=200.201.202.203 dst-port=80 " "handle=session-limiter(parameter=PARAM1)"); dp_test_npf_cmd_fmt( @@ -1078,7 +1047,7 @@ DP_START_TEST(sess_limit_udp, max_halfopen) false, "npf-ut add session-limiter:GROUP1 10 " "action=accept " - "proto=17 dst-addr=200.201.202.203 dst-port=80 " + "proto-final=17 dst-addr=200.201.202.203 dst-port=80 " "handle=session-limiter(parameter=PARAM1)"); dp_test_npf_cmd_fmt( diff --git a/tests/whole_dp/src/dp_test_npf_snat_overrun.c b/tests/whole_dp/src/dp_test_npf_snat_overrun.c index 263e48b4..4e6ac8b4 100644 --- a/tests/whole_dp/src/dp_test_npf_snat_overrun.c +++ 
b/tests/whole_dp/src/dp_test_npf_snat_overrun.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2018-2021, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -18,13 +18,13 @@ #include "dp_test.h" #include "dp_test_controller.h" #include "dp_test_cmd_state.h" -#include "dp_test_netlink_state.h" -#include "dp_test_lib.h" +#include "dp_test_netlink_state_internal.h" +#include "dp_test_lib_internal.h" #include "dp_test_str.h" #include "dp_test_lib_exp.h" -#include "dp_test_lib_intf.h" +#include "dp_test_lib_intf_internal.h" #include "dp_test_lib_pkt.h" -#include "dp_test_pktmbuf_lib.h" +#include "dp_test_pktmbuf_lib_internal.h" #include "dp_test_console.h" #include "dp_test_json_utils.h" #include "dp_test_npf_sess_lib.h" @@ -97,6 +97,7 @@ DP_START_TEST(npf_snat_overrun_1, test1) .ifname = "dp2T1", .proto = IPPROTO_TCP, .map = "dynamic", + .port_alloc = NULL, .from_addr = "10.0.1.1", .from_port = NULL, .to_addr = NULL, @@ -129,7 +130,7 @@ DP_START_TEST(npf_snat_overrun_1, test1) struct dp_test_pkt_desc_t v4_pktA_pre = { .text = "Forw, Host1 Ins -> Host3 Outs, pre-NAT", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "10.0.1.1", .l2_src = "aa:bb:cc:dd:2:b1", .l3_dst = "172.0.2.3", @@ -153,7 +154,7 @@ DP_START_TEST(npf_snat_overrun_1, test1) struct dp_test_pkt_desc_t v4_pktA_post = { .text = "Forw, Host1 Ins -> Host3 Outs, post-NAT", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "172.0.2.1", .l2_src = dp_test_intf_name2mac_str("dp2T1"), .l3_dst = "172.0.2.3", @@ -211,7 +212,7 @@ DP_START_TEST(npf_snat_overrun_1, test1) dp_test_pak_receive(pre_pak, pre->rx_intf, test_exp); if (i < 100) - dp_test_npf_portmap_port_verify("172.0.2.1", + dp_test_npf_portmap_port_verify("tcp", "172.0.2.1", pre->l4.tcp.sport); } diff --git 
a/tests/whole_dp/src/dp_test_npf_tblset.c b/tests/whole_dp/src/dp_test_npf_tblset.c index 5e6c9f90..8ebdd067 100644 --- a/tests/whole_dp/src/dp_test_npf_tblset.c +++ b/tests/whole_dp/src/dp_test_npf_tblset.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only */ @@ -28,11 +28,11 @@ #include "dp_test.h" #include "dp_test_controller.h" -#include "dp_test_netlink_state.h" -#include "dp_test_lib.h" -#include "dp_test_lib_intf.h" +#include "dp_test_netlink_state_internal.h" +#include "dp_test_lib_internal.h" +#include "dp_test_lib_intf_internal.h" #include "dp_test_lib_exp.h" -#include "dp_test_pktmbuf_lib.h" +#include "dp_test_pktmbuf_lib_internal.h" #include "dp_test_npf_fw_lib.h" DP_DECL_TEST_SUITE(npf_tblset); @@ -43,7 +43,7 @@ DP_DECL_TEST_CASE(npf_tblset, npf_tblset_case1, NULL, NULL); static uint32_t *td[DSET_SZ] = {0}; -static int id[DSET_SZ] = {0}; +static uint32_t id[DSET_SZ] = {0}; static uint32_t g_data[DSET_SZ] = { 0xba5eba11, @@ -73,17 +73,9 @@ npf_test_tbl_walk_cb(const char *name, uint id, void *data, void *ctx) return 0; } -/* - * Walk callback to remove and destroy each entry - */ -static int -npf_test_tbl_destroy_cb(const char *name, uint id, void *data, void *ctx) +static void npf_test_tbl_entry_free_cb(void *data __unused) { - struct npf_tbl *nt = ctx; - - dp_test_fail_unless(npf_tbl_entry_remove(nt, data) == 0, - "npf_tbl_entry_remove"); - return 0; + /* Nothing to do */ } /* @@ -93,12 +85,13 @@ DP_START_TEST(npf_tblset_case1, test1) { struct npf_tbl *nt; uint32_t *tmp, *entry1, *entry2; - int rc, entry1_id; + int rc; + uint32_t entry1_id, entry2_id; /* * Create a tableset with 8 entries initially. 
*/ - uint8_t tbl_id = 0; + uint8_t tbl_id = 0; uint tbl_sz = 8; uint tbl_sz_max = 128; uint tbl_entry_sz = sizeof(uint32_t); @@ -108,6 +101,8 @@ DP_START_TEST(npf_tblset_case1, test1) tbl_flags); dp_test_fail_unless(nt, "npf_tbl_create"); + /* Set entry-free function */ + npf_tbl_set_entry_freefn(nt, npf_test_tbl_entry_free_cb); /* Create entry "TABLE1" */ entry1 = npf_tbl_entry_create(nt, "TABLE1"); @@ -117,8 +112,9 @@ DP_START_TEST(npf_tblset_case1, test1) *entry1 = g_data[0]; /* Insert entry into table */ - entry1_id = npf_tbl_entry_insert(nt, entry1); - dp_test_fail_unless(entry1_id >= 0, "npf_tbl_entry_insert"); + npf_tbl_entry_insert(nt, entry1, &entry1_id); + dp_test_fail_unless(entry1_id != NPF_TBLID_NONE, + "npf_tbl_entry_insert"); dp_test_fail_unless(npf_tbl_size(nt) == 1, "Table size %u", npf_tbl_size(nt)); @@ -136,7 +132,7 @@ DP_START_TEST(npf_tblset_case1, test1) dp_test_fail_unless(entry2, "npf_tbl_entry_create"); /* Try and insert duplicate entry into table */ - rc = npf_tbl_entry_insert(nt, entry2); + rc = npf_tbl_entry_insert(nt, entry2, &entry2_id); dp_test_fail_unless(rc < 0, "npf_tbl_entry_insert"); /* Destroyed duplicate entry */ @@ -167,10 +163,10 @@ DP_START_TEST(npf_tblset_case1, test1) *td[i] = g_data[i]; - id[i] = npf_tbl_entry_insert(nt, td[i]); - dp_test_fail_unless(id[i] >= 0, - "npf_tbl_entry_insert id[%u] = %d", - i, id[i]); + rc = npf_tbl_entry_insert(nt, td[i], &id[i]); + dp_test_fail_unless(rc == 0, + "npf_tbl_entry_insert id[%u], rc = %d", + i, rc); } dp_test_fail_unless(npf_tbl_size(nt) == ARRAY_SIZE(td), @@ -191,8 +187,8 @@ DP_START_TEST(npf_tblset_case1, test1) *td[i] = g_data[i]; - id[i] = npf_tbl_entry_insert(nt, td[i]); - dp_test_fail_unless(id[i] >= 0, "npf_tbl_entry_insert"); + rc = npf_tbl_entry_insert(nt, td[i], &id[i]); + dp_test_fail_unless(rc == 0, "npf_tbl_entry_insert"); /* Lookup by ID */ @@ -209,12 +205,7 @@ DP_START_TEST(npf_tblset_case1, test1) /* Walk all entries */ npf_tbl_walk(nt, npf_test_tbl_walk_cb, NULL); 
- /* Destroy all entries */ - npf_tbl_walk(nt, npf_test_tbl_destroy_cb, nt); - for (i = 0; i < ARRAY_SIZE(td); i++) - td[i] = NULL; - - /* Destroy table */ + /* Destroy table with entries */ rc = npf_tbl_destroy(nt); dp_test_fail_unless(!rc, "npf_tbl_destroy"); diff --git a/tests/whole_dp/src/dp_test_npf_tcp.c b/tests/whole_dp/src/dp_test_npf_tcp.c index f0789c6d..f7688520 100644 --- a/tests/whole_dp/src/dp_test_npf_tcp.c +++ b/tests/whole_dp/src/dp_test_npf_tcp.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -19,14 +19,14 @@ #include "dp_test.h" #include "dp_test_controller.h" #include "dp_test_cmd_state.h" -#include "dp_test_netlink_state.h" -#include "dp_test_lib.h" +#include "dp_test_netlink_state_internal.h" +#include "dp_test_lib_internal.h" #include "dp_test_str.h" #include "dp_test_lib_exp.h" -#include "dp_test_lib_intf.h" +#include "dp_test_lib_intf_internal.h" #include "dp_test_lib_pkt.h" #include "dp_test_lib_tcp.h" -#include "dp_test_pktmbuf_lib.h" +#include "dp_test_pktmbuf_lib_internal.h" #include "dp_test_console.h" #include "dp_test_json_utils.h" #include "dp_test_npf_sess_lib.h" @@ -35,34 +35,6 @@ #define CORE_TCP_FLAGS (TH_FIN|TH_SYN|TH_RST|TH_ACK) -/* - * Callback from dp_test_tcp_pak_receive - */ -static void -dp_test_npf_tcp_test_cb(const char *str, - uint pktno, enum dp_test_tcp_dir dir, - uint8_t flags, - struct dp_test_pkt_desc_t *pre, - struct dp_test_pkt_desc_t *post, - void *data, uint index) -{ - struct rte_mbuf *pre_pak, *post_pak; - struct dp_test_expected *test_exp; - - pre_pak = dp_test_v4_pkt_from_desc(pre); - post_pak = dp_test_v4_pkt_from_desc(post); - - test_exp = dp_test_exp_from_desc(post_pak, post); - rte_pktmbuf_free(post_pak); - dp_test_exp_set_fwd_status(test_exp, DP_TEST_FWD_FORWARDED); - - 
spush(test_exp->description, sizeof(test_exp->description), - "%s", str); - - /* Run the test */ - dp_test_pak_receive(pre_pak, pre->rx_intf, test_exp); -} - DP_DECL_TEST_SUITE(npf_tcp); @@ -95,102 +67,32 @@ DP_START_TEST(strict_state, t1) dp_test_netlink_add_neigh("dp2T1", "200.201.202.203", "aa:bb:cc:18:0:1"); - - struct dp_test_pkt_desc_t ins_pre = { - .text = "Inside pre", - .len = 0, - .ether_type = ETHER_TYPE_IPv4, - .l3_src = "100.101.102.103", - .l2_src = "aa:bb:cc:16:0:20", - .l3_dst = "200.201.202.203", - .l2_dst = dp1T0_mac, - .proto = IPPROTO_TCP, - .l4 = { - .tcp = { - .sport = 49152, - .dport = 80, - .flags = 0, - .seq = 0, - .ack = 0, - .win = 8192, - .opts = NULL - } - }, - .rx_intf = "dp1T0", - .tx_intf = "dp2T1" - }; - - struct dp_test_pkt_desc_t ins_post = { - .text = "Inside post", - .len = 0, - .ether_type = ETHER_TYPE_IPv4, - .l3_src = "100.101.102.103", - .l2_src = dp2T1_mac, - .l3_dst = "200.201.202.203", - .l2_dst = "aa:bb:cc:18:0:1", - .proto = IPPROTO_TCP, - .l4 = { - .tcp = { - .sport = 49152, - .dport = 80, - .flags = 0, - .seq = 0, - .ack = 0, - .win = 8192, - .opts = NULL - } - }, - .rx_intf = "dp1T0", - .tx_intf = "dp2T1" - }; - - struct dp_test_pkt_desc_t outs_pre = { - .text = "Outside pre", - .len = 0, - .ether_type = ETHER_TYPE_IPv4, - .l3_src = "200.201.202.203", - .l2_src = "aa:bb:cc:18:0:1", - .l3_dst = "100.101.102.103", - .l2_dst = dp2T1_mac, - .proto = IPPROTO_TCP, - .l4 = { - .tcp = { - .sport = 80, - .dport = 49152, - .flags = 0, - .seq = 0, - .ack = 0, - .win = 8192, - .opts = NULL - } - }, - .rx_intf = "dp2T1", - .tx_intf = "dp1T0" - }; - - struct dp_test_pkt_desc_t outs_post = { - .text = "Outside post", - .len = 0, - .ether_type = ETHER_TYPE_IPv4, - .l3_src = "200.201.202.203", - .l2_src = dp1T0_mac, - .l3_dst = "100.101.102.103", - .l2_dst = "aa:bb:cc:16:0:20", - .proto = IPPROTO_TCP, - .l4 = { - .tcp = { - .sport = 80, - .dport = 49152, - .flags = 0, - .seq = 0, - .ack = 0, - .win = 8192, - .opts = NULL - } - }, - 
.rx_intf = "dp2T1", - .tx_intf = "dp1T0" - }; + struct dp_test_pkt_desc_t *ins_pre, *ins_post; + struct dp_test_pkt_desc_t *outs_pre, *outs_post; + + ins_pre = dpt_pdesc_v4_create( + "Inside pre", IPPROTO_TCP, + "aa:bb:cc:16:0:20", "100.101.102.103", 49152, + dp1T0_mac, "200.201.202.203", 80, + "dp1T0", "dp2T1"); + + ins_post = dpt_pdesc_v4_create( + "Inside post", IPPROTO_TCP, + dp2T1_mac, "100.101.102.103", 49152, + "aa:bb:cc:18:0:1", "200.201.202.203", 80, + "dp1T0", "dp2T1"); + + outs_pre = dpt_pdesc_v4_create( + "Outside pre", IPPROTO_TCP, + "aa:bb:cc:18:0:1", "200.201.202.203", 80, + dp2T1_mac, "100.101.102.103", 49152, + "dp2T1", "dp1T0"); + + outs_post = dpt_pdesc_v4_create( + "Outside post", IPPROTO_TCP, + dp1T0_mac, "200.201.202.203", 80, + "aa:bb:cc:16:0:20", "100.101.102.103", 49152, + "dp2T1", "dp1T0"); struct dp_test_npf_rule_t rules[] = { { @@ -218,168 +120,168 @@ DP_START_TEST(strict_state, t1) dp_test_npf_cmd("npf-ut fw global tcp-strict enable", false); dp_test_npf_commit(); - struct dp_test_tcp_call tcp_call = { - .str[0] = '\0', + struct dpt_tcp_flow tcp_call = { + .text[0] = '\0', .isn = {0, 0}, - .desc[DP_DIR_FORW] = { - .pre = &ins_pre, - .post = &ins_post, + .desc[DPT_FORW] = { + .pre = ins_pre, + .pst = ins_post, }, - .desc[DP_DIR_BACK] = { - .pre = &outs_pre, - .post = &outs_post, + .desc[DPT_BACK] = { + .pre = outs_pre, + .pst = outs_post, }, - .test_cb = dp_test_npf_tcp_test_cb, + .test_cb = NULL, .post_cb = NULL, }; /* * Test 1 */ - spush(tcp_call.str, sizeof(tcp_call.str), "npf TCP strict Test 1.1"); + spush(tcp_call.text, sizeof(tcp_call.text), "npf TCP strict Test 1.1"); /* Comment is new npf_tcp_fsm state */ - struct dp_test_tcp_flow_pkt tcp_pkt1[] = { - {DP_DIR_FORW, TH_SYN, 0, NULL}, - {DP_DIR_BACK, TH_SYN | TH_ACK, 0, NULL}, - {DP_DIR_FORW, TH_ACK, 0, NULL}, - {DP_DIR_BACK, TH_ACK, 20, NULL}, - {DP_DIR_FORW, TH_ACK, 50, NULL}, - {DP_DIR_FORW, TH_ACK | TH_FIN, 10, NULL}, - {DP_DIR_FORW, TH_ACK, 0, NULL}, - {DP_DIR_FORW, TH_ACK | 
TH_FIN, 0, NULL}, - {DP_DIR_BACK, TH_ACK, 0, NULL}, - {DP_DIR_FORW, TH_ACK, 0, NULL}, - {DP_DIR_BACK, TH_ACK | TH_FIN, 0, NULL}, - {DP_DIR_FORW, TH_ACK, 0, NULL}, - {DP_DIR_FORW, TH_SYN, 0, NULL}, + struct dpt_tcp_flow_pkt tcp_pkt1[] = { + {DPT_FORW, TH_SYN, 0, NULL, 0, NULL }, + {DPT_BACK, TH_SYN | TH_ACK, 0, NULL, 0, NULL }, + {DPT_FORW, TH_ACK, 0, NULL, 0, NULL }, + {DPT_BACK, TH_ACK, 20, NULL, 0, NULL }, + {DPT_FORW, TH_ACK, 50, NULL, 0, NULL }, + {DPT_FORW, TH_ACK | TH_FIN, 10, NULL, 0, NULL }, + {DPT_FORW, TH_ACK, 0, NULL, 0, NULL }, + {DPT_FORW, TH_ACK | TH_FIN, 0, NULL, 0, NULL }, + {DPT_BACK, TH_ACK, 0, NULL, 0, NULL }, + {DPT_FORW, TH_ACK, 0, NULL, 0, NULL }, + {DPT_BACK, TH_ACK | TH_FIN, 0, NULL, 0, NULL }, + {DPT_FORW, TH_ACK, 0, NULL, 0, NULL }, + {DPT_FORW, TH_SYN, 0, NULL, 0, NULL }, }; - dp_test_tcp_call(&tcp_call, tcp_pkt1, ARRAY_SIZE(tcp_pkt1), NULL, 0); + dpt_tcp_call(&tcp_call, tcp_pkt1, ARRAY_SIZE(tcp_pkt1), 0, 0, NULL, 0); /* * Test 2 */ - spush(tcp_call.str, sizeof(tcp_call.str), "npf TCP strict Test 1.2"); + spush(tcp_call.text, sizeof(tcp_call.text), "npf TCP strict Test 1.2"); /* Comment is new npf_tcp_fsm state */ - struct dp_test_tcp_flow_pkt tcp_pkt2[] = { - {DP_DIR_FORW, TH_SYN, 0, NULL}, - {DP_DIR_BACK, TH_SYN | TH_ACK, 0, NULL}, - {DP_DIR_FORW, TH_ACK, 0, NULL}, - {DP_DIR_FORW, TH_ACK | TH_FIN, 0, NULL}, - {DP_DIR_BACK, TH_ACK, 0, NULL}, - {DP_DIR_BACK, TH_ACK, 0, NULL}, - {DP_DIR_FORW, TH_ACK | TH_FIN, 0, NULL}, - {DP_DIR_BACK, TH_ACK, 0, NULL} + struct dpt_tcp_flow_pkt tcp_pkt2[] = { + {DPT_FORW, TH_SYN, 0, NULL, 0, NULL }, + {DPT_BACK, TH_SYN | TH_ACK, 0, NULL, 0, NULL }, + {DPT_FORW, TH_ACK, 0, NULL, 0, NULL }, + {DPT_FORW, TH_ACK | TH_FIN, 0, NULL, 0, NULL }, + {DPT_BACK, TH_ACK, 0, NULL, 0, NULL }, + {DPT_BACK, TH_ACK, 0, NULL, 0, NULL }, + {DPT_FORW, TH_ACK | TH_FIN, 0, NULL, 0, NULL }, + {DPT_BACK, TH_ACK, 0, NULL, 0, NULL } }; /* * Incremement the forwards source port so that a new session is * created */ - 
tcp_call.desc[DP_DIR_FORW].pre->l4.tcp.sport++; - tcp_call.desc[DP_DIR_FORW].post->l4.tcp.sport++; - tcp_call.desc[DP_DIR_BACK].pre->l4.tcp.dport++; - tcp_call.desc[DP_DIR_BACK].post->l4.tcp.dport++; + tcp_call.desc[DPT_FORW].pre->l4.tcp.sport++; + tcp_call.desc[DPT_FORW].pst->l4.tcp.sport++; + tcp_call.desc[DPT_BACK].pre->l4.tcp.dport++; + tcp_call.desc[DPT_BACK].pst->l4.tcp.dport++; - dp_test_tcp_call(&tcp_call, tcp_pkt2, ARRAY_SIZE(tcp_pkt2), NULL, 0); + dpt_tcp_call(&tcp_call, tcp_pkt2, ARRAY_SIZE(tcp_pkt2), 0, 0, NULL, 0); /* * Test 3 */ - spush(tcp_call.str, sizeof(tcp_call.str), "npf TCP strict Test 1.3"); + spush(tcp_call.text, sizeof(tcp_call.text), "npf TCP strict Test 1.3"); /* Comment is new npf_tcp_fsm state */ - struct dp_test_tcp_flow_pkt tcp_pkt3[] = { - {DP_DIR_FORW, TH_SYN, 0, NULL}, - {DP_DIR_BACK, TH_SYN | TH_ACK, 0, NULL}, - {DP_DIR_FORW, TH_ACK, 0, NULL}, - {DP_DIR_BACK, TH_ACK | TH_FIN, 0, NULL}, - {DP_DIR_BACK, TH_ACK, 0, NULL}, - {DP_DIR_BACK, TH_ACK | TH_FIN, 0, NULL}, - {DP_DIR_FORW, TH_ACK, 0, NULL}, - {DP_DIR_FORW, TH_ACK, 0, NULL}, - {DP_DIR_BACK, TH_ACK | TH_FIN, 0, NULL} + struct dpt_tcp_flow_pkt tcp_pkt3[] = { + {DPT_FORW, TH_SYN, 0, NULL, 0, NULL }, + {DPT_BACK, TH_SYN | TH_ACK, 0, NULL, 0, NULL }, + {DPT_FORW, TH_ACK, 0, NULL, 0, NULL }, + {DPT_BACK, TH_ACK | TH_FIN, 0, NULL, 0, NULL }, + {DPT_BACK, TH_ACK, 0, NULL, 0, NULL }, + {DPT_BACK, TH_ACK | TH_FIN, 0, NULL, 0, NULL }, + {DPT_FORW, TH_ACK, 0, NULL, 0, NULL }, + {DPT_FORW, TH_ACK, 0, NULL, 0, NULL }, + {DPT_BACK, TH_ACK | TH_FIN, 0, NULL, 0, NULL } }; - tcp_call.desc[DP_DIR_FORW].pre->l4.tcp.sport++; - tcp_call.desc[DP_DIR_FORW].post->l4.tcp.sport++; - tcp_call.desc[DP_DIR_BACK].pre->l4.tcp.dport++; - tcp_call.desc[DP_DIR_BACK].post->l4.tcp.dport++; + tcp_call.desc[DPT_FORW].pre->l4.tcp.sport++; + tcp_call.desc[DPT_FORW].pst->l4.tcp.sport++; + tcp_call.desc[DPT_BACK].pre->l4.tcp.dport++; + tcp_call.desc[DPT_BACK].pst->l4.tcp.dport++; - dp_test_tcp_call(&tcp_call, tcp_pkt3, 
ARRAY_SIZE(tcp_pkt3), NULL, 0); + dpt_tcp_call(&tcp_call, tcp_pkt3, ARRAY_SIZE(tcp_pkt3), 0, 0, NULL, 0); /* * Test 4 */ - spush(tcp_call.str, sizeof(tcp_call.str), "npf TCP strict Test 1.4"); + spush(tcp_call.text, sizeof(tcp_call.text), "npf TCP strict Test 1.4"); /* Comment is new npf_tcp_fsm state */ - struct dp_test_tcp_flow_pkt tcp_pkt4[] = { - {DP_DIR_FORW, TH_SYN, 0, NULL}, - {DP_DIR_BACK, TH_SYN | TH_ACK, 0, NULL}, - {DP_DIR_FORW, TH_ACK, 0, NULL}, - {DP_DIR_BACK, TH_ACK | TH_FIN, 0, NULL}, - {DP_DIR_FORW, TH_ACK, 0, NULL}, - {DP_DIR_BACK, TH_ACK, 0, NULL}, - {DP_DIR_FORW, TH_ACK | TH_FIN, 0, NULL} + struct dpt_tcp_flow_pkt tcp_pkt4[] = { + {DPT_FORW, TH_SYN, 0, NULL, 0, NULL }, + {DPT_BACK, TH_SYN | TH_ACK, 0, NULL, 0, NULL }, + {DPT_FORW, TH_ACK, 0, NULL, 0, NULL }, + {DPT_BACK, TH_ACK | TH_FIN, 0, NULL, 0, NULL }, + {DPT_FORW, TH_ACK, 0, NULL, 0, NULL }, + {DPT_BACK, TH_ACK, 0, NULL, 0, NULL }, + {DPT_FORW, TH_ACK | TH_FIN, 0, NULL, 0, NULL } }; - tcp_call.desc[DP_DIR_FORW].pre->l4.tcp.sport++; - tcp_call.desc[DP_DIR_FORW].post->l4.tcp.sport++; - tcp_call.desc[DP_DIR_BACK].pre->l4.tcp.dport++; - tcp_call.desc[DP_DIR_BACK].post->l4.tcp.dport++; + tcp_call.desc[DPT_FORW].pre->l4.tcp.sport++; + tcp_call.desc[DPT_FORW].pst->l4.tcp.sport++; + tcp_call.desc[DPT_BACK].pre->l4.tcp.dport++; + tcp_call.desc[DPT_BACK].pst->l4.tcp.dport++; - dp_test_tcp_call(&tcp_call, tcp_pkt4, ARRAY_SIZE(tcp_pkt4), NULL, 0); + dpt_tcp_call(&tcp_call, tcp_pkt4, ARRAY_SIZE(tcp_pkt4), 0, 0, NULL, 0); /* * Test 5 */ - spush(tcp_call.str, sizeof(tcp_call.str), "npf TCP strict Test 1.5"); - - struct dp_test_tcp_flow_pkt tcp_pkt5[] = { - {DP_DIR_FORW, TH_SYN, 0, NULL}, - {DP_DIR_FORW, TH_SYN, 0, NULL}, - {DP_DIR_BACK, TH_SYN, 0, NULL}, - {DP_DIR_FORW, TH_SYN, 0, NULL}, - {DP_DIR_FORW, TH_SYN | TH_ACK, 0, NULL}, - {DP_DIR_FORW, TH_ACK | TH_FIN, 0, NULL}, - {DP_DIR_BACK, TH_ACK | TH_FIN, 0, NULL}, - {DP_DIR_FORW, TH_ACK, 0, NULL} + spush(tcp_call.text, sizeof(tcp_call.text), "npf TCP 
strict Test 1.5"); + + struct dpt_tcp_flow_pkt tcp_pkt5[] = { + {DPT_FORW, TH_SYN, 0, NULL, 0, NULL }, + {DPT_FORW, TH_SYN, 0, NULL, 0, NULL }, + {DPT_BACK, TH_SYN, 0, NULL, 0, NULL }, + {DPT_FORW, TH_SYN, 0, NULL, 0, NULL }, + {DPT_FORW, TH_SYN | TH_ACK, 0, NULL, 0, NULL }, + {DPT_FORW, TH_ACK | TH_FIN, 0, NULL, 0, NULL }, + {DPT_BACK, TH_ACK | TH_FIN, 0, NULL, 0, NULL }, + {DPT_FORW, TH_ACK, 0, NULL, 0, NULL } }; - tcp_call.desc[DP_DIR_FORW].pre->l4.tcp.sport++; - tcp_call.desc[DP_DIR_FORW].post->l4.tcp.sport++; - tcp_call.desc[DP_DIR_BACK].pre->l4.tcp.dport++; - tcp_call.desc[DP_DIR_BACK].post->l4.tcp.dport++; + tcp_call.desc[DPT_FORW].pre->l4.tcp.sport++; + tcp_call.desc[DPT_FORW].pst->l4.tcp.sport++; + tcp_call.desc[DPT_BACK].pre->l4.tcp.dport++; + tcp_call.desc[DPT_BACK].pst->l4.tcp.dport++; - dp_test_tcp_call(&tcp_call, tcp_pkt5, ARRAY_SIZE(tcp_pkt5), NULL, 0); + dpt_tcp_call(&tcp_call, tcp_pkt5, ARRAY_SIZE(tcp_pkt5), 0, 0, NULL, 0); /* * Test 6 */ - spush(tcp_call.str, sizeof(tcp_call.str), "npf TCP strict Test 1.6"); - - struct dp_test_tcp_flow_pkt tcp_pkt6[] = { - {DP_DIR_FORW, TH_SYN, 0, NULL}, - {DP_DIR_FORW, TH_SYN, 0, NULL}, - {DP_DIR_BACK, TH_SYN, 0, NULL}, - {DP_DIR_BACK, TH_SYN | TH_ACK, 0, NULL}, - {DP_DIR_BACK, TH_SYN | TH_ACK, 0, NULL}, - {DP_DIR_BACK, TH_ACK, 0, NULL}, - {DP_DIR_BACK, TH_ACK | TH_FIN, 0, NULL}, - {DP_DIR_FORW, TH_ACK | TH_FIN, 0, NULL}, - {DP_DIR_BACK, TH_ACK, 0, NULL} + spush(tcp_call.text, sizeof(tcp_call.text), "npf TCP strict Test 1.6"); + + struct dpt_tcp_flow_pkt tcp_pkt6[] = { + {DPT_FORW, TH_SYN, 0, NULL, 0, NULL }, + {DPT_FORW, TH_SYN, 0, NULL, 0, NULL }, + {DPT_BACK, TH_SYN, 0, NULL, 0, NULL }, + {DPT_BACK, TH_SYN | TH_ACK, 0, NULL, 0, NULL }, + {DPT_BACK, TH_SYN | TH_ACK, 0, NULL, 0, NULL }, + {DPT_BACK, TH_ACK, 0, NULL, 0, NULL }, + {DPT_BACK, TH_ACK | TH_FIN, 0, NULL, 0, NULL }, + {DPT_FORW, TH_ACK | TH_FIN, 0, NULL, 0, NULL }, + {DPT_BACK, TH_ACK, 0, NULL, 0, NULL } }; - 
tcp_call.desc[DP_DIR_FORW].pre->l4.tcp.sport++; - tcp_call.desc[DP_DIR_FORW].post->l4.tcp.sport++; - tcp_call.desc[DP_DIR_BACK].pre->l4.tcp.dport++; - tcp_call.desc[DP_DIR_BACK].post->l4.tcp.dport++; + tcp_call.desc[DPT_FORW].pre->l4.tcp.sport++; + tcp_call.desc[DPT_FORW].pst->l4.tcp.sport++; + tcp_call.desc[DPT_BACK].pre->l4.tcp.dport++; + tcp_call.desc[DPT_BACK].pst->l4.tcp.dport++; - dp_test_tcp_call(&tcp_call, tcp_pkt6, ARRAY_SIZE(tcp_pkt6), NULL, 0); + dpt_tcp_call(&tcp_call, tcp_pkt6, ARRAY_SIZE(tcp_pkt6), 0, 0, NULL, 0); /* * End @@ -387,6 +289,10 @@ DP_START_TEST(strict_state, t1) dp_test_npf_cmd("npf-ut fw global tcp-strict disable", false); dp_test_npf_commit(); + free(ins_pre); + free(ins_post); + free(outs_pre); + free(outs_post); /************************************************************* * Cleanup @@ -414,7 +320,7 @@ DP_DECL_TEST_CASE(npf_tcp, strict_syn, NULL, NULL); */ static void dp_test_npf_tcp_test_cb2(const char *desc, - uint pktno, enum dp_test_tcp_dir dir, + uint pktno, bool forw, uint8_t flags, struct dp_test_pkt_desc_t *pre, struct dp_test_pkt_desc_t *post, @@ -438,7 +344,7 @@ dp_test_npf_tcp_test_cb2(const char *desc, } static void -dp_test_npf_tcp_post_cb2(uint pktno, enum dp_test_tcp_dir dir, +dp_test_npf_tcp_post_cb2(uint pktno, bool forw, uint8_t flags, struct dp_test_pkt_desc_t *pre, struct dp_test_pkt_desc_t *post, @@ -465,102 +371,32 @@ DP_START_TEST(strict_syn, t1) dp_test_netlink_add_neigh("dp2T1", "200.201.202.203", "aa:bb:cc:18:0:1"); - - struct dp_test_pkt_desc_t ins_pre = { - .text = "Inside pre", - .len = 0, - .ether_type = ETHER_TYPE_IPv4, - .l3_src = "100.101.102.103", - .l2_src = "aa:bb:cc:16:0:20", - .l3_dst = "200.201.202.203", - .l2_dst = dp1T0_mac, - .proto = IPPROTO_TCP, - .l4 = { - .tcp = { - .sport = 50152, - .dport = 80, - .flags = 0, - .seq = 0, - .ack = 0, - .win = 8192, - .opts = NULL - } - }, - .rx_intf = "dp1T0", - .tx_intf = "dp2T1" - }; - - struct dp_test_pkt_desc_t ins_post = { - .text = "Inside post", - 
.len = 0, - .ether_type = ETHER_TYPE_IPv4, - .l3_src = "100.101.102.103", - .l2_src = dp2T1_mac, - .l3_dst = "200.201.202.203", - .l2_dst = "aa:bb:cc:18:0:1", - .proto = IPPROTO_TCP, - .l4 = { - .tcp = { - .sport = 50152, - .dport = 80, - .flags = 0, - .seq = 0, - .ack = 0, - .win = 8192, - .opts = NULL - } - }, - .rx_intf = "dp1T0", - .tx_intf = "dp2T1" - }; - - struct dp_test_pkt_desc_t outs_pre = { - .text = "Outside pre", - .len = 0, - .ether_type = ETHER_TYPE_IPv4, - .l3_src = "200.201.202.203", - .l2_src = "aa:bb:cc:18:0:1", - .l3_dst = "100.101.102.103", - .l2_dst = dp2T1_mac, - .proto = IPPROTO_TCP, - .l4 = { - .tcp = { - .sport = 80, - .dport = 50152, - .flags = 0, - .seq = 0, - .ack = 0, - .win = 8192, - .opts = NULL - } - }, - .rx_intf = "dp2T1", - .tx_intf = "dp1T0" - }; - - struct dp_test_pkt_desc_t outs_post = { - .text = "Outside post", - .len = 0, - .ether_type = ETHER_TYPE_IPv4, - .l3_src = "200.201.202.203", - .l2_src = dp1T0_mac, - .l3_dst = "100.101.102.103", - .l2_dst = "aa:bb:cc:16:0:20", - .proto = IPPROTO_TCP, - .l4 = { - .tcp = { - .sport = 80, - .dport = 50152, - .flags = 0, - .seq = 0, - .ack = 0, - .win = 8192, - .opts = NULL - } - }, - .rx_intf = "dp2T1", - .tx_intf = "dp1T0" - }; + struct dp_test_pkt_desc_t *ins_pre, *ins_post; + struct dp_test_pkt_desc_t *outs_pre, *outs_post; + + ins_pre = dpt_pdesc_v4_create( + "Inside pre", IPPROTO_TCP, + "aa:bb:cc:16:0:20", "100.101.102.103", 50152, + dp1T0_mac, "200.201.202.203", 80, + "dp1T0", "dp2T1"); + + ins_post = dpt_pdesc_v4_create( + "Inside post", IPPROTO_TCP, + dp2T1_mac, "100.101.102.103", 50152, + "aa:bb:cc:18:0:1", "200.201.202.203", 80, + "dp1T0", "dp2T1"); + + outs_pre = dpt_pdesc_v4_create( + "Outside pre", IPPROTO_TCP, + "aa:bb:cc:18:0:1", "200.201.202.203", 80, + dp2T1_mac, "100.101.102.103", 50152, + "dp2T1", "dp1T0"); + + outs_post = dpt_pdesc_v4_create( + "Outside post", IPPROTO_TCP, + dp1T0_mac, "200.201.202.203", 80, + "aa:bb:cc:16:0:20", "100.101.102.103", 50152, + 
"dp2T1", "dp1T0"); struct dp_test_npf_rule_t rules[] = { { @@ -588,16 +424,16 @@ DP_START_TEST(strict_syn, t1) dp_test_npf_cmd("npf-ut fw global tcp-strict enable", false); dp_test_npf_commit(); - struct dp_test_tcp_call tcp_call = { - .str[0] = '\0', + struct dpt_tcp_flow tcp_call = { + .text[0] = '\0', .isn = {0, 0}, - .desc[DP_DIR_FORW] = { - .pre = &ins_pre, - .post = &ins_post, + .desc[DPT_FORW] = { + .pre = ins_pre, + .pst = ins_post, }, - .desc[DP_DIR_BACK] = { - .pre = &outs_pre, - .post = &outs_post, + .desc[DPT_BACK] = { + .pre = outs_pre, + .pst = outs_post, }, .test_cb = dp_test_npf_tcp_test_cb2, .post_cb = dp_test_npf_tcp_post_cb2, @@ -607,8 +443,8 @@ DP_START_TEST(strict_syn, t1) * Test 2.1. Verify that a SYN-only can create a session */ - struct dp_test_tcp_flow_pkt tcp_pkt[] = { - {DP_DIR_FORW, 0, 0, NULL}, + struct dpt_tcp_flow_pkt tcp_pkt[] = { + { DPT_FORW, 0, 0, NULL, 0, NULL }, }; uint i; @@ -618,11 +454,12 @@ DP_START_TEST(strict_syn, t1) (i & TH_RST) != 0) continue; - spush(tcp_call.str, sizeof(tcp_call.str), + spush(tcp_call.text, sizeof(tcp_call.text), "npf TCP strict Test 2.1.%u", i); tcp_pkt[0].flags = i; - dp_test_tcp_call(&tcp_call, tcp_pkt, - ARRAY_SIZE(tcp_pkt), NULL, 0); + + dpt_tcp_call(&tcp_call, tcp_pkt, + ARRAY_SIZE(tcp_pkt), 0, 0, NULL, 0); } /* @@ -630,14 +467,15 @@ DP_START_TEST(strict_syn, t1) * reverse direction whatever the flags are. 
*/ - tcp_pkt[0].dir = DP_DIR_BACK; + tcp_pkt[0].forw = DPT_BACK; for (i = 0; i < 256; i++) { - spush(tcp_call.str, sizeof(tcp_call.str), + spush(tcp_call.text, sizeof(tcp_call.text), "npf TCP strict Test 2.2.%u", i); tcp_pkt[0].flags = i; - dp_test_tcp_call(&tcp_call, tcp_pkt, - ARRAY_SIZE(tcp_pkt), NULL, 0); + + dpt_tcp_call(&tcp_call, tcp_pkt, + ARRAY_SIZE(tcp_pkt), 0, 0, NULL, 0); } /* @@ -646,6 +484,10 @@ DP_START_TEST(strict_syn, t1) dp_test_npf_cmd("npf-ut fw global tcp-strict disable", false); dp_test_npf_commit(); + free(ins_pre); + free(ins_post); + free(outs_pre); + free(outs_post); /************************************************************* * Cleanup @@ -673,7 +515,7 @@ DP_DECL_TEST_CASE(npf_tcp, strict_nat, NULL, NULL); */ static void dp_test_npf_tcp_test_cb3(const char *desc, - uint pktno, enum dp_test_tcp_dir dir, + uint pktno, bool forw, uint8_t flags, struct dp_test_pkt_desc_t *pre, struct dp_test_pkt_desc_t *post, @@ -720,102 +562,32 @@ DP_START_TEST(strict_nat, t1) dp_test_netlink_add_neigh("dp2T1", "200.201.202.203", "aa:bb:cc:18:0:1"); - - struct dp_test_pkt_desc_t ins_pre = { - .text = "Inside pre", - .len = 0, - .ether_type = ETHER_TYPE_IPv4, - .l3_src = "100.101.102.103", - .l2_src = "aa:bb:cc:16:0:20", - .l3_dst = "200.201.202.203", - .l2_dst = dp1T0_mac, - .proto = IPPROTO_TCP, - .l4 = { - .tcp = { - .sport = 49152, - .dport = 80, - .flags = 0, - .seq = 0, - .ack = 0, - .win = 8192, - .opts = NULL - } - }, - .rx_intf = "dp1T0", - .tx_intf = "dp2T1" - }; - - struct dp_test_pkt_desc_t ins_post = { - .text = "Inside post", - .len = 0, - .ether_type = ETHER_TYPE_IPv4, - .l3_src = "100.101.102.103", - .l2_src = dp2T1_mac, - .l3_dst = "200.201.202.203", - .l2_dst = "aa:bb:cc:18:0:1", - .proto = IPPROTO_TCP, - .l4 = { - .tcp = { - .sport = 49152, - .dport = 80, - .flags = 0, - .seq = 0, - .ack = 0, - .win = 8192, - .opts = NULL - } - }, - .rx_intf = "dp1T0", - .tx_intf = "dp2T1" - }; - - struct dp_test_pkt_desc_t outs_pre = { - .text = "Outside 
pre", - .len = 0, - .ether_type = ETHER_TYPE_IPv4, - .l3_src = "200.201.202.203", - .l2_src = "aa:bb:cc:18:0:1", - .l3_dst = "100.101.102.103", - .l2_dst = dp2T1_mac, - .proto = IPPROTO_TCP, - .l4 = { - .tcp = { - .sport = 80, - .dport = 49152, - .flags = 0, - .seq = 0, - .ack = 0, - .win = 8192, - .opts = NULL - } - }, - .rx_intf = "dp2T1", - .tx_intf = "dp1T0" - }; - - struct dp_test_pkt_desc_t outs_post = { - .text = "Outside post", - .len = 0, - .ether_type = ETHER_TYPE_IPv4, - .l3_src = "200.201.202.203", - .l2_src = dp1T0_mac, - .l3_dst = "100.101.102.103", - .l2_dst = "aa:bb:cc:16:0:20", - .proto = IPPROTO_TCP, - .l4 = { - .tcp = { - .sport = 80, - .dport = 49152, - .flags = 0, - .seq = 0, - .ack = 0, - .win = 8192, - .opts = NULL - } - }, - .rx_intf = "dp2T1", - .tx_intf = "dp1T0" - }; + struct dp_test_pkt_desc_t *ins_pre, *ins_post; + struct dp_test_pkt_desc_t *outs_pre, *outs_post; + + ins_pre = dpt_pdesc_v4_create( + "Inside pre", IPPROTO_TCP, + "aa:bb:cc:16:0:20", "100.101.102.103", 49152, + dp1T0_mac, "200.201.202.203", 80, + "dp1T0", "dp2T1"); + + ins_post = dpt_pdesc_v4_create( + "Inside post", IPPROTO_TCP, + dp2T1_mac, "100.101.102.103", 49152, + "aa:bb:cc:18:0:1", "200.201.202.203", 80, + "dp1T0", "dp2T1"); + + outs_pre = dpt_pdesc_v4_create( + "Outside pre", IPPROTO_TCP, + "aa:bb:cc:18:0:1", "200.201.202.203", 80, + dp2T1_mac, "100.101.102.103", 49152, + "dp2T1", "dp1T0"); + + outs_post = dpt_pdesc_v4_create( + "Outside post", IPPROTO_TCP, + dp1T0_mac, "200.201.202.203", 80, + "aa:bb:cc:16:0:20", "100.101.102.103", 49152, + "dp2T1", "dp1T0"); struct dp_test_npf_rule_t rules[] = { { @@ -843,16 +615,16 @@ DP_START_TEST(strict_nat, t1) dp_test_npf_cmd("npf-ut fw global tcp-strict enable", false); dp_test_npf_commit(); - struct dp_test_tcp_call tcp_call = { - .str[0] = '\0', + struct dpt_tcp_flow tcp_call = { + .text[0] = '\0', .isn = {0, 0}, - .desc[DP_DIR_FORW] = { - .pre = &ins_pre, - .post = &ins_post, + .desc[DPT_FORW] = { + .pre = ins_pre, + 
.pst = ins_post, }, - .desc[DP_DIR_BACK] = { - .pre = &outs_pre, - .post = &outs_post, + .desc[DPT_BACK] = { + .pre = outs_pre, + .pst = outs_post, }, .test_cb = dp_test_npf_tcp_test_cb3, .post_cb = NULL, @@ -861,27 +633,26 @@ DP_START_TEST(strict_nat, t1) /* * Test 1 */ - spush(tcp_call.str, sizeof(tcp_call.str), "npf TCP strict Test 3.1"); + spush(tcp_call.text, sizeof(tcp_call.text), "npf TCP strict Test 3.1"); - /* Comment is new npf_tcp_fsm state */ - struct dp_test_tcp_flow_pkt tcp_pkt1[] = { + struct dpt_tcp_flow_pkt tcp_pkt1[] = { /* Open */ - {DP_DIR_FORW, TH_SYN, 0, NULL}, - {DP_DIR_BACK, TH_SYN | TH_ACK, 0, NULL}, - {DP_DIR_FORW, TH_ACK, 0, NULL}, + {DPT_FORW, TH_SYN, 0, NULL, 0, NULL }, + {DPT_BACK, TH_SYN | TH_ACK, 0, NULL, 0, NULL }, + {DPT_FORW, TH_ACK, 0, NULL, 0, NULL }, /* Data */ - {DP_DIR_BACK, TH_ACK, 20, NULL}, - {DP_DIR_FORW, TH_ACK, 50, NULL}, + {DPT_BACK, TH_ACK, 20, NULL, 0, NULL }, + {DPT_FORW, TH_ACK, 50, NULL, 0, NULL }, /* Close */ - {DP_DIR_FORW, TH_ACK | TH_FIN, 10, NULL}, - {DP_DIR_BACK, TH_ACK, 0, NULL}, - {DP_DIR_BACK, TH_ACK | TH_FIN, 0, NULL}, - {DP_DIR_FORW, TH_ACK, 0, NULL}, + {DPT_FORW, TH_ACK | TH_FIN, 10, NULL, 0, NULL }, + {DPT_BACK, TH_ACK, 0, NULL, 0, NULL }, + {DPT_BACK, TH_ACK | TH_FIN, 0, NULL, 0, NULL }, + {DPT_FORW, TH_ACK, 0, NULL, 0, NULL }, }; - dp_test_tcp_call(&tcp_call, tcp_pkt1, ARRAY_SIZE(tcp_pkt1), NULL, 0); + dpt_tcp_call(&tcp_call, tcp_pkt1, ARRAY_SIZE(tcp_pkt1), 0, 0, NULL, 0); /* * End @@ -889,6 +660,10 @@ DP_START_TEST(strict_nat, t1) dp_test_npf_cmd("npf-ut fw global tcp-strict disable", false); dp_test_npf_commit(); + free(ins_pre); + free(ins_post); + free(outs_pre); + free(outs_post); /************************************************************* * Cleanup @@ -916,7 +691,7 @@ DP_DECL_TEST_CASE(npf_tcp, time_wait, NULL, NULL); static void dp_test_npf_tcp_test_cb4(const char *desc, - uint pktno, enum dp_test_tcp_dir dir, + uint pktno, bool forw, uint8_t flags, struct dp_test_pkt_desc_t *pre, struct 
dp_test_pkt_desc_t *post, @@ -926,7 +701,7 @@ dp_test_npf_tcp_test_cb4(const char *desc, struct rte_mbuf *pre_pak, *post_pak; struct dp_test_expected *test_exp; - if (dir == DP_DIR_BACK) { + if (!forw) { /* * Remember seq and ack from first BACK packet after handshake */ @@ -960,7 +735,7 @@ dp_test_npf_tcp_test_cb4(const char *desc, } static void -dp_test_npf_tcp_post_cb4(uint pktno, enum dp_test_tcp_dir dir, +dp_test_npf_tcp_post_cb4(uint pktno, bool forw, uint8_t flags, struct dp_test_pkt_desc_t *pre, struct dp_test_pkt_desc_t *post, @@ -997,18 +772,15 @@ dp_test_npf_tcp_post_cb4(uint pktno, enum dp_test_tcp_dir dir, if (!rv) { dp_test_fail("Session not found: %s", desc); dp_test_npf_print_sessions(NULL); - } else { - if (pktno < ARRAY_SIZE(expected_tcp_state) && - state != expected_tcp_state[pktno]) { - dp_test_fail( - "%s, exp state %s, actual state %s", - desc, - npf_state_get_state_name( - expected_tcp_state[pktno], - NPF_PROTO_IDX_TCP), - npf_state_get_state_name(state, - NPF_PROTO_IDX_TCP)); - } + return; + } + + if (pktno < ARRAY_SIZE(expected_tcp_state) && + state != expected_tcp_state[pktno]) { + dp_test_fail("%s, exp state %s, actual state %s", + desc, + npf_state_get_tcp_name(expected_tcp_state[pktno]), + npf_state_get_tcp_name(state)); } } @@ -1027,102 +799,32 @@ DP_START_TEST(time_wait, t1) dp_test_netlink_add_neigh("dp2T1", "200.201.202.203", "aa:bb:cc:18:0:1"); - - struct dp_test_pkt_desc_t ins_pre = { - .text = "Inside pre", - .len = 0, - .ether_type = ETHER_TYPE_IPv4, - .l3_src = "100.101.102.103", - .l2_src = "aa:bb:cc:16:0:20", - .l3_dst = "200.201.202.203", - .l2_dst = dp1T0_mac, - .proto = IPPROTO_TCP, - .l4 = { - .tcp = { - .sport = 49152, - .dport = 80, - .flags = 0, - .seq = 0, - .ack = 0, - .win = 8192, - .opts = NULL - } - }, - .rx_intf = "dp1T0", - .tx_intf = "dp2T1" - }; - - struct dp_test_pkt_desc_t ins_post = { - .text = "Inside post", - .len = 0, - .ether_type = ETHER_TYPE_IPv4, - .l3_src = "100.101.102.103", - .l2_src = dp2T1_mac, 
- .l3_dst = "200.201.202.203", - .l2_dst = "aa:bb:cc:18:0:1", - .proto = IPPROTO_TCP, - .l4 = { - .tcp = { - .sport = 49152, - .dport = 80, - .flags = 0, - .seq = 0, - .ack = 0, - .win = 8192, - .opts = NULL - } - }, - .rx_intf = "dp1T0", - .tx_intf = "dp2T1" - }; - - struct dp_test_pkt_desc_t outs_pre = { - .text = "Outside pre", - .len = 0, - .ether_type = ETHER_TYPE_IPv4, - .l3_src = "200.201.202.203", - .l2_src = "aa:bb:cc:18:0:1", - .l3_dst = "100.101.102.103", - .l2_dst = dp2T1_mac, - .proto = IPPROTO_TCP, - .l4 = { - .tcp = { - .sport = 80, - .dport = 49152, - .flags = 0, - .seq = 0, - .ack = 0, - .win = 8192, - .opts = NULL - } - }, - .rx_intf = "dp2T1", - .tx_intf = "dp1T0" - }; - - struct dp_test_pkt_desc_t outs_post = { - .text = "Outside post", - .len = 0, - .ether_type = ETHER_TYPE_IPv4, - .l3_src = "200.201.202.203", - .l2_src = dp1T0_mac, - .l3_dst = "100.101.102.103", - .l2_dst = "aa:bb:cc:16:0:20", - .proto = IPPROTO_TCP, - .l4 = { - .tcp = { - .sport = 80, - .dport = 49152, - .flags = 0, - .seq = 0, - .ack = 0, - .win = 8192, - .opts = NULL - } - }, - .rx_intf = "dp2T1", - .tx_intf = "dp1T0" - }; + struct dp_test_pkt_desc_t *ins_pre, *ins_post; + struct dp_test_pkt_desc_t *outs_pre, *outs_post; + + ins_pre = dpt_pdesc_v4_create( + "Inside pre", IPPROTO_TCP, + "aa:bb:cc:16:0:20", "100.101.102.103", 49152, + dp1T0_mac, "200.201.202.203", 80, + "dp1T0", "dp2T1"); + + ins_post = dpt_pdesc_v4_create( + "Inside post", IPPROTO_TCP, + dp2T1_mac, "100.101.102.103", 49152, + "aa:bb:cc:18:0:1", "200.201.202.203", 80, + "dp1T0", "dp2T1"); + + outs_pre = dpt_pdesc_v4_create( + "Outside pre", IPPROTO_TCP, + "aa:bb:cc:18:0:1", "200.201.202.203", 80, + dp2T1_mac, "100.101.102.103", 49152, + "dp2T1", "dp1T0"); + + outs_post = dpt_pdesc_v4_create( + "Outside post", IPPROTO_TCP, + dp1T0_mac, "200.201.202.203", 80, + "aa:bb:cc:16:0:20", "100.101.102.103", 49152, + "dp2T1", "dp1T0"); struct dp_test_npf_rule_t rules[] = { { @@ -1147,16 +849,16 @@ 
DP_START_TEST(time_wait, t1) dp_test_npf_fw_add(&fw, false); - struct dp_test_tcp_call tcp_call = { - .str[0] = '\0', + struct dpt_tcp_flow tcp_call = { + .text[0] = '\0', .isn = {0, 0}, - .desc[DP_DIR_FORW] = { - .pre = &ins_pre, - .post = &ins_post, + .desc[DPT_FORW] = { + .pre = ins_pre, + .pst = ins_post, }, - .desc[DP_DIR_BACK] = { - .pre = &outs_pre, - .post = &outs_post, + .desc[DPT_BACK] = { + .pre = outs_pre, + .pst = outs_post, }, .test_cb = dp_test_npf_tcp_test_cb4, .post_cb = dp_test_npf_tcp_post_cb4, @@ -1165,37 +867,42 @@ DP_START_TEST(time_wait, t1) /* * Test 1 */ - spush(tcp_call.str, sizeof(tcp_call.str), + spush(tcp_call.text, sizeof(tcp_call.text), "npf TCP TIME-WAIT assassination Test 4.1"); - struct dp_test_tcp_flow_pkt tcp_pkt1[] = { - {DP_DIR_FORW, TH_SYN, 0, NULL}, - {DP_DIR_BACK, TH_SYN | TH_ACK, 0, NULL}, - {DP_DIR_FORW, TH_ACK, 0, NULL}, - - {DP_DIR_FORW, TH_ACK, 100, NULL}, - {DP_DIR_BACK, TH_ACK, 20, NULL}, - {DP_DIR_FORW, TH_ACK, 50, NULL}, - {DP_DIR_BACK, TH_ACK, 20, NULL}, - {DP_DIR_FORW, TH_ACK, 50, NULL}, - {DP_DIR_BACK, TH_ACK, 20, NULL}, - - {DP_DIR_FORW, TH_ACK | TH_FIN, 10, NULL}, - {DP_DIR_BACK, TH_ACK, 0, NULL}, - {DP_DIR_BACK, TH_ACK | TH_FIN, 0, NULL}, - {DP_DIR_FORW, TH_ACK, 0, NULL}, - - {DP_DIR_BACK, TH_ACK, 0, NULL}, /* Old duplicate */ - {DP_DIR_FORW, TH_ACK, 0, NULL}, /* */ - {DP_DIR_BACK, TH_RST, 0, NULL}, /* */ + struct dpt_tcp_flow_pkt tcp_pkt1[] = { + {DPT_FORW, TH_SYN, 0, NULL, 0, NULL }, + {DPT_BACK, TH_SYN | TH_ACK, 0, NULL, 0, NULL }, + {DPT_FORW, TH_ACK, 0, NULL, 0, NULL }, + + {DPT_FORW, TH_ACK, 100, NULL, 0, NULL }, + {DPT_BACK, TH_ACK, 20, NULL, 0, NULL }, + {DPT_FORW, TH_ACK, 50, NULL, 0, NULL }, + {DPT_BACK, TH_ACK, 20, NULL, 0, NULL }, + {DPT_FORW, TH_ACK, 50, NULL, 0, NULL }, + {DPT_BACK, TH_ACK, 20, NULL, 0, NULL }, + + {DPT_FORW, TH_ACK | TH_FIN, 10, NULL, 0, NULL }, + {DPT_BACK, TH_ACK, 0, NULL, 0, NULL }, + {DPT_BACK, TH_ACK | TH_FIN, 0, NULL, 0, NULL }, + {DPT_FORW, TH_ACK, 0, NULL, 0, NULL }, + + 
{DPT_BACK, TH_ACK, 0, NULL, 0, NULL }, /* Old duplicate */ + {DPT_FORW, TH_ACK, 0, NULL, 0, NULL }, /* */ + {DPT_BACK, TH_RST, 0, NULL, 0, NULL }, /* */ }; - dp_test_tcp_call(&tcp_call, tcp_pkt1, ARRAY_SIZE(tcp_pkt1), NULL, 0); + dpt_tcp_call(&tcp_call, tcp_pkt1, ARRAY_SIZE(tcp_pkt1), 0, 0, NULL, 0); /* * End */ + free(ins_pre); + free(ins_post); + free(outs_pre); + free(outs_post); + /************************************************************* * Cleanup *************************************************************/ @@ -1219,7 +926,7 @@ DP_DECL_TEST_CASE(npf_tcp, rst_estb, NULL, NULL); * Test 5: TCP reset when in Established state */ static void -dp_test_npf_tcp_post_cb5(uint pktno, enum dp_test_tcp_dir dir, +dp_test_npf_tcp_post_cb5(uint pktno, bool forw, uint8_t flags, struct dp_test_pkt_desc_t *pre, struct dp_test_pkt_desc_t *post, @@ -1254,20 +961,17 @@ dp_test_npf_tcp_post_cb5(uint pktno, enum dp_test_tcp_dir dir, */ if (pktno == 5) return; - dp_test_fail("Session not found: %s", desc); dp_test_npf_print_sessions(NULL); - } else { - if (pktno < ARRAY_SIZE(expected_tcp_state) && - state != expected_tcp_state[pktno]) { - dp_test_fail( - "%s, exp state %s, actual state %s", - desc, - npf_state_get_state_name( - expected_tcp_state[pktno], - NPF_PROTO_IDX_TCP), - npf_state_get_state_name(state, - NPF_PROTO_IDX_TCP)); - } + dp_test_fail("Session not found: %s", desc); + return; + } + + if (pktno < ARRAY_SIZE(expected_tcp_state) && + state != expected_tcp_state[pktno]) { + dp_test_fail("%s, exp state %s, actual state %s", + desc, + npf_state_get_tcp_name(expected_tcp_state[pktno]), + npf_state_get_tcp_name(state)); } /* @@ -1311,102 +1015,32 @@ DP_START_TEST(rst_estb, t1) dp_test_netlink_add_neigh("dp2T1", "200.201.202.203", "aa:bb:cc:18:0:1"); - - struct dp_test_pkt_desc_t ins_pre = { - .text = "Inside pre", - .len = 0, - .ether_type = ETHER_TYPE_IPv4, - .l3_src = "100.101.102.103", - .l2_src = "aa:bb:cc:16:0:20", - .l3_dst = "200.201.202.203", - .l2_dst = 
dp1T0_mac, - .proto = IPPROTO_TCP, - .l4 = { - .tcp = { - .sport = 49152, - .dport = 80, - .flags = 0, - .seq = 0, - .ack = 0, - .win = 8192, - .opts = NULL - } - }, - .rx_intf = "dp1T0", - .tx_intf = "dp2T1" - }; - - struct dp_test_pkt_desc_t ins_post = { - .text = "Inside post", - .len = 0, - .ether_type = ETHER_TYPE_IPv4, - .l3_src = "100.101.102.103", - .l2_src = dp2T1_mac, - .l3_dst = "200.201.202.203", - .l2_dst = "aa:bb:cc:18:0:1", - .proto = IPPROTO_TCP, - .l4 = { - .tcp = { - .sport = 49152, - .dport = 80, - .flags = 0, - .seq = 0, - .ack = 0, - .win = 8192, - .opts = NULL - } - }, - .rx_intf = "dp1T0", - .tx_intf = "dp2T1" - }; - - struct dp_test_pkt_desc_t outs_pre = { - .text = "Outside pre", - .len = 0, - .ether_type = ETHER_TYPE_IPv4, - .l3_src = "200.201.202.203", - .l2_src = "aa:bb:cc:18:0:1", - .l3_dst = "100.101.102.103", - .l2_dst = dp2T1_mac, - .proto = IPPROTO_TCP, - .l4 = { - .tcp = { - .sport = 80, - .dport = 49152, - .flags = 0, - .seq = 0, - .ack = 0, - .win = 8192, - .opts = NULL - } - }, - .rx_intf = "dp2T1", - .tx_intf = "dp1T0" - }; - - struct dp_test_pkt_desc_t outs_post = { - .text = "Outside post", - .len = 0, - .ether_type = ETHER_TYPE_IPv4, - .l3_src = "200.201.202.203", - .l2_src = dp1T0_mac, - .l3_dst = "100.101.102.103", - .l2_dst = "aa:bb:cc:16:0:20", - .proto = IPPROTO_TCP, - .l4 = { - .tcp = { - .sport = 80, - .dport = 49152, - .flags = 0, - .seq = 0, - .ack = 0, - .win = 8192, - .opts = NULL - } - }, - .rx_intf = "dp2T1", - .tx_intf = "dp1T0" - }; + struct dp_test_pkt_desc_t *ins_pre, *ins_post; + struct dp_test_pkt_desc_t *outs_pre, *outs_post; + + ins_pre = dpt_pdesc_v4_create( + "Inside pre", IPPROTO_TCP, + "aa:bb:cc:16:0:20", "100.101.102.103", 49152, + dp1T0_mac, "200.201.202.203", 80, + "dp1T0", "dp2T1"); + + ins_post = dpt_pdesc_v4_create( + "Inside post", IPPROTO_TCP, + dp2T1_mac, "100.101.102.103", 49152, + "aa:bb:cc:18:0:1", "200.201.202.203", 80, + "dp1T0", "dp2T1"); + + outs_pre = dpt_pdesc_v4_create( + "Outside 
pre", IPPROTO_TCP, + "aa:bb:cc:18:0:1", "200.201.202.203", 80, + dp2T1_mac, "100.101.102.103", 49152, + "dp2T1", "dp1T0"); + + outs_post = dpt_pdesc_v4_create( + "Outside post", IPPROTO_TCP, + dp1T0_mac, "200.201.202.203", 80, + "aa:bb:cc:16:0:20", "100.101.102.103", 49152, + "dp2T1", "dp1T0"); struct dp_test_npf_rule_t rules[] = { { @@ -1431,42 +1065,47 @@ DP_START_TEST(rst_estb, t1) dp_test_npf_fw_add(&fw, false); - struct dp_test_tcp_call tcp_call = { - .str[0] = '\0', + struct dpt_tcp_flow tcp_call = { + .text[0] = '\0', .isn = {0, 0}, - .desc[DP_DIR_FORW] = { - .pre = &ins_pre, - .post = &ins_post, + .desc[DPT_FORW] = { + .pre = ins_pre, + .pst = ins_post, }, - .desc[DP_DIR_BACK] = { - .pre = &outs_pre, - .post = &outs_post, + .desc[DPT_BACK] = { + .pre = outs_pre, + .pst = outs_post, }, - .test_cb = dp_test_npf_tcp_test_cb, + .test_cb = NULL, .post_cb = dp_test_npf_tcp_post_cb5, }; /* * Test 1 */ - spush(tcp_call.str, sizeof(tcp_call.str), "npf TCP dev"); + spush(tcp_call.text, sizeof(tcp_call.text), "npf TCP dev"); /* Comment is new npf_tcp_fsm state */ - struct dp_test_tcp_flow_pkt tcp_pkt1[] = { - {DP_DIR_FORW, TH_SYN, 0, NULL}, - {DP_DIR_BACK, TH_SYN | TH_ACK, 0, NULL}, - {DP_DIR_FORW, TH_ACK, 0, NULL}, - {DP_DIR_BACK, TH_ACK, 20, NULL}, - {DP_DIR_FORW, TH_ACK, 50, NULL}, - {DP_DIR_FORW, TH_RST, 0, NULL}, + struct dpt_tcp_flow_pkt tcp_pkt1[] = { + {DPT_FORW, TH_SYN, 0, NULL, 0, NULL}, + {DPT_BACK, TH_SYN | TH_ACK, 0, NULL, 0, NULL}, + {DPT_FORW, TH_ACK, 0, NULL, 0, NULL}, + {DPT_BACK, TH_ACK, 20, NULL, 0, NULL}, + {DPT_FORW, TH_ACK, 50, NULL, 0, NULL}, + {DPT_FORW, TH_RST, 0, NULL, 0, NULL}, }; - dp_test_tcp_call(&tcp_call, tcp_pkt1, ARRAY_SIZE(tcp_pkt1), NULL, 0); + dpt_tcp_call(&tcp_call, tcp_pkt1, ARRAY_SIZE(tcp_pkt1), 0, 0, NULL, 0); /* * End */ + free(ins_pre); + free(ins_post); + free(outs_pre); + free(outs_post); + /************************************************************* * Cleanup *************************************************************/ 
@@ -1530,39 +1169,23 @@ DP_START_TEST(rst_only, test1) char *dp1T0_mac = dp_test_intf_name2mac_str("dp1T0"); char *dp2T1_mac = dp_test_intf_name2mac_str("dp2T1"); - struct dp_test_pkt_desc_t fwd_pkt = { - .text = "Fwd", - .len = 0, - .ether_type = ETHER_TYPE_IPv4, - .l3_src = "100.101.102.103", - .l2_src = "aa:bb:cc:16:0:20", - .l3_dst = "200.201.202.203", - .l2_dst = dp1T0_mac, - .proto = IPPROTO_TCP, - .l4 = { - .tcp = { - .sport = 49152, - .dport = 80, - .flags = TH_RST | TH_ACK, - .seq = 0, - .ack = 0, - .win = 8192, - .opts = NULL - } - }, - .rx_intf = "dp1T0", - .tx_intf = "dp2T1" - }; + struct dp_test_pkt_desc_t *fwd_pkt; + fwd_pkt = dpt_pdesc_v4_create( + "Fwd", IPPROTO_TCP, + "aa:bb:cc:16:0:20", "100.101.102.103", 49152, + dp1T0_mac, "200.201.202.203", 80, + "dp1T0", "dp2T1"); + fwd_pkt->l4.tcp.flags = TH_RST | TH_ACK; struct dp_test_pkt_desc_t pkt_copy; struct dp_test_expected *test_exp; struct rte_mbuf *test_pak; bool rv; - test_pak = dp_test_v4_pkt_from_desc(&fwd_pkt); + test_pak = dp_test_v4_pkt_from_desc(fwd_pkt); - pkt_copy = fwd_pkt; + pkt_copy = *fwd_pkt; pkt_copy.l2_src = dp2T1_mac; pkt_copy.l2_dst = "aa:bb:cc:18:0:1"; @@ -1583,6 +1206,8 @@ DP_START_TEST(rst_only, test1) dp_test_npf_print_sessions(NULL); }; + free(fwd_pkt); + /* Cleanup */ dp_test_npf_fw_del(&rset, false); diff --git a/tests/whole_dp/src/dp_test_npf_tcp_mss.c b/tests/whole_dp/src/dp_test_npf_tcp_mss.c index c4494705..dfa42e82 100644 --- a/tests/whole_dp/src/dp_test_npf_tcp_mss.c +++ b/tests/whole_dp/src/dp_test_npf_tcp_mss.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -19,14 +19,14 @@ #include "dp_test.h" #include "dp_test_controller.h" #include "dp_test_cmd_state.h" -#include "dp_test_netlink_state.h" -#include "dp_test_lib.h" +#include "dp_test_netlink_state_internal.h" +#include "dp_test_lib_internal.h" #include "dp_test_str.h" #include "dp_test_lib_exp.h" -#include "dp_test_lib_intf.h" +#include "dp_test_lib_intf_internal.h" #include "dp_test_lib_pkt.h" #include "dp_test_lib_tcp.h" -#include "dp_test_pktmbuf_lib.h" +#include "dp_test_pktmbuf_lib_internal.h" #include "dp_test_console.h" #include "dp_test_json_utils.h" #include "dp_test_npf_sess_lib.h" @@ -157,7 +157,7 @@ dp_test_npf_tcp_mss_opt(uint8_t flags, uint8_t *opts, */ static void dp_test_npf_tcp_test_cb(const char *str, - uint pktno, enum dp_test_tcp_dir dir, + uint pktno, bool forw, uint8_t flags, struct dp_test_pkt_desc_t *pre, struct dp_test_pkt_desc_t *post, @@ -268,102 +268,32 @@ DP_START_TEST(tcp_mss_ipv4, test1) dp_test_netlink_add_neigh("dp2T1", "200.201.202.203", "aa:bb:cc:18:0:1"); - - struct dp_test_pkt_desc_t ins_pre = { - .text = "Inside pre", - .len = 0, - .ether_type = ETHER_TYPE_IPv4, - .l3_src = "100.101.102.103", - .l2_src = "aa:bb:cc:16:0:20", - .l3_dst = "200.201.202.203", - .l2_dst = dp1T0_mac, - .proto = IPPROTO_TCP, - .l4 = { - .tcp = { - .sport = 49152, - .dport = 80, - .flags = 0, - .seq = 0, - .ack = 0, - .win = 8192, - .opts = NULL - } - }, - .rx_intf = "dp1T0", - .tx_intf = "dp2T1" - }; - - struct dp_test_pkt_desc_t ins_post = { - .text = "Inside post", - .len = 0, - .ether_type = ETHER_TYPE_IPv4, - .l3_src = "100.101.102.103", - .l2_src = dp2T1_mac, - .l3_dst = "200.201.202.203", - .l2_dst = "aa:bb:cc:18:0:1", - .proto = IPPROTO_TCP, - .l4 = { - .tcp = { - .sport = 49152, - .dport = 80, - .flags = 0, - .seq = 0, - .ack = 0, - .win = 8192, - .opts = NULL - } - }, - .rx_intf = "dp1T0", - .tx_intf = "dp2T1" - }; - - struct dp_test_pkt_desc_t outs_pre = { - .text = "Outside pre", - .len = 0, - .ether_type = ETHER_TYPE_IPv4, - .l3_src =
"200.201.202.203", - .l2_src = "aa:bb:cc:18:0:1", - .l3_dst = "100.101.102.103", - .l2_dst = dp2T1_mac, - .proto = IPPROTO_TCP, - .l4 = { - .tcp = { - .sport = 80, - .dport = 49152, - .flags = 0, - .seq = 0, - .ack = 0, - .win = 8192, - .opts = NULL - } - }, - .rx_intf = "dp2T1", - .tx_intf = "dp1T0" - }; - - struct dp_test_pkt_desc_t outs_post = { - .text = "Outside post", - .len = 0, - .ether_type = ETHER_TYPE_IPv4, - .l3_src = "200.201.202.203", - .l2_src = dp1T0_mac, - .l3_dst = "100.101.102.103", - .l2_dst = "aa:bb:cc:16:0:20", - .proto = IPPROTO_TCP, - .l4 = { - .tcp = { - .sport = 80, - .dport = 49152, - .flags = 0, - .seq = 0, - .ack = 0, - .win = 8192, - .opts = NULL - } - }, - .rx_intf = "dp2T1", - .tx_intf = "dp1T0" - }; + struct dp_test_pkt_desc_t *ins_pre, *ins_post; + struct dp_test_pkt_desc_t *outs_pre, *outs_post; + + ins_pre = dpt_pdesc_v4_create( + "Inside pre", IPPROTO_TCP, + "aa:bb:cc:16:0:20", "100.101.102.103", 49152, + dp1T0_mac, "200.201.202.203", 80, + "dp1T0", "dp2T1"); + + ins_post = dpt_pdesc_v4_create( + "Inside post", IPPROTO_TCP, + dp2T1_mac, "100.101.102.103", 49152, + "aa:bb:cc:18:0:1", "200.201.202.203", 80, + "dp1T0", "dp2T1"); + + outs_pre = dpt_pdesc_v4_create( + "Outside pre", IPPROTO_TCP, + "aa:bb:cc:18:0:1", "200.201.202.203", 80, + dp2T1_mac, "100.101.102.103", 49152, + "dp2T1", "dp1T0"); + + outs_post = dpt_pdesc_v4_create( + "Outside post", IPPROTO_TCP, + dp1T0_mac, "200.201.202.203", 80, + "aa:bb:cc:16:0:20", "100.101.102.103", 49152, + "dp2T1", "dp1T0"); char npf[100]; @@ -398,26 +328,26 @@ DP_START_TEST(tcp_mss_ipv4, test1) .rules = rules }; - struct dp_test_tcp_call tcp_call = { - .str[0] = '\0', + struct dpt_tcp_flow tcp_call = { + .text[0] = '\0', .isn = {0, 0}, - .desc[DP_DIR_FORW] = { + .desc[DPT_FORW] = { - .pre = &ins_pre, - .post = &ins_post, + .pre = ins_pre, + .pst = ins_post, }, - .desc[DP_DIR_BACK] = { + .desc[DPT_BACK] = { - .pre = &outs_pre, - .post = &outs_post, + .pre = outs_pre, + .pst = outs_post, }, .test_cb = dp_test_npf_tcp_test_cb,
.post_cb = NULL, }; - struct dp_test_tcp_flow_pkt tcp_pkt1[] = { - {DP_DIR_FORW, TH_SYN, 0, NULL}, - {DP_DIR_BACK, TH_SYN | TH_ACK, 0, NULL}, - {DP_DIR_FORW, TH_ACK, 0, NULL}, - {DP_DIR_BACK, TH_ACK, 20, NULL}, + struct dpt_tcp_flow_pkt tcp_pkt1[] = { + {DPT_FORW, TH_SYN, 0, NULL, 0, NULL}, + {DPT_BACK, TH_SYN | TH_ACK, 0, NULL, 0, NULL}, + {DPT_FORW, TH_ACK, 0, NULL, 0, NULL}, + {DPT_BACK, TH_ACK, 20, NULL, 0, NULL}, /* call truncated ... */ }; @@ -436,7 +366,7 @@ DP_START_TEST(tcp_mss_ipv4, test1) .l3l4_size = 20 + 20, }; - spush(tcp_call.str, sizeof(tcp_call.str), + spush(tcp_call.text, sizeof(tcp_call.text), "npf TCP mss clamp Test 1.1 - mtu"); /* create the rproc npf string */ @@ -450,7 +380,7 @@ DP_START_TEST(tcp_mss_ipv4, test1) if (rules[0].stateful == STATELESS) dp_test_npf_fw_add(&fw2, false); - dp_test_tcp_call(&tcp_call, tcp_pkt1, ARRAY_SIZE(tcp_pkt1), &ctx, 0); + dpt_tcp_call(&tcp_call, tcp_pkt1, ARRAY_SIZE(tcp_pkt1), 0, 0, &ctx, 0); dp_test_npf_fw_del(&fw1, false); @@ -470,7 +400,7 @@ DP_START_TEST(tcp_mss_ipv4, test1) /* IP + TCP (ignore options) */ ctx.l3l4_size = 20 + 20; - spush(tcp_call.str, sizeof(tcp_call.str), + spush(tcp_call.text, sizeof(tcp_call.text), "npf TCP mss clamp Test 1.2 - mtu-minus"); /* create the rproc npf string */ @@ -484,7 +414,7 @@ DP_START_TEST(tcp_mss_ipv4, test1) if (rules[0].stateful == STATELESS) dp_test_npf_fw_add(&fw2, false); - dp_test_tcp_call(&tcp_call, tcp_pkt1, ARRAY_SIZE(tcp_pkt1), &ctx, 0); + dpt_tcp_call(&tcp_call, tcp_pkt1, ARRAY_SIZE(tcp_pkt1), 0, 0, &ctx, 0); dp_test_npf_fw_del(&fw1, false); @@ -504,7 +434,7 @@ DP_START_TEST(tcp_mss_ipv4, test1) /* IP + TCP */ ctx.l3l4_size = 20 + 20; - spush(tcp_call.str, sizeof(tcp_call.str), + spush(tcp_call.text, sizeof(tcp_call.text), "npf TCP mss clamp Test 1.3 - limit"); /* create the rproc npf string */ @@ -518,7 +448,7 @@ DP_START_TEST(tcp_mss_ipv4, test1) if (rules[0].stateful == STATELESS) dp_test_npf_fw_add(&fw2, false); - dp_test_tcp_call(&tcp_call, tcp_pkt1, 
ARRAY_SIZE(tcp_pkt1), &ctx, 0); + dpt_tcp_call(&tcp_call, tcp_pkt1, ARRAY_SIZE(tcp_pkt1), 0, 0, &ctx, 0); dp_test_npf_fw_del(&fw1, false); @@ -538,7 +468,7 @@ DP_START_TEST(tcp_mss_ipv4, test1) /* IP + TCP */ ctx.l3l4_size = 20 + 20; - spush(tcp_call.str, sizeof(tcp_call.str), + spush(tcp_call.text, sizeof(tcp_call.text), "npf TCP mss clamp Test 1.4 - limit (stateful)"); /* create the rproc npf string */ @@ -552,13 +482,17 @@ DP_START_TEST(tcp_mss_ipv4, test1) if (rules[0].stateful == STATELESS) dp_test_npf_fw_add(&fw2, false); - dp_test_tcp_call(&tcp_call, tcp_pkt1, ARRAY_SIZE(tcp_pkt1), &ctx, 0); + dpt_tcp_call(&tcp_call, tcp_pkt1, ARRAY_SIZE(tcp_pkt1), 0, 0, &ctx, 0); dp_test_npf_fw_del(&fw1, false); if (rules[0].stateful == STATELESS) dp_test_npf_fw_del(&fw2, false); + free(ins_pre); + free(ins_post); + free(outs_pre); + free(outs_post); /************************************************************* * Cleanup @@ -602,102 +536,32 @@ DP_START_TEST(tcp_mss_ipv6, test1) dp_test_netlink_add_neigh("dp2T1", "2002:2:2::1", "aa:bb:cc:dd:2:b1"); - - struct dp_test_pkt_desc_t ins_pre = { - .text = "Inside pre", - .len = 0, - .ether_type = ETHER_TYPE_IPv6, - .l3_src = "2001:1:1::2", - .l2_src = "aa:bb:cc:dd:1:a1", - .l3_dst = "2002:2:2::1", - .l2_dst = dp1T0_mac, - .proto = IPPROTO_TCP, - .l4 = { - .tcp = { - .sport = 0xDEAD, - .dport = 0xBEEF, - .flags = 0, - .seq = 0, - .ack = 0, - .win = 8192, - .opts = NULL - } - }, - .rx_intf = "dp1T0", - .tx_intf = "dp2T1" - }; - - struct dp_test_pkt_desc_t ins_post = { - .text = "Inside post", - .len = 0, - .ether_type = ETHER_TYPE_IPv6, - .l3_src = "2001:1:1::2", - .l2_src = dp2T1_mac, - .l3_dst = "2002:2:2::1", - .l2_dst = "aa:bb:cc:dd:2:b1", - .proto = IPPROTO_TCP, - .l4 = { - .tcp = { - .sport = 0xDEAD, - .dport = 0xBEEF, - .flags = 0, - .seq = 0, - .ack = 0, - .win = 8192, - .opts = NULL - } - }, - .rx_intf = "dp1T0", - .tx_intf = "dp2T1" - }; - - struct dp_test_pkt_desc_t outs_pre = { - .text = "Outside pre", - .len = 0, - 
.ether_type = ETHER_TYPE_IPv6, - .l3_src = "2002:2:2::1", - .l2_src = "aa:bb:cc:dd:2:b1", - .l3_dst = "2001:1:1::2", - .l2_dst = dp2T1_mac, - .proto = IPPROTO_TCP, - .l4 = { - .tcp = { - .sport = 0xBEEF, - .dport = 0xDEAD, - .flags = 0, - .seq = 0, - .ack = 0, - .win = 8192, - .opts = NULL - } - }, - .rx_intf = "dp2T1", - .tx_intf = "dp1T0" - }; - - struct dp_test_pkt_desc_t outs_post = { - .text = "Outside post", - .len = 0, - .ether_type = ETHER_TYPE_IPv6, - .l3_src = "2002:2:2::1", - .l2_src = dp1T0_mac, - .l3_dst = "2001:1:1::2", - .l2_dst = "aa:bb:cc:dd:1:a1", - .proto = IPPROTO_TCP, - .l4 = { - .tcp = { - .sport = 0xBEEF, - .dport = 0xDEAD, - .flags = 0, - .seq = 0, - .ack = 0, - .win = 8192, - .opts = NULL - } - }, - .rx_intf = "dp2T1", - .tx_intf = "dp1T0" - }; + struct dp_test_pkt_desc_t *ins_pre, *ins_post; + struct dp_test_pkt_desc_t *outs_pre, *outs_post; + + ins_pre = dpt_pdesc_v6_create( + "Inside pre", IPPROTO_TCP, + "aa:bb:cc:dd:1:a1", "2001:1:1::2", 0xDEAD, + dp1T0_mac, "2002:2:2::1", 0xBEEF, + "dp1T0", "dp2T1"); + + ins_post = dpt_pdesc_v6_create( + "Inside post", IPPROTO_TCP, + dp2T1_mac, "2001:1:1::2", 0xDEAD, + "aa:bb:cc:dd:2:b1", "2002:2:2::1", 0xBEEF, + "dp1T0", "dp2T1"); + + outs_pre = dpt_pdesc_v6_create( + "Outside pre", IPPROTO_TCP, + "aa:bb:cc:dd:2:b1", "2002:2:2::1", 0xBEEF, + dp2T1_mac, "2001:1:1::2", 0xDEAD, + "dp2T1", "dp1T0"); + + outs_post = dpt_pdesc_v6_create( + "Outside post", IPPROTO_TCP, + dp1T0_mac, "2002:2:2::1", 0xBEEF, + "aa:bb:cc:dd:1:a1", "2001:1:1::2", 0xDEAD, + "dp2T1", "dp1T0"); char npf[100]; @@ -732,26 +596,26 @@ DP_START_TEST(tcp_mss_ipv6, test1) .rules = rules }; - struct dp_test_tcp_call tcp_call = { - .str[0] = '\0', + struct dpt_tcp_flow tcp_call = { + .text[0] = '\0', .isn = {0, 0}, - .desc[DP_DIR_FORW] = { + .desc[DPT_FORW] = { - .pre = &ins_pre, - .post = &ins_post, + .pre = ins_pre, + .pst = ins_post, }, - .desc[DP_DIR_BACK] = { + .desc[DPT_BACK] = { - .pre = &outs_pre, - .post = &outs_post, + .pre = outs_pre, + .pst = outs_post, }, .test_cb =
dp_test_npf_tcp_test_cb, .post_cb = NULL, }; - struct dp_test_tcp_flow_pkt tcp_pkt1[] = { - {DP_DIR_FORW, TH_SYN, 0, NULL}, - {DP_DIR_BACK, TH_SYN | TH_ACK, 0, NULL}, - {DP_DIR_FORW, TH_ACK, 0, NULL}, - {DP_DIR_BACK, TH_ACK, 20, NULL}, + struct dpt_tcp_flow_pkt tcp_pkt1[] = { + {DPT_FORW, TH_SYN, 0, NULL, 0, NULL}, + {DPT_BACK, TH_SYN | TH_ACK, 0, NULL, 0, NULL}, + {DPT_FORW, TH_ACK, 0, NULL, 0, NULL}, + {DPT_BACK, TH_ACK, 20, NULL, 0, NULL}, /* call truncated ... */ }; @@ -769,7 +633,7 @@ DP_START_TEST(tcp_mss_ipv6, test1) .l3l4_size = 40 + 20, }; - spush(tcp_call.str, sizeof(tcp_call.str), + spush(tcp_call.text, sizeof(tcp_call.text), "npf TCP mss clamp Test 2.1 - mtu"); /* create the rproc npf string */ @@ -783,7 +647,7 @@ DP_START_TEST(tcp_mss_ipv6, test1) if (rules[0].stateful == STATELESS) dp_test_npf_fw_add(&fw2, false); - dp_test_tcp_call(&tcp_call, tcp_pkt1, ARRAY_SIZE(tcp_pkt1), &ctx, 0); + dpt_tcp_call(&tcp_call, tcp_pkt1, ARRAY_SIZE(tcp_pkt1), 0, 0, &ctx, 0); dp_test_npf_fw_del(&fw1, false); @@ -803,7 +667,7 @@ DP_START_TEST(tcp_mss_ipv6, test1) /* IP + TCP */ ctx.l3l4_size = 40 + 20; - spush(tcp_call.str, sizeof(tcp_call.str), + spush(tcp_call.text, sizeof(tcp_call.text), "npf TCP mss clamp Test 2.2 - mtu-minus"); /* create the rproc npf string */ @@ -817,7 +681,7 @@ DP_START_TEST(tcp_mss_ipv6, test1) if (rules[0].stateful == STATELESS) dp_test_npf_fw_add(&fw2, false); - dp_test_tcp_call(&tcp_call, tcp_pkt1, ARRAY_SIZE(tcp_pkt1), &ctx, 0); + dpt_tcp_call(&tcp_call, tcp_pkt1, ARRAY_SIZE(tcp_pkt1), 0, 0, &ctx, 0); dp_test_npf_fw_del(&fw1, false); @@ -837,7 +701,7 @@ DP_START_TEST(tcp_mss_ipv6, test1) /* IP + TCP */ ctx.l3l4_size = 40 + 20; - spush(tcp_call.str, sizeof(tcp_call.str), + spush(tcp_call.text, sizeof(tcp_call.text), "npf TCP mss clamp Test 2.3 - limit"); /* create the rproc npf string */ @@ -851,7 +715,7 @@ DP_START_TEST(tcp_mss_ipv6, test1) if (rules[0].stateful == STATELESS) dp_test_npf_fw_add(&fw2, false); - dp_test_tcp_call(&tcp_call,
tcp_pkt1, ARRAY_SIZE(tcp_pkt1), &ctx, 0); + dpt_tcp_call(&tcp_call, tcp_pkt1, ARRAY_SIZE(tcp_pkt1), 0, 0, &ctx, 0); dp_test_npf_fw_del(&fw1, false); @@ -871,7 +735,7 @@ DP_START_TEST(tcp_mss_ipv6, test1) /* IP + TCP */ ctx.l3l4_size = 40 + 20; - spush(tcp_call.str, sizeof(tcp_call.str), + spush(tcp_call.text, sizeof(tcp_call.text), "npf TCP mss clamp Test 2.4 - limit (stateful)"); /* create the rproc npf string */ @@ -885,13 +749,17 @@ DP_START_TEST(tcp_mss_ipv6, test1) if (rules[0].stateful == STATELESS) dp_test_npf_fw_add(&fw2, false); - dp_test_tcp_call(&tcp_call, tcp_pkt1, ARRAY_SIZE(tcp_pkt1), &ctx, 0); + dpt_tcp_call(&tcp_call, tcp_pkt1, ARRAY_SIZE(tcp_pkt1), 0, 0, &ctx, 0); dp_test_npf_fw_del(&fw1, false); if (rules[0].stateful == STATELESS) dp_test_npf_fw_del(&fw2, false); + free(ins_pre); + free(ins_post); + free(outs_pre); + free(outs_post); /************************************************************* * Cleanup diff --git a/tests/whole_dp/src/dp_test_npf_vti.c b/tests/whole_dp/src/dp_test_npf_vti.c new file mode 100644 index 00000000..b3f2f4f0 --- /dev/null +++ b/tests/whole_dp/src/dp_test_npf_vti.c @@ -0,0 +1,899 @@ +/* + * Copyright (c) 2018-2020, AT&T Intellectual Property. All rights reserved. 
+ * + * SPDX-License-Identifier: LGPL-2.1-only + * + * Whole dataplane Zone Firewall tests for virtual tunnel interfaces + */ + +#include + +#include "ip6_funcs.h" +#include "ip_funcs.h" +#include "in_cksum.h" +#include "if_var.h" +#include "main.h" +#include "crypto/vti.h" + +#include "dp_test.h" +#include "dp_test_str.h" +#include "dp_test_lib_internal.h" +#include "dp_test_lib_exp.h" +#include "dp_test_lib_pkt.h" +#include "dp_test_lib_intf_internal.h" +#include "dp_test_crypto_utils.h" +#include "dp_test_pktmbuf_lib_internal.h" +#include "dp_test_netlink_state_internal.h" +#include "dp_test_console.h" +#include "dp_test_json_utils.h" +#include "dp_test_npf_lib.h" +#include "dp_test_npf_fw_lib.h" +#include "dp_test_npf_sess_lib.h" +#include "dp_test_npf_nat_lib.h" + + +DP_DECL_TEST_SUITE(npf_vti_suite); + +#define SPI_OUTBOUND 0xd43d87c7 +#define SPI_INBOUND 0x10203040 +#define VTI_TUN_REQID 1234 + +#define NETWORK_WEST "10.10.1.0" +#define CLIENT_LOCAL "10.10.1.1" +#define NETWORK_LOCAL "10.10.1.0" +#define PORT_WEST "10.10.1.2" +#define CLIENT_LOCAL_MAC_ADDR "aa:bb:cc:dd:1:1" + +#define NETWORK_WEST6 "2001:1::" +#define CLIENT_LOCAL6 "2001:1::1" +#define PORT_WEST6 "2001:1::2" + +#define NETWORK_EAST "10.10.2.0" +#define PEER "10.10.2.3" +#define PEER_MAC_ADDR "aa:bb:cc:dd:2:3" +#define PORT_EAST "10.10.2.2" +#define NETWORK_REMOTE "10.10.3.0" + +#define NETWORK_EAST6 "2001:2::" +#define PEER6 "2001:2::3" +#define PORT_EAST6 "2001:2::2" +#define NETWORK_REMOTE6 "2001:3::" + +#define OUTPUT_MARK 100 +#define INPUT_MARK 100 + +#define CLIENT_REMOTE "10.10.3.4" +#define CLIENT_REMOTE6 "2001:3::4" + +#define TEST_VRF_ID 55 + +/* + * Crypto policy definitions used by the tests in this module + */ +static struct dp_test_crypto_policy output_policy = { + .d_prefix = "0.0.0.0/0", + .s_prefix = "0.0.0.0/0", + .proto = 0, + .dst = PEER, + .dst_family = AF_INET, + .dir = XFRM_POLICY_OUT, + .family = AF_INET, + .reqid = VTI_TUN_REQID, + .priority = 0, + .mark = 
OUTPUT_MARK, + .vrfid = VRF_DEFAULT_ID +}; + +static struct dp_test_crypto_policy input_policy = { + .d_prefix = "0.0.0.0/0", + .s_prefix = "0.0.0.0/0", + .proto = 0, + .dst = PORT_EAST, + .dst_family = AF_INET, + .dir = XFRM_POLICY_IN, + .family = AF_INET, + .reqid = VTI_TUN_REQID + 1, + .priority = 0, + .mark = INPUT_MARK, + .vrfid = VRF_DEFAULT_ID +}; + +/* + * Crypto SA definitions used by the tests in this module + */ +static struct dp_test_crypto_sa output_sa = { + .auth_algo = CRYPTO_AUTH_HMAC_SHA1, + .spi = SPI_OUTBOUND, + .d_addr = PEER, + .s_addr = PORT_EAST, + .family = AF_INET, + .mode = XFRM_MODE_TUNNEL, + .reqid = VTI_TUN_REQID, + .mark = OUTPUT_MARK, + .vrfid = VRF_DEFAULT_ID +}; + +static struct dp_test_crypto_sa input_sa = { + .auth_algo = CRYPTO_AUTH_HMAC_SHA1, + .spi = SPI_INBOUND, + .d_addr = PORT_EAST, + .s_addr = PEER, + .family = AF_INET, + .mode = XFRM_MODE_TUNNEL, + .reqid = VTI_TUN_REQID + 1, + .mark = INPUT_MARK, + .vrfid = VRF_DEFAULT_ID +}; + +static void vti_setup_policies_and_sas(vrfid_t vrfid) +{ + input_policy.vrfid = vrfid; + output_policy.vrfid = vrfid; + dp_test_crypto_create_policy(&input_policy); + dp_test_crypto_create_policy(&output_policy); + + input_sa.vrfid = vrfid; + output_sa.vrfid = vrfid; + dp_test_crypto_create_sa(&input_sa); + dp_test_crypto_create_sa(&output_sa); +} + +static void vti_teardown_sas_and_policy(void) +{ + dp_test_crypto_delete_policy(&input_policy); + dp_test_crypto_delete_policy(&output_policy); + + dp_test_crypto_delete_sa(&input_sa); + dp_test_crypto_delete_sa(&output_sa); +} + +static void vti_setup_tunnel(vrfid_t vrf_id, uint16_t mark_out) +{ + char route_name[DP_TEST_MAX_ROUTE_STRING_LEN]; + + if (vrf_id != VRF_DEFAULT_ID) + dp_test_netlink_add_vrf(vrf_id, 1); + + /* Input interface and connected route is in the requested VRF */ + dp_test_nl_add_ip_addr_and_connected_vrf("dp1T1", + "10.10.1.2/24", vrf_id); + dp_test_netlink_add_neigh("dp1T1", CLIENT_LOCAL, CLIENT_LOCAL_MAC_ADDR); + + /* Output 
interface and connected route are in default VRF */ + dp_test_nl_add_ip_addr_and_connected("dp2T2", "10.10.2.2/24"); + dp_test_netlink_add_neigh("dp2T2", PEER, PEER_MAC_ADDR); + + dp_test_intf_vti_create("vti0", PORT_EAST, PEER, mark_out, vrf_id); + dp_test_netlink_add_ip_address_vrf("vti0", "5.5.5.5/24", vrf_id); + snprintf(route_name, sizeof(route_name), "vrf:%d %s nh %s int:vti0", + vrf_id, "10.10.3.0/24", PEER); + dp_test_netlink_add_route(route_name); + + dp_test_crypto_check_sa_count(vrf_id, 0); +} + +static void vti_teardown_tunnel(vrfid_t vrf_id) +{ + char route_name[DP_TEST_MAX_ROUTE_STRING_LEN]; + + snprintf(route_name, sizeof(route_name), "vrf:%d %s nh %s int:vti0", + vrf_id, "10.10.3.0/24", PEER); + dp_test_netlink_del_route(route_name); + + dp_test_netlink_del_ip_address_vrf("vti0", "5.5.5.5/24", vrf_id); + dp_test_intf_vti_delete("vti0", PORT_EAST, PEER, 10, vrf_id); + dp_test_netlink_del_neigh("dp2T2", PEER, PEER_MAC_ADDR); + dp_test_nl_del_ip_addr_and_connected("dp2T2", "10.10.2.2/24"); + dp_test_netlink_del_neigh("dp1T1", CLIENT_LOCAL, CLIENT_LOCAL_MAC_ADDR); + dp_test_nl_del_ip_addr_and_connected_vrf("dp1T1", "10.10.1.2/24", + vrf_id); + + if (vrf_id != VRF_DEFAULT_ID) + dp_test_netlink_del_vrf(vrf_id, 0); +} + +static void vti_count(struct ifnet *ifp, void *arg) +{ + int *count = (int *)arg; + + if (ifp->if_type == IFT_TUNNEL_VTI) + (*count)++; +} + +static int vti_count_of_vtis(void) +{ + int count = 0; + + dp_ifnet_walk(vti_count, &count); + return count; +} + +/* + * build_input_icmp_packet() + * + * This helper function builds an input ICMP packet that + * corresponds to the encrypted payload in the ESP packet + * built by build_expected_esp_packet(). 
+ */ +static struct rte_mbuf *build_input_icmp_packet(void) +{ + struct iphdr *ip; + struct rte_mbuf *packet; + const uint8_t payload[] = { + 0x2c, 0x57, 0xba, 0x55, 0x00, 0x00, 0x00, 0x00, 0xd9, 0xe9, + 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x03, 0x04, + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, + 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, + 0x03, 0x04, 0x01, 0x02, 0x03, 0x04 + }; + int payload_len = sizeof(payload); + + packet = dp_test_create_icmp_ipv4_pak(CLIENT_LOCAL, CLIENT_REMOTE, + ICMP_ECHO /* echo request */, + 0 /* no code */, + DPT_ICMP_ECHO_DATA(0xac9, 1), + 1 /* one mbuf */, + &payload_len, + payload, + &ip, NULL); + if (!packet) + return NULL; + + /* + * The resulting ICMP packet isn't exactly as + * we want, so tickle a few bits into shape + */ + dp_test_set_pak_ip_field(ip, DP_TEST_SET_IP_ID, 0xea53); + dp_test_set_pak_ip_field(ip, DP_TEST_SET_DF, 1); + + return packet; +} + +/* + * build_expected_esp_packet() + * + * This helper function creates an output ESP packet containing the + * encrypted ICMP ping packet built by build_input_icmp_packet(). 
+ */ +static struct rte_mbuf *build_expected_esp_packet(int *payload_len) +{ + const char encrypted_payload[] = { + 0x64, 0xc8, 0x6e, 0x89, 0x53, 0x45, 0x54, 0xd6, 0xb1, 0x0c, + 0x8c, 0xca, 0xc4, 0x44, 0xbf, 0xd3, 0x7c, 0xe1, 0x9e, 0x7e, + 0x65, 0x4c, 0xfd, 0x34, 0xfd, 0x9d, 0x64, 0xab, 0x31, 0x2c, + 0x3a, 0x08, 0x4d, 0x75, 0xb5, 0x86, 0x27, 0x50, 0xaf, 0x0e, + 0x47, 0xc0, 0x0e, 0x55, 0x56, 0x13, 0x97, 0xe0, 0xef, 0xc2, + 0x68, 0xf2, 0xdf, 0xb2, 0xfc, 0xf7, 0xd7, 0x70, 0xc9, 0x35, + 0xf5, 0xb1, 0xa8, 0x12, 0x23, 0x6c, 0xa9, 0xe3, 0xd3, 0xe0, + 0x41, 0xef, 0x9f, 0xf0, 0xfe, 0x99, 0x89, 0x88, 0x7d, 0x2c, + 0xdf, 0xf1, 0x7a, 0x85, 0x28, 0xf4, 0x0c, 0x99, 0x36, 0xa5, + 0x34, 0x3e, 0xde, 0xf8, 0xa6, 0x84, 0x40, 0xf3, 0x6f, 0xc5, + 0x07, 0xee, 0xde, 0x55, 0xcf, 0x9d, 0xaf, 0xda, 0x9e, 0x7b, + 0x8c, 0x98, 0xf6, 0xf8, 0x59, 0x0f, 0xd7, 0xbd, 0xc9, 0x24, + 0x01, 0xcd, 0x42, 0x38 + }; + + *payload_len = sizeof(encrypted_payload); + + return dp_test_create_esp_ipv4_pak(PORT_EAST, PEER, 1, + payload_len, + encrypted_payload, + SPI_OUTBOUND, + 1 /* seq no */, + 0 /* ip ID */, + 255 /* ttl */, + NULL /* udp/esp */, + NULL /* transport_hdr*/); +} + + +/* + * build_encrypted_input_packet() + * + * This helper function creates an input ESP packet containing + * an encrypted ICMP ping packet from 10.10.3.4 to 10.10.1.1. 
+ */ +static struct rte_mbuf *build_encrypted_input_packet(void) +{ + int payload_len; + const char encrypted_payload[] = { + 0x64, 0xc8, 0x6e, 0x89, 0x53, 0x45, 0x54, 0xd6, 0xb1, 0x0c, + 0x8c, 0xca, 0xc4, 0x44, 0xbf, 0xd3, 0xe4, 0xac, 0x69, 0xfb, + 0x6e, 0xf2, 0x98, 0x2c, 0x4e, 0x19, 0xd6, 0x8f, 0xd1, 0x72, + 0xfb, 0x67, 0x3c, 0x14, 0xc8, 0x00, 0x34, 0x4a, 0x08, 0x3d, + 0xe6, 0x3d, 0xeb, 0x3b, 0xeb, 0x90, 0xd8, 0xe1, 0x28, 0xa5, + 0xd2, 0x1b, 0xa1, 0xb1, 0xcf, 0xf4, 0xf4, 0x3e, 0x1d, 0x6b, + 0xa2, 0x8d, 0xb2, 0x2c, 0x5e, 0x60, 0x7f, 0x81, 0x3b, 0x79, + 0xb5, 0x10, 0xe2, 0x78, 0x7c, 0xd7, 0x19, 0xcf, 0x14, 0x80, + 0xca, 0x31, 0xa8, 0x4d, 0xf8, 0xde, 0x31, 0x3d, 0x61, 0x4d, + 0x5d, 0xed, 0x02, 0x1a, 0x91, 0x5d, 0x7c, 0x36, 0x9d, 0xce, + 0x2f, 0x1c, 0x57, 0x75, 0x8b, 0xe2, 0xa1, 0xdc, 0xf9, 0x4a, + 0x33, 0x97, 0x2a, 0x71, 0x7b, 0x16, 0x88, 0x59, 0x3d, 0x09, + 0xc8, 0x89, 0xa8, 0x31 + }; + + payload_len = sizeof(encrypted_payload); + + return dp_test_create_esp_ipv4_pak(PEER, PORT_EAST, 1, + &payload_len, + encrypted_payload, + SPI_INBOUND, + 1 /* seq no */, + 0 /* ip ID */, + 63 /* ttl */, + NULL /* udp/esp */, + NULL /* transport_hdr*/); +} + +/* + * build_expected_icmp_packet() + * + * This helper function builds an output ICMP packet that + * corresponds to the encrypted payload in the ESP packet + * built by build_encrypted_input_packet(). 
+ */ +static struct rte_mbuf *build_expected_icmp_packet(int *payload_len) +{ + struct iphdr *ip; + struct rte_mbuf *packet; + const uint8_t payload[] = { + 0x2c, 0x57, 0xba, 0x55, 0x00, 0x00, 0x00, 0x00, 0xd9, 0xe9, + 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x03, 0x04, + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, + 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, + 0x03, 0x04, 0x01, 0x02, 0x03, 0x04 + }; + + *payload_len = sizeof(payload); + + packet = dp_test_create_icmp_ipv4_pak(CLIENT_REMOTE, CLIENT_LOCAL, + ICMP_ECHO /* echo request */, + 0 /* no code */, + DPT_ICMP_ECHO_DATA(0xac9, 1), + 1 /* one mbuf */, + payload_len, + payload, + &ip, NULL); + + /* + * The resulting ICMP packet isn't exactly as + * we want, so tickle a few bits into shape + */ + dp_test_set_pak_ip_field(ip, DP_TEST_SET_IP_ID, 0xea53); + dp_test_set_pak_ip_field(ip, DP_TEST_SET_DF, 1); + dp_test_set_pak_ip_field(ip, DP_TEST_SET_TTL, + DP_TEST_PAK_DEFAULT_TTL - 2); + + return packet; +} + +/* + * TEST: npf_vti_encrypt1 + * + * This check tests that when an ICMP ping packet is received that + * should be routed over a VTI tunnel the correct encrypted ESP + * packet is transmitted. + * + * Baseline test. No npf configuration. 
+ * + * Packet ----> + * vti0 + * ========================== tunnel + * 10.10.2.2 10.10.2.3 + * +---------+ + * dp1T1 | | dp2T2 + * --------------------+ +--------------- + * 10.10.1.2 | | 10.10.2.2 + * +---------+ + * + */ +DP_DECL_TEST_CASE(npf_vti_suite, npf_vti_encrypt1, NULL, NULL); +DP_START_TEST(npf_vti_encrypt1, test) +{ + struct rte_mbuf *output_packet; + struct rte_mbuf *input_packet; + struct dp_test_expected *exp; + int encrypted_payload_len; + + vti_setup_tunnel(VRF_DEFAULT_ID, OUTPUT_MARK); + vti_setup_policies_and_sas(VRF_DEFAULT_ID); + + dp_test_fail_unless((vti_count_of_vtis() == 1), + "Expected VTI to be created"); + + input_packet = build_input_icmp_packet(); + (void)dp_test_pktmbuf_eth_init(input_packet, + dp_test_intf_name2mac_str("dp1T1"), + NULL, RTE_ETHER_TYPE_IPV4); + + output_packet = build_expected_esp_packet(&encrypted_payload_len); + + dp_test_set_pak_ip_field(iphdr(output_packet), DP_TEST_SET_DF, 1); + + (void)dp_test_pktmbuf_eth_init(output_packet, + PEER_MAC_ADDR, + dp_test_intf_name2mac_str("dp2T2"), + RTE_ETHER_TYPE_IPV4); + + exp = dp_test_exp_create(output_packet); + rte_pktmbuf_free(output_packet); + dp_test_exp_set_oif_name(exp, "dp2T2"); + + dp_test_pak_receive(input_packet, "dp1T1", exp); + dp_test_crypto_check_sad_packets(VRF_DEFAULT_ID, 1, 84); + + vti_teardown_tunnel(VRF_DEFAULT_ID); + vti_teardown_sas_and_policy(); + + dp_test_fail_unless((vti_count_of_vtis() == 0), + "Expected VTI to be deleted"); +} DP_END_TEST; + +/* + * TEST: npf_vti_decrypt1 + * + * Baseline test. No npf configuration. + * + * This test checks that an encrypted packet received on a + * VTI interface is correctly decrypted and forwarded.
+ * + * <----- Packet + * + * vti0 + * ========================== tunnel + * 10.10.2.2 10.10.2.3 + * +---------+ + * dp1T1 | | dp2T2 + * -------------------+ +--------------- + * 10.10.1.2 | | 10.10.2.2 + * +---------+ + * + */ +DP_DECL_TEST_CASE(npf_vti_suite, npf_vti_decrypt1, NULL, NULL); +DP_START_TEST(npf_vti_decrypt1, test) +{ + struct rte_mbuf *output_packet; + struct rte_mbuf *input_packet; + struct dp_test_expected *exp; + int decrypted_payload_len; + + vti_setup_tunnel(VRF_DEFAULT_ID, OUTPUT_MARK); + vti_setup_policies_and_sas(VRF_DEFAULT_ID); + dp_test_fail_unless((vti_count_of_vtis() == 1), + "Expected VTI to be created"); + + /* + * Create the input encrypted packet. + */ + input_packet = build_encrypted_input_packet(); + (void)dp_test_pktmbuf_eth_init(input_packet, + dp_test_intf_name2mac_str("dp2T2"), + NULL, RTE_ETHER_TYPE_IPV4); + + /* + * Create the expected decrypted ping packet + */ + output_packet = build_expected_icmp_packet(&decrypted_payload_len); + (void)dp_test_pktmbuf_eth_init(output_packet, + CLIENT_LOCAL_MAC_ADDR, + dp_test_intf_name2mac_str("dp1T1"), + RTE_ETHER_TYPE_IPV4); + + /* + * Create an expectation for the decrypted ICMP ping packet on dp1T1. + */ + exp = dp_test_exp_create(output_packet); + rte_pktmbuf_free(output_packet); + + dp_test_exp_set_oif_name(exp, "dp1T1"); + + dp_test_pak_receive(input_packet, "dp2T2", exp); + dp_test_crypto_check_sad_packets(VRF_DEFAULT_ID, 1, 84); + + vti_teardown_tunnel(VRF_DEFAULT_ID); + vti_teardown_sas_and_policy(); + dp_test_fail_unless((vti_count_of_vtis() == 0), + "Expected VTI to be deleted"); +} DP_END_TEST; + +/* + * TEST: npf_vti_encrypt2 + * + * Rx interface dp1T1 in zone EAST, tx interface vti0 in zone WEST, pass rule + * for EAST to WEST traffic.
+ * + * Zone "EAST" Zone "WEST" + * + * Packet ----> + * vti0 + * ========================== tunnel + * 10.10.2.2 10.10.2.3 + * +---------+ + * dp1T1 | | dp2T2 + * --------------------+ +--------------- + * 10.10.1.2 | | 10.10.2.2 + * +---------+ + * + */ +DP_DECL_TEST_CASE(npf_vti_suite, npf_vti_encrypt2, NULL, NULL); +DP_START_TEST(npf_vti_encrypt2, test) +{ + struct rte_mbuf *output_packet; + struct rte_mbuf *input_packet; + struct dp_test_expected *exp; + int encrypted_payload_len; + + vti_setup_tunnel(VRF_DEFAULT_ID, OUTPUT_MARK); + vti_setup_policies_and_sas(VRF_DEFAULT_ID); + + dp_test_fail_unless((vti_count_of_vtis() == 1), + "Expected VTI to be created"); + + /* + * Add zones config + */ + struct dpt_zone_cfg cfg = { + .private = { + .name = "EAST", + .intf = { "dp1T1", NULL, NULL }, + .local = false, + }, + .public = { + .name = "WEST", + .intf = { "vti0", NULL, NULL }, + .local = false, + }, + .local = { 0 }, + .priv_to_pub = { + .name = "EAST_TO_WEST", + .pass = PASS, + .stateful = STATELESS, + .npf = "", + }, + .pub_to_priv = { + .name = "WEST_TO_EAST", + .pass = PASS, + .stateful = STATELESS, + .npf = "", + }, + .local_to_priv = { 0 }, + .priv_to_local = { 0 }, + .local_to_pub = { 0 }, + .pub_to_local = { 0 }, + }; + + dpt_zone_cfg(&cfg, true, false); + + /* + * Create the input packet. 
+ */ + input_packet = build_input_icmp_packet(); + (void)dp_test_pktmbuf_eth_init(input_packet, + dp_test_intf_name2mac_str("dp1T1"), + NULL, RTE_ETHER_TYPE_IPV4); + + output_packet = build_expected_esp_packet(&encrypted_payload_len); + + dp_test_set_pak_ip_field(iphdr(output_packet), DP_TEST_SET_DF, 1); + + (void)dp_test_pktmbuf_eth_init(output_packet, + PEER_MAC_ADDR, + dp_test_intf_name2mac_str("dp2T2"), + RTE_ETHER_TYPE_IPV4); + + exp = dp_test_exp_create(output_packet); + rte_pktmbuf_free(output_packet); + dp_test_exp_set_oif_name(exp, "dp2T2"); + + dp_test_pak_receive(input_packet, "dp1T1", exp); + dp_test_crypto_check_sad_packets(VRF_DEFAULT_ID, 1, 84); + + /* + * Remove zones config + */ + dpt_zone_cfg(&cfg, false, false); + + vti_teardown_tunnel(VRF_DEFAULT_ID); + vti_teardown_sas_and_policy(); + + dp_test_fail_unless((vti_count_of_vtis() == 0), + "Expected VTI to be deleted"); +} DP_END_TEST; + +/* + * TEST: npf_vti_decrypt2 + * + * Tx interface dp1T1 in zone EAST, rx interface vti0 in zone WEST, pass rule + * for WEST to EAST traffic. 
+ * + * Zone "EAST" Zone "WEST" + * + * <----- Packet + * + * vti0 + * ========================== tunnel + * 10.10.2.2 10.10.2.3 + * +---------+ + * dp1T1 | | dp2T2 + * -------------------+ +--------------- + * 10.10.1.2 | | 10.10.2.2 + * +---------+ + * + */ +DP_DECL_TEST_CASE(npf_vti_suite, npf_vti_decrypt2, NULL, NULL); +DP_START_TEST(npf_vti_decrypt2, test) +{ + struct rte_mbuf *output_packet; + struct rte_mbuf *input_packet; + struct dp_test_expected *exp; + int decrypted_payload_len; + + vti_setup_tunnel(VRF_DEFAULT_ID, OUTPUT_MARK); + vti_setup_policies_and_sas(VRF_DEFAULT_ID); + dp_test_fail_unless((vti_count_of_vtis() == 1), + "Expected VTI to be created"); + + /* + * Add zones config + */ + /* + * EAST = dp1T1, WEST = vti0; stateless pass rules in both directions + */ + struct dpt_zone_cfg cfg = { + .private = { + .name = "EAST", + .intf = { "dp1T1", NULL, NULL }, + .local = false, + }, + .public = { + .name = "WEST", + .intf = { "vti0", NULL, NULL }, + .local = false, + }, + .local = { 0 }, + .priv_to_pub = { + .name = "EAST_TO_WEST", + .pass = PASS, + .stateful = STATELESS, + .npf = "", + }, + .pub_to_priv = { + .name = "WEST_TO_EAST", + .pass = PASS, + .stateful = STATELESS, + .npf = "", + }, + .local_to_priv = { 0 }, + .priv_to_local = { 0 }, + .local_to_pub = { 0 }, + .pub_to_local = { 0 }, + }; + + dpt_zone_cfg(&cfg, true, false); + + /* + * Create the input encrypted packet. + */ + input_packet = build_encrypted_input_packet(); + (void)dp_test_pktmbuf_eth_init(input_packet, + dp_test_intf_name2mac_str("dp2T2"), + NULL, RTE_ETHER_TYPE_IPV4); + + /* + * Create the expected decrypted ping packet + */ + output_packet = build_expected_icmp_packet(&decrypted_payload_len); + (void)dp_test_pktmbuf_eth_init(output_packet, + CLIENT_LOCAL_MAC_ADDR, + dp_test_intf_name2mac_str("dp1T1"), + RTE_ETHER_TYPE_IPV4); + + /* + * Create an expectation for the decrypted ICMP ping packet on dp1T1.
+ */ + exp = dp_test_exp_create(output_packet); + rte_pktmbuf_free(output_packet); + + dp_test_exp_set_oif_name(exp, "dp1T1"); + + dp_test_pak_receive(input_packet, "dp2T2", exp); + dp_test_crypto_check_sad_packets(VRF_DEFAULT_ID, 1, 84); + + /* + * Remove zones config + */ + dpt_zone_cfg(&cfg, false, false); + + vti_teardown_tunnel(VRF_DEFAULT_ID); + vti_teardown_sas_and_policy(); + + dp_test_fail_unless((vti_count_of_vtis() == 0), + "Expected VTI to be deleted"); +} DP_END_TEST; + + +/* + * TEST: npf_vti_encrypt3 + * + * Rx interface dp1T1 in zone EAST, tx interface vti0 not in a zone. + * + * Zone "EAST" + * + * Packet ----> + * vti0 + * ========================== tunnel + * 10.10.2.2 10.10.2.3 + * +---------+ + * dp1T1 | | dp2T2 + * --------------------+ +--------------- + * 10.10.1.2 | | 10.10.2.2 + * +---------+ + * + */ +DP_DECL_TEST_CASE(npf_vti_suite, npf_vti_encrypt3, NULL, NULL); +DP_START_TEST(npf_vti_encrypt3, test) +{ + struct rte_mbuf *output_packet; + struct rte_mbuf *input_packet; + struct dp_test_expected *exp; + int encrypted_payload_len; + + vti_setup_tunnel(VRF_DEFAULT_ID, OUTPUT_MARK); + vti_setup_policies_and_sas(VRF_DEFAULT_ID); + + dp_test_fail_unless((vti_count_of_vtis() == 1), + "Expected VTI to be created"); + + /* + * Add zones config + */ + struct dpt_zone_cfg cfg = { + .private = { + .name = "EAST", + .intf = { "dp1T1", NULL, NULL }, + .local = false, + }, + .public = { NULL, { NULL, NULL, NULL }, false }, + .local = { 0 }, + .pub_to_priv = { 0 }, + .priv_to_pub = { 0 }, + .local_to_priv = { 0 }, + .priv_to_local = { 0 }, + .local_to_pub = { 0 }, + .pub_to_local = { 0 }, + }; + + dpt_zone_cfg(&cfg, true, false); + + /* + * Create the input packet. 
+ */ + input_packet = build_input_icmp_packet(); + (void)dp_test_pktmbuf_eth_init(input_packet, + dp_test_intf_name2mac_str("dp1T1"), + NULL, RTE_ETHER_TYPE_IPV4); + + output_packet = build_expected_esp_packet(&encrypted_payload_len); + + dp_test_set_pak_ip_field(iphdr(output_packet), DP_TEST_SET_DF, 1); + + (void)dp_test_pktmbuf_eth_init(output_packet, + PEER_MAC_ADDR, + dp_test_intf_name2mac_str("dp2T2"), + RTE_ETHER_TYPE_IPV4); + + exp = dp_test_exp_create(output_packet); + rte_pktmbuf_free(output_packet); + + dp_test_exp_set_oif_name(exp, "dp2T2"); + dp_test_exp_set_fwd_status(exp, DP_TEST_FWD_DROPPED); + + dp_test_pak_receive(input_packet, "dp1T1", exp); + + /* + * Remove zones config + */ + dpt_zone_cfg(&cfg, false, false); + + vti_teardown_tunnel(VRF_DEFAULT_ID); + vti_teardown_sas_and_policy(); + + dp_test_fail_unless((vti_count_of_vtis() == 0), + "Expected VTI to be deleted"); +} DP_END_TEST; + +/* + * TEST: npf_vti_decrypt3 + * + * Tx interface dp1T1 in zone EAST, rx interface vti0 not in a zone. 
+ * + * Zone "EAST" + * + * <----- Packet + * + * vti0 + * ========================== tunnel + * 10.10.2.2 10.10.2.3 + * +---------+ + * dp1T1 | | dp2T2 + * -------------------+ +--------------- + * 10.10.1.2 | | 10.10.2.2 + * +---------+ + * + */ +DP_DECL_TEST_CASE(npf_vti_suite, npf_vti_decrypt3, NULL, NULL); +DP_START_TEST(npf_vti_decrypt3, test) +{ + struct rte_mbuf *output_packet; + struct rte_mbuf *input_packet; + struct dp_test_expected *exp; + int decrypted_payload_len; + + vti_setup_tunnel(VRF_DEFAULT_ID, OUTPUT_MARK); + vti_setup_policies_and_sas(VRF_DEFAULT_ID); + dp_test_fail_unless((vti_count_of_vtis() == 1), + "Expected VTI to be created"); + + /* + * Add zones config + */ + struct dpt_zone_cfg cfg = { + .private = { + .name = "EAST", + .intf = { "dp1T1", NULL, NULL }, + .local = false, + }, + .public = { NULL, { NULL, NULL, NULL }, false }, + .local = { 0 }, + .pub_to_priv = { 0 }, + .priv_to_pub = { 0 }, + .local_to_priv = { 0 }, + .priv_to_local = { 0 }, + .local_to_pub = { 0 }, + .pub_to_local = { 0 }, + }; + + dpt_zone_cfg(&cfg, true, false); + + /* + * Create the input encrypted packet. + */ + input_packet = build_encrypted_input_packet(); + (void)dp_test_pktmbuf_eth_init(input_packet, + dp_test_intf_name2mac_str("dp2T2"), + NULL, RTE_ETHER_TYPE_IPV4); + + /* + * Create the expected decrypted ping packet + */ + output_packet = build_expected_icmp_packet(&decrypted_payload_len); + (void)dp_test_pktmbuf_eth_init(output_packet, + CLIENT_LOCAL_MAC_ADDR, + dp_test_intf_name2mac_str("dp1T1"), + RTE_ETHER_TYPE_IPV4); + + /* + * Create an expectation for the decrypted ICMP ping packet on dp1T1. 
+ */ + exp = dp_test_exp_create(output_packet); + rte_pktmbuf_free(output_packet); + + dp_test_exp_set_oif_name(exp, "dp1T1"); + dp_test_exp_set_fwd_status(exp, DP_TEST_FWD_DROPPED); + + dp_test_pak_receive(input_packet, "dp2T2", exp); + + /* + * Remove zones config + */ + dpt_zone_cfg(&cfg, false, false); + + vti_teardown_tunnel(VRF_DEFAULT_ID); + vti_teardown_sas_and_policy(); + + dp_test_fail_unless((vti_count_of_vtis() == 0), + "Expected VTI to be deleted"); +} DP_END_TEST; + diff --git a/tests/whole_dp/src/dp_test_npf_zone.c b/tests/whole_dp/src/dp_test_npf_zone.c new file mode 100644 index 00000000..28281241 --- /dev/null +++ b/tests/whole_dp/src/dp_test_npf_zone.c @@ -0,0 +1,1817 @@ +/* + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2015 by Brocade Communications Systems, Inc. + * All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + * + * Whole dataplane Zone Firewall tests + */ + +#include + +#include "ip6_funcs.h" +#include "ip_funcs.h" +#include "in_cksum.h" +#include "if_var.h" +#include "main.h" + +#include "dp_test.h" +#include "dp_test_str.h" +#include "dp_test_lib_internal.h" +#include "dp_test_lib_exp.h" +#include "dp_test_lib_pkt.h" +#include "dp_test_lib_intf_internal.h" +#include "dp_test_pktmbuf_lib_internal.h" +#include "dp_test_netlink_state_internal.h" +#include "dp_test_console.h" +#include "dp_test_json_utils.h" +#include "dp_test_npf_lib.h" +#include "dp_test_npf_fw_lib.h" +#include "dp_test_npf_sess_lib.h" +#include "dp_test_npf_nat_lib.h" + + + +/* Forward declarations */ +static void zone_setup(void); +static void zone_teardown(void); +static void zone_setup6(void); +static void zone_teardown6(void); + +DP_DECL_TEST_SUITE(npf_zone); + +/* + * zone1 - Zone to zone, Stateless, simple ruleset + */ +DP_DECL_TEST_CASE(npf_zone, zone1, zone_setup, zone_teardown); +DP_START_TEST(zone1, test) +{ + bool debug = false; + + struct dpt_zone_cfg cfg = { + .private = { + .name = 
"PRIVATE", + .intf = { "dp1T0", "dp1T1", NULL }, + .local = false, + }, + .public = { + .name = "PUBLIC", + .intf = { "dp1T2", "dp1T3", NULL }, + .local = false, + }, + .local = { 0 }, + .pub_to_priv = { + .name = "PUB_TO_PRIV", + .pass = BLOCK, + .stateful = STATELESS, + .npf = "", + }, + .priv_to_pub = { + .name = "PRIV_TO_PUB", + .pass = PASS, + .stateful = STATELESS, + .npf = "src-addr=1.1.1.11", + }, + .local_to_priv = { 0 }, + .priv_to_local = { 0 }, + .local_to_pub = { 0 }, + .pub_to_local = { 0 }, + }; + + dpt_zone_cfg(&cfg, true, debug); + + /* + * PRIVATE -> PRIVATE, Intra-zone + */ + dpt_udp("dp1T0", "aa:bb:cc:dd:1:a1", + "1.1.1.11", 41000, "2.2.2.11", 1000, + NULL, 0, NULL, 0, + "aa:bb:cc:dd:2:a1", "dp1T1", + DP_TEST_FWD_FORWARDED); + + /* + * PRIVATE -> PUBLIC, src-addr matches PASS rule + */ + dpt_udp("dp1T0", "aa:bb:cc:dd:1:a1", + "1.1.1.11", 41001, "3.3.3.11", 1001, + NULL, 0, NULL, 0, + "aa:bb:cc:dd:3:a1", "dp1T2", + DP_TEST_FWD_FORWARDED); + + /* + * PRIVATE -> PUBLIC, src-addr does *not* match PASS rule + */ + dpt_udp("dp1T0", "aa:bb:cc:dd:1:a2", + "1.1.1.12", 41002, "3.3.3.11", 1002, + NULL, 0, NULL, 0, + "aa:bb:cc:dd:3:a1", "dp1T2", + DP_TEST_FWD_DROPPED); + + /* + * PUBLIC -> PRIVATE (reverse packet of PRIVATE -> PUBLIC) + */ + dpt_udp("dp1T2", "aa:bb:cc:dd:3:a1", + "3.3.3.11", 1001, "1.1.1.11", 41001, + NULL, 0, NULL, 0, + "aa:bb:cc:dd:1:a1", "dp1T0", + DP_TEST_FWD_DROPPED); + + + /* Cleanup */ + dpt_zone_cfg(&cfg, false, debug); + + /* zone1 */ +} DP_END_TEST; + + +/* + * zone2 - Zone to zone, "PRIVATE->PUBLIC" is Stateful, simple ruleset + */ +DP_DECL_TEST_CASE(npf_zone, zone2, zone_setup, zone_teardown); +DP_START_TEST(zone2, test) +{ + bool debug = false; + + struct dpt_zone_cfg cfg = { + .private = { + .name = "PRIVATE", + .intf = { "dp1T0", "dp1T1", NULL }, + .local = false, + }, + .public = { + .name = "PUBLIC", + .intf = { "dp1T2", "dp1T3", NULL }, + .local = false, + }, + .local = { 0 }, + .pub_to_priv = { + .name = "PUB_TO_PRIV", + .pass 
= BLOCK, + .stateful = STATELESS, + .npf = "", + }, + .priv_to_pub = { + .name = "PRIV_TO_PUB", + .pass = PASS, + .stateful = STATEFUL, + .npf = "dst-addr=3.3.3.11", + }, + .local_to_priv = { 0 }, + .priv_to_local = { 0 }, + .local_to_pub = { 0 }, + .pub_to_local = { 0 }, + }; + + dpt_zone_cfg(&cfg, true, debug); + + /* + * 1. PRIVATE -> PUBLIC, dst-addr matches stateful PASS rule. Session + * will be created on dp1T2. + */ + dpt_udp("dp1T0", "aa:bb:cc:dd:1:a1", + "1.1.1.11", 41001, "3.3.3.11", 1001, + NULL, 0, NULL, 0, + "aa:bb:cc:dd:3:a1", "dp1T2", + DP_TEST_FWD_FORWARDED); + + /* + * 2. PRIVATE -> PUBLIC, dst-addr does not match PASS rule + */ + dpt_udp("dp1T0", "aa:bb:cc:dd:1:a1", + "1.1.1.11", 41002, "3.3.3.12", 1002, + NULL, 0, NULL, 0, + "aa:bb:cc:dd:3:a2", "dp1T2", + DP_TEST_FWD_DROPPED); + + /* + * 3. PUBLIC -> PRIVATE. Reverse of pkt #1. Zone has block rule on + * output, but pkt matches reverse session on input so is forwarded. + */ + dpt_udp("dp1T2", "aa:bb:cc:dd:3:a1", + "3.3.3.11", 1001, "1.1.1.11", 41001, + NULL, 0, NULL, 0, + "aa:bb:cc:dd:1:a1", "dp1T0", + DP_TEST_FWD_FORWARDED); + + /* Cleanup */ + dpt_zone_cfg(&cfg, false, debug); + + /* zone2 */ +} DP_END_TEST; + + +/* + * zone3 - Zone to/from non-zone + */ +DP_DECL_TEST_CASE(npf_zone, zone3, zone_setup, zone_teardown); +DP_START_TEST(zone3, test) +{ + bool debug = false; + + struct dpt_zone_cfg cfg = { + .private = { + .name = "PRIVATE", + .intf = { "dp1T0", "dp1T1", NULL }, + .local = false, + }, + .public = { + .name = "PUBLIC", + .intf = { "dp1T2", "dp1T3", NULL }, + .local = false, + }, + .local = { 0 }, + .pub_to_priv = { + .name = "PUB_TO_PRIV", + .pass = BLOCK, + .stateful = STATELESS, + .npf = "", + }, + .priv_to_pub = { + .name = "PRIV_TO_PUB", + .pass = PASS, + .stateful = STATELESS, + .npf = "", + }, + .local_to_priv = { 0 }, + .priv_to_local = { 0 }, + .local_to_pub = { 0 }, + .pub_to_local = { 0 }, + }; + + dpt_zone_cfg(&cfg, true, debug); + + /* + * 1. 
PRIVATE -> Non-zone + */ + dpt_udp("dp1T0", "aa:bb:cc:dd:1:a1", + "1.1.1.11", 41001, "5.5.5.11", 1001, + NULL, 0, NULL, 0, + "aa:bb:cc:dd:5:a1", "dp2T1", + DP_TEST_FWD_DROPPED); + + /* + * 2. Non-zone -> PRIVATE (reverse packet #1) + */ + dpt_udp("dp2T1", "aa:bb:cc:dd:5:a1", + "5.5.5.11", 1001, "1.1.1.11", 41001, + NULL, 0, NULL, 0, + "aa:bb:cc:dd:1:a1", "dp1T0", + DP_TEST_FWD_DROPPED); + + /* + * Local -> PRIVATE. No local zone is cfgd, so should default to + * FORWARDED. + */ + dpt_udp(NULL, "00:00:00:00:00:00", + "1.1.1.1", 41002, "1.1.1.11", 1002, + NULL, 0, NULL, 0, + "aa:bb:cc:dd:1:a1", "dp1T0", + DP_TEST_FWD_FORWARDED); + + /* Cleanup */ + dpt_zone_cfg(&cfg, false, debug); + + /* zone3 */ +} DP_END_TEST; + + +/* + * zone4 - Zones and SNAT + * + * We add a ZBF src-addr rule for PRIVATE -> PUBLIC traffic. This matches the + * post-SNAT source address. + */ +DP_DECL_TEST_CASE(npf_zone, zone4, zone_setup, zone_teardown); +DP_START_TEST(zone4, test) +{ + bool debug = false; + + struct dpt_zone_cfg cfg = { + .private = { + .name = "PRIVATE", + .intf = { "dp1T0", "dp1T1", NULL }, + .local = false, + }, + .public = { + .name = "PUBLIC", + .intf = { "dp1T2", "dp1T3", NULL }, + .local = false, + }, + .local = { 0 }, + .pub_to_priv = { + .name = "PUB_TO_PRIV", + .pass = BLOCK, + .stateful = STATELESS, + .npf = "", + }, + .priv_to_pub = { + .name = "PRIV_TO_PUB", + .pass = PASS, + .stateful = STATELESS, + /* SNAT address */ + .npf = "src-addr=3.3.3.102", + }, + .local_to_priv = { 0 }, + .priv_to_local = { 0 }, + .local_to_pub = { 0 }, + .pub_to_local = { 0 }, + }; + + dpt_zone_cfg(&cfg, true, debug); + + /* SNAT on PRIVATE intf dp1T1 */ + dpt_snat_cfg("dp1T1", IPPROTO_UDP, NULL, "2.2.2.64/26", true); + + /* SNAT on PUBLIC intf dp1T2 */ + dpt_snat_cfg("dp1T2", IPPROTO_UDP, NULL, "3.3.3.64/26", true); + + /* SNAT on non-zone intf dp2T1 */ + dpt_snat_cfg("dp2T1", IPPROTO_UDP, NULL, "5.5.5.64/26", true); + + /* + * 1. 
PRIVATE -> PRIVATE/snat + */ + dpt_udp("dp1T0", "aa:bb:cc:dd:1:a1", + "1.1.1.11", 41000, "2.2.2.11", 1000, + "2.2.2.102", 41000, "2.2.2.11", 1000, + "aa:bb:cc:dd:2:a1", "dp1T1", + DP_TEST_FWD_FORWARDED); + + /* + * 2. PRIVATE/snat -> PRIVATE + */ + dpt_udp("dp1T1", "aa:bb:cc:dd:2:a1", + "2.2.2.11", 1000, "2.2.2.102", 41000, + "2.2.2.11", 1000, "1.1.1.11", 41000, + "aa:bb:cc:dd:1:a1", "dp1T0", + DP_TEST_FWD_FORWARDED); + + /* + * 3. PRIVATE -> PUBLIC/snat + * + * SNAT'd src addr matches PRIV_TO_PUB pass rule. SNAT session + * created on dp1T2. + */ + dpt_udp("dp1T0", "aa:bb:cc:dd:1:a1", + "1.1.1.11", 41001, "3.3.3.11", 1001, + "3.3.3.102", 41001, "3.3.3.11", 1001, + "aa:bb:cc:dd:3:a1", "dp1T2", + DP_TEST_FWD_FORWARDED); + + /* + * 4. PRIVATE -> PUBLIC/snat + * + * SNAT'd src addr does *not* match PRIV_TO_PUB pass rule + */ + dpt_udp("dp1T0", "aa:bb:cc:dd:1:a2", + "1.1.1.12", 41001, "3.3.3.11", 1001, + "3.3.3.103", 41001, "3.3.3.11", 1001, + "aa:bb:cc:dd:3:a1", "dp1T2", + DP_TEST_FWD_DROPPED); + + /* + * 5. PUBLIC/rev-snat -> PRIVATE. Reverse of #3. Pkt matches reverse + * SNAT session on input on dp1T2. PUB_TO_PRIV has block rule, but + * NAT-pinhole overrides that and pkt is forwarded. + * + * Compare this to pkt zone 5 #4. A NAT session is matched on input, + * and a zone pass is not matched on output. That pkt is blocked + * whereas here the pkt is forwarded. Only difference is that zone 5 + * pkt #4 matches a reverse NAT session whereas this pkt matched a + * forwards NAT session. 
+ */ + dpt_udp("dp1T2", "aa:bb:cc:dd:3:a1", + "3.3.3.11", 1001, "3.3.3.102", 41001, + "3.3.3.11", 1001, "1.1.1.11", 41001, + "aa:bb:cc:dd:1:a1", "dp1T0", + DP_TEST_FWD_FORWARDED); + + /* + * PRIVATE -> Non-zone/snat + */ + dpt_udp("dp1T0", "aa:bb:cc:dd:1:a1", + "1.1.1.11", 41001, "5.5.5.11", 1001, + NULL, 0, NULL, 0, + "aa:bb:cc:dd:5:a1", "dp2T1", + DP_TEST_FWD_DROPPED); + + /* Cleanup */ + dpt_snat_cfg("dp1T1", IPPROTO_UDP, NULL, "2.2.2.64/26", false); + dpt_snat_cfg("dp1T2", IPPROTO_UDP, NULL, "3.3.3.64/26", false); + dpt_snat_cfg("dp2T1", IPPROTO_UDP, NULL, "5.5.5.64/26", false); + dpt_zone_cfg(&cfg, false, debug); + + /* zone4 */ +} DP_END_TEST; + + +/* + * zone5 - Zones and DNAT + */ +DP_DECL_TEST_CASE(npf_zone, zone5, zone_setup, zone_teardown); +DP_START_TEST(zone5, test) +{ + bool debug = false; + + struct dpt_zone_cfg cfg = { + .private = { + .name = "PRIVATE", + .intf = { "dp1T0", "dp1T1", NULL }, + .local = false, + }, + .public = { + .name = "PUBLIC", + .intf = { "dp1T2", "dp1T3", NULL }, + .local = false, + }, + .local = { 0 }, + .pub_to_priv = { + .name = "PUB_TO_PRIV", + .pass = BLOCK, + .stateful = STATELESS, + .npf = "", + }, + .priv_to_pub = { + .name = "PRIV_TO_PUB", + .pass = PASS, + .stateful = STATELESS, + /* Post-DNAT address */ + .npf = "dst-addr=3.3.3.12", + }, + .local_to_priv = { 0 }, + .priv_to_local = { 0 }, + .local_to_pub = { 0 }, + .pub_to_local = { 0 }, + }; + + dpt_zone_cfg(&cfg, true, debug); + + /* + * 1. PRIVATE/dnat -> PRIVATE. dst addr is DNAT'd on input. + */ + dpt_dnat_cfg("dp1T0", IPPROTO_UDP, "2.2.2.11", "2.2.2.12", true); + dpt_udp("dp1T0", "aa:bb:cc:dd:1:a1", + "1.1.1.11", 41000, "2.2.2.11", 1000, + "1.1.1.11", 41000, "2.2.2.12", 1000, + "aa:bb:cc:dd:2:a2", "dp1T1", + DP_TEST_FWD_FORWARDED); + dpt_dnat_cfg("dp1T0", IPPROTO_UDP, "2.2.2.11", "2.2.2.12", false); + + /* + * 2. PRIVATE/dnat -> PUBLIC. Pkt is DNAT'd at input and session + * created on dp1T0. At output, DNAT'd dest addr matches zone pass + * rule. 
+ */ + dpt_dnat_cfg("dp1T0", IPPROTO_UDP, "3.3.3.11", "3.3.3.12", true); + + dpt_udp("dp1T0", "aa:bb:cc:dd:1:a1", + "1.1.1.11", 41001, "3.3.3.11", 1001, + "1.1.1.11", 41001, "3.3.3.12", 1001, + "aa:bb:cc:dd:3:a2", "dp1T2", + DP_TEST_FWD_FORWARDED); + + /* + * 3. PUBLIC -> PRIVATE/rev-dnat. Zone has block rule. Pkt matches + * reverse DNAT session of pkt #2 at output on dp1T0. + */ + dpt_udp("dp1T2", "aa:bb:cc:dd:3:a2", + "3.3.3.12", 1001, "1.1.1.11", 41001, + "3.3.3.11", 1001, "1.1.1.11", 41001, + "aa:bb:cc:dd:1:a1", "dp1T0", + DP_TEST_FWD_FORWARDED); + + dpt_dnat_cfg("dp1T0", IPPROTO_UDP, "3.3.3.11", "3.3.3.12", false); + + /* + * 4. PRIVATE/dnat -> PUBLIC. Pkt is DNAT'd at input. At output, + * DNAT'd dest addr does *not* match zone pass rule. + */ + dpt_dnat_cfg("dp1T0", IPPROTO_UDP, "4.4.4.11", "4.4.4.12", true); + + dpt_udp("dp1T0", "aa:bb:cc:dd:1:a1", + "1.1.1.11", 41001, "4.4.4.11", 1001, + "1.1.1.11", 41001, "4.4.4.12", 1001, + "aa:bb:cc:dd:4:a2", "dp1T3", + DP_TEST_FWD_DROPPED); + + dpt_dnat_cfg("dp1T0", IPPROTO_UDP, "4.4.4.11", "4.4.4.12", false); + + + /* + * 5. PRIVATE/dnat -> Non-zone + * + * These next two tests are odd. The PRIVATE/dnat -> Non-zone creates + * a DNAT session at input, but the packet is dropped by ZBF at + * output. + * + * However the subsequent packet (#6) is FORWARDED because of + * nat-pinhole even though it is rcvd on a non-zone interface. + */ + dpt_dnat_cfg("dp1T0", IPPROTO_UDP, "5.5.5.11", "5.5.5.12", true); + + dpt_udp("dp1T0", "aa:bb:cc:dd:1:a1", + "1.1.1.11", 41001, "5.5.5.11", 1001, + "1.1.1.11", 41001, "5.5.5.12", 1001, + "aa:bb:cc:dd:3:a2", "dp2T1", + DP_TEST_FWD_DROPPED); + + /* + * 6. Non-zone -> PRIVATE/rev-dnat + * + * This is forwarded because npf_session_is_nat_pinhole() returns true + * in npf_hook_track in fw_out. Pkt matches reverse session created + * by pkt #5. 
+ */ + dpt_udp("dp2T1", "aa:bb:cc:dd:1:a2", + "5.5.5.12", 1001, "1.1.1.11", 41001, + "5.5.5.11", 1001, "1.1.1.11", 41001, + "aa:bb:cc:dd:1:a1", "dp1T0", + DP_TEST_FWD_FORWARDED); + + dpt_dnat_cfg("dp1T0", IPPROTO_UDP, "5.5.5.11", "5.5.5.12", false); + + /* Cleanup */ + dpt_zone_cfg(&cfg, false, debug); + + /* zone5 */ +} DP_END_TEST; + + +/* + * zone6 - Zones and local + */ +DP_DECL_TEST_CASE(npf_zone, zone6, zone_setup, zone_teardown); +DP_START_TEST(zone6, test) +{ + bool debug = false; + + struct dpt_zone_cfg cfg = { + .private = { + .name = "PRIVATE", + .intf = { "dp1T0", "dp1T1", NULL }, + .local = false, + }, + .public = { + .name = "PUBLIC", + .intf = { "dp1T2", "dp1T3", NULL }, + .local = false, + }, + .local = { + .name = "LOCAL", + .intf = { NULL, NULL, NULL }, + .local = true, + }, + .pub_to_priv = { + .name = "PUB_TO_PRIV", + .pass = BLOCK, + .stateful = STATELESS, + .npf = "", + }, + .priv_to_pub = { + .name = "PRIV_TO_PUB", + .pass = PASS, + .stateful = STATELESS, + .npf = "", + }, + .local_to_priv = { + .name = "LOCAL_TO_PRIV", + .pass = PASS, + .stateful = STATELESS, + .npf = "dst-addr=1.1.1.11", + }, + .priv_to_local = { + .name = "PRIV_TO_LOCAL", + .pass = PASS, + .stateful = STATELESS, + .npf = "src-addr=1.1.1.12", + }, + .local_to_pub = { 0 }, + .pub_to_local = { + .name = "PUB_TO_LOCAL", + .pass = BLOCK, + .stateful = STATELESS, + .npf = "", + }, + }; + + dpt_zone_cfg(&cfg, true, debug); + + /* + * Local -> PRIVATE. dst addr matches pass rule. + */ + dpt_udp(NULL, "00:00:00:00:00:00", + "1.1.1.1", 41001, "1.1.1.11", 1001, + NULL, 0, NULL, 0, + "aa:bb:cc:dd:1:a1", "dp1T0", + DP_TEST_FWD_FORWARDED); + + /* + * PRIVATE -> Local. src addr matches pass rule. + */ + dpt_udp("dp1T0", "aa:bb:cc:dd:1:a2", + "1.1.1.12", 1002, "1.1.1.1", 41002, + NULL, 0, NULL, 0, + "00:00:00:00:00:00", NULL, + DP_TEST_FWD_LOCAL); + + /* + * Local -> PUBLIC. No ruleset. 
+ */ + dpt_udp(NULL, "00:00:00:00:00:00", + "1.1.1.1", 41003, "3.3.3.11", 1003, + NULL, 0, NULL, 0, + "aa:bb:cc:dd:3:a1", "dp1T2", + DP_TEST_FWD_FORWARDED); + + /* + * PUBLIC -> Local. Block rule. + */ + dpt_udp("dp1T2", "aa:bb:cc:dd:3:a1", + "3.3.3.11", 1003, "1.1.1.1", 41003, + NULL, 0, NULL, 0, + "00:00:00:00:00:00", NULL, + DP_TEST_FWD_DROPPED); + + /* Cleanup */ + dpt_zone_cfg(&cfg, false, debug); + + /* zone6 */ +} DP_END_TEST; + + +/* + * zone7 - Zones and local, SNAT + */ +DP_DECL_TEST_CASE(npf_zone, zone7, zone_setup, zone_teardown); +DP_START_TEST(zone7, test) +{ + bool debug = false; + + struct dpt_zone_cfg cfg = { + .private = { + .name = "PRIVATE", + .intf = { "dp1T0", "dp1T1", NULL }, + .local = false, + }, + .public = { + .name = "PUBLIC", + .intf = { "dp1T2", "dp1T3", NULL }, + .local = false, + }, + .local = { + .name = "LOCAL", + .intf = { NULL, NULL, NULL }, + .local = true, + }, + .pub_to_priv = { + .name = "PUB_TO_PRIV", + .pass = BLOCK, + .stateful = STATELESS, + .npf = "", + }, + .priv_to_pub = { + .name = "PRIV_TO_PUB", + .pass = PASS, + .stateful = STATELESS, + .npf = "", + }, + .local_to_priv = { + .name = "LOCAL_TO_PRIV", + .pass = PASS, + .stateful = STATELESS, + .npf = "src-addr=1.1.1.92", + }, + .priv_to_local = { + .name = "PRIV_TO_LOCAL", + .pass = BLOCK, + .stateful = STATELESS, + .npf = "", + }, + .local_to_pub = { + .name = "LOCAL_TO_PUB", + .pass = PASS, + .stateful = STATELESS, + .npf = "", + }, + .pub_to_local = { + .name = "PUB_TO_LOCAL", + .pass = BLOCK, + .stateful = STATELESS, + .npf = "", + }, + }; + + dpt_zone_cfg(&cfg, true, debug); + + /* Add SNAT to a PUBLIC interface. */ + dpt_snat_cfg("dp1T2", IPPROTO_UDP, NULL, "3.3.3.64/26", true); + + /* + * 1. Local -> PUBLIC + */ + dpt_udp(NULL, "00:00:00:00:00:00", + "1.1.1.1", 41003, "3.3.3.11", 1003, + "3.3.3.92", 41003, "3.3.3.11", 1003, + "aa:bb:cc:dd:3:a1", "dp1T2", + DP_TEST_FWD_FORWARDED); + + /* + * 2. PUBLIC -> Local. 
PUB_TO_LOCAL has a BLOCK rule but NAT pinhole + * from pkt #1 allows return packet to bypass ZBF. + */ + dpt_udp("dp1T2", "aa:bb:cc:dd:3:a1", + "3.3.3.11", 1003, "3.3.3.92", 41003, + "3.3.3.11", 1003, "1.1.1.1", 41003, + "00:00:00:00:00:00", NULL, + DP_TEST_FWD_LOCAL); + + dpt_snat_cfg("dp1T2", IPPROTO_UDP, NULL, "3.3.3.64/26", false); + + /* Add SNAT to a PRIVATE interface. */ + dpt_snat_cfg("dp1T0", IPPROTO_UDP, NULL, "1.1.1.64/26", true); + + /* + * 3. Local -> PRIVATE. Pkt is SNAT'd. SNAT src addr matches zone + * rule. + */ + dpt_udp(NULL, "00:00:00:00:00:00", + "1.1.1.1", 41004, "1.1.1.11", 1004, + "1.1.1.92", 41004, "1.1.1.11", 1004, + "aa:bb:cc:dd:1:a1", "dp1T0", + DP_TEST_FWD_FORWARDED); + + /* + * 4. PRIVATE -> Local. Reverse of pkt #3. PRIV_TO_LOCAL has a block + * rule, but pkt matches reverse SNAT session. + */ + dpt_udp("dp1T0", "00:00:00:00:00:00", + "1.1.1.11", 1004, "1.1.1.92", 41004, + "1.1.1.11", 1004, "1.1.1.1", 41004, + "aa:bb:cc:dd:1:a1", NULL, + DP_TEST_FWD_FORWARDED); + + + dpt_snat_cfg("dp1T0", IPPROTO_UDP, NULL, "1.1.1.64/26", false); + + /* Cleanup */ + dpt_zone_cfg(&cfg, false, debug); + + /* zone7 */ +} DP_END_TEST; + + +/* + * zone8 - Zones and local, DNAT + */ +DP_DECL_TEST_CASE(npf_zone, zone8, zone_setup, zone_teardown); +DP_START_TEST(zone8, test) +{ + bool debug = false; + + struct dpt_zone_cfg cfg = { + .private = { + .name = "PRIVATE", + .intf = { "dp1T0", "dp1T1", NULL }, + .local = false, + }, + .public = { + .name = "PUBLIC", + .intf = { "dp1T2", "dp1T3", NULL }, + .local = false, + }, + .local = { + .name = "LOCAL", + .intf = { NULL, NULL, NULL }, + .local = true, + }, + .pub_to_priv = { + .name = "PUB_TO_PRIV", + .pass = BLOCK, + .stateful = STATELESS, + .npf = "", + }, + .priv_to_pub = { + .name = "PRIV_TO_PUB", + .pass = PASS, + .stateful = STATELESS, + .npf = "", + }, + .local_to_priv = { + .name = "LOCAL_TO_PRIV", + .pass = PASS, + .stateful = STATELESS, + .npf = "dst-addr=1.1.1.12", + }, + .priv_to_local = { + .name = 
"PRIV_TO_LOCAL", + .pass = PASS, + .stateful = STATELESS, + /* + * dst-addr rule matches *pre* DNAT address. The DNAT + * is reversed in the slow path in order to run the + * ZBF. + */ + .npf = "dst-addr=1.1.1.21", + }, + .local_to_pub = { + .name = "LOCAL_TO_PUB", + .pass = PASS, + .stateful = STATELESS, + .npf = "", + }, + .pub_to_local = { + .name = "PUB_TO_LOCAL", + .pass = BLOCK, + .stateful = STATELESS, + .npf = "", + }, + }; + + dpt_zone_cfg(&cfg, true, debug); + + /* Add DNAT to a PRIVATE interface */ + dpt_dnat_cfg("dp1T0", IPPROTO_UDP, "1.1.1.21", "1.1.1.1", true); + + /* Add DNAT to a PUBLIC interface */ + dpt_dnat_cfg("dp1T2", IPPROTO_UDP, "3.3.3.21", "3.3.3.3", true); + + /* + * 1. PRIVATE to Local. + * + * Pkt is DNATd on input. At this point dst addr does *not* match + * PRIV_TO_LOCAL rule. Pkt must have the DNAT reversed in the + * slow path before a ruleset lookup is done. + */ + dpt_udp("dp1T0", "aa:bb:cc:dd:1:a1", + "1.1.1.11", 1003, "1.1.1.21", 41003, + "1.1.1.11", 1003, "1.1.1.1", 41003, + "00:00:00:00:00:00", NULL, + DP_TEST_FWD_LOCAL); + + /* + * 2. Local to PRIVATE. Reverse of #1. dst addr does *not* match + * zone rule, but pkt matches reverse session of pkt #1. + */ + dpt_udp(NULL, "00:00:00:00:00:00", + "1.1.1.1", 41003, "1.1.1.11", 1003, + "1.1.1.21", 41003, "1.1.1.11", 1003, + "aa:bb:cc:dd:1:a1", "dp1T0", + DP_TEST_FWD_FORWARDED); + + /* + * PUBLIC to Local. Block rule. 
+ */ + dpt_udp("dp1T2", "aa:bb:cc:dd:3:a1", + "3.3.3.11", 1004, "3.3.3.21", 41004, + "3.3.3.11", 1004, "3.3.3.3", 41004, + "00:00:00:00:00:00", NULL, + DP_TEST_FWD_DROPPED); + + /* Cleanup */ + dpt_dnat_cfg("dp1T0", IPPROTO_UDP, "1.1.1.21", "1.1.1.1", false); + dpt_dnat_cfg("dp1T2", IPPROTO_UDP, "3.3.3.21", "3.3.3.3", false); + dpt_zone_cfg(&cfg, false, debug); + + /* zone8 */ +} DP_END_TEST; + +/* + * zone9 - Stateful zones and SNAT + */ +DP_DECL_TEST_CASE(npf_zone, zone9, zone_setup, zone_teardown); +DP_START_TEST(zone9, test) +{ + bool debug = false; + + struct dpt_zone_cfg cfg = { + .private = { + .name = "PRIVATE", + .intf = { "dp1T0", "dp1T1", NULL }, + .local = false, + }, + .public = { + .name = "PUBLIC", + .intf = { "dp1T2", "dp1T3", NULL }, + .local = false, + }, + .local = { 0 }, + .pub_to_priv = { + .name = "PUB_TO_PRIV", + .pass = BLOCK, + .stateful = STATELESS, + .npf = "", + }, + .priv_to_pub = { + .name = "PRIV_TO_PUB", + .pass = PASS, + .stateful = STATEFUL, + .npf = "src-addr=3.3.3.100", + }, + .local_to_priv = { 0 }, + .priv_to_local = { 0 }, + .local_to_pub = { 0 }, + .pub_to_local = { 0 }, + }; + + dpt_zone_cfg(&cfg, true, debug); + + /* SNAT on PUBLIC intf dp1T2 */ + dpt_snat_cfg("dp1T2", IPPROTO_UDP, NULL, "3.3.3.100", true); + + /* + * 1. PRIVATE -> PUBLIC. SNAT and stateful zone rule on same + * interface. + */ + dpt_udp("dp1T0", "aa:bb:cc:dd:1:a1", + "1.1.1.11", 41001, "3.3.3.11", 1001, + "3.3.3.100", 41001, "3.3.3.11", 1001, + "aa:bb:cc:dd:3:a1", "dp1T2", + DP_TEST_FWD_FORWARDED); + + /* + * 2. 
PUBLIC -> PRIVATE + */ + dpt_udp("dp1T2", "aa:bb:cc:dd:3:a1", + "3.3.3.11", 1001, "3.3.3.100", 41001, + "3.3.3.11", 1001, "1.1.1.11", 41001, + "aa:bb:cc:dd:1:a1", "dp1T0", + DP_TEST_FWD_FORWARDED); + + + dpt_snat_cfg("dp1T2", IPPROTO_UDP, NULL, "3.3.3.100", false); + + dpt_zone_cfg(&cfg, false, debug); + + /* zone9 */ +} DP_END_TEST; + +/* + * zone10 - Stateful zones and DNAT + */ +DP_DECL_TEST_CASE(npf_zone, zone10, zone_setup, zone_teardown); +DP_START_TEST(zone10, test) +{ + bool debug = false; + + struct dpt_zone_cfg cfg = { + .private = { + .name = "PRIVATE", + .intf = { "dp1T0", "dp1T1", NULL }, + .local = false, + }, + .public = { + .name = "PUBLIC", + .intf = { "dp1T2", "dp1T3", NULL }, + .local = false, + }, + .local = { 0 }, + .pub_to_priv = { + .name = "PUB_TO_PRIV", + .pass = PASS, + .stateful = STATELESS, + .npf = "", + }, + .priv_to_pub = { + .name = "PRIV_TO_PUB", + .pass = PASS, + .stateful = STATELESS, + .npf = "", + }, + .local_to_priv = { 0 }, + .priv_to_local = { 0 }, + .local_to_pub = { 0 }, + .pub_to_local = { 0 }, + }; + + dpt_zone_cfg(&cfg, true, debug); + + dpt_zone_cfg(&cfg, false, debug); + + /* zone10 */ +} DP_END_TEST; + +/* + * zone11 - Zone block rule after SNAT + */ +DP_DECL_TEST_CASE(npf_zone, zone11, zone_setup, zone_teardown); +DP_START_TEST(zone11, test) +{ + bool debug = false; + + struct dpt_zone_cfg cfg = { + .private = { + .name = "PRIVATE", + .intf = { "dp1T0", "dp1T1", NULL }, + .local = false, + }, + .public = { + .name = "PUBLIC", + .intf = { "dp1T2", "dp1T3", NULL }, + .local = false, + }, + .local = { 0 }, + .pub_to_priv = { + .name = "PUB_TO_PRIV", + .pass = PASS, + .stateful = STATELESS, + .npf = "", + }, + .priv_to_pub = { + .name = "PRIV_TO_PUB", + .pass = BLOCK, + .stateful = STATELESS, + .npf = "", + }, + .local_to_priv = { 0 }, + .priv_to_local = { 0 }, + .local_to_pub = { 0 }, + .pub_to_local = { 0 }, + }; + + dpt_zone_cfg(&cfg, true, debug); + + /* SNAT on PUBLIC intf dp1T2 */ + dpt_snat_cfg("dp1T2", 
IPPROTO_UDP, NULL, "3.3.3.100", true); + + /* + * 1. PRIVATE -> PUBLIC. src addr is SNATd and session created. But + * zone has block rule so SNAT session is *not* activated. + */ + dpt_udp("dp1T0", "aa:bb:cc:dd:1:a1", + "1.1.1.11", 41001, "3.3.3.11", 1001, + "3.3.3.100", 41001, "3.3.3.11", 1001, + "aa:bb:cc:dd:3:a1", "dp1T2", + DP_TEST_FWD_DROPPED); + + /* + * If there is a session then that means the SNAT rule was activated + * even though zones fw dropped the packet. This is BAD, as it means + * a NAT pinhole has been wrongly opened for return traffic. + */ + dp_test_npf_session_count_verify(0); + + dpt_snat_cfg("dp1T2", IPPROTO_UDP, NULL, "3.3.3.100", false); + + dpt_zone_cfg(&cfg, false, debug); + + /* zone11 */ +} DP_END_TEST; + + +/* + * zone12 - Stateful rule in one direction, block rule in reverse direction. + */ +DP_DECL_TEST_CASE(npf_zone, zone12, zone_setup, zone_teardown); +DP_START_TEST(zone12, test) +{ + bool debug = false; + + struct dpt_zone_cfg cfg = { + .private = { + .name = "PRIVATE", + .intf = { "dp1T0", "dp1T1", NULL }, + .local = false, + }, + .public = { + .name = "PUBLIC", + .intf = { "dp1T2", "dp1T3", NULL }, + .local = false, + }, + .local = { 0 }, + .pub_to_priv = { + .name = "PUB_TO_PRIV", + .pass = BLOCK, + .stateful = STATELESS, + .npf = "", + }, + .priv_to_pub = { + .name = "PRIV_TO_PUB", + .pass = PASS, + .stateful = STATEFUL, + .npf = "", + }, + .local_to_priv = { 0 }, + .priv_to_local = { 0 }, + .local_to_pub = { 0 }, + .pub_to_local = { 0 }, + }; + + dpt_zone_cfg(&cfg, true, debug); + + /* + * 1. PUBLIC -> PRIVATE. Block rule. + */ + dpt_udp("dp1T2", "aa:bb:cc:dd:3:a1", + "3.3.3.11", 1001, "1.1.1.11", 41001, + NULL, 0, NULL, 0, + "aa:bb:cc:dd:1:a1", "dp1T0", + DP_TEST_FWD_DROPPED); + + /* + * 2. PRIVATE -> PUBLIC. Reverse of #1. Will match stateful rule and + * create a session. 
+ */ + dpt_udp("dp1T0", "aa:bb:cc:dd:1:a1", + "1.1.1.11", 41001, "3.3.3.11", 1001, + NULL, 0, NULL, 0, + "aa:bb:cc:dd:3:a1", "dp1T2", + DP_TEST_FWD_FORWARDED); + + /* + * 3. PUBLIC -> PRIVATE. Repeat of #1. Reverse of #2. Will match + * zones session on input, which will override block rule. + */ + dpt_udp("dp1T2", "aa:bb:cc:dd:3:a1", + "3.3.3.11", 1001, "1.1.1.11", 41001, + NULL, 0, NULL, 0, + "aa:bb:cc:dd:1:a1", "dp1T0", + DP_TEST_FWD_FORWARDED); + + dpt_zone_cfg(&cfg, false, debug); + + /* zone12 */ +} DP_END_TEST; + + +/* + * zone19 - Zone to zone, VIF interface + */ +DP_DECL_TEST_CASE(npf_zone, zone19, zone_setup, zone_teardown); +DP_START_TEST(zone19, test) +{ + bool debug = false; + + struct dpt_zone_cfg cfg = { + .private = { + .name = "PRIVATE", + .intf = { "dp1T0", "dp1T1.100", NULL }, + .local = false, + }, + .public = { + .name = "PUBLIC", + .intf = { "dp1T2", "dp1T3", NULL }, + .local = false, + }, + .local = { 0 }, + .pub_to_priv = { + .name = "PUB_TO_PRIV", + .pass = BLOCK, + .stateful = STATELESS, + .npf = "", + }, + .priv_to_pub = { + .name = "PRIV_TO_PUB", + .pass = PASS, + .stateful = STATELESS, + .npf = "", + }, + .local_to_priv = { 0 }, + .priv_to_local = { 0 }, + .local_to_pub = { 0 }, + .pub_to_local = { 0 }, + }; + + /* + * Create an incomplete vif interface *before* Zone cfg + */ + dp_test_intf_vif_create_incmpl("dp1T1.100", "dp1T1", 100); + + dpt_zone_cfg(&cfg, true, debug); + + /* Complete the vif interface */ + dp_test_intf_vif_create_incmpl_fin("dp1T1.100", "dp1T1", 100); + + dp_test_nl_add_ip_addr_and_connected("dp1T1.100", "2.2.3.2/24"); + dp_test_netlink_add_neigh("dp1T1.100", "2.2.3.11", "aa:bb:cc:3:2:a1"); + + /* + * PRIVATE -> PRIVATE + */ + dpt_vlan_udp("dp1T0", "aa:bb:cc:dd:1:a1", + "1.1.1.11", 41000, "2.2.3.11", 1000, + NULL, 0, NULL, 0, + "aa:bb:cc:3:2:a1", "dp1T1", + DP_TEST_FWD_FORWARDED, 0, 100); + + + /* Cleanup */ + dpt_zone_cfg(&cfg, false, debug); + + dp_test_netlink_del_neigh("dp1T1.100", "2.2.3.11", + 
"aa:bb:cc:3:2:a1"); + dp_test_nl_del_ip_addr_and_connected("dp1T1.100", "2.2.3.2/24"); + dp_test_intf_vif_del("dp1T1.100", 100); + + /* zone19 */ +} DP_END_TEST; + + +/* + * zone20 - Zones and IPv6 + */ +DP_DECL_TEST_CASE(npf_zone, zone20, zone_setup6, zone_teardown6); +DP_START_TEST(zone20, test) +{ + bool debug = false; + + struct dpt_zone_cfg cfg = { + .private = { + .name = "PRIVATE", + .intf = { "dp1T0", "dp1T1", NULL }, + .local = false, + }, + .public = { + .name = "PUBLIC", + .intf = { "dp1T2", "dp1T3", NULL }, + .local = false, + }, + .local = { 0 }, + .pub_to_priv = { + .name = "PUB_TO_PRIV", + .pass = BLOCK, + .stateful = STATELESS, + .npf = "", + }, + .priv_to_pub = { + .name = "PRIV_TO_PUB", + .pass = PASS, + .stateful = STATELESS, + .npf = "", + }, + .local_to_priv = { 0 }, + .priv_to_local = { 0 }, + .local_to_pub = { 0 }, + .pub_to_local = { 0 }, + }; + + dpt_zone_cfg(&cfg, true, debug); + + struct dp_test_pkt_desc_t udp_pkt = { + .text = "UDP", + .len = 20, + .ether_type = RTE_ETHER_TYPE_IPV6, + .l3_src = "2001:101:1::11", + .l2_src = "aa:bb:cc:dd:1:a1", + .l3_dst = "2001:101:2::11", + .l2_dst = "aa:bb:cc:dd:2:a1", + .proto = IPPROTO_UDP, + .l4 = { + .udp = { + .sport = 0xDEAD, + .dport = 0xBEEF, + } + }, + .rx_intf = "dp1T0", + .tx_intf = "dp1T1" + }; + struct rte_mbuf *pak; + struct dp_test_expected *exp; + + /* + * PRIVATE -> PRIVATE + */ + pak = dp_test_v6_pkt_from_desc(&udp_pkt); + exp = dp_test_exp_from_desc(pak, &udp_pkt); + dp_test_pak_receive(pak, udp_pkt.rx_intf, exp); + + /* + * PRIVATE -> PUBLIC + */ + udp_pkt.l3_dst = "2001:101:3::11"; + udp_pkt.l2_dst = "aa:bb:cc:dd:3:a1"; + udp_pkt.tx_intf = "dp1T2"; + pak = dp_test_v6_pkt_from_desc(&udp_pkt); + exp = dp_test_exp_from_desc(pak, &udp_pkt); + dp_test_pak_receive(pak, udp_pkt.rx_intf, exp); + + /* + * PUBLIC -> PRIVATE + */ + udp_pkt.l3_src = "2001:101:3::11"; + udp_pkt.l2_src = "aa:bb:cc:dd:3:a1"; + udp_pkt.rx_intf = "dp1T2"; + + udp_pkt.l3_dst = "2001:101:1::11"; + udp_pkt.l2_dst = 
"aa:bb:cc:dd:1:a1"; + udp_pkt.tx_intf = "dp1T0"; + + pak = dp_test_v6_pkt_from_desc(&udp_pkt); + exp = dp_test_exp_from_desc(pak, &udp_pkt); + dp_test_exp_set_fwd_status(exp, DP_TEST_FWD_DROPPED); + dp_test_pak_receive(pak, udp_pkt.rx_intf, exp); + + + /* Cleanup */ + dpt_zone_cfg(&cfg, false, debug); + + /* zone20 */ +} DP_END_TEST; + + +/* + * Interface and address setup for above tests + */ +static void zone_setup(void) +{ + /* Setup interfaces and neighbours */ + dp_test_nl_add_ip_addr_and_connected("dp1T0", "1.1.1.1/24"); + dp_test_nl_add_ip_addr_and_connected("dp1T1", "2.2.2.2/24"); + dp_test_nl_add_ip_addr_and_connected("dp1T2", "3.3.3.3/24"); + dp_test_nl_add_ip_addr_and_connected("dp1T3", "4.4.4.4/24"); + + /* Non-zone interfaces */ + dp_test_nl_add_ip_addr_and_connected("dp2T1", "5.5.5.5/24"); + dp_test_nl_add_ip_addr_and_connected("dp2T2", "6.6.6.6/24"); + + /* PRIVATE interfaces */ + dp_test_netlink_add_neigh("dp1T0", "1.1.1.11", "aa:bb:cc:dd:1:a1"); + dp_test_netlink_add_neigh("dp1T0", "1.1.1.12", "aa:bb:cc:dd:1:a2"); + + dp_test_netlink_add_neigh("dp1T1", "2.2.2.11", "aa:bb:cc:dd:2:a1"); + dp_test_netlink_add_neigh("dp1T1", "2.2.2.12", "aa:bb:cc:dd:2:a2"); + + /* PUBLIC interfaces */ + dp_test_netlink_add_neigh("dp1T2", "3.3.3.11", "aa:bb:cc:dd:3:a1"); + dp_test_netlink_add_neigh("dp1T2", "3.3.3.12", "aa:bb:cc:dd:3:a2"); + + dp_test_netlink_add_neigh("dp1T3", "4.4.4.11", "aa:bb:cc:dd:4:a1"); + dp_test_netlink_add_neigh("dp1T3", "4.4.4.12", "aa:bb:cc:dd:4:a2"); + + /* Non-zone interfaces */ + dp_test_netlink_add_neigh("dp2T1", "5.5.5.11", "aa:bb:cc:dd:5:a1"); + dp_test_netlink_add_neigh("dp2T1", "5.5.5.12", "aa:bb:cc:dd:5:a2"); + + dp_test_netlink_add_neigh("dp2T2", "6.6.6.11", "aa:bb:cc:dd:6:a1"); +} + +static void zone_teardown(void) +{ + dp_test_netlink_del_neigh("dp1T0", "1.1.1.11", "aa:bb:cc:dd:1:a1"); + dp_test_netlink_del_neigh("dp1T0", "1.1.1.12", "aa:bb:cc:dd:1:a2"); + + dp_test_netlink_del_neigh("dp1T1", "2.2.2.11", "aa:bb:cc:dd:2:a1"); + 
dp_test_netlink_del_neigh("dp1T1", "2.2.2.12", "aa:bb:cc:dd:2:a2"); + + dp_test_netlink_del_neigh("dp1T2", "3.3.3.11", "aa:bb:cc:dd:3:a1"); + dp_test_netlink_del_neigh("dp1T2", "3.3.3.12", "aa:bb:cc:dd:3:a2"); + + dp_test_netlink_del_neigh("dp1T3", "4.4.4.11", "aa:bb:cc:dd:4:a1"); + dp_test_netlink_del_neigh("dp1T3", "4.4.4.12", "aa:bb:cc:dd:4:a2"); + + dp_test_netlink_del_neigh("dp2T1", "5.5.5.11", "aa:bb:cc:dd:5:a1"); + dp_test_netlink_del_neigh("dp2T1", "5.5.5.12", "aa:bb:cc:dd:5:a2"); + + dp_test_netlink_del_neigh("dp2T2", "6.6.6.11", "aa:bb:cc:dd:6:a1"); + + dp_test_nl_del_ip_addr_and_connected("dp1T0", "1.1.1.1/24"); + dp_test_nl_del_ip_addr_and_connected("dp1T1", "2.2.2.2/24"); + dp_test_nl_del_ip_addr_and_connected("dp1T2", "3.3.3.3/24"); + dp_test_nl_del_ip_addr_and_connected("dp1T3", "4.4.4.4/24"); + dp_test_nl_del_ip_addr_and_connected("dp2T1", "5.5.5.5/24"); + dp_test_nl_del_ip_addr_and_connected("dp2T2", "6.6.6.6/24"); + + dp_test_npf_cleanup(); +} + +static void zone_setup6(void) +{ + /* Setup interfaces and neighbours */ + dp_test_nl_add_ip_addr_and_connected("dp1T0", "2001:101:1::1/96"); + dp_test_nl_add_ip_addr_and_connected("dp1T1", "2001:101:2::1/96"); + dp_test_nl_add_ip_addr_and_connected("dp1T2", "2001:101:3::1/96"); + dp_test_nl_add_ip_addr_and_connected("dp1T3", "2001:101:4::1/96"); + + /* Non-zone interfaces */ + dp_test_nl_add_ip_addr_and_connected("dp2T1", "2001:101:5::1/96"); + dp_test_nl_add_ip_addr_and_connected("dp2T2", "2001:101:6::1/96"); + + /* PRIVATE interfaces */ + dp_test_netlink_add_neigh("dp1T0", "2001:101:1::11", + "aa:bb:cc:dd:1:a1"); + dp_test_netlink_add_neigh("dp1T0", "2001:101:1::12", + "aa:bb:cc:dd:1:a2"); + + dp_test_netlink_add_neigh("dp1T1", "2001:101:2::11", + "aa:bb:cc:dd:2:a1"); + dp_test_netlink_add_neigh("dp1T1", "2001:101:2::12", + "aa:bb:cc:dd:2:a2"); + + /* PUBLIC interfaces */ + dp_test_netlink_add_neigh("dp1T2", "2001:101:3::11", + "aa:bb:cc:dd:3:a1"); + dp_test_netlink_add_neigh("dp1T2", "2001:101:3::12", 
+ "aa:bb:cc:dd:3:a2"); + + dp_test_netlink_add_neigh("dp1T3", "2001:101:4::11", + "aa:bb:cc:dd:4:a1"); + dp_test_netlink_add_neigh("dp1T3", "2001:101:4::12", + "aa:bb:cc:dd:4:a2"); + + /* Non-zone interfaces */ + dp_test_netlink_add_neigh("dp2T1", "2001:101:5::11", + "aa:bb:cc:dd:5:a1"); + dp_test_netlink_add_neigh("dp2T1", "2001:101:5::12", + "aa:bb:cc:dd:5:a2"); + + dp_test_netlink_add_neigh("dp2T2", "2001:101:6::11", + "aa:bb:cc:dd:6:a1"); +} + +static void zone_teardown6(void) +{ + /* PRIVATE interfaces */ + dp_test_netlink_del_neigh("dp1T0", "2001:101:1::11", + "aa:bb:cc:dd:1:a1"); + dp_test_netlink_del_neigh("dp1T0", "2001:101:1::12", + "aa:bb:cc:dd:1:a2"); + + dp_test_netlink_del_neigh("dp1T1", "2001:101:2::11", + "aa:bb:cc:dd:2:a1"); + dp_test_netlink_del_neigh("dp1T1", "2001:101:2::12", + "aa:bb:cc:dd:2:a2"); + + /* PUBLIC interfaces */ + dp_test_netlink_del_neigh("dp1T2", "2001:101:3::11", + "aa:bb:cc:dd:3:a1"); + dp_test_netlink_del_neigh("dp1T2", "2001:101:3::12", + "aa:bb:cc:dd:3:a2"); + + dp_test_netlink_del_neigh("dp1T3", "2001:101:4::11", + "aa:bb:cc:dd:4:a1"); + dp_test_netlink_del_neigh("dp1T3", "2001:101:4::12", + "aa:bb:cc:dd:4:a2"); + + /* Non-zone interfaces */ + dp_test_netlink_del_neigh("dp2T1", "2001:101:5::11", + "aa:bb:cc:dd:5:a1"); + dp_test_netlink_del_neigh("dp2T1", "2001:101:5::12", + "aa:bb:cc:dd:5:a2"); + + dp_test_netlink_del_neigh("dp2T2", "2001:101:6::11", + "aa:bb:cc:dd:6:a1"); + + /* Setup interfaces and neighbours */ + dp_test_nl_del_ip_addr_and_connected("dp1T0", "2001:101:1::1/96"); + dp_test_nl_del_ip_addr_and_connected("dp1T1", "2001:101:2::1/96"); + dp_test_nl_del_ip_addr_and_connected("dp1T2", "2001:101:3::1/96"); + dp_test_nl_del_ip_addr_and_connected("dp1T3", "2001:101:4::1/96"); + + /* Non-zone interfaces */ + dp_test_nl_del_ip_addr_and_connected("dp2T1", "2001:101:5::1/96"); + dp_test_nl_del_ip_addr_and_connected("dp2T2", "2001:101:6::1/96"); + + dp_test_npf_cleanup(); +} + + +/* + * TEST: decrypt_a_packet + * + * 
This test checks that an encrypted packet received on a + * VTI interface is correctly decrypted and forwarded. + */ +#include "dp_test_crypto_utils.h" + +#define SPI_OUTBOUND 0xd43d87c7 +#define SPI_INBOUND 0x10203040 +#define VTI_TUN_REQID 1234 + +#define NETWORK_WEST "10.10.1.0" +#define CLIENT_LOCAL "10.10.1.1" +#define NETWORK_LOCAL "10.10.1.0" +#define PORT_WEST "10.10.1.2" +#define CLIENT_LOCAL_MAC_ADDR "aa:bb:cc:dd:1:1" + +#define NETWORK_EAST "10.10.2.0" +#define PEER "10.10.2.3" +#define PEER_MAC_ADDR "aa:bb:cc:dd:2:3" +#define PORT_EAST "10.10.2.2" +#define NETWORK_REMOTE "10.10.3.0" + +#define OUTPUT_MARK 100 +#define INPUT_MARK 100 + +#define CLIENT_REMOTE "10.10.3.4" + +#define TEST_VRF_ID 55 + +/* + * Crypto policy definitions used by the tests in this module + */ +static struct dp_test_crypto_policy output_policy = { + .d_prefix = "0.0.0.0/0", + .s_prefix = "0.0.0.0/0", + .proto = 0, + .dst = PEER, + .dst_family = AF_INET, + .dir = XFRM_POLICY_OUT, + .family = AF_INET, + .reqid = VTI_TUN_REQID, + .priority = 0, + .mark = OUTPUT_MARK, + .vrfid = VRF_DEFAULT_ID +}; + +static struct dp_test_crypto_policy input_policy = { + .d_prefix = "0.0.0.0/0", + .s_prefix = "0.0.0.0/0", + .proto = 0, + .dst = PORT_EAST, + .dst_family = AF_INET, + .dir = XFRM_POLICY_IN, + .family = AF_INET, + .reqid = VTI_TUN_REQID, + .priority = 0, + .mark = INPUT_MARK, + .vrfid = VRF_DEFAULT_ID +}; + +/* + * Crypto SA definitions used by the tests in this module + */ +static struct dp_test_crypto_sa output_sa = { + .auth_algo = CRYPTO_AUTH_HMAC_SHA1, + .spi = SPI_OUTBOUND, + .d_addr = PEER, + .s_addr = PORT_EAST, + .family = AF_INET, + .mode = XFRM_MODE_TUNNEL, + .reqid = VTI_TUN_REQID, + .mark = OUTPUT_MARK, + .vrfid = VRF_DEFAULT_ID +}; + +static struct dp_test_crypto_sa input_sa = { + .auth_algo = CRYPTO_AUTH_HMAC_SHA1, + .spi = SPI_INBOUND, + .d_addr = PORT_EAST, + .s_addr = PEER, + .family = AF_INET, + .mode = XFRM_MODE_TUNNEL, + .reqid = VTI_TUN_REQID, + .mark = 
INPUT_MARK, + .vrfid = VRF_DEFAULT_ID +}; + +static void vti_setup_policies_and_sas(vrfid_t vrfid) +{ + input_policy.vrfid = vrfid; + output_policy.vrfid = vrfid; + dp_test_crypto_create_policy(&input_policy); + dp_test_crypto_create_policy(&output_policy); + + input_sa.vrfid = vrfid; + output_sa.vrfid = vrfid; + dp_test_crypto_create_sa(&input_sa); + dp_test_crypto_create_sa(&output_sa); +} + +static void vti_teardown_sas_and_policy(void) +{ + dp_test_crypto_delete_policy(&input_policy); + dp_test_crypto_delete_policy(&output_policy); + + dp_test_crypto_delete_sa(&input_sa); + dp_test_crypto_delete_sa(&output_sa); +} + +static void vti_setup_tunnel(vrfid_t vrf_id, uint16_t mark_out) +{ + char route_name[DP_TEST_MAX_ROUTE_STRING_LEN]; + + if (vrf_id != VRF_DEFAULT_ID) + dp_test_netlink_add_vrf(vrf_id, 1); + + /* Input interface and connected route is in the requested VRF */ + dp_test_nl_add_ip_addr_and_connected_vrf("dp1T1", + "10.10.1.2/24", vrf_id); + dp_test_netlink_add_neigh("dp1T1", CLIENT_LOCAL, CLIENT_LOCAL_MAC_ADDR); + + /* Output interface and connected route are in default VRF */ + dp_test_nl_add_ip_addr_and_connected("dp2T2", "10.10.2.2/24"); + dp_test_netlink_add_neigh("dp2T2", "10.10.2.3", PEER_MAC_ADDR); + + dp_test_intf_vti_create("vti0", "10.10.2.2", "10.10.2.3", + mark_out, vrf_id); + dp_test_netlink_add_ip_address_vrf("vti0", "5.5.5.5/24", vrf_id); + snprintf(route_name, sizeof(route_name), "vrf:%d %s nh %s int:vti0", + vrf_id, "10.10.3.0/24", PEER); + dp_test_netlink_add_route(route_name); + + dp_test_crypto_check_sa_count(vrf_id, 0); +} + +static void vti_teardown_tunnel(vrfid_t vrf_id) +{ + char route_name[DP_TEST_MAX_ROUTE_STRING_LEN]; + + snprintf(route_name, sizeof(route_name), "vrf:%d %s nh %s int:vti0", + vrf_id, "10.10.3.0/24", PEER); + dp_test_netlink_del_route(route_name); + + dp_test_netlink_del_ip_address_vrf("vti0", "5.5.5.5/24", vrf_id); + dp_test_intf_vti_delete("vti0", PORT_EAST, PEER, 10, vrf_id); + 
dp_test_netlink_del_neigh("dp2T2", PEER, PEER_MAC_ADDR); + dp_test_nl_del_ip_addr_and_connected("dp2T2", "10.10.2.2/24"); + dp_test_netlink_del_neigh("dp1T1", CLIENT_LOCAL, CLIENT_LOCAL_MAC_ADDR); + dp_test_nl_del_ip_addr_and_connected_vrf("dp1T1", "10.10.1.2/24", + vrf_id); + + if (vrf_id != VRF_DEFAULT_ID) + dp_test_netlink_del_vrf(vrf_id, 0); +} + +static void vti_count(struct ifnet *ifp, void *arg) +{ + int *count = (int *)arg; + + if (ifp->if_type == IFT_TUNNEL_VTI) + (*count)++; +} + +static int vti_count_of_vtis(void) +{ + int count = 0; + + dp_ifnet_walk(vti_count, &count); + return count; +} + +/* + * build_encrypted_input_packet() + * + * This helper function creates an input ESP packet containing + * an encrypted ICMP ping packet from 10.10.3.4 to 10.10.1.1. + */ +static struct rte_mbuf *build_encrypted_input_packet(void) +{ + int payload_len; + const char encrypted_payload[] = { + 0x64, 0xc8, 0x6e, 0x89, 0x53, 0x45, 0x54, 0xd6, 0xb1, 0x0c, + 0x8c, 0xca, 0xc4, 0x44, 0xbf, 0xd3, 0xe4, 0xac, 0x69, 0xfb, + 0x6e, 0xf2, 0x98, 0x2c, 0x4e, 0x19, 0xd6, 0x8f, 0xd1, 0x72, + 0xfb, 0x67, 0x3c, 0x14, 0xc8, 0x00, 0x34, 0x4a, 0x08, 0x3d, + 0xe6, 0x3d, 0xeb, 0x3b, 0xeb, 0x90, 0xd8, 0xe1, 0x28, 0xa5, + 0xd2, 0x1b, 0xa1, 0xb1, 0xcf, 0xf4, 0xf4, 0x3e, 0x1d, 0x6b, + 0xa2, 0x8d, 0xb2, 0x2c, 0x5e, 0x60, 0x7f, 0x81, 0x3b, 0x79, + 0xb5, 0x10, 0xe2, 0x78, 0x7c, 0xd7, 0x19, 0xcf, 0x14, 0x80, + 0xca, 0x31, 0xa8, 0x4d, 0xf8, 0xde, 0x31, 0x3d, 0x61, 0x4d, + 0x5d, 0xed, 0x02, 0x1a, 0x91, 0x5d, 0x7c, 0x36, 0x9d, 0xce, + 0x2f, 0x1c, 0x57, 0x75, 0x8b, 0xe2, 0xa1, 0xdc, 0xf9, 0x4a, + 0x33, 0x97, 0x2a, 0x71, 0x7b, 0x16, 0x88, 0x59, 0x3d, 0x09, + 0xc8, 0x89, 0xa8, 0x31 + }; + + payload_len = sizeof(encrypted_payload); + + return dp_test_create_esp_ipv4_pak(PEER, PORT_EAST, 1, + &payload_len, + encrypted_payload, + SPI_INBOUND, + 1 /* seq no */, + 0 /* ip ID */, + 63 /* ttl */, + NULL /* udp/esp */, + NULL /* transport_hdr*/); +} + +/* + * build_expected_icmp_packet() + * + * This helper 
function builds an output ICMP packet that + * corresponds to the encrypted payload in the ESP packet + * built by build_encrypted_input_packet(). + */ +static struct rte_mbuf *build_expected_icmp_packet(int *payload_len) +{ + struct iphdr *ip; + struct rte_mbuf *packet; + const uint8_t payload[] = { + 0x2c, 0x57, 0xba, 0x55, 0x00, 0x00, 0x00, 0x00, 0xd9, 0xe9, + 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x03, 0x04, + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, + 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, + 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, 0x03, 0x04, 0x01, 0x02, + 0x03, 0x04, 0x01, 0x02, 0x03, 0x04 + }; + + *payload_len = sizeof(payload); + + packet = dp_test_create_icmp_ipv4_pak(CLIENT_REMOTE, CLIENT_LOCAL, + ICMP_ECHO /* echo request */, + 0 /* no code */, + DPT_ICMP_ECHO_DATA(0xac9, 1), + 1 /* one mbuf */, + payload_len, + payload, + &ip, NULL); + + /* + * The resulting ICMP packet isn't exactly as + * we want, so tickle a few bits into shape + */ + dp_test_set_pak_ip_field(ip, DP_TEST_SET_IP_ID, 0xea53); + dp_test_set_pak_ip_field(ip, DP_TEST_SET_DF, 1); + dp_test_set_pak_ip_field(ip, DP_TEST_SET_TTL, + DP_TEST_PAK_DEFAULT_TTL - 2); + + return packet; +} + + +/* + * Zone "EAST" Zone "WEST" + * + * rx -> tx -> + * +---------+ + * dp2T2 | | dp1T1 + * ----------+ +---------- + * vti0 | | + * +---------+ + * + */ +DP_DECL_TEST_CASE(npf_zone, zone50, NULL, NULL); +DP_START_TEST(zone50, test) +{ + struct rte_mbuf *output_packet; + struct rte_mbuf *input_packet; + struct dp_test_expected *exp; + int decrypted_payload_len; + + struct dpt_zone_cfg cfg = { + .private = { + .name = "EAST", + .intf = { "vti0", NULL, NULL }, + .local = false, + }, + .public = { + .name = "WEST", + .intf = { "dp1T1", NULL, NULL }, + .local = false, + }, + .local = { 0 }, + .pub_to_priv = { + .name = "EAST_TO_WEST", + .pass = PASS, + .stateful = STATELESS, + .npf = "", + }, + .priv_to_pub = { + .name = "WEST_TO_EAST", + .pass = PASS, + .stateful = 
STATELESS, + .npf = "", + }, + .local_to_priv = { 0 }, + .priv_to_local = { 0 }, + .local_to_pub = { 0 }, + .pub_to_local = { 0 }, + }; + + vti_setup_tunnel(VRF_DEFAULT_ID, OUTPUT_MARK); + vti_setup_policies_and_sas(VRF_DEFAULT_ID); + dp_test_fail_unless((vti_count_of_vtis() == 1), + "Expected VTI to be created"); + + /* + * Create the input encrypted packet. + */ + input_packet = build_encrypted_input_packet(); + (void)dp_test_pktmbuf_eth_init(input_packet, + dp_test_intf_name2mac_str("dp2T2"), + NULL, RTE_ETHER_TYPE_IPV4); + + /* + * Create the expected decrypted ping packet + */ + output_packet = build_expected_icmp_packet(&decrypted_payload_len); + (void)dp_test_pktmbuf_eth_init(output_packet, + CLIENT_LOCAL_MAC_ADDR, + dp_test_intf_name2mac_str("dp1T1"), + RTE_ETHER_TYPE_IPV4); + + /* Add zones config */ + dpt_zone_cfg(&cfg, true, false); + + /* + * Create an expectation for the decrypted ICMP ping packet on dp1T1. + */ + exp = dp_test_exp_create(output_packet); + rte_pktmbuf_free(output_packet); + + dp_test_exp_set_oif_name(exp, "dp1T1"); + + dp_test_pak_receive(input_packet, "dp2T2", exp); + dp_test_crypto_check_sad_packets(VRF_DEFAULT_ID, 1, 84); + + /* Remove zones config */ + dpt_zone_cfg(&cfg, false, false); + + vti_teardown_tunnel(VRF_DEFAULT_ID); + vti_teardown_sas_and_policy(); + dp_test_fail_unless((vti_count_of_vtis() == 0), + "Expected VTI to be deleted"); + +} DP_END_TEST; + diff --git a/tests/whole_dp/src/dp_test_pbr.c b/tests/whole_dp/src/dp_test_pbr.c index 898ae82e..0ca1b022 100644 --- a/tests/whole_dp/src/dp_test_pbr.c +++ b/tests/whole_dp/src/dp_test_pbr.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015-2016 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -22,11 +22,10 @@ #include "dp_test.h" #include "dp_test_controller.h" #include "dp_test_console.h" -#include "dp_test_netlink_state.h" -#include "dp_test_cmd_check.h" -#include "dp_test_lib.h" -#include "dp_test_pktmbuf_lib.h" -#include "dp_test_lib_intf.h" +#include "dp_test_netlink_state_internal.h" +#include "dp_test/dp_test_cmd_check.h" +#include "dp_test_lib_intf_internal.h" +#include "dp_test_pktmbuf_lib_internal.h" #include "dp_test_lib_exp.h" DP_DECL_TEST_SUITE(pbr_suite); @@ -223,6 +222,7 @@ _pbr_set_policy_ip(const char *intf, const char *name, int rule, if (dst != NULL) len += snprintf(match + len, sizeof(match) - len, "to %s ", dst); + (void) len; if (vrf_id) { snprintf(vrf_rproc, sizeof(vrf_rproc), @@ -269,7 +269,7 @@ pbr_create_pak(const char *src, const char *dst, const char *oif, struct dp_test_expected *exp; int len = 22; - if (type == ETHER_TYPE_IPv4) + if (type == RTE_ETHER_TYPE_IPV4) pak = dp_test_create_ipv4_pak(src, dst, 1, &len); else pak = dp_test_create_ipv6_pak(src, dst, 1, &len); @@ -286,7 +286,7 @@ pbr_create_pak(const char *src, const char *dst, const char *oif, "failed to create exp"); dp_test_pktmbuf_eth_init(dp_test_exp_get_pak(exp), nh_mac, dp_test_intf_name2mac_str(oif), type); - if (type == ETHER_TYPE_IPv4) + if (type == RTE_ETHER_TYPE_IPV4) dp_test_ipv4_decrement_ttl(dp_test_exp_get_pak(exp)); else dp_test_ipv6_decrement_ttl(dp_test_exp_get_pak(exp)); @@ -366,9 +366,9 @@ pbr_setup(void) } #define PBR_V4PAK(s, d, oif, nh, pp) \ - pbr_create_pak(s, d, oif, ETHER_TYPE_IPv4, nh, pp, __LINE__) + pbr_create_pak(s, d, oif, RTE_ETHER_TYPE_IPV4, nh, pp, __LINE__) #define PBR_V6PAK(s, d, oif, nh, pp) \ - pbr_create_pak(s, d, oif, ETHER_TYPE_IPv6, nh, pp, __LINE__) + pbr_create_pak(s, d, oif, RTE_ETHER_TYPE_IPV6, nh, pp, __LINE__) DP_DECL_TEST_CASE(pbr_suite, pbr, pbr_setup, pbr_teardown); @@ -572,8 +572,9 @@ DP_START_TEST(pbr_drop, v6) #define STRINGIFY(x) STRINGIFZ(x) static void -pbr_vrf_teardown(void) +_pbr_vrf_teardown(void) { + 
pbr_del_policy(PBR_IN_IFN, "pbr:pbr4"); dp_test_netlink_del_route(dp_test_default_route_ipv4); dp_test_netlink_del_neigh(PBR_OUT_IFN, PBR_OUT_NH4, PBR_OUT_NH4_MAC); @@ -594,7 +595,19 @@ pbr_vrf_teardown(void) } static void -pbr_vrf_setup(void) +pbr_vrf_teardown(void) +{ + vrfid_t xvrfid = dp_test_translate_vrf_id(TEST_VRF); + dp_test_send_config_src(dp_test_cont_src_get(), + "tablemap %d %d 0 %d", + TEST_VRF, + TEST_TABLEID, + xvrfid); + _pbr_vrf_teardown(); +} + +static void +_pbr_vrf_setup(void) { pbr_del_policy(PBR_IN_IFN, "pbr:pbr4"); dp_test_nl_add_ip_addr_and_connected(PBR_IN_IFN, PBR_IN_IFN_ADDR4); @@ -614,6 +627,13 @@ pbr_vrf_setup(void) dp_test_netlink_add_route(dp_test_default_route_ipv6); dp_test_netlink_add_neigh(PBR_OUT_IFN, PBR_OUT_NH6, PBR_OUT_NH6_MAC); +} + +static void +pbr_vrf_setup(void) +{ + _pbr_vrf_setup(); + vrfid_t xvrfid = dp_test_translate_vrf_id(TEST_VRF); dp_test_send_config_src(dp_test_cont_src_get(), "tablemap %d %d %d %d", @@ -622,6 +642,7 @@ pbr_vrf_setup(void) xvrfid); } + DP_DECL_TEST_CASE(pbr_suite, pbr_x_vrf, pbr_vrf_setup, pbr_vrf_teardown); DP_START_TEST(pbr_x_vrf, v4) @@ -735,7 +756,7 @@ DP_START_TEST(pbr_x_vrf, v4_tableid) */ pbr_set_policy_ip_vrf(PBR_IN_IFN, "pbr:pbr4", 10, PBR_ACCEPT, PBR_ACCEPT_SHOW, PBR_IPV4, NULL, dstpfx, - TEST_VRF, MAPPED_TABLEID_1); + TEST_VRF, TEST_TABLEID); dp_test_netlink_add_route("vrf:" STRINGIFY(TEST_VRF) " " "tbl:" STRINGIFY(MAPPED_TABLEID_1) " " PBR_TEST_4DST "/32 nh " @@ -782,7 +803,7 @@ DP_START_TEST(pbr_x_vrf, v6_tableid) */ pbr_set_policy_ip_vrf(PBR_IN_IFN, "pbr:pbr6", 10, PBR_ACCEPT, PBR_ACCEPT_SHOW, PBR_IPV6, NULL, dstpfx, - TEST_VRF, MAPPED_TABLEID_1); + TEST_VRF, TEST_TABLEID); dp_test_netlink_add_route("vrf:" STRINGIFY(TEST_VRF) " " "tbl:" STRINGIFY(MAPPED_TABLEID_1) " " PBR_TEST_6DST "/128 nh " @@ -811,10 +832,25 @@ DP_START_TEST(pbr_x_vrf, v6_tableid) static void pbr_in_vrf_teardown(void) { + vrfid_t xvrfid; + + xvrfid = dp_test_translate_vrf_id(TEST_VRF); + + 
dp_test_send_config_src(dp_test_cont_src_get(), + "tablemap %d %d 0 %d", + TEST_VRF, + POLICY_PBR_TABLEID_2, + xvrfid); + dp_test_send_config_src(dp_test_cont_src_get(), + "tablemap %d %d 0 %d", + TEST_VRF, + POLICY_PBR_TABLEID_1, + xvrfid); + dp_test_netlink_set_interface_vrf(PBR_TEST_IFN, VRF_DEFAULT_ID); dp_test_netlink_set_interface_vrf(PBR_IN_IFN, VRF_DEFAULT_ID); - pbr_vrf_teardown(); + _pbr_vrf_teardown(); } static void @@ -826,7 +862,7 @@ pbr_in_vrf_setup(void) xvrfid = dp_test_translate_vrf_id(TEST_VRF); dp_test_netlink_del_vrf(TEST_VRF, 0); - pbr_vrf_setup(); + _pbr_vrf_setup(); dp_test_netlink_set_interface_vrf(PBR_TEST_IFN, TEST_VRF); dp_test_netlink_set_interface_vrf(PBR_IN_IFN, TEST_VRF); @@ -973,8 +1009,7 @@ DP_START_TEST(pbr_in_vrf_no_map, v4_tableid) pbr_set_policy_ip(PBR_IN_IFN, "pbr:pbr4", 10, PBR_ACCEPT, PBR_ACCEPT_SHOW, PBR_IPV4, NULL, dstpfx, POLICY_NON_PBR_TABLEID); - dp_test_netlink_add_route("vrf:" STRINGIFY(TEST_VRF) " " - "tbl:" STRINGIFY(POLICY_NON_PBR_TABLEID) " " + dp_test_netlink_add_route("tbl:" STRINGIFY(POLICY_NON_PBR_TABLEID) " " PBR_TEST_4DST "/32 nh " PBR_TEST_NH4 " int:" PBR_TEST_IFN); dp_test_netlink_add_neigh(PBR_TEST_IFN, PBR_TEST_NH4, PBR_TEST_NH4_MAC); @@ -982,6 +1017,7 @@ DP_START_TEST(pbr_in_vrf_no_map, v4_tableid) exp = PBR_V4PAK(PBR_TEST_4SRC, PBR_TEST_4DST, PBR_TEST_IFN, PBR_TEST_NH4_MAC, &test_pak); + dp_test_exp_set_fwd_status(exp, DP_TEST_FWD_DROPPED); dp_test_check_state_show("npf-op show all: pbr", "\"packets\": 0", pbr_debug); @@ -990,8 +1026,7 @@ DP_START_TEST(pbr_in_vrf_no_map, v4_tableid) dp_test_check_state_show("npf-op show all: pbr", "\"packets\": 1", pbr_debug); - dp_test_netlink_del_route("vrf:" STRINGIFY(TEST_VRF) " " - "tbl:" STRINGIFY(POLICY_NON_PBR_TABLEID) " " + dp_test_netlink_del_route("tbl:" STRINGIFY(POLICY_NON_PBR_TABLEID) " " PBR_TEST_4DST "/32 nh " PBR_TEST_NH4 " int:" PBR_TEST_IFN); dp_test_netlink_del_neigh(PBR_TEST_IFN, PBR_TEST_NH4, PBR_TEST_NH4_MAC); @@ -1011,8 +1046,7 @@ 
DP_START_TEST(pbr_in_vrf_no_map, v6_tableid) pbr_set_policy_ip(PBR_IN_IFN, "pbr:pbr6", 10, PBR_ACCEPT, PBR_ACCEPT_SHOW, PBR_IPV6, NULL, dstpfx, POLICY_NON_PBR_TABLEID); - dp_test_netlink_add_route("vrf:" STRINGIFY(TEST_VRF) " " - "tbl:" STRINGIFY(POLICY_NON_PBR_TABLEID) " " + dp_test_netlink_add_route("tbl:" STRINGIFY(POLICY_NON_PBR_TABLEID) " " PBR_TEST_6DST "/128 nh " PBR_TEST_NH6 " int:" PBR_TEST_IFN); dp_test_netlink_add_neigh(PBR_TEST_IFN, PBR_TEST_NH6, PBR_TEST_NH6_MAC); @@ -1020,6 +1054,7 @@ DP_START_TEST(pbr_in_vrf_no_map, v6_tableid) exp = PBR_V6PAK(PBR_TEST_6SRC, PBR_TEST_6DST, PBR_TEST_IFN, PBR_TEST_NH6_MAC, &test_pak); + dp_test_exp_set_fwd_status(exp, DP_TEST_FWD_DROPPED); dp_test_check_state_show("npf-op show all: pbr", "\"packets\": 0", pbr_debug); @@ -1028,8 +1063,7 @@ DP_START_TEST(pbr_in_vrf_no_map, v6_tableid) dp_test_check_state_show("npf-op show all: pbr", "\"packets\": 1", pbr_debug); - dp_test_netlink_del_route("vrf:" STRINGIFY(TEST_VRF) " " - "tbl:" STRINGIFY(POLICY_NON_PBR_TABLEID) " " + dp_test_netlink_del_route("tbl:" STRINGIFY(POLICY_NON_PBR_TABLEID) " " PBR_TEST_6DST "/128 nh " PBR_TEST_NH6 " int:" PBR_TEST_IFN); dp_test_netlink_del_neigh(PBR_TEST_IFN, PBR_TEST_NH6, PBR_TEST_NH6_MAC); @@ -1057,7 +1091,7 @@ pbr_in_vrf_setup_config_before_netlink(void) POLICY_PBR_TABLEID_2, MAPPED_TABLEID_2, xvrfid); - pbr_vrf_setup(); + _pbr_vrf_setup(); dp_test_netlink_set_interface_vrf(PBR_TEST_IFN, TEST_VRF); dp_test_netlink_set_interface_vrf(PBR_IN_IFN, TEST_VRF); diff --git a/tests/whole_dp/src/dp_test_pipeline.c b/tests/whole_dp/src/dp_test_pipeline.c index 9b4130be..66093600 100644 --- a/tests/whole_dp/src/dp_test_pipeline.c +++ b/tests/whole_dp/src/dp_test_pipeline.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015-2017 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -7,18 +7,17 @@ * * Whole dataplane test pipeline tests */ -#include "dp_test.h" -#include "dp_test_str.h" + #include "dp_test_lib.h" -#include "dp_test_lib_exp.h" -#include "dp_test_lib_pkt.h" +#include "dp_test_lib_intf.h" +#include "dp_test_macros.h" #include "dp_test_pktmbuf_lib.h" -#include "dp_test_controller.h" -#include "dp_test_json_utils.h" #include "dp_test_netlink_state.h" -#include "dp_test_console.h" -#include "src/pipeline/nodes/sample/SampleFeatConfig.pb-c.h" +#include + +#include "SampleFeatConfig.pb-c.h" +#include "SampleFeatOp.pb-c.h" #include "protobuf/DataplaneEnvelope.pb-c.h" DP_DECL_TEST_SUITE(pipeline); @@ -30,8 +29,6 @@ dp_test_create_and_send_sample_feat_msg(bool enable, const char *ifname) { int len; - void *buf; - SampleFeatConfig samplefeat = SAMPLE_FEAT_CONFIG__INIT; /* set values here */ @@ -41,10 +38,53 @@ dp_test_create_and_send_sample_feat_msg(bool enable, samplefeat.if_name = (char *)ifname; len = sample_feat_config__get_packed_size(&samplefeat); void *buf2 = malloc(len); - dp_test_assert_internal(buf2); + assert(buf2); sample_feat_config__pack(&samplefeat, buf2); + dp_test_lib_pb_wrap_and_send_pb("sample:sample-feat", buf2, len); +} + + +static bool validate_resp_callback(void *data, int len, void *arg) +{ + uint32_t expect_val = *(uint32_t *)arg; /* expected packet count */ + + DataplaneEnvelope *dmsg_resp = + dataplane_envelope__unpack(NULL, len, (unsigned char *)data); + if (!dmsg_resp) { + printf("Failed to read dataplane protobuf message\n"); + return false; + } + + /* now decap pb */ + SampleFeatOpResp *smsg_resp = + sample_feat_op_resp__unpack(NULL, + dmsg_resp->msg.len, + dmsg_resp->msg.data); + if (!smsg_resp) { + printf("unable to read protobuf message: %p, %d\n", data, len); + dataplane_envelope__free_unpacked(dmsg_resp, NULL); + return false; + } + + uint32_t received_val = smsg_resp->count; + + sample_feat_op_resp__free_unpacked(smsg_resp, NULL); + dataplane_envelope__free_unpacked(dmsg_resp, NULL); + return 
(received_val == expect_val); +} + +static void dp_test_pl_build_and_check_start_count(int init_pkt_cnt) +{ + SampleFeatOpReq sampleop = SAMPLE_FEAT_OP_REQ__INIT; + + int len = sample_feat_op_req__get_packed_size(&sampleop); + void *buf2 = malloc(len); + assert(buf2); + + sample_feat_op_req__pack(&sampleop, buf2); + DataplaneEnvelope msg = DATAPLANE_ENVELOPE__INIT; msg.type = strdup("sample:sample-feat"); msg.msg.data = buf2; @@ -52,56 +92,95 @@ dp_test_create_and_send_sample_feat_msg(bool enable, len = dataplane_envelope__get_packed_size(&msg); - buf = malloc(len); - dp_test_assert_internal(buf); + unsigned char *buf = malloc(len); + assert(buf); dataplane_envelope__pack(&msg, buf); - dp_test_send_config_src_pb(dp_test_cont_src_get(), buf, len); + uint32_t val = init_pkt_cnt + 1; + void *arg = &val; + dp_test_check_pb_state(buf, len, + validate_resp_callback, + arg); free(buf2); free(msg.type); free(buf); } +static bool get_start_count_callback(void *data, int len, void *arg) +{ + DataplaneEnvelope *dmsg_resp = + dataplane_envelope__unpack(NULL, len, (unsigned char *)data); + if (!dmsg_resp) { + printf("Failed to read dataplane protobuf message\n"); + return false; + } + + /* now decap pb */ + SampleFeatOpResp *smsg_resp = + sample_feat_op_resp__unpack(NULL, + dmsg_resp->msg.len, + dmsg_resp->msg.data); + if (!smsg_resp) { + printf("unable to read protobuf message: %p, %d\n", data, len); + dataplane_envelope__free_unpacked(dmsg_resp, NULL); + return false; + } + uint32_t received_val = smsg_resp->count; + + *(uint32_t *)arg = received_val; + + sample_feat_op_resp__free_unpacked(smsg_resp, NULL); + dataplane_envelope__free_unpacked(dmsg_resp, NULL); + + return true; +} static void dp_test_pl_get_start_count(int *ipv4_val_cnt) { - json_object *jvalue; - json_object *jresp; - char *response; - bool err; - *ipv4_val_cnt = INT_MAX; + int len; + void *buf; + + SampleFeatOpReq sampleop = SAMPLE_FEAT_OP_REQ__INIT; - response = dp_test_console_request_w_err( - "pipeline 
sample-feat show", &err, false); - if (!response || err) - return; + len = sample_feat_op_req__get_packed_size(&sampleop); + void *buf2 = malloc(len); + assert(buf2); + + sample_feat_op_req__pack(&sampleop, buf2); + + DataplaneEnvelope msg = DATAPLANE_ENVELOPE__INIT; + msg.type = strdup("sample:sample-feat"); + msg.msg.data = buf2; + msg.msg.len = len; + + len = dataplane_envelope__get_packed_size(&msg); + + buf = malloc(len); + assert(buf); - jresp = parse_json(response, NULL, 0); - free(response); + dataplane_envelope__pack(&msg, buf); - if (!jresp) - return; + dp_test_check_pb_state((char *)buf, len, + get_start_count_callback, + ipv4_val_cnt); - if (!json_object_object_get_ex(jresp, "sample-feat", &jvalue)) - return; + free(buf2); + free(msg.type); + free(buf); - dp_test_json_int_field_from_obj(jvalue, - "ipv4-validate-packet-count", - ipv4_val_cnt); - json_object_put(jresp); } + DP_START_TEST(dyn_feat, dyn_feat_ipv4) { const char *nh_mac_str = "aa:bb:cc:dd:2:b1"; struct dp_test_expected *exp; - json_object *expected_json; char real_ifname[IFNAMSIZ]; struct rte_mbuf *test_pak; - int init_pkt_cnt; + int init_pkt_cnt = 0; int len = 22; /* Setup interfaces and neighbours */ @@ -127,7 +206,7 @@ DP_START_TEST(dyn_feat, dyn_feat_ipv4) dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T0"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); /* Create pak we expect to receive on the tx ring */ exp = dp_test_exp_create(test_pak); @@ -135,23 +214,12 @@ DP_START_TEST(dyn_feat, dyn_feat_ipv4) dp_test_pktmbuf_eth_init(dp_test_exp_get_pak(exp), nh_mac_str, dp_test_intf_name2mac_str("dp2T1"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); dp_test_ipv4_decrement_ttl(dp_test_exp_get_pak(exp)); dp_test_pak_receive(test_pak, "dp1T0", exp); /* Verify that the feature saw the packet */ - expected_json = dp_test_json_create( - "{" - " \"sample-feat\":" - " { " - " \"ipv4-validate-packet-count\": %d," - " }" - "}", - init_pkt_cnt + 1); - 
dp_test_check_json_state("pipeline sample-feat show", - expected_json, - DP_TEST_JSON_CHECK_SUBSET, false); - json_object_put(expected_json); + dp_test_pl_build_and_check_start_count(init_pkt_cnt); /* Disable the feature and check it's gone */ dp_test_create_and_send_sample_feat_msg(false, @@ -168,3 +236,14 @@ DP_START_TEST(dyn_feat, dyn_feat_ipv4) dp_test_nl_del_ip_addr_and_connected("dp2T1", "2.2.2.2/24"); } DP_END_TEST; + +static const char *plugin_name = "dp_test_pipeline"; + +int dp_ut_plugin_init(const char **name) +{ + int rv = 0; + + *name = plugin_name; + + return rv; +} diff --git a/tests/common/src/dp_test_pktmbuf_lib.c b/tests/whole_dp/src/dp_test_pktmbuf_lib.c similarity index 93% rename from tests/common/src/dp_test_pktmbuf_lib.c rename to tests/whole_dp/src/dp_test_pktmbuf_lib.c index fc70c180..16e13a60 100644 --- a/tests/common/src/dp_test_pktmbuf_lib.c +++ b/tests/whole_dp/src/dp_test_pktmbuf_lib.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015-2017 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -23,11 +23,12 @@ #include #include #include +#include #include #include #include "compiler.h" -#include "dp_test_pktmbuf_lib.h" +#include "dp_test_pktmbuf_lib_internal.h" #include "in_cksum.h" #include "netinet6/ip6_funcs.h" @@ -180,12 +181,12 @@ dp_test_create_mbuf_chain(int n, const int *plen, uint16_t hlen) static void dp_test_pktmbuf_mac_set(struct rte_mbuf *m, const char *mac_str, bool src_mac) { - struct ether_addr *mac_field; - struct ether_hdr *eth; + struct rte_ether_addr *mac_field; + struct rte_ether_hdr *eth; assert(m); assert(mac_str); - eth = rte_pktmbuf_mtod(m, struct ether_hdr *); + eth = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); if (src_mac) mac_field = ð->s_addr; else @@ -210,12 +211,12 @@ dp_test_pktmbuf_dmac_set(struct rte_mbuf *m, const char *dmac_str) static void dp_test_pktmbuf_mac_get(struct rte_mbuf *m, char *mac, bool src_mac) { - struct ether_addr *mac_field; - struct ether_hdr *eth; + struct rte_ether_addr *mac_field; + struct rte_ether_hdr *eth; assert(m); assert(mac); - eth = rte_pktmbuf_mtod(m, struct ether_hdr *); + eth = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); if (src_mac) mac_field = ð->s_addr; else @@ -248,24 +249,24 @@ dp_test_pktmbuf_dmac_get(struct rte_mbuf *m, char *dmac_str) * * @return Pointer to eth header if successful, else NULL */ -static struct ether_hdr * +static struct rte_ether_hdr * dp_test_pktmbuf_eth(struct rte_mbuf *m, const char *d_addr, const char *s_addr, uint16_t ether_type, bool prepend) { - struct ether_hdr *eth; + struct rte_ether_hdr *eth; if (m->l2_len == 0 || prepend) { - m->l2_len = sizeof(struct ether_hdr); - eth = (struct ether_hdr *)rte_pktmbuf_prepend(m, m->l2_len); + m->l2_len = sizeof(struct rte_ether_hdr); + eth = (struct rte_ether_hdr *)rte_pktmbuf_prepend(m, m->l2_len); if (!eth) { DP_PKTMBUF_DBG("Failed to prepend eth header\n"); return NULL; } } else { - eth = rte_pktmbuf_mtod(m, struct ether_hdr *); + eth = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); } eth->ether_type = 
htons(ether_type); @@ -282,14 +283,14 @@ dp_test_pktmbuf_eth(struct rte_mbuf *m, return eth; } -struct ether_hdr * +struct rte_ether_hdr * dp_test_pktmbuf_eth_init(struct rte_mbuf *m, const char *d_addr, const char *s_addr, uint16_t ether_type) { return dp_test_pktmbuf_eth(m, d_addr, s_addr, ether_type, false); } -struct ether_hdr * +struct rte_ether_hdr * dp_test_pktmbuf_eth_prepend(struct rte_mbuf *m, const char *d_addr, const char *s_addr, uint16_t ether_type) { @@ -308,24 +309,24 @@ void dp_test_pktmbuf_eth_hdr_replace(struct rte_mbuf *m_target, struct rte_mbuf *m_origin) { - struct ether_hdr *eth_target, *eth_origin; + struct rte_ether_hdr *eth_target, *eth_origin; assert(m_target); assert(m_origin); if (m_target->l2_len == 0) { - m_target->l2_len = sizeof(struct ether_hdr); - eth_target = (struct ether_hdr *) + m_target->l2_len = sizeof(struct rte_ether_hdr); + eth_target = (struct rte_ether_hdr *) rte_pktmbuf_prepend(m_target, m_target->l2_len); if (!eth_target) { DP_PKTMBUF_DBG("Failed to replace eth header\n"); return; } } - assert(m_origin->l2_len >= sizeof(struct ether_hdr)); - eth_target = rte_pktmbuf_mtod(m_target, struct ether_hdr *); - eth_origin = rte_pktmbuf_mtod(m_origin, struct ether_hdr *); - memcpy(eth_target, eth_origin, sizeof(struct ether_hdr)); - m_origin->l2_len = sizeof(struct ether_hdr); + assert(m_origin->l2_len >= sizeof(struct rte_ether_hdr)); + eth_target = rte_pktmbuf_mtod(m_target, struct rte_ether_hdr *); + eth_origin = rte_pktmbuf_mtod(m_origin, struct rte_ether_hdr *); + memcpy(eth_target, eth_origin, sizeof(struct rte_ether_hdr)); + m_origin->l2_len = sizeof(struct rte_ether_hdr); } /** @@ -359,7 +360,7 @@ dp_test_pktmbuf_ip(struct rte_mbuf *m, const char *src, const char *dst, m->l3_len = sizeof(struct iphdr); } else { m->l3_len = sizeof(struct iphdr); - ip = pktmbuf_mtol3(m, struct iphdr *); + ip = dp_pktmbuf_mtol3(m, struct iphdr *); } hlen = m->l2_len + sizeof(*ip); @@ -395,7 +396,7 @@ dp_test_pktmbuf_ip(struct rte_mbuf *m, 
const char *src, const char *dst, /* Set checksum */ ip->check = 0; - ip->check = rte_ipv4_cksum((const struct ipv4_hdr *)ip); + ip->check = rte_ipv4_cksum((const struct rte_ipv4_hdr *)ip); return ip; } @@ -446,7 +447,7 @@ dp_test_pktmbuf_ip6_init(struct rte_mbuf *m, return NULL; } - ip6 = pktmbuf_mtol3(m, struct ip6_hdr *); + ip6 = dp_pktmbuf_mtol3(m, struct ip6_hdr *); m->l3_len = sizeof(*ip6); plen = m->pkt_len - m->l2_len - m->l3_len; @@ -508,7 +509,7 @@ dp_test_ipv4_udptcp_cksum(const struct rte_mbuf *m, const struct iphdr *ip, cur_len += seg->data_len; } - return rte_ipv4_udptcp_cksum((const struct ipv4_hdr *)ip, buf); + return rte_ipv4_udptcp_cksum((const struct rte_ipv4_hdr *)ip, buf); } /** @@ -545,7 +546,7 @@ dp_test_ipv6_udptcp_cksum(const struct rte_mbuf *m, const struct ip6_hdr *ip6, cksum = ((cksum & 0xffff0000) >> 16) + (cksum & 0xffff); /* checksum pseudo IPv6 header */ - cksum += rte_ipv6_phdr_cksum((const struct ipv6_hdr *)ip6, 0); + cksum += rte_ipv6_phdr_cksum((const struct rte_ipv6_hdr *)ip6, 0); cksum = ((cksum & 0xffff0000) >> 16) + (cksum & 0xffff); cksum = (~cksum) & 0xffff; @@ -627,7 +628,7 @@ dp_test_ipv6_icmp_cksum(const struct rte_mbuf *m, const struct ip6_hdr *ip6, cksum = ((cksum & 0xffff0000) >> 16) + (cksum & 0xffff); /* checksum pseudo IPv6 header */ - cksum += rte_ipv6_phdr_cksum((const struct ipv6_hdr *)ip6, 0); + cksum += rte_ipv6_phdr_cksum((const struct rte_ipv6_hdr *)ip6, 0); cksum = ((cksum & 0xffff0000) >> 16) + (cksum & 0xffff); cksum = (~cksum) & 0xffff; @@ -637,16 +638,16 @@ dp_test_ipv6_icmp_cksum(const struct rte_mbuf *m, const struct ip6_hdr *ip6, return cksum; } -static struct vxlan_hdr * +static struct rte_vxlan_hdr * dp_test_pktmbuf_vxlan(struct rte_mbuf *m, uint32_t vx_flags, uint32_t vx_vni, bool prepend) { - struct vxlan_hdr *vxlan; + struct rte_vxlan_hdr *vxlan; uint16_t hlen; if (prepend) { - vxlan = (struct vxlan_hdr *) - rte_pktmbuf_prepend(m, sizeof(struct vxlan_hdr)); + vxlan = (struct rte_vxlan_hdr *) + 
rte_pktmbuf_prepend(m, sizeof(struct rte_vxlan_hdr)); if (!vxlan) { DP_PKTMBUF_DBG("Failed to prepend vxlan header\n"); return NULL; @@ -656,10 +657,10 @@ dp_test_pktmbuf_vxlan(struct rte_mbuf *m, uint32_t vx_flags, uint32_t vx_vni, m->l4_len = 0; } else { /* There is no _mtol5 function so calc l5 */ - vxlan = pktmbuf_mtol4(m, struct vxlan_hdr *) + 1; + vxlan = dp_pktmbuf_mtol4(m, struct rte_vxlan_hdr *) + 1; } - hlen = m->l2_len + m->l3_len + m->l4_len + sizeof(struct vxlan_hdr); + hlen = m->l2_len + m->l3_len + m->l4_len + sizeof(struct rte_vxlan_hdr); /* Is there room for VXLAN hdr in first mbuf? */ if (hlen > m->data_len) { @@ -674,14 +675,14 @@ dp_test_pktmbuf_vxlan(struct rte_mbuf *m, uint32_t vx_flags, uint32_t vx_vni, return vxlan; } -struct vxlan_hdr * +struct rte_vxlan_hdr * dp_test_pktmbuf_vxlan_init(struct rte_mbuf *m, uint32_t vx_flags, uint32_t vx_vni) { return dp_test_pktmbuf_vxlan(m, vx_flags, vx_vni, false); } -struct vxlan_hdr * +struct rte_vxlan_hdr * dp_test_pktmbuf_vxlan_prepend(struct rte_mbuf *m, uint32_t vx_flags, uint32_t vx_vni) { @@ -733,7 +734,7 @@ dp_test_pktmbuf_udp(struct rte_mbuf *m, uint16_t sport, uint16_t dport, return NULL; } - udp = pktmbuf_mtol4(m, struct udphdr *); + udp = dp_pktmbuf_mtol4(m, struct udphdr *); memset(udp, 0, sizeof(*udp)); udp->source = htons(sport); @@ -876,7 +877,7 @@ dp_test_pktmbuf_tcp_init(struct rte_mbuf *m, return NULL; } - tcp = pktmbuf_mtol4(m, struct tcphdr *); + tcp = dp_pktmbuf_mtol4(m, struct tcphdr *); memset(tcp, 0, l4_len); tcp->source = htons(sport); @@ -957,7 +958,7 @@ dp_test_pktmbuf_icmp_init(struct rte_mbuf *m, uint8_t icmp_type, du.udata32 = data; - icmp = pktmbuf_mtol4(m, struct icmphdr *); + icmp = dp_pktmbuf_mtol4(m, struct icmphdr *); memset(icmp, 0, sizeof(*icmp)); icmp->type = icmp_type; icmp->code = icmp_code; @@ -1009,8 +1010,8 @@ dp_test_pktmbuf_icmp6_init(struct rte_mbuf *m, uint8_t icmp6_type, return NULL; } - ip6 = pktmbuf_mtol3(m, struct ip6_hdr *); - icmp6 = pktmbuf_mtol4(m, 
struct icmp6_hdr *); + ip6 = dp_pktmbuf_mtol3(m, struct ip6_hdr *); + icmp6 = dp_pktmbuf_mtol4(m, struct icmp6_hdr *); memset(icmp6, 0, sizeof(*icmp6)); icmp6->icmp6_type = icmp6_type; icmp6->icmp6_code = icmp6_code; @@ -1094,7 +1095,7 @@ dp_test_pktmbuf_gre(struct rte_mbuf *m, uint16_t prot, uint32_t checksum, } else { assert(m->l2_len); assert(m->l3_len); - gre = pktmbuf_mtol4(m, struct gre_base_hdr *); + gre = dp_pktmbuf_mtol4(m, struct gre_base_hdr *); } @@ -1133,6 +1134,7 @@ dp_test_pktmbuf_gre(struct rte_mbuf *m, uint16_t prot, uint32_t checksum, *u32p = htonl(seq); cursor += sizeof(seq); } + (void) cursor; return gre; } @@ -1145,7 +1147,7 @@ dp_test_pktmbuf_erspan_init(struct rte_mbuf *m, uint16_t erspanid, if (hdr_type == ERSPAN_TYPEII) { char *hdr; struct erspan_type2_hdr *erspan; - hdr = pktmbuf_mtol4(m, char *) + sizeof(*erspan); + hdr = dp_pktmbuf_mtol4(m, char *) + sizeof(*erspan); erspan = (struct erspan_type2_hdr *)hdr; erspan->ver_sid = htonl((hdr_type << 28) | (vlan << 16) | @@ -1153,8 +1155,9 @@ dp_test_pktmbuf_erspan_init(struct rte_mbuf *m, uint16_t erspanid, erspanid); erspan->idx_dir = htonl(idx << 4 | dir); return erspan; - } else - return NULL; + } + + return NULL; } /** @@ -1289,10 +1292,10 @@ dp_test_create_l2_pak(const char *d_addr, */ switch (ether_type) { - case ETHER_TYPE_IPv4: + case RTE_ETHER_TYPE_IPV4: hlen = sizeof(struct iphdr); break; - case ETHER_TYPE_IPv6: + case RTE_ETHER_TYPE_IPV6: hlen = sizeof(struct ip6_hdr); break; default: @@ -1346,8 +1349,8 @@ dp_test_pktmbuf_vlan_clear(struct rte_mbuf *m) } struct ether_vlan_hdr { - struct ether_hdr eh; - struct vlan_hdr vh; + struct rte_ether_hdr eh; + struct rte_vlan_hdr vh; }; void @@ -1355,17 +1358,18 @@ dp_test_insert_8021q_hdr(struct rte_mbuf *pak, uint16_t vlan_id, uint16_t vlan_ether_type, uint16_t payload_ether_type) { - struct ether_hdr *eth = rte_pktmbuf_mtod(pak, struct ether_hdr *); + struct rte_ether_hdr *eth = + rte_pktmbuf_mtod(pak, struct rte_ether_hdr *); struct 
ether_vlan_hdr *vhdr = (struct ether_vlan_hdr *) - rte_pktmbuf_prepend(pak, sizeof(struct vlan_hdr)); + rte_pktmbuf_prepend(pak, sizeof(struct rte_vlan_hdr)); assert(vhdr != NULL); - memmove(&vhdr->eh, eth, 2 * ETHER_ADDR_LEN); + memmove(&vhdr->eh, eth, 2 * RTE_ETHER_ADDR_LEN); vhdr->eh.ether_type = htons(vlan_ether_type); vhdr->vh.vlan_tci = htons(vlan_id); vhdr->vh.eth_proto = htons(payload_ether_type); - pktmbuf_l2_len(pak) += sizeof(struct vlan_hdr); + dp_pktmbuf_l2_len(pak) += sizeof(struct rte_vlan_hdr); } struct rte_mbuf * @@ -1420,7 +1424,7 @@ dp_test_create_ipv4_pak(const char *saddr, const char *daddr, struct rte_mbuf * dp_test_create_raw_ipv4_pak(const char *saddr, const char *daddr, - uint8_t ipproto, int n, const int *len) + uint8_t protocol, int n, const int *len) { struct rte_mbuf *pak; struct iphdr *ip; @@ -1435,11 +1439,11 @@ dp_test_create_raw_ipv4_pak(const char *saddr, const char *daddr, /* * Init headers in first mbuf */ - if (!dp_test_pktmbuf_eth_init(pak, NULL, NULL, ETHER_TYPE_IPv4)) { + if (!dp_test_pktmbuf_eth_init(pak, NULL, NULL, RTE_ETHER_TYPE_IPV4)) { rte_pktmbuf_free(pak); return NULL; } - ip = dp_test_pktmbuf_ip_init(pak, saddr, daddr, ipproto); + ip = dp_test_pktmbuf_ip_init(pak, saddr, daddr, protocol); if (!ip) { rte_pktmbuf_free(pak); return NULL; @@ -1490,7 +1494,7 @@ dp_test_create_udp_ipv4_pak(const char *saddr, const char *daddr, /* * Init headers in first mbuf */ - if (!dp_test_pktmbuf_eth_init(pak, NULL, NULL, ETHER_TYPE_IPv4)) { + if (!dp_test_pktmbuf_eth_init(pak, NULL, NULL, RTE_ETHER_TYPE_IPV4)) { rte_pktmbuf_free(pak); return NULL; } @@ -1534,7 +1538,7 @@ dp_test_create_raw_ipv6_pak(const char *saddr, const char *daddr, if (!pak) return NULL; - if (!dp_test_pktmbuf_eth_init(pak, NULL, NULL, ETHER_TYPE_IPv6)) { + if (!dp_test_pktmbuf_eth_init(pak, NULL, NULL, RTE_ETHER_TYPE_IPV6)) { rte_pktmbuf_free(pak); return NULL; } @@ -1604,7 +1608,7 @@ dp_test_create_udp_ipv6_pak(const char *saddr, const char *daddr, if (!pak) 
return NULL; - if (!dp_test_pktmbuf_eth_init(pak, NULL, NULL, ETHER_TYPE_IPv6)) { + if (!dp_test_pktmbuf_eth_init(pak, NULL, NULL, RTE_ETHER_TYPE_IPV6)) { rte_pktmbuf_free(pak); return NULL; } @@ -1684,7 +1688,7 @@ dp_test_create_tcp_ipv4_pak(const char *saddr, const char *daddr, if (!pak) return NULL; - if (!dp_test_pktmbuf_eth_init(pak, NULL, NULL, ETHER_TYPE_IPv4)) { + if (!dp_test_pktmbuf_eth_init(pak, NULL, NULL, RTE_ETHER_TYPE_IPV4)) { rte_pktmbuf_free(pak); return NULL; } @@ -1752,7 +1756,7 @@ dp_test_create_tcp_ipv6_pak(const char *saddr, const char *daddr, if (!pak) return NULL; - if (!dp_test_pktmbuf_eth_init(pak, NULL, NULL, ETHER_TYPE_IPv6)) { + if (!dp_test_pktmbuf_eth_init(pak, NULL, NULL, RTE_ETHER_TYPE_IPV6)) { rte_pktmbuf_free(pak); return NULL; } @@ -1822,7 +1826,7 @@ dp_test_create_icmp_ipv4_pak(const char *saddr, const char *daddr, if (!pak) return NULL; - if (!dp_test_pktmbuf_eth_init(pak, NULL, NULL, ETHER_TYPE_IPv4)) { + if (!dp_test_pktmbuf_eth_init(pak, NULL, NULL, RTE_ETHER_TYPE_IPV4)) { rte_pktmbuf_free(pak); return NULL; } @@ -1839,7 +1843,8 @@ dp_test_create_icmp_ipv4_pak(const char *saddr, const char *daddr, uint32_t plen = pak->pkt_len - poff; if (payload) { - memcpy(pktmbuf_mtol4(pak, struct icmphdr *) + 1, payload, plen); + memcpy(dp_pktmbuf_mtol4(pak, struct icmphdr *) + 1, + payload, plen); } else { /* Write test pattern to mbuf payload */ if (dp_test_pktmbuf_payload_init(pak, poff, NULL, plen) == 0) { @@ -1894,7 +1899,7 @@ dp_test_create_icmp_ipv6_pak(const char *saddr, const char *daddr, if (!pak) return NULL; - if (!dp_test_pktmbuf_eth_init(pak, NULL, NULL, ETHER_TYPE_IPv6)) { + if (!dp_test_pktmbuf_eth_init(pak, NULL, NULL, RTE_ETHER_TYPE_IPV6)) { rte_pktmbuf_free(pak); return NULL; } @@ -1911,8 +1916,8 @@ dp_test_create_icmp_ipv6_pak(const char *saddr, const char *daddr, uint32_t plen = pak->pkt_len - poff; if (payload) { - memcpy(pktmbuf_mtol4(pak, struct icmp6_hdr *) + 1, payload, - plen); + memcpy(dp_pktmbuf_mtol4(pak, 
struct icmp6_hdr *) + 1, + payload, plen); } else { /* Write test pattern to mbuf payload */ if (dp_test_pktmbuf_payload_init(pak, poff, NULL, plen) == 0) { @@ -1946,7 +1951,7 @@ dp_test_create_gre_pak(uint16_t ethertype, const char *saddr, const char *daddr, uint16_t hlen; /* Create mbuf chain */ - if (ethertype == ETHER_TYPE_IPv4) + if (ethertype == RTE_ETHER_TYPE_IPV4) hlen = sizeof(*ip) + sizeof(*gre); else hlen = sizeof(*ip6) + sizeof(*gre); @@ -1958,7 +1963,7 @@ dp_test_create_gre_pak(uint16_t ethertype, const char *saddr, const char *daddr, rte_pktmbuf_free(pak); return NULL; } - if (ethertype == ETHER_TYPE_IPv4) { + if (ethertype == RTE_ETHER_TYPE_IPV4) { ip = dp_test_pktmbuf_ip_init(pak, saddr, daddr, IPPROTO_GRE); if (!ip) { rte_pktmbuf_free(pak); @@ -2014,7 +2019,7 @@ dp_test_create_gre_ipv4_pak(const char *saddr, const char *daddr, uint32_t gre_key, uint32_t gre_seq, void **payload) { - return dp_test_create_gre_pak(ETHER_TYPE_IPv4, saddr, daddr, n, + return dp_test_create_gre_pak(RTE_ETHER_TYPE_IPV4, saddr, daddr, n, len, gre_prot, gre_key, gre_seq, payload); } @@ -2039,7 +2044,7 @@ dp_test_create_gre_ipv6_pak(const char *saddr, const char *daddr, uint32_t gre_key, uint32_t gre_seq, void **payload) { - return dp_test_create_gre_pak(ETHER_TYPE_IPv6, saddr, daddr, n, + return dp_test_create_gre_pak(RTE_ETHER_TYPE_IPV6, saddr, daddr, n, len, gre_prot, gre_key, gre_seq, payload); } @@ -2093,7 +2098,7 @@ dp_test_create_gre_pptp_ipv4_pak(const char *saddr, const char *daddr, if (!m) return NULL; - if (!dp_test_pktmbuf_eth_init(m, NULL, NULL, ETHER_TYPE_IPv4)) { + if (!dp_test_pktmbuf_eth_init(m, NULL, NULL, RTE_ETHER_TYPE_IPV4)) { rte_pktmbuf_free(m); return NULL; } @@ -2111,7 +2116,7 @@ dp_test_create_gre_pptp_ipv4_pak(const char *saddr, const char *daddr, * Init the bits and flags. We only set the key bit, * ack bit (if present) and version to 1. 
*/ - gre = pktmbuf_mtol4(m, struct gre_base_hdr *); + gre = dp_pktmbuf_mtol4(m, struct gre_base_hdr *); memset(gre, 0, gre_hlen); flags = GRE_FLAG_KEY | GRE_FLAG_VER_1; @@ -2157,7 +2162,7 @@ dp_test_create_gre_pptp_ipv4_pak(const char *saddr, const char *daddr, } if (payload) - *payload = pktmbuf_mtol4(m, char *) + hlen + 1; + *payload = dp_pktmbuf_mtol4(m, char *) + hlen + 1; return m; } @@ -2198,7 +2203,7 @@ dp_test_create_erspan_ipv4_pak(const char *saddr, const char *daddr, if (!pak) return NULL; - if (!dp_test_pktmbuf_eth_init(pak, NULL, NULL, ETHER_TYPE_IPv4)) { + if (!dp_test_pktmbuf_eth_init(pak, NULL, NULL, RTE_ETHER_TYPE_IPV4)) { rte_pktmbuf_free(pak); return NULL; } @@ -2257,7 +2262,7 @@ dp_test_create_erspan_ipv4_pak(const char *saddr, const char *daddr, */ struct rte_mbuf * _dp_test_create_mpls_pak(uint8_t nlabels, - label_t *labels, uint8_t mpls_ttls[], + const label_t *labels, const uint8_t mpls_ttls[], const struct rte_mbuf *payload) { struct rte_mbuf *pak; @@ -2274,7 +2279,7 @@ _dp_test_create_mpls_pak(uint8_t nlabels, /* * Init L2 */ - if (!dp_test_pktmbuf_eth_init(pak, NULL, NULL, ETHER_TYPE_MPLS)) { + if (!dp_test_pktmbuf_eth_init(pak, NULL, NULL, RTE_ETHER_TYPE_MPLS)) { rte_pktmbuf_free(pak); return NULL; } @@ -2282,7 +2287,7 @@ _dp_test_create_mpls_pak(uint8_t nlabels, /* * Init mpls head */ - lbl_stack = pktmbuf_mtol3(pak, label_t *); + lbl_stack = dp_pktmbuf_mtol3(pak, label_t *); for (i = 0; i < nlabels; i++) *lbl_stack++ = htonl(labels[i] << MPLS_LS_LABEL_SHIFT | @@ -2297,7 +2302,7 @@ _dp_test_create_mpls_pak(uint8_t nlabels, * set the data len to the end of the mpls header */ rte_pktmbuf_data_len(pak) = - sizeof(struct ether_hdr) + (nlabels * sizeof(label_t)); + sizeof(struct rte_ether_hdr) + (nlabels * sizeof(label_t)); pak->l2_len = rte_pktmbuf_data_len(pak); pak->pkt_len = rte_pktmbuf_data_len(pak); @@ -2311,8 +2316,8 @@ _dp_test_create_mpls_pak(uint8_t nlabels, /* * Copy l3 from payload into our l3 */ - copy_from = 
pktmbuf_mtol3(payload, struct iphdr *); - ip = pktmbuf_mtol3(pak, struct iphdr *); + copy_from = dp_pktmbuf_mtol3(payload, struct iphdr *); + ip = dp_pktmbuf_mtol3(pak, struct iphdr *); memcpy(ip, copy_from, rte_pktmbuf_data_len(payload) - payload->l2_len); pak->l3_len = sizeof(struct iphdr); @@ -2324,7 +2329,7 @@ _dp_test_create_mpls_pak(uint8_t nlabels, * l2 header to be end of the ethernet header as that * is what the dplane expects when receiving a packet. */ - pak->l2_len = sizeof(struct ether_hdr); + pak->l2_len = sizeof(struct rte_ether_hdr); } return pak; } @@ -2334,7 +2339,7 @@ dp_test_get_mpls_pak_payload(const struct rte_mbuf *m) { label_t *lbl_stack; - lbl_stack = pktmbuf_mtol3(m, label_t *); + lbl_stack = dp_pktmbuf_mtol3(m, label_t *); for (; ; lbl_stack++) { if (ntohl(*lbl_stack) & (1 << MPLS_LS_S_SHIFT)) { @@ -2483,7 +2488,7 @@ dp_test_ipv6_scan_non_frag_hdrs(struct rte_mbuf *m, uint8_t *l3_len, uint8_t *n_ptr; uint8_t nxt, len; - ip6 = pktmbuf_mtol3(m, struct ip6_hdr *); + ip6 = dp_pktmbuf_mtol3(m, struct ip6_hdr *); *l3_len = sizeof(*ip6); n_ptr = (uint8_t *)ip6 + sizeof(*ip6); *last_pcol = ip6->ip6_nxt; @@ -2523,14 +2528,14 @@ dp_test_fill_ipv6hdr_frag(struct rte_mbuf *pkt_in, struct rte_mbuf *pkt_out, uint16_t plen, uint16_t fofs, uint32_t mf, uint32_t fh_id) { - struct ipv6_hdr *dst, *src; + struct rte_ipv6_hdr *dst, *src; struct ip6_frag *fh; uint16_t offlg; int l3_len; uint8_t nxt_proto; - src = pktmbuf_mtol3(pkt_in, struct ipv6_hdr *); - dst = pktmbuf_mtol3(pkt_out, struct ipv6_hdr *); + src = dp_pktmbuf_mtol3(pkt_in, struct rte_ipv6_hdr *); + dst = dp_pktmbuf_mtol3(pkt_out, struct rte_ipv6_hdr *); l3_len = pkt_in->l3_len; /* @@ -2538,7 +2543,7 @@ dp_test_fill_ipv6hdr_frag(struct rte_mbuf *pkt_in, struct rte_mbuf *pkt_out, * ext. headers and the fragmentation header itself, * but not the IPv6 header. 
*/ - plen += l3_len + sizeof(struct ip6_frag) - sizeof(struct ipv6_hdr); + plen += l3_len + sizeof(struct ip6_frag) - sizeof(struct rte_ipv6_hdr); memcpy(dst, src, l3_len); dst->payload_len = htons(plen); @@ -2586,11 +2591,11 @@ dp_test_ipv6_fragment_packet(struct rte_mbuf *pkt_in, uint32_t pkt_len; char *buf; - if (pkt_in->l3_len < sizeof(struct ipv6_hdr)) + if (pkt_in->l3_len < sizeof(struct rte_ipv6_hdr)) return -EINVAL; /* Check header length matches mbuf pkt_len */ - ip6 = pktmbuf_mtol3(pkt_in, struct ip6_hdr *); + ip6 = dp_pktmbuf_mtol3(pkt_in, struct ip6_hdr *); pkt_len = ntohs(ip6->ip6_plen) + pkt_in->l2_len + sizeof(struct ip6_hdr); if (pkt_len != pkt_in->pkt_len) @@ -2727,7 +2732,7 @@ dp_test_ipv6_append_non_frag_ext_hdr(struct rte_mbuf *m, if (frag_l3_len != m->l3_len) return NULL; - ip6 = pktmbuf_mtol3(m, struct ip6_hdr *); + ip6 = dp_pktmbuf_mtol3(m, struct ip6_hdr *); /* Insert space between last ext hdr and l4 hdr */ new_ext = (struct ip6_ext *)dp_test_pktmbuf_insert(m, @@ -2776,7 +2781,7 @@ dp_test_ipv4_fragment_packet(struct rte_mbuf *pkt_in, return -EINVAL; /* Check header length matches mbuf pkt_len */ - ip = pktmbuf_mtol3(pkt_in, struct iphdr *); + ip = dp_pktmbuf_mtol3(pkt_in, struct iphdr *); pkt_len = ntohs(ip->tot_len) + pkt_in->l2_len; if (pkt_len != pkt_in->pkt_len) @@ -2838,7 +2843,7 @@ dp_test_ipv4_fragment_packet(struct rte_mbuf *pkt_in, struct iphdr *src_ip, *dst_ip; /* Append space for IP header, and copy */ - src_ip = pktmbuf_mtol3(pkt_in, struct iphdr *); + src_ip = dp_pktmbuf_mtol3(pkt_in, struct iphdr *); dst_ip = (struct iphdr *)rte_pktmbuf_append(out_pkt, pkt_in->l3_len); memcpy(dst_ip, src_ip, pkt_in->l3_len); @@ -2855,7 +2860,8 @@ dp_test_ipv4_fragment_packet(struct rte_mbuf *pkt_in, dst_ip->frag_off |= htons(IP_MF); dst_ip->tot_len = htons(plen + out_pkt->l3_len); dst_ip->check = 0; - dst_ip->check = rte_ipv4_cksum((const struct ipv4_hdr *)dst_ip); + dst_ip->check = + rte_ipv4_cksum((const struct rte_ipv4_hdr *) dst_ip); /* 
Append space to fragment, and write fragment payload */ memcpy(rte_pktmbuf_append(out_pkt, plen), payload + offset, @@ -2971,15 +2977,15 @@ dp_test_get_pak_eth_ip_field(const struct rte_mbuf *m, enum dp_test_pak_field_ip field, struct dp_test_addr *ip_addr, uint32_t *val) { - struct ether_hdr *eth; + struct rte_ether_hdr *eth; struct ip6_hdr *ip6h; struct iphdr *iph; - assert(m->l2_len == sizeof(struct ether_hdr)); - eth = rte_pktmbuf_mtod(m, struct ether_hdr *); + assert(m->l2_len == sizeof(struct rte_ether_hdr)); + eth = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); switch (ntohs(eth->ether_type)) { - case ETHER_TYPE_IPv4: + case RTE_ETHER_TYPE_IPV4: assert(m->l3_len >= sizeof(struct iphdr)); iph = iphdr(m); assert(m->l3_len == iph->ihl * 4); @@ -3002,7 +3008,7 @@ dp_test_get_pak_eth_ip_field(const struct rte_mbuf *m, assert(false); } break; - case ETHER_TYPE_IPv6: + case RTE_ETHER_TYPE_IPV6: assert(m->l3_len >= sizeof(struct ip6_hdr)); ip6h = (struct ip6_hdr *)((uintptr_t)eth + sizeof(*eth)); switch (field) { @@ -3087,7 +3093,7 @@ dp_test_ipv6_decrement_ttl(struct rte_mbuf *m) if (!m) return; - ip6 = pktmbuf_mtol3(m, struct ip6_hdr *); + ip6 = dp_pktmbuf_mtol3(m, struct ip6_hdr *); ip6->ip6_hlim = ip6->ip6_hlim - 1; } @@ -3124,7 +3130,7 @@ dp_test_create_l2_pak_from_data(const char *d_addr, const char *s_addr, uint16_t ether_type, char *data, int len) { struct rte_mbuf *m; - struct ether_hdr *eth; + struct rte_ether_hdr *eth; m = dp_test_create_mbuf_chain(1, &len, 0); if (!m) @@ -3138,7 +3144,7 @@ dp_test_create_l2_pak_from_data(const char *d_addr, const char *s_addr, m->l2_len = 14; - eth = rte_pktmbuf_mtod(m, struct ether_hdr *); + eth = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); /* Optionally overwrite ethernet header */ if (d_addr && s_addr) { @@ -3154,13 +3160,13 @@ dp_test_create_l2_pak_from_data(const char *d_addr, const char *s_addr, * then the IP header checksum will be zero. Set it here regardless. 
*/ - if (eth->ether_type == htons(ETHER_TYPE_IPv4)) { + if (eth->ether_type == htons(RTE_ETHER_TYPE_IPV4)) { /* Set IP checksum (its zero in hex stream from Wireshark) */ struct iphdr *ip; ip = (struct iphdr *)(rte_pktmbuf_mtod(m, char *) + m->l2_len); ip->check = 0; - ip->check = rte_ipv4_cksum((const struct ipv4_hdr *)ip); + ip->check = rte_ipv4_cksum((const struct rte_ipv4_hdr *)ip); } return m; @@ -3474,12 +3480,12 @@ bool dp_test_mbuf_is_ipv6(struct rte_mbuf *m) static uint16_t dp_test_mbuf_ethertype(struct rte_mbuf *pak) { - struct ether_hdr *eth; + struct rte_ether_hdr *eth; - if (pak->l2_len < sizeof(struct ether_hdr)) + if (pak->l2_len < sizeof(struct rte_ether_hdr)) return 0; - eth = rte_pktmbuf_mtod(pak, struct ether_hdr *); + eth = rte_pktmbuf_mtod(pak, struct rte_ether_hdr *); return ntohs(eth->ether_type); } @@ -3489,8 +3495,8 @@ dp_test_mbuf_ethertype_is_ip(struct rte_mbuf *m) { uint16_t ether_type = dp_test_mbuf_ethertype(m); - return (ether_type == ETHER_TYPE_IPv4) || - (ether_type == ETHER_TYPE_IPv6); + return (ether_type == RTE_ETHER_TYPE_IPV4) || + (ether_type == RTE_ETHER_TYPE_IPV6); } bool @@ -3498,16 +3504,16 @@ dp_test_mbuf_ethertype_is_mpls(struct rte_mbuf *m) { uint16_t ether_type = dp_test_mbuf_ethertype(m); - return (ether_type == ETHER_TYPE_MPLS); + return (ether_type == RTE_ETHER_TYPE_MPLS); } struct ether_arp { struct arphdr ea_hdr; /* fixed-size header */ - struct ether_addr arp_sha; /* sender hardware address */ + struct rte_ether_addr arp_sha; /* sender hardware address */ in_addr_t arp_spa; /* sender protocol address */ - struct ether_addr arp_tha; /* target hardware address */ + struct rte_ether_addr arp_tha; /* target hardware address */ in_addr_t arp_tpa; /* target protocol address */ -} __attribute__ ((__packed__)); +} __attribute__ ((__packed__)) __attribute__((aligned(2))); /* * sha: Sender Hardware Address @@ -3522,7 +3528,7 @@ dp_test_create_arp_pak(ushort op, const char *s_mac, const char *d_mac, uint16_t vlan_id) { 
struct rte_mbuf *pak; - struct ether_hdr *eth; + struct rte_ether_hdr *eth; struct ether_arp *arp; in_addr_t ipaddr; int len = 0; @@ -3531,7 +3537,7 @@ dp_test_create_arp_pak(ushort op, const char *s_mac, const char *d_mac, if (!pak) return NULL; - eth = dp_test_pktmbuf_eth_init(pak, d_mac, s_mac, ETHER_TYPE_ARP); + eth = dp_test_pktmbuf_eth_init(pak, d_mac, s_mac, RTE_ETHER_TYPE_ARP); if (!eth) { rte_pktmbuf_free(pak); return NULL; @@ -3541,8 +3547,8 @@ dp_test_create_arp_pak(ushort op, const char *s_mac, const char *d_mac, arp = (struct ether_arp *) (eth+1); arp->ea_hdr.ar_hrd = htons(ARPHRD_ETHER); - arp->ea_hdr.ar_pro = htons(ETHER_TYPE_IPv4); - arp->ea_hdr.ar_hln = ETHER_ADDR_LEN; + arp->ea_hdr.ar_pro = htons(RTE_ETHER_TYPE_IPV4); + arp->ea_hdr.ar_hln = RTE_ETHER_ADDR_LEN; arp->ea_hdr.ar_pln = sizeof(in_addr_t); arp->ea_hdr.ar_op = htons(op); if (ether_aton_r(sha, &arp->arp_sha) == NULL) diff --git a/tests/common/inc/dp_test_pktmbuf_lib.h b/tests/whole_dp/src/dp_test_pktmbuf_lib_internal.h similarity index 76% rename from tests/common/inc/dp_test_pktmbuf_lib.h rename to tests/whole_dp/src/dp_test_pktmbuf_lib_internal.h index 30d67ac0..c4558976 100644 --- a/tests/common/inc/dp_test_pktmbuf_lib.h +++ b/tests/whole_dp/src/dp_test_pktmbuf_lib_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2011-2017 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -7,8 +7,8 @@ * * Library of functions for packet creation and handling */ -#ifndef __DP_PKTMBUF_LIB_H__ -#define __DP_PKTMBUF_LIB_H__ +#ifndef __DP_PKTMBUF_LIB_INTERNAL_H__ +#define __DP_PKTMBUF_LIB_INTERNAL_H__ #include #include @@ -22,6 +22,7 @@ #include #include #include +#include "dp_test/dp_test_pktmbuf_lib.h" /* * TCP flags not defined in netinet/tcp.h @@ -34,15 +35,6 @@ #define TH_ECE 0x40 #endif -struct dp_test_addr { - int family; - union { - in_addr_t ipv4; - struct in6_addr ipv6; - uint32_t mpls; - } addr; -}; - struct gre_base_hdr { uint16_t flags; uint16_t protocol; @@ -55,7 +47,7 @@ struct erspan_type2_hdr { uint32_t idx_dir; }; -#define ETHER_TYPE_MPLS 0x8847 +#define RTE_ETHER_TYPE_MPLS 0x8847 typedef uint32_t label_t; #ifndef MPLS_LS_LABEL_SHIFT # define MPLS_LS_LABEL_SHIFT 12 @@ -150,30 +142,8 @@ dp_test_pktmbuf_dmac_set(struct rte_mbuf *m, const char *mac_str); void dp_test_pktmbuf_smac_set(struct rte_mbuf *m, const char *mac_str); -/** - * Initialize ethernet hdr. If l2_len is 0, prepend 14 bytes and set - * m->l2_len to 14. - * - * @param m [in] Pointer to packet mbuf - * @param d_addr [in] Dest mac string, e.g. 
"aa:bb:cc:dd:ee:ff" - * @param s_addr [in] Source mac string - * @param ether_type [in] Ethernet type (host order), may be 0 - * - * @return Pointer to eth header if successful, else NULL - * - * To just check and set the mbuf l2_len: - * (void)dp_test_pktmbuf_eth_init(m, NULL, NULL, 0); - * - * To just check and set the mbuf l2_len and ether type: - * (void)dp_test_pktmbuf_eth_init(m, NULL, NULL, ETHER_TYPE_IPv4); - */ -struct ether_hdr * -dp_test_pktmbuf_eth_init(struct rte_mbuf *m, - const char *d_addr, - const char *s_addr, - uint16_t ether_type); -struct ether_hdr * +struct rte_ether_hdr * dp_test_pktmbuf_eth_prepend(struct rte_mbuf *m, const char *d_addr, const char *s_addr, uint16_t ether_type); @@ -229,40 +199,6 @@ dp_test_pktmbuf_ip6_init(struct rte_mbuf *m, const char *dst, uint8_t protocol); -/** - * Calculate IPv4 UDP or TCP checksum. - * - * The IPv4 header should not contains options. The layer 4 checksum - * must be set to 0 in the packet by the caller. The l4 header must be - * in the first mbuf. - * - * @param m [in] Pointer to the mbuf chain - * @param ip [in] Pointer to the contiguous IP header. - * @param l4_hdr [in] Pointer to the beginning of the L4 header - * @return - * The complemented checksum to set in the IPv4 UDP/TCP header - */ -uint16_t -dp_test_ipv4_udptcp_cksum(const struct rte_mbuf *m, const struct iphdr *ip, - void *l4_hdr); - -/** - * Calculate IPv6 UDP or TCP checksum. - * - * The layer 4 checksum must be set to 0 in the packet by the caller. - * - * @param ip6 - * The pointer to the contiguous IPv6 header. - * @param l4_hdr - * The pointer to the beginning of the L4 header (must be in first mbuf). - * @return - * The complemented checksum to set in the IPv6 UDP/TCP header - */ -uint16_t -dp_test_ipv6_udptcp_cksum(const struct rte_mbuf *m, - const struct ip6_hdr *ip6, - const void *l4_hdr); - /** * Calculate IPv4 ICMP checksum. 
* @@ -419,11 +355,11 @@ dp_test_pktmbuf_gre_prepend(struct rte_mbuf *m, uint16_t prot, uint32_t key); void dp_test_pktmbuf_gre_adj(struct rte_mbuf *m); -struct vxlan_hdr * +struct rte_vxlan_hdr * dp_test_pktmbuf_vxlan_init(struct rte_mbuf *m, uint32_t vx_flags, uint32_t vx_vni); -struct vxlan_hdr * +struct rte_vxlan_hdr * dp_test_pktmbuf_vxlan_prepend(struct rte_mbuf *m, uint32_t vx_flags, uint32_t vx_vni); @@ -520,156 +456,6 @@ dp_test_insert_8021q_hdr(struct rte_mbuf *pak, uint16_t vlan_id, uint16_t vlan_ether_type, uint16_t payload_ether_type); -/** - * Create and initialise a UDP IPv4 packet - * - * @param saddr [in] Source address string, e.g. "10.0.1.0" - * @param daddr [in] Dest address string - * @param n [in] Number of mbufs - * @param len [in] Array of per-mbuf payload lengths - * - * @return pak Pointer to mbuf if successful, else NULL - */ -struct rte_mbuf * -dp_test_create_ipv4_pak(const char *saddr, const char *daddr, - int n, const int *len); - -/** - * Create and initialise a raw IPv4 packet with the given protocol. - * - * @param saddr [in] Source address string, e.g. "10.0.1.0" - * @param daddr [in] Dest address string - * @param protocol [in] Protocol, e.g. IPPROTO_UDP - * @param n [in] Number of mbufs - * @param len [in] Array of 'n' per-mbuf payload lengths - * - * @return pak Pointer to mbuf if successful, else NULL - */ -struct rte_mbuf * -dp_test_create_raw_ipv4_pak(const char *saddr, const char *daddr, - uint8_t protocol, int n, const int *len); - -/** - * Create and initialise an IPv4 UDP packet - * - * @param saddr [in] Source address string, e.g. 
"10.0.1.0" - * @param daddr [in] Dest address string - * @param sport [in] UDP source port - * @param dport [in] UDP dest port - * @param n [in] Number of mbufs - * @param len [in] Array of per-mbuf payload lengths - * - * @return pak Pointer to mbuf if successful, else NULL - */ -struct rte_mbuf * -dp_test_create_udp_ipv4_pak(const char *saddr, const char *daddr, - uint16_t sport, uint16_t dport, - int n, const int *len); - -/** - * Create and initialise an IPv6 packet with no protocol. - * - * @param saddr [in] Source address string, e.g. "2001:101:8::1" - * @param daddr [in] Dest address string - * @param n [in] Number of mbufs - * @param len [in] Array of 'n' per-mbuf payload lengths - * - * @return pak Pointer to mbuf if successful, else NULL - */ -struct rte_mbuf * -dp_test_create_ipv6_pak(const char *saddr, const char *daddr, - int n, const int *len); - -/** - * Create and initialise a raw IPv6 packet with the given protocol. - * - * @param saddr [in] Source address string, e.g. "2001:101:8::1" - * @param daddr [in] Dest address string - * @param protocol [in] Protocol, e.g. IPPROTO_UDP - * @param n [in] Number of mbufs - * @param len [in] Array of 'n' per-mbuf payload lengths - * - * @return pak Pointer to mbuf if successful, else NULL - */ -struct rte_mbuf * -dp_test_create_raw_ipv6_pak(const char *saddr, const char *daddr, - uint8_t protocol, int n, const int *len); - -/** - * Create and initialise an IPv6 UDP packet - * - * @param saddr [in] Source address string, e.g. 
"2001:101:8::1" - * @param daddr [in] Dest address string - * @param sport [in] UDP source port (host order) - * @param dport [in] UDP dest port - * @param n [in] Number of mbufs - * @param len [in] Array of 'n' per-mbuf payload lengths - * - * @return pak Pointer to mbuf if successful, else NULL - */ -struct rte_mbuf * -dp_test_create_udp_ipv6_pak(const char *saddr, const char *daddr, - uint16_t sport, uint16_t dport, - int n, const int *len); -/** - * Create and initialise an IPv4 TCP packet - * - * @param saddr [in] Source address string, e.g. "10.0.1.0" - * @param daddr [in] Dest address string - * @param sport [in] TCP source port - * @param dport [in] TCP dest port - * @param flags [in] TCP header flags - * @param seq [in] TCP sequence number - * @param ack [in] TCP acknowledgment number - * @param win [in] TCP window value (host order) - * @param opts [in] Byte array of TCP options. See below. - * @param n [in] Number of mbufs - * @param len [in] Array of 'n' per-mbuf payload lengths - * - * @return pak Pointer to mbuf if successful, else NULL - * - * TCP options - type (1 byte), length (1 byte), value (length-2 bytes), e.g. - * - * uint8_t opts[] = { - * 2, 4, 0x18, 0x02, - * 1, - * 3, 3, 1, - * 0 - * }; - * - * The options list is terminated when 'type' is 0 (EOL). Note that when - * 'type' is 1 (NOP) then there is no length value. This is commonly used to - * separate options in a header. - */ -struct rte_mbuf * -dp_test_create_tcp_ipv4_pak(const char *saddr, const char *daddr, - uint16_t sport, uint16_t dport, uint8_t flags, - uint32_t seq, uint32_t ack, uint16_t win, - const uint8_t *opts, int n, const int *len); - -/** - * Create and initialise an IPv6 TCP packet - * - * @param saddr [in] Source address string, e.g. 
"2001:101:8::1" - * @param daddr [in] Dest address string - * @param sport [in] TCP source port (host order) - * @param dport [in] TCP dest port - * @param flags [in] TCP header flags - * @param seq [in] TCP sequence number - * @param ack [in] TCP acknowledgment number - * @param win [in] TCP window value (host order) - * @param opts [in] Byte array of TCP options. See above. - * @param n [in] Number of mbufs - * @param len [in] Array of 'n' per-mbuf payload lengths - * - * @return pak Pointer to mbuf if successful, else NULL - */ -struct rte_mbuf * -dp_test_create_tcp_ipv6_pak(const char *saddr, const char *daddr, - uint16_t sport, uint16_t dport, uint8_t flags, - uint32_t seq, uint32_t ack, uint16_t win, - const uint8_t *opts, int n, const int *len); - /** * Create and initialise an IPv4 ICMP packet * @@ -817,7 +603,7 @@ dp_test_create_erspan_ipv4_pak(const char *saddr, const char *daddr, struct rte_mbuf * _dp_test_create_mpls_pak(uint8_t nlabels, - label_t *labels, uint8_t mpls_ttl[], + const label_t *labels, const uint8_t mpls_ttl[], const struct rte_mbuf *payload); /** @@ -973,23 +759,6 @@ struct ip6_ext * dp_test_ipv6_append_non_frag_ext_hdr(struct rte_mbuf *m, uint8_t proto, uint16_t len); -/* - * API to allow us to set a given field within the ip header in a buffer. - */ -enum dp_test_pak_field_ { - DP_TEST_SET_VERSION, - DP_TEST_SET_SRC_ADDR_IPV4, - DP_TEST_SET_DST_ADDR_IPV4, - DP_TEST_SET_IP_ECN, - DP_TEST_SET_DF, - DP_TEST_SET_FRAG_MORE, - DP_TEST_SET_FRAG_OFFSET, - DP_TEST_SET_TOS, - DP_TEST_SET_PROTOCOL, - DP_TEST_SET_TTL, - DP_TEST_SET_IP_ID, -}; - /* * API to allow us to get a given field within the ip / ip6 header in a buffer. 
*/ @@ -999,14 +768,6 @@ enum dp_test_pak_field_ip { DP_TEST_GET_TOS, DP_TEST_GET_PROTOCOL, }; -void -dp_test_set_pak_ip_field(struct iphdr *ip, - enum dp_test_pak_field_ field, - uint32_t val); -void -dp_test_set_pak_ip6_field(struct ip6_hdr *ip, - enum dp_test_pak_field_ field, - uint32_t val); void dp_test_get_pak_eth_ip_field_addr(const struct rte_mbuf *m, @@ -1023,12 +784,6 @@ dp_test_get_pak_eth_ip_field_u32(const struct rte_mbuf *m, enum dp_test_pak_field_ip field, uint32_t *val); -void -dp_test_ipv4_decrement_ttl(struct rte_mbuf *m); - -void -dp_test_ipv6_decrement_ttl(struct rte_mbuf *m); - void dp_test_ipv4_remark_tos(struct rte_mbuf *m, unsigned char tos); @@ -1105,4 +860,4 @@ dp_test_create_arp_pak(ushort op, const char *s_mac, const char *d_mac, const char *spa_addr, const char *tpa_addr, uint16_t vlan_id); -#endif /* __DP_PKTMBUF_LIB_H__ */ +#endif /* __DP_PKTMBUF_LIB_INTERNAL_H__ */ diff --git a/tests/whole_dp/src/dp_test_poe_cmds.c b/tests/whole_dp/src/dp_test_poe_cmds.c index 297afe69..1985fa00 100644 --- a/tests/whole_dp/src/dp_test_poe_cmds.c +++ b/tests/whole_dp/src/dp_test_poe_cmds.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, AT&T Intellectual Property. + * Copyright (c) 2018-2020, AT&T Intellectual Property. * All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only @@ -8,10 +8,10 @@ #include #include "dp_test.h" -#include "dp_test_lib.h" -#include "dp_test_cmd_check.h" +#include "dp_test_lib_internal.h" +#include "dp_test/dp_test_cmd_check.h" #include "dp_test_console.h" -#include "dp_test_lib_intf.h" +#include "dp_test_lib_intf_internal.h" #include "dp_test_json_utils.h" struct dp_test_command_t { diff --git a/tests/whole_dp/src/dp_test_portmonitor.c b/tests/whole_dp/src/dp_test_portmonitor.c index 0ffa55d9..3a547b96 100644 --- a/tests/whole_dp/src/dp_test_portmonitor.c +++ b/tests/whole_dp/src/dp_test_portmonitor.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2018, AT&T Intellectual Property. All rights reserved. 
+ * Copyright (c) 2018-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015-2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -18,16 +18,16 @@ #include "in_cksum.h" #include "if_var.h" #include "main.h" -#include "gre.h" +#include "if/gre.h" #include "iptun_common.h" #include "dp_test.h" #include "dp_test_controller.h" -#include "dp_test_netlink_state.h" -#include "dp_test_cmd_check.h" -#include "dp_test_lib.h" -#include "dp_test_pktmbuf_lib.h" -#include "dp_test_lib_intf.h" +#include "dp_test_netlink_state_internal.h" +#include "dp_test/dp_test_cmd_check.h" +#include "dp_test_lib_internal.h" +#include "dp_test_lib_intf_internal.h" +#include "dp_test_pktmbuf_lib_internal.h" #include "dp_test_lib_exp.h" #include "dp_test_lib_portmonitor.h" @@ -100,7 +100,7 @@ DP_START_TEST(mirroring, span) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); /* We expect 2 packets, one local and one mirrored */ exp = dp_test_exp_create_m(test_pak, 2); @@ -146,7 +146,7 @@ DP_START_TEST(mirroring, span_filter) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); /* We expect 1 packet, local, mirrored is blocked */ exp = dp_test_exp_create_m(test_pak, 1); @@ -195,7 +195,7 @@ DP_START_TEST(mirroring, rspan_source) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); /* We expect 2 packets, one local and one mirrored */ exp = dp_test_exp_create_m(test_pak, 2); @@ -253,7 +253,7 @@ DP_START_TEST(mirroring, rspan_source_filter) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); /* We expect 1 packet, local, mirrored is blocked */ exp = dp_test_exp_create_m(test_pak, 1); @@ 
-346,7 +346,7 @@ erspan_build_expected_pak(struct dp_test_expected **expected, int len; struct dp_test_expected *exp; struct iphdr *inner_ip; - struct ether_hdr *payload; + struct rte_ether_hdr *payload; void *erspan_payload; struct rte_mbuf *m; uint8_t *dont_care_start; @@ -358,20 +358,20 @@ erspan_build_expected_pak(struct dp_test_expected **expected, exp->fwd_result[1] = DP_TEST_FWD_FORWARDED; inner_ip = iphdr(tpak); - len = ntohs(inner_ip->tot_len) + ETHER_HDR_LEN; + len = ntohs(inner_ip->tot_len) + RTE_ETHER_HDR_LEN; m = dp_test_create_erspan_ipv4_pak("1.1.2.1", "1.1.2.2", &len, gre_prot, erspanid, srcidx, tpak->vlan_tci, dir, &erspan_payload); if (!m) return; - payload = rte_pktmbuf_mtod(tpak, struct ether_hdr *); + payload = rte_pktmbuf_mtod(tpak, struct rte_ether_hdr *); memcpy(erspan_payload, payload, len); rte_pktmbuf_free(exp->exp_pak[1]); exp->exp_pak[1] = m; /* Check packet after ether hdr */ - exp->check_start[1] = pktmbuf_l2_len(exp->exp_pak[1]); + exp->check_start[1] = dp_pktmbuf_l2_len(exp->exp_pak[1]); exp->check_len[1] = rte_pktmbuf_data_len(exp->exp_pak[0]) - exp->check_start[1]; @@ -405,7 +405,7 @@ DP_START_TEST(mirroring, erspan_source) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); /* We expect 2 packets, one local and one mirrored */ exp = dp_test_exp_create_m(test_pak, 2); @@ -452,7 +452,7 @@ DP_START_TEST(mirroring, erspan_source_filter) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); /* We expect 1 packet, local, mirrored is blocked */ exp = dp_test_exp_create_m(test_pak, 1); @@ -503,7 +503,7 @@ DP_START_TEST(mirroring, erspan_vlan_source) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); /* We expect 2 packets, one local and one mirrored */ exp = 
dp_test_exp_create_m(test_pak, 2); @@ -523,7 +523,7 @@ DP_START_TEST(mirroring, erspan_vlan_source) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp2T1"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); /* We expect 2 packets, one local and one mirrored */ exp = dp_test_exp_create_m(test_pak, 2); @@ -532,7 +532,7 @@ DP_START_TEST(mirroring, erspan_vlan_source) dp_test_pktmbuf_eth_init(dp_test_exp_get_pak_m(exp, 0), nh_mac_str, dp_test_intf_name2mac_str("dp1T1"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); dp_test_ipv4_decrement_ttl(dp_test_exp_get_pak_m(exp, 0)); dp_test_exp_set_vlan_tci_m(exp, 0, 10); diff --git a/tests/whole_dp/src/dp_test_portmonitor_commands.c b/tests/whole_dp/src/dp_test_portmonitor_commands.c index a27dffa4..50a0383d 100644 --- a/tests/whole_dp/src/dp_test_portmonitor_commands.c +++ b/tests/whole_dp/src/dp_test_portmonitor_commands.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2018-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -18,13 +18,13 @@ #include "dp_test.h" #include "dp_test_str.h" -#include "dp_test_lib.h" +#include "dp_test_lib_internal.h" #include "dp_test_lib_exp.h" -#include "dp_test_lib_intf.h" +#include "dp_test_lib_intf_internal.h" #include "dp_test_lib_pkt.h" #include "dp_test_lib_portmonitor.h" -#include "dp_test_pktmbuf_lib.h" -#include "dp_test_netlink_state.h" +#include "dp_test_pktmbuf_lib_internal.h" +#include "dp_test_netlink_state_internal.h" #include "dp_test_console.h" #include "dp_test_json_utils.h" diff --git a/tests/whole_dp/src/dp_test_ppp.c b/tests/whole_dp/src/dp_test_ppp.c index ad4e0055..16580a9c 100644 --- a/tests/whole_dp/src/dp_test_ppp.c +++ b/tests/whole_dp/src/dp_test_ppp.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2018-2019, AT&T Intellectual Property. All rights reserved. 
+ * Copyright (c) 2018-2020, AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only * @@ -8,14 +8,15 @@ #include "dp_test.h" #include "dp_test_str.h" -#include "dp_test_lib.h" +#include "dp_test_lib_internal.h" #include "dp_test_lib_exp.h" #include "dp_test_lib_pkt.h" -#include "dp_test_pktmbuf_lib.h" +#include "dp_test_pktmbuf_lib_internal.h" #include "dp_test_controller.h" #include "dp_test_json_utils.h" -#include "dp_test_netlink_state.h" +#include "dp_test_netlink_state_internal.h" #include "dp_test_console.h" +#include "dp_test_ppp.h" #include "pipeline/nodes/pppoe/pppoe.h" @@ -25,21 +26,14 @@ DP_DECL_TEST_SUITE(ppp); -#define CREATE true -#define NO_CREATE false -#define VERIFY true -#define NO_VERIFY false -#define SESS_VALID true -#define SESS_INVALID false - static void -dp_test_create_pppoe_msg(const char *ppp_intf, - const char *real_intf, - const char *src_mac, - const char *dst_mac, - int session_id, - void **buf, int *len) +dp_test_create_and_send_pppoe_msg(const char *ppp_intf, + const char *real_intf, + const char *src_mac, + const char *dst_mac, + int session_id) { + int len; PPPOEConfig con = PPPOECONFIG__INIT; con.has_session = true; con.session = session_id; @@ -48,28 +42,16 @@ dp_test_create_pppoe_msg(const char *ppp_intf, con.ether = (char *)src_mac; con.peer_ether = (char *)dst_mac; - *len = pppoeconfig__get_packed_size(&con); - void *buf2 = malloc(*len); + len = pppoeconfig__get_packed_size(&con); + void *buf2 = malloc(len); dp_test_assert_internal(buf2); pppoeconfig__pack(&con, buf2); - DataplaneEnvelope msg = DATAPLANE_ENVELOPE__INIT; - msg.type = strdup("vyatta:pppoe"); - msg.msg.data = buf2; - msg.msg.len = *len; - - *len = dataplane_envelope__get_packed_size(&msg); - - *buf = malloc(*len); - dp_test_assert_internal(*buf); - - dataplane_envelope__pack(&msg, *buf); - free(buf2); - free(msg.type); + dp_test_lib_pb_wrap_and_send_pb("vyatta:pppoe", buf2, len); } -static void +void 
_dp_test_create_pppoe_session(const char *ppp_intf, const char *under_intf, uint16_t session_id, const char *src_mac, const char *dst_mac, bool create, bool verify, @@ -81,20 +63,11 @@ _dp_test_create_pppoe_session(const char *ppp_intf, const char *under_intf, dp_test_intf_real(under_intf, real_ifname); - if (create) { - int len; - void *buf; - - dp_test_create_pppoe_msg(ppp_intf, - real_ifname, - src_mac, dst_mac, - session_id, - &buf, &len); - - dp_test_send_config_src_pb(dp_test_cont_src_get(), - buf, len); - free(buf); - } + if (create) + dp_test_create_and_send_pppoe_msg(ppp_intf, + real_ifname, + src_mac, dst_mac, + session_id); if (verify) { expected = dp_test_json_create("{" @@ -123,29 +96,7 @@ _dp_test_create_pppoe_session(const char *ppp_intf, const char *under_intf, } } -#define dp_test_create_pppoe_session(ppp_intf, under_intf, session_id, \ - src_mac, dst_mac) \ - _dp_test_create_pppoe_session(ppp_intf, under_intf, session_id, \ - src_mac, dst_mac, CREATE, \ - VERIFY, SESS_VALID, \ - __FILE__, __func__, __LINE__) - -#define dp_test_create_pppoe_session_nv(ppp_intf, under_intf, session_id, \ - src_mac, dst_mac) \ - _dp_test_create_pppoe_session(ppp_intf, under_intf, session_id, \ - src_mac, dst_mac, CREATE, \ - NO_VERIFY, SESS_VALID, \ - __FILE__, __func__, __LINE__) - -#define dp_test_verify_pppoe_session(ppp_intf, under_intf, session_id, \ - src_mac, dst_mac, valid) \ - _dp_test_create_pppoe_session(ppp_intf, under_intf, session_id, \ - src_mac, dst_mac, NO_CREATE, \ - VERIFY, valid, \ - __FILE__, __func__, __LINE__) - - -static struct pppoe_packet * +struct pppoe_packet * dp_test_ipv4_pktmbuf_ppp_prepend(struct rte_mbuf *m, const char *dst_mac, const char *src_mac, @@ -283,7 +234,7 @@ DP_START_TEST(ppp_traffic, ppp_traffic_1) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T0"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); /* Create pak we expect to receive on the tx ring */ exp = 
dp_test_exp_create(test_pak); @@ -336,7 +287,7 @@ DP_START_TEST(ppp_traffic, ppp_traffic_2) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T0"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); /* Create pak we expect to receive on the tx ring */ exp = dp_test_exp_create(test_pak); @@ -358,7 +309,7 @@ DP_START_TEST(ppp_traffic, ppp_traffic_2) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T0"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); /* Delete the underlying interface */ dp_test_nl_del_ip_addr_and_connected("dp2T1.100", "3.3.3.3/24"); @@ -388,7 +339,7 @@ DP_START_TEST(ppp_traffic, ppp_traffic_2) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T0"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); /* Create pak we expect to receive on the tx ring */ exp = dp_test_exp_create(test_pak); @@ -438,7 +389,7 @@ DP_START_TEST(ppp_traffic, ppp_traffic_3) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T0"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); /* Create pak we expect to receive on the tx ring */ exp = dp_test_exp_create(test_pak); @@ -462,7 +413,7 @@ DP_START_TEST(ppp_traffic, ppp_traffic_3) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T0"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); /* Create pak we expect to receive on the tx ring */ exp = dp_test_exp_create(test_pak); @@ -523,7 +474,7 @@ DP_START_TEST(ppp_traffic, ppp_traffic_4) (void)dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T0"), DP_TEST_INTF_DEF_SRC_MAC, - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); /* Create pak we expect to receive on the tx ring */ exp = dp_test_exp_create(test_pak); diff --git a/tests/whole_dp/src/dp_test_ppp.h b/tests/whole_dp/src/dp_test_ppp.h new file mode 100644 index 00000000..06b662b3 --- /dev/null +++ 
b/tests/whole_dp/src/dp_test_ppp.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2020, AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + * + */ + +struct rte_mbuf; + +#define CREATE true +#define NO_CREATE false +#define VERIFY true +#define NO_VERIFY false +#define SESS_VALID true +#define SESS_INVALID false + +void +_dp_test_create_pppoe_session(const char *ppp_intf, const char *under_intf, + uint16_t session_id, const char *src_mac, + const char *dst_mac, bool create, bool verify, + bool valid, + const char *file, const char *func, int line); + +#define dp_test_create_pppoe_session(ppp_intf, under_intf, session_id, \ + src_mac, dst_mac) \ + _dp_test_create_pppoe_session(ppp_intf, under_intf, session_id, \ + src_mac, dst_mac, CREATE, \ + VERIFY, SESS_VALID, \ + __FILE__, __func__, __LINE__) + +#define dp_test_create_pppoe_session_nv(ppp_intf, under_intf, session_id, \ + src_mac, dst_mac) \ + _dp_test_create_pppoe_session(ppp_intf, under_intf, session_id, \ + src_mac, dst_mac, CREATE, \ + NO_VERIFY, SESS_VALID, \ + __FILE__, __func__, __LINE__) + +#define dp_test_verify_pppoe_session(ppp_intf, under_intf, session_id, \ + src_mac, dst_mac, valid) \ + _dp_test_create_pppoe_session(ppp_intf, under_intf, session_id, \ + src_mac, dst_mac, NO_CREATE, \ + VERIFY, valid, \ + __FILE__, __func__, __LINE__) + + +struct pppoe_packet * +dp_test_ipv4_pktmbuf_ppp_prepend(struct rte_mbuf *m, + const char *dst_mac, + const char *src_mac, + int v4_len, + uint16_t session); diff --git a/tests/whole_dp/src/dp_test_ptp.c b/tests/whole_dp/src/dp_test_ptp.c index b4582f07..407d24a6 100644 --- a/tests/whole_dp/src/dp_test_ptp.c +++ b/tests/whole_dp/src/dp_test_ptp.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, AT&T Intellectual Property. + * Copyright (c) 2019-2020, AT&T Intellectual Property. * All rights reserved. 
* * SPDX-License-Identifier: LGPL-2.1-only @@ -14,15 +14,17 @@ #include "in_cksum.h" #include "if_var.h" #include "main.h" +#include "ptp.h" #include "dp_test.h" +#include "dp_test_controller.h" #include "dp_test_str.h" -#include "dp_test_lib.h" +#include "dp_test_lib_internal.h" #include "dp_test_lib_exp.h" -#include "dp_test_lib_intf.h" +#include "dp_test_lib_intf_internal.h" #include "dp_test_lib_pkt.h" -#include "dp_test_pktmbuf_lib.h" -#include "dp_test_netlink_state.h" +#include "dp_test_pktmbuf_lib_internal.h" +#include "dp_test_netlink_state_internal.h" #include "dp_test_console.h" #include "dp_test_json_utils.h" @@ -179,9 +181,284 @@ static const struct dp_test_command_t ptp_cmds[] = { false, false, }, + /* test g.8275.2 profiles */ + { + "ptp-ut clock create 0 " + "domain-number=0 " + "number-ports=2 " + "clock-identity=0:1:2:3:4:5:6:7 " + "priority1=128 " + "priority2=128 " + "slave-only=0 " + "two-step=0 " + "profile=g82752-profile", + "", + true, + false, + }, + { + "ptp-ut clock delete 0", + "", + true, + false, + }, + { + "ptp-ut clock create 0 " + "domain-number=0 " + "number-ports=2 " + "clock-identity=0:1:2:3:4:5:6:7 " + "priority1=128 " + "priority2=128 " + "slave-only=0 " + "two-step=0 " + "antenna-delay=100 " + "profile=g82752-apts-profile", + "", + true, + false, + }, + { + "ptp-ut clock delete 0", + "", + true, + false, + }, + /* additional-path */ + { + "ptp-ut clock create 0 " + "domain-number=0 " + "number-ports=1 " + "clock-identity=0:1:2:3:4:5:6:7 " + "priority1=128 " + "priority2=128 " + "slave-only=0 " + "two-step=0 " + "profile=default-profile", + "", + true, + false, + }, + { + "ptp-ut port create 1 " + "clock-id=0 " + "underlying-interface=dpT10 " + "vlan-id=10 " + "log-min-delay-req-interval=1 " + "log-announce-interval=2 " + "announce-receipt-timeout=3 " + "log-min-pdelay-req-interval=1 " + "log-sync-interval=1 " + "ip=192.168.10.1 " + "mac=0:0:0:0:a:1 " + "dscp=0 " + "additional-path=dpT11,100 ", + "", + true, + false, + }, + { + 
"ptp-ut port delete 1 clock-id=0", + "", + true, + false, + }, + { + "ptp-ut clock delete 0", + "", + true, + false, + }, + /* test g.8275.1 profiles */ + { + "ptp-ut clock create 0 " + "domain-number=24 " + "number-ports=2 " + "clock-identity=0:1:2:3:4:5:6:7 " + "priority1=128 " + "priority2=128 " + "slave-only=0 " + "two-step=0 " + "profile=g82751-forwardable-profile", + "", + true, + false, + }, + { + "ptp-ut clock delete 0", + "", + true, + false, + }, + { + "ptp-ut clock create 0 " + "domain-number=24 " + "number-ports=2 " + "clock-identity=0:1:2:3:4:5:6:7 " + "priority1=128 " + "priority2=128 " + "slave-only=0 " + "two-step=0 " + "antenna-delay=100 " + "profile=g82751-non-forwardable-profile", + "", + true, + false, + }, + { + "ptp-ut clock delete 0", + "", + true, + false, + }, + /* test multiple peers with the same IP address */ + { + "ptp-ut clock create 0 " + "domain-number=0 " + "number-ports=2 " + "clock-identity=0:1:2:3:4:5:6:7 " + "priority1=128 " + "priority2=128 " + "slave-only=0 " + "two-step=0 " + "profile=default-profile", + "", + true, + false, + }, + { + "ptp-ut port create 1 " + "clock-id=0 " + "underlying-interface=dpT10 " + "vlan-id=10 " + "log-min-delay-req-interval=1 " + "log-announce-interval=2 " + "announce-receipt-timeout=3 " + "log-min-pdelay-req-interval=1 " + "log-sync-interval=1 " + "ip=192.168.10.1 " + "mac=0:0:0:0:a:1 " + "dscp=0 ", + "", + true, + false, + }, + { + "ptp-ut port create 2 " + "clock-id=0 " + "underlying-interface=dpT11 " + "vlan-id=20 " + "log-min-delay-req-interval=1 " + "log-announce-interval=2 " + "announce-receipt-timeout=3 " + "log-min-pdelay-req-interval=1 " + "log-sync-interval=1 " + "ip=192.168.10.1 " + "mac=0:0:0:0:a:1 " + "dscp=0 ", + "", + true, + false, + }, + { + "ptp-ut peer create clock-id=0 port-id=1 type=master ip=192.168.10.2 ", + "", + true, + false, + }, + { + "ptp-ut peer create clock-id=0 port-id=2 type=master ip=192.168.10.2 ", + "", + true, + false, + }, + { + "ptp-ut peer delete clock-id=0 
port-id=2 type=master ip=192.168.10.2", + "", + true, + false, + }, + { + "ptp-ut peer delete clock-id=0 port-id=1 type=master ip=192.168.10.2", + "", + true, + false, + }, + { + "ptp-ut port delete 2 clock-id=0", + "", + true, + false, + }, + { + "ptp-ut port delete 1 clock-id=0", + "", + true, + false, + }, + { + "ptp-ut clock delete 0", + "", + true, + false, + }, + /* test create/delete of hotplug ports */ + { + "ptp-ut clock create 0 " + "domain-number=0 " + "number-ports=1 " + "clock-identity=0:1:2:3:4:5:6:7 " + "priority1=128 " + "priority2=128 " + "slave-only=0 " + "two-step=0 " + "profile=default-profile", + "", + true, + false, + }, + { + "ptp-ut port create 1 " + "clock-id=0 " + "underlying-interface=dp0ce0p1 " + "vlan-id=10 " + "log-min-delay-req-interval=1 " + "log-announce-interval=2 " + "announce-receipt-timeout=3 " + "log-min-pdelay-req-interval=1 " + "log-sync-interval=1 " + "ip=192.168.10.1 " + "mac=0:0:0:0:a:1 " + "dscp=0 ", + "", + true, + false, + }, + { + "ptp-ut peer create clock-id=0 port-id=1 type=master ip=192.168.10.2 ", + "", + true, + false, + }, + { + "ptp-ut peer delete clock-id=0 port-id=1 type=master ip=192.168.10.2", + "", + true, + false, + }, + { + "ptp-ut port delete 1 clock-id=0", + "", + true, + false, + }, + { + "ptp-ut clock delete 0", + "", + true, + false, + }, }; - DP_DECL_TEST_SUITE(ptp); DP_DECL_TEST_CASE(ptp, ptp_cmds, NULL, NULL); @@ -209,3 +486,502 @@ DP_START_TEST(ptp_cmds, basic) } } } DP_END_TEST; + +DP_START_TEST(ptp_cmds, resolver) +{ + struct rte_mbuf *mbufs[64]; + int i, count; + struct bridge_vlan_set *allowed_vlans = bridge_vlan_set_create(); + + bridge_vlan_set_add(allowed_vlans, 10); + bridge_vlan_set_add(allowed_vlans, 20); + + dp_test_intf_bridge_create("sw0"); + dp_test_intf_vif_create("sw0.10", "sw0", 10); + dp_test_intf_vif_create("sw0.20", "sw0", 20); + dp_test_intf_bridge_enable_vlan_filter("sw0"); + dp_test_intf_bridge_add_port("sw0", "dpT10"); + dp_test_intf_bridge_add_port("sw0", "dpT11"); + 
dp_test_intf_bridge_port_set_vlans("sw0", "dpT10", 0, + allowed_vlans, NULL); + dp_test_intf_bridge_port_set_vlans("sw0", "dpT11", 0, + allowed_vlans, NULL); + + dp_test_nl_add_ip_addr_and_connected("sw0.10", "192.168.10.1/24"); + dp_test_nl_add_ip_addr_and_connected("sw0.20", "192.168.20.1/24"); + + dp_test_send_config_src(dp_test_cont_src_get(), + "ptp-ut clock create 0 " + "domain-number=0 " + "number-ports=2 " + "clock-identity=0:1:2:3:4:5:6:7 " + "priority1=128 " + "priority2=128 " + "slave-only=0 " + "two-step=0 " + "profile=default-profile"); + + dp_test_send_config_src(dp_test_cont_src_get(), + "ptp-ut port create 1 " + "clock-id=0 " + "underlying-interface=dpT10 " + "vlan-id=10 " + "log-min-delay-req-interval=1 " + "log-announce-interval=2 " + "announce-receipt-timeout=3 " + "log-min-pdelay-req-interval=1 " + "log-sync-interval=1 " + "ip=192.168.10.1 " + "mac=0:0:0:0:a:1 " + "dscp=0"); + + dp_test_send_config_src(dp_test_cont_src_get(), + "ptp-ut peer create " + "clock-id=0 " + "port-id=1 " + "type=master " + "ip=192.168.10.2"); + + dp_test_send_config_src(dp_test_cont_src_get(), + "ptp-ut port create 2 " + "clock-id=0 " + "underlying-interface=dpT11 " + "vlan-id=20 " + "log-min-delay-req-interval=1 " + "log-announce-interval=2 " + "announce-receipt-timeout=3 " + "log-min-pdelay-req-interval=1 " + "log-sync-interval=1 " + "ip=192.168.20.1 " + "mac=0:0:0:0:14:1 " + "dscp=0"); + + dp_test_send_config_src(dp_test_cont_src_get(), + "ptp-ut peer create " + "clock-id=0 " + "port-id=2 " + "type=slave " + "ip=192.168.20.2"); + + dp_test_check_state_show("ptp resolver trigger", "", true); + + /* Until we have neighbors, the peers will not install. 
*/ + dp_test_check_state_show("ptp resolver dump", + "{[{\n" + " \"peer\": \"192.168.20.2\",\n" + " \"installed\": false,\n" + " \"port-id\": 2,\n" + " \"type\": \"slave\"\n" + " },{\n" + " \"peer\": \"192.168.10.2\",\n" + " \"installed\": false,\n" + " \"port-id\": 1,\n" + " \"type\": \"master\"\n" + " }\n" + " ]\n" + " }\n", true); + + /* There should be two ARPs per interface */ + count = dp_test_pak_get_from_ring("dpT10", mbufs, 64); + for (i = 0; i < count; i++) + rte_pktmbuf_free(mbufs[i]); + dp_test_assert_internal(count == 2); + + count = dp_test_pak_get_from_ring("dpT11", mbufs, 64); + for (i = 0; i < count; i++) + rte_pktmbuf_free(mbufs[i]); + dp_test_assert_internal(count == 2); + + /* Add peer resolution and re-run the resolver */ + dp_test_netlink_add_neigh("sw0.10", "192.168.10.2", "0:0:0:0:0:1"); + dp_test_netlink_add_neigh("sw0.20", "192.168.20.2", "0:0:0:0:0:2"); + dp_test_check_state_show("ptp resolver trigger", "", true); + + dp_test_check_state_show("ptp resolver dump", + "{[{\n" + " \"peer\": \"192.168.20.2\",\n" + " \"installed\": true,\n" + " \"port-id\": 2,\n" + " \"mac\": \"0:0:0:0:0:2\",\n" + " \"type\": \"slave\"\n" + " },{\n" + " \"peer\": \"192.168.10.2\",\n" + " \"installed\": true,\n" + " \"port-id\": 1,\n" + " \"mac\": \"0:0:0:0:0:1\",\n" + " \"type\": \"master\"\n" + " }\n" + " ]\n" + " }\n", true); + + dp_test_send_config_src(dp_test_cont_src_get(), + "ptp-ut peer delete " + "clock-id=0 port-id=1 " + "type=master " + "ip=192.168.10.2"); + + dp_test_send_config_src(dp_test_cont_src_get(), + "ptp-ut peer delete " + "clock-id=0 " + "port-id=2 " + "type=slave " + "ip=192.168.20.2"); + + dp_test_send_config_src(dp_test_cont_src_get(), + "ptp-ut port delete 1 clock-id=0"); + + dp_test_send_config_src(dp_test_cont_src_get(), + "ptp-ut port delete 2 clock-id=0"); + + dp_test_send_config_src(dp_test_cont_src_get(), + "ptp-ut clock delete 0"); + + dp_test_netlink_del_neigh("sw0.10", "192.168.10.2", "0:0:0:0:0:1"); + 
dp_test_netlink_del_neigh("sw0.20", "192.168.20.2", "0:0:0:0:0:2"); + dp_test_nl_del_ip_addr_and_connected("sw0.10", "192.168.10.1/24"); + dp_test_nl_del_ip_addr_and_connected("sw0.20", "192.168.20.1/24"); + dp_test_intf_bridge_remove_port("sw0", "dpT11"); + dp_test_intf_bridge_remove_port("sw0", "dpT10"); + dp_test_intf_vif_del("sw0.10", 10); + dp_test_intf_vif_del("sw0.20", 20); + dp_test_intf_bridge_del("sw0"); + bridge_vlan_set_free(allowed_vlans); + +} DP_END_TEST; + +DP_START_TEST(ptp_cmds, resolver_two_uplinks) +{ + struct bridge_vlan_set *allowed_vlans = bridge_vlan_set_create(); + + bridge_vlan_set_add(allowed_vlans, 10); + bridge_vlan_set_add(allowed_vlans, 20); + + dp_test_intf_bridge_create("sw0"); + dp_test_intf_vif_create("sw0.10", "sw0", 10); + dp_test_intf_vif_create("sw0.20", "sw0", 20); + dp_test_intf_bridge_enable_vlan_filter("sw0"); + dp_test_intf_bridge_add_port("sw0", "dpT10"); + dp_test_intf_bridge_add_port("sw0", "dpT11"); + dp_test_intf_bridge_port_set_vlans("sw0", "dpT10", 0, + allowed_vlans, NULL); + dp_test_intf_bridge_port_set_vlans("sw0", "dpT11", 0, + allowed_vlans, NULL); + + dp_test_nl_add_ip_addr_and_connected("sw0.10", "192.168.10.1/24"); + dp_test_nl_add_ip_addr_and_connected("sw0.20", "192.168.20.1/24"); + dp_test_netlink_add_neigh("sw0.10", "192.168.10.2", "0:0:0:0:0:1"); + dp_test_netlink_add_neigh("sw0.20", "192.168.20.2", "0:0:0:0:0:2"); + + dp_test_send_config_src(dp_test_cont_src_get(), + "ptp-ut clock create 0 " + "domain-number=0 " + "number-ports=2 " + "clock-identity=0:1:2:3:4:5:6:7 " + "priority1=128 " + "priority2=128 " + "slave-only=0 " + "two-step=0 " + "profile=default-profile"); + + dp_test_send_config_src(dp_test_cont_src_get(), + "ptp-ut port create 1 " + "clock-id=0 " + "underlying-interface=dpT10 " + "vlan-id=10 " + "log-min-delay-req-interval=1 " + "log-announce-interval=2 " + "announce-receipt-timeout=3 " + "log-min-pdelay-req-interval=1 " + "log-sync-interval=1 " + "ip=192.168.10.1 " + "mac=0:0:0:0:a:1 " + 
"dscp=0"); + + dp_test_send_config_src(dp_test_cont_src_get(), + "ptp-ut peer create " + "clock-id=0 " + "port-id=1 " + "type=master " + "ip=192.168.30.2"); + + dp_test_send_config_src(dp_test_cont_src_get(), + "ptp-ut port create 2 " + "clock-id=0 " + "underlying-interface=dpT11 " + "vlan-id=20 " + "log-min-delay-req-interval=1 " + "log-announce-interval=2 " + "announce-receipt-timeout=3 " + "log-min-pdelay-req-interval=1 " + "log-sync-interval=1 " + "ip=192.168.20.1 " + "mac=0:0:0:0:14:1 " + "dscp=0"); + + dp_test_send_config_src(dp_test_cont_src_get(), + "ptp-ut peer create " + "clock-id=0 " + "port-id=2 " + "type=master " + "ip=192.168.30.2"); + + /* Until we have neighbors, the peers will not install. */ + dp_test_check_state_show("ptp resolver dump", + "{[{\n" + " \"peer\": \"192.168.30.2\",\n" + " \"installed\": false,\n" + " \"port-id\": 1,\n" + " \"type\": \"master\"\n" + " },[{\n" + " \"peer\": \"192.168.30.2\",\n" + " \"installed\": false,\n" + " \"port-id\": 2,\n" + " \"type\": \"master\"\n" + " }\n" + " ]\n" + " ]\n" + " }\n", true); + + /* Add route to peer via sw0.10 and run the resolver */ + dp_test_netlink_add_route("192.168.30.0/24 nh 192.168.10.2 int:sw0.10"); + dp_test_check_state_show("ptp resolver trigger", "", true); + + dp_test_check_state_show("ptp resolver dump", + "{[{\n" + " \"peer\": \"192.168.30.2\",\n" + " \"installed\": true,\n" + " \"port-id\": 1,\n" + " \"mac\": \"0:0:a5:0:3:e9\",\n" + " \"type\": \"master\"\n" + " },[{\n" + " \"peer\": \"192.168.30.2\",\n" + " \"installed\": false,\n" + " \"port-id\": 2,\n" + " \"type\": \"master\"\n" + " }\n" + " ]\n" + " ]\n" + " }\n", true); + + /* Move route to peer to sw0.20 and re-run the resolver */ + dp_test_netlink_del_route("192.168.30.0/24 nh 192.168.10.2 int:sw0.10"); + dp_test_netlink_add_route("192.168.30.0/24 nh 192.168.20.2 int:sw0.20"); + dp_test_check_state_show("ptp resolver trigger", "", true); + + dp_test_check_state_show("ptp resolver dump", + "{[{\n" + " \"peer\": 
\"192.168.30.2\",\n" + " \"installed\": false,\n" + " \"port-id\": 1,\n" + " \"type\": \"master\"\n" + " },[{\n" + " \"peer\": \"192.168.30.2\",\n" + " \"installed\": true,\n" + " \"port-id\": 2,\n" + " \"mac\": \"0:0:a5:0:3:e9\",\n" + " \"type\": \"master\"\n" + " }\n" + " ]\n" + " ]\n" + " }\n", true); + + /* And if there are no routes, no peer should be active */ + dp_test_netlink_del_route("192.168.30.0/24 nh 192.168.20.2 int:sw0.20"); + dp_test_check_state_show("ptp resolver trigger", "", true); + dp_test_check_state_show("ptp resolver dump", + "{[{\n" + " \"peer\": \"192.168.30.2\",\n" + " \"installed\": false,\n" + " \"port-id\": 1,\n" + " \"type\": \"master\"\n" + " },[{\n" + " \"peer\": \"192.168.30.2\",\n" + " \"installed\": false,\n" + " \"port-id\": 2,\n" + " \"type\": \"master\"\n" + " }\n" + " ]\n" + " ]\n" + " }\n", true); + + dp_test_send_config_src(dp_test_cont_src_get(), + "ptp-ut peer delete " + "clock-id=0 port-id=1 " + "type=master " + "ip=192.168.30.2"); + + dp_test_send_config_src(dp_test_cont_src_get(), + "ptp-ut peer delete " + "clock-id=0 " + "port-id=2 " + "type=master " + "ip=192.168.30.2"); + + dp_test_send_config_src(dp_test_cont_src_get(), + "ptp-ut port delete 1 clock-id=0"); + + dp_test_send_config_src(dp_test_cont_src_get(), + "ptp-ut port delete 2 clock-id=0"); + + dp_test_send_config_src(dp_test_cont_src_get(), + "ptp-ut clock delete 0"); + + dp_test_netlink_del_neigh("sw0.10", "192.168.10.2", "0:0:0:0:0:1"); + dp_test_netlink_del_neigh("sw0.20", "192.168.20.2", "0:0:0:0:0:2"); + dp_test_nl_del_ip_addr_and_connected("sw0.10", "192.168.10.1/24"); + dp_test_nl_del_ip_addr_and_connected("sw0.20", "192.168.20.1/24"); + dp_test_intf_bridge_remove_port("sw0", "dpT11"); + dp_test_intf_bridge_remove_port("sw0", "dpT10"); + dp_test_intf_vif_del("sw0.10", 10); + dp_test_intf_vif_del("sw0.20", 20); + dp_test_intf_bridge_del("sw0"); + bridge_vlan_set_free(allowed_vlans); + +} DP_END_TEST; + +DP_START_TEST(ptp_cmds, resolver_edge_cases) +{ + 
struct bridge_vlan_set *allowed_vlans = bridge_vlan_set_create(); + + bridge_vlan_set_add(allowed_vlans, 10); + bridge_vlan_set_add(allowed_vlans, 20); + + dp_test_send_config_src(dp_test_cont_src_get(), + "ptp-ut clock create 0 " + "domain-number=0 " + "number-ports=2 " + "clock-identity=0:1:2:3:4:5:6:7 " + "priority1=128 " + "priority2=128 " + "slave-only=0 " + "two-step=0 " + "profile=default-profile"); + + dp_test_send_config_src(dp_test_cont_src_get(), + "ptp-ut port create 1 " + "clock-id=0 " + "underlying-interface=dpT10 " + "vlan-id=10 " + "log-min-delay-req-interval=1 " + "log-announce-interval=2 " + "announce-receipt-timeout=3 " + "log-min-pdelay-req-interval=1 " + "log-sync-interval=1 " + "ip=192.168.10.1 " + "mac=0:0:0:0:a:1 " + "dscp=0"); + + dp_test_send_config_src(dp_test_cont_src_get(), + "ptp-ut peer create " + "clock-id=0 " + "port-id=1 " + "type=master " + "ip=192.168.10.2"); + + dp_test_send_config_src(dp_test_cont_src_get(), + "ptp-ut port create 2 " + "clock-id=0 " + "underlying-interface=dpT11 " + "vlan-id=20 " + "log-min-delay-req-interval=1 " + "log-announce-interval=2 " + "announce-receipt-timeout=3 " + "log-min-pdelay-req-interval=1 " + "log-sync-interval=1 " + "ip=192.168.20.1 " + "mac=0:0:0:0:14:1 " + "dscp=0"); + + dp_test_send_config_src(dp_test_cont_src_get(), + "ptp-ut peer create " + "clock-id=0 " + "port-id=2 " + "type=slave " + "ip=192.168.20.2"); + + /* missing switch */ + dp_test_check_state_show("ptp resolver trigger", "", true); + + /* bridge configured */ + dp_test_intf_bridge_create("sw0"); + dp_test_intf_bridge_enable_vlan_filter("sw0"); + dp_test_intf_bridge_add_port("sw0", "dpT10"); + dp_test_intf_bridge_add_port("sw0", "dpT11"); + dp_test_intf_bridge_port_set_vlans("sw0", "dpT10", 0, + allowed_vlans, NULL); + dp_test_intf_bridge_port_set_vlans("sw0", "dpT11", 0, + allowed_vlans, NULL); + dp_test_check_state_show("ptp resolver trigger", "", true); + + /* routing interfaces */ + dp_test_intf_vif_create("sw0.10", "sw0", 
10); + dp_test_intf_vif_create("sw0.20", "sw0", 20); + dp_test_check_state_show("ptp resolver trigger", "", true); + + dp_test_nl_add_ip_addr_and_connected("sw0.10", "192.168.10.1/24"); + dp_test_nl_add_ip_addr_and_connected("sw0.20", "192.168.20.1/24"); + dp_test_netlink_add_neigh("sw0.10", "192.168.10.2", "0:0:0:0:0:1"); + dp_test_netlink_add_neigh("sw0.20", "192.168.20.2", "0:0:0:0:0:2"); + dp_test_check_state_show("ptp resolver trigger", "", true); + + /* admin down interfaces */ + dp_test_netlink_set_interface_admin_status("dpT10", false); + dp_test_netlink_set_interface_admin_status("dpT11", false); + dp_test_check_state_show("ptp resolver trigger", "", true); + + /* admin up interfaces */ + dp_test_netlink_set_interface_admin_status("dpT10", true); + dp_test_netlink_set_interface_admin_status("dpT11", true); + dp_test_check_state_show("ptp resolver trigger", "", true); + + dp_test_check_state_show("ptp resolver dump", + "{[{\n" + " \"peer\": \"192.168.20.2\",\n" + " \"installed\": true,\n" + " \"port-id\": 2,\n" + " \"mac\": \"0:0:0:0:0:2\",\n" + " \"type\": \"slave\"\n" + " },{\n" + " \"peer\": \"192.168.10.2\",\n" + " \"installed\": true,\n" + " \"port-id\": 1,\n" + " \"mac\": \"0:0:0:0:0:1\",\n" + " \"type\": \"master\"\n" + " }\n" + " ]\n" + " }\n", true); + + dp_test_send_config_src(dp_test_cont_src_get(), + "ptp-ut peer delete " + "clock-id=0 port-id=1 " + "type=master " + "ip=192.168.10.2"); + + dp_test_send_config_src(dp_test_cont_src_get(), + "ptp-ut peer delete " + "clock-id=0 " + "port-id=2 " + "type=slave " + "ip=192.168.20.2"); + + dp_test_send_config_src(dp_test_cont_src_get(), + "ptp-ut port delete 1 clock-id=0"); + + dp_test_send_config_src(dp_test_cont_src_get(), + "ptp-ut port delete 2 clock-id=0"); + + dp_test_send_config_src(dp_test_cont_src_get(), + "ptp-ut clock delete 0"); + + dp_test_netlink_del_neigh("sw0.10", "192.168.10.2", "0:0:0:0:0:1"); + dp_test_netlink_del_neigh("sw0.20", "192.168.20.2", "0:0:0:0:0:2"); + 
dp_test_nl_del_ip_addr_and_connected("sw0.10", "192.168.10.1/24"); + dp_test_nl_del_ip_addr_and_connected("sw0.20", "192.168.20.1/24"); + dp_test_intf_bridge_remove_port("sw0", "dpT11"); + dp_test_intf_bridge_remove_port("sw0", "dpT10"); + dp_test_intf_vif_del("sw0.10", 10); + dp_test_intf_vif_del("sw0.20", 20); + dp_test_intf_bridge_del("sw0"); + bridge_vlan_set_free(allowed_vlans); + +} DP_END_TEST; diff --git a/tests/whole_dp/src/dp_test_qos_basic.c b/tests/whole_dp/src/dp_test_qos_basic.c index f64814e2..04564de4 100644 --- a/tests/whole_dp/src/dp_test_qos_basic.c +++ b/tests/whole_dp/src/dp_test_qos_basic.c @@ -1,5 +1,5 @@ /** - * Copyright (c) 2018-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2018-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2018 by AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only @@ -19,11 +19,11 @@ #include "dp_test.h" #include "dp_test_str.h" -#include "dp_test_lib.h" +#include "dp_test_lib_internal.h" #include "dp_test_lib_exp.h" -#include "dp_test_lib_intf.h" -#include "dp_test_pktmbuf_lib.h" -#include "dp_test_netlink_state.h" +#include "dp_test_lib_intf_internal.h" +#include "dp_test_pktmbuf_lib_internal.h" +#include "dp_test_netlink_state_internal.h" #include "dp_test_console.h" #include "dp_test_controller.h" #include "dp_test_json_utils.h" @@ -65,14 +65,14 @@ DP_DECL_TEST_CASE(qos_basic, qos_basic_ipv4, NULL, NULL); */ const char *qos_lib_selftest_cmds[] = { - "port subports 1 pipes 1 profiles 2 overhead 24", - "subport 0 rate 1250000000 size 5000000 period 40", + "port subports 1 pipes 1 profiles 2 overhead 24 ql_packets", + "subport 0 rate 1250000000 size 5000000 period 40000", "subport 0 queue 0 rate 1250000000 size 5000000", "subport 0 queue 1 rate 1250000000 size 5000000", "subport 0 queue 2 rate 1250000000 size 5000000", "subport 0 queue 3 rate 1250000000 size 5000000", "vlan 0 0", - "profile 0 rate 12500000 size 50000 period 10", 
+ "profile 0 rate 12500000 size 50000 period 10000", "profile 0 queue 0 rate 12500000 size 50000", "profile 0 queue 1 rate 12500000 size 50000", "profile 0 queue 2 rate 12500000 size 50000", @@ -175,9 +175,10 @@ DP_START_TEST(qos_basic_ipv4, qos_lib_selftest) "tc_rates for tc %u\n", tc); value = json_object_get_int(j_obj); json_object_put(j_obj); - dp_test_fail_unless(value == QOS_LIB_SELFTEST_PROFILE_RATE, + dp_test_fail_unless(value == + QOS_LIB_SELFTEST_PROFILE_RATE, "failed to get correct tc_rate value for " - "tc %u\n", tc); + "tc %u - value %u\n", tc, value); } /* Can we find the params wrr_weights? */ @@ -342,7 +343,7 @@ DP_START_TEST(qos_basic_ipv4, qos_lib_selftest) */ const char *basic_pkt_fwd_cmds[] = { - "port subports 1 pipes 1 profiles 2 overhead 24", + "port subports 1 pipes 1 profiles 2 overhead 24 ql_packets", "subport 0 rate 1250000000 size 5000000 period 40", "subport 0 queue 0 rate 1250000000 size 5000000", "subport 0 queue 1 rate 1250000000 size 5000000", @@ -424,7 +425,7 @@ DP_START_TEST(qos_basic_ipv4, basic_pkt_fwd) */ const char *basic_pkt_classify_cmds[] = { - "port subports 1 pipes 2 profiles 1 overhead 24", + "port subports 1 pipes 2 profiles 1 overhead 24 ql_packets", "subport 0 rate 1250000000 size 5000000 period 40", "subport 0 queue 0 rate 1250000000 size 5000000", "subport 0 queue 1 rate 1250000000 size 5000000", @@ -598,7 +599,7 @@ DP_START_TEST(qos_basic_ipv4, basic_pkt_classify) */ const char *basic_dscp_map_cmds[] = { - "port subports 1 pipes 1 profiles 2 overhead 24", + "port subports 1 pipes 1 profiles 2 overhead 24 ql_packets", "subport 0 rate 1250000000 size 5000000 period 40", "subport 0 queue 0 rate 1250000000 size 5000000", "subport 0 queue 1 rate 1250000000 size 5000000", @@ -712,70 +713,70 @@ const char *basic_dscp_map_cmds[] = { struct tc_queue_pair dscp_map[] = { /* tc queue */ - { 3, 7 }, /* DSCP = 0 */ - { 3, 7 }, - { 3, 6 }, - { 3, 6 }, - { 3, 5 }, - { 3, 5 }, - { 3, 4 }, - { 3, 4 }, - { 3, 3 }, - { 3, 3 }, - { 3, 
2 }, - { 3, 2 }, - { 3, 1 }, - { 3, 1 }, - { 3, 0 }, - { 3, 0 }, /* DSCP = 15 */ - { 2, 7 }, /* DSCP = 16 */ - { 2, 7 }, - { 2, 6 }, - { 2, 6 }, - { 2, 5 }, - { 2, 5 }, - { 2, 4 }, - { 2, 4 }, - { 2, 3 }, - { 2, 3 }, - { 2, 2 }, - { 2, 2 }, - { 2, 1 }, - { 2, 1 }, - { 2, 0 }, - { 2, 0 }, /* DSCP = 31 */ - { 1, 7 }, /* DSCP = 32 */ - { 1, 7 }, - { 1, 6 }, - { 1, 6 }, - { 1, 5 }, - { 1, 5 }, - { 1, 4 }, - { 1, 4 }, - { 1, 3 }, - { 1, 3 }, - { 1, 2 }, - { 1, 2 }, - { 1, 1 }, - { 1, 1 }, - { 1, 0 }, - { 1, 0 }, /* DSCP = 47 */ - { 0, 7 }, /* DSCP = 48 */ - { 0, 7 }, - { 0, 6 }, - { 0, 6 }, - { 0, 5 }, - { 0, 5 }, - { 0, 4 }, - { 0, 4 }, - { 0, 3 }, - { 0, 3 }, - { 0, 2 }, - { 0, 2 }, - { 0, 1 }, - { 0, 1 }, - { 0, 0 }, - { 0, 0 } /* DSCP = 63 */ + { 3, 7, 0}, /* DSCP = 0 */ + { 3, 7, 0}, + { 3, 6, 0}, + { 3, 6, 0}, + { 3, 5, 0}, + { 3, 5, 0}, + { 3, 4, 0}, + { 3, 4, 0}, + { 3, 3, 0}, + { 3, 3, 0}, + { 3, 2, 0}, + { 3, 2, 0}, + { 3, 1, 0}, + { 3, 1, 0}, + { 3, 0, 0}, + { 3, 0, 0}, /* DSCP = 15 */ + { 2, 7, 0}, /* DSCP = 16 */ + { 2, 7, 0}, + { 2, 6, 0}, + { 2, 6, 0}, + { 2, 5, 0}, + { 2, 5, 0}, + { 2, 4, 0}, + { 2, 4, 0}, + { 2, 3, 0}, + { 2, 3, 0}, + { 2, 2, 0}, + { 2, 2, 0}, + { 2, 1, 0}, + { 2, 1, 0}, + { 2, 0, 0}, + { 2, 0, 0}, /* DSCP = 31 */ + { 1, 7, 0}, /* DSCP = 32 */ + { 1, 7, 0}, + { 1, 6, 0}, + { 1, 6, 0}, + { 1, 5, 0}, + { 1, 5, 0}, + { 1, 4, 0}, + { 1, 4, 0}, + { 1, 3, 0}, + { 1, 3, 0}, + { 1, 2, 0}, + { 1, 2, 0}, + { 1, 1, 0}, + { 1, 1, 0}, + { 1, 0, 0}, + { 1, 0, 0}, /* DSCP = 47 */ + { 0, 7, 0}, /* DSCP = 48 */ + { 0, 7, 0}, + { 0, 6, 0}, + { 0, 6, 0}, + { 0, 5, 0}, + { 0, 5, 0}, + { 0, 4, 0}, + { 0, 4, 0}, + { 0, 3, 0}, + { 0, 3, 0}, + { 0, 2, 0}, + { 0, 2, 0}, + { 0, 1, 0}, + { 0, 1, 0}, + { 0, 0, 0}, + { 0, 0, 0} /* DSCP = 63 */ }; DP_START_TEST(qos_basic_ipv4, basic_dscp_map) @@ -837,7 +838,7 @@ DP_START_TEST(qos_basic_ipv4, basic_dscp_map) */ const char *basic_vlan_pkt_fwd_cmds[] = { - "port subports 2 pipes 1 profiles 3 overhead 24", + "port 
subports 2 pipes 1 profiles 3 overhead 24 ql_packets", "subport 0 rate 1250000000 size 5000000 period 40", "subport 0 queue 0 rate 1250000000 size 5000000", "subport 0 queue 1 rate 1250000000 size 5000000", @@ -965,7 +966,7 @@ DP_START_TEST(qos_basic_ipv4, basic_vlan_pkt_fwd) */ const char *basic_pkt_remark_cmds[] = { - "port subports 1 pipes 2 profiles 1 overhead 24", + "port subports 1 pipes 2 profiles 1 overhead 24 ql_packets", "subport 0 rate 1250000000 size 5000000 period 40", "subport 0 queue 0 rate 1250000000 size 5000000", "subport 0 queue 1 rate 1250000000 size 5000000", @@ -1063,7 +1064,7 @@ DP_START_TEST(qos_basic_ipv4, basic_pkt_remark) */ const char *basic_pkt_drop_cmds[] = { - "port subports 1 pipes 1 profiles 1 overhead 24", + "port subports 1 pipes 1 profiles 1 overhead 24 ql_packets", "subport 0 rate 1250000000 size 5000000 period 40", "subport 0 queue 0 rate 1250000000 size 5000000", "param 0 limit packets 1", @@ -1132,7 +1133,7 @@ DP_START_TEST(qos_basic_ipv4, basic_pkt_drop) */ const char *vlan_subport_map_cmds[] = { - "port subports 3 pipes 1 profiles 3 overhead 24", + "port subports 3 pipes 1 profiles 3 overhead 24 ql_packets", "subport 0 rate 1250000000 size 5000000 period 40", "subport 0 queue 0 rate 1250000000 size 5000000", "subport 0 queue 1 rate 1250000000 size 5000000", @@ -1242,7 +1243,7 @@ DP_START_TEST(qos_basic_ipv4, vlan_subport_map) */ const char *npf_rules_check_cmds[] = { - "port subports 1 pipes 3 profiles 3 overhead 24", + "port subports 1 pipes 3 profiles 3 overhead 24 ql_packets", "subport 0 rate 1250000000 size 5000000 period 40", "subport 0 queue 0 rate 1250000000 size 5000000", "subport 0 queue 1 rate 1250000000 size 5000000", @@ -1268,8 +1269,8 @@ const char *npf_rules_check_cmds[] = { "pipe 0 1 1", "match 0 1 action=accept src-addr=1.1.1.11/32 handle=tag(1)", "pipe 0 2 2", - "match 0 2 action=accept proto=6 dst-addr=2.2.2.11/32 dst-port=999 " - "handle=tag(2)", + "match 0 2 action=accept proto-final=6 
dst-addr=2.2.2.11/32 " + "dst-port=999 handle=tag(2)", "enable" }; @@ -1299,9 +1300,10 @@ DP_START_TEST(qos_basic_ipv4, npf_rules_check) rc = json_object_object_get_ex(j_obj, "2", &j_rule); dp_test_fail_unless(rc, "failed to get rule '2'\n"); dp_test_qos_check_rule(j_rule, "pass", - "action=accept proto=6 dst-addr=2.2.2.11/32 " + "action=accept proto-final=6 " + "dst-addr=2.2.2.11/32 " "dst-port=999 handle=tag(2)", - "proto 6 to 2.2.2.11/32 port 999", + "proto-final 6 to 2.2.2.11/32 port 999", "apply tag(2)", 0, 0, debug); json_object_put(j_obj); diff --git a/tests/whole_dp/src/dp_test_qos_burst.c b/tests/whole_dp/src/dp_test_qos_burst.c new file mode 100644 index 00000000..abb16425 --- /dev/null +++ b/tests/whole_dp/src/dp_test_qos_burst.c @@ -0,0 +1,289 @@ +/** + * Copyright (c) 2020, AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + * + */ + +#include +#include + +#include "ip6_funcs.h" +#include "ip_funcs.h" +#include "in_cksum.h" +#include "if_var.h" +#include "main.h" + +#include "dp_test.h" +#include "dp_test_str.h" +#include "dp_test_lib_internal.h" +#include "dp_test_lib_exp.h" +#include "dp_test_lib_intf_internal.h" +#include "dp_test_pktmbuf_lib_internal.h" +#include "dp_test_netlink_state_internal.h" +#include "dp_test_console.h" +#include "dp_test_controller.h" +#include "dp_test_json_utils.h" +#include "dp_test_npf_lib.h" +#include "dp_test_npf_fw_lib.h" +#include "dp_test_npf_sess_lib.h" + +#include "dp_test_qos_lib.h" + +DP_DECL_TEST_SUITE(qos_burst); + +DP_DECL_TEST_CASE(qos_burst, qos_burst1, NULL, NULL); + +/* + * Simple QoS test-setup + * + * +-----+1.1.1.11 1.1.1.1+-----+2.2.2.2 2.2.2.11+-----+ + * | | dp1T0| |dp2T1 | | + * | src |------------------------| uut |------------------------| dst | + * | |aa:bb:cc:dd:01:a1 | | aa:bb:cc:dd:02:b1| | + * +-----+ 00:00:a4:00:00:64+-----+00:00:a4:00:00:6a +-----+ + * + * QoS is configured on dp2T1. 
Packets are received on dp1T0 and routed + * out of dp2T1 where they receive the "QoS treatment". + */ + +/* + * set policy qos name 50M shaper default NO_COS_PROFILE_50M + * set policy qos name 50M shaper frame-overhead 28 + * set policy qos name 50M shaper profile NO_COS_PROFILE_50M bandwidth 50000kbit + * set policy qos name 50M shaper profile NO_COS_PROFILE_50M burst 6250 + * set policy qos name 50M shaper profile NO_COS_PROFILE_50M map dscp 0-63 to 24 + * set policy qos name 50M shaper profile NO_COS_PROFILE_50M queue 24 traffic-class 3 + * set policy qos name 50M shaper traffic-class 3 queue-limit 1024 + * + * The above cli config would be translated into the following set of + * commands from vplaned. + */ +const char *burst_cmds[] = { + "port subports 1 pipes 1 profiles 1 overhead 28 ql_packets", + "subport 0 rate 1250000000 size 0 period 40", + "subport 0 queue 0 percent 100 size 0", + "param subport 0 0 limit packets 64", + "subport 0 queue 1 percent 100 size 0", + "param subport 0 1 limit packets 64", + "subport 0 queue 2 percent 100 size 0", + "param subport 0 2 limit packets 64", + "subport 0 queue 3 percent 100 size 0", + "param subport 0 3 limit packets 1024", + "vlan 0 0", + "profile 0 rate 6250000 size 6250 period 10", + "profile 0 queue 0 percent 100 size 0", + "profile 0 queue 1 percent 100 size 0", + "profile 0 queue 2 percent 100 size 0", + "profile 0 queue 3 percent 100 size 0", + "profile 0 dscp 0 0x3", + "profile 0 dscp 1 0x3", + "profile 0 dscp 2 0x3", + "profile 0 dscp 3 0x3", + "profile 0 dscp 4 0x3", + "profile 0 dscp 5 0x3", + "profile 0 dscp 6 0x3", + "profile 0 dscp 7 0x3", + "profile 0 dscp 8 0x3", + "profile 0 dscp 9 0x3", + "profile 0 dscp 10 0x3", + "profile 0 dscp 11 0x3", + "profile 0 dscp 12 0x3", + "profile 0 dscp 13 0x3", + "profile 0 dscp 14 0x3", + "profile 0 dscp 15 0x3", + "profile 0 dscp 16 0x3", + "profile 0 dscp 17 0x3", + "profile 0 dscp 18 0x3", + "profile 0 dscp 19 0x3", + "profile 0 dscp 20 0x3", + "profile 0 dscp 
21 0x3", + "profile 0 dscp 22 0x3", + "profile 0 dscp 23 0x3", + "profile 0 dscp 24 0x3", + "profile 0 dscp 25 0x3", + "profile 0 dscp 26 0x3", + "profile 0 dscp 27 0x3", + "profile 0 dscp 28 0x3", + "profile 0 dscp 29 0x3", + "profile 0 dscp 30 0x3", + "profile 0 dscp 31 0x3", + "profile 0 dscp 32 0x3", + "profile 0 dscp 33 0x3", + "profile 0 dscp 34 0x3", + "profile 0 dscp 35 0x3", + "profile 0 dscp 36 0x3", + "profile 0 dscp 37 0x3", + "profile 0 dscp 38 0x3", + "profile 0 dscp 39 0x3", + "profile 0 dscp 40 0x3", + "profile 0 dscp 41 0x3", + "profile 0 dscp 42 0x3", + "profile 0 dscp 43 0x3", + "profile 0 dscp 44 0x3", + "profile 0 dscp 45 0x3", + "profile 0 dscp 46 0x3", + "profile 0 dscp 47 0x3", + "profile 0 dscp 48 0x3", + "profile 0 dscp 49 0x3", + "profile 0 dscp 50 0x3", + "profile 0 dscp 51 0x3", + "profile 0 dscp 52 0x3", + "profile 0 dscp 53 0x3", + "profile 0 dscp 54 0x3", + "profile 0 dscp 55 0x3", + "profile 0 dscp 56 0x3", + "profile 0 dscp 57 0x3", + "profile 0 dscp 58 0x3", + "profile 0 dscp 59 0x3", + "profile 0 dscp 60 0x3", + "profile 0 dscp 61 0x3", + "profile 0 dscp 62 0x3", + "profile 0 dscp 63 0x3", + "profile 0 queue 0x3 wrr-weight 1 24", + "pipe 0 0 0", + "enable" +}; + +static void _dp_test_qos_burst_send(int count, bool wait, + const char *file, + const char *func, int line) +{ + struct rte_mbuf *test_pak; + struct dp_test_pkt_desc_t v4_pkt_desc = { + .text = "TCP IPv4", + .len = 458, /* gives 512 byte packets */ + .ether_type = RTE_ETHER_TYPE_IPV4, + .l3_src = "1.1.1.11", + .l2_src = "aa:bb:cc:dd:1:a1", + .l3_dst = "2.2.2.11", + .l2_dst = "aa:bb:cc:dd:2:b1", + .proto = IPPROTO_TCP, + .l4 = { + .tcp = { + .sport = 1000, + .dport = 1001, + .flags = 0 + } + }, + .rx_intf = "dp1T0", + .tx_intf = "dp2T1" + }; + int i; + + for (i = 0; i < count; i++) { + test_pak = dp_test_v4_pkt_from_desc(&v4_pkt_desc); + dp_test_pak_add_to_ring("dp1T0", &test_pak, 1, false); + } +} + +#define dp_test_qos_burst_send(count, wait) \ + 
_dp_test_qos_burst_send(count, wait, \ + __FILE__, __func__, __LINE__) + +/* Get packets from if_name's receive ring, free them, return the count */ +static int dp_test_qos_burst_receive(const char *if_name) +{ + struct rte_mbuf *bufs[64]; + int count; + int i; + + count = dp_test_pak_get_from_ring(if_name, + &bufs[0], + 64); + for (i = 0; i < count; i++) + rte_pktmbuf_free(bufs[i]); + + return count; +} + +/* + * We want to simulate the following setup: + * A 50mbit shaper, with a 6250 byte burst per ms + * A queue of 1024 packets. + * + * This is what the burst_cmds above give us. + * + * Qos expects 24 bytes other than IP + payload + * + * 50000000 == circuit speed + * + * Sending packets at 99% of the circuit speed: + * 49500000 == offered load (99%) + * 49500000/8 = 6187500 bytes per sec + * 6187500/(512 + 24) = 11543 pps + * 11543 pps = 86.6325 usec per packet. + * + * 11543 *15 = 173145 pkts in 15 secs. + */ +DP_START_TEST_DONT_RUN(qos_burst1, qos_burst1) +{ + bool debug = (dp_test_debug_get() == 2 ? true : false); + int received = 0; + int sent = 0; + int sleep_count = 0; + struct timeval start, now, diff; + float time_diff_usec; + float time_per_pak = 86.6325; + int total_to_send = 173145; + int should_have_sent; + int to_send; + int sent_burst_sizes[33] = { 0 }; + + qos_lib_test_setup(); + + dp_test_qos_debug(debug); + + /* Set up QoS config on dp2T1 */ + dp_test_qos_attach_config_to_if("dp2T1", burst_cmds, debug); + + dp_test_qos_check_for_zero_counters("dp2T1", debug); + + /* Send packets - but with no verify for speed */ + gettimeofday(&start, NULL); + while (true) { + /* + * Spin round sending packets, trying to make sure + * that we average them out at the required speed.
+ */ + gettimeofday(&now, NULL); + timersub(&now, &start, &diff); + time_diff_usec = diff.tv_sec * USEC_PER_SEC + diff.tv_usec; + should_have_sent = (int)(time_diff_usec / time_per_pak); + + to_send = should_have_sent - sent; + if (to_send > 0) { + if (to_send > 32) { + dp_test_qos_burst_send(32, false); + sent += 32; + sent_burst_sizes[32]++; + } else { + dp_test_qos_burst_send(to_send, false); + sent += to_send; + sent_burst_sizes[to_send]++; + } + } + received += dp_test_qos_burst_receive("dp2T1"); + + if (sent >= total_to_send) + break; + } + + while (sent != received && sleep_count < 100000) { + received += dp_test_qos_burst_receive("dp2T1"); + usleep(1); + sleep_count++; + } + + /* And fail if there are any drops */ + dp_test_fail_unless(received == sent, + "wrong counts: sent %d rx %d missing %d", + sent, received, sent - received); + + /* Cleanup */ + dp_test_qos_delete_config_from_if("dp2T1", debug); + dp_test_qos_debug(false); + + qos_lib_test_teardown(); + +} DP_END_TEST; diff --git a/tests/whole_dp/src/dp_test_qos_class.c b/tests/whole_dp/src/dp_test_qos_class.c new file mode 100644 index 00000000..590ad59c --- /dev/null +++ b/tests/whole_dp/src/dp_test_qos_class.c @@ -0,0 +1,1098 @@ +/** + * Copyright (c) 2019-2020, AT&T Intellectual Property. All rights reserved.
+ * + * @file dp_test_qos_class.c + * @brief Basic QoS classification tests + * + * SPDX-License-Identifier: LGPL-2.1-only + */ + +#include +#include + +#include "ip6_funcs.h" +#include "ip_funcs.h" +#include "in_cksum.h" +#include "if_var.h" +#include "main.h" +#include "fal_plugin.h" + +#include "dp_test.h" +#include "dp_test_str.h" +#include "dp_test_lib_internal.h" +#include "dp_test_lib_exp.h" +#include "dp_test_lib_intf_internal.h" +#include "dp_test_pktmbuf_lib_internal.h" +#include "dp_test_netlink_state_internal.h" +#include "dp_test_console.h" +#include "dp_test_controller.h" +#include "dp_test_json_utils.h" +#include "dp_test_npf_lib.h" +#include "dp_test_npf_fw_lib.h" +#include "dp_test_npf_sess_lib.h" + +#include "dp_test_qos_lib.h" + +DP_DECL_TEST_SUITE(qos_class); + +DP_DECL_TEST_CASE(qos_class, qos_class_basic, NULL, NULL); + +static int +dp_test_qos_class_hw_switch_if(const char *if_name, bool enable) +{ + char real_if_name[IFNAMSIZ]; + struct ifnet *ifp; + int ret = 0; + + /* Convert the test if-name into a real if-name */ + dp_test_intf_real(if_name, real_if_name); + + ifp = dp_ifnet_byifname(real_if_name); + if (ifp) + ifp->hw_forwarding = enable; + else + ret = -1; + + return ret; +} + +/* + * class_basic uses a minimal QoS configuration + * + * The ingress_map_cmds below are generated from: + * + * set policy qos ingress-map in-map-1 pcp 0 designation 0 drop-prec green + * set policy qos ingress-map in-map-1 pcp 1 designation 1 drop-prec green + * set policy qos ingress-map in-map-1 pcp 2 designation 2 drop-prec green + * set policy qos ingress-map in-map-1 pcp 3 designation 3 drop-prec green + * set policy qos ingress-map in-map-1 pcp 4 designation 4 drop-prec green + * set policy qos ingress-map in-map-1 pcp 5 designation 5 drop-prec green + * set policy qos ingress-map in-map-1 pcp 6 designation 6 drop-prec green + * set policy qos ingress-map in-map-1 pcp 7 designation 7 drop-prec green + * set interface dataplane dpX switch-group port-parameters + *
policy ingress-map in-map-1 + */ + +const char *ingress_map_cmds[] = { + "ingress-map in-map-1 pcp 0 designation 0 drop-prec green", + "ingress-map in-map-1 pcp 1 designation 1 drop-prec green", + "ingress-map in-map-1 pcp 2 designation 2 drop-prec green", + "ingress-map in-map-1 pcp 3 designation 3 drop-prec green", + "ingress-map in-map-1 pcp 4 designation 4 drop-prec green", + "ingress-map in-map-1 pcp 5 designation 5 drop-prec green", + "ingress-map in-map-1 pcp 6 designation 6 drop-prec green", + "ingress-map in-map-1 pcp 7 designation 7 drop-prec green", + "ingress-map in-map-1 complete", +}; + +static const char expected_ingress_map_vlan_str[] = +"{\"dpT21\":" + "{\"ingress-maps\":" + "[{\"vlan\":0," + "\"fal-qos-dot1p2des\":" + "[{\"pcp\":0,\"des\":0,\"dp\":0}," + "{\"pcp\":1,\"des\":1,\"dp\":0}," + "{\"pcp\":2,\"des\":2,\"dp\":0}," + "{\"pcp\":3,\"des\":3,\"dp\":0}," + "{\"pcp\":4,\"des\":4,\"dp\":0}," + "{\"pcp\":5,\"des\":5,\"dp\":0}," + "{\"pcp\":6,\"des\":6,\"dp\":0}," + "{\"pcp\":7,\"des\":7,\"dp\":0}]" + "}]" + "}" +"}"; + +static const char expected_ingress_map_str[] = +"{\"ingress-maps\":" + "[{\"name\":\"in-map-1\"," + "\"type\":\"pcp\"," + "\"system-default\":false," + "\"map\":" + "[{\"designation\":0,\"DPs\":[{\"DP\":0,\"pcp/mask\":1}]}," + "{\"designation\":1,\"DPs\":[{\"DP\":0,\"pcp/mask\":2}]}," + "{\"designation\":2,\"DPs\":[{\"DP\":0,\"pcp/mask\":4}]}," + "{\"designation\":3,\"DPs\":[{\"DP\":0,\"pcp/mask\":8}]}," + "{\"designation\":4,\"DPs\":[{\"DP\":0,\"pcp/mask\":16}]}," + "{\"designation\":5,\"DPs\":[{\"DP\":0,\"pcp/mask\":32}]}," + "{\"designation\":6,\"DPs\":[{\"DP\":0,\"pcp/mask\":64}]}," + "{\"designation\":7,\"DPs\":[{\"DP\":0,\"pcp/mask\":128}]}]" + "}]" +"}"; + +DP_START_TEST(qos_class_basic, class_basic) +{ + bool debug = (dp_test_debug_get() == 2 ? 
true : false); + int ret; + + qos_lib_test_setup(); + + dp_test_qos_debug(debug); + + dp_test_netlink_set_interface_l2("dp1sw_port_0_0"); + + dp_test_intf_switch_create("switch0"); + dp_test_intf_switch_add_port("switch0", "dp1sw_port_0_0"); + dp_test_intf_switch_add_port("switch0", "dp2T1"); + + ret = dp_test_qos_class_hw_switch_if("dp2T1", true); + dp_test_fail_unless((ret == 0), + "failed to set hw-switching on dp2T1\n"); + + dp_test_qos_send_config(ingress_map_cmds, expected_ingress_map_str, + "qos show ingress-maps", 9, debug); + dp_test_qos_send_if_cmd("dp2T1", "ingress-map in-map-1 vlan 0", + expected_ingress_map_vlan_str, + "qos show platform", + debug); + /* Cleanup */ + dp_test_qos_send_if_cmd("dp2T1", "ingress-map in-map-1 vlan 0 delete", + "{ }", "qos show platform", debug); + dp_test_qos_send_cmd("ingress-map in-map-1 delete", + "{ }", + "qos show ingress-maps", debug); + + ret = dp_test_qos_class_hw_switch_if("dp2T1", false); + dp_test_fail_unless((ret == 0), + "failed to clear hw-switching on dp2T1\n"); + + dp_test_intf_switch_remove_port("switch0", "dp2T1"); + dp_test_intf_switch_remove_port("switch0", "dp1sw_port_0_0"); + dp_test_intf_switch_del("switch0"); + + dp_test_netlink_del_interface_l2("dp1sw_port_0_0"); + + dp_test_qos_debug(false); + + qos_lib_test_teardown(); + +} DP_END_TEST; + +/* + * class_multimaps uses 2 ingress maps + * + * cmds generated from: + * + * set policy qos ingress-map in-map-1 pcp 0 designation 0 drop-prec green + * set policy qos ingress-map in-map-1 pcp 1 designation 1 drop-prec green + * set policy qos ingress-map in-map-1 pcp 2 designation 2 drop-prec green + * set policy qos ingress-map in-map-1 pcp 3 designation 4 drop-prec green + * set policy qos ingress-map in-map-1 pcp 4 designation 4 drop-prec yellow + * set policy qos ingress-map in-map-1 pcp 5 designation 5 drop-prec green + * set policy qos ingress-map in-map-1 pcp 6 designation 7 drop-prec green + * set policy qos ingress-map in-map-1 pcp 7
designation 7 drop-prec yellow + * set interface dataplane dpX switch-group port-parameters + * policy ingress-map in-map-1 + * set resources group dscp-group rt dscp 38 + * set resources group dscp-group voice dscp 46 + * set resources group dscp-group control dscp 48 + * set resources group dscp-group data1 dscp 24 + * set resources group dscp-group data2 dscp 0 + * set policy qos ingress-map in-map-2 dscp-group rt designation 0 + * drop-prec green + * set policy qos ingress-map in-map-2 dscp-group voice designation 1 + * drop-prec green + * set policy qos ingress-map in-map-2 dscp-group control designation 2 + * drop-prec green + * set policy qos ingress-map in-map-2 dscp-group data1 designation 3 + * drop-prec green + * set policy qos ingress-map in-map-2 dscp-group data2 designation 4 + * drop-prec green + * set interface dataplane dpX switch-group port-parameters + * policy ingress-map in-map-2 + */ + +const char *ingress_multi_map_cmds1[] = { + "ingress-map in-map-1 pcp 0 designation 0 drop-prec green", + "ingress-map in-map-1 pcp 1 designation 1 drop-prec green", + "ingress-map in-map-1 pcp 2 designation 2 drop-prec green", + "ingress-map in-map-1 pcp 3 designation 4 drop-prec green", + "ingress-map in-map-1 pcp 4 designation 4 drop-prec yellow", + "ingress-map in-map-1 pcp 5 designation 5 drop-prec green", + "ingress-map in-map-1 pcp 6 designation 7 drop-prec green", + "ingress-map in-map-1 pcp 7 designation 7 drop-prec yellow", + "ingress-map in-map-1 complete", +}; + +const char *ingress_multi_map_cmds2[] = { + "ingress-map in-map-2 dscp-group rt designation 0 drop-prec green", + "ingress-map in-map-2 dscp-group voice designation 1 drop-prec green", + "ingress-map in-map-2 dscp-group control designation 2 drop-prec green", + "ingress-map in-map-2 dscp-group data1 designation 3 drop-prec green", + "ingress-map in-map-2 dscp-group data2 designation 4 drop-prec green", + "ingress-map in-map-2 complete", +}; + + +static const char 
expected_ingress_multi_map_cmds1[] = +"{\"ingress-maps\":" + "[{\"name\":\"in-map-1\"," + "\"type\":\"pcp\"," + "\"system-default\":false," + "\"map\":" + "[{\"designation\":0,\"DPs\":" + "[{\"DP\":0,\"pcp/mask\":1}]}," + "{\"designation\":1,\"DPs\":" + "[{\"DP\":0,\"pcp/mask\":2}]}," + "{\"designation\":2,\"DPs\":" + "[{\"DP\":0,\"pcp/mask\":4}]}," + "{\"designation\":4,\"DPs\":" + "[{\"DP\":0,\"pcp/mask\":8}," + "{\"DP\":1,\"pcp/mask\":16}]}," + "{\"designation\":5,\"DPs\":" + "[{\"DP\":0,\"pcp/mask\":32}]}," + "{\"designation\":7,\"DPs\":" + "[{\"DP\":0,\"pcp/mask\":64}," + "{\"DP\":1,\"pcp/mask\":128}]}]" + "}]" +"}"; + +static const char expected_ingress_multi_map_cmds2[] = +"{\"ingress-maps\":" + "[{\"name\":\"in-map-2\"," + "\"type\":\"dscp\"," + "\"system-default\":false," + "\"map\":" + "[{\"designation\":0,\"DPs\":" + "[{\"DP\":0,\"pcp/mask\":70093866270720}]}," + "{\"designation\":1,\"DPs\":" + "[{\"DP\":0,\"pcp/mask\":211106232532992}]}," + "{\"designation\":2,\"DPs\":" + "[{\"DP\":0,\"pcp/mask\":9223372036854775807}]}," + "{\"designation\":3,\"DPs\":" + "[{\"DP\":0,\"pcp/mask\":274861129728}]}," + "{\"designation\":4,\"DPs\":" + "[{\"DP\":0,\"pcp/mask\":16777215}]}]" + "}]" +"}"; + +const char *ingress_rg_add_cmds[] = { + "npf-cfg add dscp-group:rt 0 38;39;40;41;42;43;44;45", + "npf-cfg add dscp-group:voice 0 46;47", + "npf-cfg add dscp-group:control 0 48;49;50;51;52;53;54;55;56;57;58;59;60;61;62;63", + "npf-cfg add dscp-group:data1 0 24;25;26;27;28;29;30;31;32;33;34;35;36;37", + "npf-cfg add dscp-group:data2 0 0;1;2;3;4;5;6;7;8;9;10;11;12;13;14;15;16;17;18;19;20;21;22;23", + "npf-cfg commit" +}; + +const char *ingress_rg_del_cmds[] = { + "npf-cfg delete dscp-group:rt", + "npf-cfg delete dscp-group:voice", + "npf-cfg delete dscp-group:control", + "npf-cfg delete dscp-group:data1", + "npf-cfg delete dscp-group:data2", + "npf-cfg commit" +}; + +static const char expected_ingress_multi_map_vlan_cmds[] = +"{\"dpT21\":" + "{\"ingress-maps\":" + 
"[{\"vlan\":0," + "\"fal-qos-dot1p2des\":" + "[{\"pcp\":0,\"des\":0,\"dp\":0}," + "{\"pcp\":1,\"des\":1,\"dp\":0}," + "{\"pcp\":2,\"des\":2,\"dp\":0}," + "{\"pcp\":3,\"des\":4,\"dp\":0}," + "{\"pcp\":4,\"des\":4,\"dp\":1}," + "{\"pcp\":5,\"des\":5,\"dp\":0}," + "{\"pcp\":6,\"des\":7,\"dp\":0}," + "{\"pcp\":7,\"des\":7,\"dp\":1}]}," + "{\"vlan\":10," + "\"fal-qos-dscp2des\":" + "[{\"dscp\":0,\"des\":4,\"dp\":0}," + "{\"dscp\":1,\"des\":4,\"dp\":0}," + "{\"dscp\":2,\"des\":4,\"dp\":0}," + "{\"dscp\":3,\"des\":4,\"dp\":0}," + "{\"dscp\":4,\"des\":4,\"dp\":0}," + "{\"dscp\":5,\"des\":4,\"dp\":0}," + "{\"dscp\":6,\"des\":4,\"dp\":0}," + "{\"dscp\":7,\"des\":4,\"dp\":0}," + "{\"dscp\":8,\"des\":4,\"dp\":0}," + "{\"dscp\":9,\"des\":4,\"dp\":0}," + "{\"dscp\":10,\"des\":4,\"dp\":0}," + "{\"dscp\":11,\"des\":4,\"dp\":0}," + "{\"dscp\":12,\"des\":4,\"dp\":0}," + "{\"dscp\":13,\"des\":4,\"dp\":0}," + "{\"dscp\":14,\"des\":4,\"dp\":0}," + "{\"dscp\":15,\"des\":4,\"dp\":0}," + "{\"dscp\":16,\"des\":4,\"dp\":0}," + "{\"dscp\":17,\"des\":4,\"dp\":0}," + "{\"dscp\":18,\"des\":4,\"dp\":0}," + "{\"dscp\":19,\"des\":4,\"dp\":0}," + "{\"dscp\":20,\"des\":4,\"dp\":0}," + "{\"dscp\":21,\"des\":4,\"dp\":0}," + "{\"dscp\":22,\"des\":4,\"dp\":0}," + "{\"dscp\":23,\"des\":4,\"dp\":0}," + "{\"dscp\":24,\"des\":3,\"dp\":0}," + "{\"dscp\":25,\"des\":3,\"dp\":0}," + "{\"dscp\":26,\"des\":3,\"dp\":0}," + "{\"dscp\":27,\"des\":3,\"dp\":0}," + "{\"dscp\":28,\"des\":3,\"dp\":0}," + "{\"dscp\":29,\"des\":3,\"dp\":0}," + "{\"dscp\":30,\"des\":3,\"dp\":0}," + "{\"dscp\":31,\"des\":3,\"dp\":0}," + "{\"dscp\":32,\"des\":3,\"dp\":0}," + "{\"dscp\":33,\"des\":3,\"dp\":0}," + "{\"dscp\":34,\"des\":3,\"dp\":0}," + "{\"dscp\":35,\"des\":3,\"dp\":0}," + "{\"dscp\":36,\"des\":3,\"dp\":0}," + "{\"dscp\":37,\"des\":3,\"dp\":0}," + "{\"dscp\":38,\"des\":0,\"dp\":0}," + "{\"dscp\":39,\"des\":0,\"dp\":0}," + "{\"dscp\":40,\"des\":0,\"dp\":0}," + "{\"dscp\":41,\"des\":0,\"dp\":0}," + 
"{\"dscp\":42,\"des\":0,\"dp\":0}," + "{\"dscp\":43,\"des\":0,\"dp\":0}," + "{\"dscp\":44,\"des\":0,\"dp\":0}," + "{\"dscp\":45,\"des\":0,\"dp\":0}," + "{\"dscp\":46,\"des\":1,\"dp\":0}," + "{\"dscp\":47,\"des\":1,\"dp\":0}," + "{\"dscp\":48,\"des\":2,\"dp\":0}," + "{\"dscp\":49,\"des\":2,\"dp\":0}," + "{\"dscp\":50,\"des\":2,\"dp\":0}," + "{\"dscp\":51,\"des\":2,\"dp\":0}," + "{\"dscp\":52,\"des\":2,\"dp\":0}," + "{\"dscp\":53,\"des\":2,\"dp\":0}," + "{\"dscp\":54,\"des\":2,\"dp\":0}," + "{\"dscp\":55,\"des\":2,\"dp\":0}," + "{\"dscp\":56,\"des\":2,\"dp\":0}," + "{\"dscp\":57,\"des\":2,\"dp\":0}," + "{\"dscp\":58,\"des\":2,\"dp\":0}," + "{\"dscp\":59,\"des\":2,\"dp\":0}," + "{\"dscp\":60,\"des\":2,\"dp\":0}," + "{\"dscp\":61,\"des\":2,\"dp\":0}," + "{\"dscp\":62,\"des\":2,\"dp\":0}," + "{\"dscp\":63,\"des\":2,\"dp\":0}]" + "}]" + "}" +"}"; + +DP_START_TEST(qos_class_basic, class_multimaps) +{ + bool debug = (dp_test_debug_get() == 2 ? true : false); + int ret, i; + + qos_lib_test_setup(); + + dp_test_qos_debug(debug); + + dp_test_netlink_set_interface_l2("dp1sw_port_0_0"); + + dp_test_intf_switch_create("switch0"); + dp_test_intf_switch_add_port("switch0", "dp1sw_port_0_0"); + dp_test_intf_switch_add_port("switch0", "dp2T1"); + + ret = dp_test_qos_class_hw_switch_if("dp2T1", true); + dp_test_fail_unless((ret == 0), + "failed to set hw-switching on dp2T1\n"); + + i = 0; + while (!strstr(ingress_rg_add_cmds[i], "npf-cfg commit")) { + dp_test_send_config_src(dp_test_cont_src_get(), "%s", + ingress_rg_add_cmds[i++]); + } + dp_test_send_config_src(dp_test_cont_src_get(), "%s", + ingress_rg_add_cmds[i++]); + dp_test_qos_send_config(ingress_multi_map_cmds1, + expected_ingress_multi_map_cmds1, + "qos show ingress-maps", 9, debug); + dp_test_qos_send_config(ingress_multi_map_cmds2, + expected_ingress_multi_map_cmds2, + "qos show ingress-maps", 6, debug); + dp_test_qos_send_if_cmd("dp2T1", "ingress-map in-map-1 vlan 0", + NULL, "", debug); + /* + * Validating "ingress-map 
in-map-1 vlan 0" + * as part of "ingress-map in-map-2 vlan 10" + */ + dp_test_qos_send_if_cmd("dp2T1", "ingress-map in-map-2 vlan 10", + expected_ingress_multi_map_vlan_cmds, + "qos show platform", debug); + + /* Cleanup */ + dp_test_qos_send_if_cmd("dp2T1", "ingress-map in-map-2 vlan 10 delete", + NULL, "", debug); + /* + * Validating "ingress-map in-map-2 vlan 10 delete" + * as part of "ingress-map in-map-1 vlan 0 delete" + */ + dp_test_qos_send_if_cmd("dp2T1", "ingress-map in-map-1 vlan 0 delete", + "{ }", "qos show platform", debug); + dp_test_qos_send_cmd("ingress-map in-map-1 delete", + expected_ingress_multi_map_cmds2, + "qos show ingress-maps", + debug); + dp_test_qos_send_cmd("ingress-map in-map-2 delete", + "{ }", + "qos show ingress-maps", + debug); + i = 0; + while (!strstr(ingress_rg_del_cmds[i], "npf-cfg commit")) { + dp_test_send_config_src(dp_test_cont_src_get(), "%s", + ingress_rg_del_cmds[i++]); + } + dp_test_send_config_src(dp_test_cont_src_get(), "%s", + ingress_rg_del_cmds[i++]); + + ret = dp_test_qos_class_hw_switch_if("dp2T1", false); + dp_test_fail_unless((ret == 0), + "failed to clear hw-switching on dp2T1\n"); + + dp_test_intf_switch_remove_port("switch0", "dp2T1"); + dp_test_intf_switch_remove_port("switch0", "dp1sw_port_0_0"); + dp_test_intf_switch_del("switch0"); + + dp_test_netlink_del_interface_l2("dp1sw_port_0_0"); + + dp_test_qos_debug(false); + + qos_lib_test_teardown(); + +} DP_END_TEST; + +/* + * class_map_multi_dps uses a minimal QoS configuration + * + * class_map_multi_dps generate from: + * + * set policy qos ingress-map in-map-1 pcp 0 designation 0 drop-prec green + * set policy qos ingress-map in-map-1 pcp 1 designation 1 drop-prec green + * set policy qos ingress-map in-map-1 pcp 2 designation 1 drop-prec yellow + * set policy qos ingress-map in-map-1 pcp 3 designation 3 drop-prec green + * set policy qos ingress-map in-map-1 pcp 4 designation 3 drop-prec yellow + * set policy qos ingress-map in-map-1 pcp 5 designation 5 
drop-prec green + * set policy qos ingress-map in-map-1 pcp 6 designation 5 drop-prec yellow + * set policy qos ingress-map in-map-1 pcp 7 designation 5 drop-prec red + * set interface dataplane dpX switch-group port-parameters + * policy ingress-map in-map-1 + */ + +const char *ingress_map_dp_cmds[] = { + "ingress-map in-map-1 pcp 0 designation 0 drop-prec green", + "ingress-map in-map-1 pcp 1 designation 1 drop-prec green", + "ingress-map in-map-1 pcp 2 designation 2 drop-prec green", + "ingress-map in-map-1 pcp 3 designation 3 drop-prec green", + "ingress-map in-map-1 pcp 4 designation 3 drop-prec yellow", + "ingress-map in-map-1 pcp 5 designation 5 drop-prec green", + "ingress-map in-map-1 pcp 6 designation 5 drop-prec yellow", + "ingress-map in-map-1 pcp 7 designation 5 drop-prec red", + "ingress-map in-map-1 complete", +}; + +static const char expected_ingress_map_vlan_dp_cmds[] = +"{\"dpT21\":" + "{\"ingress-maps\":" + "[{\"vlan\":0," + "\"fal-qos-dot1p2des\":" + "[{\"pcp\":0,\"des\":0,\"dp\":0}," + "{\"pcp\":1,\"des\":1,\"dp\":0}," + "{\"pcp\":2,\"des\":2,\"dp\":0}," + "{\"pcp\":3,\"des\":3,\"dp\":0}," + "{\"pcp\":4,\"des\":3,\"dp\":1}," + "{\"pcp\":5,\"des\":5,\"dp\":0}," + "{\"pcp\":6,\"des\":5,\"dp\":1}," + "{\"pcp\":7,\"des\":5,\"dp\":2}]" + "}]" + "}" +"}"; + +static const char expected_ingress_map_dp_cmds[] = +"{\"ingress-maps\":" + "[{\"name\":\"in-map-1\"," + "\"type\":\"pcp\"," + "\"system-default\":false," + "\"map\":" + "[{\"designation\":0,\"DPs\":" + "[{\"DP\":0,\"pcp/mask\":1}]}," + "{\"designation\":1,\"DPs\":" + "[{\"DP\":0,\"pcp/mask\":2}]}," + "{\"designation\":2,\"DPs\":" + "[{\"DP\":0,\"pcp/mask\":4}]}," + "{\"designation\":3,\"DPs\":" + "[{\"DP\":0,\"pcp/mask\":8}," + "{\"DP\":1,\"pcp/mask\":16}]}," + "{\"designation\":5,\"DPs\":" + "[{\"DP\":0,\"pcp/mask\":32}," + "{\"DP\":1,\"pcp/mask\":64}," + "{\"DP\":2,\"pcp/mask\":128}]}]" + "}]" +"}"; + +DP_START_TEST(qos_class_basic, class_map_multi_dps) +{ + bool debug = (dp_test_debug_get() == 
2 ? true : false); + int ret; + + qos_lib_test_setup(); + + dp_test_qos_debug(debug); + + dp_test_netlink_set_interface_l2("dp1sw_port_0_0"); + + dp_test_intf_switch_create("switch0"); + dp_test_intf_switch_add_port("switch0", "dp1sw_port_0_0"); + dp_test_intf_switch_add_port("switch0", "dp2T1"); + + ret = dp_test_qos_class_hw_switch_if("dp2T1", true); + dp_test_fail_unless((ret == 0), + "failed to set hw-switching on dp2T1\n"); + + dp_test_qos_send_config(ingress_map_dp_cmds, + expected_ingress_map_dp_cmds, + "qos show ingress-maps", 9, debug); + dp_test_qos_send_if_cmd("dp2T1", "ingress-map in-map-1 vlan 0", + expected_ingress_map_vlan_dp_cmds, + "qos show platform", debug); + + /* Cleanup */ + dp_test_qos_send_if_cmd("dp2T1", "ingress-map in-map-1 vlan 0 delete", + "{ }", "qos show platform", debug); + dp_test_qos_send_cmd("ingress-map in-map-1 delete", + "{ }", + "qos show ingress-maps", debug); + + ret = dp_test_qos_class_hw_switch_if("dp2T1", false); + dp_test_fail_unless((ret == 0), + "failed to clear hw-switching on dp2T1\n"); + + dp_test_intf_switch_remove_port("switch0", "dp2T1"); + dp_test_intf_switch_remove_port("switch0", "dp1sw_port_0_0"); + dp_test_intf_switch_del("switch0"); + + dp_test_netlink_del_interface_l2("dp1sw_port_0_0"); + + dp_test_qos_debug(false); + + qos_lib_test_teardown(); + +} DP_END_TEST; + +/* + * class_single_sysdef uses a minimal QoS configuration + * + * class_single_sysdef generate from: + * + * set policy qos ingress-map in-map-1 pcp 0 designation 0 + * set policy qos ingress-map in-map-1 system-default + * set policy qos ingress-map in-map-2 pcp 3 designation 3 + * set policy qos ingress-map in-map-2 system-default + */ + +const char *ingress_sysdef1[] = { + "ingress-map in-map-1 pcp 0 designation 0 drop-prec green", + "ingress-map in-map-1 pcp 1 designation 0 drop-prec yellow", + "ingress-map in-map-1 pcp 2 designation 0 drop-prec red", + "ingress-map in-map-1 pcp 3 designation 1 drop-prec green", + "ingress-map in-map-1 pcp 
4 designation 1 drop-prec yellow", + "ingress-map in-map-1 pcp 5 designation 1 drop-prec red", + "ingress-map in-map-1 pcp 6 designation 2 drop-prec green", + "ingress-map in-map-1 pcp 7 designation 2 drop-prec yellow", + "ingress-map in-map-1 system-default", + "ingress-map in-map-1 complete" +}; + +const char *ingress_sysdef2[] = { + "ingress-map in-map-2 pcp 0 designation 2 drop-prec green", + "ingress-map in-map-2 pcp 1 designation 2 drop-prec yellow", + "ingress-map in-map-2 pcp 2 designation 2 drop-prec red", + "ingress-map in-map-2 pcp 3 designation 3 drop-prec green", + "ingress-map in-map-2 pcp 4 designation 3 drop-prec yellow", + "ingress-map in-map-2 pcp 5 designation 3 drop-prec red", + "ingress-map in-map-2 pcp 6 designation 4 drop-prec green", + "ingress-map in-map-2 pcp 7 designation 4 drop-prec yellow", + "ingress-map in-map-2 system-default", +}; + +static const char expected_ingress_sysdef1[] = +"{\"ingress-maps\":" + "[{\"name\":\"in-map-1\"," + "\"type\":\"pcp\"," + "\"system-default\":true," + "\"map\":" + "[{\"designation\":0,\"DPs\":" + "[{\"DP\":0,\"pcp/mask\":1}," + "{\"DP\":1,\"pcp/mask\":2}," + "{\"DP\":2,\"pcp/mask\":4}]}," + "{\"designation\":1,\"DPs\":" + "[{\"DP\":0,\"pcp/mask\":8}," + "{\"DP\":1,\"pcp/mask\":16}," + "{\"DP\":2,\"pcp/mask\":32}]}," + "{\"designation\":2,\"DPs\":" + "[{\"DP\":0,\"pcp/mask\":64}," + "{\"DP\":1,\"pcp/mask\":128}]}]" + "}]" +"}"; + +static const char expected_ingress_sysdef2_false[] = +"{\"ingress-maps\":" + "[{\"name\":\"in-map-2\"," + "\"type\":\"pcp\"," + "\"system-default\":false," + "\"map\":" + "[{\"designation\":2,\"DPs\":" + "[{\"DP\":0,\"pcp/mask\":1}," + "{\"DP\":1,\"pcp/mask\":2}," + "{\"DP\":2,\"pcp/mask\":4}]}," + "{\"designation\":3,\"DPs\":" + "[{\"DP\":0,\"pcp/mask\":8}," + "{\"DP\":1,\"pcp/mask\":16}," + "{\"DP\":2,\"pcp/mask\":32}]}," + "{\"designation\":4,\"DPs\":" + "[{\"DP\":0,\"pcp/mask\":64}," + "{\"DP\":1,\"pcp/mask\":128}]}]" + "}]" +"}"; + +static const char 
expected_ingress_sysdef2_true[] = +"{\"ingress-maps\":" + "[{\"name\":\"in-map-2\"," + "\"type\":\"pcp\"," + "\"system-default\":true," + "\"map\":" + "[{\"designation\":2,\"DPs\":" + "[{\"DP\":0,\"pcp/mask\":1}," + "{\"DP\":1,\"pcp/mask\":2}," + "{\"DP\":2,\"pcp/mask\":4}]}," + "{\"designation\":3,\"DPs\":" + "[{\"DP\":0,\"pcp/mask\":8}," + "{\"DP\":1,\"pcp/mask\":16}," + "{\"DP\":2,\"pcp/mask\":32}]}," + "{\"designation\":4,\"DPs\":" + "[{\"DP\":0,\"pcp/mask\":64}," + "{\"DP\":1,\"pcp/mask\":128}]}]" + "}]" +"}"; + + +DP_START_TEST(qos_class_basic, class_single_sysdef) +{ + bool debug = (dp_test_debug_get() == 2 ? true : false); + + qos_lib_test_setup(); + + dp_test_qos_debug(debug); + + dp_test_qos_send_config(ingress_sysdef1, expected_ingress_sysdef1, + "qos show ingress-maps", 10, debug); + /* Second system-default should fail */ + dp_test_set_config_err(-EINVAL); + dp_test_qos_send_config(ingress_sysdef2, + expected_ingress_sysdef2_false, + "qos show ingress-maps", 9, debug); + dp_test_qos_send_cmd("ingress-map in-map-1 delete", + expected_ingress_sysdef2_false, + "qos show ingress-maps", debug); + /* Now it should succeed */ + dp_test_qos_send_cmd("ingress-map in-map-2 system-default", NULL, + "", debug); + dp_test_qos_send_cmd("ingress-map in-map-2 complete", + expected_ingress_sysdef2_true, + "qos show ingress-maps", debug); + + /* Cleanup */ + dp_test_qos_send_cmd("ingress-map in-map-2 delete", + "{ }", + "qos show ingress-maps", debug); + + dp_test_qos_debug(false); + + qos_lib_test_teardown(); + +} DP_END_TEST; + +/* + * class_map_to_policy setup an ingress map and matching policy + * + * cmds generate from: + * + * set resources group dscp-group rt dscp 38 + * set resources group dscp-group voice dscp 46 + * set resources group dscp-group control dscp 48 + * set resources group dscp-group data1 dscp 24 + * set resources group dscp-group data2 dscp 0 + * set policy qos ingress-map in-map-2 dscp-group rt designation 0 + * drop-prec green + * set policy 
qos ingress-map in-map-2 dscp-group voice designation 1 + * drop-prec green + * set policy qos ingress-map in-map-2 dscp-group control designation 2 + * drop-prec green + * set policy qos ingress-map in-map-2 dscp-group data1 designation 3 + * drop-prec green + * set policy qos ingress-map in-map-2 dscp-group data2 designation 4 + * drop-prec green + * set interface dataplane dpX switch-group port-parameters + * policy ingress-map in-map-2 + * set policy qos name foo shaper default def + * set policy qos name foo shaper profile def map designation 0 to 0 + * set policy qos name foo shaper profile def map designation 1 to 2 + * set policy qos name foo shaper profile def map designation 2 to 5 + * set policy qos name foo shaper profile def map designation 3 to 6 + * set policy qos name foo shaper profile def map designation 4 to 7 + * set policy qos name foo shaper profile def queue 0 traffic-class 0 + * set policy qos name foo shaper profile def queue 2 traffic-class 1 + * set policy qos name foo shaper profile def queue 5 traffic-class 2 + * set policy qos name foo shaper profile def queue 6 traffic-class 3 + * set policy qos name foo shaper profile def queue 7 traffic-class 3 + * set interface dataplane dpX switch-group port-parameters + * policy qos foo + */ + +const char *ingress_map_2_pol_cmds[] = { + "ingress-map in-map-2 dscp-group rt designation 0 drop-prec green", + "ingress-map in-map-2 dscp-group voice designation 1 drop-prec green", + "ingress-map in-map-2 dscp-group control designation 2 drop-prec green", + "ingress-map in-map-2 dscp-group data1 designation 3 drop-prec green", + "ingress-map in-map-2 dscp-group data2 designation 4 drop-prec green", + "ingress-map in-map-2 complete", +}; + +const char *ingress_policy_cmds[] = { + "port subports 1 pipes 1 profiles 1 overhead 24 ql_bytes", + "subport 0 rate 1250000000 size 0 period 40", + "subport 0 queue 0 percent 100 size 0", + "param subport 0 0", + "subport 0 queue 1 percent 100 size 0", + "param 
subport 0 1", + "subport 0 queue 2 percent 100 size 0", + "param subport 0 2", + "subport 0 queue 3 percent 100 size 0", + "param subport 0 3", + "vlan 0 0", + "profile 0 percent 100 size 0 period 10", + "profile 0 queue 0 percent 100 size 0", + "profile 0 queue 2 percent 100 size 0", + "profile 0 queue 5 percent 100 size 0", + "profile 0 queue 6 percent 100 size 0", + "profile 0 queue 7 percent 100 size 0", + "profile 0 queue 0 wrr-weight 1 0", + "profile 0 queue 0x1 wrr-weight 1 2", + "profile 0 queue 0x2 wrr-weight 1 5", + "profile 0 queue 0x3 wrr-weight 1 6", + "profile 0 queue 0x7 wrr-weight 1 7", + "profile 0 designation 0 queue 0", + "profile 0 designation 1 queue 0x1", + "profile 0 designation 2 queue 0x2", + "profile 0 designation 3 queue 0x3", + "profile 0 designation 4 queue 0x7", + "pipe 0 0 0", + "enable", +}; + +static const char expected_ingress_map_2_pol[] = +"{\"ingress-maps\":" + "[{\"name\":\"in-map-2\"," + "\"type\":\"dscp\"," + "\"system-default\":false," + "\"map\":" + "[{\"designation\":0,\"DPs\":" + "[{\"DP\":0,\"pcp/mask\":70093866270720}]}," + "{\"designation\":1,\"DPs\":" + "[{\"DP\":0,\"pcp/mask\":211106232532992}]}," + "{\"designation\":2,\"DPs\":" + "[{\"DP\":0,\"pcp/mask\":9223372036854775807}]}," + "{\"designation\":3,\"DPs\":" + "[{\"DP\":0,\"pcp/mask\":274861129728}]}," + "{\"designation\":4,\"DPs\":" + "[{\"DP\":0,\"pcp/mask\":16777215}]}]" + "}]" +"}"; + +DP_START_TEST(qos_class_basic, class_map_to_policy) +{ + bool debug = (dp_test_debug_get() == 2 ? 
true : false); + int ret, i; + json_object *j_obj; + + qos_lib_test_setup(); + + dp_test_qos_debug(debug); + + dp_test_netlink_set_interface_l2("dp1sw_port_0_0"); + + dp_test_intf_switch_create("switch0"); + dp_test_intf_switch_add_port("switch0", "dp1sw_port_0_0"); + dp_test_intf_switch_add_port("switch0", "dp2T1"); + dp_test_intf_switch_add_port("switch0", "dp2T2"); + + ret = dp_test_qos_class_hw_switch_if("dp2T1", true); + dp_test_fail_unless((ret == 0), + "failed to set hw-switching on dp2T1\n"); + + ret = dp_test_qos_class_hw_switch_if("dp2T2", true); + dp_test_fail_unless((ret == 0), + "failed to set hw-switching on dp2T2\n"); + + i = 0; + while (!strstr(ingress_rg_add_cmds[i], "npf-cfg commit")) { + dp_test_send_config_src(dp_test_cont_src_get(), "%s", + ingress_rg_add_cmds[i++]); + } + dp_test_send_config_src(dp_test_cont_src_get(), "%s", + ingress_rg_add_cmds[i++]); + dp_test_qos_send_config(ingress_map_2_pol_cmds, + expected_ingress_map_2_pol, + "qos show ingress-maps", 6, debug); + dp_test_qos_attach_config_to_if("dp2T2", ingress_policy_cmds, debug); + + /* + * Check the designator values in the queue objects match the + * configured values + * + * Although a designation value of 0 is valid we use to indicate + * that the value be ignored for testing purposes so skip. 
+ */ + j_obj = dp_test_qos_hw_get_json_queue("dp2T2", 0, 0, 0, 0, debug); + dp_test_qos_hw_check_queue(j_obj, 0, 64, 0, 0, debug); + json_object_put(j_obj); + + j_obj = dp_test_qos_hw_get_json_queue("dp2T2", 0, 0, 1, 0, debug); + dp_test_qos_hw_check_queue(j_obj, 0, 64, 0, 1, debug); + json_object_put(j_obj); + + j_obj = dp_test_qos_hw_get_json_queue("dp2T2", 0, 0, 2, 0, debug); + dp_test_qos_hw_check_queue(j_obj, 0, 64, 0, 2, debug); + json_object_put(j_obj); + + j_obj = dp_test_qos_hw_get_json_queue("dp2T2", 0, 0, 3, 0, debug); + dp_test_qos_hw_check_queue(j_obj, 0, 64, 0, 3, debug); + json_object_put(j_obj); + + j_obj = dp_test_qos_hw_get_json_queue("dp2T2", 0, 0, 3, 1, debug); + dp_test_qos_hw_check_queue(j_obj, 1, 64, 1, 4, debug); + json_object_put(j_obj); + + /* Cleanup */ + dp_test_qos_send_if_cmd("dp2T2", "disable", NULL, "", debug); + dp_test_qos_send_cmd("ingress-map in-map-2 delete", + "{ }", + "qos show ingress-maps", debug); + i = 0; + while (!strstr(ingress_rg_del_cmds[i], "npf-cfg commit")) { + dp_test_send_config_src(dp_test_cont_src_get(), "%s", + ingress_rg_del_cmds[i++]); + } + dp_test_send_config_src(dp_test_cont_src_get(), "%s", + ingress_rg_del_cmds[i++]); + + ret = dp_test_qos_class_hw_switch_if("dp2T1", false); + dp_test_fail_unless((ret == 0), + "failed to clear hw-switching on dp2T1\n"); + + ret = dp_test_qos_class_hw_switch_if("dp2T2", false); + dp_test_fail_unless((ret == 0), + "failed to clear hw-switching on dp2T2\n"); + + dp_test_intf_switch_remove_port("switch0", "dp2T1"); + dp_test_intf_switch_remove_port("switch0", "dp2T2"); + dp_test_intf_switch_remove_port("switch0", "dp1sw_port_0_0"); + dp_test_intf_switch_del("switch0"); + + dp_test_netlink_del_interface_l2("dp1sw_port_0_0"); + + dp_test_qos_debug(false); + + qos_lib_test_teardown(); + +} DP_END_TEST; + +/* + * class_policy_skip_des Don't use consecutive designations + * + * cmds generate from: + * + * set policy qos name foo shaper default def + * set policy qos name foo 
shaper profile def map designation 0 to 0 + * set policy qos name foo shaper profile def map designation 2 to 2 + * set policy qos name foo shaper profile def map designation 5 to 5 + * set policy qos name foo shaper profile def map designation 7 to 6 + * set policy qos name foo shaper profile def queue 0 traffic-class 0 + * set policy qos name foo shaper profile def queue 2 traffic-class 1 + * set policy qos name foo shaper profile def queue 5 traffic-class 2 + * set policy qos name foo shaper profile def queue 6 traffic-class 3 + * set interface dataplane dpX switch-group port-parameters + * policy qos foo + */ + +const char *ingress_policy_skip_des_cmds[] = { + "port subports 1 pipes 1 profiles 1 overhead 24 ql_bytes", + "subport 0 rate 1250000000 size 0 period 40", + "subport 0 queue 0 percent 100 size 0", + "param subport 0 0", + "subport 0 queue 1 percent 100 size 0", + "param subport 0 1", + "subport 0 queue 2 percent 100 size 0", + "param subport 0 2", + "subport 0 queue 3 percent 100 size 0", + "param subport 0 3", + "vlan 0 0", + "profile 0 percent 100 size 0 period 10", + "profile 0 queue 0 percent 100 size 0", + "profile 0 queue 2 percent 100 size 0", + "profile 0 queue 5 percent 100 size 0", + "profile 0 queue 6 percent 100 size 0", + "profile 0 queue 0 wrr-weight 1 0", + "profile 0 queue 0x1 wrr-weight 1 2", + "profile 0 queue 0x2 wrr-weight 1 5", + "profile 0 queue 0x3 wrr-weight 1 6", + "profile 0 designation 0 queue 0", + "profile 0 designation 2 queue 0x1", + "profile 0 designation 5 queue 0x2", + "profile 0 designation 7 queue 0x3", + "pipe 0 0 0", + "enable", +}; + +DP_START_TEST(qos_class_basic, class_policy_skip_des) +{ + bool debug = (dp_test_debug_get() == 2 ? 
true : false); + int ret; + json_object *j_obj; + + qos_lib_test_setup(); + + dp_test_qos_debug(debug); + + dp_test_netlink_set_interface_l2("dp1sw_port_0_0"); + + dp_test_intf_switch_create("switch0"); + dp_test_intf_switch_add_port("switch0", "dp1sw_port_0_0"); + dp_test_intf_switch_add_port("switch0", "dp2T1"); + + ret = dp_test_qos_class_hw_switch_if("dp2T1", true); + dp_test_fail_unless((ret == 0), + "failed to set hw-switching on dp2T1\n"); + + dp_test_qos_attach_config_to_if("dp2T1", ingress_policy_skip_des_cmds, + debug); + + /* + * Check the designator values in the queue objects match the + * configured values + */ + j_obj = dp_test_qos_hw_get_json_queue("dp2T1", 0, 0, 0, 0, debug); + dp_test_qos_hw_check_queue(j_obj, 0, 64, 0, 0, debug); + json_object_put(j_obj); + + j_obj = dp_test_qos_hw_get_json_queue("dp2T1", 0, 0, 1, 0, debug); + dp_test_qos_hw_check_queue(j_obj, 0, 64, 0, 2, debug); + json_object_put(j_obj); + + j_obj = dp_test_qos_hw_get_json_queue("dp2T1", 0, 0, 2, 0, debug); + dp_test_qos_hw_check_queue(j_obj, 0, 64, 0, 5, debug); + json_object_put(j_obj); + + j_obj = dp_test_qos_hw_get_json_queue("dp2T1", 0, 0, 3, 0, debug); + dp_test_qos_hw_check_queue(j_obj, 0, 64, 0, 7, debug); + json_object_put(j_obj); + + /* Cleanup */ + dp_test_qos_send_if_cmd("dp2T1", "disable", NULL, "", debug); + + ret = dp_test_qos_class_hw_switch_if("dp2T1", false); + dp_test_fail_unless((ret == 0), + "failed to clear hw-switching on dp2T1\n"); + + dp_test_intf_switch_remove_port("switch0", "dp2T1"); + dp_test_intf_switch_remove_port("switch0", "dp1sw_port_0_0"); + dp_test_intf_switch_del("switch0"); + + dp_test_netlink_del_interface_l2("dp1sw_port_0_0"); + + dp_test_qos_debug(false); + + qos_lib_test_teardown(); + +} DP_END_TEST; + +/* + * class_policy_vci A test policy used for VCI + * + * cmds generate from: + * + * set policy qos name pol-1 shaper default profile-1 + * set policy qos name pol-1 shaper profile profile-1 + * set interface dataplane dpX switch-group 
port-parameters + * policy qos pol-1 + */ + +const char *class_policy_vci[] = { + "port subports 1 pipes 1 profiles 1 overhead 24 ql_bytes", + "subport 0 rate 1250000000 size 16000 period 40", + "subport 0 queue 0 percent 100 size 0", + "param subport 0 0 limit packets 64", + "subport 0 queue 1 percent 100 size 0", + "param subport 0 1 limit packets 64", + "subport 0 queue 2 percent 100 size 0", + "param subport 0 2 limit packets 64", + "subport 0 queue 3 percent 100 size 0", + "param subport 0 3 limit packets 64", + "vlan 0 0", + "profile 0 percent 100 size 16000 period 10", + "profile 0 queue 0 percent 100 size 0", + "profile 0 queue 1 percent 100 size 0", + "profile 0 queue 2 percent 100 size 0", + "profile 0 queue 3 percent 100 size 0", + "pipe 0 0 0", + "enable", +}; + +DP_START_TEST(qos_class_basic, class_policy_vci) +{ + bool debug = (dp_test_debug_get() == 2 ? true : false); + int ret; + json_object *j_obj; + + qos_lib_test_setup(); + + dp_test_qos_debug(debug); + + dp_test_netlink_set_interface_l2("dp1sw_port_0_0"); + + dp_test_intf_switch_create("switch0"); + dp_test_intf_switch_add_port("switch0", "dp1sw_port_0_0"); + dp_test_intf_switch_add_port("switch0", "dp2T1"); + + ret = dp_test_qos_class_hw_switch_if("dp2T1", true); + dp_test_fail_unless((ret == 0), + "failed to set hw-switching on dp2T1\n"); + + dp_test_qos_attach_config_to_if("dp2T1", class_policy_vci, + debug); + + /* + * Check the designator values in the queue objects match the + * configured values + */ + j_obj = dp_test_qos_hw_get_json_queue("dp2T1", 0, 0, 0, 0, debug); + dp_test_qos_hw_check_queue(j_obj, 0, 64, 0, 0, debug); + json_object_put(j_obj); + + j_obj = dp_test_qos_hw_get_json_queue("dp2T1", 0, 0, 1, 0, debug); + dp_test_qos_hw_check_queue(j_obj, 0, 64, 0, 0, debug); + json_object_put(j_obj); + + j_obj = dp_test_qos_hw_get_json_queue("dp2T1", 0, 0, 2, 0, debug); + dp_test_qos_hw_check_queue(j_obj, 0, 64, 0, 0, debug); + json_object_put(j_obj); + + j_obj = 
dp_test_qos_hw_get_json_queue("dp2T1", 0, 0, 3, 0, debug); + dp_test_qos_hw_check_queue(j_obj, 0, 64, 0, 0, debug); + json_object_put(j_obj); + + /* Cleanup */ + dp_test_qos_send_if_cmd("dp2T1", "disable", NULL, "", debug); + + ret = dp_test_qos_class_hw_switch_if("dp2T1", false); + dp_test_fail_unless((ret == 0), + "failed to clear hw-switching on dp2T1\n"); + + dp_test_intf_switch_remove_port("switch0", "dp2T1"); + dp_test_intf_switch_remove_port("switch0", "dp1sw_port_0_0"); + dp_test_intf_switch_del("switch0"); + + dp_test_netlink_del_interface_l2("dp1sw_port_0_0"); + + dp_test_qos_debug(false); + + qos_lib_test_teardown(); + +} DP_END_TEST; diff --git a/tests/whole_dp/src/dp_test_qos_ext_buf_monitor.c b/tests/whole_dp/src/dp_test_qos_ext_buf_monitor.c new file mode 100644 index 00000000..3562662a --- /dev/null +++ b/tests/whole_dp/src/dp_test_qos_ext_buf_monitor.c @@ -0,0 +1,382 @@ +/* + * Copyright (c) 2019-2020, AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + * + * QoS external buffer monitor tests + */ + +#include + +#include "ip6_funcs.h" +#include "ip_funcs.h" +#include "in_cksum.h" +#include "if_var.h" +#include "main.h" +#include "qos_ext_buf_monitor.h" + +#include "dp_test.h" +#include "dp_test_str.h" +#include "dp_test_lib_internal.h" +#include "dp_test_lib_exp.h" +#include "dp_test_lib_pkt.h" +#include "dp_test_lib_intf_internal.h" +#include "dp_test_pktmbuf_lib_internal.h" +#include "dp_test_netlink_state_internal.h" +#include "dp_test/dp_test_macros.h" +#include "dp_test_console.h" +#include "dp_test_json_utils.h" + +struct qos_counters { + uint32_t buf_free; + uint32_t dropped; + enum qos_ext_buf_state state; +}; + +DP_DECL_TEST_SUITE(qos_ext_buf_monitor); + +DP_DECL_TEST_CASE(qos_ext_buf_monitor, ext_buf_monitor_test, NULL, NULL); + +struct qos_external_buffer_congest_stats buf_stats; + +static void qos_ext_buf_test_init(void) +{ + memset(&buf_stats, 0, sizeof(buf_stats)); + + 
buf_stats.max_buf_desc = 98302; + buf_stats.buf_cfg_threshold = 85; + buf_stats.cur_state.state = EXT_BUF_S_CLEAR; + buf_stats.cur_state.period_data.notify_mode = + EXT_BUF_EVT_NOTIFY_MODE_TEN_SEC; +} + +static void dp_test_qos_ext_buf_state_transition(void) +{ + int ret; + struct qos_ext_buf_state_record *cur_state = &buf_stats.cur_state; + enum qos_ext_buf_event evt; + + qos_ext_buf_test_init(); + + /* test unchanged CLEAR state */ + cur_state->state = EXT_BUF_S_CLEAR; + evt = EXT_BUF_EVT_CLEAR; + ret = qos_ext_buf_state_transit(cur_state, evt); + dp_test_fail_unless(!ret && cur_state->state == EXT_BUF_S_CLEAR, + "S_CLEAR state is expected!"); + + /* test S_CLEAR -> S_THRESHOLD state transition */ + cur_state->state = EXT_BUF_S_CLEAR; + evt = EXT_BUF_EVT_THRESHOLD_ONLY; + ret = qos_ext_buf_state_transit(cur_state, evt); + dp_test_fail_unless(ret == 1 && + cur_state->state == EXT_BUF_S_THRESHOLD_ONLY, + "S_THRESHOLD_ONLY state is expected!"); + dp_test_fail_unless(cur_state->consecutive_periods_cnt == 1, + "Bad periods cnt is expected to be 1!"); + + /* test EVT_THRESHOLD won't trigger state S_REJECTPKT change */ + cur_state->state = EXT_BUF_S_REJECTPKT_ONLY; + evt = EXT_BUF_EVT_THRESHOLD_ONLY; + ret = qos_ext_buf_state_transit(cur_state, evt); + dp_test_fail_unless(ret == 0 && + cur_state->state == EXT_BUF_S_REJECTPKT_ONLY, + "S_REJECTPKT_ONLY state is expected!"); + + /* S_REJECTPKT -> S_THRESHOLD_REJECTPKT */ + cur_state->state = EXT_BUF_S_REJECTPKT_ONLY; + evt = EXT_BUF_EVT_THRESHOLD_REJECTPKT; + ret = qos_ext_buf_state_transit(cur_state, evt); + dp_test_fail_unless(ret == 1 && + cur_state->state == EXT_BUF_S_THRESHOLD_REJECTPKT, + "S_THRESHOLD_AND_REJECTPKT state is expected!"); + + /* S_REJECTPKT -> S_CLEAR */ + cur_state->state = EXT_BUF_S_REJECTPKT_ONLY; + evt = EXT_BUF_EVT_CLEAR; + ret = qos_ext_buf_state_transit(cur_state, evt); + dp_test_fail_unless(ret == 1 && cur_state->state == EXT_BUF_S_CLEAR, + "S_CLEAR state is expected!"); +} + +static void 
dp_test_qos_ext_buf_event_from_sample_result(void) +{ + struct qos_ext_buf_state_record *cur_state = &buf_stats.cur_state; + enum qos_ext_buf_event evt = EXT_BUF_EVT_NONE; + enum qos_ext_buf_sample_result smp_result; + uint32_t clear_cnt = MAX_CONSECUTIVE_SAMPLES_ON_CLEAR; + + qos_ext_buf_test_init(); + + /* no state behavior for good sample result */ + cur_state->state = EXT_BUF_S_THRESHOLD_ONLY; + cur_state->consecutive_good_samples_cnt = clear_cnt - 3; + smp_result = EXT_BUF_SPL_R_NONE; + evt = qos_ext_buf_get_evt_by_sample_result(cur_state, smp_result); + dp_test_fail_unless(evt == EXT_BUF_EVT_NONE && + cur_state->consecutive_good_samples_cnt == (clear_cnt-2), + "EVT_NONE is expected!"); + + /* Get EVT_CLEAR when accumulating good samples */ + cur_state->state = EXT_BUF_S_THRESHOLD_ONLY; + cur_state->consecutive_good_samples_cnt = clear_cnt - 1; + smp_result = EXT_BUF_SPL_R_NONE; + evt = qos_ext_buf_get_evt_by_sample_result(cur_state, smp_result); + dp_test_fail_unless(evt == EXT_BUF_EVT_CLEAR, + "EVT_CLEAR is expected!"); + + /* Get event from bad sample */ + memset(cur_state, 0, sizeof(*cur_state)); + cur_state->state = EXT_BUF_S_CLEAR; + smp_result = EXT_BUF_SPL_R_THRESHOLD_ONLY; + evt = qos_ext_buf_get_evt_by_sample_result(cur_state, smp_result); + dp_test_fail_unless(evt == EXT_BUF_EVT_THRESHOLD_ONLY, + "EVT_THRESHOLD_ONLY is expected!"); +} + +static void dp_test_qos_ext_buf_schedule_action_with_samples_result(void) +{ + struct qos_ext_buf_state_record *cur_state = &buf_stats.cur_state; + struct qos_external_buffer_sample *sample = &buf_stats.buf_samples[0]; + + qos_ext_buf_test_init(); + + /* No change in S_CLEAR state */ + cur_state->state = EXT_BUF_S_CLEAR; + sample->result = EXT_BUF_SPL_R_NONE; + qos_ext_buf_schedule_state_machine(cur_state, sample); + dp_test_fail_unless(cur_state->state == EXT_BUF_S_CLEAR, + "S_CLEAR is expected!"); + + /* State changed due to more severe event + * NOtification mode changed to TEN_SEC, + * periods counters is 
expected to be 1 + */ + cur_state->state = EXT_BUF_S_THRESHOLD_ONLY; + sample->result = EXT_BUF_SPL_R_REJECTPKT_ONLY; + cur_state->period_data.notify_mode = + EXT_BUF_EVT_NOTIFY_MODE_MINUTE; + cur_state->period_data.samples_cnt = 4; + cur_state->consecutive_periods_cnt = 2; + qos_ext_buf_schedule_state_machine(cur_state, sample); + dp_test_fail_unless(cur_state->state == EXT_BUF_S_REJECTPKT_ONLY, + "S_REJECTPKT_ONLY is expected!"); + dp_test_fail_unless(cur_state->period_data.notify_mode == + EXT_BUF_EVT_NOTIFY_MODE_TEN_SEC, + "EVT_NOTIFY_MODE_TEN_SEC is expected!"); + dp_test_fail_unless(cur_state->consecutive_periods_cnt == 1, + "Value is expected to be 1! But actual value is %d.", + cur_state->consecutive_periods_cnt); + dp_test_fail_unless(cur_state->bad_periods_in_notification_mode == 1, + "Value is expected to be 1! But actual value is %d.", + cur_state->bad_periods_in_notification_mode); + dp_test_fail_unless(cur_state->period_data.samples_cnt == 0, + "Value is expected to be 0! But actual value is %d.", + cur_state->period_data.samples_cnt); + + /* State not changed due to less severe event */ + memset(cur_state, 0, sizeof(*cur_state)); + cur_state->state = EXT_BUF_S_REJECTPKT_ONLY; + sample->result = EXT_BUF_SPL_R_THRESHOLD_ONLY; + cur_state->period_data.notify_mode = + EXT_BUF_EVT_NOTIFY_MODE_TEN_SEC; + qos_ext_buf_schedule_state_machine(cur_state, sample); + dp_test_fail_unless(cur_state->state == EXT_BUF_S_REJECTPKT_ONLY, + "S_REJECTPKT_ONLY is expected!"); + dp_test_fail_unless(cur_state->period_data.bad_sample_in_period == 0, + "EVT_NOTIFY_MODE_HOURLY is expected!"); + + /* tune notification mode TEN_SEC -> MINUTE */ + memset(cur_state, 0, sizeof(*cur_state)); + cur_state->state = EXT_BUF_S_THRESHOLD_ONLY; + sample->result = EXT_BUF_SPL_R_THRESHOLD_ONLY; + cur_state->period_data.notify_mode = + EXT_BUF_EVT_NOTIFY_MODE_TEN_SEC; + cur_state->period_data.samples_cnt = 0; + cur_state->period_data.bad_sample_in_period = 0; + 
cur_state->consecutive_periods_cnt = 2; + qos_ext_buf_schedule_state_machine(cur_state, sample); + dp_test_fail_unless(cur_state->state == EXT_BUF_S_THRESHOLD_ONLY, + "S_THRESHOLD_ONLY is expected!"); + dp_test_fail_unless(cur_state->period_data.notify_mode == + EXT_BUF_EVT_NOTIFY_MODE_MINUTE, + "EVT_NOTIFY_MODE_MINUTELY is expected!"); + dp_test_fail_unless(cur_state->consecutive_periods_cnt == 0, + "Value is expected to be 0! But actual value is %d.", + cur_state->consecutive_periods_cnt); + /* mode not change with one event in new period */ + qos_ext_buf_schedule_state_machine(cur_state, sample); + dp_test_fail_unless(cur_state->state == EXT_BUF_S_THRESHOLD_ONLY, + "S_THRESHOLD_ONLY is expected!"); + dp_test_fail_unless(cur_state->period_data.notify_mode == + EXT_BUF_EVT_NOTIFY_MODE_MINUTE, + "EVT_NOTIFY_MODE_MINUTELY is expected!"); + dp_test_fail_unless(cur_state->consecutive_periods_cnt == 0, + "Value is expected to be 0! But actual value is %d.", + cur_state->consecutive_periods_cnt); + + /* tune notification mode MINUTE -> HOUR */ + memset(cur_state, 0, sizeof(*cur_state)); + cur_state->state = EXT_BUF_S_THRESHOLD_ONLY; + sample->result = EXT_BUF_SPL_R_THRESHOLD_ONLY; + cur_state->period_data.notify_mode = + EXT_BUF_EVT_NOTIFY_MODE_MINUTE; + cur_state->period_data.samples_cnt = 3; + cur_state->period_data.bad_sample_in_period = 0; + cur_state->consecutive_periods_cnt = 2; + qos_ext_buf_schedule_state_machine(cur_state, sample); + dp_test_fail_unless(cur_state->state == EXT_BUF_S_THRESHOLD_ONLY, + "S_THRESHOLD_ONLY is expected!"); + dp_test_fail_unless(cur_state->period_data.notify_mode == + EXT_BUF_EVT_NOTIFY_MODE_MINUTE, + "EVT_NOTIFY_MODE_MINUTELY is expected!"); + qos_ext_buf_schedule_state_machine(cur_state, sample); + dp_test_fail_unless(cur_state->state == EXT_BUF_S_THRESHOLD_ONLY, + "S_THRESHOLD_ONLY is expected!"); + dp_test_fail_unless(cur_state->period_data.notify_mode == + EXT_BUF_EVT_NOTIFY_MODE_MINUTE, + "EVT_NOTIFY_MODE_MINUTELY is 
expected!"); + dp_test_fail_unless(cur_state->consecutive_periods_cnt == 2, + "Value is expected to be 2! But actual value is %d.", + cur_state->consecutive_periods_cnt); + qos_ext_buf_schedule_state_machine(cur_state, sample); + dp_test_fail_unless(cur_state->state == EXT_BUF_S_THRESHOLD_ONLY, + "S_THRESHOLD_ONLY is expected!"); + dp_test_fail_unless(cur_state->period_data.notify_mode == + EXT_BUF_EVT_NOTIFY_MODE_HOUR, + "EVT_NOTIFY_MODE_HOURLY is expected!"); + + /* state from S_CLEAR -> S_THRESHOLD + * mode from TEN_SEC -> MINUTE after 3 consecutive bad samples + */ + memset(cur_state, 0, sizeof(*cur_state)); + cur_state->state = EXT_BUF_S_CLEAR; + sample->result = EXT_BUF_SPL_R_THRESHOLD_ONLY; + qos_ext_buf_schedule_state_machine(cur_state, sample); + dp_test_fail_unless(cur_state->state == EXT_BUF_S_THRESHOLD_ONLY, + "S_THRESHOLD_ONLY is expected!"); + dp_test_fail_unless(cur_state->period_data.notify_mode == + EXT_BUF_EVT_NOTIFY_MODE_TEN_SEC, + "EVT_NOTIFY_MODE_TEN_SEC is expected!"); + qos_ext_buf_schedule_state_machine(cur_state, sample); + qos_ext_buf_schedule_state_machine(cur_state, sample); + dp_test_fail_unless(cur_state->state == EXT_BUF_S_THRESHOLD_ONLY, + "S_THRESHOLD_ONLY is expected!"); + dp_test_fail_unless(cur_state->period_data.notify_mode == + EXT_BUF_EVT_NOTIFY_MODE_MINUTE, + "EVT_NOTIFY_MODE_MINUTELY is expected!"); + /* mode from MINUTE -> HOUR after 3 consecutive bad 1-minute periods */ + for (uint32_t i = 0; i < 3; i++) { + uint32_t j = 0; + + for (; j < 5; j++) { + qos_ext_buf_schedule_state_machine(cur_state, sample); + dp_test_fail_unless(cur_state->state == + EXT_BUF_S_THRESHOLD_ONLY, + "S_THRESHOLD_ONLY is expected!"); + dp_test_fail_unless( + cur_state->period_data.notify_mode == + EXT_BUF_EVT_NOTIFY_MODE_MINUTE, + "EVT_NOTIFY_MODE_MINUTELY is expected!"); + dp_test_fail_unless(cur_state->msg_warning_cnt == 3+i, + "Warning cnt should be %d! 
Actual value %d", + 3+i, cur_state->msg_warning_cnt); + } + + qos_ext_buf_schedule_state_machine(cur_state, sample); + dp_test_fail_unless(cur_state->msg_warning_cnt == 4+i, + "Warning msg cnt is expected to be %d! Actual value %d", + 4+i, cur_state->msg_warning_cnt); + dp_test_fail_unless(cur_state->state == + EXT_BUF_S_THRESHOLD_ONLY, + "S_THRESHOLD_ONLY is expected!"); + + if (i == 2 && j == 5) { + dp_test_fail_unless( + cur_state->period_data.notify_mode == + EXT_BUF_EVT_NOTIFY_MODE_HOUR, + "EVT_NOTIFY_MODE_HOURLY is expected! Actual value %d", + cur_state->period_data.notify_mode); + } else { + dp_test_fail_unless( + cur_state->period_data.notify_mode == + EXT_BUF_EVT_NOTIFY_MODE_MINUTE, + "MODE_MINUTELY is expected! Actual value %d. P%d, S%d", + cur_state->period_data.notify_mode, i, j); + } + } + /* keep HOUR mode following tests above */ + for (int i = 0; i < 180; i++) + qos_ext_buf_schedule_state_machine(cur_state, sample); + sample->result = EXT_BUF_SPL_R_NONE; + for (int i = 0; i < 180; i++) + qos_ext_buf_schedule_state_machine(cur_state, sample); + + dp_test_fail_unless(cur_state->state == EXT_BUF_S_THRESHOLD_ONLY, + "S_THRESHOLD_ONLY is expected!"); + dp_test_fail_unless(cur_state->period_data.notify_mode == + EXT_BUF_EVT_NOTIFY_MODE_HOUR, + "EVT_NOTIFY_MODE_HOURLY is expected! Actual value %d", + cur_state->period_data.notify_mode); + dp_test_fail_unless(cur_state->msg_warning_cnt == 7, + "Warning msg cnt is expected to be %d! 
Actual value %d", + 7, cur_state->msg_warning_cnt); + /* CLEAR after 360 good samples since last bad one */ + sample->result = EXT_BUF_SPL_R_NONE; + for (int i = 0; i < 179; i++) + qos_ext_buf_schedule_state_machine(cur_state, sample); + dp_test_fail_unless(cur_state->state == EXT_BUF_S_THRESHOLD_ONLY, + "S_THRESHOLD_ONLY is expected!"); + qos_ext_buf_schedule_state_machine(cur_state, sample); + dp_test_fail_unless(cur_state->state == EXT_BUF_S_CLEAR, + "S_CLEAR is expected!"); +} + +static void dp_test_qos_ext_buf_tmr_hdlr(void) +{ + struct qos_ext_buf_state_record *cur_state = &buf_stats.cur_state; + + /* same values as fal_plugin_qos_get_counters() */ + struct qos_counters values[] = { + {50000, 0, EXT_BUF_S_CLEAR}, + {3000, 0, EXT_BUF_S_THRESHOLD_ONLY}, + {50000, 0, EXT_BUF_S_THRESHOLD_ONLY}, + {50000, 1, EXT_BUF_S_REJECTPKT_ONLY}, + {3000, 0, EXT_BUF_S_REJECTPKT_ONLY}, + {3000, 1, EXT_BUF_S_THRESHOLD_REJECTPKT}, + {3000, 0, EXT_BUF_S_THRESHOLD_REJECTPKT} + }; + + int size = ARRAY_SIZE(values); + + qos_ext_buf_test_init(); + + for (int i = 0; i < size; i++) { + struct qos_external_buffer_sample *sample = + &buf_stats.buf_samples[buf_stats.cur_sample_idx]; + + qos_external_buffer_congestion_tmr_hdlr(NULL, NULL); + + dp_test_fail_unless(cur_state->state == values[i].state, + "Timer %d, S_CLEAR is expected! 
Actual state is %d", + i, cur_state->state); + dp_test_fail_unless(sample->ext_buf_free == values[i].buf_free, + "Timer %d, buf_free expected/actual %u/%" PRIu64, + i, values[i].buf_free, sample->ext_buf_free); + dp_test_fail_unless( + sample->ext_buf_pkt_reject == values[i].dropped, + "Timer %d, Pkt_dropped expected/actual %u/%" PRIu64, + i, values[i].dropped, sample->ext_buf_pkt_reject); + } +} + +DP_START_TEST(ext_buf_monitor_test, test1) +{ + dp_test_qos_ext_buf_state_transition(); + + dp_test_qos_ext_buf_event_from_sample_result(); + + dp_test_qos_ext_buf_schedule_action_with_samples_result(); + + dp_test_qos_ext_buf_tmr_hdlr(); +} DP_END_TEST; diff --git a/tests/whole_dp/src/dp_test_qos_fal.c b/tests/whole_dp/src/dp_test_qos_fal.c new file mode 100644 index 00000000..cd800613 --- /dev/null +++ b/tests/whole_dp/src/dp_test_qos_fal.c @@ -0,0 +1,2404 @@ +/** + * Copyright (c) 2018-2020, AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + * + * @file dp_test_qos_fal.c + * @brief Basic QoS FAL unit-tests + */ + +#include +#include + +#include "ip6_funcs.h" +#include "ip_funcs.h" +#include "in_cksum.h" +#include "if_var.h" +#include "main.h" +#include "fal_plugin.h" + +#include "dp_test.h" +#include "dp_test_str.h" +#include "dp_test_lib_internal.h" +#include "dp_test_lib_exp.h" +#include "dp_test_lib_intf_internal.h" +#include "dp_test_pktmbuf_lib_internal.h" +#include "dp_test_netlink_state_internal.h" +#include "dp_test_console.h" +#include "dp_test_controller.h" +#include "dp_test_json_utils.h" +#include "dp_test_npf_lib.h" +#include "dp_test_npf_fw_lib.h" +#include "dp_test_npf_sess_lib.h" + +#include "dp_test_qos_lib.h" + +DP_DECL_TEST_SUITE(qos_fal); + +DP_DECL_TEST_CASE(qos_fal, qos_fal_basic, NULL, NULL); + +static int +dp_test_qos_fal_hw_switch_if(const char *if_name, bool enable) +{ + char real_if_name[IFNAMSIZ]; + struct ifnet *ifp; + int ret = 0; + + /* Convert the test if-name into a real if-name */ +
dp_test_intf_real(if_name, real_if_name); + + ifp = dp_ifnet_byifname(real_if_name); + if (ifp) + ifp->hw_forwarding = enable; + else + ret = -1; + + return ret; +} + +/* + * fal_basic uses a minimal QoS configuration + * + * fal_basic_cmds generate from: + * + * set interfaces dataplane dp0s5 policy qos 'trunk-egress' + * set policy qos name trunk-egress shaper default 'global-profile' + * set policy qos profile global-profile bandwidth '100Mbit' + */ + +const char *fal_basic_cmds[] = { + "port subports 1 pipes 1 profiles 2 overhead 24 ql_bytes", + "subport 0 rate 1250000000 size 100000 period 40000", + "subport 0 queue 0 rate 1250000000 size 100000", // size N/A + "subport 0 queue 1 rate 1250000000 size 100000", // size N/A + "subport 0 queue 2 rate 1250000000 size 100000", // size N/A + "subport 0 queue 3 rate 1250000000 size 100000", // size N/A + "vlan 0 0", + "profile 0 percent 1 size 50000 period 10000", + "profile 0 queue 0 rate 12500000 size 50000", // size N/A + "profile 0 queue 1 percent 100 size 50000", // size N/A + "profile 0 queue 2 rate 12500000 size 50000", // size N/A + "profile 0 queue 3 rate 12500000 size 50000", // size N/A + "pipe 0 0 0", + "enable" +}; + +struct des_dp_pair default_dscp_map[] = { + /* des drop-precedence */ + { 0, 0 }, /* DSCP = 0 */ + { 0, 0 }, + { 0, 0 }, + { 0, 0 }, + { 0, 0 }, + { 0, 0 }, + { 0, 0 }, + { 0, 0 }, + { 0, 0 }, + { 0, 0 }, + { 0, 0 }, + { 0, 0 }, + { 0, 0 }, + { 0, 0 }, + { 0, 0 }, + { 0, 0 }, /* DSCP = 15 */ + { 1, 0 }, /* DSCP = 16 */ + { 1, 0 }, + { 1, 0 }, + { 1, 0 }, + { 1, 0 }, + { 1, 0 }, + { 1, 0 }, + { 1, 0 }, + { 1, 0 }, + { 1, 0 }, + { 1, 0 }, + { 1, 0 }, + { 1, 0 }, + { 1, 0 }, + { 1, 0 }, + { 1, 0 }, /* DSCP = 31 */ + { 2, 0 }, /* DSCP = 32 */ + { 2, 0 }, + { 2, 0 }, + { 2, 0 }, + { 2, 0 }, + { 2, 0 }, + { 2, 0 }, + { 2, 0 }, + { 2, 0 }, + { 2, 0 }, + { 2, 0 }, + { 2, 0 }, + { 2, 0 }, + { 2, 0 }, + { 2, 0 }, + { 2, 0 }, /* DSCP = 47 */ + { 3, 0 }, /* DSCP = 48 */ + { 3, 0 }, + { 3, 0 }, + { 3, 0 
}, + { 3, 0 }, + { 3, 0 }, + { 3, 0 }, + { 3, 0 }, + { 3, 0 }, + { 3, 0 }, + { 3, 0 }, + { 3, 0 }, + { 3, 0 }, + { 3, 0 }, + { 3, 0 }, + { 3, 0 } /* DSCP = 63 */ +}; + +DP_START_TEST(qos_fal_basic, fal_basic) +{ + bool debug = (dp_test_debug_get() == 2 ? true : false); + json_object *j_sched_obj; + json_object *j_obj; + uint32_t level; + uint32_t subport = 0; /* Only one subport with id = 0 */ + uint32_t pipe = 0; /* Only one pipe with id = 0 */ + uint32_t tc; /* The normal four traffic-classes */ + uint32_t queue = 0; /* A single queue per TC with id = 0 */ + int ret; + + qos_lib_test_setup(); + + dp_test_qos_debug(debug); + + dp_test_netlink_set_interface_l2("dp1sw_port_0_0"); + + dp_test_intf_switch_create("switch0"); + dp_test_intf_switch_add_port("switch0", "dp1sw_port_0_0"); + dp_test_intf_switch_add_port("switch0", "dp2T1"); + + ret = dp_test_qos_fal_hw_switch_if("dp2T1", true); + dp_test_fail_unless((ret == 0), + "failed to set hw-switching on dp2T1\n"); + + dp_test_qos_attach_config_to_if("dp2T1", fal_basic_cmds, debug); + + /* + * Start of the hardware configuration verification checks + * Check the port-level + */ + level = FAL_QOS_SCHED_GROUP_LEVEL_PORT; + j_obj = dp_test_qos_hw_get_json_sched_group(level, "dp2T1", 0, 0, 0, + debug); + dp_test_qos_hw_check_sched_group(j_obj, level, 1, 1, 0, debug); + j_sched_obj = dp_test_qos_hw_get_json_child(j_obj, "scheduler", debug); + dp_test_qos_hw_check_scheduler(j_sched_obj, "Weighted Round-Robin", + "Bytes Per Second", 1, 1250000000, 0, + 24, debug); + json_object_put(j_sched_obj); + json_object_put(j_obj); + + /* + * Check the subport-level + */ + level = FAL_QOS_SCHED_GROUP_LEVEL_SUBPORT; + j_obj = dp_test_qos_hw_get_json_sched_group(level, "dp2T1", subport, 0, + 0, debug); + dp_test_qos_hw_check_sched_group(j_obj, level, 1, 1, 0, debug); + j_sched_obj = dp_test_qos_hw_get_json_child(j_obj, "scheduler", debug); + dp_test_qos_hw_check_scheduler(j_sched_obj, "Weighted Round-Robin", + "Bytes Per Second", 1, 
1250000000, + 100000, 24, debug); + json_object_put(j_sched_obj); + json_object_put(j_obj); + + /* + * Check the pipe-level + */ + level = FAL_QOS_SCHED_GROUP_LEVEL_PIPE; + j_obj = dp_test_qos_hw_get_json_sched_group(level, "dp2T1", subport, + pipe, 0, debug); + dp_test_qos_hw_check_sched_group(j_obj, level, 4, 4, 0, debug); + j_sched_obj = dp_test_qos_hw_get_json_child(j_obj, "scheduler", debug); + dp_test_qos_hw_check_scheduler(j_sched_obj, "Strict Priority", + "Bytes Per Second", -1, 12500000, 50000, + 24, debug); + + json_object_put(j_sched_obj); + json_object_put(j_obj); + + /* + * Check the four TCs + */ + level = FAL_QOS_SCHED_GROUP_LEVEL_TC; + for (tc = 0; tc < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; tc++) { + j_obj = dp_test_qos_hw_get_json_sched_group(level, "dp2T1", + subport, pipe, tc, + debug); + dp_test_qos_hw_check_sched_group(j_obj, level, 1, 1, 0, debug); + j_sched_obj = dp_test_qos_hw_get_json_child(j_obj, "scheduler", + debug); + dp_test_qos_hw_check_scheduler(j_sched_obj, + "Weighted Round-Robin", + "Bytes Per Second", 1, 12500000, + 50000, 24, debug); + json_object_put(j_sched_obj); + json_object_put(j_obj); + } + + /* + * Each TC should have a single queue with id = 0 + */ + for (tc = 0; tc < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; tc++) { + j_obj = dp_test_qos_hw_get_json_queue("dp2T1", subport, pipe, + tc, queue, debug); + dp_test_qos_hw_check_queue(j_obj, queue, 64, 0, 0, debug); + j_sched_obj = dp_test_qos_hw_get_json_child(j_obj, "scheduler", + debug); + dp_test_qos_hw_check_scheduler(j_sched_obj, + "Weighted Round-Robin", + "Bytes Per Second", 1, 0, 0, + 0, debug); + json_object_put(j_sched_obj); + json_object_put(j_obj); + } + + /* Cleanup */ + dp_test_qos_delete_config_from_if("dp2T1", debug); + + ret = dp_test_qos_fal_hw_switch_if("dp2T1", false); + dp_test_fail_unless((ret == 0), + "failed to clear hw-switching on dp2T1\n"); + + dp_test_intf_switch_remove_port("switch0", "dp2T1"); + dp_test_intf_switch_remove_port("switch0", 
"dp1sw_port_0_0"); + dp_test_intf_switch_del("switch0"); + + dp_test_netlink_del_interface_l2("dp1sw_port_0_0"); + + dp_test_qos_debug(false); + + qos_lib_test_teardown(); + +} DP_END_TEST; + +/* + * fal_wred introduces a WRED queue + * + * fal_wred_cmds generated from: + * + * set interfaces dataplane dp0s5 policy qos 'trunk-egress' + * set policy qos name trunk-egress shaper bandwidth '100Mbit' + * set policy qos name trunk-egress shaper default 'default-profile' + * set policy qos name trunk-egress shaper profile 'default-profile' + * set policy qos name trunk-egress shaper traffic-class 0 queue-limit '4096' + * set policy qos name trunk-egress shaper traffic-class 0 random-detect + * filter-weight '6' + * set policy qos name trunk-egress shaper traffic-class 0 random-detect + * mark-probability '34' + * set policy qos name trunk-egress shaper traffic-class 0 random-detect + * max-threshold '4095' + * set policy qos name trunk-egress shaper traffic-class 0 random-detect + * min-threshold '2048' + */ + +const char *fal_wred_cmds[] = { + "port subports 1 pipes 1 profiles 1 overhead 24 ql_bytes", + "subport 0 rate 12500000 size 50000 period 40000", + "subport 0 queue 0 rate 12500000 size 50000", + "param 0 limit packets 4096 red 0 packets 2048 4095 34 6", + "subport 0 queue 1 rate 12500000 size 50000", + "subport 0 queue 2 rate 12500000 size 50000", + "subport 0 queue 3 rate 12500000 size 50000", + "vlan 0 0", + "profile 0 rate 12500000 size 50000 period 10000", + "profile 0 queue 0 rate 12500000 size 50000", + "profile 0 queue 1 rate 12500000 size 50000", + "profile 0 queue 2 rate 12500000 size 50000", + "profile 0 queue 3 rate 12500000 size 50000", + "pipe 0 0 0", + "enable" +}; + +DP_START_TEST(qos_fal_basic, fal_wred) +{ + bool debug = (dp_test_debug_get() == 2 ? 
true : false); + json_object *j_wred_obj; + json_object *j_obj; + uint32_t subport = 0; /* Only one subport with id = 0 */ + uint32_t pipe = 0; /* Only one pipe with id = 0 */ + uint32_t tc; /* The normal four traffic-classes */ + uint32_t queue = 0; /* A single queue per TC with id = 0 */ + int ret; + + qos_lib_test_setup(); + + dp_test_qos_debug(debug); + + dp_test_netlink_set_interface_l2("dp1sw_port_0_0"); + + dp_test_intf_switch_create("switch0"); + dp_test_intf_switch_add_port("switch0", "dp1sw_port_0_0"); + dp_test_intf_switch_add_port("switch0", "dp2T1"); + + ret = dp_test_qos_fal_hw_switch_if("dp2T1", true); + dp_test_fail_unless((ret == 0), + "failed to set hw-switching on dp2T1\n"); + + /* Set up QoS config on dp2T1 */ + dp_test_qos_attach_config_to_if("dp2T1", fal_wred_cmds, debug); + + /* + * No need to check the port, subport, pipe and TCs, they are + * the same as the qos_fal_basic test. + * + * Each TC should have a single queue with id = 0 + */ + for (tc = 0; tc < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; tc++) { + j_obj = dp_test_qos_hw_get_json_queue("dp2T1", subport, pipe, + tc, queue, debug); + + if (tc != 0) { + /* + * Non TC-0 queues have default queue-limits of 64 + */ + dp_test_qos_hw_check_queue(j_obj, queue, 64, 0, 0, + debug); + } else { + /* + * Check for 4k queue-limit and WRED queue on TC-0 + */ + dp_test_qos_hw_check_queue(j_obj, queue, 4096, 0, + 0, debug); + j_wred_obj = dp_test_qos_hw_get_json_child(j_obj, + "wred", + debug); + dp_test_qos_hw_check_wred_colour(j_wred_obj, "green", + true, 2048, 4095, 34, + 6, debug); + json_object_put(j_wred_obj); + } + json_object_put(j_obj); + } + + /* Cleanup */ + dp_test_qos_delete_config_from_if("dp2T1", debug); + + ret = dp_test_qos_fal_hw_switch_if("dp2T1", false); + dp_test_fail_unless((ret == 0), + "failed to clear hw-switching on dp2T1\n"); + + dp_test_intf_switch_remove_port("switch0", "dp1sw_port_0_0"); + dp_test_intf_switch_remove_port("switch0", "dp2T1"); + 
dp_test_intf_switch_del("switch0"); + + dp_test_netlink_del_interface_l2("dp1sw_port_0_0"); + + dp_test_qos_debug(false); + + qos_lib_test_teardown(); +} DP_END_TEST; + +/* + * fal_hw_npf_cmds, fal_hw_qos_glb_cmds and fal_hw_qos_cmds created from: + * + * set resource group dscp-group synch-group dscp 56 + * + * set resource group dscp-group real-time-group dscp 48 + * set resource group dscp-group real-time-group dscp 47 + * set resource group dscp-group real-time-group dscp 46 + * set resource group dscp-group real-time-group dscp 40 + * + * set resource group dscp-group priority-group-high-drop dscp 34 + * set resource group dscp-group priority-group-high-drop dscp 32 + * set resource group dscp-group priority-group-high-drop dscp 26 + * set resource group dscp-group priority-group-high-drop dscp 24 + * + * set resource group dscp-group priority-group-low-drop dscp 39 + * set resource group dscp-group priority-group-low-drop dscp 38 + * set resource group dscp-group priority-group-low-drop dscp 37 + * set resource group dscp-group priority-group-low-drop dscp 36 + * set resource group dscp-group priority-group-low-drop dscp 35 + * set resource group dscp-group priority-group-low-drop dscp 33 + * set resource group dscp-group priority-group-low-drop dscp 31 + * set resource group dscp-group priority-group-low-drop dscp 30 + * set resource group dscp-group priority-group-low-drop dscp 29 + * set resource group dscp-group priority-group-low-drop dscp 28 + * set resource group dscp-group priority-group-low-drop dscp 27 + * set resource group dscp-group priority-group-low-drop dscp 25 + * + * set resource group dscp-group default-group-high-drop dscp 18 + * set resource group dscp-group default-group-high-drop dscp 16 + * set resource group dscp-group default-group-high-drop dscp 10 + * set resource group dscp-group default-group-high-drop dscp 8 + * + * set resource group dscp-group default-group-low-drop dscp 63 + * set resource group dscp-group default-group-low-drop dscp 62 + *
set resource group dscp-group default-group-low-drop dscp 61 + * set resource group dscp-group default-group-low-drop dscp 60 + * set resource group dscp-group default-group-low-drop dscp 59 + * set resource group dscp-group default-group-low-drop dscp 58 + * set resource group dscp-group default-group-low-drop dscp 57 + * set resource group dscp-group default-group-low-drop dscp 55 + * set resource group dscp-group default-group-low-drop dscp 54 + * set resource group dscp-group default-group-low-drop dscp 53 + * set resource group dscp-group default-group-low-drop dscp 52 + * set resource group dscp-group default-group-low-drop dscp 51 + * set resource group dscp-group default-group-low-drop dscp 50 + * set resource group dscp-group default-group-low-drop dscp 49 + * set resource group dscp-group default-group-low-drop dscp 45 + * set resource group dscp-group default-group-low-drop dscp 44 + * set resource group dscp-group default-group-low-drop dscp 43 + * set resource group dscp-group default-group-low-drop dscp 42 + * set resource group dscp-group default-group-low-drop dscp 41 + * set resource group dscp-group default-group-low-drop dscp 23 + * set resource group dscp-group default-group-low-drop dscp 22 + * set resource group dscp-group default-group-low-drop dscp 21 + * set resource group dscp-group default-group-low-drop dscp 20 + * set resource group dscp-group default-group-low-drop dscp 19 + * set resource group dscp-group default-group-low-drop dscp 17 + * set resource group dscp-group default-group-low-drop dscp 15 + * set resource group dscp-group default-group-low-drop dscp 14 + * set resource group dscp-group default-group-low-drop dscp 13 + * set resource group dscp-group default-group-low-drop dscp 12 + * set resource group dscp-group default-group-low-drop dscp 11 + * set resource group dscp-group default-group-low-drop dscp 9 + * set resource group dscp-group default-group-low-drop dscp 7 + * set resource group dscp-group 
default-group-low-drop dscp 6 + * set resource group dscp-group default-group-low-drop dscp 5 + * set resource group dscp-group default-group-low-drop dscp 4 + * set resource group dscp-group default-group-low-drop dscp 3 + * set resource group dscp-group default-group-low-drop dscp 2 + * set resource group dscp-group default-group-low-drop dscp 1 + * set resource group dscp-group default-group-low-drop dscp 0 + * + * set interfaces dataplane dp0s5 policy qos 'trunk-policy' + * set interfaces dataplane dp0s5 vif 10 policy qos 'vlan-policy-50M' + * set interfaces dataplane dp0s5 vif 20 policy qos 'vlan-policy-50M' + * set policy qos name trunk-policy shaper default 'trunk-profile' + * set policy qos name trunk-policy shaper frame-overhead '22' + * set policy qos name trunk-policy shaper profile trunk-profile bandwidth + * '2mbit' + * set policy qos name trunk-policy shaper traffic-class 0 queue-limit '512' + * set policy qos name trunk-policy shaper traffic-class 1 queue-limit '1024' + * set policy qos name trunk-policy shaper traffic-class 2 queue-limit '1024' + * set policy qos name trunk-policy shaper traffic-class 3 queue-limit '1' + * set policy qos mark-map hw-egress-map dscp-group synch-group + * pcp-mark 7 + * set policy qos mark-map hw-egress-map dscp-group real-time-group + * pcp-mark 5 + * set policy qos mark-map hw-egress-map dscp-group + * priority-group-high-drop pcp-mark 4 + * set policy qos mark-map hw-egress-map dscp-group + * priority-group-low-drop pcp-mark 3 + * set policy qos mark-map hw-egress-map dscp-group + * default-group-high-drop pcp-mark 2 + * set policy qos mark-map hw-egress-map dscp-group + * default-group-low-drop pcp-mark 1 + * set policy qos name vlan-policy-50M shaper mark-map hw-egress-map + * set policy qos name vlan-policy-50M shaper default 'default-prof' + * set policy qos profile default-prof bandwidth '2mbit' + * set policy qos profile vlan-profile-50M bandwidth '50mbit' + * set policy qos profile vlan-profile-50M burst 
'30000' + * set policy qos profile vlan-profile-50M map dscp 0-23,41-45,49-55,57-63 + * to '9' + * set policy qos profile vlan-profile-50M map dscp 24-39 to '8' + * set policy qos profile vlan-profile-50M map dscp 40,46,47,48 to '4' + * set policy qos profile vlan-profile-50M map dscp 56 to '0' + * set policy qos profile vlan-profile-50M period '5' + * set policy qos profile vlan-profile-50M queue 0 traffic-class '0' + * set policy qos profile vlan-profile-50M queue 4 traffic-class '1' + * set policy qos profile vlan-profile-50M queue 8 traffic-class '2' + * set policy qos profile vlan-profile-50M queue 8 weight '60' + * set policy qos profile vlan-profile-50M queue 9 traffic-class '2' + * set policy qos profile vlan-profile-50M queue 9 weight '40' + * set policy qos profile vlan-profile-50M traffic-class 0 bandwidth '50%' + * set policy qos profile vlan-profile-50M traffic-class 1 bandwidth '50%' + */ + +const char *fal_hw_npf_cmds[] = { + "npf-cfg delete dscp-group:synch-group", + "npf-cfg delete dscp-group:real-time-group", + "npf-cfg delete dscp-group:priority-group-high-drop", + "npf-cfg delete dscp-group:priority-group-low-drop", + "npf-cfg delete dscp-group:default-group-high-drop", + "npf-cfg delete dscp-group:default-group-low-drop", + "npf-cfg add dscp-group:synch-group 0 56", + "npf-cfg add dscp-group:real-time-group 0 40;46;47;48", + "npf-cfg add dscp-group:priority-group-high-drop 0 24;26;32;34", + "npf-cfg add dscp-group:priority-group-low-drop 0 " + "25;27;28;29;30;31;33;35;36;37;38;39", + "npf-cfg add dscp-group:default-group-high-drop 0 8;10;16;18", + "npf-cfg add dscp-group:default-group-low-drop 0 " + "0;1;2;3;4;5;6;7;9;11;12;13;14;15;17;19;20;21;22;23;" + "41;42;43;44;45;49;50;51;52;53;54;55;57;58;59;60;61;62;63", + "npf-cfg commit", +}; + +const char *fal_hw_npf_delete_cmds[] = { + "npf-cfg delete dscp-group:synch-group", + "npf-cfg delete dscp-group:real-time-group", + "npf-cfg delete dscp-group:priority-group-high-drop", + "npf-cfg delete 
dscp-group:priority-group-low-drop", + "npf-cfg delete dscp-group:default-group-high-drop", + "npf-cfg delete dscp-group:default-group-low-drop", + "npf-cfg commit", +}; + +const char *fal_hw_qos_glb_cmds[] = { + "qos global-object-cmd mark-map hw-egress-map dscp-group default-group-low-drop " + "pcp 1", + "qos global-object-cmd mark-map hw-egress-map dscp-group default-group-high-drop " + "pcp 2", + "qos global-object-cmd mark-map hw-egress-map dscp-group priority-group-low-drop " + "pcp 3", + "qos global-object-cmd mark-map hw-egress-map dscp-group priority-group-high-drop " + "pcp 4", + "qos global-object-cmd mark-map hw-egress-map dscp-group real-time-group pcp 5", + "qos global-object-cmd mark-map hw-egress-map dscp-group synch-group pcp 7" +}; + +const char *fal_hw_qos_glb_delete_cmds[] = { + "qos global-object-cmd mark-map hw-egress-map delete" +}; + +const char *fal_hw_qos_cmds[] = { + "port subports 3 pipes 2 profiles 5 overhead 22 ql_bytes", + + /* + * 100% rate = 1250000000 bytes/sec. 
+ * 1 msec burst = 1250000 bytes squashed + * to max burst of 130048 bytes + */ + "subport 0 percent 100 msec 1 period 40000", + + "subport 0 queue 0 rate 1250000000 size 100000", // size N/A + "param 0 limit packets 512", + "subport 0 queue 1 rate 1250000000 size 100000", // size N/A + "param 1 limit packets 1024", + "subport 0 queue 2 rate 1250000000 size 100000", // size N/A + "param 2 limit packets 1024", + "subport 0 queue 3 rate 1250000000 size 100000", // size N/A + "param 3 limit packets 1", + "vlan 0 0", + "profile 3 rate 250000 msec 20 period 10000",// burst size = 5000 + "profile 3 queue 0 percent 100 size 1000", // size N/A + "profile 3 queue 1 rate 250000 size 1000", // size N/A + "profile 3 queue 2 rate 250000 size 1000", // size N/A + "profile 3 queue 3 rate 250000 size 1000", // size N/A + "profile 0 rate 6250000 size 30000 period 5000", + "profile 0 queue 0 percent 50 size 12500", // rate = 3125000, size N/A + "profile 0 queue 1 rate 3125000 size 12500", // size N/A + "profile 0 queue 2 percent 100 size 25000", // rate = 6250000, size N/A + "profile 0 queue 3 rate 6250000 size 25000", // size N/A + "profile 0 dscp 0 0x6", + "profile 0 dscp 1 0x6", + "profile 0 dscp 2 0x6", + "profile 0 dscp 3 0x6", + "profile 0 dscp 4 0x6", + "profile 0 dscp 5 0x6", + "profile 0 dscp 6 0x6", + "profile 0 dscp 7 0x6", + "profile 0 dscp 8 0x6", + "profile 0 dscp 9 0x6", + "profile 0 dscp 10 0x6", + "profile 0 dscp 11 0x6", + "profile 0 dscp 12 0x6", + "profile 0 dscp 13 0x6", + "profile 0 dscp 14 0x6", + "profile 0 dscp 15 0x6", + "profile 0 dscp 16 0x6", + "profile 0 dscp 17 0x6", + "profile 0 dscp 18 0x6", + "profile 0 dscp 19 0x6", + "profile 0 dscp 20 0x6", + "profile 0 dscp 21 0x6", + "profile 0 dscp 22 0x6", + "profile 0 dscp 23 0x6", + "profile 0 dscp 24 0x2", + "profile 0 dscp 25 0x2", + "profile 0 dscp 26 0x2", + "profile 0 dscp 27 0x2", + "profile 0 dscp 28 0x2", + "profile 0 dscp 29 0x2", + "profile 0 dscp 30 0x2", + "profile 0 dscp 31 0x2", + "profile 0 
dscp 32 0x2", + "profile 0 dscp 33 0x2", + "profile 0 dscp 34 0x2", + "profile 0 dscp 35 0x2", + "profile 0 dscp 36 0x2", + "profile 0 dscp 37 0x2", + "profile 0 dscp 38 0x2", + "profile 0 dscp 39 0x2", + "profile 0 dscp 40 0x1", + "profile 0 dscp 41 0x6", + "profile 0 dscp 42 0x6", + "profile 0 dscp 43 0x6", + "profile 0 dscp 44 0x6", + "profile 0 dscp 45 0x6", + "profile 0 dscp 46 0x1", + "profile 0 dscp 47 0x1", + "profile 0 dscp 48 0x1", + "profile 0 dscp 49 0x6", + "profile 0 dscp 50 0x6", + "profile 0 dscp 51 0x6", + "profile 0 dscp 52 0x6", + "profile 0 dscp 53 0x6", + "profile 0 dscp 54 0x6", + "profile 0 dscp 55 0x6", + "profile 0 dscp 56 0x0", + "profile 0 dscp 57 0x6", + "profile 0 dscp 58 0x6", + "profile 0 dscp 59 0x6", + "profile 0 dscp 60 0x6", + "profile 0 dscp 61 0x6", + "profile 0 dscp 62 0x6", + "profile 0 dscp 63 0x6", + "profile 0 queue 0 wrr-weight 1 0", + "profile 0 queue 0x1 wrr-weight 1 4", + "profile 0 queue 0x2 wrr-weight 60 8", + "profile 0 queue 0x6 wrr-weight 40 9", + "profile 1 rate 250000 msec 4 period 10000", // size = 1000 (1540 MTU) + "profile 1 queue 0 rate 250000 size 1000", // size N/A + "profile 1 queue 1 rate 250000 size 1000", // size N/A + "profile 1 queue 2 rate 250000 size 1000", // size N/A + "profile 1 queue 3 rate 250000 size 1000", // size N/A + "pipe 0 0 3", + "subport 1 rate 1250000000 size 100000 period 40000", + "subport 1 queue 0 rate 1250000000 size 100000", // size N/A + "subport 1 queue 1 rate 1250000000 size 100000", // size N/A + "subport 1 queue 2 rate 1250000000 size 100000", // size N/A + "subport 1 queue 3 rate 1250000000 size 100000", // size N/A + "subport 1 mark-map hw-egress-map", + "vlan 10 1", + "pipe 1 0 1", + "pipe 1 1 0", + "subport 2 rate 1250000000 size 100000 period 40000", + "subport 2 queue 0 rate 1250000000 size 100000", // size N/A + "subport 2 queue 1 rate 1250000000 size 100000", // size N/A + "subport 2 queue 2 rate 1250000000 size 100000", // size N/A + "subport 2 queue 3 rate 
1250000000 size 100000", // size N/A + "subport 2 mark-map hw-egress-map", + "vlan 20 2", + "pipe 2 0 1", + "pipe 2 1 0", + "enable" +}; + +uint8_t hw_dot1p_map[] = { + /* 0 1 2 3 4 5 6 7 8 9 */ + 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, /* DSCP 0-9 */ + 2, 1, 1, 1, 1, 1, 2, 1, 2, 1, /* DSCP 10-19 */ + 1, 1, 1, 1, 4, 3, 4, 3, 3, 3, /* DSCP 20-29 */ + 3, 3, 4, 3, 4, 3, 3, 3, 3, 3, /* DSCP 30-39 */ + 5, 1, 1, 1, 1, 1, 5, 5, 5, 1, /* DSCP 40-49 */ + 1, 1, 1, 1, 1, 1, 7, 1, 1, 1, /* DSCP 50-59 */ + 1, 1, 1, 1 /* DSCP 60-63 */ +}; + +struct des_dp_pair hw_dscp_map[] = { + /* des drop-precedence */ + { 0, 0 }, /* DSCP = 0 */ + { 0, 0 }, + { 0, 0 }, + { 0, 0 }, + { 0, 0 }, + { 0, 0 }, + { 0, 0 }, + { 0, 0 }, + { 0, 0 }, + { 0, 0 }, + { 0, 0 }, + { 0, 0 }, + { 0, 0 }, + { 0, 0 }, + { 0, 0 }, + { 0, 0 }, /* DSCP = 15 */ + { 0, 0 }, /* DSCP = 16 */ + { 0, 0 }, + { 0, 0 }, + { 0, 0 }, + { 0, 0 }, + { 0, 0 }, + { 0, 0 }, + { 0, 0 }, + { 1, 0 }, + { 1, 0 }, + { 1, 0 }, + { 1, 0 }, + { 1, 0 }, + { 1, 0 }, + { 1, 0 }, + { 1, 0 }, /* DSCP = 31 */ + { 1, 0 }, /* DSCP = 32 */ + { 1, 0 }, + { 1, 0 }, + { 1, 0 }, + { 1, 0 }, + { 1, 0 }, + { 1, 0 }, + { 1, 0 }, + { 2, 0 }, + { 0, 0 }, + { 0, 0 }, + { 0, 0 }, + { 0, 0 }, + { 0, 0 }, + { 2, 0 }, + { 2, 0 }, /* DSCP = 47 */ + { 2, 0 }, /* DSCP = 48 */ + { 0, 0 }, + { 0, 0 }, + { 0, 0 }, + { 0, 0 }, + { 0, 0 }, + { 0, 0 }, + { 0, 0 }, + { 3, 0 }, + { 0, 0 }, + { 0, 0 }, + { 0, 0 }, + { 0, 0 }, + { 0, 0 }, + { 0, 0 }, + { 0, 0 } /* DSCP = 63 */ +}; + +struct qos_fal_ut_sched_group_result { + /* identifiers */ + uint32_t level; + const char *ifname; + uint32_t subport; + uint32_t pipe; + uint32_t tc; + /* sched-group expected results */ + uint32_t max_children; + uint32_t current_children; + /* associated scheduler expected results */ + const char *sched_type; + uint32_t weight; + uint64_t max_bandwidth; + uint32_t max_burst; + int8_t overhead; +}; + +static struct qos_fal_ut_sched_group_result hw_poc_sched_group_results[] = { + /* ----identifiers---- 
sched-group --------scheduler------- */ + { 1, "dp2T1", 0, 0, 0, 3, 3, "Weighted Round-Robin", 1, + 1250000000, 0, 22 }, + { 2, "dp2T1", 0, 0, 0, 1, 1, "Weighted Round-Robin", 1, + 1250000000, 130048, 22 }, + { 3, "dp2T1", 0, 0, 0, 4, 4, "Strict Priority", -1, + 250000, 5000, 22 }, + { 4, "dp2T1", 0, 0, 0, 1, 1, "Weighted Round-Robin", 1, + 250000, 5000, 22 }, + { 4, "dp2T1", 0, 0, 1, 1, 1, "Weighted Round-Robin", 1, + 250000, 5000, 22 }, + { 4, "dp2T1", 0, 0, 2, 1, 1, "Weighted Round-Robin", 1, + 250000, 5000, 22 }, + { 4, "dp2T1", 0, 0, 3, 1, 1, "Weighted Round-Robin", 1, + 250000, 5000, 22 }, + { 2, "dp2T1", 1, 0, 0, 2, 2, "Weighted Round-Robin", 1, + 1250000000, 100000, 22 }, + { 3, "dp2T1", 1, 0, 0, 4, 4, "Strict Priority", -1, + 250000, 1540, 22 }, + { 4, "dp2T1", 1, 0, 0, 1, 1, "Weighted Round-Robin", 1, + 250000, 1540, 22 }, + { 4, "dp2T1", 1, 0, 1, 1, 1, "Weighted Round-Robin", 1, + 250000, 1540, 22 }, + { 4, "dp2T1", 1, 0, 2, 1, 1, "Weighted Round-Robin", 1, + 250000, 1540, 22 }, + { 4, "dp2T1", 1, 0, 3, 1, 1, "Weighted Round-Robin", 1, + 250000, 1540, 22 }, + { 3, "dp2T1", 1, 1, 0, 4, 3, "Strict Priority", -1, + 6250000, 30000, 22 }, + { 4, "dp2T1", 1, 1, 0, 1, 1, "Weighted Round-Robin", 1, + 3125000, 30000, 22 }, + { 4, "dp2T1", 1, 1, 1, 1, 1, "Weighted Round-Robin", 1, + 3125000, 30000, 22 }, + { 4, "dp2T1", 1, 1, 2, 2, 2, "Weighted Round-Robin", 1, + 6250000, 30000, 22 }, + { 2, "dp2T1", 2, 0, 0, 2, 2, "Weighted Round-Robin", 1, + 1250000000, 100000, 22 }, + { 3, "dp2T1", 2, 0, 0, 4, 4, "Strict Priority", -1, + 250000, 1540, 22 }, + { 4, "dp2T1", 2, 0, 0, 1, 1, "Weighted Round-Robin", 1, + 250000, 1540, 22 }, + { 4, "dp2T1", 2, 0, 1, 1, 1, "Weighted Round-Robin", 1, + 250000, 1540, 22 }, + { 4, "dp2T1", 2, 0, 2, 1, 1, "Weighted Round-Robin", 1, + 250000, 1540, 22 }, + { 4, "dp2T1", 2, 0, 3, 1, 1, "Weighted Round-Robin", 1, + 250000, 1540, 22 }, + { 3, "dp2T1", 2, 1, 0, 4, 3, "Strict Priority", -1, + 6250000, 30000, 22 }, + { 4, "dp2T1", 2, 1, 0, 1, 1, 
"Weighted Round-Robin", 1, + 3125000, 30000, 22 }, + { 4, "dp2T1", 2, 1, 1, 1, 1, "Weighted Round-Robin", 1, + 3125000, 30000, 22 }, + { 4, "dp2T1", 2, 1, 2, 2, 2, "Weighted Round-Robin", 1, + 6250000, 30000, 22 }, +}; + +struct qos_fal_ut_map_results { + /* identifiers */ + const char *ifname; + uint32_t subport; + uint32_t pipe; + /* expected results */ + uint32_t ingress_map_type; + struct des_dp_pair *ingress_map_list; + uint32_t egress_map_type; + uint8_t *egress_map_list; +}; + +static struct qos_fal_ut_map_results hw_poc_map_results[] = { + /* identifiers | results */ + { "dp2T1", 0, 0, FAL_QOS_MAP_TYPE_DSCP_TO_DESIGNATOR, default_dscp_map, + 0, NULL }, + { "dp2T1", 1, 0, FAL_QOS_MAP_TYPE_DSCP_TO_DESIGNATOR, default_dscp_map, + FAL_QOS_MAP_TYPE_DSCP_TO_DOT1P, hw_dot1p_map }, + { "dp2T1", 1, 1, FAL_QOS_MAP_TYPE_DSCP_TO_DESIGNATOR, hw_dscp_map, + FAL_QOS_MAP_TYPE_DSCP_TO_DOT1P, hw_dot1p_map }, + { "dp2T1", 2, 0, FAL_QOS_MAP_TYPE_DSCP_TO_DESIGNATOR, default_dscp_map, + FAL_QOS_MAP_TYPE_DSCP_TO_DOT1P, hw_dot1p_map }, + { "dp2T1", 2, 1, FAL_QOS_MAP_TYPE_DSCP_TO_DESIGNATOR, hw_dscp_map, + FAL_QOS_MAP_TYPE_DSCP_TO_DOT1P, hw_dot1p_map }, +}; + +struct qos_fal_ut_queue_results { + /* identifiers */ + uint32_t subport; + uint32_t pipe; + uint32_t tc; + uint32_t queue; + /* expected results */ + uint32_t queue_limit; + uint32_t weight; +}; + +static struct qos_fal_ut_queue_results hw_poc_queue_results[] = { + /* identifiers | results */ + { 0, 0, 0, 0, 512, 1 }, + { 0, 0, 1, 0, 1024, 1 }, + { 0, 0, 2, 0, 1024, 1 }, + { 0, 0, 3, 0, 1, 1 }, + { 1, 0, 0, 0, 512, 1 }, + { 1, 0, 1, 0, 1024, 1 }, + { 1, 0, 2, 0, 1024, 1 }, + { 1, 0, 3, 0, 1, 1 }, + { 2, 0, 0, 0, 512, 1 }, + { 2, 0, 1, 0, 1024, 1 }, + { 2, 0, 2, 0, 1024, 1 }, + { 2, 0, 3, 0, 1, 1 }, + { 2, 1, 0, 0, 512, 1 }, + { 2, 1, 1, 0, 1024, 1 }, + { 2, 1, 2, 0, 1024, 60 }, + { 2, 1, 2, 1, 1024, 40 } +}; + +DP_START_TEST(qos_fal_basic, fal_said_poc) +{ + bool debug = (dp_test_debug_get() == 2 ? 
true : false); + json_object *j_sched_obj; + json_object *j_map_obj; + json_object *j_obj; + const char *ifname; + uint32_t level; + uint32_t subport; + uint32_t pipe; + uint32_t tc; + uint32_t queue; + uint32_t queue_limit; + uint32_t weight; + uint32_t i; + int ret; + + qos_lib_test_setup(); + + dp_test_qos_debug(debug); + + dp_test_netlink_set_interface_l2("dp1sw_port_0_0"); + + dp_test_intf_switch_create("switch0"); + dp_test_intf_switch_add_port("switch0", "dp1sw_port_0_0"); + dp_test_intf_switch_add_port("switch0", "dp2T1"); + + ret = dp_test_qos_fal_hw_switch_if("dp2T1", true); + dp_test_fail_unless((ret == 0), + "failed to set hw-switching on dp2T1\n"); + + /* Add NPF config */ + for (i = 0; i < ARRAY_SIZE(fal_hw_npf_cmds); i++) + dp_test_send_config_src(dp_test_cont_src_get(), "%s", + fal_hw_npf_cmds[i]); + + /* Add QoS global config */ + for (i = 0; i < ARRAY_SIZE(fal_hw_qos_glb_cmds); i++) + dp_test_send_config_src(dp_test_cont_src_get(), "%s", + fal_hw_qos_glb_cmds[i]); + + /* Add QoS interface config */ + dp_test_qos_attach_config_to_if("dp2T1", fal_hw_qos_cmds, debug); + + /* + * Start of the hardware configuration verification checks. + * Check the hierarchy of sched-group objects and their associated + * scheduler objects. 
+ */ + for (i = 0; i < ARRAY_SIZE(hw_poc_sched_group_results); i++) { + uint32_t max_children; + uint32_t current_children; + const char *sched_type; + uint64_t max_bandwidth; + uint32_t max_burst; + int8_t overhead; + + /* Get the identifiers */ + level = hw_poc_sched_group_results[i].level; + ifname = hw_poc_sched_group_results[i].ifname; + subport = hw_poc_sched_group_results[i].subport; + pipe = hw_poc_sched_group_results[i].pipe; + tc = hw_poc_sched_group_results[i].tc; + + /* Get the expected results */ + max_children = hw_poc_sched_group_results[i].max_children; + current_children = + hw_poc_sched_group_results[i].current_children; + sched_type = hw_poc_sched_group_results[i].sched_type; + weight = hw_poc_sched_group_results[i].weight; + max_bandwidth = hw_poc_sched_group_results[i].max_bandwidth; + max_burst = hw_poc_sched_group_results[i].max_burst; + overhead = hw_poc_sched_group_results[i].overhead; + + j_obj = dp_test_qos_hw_get_json_sched_group(level, ifname, + subport, pipe, tc, + debug); + + dp_test_qos_hw_check_sched_group(j_obj, level, max_children, + current_children, 0, debug); + + j_sched_obj = dp_test_qos_hw_get_json_child(j_obj, "scheduler", + debug); + if (level == FAL_QOS_SCHED_GROUP_LEVEL_QUEUE) + overhead = -1; + dp_test_qos_hw_check_scheduler(j_sched_obj, sched_type, + "Bytes Per Second", weight, + max_bandwidth, max_burst, + overhead, debug); + json_object_put(j_sched_obj); + json_object_put(j_obj); + } + + /* + * Check that the pipe-level sched-group objects have the correct + * mapping-tables. 
+ */ + for (i = 0; i < ARRAY_SIZE(hw_poc_map_results); i++) { + uint32_t map_type; + uint8_t *egress_map_list; + + /* Get the identifiers */ + level = FAL_QOS_SCHED_GROUP_LEVEL_PIPE; + ifname = hw_poc_map_results[i].ifname; + subport = hw_poc_map_results[i].subport; + pipe = hw_poc_map_results[i].pipe; + + j_obj = dp_test_qos_hw_get_json_sched_group(level, ifname, + subport, pipe, 0, + debug); + + /* Get the expected egress map results */ + map_type = hw_poc_map_results[i].egress_map_type; + egress_map_list = hw_poc_map_results[i].egress_map_list; + if (egress_map_list != NULL) { + j_map_obj = dp_test_qos_hw_get_json_child(j_obj, + "egress-map", + debug); + dp_test_qos_hw_check_egress_map(j_map_obj, map_type, + egress_map_list, debug); + json_object_put(j_map_obj); + } + + json_object_put(j_obj); + } + + /* + * Finally check that the queues that are the leaves of the + * scheduling hierarchy have all the expected values. + */ + for (i = 0; i < ARRAY_SIZE(hw_poc_queue_results); i++) { + subport = hw_poc_queue_results[i].subport; + pipe = hw_poc_queue_results[i].pipe; + tc = hw_poc_queue_results[i].tc; + queue = hw_poc_queue_results[i].queue; + queue_limit = hw_poc_queue_results[i].queue_limit; + weight = hw_poc_queue_results[i].weight; + + j_obj = dp_test_qos_hw_get_json_queue("dp2T1", subport, pipe, + tc, queue, debug); + dp_test_qos_hw_check_queue(j_obj, queue, queue_limit, queue, + 0, debug); + + j_sched_obj = dp_test_qos_hw_get_json_child(j_obj, "scheduler", + debug); + dp_test_qos_hw_check_scheduler(j_sched_obj, + "Weighted Round-Robin", + "Bytes Per Second", weight, 0, + 0, 0, debug); + json_object_put(j_sched_obj); + json_object_put(j_obj); + } + + /* QoS cleanup */ + dp_test_qos_delete_config_from_if("dp2T1", debug); + + for (i = 0; i < ARRAY_SIZE(fal_hw_qos_glb_delete_cmds); i++) + dp_test_send_config_src(dp_test_cont_src_get(), "%s", + fal_hw_qos_glb_delete_cmds[i]); + + /* NPF cleanup */ + for (i = 0; i < ARRAY_SIZE(fal_hw_npf_delete_cmds); i++) + 
dp_test_send_config_src(dp_test_cont_src_get(), "%s", + fal_hw_npf_delete_cmds[i]); + + ret = dp_test_qos_fal_hw_switch_if("dp2T1", false); + dp_test_fail_unless((ret == 0), + "failed to clear hw-switching on dp2T1\n"); + + /* Cleanup the ports */ + dp_test_intf_switch_remove_port("switch0", "dp1sw_port_0_0"); + dp_test_intf_switch_remove_port("switch0", "dp2T1"); + dp_test_intf_switch_del("switch0"); + + dp_test_netlink_del_interface_l2("dp1sw_port_0_0"); + + dp_test_qos_debug(false); + + qos_lib_test_teardown(); +} DP_END_TEST; + +/* + * fal_local_priority_cmds created from: + * + * set interfaces dataplane dp0s5 policy qos 'policy-3' + * set policy qos name policy-3 shaper bandwidth '1Gbit' + * set policy qos name policy-3 shaper default 'profile-1' + * set policy qos name policy-3 shaper profile profile-1 queue 31 + * 'priority-local' + * set policy qos name policy-3 shaper profile profile-1 queue 31 + * traffic-class '1' + */ + +const char *fal_local_priority_cmds[] = { + "port subports 1 pipes 1 profiles 1 overhead 24 ql_bytes", + "subport 0 rate 125000000 size 50000 period 40000", + "subport 0 queue 0 rate 125000000 size 50000", + "subport 0 queue 1 rate 125000000 size 50000", + "subport 0 queue 2 rate 125000000 size 50000", + "subport 0 queue 3 rate 125000000 size 50000", + "vlan 0 0", + "profile 0 rate 125000000 size 50000 period 10000", + "profile 0 queue 0 rate 125000000 size 50000", + "profile 0 queue 1 rate 125000000 size 50000", + "profile 0 queue 2 rate 125000000 size 50000", + "profile 0 queue 3 rate 125000000 size 50000", + "profile 0 queue 0x5 wrr-weight 1 31 prio-loc", + "pipe 0 0 0", + "enable" +}; + +DP_START_TEST(qos_fal_basic, fal_local_priority_queue) +{ + bool debug = (dp_test_debug_get() == 2 ? 
true : false); + json_object *j_obj; + uint32_t level; + int ret; + + qos_lib_test_setup(); + + dp_test_qos_debug(debug); + + dp_test_netlink_set_interface_l2("dp1sw_port_0_0"); + + dp_test_intf_switch_create("switch0"); + dp_test_intf_switch_add_port("switch0", "dp1sw_port_0_0"); + dp_test_intf_switch_add_port("switch0", "dp2T1"); + + ret = dp_test_qos_fal_hw_switch_if("dp2T1", true); + dp_test_fail_unless((ret == 0), + "failed to set hw-switching on dp2T1\n"); + + /* Add QoS config */ + dp_test_qos_attach_config_to_if("dp2T1", fal_local_priority_cmds, + debug); + + /* + * Test verification code here - check that the ingress map has + * its local-priority queue set. + */ + level = FAL_QOS_SCHED_GROUP_LEVEL_PIPE; + j_obj = dp_test_qos_hw_get_json_sched_group(level, "dp2T1", 0, 0, 0, + debug); + dp_test_qos_hw_check_sched_group(j_obj, level, 4, 4, 0, debug); + json_object_put(j_obj); + + j_obj = dp_test_qos_hw_get_json_queue("dp2T1", 0, 0, 0, 0, debug); + dp_test_qos_hw_check_queue(j_obj, 0, 64, 0, 0, debug); + json_object_put(j_obj); + + j_obj = dp_test_qos_hw_get_json_queue("dp2T1", 0, 0, 1, 0, debug); + dp_test_qos_hw_check_queue(j_obj, 0, 64, 0, 0, debug); + json_object_put(j_obj); + + j_obj = dp_test_qos_hw_get_json_queue("dp2T1", 0, 0, 1, 1, debug); + dp_test_qos_hw_check_queue(j_obj, 1, 64, 1, 0, debug); + json_object_put(j_obj); + + j_obj = dp_test_qos_hw_get_json_queue("dp2T1", 0, 0, 2, 0, debug); + dp_test_qos_hw_check_queue(j_obj, 0, 64, 0, 0, debug); + json_object_put(j_obj); + + j_obj = dp_test_qos_hw_get_json_queue("dp2T1", 0, 0, 3, 0, debug); + dp_test_qos_hw_check_queue(j_obj, 0, 64, 0, 0, debug); + json_object_put(j_obj); + + /* QoS cleanup */ + dp_test_qos_delete_config_from_if("dp2T1", debug); + + ret = dp_test_qos_fal_hw_switch_if("dp2T1", false); + dp_test_fail_unless((ret == 0), + "failed to clear hw-switching on dp2T1\n"); + + /* Cleanup the ports */ + dp_test_intf_switch_remove_port("switch0", "dp1sw_port_0_0"); + 
dp_test_intf_switch_remove_port("switch0", "dp2T1"); + dp_test_intf_switch_del("switch0"); + + dp_test_netlink_del_interface_l2("dp1sw_port_0_0"); + + dp_test_qos_debug(false); + + qos_lib_test_teardown(); +} DP_END_TEST; + +/* + * fal_egress_map_npf_cmds and fal_egress_map_qos_cmds created from: + * + * set resources group dscp-group dscp7-0 dscp 0 + * set resources group dscp-group dscp7-0 dscp 1 + * set resources group dscp-group dscp7-0 dscp 2 + * set resources group dscp-group dscp7-0 dscp 3 + * set resources group dscp-group dscp7-0 dscp 4 + * set resources group dscp-group dscp7-0 dscp 5 + * set resources group dscp-group dscp7-0 dscp 6 + * set resources group dscp-group dscp7-0 dscp 7 + * set resources group dscp-group dscp15-8 dscp 8 + * set resources group dscp-group dscp15-8 dscp 9 + * set resources group dscp-group dscp15-8 dscp 10 + * set resources group dscp-group dscp15-8 dscp 11 + * set resources group dscp-group dscp15-8 dscp 12 + * set resources group dscp-group dscp15-8 dscp 13 + * set resources group dscp-group dscp15-8 dscp 14 + * set resources group dscp-group dscp15-8 dscp 15 + * set resources group dscp-group dscp23-16 dscp 16 + * set resources group dscp-group dscp23-16 dscp 17 + * set resources group dscp-group dscp23-16 dscp 18 + * set resources group dscp-group dscp23-16 dscp 19 + * set resources group dscp-group dscp23-16 dscp 20 + * set resources group dscp-group dscp23-16 dscp 21 + * set resources group dscp-group dscp23-16 dscp 22 + * set resources group dscp-group dscp23-16 dscp 23 + * set resources group dscp-group dscp31-24 dscp 24 + * set resources group dscp-group dscp31-24 dscp 25 + * set resources group dscp-group dscp31-24 dscp 26 + * set resources group dscp-group dscp31-24 dscp 27 + * set resources group dscp-group dscp31-24 dscp 28 + * set resources group dscp-group dscp31-24 dscp 29 + * set resources group dscp-group dscp31-24 dscp 30 + * set resources group dscp-group dscp31-24 dscp 31 + * set resources group dscp-group 
dscp39-32 dscp 32 + * set resources group dscp-group dscp39-32 dscp 33 + * set resources group dscp-group dscp39-32 dscp 34 + * set resources group dscp-group dscp39-32 dscp 35 + * set resources group dscp-group dscp39-32 dscp 36 + * set resources group dscp-group dscp39-32 dscp 37 + * set resources group dscp-group dscp39-32 dscp 38 + * set resources group dscp-group dscp39-32 dscp 39 + * set resources group dscp-group dscp47-40 dscp 40 + * set resources group dscp-group dscp47-40 dscp 41 + * set resources group dscp-group dscp47-40 dscp 42 + * set resources group dscp-group dscp47-40 dscp 43 + * set resources group dscp-group dscp47-40 dscp 44 + * set resources group dscp-group dscp47-40 dscp 45 + * set resources group dscp-group dscp47-40 dscp 46 + * set resources group dscp-group dscp47-40 dscp 47 + * set resources group dscp-group dscp55-48 dscp 48 + * set resources group dscp-group dscp55-48 dscp 49 + * set resources group dscp-group dscp55-48 dscp 50 + * set resources group dscp-group dscp55-48 dscp 51 + * set resources group dscp-group dscp55-48 dscp 52 + * set resources group dscp-group dscp55-48 dscp 53 + * set resources group dscp-group dscp55-48 dscp 54 + * set resources group dscp-group dscp55-48 dscp 55 + * set resources group dscp-group dscp63-56 dscp 56 + * set resources group dscp-group dscp63-56 dscp 57 + * set resources group dscp-group dscp63-56 dscp 58 + * set resources group dscp-group dscp63-56 dscp 59 + * set resources group dscp-group dscp63-56 dscp 60 + * set resources group dscp-group dscp63-56 dscp 61 + * set resources group dscp-group dscp63-56 dscp 62 + * set resources group dscp-group dscp63-56 dscp 63 + * + * set policy qos mark-map egress-pcp-map dscp-group dscp63-56 pcp-mark 7 + * set policy qos mark-map egress-pcp-map dscp-group dscp55-48 pcp-mark 6 + * set policy qos mark-map egress-pcp-map dscp-group dscp47-40 pcp-mark 5 + * set policy qos mark-map egress-pcp-map dscp-group dscp39-32 pcp-mark 4 + * set policy qos mark-map 
egress-pcp-map dscp-group dscp31-24 pcp-mark 3 + * set policy qos mark-map egress-pcp-map dscp-group dscp23-16 pcp-mark 2 + * set policy qos mark-map egress-pcp-map dscp-group dscp15-8 pcp-mark 1 + * set policy qos mark-map egress-pcp-map dscp-group dscp7-0 pcp-mark 0 + * + * set policy qos name trunk-policy shaper default profile-1 + * set policy qos name trunk-policy shaper profile profile-1 bandwidth 100Mbit + * set interface dataplane dp0s5 policy qos trunk-policy + * + * set policy qos name vlan-policy shaper mark-map egress-pcp-map + * set policy qos name vlan-policy shaper profile profile-2 bandwidth 200Mbit + * set policy qos name vlan-policy shaper default profile-2 + * set interface dataplane dp0s5 vif 10 policy qos vlan-policy + */ + +const char *fal_egress_map_npf_cmds[] = { + "npf-cfg add dscp-group:dscp7-0 0 0;1;2;3;4;5;6;7", + "npf-cfg add dscp-group:dscp15-8 0 8;9;10;11;12;13;14;15", + "npf-cfg add dscp-group:dscp23-16 0 16;17;18;19;20;21;22;23", + "npf-cfg add dscp-group:dscp31-24 0 24;25;26;27;28;29;30;31", + "npf-cfg add dscp-group:dscp39-32 0 32;33;34;35;36;37;38;39", + "npf-cfg add dscp-group:dscp47-40 0 40;41;42;43;44;45;46;47", + "npf-cfg add dscp-group:dscp55-48 0 48;49;50;51;52;53;54;55", + "npf-cfg add dscp-group:dscp63-56 0 56;57;58;59;60;61;62;63", + "npf-cfg commit" +}; + +const char *fal_egress_map_npf_delete_cmds[] = { + "npf-cfg delete dscp-group:dscp7-0", + "npf-cfg delete dscp-group:dscp15-8", + "npf-cfg delete dscp-group:dscp23-16", + "npf-cfg delete dscp-group:dscp31-24", + "npf-cfg delete dscp-group:dscp39-32", + "npf-cfg delete dscp-group:dscp47-40", + "npf-cfg delete dscp-group:dscp55-48", + "npf-cfg delete dscp-group:dscp63-56", + "npf-cfg commit" +}; + +const char *fal_egress_map_qos_glb_cmds[] = { + "qos global-object-cmd mark-map egress-pcp-map dscp-group dscp7-0 pcp 0", + "qos global-object-cmd mark-map egress-pcp-map dscp-group dscp15-8 pcp 1", + "qos global-object-cmd mark-map egress-pcp-map dscp-group dscp23-16 pcp 2", 
+ "qos global-object-cmd mark-map egress-pcp-map dscp-group dscp31-24 pcp 3", + "qos global-object-cmd mark-map egress-pcp-map dscp-group dscp39-32 pcp 4", + "qos global-object-cmd mark-map egress-pcp-map dscp-group dscp47-40 pcp 5", + "qos global-object-cmd mark-map egress-pcp-map dscp-group dscp55-48 pcp 6", + "qos global-object-cmd mark-map egress-pcp-map dscp-group dscp63-56 pcp 7" +}; + +const char *fal_egress_map_qos_glb_delete_cmds[] = { + "qos global-object-cmd mark-map egress-pcp-map delete" +}; + +const char *fal_egress_map_qos_int_cmds[] = { + "port subports 2 pipes 1 profiles 2 overhead 24 ql_bytes", + "subport 0 rate 1250000000 size 100000 period 40000", + "subport 0 queue 0 rate 1250000000 size 100000", + "subport 0 queue 1 rate 1250000000 size 100000", + "subport 0 queue 2 rate 1250000000 size 100000", + "subport 0 queue 3 rate 1250000000 size 100000", + "vlan 0 0", + "profile 0 rate 12500000 size 50000 period 10000", + "profile 0 queue 0 rate 12500000 size 50000", + "profile 0 queue 1 rate 12500000 size 50000", + "profile 0 queue 2 rate 12500000 size 50000", + "profile 0 queue 3 rate 12500000 size 50000", + "pipe 0 0 0", + "subport 1 rate 1250000000 size 100000 period 40000", + "subport 1 queue 0 rate 1250000000 size 100000", + "subport 1 queue 1 rate 1250000000 size 100000", + "subport 1 queue 2 rate 1250000000 size 100000", + "subport 1 queue 3 rate 1250000000 size 100000", + "subport 1 mark-map egress-pcp-map", + "vlan 10 1", + "profile 1 rate 25000000 size 100000 period 10000", + "profile 1 queue 0 rate 25000000 size 100000", + "profile 1 queue 1 rate 25000000 size 100000", + "profile 1 queue 2 rate 25000000 size 100000", + "profile 1 queue 3 rate 25000000 size 100000", + "pipe 1 0 1", + "enable" +}; + +int8_t fal_egress_map_expected_pcp_values[] = { + 0, 0, 0, 0, 0, 0, 0, 0, /* DSCP 0-7 */ + 1, 1, 1, 1, 1, 1, 1, 1, /* DSCP 8-15 */ + 2, 2, 2, 2, 2, 2, 2, 2, /* DSCP 16-23 */ + 3, 3, 3, 3, 3, 3, 3, 3, /* DSCP 24-31 */ + 4, 4, 4, 4, 4, 4, 4, 4, /* 
DSCP 32-39 */ + 5, 5, 5, 5, 5, 5, 5, 5, /* DSCP 40-47 */ + 6, 6, 6, 6, 6, 6, 6, 6, /* DSCP 48-55 */ + 7, 7, 7, 7, 7, 7, 7, 7 /* DSCP 56-63 */ +}; + +DP_START_TEST(qos_fal_basic, fal_egress_map) +{ + bool debug = (dp_test_debug_get() == 2 ? true : false); + uint32_t i; + int ret; + + qos_lib_test_setup(); + + dp_test_qos_debug(debug); + + dp_test_netlink_set_interface_l2("dp1sw_port_0_0"); + + dp_test_intf_switch_create("switch0"); + dp_test_intf_switch_add_port("switch0", "dp1sw_port_0_0"); + dp_test_intf_switch_add_port("switch0", "dp2T1"); + + ret = dp_test_qos_fal_hw_switch_if("dp2T1", true); + dp_test_fail_unless((ret == 0), + "failed to set hw-switching on dp2T1\n"); + + /* Add NPF config */ + for (i = 0; i < ARRAY_SIZE(fal_egress_map_npf_cmds); i++) + dp_test_send_config_src(dp_test_cont_src_get(), "%s", + fal_egress_map_npf_cmds[i]); + + /* Add QoS config */ + for (i = 0; i < ARRAY_SIZE(fal_egress_map_qos_glb_cmds); i++) + dp_test_send_config_src(dp_test_cont_src_get(), "%s", + fal_egress_map_qos_glb_cmds[i]); + + dp_test_qos_attach_config_to_if("dp2T1", fal_egress_map_qos_int_cmds, + debug); + + /* + * Test verification code here + */ + dp_test_qos_check_mark_map("egress-pcp-map", + fal_egress_map_expected_pcp_values, + debug); + + /* QoS cleanup */ + dp_test_qos_delete_config_from_if("dp2T1", debug); + + for (i = 0; i < ARRAY_SIZE(fal_egress_map_qos_glb_delete_cmds); i++) + dp_test_send_config_src(dp_test_cont_src_get(), "%s", + fal_egress_map_qos_glb_delete_cmds[i]); + + /* NPF cleanup */ + for (i = 0; i < ARRAY_SIZE(fal_egress_map_npf_delete_cmds); i++) + dp_test_send_config_src(dp_test_cont_src_get(), "%s", + fal_egress_map_npf_delete_cmds[i]); + + ret = dp_test_qos_fal_hw_switch_if("dp2T1", false); + dp_test_fail_unless((ret == 0), + "failed to clear hw-switching on dp2T1\n"); + + /* Cleanup the ports */ + dp_test_intf_switch_remove_port("switch0", "dp1sw_port_0_0"); + dp_test_intf_switch_remove_port("switch0", "dp2T1"); + 
dp_test_intf_switch_del("switch0"); + + dp_test_netlink_del_interface_l2("dp1sw_port_0_0"); + + dp_test_qos_debug(false); + + qos_lib_test_teardown(); +} DP_END_TEST; + +/* + * fal_egress_map_cmds and fal_egress_map_qos_cmds2 created from: + * + * set policy qos mark-map des-mark designation 0 drop-precedence green + * pcp-mark 7 + * set policy qos mark-map des-mark designation 0 drop-precedence yellow + * pcp-mark 7 + * set policy qos mark-map des-mark designation 0 drop-precedence red + * pcp-mark 7 + * set policy qos mark-map des-mark designation 1 drop-precedence green + * pcp-mark 6 + * set policy qos mark-map des-mark designation 2 drop-precedence green + * pcp-mark 5 + * set policy qos mark-map des-mark designation 3 drop-precedence green + * pcp-mark 4 + * set policy qos mark-map des-mark designation 4 drop-precedence green + * pcp-mark 3 + * set policy qos mark-map des-mark designation 5 drop-precedence green + * pcp-mark 2 + * set policy qos mark-map des-mark designation 5 drop-precedence yellow + * pcp-mark 2 + * set policy qos mark-map des-mark designation 6 drop-precedence green + * pcp-mark 1 + * set policy qos mark-map des-mark designation 7 drop-precedence green + * pcp-mark 0 + * + * set policy qos name trunk-policy shaper default profile-1 + * set policy qos name trunk-policy shaper profile profile-1 bandwidth 100Mbit + * set interface dataplane dp0s5 policy qos trunk-policy + * + * set policy qos name vlan-policy shaper mark-map des-mark + * set policy qos name vlan-policy shaper profile profile-2 bandwidth 200Mbit + * set policy qos name vlan-policy shaper default profile-2 + * set interface dataplane dp0s5 vif 10 policy qos vlan-policy + */ + +const char *fal_egress_map_qos_glb_cmds2[] = { + "qos global-object-cmd mark-map des-mark designation 0 drop-prec green pcp 7", + "qos global-object-cmd mark-map des-mark designation 0 drop-prec yellow pcp 7", + "qos global-object-cmd mark-map des-mark designation 0 drop-prec red pcp 7", + "qos 
global-object-cmd mark-map des-mark designation 1 drop-prec green pcp 6", + "qos global-object-cmd mark-map des-mark designation 2 drop-prec green pcp 5", + "qos global-object-cmd mark-map des-mark designation 3 drop-prec green pcp 4", + "qos global-object-cmd mark-map des-mark designation 4 drop-prec green pcp 3", + "qos global-object-cmd mark-map des-mark designation 5 drop-prec green pcp 2", + "qos global-object-cmd mark-map des-mark designation 5 drop-prec yellow pcp 2", + "qos global-object-cmd mark-map des-mark designation 6 drop-prec green pcp 1", + "qos global-object-cmd mark-map des-mark designation 7 drop-prec green pcp 0", +}; + +const char *fal_egress_map_qos_glb_delete_cmds2[] = { + "qos global-object-cmd mark-map des-mark delete" +}; + +const char *fal_egress_map_qos_int_cmds2[] = { + "port subports 2 pipes 1 profiles 2 overhead 24 ql_bytes", + "subport 0 rate 1250000000 size 100000 period 40000", + "subport 0 queue 0 rate 1250000000 size 100000", + "subport 0 queue 1 rate 1250000000 size 100000", + "subport 0 queue 2 rate 1250000000 size 100000", + "subport 0 queue 3 rate 1250000000 size 100000", + "vlan 0 0", + "profile 0 rate 12500000 size 50000 period 10000", + "profile 0 queue 0 rate 12500000 size 50000", + "profile 0 queue 1 rate 12500000 size 50000", + "profile 0 queue 2 rate 12500000 size 50000", + "profile 0 queue 3 rate 12500000 size 50000", + "pipe 0 0 0", + "subport 1 rate 1250000000 size 100000 period 40000", + "subport 1 queue 0 rate 1250000000 size 100000", + "subport 1 queue 1 rate 1250000000 size 100000", + "subport 1 queue 2 rate 1250000000 size 100000", + "subport 1 queue 3 rate 1250000000 size 100000", + "subport 1 mark-map des-mark", + "vlan 10 1", + "profile 1 rate 25000000 size 100000 period 10000", + "profile 1 queue 0 rate 25000000 size 100000", + "profile 1 queue 1 rate 25000000 size 100000", + "profile 1 queue 2 rate 25000000 size 100000", + "profile 1 queue 3 rate 25000000 size 100000", + "pipe 1 0 1", + "enable" +}; + 
+int8_t fal_egress_map_expected_pcp_values2[] = { + /* designation 0-7 x dp 0-2*/ + 7, 7, 7, + 6, 0, 0, + 5, 0, 0, + 4, 0, 0, + 3, 0, 0, + 2, 2, 0, + 1, 0, 0, + 0, 0, 0, +}; + +DP_START_TEST(qos_fal_basic, fal_egress_map2) +{ + bool debug = (dp_test_debug_get() == 2 ? true : false); + uint32_t i; + int ret; + + qos_lib_test_setup(); + + dp_test_qos_debug(debug); + + dp_test_netlink_set_interface_l2("dp1sw_port_0_0"); + + dp_test_intf_switch_create("switch0"); + dp_test_intf_switch_add_port("switch0", "dp1sw_port_0_0"); + dp_test_intf_switch_add_port("switch0", "dp2T1"); + + ret = dp_test_qos_fal_hw_switch_if("dp2T1", true); + dp_test_fail_unless((ret == 0), + "failed to set hw-switching on dp2T1\n"); + + /* Add QoS config */ + for (i = 0; i < ARRAY_SIZE(fal_egress_map_qos_glb_cmds2); i++) + dp_test_send_config_src(dp_test_cont_src_get(), "%s", + fal_egress_map_qos_glb_cmds2[i]); + + dp_test_qos_attach_config_to_if("dp2T1", fal_egress_map_qos_int_cmds2, + debug); + + /* + * Test verification code here + */ + dp_test_qos_check_mark_map("des-mark", + fal_egress_map_expected_pcp_values2, + debug); + + /* QoS cleanup */ + dp_test_qos_delete_config_from_if("dp2T1", debug); + + for (i = 0; i < ARRAY_SIZE(fal_egress_map_qos_glb_delete_cmds2); i++) + dp_test_send_config_src(dp_test_cont_src_get(), "%s", + fal_egress_map_qos_glb_delete_cmds2[i]); + + ret = dp_test_qos_fal_hw_switch_if("dp2T1", false); + dp_test_fail_unless((ret == 0), + "failed to clear hw-switching on dp2T1\n"); + + /* Cleanup the ports */ + dp_test_intf_switch_remove_port("switch0", "dp1sw_port_0_0"); + dp_test_intf_switch_remove_port("switch0", "dp2T1"); + dp_test_intf_switch_del("switch0"); + + dp_test_netlink_del_interface_l2("dp1sw_port_0_0"); + + dp_test_qos_debug(false); + + qos_lib_test_teardown(); +} DP_END_TEST; + +/* + * fal_egress_map_cmds and fal_egress_map_qos_cmds2 created from: + * + * set policy qos mark-map des-mark designation 0 drop-precedence green + * pcp-mark 2 + * set policy qos 
mark-map des-mark designation 3 drop-precedence yellow + * pcp-mark 4 + * set policy qos mark-map des-mark designation 6 drop-precedence red + * pcp-mark 7 + * + * set policy qos name trunk-policy shaper default profile-1 + * set policy qos name trunk-policy shaper profile profile-1 bandwidth 100Mbit + * set interface dataplane dp0s5 policy qos trunk-policy + * + * set policy qos name vlan-policy shaper mark-map des-mark + * set policy qos name vlan-policy shaper profile profile-2 bandwidth 200Mbit + * set policy qos name vlan-policy shaper default profile-2 + * set interface dataplane dp0s5 vif 10 policy qos vlan-policy + */ + +const char *fal_egress_map_qos_glb_cmds3[] = { + "qos global-object-cmd mark-map des-mark designation 0 drop-prec green pcp 2", + "qos global-object-cmd mark-map des-mark designation 3 drop-prec yellow pcp 4", + "qos global-object-cmd mark-map des-mark designation 6 drop-prec red pcp 7", +}; + +int8_t fal_egress_map_expected_pcp_values3[] = { + /* + * des/dp + * 0/green, + * 3/yellow, + * 6/red + */ + 2, 0, 0, + 0, 0, 0, + 0, 0, 0, + 0, 4, 0, + 0, 0, 0, + 0, 0, 0, + 0, 0, 7, + 0, 0, 0, +}; + +DP_START_TEST(qos_fal_basic, fal_egress_map3) +{ + bool debug = (dp_test_debug_get() == 2 ? 
true : false); + uint32_t i; + int ret; + + qos_lib_test_setup(); + + dp_test_qos_debug(debug); + + dp_test_netlink_set_interface_l2("dp1sw_port_0_0"); + + dp_test_intf_switch_create("switch0"); + dp_test_intf_switch_add_port("switch0", "dp1sw_port_0_0"); + dp_test_intf_switch_add_port("switch0", "dp2T1"); + + ret = dp_test_qos_fal_hw_switch_if("dp2T1", true); + dp_test_fail_unless((ret == 0), + "failed to set hw-switching on dp2T1\n"); + + /* Add QoS config */ + for (i = 0; i < ARRAY_SIZE(fal_egress_map_qos_glb_cmds3); i++) + dp_test_send_config_src(dp_test_cont_src_get(), "%s", + fal_egress_map_qos_glb_cmds3[i]); + + dp_test_qos_attach_config_to_if("dp2T1", fal_egress_map_qos_int_cmds2, + debug); + + /* + * Test verification code here + */ + dp_test_qos_check_mark_map("des-mark", + fal_egress_map_expected_pcp_values3, + debug); + + /* QoS cleanup */ + dp_test_qos_delete_config_from_if("dp2T1", debug); + + for (i = 0; i < ARRAY_SIZE(fal_egress_map_qos_glb_delete_cmds2); i++) + dp_test_send_config_src(dp_test_cont_src_get(), "%s", + fal_egress_map_qos_glb_delete_cmds2[i]); + + ret = dp_test_qos_fal_hw_switch_if("dp2T1", false); + dp_test_fail_unless((ret == 0), + "failed to clear hw-switching on dp2T1\n"); + + /* Cleanup the ports */ + dp_test_intf_switch_remove_port("switch0", "dp1sw_port_0_0"); + dp_test_intf_switch_remove_port("switch0", "dp2T1"); + dp_test_intf_switch_del("switch0"); + + dp_test_netlink_del_interface_l2("dp1sw_port_0_0"); + + dp_test_qos_debug(false); + + qos_lib_test_teardown(); +} DP_END_TEST; + +/* + * fal_wred_map_npf_cmds and fal_wred_map_qos_cmds created from: + * + * set resources group dscp-group dscp-55-48 dscp 48 + * set resources group dscp-group dscp-55-48 dscp 49 + * set resources group dscp-group dscp-55-48 dscp 50 + * set resources group dscp-group dscp-55-48 dscp 51 + * set resources group dscp-group dscp-55-48 dscp 52 + * set resources group dscp-group dscp-55-48 dscp 53 + * set resources group dscp-group dscp-55-48 dscp 54 + * 
/*
 * NPF commands that (re)create the two DSCP resource groups referenced by
 * name in fal_wred_map_qos_cmds.  Any existing definition of each group is
 * deleted first, the groups are then added with their DSCP members
 * (48-55 and 56-63 respectively) and the change is committed.
 */
const char *fal_wred_map_npf_cmds[] = {
	"npf-cfg delete dscp-group:dscp-55-48",
	"npf-cfg delete dscp-group:dscp-63-56",
	"npf-cfg add dscp-group:dscp-55-48 0 48,49,50,51,52,53,54,55",
	"npf-cfg add dscp-group:dscp-63-56 0 56,57,58,59,60,61,62,63",
	"npf-cfg commit",
};

/*
 * NPF cleanup commands: delete both DSCP resource groups and commit the
 * deletion.
 */
const char *fal_wred_map_npf_delete_cmds[] = {
	"npf-cfg delete dscp-group:dscp-55-48",
	"npf-cfg delete dscp-group:dscp-63-56",
	"npf-cfg commit",
};
dscp 51 0x0", + "profile 0 dscp 52 0x0", + "profile 0 dscp 53 0x0", + "profile 0 dscp 54 0x0", + "profile 0 dscp 55 0x0", + "profile 0 dscp 56 0x20", + "profile 0 dscp 57 0x20", + "profile 0 dscp 58 0x20", + "profile 0 dscp 59 0x20", + "profile 0 dscp 60 0x20", + "profile 0 dscp 61 0x20", + "profile 0 dscp 62 0x20", + "profile 0 dscp 63 0x20", + "profile 0 queue 0 wrr-weight 1", + "profile 0 queue 0 dscp-group dscp-55-48 bytes 630 320 20", + "profile 0 queue 0 dscp-group dscp-63-56 bytes 400 200 50", + "profile 0 queue 0 wred-weight 5", + "pipe 0 0 0", + "enable" +}; + +struct des_dp_pair wred_map_dscp_map[] = { + /* des discard-index */ + { 0, 0 }, /* DSCP = 0 */ + { 0, 0 }, + { 0, 0 }, + { 0, 0 }, + { 0, 0 }, + { 0, 0 }, + { 0, 0 }, + { 0, 0 }, + { 0, 0 }, + { 0, 0 }, + { 0, 0 }, + { 0, 0 }, + { 0, 0 }, + { 0, 0 }, + { 0, 0 }, + { 0, 0 }, /* DSCP = 15 */ + { 1, 0 }, /* DSCP = 16 */ + { 1, 0 }, + { 1, 0 }, + { 1, 0 }, + { 1, 0 }, + { 1, 0 }, + { 1, 0 }, + { 1, 0 }, + { 1, 0 }, + { 1, 0 }, + { 1, 0 }, + { 1, 0 }, + { 1, 0 }, + { 1, 0 }, + { 1, 0 }, + { 1, 0 }, /* DSCP = 31 */ + { 2, 0 }, /* DSCP = 32 */ + { 2, 0 }, + { 2, 0 }, + { 2, 0 }, + { 2, 0 }, + { 2, 0 }, + { 2, 0 }, + { 2, 0 }, + { 2, 0 }, + { 2, 0 }, + { 2, 0 }, + { 2, 0 }, + { 2, 0 }, + { 2, 0 }, + { 2, 0 }, + { 2, 0 }, /* DSCP = 47 */ + { 3, 0 }, /* DSCP = 48 */ + { 3, 0 }, + { 3, 0 }, + { 3, 0 }, + { 3, 0 }, + { 3, 0 }, + { 3, 0 }, + { 3, 0 }, /* DSCP = 55 */ + { 3, 1 }, /* DSCP = 56 */ + { 3, 1 }, + { 3, 1 }, + { 3, 1 }, + { 3, 1 }, + { 3, 1 }, + { 3, 1 }, + { 3, 1 } /* DSCP = 63 */ +}; + +DP_START_TEST(qos_fal_basic, fal_wred_map) +{ + bool debug = (dp_test_debug_get() == 2 ? 
true : false); + json_object *j_sched_obj; + json_object *j_wred_obj; + json_object *j_obj; + uint32_t level; + uint32_t subport = 0; /* Only one subport with id = 0 */ + uint32_t pipe = 0; /* Only one pipe with id = 0 */ + uint32_t i; + int ret; + + qos_lib_test_setup(); + + dp_test_qos_debug(debug); + + dp_test_netlink_set_interface_l2("dp1sw_port_0_0"); + + dp_test_intf_switch_create("switch0"); + dp_test_intf_switch_add_port("switch0", "dp1sw_port_0_0"); + dp_test_intf_switch_add_port("switch0", "dp2T1"); + + ret = dp_test_qos_fal_hw_switch_if("dp2T1", true); + dp_test_fail_unless((ret == 0), + "failed to set hw-switching on dp2T1\n"); + + /* Add NPF config */ + i = 0; + while (!strstr(fal_wred_map_npf_cmds[i], "npf-cfg commit")) { + dp_test_send_config_src(dp_test_cont_src_get(), "%s", + fal_wred_map_npf_cmds[i++]); + } + dp_test_send_config_src(dp_test_cont_src_get(), "%s", + fal_wred_map_npf_cmds[i++]); + + /* Add QoS config */ + dp_test_qos_attach_config_to_if("dp2T1", fal_wred_map_qos_cmds, debug); + + /* + * Check the pipe-level, and in particular the map object + */ + level = FAL_QOS_SCHED_GROUP_LEVEL_PIPE; + j_obj = dp_test_qos_hw_get_json_sched_group(level, "dp2T1", subport, + pipe, 0, debug); + dp_test_qos_hw_check_sched_group(j_obj, level, 4, 4, 0, debug); + j_sched_obj = dp_test_qos_hw_get_json_child(j_obj, "scheduler", debug); + dp_test_qos_hw_check_scheduler(j_sched_obj, "Strict Priority", + "Bytes Per Second", -1, 125000, 1542, + 24, debug); + + json_object_put(j_sched_obj); + json_object_put(j_obj); + + /* + * Check that the one queue that has multiple wred-maps configured + * on it has all the expected values. 
+ */ + j_obj = dp_test_qos_hw_get_json_queue("dp2T1", 0, 0, 0, 0, debug); + + j_wred_obj = dp_test_qos_hw_get_json_child(j_obj, "wred", debug); + dp_test_qos_hw_check_wred_colour(j_wred_obj, "green", true, 320, 630, + 20, 5, debug); + + dp_test_qos_hw_check_wred_colour(j_wred_obj, "yellow", true, 200, 400, + 50, 5, debug); + + dp_test_qos_hw_check_wred_colour(j_wred_obj, "red", false, -1, -1, -1, + 5, debug); + + json_object_put(j_wred_obj); + json_object_put(j_obj); + + /* QoS cleanup */ + dp_test_qos_delete_config_from_if("dp2T1", debug); + + /* NPF cleanup */ + for (i = 0; i < ARRAY_SIZE(fal_wred_map_npf_delete_cmds); i++) + dp_test_send_config_src(dp_test_cont_src_get(), "%s", + fal_wred_map_npf_delete_cmds[i]); + + ret = dp_test_qos_fal_hw_switch_if("dp2T1", false); + dp_test_fail_unless((ret == 0), + "failed to clear hw-switching on dp2T1\n"); + + /* Cleanup the ports */ + dp_test_intf_switch_remove_port("switch0", "dp1sw_port_0_0"); + dp_test_intf_switch_remove_port("switch0", "dp2T1"); + dp_test_intf_switch_del("switch0"); + + dp_test_netlink_del_interface_l2("dp1sw_port_0_0"); + + dp_test_qos_debug(false); + + qos_lib_test_teardown(); +} DP_END_TEST; + +/* + * fal_hw_wred_map_npf_cmds and fal_hw_wred_map_qos_cmds created from: + * + * set resources group dscp-group default-group-high-drop dscp 8 + * set resources group dscp-group default-group-high-drop dscp 10 + * set resources group dscp-group default-group-high-drop dscp 16 + * set resources group dscp-group default-group-high-drop dscp 18 + * set resources group dscp-group default-group-low-drop dscp 0 + * set resources group dscp-group default-group-low-drop dscp 1 + * set resources group dscp-group default-group-low-drop dscp 2 + * set resources group dscp-group default-group-low-drop dscp 3 + * set resources group dscp-group default-group-low-drop dscp 4 + * set resources group dscp-group default-group-low-drop dscp 5 + * set resources group dscp-group default-group-low-drop dscp 6 + * set resources 
group dscp-group default-group-low-drop dscp 7 + * set resources group dscp-group default-group-low-drop dscp 9 + * set resources group dscp-group default-group-low-drop dscp 11 + * set resources group dscp-group default-group-low-drop dscp 12 + * set resources group dscp-group default-group-low-drop dscp 13 + * set resources group dscp-group default-group-low-drop dscp 14 + * set resources group dscp-group default-group-low-drop dscp 15 + * set resources group dscp-group default-group-low-drop dscp 17 + * set resources group dscp-group default-group-low-drop dscp 19 + * set resources group dscp-group default-group-low-drop dscp 20 + * set resources group dscp-group default-group-low-drop dscp 21 + * set resources group dscp-group default-group-low-drop dscp 22 + * set resources group dscp-group default-group-low-drop dscp 23 + * set resources group dscp-group default-group-low-drop dscp 41 + * set resources group dscp-group default-group-low-drop dscp 42 + * set resources group dscp-group default-group-low-drop dscp 43 + * set resources group dscp-group default-group-low-drop dscp 44 + * set resources group dscp-group default-group-low-drop dscp 45 + * set resources group dscp-group default-group-low-drop dscp 49 + * set resources group dscp-group default-group-low-drop dscp 50 + * set resources group dscp-group default-group-low-drop dscp 51 + * set resources group dscp-group default-group-low-drop dscp 52 + * set resources group dscp-group default-group-low-drop dscp 53 + * set resources group dscp-group default-group-low-drop dscp 54 + * set resources group dscp-group default-group-low-drop dscp 55 + * set resources group dscp-group default-group-low-drop dscp 57 + * set resources group dscp-group default-group-low-drop dscp 58 + * set resources group dscp-group default-group-low-drop dscp 59 + * set resources group dscp-group default-group-low-drop dscp 60 + * set resources group dscp-group default-group-low-drop dscp 61 + * set resources group dscp-group 
default-group-low-drop dscp 62 + * set resources group dscp-group default-group-low-drop dscp 63 + * set resources group dscp-group priority-group-high-drop dscp 24 + * set resources group dscp-group priority-group-high-drop dscp 26 + * set resources group dscp-group priority-group-high-drop dscp 32 + * set resources group dscp-group priority-group-high-drop dscp 34 + * set resources group dscp-group priority-group-low-drop dscp 25 + * set resources group dscp-group priority-group-low-drop dscp 27 + * set resources group dscp-group priority-group-low-drop dscp 28 + * set resources group dscp-group priority-group-low-drop dscp 29 + * set resources group dscp-group priority-group-low-drop dscp 30 + * set resources group dscp-group priority-group-low-drop dscp 31 + * set resources group dscp-group priority-group-low-drop dscp 33 + * set resources group dscp-group priority-group-low-drop dscp 35 + * set resources group dscp-group priority-group-low-drop dscp 36 + * set resources group dscp-group priority-group-low-drop dscp 37 + * set resources group dscp-group priority-group-low-drop dscp 38 + * set resources group dscp-group priority-group-low-drop dscp 39 + * set resources group dscp-group real-time-group dscp 40 + * set resources group dscp-group real-time-group dscp 46 + * set resources group dscp-group real-time-group dscp 47 + * set resources group dscp-group real-time-group dscp 48 + * set resources group dscp-group synch-group dscp 56 + * + * set policy qos name vlan-policy-50M shaper default vlan-profile-50M + * set policy qos name vlan-policy-50M shaper traffic-class 0 + * queue-limit 1024 + * set policy qos name vlan-policy-50M shaper traffic-class 1 + * queue-limit 64 + * set policy qos name vlan-policy-50M shaper traffic-class 2 + * queue-limit 4096 + * set policy qos profile default-prof bandwidth 2mbit + * set policy qos profile vlan-profile-50M bandwidth 50mbit + * set policy qos profile vlan-profile-50M burst 30000 + * set policy qos profile 
vlan-profile-50M map dscp-group + * default-group-high-drop to 9 + * set policy qos profile vlan-profile-50M map dscp-group + * default-group-low-drop to 9 + * set policy qos profile vlan-profile-50M map dscp-group + * priority-group-high-drop to 8 + * set policy qos profile vlan-profile-50M map dscp-group + * priority-group-low-drop to 8 + * set policy qos profile vlan-profile-50M map dscp-group + * real-time-group to 4 + * set policy qos profile vlan-profile-50M map dscp-group synch-group to 0 + * set policy qos profile vlan-profile-50M period 5 + * set policy qos profile vlan-profile-50M queue 0 traffic-class 0 + * set policy qos profile vlan-profile-50M queue 0 wred-map-bytes dscp-group + * synch-group mark-probability 50 + * set policy qos profile vlan-profile-50M queue 0 wred-map-bytes dscp-group + * synch-group max-threshold 1023 + * set policy qos profile vlan-profile-50M queue 0 wred-map-bytes dscp-group + * synch-group min-threshold 512 + * set policy qos profile vlan-profile-50M queue 0 wred-map-bytes + * filter-weight 4 + * set policy qos profile vlan-profile-50M queue 4 traffic-class 1 + * set policy qos profile vlan-profile-50M queue 4 wred-map-bytes dscp-group + * real-time-group mark-probability 1 + * set policy qos profile vlan-profile-50M queue 4 wred-map-bytes dscp-group + * real-time-group max-threshold 63 + * set policy qos profile vlan-profile-50M queue 4 wred-map-bytes dscp-group + * real-time-group min-threshold 32 + * set policy qos profile vlan-profile-50M queue 4 wred-map-bytes + * filter-weight 6 + * set policy qos profile vlan-profile-50M queue 8 traffic-class 2 + * set policy qos profile vlan-profile-50M queue 8 weight 60 + * set policy qos profile vlan-profile-50M queue 8 wred-map-bytes dscp-group + * priority-group-high-drop mark-probability 30 + * set policy qos profile vlan-profile-50M queue 8 wred-map-bytes dscp-group + * priority-group-high-drop max-threshold 2027 + * set policy qos profile vlan-profile-50M queue 8 wred-map-bytes 
/*
 * NPF commands that (re)create the six DSCP resource groups referenced by
 * name in fal_hw_wred_map_qos_cmds.  Any existing definition of each group
 * is deleted first, the groups are then added with their DSCP members and
 * the change is committed.  The long member lists are split across adjacent
 * string literals purely to keep the source lines short.
 */
const char *fal_hw_wred_map_npf_cmds[] = {
	"npf-cfg delete dscp-group:default-group-high-drop",
	"npf-cfg delete dscp-group:default-group-low-drop",
	"npf-cfg delete dscp-group:priority-group-high-drop",
	"npf-cfg delete dscp-group:priority-group-low-drop",
	"npf-cfg delete dscp-group:real-time-group",
	"npf-cfg delete dscp-group:synch-group",
	"npf-cfg add dscp-group:default-group-high-drop 0 8;10;16;18",
	"npf-cfg add dscp-group:default-group-low-drop 0 0;1;2;3;4;5;6;7;9;"
	"11;12;13;14;15;17;19;20;21;22;23;41;42;43;44;45;49;50;51;52;53;54;"
	"55;57;58;59;60;61;62;63",
	"npf-cfg add dscp-group:priority-group-high-drop 0 24;26;32;34",
	"npf-cfg add dscp-group:priority-group-low-drop 0 25;27;28;29;30;31;"
	"33;35;36;37;38;39",
	"npf-cfg add dscp-group:real-time-group 0 40;46;47;48",
	"npf-cfg add dscp-group:synch-group 0 56",
	"npf-cfg commit"
};

/*
 * NPF cleanup commands: delete all six DSCP resource groups and commit the
 * deletion.
 */
const char *fal_hw_wred_map_npf_delete_cmds[] = {
	"npf-cfg delete dscp-group:default-group-high-drop",
	"npf-cfg delete dscp-group:default-group-low-drop",
	"npf-cfg delete dscp-group:priority-group-high-drop",
	"npf-cfg delete dscp-group:priority-group-low-drop",
	"npf-cfg delete dscp-group:real-time-group",
	"npf-cfg delete dscp-group:synch-group",
	"npf-cfg commit"
};
dscp 1 0x26", + "profile 1 dscp 2 0x26", + "profile 1 dscp 3 0x26", + "profile 1 dscp 4 0x26", + "profile 1 dscp 5 0x26", + "profile 1 dscp 6 0x26", + "profile 1 dscp 7 0x26", + "profile 1 dscp 8 0x6", + "profile 1 dscp 9 0x26", + "profile 1 dscp 10 0x6", + "profile 1 dscp 11 0x26", + "profile 1 dscp 12 0x26", + "profile 1 dscp 13 0x26", + "profile 1 dscp 14 0x26", + "profile 1 dscp 15 0x26", + "profile 1 dscp 16 0x6", + "profile 1 dscp 17 0x26", + "profile 1 dscp 18 0x6", + "profile 1 dscp 19 0x26", + "profile 1 dscp 20 0x26", + "profile 1 dscp 21 0x26", + "profile 1 dscp 22 0x26", + "profile 1 dscp 23 0x26", + "profile 1 dscp 24 0x2", + "profile 1 dscp 25 0x22", + "profile 1 dscp 26 0x2", + "profile 1 dscp 27 0x22", + "profile 1 dscp 28 0x22", + "profile 1 dscp 29 0x22", + "profile 1 dscp 30 0x22", + "profile 1 dscp 31 0x22", + "profile 1 dscp 32 0x2", + "profile 1 dscp 33 0x22", + "profile 1 dscp 34 0x2", + "profile 1 dscp 35 0x22", + "profile 1 dscp 36 0x22", + "profile 1 dscp 37 0x22", + "profile 1 dscp 38 0x22", + "profile 1 dscp 39 0x22", + "profile 1 dscp 40 0x1", + "profile 1 dscp 41 0x26", + "profile 1 dscp 42 0x26", + "profile 1 dscp 43 0x26", + "profile 1 dscp 44 0x26", + "profile 1 dscp 45 0x26", + "profile 1 dscp 46 0x1", + "profile 1 dscp 47 0x1", + "profile 1 dscp 48 0x1", + "profile 1 dscp 49 0x26", + "profile 1 dscp 50 0x26", + "profile 1 dscp 51 0x26", + "profile 1 dscp 52 0x26", + "profile 1 dscp 53 0x26", + "profile 1 dscp 54 0x26", + "profile 1 dscp 55 0x26", + "profile 1 dscp 56 0x0", + "profile 1 dscp 57 0x26", + "profile 1 dscp 58 0x26", + "profile 1 dscp 59 0x26", + "profile 1 dscp 60 0x26", + "profile 1 dscp 61 0x26", + "profile 1 dscp 62 0x26", + "profile 1 dscp 63 0x26", + "profile 1 queue 0 wrr-weight 1", + "profile 1 queue 0 dscp-group synch-group bytes 1023 512 50", + "profile 1 queue 0 wred-weight 4", + "profile 1 queue 0x1 wrr-weight 1", + "profile 1 queue 0x1 dscp-group real-time-group bytes 630 320 100", + "profile 1 queue 0x1 
wred-weight 6", + "profile 1 queue 0x2 wrr-weight 60", + "profile 1 queue 0x2 dscp-group priority-group-low-drop bytes 4095 2048 75", + "profile 1 queue 0x2 dscp-group priority-group-high-drop bytes 2027 1024 30", + "profile 1 queue 0x2 wred-weight 8", + "profile 1 queue 0x6 wrr-weight 40", + "profile 1 queue 0x6 dscp-group default-group-high-drop bytes 1023 512 50", + "profile 1 queue 0x6 dscp-group default-group-low-drop bytes 2048 1024 100", + "profile 1 queue 0x6 wred-weight 10", + "pipe 0 0 1", + "enable" +}; + +struct wred_colour_results { + const char *colour; + bool enabled; + int32_t min_th; + int32_t max_th; + int32_t prob; +}; + +struct wred_map_results { + uint32_t tc; + uint32_t queue; + int32_t qlen; + int8_t fw; + struct wred_colour_results colour[FAL_PACKET_COLOUR_RED + 1]; +}; + +struct wred_map_results hw_wred_results[] = { + // tc, q, qlen, filter-weight + { 0, 0, 1024, 4, + { + { "green", true, 512, 1023, 50 }, + { "yellow", false, -1, -1, -1 }, + { "red", false, -1, -1, -1 } + } + }, + { 1, 0, 64, 6, + { + { "green", true, 320, 630, 100 }, + { "yellow", false, -1, -1, -1 }, + { "red", false, -1, -1, -1 } + } + }, + { 2, 0, 4096, 8, + { + { "green", true, 2048, 4095, 75 }, + { "yellow", true, 1024, 2027, 30 }, + { "red", false, -1, -1, -1 } + } + }, + { 2, 1, 4096, 10, + { + { "green", true, 512, 1023, 50 }, + { "yellow", true, 1024, 2048, 100 }, + { "red", false, -1, -1, -1 } + } + } +}; + +DP_START_TEST(qos_fal_basic, fal_hw_wred_map) +{ + bool debug = (dp_test_debug_get() == 2 ? 
true : false); + json_object *j_wred_obj; + json_object *j_obj; + uint32_t subport = 0; /* Only one subport with id = 0 */ + uint32_t pipe = 0; /* Only one pipe with id = 0 */ + uint32_t tc; + uint32_t queue; + uint32_t i; + uint32_t j; + int ret; + + qos_lib_test_setup(); + + dp_test_qos_debug(debug); + + dp_test_netlink_set_interface_l2("dp1sw_port_0_0"); + + dp_test_intf_switch_create("switch0"); + dp_test_intf_switch_add_port("switch0", "dp1sw_port_0_0"); + dp_test_intf_switch_add_port("switch0", "dp2T1"); + + ret = dp_test_qos_fal_hw_switch_if("dp2T1", true); + dp_test_fail_unless((ret == 0), + "failed to set hw-switching on dp2T1\n"); + + /* Add NPF config */ + for (i = 0; i < ARRAY_SIZE(fal_hw_wred_map_npf_cmds); i++) + dp_test_send_config_src(dp_test_cont_src_get(), "%s", + fal_hw_wred_map_npf_cmds[i]); + + /* Add QoS config */ + dp_test_qos_attach_config_to_if("dp2T1", fal_hw_wred_map_qos_cmds, + debug); + + /* + * Check the queue level, and in particular the wred objects + */ + for (i = 0; i < ARRAY_SIZE(hw_wred_results); i++) { + int32_t filter_weight; + int32_t qlen; + + tc = hw_wred_results[i].tc; + queue = hw_wred_results[i].queue; + qlen = hw_wred_results[i].qlen; + filter_weight = hw_wred_results[i].fw; + + j_obj = dp_test_qos_hw_get_json_queue("dp2T1", subport, pipe, + tc, queue, debug); + dp_test_qos_hw_check_queue(j_obj, queue, qlen, queue, + 0, debug); + + for (j = FAL_PACKET_COLOUR_GREEN; + j <= FAL_PACKET_COLOUR_RED; j++) { + struct wred_colour_results *colour_results; + const char *colour; + int32_t min_th; + int32_t max_th; + bool enabled; + int32_t prob; + + colour_results = &hw_wred_results[i].colour[j]; + colour = colour_results->colour; + enabled = colour_results->enabled; + min_th = colour_results->min_th; + max_th = colour_results->max_th; + prob = colour_results->prob; + + j_wred_obj = dp_test_qos_hw_get_json_child(j_obj, + "wred", + debug); + dp_test_qos_hw_check_wred_colour(j_wred_obj, colour, + enabled, min_th, + max_th, prob, + 
filter_weight, debug); + json_object_put(j_wred_obj); + } + json_object_put(j_obj); + } + + /* QoS cleanup */ + dp_test_qos_delete_config_from_if("dp2T1", debug); + + /* NPF cleanup */ + for (i = 0; i < ARRAY_SIZE(fal_hw_wred_map_npf_delete_cmds); i++) + dp_test_send_config_src(dp_test_cont_src_get(), "%s", + fal_hw_wred_map_npf_delete_cmds[i]); + + ret = dp_test_qos_fal_hw_switch_if("dp2T1", false); + dp_test_fail_unless((ret == 0), + "failed to clear hw-switching on dp2T1\n"); + + /* Cleanup the ports */ + dp_test_intf_switch_remove_port("switch0", "dp1sw_port_0_0"); + dp_test_intf_switch_remove_port("switch0", "dp2T1"); + dp_test_intf_switch_del("switch0"); + + dp_test_netlink_del_interface_l2("dp1sw_port_0_0"); + + dp_test_qos_debug(false); + + qos_lib_test_teardown(); +} DP_END_TEST; diff --git a/tests/whole_dp/src/dp_test_qos_lib.c b/tests/whole_dp/src/dp_test_qos_lib.c index dd05ad89..0fe1821d 100644 --- a/tests/whole_dp/src/dp_test_qos_lib.c +++ b/tests/whole_dp/src/dp_test_qos_lib.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2018-2020, AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only * @@ -20,12 +20,12 @@ #include "fal_plugin.h" #include "dp_test.h" -#include "dp_test_lib.h" +#include "dp_test_lib_internal.h" #include "dp_test_lib_exp.h" #include "dp_test_lib_pkt.h" -#include "dp_test_lib_intf.h" -#include "dp_test_pktmbuf_lib.h" -#include "dp_test_netlink_state.h" +#include "dp_test_lib_intf_internal.h" +#include "dp_test_pktmbuf_lib_internal.h" +#include "dp_test_netlink_state_internal.h" #include "dp_test_console.h" #include "dp_test_controller.h" #include "dp_test_json_utils.h" @@ -102,25 +102,6 @@ dp_test_qos_json_dump(json_object *j_obj) printf("%s\n", str); } -/* - * This function is just to aid unit-test development. 
- * When you're trying to check for an expected value somewhere deep in the - * json output from "qos show", it can be useful to dump it out in a - * "readable" format. - */ -void -dp_test_qos_show(void) -{ - json_object *jobj; - struct dp_test_json_mismatches *mismatches = NULL; - - jobj = dp_test_json_do_show_cmd("qos show", &mismatches, false); - if (jobj) { - dp_test_qos_json_dump(jobj); - json_object_put(jobj); - } -} - __attribute__((format(printf, 5, 6))) static void _dp_test_qos_json_error(bool debug, const char *file, const int line, @@ -557,28 +538,6 @@ _dp_test_qos_get_json_rules(const char *if_name, const uint subport, file, line); } -json_object * -_dp_test_qos_get_json_groups(const char *if_name, const uint subport, - bool debug, const char *file, const int line) -{ - char real_if_name[IFNAMSIZ]; - - /* Convert the test if-name into a real if-name */ - dp_test_intf_real(if_name, real_if_name); - - /* Build the key for the required object */ - struct dp_test_json_search_key key[] = { - { real_if_name, NULL, 0 }, - { "shaper", NULL, 0 }, - { "subports", NULL, subport }, - { "rules", NULL, 0 }, - { "groups", NULL, 0 }, - }; - - return _dp_test_qos_get_json(key, ARRAY_SIZE(key), __func__, debug, - file, line); -} - json_object * _dp_test_qos_get_json_groups_rules(const char *if_name, const uint subport, bool debug, const char *file, const int line) @@ -891,23 +850,6 @@ void _dp_test_qos_clear_counters(const char *if_name, bool debug, if_name ? 
real_if_name : ""); } -/* - * QoS hardware JSON functions - using "qos hw" rather than "qos show" to - * retrieve operational state via the FAL - */ -void -dp_test_qos_hw(void) -{ - json_object *j_obj; - struct dp_test_json_mismatches *mismatches = NULL; - - j_obj = dp_test_json_do_show_cmd("qos hw", &mismatches, true); - if (j_obj) { - dp_test_qos_json_dump(j_obj); - json_object_put(j_obj); - } -} - static json_object * _dp_test_qos_hw_get_json(struct dp_test_json_search_key *key, uint32_t key_size, const char *func, bool debug, const char *file, @@ -1012,8 +954,8 @@ _dp_test_qos_hw_get_json_child(json_object *j_parent, const char *name, void _dp_test_qos_hw_check_sched_group(json_object *j_obj, int32_t level, int32_t max_children, - int32_t current_children, bool debug, - const char *file, const int line) + int32_t current_children, uint8_t lpq, + bool debug, const char *file, const int line) { int32_t int_value; bool rc; @@ -1039,8 +981,8 @@ _dp_test_qos_hw_check_sched_group(json_object *j_obj, int32_t level, _dp_test_fail_unless(rc && max_children == int_value, file, line, - "%s failed to match max-children %d\n", - __func__, max_children); + "%s failed to match max-children %d int_val %d\n", + __func__, max_children, int_value); } if (current_children >= 0) { @@ -1051,114 +993,22 @@ _dp_test_qos_hw_check_sched_group(json_object *j_obj, int32_t level, _dp_test_fail_unless(rc && current_children == int_value, file, line, - "%s failed to match current-children %d\n", - __func__, current_children); + "%s failed to match current-children %d int_val %d\n", + __func__, current_children, int_value); } -} - -void -_dp_test_qos_hw_check_ingress_map(json_object *j_map_obj, int32_t map_type, - struct tc_queue_pair *map_list, - bool local_priority, bool debug, - const char *file, const int line) -{ - json_object *j_map_list_obj; - int32_t int_value; - bool bool_value = false; - uint32_t length; - uint8_t max_cp; - uint8_t cp; - uint8_t i; - bool rc; - 
_dp_test_fail_unless(j_map_obj != NULL, file, line, "null map\n"); - _dp_test_fail_unless(map_list != NULL, file, line, "null map-list\n"); - - if (debug) - dp_test_qos_json_dump(j_map_obj); - - if (map_type >= 0) { - rc = dp_test_json_int_field_from_obj(j_map_obj, "map-type", + if (lpq > 0) { + rc = dp_test_json_int_field_from_obj(j_obj, + "local-priority-des", &int_value); - _dp_test_fail_unless(rc && int_value == map_type, file, line, - "%s failed to match map-type %d\n", - __func__, map_type); - } - - rc = dp_test_json_boolean_field_from_obj(j_map_obj, "local-priority", - &bool_value); - _dp_test_fail_unless(rc && bool_value == local_priority, file, line, - "%s failed to match local-priority %d\n", - __func__, local_priority); - - struct dp_test_json_search_key key[] = { - { "map-list", NULL, -1 }, - }; - - j_map_list_obj = dp_test_json_search(j_map_obj, key, 1); - _dp_test_fail_unless(j_map_list_obj != NULL, file, line, - "%s failed to find map-list array\n", __func__); - - max_cp = map_type == FAL_QOS_MAP_TYPE_DSCP_TO_TC ? 
- FAL_QOS_MAP_DSCP_VALUES : FAL_QOS_MAP_PCP_VALUES; - length = json_object_array_length(j_map_list_obj); - - struct tc_queue_pair json_map[FAL_QOS_MAP_DSCP_VALUES] = { { 0 } }; - - for (i = 0; i < length; i++) { - json_object *j_map_entry; - const char *cp_bitmap_str; - uint64_t cp_bitmap; - int queue; - int dp; - int tc; - bool rc1; - bool rc2; - bool rc3; - bool rc4; - - j_map_entry = json_object_array_get_idx(j_map_list_obj, i); - - rc1 = dp_test_json_string_field_from_obj(j_map_entry, - "cp-bitmap", - &cp_bitmap_str); - rc2 = dp_test_json_int_field_from_obj(j_map_entry, - "traffic-class", &tc); - rc3 = dp_test_json_int_field_from_obj(j_map_entry, "queue", - &queue); - rc4 = dp_test_json_int_field_from_obj(j_map_entry, - "drop-precedence", - &dp); - _dp_test_fail_unless(rc1 && rc2 && rc3 && rc4, file, line, - "%s failed to extract map from map-list\n", - __func__); - - cp_bitmap = strtoul(cp_bitmap_str, NULL, 10); - - for (cp = 0; cp < max_cp; cp++) { - if (cp_bitmap & (1ul << cp)) { - json_map[cp].tc = tc; - json_map[cp].queue = queue; - json_map[cp].dp = dp; - } - } - } + if (debug) + dp_test_qos_json_dump(j_obj); - for (cp = 0; cp < max_cp; cp++) { - _dp_test_fail_unless(json_map[cp].tc == map_list[cp].tc && - json_map[cp].queue == map_list[cp].queue && - json_map[cp].dp == map_list[cp].dp, - file, line, - "%s failed to match code-point %u tc: " - "%u queue: %u drop-precedence %u vs " - "tc: %d queue: %d drop-precedence %d\n", - __func__, cp, map_list[cp].tc, - map_list[cp].queue, map_list[cp].dp, - json_map[cp].tc, json_map[cp].queue, - json_map[cp].dp); + _dp_test_fail_unless(rc && lpq == int_value, file, + line, + "%s failed to match lpq %d int_val %d\n", + __func__, lpq, int_value); } - - json_object_put(j_map_list_obj); } void @@ -1321,11 +1171,10 @@ _dp_test_qos_hw_check_scheduler(json_object *j_obj, const char *type, void _dp_test_qos_hw_check_queue(json_object *j_obj, int32_t id, int32_t queue_limit, int32_t queue_index, - bool local_priority, bool 
debug, const char *file, - const int line) + uint8_t designation, + bool debug, const char *file, const int line) { int32_t int_value; - bool bool_value = false; bool rc; _dp_test_fail_unless(j_obj != NULL, file, line, "null queue\n"); @@ -1363,11 +1212,17 @@ _dp_test_qos_hw_check_queue(json_object *j_obj, int32_t id, __func__, queue_index); } - rc = dp_test_json_boolean_field_from_obj(j_obj, "local-priority", - &bool_value); - _dp_test_fail_unless(rc && bool_value == local_priority, file, line, - "%s failed to match local-priority %d\n", - __func__, local_priority); + if (designation > 0) { + rc = dp_test_json_int_field_from_obj(j_obj, "designation", + &int_value); + if (debug) + dp_test_qos_json_dump(j_obj); + + _dp_test_fail_unless(rc && (int)designation == int_value, + file, line, + "%s failed to match designation %d\n", + __func__, designation); + } } void @@ -1510,7 +1365,6 @@ _dp_test_qos_check_mark_map(const char *map_name, int8_t *pcp_values, struct dp_test_qos_json_array_iterate_argblk argblk; json_object *j_obj; json_object *j_pcp_values; - struct dp_test_json_mismatches *mismatches = NULL; struct dp_test_json_search_key key[] = { { "mark-maps", NULL, 0 }, @@ -1518,13 +1372,11 @@ _dp_test_qos_check_mark_map(const char *map_name, int8_t *pcp_values, }; j_obj = _dp_test_qos_get_json_mark_map(key, ARRAY_SIZE(key), __func__, - debug, file, line); - if (!j_obj) { - (void)dp_test_json_mismatch_print(mismatches, 2, NULL, 0); + debug, file, line); + if (!j_obj) _dp_test_qos_json_error(debug, file, line, j_obj, "%s failed to find json object", __func__); - } struct dp_test_json_find_key pcp_key[] = { { "pcp-values", NULL } @@ -1554,10 +1406,76 @@ _dp_test_qos_check_mark_map(const char *map_name, int8_t *pcp_values, void _dp_test_qos_delete_config_from_if(const char *if_name, bool debug, const char *file, const int line) { - uint32_t ifindex = dp_test_intf_name2index(if_name); + char real[IFNAMSIZ]; + + dp_test_send_config_src(dp_test_cont_src_get(), "qos %s 
disable", + dp_test_intf_real(if_name, real)); +} + +void _dp_test_qos_verify_config(const char *expected_json_str, + const char *verify_cmd, + bool negate_match, bool debug) +{ + if (expected_json_str != NULL) { + json_object *expected_json; + expected_json = dp_test_json_create("%s", expected_json_str); + dp_test_check_json_state(verify_cmd, expected_json, + DP_TEST_JSON_CHECK_SUBSET, + negate_match); + json_object_put(expected_json); + } + +} +void _dp_test_qos_send_config(const char *cmd_list[], + const char *expected_json_str, + const char *verify_cmd, + int num_cmds, bool debug, + const char *file, const int line) + +{ + int i = 0; + + for (i = 0; i < num_cmds; i++) { + dp_test_send_config_src(dp_test_cont_src_get(), + "qos global-object-cmd %s", + cmd_list[i]); + } + + _dp_test_qos_verify_config(expected_json_str, + verify_cmd, false, debug); + +} + +void _dp_test_qos_send_cmd(const char *cmd, + const char *expected_json_str, + const char *verify_cmd, + bool debug, + const char *file, const int line) +{ + dp_test_send_config_src(dp_test_cont_src_get(), + "qos global-object-cmd %s", cmd); + + _dp_test_qos_verify_config(expected_json_str, + verify_cmd, false, debug); + +} + +void _dp_test_qos_send_if_cmd(const char *if_name, + const char *cmd, + const char *expected_json_str, + const char *verify_cmd, + bool debug, + const char *file, const int line) + +{ + char real[IFNAMSIZ]; + + dp_test_send_config_src(dp_test_cont_src_get(), "qos %s %s", + dp_test_intf_real(if_name, real), cmd); + + _dp_test_qos_verify_config(expected_json_str, + verify_cmd, false, debug); - dp_test_send_config_src(dp_test_cont_src_get(), "qos %u disable", - ifindex); } void _dp_test_qos_attach_config_to_if(const char *if_name, @@ -1565,10 +1483,10 @@ void _dp_test_qos_attach_config_to_if(const char *if_name, const char *file, const int line) { - uint32_t ifindex = dp_test_intf_name2index(if_name); uint32_t i = 0; uint32_t subports = 0; int32_t items = -1; + char real[IFNAMSIZ]; while 
(!strstr(cmd_list[i], "enable")) { /* @@ -1582,10 +1500,12 @@ void _dp_test_qos_attach_config_to_if(const char *if_name, /* * Update the numeric port-id with the required value */ - dp_test_send_config_src(dp_test_cont_src_get(), "qos %u %s", - ifindex, cmd_list[i++]); + dp_test_send_config_src(dp_test_cont_src_get(), "qos %s %s", + dp_test_intf_real(if_name, real), + cmd_list[i++]); } - dp_test_send_config_src(dp_test_cont_src_get(), "qos %u %s", ifindex, + dp_test_send_config_src(dp_test_cont_src_get(), "qos %s %s", + dp_test_intf_real(if_name, real), cmd_list[i++]); /* @@ -1632,7 +1552,7 @@ _dp_test_qos_pkt_forw_test(const char *ifname, uint vlan_id, struct dp_test_pkt_desc_t v4_pkt_desc = { .text = "TCP IPv4", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "", .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = "", @@ -1673,11 +1593,6 @@ _dp_test_qos_pkt_forw_test(const char *ifname, uint vlan_id, __func__, line); /* Verify */ - /* - * If you're not getting the numbers you expect here it can be useful - * to insert dp_test_qos_show(); here and decode the JSON by hand. - */ - /* dp_test_qos_show(); */ _dp_test_qos_check_subport_tc_counter(ifname, subport, tc, "packets", 1, debug, file, line); _dp_test_qos_check_subport_tc_counter(ifname, subport, tc, "bytes", 74, @@ -1708,7 +1623,7 @@ _dp_test_qos_pkt_remark_test(const char *ifname, const uint vlan_id, struct dp_test_pkt_desc_t v4_pkt_desc = { .text = "TCP IPv4", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "", .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = "", @@ -1754,11 +1669,6 @@ _dp_test_qos_pkt_remark_test(const char *ifname, const uint vlan_id, __func__, line); /* Verify */ - /* - * If you're not getting the numbers you expect here it can be useful - * to insert dp_test_qos_show(); here are decode the JSON by hand. 
- */ - /* dp_test_qos_show(); */ _dp_test_qos_check_subport_tc_counter(ifname, subport, tc, "packets", 1, debug, file, line); _dp_test_qos_check_subport_tc_counter(ifname, subport, tc, "bytes", 74, @@ -1790,7 +1700,7 @@ _dp_test_qos_pkt_force_drop(const char *ifname, const uint vlan_id, struct dp_test_pkt_desc_t v4_pkt_desc = { .text = "TCP IPv4", .len = 20, - .ether_type = ETHER_TYPE_IPv4, + .ether_type = RTE_ETHER_TYPE_IPV4, .l3_src = "", .l2_src = "aa:bb:cc:dd:1:a1", .l3_dst = "", @@ -1836,11 +1746,6 @@ _dp_test_qos_pkt_force_drop(const char *ifname, const uint vlan_id, test_exp); /* Verify */ - /* - * If you're not getting the numbers you expect here it can be useful - * to insert dp_test_qos_show(); here and decode the JSON by hand. - */ - /* dp_test_qos_show(); */ _dp_test_qos_check_subport_tc_counter(ifname, subport, tc, "packets", queue_limit, debug, file, line); _dp_test_qos_check_subport_tc_counter(ifname, subport, tc, "dropped", 1, diff --git a/tests/whole_dp/src/dp_test_qos_lib.h b/tests/whole_dp/src/dp_test_qos_lib.h index 68c1adab..ba2fa9d8 100644 --- a/tests/whole_dp/src/dp_test_qos_lib.h +++ b/tests/whole_dp/src/dp_test_qos_lib.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2018-2020, AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only * @@ -19,8 +19,8 @@ * Note, the dataplane has changed to only accept protocol numbers. 
and * not strings */ -#define QOS_PROTO_TCP "proto=6" -#define QOS_PROTO_UDP "proto=17" +#define QOS_PROTO_TCP "proto-final=6" +#define QOS_PROTO_UDP "proto-final=17" struct tc_queue_pair { @@ -29,6 +29,11 @@ struct tc_queue_pair { uint dp; }; +struct des_dp_pair { + uint des; + uint dp; +}; + /* * Enable/disable QoS debugging in the dataplane */ @@ -58,9 +63,6 @@ _dp_test_qos_op_cmd_fmt(bool debug, const char *file, const int line, void dp_test_qos_json_dump(json_object *j_obj); -void -dp_test_qos_show(void); - /* * Functions that handle the JSON output from "qos show" */ @@ -213,14 +215,6 @@ _dp_test_qos_get_json_rules(const char *if_name, const uint subport, _dp_test_qos_get_json_rules(if_name, subport, debug, \ __FILE__, __LINE__) -json_object * -_dp_test_qos_get_json_groups(const char *if_name, const uint subport, - bool debug, const char *file, const int line); - -#define dp_test_qos_get_json_groups(if_name, subport, debug) \ - _dp_test_qos_get_json_groups(if_name, subport, debug, \ - __FILE__, __LINE__) - json_object * _dp_test_qos_get_json_groups_rules(const char *if_name, const uint subport, bool debug, const char *file, @@ -230,12 +224,6 @@ _dp_test_qos_get_json_groups_rules(const char *if_name, const uint subport, _dp_test_qos_get_json_groups_rules(if_name, subport, debug, \ __FILE__, __LINE__) -/* - * Functions that handle the JSON output from "qos hw" - */ -void -dp_test_qos_hw(void); - #define dp_test_qos_hw_get_json(key, key_size, debug) \ _dp_test_qos_hw_get_json(key, key_size, __func__, debug, __FILE__, \ __LINE__) @@ -272,26 +260,14 @@ _dp_test_qos_hw_get_json_child(json_object *j_parent, const char *name, void _dp_test_qos_hw_check_sched_group(json_object *j_obj, int32_t level, int32_t max_children, - int32_t current_children, bool debug, - const char *file, const int line); + int32_t current_children, uint8_t lpq, + bool debug, const char *file, const int line); #define dp_test_qos_hw_check_sched_group(j_obj, level, max_children, \ - 
current_children, debug) \ + current_children, lpq, debug) \ _dp_test_qos_hw_check_sched_group(j_obj, level, max_children, \ - current_children, debug, __FILE__, \ - __LINE__) - -void -_dp_test_qos_hw_check_ingress_map(json_object *j_map_obj, int32_t map_type, - struct tc_queue_pair *map_list, - bool local_priority, bool debug, - const char *file, const int line); - -#define dp_test_qos_hw_check_ingress_map(j_map_obj, map_type, map_list, \ - local_priority, debug) \ - _dp_test_qos_hw_check_ingress_map(j_map_obj, map_type, map_list, \ - local_priority, debug, __FILE__, \ - __LINE__) + current_children, lpq, debug, \ + __FILE__, __LINE__) void _dp_test_qos_hw_check_egress_map(json_object *j_map_obj, int32_t map_type, @@ -319,14 +295,14 @@ _dp_test_qos_hw_check_scheduler(json_object *j_obj, const char *type, void _dp_test_qos_hw_check_queue(json_object *j_obj, int32_t id, int32_t queue_limit, int32_t queue_index, - bool local_priority, bool debug, const char *file, - const int line); + uint8_t designation, + bool debug, const char *file, const int line); #define dp_test_qos_hw_check_queue(j_obj, id, queue_limit, queue_index, \ - local_priority, debug) \ + designation, debug) \ _dp_test_qos_hw_check_queue(j_obj, id, queue_limit, queue_index, \ - local_priority, debug, __FILE__, \ - __LINE__) + designation, debug, \ + __FILE__, __LINE__) void _dp_test_qos_hw_check_wred_colour(json_object *j_obj, const char *colour, @@ -454,6 +430,49 @@ _dp_test_qos_attach_config_to_if(const char *if_name, const char *cmd_list[], _dp_test_qos_attach_config_to_if(if_name, cmd_list, debug, \ __FILE__, __LINE__) +void +_dp_test_qos_send_config(const char *cmd_list[], + const char *expected_json_str, + const char *verify_cmd, int num_cmds, + bool debug, const char *file, const int line); + +#define dp_test_qos_send_config(cmd_list, exp_json_str, verify_cmd, \ + num_cmds, debug) \ + _dp_test_qos_send_config(cmd_list, exp_json_str, verify_cmd, \ + num_cmds, debug, __FILE__, __LINE__) + + +void 
_dp_test_qos_verify_config(const char *expected_json_str, + const char *verify_cmd, + bool negate_match, bool debug); + +void +_dp_test_qos_send_cmd(const char *cmd, + const char *expected_json_str, + const char *verify_cmd, + bool debug, + const char *file, const int line); + +#define dp_test_qos_send_cmd(cmd, exp_json_str, verify_cmd, \ + debug) \ + _dp_test_qos_send_cmd(cmd, exp_json_str, \ + verify_cmd, \ + debug, __FILE__, __LINE__) + +void +_dp_test_qos_send_if_cmd(const char *if_name, + const char *cmd, + const char *expected_json_str, + const char *verify_cmd, + bool debug, + const char *file, const int line); + +#define dp_test_qos_send_if_cmd(if_name, cmd, exp_json_str, verify_cmd, \ + debug) \ + _dp_test_qos_send_if_cmd(if_name, \ + cmd, exp_json_str, verify_cmd, \ + debug, __FILE__, __LINE__) + /* * QoS packet test functions */ diff --git a/tests/whole_dp/src/dp_test_route_broker.c b/tests/whole_dp/src/dp_test_route_broker.c index 5d63108d..39d3f7db 100644 --- a/tests/whole_dp/src/dp_test_route_broker.c +++ b/tests/whole_dp/src/dp_test_route_broker.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2018-2019, AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only */ @@ -18,6 +18,7 @@ #include "zmq_dp.h" zsock_t *broker_data_sock; +bool dp_test_route_broker_protobuf = true; static int process_actor_message(zsock_t *sock) { @@ -55,7 +56,7 @@ static void process_ctrl_message(zsock_t *sock) assert(envelope); msg_type = zmsg_popstr(msg); - if (!msg_type || strcmp(msg_type, "CONNECT")) { + if (!msg_type || strcmp(msg_type, "CONNECT") != 0) { if (msg_type && !strcmp(msg_type, "KEEPALIVE")) { /* ignore keepalives */ free(msg_type); @@ -102,6 +103,9 @@ static void process_ctrl_message(zsock_t *sock) free(url); assert(rc >= 0); + rc = zmsg_addu32(msg, dp_test_route_broker_protobuf ? 
0x1 : 0x0); + assert(rc >= 0); + rc = zmsg_prepend(msg, &envelope); assert(rc >= 0); diff --git a/tests/whole_dp/src/dp_test_route_broker.h b/tests/whole_dp/src/dp_test_route_broker.h index 8de680b3..95ff1179 100644 --- a/tests/whole_dp/src/dp_test_route_broker.h +++ b/tests/whole_dp/src/dp_test_route_broker.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2018-2019, AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only */ @@ -13,4 +13,6 @@ #include extern zsock_t *broker_data_sock; +extern bool dp_test_route_broker_protobuf; + void dp_test_broker_thread_run(zsock_t *pipe, void *args); diff --git a/tests/whole_dp/src/dp_test_route_tracker.c b/tests/whole_dp/src/dp_test_route_tracker.c index 777fa75c..3620dc99 100644 --- a/tests/whole_dp/src/dp_test_route_tracker.c +++ b/tests/whole_dp/src/dp_test_route_tracker.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2019-2020, AT&T Intellectual Property. All rights reserved. 
* * SPDX-License-Identifier: LGPL-2.1-only * @@ -18,12 +18,13 @@ #include "dp_test.h" #include "dp_test_controller.h" #include "dp_test_console.h" -#include "dp_test_netlink_state.h" -#include "dp_test_lib.h" -#include "dp_test_lib_intf.h" +#include "dp_test_netlink_state_internal.h" +#include "dp_test_lib_internal.h" +#include "dp_test_lib_intf_internal.h" #include "dp_test_lib_exp.h" +#include "dp_test_cmd_state.h" -#include "dp_test_pktmbuf_lib.h" +#include "dp_test_pktmbuf_lib_internal.h" #include "dp_test_route_tracker.h" DP_DECL_TEST_SUITE(route_tracker); @@ -84,13 +85,13 @@ static int dp_test_cmd_route_tracker_cfg(FILE *f, int argc, char **argv) if (add) { dp_test_assert_internal(tracker_ctx[index].used == false); - rt_tracker_add(vrf, &addr, + dp_rt_tracker_add(vrf, &addr, &tracker_ctx[index], dp_test_route_tracker_cb); tracker_ctx[index].used = true; } else { dp_test_assert_internal(tracker_ctx[index].used); - rt_tracker_delete(vrf, &addr, + dp_rt_tracker_delete(vrf, &addr, &tracker_ctx[index]); tracker_ctx[index].used = false; } @@ -279,3 +280,63 @@ DP_START_TEST(route_tracker, route_tracker_simple) rt_tracker_test(v4_interface_addr, v4_routes, v4_trackers, v4_cover); rt_tracker_test(v6_interface_addr, v6_routes, v6_trackers, v6_cover); } DP_END_TEST; + +#define IIFNAME "dp1T0" +#define PEER_MAC "be:ef:60:d:f0:d" +#define PEER_IP "1.1.1.2" +#define OUR_IP "1.1.1.1" +#define OUR_ADDRESS "1.1.1.1/24" +#define PEER_ROUTE "1.1.1.2/32 nh 1.1.1.2 int:dp1T0" + +DP_START_TEST(route_tracker, route_tracker_race) +{ + cmd_rt_tracker_cfg_test_set(dp_test_cmd_route_tracker_cfg); + + /* Set up the interface addresses */ + dp_test_nl_add_ip_addr_and_connected(IIFNAME, OUR_ADDRESS); + + /* Add a tracker and ensure it is resolved through the connected */ + dp_test_send_config_src(dp_test_cont_src_get(), + "tracker-ut ADD %s 0", PEER_IP); + dp_test_verify_tracker(PEER_IP, 1, "1.1.1.0/24"); + + /* + * Add a neigh entry, which will end up creating /32 neigh_created + * route 
and as a result the tracker should be updated to point + * to this new rule/route. + */ + dp_test_netlink_add_neigh(IIFNAME, PEER_IP, PEER_MAC); + + /* ARP based route should now resolve the tracker with a /32 cover */ + dp_test_verify_tracker(PEER_IP, 1, "1.1.1.2/32"); + + /* + * This is a higher scope route and as a result should replace the + * ARP created route/rule but the tracker should resolve via the + * /32 + */ + dp_test_netlink_add_route(PEER_ROUTE); + + /* Should still be resolved with the /32 cover */ + dp_test_verify_tracker(PEER_IP, 1, "1.1.1.2/32"); + + /* + * Now remove the neigh entry and the tracker should remain unaffected + */ + dp_test_neigh_clear_entry(IIFNAME, PEER_IP); + dp_test_verify_tracker(PEER_IP, 1, "1.1.1.2/32"); + + /* + * Now get rid of the /32 route and the tracker should re-resolve via + * the /24 connected route + */ + dp_test_netlink_del_route(PEER_ROUTE); + dp_test_verify_tracker(PEER_IP, 1, "1.1.1.0/24"); + + /* Clean Up */ + dp_test_nl_del_ip_addr_and_connected(IIFNAME, OUR_ADDRESS); + dp_test_send_config_src(dp_test_cont_src_get(), + "tracker-ut DELETE %s 0", PEER_IP); + dp_test_verify_tracker_gone(PEER_IP); + cmd_rt_tracker_cfg_test_set(NULL); +} DP_END_TEST; diff --git a/tests/whole_dp/src/dp_test_session.c b/tests/whole_dp/src/dp_test_session.c index cdcb3028..ccf58ca2 100644 --- a/tests/whole_dp/src/dp_test_session.c +++ b/tests/whole_dp/src/dp_test_session.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2021, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2017 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -27,17 +27,21 @@ #include "npf/npf.h" #include "npf/npf_if.h" #include "npf/npf_cache.h" +#include "npf/npf_pack.h" #include "npf/npf_session.h" #include "dp_test.h" #include "dp_test_controller.h" -#include "dp_test_netlink_state.h" -#include "dp_test_lib.h" -#include "dp_test_lib_intf.h" +#include "dp_test_netlink_state_internal.h" +#include "dp_test_lib_internal.h" +#include "dp_test_lib_intf_internal.h" #include "dp_test_lib_exp.h" -#include "dp_test_pktmbuf_lib.h" -#include "dp_test_session_lib.h" +#include "dp_test_lib_tcp.h" +#include "dp_test_pktmbuf_lib_internal.h" +#include "dp_test_session_internal_lib.h" +#include "dp_test_npf_fw_lib.h" #include "dp_test_npf_sess_lib.h" +#include "dp_test_npf_nat_lib.h" #define TEST_VRF 69 #define IF_NAME "dp1T0" @@ -65,7 +69,7 @@ DP_START_TEST(session_udp_lookup, test1) dp_test_nl_add_ip_addr_and_connected_vrf(IF_NAME, "1.1.1.1/24", 69); dp_test_intf_real(IF_NAME, realname); - ifp = ifnet_byifname(realname); + ifp = dp_ifnet_byifname(realname); /* Create forward and reverse packets */ f = dp_test_create_udp_ipv4_pak("10.73.0.0", "10.73.2.0", @@ -125,7 +129,7 @@ DP_START_TEST(session_tcp_lookup, test2) dp_test_nl_add_ip_addr_and_connected_vrf(IF_NAME, "1.1.1.1/24", 69); dp_test_intf_real(IF_NAME, realname); - ifp = ifnet_byifname(realname); + ifp = dp_ifnet_byifname(realname); /* Create forward and reverse packets */ f = dp_test_create_tcp_ipv4_pak("10.73.0.0", "10.73.2.0", @@ -180,7 +184,7 @@ DP_START_TEST(session_icmp_lookup, test3) dp_test_nl_add_ip_addr_and_connected_vrf(IF_NAME, "1.1.1.1/24", 69); dp_test_intf_real(IF_NAME, realname); - ifp = ifnet_byifname(realname); + ifp = dp_ifnet_byifname(realname); /* Create forward and reverse packets */ f = dp_test_create_icmp_ipv4_pak("10.73.0.0", "10.73.2.0", @@ -235,7 +239,7 @@ DP_START_TEST(session_udp6_lookup, test4) dp_test_nl_add_ip_addr_and_connected(IF_NAME, "2001:1:1::1/64"); dp_test_intf_real(IF_NAME, realname); - ifp = ifnet_byifname(realname); + ifp = 
dp_ifnet_byifname(realname); /* Create forward and reverse packets */ f = dp_test_create_udp_ipv6_pak("2010:73::", "2010:73:2::", @@ -286,7 +290,7 @@ DP_START_TEST(session_tcp6_lookup, test5) dp_test_nl_add_ip_addr_and_connected(IF_NAME, "2001:1:1::1/64"); dp_test_intf_real(IF_NAME, realname); - ifp = ifnet_byifname(realname); + ifp = dp_ifnet_byifname(realname); /* Create forward and reverse packets */ f = dp_test_create_tcp_ipv6_pak("2010:73::", "2010:73:2::", @@ -337,7 +341,7 @@ DP_START_TEST(session_icmp6_lookup, test6) dp_test_nl_add_ip_addr_and_connected(IF_NAME, "2001:1:1::1/64"); dp_test_intf_real(IF_NAME, realname); - ifp = ifnet_byifname(realname); + ifp = dp_ifnet_byifname(realname); /* Create forward and reverse packets */ f = dp_test_create_icmp_ipv6_pak("2010:73::", "2010:73:2::", @@ -394,7 +398,7 @@ DP_START_TEST(session_sentry, test7) dp_test_nl_add_ip_addr_and_connected_vrf(IF_NAME, "1.1.1.1/24", 69); dp_test_intf_real(IF_NAME, realname); - ifp = ifnet_byifname(realname); + ifp = dp_ifnet_byifname(realname); /* Create a packet and session */ f = dp_test_create_udp_ipv4_pak("10.73.0.0", "10.73.2.0", @@ -496,7 +500,7 @@ DP_START_TEST(session_feature, test8) dp_test_nl_add_ip_addr_and_connected_vrf(IF_NAME, "1.1.1.1/24", 69); dp_test_intf_real(IF_NAME, realname); - ifp = ifnet_byifname(realname); + ifp = dp_ifnet_byifname(realname); /* Create a packet and session */ f = dp_test_create_udp_ipv4_pak("10.73.0.0", "10.73.2.0", @@ -599,7 +603,7 @@ DP_START_TEST(session_expire, test9) dp_test_nl_add_ip_addr_and_connected_vrf(IF_NAME, "1.1.1.1/24", 69); dp_test_intf_real(IF_NAME, realname); - ifp = ifnet_byifname(realname); + ifp = dp_ifnet_byifname(realname); /* Create packet */ f = dp_test_create_udp_ipv4_pak("10.73.0.0", "10.73.2.0", @@ -651,7 +655,7 @@ DP_START_TEST(session_link, test10) dp_test_nl_add_ip_addr_and_connected_vrf(IF_NAME, "1.1.1.1/24", 69); dp_test_intf_real(IF_NAME, realname); - ifp = ifnet_byifname(realname); + ifp = 
dp_ifnet_byifname(realname); /* Create packets */ pkt[0] = dp_test_create_udp_ipv4_pak("10.73.0.0", "10.73.2.0", @@ -732,7 +736,7 @@ DP_START_TEST(session_unlink_all, test11) dp_test_nl_add_ip_addr_and_connected_vrf(IF_NAME, "1.1.1.1/24", 69); dp_test_intf_real(IF_NAME, realname); - ifp = ifnet_byifname(realname); + ifp = dp_ifnet_byifname(realname); /* Create packets */ pkt[0] = dp_test_create_udp_ipv4_pak("10.73.0.0", "10.73.2.0", @@ -801,7 +805,7 @@ DP_START_TEST(session_timeout, test12) dp_test_nl_add_ip_addr_and_connected_vrf(IF_NAME, "1.1.1.1/24", 69); dp_test_intf_real(IF_NAME, realname); - ifp = ifnet_byifname(realname); + ifp = dp_ifnet_byifname(realname); /* Create forward and reverse packets */ f = dp_test_create_udp_ipv4_pak("10.73.0.0", "10.73.2.0", @@ -892,7 +896,7 @@ DP_START_TEST(session_icmp_test, test14) dp_test_nl_add_ip_addr_and_connected_vrf(IF_NAME, "1.1.1.1/24", 69); dp_test_intf_real(IF_NAME, realname); - ifp = ifnet_byifname(realname); + ifp = dp_ifnet_byifname(realname); /* Attempt session creation of a echo reply - must pass */ icmp_pak = dp_test_create_icmp_ipv4_pak("10.73.2.0", "10.73.0.0", @@ -947,7 +951,7 @@ DP_START_TEST(session_pptp_lookup, test15) dp_test_nl_add_ip_addr_and_connected_vrf(IF_NAME, "1.1.1.1/24", 69); dp_test_intf_real(IF_NAME, realname); - ifp = ifnet_byifname(realname); + ifp = dp_ifnet_byifname(realname); /* Create forward and reverse packets */ f = dp_test_create_gre_pptp_ipv4_pak("10.73.0.0", "10.73.2.0", @@ -1009,7 +1013,7 @@ DP_START_TEST(session_sentry_packet, test16) dp_test_nl_add_ip_addr_and_connected_vrf(IF_NAME, "1.1.1.1/24", 69); dp_test_intf_real(IF_NAME, realname); - ifp = ifnet_byifname(realname); + ifp = dp_ifnet_byifname(realname); /* Create forward and reverse packets */ f = dp_test_create_udp_ipv4_pak("10.73.0.0", "10.73.2.0", @@ -1090,7 +1094,7 @@ DP_START_TEST(session_link_walk, test18) dp_test_nl_add_ip_addr_and_connected_vrf(IF_NAME, "1.1.1.1/24", 69); dp_test_intf_real(IF_NAME, realname); - 
ifp = ifnet_byifname(realname); + ifp = dp_ifnet_byifname(realname); /* Create packets */ pkt[0] = dp_test_create_udp_ipv4_pak("10.73.0.0", "10.73.2.0", @@ -1156,3 +1160,783 @@ DP_START_TEST(session_link_walk, test18) dp_test_netlink_del_vrf(69, 0); } DP_END_TEST; + + +/* + * Test session sync for a UDP firewall session + * + * Creates a firewall session, saves it to a connsync buffer, clears the + * session, then restores session from the connsync buffer. + */ +DP_DECL_TEST_CASE(session_suite, ssync1, NULL, NULL); +DP_START_TEST(ssync1, test19) +{ + /* Setup interfaces and neighbours */ + dp_test_nl_add_ip_addr_and_connected("dp1T0", "192.0.2.1/24"); + dp_test_nl_add_ip_addr_and_connected("dp2T1", "203.0.113.1/24"); + + dp_test_netlink_add_neigh("dp1T0", "192.0.2.103", + "aa:bb:cc:16:0:20"); + dp_test_netlink_add_neigh("dp2T1", "203.0.113.203", + "aa:bb:cc:18:0:1"); + + /* + * Ruleset + */ + struct dp_test_npf_rule_t rules[] = { + { + .rule = "10", + .pass = PASS, + .stateful = STATEFUL, + .npf = "to=any" + }, + RULE_DEF_BLOCK, + NULL_RULE + }; + + struct dp_test_npf_ruleset_t rset = { + .rstype = "fw-out", + .name = "FW1", + .enable = 1, + .attach_point = "dp2T1", + .fwd = FWD, + .dir = "out", + .rules = rules + }; + + dp_test_npf_fw_add(&rset, false); + + /* UDP Forwards */ + dpt_udp("dp1T0", "aa:bb:cc:16:0:20", + "192.0.2.103", 10000, "203.0.113.203", 60000, + "192.0.2.103", 10000, "203.0.113.203", 60000, + "aa:bb:cc:18:0:1", "dp2T1", + DP_TEST_FWD_FORWARDED); + + uint32_t pkts_in = 0; + uint32_t pkts_out = 0; + uint32_t bytes_in = 0; + uint32_t bytes_out = 0; + uint32_t sess_id1 = 0; + + dp_test_session_counters("start 0 count 1 " + "src-addr 192.0.2.103 src-port 10000 " + "dst-addr 203.0.113.203 dst-port 60000 " + "proto 17 dir out intf dpT21", + &pkts_in, &pkts_out, &bytes_in, &bytes_out, + &sess_id1); + + dp_test_fail_unless(pkts_out == 1, "Packets out %u, expected 1", + pkts_out); + dp_test_fail_unless(bytes_out == 62, "Bytes out %u, expected 62", + 
bytes_out); + + /* Session ID should be non-zero */ + dp_test_fail_unless(sess_id1 > 0, "Session ID %u, expected > 0", + sess_id1); + + /* + * Create a sentry_packet to match the forward flow + */ + uint32_t saddr; + uint32_t daddr; + const struct ifnet *ifp; + char realname[IFNAMSIZ]; + struct sentry_packet sp_forw; + int rc; + + dp_test_intf_real("dpT21", realname); + ifp = dp_ifnet_byifname(realname); + + inet_pton(AF_INET, "192.0.2.103", &saddr); + inet_pton(AF_INET, "203.0.113.203", &daddr); + + rc = dp_test_session_init_sentry_packet(&sp_forw, ifp->if_index, + SENTRY_IPv4, (uint8_t) IPPROTO_UDP, 1, htons(10000), + &saddr, htons(60000), &daddr); + dp_test_fail_unless(rc == 0, "session init sentry_packet: %d\n", rc); + + /* + * Use sentry_packet to lookup dataplane session + */ + struct session *s = NULL; + struct npf_session *se = NULL; + bool forw; + + rc = session_lookup_by_sentry_packet(&sp_forw, &s, &forw); + dp_test_fail_unless(rc == 0 && s != NULL, + "session_lookup_by_sentry_packet failed\n"); + + /* + * Get the npf session from the dataplane session + */ + se = session_feature_get(s, s->se_sen->sen_ifindex, + SESSION_FEATURE_NPF); + dp_test_fail_unless(se != NULL, "Failed to get npf session\n"); + + /* + * Pack session. 
Returns pmh_len if successful + */ + struct session *peer = NULL; + struct npf_pack_message buf; + + memset(&buf, 0, sizeof(buf)); + + rc = dp_session_pack(s, &buf, sizeof(buf), SESSION_PACK_FULL, &peer); + dp_test_fail_unless(rc > 0, "dp_session_pack failed\n"); + + /* + * Clear the session + */ + dp_test_npf_clear_sessions(); + + /* + * Unpack and restore session from buffer + */ + enum session_pack_type spt = SESSION_PACK_NONE; + + rc = dp_session_restore(&buf, buf.hdr.pmh_len, &spt); + dp_test_fail_unless(rc == 0 && spt == SESSION_PACK_FULL, + "dp_session_restore failed\n"); + + /* + * An identical session should now exist, except for the session ID + * which should be one greater than the first session + */ + pkts_in = 0; + pkts_out = 0; + bytes_in = 0; + bytes_out = 0; + uint32_t sess_id2 = 0; + + dp_test_session_counters("start 0 count 1 " + "src-addr 192.0.2.103 src-port 10000 " + "dst-addr 203.0.113.203 dst-port 60000 " + "proto 17 dir out intf dpT21", + &pkts_in, &pkts_out, &bytes_in, &bytes_out, + &sess_id2); + + dp_test_fail_unless(pkts_out == 1, + "Packets out %u, expected 1", pkts_out); + dp_test_fail_unless(bytes_out == 62, + "Bytes out %u, expected 62", bytes_out); + dp_test_fail_unless(sess_id2 == sess_id1 + 1, + "Session ID %u, expected %u", + sess_id2, sess_id1 + 1); + + /* Send another packet */ + dpt_udp("dp1T0", "aa:bb:cc:16:0:20", + "192.0.2.103", 10000, "203.0.113.203", 60000, + "192.0.2.103", 10000, "203.0.113.203", 60000, + "aa:bb:cc:18:0:1", "dp2T1", + DP_TEST_FWD_FORWARDED); + + /* + * Check packet has used the restored session + */ + pkts_in = 0, pkts_out = 0, bytes_in = 0, bytes_out = 0; + dp_test_session_counters("start 0 count 1 " + "src-addr 192.0.2.103 src-port 10000 " + "dst-addr 203.0.113.203 dst-port 60000 " + "proto 17 dir out intf dpT21", + &pkts_in, &pkts_out, &bytes_in, &bytes_out, + &sess_id2); + + dp_test_fail_unless(pkts_out == 2, + "Packets out %u, expected 2", pkts_out); + dp_test_fail_unless(bytes_out == 124, + "Bytes 
out %u, expected 124", bytes_out); + + /* + * Cleanup + */ + dp_test_npf_fw_del(&rset, false); + dp_test_npf_clear_sessions(); + + dp_test_netlink_del_neigh("dp1T0", "192.0.2.103", + "aa:bb:cc:16:0:20"); + dp_test_netlink_del_neigh("dp2T1", "203.0.113.203", + "aa:bb:cc:18:0:1"); + + dp_test_nl_del_ip_addr_and_connected("dp1T0", "192.0.2.1/24"); + dp_test_nl_del_ip_addr_and_connected("dp2T1", "203.0.113.1/24"); + +} DP_END_TEST; + + +/* + * Test session sync for a TCP firewall session with TCP strict enabled + * + * Creates a firewall session, saves it to a connsync buffer, clears the + * session, then restores session from the connsync buffer. + */ +DP_DECL_TEST_CASE(session_suite, ssync2, NULL, NULL); +DP_START_TEST(ssync2, test20) +{ + /* Setup interfaces and neighbours */ + dp_test_nl_add_ip_addr_and_connected("dp1T0", "1.1.1.1/24"); + dp_test_nl_add_ip_addr_and_connected("dp2T1", "2.2.2.1/24"); + + dp_test_netlink_add_neigh("dp1T0", "1.1.1.11", + "aa:bb:cc:dd:1:11"); + dp_test_netlink_add_neigh("dp1T0", "1.1.1.12", + "aa:bb:cc:dd:1:12"); + dp_test_netlink_add_neigh("dp2T1", "2.2.2.11", + "aa:bb:cc:dd:2:11"); + + /* + * Ruleset + */ + struct dp_test_npf_rule_t rules[] = { + { + .rule = "10", + .pass = PASS, + .stateful = STATEFUL, + .npf = "to=any" + }, + RULE_DEF_BLOCK, + NULL_RULE + }; + + struct dp_test_npf_ruleset_t rset = { + .rstype = "fw-out", + .name = "FW1", + .enable = 1, + .attach_point = "dp2T1", + .fwd = FWD, + .dir = "out", + .rules = rules + }; + + dp_test_npf_fw_add(&rset, false); + dp_test_npf_cmd("npf-ut fw global tcp-strict enable", false); + + /* + * TCP packet + */ + struct dp_test_pkt_desc_t *fwd_in, *fwd_out; + struct dp_test_pkt_desc_t *rev_in, *rev_out; + + fwd_in = dpt_pdesc_v4_create( + "TCP Forwards In", IPPROTO_TCP, + "aa:bb:cc:dd:1:11", "1.1.1.11", 1000, + "00:00:a4:00:00:64", "2.2.2.11", 80, + "dp1T0", "dp2T1"); + + fwd_out = dpt_pdesc_v4_create( + "TCP Forwards Out", IPPROTO_TCP, + "00:00:a4:00:00:64", "1.1.1.11", 1000, + 
"aa:bb:cc:dd:2:11", "2.2.2.11", 80, + "dp1T0", "dp2T1"); + + rev_in = dpt_pdesc_v4_create( + "TCP Reverse In", IPPROTO_TCP, + "aa:bb:cc:dd:2:11", "2.2.2.11", 80, + "00:00:a4:00:00:64", "1.1.1.11", 1000, + "dp2T1", "dp1T0"); + + rev_out = dpt_pdesc_v4_create( + "TCP Reverse Out", IPPROTO_TCP, + "00:00:a4:00:00:64", "2.2.2.11", 80, + "aa:bb:cc:dd:1:11", "1.1.1.11", 1000, + "dp2T1", "dp1T0"); + + struct dpt_tcp_flow tcp_call = { + .text[0] = '\0', /* description */ + .isn = {0, 0}, /* initial seq no */ + .desc[DPT_FORW] = { /* Forw pkt descriptors */ + .pre = fwd_in, + .pst = fwd_out, + }, + .desc[DPT_BACK] = { /* Back pkt descriptors */ + .pre = rev_in, + .pst = rev_out, + }, + .test_cb = NULL, /* Prep and send pkt */ + .post_cb = NULL, /* Fixup pkt exp */ + }; + + struct dpt_tcp_flow_pkt tcp_pkt1[] = { + { DPT_FORW, TH_SYN, 0, NULL, 0, NULL }, + { DPT_BACK, TH_SYN | TH_ACK, 0, NULL, 0, NULL }, + { DPT_FORW, TH_ACK, 0, NULL, 0, NULL }, + { DPT_BACK, TH_ACK, 20, NULL, 0, NULL }, + /* sync occurs here */ + { DPT_FORW, TH_ACK, 50, NULL, 0, NULL }, + { DPT_FORW, TH_ACK | TH_FIN, 10, NULL, 0, NULL }, + { DPT_FORW, TH_ACK, 0, NULL, 0, NULL }, + { DPT_FORW, TH_ACK | TH_FIN, 0, NULL, 0, NULL }, + { DPT_BACK, TH_ACK, 0, NULL, 0, NULL }, + { DPT_FORW, TH_ACK, 0, NULL, 0, NULL }, + { DPT_BACK, TH_ACK | TH_FIN, 0, NULL, 0, NULL }, + { DPT_FORW, TH_ACK, 0, NULL, 0, NULL }, + }; + + /* First 4 packets of TCP call */ + dpt_tcp_call(&tcp_call, tcp_pkt1, ARRAY_SIZE(tcp_pkt1), 0, 3, NULL, 0); + + uint32_t pkts_in = 0; + uint32_t pkts_out = 0; + uint32_t bytes_in = 0; + uint32_t bytes_out = 0; + uint32_t sess_id1 = 0; + + /* + * Check sessions exists, and packet counts are as expected + */ + dp_test_session_counters("start 0 count 1 " + "src-addr 1.1.1.11 src-port 1000 " + "dst-addr 2.2.2.11 dst-port 80 " + "proto 6 dir out intf dpT21", + &pkts_in, &pkts_out, &bytes_in, &bytes_out, + &sess_id1); + + dp_test_fail_unless(pkts_out == 2, "Packets out %u, expected 2", + pkts_out); + 
dp_test_fail_unless(bytes_out == 108, "Bytes out %u, expected 108", + bytes_out); + dp_test_fail_unless(pkts_in == 2, "Packets in %u, expected 2", + pkts_in); + dp_test_fail_unless(bytes_in == 128, "Bytes in %u, expected 128", + bytes_in); + + /* Session ID should be non-zero */ + dp_test_fail_unless(sess_id1 > 0, "Session ID %u, expected > 0", + sess_id1); + + /* + * Create a sentry_packet to match the forward flow + */ + uint32_t saddr; + uint32_t daddr; + const struct ifnet *ifp; + char realname[IFNAMSIZ]; + struct sentry_packet sp_forw; + int rc; + + dp_test_intf_real("dpT21", realname); + ifp = dp_ifnet_byifname(realname); + + inet_pton(AF_INET, "1.1.1.11", &saddr); + inet_pton(AF_INET, "2.2.2.11", &daddr); + + rc = dp_test_session_init_sentry_packet(&sp_forw, ifp->if_index, + SENTRY_IPv4, (uint8_t) IPPROTO_TCP, 1, htons(1000), + &saddr, htons(80), &daddr); + dp_test_fail_unless(rc == 0, "session init sentry_packet: %d\n", rc); + + /* + * Use sentry_packet to lookup dataplane session + */ + struct session *s = NULL; + struct npf_session *se = NULL; + bool forw; + + rc = session_lookup_by_sentry_packet(&sp_forw, &s, &forw); + dp_test_fail_unless(rc == 0 && s != NULL, + "session_lookup_by_sentry_packet failed\n"); + + /* + * Get the npf session from the dataplane session + */ + se = session_feature_get(s, s->se_sen->sen_ifindex, + SESSION_FEATURE_NPF); + dp_test_fail_unless(se != NULL, "Failed to get npf session\n"); + + /* + * Pack session. 
Returns pmh_len if successful + */ + struct session *peer = NULL; + struct npf_pack_message buf; + + memset(&buf, 0, sizeof(buf)); + + rc = dp_session_pack(s, &buf, sizeof(buf), SESSION_PACK_FULL, &peer); + dp_test_fail_unless(rc > 0, "dp_session_pack failed\n"); + + /* + * Clear the session + */ + dp_test_npf_clear_sessions(); + + /* + * Unpack and restore session from buffer + */ + enum session_pack_type spt = SESSION_PACK_NONE; + + rc = dp_session_restore(&buf, buf.hdr.pmh_len, &spt); + dp_test_fail_unless(rc == 0 && spt == SESSION_PACK_FULL, + "dp_session_restore failed\n"); + + /* + * Remainder of TCP call + */ + dpt_tcp_call(&tcp_call, tcp_pkt1, ARRAY_SIZE(tcp_pkt1), 4, 11, NULL, 0); + + + /* + * An identical session should now exist, except for the session ID + * which should be one greater than the first session + */ + pkts_in = 0; + pkts_out = 0; + bytes_in = 0; + bytes_out = 0; + uint32_t sess_id2 = 0; + + dp_test_session_counters("start 0 count 1 " + "src-addr 1.1.1.11 src-port 1000 " + "dst-addr 2.2.2.11 dst-port 80 " + "proto 6 dir out intf dpT21", + &pkts_in, &pkts_out, &bytes_in, &bytes_out, + &sess_id2); + + dp_test_fail_unless(pkts_out == 8, "Packets out %u, expected 8", + pkts_out); + dp_test_fail_unless(bytes_out == 492, "Bytes out %u, expected 492", + bytes_out); + dp_test_fail_unless(pkts_in == 4, "Packets in %u, expected 4", + pkts_in); + dp_test_fail_unless(bytes_in == 236, "Bytes in %u, expected 236", + bytes_in); + dp_test_fail_unless(sess_id2 == sess_id1 + 1, + "Session ID %u, expected %u", + sess_id2, sess_id1 + 1); + + /* + * Cleanup + */ + free(fwd_in); + free(fwd_out); + free(rev_in); + free(rev_out); + + dp_test_npf_cmd("npf-ut fw global tcp-strict disable", false); + dp_test_npf_fw_del(&rset, false); + dp_test_npf_clear_sessions(); + + dp_test_nl_del_ip_addr_and_connected("dp1T0", "1.1.1.1/24"); + dp_test_nl_del_ip_addr_and_connected("dp2T1", "2.2.2.1/24"); + + dp_test_netlink_del_neigh("dp1T0", "1.1.1.11", + "aa:bb:cc:dd:1:11"); + 
dp_test_netlink_del_neigh("dp1T0", "1.1.1.12", + "aa:bb:cc:dd:1:12"); + dp_test_netlink_del_neigh("dp2T1", "2.2.2.11", + "aa:bb:cc:dd:2:11"); + +} DP_END_TEST; + + +/* + * Verify the normal SNAT session + * + * "time_to_expire" has been removed from the expected json + */ +static void session_suite_ssync3_verify_sess1(void) +{ + json_object *exp; + + exp = dp_test_json_create( + "{" + " \"config\":{" + " \"sessions\":{" + " \"1\":{" + " \"vrf_id\":1," + " \"src_addr\":\"192.0.2.103\"," + " \"src_port\":10000," + " \"dst_addr\":\"203.0.113.203\"," + " \"dst_port\":60000," + " \"proto\":17," + " \"interface\":\"dpT21\"," + " \"state_expire_window\":60," + " \"state\":2," + " \"gen_state\":2," + " \"parent\":0," + " \"duration\":0," + " \"feature_type\":2," + " \"features_count\":1," + " \"features\":[" + " {" + " \"type\":3," + " \"interface\":\"dpT21\"," + " \"flags\":518," + " \"nat\":{" + " \"trans_type\":2," + " \"trans_addr\":\"203.0.113.2\"," + " \"trans_port\":10000," + " \"masquerade\":0," + " \"rule\":{" + " \"name\":\"dpT21\"," + " \"number\":10" + " }" + " }" + " }" + " ]," + " \"counters\":{" + " \"packets_in\":1," + " \"bytes_in\":62," + " \"packets_out\":1," + " \"bytes_out\":62" + " }" + " }" + " }" + " }" + "}"); + + dp_test_check_json_poll_state("session-op show sessions full", exp, + DP_TEST_JSON_CHECK_SUBSET, + false, 1); +} + +/* + * Verify the SNAT session created from connsync restoration. + * + * "time_to_expire" has been removed from the expected json + * + * The *only* line that should have changed is line 4, the session ID. 
+ */ +static void session_suite_ssync3_verify_sess2(void) +{ + json_object *exp; + + exp = dp_test_json_create( + "{" + " \"config\":{" + " \"sessions\":{" + " \"2\":{" + " \"vrf_id\":1," + " \"src_addr\":\"192.0.2.103\"," + " \"src_port\":10000," + " \"dst_addr\":\"203.0.113.203\"," + " \"dst_port\":60000," + " \"proto\":17," + " \"interface\":\"dpT21\"," + " \"state_expire_window\":60," + " \"state\":2," + " \"gen_state\":2," + " \"parent\":0," + " \"duration\":0," + " \"feature_type\":2," + " \"features_count\":1," + " \"features\":[" + " {" + " \"type\":3," + " \"interface\":\"dpT21\"," + " \"flags\":518," + " \"nat\":{" + " \"trans_type\":2," + " \"trans_addr\":\"203.0.113.2\"," + " \"trans_port\":10000," + " \"masquerade\":0," + " \"rule\":{" + " \"name\":\"dpT21\"," + " \"number\":10" + " }" + " }" + " }" + " ]," + " \"counters\":{" + " \"packets_in\":1," + " \"bytes_in\":62," + " \"packets_out\":1," + " \"bytes_out\":62" + " }" + " }" + " }" + " }" + "}"); + + dp_test_check_json_poll_state("session-op show sessions full", exp, + DP_TEST_JSON_CHECK_SUBSET, + false, 1); +} + +/* + * Test session sync for an SNAT session + * + * 1. Create SNAT session + * 2. Verify incoming pkt is translated ok + * 3. Pack session into connsync buffer + * 4. Clear session + * 5. Verify incoming pkt is now dropped + * 6. Unpack connsync buffer and restore session + * 7. 
Verify incoming pkt is now translated ok + */ +DP_DECL_TEST_CASE(session_suite, ssync3, NULL, NULL); +DP_START_TEST(ssync3, test19) +{ + /* Setup interfaces and neighbours */ + dp_test_nl_add_ip_addr_and_connected("dp1T0", "192.0.2.1/24"); + dp_test_nl_add_ip_addr_and_connected("dp2T1", "203.0.113.1/24"); + + dp_test_netlink_add_neigh("dp1T0", "192.0.2.103", + "aa:bb:cc:16:0:20"); + dp_test_netlink_add_neigh("dp2T1", "203.0.113.203", + "aa:bb:cc:18:0:1"); + + struct dp_test_npf_nat_rule_t snat = { + .desc = "snat rule", + .rule = "10", + .ifname = "dp2T1", + .proto = NAT_NULL_PROTO, + .map = "dynamic", + .port_alloc = NULL, + .from_addr = "192.0.2.0/24", + .from_port = NULL, + .to_addr = NULL, + .to_port = NULL, + .trans_addr = "203.0.113.2", + .trans_port = NULL, + }; + dp_test_npf_snat_add(&snat, true); + + /* Block inbound pkts that do not match a session */ + struct dp_test_npf_rule_t rules[] = { + { + .rule = "10", + .pass = BLOCK, + .stateful = STATELESS, + .npf = "dst-addr=203.0.113.2" + }, + RULE_DEF_BLOCK, + NULL_RULE + }; + + struct dp_test_npf_ruleset_t rset = { + .rstype = "fw-in", + .name = "FW1", + .enable = 1, + .attach_point = "dp2T1", + .fwd = FWD, + .dir = "in", + .rules = rules + }; + dp_test_npf_fw_add(&rset, false); + + /* Ensure session ID is 0 */ + dp_test_session_reset_session_id(); + + /* UDP Forwards */ + dpt_udp("dp1T0", "aa:bb:cc:16:0:20", + "192.0.2.103", 10000, "203.0.113.203", 60000, + "203.0.113.2", 10000, "203.0.113.203", 60000, + "aa:bb:cc:18:0:1", "dp2T1", + DP_TEST_FWD_FORWARDED); + + /* UDP Backwards */ + dpt_udp("dp2T1", "aa:bb:cc:18:0:1", + "203.0.113.203", 60000, "203.0.113.2", 10000, + "203.0.113.203", 60000, "192.0.2.103", 10000, + "aa:bb:cc:16:0:20", "dp1T0", + DP_TEST_FWD_FORWARDED); + + /* Verify session */ + session_suite_ssync3_verify_sess1(); + + /* + * Create a sentry_packet to match the forward flow + */ + uint32_t saddr; + uint32_t daddr; + const struct ifnet *ifp; + char realname[IFNAMSIZ]; + struct sentry_packet 
sp_forw; + int rc; + + dp_test_intf_real("dpT21", realname); + ifp = dp_ifnet_byifname(realname); + + inet_pton(AF_INET, "192.0.2.103", &saddr); + inet_pton(AF_INET, "203.0.113.203", &daddr); + + rc = dp_test_session_init_sentry_packet(&sp_forw, ifp->if_index, + SENTRY_IPv4, (uint8_t) IPPROTO_UDP, 1, htons(10000), + &saddr, htons(60000), &daddr); + dp_test_fail_unless(rc == 0, "session init sentry_packet: %d\n", rc); + + /* + * Use sentry_packet to lookup dataplane session + */ + struct session *s = NULL; + struct npf_session *se = NULL; + bool forw; + + rc = session_lookup_by_sentry_packet(&sp_forw, &s, &forw); + dp_test_fail_unless(rc == 0 && s != NULL, + "session_lookup_by_sentry_packet failed\n"); + + /* + * Get the npf session from the dataplane session + */ + se = session_feature_get(s, s->se_sen->sen_ifindex, + SESSION_FEATURE_NPF); + dp_test_fail_unless(se != NULL, "Failed to get npf session\n"); + + /* + * Pack session. Returns pmh_len if successful + */ + struct session *peer = NULL; + struct npf_pack_message buf; + + memset(&buf, 0, sizeof(buf)); + + rc = dp_session_pack(s, &buf, sizeof(buf), SESSION_PACK_FULL, &peer); + dp_test_fail_unless(rc > 0, "dp_session_pack failed\n"); + + /* + * There may be a good reason this changes if the data structs change + */ + dp_test_fail_unless(rc == 256, + "Expected pack msg length 256, got %d\n", rc); + + /* + * Clear the SNAT session. Without the session, an incoming pkt will + * hit the block firewall rule instead. 
+ */ + dp_test_npf_clear_sessions(); + + dpt_udp("dp2T1", "aa:bb:cc:18:0:1", + "203.0.113.203", 60000, "203.0.113.2", 10000, + "203.0.113.203", 60000, "192.0.2.103", 10000, + "aa:bb:cc:16:0:20", "dp1T0", + DP_TEST_FWD_DROPPED); + + /* + * Unpack and restore session from buffer + */ + enum session_pack_type spt = SESSION_PACK_NONE; + + rc = dp_session_restore(&buf, buf.hdr.pmh_len, &spt); + dp_test_fail_unless(rc == 0 && spt == SESSION_PACK_FULL, + "dp_session_restore failed\n"); + + /* Verify restored session */ + session_suite_ssync3_verify_sess2(); + + /* + * With the SNAT session restored, a backwards packet should now be + * translated and forwarded. + */ + dpt_udp("dp2T1", "aa:bb:cc:18:0:1", + "203.0.113.203", 60000, "203.0.113.2", 10000, + "203.0.113.203", 60000, "192.0.2.103", 10000, + "aa:bb:cc:16:0:20", "dp1T0", + DP_TEST_FWD_FORWARDED); + + /* + * Cleanup + */ + dp_test_npf_fw_del(&rset, false); + dp_test_npf_snat_del(snat.ifname, snat.rule, true); + dp_test_npf_cleanup(); + + dp_test_netlink_del_neigh("dp1T0", "192.0.2.103", + "aa:bb:cc:16:0:20"); + dp_test_netlink_del_neigh("dp2T1", "203.0.113.203", + "aa:bb:cc:18:0:1"); + + dp_test_nl_del_ip_addr_and_connected("dp1T0", "192.0.2.1/24"); + dp_test_nl_del_ip_addr_and_connected("dp2T1", "203.0.113.1/24"); + +} DP_END_TEST; diff --git a/tests/whole_dp/src/dp_test_session_cmds.c b/tests/whole_dp/src/dp_test_session_cmds.c index f510e1c8..aad04b20 100644 --- a/tests/whole_dp/src/dp_test_session_cmds.c +++ b/tests/whole_dp/src/dp_test_session_cmds.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2016 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -19,12 +19,12 @@ #include "dp_test.h" #include "dp_test_str.h" -#include "dp_test_lib.h" +#include "dp_test_lib_internal.h" #include "dp_test_lib_exp.h" -#include "dp_test_lib_intf.h" +#include "dp_test_lib_intf_internal.h" #include "dp_test_lib_pkt.h" -#include "dp_test_pktmbuf_lib.h" -#include "dp_test_netlink_state.h" +#include "dp_test_pktmbuf_lib_internal.h" +#include "dp_test_netlink_state_internal.h" #include "dp_test_console.h" #include "dp_test_json_utils.h" #include "dp_test_npf_lib.h" @@ -169,7 +169,7 @@ static const struct dp_test_command_t session_cmd[] = { * * "delete session-table source 10.0.0.1 destination 11.0.0.1" * -> "session-op clear session filter saddr 10.0.0.1 sport any" - * " daddr 11.0.0.1 dport any" + * " daddr 11.0.0.1 dport any proto any" */ { "session-op clear", @@ -198,6 +198,20 @@ static const struct dp_test_command_t session_cmd[] = { { "session-op clear session filter saddr 10.0.0.1 sport any " "daddr 11.0.0.1 dport any", + "missing argument", + false, + false, + }, + { + "session-op clear session filter saddr 10.0.0.1 sport any " + "daddr 11.0.0.1 dport any proto any", + EXP_EMPTY_STRING, + true, + false, + }, + { + "session-op clear session filter saddr 2001::1 sport any " + "daddr 2001::2 dport any proto any", EXP_EMPTY_STRING, true, false, diff --git a/tests/whole_dp/src/dp_test_session_internal_lib.c b/tests/whole_dp/src/dp_test_session_internal_lib.c new file mode 100644 index 00000000..def12bbc --- /dev/null +++ b/tests/whole_dp/src/dp_test_session_internal_lib.c @@ -0,0 +1,362 @@ +/* + * Copyright (c) 2020-2021, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017 by Brocade Communications Systems, Inc. + * All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + * + * Peter W. 
Morreale + * + * dataplane UT Session test lib + */ + +#include +#include +#include +#include +#include +#include + +#include "ip_funcs.h" +#include "in_cksum.h" +#include "if_var.h" +#include "urcu.h" +#include "main.h" +#include "session/session.h" +#include "session/session_feature.h" +#include "npf/npf.h" +#include "npf/npf_cache.h" +#include "npf/npf_session.h" + +#include "dp_test.h" +#include "dp_test_controller.h" +#include "dp_test_netlink_state_internal.h" +#include "dp_test_lib_internal.h" +#include "dp_test_lib_intf_internal.h" +#include "dp_test_lib_exp.h" +#include "dp_test_pktmbuf_lib_internal.h" +#include "dp_test_session_internal_lib.h" + + +int _dp_test_session_establish(struct rte_mbuf *m, const struct ifnet *ifp, + uint32_t timeout, struct session **se, bool *created, + const char *file, int line) +{ + int rc; + struct session *s = NULL; + + rc = session_establish(m, ifp, timeout, &s, created); + + if (*created) { + _dp_test_fail_unless(s, file, line, + "session establish, no session\n"); + _dp_test_fail_unless(s->se_timeout == timeout, + file, line, "session establish, bad timeout\n"); + _dp_test_fail_unless(s->se_link == NULL, file, line, + "session establish: link defined\n"); + _dp_test_fail_unless(s->se_vrfid == pktmbuf_get_vrf(m), + file, line, + "session establish: bad session vrfid\n"); + _dp_test_fail_unless(rte_atomic16_read(&s->se_link_cnt) == 0, + file, line, "session establish: bad link cnt %u\n", + rte_atomic16_read(&s->se_link_cnt)); + _dp_test_fail_unless(rte_atomic16_read(&s->se_sen_cnt) == 2, + file, line, + "session establish: bad sentry cnt %u\n", + rte_atomic16_read(&s->se_sen_cnt)); + _dp_test_fail_unless( + rte_atomic16_read(&s->se_feature_count) == 0, + file, line, "session establish: bad feature cnt %u\n", + rte_atomic16_read(&s->se_feature_count)); + } + *se = s; + + return rc; +} + +/* Certain test want this to fail, only for continuity */ +int _dp_test_session_lookup(struct rte_mbuf *m, uint32_t if_index, + struct 
session **se, bool *forw) +{ + return session_lookup(m, if_index, se, forw); +} + +void _dp_test_session_expire(struct session *s, struct rte_mbuf *m, + const char *file, int line) +{ + session_expire(s, m); + + _dp_test_fail_unless(s->se_flags & SESSION_EXPIRED, file, line, + "session expire: bad flags: %u\n", s->se_flags); + + /* + * If we were linked, ensure that all is cleaned up + */ + if (s->se_link) { + _dp_test_fail_unless(cds_list_empty(&s->se_link->sl_children), + file, line, + "session expire: linked children\n"); + _dp_test_fail_unless(cds_list_empty(&s->se_link->sl_link), + file, line, + "session expire: still linked\n"); + _dp_test_fail_unless(s->se_link->sl_parent == NULL, file, line, + "session expire: parent exists\n"); + } + + if (m) + _dp_test_fail_unless( + !pktmbuf_mdata_exists(m, PKT_MDATA_SESSION_SENTRY), + file, line, + "session expire: pkt not cleared\n"); +} + +int _dp_test_session_init_sentry_packet(struct sentry_packet *sp, + uint32_t if_index, uint16_t flags, uint8_t proto, + vrfid_t vrfid, uint16_t sid, void *saddr, + uint16_t did, void *daddr, const char *file, int line) +{ + return session_init_sentry_packet(sp, if_index, flags, proto, + vrfid, sid, saddr, did, daddr); +} + + +int _dp_test_session_create_from_sentry_packets(struct rte_mbuf *m, + struct sentry_packet *sp_forw, struct sentry_packet *sp_back, + const struct ifnet *ifp, + uint32_t timeout, struct session **se, bool *created, + const char *file, int line) +{ + int rc; + struct session *s = NULL; + + rc = session_create_from_sentry_packets(m, sp_forw, sp_back, ifp, + timeout, &s, created); + _dp_test_fail_unless((rc == 0 && s), file, line, + "session create from sentry packets: %d s: %p\n", + rc, s); + *se = s; + return rc; +} + +int _dp_test_session_sentry_insert(struct session *s, uint32_t if_index, + uint16_t flags, uint16_t sid, void *saddr, uint16_t did, + void *daddr, const char *file, int line) +{ + int rc; + uint16_t sen_cnt = rte_atomic16_read(&s->se_sen_cnt); + + 
rc = session_sentry_insert(s, if_index, flags, sid, saddr, did, daddr); + + /* + * Only on success, some want the failure returned + */ + if (!rc) { + uint16_t new_sen_cnt = rte_atomic16_read(&s->se_sen_cnt); + + _dp_test_fail_unless(new_sen_cnt - sen_cnt == 1, file, line, + "session sentry insert: bad sen cnt: %u:%u\n", + new_sen_cnt, sen_cnt); + } + + return rc; +} + +static int print_sen(struct sentry *sen, void *data) +{ + printf("sentry walk: session: %p flags: %u len: %u\n", + sen->sen_session, sen->sen_flags, sen->sen_len); + return 0; + +} + +static int print_se(struct session *s, void *data) +{ + printf("session walk: session: %p flags: %u link_cnt: %u sen_cnt: %u\n", + s, s->se_flags, rte_atomic16_read(&s->se_link_cnt), + rte_atomic16_read(&s->se_sen_cnt)); + + return 0; +} + +void _dp_test_session_reset(const char *file, int line) +{ + int rc; + unsigned long sen; + unsigned long se; + + rc = session_table_destroy_all(); + + _dp_test_fail_unless(rc == 0, file, line, + "session table destroy all: %d\n", rc); + + session_table_counts(&sen, &se); + + if (sen) + sentry_table_walk(print_sen, NULL); + if (se) + session_table_walk(print_se, NULL); + + _dp_test_fail_unless(sen == 0, file, line, + "session table counts: sentries: %lu\n", sen); + _dp_test_fail_unless(se == 0, file, line, + "session table counts: sessions: %lu\n", se); +} + +void dp_test_session_reset_session_id(void) +{ + session_reset_session_id(); +} + +int _dp_test_session_feature_add(struct session *s, uint32_t if_index, + enum session_feature_type type, void *data, + const char *file, int line) +{ + int rc; + uint16_t old = rte_atomic16_read(&s->se_feature_count); + uint16_t new; + + rc = session_feature_add(s, if_index, type, data); + if (!rc) { + new = rte_atomic16_read(&s->se_feature_count); + _dp_test_fail_unless(new - old == 1, file, line, + "session feature add: bad counts: %u:%u\n", new, old); + } + return rc; +} + +int _dp_test_session_feature_request_expiry(struct session *s, + 
uint32_t if_index, enum session_feature_type type, + const char *file, int line) +{ + int rc; + uint16_t old = rte_atomic16_read(&s->se_feature_exp_count); + uint16_t new; + + rc = session_feature_request_expiry(s, if_index, type); + if (!rc) { + new = rte_atomic16_read(&s->se_feature_exp_count); + _dp_test_fail_unless(new - old == 1, file, line, + "session feature add: bad counts: %u:%u\n", old, new); + } + return rc; +} + +void *_dp_test_session_feature_get(struct session *s, uint32_t if_index, + enum session_feature_type type) +{ + return session_feature_get(s, if_index, type); +} + +static struct session_link *lookup_link(struct session_link *sl, + struct cds_list_head *head) +{ + struct session_link *tmp; + + cds_list_for_each_entry(tmp, head, sl_link) { + if (sl == tmp) + return sl; + } + return NULL; +} + +int _dp_test_session_link(struct session *parent, struct session *child, + const char *file, int line) +{ + uint16_t parent_linkcnt = rte_atomic16_read(&parent->se_link_cnt); + uint16_t child_linkcnt = rte_atomic16_read(&child->se_link_cnt); + uint16_t new_linkcnt; + struct session_link *sl; + int rc; + + rc = session_link(parent, child); + if (rc) + return rc; + + new_linkcnt = rte_atomic16_read(&parent->se_link_cnt); + _dp_test_fail_unless(new_linkcnt - parent_linkcnt == 1, file, line, + "session link: bad parent link cnt: %u:%u\n", + new_linkcnt, parent_linkcnt); + + /* Child does not inc */ + new_linkcnt = rte_atomic16_read(&child->se_link_cnt); + _dp_test_fail_unless(new_linkcnt - child_linkcnt == 0, file, line, + "session link: bad child link cnt: %u:%u\n", + new_linkcnt, child_linkcnt); + + /* Must have link structs on both parent and child */ + _dp_test_fail_unless(parent->se_link, file, line, + "session link: No parent link\n"); + _dp_test_fail_unless(child->se_link, file, line, + "session link: No child link\n"); + + /* Ensure session pointers in the link struct are correct. 
*/ + _dp_test_fail_unless(child->se_link->sl_self == child, file, line, + "session link: No child self\n"); + _dp_test_fail_unless(child->se_link->sl_parent == parent, file, line, + "session link: No child parent\n"); + + /* Child list link cannot be empty */ + _dp_test_fail_unless(!cds_list_empty(&child->se_link->sl_link), + file, line, "session link: No child link\n"); + + /* Ensure the child is on the parent */ + sl = lookup_link(child->se_link, &parent->se_link->sl_children); + _dp_test_fail_unless(sl == child->se_link, file, line, + "session link: child not on parent\n"); + return 0; +} + +int _dp_test_session_unlink(struct session *s, const char *file, int line) +{ + struct session_link *sl; + struct session *parent; + uint16_t link_cnt; + + /* Not an error if not linked */ + if (!s->se_link) + return 0; + + /* Not an error if already unlinked */ + parent = s->se_link->sl_parent; + if (!parent) + return 0; + + link_cnt = rte_atomic16_read(&parent->se_link_cnt); + + session_unlink(s); + + _dp_test_fail_unless(rte_atomic16_read(&parent->se_link_cnt) == + (link_cnt - 1), file, line, + "session unlink: bad parent link cnt: %u:%u\n", + link_cnt, rte_atomic16_read(&parent->se_link_cnt)); + + _dp_test_fail_unless(s->se_link->sl_parent == NULL, file, line, + "session unlink: parent not cleared\n"); + _dp_test_fail_unless(cds_list_empty(&s->se_link->sl_link), file, line, + "session unlink: not unlinked\n"); + + sl = lookup_link(s->se_link, &parent->se_link->sl_children); + _dp_test_fail_unless(sl == NULL, file, line, + "session unlink: exists on parent\n"); + return 0; +} + +/* Simulate running the GC */ +void _dp_test_session_gc(const char *file, int line) +{ + session_gc(); +} + +/* unlink everything */ +void _dp_test_session_unlink_all(struct session *s, const char *file, int line) +{ + session_unlink_all(s); + + _dp_test_fail_unless(rte_atomic16_read(&s->se_link_cnt) == 0, + file, line, + "session unlink_all: bad link cnt: %u\n", + 
rte_atomic16_read(&s->se_link_cnt)); + + _dp_test_fail_unless(cds_list_empty(&s->se_link->sl_link), file, line, + "session unlink_all: not unlinked\n"); +} diff --git a/tests/whole_dp/src/dp_test_session_lib.h b/tests/whole_dp/src/dp_test_session_internal_lib.h similarity index 91% rename from tests/whole_dp/src/dp_test_session_lib.h rename to tests/whole_dp/src/dp_test_session_internal_lib.h index 1eaf2ec2..9d5673fd 100644 --- a/tests/whole_dp/src/dp_test_session_lib.h +++ b/tests/whole_dp/src/dp_test_session_internal_lib.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2018-2021, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -8,8 +8,8 @@ * Whole dataplane test npf session library */ -#ifndef __DP_TEST_SESSION_LIB_H__ -#define __DP_TEST_SESSION_LIB_H__ +#ifndef __DP_TEST_SESSION_INTERNAL_LIB_H__ +#define __DP_TEST_SESSION_INTERNAL_LIB_H__ #include "session/session.h" #include "session/session_feature.h" @@ -62,6 +62,8 @@ void _dp_test_session_reset(const char *file, int line); #define dp_test_session_reset() \ _dp_test_session_reset(__FILE__, __LINE__) +void dp_test_session_reset_session_id(void); + int _dp_test_session_feature_add(struct session *se, uint32_t if_index, enum session_feature_type type, void *data, const char *file, int line); @@ -103,12 +105,5 @@ void _dp_test_session_unlink_all(struct session *s, const char *file, int line); #define dp_test_session_unlink_all(s) \ _dp_test_session_unlink_all(s, __FILE__, __LINE__) - -void _dp_test_session_feature_register(enum session_feature_type type, - const struct session_feature_ops *ops, const char *file, - int line); -#define dp_test_session_feature_register(type, ops) \ - _dp_test_session_feature_register(type, ops, __FILE__, __LINE__) - #endif /* __DP_TEST_SESSION_LIB_H__ */ diff --git a/tests/whole_dp/src/dp_test_session_lib.c 
b/tests/whole_dp/src/dp_test_session_lib.c index 1e4c4945..ea5dd907 100644 --- a/tests/whole_dp/src/dp_test_session_lib.c +++ b/tests/whole_dp/src/dp_test_session_lib.c @@ -1,13 +1,7 @@ /* - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. - * Copyright (c) 2017 by Brocade Communications Systems, Inc. - * All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only - * - * Peter W. Morreale - * - * dataplane UT Session test lib */ #include @@ -17,349 +11,106 @@ #include #include -#include "ip_funcs.h" -#include "in_cksum.h" -#include "if_var.h" -#include "urcu.h" -#include "main.h" -#include "session/session.h" -#include "session/session_feature.h" -#include "npf/npf.h" -#include "npf/npf_cache.h" -#include "npf/npf_session.h" +#include "npf/npf_pack.h" #include "dp_test.h" #include "dp_test_controller.h" -#include "dp_test_netlink_state.h" -#include "dp_test_lib.h" -#include "dp_test_lib_intf.h" +#include "dp_test_netlink_state_internal.h" +#include "dp_test_lib_internal.h" +#include "dp_test_lib_intf_internal.h" #include "dp_test_lib_exp.h" -#include "dp_test_pktmbuf_lib.h" -#include "dp_test_session_lib.h" - - -int _dp_test_session_establish(struct rte_mbuf *m, const struct ifnet *ifp, - uint32_t timeout, struct session **se, bool *created, - const char *file, int line) -{ - int rc; - struct session *s = NULL; - - rc = session_establish(m, ifp, timeout, &s, created); - - if (*created) { - _dp_test_fail_unless(s, file, line, - "session establish, no session\n"); - _dp_test_fail_unless(s->se_timeout == timeout, - file, line, "session establish, bad timeout\n"); - _dp_test_fail_unless(s->se_link == NULL, file, line, - "session establish: link defined\n"); - _dp_test_fail_unless(s->se_vrfid == pktmbuf_get_vrf(m), - file, line, - "session establish: bad session vrfid\n"); - _dp_test_fail_unless(rte_atomic16_read(&s->se_link_cnt) == 0, - file, line, "session 
establish: bad link cnt %u\n", - rte_atomic16_read(&s->se_link_cnt)); - _dp_test_fail_unless(rte_atomic16_read(&s->se_sen_cnt) == 2, - file, line, - "session establish: bad sentry cnt %u\n", - rte_atomic16_read(&s->se_sen_cnt)); - _dp_test_fail_unless( - rte_atomic16_read(&s->se_feature_count) == 0, - file, line, "session establish: bad feature cnt %u\n", - rte_atomic16_read(&s->se_feature_count)); - } - *se = s; - - return rc; -} - -/* Certain test want this to fail, only for continuity */ -int _dp_test_session_lookup(struct rte_mbuf *m, uint32_t if_index, - struct session **se, bool *forw) -{ - return session_lookup(m, if_index, se, forw); -} - -void _dp_test_session_expire(struct session *s, struct rte_mbuf *m, - const char *file, int line) -{ - session_expire(s, m); - - _dp_test_fail_unless(s->se_flags & SESSION_EXPIRED, file, line, - "session expire: bad flags: %u\n", s->se_flags); - - /* - * If we were linked, ensure that all is cleaned up - */ - if (s->se_link) { - _dp_test_fail_unless(cds_list_empty(&s->se_link->sl_children), - file, line, - "session expire: linked children\n"); - _dp_test_fail_unless(cds_list_empty(&s->se_link->sl_link), - file, line, - "session expire: still linked\n"); - _dp_test_fail_unless(s->se_link->sl_parent == NULL, file, line, - "session expire: parent exists\n"); - } +#include "dp_test_pktmbuf_lib_internal.h" +#include "dp_test_session_internal_lib.h" - if (m) - _dp_test_fail_unless( - !pktmbuf_mdata_exists(m, PKT_MDATA_SESSION_SENTRY), - file, line, - "session expire: pkt not cleared\n"); -} - -int _dp_test_session_init_sentry_packet(struct sentry_packet *sp, - uint32_t if_index, uint16_t flags, uint8_t proto, - vrfid_t vrfid, uint16_t sid, void *saddr, - uint16_t did, void *daddr, const char *file, int line) -{ - return session_init_sentry_packet(sp, if_index, flags, proto, - vrfid, sid, saddr, did, daddr); -} - - -int _dp_test_session_create_from_sentry_packets(struct rte_mbuf *m, - struct sentry_packet *sp_forw, struct 
sentry_packet *sp_back, - const struct ifnet *ifp, - uint32_t timeout, struct session **se, bool *created, - const char *file, int line) -{ - int rc; - struct session *s = NULL; - - rc = session_create_from_sentry_packets(m, sp_forw, sp_back, ifp, - timeout, &s, created); - _dp_test_fail_unless((rc == 0 && s), file, line, - "session create from sentry packets: %d s: %p\n", - rc, s); - *se = s; - return rc; -} +#include "dp_test/dp_test_session_lib.h" -int _dp_test_session_sentry_insert(struct session *s, uint32_t if_index, - uint16_t flags, uint16_t sid, void *saddr, uint16_t did, - void *daddr, const char *file, int line) +void _dp_test_session_msg_valid(void *msg, uint32_t size, + const char *file, int line) { - int rc; - uint16_t sen_cnt = rte_atomic16_read(&s->se_sen_cnt); + struct npf_pack_message *n_msg = msg; - rc = session_sentry_insert(s, if_index, flags, sid, saddr, did, daddr); - - /* - * Only on success, some want the failure returned - */ - if (!rc) { - uint16_t new_sen_cnt = rte_atomic16_read(&s->se_sen_cnt); - - _dp_test_fail_unless(new_sen_cnt - sen_cnt == 1, file, line, - "session sentry insert: bad sen cnt: %u:%u\n", - new_sen_cnt, sen_cnt); - } - - return rc; + _dp_test_fail_unless(npf_pack_validate_msg(n_msg, size), file, line, + "npf_pack message invalid\n"); } -static int print_sen(struct sentry *sen, void *data) +bool _dp_test_session_msg_full(void *msg, + const char *file, int line) { - printf("sentry walk: session: %p flags: %u len: %u\n", - sen->sen_session, sen->sen_flags, sen->sen_len); - return 0; + struct npf_pack_message *n_msg = msg; + if (npf_pack_get_msg_type(n_msg) == SESSION_PACK_FULL) + return true; + return false; } -static int print_se(struct session *s, void *data) +bool _dp_test_session_msg_update(void *msg, + const char *file, int line) { - printf("session walk: session: %p flags: %u link_cnt: %u sen_cnt: %u\n", - s, s->se_flags, rte_atomic16_read(&s->se_link_cnt), - rte_atomic16_read(&s->se_sen_cnt)); + struct 
npf_pack_message *n_msg = msg; - return 0; + if (npf_pack_get_msg_type(n_msg) == SESSION_PACK_UPDATE) + return true; + return false; } -void _dp_test_session_reset(const char *file, int line) +uint64_t _dp_test_session_msg_get_id(void *msg, + const char *file, int line) { - int rc; - unsigned long sen; - unsigned long se; - - rc = session_table_destroy_all(); + struct npf_pack_message *n_msg = msg; - _dp_test_fail_unless(rc == 0, file, line, - "session table destroy all: %d\n", rc); - - session_table_counts(&sen, &se); - - if (sen) - sentry_table_walk(print_sen, NULL); - if (se) - session_table_walk(print_se, NULL); - - _dp_test_fail_unless(sen == 0, file, line, - "session table counts: sentries: %lu\n", sen); - _dp_test_fail_unless(se == 0, file, line, - "session table counts: sessions: %lu\n", se); + return npf_pack_get_session_id(n_msg); } -int _dp_test_session_feature_add(struct session *s, uint32_t if_index, - enum session_feature_type type, void *data, - const char *file, int line) +void _dp_test_session_msg_check_rcvd(void *msg, + uint64_t pkts_per_session, + struct dp_test_session sess[], + const char *file, int line) { - int rc; - uint16_t old = rte_atomic16_read(&s->se_feature_count); - uint16_t new; - - rc = session_feature_add(s, if_index, type, data); - if (!rc) { - new = rte_atomic16_read(&s->se_feature_count); - _dp_test_fail_unless(new - old == 1, file, line, - "session feature add: bad counts: %u:%u\n", new, old); - } - return rc; -} + int i; + uint64_t se_id; + struct npf_pack_dp_sess_stats *stats; + struct npf_pack_message *n_msg = msg; -int _dp_test_session_feature_request_expiry(struct session *s, - uint32_t if_index, enum session_feature_type type, - const char *file, int line) -{ - int rc; - uint16_t old = rte_atomic16_read(&s->se_feature_exp_count); - uint16_t new; + _dp_test_fail_unless(sess, file, line, + "npf_pack sess input invalid\n"); + se_id = dp_test_session_msg_get_id(n_msg); + stats = npf_pack_get_session_stats(n_msg); + 
_dp_test_fail_unless(stats, file, line, + "Couldn't get stats from npf_pack message\n"); - rc = session_feature_request_expiry(s, if_index, type); - if (!rc) { - new = rte_atomic16_read(&s->se_feature_exp_count); - _dp_test_fail_unless(new - old == 1, file, line, - "session feature add: bad counts: %u:%u\n", old, new); + if (stats->pdss_pkts_in == pkts_per_session && + stats->pdss_pkts_out == pkts_per_session) { + for (i = 0; i < DP_TEST_MAX_TEST_SESSIONS; i++) { + if (sess[i].se_id == se_id) { + sess[i].completed = true; + return; + } + } + for (i = 0; i < DP_TEST_MAX_TEST_SESSIONS; i++) { + if (sess[i].se_id == 0) { + sess[i].se_id = se_id; + sess[i].completed = true; + return; + } + } } - return rc; } -void *_dp_test_session_feature_get(struct session *s, uint32_t if_index, - enum session_feature_type type) +bool _dp_test_session_msg_pulled_all(void *msg, + uint64_t pkts_per_session, + struct dp_test_session sess[], + const char *file, int line) { - return session_feature_get(s, if_index, type); -} + int i; + struct npf_pack_message *n_msg = msg; -static struct session_link *lookup_link(struct session_link *sl, - struct cds_list_head *head) -{ - struct session_link *tmp; + _dp_test_fail_unless(sess, file, line, + "npf_pack sess input invalid\n"); + dp_test_session_msg_check_rcvd(n_msg, pkts_per_session, sess); - cds_list_for_each_entry(tmp, head, sl_link) { - if (sl == tmp) - return sl; + for (i = 0; i < DP_TEST_MAX_TEST_SESSIONS; i++) { + if (sess[i].completed == false) + return false; } - return NULL; -} - -int _dp_test_session_link(struct session *parent, struct session *child, - const char *file, int line) -{ - uint16_t parent_linkcnt = rte_atomic16_read(&parent->se_link_cnt); - uint16_t child_linkcnt = rte_atomic16_read(&child->se_link_cnt); - uint16_t new_linkcnt; - struct session_link *sl; - int rc; - - rc = session_link(parent, child); - if (rc) - return rc; - - new_linkcnt = rte_atomic16_read(&parent->se_link_cnt); - _dp_test_fail_unless(new_linkcnt - 
parent_linkcnt == 1, file, line, - "session link: bad parent link cnt: %u:%u\n", - new_linkcnt, parent_linkcnt); - - /* Child does not inc */ - new_linkcnt = rte_atomic16_read(&child->se_link_cnt); - _dp_test_fail_unless(new_linkcnt - child_linkcnt == 0, file, line, - "session link: bad child link cnt: %u:%u\n", - new_linkcnt, child_linkcnt); - - /* Must have link structs on both parent and child */ - _dp_test_fail_unless(parent->se_link, file, line, - "session link: No parent link\n"); - _dp_test_fail_unless(child->se_link, file, line, - "session link: No child link\n"); - - /* Ensure session pointers in the link struct are correct. */ - _dp_test_fail_unless(child->se_link->sl_self == child, file, line, - "session link: No child self\n"); - _dp_test_fail_unless(child->se_link->sl_parent == parent, file, line, - "session link: No child parent\n"); - - /* Child list link cannot be empty */ - _dp_test_fail_unless(!cds_list_empty(&child->se_link->sl_link), - file, line, "session link: No child link\n"); - - /* Ensure the child is on the parent */ - sl = lookup_link(child->se_link, &parent->se_link->sl_children); - _dp_test_fail_unless(sl == child->se_link, file, line, - "session link: child not on parent\n"); - return 0; -} - -int _dp_test_session_unlink(struct session *s, const char *file, int line) -{ - struct session_link *sl; - struct session *parent; - uint16_t link_cnt; - - /* Not an error if not linked */ - if (!s->se_link) - return 0; - - /* Not an error if already unlinked */ - parent = s->se_link->sl_parent; - if (!parent) - return 0; - - link_cnt = rte_atomic16_read(&parent->se_link_cnt); - - session_unlink(s); - - _dp_test_fail_unless(rte_atomic16_read(&parent->se_link_cnt) == - (link_cnt - 1), file, line, - "session unlink: bad parent link cnt: %u:%u\n", - link_cnt, rte_atomic16_read(&parent->se_link_cnt)); - - _dp_test_fail_unless(s->se_link->sl_parent == NULL, file, line, - "session unlink: parent not cleared\n"); - 
_dp_test_fail_unless(cds_list_empty(&s->se_link->sl_link), file, line, - "session unlink: not unlinked\n"); - - sl = lookup_link(s->se_link, &parent->se_link->sl_children); - _dp_test_fail_unless(sl == NULL, file, line, - "session unlink: exists on parent\n"); - return 0; -} - -/* Simulate running the GC */ -void _dp_test_session_gc(const char *file, int line) -{ - session_gc(); -} - -/* unlink everything */ -void _dp_test_session_unlink_all(struct session *s, const char *file, int line) -{ - session_unlink_all(s); - - _dp_test_fail_unless(rte_atomic16_read(&s->se_link_cnt) == 0, - file, line, - "session unlink_all: bad link cnt: %u\n", - rte_atomic16_read(&s->se_link_cnt)); - - _dp_test_fail_unless(cds_list_empty(&s->se_link->sl_link), file, line, - "session unlink_all: not unlinked\n"); -} - -/* For completness only, nothing to check */ -void _dp_test_session_feature_register(enum session_feature_type type, - const struct session_feature_ops *ops, const char *file, - int line) -{ - session_feature_register(type, ops); + return true; } diff --git a/tests/whole_dp/src/dp_test_sfp.c b/tests/whole_dp/src/dp_test_sfp.c index 840272fa..368326cf 100644 --- a/tests/whole_dp/src/dp_test_sfp.c +++ b/tests/whole_dp/src/dp_test_sfp.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2018-2020, AT&T Intellectual Property. All rights reserved. 
* * SPDX-License-Identifier: LGPL-2.1-only */ @@ -37,7 +37,7 @@ static const uint8_t eeprom_dummy[512] = { 0x00, 0x3a, 0x00, 0x00, 0x57, 0x51, 0x31, 0x36, 0x30, 0x34, 0x31, 0x32, 0x41, 0x31, 0x31, 0x35, 0x20, 0x20, 0x20, 0x20, 0x31, 0x35, 0x31, 0x36, - 0x31, 0x30, 0x20, 0x20, 0xfc, 0xfa, 0x03, 0x3b, + 0x31, 0x30, 0x20, 0x20, 0x68, 0xfa, 0x03, 0x3b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -104,7 +104,7 @@ static const uint8_t eeprom_8079_dummy[] = { 0x00, 0x3a, 0x00, 0x00, 0x57, 0x51, 0x31, 0x36, 0x30, 0x34, 0x31, 0x32, 0x41, 0x31, 0x31, 0x35, 0x20, 0x20, 0x20, 0x20, 0x31, 0x35, 0x31, 0x36, - 0x31, 0x30, 0x20, 0x20, 0xfc, 0xfa, 0x03, 0x3b, + 0x31, 0x30, 0x20, 0x20, 0x68, 0xfa, 0x03, 0x3b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -198,7 +198,7 @@ static void dp_test_verify_intf_sfp_state(struct ifnet *ifp, " \"vendor_pn\": \"SFP-10G-SR-IT\"," " \"vendor_sn\": \"WQ160412A115\"," " \"vendor_oui\": \"00.8b.21\"," - " \"diag_type\": 252," + " \"diag_type\": 104," " \"date\": \"2015-16-10\"," " \"class\": \"10G Base-SR\"," "%s" @@ -225,7 +225,7 @@ DP_START_TEST(valid_sfp, sfp_dump_8472) struct ifnet *ifp; sfp_test = DP_TEST_SFP_SFF8472; - ifp = ifnet_byifname(IIFNAME); + ifp = dp_ifnet_byifname(IIFNAME); if (!ifp) return; @@ -238,7 +238,7 @@ DP_START_TEST(valid_sfp, sfp_dump_8079) struct ifnet *ifp; sfp_test = DP_TEST_SFP_SFF8079; - ifp = ifnet_byifname(IIFNAME); + ifp = dp_ifnet_byifname(IIFNAME); if (!ifp) return; diff --git a/tests/whole_dp/src/dp_test_sfp.h b/tests/whole_dp/src/dp_test_sfp.h index 8e4fae17..9509e7a9 100644 --- a/tests/whole_dp/src/dp_test_sfp.h +++ b/tests/whole_dp/src/dp_test_sfp.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2018-2019, AT&T Intellectual Property. 
All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only */ diff --git a/tests/whole_dp/src/dp_test_slow_path.c b/tests/whole_dp/src/dp_test_slow_path.c index d538b5db..87395bc5 100644 --- a/tests/whole_dp/src/dp_test_slow_path.c +++ b/tests/whole_dp/src/dp_test_slow_path.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -14,14 +14,14 @@ #include "ip_funcs.h" #include "main.h" #include "shadow.h" -#include "gre.h" +#include "if/gre.h" #include "dp_test.h" -#include "dp_test_lib.h" -#include "dp_test_lib_intf.h" +#include "dp_test_lib_internal.h" +#include "dp_test_lib_intf_internal.h" #include "dp_test_lib_exp.h" -#include "dp_test_pktmbuf_lib.h" -#include "dp_test_netlink_state.h" +#include "dp_test_pktmbuf_lib_internal.h" +#include "dp_test_netlink_state_internal.h" #include "dp_test_crypto_utils.h" DP_DECL_TEST_SUITE(slow_suite); @@ -60,7 +60,7 @@ DP_START_TEST(slow_dp_pkt, test_shadow_ipv4) 1, &len); dp_test_pktmbuf_eth_init(test_pak, dst_mac_str, dp_test_intf_name2mac_str("dp1T0"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); /* Create pak we expect to receive on the tx ring */ exp = dp_test_exp_create(test_pak); @@ -68,7 +68,7 @@ DP_START_TEST(slow_dp_pkt, test_shadow_ipv4) dp_test_pktmbuf_eth_init(dp_test_exp_get_pak(exp), exp_mac_str, dp_test_intf_name2mac_str("dp1T0"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); dp_test_send_slowpath_pkt(test_pak, exp); } @@ -100,7 +100,7 @@ DP_START_TEST(slow_dp_pkt, test_shadow_ipv6) 1, &len); dp_test_pktmbuf_eth_init(test_pak, dst_mac_str, dp_test_intf_name2mac_str("dp1T0"), - ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV6); /* Create pak we expect to receive on the tx ring */ exp = dp_test_exp_create(test_pak); @@ -108,7 +108,7 @@ DP_START_TEST(slow_dp_pkt, test_shadow_ipv6) 
dp_test_pktmbuf_eth_init(dp_test_exp_get_pak(exp), exp_mac_str, dp_test_intf_name2mac_str("dp1T0"), - ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV6); dp_test_send_slowpath_pkt(test_pak, exp); } @@ -142,12 +142,12 @@ DP_START_TEST(slow_dp_pkt, test_spath_gre) "1.1.2.1", "1.1.2.2", 1, &gre_pl_len, ETH_P_TEB, 0, 0, &gre_payload); memcpy(gre_payload, rte_pktmbuf_mtod(payload_pak, - const struct ether_hdr *), gre_pl_len); + const struct rte_ether_hdr *), gre_pl_len); dp_test_set_pak_ip_field(iphdr(test_pak), DP_TEST_SET_DF, 1); dp_test_pktmbuf_eth_init(test_pak, nh_mac_str, dp_test_intf_name2mac_str("dp1T1"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); exp = dp_test_exp_create(test_pak); rte_pktmbuf_free(test_pak); @@ -161,7 +161,7 @@ DP_START_TEST(slow_dp_pkt, test_spath_gre) 1, &len); dp_test_pktmbuf_eth_init(payload_pak, nh_mac_str, dp_test_intf_name2mac_str("dp1T1"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); gre_pl_len = rte_pktmbuf_data_len(payload_pak); @@ -169,12 +169,12 @@ DP_START_TEST(slow_dp_pkt, test_spath_gre) "1.1.2.1", "1.1.2.2", 1, &gre_pl_len, ETH_P_TEB, 0, 0, &gre_payload); memcpy(gre_payload, rte_pktmbuf_mtod(payload_pak, - const struct ether_hdr *), gre_pl_len); + const struct rte_ether_hdr *), gre_pl_len); dp_test_set_pak_ip_field(iphdr(test_pak), DP_TEST_SET_DF, 1); dp_test_pktmbuf_eth_init(test_pak, nh_mac_str, dp_test_intf_name2mac_str("dp1T1"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); exp = dp_test_exp_create(test_pak); rte_pktmbuf_free(test_pak); diff --git a/tests/whole_dp/src/dp_test_storm_ctl.c b/tests/whole_dp/src/dp_test_storm_ctl.c index 12ddba2c..9ac0a342 100644 --- a/tests/whole_dp/src/dp_test_storm_ctl.c +++ b/tests/whole_dp/src/dp_test_storm_ctl.c @@ -1,17 +1,17 @@ /* - * Copyright (c) 2018-2019, AT&T Intellectual Property. + * Copyright (c) 2018-2020, AT&T Intellectual Property. * All rights reserved. 
* * SPDX-License-Identifier: LGPL-2.1-only */ -#include "dp_test_macros.h" +#include "dp_test/dp_test_macros.h" #include "util.h" #include "dp_test.h" #include "dp_test_controller.h" #include "dp_test_cmd_state.h" #include "dp_test_console.h" -#include "dp_test_lib_intf.h" +#include "dp_test_lib_intf_internal.h" #include "bridge_vlan_set.h" DP_DECL_TEST_SUITE(storm_ctl); @@ -144,7 +144,8 @@ _dp_test_verify_storm_ctl_state(bool monitoring, int count, " },", storm_ctl_traffic_type_to_str(i), cfg_rate[i], - cfg_rate[i] + FAL_BUMPS_RATE_BY, + stats[i] ? + cfg_rate[i] + FAL_BUMPS_RATE_BY : 0, stats[i]); } } @@ -154,15 +155,18 @@ _dp_test_verify_storm_ctl_state(bool monitoring, int count, snprintf(interface_tbl_str, 1000, " \"intfs\": [{" " \"ifname\": \"%s\", " - " \"vlan_table\": [{ " + " %s {" " \"vlan\": %d, " " \"profile\": \"%s\"," "%s" " }" - " ]" + " %s" " }" " ]", - interface, vlan, profile_name, rate_str); + interface, + vlan ? "\"vlan_table\": [" : "\"whole_interface\" :", + vlan, profile_name, rate_str, + vlan ? 
"]" : ""); } else { snprintf(cmd_str, 100, "storm-ctl show"); interface_tbl_str[0] = '\0'; @@ -306,6 +310,9 @@ DP_START_TEST(add_profile, profile_for_intf) {0, 0}, {0, 0}, }; + dp_test_send_config_src(dp_test_cont_src_get(), + "switchport dpT10 hw-switching enable"); + /* Set unicast bandwidth-level */ dp_test_send_config_src(dp_test_cont_src_get(), "storm-ctl SET profile PR1 unicast bandwidth-level 10000000"); @@ -374,6 +381,9 @@ DP_START_TEST(add_profile, profile_for_intf) bandwidth[1][1] = 0; dp_test_verify_storm_ctl_profile("PR1", false); + dp_test_send_config_src(dp_test_cont_src_get(), + "switchport dpT10 hw-switching disable"); + } DP_END_TEST; DP_START_TEST(add_profile, profile_for_vlan) @@ -388,6 +398,9 @@ DP_START_TEST(add_profile, profile_for_vlan) uint32_t cfg_rate2[3] = { 0 }; uint64_t stats[3] = { 10, 10, 10 }; /* Always have 10 pkts accepted */ + dp_test_send_config_src(dp_test_cont_src_get(), + "switchport dpT10 hw-switching enable"); + /* Set unicast bandwidth-level on PR1 */ dp_test_send_config_src(dp_test_cont_src_get(), "storm-ctl SET profile PR1 unicast bandwidth-level 250"); @@ -493,4 +506,129 @@ DP_START_TEST(add_profile, profile_for_vlan) dp_test_intf_switch_del("switch0"); bridge_vlan_set_free(allowed_vlans); + dp_test_send_config_src(dp_test_cont_src_get(), + "switchport dpT10 hw-switching disable"); +} DP_END_TEST; + +DP_START_TEST(add_profile, profile_update_hw_switch) +{ + int bandwidth1[3][2] = { {0, 0}, + {0, 0}, + {0, 0}, }; + uint32_t cfg_rate1[3] = { 0 }; + uint64_t stats_no_hw_switch[3] = { 0, 0, 0 }; + uint64_t stats[3] = { 10, 10, 10 }; /* Always have 10 pkts accepted */ + + /* Set unicast bandwidth-level on PR1 */ + dp_test_send_config_src(dp_test_cont_src_get(), + "storm-ctl SET profile PR1 unicast bandwidth-level 250"); + bandwidth1[0][0] = 250; + dp_test_verify_storm_ctl_profile_state("PR1", 0, + SC_ACTION_NO_SHUT, bandwidth1); + + /* Set multicast bandwidth-percent on PR1*/ + dp_test_send_config_src(dp_test_cont_src_get(), 
+ "storm-ctl SET profile PR1 multicast bandwidth-percent 50"); + bandwidth1[1][1] = 5000; + dp_test_verify_storm_ctl_profile_state("PR1", 0, + SC_ACTION_NO_SHUT, bandwidth1); + + struct bridge_vlan_set *allowed_vlans = bridge_vlan_set_create(); + + bridge_vlan_set_add(allowed_vlans, 1); + + dp_test_intf_switch_create("switch0"); + dp_test_intf_bridge_enable_vlan_filter("switch0"); + + dp_test_intf_switch_add_port("switch0", "dpT10"); + dp_test_intf_bridge_port_set_vlans("switch0", "dpT10", + 0, allowed_vlans, NULL); + + /* Apply PR1 to vlan 1 and verify it is there */ + dp_test_send_config_src(dp_test_cont_src_get(), + "storm-ctl SET dpT10 vlan 1 profile PR1"); + cfg_rate1[0] = 250; + /* + * rate stored as percent * 100 in DP, but in fal is converted to kbps + * based on a link speed of 10G + */ + cfg_rate1[1] = 5000 * 1000; + dp_test_verify_storm_ctl_intf_state(SC_MON_ON, 1, "dpT10", 1, "PR1", + cfg_rate1, stats_no_hw_switch); + + /* + * now enable hw-switching and check that the stats report + * non-zero values now. 
+ */ + dp_test_send_config_src(dp_test_cont_src_get(), + "switchport dpT10 hw-switching enable"); + + dp_test_verify_storm_ctl_intf_state(SC_MON_ON, 1, "dpT10", 1, "PR1", + cfg_rate1, stats); + + /* Remove PR1 from vlan 1 and verify it is gone */ + dp_test_send_config_src(dp_test_cont_src_get(), + "storm-ctl DELETE dpT10 vlan 1"); + dp_test_verify_storm_ctl_no_intf_state("dpT10", 2, "PR1"); + + dp_test_send_config_src(dp_test_cont_src_get(), + "storm-ctl DELETE profile PR1 multicast bandwidth-percent"); + dp_test_send_config_src(dp_test_cont_src_get(), + "storm-ctl DELETE profile PR1 unicast bandwidth-level"); + + dp_test_verify_storm_ctl_profile("PR1", false); + dp_test_verify_storm_ctl_state(SC_MON_OFF, 0); + + dp_test_intf_switch_remove_port("switch0", "dpT10"); + dp_test_intf_switch_del("switch0"); + bridge_vlan_set_free(allowed_vlans); + + dp_test_send_config_src(dp_test_cont_src_get(), + "switchport dpT10 hw-switching disable"); +} DP_END_TEST; + +/* + * Test out of order delete of a profile + * + * Configuration of interface vs. profile is async, so we can't rely + * on the profile being deleted after interface config. 
+ */ +DP_START_TEST(add_profile, out_order_delete) +{ + uint32_t cfg_rate1[3] = { 0 }; + uint64_t stats[3] = { 10, 10, 10 }; /* Always have 10 pkts accepted */ + int bandwidth[3][2] = { {0, 0}, + {0, 0}, + {0, 0}, }; + + dp_test_send_config_src(dp_test_cont_src_get(), + "switchport dpT10 hw-switching enable"); + + /* Set unicast bandwidth level */ + dp_test_send_config_src(dp_test_cont_src_get(), + "storm-ctl SET profile PR1 unicast bandwidth-level 100"); + bandwidth[0][0] = 100; + dp_test_verify_storm_ctl_profile_state("PR1", 0, SC_ACTION_NO_SHUT, + bandwidth); + + dp_test_verify_storm_ctl_state(SC_MON_OFF, 0); + dp_test_send_config_src(dp_test_cont_src_get(), + "storm-ctl SET dpT10 profile PR1"); + dp_test_verify_storm_ctl_intf_state(SC_MON_ON, 1, "dpT10", 0, "PR1", + cfg_rate1, stats); + + /* Clear unicast bandwidth level whilst profile still bound to dpT10 */ + dp_test_send_config_src(dp_test_cont_src_get(), + "storm-ctl DELETE profile PR1 unicast bandwidth-level"); + bandwidth[0][0] = 0; + dp_test_verify_storm_ctl_profile_state("PR1", 0, SC_ACTION_NO_SHUT, + bandwidth); + + /* Now unbind from dpT10 and the profile should now be removed */ + dp_test_send_config_src(dp_test_cont_src_get(), + "storm-ctl DELETE dpT10 profile PR1"); + dp_test_verify_storm_ctl_profile("PR1", false); + + dp_test_send_config_src(dp_test_cont_src_get(), + "switchport dpT10 hw-switching disable"); } DP_END_TEST; diff --git a/tests/whole_dp/src/dp_test_str.c b/tests/whole_dp/src/dp_test_str.c index 792be613..c99f7db6 100644 --- a/tests/whole_dp/src/dp_test_str.c +++ b/tests/whole_dp/src/dp_test_str.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2018-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015-2016 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -16,59 +16,9 @@ #include #include -#include "dp_test_lib.h" +#include "dp_test_lib_internal.h" #include "dp_test_str.h" -/* - * Convert string to IP address and mask. Returns 1 if successful, - * 0 if not. - */ -int -dp_test_str2ip(const char *str, in_addr_t *ip_addr, int *mask) -{ - char *sub; - int rv; - - /* Remove the mask, if present */ - sub = strstr(str, "/"); - if (sub) { - *mask = atoi(sub+1); - *sub = '\0'; - } else { - *mask = 32; - } - - /* Convert from text to IP address */ - rv = inet_pton(AF_INET, str, ip_addr); - - return rv; -} - -/* - * Convert string to IPv6 address and mask. Returns 1 if successful, - * 0 if not. - */ -int -dp_test_str2ip6(const char *str, struct in6_addr *ip6_addr, int *mask) -{ - char *sub; - int rv; - - /* Remove the mask, if present */ - sub = strstr(str, "/"); - if (sub) { - *mask = atoi(sub+1); - *sub = '\0'; - } else { - *mask = 128; - } - - /* Convert from text to IPv6 address */ - rv = inet_pton(AF_INET6, str, ip6_addr); - - return rv; -} - /* * Convert IPv4 or IPv6 string and prefix length to a network string and * prefix, e.g. 
"10.1.1.1/24" to "10.1.1.0/24", or "2001:1:1::1/64", to @@ -129,51 +79,11 @@ dp_test_ipstr_to_range(const char *ipstr, char *range, uint rlen) int l; l = spush(range, rlen, "%s-", inet_ntoa(lo)); l += spush(range + l, rlen - l, "%s", inet_ntoa(hi)); + (void) l; return 1; } -/* - * Convert MAC address to a temporary string - */ -#define MAC_STR_MAX 20 -#define N_MAC_STR 4 -static char mac_str[N_MAC_STR][MAC_STR_MAX]; -static int cur_mac_str; - -char * -dp_test_mac2str(struct ether_addr *mac) -{ - char *str = mac_str[cur_mac_str]; - - if (++cur_mac_str >= N_MAC_STR) - cur_mac_str = 0; - - spush(str, MAC_STR_MAX, "%02x:%02x:%02x:%02x:%02x:%02x", - mac->addr_bytes[0], mac->addr_bytes[1], - mac->addr_bytes[2], mac->addr_bytes[3], - mac->addr_bytes[4], mac->addr_bytes[5]); - - return str; -} - -/* - * Take a MAC address string with leading zeros or no leading zeros, and lower - * or upper case hex digits and convert it to no leading zeros and lowercase. - * This is typically the MAC address string format returned from the - * dataplane. - */ -char * -dp_test_canonicalise_macstr(const char *macstr, char *canon) -{ - struct ether_addr mac; - - if (!ether_aton_r(macstr, &mac)) - return NULL; - - return ether_ntoa_r(&mac, canon); -} - /* * Insert a string (insert) into another string (haystack) before or after a * sub-string (needle). @@ -218,18 +128,6 @@ dp_test_str_insert(const char *haystack, const char *needle, return new; } -/* - * Insert a string into another string, before a sub-string. The sub-string - * should be a string, and not a pointer into haystack. Returns a new string, - * which the caller must free. - */ -char * -dp_test_str_insert_before(const char *haystack, const char *needle, - const char *insert) -{ - return dp_test_str_insert(haystack, needle, insert, false); -} - /* * Insert a string into another string, after a sub-string. The sub-string * should be a string, and not a pointer into haystack. 
Returns a new string, diff --git a/tests/whole_dp/src/dp_test_str.h b/tests/whole_dp/src/dp_test_str.h index b728b1b5..379d64e8 100644 --- a/tests/whole_dp/src/dp_test_str.h +++ b/tests/whole_dp/src/dp_test_str.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2018-2019, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015-2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -11,22 +11,6 @@ #ifndef _DP_TEST_STR_H_ #define _DP_TEST_STR_H_ -/* - * Convert string to IP address and mask. Returns 1 if successful, - * 0 if not. Looks for '/xx' at end of string. If not found then - * the mask is set to 32. - */ -int -dp_test_str2ip(const char *str, in_addr_t *ip_addr, int *mask); - -/* - * Convert string to IPv6 address and mask. Returns 1 if successful, - * 0 if not. Looks for '/xx' at end of string. If not found then - * the mask is set to 128. - */ -int -dp_test_str2ip6(const char *str, struct in6_addr *ip6_addr, int *mask); - /* * Convert IPv4 or IPv6 string and prefix length to a network string and * prefix, e.g. "10.1.1.1/24" to "10.1.1.0/24", or "2001:1:1::1/64", to @@ -46,22 +30,6 @@ dp_test_ipstr_to_netstr(const char *ipstr, char *netstr, size_t netstr_len); uint dp_test_ipstr_to_range(const char *ipstr, char *range, uint rlen); -/* - * Returns a temporary string to which the MAC address has been printed. - * Round-robins 4 fixed arrays. - */ -char * -dp_test_mac2str(struct ether_addr *mac); - -/* - * Take a MAC address string with leading zeros or no leading zeros, and lower - * or upper case hex digits and convert it to no leading zeros and lowercase. - * This is typically the MAC address string format returned from the - * dataplane. - */ -char * -dp_test_canonicalise_macstr(const char *macstr, char *canon); - /* * Insert a string (insert) into another string (haystack) before or after a * sub-string (needle). 
@@ -73,17 +41,6 @@ char * dp_test_str_insert(const char *haystack, const char *needle, const char *insert, bool after); -/* - * Insert a string (insert) into another string (haystack) before a sub-string - * (needle). - * - * The sub-string should be a string, and not a pointer into haystack. Returns - * a new string, which the caller must free. - */ -char * -dp_test_str_insert_before(const char *haystack, const char *needle, - const char *insert); - /* * Insert a string (insert) into another string (haystack) after a sub-string * (needle). diff --git a/tests/whole_dp/src/dp_test_stubs.c b/tests/whole_dp/src/dp_test_stubs.c index 87ed2228..1a889c44 100644 --- a/tests/whole_dp/src/dp_test_stubs.c +++ b/tests/whole_dp/src/dp_test_stubs.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2019, AT&T Intellectual Property. + * Copyright (c) 2017-2020, AT&T Intellectual Property. * All rights reserved. * Copyright (c) 2015-2016 by Brocade Communications Systems, Inc. * All rights reserved. @@ -24,8 +24,8 @@ #include "netlink.h" #include "npf_shim.h" #include "commands.h" -#include "dp_test_lib.h" -#include "dp_test_lib_intf.h" +#include "dp_test_lib_internal.h" +#include "dp_test_lib_intf_internal.h" #include "capture.h" #include "dp_test.h" @@ -33,15 +33,19 @@ int spath_pipefd[2] = {0}; int shadow_pipefd[DATAPLANE_MAX_PORTS] = {0}; +void capture_hardware(const struct ifnet *ifp, struct rte_mbuf *mbuf) +{ +} + void capture_burst(const struct ifnet *ifp, struct rte_mbuf *pkts[], unsigned int n) { - return; + /* nothing to do */ } void capture_cancel(struct ifnet *ifp) { - return; + /* nothing to do */ } int cmd_capture(FILE *f, int argc, char **argv) @@ -60,7 +64,11 @@ int slowpath_init(void) void capture_init(uint16_t mbuf_sz) { - return; + /* nothing to do */ +} + +void capture_destroy(void) +{ } int @@ -73,7 +81,7 @@ void ip_id_init(void) { } -uint16_t ip_randomid(uint16_t salt) +uint16_t dp_ip_randomid(uint16_t salt) { return 0; } @@ -164,11 +172,8 @@ int tap_attach(const 
char *ifname) return pipefd[0]; } -void tap_teardown(const char *ifname) -{ -} - /* There is no syslog running in the whole_dp UT environment */ +/* NOLINTNEXTLINE(readability-inconsistent-declaration-parameter-name) */ void syslog(int priority, const char *format, ...) { char log_buf[DP_TEST_TMP_BUF]; diff --git a/tests/whole_dp/src/dp_test_stubs_linux.c b/tests/whole_dp/src/dp_test_stubs_linux.c index 370eca2c..426a7bf9 100644 --- a/tests/whole_dp/src/dp_test_stubs_linux.c +++ b/tests/whole_dp/src/dp_test_stubs_linux.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015-2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -8,34 +8,65 @@ * Linux-specific stubs. */ -#include -#include -#include #include +#include +#include #include +#include +#include #include -#include -#include +#include #include +#include + +#include "dp_test.h" -static bool starts_with_proc_or_sys(const char *path) +/* Same as PL_DLL_LOC */ +const char *pl_path = PKGLIB_DIR"/pipeline/plugins"; + +static bool starts_with(const char *needle, const char *haystack) { - if (!path) - return 0; + if (!needle || !haystack) + return false; + + if (strlen(needle) > strlen(haystack)) + return false; + + return !memcmp(needle, haystack, strlen(needle)); +} - return !memcmp(path, "/proc", strlen("/proc")) || - !memcmp(path, "/sys", strlen("/sys")) || - !memcmp(path, "/run", strlen("/run")); +static bool path_needs_redirected(const char *path) +{ + bool match = false; + + if (starts_with("/run", path)) + match = true; + + if (!from_external) + /* + * Don't redirect paths if running in external mode as these + * need to be picked up from where the dev package put them + */ + if (starts_with(pl_path, path)) + match = true; + + return match; } static int redirect_path(const char *orig_path, char *redir_path, size_t size) { - if (size < 
strlen("tests/whole_dp/dummyfs/") + strlen(orig_path)) { + if (size < strlen(dp_ut_dummyfs_dir) + strlen(orig_path)) { *redir_path = 0; return -1; } - strcpy(redir_path, "tests/whole_dp/dummyfs/"); + if (strncmp(orig_path, pl_path, strlen(pl_path)) == 0) { + strcpy(redir_path, "../../build/src/pipeline/nodes/sample"); + strcat(redir_path, orig_path + strlen(pl_path)); + return 0; + } + + strcpy(redir_path, dp_ut_dummyfs_dir); strcat(redir_path, orig_path); return 0; @@ -54,13 +85,11 @@ int open64(const char *file, int oflag, ...) va_start(ap, oflag); /* - * Redirect /proc and /sys to our dummy filesystem - * * To avoid unnecessary complexity, we don't deal with the * O_CREAT or O_TMPFILE semantics of calling real_open64. */ if (!(oflag & O_CREAT) && !(oflag & O_TMPFILE) && - starts_with_proc_or_sys(file) && + path_needs_redirected(file) && !redirect_path(file, redirfile, sizeof(redirfile))) { ret = real_open64(redirfile, oflag); } else { @@ -82,10 +111,7 @@ DIR *opendir(const char *name) if (!real_opendir) real_opendir = dlsym(RTLD_NEXT, "opendir"); - /* - * Redirect /proc and /sys to our dummy filesystem - */ - if (starts_with_proc_or_sys(name)) { + if (path_needs_redirected(name)) { char redirname[PATH_MAX]; if (!redirect_path(name, redirname, sizeof(redirname))) @@ -104,10 +130,7 @@ FILE *fopen(const char *__restrict filename, if (!real_fopen) real_fopen = dlsym(RTLD_NEXT, "fopen"); - /* - * Redirect /proc and /sys to our dummy filesystem - */ - if (starts_with_proc_or_sys(filename)) { + if (path_needs_redirected(filename)) { char redirfile[PATH_MAX]; if (!redirect_path(filename, redirfile, sizeof(redirfile))) @@ -126,10 +149,7 @@ FILE *fopen64(const char *__restrict filename, if (!real_fopen64) real_fopen64 = dlsym(RTLD_NEXT, "fopen64"); - /* - * Redirect /proc and /sys to our dummy filesystem - */ - if (starts_with_proc_or_sys(filename)) { + if (path_needs_redirected(filename)) { char redirfile[PATH_MAX]; if (!redirect_path(filename, redirfile, 
sizeof(redirfile))) @@ -146,7 +166,7 @@ int access(const char *name, int type) if (!real_access) real_access = dlsym(RTLD_NEXT, "access"); - if (starts_with_proc_or_sys(name)) { + if (path_needs_redirected(name)) { char redirname[PATH_MAX]; if (!redirect_path(name, redirname, sizeof(redirname))) @@ -156,6 +176,7 @@ int access(const char *name, int type) return real_access(name, type); } +/* NOLINTNEXTLINE(readability-inconsistent-declaration-parameter-name) */ int __xstat(int ver, const char *pathname, struct stat *buf) { static int (*real_xstat)(int ver, const char *pathname, @@ -164,10 +185,7 @@ int __xstat(int ver, const char *pathname, struct stat *buf) if (!real_xstat) real_xstat = dlsym(RTLD_NEXT, "__xstat"); - /* - * Redirect /proc and /sys to our dummy filesystem - */ - if (starts_with_proc_or_sys(pathname)) { + if (path_needs_redirected(pathname)) { char redirname[PATH_MAX]; if (!redirect_path(pathname, redirname, sizeof(redirname))) @@ -177,6 +195,7 @@ int __xstat(int ver, const char *pathname, struct stat *buf) return real_xstat(ver, pathname, buf); } +/* NOLINTNEXTLINE(readability-inconsistent-declaration-parameter-name) */ int __xstat64(int ver, const char *pathname, struct stat64 *buf) { static int (*real_xstat64)(int ver, const char *pathname, @@ -185,10 +204,7 @@ int __xstat64(int ver, const char *pathname, struct stat64 *buf) if (!real_xstat64) real_xstat64 = dlsym(RTLD_NEXT, "__xstat64"); - /* - * Redirect /proc and /sys to our dummy filesystem - */ - if (starts_with_proc_or_sys(pathname)) { + if (path_needs_redirected(pathname)) { char redirname[PATH_MAX]; if (!redirect_path(pathname, redirname, sizeof(redirname))) @@ -197,3 +213,21 @@ int __xstat64(int ver, const char *pathname, struct stat64 *buf) return real_xstat64(ver, pathname, buf); } + +/* NOLINTNEXTLINE(readability-inconsistent-declaration-parameter-name) */ +void *dlopen(const char *filename, int flags) +{ + static void *(*real_dlopen)(const char *filename, int flags); + + if 
(!real_dlopen) + real_dlopen = dlsym(RTLD_NEXT, "dlopen"); + + if (path_needs_redirected(filename)) { + char redirname[PATH_MAX]; + + if (!redirect_path(filename, redirname, sizeof(redirname))) + return real_dlopen(redirname, flags); + } + + return real_dlopen(filename, flags); +} diff --git a/tests/whole_dp/src/dp_test_switch.c b/tests/whole_dp/src/dp_test_switch.c index 2a1fb2c7..bcbf53dc 100644 --- a/tests/whole_dp/src/dp_test_switch.c +++ b/tests/whole_dp/src/dp_test_switch.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2018-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2018 ATT, Inc. * All rights reserved. * @@ -11,19 +11,17 @@ #include #include "ether.h" #include "dp_test.h" -#include "dp_test_lib.h" +#include "dp_test_lib_internal.h" #include "dp_test_lib_exp.h" -#include "dp_test_lib_intf.h" +#include "dp_test_lib_intf_internal.h" #include "dp_test_lib_pkt.h" -#include "dp_test_pktmbuf_lib.h" -#include "dp_test_netlink_state.h" +#include "dp_test_pktmbuf_lib_internal.h" +#include "dp_test_netlink_state_internal.h" #include "fal_plugin_framer.h" #include "ip_funcs.h" #include "in_cksum.h" #include "fal.h" -#define DP_TEST_FAL_PLUGIN ".libs/fal_plugin_test.so" - static int (*fal_plugin_add_hdr)(const char *name, struct rte_mbuf *mbuf); static int (*fal_plugin_rx_qdirect)(const char *name, struct rte_mbuf *mbuf); static bool (*fal_plugin_enable_rx_framer)(bool enabled); @@ -32,7 +30,7 @@ static int (*fal_plugin_bp_from_swport)(const char *name, uint16_t *dpdk_port); static void dp_test_load_fal_plugin(void) { - void *lib = dlopen(DP_TEST_FAL_PLUGIN, RTLD_LAZY); + void *lib = dlopen(platform_cfg.fal_plugin, RTLD_LAZY); dp_test_assert_internal(lib); fal_plugin_add_hdr = dlsym(lib, "fal_plugin_add_ut_framer_hdr"); @@ -52,7 +50,7 @@ dp_test_edsa_tag_insert(struct rte_mbuf *pak, const char *port_name, bool tx) if (tx) return; - struct ether_hdr *eh = ethhdr(pak); + struct 
rte_ether_hdr *eh = ethhdr(pak); struct edsa_hdr *edsa = (struct edsa_hdr *)&eh->ether_type; DSA_SET_TAG_TYPE(edsa, DSA_TAG_TYPE_FORWARD); @@ -107,10 +105,10 @@ DP_START_TEST(switch_unicast, switch_unicast_tx) mac_a = "00:00:a4:00:33:aa"; mac_b = "00:00:a4:00:44:bb"; - dp_test_netlink_set_interface_l2("sw_port_0_0"); + dp_test_netlink_set_interface_l2("dp1sw_port_0_0"); dp_test_intf_switch_create("switch0"); - dp_test_intf_switch_add_port("switch0", "sw_port_0_0"); + dp_test_intf_switch_add_port("switch0", "dp1sw_port_0_0"); dp_test_intf_switch_add_port("switch0", "dp2T1"); /* @@ -126,16 +124,18 @@ DP_START_TEST(switch_unicast, switch_unicast_tx) * Transparent bridging so we expect pak to be identical. */ exp = dp_test_exp_create(test_pak); - dp_test_edsa_tag_insert(dp_test_exp_get_pak(exp), "sw_port_0_0", 1); + dp_test_edsa_tag_insert(dp_test_exp_get_pak(exp), "dp1sw_port_0_0", 1); dp_test_exp_set_oif_name(exp, dp_test_bp_intf_from_switch_port( - "sw_port_0_0")); + "dp1sw_port_0_0")); dp_test_pak_receive(test_pak, "dp2T1", exp); - dp_test_intf_switch_remove_port("switch0", "sw_port_0_0"); + dp_test_intf_switch_remove_port("switch0", "dp1sw_port_0_0"); dp_test_intf_switch_remove_port("switch0", "dp2T1"); dp_test_intf_switch_del("switch0"); + + dp_test_netlink_del_interface_l2("dp1sw_port_0_0"); } DP_END_TEST; DP_START_TEST(switch_unicast, switch_unicast_rx_port_0) @@ -149,8 +149,10 @@ DP_START_TEST(switch_unicast, switch_unicast_rx_port_0) mac_b = "00:00:a4:00:44:cc"; fal_plugin_enable_rx_framer(true); + dp_test_netlink_set_interface_l2("dp1sw_port_0_0"); + dp_test_intf_switch_create("switch0"); - dp_test_intf_switch_add_port("switch0", "sw_port_0_0"); + dp_test_intf_switch_add_port("switch0", "dp1sw_port_0_0"); dp_test_intf_switch_add_port("switch0", "dp2T1"); /* @@ -166,7 +168,7 @@ DP_START_TEST(switch_unicast, switch_unicast_rx_port_0) * Transparent bridging so we expect pak to be identical. 
*/ exp = dp_test_exp_create(test_pak); - dp_test_edsa_tag_insert(test_pak, "sw_port_0_0", 0); + dp_test_edsa_tag_insert(test_pak, "dp1sw_port_0_0", 0); dp_test_exp_set_oif_name(exp, "dp2T1"); @@ -175,12 +177,14 @@ DP_START_TEST(switch_unicast, switch_unicast_rx_port_0) */ dp_test_pak_receive(test_pak, dp_test_bp_intf_from_switch_port( - "sw_port_0_0"), exp); + "dp1sw_port_0_0"), exp); - dp_test_intf_switch_remove_port("switch0", "sw_port_0_0"); + dp_test_intf_switch_remove_port("switch0", "dp1sw_port_0_0"); dp_test_intf_switch_remove_port("switch0", "dp2T1"); dp_test_intf_switch_del("switch0"); + dp_test_netlink_del_interface_l2("dp1sw_port_0_0"); + fal_plugin_enable_rx_framer(false); } DP_END_TEST; @@ -195,11 +199,13 @@ DP_START_TEST(switch_unicast, switch_unicast_tx_tagged) bridge_vlan_set_add(allowed_vlans, 10); + dp_test_netlink_set_interface_l2("dp1sw_port_0_0"); + dp_test_intf_switch_create("switch0"); dp_test_intf_bridge_enable_vlan_filter("switch0"); - dp_test_intf_switch_add_port("switch0", "sw_port_0_0"); - dp_test_intf_bridge_port_set_vlans("switch0", "sw_port_0_0", + dp_test_intf_switch_add_port("switch0", "dp1sw_port_0_0"); + dp_test_intf_bridge_port_set_vlans("switch0", "dp1sw_port_0_0", 0, allowed_vlans, NULL); dp_test_intf_switch_add_port("switch0", "dp2T1"); dp_test_intf_bridge_port_set_vlans("switch0", "dp2T1", @@ -222,18 +228,19 @@ DP_START_TEST(switch_unicast, switch_unicast_tx_tagged) exp = dp_test_exp_create(test_pak); dp_test_switch_insert_vlan_hdr(dp_test_exp_get_pak(exp)); - dp_test_edsa_tag_insert(dp_test_exp_get_pak(exp), "sw_port_0_0", 1); + dp_test_edsa_tag_insert(dp_test_exp_get_pak(exp), "dp1sw_port_0_0", 1); dp_test_exp_set_oif_name(exp, dp_test_bp_intf_from_switch_port( - "sw_port_0_0")); + "dp1sw_port_0_0")); exp->exp_pak[0]->vlan_tci = 0; dp_test_pak_receive(test_pak, "dp2T1", exp); - dp_test_intf_switch_remove_port("switch0", "sw_port_0_0"); + dp_test_intf_switch_remove_port("switch0", "dp1sw_port_0_0"); 
dp_test_intf_switch_remove_port("switch0", "dp2T1"); dp_test_intf_switch_del("switch0"); + dp_test_netlink_del_interface_l2("dp1sw_port_0_0"); bridge_vlan_set_free(allowed_vlans); } DP_END_TEST; @@ -253,11 +260,13 @@ DP_START_TEST(switch_unicast, switch_unicast_rx_tagged) mac_a = "00:00:a4:00:33:dd"; mac_b = "00:00:a4:00:44:cc"; + dp_test_netlink_set_interface_l2("dp1sw_port_0_0"); + dp_test_intf_switch_create("switch0"); dp_test_intf_bridge_enable_vlan_filter("switch0"); - dp_test_intf_switch_add_port("switch0", "sw_port_0_0"); - dp_test_intf_bridge_port_set_vlans("switch0", "sw_port_0_0", + dp_test_intf_switch_add_port("switch0", "dp1sw_port_0_0"); + dp_test_intf_bridge_port_set_vlans("switch0", "dp1sw_port_0_0", 0, allowed_vlans, NULL); dp_test_intf_switch_add_port("switch0", "dp2T1"); dp_test_intf_bridge_port_set_vlans("switch0", "dp2T1", @@ -282,7 +291,7 @@ DP_START_TEST(switch_unicast, switch_unicast_rx_tagged) * the switch. */ dp_test_switch_insert_vlan_hdr(test_pak); - dp_test_edsa_tag_insert(test_pak, "sw_port_0_0", 0); + dp_test_edsa_tag_insert(test_pak, "dp1sw_port_0_0", 0); dp_test_exp_set_oif_name(exp, "dp2T1"); @@ -291,11 +300,12 @@ DP_START_TEST(switch_unicast, switch_unicast_rx_tagged) */ dp_test_pak_receive(test_pak, dp_test_bp_intf_from_switch_port( - "sw_port_0_0"), exp); + "dp1sw_port_0_0"), exp); - dp_test_intf_switch_remove_port("switch0", "sw_port_0_0"); + dp_test_intf_switch_remove_port("switch0", "dp1sw_port_0_0"); dp_test_intf_switch_remove_port("switch0", "dp2T1"); dp_test_intf_switch_del("switch0"); + dp_test_netlink_del_interface_l2("dp1sw_port_0_0"); bridge_vlan_set_free(allowed_vlans); fal_plugin_enable_rx_framer(false); @@ -311,10 +321,10 @@ DP_START_TEST(switch_unicast, switch_unicast_rx_port_7) mac_a = "00:00:a4:00:33:dd"; mac_b = "00:00:a4:00:44:cc"; - dp_test_netlink_set_interface_l2("sw_port_0_7"); + dp_test_netlink_set_interface_l2("dp1sw_port_0_7"); dp_test_intf_switch_create("switch0"); - 
dp_test_intf_switch_add_port("switch0", "sw_port_0_7"); + dp_test_intf_switch_add_port("switch0", "dp1sw_port_0_7"); dp_test_intf_switch_add_port("switch0", "dp2T1"); /* @@ -331,13 +341,14 @@ DP_START_TEST(switch_unicast, switch_unicast_rx_port_7) */ exp = dp_test_exp_create(test_pak); - fal_plugin_rx_qdirect("sw_port_0_7", test_pak); + fal_plugin_rx_qdirect("dp1sw_port_0_7", test_pak); dp_test_exp_set_oif_name(exp, "dp2T1"); - dp_test_pak_receive(test_pak, "sw_port_0_7", exp); + dp_test_pak_receive(test_pak, "dp1sw_port_0_7", exp); - dp_test_intf_switch_remove_port("switch0", "sw_port_0_7"); + dp_test_intf_switch_remove_port("switch0", "dp1sw_port_0_7"); dp_test_intf_switch_remove_port("switch0", "dp2T1"); dp_test_intf_switch_del("switch0"); + dp_test_netlink_del_interface_l2("dp1sw_port_0_7"); } DP_END_TEST; diff --git a/tests/whole_dp/src/dp_test_switch_vlan.c b/tests/whole_dp/src/dp_test_switch_vlan.c index 97319a9a..6250abf6 100644 --- a/tests/whole_dp/src/dp_test_switch_vlan.c +++ b/tests/whole_dp/src/dp_test_switch_vlan.c @@ -1,19 +1,19 @@ /* - * Copyright (c) 2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2019-2020, AT&T Intellectual Property. All rights reserved. 
* * SPDX-License-Identifier: LGPL-2.1-only */ #include "dp_test.h" #include "dp_test_console.h" -#include "dp_test_lib.h" +#include "dp_test_lib_internal.h" #include "dp_test_lib_exp.h" -#include "dp_test_lib_intf.h" +#include "dp_test_lib_intf_internal.h" #include "dp_test_lib_pkt.h" -#include "dp_test_pktmbuf_lib.h" -#include "dp_test_netlink_state.h" +#include "dp_test_pktmbuf_lib_internal.h" +#include "dp_test_netlink_state_internal.h" -#include "bridge.h" +#include "if/bridge/bridge.h" #include "ip_funcs.h" #define dp_test_clear_vlan_stats(name) \ @@ -129,7 +129,7 @@ DP_START_TEST(switch_vlan_stats, switch_vlan_stats) dp_test_verify_vlan_stats("sw0", 10, &sw_stats_zero); dp_test_verify_vlan_stats("sw0", 10, &sw_stats); - /* Create frame from mac_a to mac_b */ + /* Create unicast frame from mac_a to mac_b */ mac_a = "00:00:a4:00:33:dd"; mac_b = "00:00:a4:00:44:cc"; test_pak = dp_test_create_8021q_l2_pak(mac_b, mac_a, 10, @@ -143,8 +143,29 @@ DP_START_TEST(switch_vlan_stats, switch_vlan_stats) sw_stats_inc = sw_stats; sw_stats_inc.rx_octets += 78; sw_stats_inc.rx_pkts++; + sw_stats_inc.rx_ucast_pkts++; sw_stats_inc.tx_octets += 78; sw_stats_inc.tx_pkts++; + sw_stats_inc.tx_ucast_pkts++; + dp_test_verify_vlan_stats("sw0", 10, &sw_stats_inc); + + /* Create non unicast frame from mac_a to mac_b */ + mac_a = "00:00:a4:00:33:dd"; + mac_b = "ff:ff:ff:ff:ff:ff"; + test_pak = dp_test_create_8021q_l2_pak(mac_b, mac_a, 10, + ETH_P_8021Q, + DP_TEST_ET_LLDP, + 1, &len); + exp = dp_test_exp_create(test_pak); + dp_test_exp_set_oif_name(exp, "dp2T2"); + + dp_test_pak_receive(test_pak, "dp2T1", exp); + sw_stats_inc.rx_octets += 78; + sw_stats_inc.rx_pkts++; + sw_stats_inc.rx_nucast_pkts++; + sw_stats_inc.tx_octets += 78; + sw_stats_inc.tx_pkts++; + sw_stats_inc.tx_nucast_pkts++; dp_test_verify_vlan_stats("sw0", 10, &sw_stats_inc); dp_test_intf_switch_remove_port("sw0", "dp2T2"); diff --git a/tests/whole_dp/src/dp_test_tcp_mss_clamp.c b/tests/whole_dp/src/dp_test_tcp_mss_clamp.c 
index d84afa37..84ebd369 100644 --- a/tests/whole_dp/src/dp_test_tcp_mss_clamp.c +++ b/tests/whole_dp/src/dp_test_tcp_mss_clamp.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015-2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -22,11 +22,11 @@ #include "dp_test.h" #include "dp_test_controller.h" #include "dp_test_console.h" -#include "dp_test_netlink_state.h" -#include "dp_test_cmd_check.h" -#include "dp_test_lib.h" -#include "dp_test_pktmbuf_lib.h" -#include "dp_test_lib_intf.h" +#include "dp_test_netlink_state_internal.h" +#include "dp_test/dp_test_cmd_check.h" +#include "dp_test_lib_internal.h" +#include "dp_test_lib_intf_internal.h" +#include "dp_test_pktmbuf_lib_internal.h" #include "dp_test_lib_exp.h" #include "dp_test_lib_pkt.h" #include "dp_test_lib_tcp.h" @@ -42,8 +42,6 @@ dp_test_create_and_send_tcpmss_msg(bool enable, int mtu_val) { int len; - void *buf; - TCPMSSConfig tcpmsscon = TCPMSSCONFIG__INIT; /* set values here */ @@ -67,23 +65,7 @@ dp_test_create_and_send_tcpmss_msg(bool enable, tcpmssconfig__pack(&tcpmsscon, buf2); - DataplaneEnvelope msg = DATAPLANE_ENVELOPE__INIT; - msg.type = strdup("vyatta:tcp-mss"); - msg.msg.data = buf2; - msg.msg.len = len; - - len = dataplane_envelope__get_packed_size(&msg); - - buf = malloc(len); - dp_test_assert_internal(buf); - - dataplane_envelope__pack(&msg, buf); - - free(buf2); - free(msg.type); - - dp_test_send_config_src_pb(dp_test_cont_src_get(), buf, len); - free(buf); + dp_test_lib_pb_wrap_and_send_pb("vyatta:tcp-mss", buf2, len); } /* @@ -248,7 +230,7 @@ dp_test_tcp_mss_opt(uint8_t flags, uint8_t *opts, */ static void dp_test_tcp_test_cb(const char *str, - uint pktno, enum dp_test_tcp_dir dir, + uint pktno, bool forw, uint8_t flags, struct dp_test_pkt_desc_t *pre, struct dp_test_pkt_desc_t *post, @@ -337,122 +319,53 @@ 
DP_START_TEST(tcp_mss_clamp_ipv4, test1) "aa:bb:cc:18:0:1"); - struct dp_test_pkt_desc_t ins_pre = { - .text = "Inside pre", - .len = 0, - .ether_type = ETHER_TYPE_IPv4, - .l3_src = "100.101.102.103", - .l2_src = "aa:bb:cc:16:0:20", - .l3_dst = "200.201.202.203", - .l2_dst = dp1T0_mac, - .proto = IPPROTO_TCP, - .l4 = { - .tcp = { - .sport = 49152, - .dport = 80, - .flags = 0, - .seq = 0, - .ack = 0, - .win = 8192, - .opts = NULL - } - }, - .rx_intf = "dp1T0", - .tx_intf = "dp2T1" - }; + struct dp_test_pkt_desc_t *ins_pre, *ins_post; + struct dp_test_pkt_desc_t *outs_pre, *outs_post; - struct dp_test_pkt_desc_t ins_post = { - .text = "Inside post", - .len = 0, - .ether_type = ETHER_TYPE_IPv4, - .l3_src = "100.101.102.103", - .l2_src = dp2T1_mac, - .l3_dst = "200.201.202.203", - .l2_dst = "aa:bb:cc:18:0:1", - .proto = IPPROTO_TCP, - .l4 = { - .tcp = { - .sport = 49152, - .dport = 80, - .flags = 0, - .seq = 0, - .ack = 0, - .win = 8192, - .opts = NULL - } - }, - .rx_intf = "dp1T0", - .tx_intf = "dp2T1" - }; + ins_pre = dpt_pdesc_v4_create( + "Inside pre", IPPROTO_TCP, + "aa:bb:cc:16:0:20", "100.101.102.103", 49152, + dp1T0_mac, "200.201.202.203", 80, + "dp1T0", "dp2T1"); - struct dp_test_pkt_desc_t outs_pre = { - .text = "Outside pre", - .len = 0, - .ether_type = ETHER_TYPE_IPv4, - .l3_src = "200.201.202.203", - .l2_src = "aa:bb:cc:18:0:1", - .l3_dst = "100.101.102.103", - .l2_dst = dp2T1_mac, - .proto = IPPROTO_TCP, - .l4 = { - .tcp = { - .sport = 80, - .dport = 49152, - .flags = 0, - .seq = 0, - .ack = 0, - .win = 8192, - .opts = NULL - } - }, - .rx_intf = "dp2T1", - .tx_intf = "dp1T0" - }; + ins_post = dpt_pdesc_v4_create( + "Inside post", IPPROTO_TCP, + dp2T1_mac, "100.101.102.103", 49152, + "aa:bb:cc:18:0:1", "200.201.202.203", 80, + "dp1T0", "dp2T1"); - struct dp_test_pkt_desc_t outs_post = { - .text = "Outside post", - .len = 0, - .ether_type = ETHER_TYPE_IPv4, - .l3_src = "200.201.202.203", - .l2_src = dp1T0_mac, - .l3_dst = "100.101.102.103", - .l2_dst = 
"aa:bb:cc:16:0:20", - .proto = IPPROTO_TCP, - .l4 = { - .tcp = { - .sport = 80, - .dport = 49152, - .flags = 0, - .seq = 0, - .ack = 0, - .win = 8192, - .opts = NULL - } - }, - .rx_intf = "dp2T1", - .tx_intf = "dp1T0" - }; + outs_pre = dpt_pdesc_v4_create( + "Outside pre", IPPROTO_TCP, + "aa:bb:cc:18:0:1", "200.201.202.203", 80, + dp2T1_mac, "100.101.102.103", 49152, + "dp2T1", "dp1T0"); + + outs_post = dpt_pdesc_v4_create( + "Outside post", IPPROTO_TCP, + dp1T0_mac, "200.201.202.203", 80, + "aa:bb:cc:16:0:20", "100.101.102.103", 49152, + "dp2T1", "dp1T0"); - struct dp_test_tcp_call tcp_call = { - .str[0] = '\0', + struct dpt_tcp_flow tcp_call = { + .text[0] = '\0', .isn = {0, 0}, - .desc[DP_DIR_FORW] = { - .pre = &ins_pre, - .post = &ins_post, + .desc[DPT_FORW] = { + .pre = ins_pre, + .pst = ins_post, }, - .desc[DP_DIR_BACK] = { - .pre = &outs_pre, - .post = &outs_post, + .desc[DPT_BACK] = { + .pre = outs_pre, + .pst = outs_post, }, .test_cb = dp_test_tcp_test_cb, .post_cb = NULL, }; - struct dp_test_tcp_flow_pkt tcp_pkt1[] = { - {DP_DIR_FORW, TH_SYN, 0, NULL}, - {DP_DIR_BACK, TH_SYN | TH_ACK, 0, NULL}, - {DP_DIR_FORW, TH_ACK, 0, NULL}, - {DP_DIR_BACK, TH_ACK, 20, NULL}, + struct dpt_tcp_flow_pkt tcp_pkt1[] = { + {DPT_FORW, TH_SYN, 0, NULL, 0, NULL}, + {DPT_BACK, TH_SYN | TH_ACK, 0, NULL, 0, NULL}, + {DPT_FORW, TH_ACK, 0, NULL, 0, NULL}, + {DPT_BACK, TH_ACK, 20, NULL, 0, NULL}, /* call truncated ... 
*/ }; @@ -480,7 +393,7 @@ DP_START_TEST(tcp_mss_clamp_ipv4, test1) ctx.pre_mss = 1600; ctx.mtu = 1500; - spush(tcp_call.str, sizeof(tcp_call.str), + spush(tcp_call.text, sizeof(tcp_call.text), "TCP mss clamp Test 1.1 - mtu"); /* Enable the feature and check it's there */ @@ -496,7 +409,7 @@ DP_START_TEST(tcp_mss_clamp_ipv4, test1) dp_test_wait_for_pl_feat("dp1T0", "vyatta:ipv4-tcp-mss-out", "ipv4-out"); - dp_test_tcp_call(&tcp_call, tcp_pkt1, ARRAY_SIZE(tcp_pkt1), &ctx, 0); + dpt_tcp_call(&tcp_call, tcp_pkt1, ARRAY_SIZE(tcp_pkt1), 0, 0, &ctx, 0); /***************************************************************** @@ -511,7 +424,7 @@ DP_START_TEST(tcp_mss_clamp_ipv4, test1) /* IP + TCP (ignore options) */ ctx.l3l4_size = 20 + 20; - spush(tcp_call.str, sizeof(tcp_call.str), + spush(tcp_call.text, sizeof(tcp_call.text), "TCP mss clamp Test 1.2 - mtu-minus"); /* Enable the feature and check it's there */ @@ -527,7 +440,7 @@ DP_START_TEST(tcp_mss_clamp_ipv4, test1) dp_test_wait_for_pl_feat("dp1T0", "vyatta:ipv4-tcp-mss-out", "ipv4-out"); - dp_test_tcp_call(&tcp_call, tcp_pkt1, ARRAY_SIZE(tcp_pkt1), &ctx, 0); + dpt_tcp_call(&tcp_call, tcp_pkt1, ARRAY_SIZE(tcp_pkt1), 0, 0, &ctx, 0); /***************************************************************** @@ -542,7 +455,7 @@ DP_START_TEST(tcp_mss_clamp_ipv4, test1) /* IP + TCP */ ctx.l3l4_size = 20 + 20; - spush(tcp_call.str, sizeof(tcp_call.str), + spush(tcp_call.text, sizeof(tcp_call.text), "TCP mss clamp Test 1.3 - limit on input interface"); /* Enable the feature and check it's there */ @@ -558,7 +471,7 @@ DP_START_TEST(tcp_mss_clamp_ipv4, test1) dp_test_wait_for_pl_feat("dp1T0", "vyatta:ipv4-tcp-mss-out", "ipv4-out"); - dp_test_tcp_call(&tcp_call, tcp_pkt1, ARRAY_SIZE(tcp_pkt1), &ctx, 0); + dpt_tcp_call(&tcp_call, tcp_pkt1, ARRAY_SIZE(tcp_pkt1), 0, 0, &ctx, 0); /* Disable the feature */ @@ -594,7 +507,7 @@ DP_START_TEST(tcp_mss_clamp_ipv4, test1) /* IP + TCP */ ctx.l3l4_size = 20 + 20; - spush(tcp_call.str, 
sizeof(tcp_call.str), + spush(tcp_call.text, sizeof(tcp_call.text), "TCP mss clamp Test 1.4 - limit on output interface"); /* Enable the feature and check it's there */ @@ -609,7 +522,7 @@ DP_START_TEST(tcp_mss_clamp_ipv4, test1) dp_test_wait_for_pl_feat("dp2T1", "vyatta:ipv4-tcp-mss-out", "ipv4-out"); - dp_test_tcp_call(&tcp_call, tcp_pkt1, ARRAY_SIZE(tcp_pkt1), &ctx, 0); + dpt_tcp_call(&tcp_call, tcp_pkt1, ARRAY_SIZE(tcp_pkt1), 0, 0, &ctx, 0); /* Disable the feature */ @@ -639,7 +552,7 @@ DP_START_TEST(tcp_mss_clamp_ipv4, test1) /* IP + TCP */ ctx.l3l4_size = 20 + 20; - spush(tcp_call.str, sizeof(tcp_call.str), + spush(tcp_call.text, sizeof(tcp_call.text), "TCP mss clamp Test 1.5 - limit on both interfaces"); /* Enable the feature on dp1T0 and check it's there */ @@ -668,7 +581,7 @@ DP_START_TEST(tcp_mss_clamp_ipv4, test1) dp_test_wait_for_pl_feat("dp2T1", "vyatta:ipv4-tcp-mss-out", "ipv4-out"); - dp_test_tcp_call(&tcp_call, tcp_pkt1, ARRAY_SIZE(tcp_pkt1), &ctx, 0); + dpt_tcp_call(&tcp_call, tcp_pkt1, ARRAY_SIZE(tcp_pkt1), 0, 0, &ctx, 0); /* Disable the feature on dp1T0 */ @@ -699,6 +612,11 @@ DP_START_TEST(tcp_mss_clamp_ipv4, test1) "ipv4-out"); + free(ins_pre); + free(ins_post); + free(outs_pre); + free(outs_post); + /************************************************************* * Cleanup *************************************************************/ @@ -739,123 +657,53 @@ DP_START_TEST(tcp_mss_clamp_ipv6, test1) dp_test_netlink_add_neigh("dp2T1", "2002:2:2::1", "aa:bb:cc:dd:2:b1"); - - struct dp_test_pkt_desc_t ins_pre = { - .text = "Inside pre", - .len = 0, - .ether_type = ETHER_TYPE_IPv6, - .l3_src = "2001:1:1::2", - .l2_src = "aa:bb:cc:dd:1:a1", - .l3_dst = "2002:2:2::1", - .l2_dst = dp1T0_mac, - .proto = IPPROTO_TCP, - .l4 = { - .tcp = { - .sport = 0xDEAD, - .dport = 0xBEEF, - .flags = 0, - .seq = 0, - .ack = 0, - .win = 8192, - .opts = NULL - } - }, - .rx_intf = "dp1T0", - .tx_intf = "dp2T1" - }; - - struct dp_test_pkt_desc_t ins_post = { - .text = 
"Inside post", - .len = 0, - .ether_type = ETHER_TYPE_IPv6, - .l3_src = "2001:1:1::2", - .l2_src = dp2T1_mac, - .l3_dst = "2002:2:2::1", - .l2_dst = "aa:bb:cc:dd:2:b1", - .proto = IPPROTO_TCP, - .l4 = { - .tcp = { - .sport = 0xDEAD, - .dport = 0xBEEF, - .flags = 0, - .seq = 0, - .ack = 0, - .win = 8192, - .opts = NULL - } - }, - .rx_intf = "dp1T0", - .tx_intf = "dp2T1" - }; - - struct dp_test_pkt_desc_t outs_pre = { - .text = "Outside pre", - .len = 0, - .ether_type = ETHER_TYPE_IPv6, - .l3_src = "2002:2:2::1", - .l2_src = "aa:bb:cc:dd:2:b1", - .l3_dst = "2001:1:1::2", - .l2_dst = dp2T1_mac, - .proto = IPPROTO_TCP, - .l4 = { - .tcp = { - .sport = 0xBEEF, - .dport = 0xDEAD, - .flags = 0, - .seq = 0, - .ack = 0, - .win = 8192, - .opts = NULL - } - }, - .rx_intf = "dp2T1", - .tx_intf = "dp1T0" - }; - - struct dp_test_pkt_desc_t outs_post = { - .text = "Outside post", - .len = 0, - .ether_type = ETHER_TYPE_IPv6, - .l3_src = "2002:2:2::1", - .l2_src = dp1T0_mac, - .l3_dst = "2001:1:1::2", - .l2_dst = "aa:bb:cc:dd:1:a1", - .proto = IPPROTO_TCP, - .l4 = { - .tcp = { - .sport = 0xBEEF, - .dport = 0xDEAD, - .flags = 0, - .seq = 0, - .ack = 0, - .win = 8192, - .opts = NULL - } - }, - .rx_intf = "dp2T1", - .tx_intf = "dp1T0" - }; - - struct dp_test_tcp_call tcp_call = { - .str[0] = '\0', + struct dp_test_pkt_desc_t *ins_pre, *ins_post; + struct dp_test_pkt_desc_t *outs_pre, *outs_post; + + ins_pre = dpt_pdesc_v6_create( + "Inside pre", IPPROTO_TCP, + "aa:bb:cc:dd:1:a1", "2001:1:1::2", 0xDEAD, + dp1T0_mac, "2002:2:2::1", 0xBEEF, + "dp1T0", "dp2T1"); + + ins_post = dpt_pdesc_v6_create( + "Inside post", IPPROTO_TCP, + dp2T1_mac, "2001:1:1::2", 0xDEAD, + "aa:bb:cc:dd:2:b1", "2002:2:2::1", 0xBEEF, + "dp1T0", "dp2T1"); + + outs_pre = dpt_pdesc_v6_create( + "Outside pre", IPPROTO_TCP, + "aa:bb:cc:dd:2:b1", "2002:2:2::1", 0xBEEF, + dp2T1_mac, "2001:1:1::2", 0xDEAD, + "dp2T1", "dp1T0"); + + outs_post = dpt_pdesc_v6_create( + "Outside post", IPPROTO_TCP, + dp1T0_mac, "2002:2:2::1", 
0xBEEF, + "aa:bb:cc:dd:1:a1", "2001:1:1::2", 0xDEAD, + "dp2T1", "dp1T0"); + + struct dpt_tcp_flow tcp_call = { + .text[0] = '\0', .isn = {0, 0}, - .desc[DP_DIR_FORW] = { - .pre = &ins_pre, - .post = &ins_post, + .desc[DPT_FORW] = { + .pre = ins_pre, + .pst = ins_post, }, - .desc[DP_DIR_BACK] = { - .pre = &outs_pre, - .post = &outs_post, + .desc[DPT_BACK] = { + .pre = outs_pre, + .pst = outs_post, }, .test_cb = dp_test_tcp_test_cb, .post_cb = NULL, }; - struct dp_test_tcp_flow_pkt tcp_pkt1[] = { - {DP_DIR_FORW, TH_SYN, 0, NULL}, - {DP_DIR_BACK, TH_SYN | TH_ACK, 0, NULL}, - {DP_DIR_FORW, TH_ACK, 0, NULL}, - {DP_DIR_BACK, TH_ACK, 20, NULL}, + struct dpt_tcp_flow_pkt tcp_pkt1[] = { + {DPT_FORW, TH_SYN, 0, NULL, 0, NULL}, + {DPT_BACK, TH_SYN | TH_ACK, 0, NULL, 0, NULL}, + {DPT_FORW, TH_ACK, 0, NULL, 0, NULL}, + {DPT_BACK, TH_ACK, 20, NULL, 0, NULL}, /* call truncated ... */ }; @@ -878,7 +726,7 @@ DP_START_TEST(tcp_mss_clamp_ipv6, test1) .l3l4_size = 40 + 20, }; - spush(tcp_call.str, sizeof(tcp_call.str), + spush(tcp_call.text, sizeof(tcp_call.text), "npf TCP mss clamp Test 2.1 - mtu"); /* Enable the feature and check it's there */ @@ -894,7 +742,7 @@ DP_START_TEST(tcp_mss_clamp_ipv6, test1) dp_test_wait_for_pl_feat("dp1T0", "vyatta:ipv6-tcp-mss-out", "ipv6-out"); - dp_test_tcp_call(&tcp_call, tcp_pkt1, ARRAY_SIZE(tcp_pkt1), &ctx, 0); + dpt_tcp_call(&tcp_call, tcp_pkt1, ARRAY_SIZE(tcp_pkt1), 0, 0, &ctx, 0); /***************************************************************** @@ -908,7 +756,7 @@ DP_START_TEST(tcp_mss_clamp_ipv6, test1) /* IP + TCP */ ctx.l3l4_size = 40 + 20; - spush(tcp_call.str, sizeof(tcp_call.str), + spush(tcp_call.text, sizeof(tcp_call.text), "npf TCP mss clamp Test 2.2 - mtu-minus"); /* Enable the feature and check it's there */ @@ -924,7 +772,7 @@ DP_START_TEST(tcp_mss_clamp_ipv6, test1) dp_test_wait_for_pl_feat("dp1T0", "vyatta:ipv6-tcp-mss-out", "ipv6-out"); - dp_test_tcp_call(&tcp_call, tcp_pkt1, ARRAY_SIZE(tcp_pkt1), &ctx, 0); + 
dpt_tcp_call(&tcp_call, tcp_pkt1, ARRAY_SIZE(tcp_pkt1), 0, 0, &ctx, 0); /***************************************************************** @@ -939,7 +787,7 @@ DP_START_TEST(tcp_mss_clamp_ipv6, test1) /* IP + TCP */ ctx.l3l4_size = 40 + 20; - spush(tcp_call.str, sizeof(tcp_call.str), + spush(tcp_call.text, sizeof(tcp_call.text), "npf TCP mss clamp Test 2.3 - limit on input interface"); /* Enable the feature and check it's there */ @@ -955,7 +803,7 @@ DP_START_TEST(tcp_mss_clamp_ipv6, test1) dp_test_wait_for_pl_feat("dp1T0", "vyatta:ipv6-tcp-mss-out", "ipv6-out"); - dp_test_tcp_call(&tcp_call, tcp_pkt1, ARRAY_SIZE(tcp_pkt1), &ctx, 0); + dpt_tcp_call(&tcp_call, tcp_pkt1, ARRAY_SIZE(tcp_pkt1), 0, 0, &ctx, 0); /* Disable the feature */ @@ -984,7 +832,7 @@ DP_START_TEST(tcp_mss_clamp_ipv6, test1) /* IP + TCP */ ctx.l3l4_size = 40 + 20; - spush(tcp_call.str, sizeof(tcp_call.str), + spush(tcp_call.text, sizeof(tcp_call.text), "npf TCP mss clamp Test 2.4 - limit on output interface"); /* Enable the feature and check it's there */ @@ -1000,7 +848,7 @@ DP_START_TEST(tcp_mss_clamp_ipv6, test1) dp_test_wait_for_pl_feat("dp2T1", "vyatta:ipv6-tcp-mss-out", "ipv6-out"); - dp_test_tcp_call(&tcp_call, tcp_pkt1, ARRAY_SIZE(tcp_pkt1), &ctx, 0); + dpt_tcp_call(&tcp_call, tcp_pkt1, ARRAY_SIZE(tcp_pkt1), 0, 0, &ctx, 0); /* Disable the feature */ @@ -1028,7 +876,7 @@ DP_START_TEST(tcp_mss_clamp_ipv6, test1) /* IP + TCP */ ctx.l3l4_size = 40 + 20; - spush(tcp_call.str, sizeof(tcp_call.str), + spush(tcp_call.text, sizeof(tcp_call.text), "npf TCP mss clamp Test 2.5 - limit on both interface"); /* Enable the feature on dp1T0 and check it's there */ @@ -1057,7 +905,7 @@ DP_START_TEST(tcp_mss_clamp_ipv6, test1) dp_test_wait_for_pl_feat("dp2T1", "vyatta:ipv6-tcp-mss-out", "ipv6-out"); - dp_test_tcp_call(&tcp_call, tcp_pkt1, ARRAY_SIZE(tcp_pkt1), &ctx, 0); + dpt_tcp_call(&tcp_call, tcp_pkt1, ARRAY_SIZE(tcp_pkt1), 0, 0, &ctx, 0); /* Disable the feature on dp1T0 */ @@ -1086,6 +934,11 @@ 
DP_START_TEST(tcp_mss_clamp_ipv6, test1) dp_test_wait_for_pl_feat_gone("dp2T1", "vyatta:ipv6-tcp-mss-out", "ipv6-out"); + free(ins_pre); + free(ins_post); + free(outs_pre); + free(outs_post); + /************************************************************* * Cleanup *************************************************************/ diff --git a/tests/whole_dp/src/dp_test_vrf.c b/tests/whole_dp/src/dp_test_vrf.c index 6e19aca1..bae1fc45 100644 --- a/tests/whole_dp/src/dp_test_vrf.c +++ b/tests/whole_dp/src/dp_test_vrf.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015-2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -19,12 +19,12 @@ #include "dp_test.h" #include "dp_test_controller.h" -#include "dp_test_netlink_state.h" -#include "dp_test_lib.h" -#include "dp_test_lib_intf.h" +#include "dp_test_netlink_state_internal.h" +#include "dp_test_lib_internal.h" +#include "dp_test_lib_intf_internal.h" #include "dp_test_lib_exp.h" -#include "dp_test_pktmbuf_lib.h" -#include "vrf.h" +#include "dp_test_pktmbuf_lib_internal.h" +#include "vrf_internal.h" #define TEST_VRF 50 @@ -89,26 +89,20 @@ DP_START_TEST(vrf_if_cfg, default_stays) /* * Verify default vrf table still exists as well as new vrf. - * dp_test_default_vrf_clean_count() return includes the switch port - * count and so that needs to be deducted from expected results. 
*/ dp_test_wait_for_vrf(TEST_VRF, - dp_test_default_vrf_clean_count() - 1 - - dp_test_intf_switch_port_count()); - dp_test_wait_for_vrf(VRF_DEFAULT_ID, - dp_test_intf_switch_port_count() + 2); + dp_test_default_vrf_clean_count() - 1); + dp_test_wait_for_vrf(VRF_DEFAULT_ID, 2); /* put an interface back into the default VRF */ for (i = 0; i < dp_test_intf_count(); i++) { dp_test_wait_for_vrf( TEST_VRF, - dp_test_default_vrf_clean_count() - i - 1 - - dp_test_intf_switch_port_count()); + dp_test_default_vrf_clean_count() - i - 1); dp_test_intf_port2name(i, if_name); dp_test_netlink_set_interface_vrf(if_name, VRF_DEFAULT_ID); - dp_test_wait_for_vrf(VRF_DEFAULT_ID, - dp_test_intf_switch_port_count() + i + 3); + dp_test_wait_for_vrf(VRF_DEFAULT_ID, i + 3); } dp_test_netlink_del_vrf(TEST_VRF, 0); @@ -301,18 +295,18 @@ DP_DECL_TEST_CASE(vrf_suite, vrf_cfg, NULL, NULL); /* * Test the scenario whereby routes for a VRF arrive before the VRF - * master link creation + * link creation * * Due to the presence of the broker, there is a chance that route * updates for a table that is the main table for a VRF could arrive - * before the link message advising the dataplane of the VRF master - * interface and its association with the table. + * before the link message advising the dataplane of the VRF device + * and its association with the table. * - * Verify that in this sequence of events when the VRF master device + * Verify that in this sequence of events when the VRF device * is signalled that the routes make it into the VRF. Just for good * measure, check that a delete and recreate works too. 
*/ -DP_START_TEST(vrf_cfg, out_of_seq_vrfmaster_v4) +DP_START_TEST(vrf_cfg, out_of_seq_vrf_v4) { char vrf_name[IFNAMSIZ + 1]; uint32_t tableid; @@ -323,8 +317,8 @@ DP_START_TEST(vrf_cfg, out_of_seq_vrfmaster_v4) dp_test_fail_unless(ret, "maximum vrf limit reached\n"); /* Add the VRF route, although we don't know it's a VRF route yet */ - dp_test_nl_add_route_fmt(true, "tbl:%d 2.2.2.0/24 nh int:dp1T1", - tableid); + dp_test_nl_add_route_incomplete_fmt( + "tbl:%d 2.2.2.0/24 nh int:dp1T1", tableid); /* The route shouldn't be there */ dp_test_wait_for_route_gone("vrf:50 2.2.2.0/24 nh int:dp1T1", true, @@ -337,22 +331,19 @@ DP_START_TEST(vrf_cfg, out_of_seq_vrfmaster_v4) dp_test_wait_for_route("vrf:50 2.2.2.0/24 nh int:dp1T1", true); /* Delete and recreate the VRF */ - _dp_test_intf_vrf_master_delete(vrf_name, TEST_VRF, + _dp_test_intf_vrf_if_delete(vrf_name, TEST_VRF, tableid, __FILE__, __LINE__); - _dp_test_intf_vrf_master_create(vrf_name, TEST_VRF, + _dp_test_intf_vrf_if_create(vrf_name, TEST_VRF, tableid, __FILE__, __LINE__); - /* Check the route still appears in the VRF still */ - dp_test_wait_for_route("vrf:50 2.2.2.0/24 nh int:dp1T1", true); + /* Check the route has been deleted */ + dp_test_wait_for_route_gone("vrf:50 2.2.2.0/24 nh int:dp1T1", true, + __FILE__, __func__, __LINE__); - /* Now clean up and verify the route isn't deleted implicitly */ dp_test_netlink_del_vrf(TEST_VRF, 1); - - dp_test_nl_del_route_fmt(true, "tbl:%d 2.2.2.0/24 nh int:dp1T1", - tableid); } DP_END_TEST; -DP_START_TEST(vrf_cfg, out_of_seq_vrfmaster_v6) +DP_START_TEST(vrf_cfg, out_of_seq_vrf_v6) { char vrf_name[IFNAMSIZ + 1]; uint32_t tableid; @@ -363,8 +354,8 @@ DP_START_TEST(vrf_cfg, out_of_seq_vrfmaster_v6) dp_test_fail_unless(ret, "maximum vrf limit reached\n"); /* Add the VRF route, although we don't know it's a VRF route yet */ - dp_test_nl_add_route_fmt(true, "tbl:%d 2:2:2::/64 nh int:dp1T1", - tableid); + dp_test_nl_add_route_incomplete_fmt( + "tbl:%d 2:2:2::/64 nh int:dp1T1", 
tableid); /* The route shouldn't be there */ dp_test_wait_for_route_gone("vrf:50 2:2:2::/64 nh int:dp1T1", true, @@ -377,19 +368,16 @@ DP_START_TEST(vrf_cfg, out_of_seq_vrfmaster_v6) dp_test_wait_for_route("vrf:50 2:2:2::/64 nh int:dp1T1", true); /* Delete and recreate the VRF */ - _dp_test_intf_vrf_master_delete(vrf_name, TEST_VRF, + _dp_test_intf_vrf_if_delete(vrf_name, TEST_VRF, tableid, __FILE__, __LINE__); - _dp_test_intf_vrf_master_create(vrf_name, TEST_VRF, + _dp_test_intf_vrf_if_create(vrf_name, TEST_VRF, tableid, __FILE__, __LINE__); - /* Check the route still appears in the VRF still */ - dp_test_wait_for_route("vrf:50 2:2:2::/64 nh int:dp1T1", true); + /* Check the route has been deleted */ + dp_test_wait_for_route_gone("vrf:50 2:2:2::/64 nh int:dp1T1", true, + __FILE__, __func__, __LINE__); - /* Now clean up and verify the route isn't deleted implicitly */ dp_test_netlink_del_vrf(TEST_VRF, 1); - - dp_test_nl_del_route_fmt(true, "tbl:%d 2:2:2::/64 nh int:dp1T1", - tableid); } DP_END_TEST; DP_DECL_TEST_CASE(vrf_suite, vrf_ip6_cfg, NULL, NULL); @@ -482,7 +470,7 @@ DP_START_TEST(vrf_ip_fwd, vrf_basic_ipv4) test_pak = dp_test_create_ipv4_pak("10.73.0.0", "10.73.2.0", 1, &len); dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T0"), - NULL, ETHER_TYPE_IPv4); + NULL, RTE_ETHER_TYPE_IPV4); /* Create pak we expect to receive on the tx ring */ exp = dp_test_exp_create(test_pak); @@ -491,7 +479,7 @@ DP_START_TEST(vrf_ip_fwd, vrf_basic_ipv4) (void)dp_test_pktmbuf_eth_init(dp_test_exp_get_pak(exp), nh_mac_str, dp_test_intf_name2mac_str("dp2T1"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); dp_test_ipv4_decrement_ttl(dp_test_exp_get_pak(exp)); @@ -551,7 +539,7 @@ DP_START_TEST(vrf_ip_fwd, vrf_basic_ipv6) test_pak = dp_test_create_ipv6_pak("2010:73::", "2010:73:2::", 1, &len); dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T0"), - NULL, ETHER_TYPE_IPv6); + NULL, RTE_ETHER_TYPE_IPV6); /* Create pak we expect to receive on the tx ring */ 
exp = dp_test_exp_create(test_pak); @@ -560,7 +548,7 @@ DP_START_TEST(vrf_ip_fwd, vrf_basic_ipv6) (void)dp_test_pktmbuf_eth_init(dp_test_exp_get_pak(exp), nh_mac_str, dp_test_intf_name2mac_str("dp2T1"), - ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV6); dp_test_ipv6_decrement_ttl(dp_test_exp_get_pak(exp)); @@ -584,6 +572,64 @@ DP_START_TEST(vrf_ip_fwd, vrf_basic_ipv6) dp_test_netlink_del_vrf(TEST_VRF2, 0); } DP_END_TEST; +DP_DECL_TEST_CASE(vrf_suite, vrf_ip_fwd2, NULL, NULL); + +/* + * Add a v6 connected and address, and the v6 multicast route. Then add a + * neighbour. + * + * Delete the interface route, and verify that it can all be tidied. The + * multicast route is modified as part of the tidy. This maps to a set of + * updates that were seen to cause an issue in the live system. + */ +DP_START_TEST(vrf_ip_fwd2, vrf_basic_ipv6) +{ + const char *nh_mac_str; + + dp_test_netlink_add_vrf(TEST_VRF2, 1); + + + dp_test_netlink_set_interface_vrf("dp2T1", TEST_VRF2); + dp_test_netlink_set_interface_vrf("dp1T1", TEST_VRF2); + + /* Link local (ours) */ + dp_test_netlink_add_ip_address_vrf("dp1T1", + "fe80::4056:1ff:fee8:101/128", + TEST_VRF2); + + dp_test_nl_add_ip_addr_and_connected_vrf("dp1T1", "2012::1/24", + TEST_VRF2); + + /* mcast via dp1T1 and dp2T1 */ + dp_test_netlink_replace_route_nv( + "vrf:55 ff00::/8 nh int:dp1T1 nh int:dp2T1"); + + nh_mac_str = "aa:bb:cc:dd:ee:ff"; + dp_test_netlink_add_neigh("dp1T1", "2012::2", nh_mac_str); + + /* + * Now start deleting. 
+ */ + dp_test_netlink_del_route("vrf:55 2012::/24 scope:253 nh int:dp1T1"); + + dp_test_netlink_replace_route_nv("vrf:55 ff00::/8 nh int:dp1T1"); + + dp_test_netlink_del_ip_address_vrf("dp1T1", "2012::1/24", + TEST_VRF2); + dp_test_netlink_del_neigh("dp1T1", "2012::2", nh_mac_str); + + dp_test_netlink_set_interface_vrf("dp1T1", VRF_DEFAULT_ID); + dp_test_netlink_del_ip_address_vrf("dp1T1", + "fe80::4056:1ff:fee8:101/128", + TEST_VRF2); + + dp_test_netlink_set_interface_vrf("dp2T1", VRF_DEFAULT_ID); + dp_test_netlink_set_interface_vrf("dp1T1", VRF_DEFAULT_ID); + dp_test_netlink_del_vrf(TEST_VRF2, 0); + +} DP_END_TEST; + + DP_DECL_TEST_CASE(vrf_suite, vrf_vif_ipv4, NULL, NULL) DP_START_TEST(vrf_vif_ipv4, vrf_vif_ipv4) { @@ -621,7 +667,7 @@ DP_START_TEST(vrf_vif_ipv4, vrf_vif_ipv4) 1, &len); dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str(l3_vif_intf), - NULL, ETHER_TYPE_IPv4); + NULL, RTE_ETHER_TYPE_IPV4); /* Create pak we expect to receive on the tx ring */ exp = dp_test_exp_create(test_pak); @@ -630,7 +676,7 @@ DP_START_TEST(vrf_vif_ipv4, vrf_vif_ipv4) (void)dp_test_pktmbuf_eth_init(dp_test_exp_get_pak(exp), nh_mac_str, dp_test_intf_name2mac_str(l3_intf_tx), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); dp_test_ipv4_decrement_ttl(dp_test_exp_get_pak(exp)); diff --git a/tests/whole_dp/src/dp_test_vti.c b/tests/whole_dp/src/dp_test_vti.c index 655c62b4..b6ac2a24 100644 --- a/tests/whole_dp/src/dp_test_vti.c +++ b/tests/whole_dp/src/dp_test_vti.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015-2016 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -20,15 +20,15 @@ #include "if_var.h" #include "main.h" #include "crypto/vti.h" -#include "vrf.h" +#include "vrf_internal.h" #include "dp_test.h" #include "dp_test_controller.h" -#include "dp_test_netlink_state.h" -#include "dp_test_cmd_check.h" -#include "dp_test_lib.h" -#include "dp_test_pktmbuf_lib.h" -#include "dp_test_lib_intf.h" +#include "dp_test_netlink_state_internal.h" +#include "dp_test/dp_test_cmd_check.h" +#include "dp_test_lib_internal.h" +#include "dp_test_lib_intf_internal.h" +#include "dp_test_pktmbuf_lib_internal.h" #include "dp_test_crypto_utils.h" #include "dp_test_lib_exp.h" @@ -326,7 +326,7 @@ static int vti_count_of_vtis(void) { int count = 0; - ifnet_walk(vti_count, &count); + dp_ifnet_walk(vti_count, &count); return count; } @@ -435,6 +435,46 @@ static struct rte_mbuf *build_expected_esp_packet(int *payload_len) NULL /* transport_hdr*/); } +/* + * build_expected_esp_icmp_unreach_packet() + * + * This helper function creates an output ESP packet containing an + * encrypted ICMP unreachable packet. 
+ */ +static struct rte_mbuf *build_expected_esp_icmp_unreach_packet(int *payload_len) +{ + const char encrypted_payload[] = { + 0x64, 0xc8, 0x6e, 0x89, 0x53, 0x45, 0x54, 0xd6, 0xb1, 0x0c, + 0x8c, 0xca, 0xc4, 0x44, 0xbf, 0xd3, 0xae, 0x78, 0x67, 0x31, + 0x71, 0x81, 0x55, 0x53, 0xf1, 0x93, 0x8b, 0x2d, 0xf8, 0x7e, + 0x01, 0x7a, 0xc4, 0x1b, 0xd9, 0xa8, 0xd8, 0x6d, 0xd4, 0xc2, + 0xe1, 0xcb, 0x39, 0x0f, 0xc5, 0x2d, 0x8a, 0xf6, 0x3b, 0x81, + 0xad, 0x59, 0xc7, 0x48, 0xfc, 0x43, 0xeb, 0xa3, 0x83, 0x79, + 0x0b, 0x06, 0xd5, 0x12, 0x6e, 0xb0, 0xee, 0x9a, 0x50, 0x1b, + 0x6f, 0x00, 0xeb, 0x90, 0xb1, 0xd0, 0xbc, 0xd0, 0x18, 0x7a, + 0x9d, 0xd0, 0xb3, 0x9c, 0x1b, 0xaa, 0xd1, 0xde, 0x88, 0x18, + 0x7f, 0xe6, 0xc3, 0x36, 0xd3, 0x81, 0x7f, 0x33, 0xc6, 0x36, + 0x87, 0xa6, 0x93, 0x03, 0xb5, 0xef, 0x9f, 0x6a, 0xbe, 0x08, + 0x1f, 0x6e, 0x21, 0x57, 0x93, 0x07, 0xe2, 0x3e, 0x98, 0x2f, + 0x25, 0x66, 0x0d, 0x8f, 0xf6, 0x2d, 0x80, 0x6c, 0xb4, 0x29, + 0xea, 0xae, 0x74, 0xf3, 0x2d, 0x7b, 0x9e, 0x20, 0x67, 0xd3, + 0x99, 0x94, 0x0e, 0x10, 0x15, 0x18, 0x7c, 0xf5, 0x67, 0x98, + 0xfb, 0x24, 0x30, 0x1f, 0x40, 0xf0 + }; + + *payload_len = sizeof(encrypted_payload); + + return dp_test_create_esp_ipv4_pak(PORT_EAST, PEER, 1, + payload_len, + encrypted_payload, + SPI_OUTBOUND, + 1 /* seq no */, + 0 /* ip ID */, + 255 /* ttl */, + NULL /* udp/esp */, + NULL /* transport_hdr*/); +} + /* * TEST: encrypt_a_packet * @@ -458,7 +498,7 @@ DP_START_TEST(vti, encrypt_a_packet) input_packet = build_input_icmp_packet(); (void)dp_test_pktmbuf_eth_init(input_packet, dp_test_intf_name2mac_str("dp1T1"), - NULL, ETHER_TYPE_IPv4); + NULL, RTE_ETHER_TYPE_IPV4); output_packet = build_expected_esp_packet(&encrypted_payload_len); @@ -467,7 +507,7 @@ DP_START_TEST(vti, encrypt_a_packet) (void)dp_test_pktmbuf_eth_init(output_packet, PEER_MAC_ADDR, dp_test_intf_name2mac_str("dp2T2"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); exp = dp_test_exp_create(output_packet); rte_pktmbuf_free(output_packet); @@ -511,7 +551,7 @@ 
DP_START_TEST(vti_toobig, vti_toobig) test_pak = dp_test_create_ipv6_pak(CLIENT_LOCAL6, CLIENT_REMOTE6, 1, &len); dp_test_pktmbuf_eth_init(test_pak, dp_test_intf_name2mac_str("dp1T1"), - CLIENT_LOCAL_MAC_ADDR, ETHER_TYPE_IPv6); + CLIENT_LOCAL_MAC_ADDR, RTE_ETHER_TYPE_IPV6); /* * Expected ICMP response @@ -533,7 +573,7 @@ DP_START_TEST(vti_toobig, vti_toobig) (void)dp_test_pktmbuf_eth_init(icmp_pak, CLIENT_LOCAL_MAC_ADDR, dp_test_intf_name2mac_str("dp1T1"), - ETHER_TYPE_IPv6); + RTE_ETHER_TYPE_IPV6); in6_inner = (struct ip6_hdr *)(icmp6 + 1); in6_inner->ip6_hlim--; @@ -579,7 +619,7 @@ DP_START_TEST(vti, encrypt_a_packet_vrf) input_packet = build_input_icmp_packet(); (void)dp_test_pktmbuf_eth_init(input_packet, dp_test_intf_name2mac_str("dp1T1"), - NULL, ETHER_TYPE_IPv4); + NULL, RTE_ETHER_TYPE_IPV4); output_packet = build_expected_esp_packet(&encrypted_payload_len); @@ -588,7 +628,7 @@ DP_START_TEST(vti, encrypt_a_packet_vrf) (void)dp_test_pktmbuf_eth_init(output_packet, PEER_MAC_ADDR, dp_test_intf_name2mac_str("dp2T2"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); exp = dp_test_exp_create(output_packet); rte_pktmbuf_free(output_packet); @@ -686,6 +726,62 @@ static struct rte_mbuf *build_expected_icmp_packet(int *payload_len) return packet; } +/* + * TEST: return_enc_icmp + * + * This test checks that an ICMP error generated as a result of processing + * a packet received on a VTI interface will be returned. + */ +DP_START_TEST(vti, return_enc_icmp) +{ + struct rte_mbuf *output_packet; + struct rte_mbuf *input_packet; + struct dp_test_expected *exp; + int encrypted_payload_len = 0; + struct iphdr *ip; + + vti_setup_tunnel(VRF_DEFAULT_ID, OUTPUT_MARK); + vti_setup_policies_and_sas(VRF_DEFAULT_ID); + dp_test_fail_unless((vti_count_of_vtis() == 1), + "Expected VTI to be created"); + + /* + * Create the input encrypted packet. 
+ */ + input_packet = build_encrypted_input_packet(); + (void)dp_test_pktmbuf_eth_init(input_packet, + dp_test_intf_name2mac_str("dp2T2"), + NULL, RTE_ETHER_TYPE_IPV4); + + /* + * Expect an ICMP Unreachable + */ + output_packet = build_expected_esp_icmp_unreach_packet( + &encrypted_payload_len); + ip = dp_pktmbuf_mtol3(output_packet, struct iphdr *); + dp_test_set_pak_ip_field(ip, DP_TEST_SET_TOS, 0xc0); + (void)dp_test_pktmbuf_eth_init(output_packet, + PEER_MAC_ADDR, + dp_test_intf_name2mac_str("dp2T2"), + RTE_ETHER_TYPE_IPV4); + + exp = dp_test_exp_create(output_packet); + dp_test_exp_set_oif_name(exp, "dp2T2"); + rte_pktmbuf_free(output_packet); + + /* Set low MTU on outif to provoke unreachable */ + dp_test_netlink_set_interface_mtu("dp1T1", 64); + + dp_test_pak_receive(input_packet, "dp2T2", exp); + dp_test_crypto_check_sad_packets(VRF_DEFAULT_ID, 1, 84); + + dp_test_netlink_set_interface_mtu("dp1T1", 1500); + vti_teardown_tunnel(VRF_DEFAULT_ID); + vti_teardown_sas_and_policy(); + dp_test_fail_unless((vti_count_of_vtis() == 0), + "Expected VTI to be deleted"); +} DP_END_TEST; + /* * TEST: decrypt_a_packet * @@ -710,7 +806,7 @@ DP_START_TEST(vti, decrypt_a_packet) input_packet = build_encrypted_input_packet(); (void)dp_test_pktmbuf_eth_init(input_packet, dp_test_intf_name2mac_str("dp2T2"), - NULL, ETHER_TYPE_IPv4); + NULL, RTE_ETHER_TYPE_IPV4); /* * Ceate the expected decrypted ping packet @@ -719,7 +815,7 @@ DP_START_TEST(vti, decrypt_a_packet) (void)dp_test_pktmbuf_eth_init(output_packet, CLIENT_LOCAL_MAC_ADDR, dp_test_intf_name2mac_str("dp1T1"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); /* * Create an expectation for the decypted ICMP ping packet on dp1T1. 
@@ -767,7 +863,7 @@ DP_START_TEST(vti, decrypt_a_packet_vrf) input_packet = build_encrypted_input_packet(); (void)dp_test_pktmbuf_eth_init(input_packet, dp_test_intf_name2mac_str("dp2T2"), - NULL, ETHER_TYPE_IPv4); + NULL, RTE_ETHER_TYPE_IPV4); /* * Ceate the expected decrypted ping packet @@ -776,7 +872,7 @@ DP_START_TEST(vti, decrypt_a_packet_vrf) (void)dp_test_pktmbuf_eth_init(output_packet, CLIENT_LOCAL_MAC_ADDR, dp_test_intf_name2mac_str("dp1T1"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); /* * Create an expectation for the decypted ICMP ping packet on dp1T1. @@ -817,7 +913,7 @@ DP_START_TEST(vti, encrypt_a_packet_bind_dp2) input_pak = build_input_icmp_packet(); (void)dp_test_pktmbuf_eth_init(input_pak, dp_test_intf_name2mac_str("dp1T1"), - NULL, ETHER_TYPE_IPv4); + NULL, RTE_ETHER_TYPE_IPV4); output_pak = build_expected_esp_packet(&encrypted_payload_len); dp_test_set_pak_ip_field(iphdr(output_pak), DP_TEST_SET_DF, 1); @@ -825,7 +921,7 @@ DP_START_TEST(vti, encrypt_a_packet_bind_dp2) PEER_MAC_ADDR, dp_test_intf_name2mac_str ("dp2T2"), - ETHER_TYPE_IPv4); + RTE_ETHER_TYPE_IPV4); exp = dp_test_exp_create(output_pak); rte_pktmbuf_free(output_pak); diff --git a/tests/whole_dp/src/dp_test_vxlan.c b/tests/whole_dp/src/dp_test_vxlan.c index 39240e75..b7a69f07 100644 --- a/tests/whole_dp/src/dp_test_vxlan.c +++ b/tests/whole_dp/src/dp_test_vxlan.c @@ -1,4 +1,5 @@ /* + * Copyright (c) 2019-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015-2016 by Brocade Communications Systems, Inc. * All rights reserved. 
* @@ -6,8 +7,8 @@ * * dataplane UT VXLAN tests */ -#include "dp_test_lib_intf.h" -#include "dp_test_macros.h" +#include "dp_test_lib_intf_internal.h" +#include "dp_test/dp_test_macros.h" DP_DECL_TEST_SUITE(vxlan_suite); @@ -31,3 +32,15 @@ DP_START_TEST(vxlan_cfg_double, vxlan_cfg_double) dp_test_intf_vxlan_del("vxl60", 60); } DP_END_TEST; +/* Create 2 x vxlan on the same vni, 2nd creation should fail */ +DP_DECL_TEST_CASE(vxlan_suite, vxlan_cfg_duplicate, NULL, NULL); +DP_START_TEST(vxlan_cfg_duplicate, vxlan_cfg_duplicate) +{ + /* Reintroduce when we have expect failure test api */ +#if 0 + dp_test_intf_vxlan_create("vxl70", 70, "dp1T0"); + dp_test_intf_vxlan_create("vxl71", 70, "dp1T1"); /* dup vni */ + dp_test_intf_vxlan_del("vxl70", 70); + /* vxlan 71 should have failed to be created, so we don't delete it */ +#endif +} DP_END_TEST; diff --git a/tests/whole_dp/src/dp_test_wrapped_funcs.c b/tests/whole_dp/src/dp_test_wrapped_funcs.c index 617cf5bf..27c06aba 100644 --- a/tests/whole_dp/src/dp_test_wrapped_funcs.c +++ b/tests/whole_dp/src/dp_test_wrapped_funcs.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2015-2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -36,8 +36,8 @@ #include "main.h" #include "dp_test.h" -#include "dp_test_lib.h" -#include "dp_test_lib_intf.h" +#include "dp_test_lib_internal.h" +#include "dp_test_lib_intf_internal.h" unsigned int proc_pagemap_readable; diff --git a/tests/whole_dp/src/dp_test_xfrm.c b/tests/whole_dp/src/dp_test_xfrm.c index f50c1eb0..d6e72c6b 100644 --- a/tests/whole_dp/src/dp_test_xfrm.c +++ b/tests/whole_dp/src/dp_test_xfrm.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2018, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved.
* Copyright (c) 2015-2016 by Brocade Communications Systems, Inc. * All rights reserved. * @@ -9,17 +9,19 @@ */ #include "dp_test.h" -#include "dp_test_lib.h" -#include "dp_test_netlink_state.h" +#include "dp_test_lib_internal.h" +#include "dp_test_netlink_state_internal.h" #include "dp_test_crypto_utils.h" #include "crypto/crypto_forward.h" +#include "dp_test_xfrm_server.h" #define LOCAL_ADDRESS "10.10.2.2" #define NETWORK_LOCAL "10.10.1.0/24" #define PEER_ADDRESS "10.10.2.3" #define NETWORK_REMOTE "10.10.3.0/24" -#define TUNNEL_REQID 1234 +#define TUNNEL_REQID_OUT 1234 +#define TUNNEL_REQID_IN 0x100000 #define TUNNEL_PRIORITY 2048 #define SPI_OUTBOUND 0x10 #define SPI_INBOUND 0x11 @@ -40,8 +42,9 @@ DP_START_TEST(xfrm_policy, create_two_policies) .dst_family = AF_INET, .dir = XFRM_POLICY_OUT, .family = AF_INET, - .reqid = TUNNEL_REQID, + .reqid = TUNNEL_REQID_OUT, .priority = TUNNEL_PRIORITY, + .rule_no = 1, .mark = 0, .vrfid = VRF_DEFAULT_ID }; @@ -54,8 +57,9 @@ DP_START_TEST(xfrm_policy, create_two_policies) .dst_family = AF_INET, .dir = XFRM_POLICY_IN, .family = AF_INET, - .reqid = TUNNEL_REQID, + .reqid = TUNNEL_REQID_IN, .priority = TUNNEL_PRIORITY, + .rule_no = 2, .mark = 0, .vrfid = VRF_DEFAULT_ID }; @@ -65,6 +69,8 @@ DP_START_TEST(xfrm_policy, create_two_policies) dp_test_crypto_create_policy(&input_policy); dp_test_crypto_create_policy(&output_policy); + dp_test_crypto_check_policy_count(VRF_DEFAULT_ID, 2, AF_INET); + dp_test_check_state_show("ipsec spd", "\"ipv4\": 2", false); dp_test_crypto_delete_policy(&input_policy); @@ -72,6 +78,8 @@ DP_START_TEST(xfrm_policy, create_two_policies) dp_test_check_state_show("ipsec spd", "\"ipv4\": 0", false); + dp_test_crypto_check_policy_count(VRF_DEFAULT_ID, 0, AF_INET); + } DP_END_TEST; DP_START_TEST(xfrm_policy, create_two_policies_vrf) @@ -86,8 +94,9 @@ DP_START_TEST(xfrm_policy, create_two_policies_vrf) .dst_family = AF_INET, .dir = XFRM_POLICY_OUT, .family = AF_INET, - .reqid = TUNNEL_REQID, + .reqid = 
TUNNEL_REQID_OUT, .priority = TUNNEL_PRIORITY, + .rule_no = 1, .mark = 0, .vrfid = TEST_VRF }; @@ -100,8 +109,9 @@ DP_START_TEST(xfrm_policy, create_two_policies_vrf) .dst_family = AF_INET, .dir = XFRM_POLICY_IN, .family = AF_INET, - .reqid = TUNNEL_REQID, + .reqid = TUNNEL_REQID_IN, .priority = TUNNEL_PRIORITY, + .rule_no = 2, .mark = 0, .vrfid = TEST_VRF }; @@ -116,6 +126,8 @@ DP_START_TEST(xfrm_policy, create_two_policies_vrf) dp_test_crypto_create_policy(&input_policy); dp_test_crypto_create_policy(&output_policy); + dp_test_crypto_check_policy_count(TEST_VRF, 2, AF_INET); + dp_test_check_state_show("ipsec spd", "\"ipv4\": 0", false); vrfid = dp_test_translate_vrf_id(TEST_VRF); snprintf(cmd_str, sizeof(cmd_str), "ipsec spd vrf_id %d", vrfid); @@ -124,6 +136,8 @@ DP_START_TEST(xfrm_policy, create_two_policies_vrf) dp_test_crypto_delete_policy(&input_policy); dp_test_crypto_delete_policy(&output_policy); + dp_test_crypto_check_policy_count(TEST_VRF, 0, AF_INET); + dp_test_check_state_show(cmd_str, "\"ipv4\": 0", false); dp_test_nl_del_ip_addr_and_connected_vrf("dp1T0", "16.1.1.1/24", TEST_VRF); @@ -132,7 +146,6 @@ DP_START_TEST(xfrm_policy, create_two_policies_vrf) } DP_END_TEST; DP_DECL_TEST_CASE(xfrm_suite, xfrm_sa, NULL, NULL); - /* can we create to SAs with only key and hmac args? 
*/ DP_START_TEST(xfrm_sa, create_two_sas_crypto_only) { @@ -157,7 +170,7 @@ DP_START_TEST(xfrm_sa, create_two_sas_crypto_only) .s_addr = LOCAL_ADDRESS, .family = AF_INET, .mode = XFRM_MODE_TUNNEL, - .reqid = TUNNEL_REQID, + .reqid = TUNNEL_REQID_OUT, .mark = 0, .vrfid = VRF_DEFAULT_ID }; @@ -174,7 +187,7 @@ DP_START_TEST(xfrm_sa, create_two_sas_crypto_only) .s_addr = PEER_ADDRESS, .family = AF_INET, .mode = XFRM_MODE_TUNNEL, - .reqid = TUNNEL_REQID, + .reqid = TUNNEL_REQID_IN, .mark = 0, .vrfid = VRF_DEFAULT_ID }; @@ -184,9 +197,17 @@ DP_START_TEST(xfrm_sa, create_two_sas_crypto_only) dp_test_crypto_create_sa(&input_sa); dp_test_crypto_create_sa(&output_sa); + dp_test_xfrm_poison_sa_stats(); + dp_test_crypto_get_sa(&input_sa); + dp_test_crypto_check_xfrm_sa_cntrs(0, 0, true); + + dp_test_xfrm_poison_sa_stats(); + dp_test_crypto_check_xfrm_sa_cntrs(0, 0, false); + dp_test_check_state_show("ipsec sad", - "\"cipher\": \"CBS(AES) 128\",\n" - " \"digest\": \"hmac(sha1)\"", + "\"cipher\": \"aes-cbc\",\n" + " \"cipher_key_len\": 128,\n" + " \"digest\": \"sha1-hmac\"", false); dp_test_crypto_check_sa_count(VRF_DEFAULT_ID, 2); @@ -197,6 +218,133 @@ DP_START_TEST(xfrm_sa, create_two_sas_crypto_only) } DP_END_TEST; +DP_START_TEST(xfrm_sa, xfrm_sa_scale) +{ + const unsigned char crypto_key_128[] = { + 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, + 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef}; + + const unsigned char auth_key[] = { + 0x0b, 0x1b, 0x2b, 0x3b, 0x4b, 0x5b, 0x6b, 0x7b, + 0x8b, 0x9b, 0xab, 0xbb, 0xcb, 0xeb, 0xfb, 0x1c, + 0x2c, 0x3c, 0x4c, 0x5c}; + + + struct dp_test_crypto_sa input_sa = { + .cipher_algo = CRYPTO_CIPHER_AES_CBC, + .cipher_key = crypto_key_128, + .cipher_key_len = (sizeof(crypto_key_128) * 8), + .auth_algo = CRYPTO_AUTH_HMAC_SHA1, + .auth_key = auth_key, + .auth_key_len = (sizeof(auth_key) * 8), + .spi = SPI_INBOUND, + .d_addr = LOCAL_ADDRESS, + .s_addr = PEER_ADDRESS, + .family = AF_INET, + .mode = XFRM_MODE_TUNNEL, + .reqid = TUNNEL_REQID_IN, + 
.mark = 0, + .vrfid = VRF_DEFAULT_ID + }; + + + struct dp_test_crypto_sa output_sa = { + .cipher_algo = CRYPTO_CIPHER_AES_CBC, + .cipher_key = crypto_key_128, + .cipher_key_len = (sizeof(crypto_key_128) * 8), + .auth_algo = CRYPTO_AUTH_HMAC_SHA1, + .auth_key = auth_key, + .auth_key_len = (sizeof(auth_key) * 8), + .spi = SPI_OUTBOUND, + .d_addr = PEER_ADDRESS, + .s_addr = LOCAL_ADDRESS, + .family = AF_INET, + .mode = XFRM_MODE_TUNNEL, + .reqid = TUNNEL_REQID_OUT, + .mark = 0, + .vrfid = VRF_DEFAULT_ID + }; + +#define PEER_BASE_OUT_ADDR 0x0a0a0203 +#define PEER_BASE_IN_ADDR 0x0b0b0203 + + dp_test_crypto_check_sa_count(VRF_DEFAULT_ID, 0); + +#define SA_INSTALL 32 + int ip_peer_addr_out = PEER_BASE_OUT_ADDR; + int ip_peer_addr_in = PEER_BASE_IN_ADDR; + + int spi_out = SPI_OUTBOUND; + int spi_in = SPI_INBOUND; + int req_id = TUNNEL_REQID_IN; + + char ip_peer_addr_str[INET_ADDRSTRLEN]; + int i, tmp_ip; + + for (i = 0; i < SA_INSTALL; i++) { + tmp_ip = htonl(ip_peer_addr_in++); + if (!inet_ntop(AF_INET, &tmp_ip, + ip_peer_addr_str, INET_ADDRSTRLEN)) + assert(0); + input_sa.s_addr = ip_peer_addr_str; + input_sa.spi = spi_in++; + input_sa.reqid = req_id; + dp_test_crypto_create_sa_verify(&input_sa, false); + + tmp_ip = htonl(ip_peer_addr_out++); + if (!inet_ntop(AF_INET, &tmp_ip, + ip_peer_addr_str, INET_ADDRSTRLEN)) + assert(0); + output_sa.d_addr = ip_peer_addr_str; + output_sa.spi = spi_out++; + output_sa.reqid = req_id++; + dp_test_crypto_create_sa_verify(&output_sa, false); + + } + + sleep(1); + printf("DONE"); + dp_test_check_state_show("ipsec sad", + "\"cipher\": \"aes-cbc\",\n" + " \"cipher_key_len\": 128,\n" + " \"digest\": \"sha1-hmac\"", + false); + + dp_test_crypto_check_sa_count(VRF_DEFAULT_ID, SA_INSTALL * 2); + + ip_peer_addr_out = PEER_BASE_OUT_ADDR; + ip_peer_addr_in = PEER_BASE_IN_ADDR; + + spi_out = SPI_OUTBOUND; + spi_in = SPI_INBOUND; + req_id = TUNNEL_REQID_IN; + + for (i = 0; i < SA_INSTALL; i++) { + tmp_ip = htonl(ip_peer_addr_in++); + if 
(!inet_ntop(AF_INET, &tmp_ip, + ip_peer_addr_str, INET_ADDRSTRLEN)) + assert(0); + input_sa.s_addr = ip_peer_addr_str; + input_sa.spi = spi_in++; + input_sa.reqid = req_id; + dp_test_crypto_delete_sa_verify(&input_sa, false); + + tmp_ip = htonl(ip_peer_addr_out++); + if (!inet_ntop(AF_INET, &tmp_ip, + ip_peer_addr_str, INET_ADDRSTRLEN)) + assert(0); + output_sa.d_addr = ip_peer_addr_str; + output_sa.spi = spi_out++; + output_sa.reqid = req_id++; + dp_test_crypto_delete_sa_verify(&output_sa, false); + } + + sleep(1); + + dp_test_crypto_check_sa_count(VRF_DEFAULT_ID, 0); + +} DP_END_TEST; + /* * sa_expire: Check that an XFRM_MSG_EXPIRE message removes an SA * from the dataplane if 'hard' is true, but not if it's false. @@ -210,7 +358,7 @@ DP_START_TEST(xfrm_sa, sa_expire) .s_addr = LOCAL_ADDRESS, .family = AF_INET, .mode = XFRM_MODE_TUNNEL, - .reqid = TUNNEL_REQID, + .reqid = TUNNEL_REQID_OUT, .mark = 0, .vrfid = VRF_DEFAULT_ID }; @@ -222,7 +370,7 @@ DP_START_TEST(xfrm_sa, sa_expire) .s_addr = PEER_ADDRESS, .family = AF_INET, .mode = XFRM_MODE_TUNNEL, - .reqid = TUNNEL_REQID, + .reqid = TUNNEL_REQID_IN, .mark = 0, .vrfid = VRF_DEFAULT_ID }; @@ -242,7 +390,7 @@ DP_START_TEST(xfrm_sa, sa_expire) dp_test_crypto_expire_sa(&output_sa, true); dp_test_crypto_check_sa_count(VRF_DEFAULT_ID, 1); - dp_test_crypto_delete_sa(&input_sa); + dp_test_crypto_flush(); dp_test_crypto_check_sa_count(VRF_DEFAULT_ID, 0); diff --git a/tests/whole_dp/src/dp_test_xfrm_server.c b/tests/whole_dp/src/dp_test_xfrm_server.c new file mode 100644 index 00000000..85f26fac --- /dev/null +++ b/tests/whole_dp/src/dp_test_xfrm_server.c @@ -0,0 +1,176 @@ +/*- + * Copyright (c) 2020, AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ + +/* + * A test version of the xfrm broker process, running as its own thread. 
+ */ +#include + +#include +#include +#include +#include + +#include "dp_test_controller.h" +#include "dp_test_lib_internal.h" +#include "dp_test_xfrm_server.h" +#include "util.h" +#include "zmq_dp.h" +#include "czmq.h" +#include "dp_test_crypto_utils.h" + +static int process_xfrm_actor_message(zsock_t *sock) +{ + zmsg_t *msg; + char *str; + bool stop = false; + + msg = zmsg_recv(sock); + if (!msg) + return false; + + str = zmsg_popstr(msg); + if (streq(str, "$TERM")) + stop = true; + + free(str); + zmsg_destroy(&msg); + return stop; +} + +uint32_t xfrm_seq_received; +uint32_t xfrm_ack_err; +uint64_t xfrm_bytes, xfrm_packets; +uint32_t xfrm_expire_spi; +xfrm_address_t xfrm_expire_dst; +uint16_t xfrm_expire_family; + +static void process_xfrm_ack_message(zsock_t *sock) +{ + zframe_t *msg; + struct nlmsghdr *nlh; + struct nlmsgerr *err_msg; + struct xfrm_usersa_info *sa; + struct xfrm_user_expire *expire; + bool seq_inc = true; + + msg = zframe_recv(sock); + assert(msg); + nlh = (struct nlmsghdr *)zframe_data(msg); + dp_test_assert_internal(nlh); + + /* Netlink ACK/OK are carried in Error messages */ + switch (nlh->nlmsg_type) { + case NLMSG_ERROR: + err_msg = mnl_nlmsg_get_payload(nlh); + if (xfrm_ack_err) { + xfrm_ack_err--; + dp_test_assert_internal(err_msg->error != 0); + } else { + dp_test_assert_internal(err_msg->error == 0); + } + break; + case XFRM_MSG_NEWSA: + /* Stats request response */ + sa = mnl_nlmsg_get_payload(nlh); + dp_test_assert_internal(sa); + xfrm_bytes = sa->curlft.bytes; + xfrm_packets = sa->curlft.packets; + break; + case XFRM_MSG_EXPIRE: + expire = mnl_nlmsg_get_payload(nlh); + dp_test_assert_internal(expire); + dp_test_assert_internal(expire->state.id.proto == IPPROTO_ESP); + + xfrm_expire_family = expire->state.family; + xfrm_expire_dst = expire->state.id.daddr; + /* + * This is an autonomous message, i.e.
no xfrm request + * was sent, so do not inc the xfrm_seq_received + */ + seq_inc = false; + break; + default: + dp_test_assert_internal(nlh->nlmsg_type == NLMSG_ERROR); + } + + /* Error code 0 indicates an ACK/OK else we have an error */ + + if (seq_inc) + xfrm_seq_received++; + + dp_test_assert_internal(xfrm_seq_received <= xfrm_seq); + zframe_destroy(&msg); +} + +zsock_t *xfrm_server_push_sock; +zsock_t *xfrm_server_pull_sock; + +#define XFRM_SERVER_POLL_TIMER 3000 + +void +dp_test_xfrm_server_thread_run(zsock_t *pipe, void *args) +{ + char socket_names[MAX_XFRM_SOCKET_NAME_SIZE * 2]; + char *ep_pull, *ep_push; + + pthread_setname_np(pthread_self(), "dp_test_xfrm_sv"); + + xfrm_server_push_sock = zsock_new_push(NULL); + assert(xfrm_server_push_sock); + if (zsock_bind(xfrm_server_push_sock, "%s", "ipc://*") < 0) + dp_test_abort_internal(); + ep_push = zsock_last_endpoint(xfrm_server_push_sock); + + xfrm_server_pull_sock = zsock_new_pull(NULL); + assert(xfrm_server_pull_sock); + if (zsock_bind(xfrm_server_pull_sock, "%s", "ipc://*") < 0) + dp_test_abort_internal(); + ep_pull = zsock_last_endpoint(xfrm_server_pull_sock); + + snprintf(socket_names, sizeof(socket_names), "%s %s", + ep_push, ep_pull); + + zsock_signal(pipe, 0); + zstr_send(pipe, socket_names); + free(ep_push); + free(ep_pull); + + zmq_pollitem_t items[] = { + { + .socket = zsock_resolve(pipe), + .events = ZMQ_POLLIN|ZMQ_POLLERR, + }, + { + .socket = zsock_resolve(xfrm_server_pull_sock), + .events = ZMQ_POLLIN + }, + }; + int item_count = ARRAY_SIZE(items); + + dp_test_crypto_flush(); + + while (!zsys_interrupted) { + if (zmq_poll(items, item_count, + XFRM_SERVER_POLL_TIMER * ZMQ_POLL_MSEC) < 0) { + if (errno == EINTR) + continue; + break; + } + + if (items[0].revents & ZMQ_POLLERR) + break; + + if (items[0].revents & ZMQ_POLLIN) + if (process_xfrm_actor_message(pipe)) + break; + + if (items[1].revents & ZMQ_POLLIN) + process_xfrm_ack_message(xfrm_server_pull_sock); + } +
zsock_destroy(&xfrm_server_pull_sock); + zsock_destroy(&xfrm_server_push_sock); +} diff --git a/tests/whole_dp/src/dp_test_xfrm_server.h b/tests/whole_dp/src/dp_test_xfrm_server.h new file mode 100644 index 00000000..cc8c390f --- /dev/null +++ b/tests/whole_dp/src/dp_test_xfrm_server.h @@ -0,0 +1,25 @@ +/*- + * Copyright (c) 2020, AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ + +/* + * A test version of the xfrm broker process, running as its own thread. + */ +#include + +#include +#include + +#define MAX_XFRM_SOCKET_NAME_SIZE 48 + +extern zsock_t *xfrm_server_push_sock; +extern uint32_t xfrm_seq; +extern uint32_t xfrm_seq_received; +extern uint32_t xfrm_ack_err; + +extern uint64_t xfrm_bytes; +extern uint64_t xfrm_packets; + +void dp_test_xfrm_server_thread_run(zsock_t *pipe, void *args); diff --git a/tests/whole_dp/src/fal_plugin_cpp_limiter.c b/tests/whole_dp/src/fal_plugin_cpp_limiter.c index fed77804..ced37e39 100644 --- a/tests/whole_dp/src/fal_plugin_cpp_limiter.c +++ b/tests/whole_dp/src/fal_plugin_cpp_limiter.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2019-2020, AT&T Intellectual Property. All rights reserved.
* * SPDX-License-Identifier: LGPL-2.1-only */ @@ -51,10 +51,11 @@ static void free_limiter(struct cpp_limiter_obj *limiter) prot_obj, entries); free(prot_obj); } - free(limiter); + fal_free_deferred(limiter); } } +__FOR_EXPORT int fal_plugin_create_cpp_limiter(uint32_t attr_count, const struct fal_attribute_t *attr_list, fal_object_t *new_limiter_id) @@ -66,7 +67,7 @@ int fal_plugin_create_cpp_limiter(uint32_t attr_count, INFO("%s, attr-count: %u\n", __func__, attr_count); - limiter = calloc(1, sizeof(*limiter)); + limiter = fal_calloc(1, sizeof(*limiter)); if (!limiter) { ret = -ENOMEM; goto error; @@ -90,6 +91,8 @@ int fal_plugin_create_cpp_limiter(uint32_t attr_count, case FAL_CPP_LIMITER_ATTR_RSVP: case FAL_CPP_LIMITER_ATTR_UDP: case FAL_CPP_LIMITER_ATTR_TCP: + case FAL_CPP_LIMITER_ATTR_PIM: + case FAL_CPP_LIMITER_ATTR_IP_MC: { fal_object_t policer_obj = attr_list[i].value.objid; struct cpp_limiter_protocol_obj *protocol_obj; @@ -136,6 +139,7 @@ int fal_plugin_create_cpp_limiter(uint32_t attr_count, return ret; } +__FOR_EXPORT int fal_plugin_remove_cpp_limiter(fal_object_t limiter_id) { struct cpp_limiter_obj *limiter = (struct cpp_limiter_obj *)limiter_id; @@ -167,6 +171,7 @@ static int get_policer_obj(struct cpp_limiter_obj *limiter, return -ENOENT; } +__FOR_EXPORT int fal_plugin_get_cpp_limiter_attribute(fal_object_t limiter_id, uint32_t attr_count, struct fal_attribute_t *attr_list) @@ -197,6 +202,8 @@ int fal_plugin_get_cpp_limiter_attribute(fal_object_t limiter_id, case FAL_CPP_LIMITER_ATTR_RSVP: case FAL_CPP_LIMITER_ATTR_UDP: case FAL_CPP_LIMITER_ATTR_TCP: + case FAL_CPP_LIMITER_ATTR_PIM: + case FAL_CPP_LIMITER_ATTR_IP_MC: { fal_object_t policer_obj; ret = get_policer_obj(limiter, attr_list[i].id, @@ -258,6 +265,7 @@ static int fal_commit_cpp_limiter(fal_object_t limiter_id) * Note that the following function could be merged with any other test * versions of the function that are added. 
*/ +__FOR_EXPORT int fal_plugin_set_switch_attribute(const struct fal_attribute_t *attr) { int ret = 0; @@ -275,3 +283,25 @@ int fal_plugin_set_switch_attribute(const struct fal_attribute_t *attr) return ret; } + +__FOR_EXPORT +int fal_plugin_get_switch_attribute(uint32_t attr_count, + struct fal_attribute_t *attr_list) +{ + struct fal_attribute_t *attr; + + for (uint32_t i = 0; i < attr_count; i++) { + attr = &attr_list[i]; + switch (attr->id) { + case FAL_SWITCH_ATTR_MAX_BURST_SIZE: + attr->value.u32 = 130048; + break; + default: + ERROR("%s(%d): unknown switch attribute %d\n", + __func__, attr_count, attr->id); + return -EINVAL; + } + } + + return 0; +} diff --git a/tests/whole_dp/src/fal_plugin_framer.c b/tests/whole_dp/src/fal_plugin_framer.c index 095cf50a..bc934947 100644 --- a/tests/whole_dp/src/fal_plugin_framer.c +++ b/tests/whole_dp/src/fal_plugin_framer.c @@ -12,45 +12,44 @@ #include "fal_plugin_framer.h" #define DSA_GET_TAG_TYPE(_x) \ - (((_x->tag[0]) & 0xc0) >> 6) + ((((_x)->tag[0]) & 0xc0) >> 6) #define DSA_GET_DEVICE(_x) \ - (_x->tag[0] & 0x1f) - + ((_x)->tag[0] & 0x1f) #define DSA_SET_DEVICE(_x, _y) \ - (_x->tag[0] |= (_y & 0x1f)) + ((_x)->tag[0] |= ((_y) & 0x1f)) #define DSA_GET_PORT(_x) \ - ((_x->tag[1] >> 3) & 0x1f) + (((_x)->tag[1] >> 3) & 0x1f) #define DSA_SET_PORT(_x, _y) \ - ((_x->tag[1]) |= (_y & 0x1f) << 3) + (((_x)->tag[1]) |= ((_y) & 0x1f) << 3) #define DSA_GET_IS_TAGGED(_x) \ - ((_x->tag[0] >> 5) & 0x1) + (((_x)->tag[0] >> 5) & 0x1) #define DSA_SET_IS_TAGGED(_x) \ - ((_x->tag[0]) |= 0x20) + (((_x)->tag[0]) |= 0x20) #define DSA_CLEAR_IS_TAGGED(_x) \ - ((_x->tag[0]) &= ~0x20) + (((_x)->tag[0]) &= ~0x20) #define DSA_GET_CFI(_x) \ - (_x->tag[1] & 0x1) + ((_x)->tag[1] & 0x1) #define DSA_SET_CFI(_x) \ - (_x->tag[1] |= 0x1) + ((_x)->tag[1] |= 0x1) -static inline struct ether_hdr *ethhdr(struct rte_mbuf *m) +static inline struct rte_ether_hdr *ethhdr(struct rte_mbuf *m) { - return rte_pktmbuf_mtod(m, struct ether_hdr *); + return 
rte_pktmbuf_mtod(m, struct rte_ether_hdr *); } int plugin_framer_rcv(struct rte_mbuf *mbuf, uint16_t *dpdk_port, union fal_pkt_feature_info *feat_info) { - struct ether_hdr *eh = ethhdr(mbuf); + struct rte_ether_hdr *eh = ethhdr(mbuf); struct edsa_hdr *edsa = (struct edsa_hdr *)&eh->ether_type; uint8_t hw_device, hw_port; int rc; @@ -90,9 +89,9 @@ int plugin_framer_rcv(struct rte_mbuf *mbuf, uint16_t *dpdk_port, return -1; } -int32_t plugin_framer_tx(void *sw_port, void *fal_info, struct rte_mbuf *mbuf) +int32_t plugin_framer_tx(void *sw_port, void *fal_info, struct rte_mbuf **mbuf) { - struct ether_hdr *eh = ethhdr(mbuf); + struct rte_ether_hdr *eh = ethhdr(*mbuf); uint16_t proto; uint8_t dev, port; @@ -104,7 +103,7 @@ int32_t plugin_framer_tx(void *sw_port, void *fal_info, struct rte_mbuf *mbuf) char *new_eh; struct edsa_hdr *edsa_hdr; - new_eh = rte_pktmbuf_prepend(mbuf, DSA_TAG_LEN); + new_eh = rte_pktmbuf_prepend(*mbuf, DSA_TAG_LEN); if (!new_eh) return -1; @@ -132,7 +131,7 @@ int32_t plugin_framer_tx(void *sw_port, void *fal_info, struct rte_mbuf *mbuf) char *new_eh; struct edsa_hdr *edsa_hdr; - new_eh = rte_pktmbuf_prepend(mbuf, EDSA_HLEN); + new_eh = rte_pktmbuf_prepend(*mbuf, EDSA_HLEN); memmove(new_eh, new_eh + EDSA_HLEN, 2 * ETH_ALEN); /* * Construct untagged FROM_CPU DSA tag. diff --git a/tests/whole_dp/src/fal_plugin_framer.h b/tests/whole_dp/src/fal_plugin_framer.h index 3d94d50e..399be1be 100644 --- a/tests/whole_dp/src/fal_plugin_framer.h +++ b/tests/whole_dp/src/fal_plugin_framer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, AT&T Intellectual Property. + * Copyright (c) 2018-2019, AT&T Intellectual Property. * All rights reserved. 
* * SPDX-License-Identifier: LGPL-2.1-only @@ -28,6 +28,6 @@ struct edsa_hdr { int plugin_framer_rcv(struct rte_mbuf *mbuf, uint16_t *dpdk_port, union fal_pkt_feature_info *feat_info); -int32_t plugin_framer_tx(void *sw_port, void *fal_info, struct rte_mbuf *mbuf); +int32_t plugin_framer_tx(void *sw_port, void *fal_info, struct rte_mbuf **mbuf); #endif //_FAL_PLUGIN_FAL_FRAME_H_ diff --git a/tests/whole_dp/src/fal_plugin_pm.c b/tests/whole_dp/src/fal_plugin_pm.c index 8fbfc0ee..df47a26a 100644 --- a/tests/whole_dp/src/fal_plugin_pm.c +++ b/tests/whole_dp/src/fal_plugin_pm.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2018, AT&T Intellectual Property. + * Copyright (c) 2018-2019, AT&T Intellectual Property. * All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only @@ -7,6 +7,7 @@ #include #include +#include "compiler.h" #define LOG(l, t, ...) \ rte_log(RTE_LOG_ ## l, \ @@ -21,28 +22,33 @@ #define INFO(...) LOG(INFO, FAL_TEST, __VA_ARGS__) #define ERROR(...) LOG(ERR, FAL_TEST, __VA_ARGS__) - +__FOR_EXPORT int fal_plugin_mirror_session_create(uint32_t attr_count, const struct fal_attribute_t *attr_list, - fal_object_t *mr_obj_id) + fal_object_t *obj) { INFO("To be implemented %s\n", __func__); return 0; } -int fal_plugin_mirror_session_delete(fal_object_t mr_obj_id) + +__FOR_EXPORT +int fal_plugin_mirror_session_delete(fal_object_t obj) { INFO("To be implemented %s\n", __func__); return 0; } -int fal_plugin_mirror_session_set_attr(fal_object_t mr_obj_id, + +__FOR_EXPORT +int fal_plugin_mirror_session_set_attr(fal_object_t obj, const struct fal_attribute_t *attr) { INFO("To be implemented %s\n", __func__); return 0; } -int fal_plugin_mirror_session_get_attr(fal_object_t mr_obj_id, +__FOR_EXPORT +int fal_plugin_mirror_session_get_attr(fal_object_t obj, uint32_t attr_count, struct fal_attribute_t *attr_list) { diff --git a/tests/whole_dp/src/fal_plugin_policer.c b/tests/whole_dp/src/fal_plugin_policer.c index d89c639b..c9230726 100644 --- 
a/tests/whole_dp/src/fal_plugin_policer.c +++ b/tests/whole_dp/src/fal_plugin_policer.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2018-2019, AT&T Intellectual Property. + * Copyright (c) 2018-2021, AT&T Intellectual Property. * All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only @@ -12,7 +12,8 @@ #include #include -#include "dp_test_macros.h" +#include "dp_test.h" +#include "dp_test/dp_test_macros.h" #include "fal_plugin_test.h" #define LOG(l, t, ...) \ @@ -82,8 +83,14 @@ static const char *fal_policer_attr_to_str(enum fal_policer_attr_t val) return "cbs"; case FAL_POLICER_ATTR_CIR: return "cir"; + case FAL_POLICER_ATTR_EBS: + return "ebs"; + case FAL_POLICER_ATTR_EIR: + return "eir"; case FAL_POLICER_ATTR_RED_PACKET_ACTION: return "action"; + default: + break; } assert(0); return "ERROR"; @@ -105,7 +112,7 @@ static const char *fal_policer_attr_to_str(enum fal_policer_attr_t val) * RATE_VAL3 * delete */ - +__FOR_EXPORT int fal_plugin_policer_create(uint32_t attr_count, const struct fal_attribute_t *attr_list, fal_object_t *obj) @@ -113,7 +120,7 @@ int fal_plugin_policer_create(uint32_t attr_count, uint i; struct fal_policer *policer; - policer = calloc(1, sizeof(*policer)); + policer = fal_calloc(1, sizeof(*policer)); assert(policer); DEBUG("%s start\n", __func__); for (i = 0; i < attr_count; i++) { @@ -164,7 +171,8 @@ int fal_plugin_policer_create(uint32_t attr_count, assert(policer->meter == FAL_POLICER_METER_TYPE_BYTES || policer->meter == FAL_POLICER_METER_TYPE_PACKETS); assert(policer->mode == FAL_POLICER_MODE_STORM_CTL || - policer->mode == FAL_POLICER_MODE_CPP); + policer->mode == FAL_POLICER_MODE_CPP || + policer->mode == FAL_POLICER_MODE_INGRESS); assert(policer->action == FAL_PACKET_ACTION_DROP); if (policer->rate == RATE_VAL1) @@ -176,6 +184,7 @@ int fal_plugin_policer_create(uint32_t attr_count, return 0; } +__FOR_EXPORT int fal_plugin_policer_delete(fal_object_t obj) { struct fal_policer *policer = (struct fal_policer *)obj; @@ -188,10 +197,11 
@@ int fal_plugin_policer_delete(fal_object_t obj) } DEBUG("%s %p\n", __func__, (void *)obj); - free(policer); + fal_free_deferred(policer); return 0; } +__FOR_EXPORT int fal_plugin_policer_set_attr(fal_object_t obj, const struct fal_attribute_t *attr) { @@ -216,6 +226,7 @@ int fal_plugin_policer_set_attr(fal_object_t obj, return 0; } +__FOR_EXPORT int fal_plugin_policer_get_attr(fal_object_t obj, uint32_t attr_count, struct fal_attribute_t *attr_list) @@ -258,6 +269,7 @@ int fal_plugin_policer_get_attr(fal_object_t obj, return 0; } +__FOR_EXPORT int fal_plugin_policer_get_stats_ext(fal_object_t obj, uint32_t num_counters, const enum fal_policer_stat_type *cntr_ids, diff --git a/tests/whole_dp/src/fal_plugin_ptp.c b/tests/whole_dp/src/fal_plugin_ptp.c index 238dba61..365ab6ff 100644 --- a/tests/whole_dp/src/fal_plugin_ptp.c +++ b/tests/whole_dp/src/fal_plugin_ptp.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2019, AT&T Intellectual Property. All rights reserved. + * Copyright (c) 2019-2020, AT&T Intellectual Property. All rights reserved. * Copyright (c) 2019 AT&T Intellectual Property. * All rights reserved. * @@ -14,8 +14,9 @@ #include #include +#include "dp_test.h" #include "json_writer.h" -#include "dp_test_macros.h" +#include "dp_test/dp_test_macros.h" #include "util.h" #define LOG(l, t, ...) 
\ @@ -68,9 +69,10 @@ struct fal_attribute_t *get_attribute(uint32_t id, return NULL; } +__FOR_EXPORT int fal_plugin_create_ptp_clock(uint32_t attr_count, struct fal_attribute_t *attr_list, - fal_object_t *clock_obj) + fal_object_t *clock_id) { const struct fal_attribute_t *attr; @@ -118,17 +120,26 @@ int fal_plugin_create_ptp_clock(uint32_t attr_count, dp_test_fail_unless(attr, "Must specify priority2 during create"); - *clock_obj = fal_test_plugin_ptp_next_obj++; - ptp_clock = *clock_obj; + /* if set, it will be set to 100ns */ + attr = get_attribute(FAL_PTP_CLOCK_ANTENNA_DELAY, + attr_count, + attr_list); + if (attr) + dp_test_fail_unless(attr->value.i32 == 100, + "antenna-delay should be 100 during create"); + + *clock_id = fal_test_plugin_ptp_next_obj++; + ptp_clock = *clock_id; DEBUG("created PTP clock %d, 0x%lx\n", ptp_clock_id, ptp_clock); return 0; } -int fal_plugin_dump_ptp_clock(fal_object_t clock_obj, json_writer_t *wr) +__FOR_EXPORT +int fal_plugin_dump_ptp_clock(fal_object_t clock_id, json_writer_t *wr) { - DEBUG("dump PTP clock 0x%lx\n", clock_obj); - if (clock_obj != ptp_clock) + DEBUG("dump PTP clock 0x%lx\n", clock_id); + if (clock_id != ptp_clock) return -ENODEV; jsonw_name(wr, "default-dataset"); @@ -140,21 +151,24 @@ int fal_plugin_dump_ptp_clock(fal_object_t clock_obj, json_writer_t *wr) return 0; } -int fal_plugin_delete_ptp_clock(fal_object_t clock_obj) +__FOR_EXPORT +int fal_plugin_delete_ptp_clock(fal_object_t clock_id) { - DEBUG("deleted PTP clock 0x%lx\n", clock_obj); - if (clock_obj != ptp_clock) + DEBUG("deleted PTP clock 0x%lx\n", clock_id); + if (clock_id != ptp_clock) return -ENODEV; ptp_clock = FAL_NULL_OBJECT_ID; return 0; } +__FOR_EXPORT int fal_plugin_create_ptp_port(uint32_t attr_count, struct fal_attribute_t *attr_list, - fal_object_t *port_obj) + fal_object_t *port_id) { const struct fal_attribute_t *attr; uint16_t vlan_id = 1; + int ifindex; attr = get_attribute(FAL_PTP_PORT_PORT_NUMBER, attr_count, @@ -185,12 +199,24 @@ int 
fal_plugin_create_ptp_port(uint32_t attr_count, if (num_ptp_ports == MAX_PTP_PORTS) return -ENOMEM; - ptp_domain_number = attr->value.u8; - *port_obj = fal_test_plugin_ptp_next_obj++; - ptp_ports[num_ptp_ports].obj_id = *port_obj; + attr = get_attribute(FAL_PTP_PORT_ADDITIONAL_PATH, + attr_count, + attr_list); + if (attr) { + ifindex = attr->value.ptp_port_path.ifindex; + dp_test_fail_unless(ifindex == 101, + "Expected ifindex 101 for additional path"); + + vlan_id = attr->value.ptp_port_path.vlan_id; + dp_test_fail_unless(vlan_id == 100, + "Expected vlan 100 for additional path"); + } + + *port_id = fal_test_plugin_ptp_next_obj++; + ptp_ports[num_ptp_ports].obj_id = *port_id; ptp_ports[num_ptp_ports].vlan_id = vlan_id; num_ptp_ports++; - DEBUG("created PTP port 0x%lx\n", *port_obj); + DEBUG("created PTP port 0x%lx\n", *port_id); return 0; } @@ -207,11 +233,12 @@ fal_plugin_find_ptp_port(fal_object_t port) return NULL; } -int fal_plugin_delete_ptp_port(fal_object_t port_obj) +__FOR_EXPORT +int fal_plugin_delete_ptp_port(fal_object_t port_id) { struct ptp_port *port; - port = fal_plugin_find_ptp_port(port_obj); + port = fal_plugin_find_ptp_port(port_id); if (!port) return -ENODEV; port->obj_id = FAL_NULL_OBJECT_ID; @@ -232,9 +259,10 @@ fal_plugin_find_ptp_peer(fal_object_t peer) return NULL; } +__FOR_EXPORT int fal_plugin_create_ptp_peer(uint32_t attr_count, struct fal_attribute_t *attr_list, - fal_object_t *peer_obj) + fal_object_t *peer_id) { const struct fal_attribute_t *attr; enum fal_ptp_peer_type_t type; @@ -264,25 +292,26 @@ int fal_plugin_create_ptp_peer(uint32_t attr_count, if (num_ptp_peers == MAX_PTP_PEERS) return -ENOMEM; - *peer_obj = fal_test_plugin_ptp_next_obj++; + *peer_id = fal_test_plugin_ptp_next_obj++; peer = &ptp_peers[num_ptp_peers++]; - peer->obj_id = *peer_obj; + peer->obj_id = *peer_id; peer->ip = ip; peer->type = type; - DEBUG("created PTP peer 0x%lx\n", *peer_obj); + DEBUG("created PTP peer 0x%lx\n", *peer_id); return 0; } -int 
fal_plugin_delete_ptp_peer(fal_object_t peer_obj) +__FOR_EXPORT +int fal_plugin_delete_ptp_peer(fal_object_t peer_id) { struct ptp_peer *peer; - peer = fal_plugin_find_ptp_peer(peer_obj); + peer = fal_plugin_find_ptp_peer(peer_id); if (!peer) return -ENODEV; peer->obj_id = FAL_NULL_OBJECT_ID; num_ptp_peers--; - DEBUG("deleted PTP peer 0x%lx\n", peer_obj); + DEBUG("deleted PTP peer 0x%lx\n", peer_id); return 0; } diff --git a/tests/whole_dp/src/fal_plugin_qos.c b/tests/whole_dp/src/fal_plugin_qos.c index f6d149aa..c18837fc 100644 --- a/tests/whole_dp/src/fal_plugin_qos.c +++ b/tests/whole_dp/src/fal_plugin_qos.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2018-2019, AT&T Intellectual Property. + * Copyright (c) 2018-2020, AT&T Intellectual Property. * All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only @@ -11,6 +11,10 @@ #include #include +#include "dp_test.h" +#include "dp_test/dp_test_macros.h" +#include "fal_plugin_test.h" + #define LOG(l, t, ...) \ rte_log(RTE_LOG_ ## l, \ RTE_LOGTYPE_USER1, # t ": " __VA_ARGS__) @@ -33,12 +37,13 @@ struct fal_bcm_code_point { uint8_t queue_id; uint8_t drop_precedence; uint8_t dot1p; + uint8_t des; }; struct fal_bcm_qos_map { uint8_t map_type; struct fal_bcm_code_point code_points[FAL_QOS_MAP_DSCP_VALUES]; - bool local_priority; /* Can only be true for ingress-map types */ + bool system_default; }; struct fal_bcm_qos_queue { @@ -53,7 +58,7 @@ struct fal_bcm_qos_queue { uint8_t queue_index; uint8_t queue_type; uint8_t tc; - bool local_priority; + uint8_t designator; }; struct fal_bcm_qos_sched { @@ -77,6 +82,7 @@ struct fal_bcm_qos_sched_group { uint8_t sched_level; uint8_t max_children; uint16_t vlan; + uint8_t local_prio_des; TAILQ_HEAD(children, fal_bcm_qos_sched_group) child_list; TAILQ_HEAD(queues, fal_bcm_qos_queue) queue_list; }; @@ -97,6 +103,11 @@ struct fal_bcm_qos_wred { uint8_t filter_weight; }; +struct fal_bcm_qos_ext_buf_cntr { + uint32_t buf_free; + uint32_t dropped; +}; + /** * @brief New QOS Map * @@ 
-107,20 +118,19 @@ struct fal_bcm_qos_wred { * * @return 0 on success, failure status code on error */ +__FOR_EXPORT int fal_plugin_qos_new_map(fal_object_t switch_id, uint32_t attr_count, const struct fal_attribute_t *attr_list, fal_object_t *new_map_id) { uint8_t map_type = FAL_QOS_MAP_TYPE_MAX + 1; - bool local_priority = false; + bool sys_def = false; struct fal_qos_map_list_t *map_list = NULL; uint32_t i; int ret = 0; - INFO("%s, attr-count: %u\n", __func__, attr_count); - - *new_map_id = FAL_QOS_NULL_OBJECT_ID; + DEBUG("%s, attr-count: %u\n", __func__, attr_count); for (i = 0; i < attr_count; i++) { switch (attr_list[i].id) { @@ -132,8 +142,8 @@ int fal_plugin_qos_new_map(fal_object_t switch_id, map_list = attr_list[i].value.maplist; break; - case FAL_QOS_MAP_ATTR_LOCAL_PRIORITY_QUEUE: - local_priority = attr_list[i].value.booldata; + case FAL_QOS_MAP_ATTR_INGRESS_SYSTEM_DEFAULT: + sys_def = attr_list[i].value.booldata; break; default: @@ -152,14 +162,45 @@ int fal_plugin_qos_new_map(fal_object_t switch_id, __func__); return -EINVAL; } - if ((map_type == FAL_QOS_MAP_TYPE_DOT1P_TO_TC && - map_list->count != FAL_QOS_MAP_PCP_VALUES) || - (map_type == FAL_QOS_MAP_TYPE_DSCP_TO_TC && - map_list->count != FAL_QOS_MAP_DSCP_VALUES) || - (map_type == FAL_QOS_MAP_TYPE_DSCP_TO_DOT1P && - map_list->count != FAL_QOS_MAP_DSCP_VALUES)) { - ERROR("%s: mismatch between map-type (%u) and map-list count " - "(%u)\n", __func__, map_type, map_list->count); + switch (map_type) { + case FAL_QOS_MAP_TYPE_DSCP_TO_DESIGNATOR: + case FAL_QOS_MAP_TYPE_DSCP_TO_DOT1P: + *new_map_id = FAL_QOS_NULL_OBJECT_ID; + if (map_list->count != FAL_QOS_MAP_DSCP_VALUES) { + ERROR("%s: map-type (%u), expected %d values, got %d\n", + __func__, map_type, FAL_QOS_MAP_DSCP_VALUES, + map_list->count); + return -EINVAL; + } + break; + case FAL_QOS_MAP_TYPE_DOT1P_TO_DESIGNATOR: + *new_map_id = FAL_QOS_NULL_OBJECT_ID; + if (map_list->count != FAL_QOS_MAP_PCP_VALUES) { + ERROR("%s: map-type (%u), expected %d 
values, got %d\n", + __func__, map_type, FAL_QOS_MAP_PCP_VALUES, + map_list->count); + return -EINVAL; + } + break; + + case FAL_QOS_MAP_TYPE_DESIGNATOR_TO_DOT1P: + if (map_list->count != FAL_QOS_MAP_DES_DP_VALUES) { + ERROR("%s: map-type (%u), expected %d values, got %d\n", + __func__, map_type, + FAL_QOS_MAP_DES_DP_VALUES, + map_list->count); + return -EINVAL; + } + if (*new_map_id) { + DEBUG("%s, egress-map already setup %"PRIxPTR"\n", + __func__, *new_map_id); + return 0; + } + break; + + default: + ERROR("%s: unsupported map-type (%u)\n", __func__, + map_type); return -EINVAL; } @@ -168,31 +209,25 @@ int fal_plugin_qos_new_map(fal_object_t switch_id, uint8_t cp; uint8_t i; - map = calloc(1, sizeof(*map)); + map = fal_calloc(1, sizeof(*map)); if (!map) return -ENOMEM; map->map_type = map_type; - map->local_priority = local_priority; - if (map_type == FAL_QOS_MAP_TYPE_DOT1P_TO_TC) { - for (i = 0; i < FAL_QOS_MAP_PCP_VALUES; i++) { - cp = map_list->list[i].key.dot1p; + map->system_default = sys_def; + if (map_type == FAL_QOS_MAP_TYPE_DSCP_TO_DESIGNATOR || + map_type == FAL_QOS_MAP_TYPE_DOT1P_TO_DESIGNATOR) { + for (i = 0; i < map_list->count; i++) { + cp = map_type == + FAL_QOS_MAP_TYPE_DSCP_TO_DESIGNATOR ? 
+ map_list->list[i].key.dscp : + map_list->list[i].key.dot1p; map->code_points[cp].tc_id = - map_list->list[i].value.tc; - map->code_points[cp].queue_id = - map_list->list[i].value.wrr; - map->code_points[cp].drop_precedence = - map_list->list[i].value.dp; - } - } else if (map_type == FAL_QOS_MAP_TYPE_DSCP_TO_TC) { - for (i = 0; i < FAL_QOS_MAP_DSCP_VALUES; i++) { - cp = map_list->list[i].key.dscp; - map->code_points[cp].tc_id = - map_list->list[i].value.tc; - map->code_points[cp].queue_id = - map_list->list[i].value.wrr; + map_list->list[i].value.des; map->code_points[cp].drop_precedence = map_list->list[i].value.dp; + map->code_points[cp].des = + map_list->list[i].value.des; } } else if (map_type == FAL_QOS_MAP_TYPE_DSCP_TO_DOT1P) { for (i = 0; i < FAL_QOS_MAP_DSCP_VALUES; i++) { @@ -201,11 +236,18 @@ int fal_plugin_qos_new_map(fal_object_t switch_id, map_list->list[i].value.dot1p; map->code_points[cp].drop_precedence = 0; } + } else if (map_type == FAL_QOS_MAP_TYPE_DESIGNATOR_TO_DOT1P) { + for (i = 0; i < map_list->count; i++) { + cp = map_list->list[i].key.des; + map->code_points[cp].dot1p = + map_list->list[i].value.dot1p; + } } else { ERROR("%s: unsupported map type: %u\n", __func__, map_type); ret = -EINVAL; - free(map); + fal_free_deferred(map); + return ret; } *new_map_id = (fal_object_t)map; } @@ -219,16 +261,17 @@ int fal_plugin_qos_new_map(fal_object_t switch_id, * * @return 0 on success, failure status code on error */ +__FOR_EXPORT int fal_plugin_qos_del_map(fal_object_t map_id) { struct fal_bcm_qos_map *map = (struct fal_bcm_qos_map *)map_id; - INFO("%s - %lx\n", __func__, map_id); + DEBUG("%s - %lx\n", __func__, map_id); if (map_id == FAL_QOS_NULL_OBJECT_ID) return -EINVAL; - free(map); + fal_free_deferred(map); return 0; } @@ -240,6 +283,7 @@ int fal_plugin_qos_del_map(fal_object_t map_id) * * @return 0 on success, failure status code on error */ +__FOR_EXPORT int fal_plugin_qos_upd_map(fal_object_t map_id, const struct fal_attribute_t *attr) { @@ 
-248,7 +292,7 @@ int fal_plugin_qos_upd_map(fal_object_t map_id, uint8_t cp; uint8_t i; - INFO("%s - %lx\n", __func__, map_id); + DEBUG("%s - %lx\n", __func__, map_id); if (map_id == FAL_QOS_NULL_OBJECT_ID || attr->id != FAL_QOS_MAP_ATTR_MAP_TO_VALUE_LIST) @@ -256,28 +300,24 @@ int fal_plugin_qos_upd_map(fal_object_t map_id, map_list = attr->value.maplist; - if (map->map_type == FAL_QOS_MAP_TYPE_DOT1P_TO_TC) { + if (map->map_type == FAL_QOS_MAP_TYPE_DOT1P_TO_DESIGNATOR) { if (map_list->count != FAL_QOS_MAP_PCP_VALUES) return -EINVAL; for (i = 0; i < FAL_QOS_MAP_PCP_VALUES; i++) { cp = map_list->list[i].key.dot1p; - map->code_points[cp].tc_id = map_list->list[i].value.tc; - map->code_points[cp].queue_id = - map_list->list[i].value.wrr; + map->code_points[cp].des = map_list->list[i].value.des; map->code_points[cp].drop_precedence = map_list->list[i].value.dp; } - } else if (map->map_type == FAL_QOS_MAP_TYPE_DSCP_TO_TC) { + } else if (map->map_type == FAL_QOS_MAP_TYPE_DSCP_TO_DESIGNATOR) { if (map_list->count != FAL_QOS_MAP_DSCP_VALUES) return -EINVAL; for (i = 0; i < FAL_QOS_MAP_DSCP_VALUES; i++) { cp = map_list->list[i].key.dscp; - map->code_points[cp].tc_id = - map_list->list[i].value.tc; - map->code_points[cp].queue_id = - map_list->list[i].value.wrr; + map->code_points[cp].des = + map_list->list[i].value.des; map->code_points[cp].drop_precedence = map_list->list[i].value.dp; } @@ -304,6 +344,7 @@ int fal_plugin_qos_upd_map(fal_object_t map_id, * * @return 0 on success, failure status code on error */ +__FOR_EXPORT int fal_plugin_qos_get_map_attrs(fal_object_t map_id, uint32_t attr_count, struct fal_attribute_t *attr_list) { @@ -312,7 +353,7 @@ int fal_plugin_qos_get_map_attrs(fal_object_t map_id, uint32_t attr_count, uint32_t i; int ret = 0; - INFO("%s - %lx, attr-count: %u\n", __func__, map_id, attr_count); + DEBUG("%s - %lx, attr-count: %u\n", __func__, map_id, attr_count); if (map == FAL_QOS_NULL_OBJECT_ID) return -EINVAL; @@ -327,10 +368,6 @@ int 
fal_plugin_qos_get_map_attrs(fal_object_t map_id, uint32_t attr_count, map_list = attr_list[i].value.maplist; break; - case FAL_QOS_MAP_ATTR_LOCAL_PRIORITY_QUEUE: - attr_list[i].value.booldata = map->local_priority; - break; - default: ERROR("%s: unknown qos map attribute-id %u\n", __func__, attr_list[i].id); @@ -340,31 +377,27 @@ int fal_plugin_qos_get_map_attrs(fal_object_t map_id, uint32_t attr_count, } if (map_list) { - if (map->map_type == FAL_QOS_MAP_TYPE_DOT1P_TO_TC) { - if (map_list->count != FAL_QOS_MAP_PCP_VALUES) + if (map->map_type == FAL_QOS_MAP_TYPE_DSCP_TO_DESIGNATOR || + map->map_type == FAL_QOS_MAP_TYPE_DOT1P_TO_DESIGNATOR) { + uint count = + map->map_type == + FAL_QOS_MAP_TYPE_DSCP_TO_DESIGNATOR ? + FAL_QOS_MAP_DSCP_VALUES : + FAL_QOS_MAP_PCP_VALUES; + + if (map_list->count != count) return -EINVAL; - for (i = 0; i < FAL_QOS_MAP_PCP_VALUES; i++) { - map_list->list[i].key.dot1p = i; - map_list->list[i].value.tc = - map->code_points[i].tc_id; - map_list->list[i].value.wrr = - map->code_points[i].queue_id; - map_list->list[i].value.dp = - map->code_points[i].drop_precedence; - } - } else if (map->map_type == FAL_QOS_MAP_TYPE_DSCP_TO_TC) { - if (map_list->count != FAL_QOS_MAP_DSCP_VALUES) - return -EINVAL; - - for (i = 0; i < FAL_QOS_MAP_DSCP_VALUES; i++) { - map_list->list[i].key.dscp = i; - map_list->list[i].value.tc = - map->code_points[i].tc_id; - map_list->list[i].value.wrr = - map->code_points[i].queue_id; + for (i = 0; i < count; i++) { + if (map->map_type == + FAL_QOS_MAP_TYPE_DSCP_TO_DESIGNATOR) + map_list->list[i].key.dscp = i; + else + map_list->list[i].key.dot1p = i; map_list->list[i].value.dp = map->code_points[i].drop_precedence; + map_list->list[i].value.des = + map->code_points[i].des; } } else { /* map->map_type == FAL_QOS_MAP_TYPE_DSCP_TO_DOT1P */ if (map_list->count != FAL_QOS_MAP_DSCP_VALUES) @@ -376,13 +409,101 @@ int fal_plugin_qos_get_map_attrs(fal_object_t map_id, uint32_t attr_count, map->code_points[i].dot1p; 
map_list->list[i].value.dp = map->code_points[i].drop_precedence; -; + map_list->list[i].value.des = + map->code_points[i].des; } } } return ret; } +__FOR_EXPORT +void fal_plugin_qos_dump_map(fal_object_t map_id, json_writer_t *wr) +{ + struct fal_bcm_qos_map *map = (struct fal_bcm_qos_map *)map_id; + int i; + + switch (map->map_type) { + case FAL_QOS_MAP_TYPE_DSCP_TO_DESIGNATOR: + jsonw_name(wr, "fal-qos-dscp2des"); + jsonw_start_array(wr); + for (i = 0; i < FAL_QOS_MAP_DSCP_VALUES; i++) { + int tc, dp; + + tc = map->code_points[i].tc_id; + dp = map->code_points[i].drop_precedence; + + jsonw_start_object(wr); + jsonw_uint_field(wr, "dscp", i); + jsonw_uint_field(wr, "des", tc); + jsonw_uint_field(wr, "dp", dp); + jsonw_end_object(wr); + } + + jsonw_end_array(wr); + break; + + case FAL_QOS_MAP_TYPE_DOT1P_TO_DESIGNATOR: + jsonw_name(wr, "fal-qos-dot1p2des"); + jsonw_start_array(wr); + for (i = 0; i < FAL_QOS_MAP_PCP_VALUES; i++) { + int tc, dp; + + tc = map->code_points[i].tc_id; + dp = map->code_points[i].drop_precedence; + + jsonw_start_object(wr); + jsonw_uint_field(wr, "pcp", i); + jsonw_uint_field(wr, "des", tc); + jsonw_uint_field(wr, "dp", dp); + jsonw_end_object(wr); + } + + jsonw_end_array(wr); + break; + + case FAL_QOS_MAP_TYPE_DSCP_TO_DOT1P: + jsonw_name(wr, "fal-qos-dscp2dot1p"); + jsonw_start_array(wr); + for (i = 0; i < FAL_QOS_MAP_DSCP_VALUES; i++) { + uint8_t pcp; + + pcp = map->code_points[i].dot1p; + + jsonw_start_object(wr); + jsonw_uint_field(wr, "dscp", i); + jsonw_uint_field(wr, "pcp", pcp); + jsonw_end_object(wr); + } + jsonw_end_array(wr); + break; + + case FAL_QOS_MAP_TYPE_DESIGNATOR_TO_DOT1P: + jsonw_name(wr, "fal-qos-des2dot1p"); + jsonw_start_array(wr); + for (i = 0; i < FAL_QOS_MAP_DES_DP_VALUES; i++) { + uint8_t pcp; + + pcp = map->code_points[i].dot1p; + + jsonw_start_object(wr); + jsonw_uint_field(wr, "des", + i/FAL_NUM_PACKET_COLOURS); + jsonw_uint_field(wr, "dp", + i%FAL_NUM_PACKET_COLOURS); + jsonw_uint_field(wr, "pcp", pcp); + 
jsonw_end_object(wr); + } + jsonw_end_array(wr); + break; + + default: + ERROR("Dump of unsupported map type\n"); + break; + } +} + + /** * @brief New QoS queue * @@ -393,6 +514,7 @@ int fal_plugin_qos_get_map_attrs(fal_object_t map_id, uint32_t attr_count, * * @return 0 on success, failure status code on error */ +__FOR_EXPORT int fal_plugin_qos_new_queue(fal_object_t switch_id, uint32_t attr_count, const struct fal_attribute_t *attr_list, fal_object_t *new_queue_id) @@ -405,12 +527,12 @@ int fal_plugin_qos_new_queue(fal_object_t switch_id, uint32_t attr_count, uint16_t queue_limit = 0; uint8_t queue_type = FAL_QOS_QUEUE_TYPE_MAX + 1; /* invalid value */ uint8_t queue_index = 0; - uint8_t tc; - bool local_priority = false; + uint8_t tc = 0; + uint8_t designator = 0; uint32_t i; int ret = 0; - INFO("%s - attr-count: %u\n", __func__, attr_count); + DEBUG("%s - attr-count: %u\n", __func__, attr_count); for (i = 0; i < attr_count; i++) { switch (attr_list[i].id) { @@ -446,8 +568,8 @@ int fal_plugin_qos_new_queue(fal_object_t switch_id, uint32_t attr_count, tc = attr_list[i].value.u8; break; - case FAL_QOS_QUEUE_ATTR_LOCAL_PRIORITY: - local_priority = attr_list[i].value.booldata; + case FAL_QOS_QUEUE_ATTR_DESIGNATOR: + designator = attr_list[i].value.u8; break; default: @@ -471,7 +593,7 @@ int fal_plugin_qos_new_queue(fal_object_t switch_id, uint32_t attr_count, if (!ret) { struct fal_bcm_qos_sched_group *parent_group; - queue = calloc(1, sizeof(*queue)); + queue = fal_calloc(1, sizeof(*queue)); if (!queue) return -ENOMEM; @@ -484,7 +606,7 @@ int fal_plugin_qos_new_queue(fal_object_t switch_id, uint32_t attr_count, queue->queue_type = queue_type; queue->queue_limit = queue_limit; queue->tc = tc; - queue->local_priority = local_priority; + queue->designator = designator; parent_group = (struct fal_bcm_qos_sched_group *)parent_id; TAILQ_INSERT_TAIL(&parent_group->queue_list, queue, peer_list); @@ -501,13 +623,14 @@ int fal_plugin_qos_new_queue(fal_object_t switch_id, 
uint32_t attr_count, * * @return 0 on success, failure status code on error */ +__FOR_EXPORT int fal_plugin_qos_del_queue(fal_object_t queue_id) { struct fal_bcm_qos_queue *queue = (struct fal_bcm_qos_queue *)queue_id; int ret = 0; - INFO("%s - %lx\n", __func__, queue_id); + DEBUG("%s - %lx\n", __func__, queue_id); if (!queue) return -EINVAL; @@ -524,7 +647,7 @@ int fal_plugin_qos_del_queue(fal_object_t queue_id) /* * Finally free this queue. */ - free(queue); + fal_free_deferred(queue); return ret; } @@ -536,6 +659,7 @@ int fal_plugin_qos_del_queue(fal_object_t queue_id) * * @return 0 on success, failure status code on error */ +__FOR_EXPORT int fal_plugin_qos_upd_queue(fal_object_t queue_id, const struct fal_attribute_t *attr) { @@ -544,7 +668,7 @@ int fal_plugin_qos_upd_queue(fal_object_t queue_id, struct fal_bcm_qos_sched_group *parent_group; int ret = 0; - INFO("%s: queue: %lx, attribute-id: %u, object-id: %lx\n", + DEBUG("%s: queue: %lx, attribute-id: %u, object-id: %lx\n", __func__, queue_id, attr->id, attr->value.objid); /* @@ -581,7 +705,7 @@ int fal_plugin_qos_upd_queue(fal_object_t queue_id, case FAL_QOS_QUEUE_ATTR_BUFFER_ID: case FAL_QOS_QUEUE_ATTR_QUEUE_LIMIT: case FAL_QOS_QUEUE_ATTR_TC: - case FAL_QOS_QUEUE_ATTR_LOCAL_PRIORITY: + case FAL_QOS_QUEUE_ATTR_DESIGNATOR: ERROR("%s: cannot update queue attribute-id %u\n", __func__, attr->id); ret = -EINVAL; @@ -605,6 +729,7 @@ int fal_plugin_qos_upd_queue(fal_object_t queue_id, * * @return 0 on success, failure status code on error */ +__FOR_EXPORT int fal_plugin_qos_get_queue_attrs(fal_object_t queue_id, uint32_t attr_count, struct fal_attribute_t *attr_list) { @@ -613,7 +738,7 @@ int fal_plugin_qos_get_queue_attrs(fal_object_t queue_id, uint32_t attr_count, uint32_t i; int ret = 0; - INFO("%s - %lx, attr-count: %u\n", __func__, queue_id, attr_count); + DEBUG("%s - %lx, attr-count: %u\n", __func__, queue_id, attr_count); if (!queue) return -EINVAL; @@ -652,8 +777,8 @@ int 
fal_plugin_qos_get_queue_attrs(fal_object_t queue_id, uint32_t attr_count, attr_list[i].value.u8 = queue->tc; break; - case FAL_QOS_QUEUE_ATTR_LOCAL_PRIORITY: - attr_list[i].value.booldata = queue->local_priority; + case FAL_QOS_QUEUE_ATTR_DESIGNATOR: + attr_list[i].value.u8 = queue->designator; break; default: @@ -676,6 +801,7 @@ int fal_plugin_qos_get_queue_attrs(fal_object_t queue_id, uint32_t attr_count, * * @return 0 on success, failure status code on error */ +__FOR_EXPORT int fal_plugin_qos_get_queue_stats(fal_object_t queue_id, uint32_t number_of_counters, const uint32_t *counter_ids, @@ -741,7 +867,7 @@ int fal_plugin_qos_get_queue_stats(fal_object_t queue_id, break; default: - ERROR("%s: unknown qos queue counter-id %u\n", + DEBUG("%s: unknown qos queue counter-id %u\n", __func__, counter_ids[i]); rv = -EINVAL; break; @@ -848,11 +974,12 @@ int fal_plugin_qos_get_queue_stats_ext(fal_object_t queue_id, * * @return 0 on success, failure status code on error */ +__FOR_EXPORT int fal_plugin_qos_clear_queue_stats(fal_object_t queue_id, uint32_t number_of_counters, const uint32_t *counter_ids) { - INFO("%s - %lx - to be implemented\n", __func__, queue_id); + DEBUG("%s - %lx - to be implemented\n", __func__, queue_id); return 0; } @@ -866,9 +993,10 @@ int fal_plugin_qos_clear_queue_stats(fal_object_t queue_id, * * @return 0 on success, failure status code on error */ +__FOR_EXPORT int fal_plugin_qos_new_scheduler(fal_object_t switch_id, uint32_t attr_count, const struct fal_attribute_t *attr_list, - fal_object_t *new_scheduler_id) + fal_object_t *new_sched_id) { struct fal_bcm_qos_sched *sched; uint8_t sched_type = FAL_QOS_SCHEDULING_TYPE_MAX + 1; /* bad value */ @@ -880,7 +1008,7 @@ int fal_plugin_qos_new_scheduler(fal_object_t switch_id, uint32_t attr_count, uint32_t i; int ret = 0; - INFO("%s - attr-count: %u\n", __func__, attr_count); + DEBUG("%s - attr-count: %u\n", __func__, attr_count); for (i = 0; i < attr_count; i++) { switch (attr_list[i].id) { case 
FAL_QOS_SCHEDULER_ATTR_SCHEDULING_TYPE: @@ -926,7 +1054,7 @@ int fal_plugin_qos_new_scheduler(fal_object_t switch_id, uint32_t attr_count, } if (!ret) { - sched = calloc(1, sizeof(*sched)); + sched = fal_calloc(1, sizeof(*sched)); if (!sched) return -ENOMEM; @@ -937,7 +1065,7 @@ int fal_plugin_qos_new_scheduler(fal_object_t switch_id, uint32_t attr_count, sched->max_bandwidth = max_bandwidth; sched->max_burst = max_burst; sched->overhead = overhead; - *new_scheduler_id = (fal_object_t)sched; + *new_sched_id = (fal_object_t)sched; } return ret; } @@ -949,16 +1077,17 @@ int fal_plugin_qos_new_scheduler(fal_object_t switch_id, uint32_t attr_count, * * @return 0 on success, failure status code on error */ -int fal_plugin_qos_del_scheduler(fal_object_t scheduler_id) +__FOR_EXPORT +int fal_plugin_qos_del_scheduler(fal_object_t sched_id) { struct fal_bcm_qos_sched *sched = - (struct fal_bcm_qos_sched *)scheduler_id; + (struct fal_bcm_qos_sched *)sched_id; - INFO("%s - %lx\n", __func__, scheduler_id); + DEBUG("%s - %lx\n", __func__, sched_id); if (!sched) return -EINVAL; - free(sched); + fal_free_deferred(sched); return 0; } @@ -970,14 +1099,15 @@ int fal_plugin_qos_del_scheduler(fal_object_t scheduler_id) * * @return 0 on success, failure status code on error */ -int fal_plugin_qos_upd_scheduler(fal_object_t scheduler_id, +__FOR_EXPORT +int fal_plugin_qos_upd_scheduler(fal_object_t sched_id, const struct fal_attribute_t *attr) { struct fal_bcm_qos_sched *sched = - (struct fal_bcm_qos_sched *)scheduler_id; + (struct fal_bcm_qos_sched *)sched_id; int ret = 0; - INFO("%s - %lx\n", __func__, scheduler_id); + DEBUG("%s - %lx\n", __func__, sched_id); if (!sched) return -EINVAL; @@ -1056,16 +1186,17 @@ int fal_plugin_qos_upd_scheduler(fal_object_t scheduler_id, * * @return 0 on success, failure status code on error */ -int fal_plugin_qos_get_scheduler_attrs(fal_object_t scheduler_id, +__FOR_EXPORT +int fal_plugin_qos_get_scheduler_attrs(fal_object_t sched_id, uint32_t 
attr_count, struct fal_attribute_t *attr_list) { struct fal_bcm_qos_sched *sched = - (struct fal_bcm_qos_sched *)scheduler_id; + (struct fal_bcm_qos_sched *)sched_id; uint32_t i; int ret = 0; - INFO("%s - %lx, attr-count: %u\n", __func__, scheduler_id, attr_count); + DEBUG("%s - %lx, attr-count: %u\n", __func__, sched_id, attr_count); if (!sched) return -EINVAL; @@ -1116,6 +1247,7 @@ int fal_plugin_qos_get_scheduler_attrs(fal_object_t scheduler_id, * * @return 0 on success, failure status code on error */ +__FOR_EXPORT int fal_plugin_qos_new_sched_group(fal_object_t switch_id, uint32_t attr_count, const struct fal_attribute_t *attr_list, @@ -1129,13 +1261,15 @@ int fal_plugin_qos_new_sched_group(fal_object_t switch_id, fal_object_t egress_map_id = FAL_QOS_NULL_OBJECT_ID; uint8_t max_children = 0; uint8_t sched_level; - uint16_t vlan; + uint16_t vlan = 0; bool sg_index_present = false; bool sched_level_present = false; + uint8_t lp_des; + bool lp_des_present = false; uint32_t i; int ret = 0; - INFO("%s - attr-count: %u\n", __func__, attr_count); + DEBUG("%s - attr-count: %u\n", __func__, attr_count); for (i = 0; i < attr_count; i++) { switch (attr_list[i].id) { @@ -1173,6 +1307,11 @@ int fal_plugin_qos_new_sched_group(fal_object_t switch_id, vlan = attr_list[i].value.u16; break; + case FAL_QOS_SCHED_GROUP_ATTR_LOCAL_PRIORITY_DESIGNATOR: + lp_des = attr_list[i].value.u8; + lp_des_present = true; + break; + default: ERROR("%s: unknown sched-group attribute-id %u\n", __func__, attr_list[i].id); @@ -1204,7 +1343,7 @@ int fal_plugin_qos_new_sched_group(fal_object_t switch_id, if (!ret) { struct fal_bcm_qos_sched_group *parent_group; - sched_group = calloc(1, sizeof(*sched_group)); + sched_group = fal_calloc(1, sizeof(*sched_group)); if (!sched_group) return -ENOMEM; @@ -1216,6 +1355,7 @@ int fal_plugin_qos_new_sched_group(fal_object_t switch_id, sched_group->ingress_map_id = ingress_map_id; sched_group->egress_map_id = egress_map_id; sched_group->vlan = vlan; + 
sched_group->local_prio_des = lp_des_present ? lp_des : -1; TAILQ_INIT(&sched_group->child_list); TAILQ_INIT(&sched_group->queue_list); if (parent_id != FAL_QOS_NULL_OBJECT_ID) { @@ -1240,13 +1380,14 @@ int fal_plugin_qos_new_sched_group(fal_object_t switch_id, * * @return 0 on success, failure status code on error */ +__FOR_EXPORT int fal_plugin_qos_del_sched_group(fal_object_t sched_group_id) { struct fal_bcm_qos_sched_group *sched_group = (struct fal_bcm_qos_sched_group *)sched_group_id; int ret = 0; - INFO("%s - %lx\n", __func__, sched_group_id); + DEBUG("%s - %lx\n", __func__, sched_group_id); if (!sched_group) return -EINVAL; @@ -1278,7 +1419,7 @@ int fal_plugin_qos_del_sched_group(fal_object_t sched_group_id) /* * Finally free this sched-group. */ - free(sched_group); + fal_free_deferred(sched_group); return ret; } @@ -1290,6 +1431,7 @@ int fal_plugin_qos_del_sched_group(fal_object_t sched_group_id) * * @return 0 on success, failure status code on error */ +__FOR_EXPORT int fal_plugin_qos_upd_sched_group(fal_object_t sched_group_id, const struct fal_attribute_t *attr) { @@ -1300,13 +1442,14 @@ int fal_plugin_qos_upd_sched_group(fal_object_t sched_group_id, if (sched_group == FAL_QOS_NULL_OBJECT_ID) return -EINVAL; - INFO("%s - %lx\n", __func__, sched_group_id); + DEBUG("%s - %lx\n", __func__, sched_group_id); switch (attr->id) { case FAL_QOS_SCHED_GROUP_ATTR_SG_INDEX: case FAL_QOS_SCHED_GROUP_ATTR_LEVEL: case FAL_QOS_SCHED_GROUP_ATTR_MAX_CHILDREN: case FAL_QOS_SCHED_GROUP_ATTR_VLAN_ID: + case FAL_QOS_SCHED_GROUP_ATTR_LOCAL_PRIORITY_DESIGNATOR: ERROR("%s: cannot update sched-group attribute-id %u\n", __func__, attr->id); ret = -EINVAL; @@ -1392,6 +1535,7 @@ uint32_t get_sched_group_child_count(struct fal_bcm_qos_sched_group *parent) * * @return 0 on success, failure status code on error */ +__FOR_EXPORT int fal_plugin_qos_get_sched_group_attrs(fal_object_t sched_group_id, uint32_t attr_count, struct fal_attribute_t *attr_list) @@ -1404,7 +1548,7 @@ int 
fal_plugin_qos_get_sched_group_attrs(fal_object_t sched_group_id, uint32_t child_count = 0; uint32_t i; - INFO("%s - %lx, attr-count: %u\n", + DEBUG("%s - %lx, attr-count: %u\n", __func__, sched_group_id, attr_count); if (!sched_group) @@ -1469,8 +1613,12 @@ int fal_plugin_qos_get_sched_group_attrs(fal_object_t sched_group_id, attr_list[i].value.u16 = sched_group->vlan; break; + case FAL_QOS_SCHED_GROUP_ATTR_LOCAL_PRIORITY_DESIGNATOR: + attr_list[i].value.u8 = sched_group->local_prio_des; + break; + default: - INFO("%s - attr-id %u not yet implemented\n", + DEBUG("%s - attr-id %u not yet implemented\n", __func__, attr_list[i].id); break; } @@ -1488,6 +1636,7 @@ int fal_plugin_qos_get_sched_group_attrs(fal_object_t sched_group_id, * * @return 0 on success, failure status code on error */ +__FOR_EXPORT int fal_plugin_qos_new_wred(fal_object_t switch_id, uint32_t attr_count, const struct fal_attribute_t *attr_list, fal_object_t *new_wred_id) @@ -1509,7 +1658,7 @@ int fal_plugin_qos_new_wred(fal_object_t switch_id, uint32_t attr_count, uint32_t i; int ret = 0; - INFO("%s - attr-count: %u\n", __func__, attr_count); + DEBUG("%s - attr-count: %u\n", __func__, attr_count); for (i = 0; i < attr_count; i++) { switch (attr_list[i].id) { @@ -1583,7 +1732,7 @@ int fal_plugin_qos_new_wred(fal_object_t switch_id, uint32_t attr_count, } if (!ret) { - wred = calloc(1, sizeof(*wred)); + wred = fal_calloc(1, sizeof(*wred)); if (!wred) return -ENOMEM; @@ -1613,16 +1762,17 @@ int fal_plugin_qos_new_wred(fal_object_t switch_id, uint32_t attr_count, * * @return 0 on success, failure status code on error */ +__FOR_EXPORT int fal_plugin_qos_del_wred(fal_object_t wred_id) { struct fal_bcm_qos_wred *wred = (struct fal_bcm_qos_wred *)wred_id; - INFO("%s - %lx\n", __func__, wred_id); + DEBUG("%s - %lx\n", __func__, wred_id); if (!wred) return -EINVAL; - free(wred); + fal_free_deferred(wred); return 0; } @@ -1634,10 +1784,11 @@ int fal_plugin_qos_del_wred(fal_object_t wred_id) * * @return 0 on 
success, failure status code on error */ +__FOR_EXPORT int fal_plugin_qos_upd_wred(fal_object_t wred_id, const struct fal_attribute_t *attr) { - INFO("%s - %lx - to be implemented\n", __func__, wred_id); + DEBUG("%s - %lx - to be implemented\n", __func__, wred_id); return 0; } @@ -1650,13 +1801,14 @@ int fal_plugin_qos_upd_wred(fal_object_t wred_id, * * @return 0 on success, failure status code on error */ +__FOR_EXPORT int fal_plugin_qos_get_wred_attrs(fal_object_t wred_id, uint32_t attr_count, struct fal_attribute_t *attr_list) { struct fal_bcm_qos_wred *wred = (struct fal_bcm_qos_wred *)wred_id; uint32_t i; - INFO("%s - %lx, attr_count: %u\n", __func__, wred_id, attr_count); + DEBUG("%s - %lx, attr_count: %u\n", __func__, wred_id, attr_count); if (!wred) return -EINVAL; @@ -1716,7 +1868,7 @@ int fal_plugin_qos_get_wred_attrs(fal_object_t wred_id, uint32_t attr_count, break; default: - INFO("%s - attr-id %u not yet implemented\n", + DEBUG("%s - attr-id %u not yet implemented\n", __func__, attr_list[i].id); break; } @@ -1724,3 +1876,36 @@ int fal_plugin_qos_get_wred_attrs(fal_object_t wred_id, uint32_t attr_count, return 0; } + +/** + * @brief Get value from hardware counter registers + * + * @param[in] cntr_ids Array for IDs of counters + * @param[in] num_cntrs Number of ID in 'cntr_ids' + * @param[inout] cntrs Array for value of counters + * + * @return 0 on success, failure otherwise + */ +__FOR_EXPORT +int fal_plugin_qos_get_counters(const uint32_t *cntr_ids, uint32_t num_cntrs, + uint64_t *cntrs) +{ + static int idx; + struct fal_bcm_qos_ext_buf_cntr buf_cntr[] = { + {50000, 0}, {3000, 0}, {50000, 0}, {50000, 1}, {3000, 0}, + {3000, 1}, {3000, 0} + }; + int array_size = ARRAY_SIZE(buf_cntr); + + for (uint32_t i = 0; i < num_cntrs; i++) { + if (cntr_ids[i] == FAL_QOS_EXTERNAL_BUFFER_COUNTER_ID) + cntrs[i] = buf_cntr[idx].buf_free; + else if (cntr_ids[i] == + FAL_QOS_EXTERNAL_BUFFER_PKT_REJECT_COUNTER_ID) + cntrs[i] = buf_cntr[idx].dropped; + } + + idx = (idx + 
1) % array_size; + + return 0; +} diff --git a/tests/whole_dp/src/fal_plugin_sw_port.c b/tests/whole_dp/src/fal_plugin_sw_port.c index 41fe7df7..8f4df33d 100644 --- a/tests/whole_dp/src/fal_plugin_sw_port.c +++ b/tests/whole_dp/src/fal_plugin_sw_port.c @@ -1,13 +1,13 @@ /* - * Copyright (c) 2018, AT&T Intellectual Property. + * Copyright (c) 2018-2020, AT&T Intellectual Property. * All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only */ #include -#include "dp_test_lib.h" -#include "dp_test_macros.h" -#include "dp_test_netlink_state.h" +#include "dp_test_lib_internal.h" +#include "dp_test/dp_test_macros.h" +#include "dp_test_netlink_state_internal.h" #include "fal_plugin_test.h" #include #include "fal_plugin.h" @@ -101,10 +101,11 @@ eth_tx_queue_setup(struct rte_eth_dev *dev __rte_unused, return 0; } -static void +static int eth_dev_info(struct rte_eth_dev *dev __rte_unused, struct rte_eth_dev_info *dev_info __rte_unused) { + return 0; } static int @@ -114,9 +115,10 @@ eth_stats_get(struct rte_eth_dev *dev __rte_unused, return 0; } -static void +static int eth_stats_reset(struct rte_eth_dev *dev __rte_unused) { + return 0; } static void @@ -127,7 +129,7 @@ eth_mac_addr_remove(struct rte_eth_dev *dev __rte_unused, static int eth_mac_addr_add(struct rte_eth_dev *dev __rte_unused, - struct ether_addr *mac_addr __rte_unused, + struct rte_ether_addr *mac_addr __rte_unused, uint32_t index __rte_unused, uint32_t vmdq __rte_unused) { @@ -160,8 +162,8 @@ static const struct eth_dev_ops eth_ops = { .get_module_eeprom = dp_test_get_module_eeprom, }; -#define SW_PORT_0_0 "sw_port_0_0" -#define SW_PORT_0_7 "sw_port_0_7" +#define SW_PORT_0_0 "dp1sw_port_0_0" +#define SW_PORT_0_7 "dp1sw_port_0_7" static uint16_t fal_plugin_tx_backplane_cb(void *fal_info, uint16_t backplane, uint16_t port, @@ -226,7 +228,7 @@ void fal_plugin_sw_ports_create(void) fal_sw_port_0.port_id = args.dpdk_port_id; } -__externally_visible +__FOR_EXPORT bool fal_plugin_ut_enable_rx_framer(bool 
enabled) { uint16_t backplane; @@ -236,7 +238,8 @@ bool fal_plugin_ut_enable_rx_framer(bool enabled) plugin_framer_rcv); } -int __externally_visible +__FOR_EXPORT +int fal_plugin_add_ut_framer_hdr(const char *name, struct rte_mbuf *mbuf) { struct fal_sw_port *fal_sw_port; @@ -249,10 +252,11 @@ fal_plugin_add_ut_framer_hdr(const char *name, struct rte_mbuf *mbuf) else return -1; - return plugin_framer_tx(fal_sw_port->sw_port, fal_sw_port, mbuf); + return plugin_framer_tx(fal_sw_port->sw_port, fal_sw_port, &mbuf); } -int __externally_visible +__FOR_EXPORT +int fal_plugin_get_sw_port_info(struct fal_sw_port *fal_sw_port, uint16_t *proto, uint8_t *dev, uint8_t *port) { @@ -265,7 +269,8 @@ fal_plugin_get_sw_port_info(struct fal_sw_port *fal_sw_port, uint16_t *proto, return 0; } -int __externally_visible +__FOR_EXPORT +int fal_plugin_queue_rx_direct(const char *name, struct rte_mbuf *mbuf) { struct fal_sw_port *fal_sw_port; @@ -281,7 +286,8 @@ fal_plugin_queue_rx_direct(const char *name, struct rte_mbuf *mbuf) return sw_port_enqueue_rx_mbuf(fal_sw_port->sw_port, 0, &mbuf, 1); } -int __externally_visible +__FOR_EXPORT +int fal_plugin_backplane_from_sw_port(const char *name, uint16_t *dpdk_port) { struct fal_sw_port *fal_sw_port; diff --git a/tests/whole_dp/src/fal_plugin_sw_port.h b/tests/whole_dp/src/fal_plugin_sw_port.h index 9767eeed..c61a9aba 100644 --- a/tests/whole_dp/src/fal_plugin_sw_port.h +++ b/tests/whole_dp/src/fal_plugin_sw_port.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2018, AT&T Intellectual Property. + * Copyright (c) 2018-2019, AT&T Intellectual Property. * All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-only diff --git a/tests/whole_dp/src/fal_plugin_test.c b/tests/whole_dp/src/fal_plugin_test.c index e6c5dc6a..ab4d204c 100644 --- a/tests/whole_dp/src/fal_plugin_test.c +++ b/tests/whole_dp/src/fal_plugin_test.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017-2019, AT&T Intellectual Property. + * Copyright (c) 2017-2020, AT&T Intellectual Property. 
* All rights reserved. * Copyright (c) 2017 by Brocade Communications Systems, Inc. * All rights reserved. @@ -15,9 +15,13 @@ #include #include -#include "dp_test_macros.h" +#include "compiler.h" +#include "dp_test.h" +#include "dp_test_lib_internal.h" +#include "dp_test/dp_test_macros.h" #include "util.h" #include "fal_plugin_sw_port.h" +#include "dp_test_lib_internal.h" #define LOG(l, t, ...) \ rte_log(RTE_LOG_ ## l, \ @@ -36,26 +40,42 @@ #include "fal_plugin_test.h" -struct ether_addr; +struct rte_ether_addr; static fal_object_t fal_test_plugin_next_obj = 1; +static zhash_t *fal_test_brports; +static zhash_t *fal_test_ingressm_port; + +__FOR_EXPORT int fal_plugin_init(void) { INFO("Initializing test fal plugin\n"); + + fal_test_brports = zhash_new(); + if (!fal_test_brports) + return -ENOMEM; + + fal_test_ingressm_port = zhash_new(); + if (!fal_test_ingressm_port) + return -ENOMEM; + return 0; } +__FOR_EXPORT int fal_plugin_init_log(void) { DEBUG("%s()\n", __func__); return 0; } +__FOR_EXPORT void fal_plugin_setup_interfaces(void) { uint16_t portid; int ret; + uint16_t test_vlan = 16; struct bridge_vlan_set *set; DEBUG("%s()\n", __func__); @@ -69,6 +89,17 @@ void fal_plugin_setup_interfaces(void) set = bridge_vlan_set_create(); dp_test_fail_unless((set != NULL), "Expected bridge_vlan_set_create() to not return NULL"); + + bridge_vlan_set_add(set, test_vlan); + + dp_test_fail_unless((bridge_vlan_set_is_empty(set) == false), + "Expected non-EMPTY bridge vlan set"); + + bridge_vlan_set_clear(set); + + dp_test_fail_unless((bridge_vlan_set_is_empty(set) != false), + "Expected EMPTY bridge vlan set"); + bridge_vlan_set_free(set); fal_plugin_sw_ports_create(); @@ -88,14 +119,15 @@ struct fal_attribute_t *get_attribute(uint32_t id, return NULL; } -void fal_plugin_l2_new_port(unsigned int if_index, +__FOR_EXPORT +void fal_plugin_l2_new_port(unsigned int ifindex, uint32_t attr_count, const struct fal_attribute_t *attr_list) { const struct fal_attribute_t *attr; 
DEBUG("%s(if_index %d, attr_count %d, ...)\n", - __func__, if_index, attr_count); + __func__, ifindex, attr_count); dp_test_fail_unless((get_attribute(FAL_PORT_ATTR_KIND, attr_count, attr_list) != NULL), @@ -106,40 +138,60 @@ void fal_plugin_l2_new_port(unsigned int if_index, "Expected FAL_PORT_ATTR_NAME attribute"); if (attr != NULL) { DEBUG("%s(if_index %d, ...) name %s\n", - __func__, if_index, attr->value.if_name); + __func__, ifindex, attr->value.if_name); } attr = get_attribute(FAL_PORT_ATTR_DPDK_PORT, attr_count, attr_list); if (attr) { DEBUG("%s(if_index %d, ...) port %d\n", - __func__, if_index, attr->value.u8); + __func__, ifindex, attr->value.u8); } attr = get_attribute(FAL_PORT_ATTR_VLAN_ID, attr_count, attr_list); if (attr) { DEBUG("%s(if_index %d, ...) VLAN_ID %d\n", - __func__, if_index, attr->value.u16); + __func__, ifindex, attr->value.u16); } attr = get_attribute(FAL_PORT_ATTR_PARENT_IFINDEX, attr_count, attr_list); if (attr) { DEBUG("%s(if_index %d, ...) parent if_index %d\n", - __func__, if_index, attr->value.u32); + __func__, ifindex, attr->value.u32); } attr = get_attribute(FAL_PORT_ATTR_MTU, attr_count, attr_list); if (attr) { DEBUG("%s(if_index %d, ...) 
MTU %d\n", - __func__, if_index, attr->value.u16); + __func__, ifindex, attr->value.u16); } } -int fal_plugin_l2_get_attrs(unsigned int if_index, +__FOR_EXPORT +int fal_plugin_l2_get_attrs(unsigned int ifindex, uint32_t attr_count, struct fal_attribute_t *attr_list) { + uint32_t i; + char ifi_str[16]; + int rc = -EOPNOTSUPP; + DEBUG("%s(if_index %d, attr_count %d, ...)\n", - __func__, if_index, attr_count); + __func__, ifindex, attr_count); - return -1; + for (i = 0; i < attr_count; i++) { + switch (attr_list[i].id) { + case FAL_PORT_ATTR_QOS_INGRESS_MAP_ID: + snprintf(ifi_str, sizeof(ifi_str), + "%u", ifindex); + attr_list[i].value.objid = + (fal_object_t)zhash_lookup + (fal_test_ingressm_port, ifi_str); + rc = 0; + break; + default: + DEBUG("%s requested %u\n", __func__, attr_list[i].id); + break; + } + } + return rc; } static const char *fal_port_attr_t_to_str(enum fal_port_attr_t val) @@ -181,6 +233,10 @@ static const char *fal_port_attr_t_to_str(enum fal_port_attr_t val) return "ingress_mirror_session"; case FAL_PORT_ATTR_EGRESS_MIRROR_SESSION: return "egress_mirror_session"; + case FAL_PORT_ATTR_INGRESS_MIRROR_VLAN: + return "ingress_mirror_vlans"; + case FAL_PORT_ATTR_EGRESS_MIRROR_VLAN: + return "egress_mirror_vlans"; case FAL_PORT_ATTR_HW_MIRRORING: return "hw_mirroring"; case FAL_PORT_ATTR_UNICAST_STORM_CONTROL_POLICER_ID: @@ -191,88 +247,150 @@ static const char *fal_port_attr_t_to_str(enum fal_port_attr_t val) return "mcast-storm_ctl"; case FAL_PORT_ATTR_FDB_AGING_TIME: return "fdb-aging-time"; + case FAL_PORT_ATTR_QOS_INGRESS_MAP_ID: + return "ingress-map-id"; + case FAL_PORT_ATTR_CAPTURE_BIND: + return "capture-bind"; + case FAL_PORT_ATTR_HW_CAPTURE: + return "hw-capture"; + case FAL_PORT_ATTR_GLOBAL_FLOW_CONTROL_MODE: + return "pause"; + case FAL_PORT_ATTR_REMOTE_ADVERTISED_FLOW_CONTROL_MODE: + return "pause-advertised"; + case FAL_PORT_ATTR_QOS_EGRESS_MAP_ID: + return "egress-map-id"; + case FAL_PORT_ATTR_SYNCE_ADMIN_STATUS: + return "synce_admin"; 
} assert(0); return "ERROR"; } -void fal_plugin_l2_upd_port(unsigned int if_index, - struct fal_attribute_t *attr) +__FOR_EXPORT +int fal_plugin_l2_upd_port(unsigned int ifindex, + struct fal_attribute_t *attr) { + char ifi_str[16]; DEBUG("%s(if_index %d, { id %d %s %p })\n", - __func__, if_index, attr->id, - fal_port_attr_t_to_str(attr->id), - attr->value.ptr); + __func__, ifindex, attr->id, + fal_port_attr_t_to_str(attr->id), + attr->value.ptr); + if (attr->id == FAL_PORT_ATTR_QOS_INGRESS_MAP_ID) { + snprintf(ifi_str, sizeof(ifi_str), "%u", ifindex); + if (attr->value.objid != FAL_NULL_OBJECT_ID) + zhash_insert(fal_test_ingressm_port, ifi_str, + (void *)(fal_object_t) + attr->value.objid); + else + zhash_delete(fal_test_ingressm_port, ifi_str); + } + + return 0; } -void fal_plugin_l2_del_port(unsigned int if_index) +__FOR_EXPORT +void fal_plugin_l2_del_port(unsigned int ifindex) { - DEBUG("%s(if_index %d)\n", __func__, if_index); + DEBUG("%s(if_index %d)\n", __func__, ifindex); } -void fal_plugin_l2_new_addr(unsigned int if_index, - const struct ether_addr *addr, +__FOR_EXPORT +void fal_plugin_l2_new_addr(unsigned int ifindex, + const struct rte_ether_addr *addr, uint32_t attr_count, const struct fal_attribute_t *attr_list) { char __addr[19]; ether_ntoa_r(addr, __addr); - DEBUG("%s(if_index %d, addr %s, ...)\n", __func__, if_index, __addr); + DEBUG("%s(if_index %d, addr %s, ...)\n", __func__, ifindex, __addr); } -void fal_plugin_l2_upd_addr(unsigned int if_index, - const struct ether_addr *addr, +__FOR_EXPORT +void fal_plugin_l2_upd_addr(unsigned int ifindex, + const struct rte_ether_addr *addr, struct fal_attribute_t *attr) { char __addr[19]; ether_ntoa_r(addr, __addr); - DEBUG("%s(if_index %d, addr %s, ...)\n", __func__, if_index, __addr); + DEBUG("%s(if_index %d, addr %s, ...)\n", __func__, ifindex, __addr); } -void fal_plugin_l2_del_addr(unsigned int if_index, - const struct ether_addr *addr) +__FOR_EXPORT +void fal_plugin_l2_del_addr(unsigned int ifindex, + 
const struct rte_ether_addr *addr) { char __addr[19]; ether_ntoa_r(addr, __addr); - DEBUG("%s(if_index %d, addr %p)\n", __func__, if_index, __addr); + DEBUG("%s(if_index %d, addr %s)\n", __func__, ifindex, __addr); } +__FOR_EXPORT void fal_plugin_br_new_port(unsigned int bridge_ifindex, unsigned int child_ifindex, uint32_t attr_count, const struct fal_attribute_t *attr_list) { + char ifi_str[16]; + DEBUG("%s(bridge_ifindex %d, child_ifindex %d, attr_count %d...)\n", __func__, bridge_ifindex, child_ifindex, attr_count); + + snprintf(ifi_str, sizeof(ifi_str), "%u", child_ifindex); + + dp_test_fail_unless(!zhash_lookup(fal_test_brports, ifi_str), + "duplicate %s for %u\n", + __func__, child_ifindex); + + zhash_insert(fal_test_brports, ifi_str, + (void *)(uintptr_t)bridge_ifindex); } -void fal_plugin_br_upd_port(unsigned int if_index, +__FOR_EXPORT +void fal_plugin_br_upd_port(unsigned int child_ifindex, struct fal_attribute_t *attr) { + char ifi_str[16]; + DEBUG("%s(if_index %d, attr { id %d, ... 
})\n", - __func__, if_index, attr->id); + __func__, child_ifindex, attr->id); + + snprintf(ifi_str, sizeof(ifi_str), "%u", child_ifindex); + dp_test_fail_unless(zhash_lookup(fal_test_brports, ifi_str), + "missing fal_plugin_br_new_port for %u\n", + child_ifindex); } +__FOR_EXPORT void fal_plugin_br_del_port(unsigned int bridge_ifindex, unsigned int child_ifindex) { + char ifi_str[16]; + DEBUG("%s(bridge_ifindex %d, child_ifindex %d)\n", __func__, bridge_ifindex, child_ifindex); + + snprintf(ifi_str, sizeof(ifi_str), "%u", child_ifindex); + dp_test_fail_unless(zhash_lookup(fal_test_brports, ifi_str), + "missing fal_plugin_br_new_port for %u\n", + child_ifindex); + zhash_delete(fal_test_brports, ifi_str); } -void fal_plugin_br_new_neigh(unsigned int if_index, +__FOR_EXPORT +void fal_plugin_br_new_neigh(unsigned int child_ifindex, uint16_t vlanid, - const struct ether_addr *addr, + const struct rte_ether_addr *dst, uint32_t attr_count, const struct fal_attribute_t *attr_list) { char __addr[19]; - ether_ntoa_r(addr, __addr); - DEBUG("%s(if_index %u, vlanid %hu, addr %s, ...)\n", __func__, if_index, + ether_ntoa_r(dst, __addr); + DEBUG("%s(if_index %u, vlanid %hu, addr %s, ...)\n", __func__, + child_ifindex, vlanid, __addr); dp_test_fail_unless((get_attribute(FAL_BRIDGE_NEIGH_ATTR_STATE, @@ -281,29 +399,32 @@ void fal_plugin_br_new_neigh(unsigned int if_index, "Expected FAL_BRIDGE_NEIGH_ATTR_STATE"); } -void fal_plugin_br_upd_neigh(unsigned int if_index, +__FOR_EXPORT +void fal_plugin_br_upd_neigh(unsigned int child_ifindex, uint16_t vlanid, - const struct ether_addr *addr, + const struct rte_ether_addr *dst, struct fal_attribute_t *attr) { char __addr[19]; - ether_ntoa_r(addr, __addr); - DEBUG("%s(if_index %u, vlanid %hu, addr %s, ...)\n", __func__, if_index, - vlanid, __addr); + ether_ntoa_r(dst, __addr); + DEBUG("%s(if_index %u, vlanid %hu, addr %s, ...)\n", __func__, + child_ifindex, vlanid, __addr); } -void fal_plugin_br_del_neigh(unsigned int if_index, 
+__FOR_EXPORT +void fal_plugin_br_del_neigh(unsigned int child_ifindex, uint16_t vlanid, - const struct ether_addr *addr) + const struct rte_ether_addr *dst) { char __addr[19]; - ether_ntoa_r(addr, __addr); - DEBUG("%s(if_index %u, vlanid %hu, addr %p)\n", __func__, if_index, - vlanid, __addr); + ether_ntoa_r(dst, __addr); + DEBUG("%s(if_index %u, vlanid %hu, addr %s)\n", __func__, + child_ifindex, vlanid, __addr); } +__FOR_EXPORT void fal_plugin_br_flush_neigh(unsigned int bridge_ifindex, uint32_t attr_count, const struct fal_attribute_t *attr_list) @@ -363,7 +484,8 @@ static void fal_ntop(const struct fal_ip_address_t *ipaddr, inet_ntop(AF_INET6, &(ipaddr->addr.addr6), buf, size); } -int fal_plugin_ip_new_neigh(unsigned int if_index, +__FOR_EXPORT +int fal_plugin_ip_new_neigh(unsigned int ifindex, struct fal_ip_address_t *ipaddr, uint32_t attr_count, const struct fal_attribute_t *attr_list) @@ -372,12 +494,13 @@ int fal_plugin_ip_new_neigh(unsigned int if_index, fal_ntop(ipaddr, __ipaddr, sizeof(__ipaddr)); DEBUG("%s(if_index %d, ipaddr %s, ...)\n", - __func__, if_index, __ipaddr); + __func__, ifindex, __ipaddr); return 0; } -int fal_plugin_ip_upd_neigh(unsigned int if_index, +__FOR_EXPORT +int fal_plugin_ip_upd_neigh(unsigned int ifindex, struct fal_ip_address_t *ipaddr, struct fal_attribute_t *attr) { @@ -389,26 +512,28 @@ int fal_plugin_ip_upd_neigh(unsigned int if_index, ether_ntoa_r(&attr->value.mac, eaddr); DEBUG("%s(if_index %d, ipaddr %s, { id %d, mac %s })\n", - __func__, if_index, __ipaddr, attr->id, eaddr); + __func__, ifindex, __ipaddr, attr->id, eaddr); } else DEBUG("%s(if_index %d, ipaddr %s, { id %d, ... 
})\n", - __func__, if_index, __ipaddr, attr->id); + __func__, ifindex, __ipaddr, attr->id); return 0; } -int fal_plugin_ip_del_neigh(unsigned int if_index, +__FOR_EXPORT +int fal_plugin_ip_del_neigh(unsigned int ifindex, struct fal_ip_address_t *ipaddr) { char __ipaddr[64]; fal_ntop(ipaddr, __ipaddr, sizeof(__ipaddr)); - DEBUG("%s(if_index %d, ipaddr %s)\n", __func__, if_index, __ipaddr); + DEBUG("%s(if_index %d, ipaddr %s)\n", __func__, ifindex, __ipaddr); return 0; } -void fal_plugin_ip_new_addr(unsigned int if_index, +__FOR_EXPORT +void fal_plugin_ip_new_addr(unsigned int ifindex, struct fal_ip_address_t *ipaddr, uint8_t prefixlen, uint32_t attr_count, @@ -418,10 +543,11 @@ void fal_plugin_ip_new_addr(unsigned int if_index, fal_ntop(ipaddr, __ipaddr, sizeof(__ipaddr)); DEBUG("%s(if_index %d, ipaddr %s/%d, ...)\n", - __func__, if_index, __ipaddr, prefixlen); + __func__, ifindex, __ipaddr, prefixlen); } -void fal_plugin_ip_upd_addr(unsigned int if_index, +__FOR_EXPORT +void fal_plugin_ip_upd_addr(unsigned int ifindex, struct fal_ip_address_t *ipaddr, uint8_t prefixlen, struct fal_attribute_t *attr) @@ -431,10 +557,11 @@ void fal_plugin_ip_upd_addr(unsigned int if_index, fal_ntop(ipaddr, __ipaddr, sizeof(__ipaddr)); DEBUG("%s(if_index %d, ipaddr %s/%d, { id %d, ... 
})\n", - __func__, if_index, __ipaddr, prefixlen, attr->id); + __func__, ifindex, __ipaddr, prefixlen, attr->id); } -void fal_plugin_ip_del_addr(unsigned int if_index, +__FOR_EXPORT +void fal_plugin_ip_del_addr(unsigned int ifindex, struct fal_ip_address_t *ipaddr, uint8_t prefixlen) { @@ -442,9 +569,10 @@ void fal_plugin_ip_del_addr(unsigned int if_index, fal_ntop(ipaddr, __ipaddr, sizeof(__ipaddr)); DEBUG("%s(if_index %d, ipaddr %s/%d)\n", - __func__, if_index, __ipaddr, prefixlen); + __func__, ifindex, __ipaddr, prefixlen); } +__FOR_EXPORT int fal_plugin_ip_new_next_hop_group(uint32_t attr_count, const struct fal_attribute_t *attr_list, fal_object_t *obj) @@ -454,6 +582,7 @@ int fal_plugin_ip_new_next_hop_group(uint32_t attr_count, return 0; } +__FOR_EXPORT int fal_plugin_ip_upd_next_hop_group(fal_object_t obj, const struct fal_attribute_t *attr) { @@ -461,12 +590,14 @@ int fal_plugin_ip_upd_next_hop_group(fal_object_t obj, return 0; } +__FOR_EXPORT int fal_plugin_ip_del_next_hop_group(fal_object_t obj) { DEBUG("%s(0x%lx)\n", __func__, obj); return 0; } +__FOR_EXPORT int fal_plugin_ip_new_next_hops(uint32_t nh_count, const uint32_t *attr_count, const struct fal_attribute_t **attr_list, @@ -504,6 +635,7 @@ int fal_plugin_ip_new_next_hops(uint32_t nh_count, return 0; } +__FOR_EXPORT int fal_plugin_ip_upd_next_hop(fal_object_t obj, const struct fal_attribute_t *attr) { @@ -512,6 +644,7 @@ int fal_plugin_ip_upd_next_hop(fal_object_t obj, return 0; } +__FOR_EXPORT int fal_plugin_ip_del_next_hops(uint32_t nh_count, const fal_object_t *obj) { @@ -520,7 +653,8 @@ int fal_plugin_ip_del_next_hops(uint32_t nh_count, return 0; } -int fal_plugin_ip_new_route(unsigned int vrfid, +__FOR_EXPORT +int fal_plugin_ip_new_route(unsigned int vrf_id, struct fal_ip_address_t *ipaddr, uint8_t prefixlen, uint32_t tableid, @@ -532,7 +666,7 @@ int fal_plugin_ip_new_route(unsigned int vrfid, fal_ntop(ipaddr, __ipaddr, sizeof(__ipaddr)); DEBUG("%s(vrfid %d, ipaddr %s/%d, tableid %d, 
attr_count %d, ...)\n", - __func__, vrfid, __ipaddr, + __func__, vrf_id, __ipaddr, prefixlen, tableid, attr_count); attr = get_attribute(FAL_ROUTE_ENTRY_ATTR_NEXT_HOP_GROUP, @@ -552,7 +686,8 @@ int fal_plugin_ip_new_route(unsigned int vrfid, return 0; } -int fal_plugin_ip_upd_route(unsigned int vrfid, +__FOR_EXPORT +int fal_plugin_ip_upd_route(unsigned int vrf_id, struct fal_ip_address_t *ipaddr, uint8_t prefixlen, uint32_t tableid, @@ -563,7 +698,7 @@ int fal_plugin_ip_upd_route(unsigned int vrfid, fal_ntop(ipaddr, __ipaddr, sizeof(__ipaddr)); DEBUG("%s(vrfid %d, ipaddr %s/%d, " "tableid %d, { id %d, ... })\n", - __func__, vrfid, __ipaddr, prefixlen, tableid, attr->id); + __func__, vrf_id, __ipaddr, prefixlen, tableid, attr->id); switch (attr->id) { case FAL_ROUTE_ENTRY_ATTR_NEXT_HOP_GROUP: DEBUG("%s() next-hop-group 0x%lx\n", __func__, @@ -578,7 +713,8 @@ int fal_plugin_ip_upd_route(unsigned int vrfid, return 0; } -int fal_plugin_ip_del_route(unsigned int vrfid, +__FOR_EXPORT +int fal_plugin_ip_del_route(unsigned int vrf_id, struct fal_ip_address_t *ipaddr, uint8_t prefixlen, uint32_t tableid) @@ -587,15 +723,16 @@ int fal_plugin_ip_del_route(unsigned int vrfid, fal_ntop(ipaddr, __ipaddr, sizeof(__ipaddr)); DEBUG("%s(vrfid %d, ipaddr %s/%d, tableid %d, ...)\n", - __func__, vrfid, __ipaddr, prefixlen, tableid); + __func__, vrf_id, __ipaddr, prefixlen, tableid); return 0; } #define STP_INST_CHECK(_inst) \ - dp_test_fail_unless((_inst >= 0) && (_inst < STP_INST_COUNT), \ - "invalid STP instance value: %u", _inst) + dp_test_fail_unless(((_inst) >= 0) && ((_inst) < STP_INST_COUNT), \ + "invalid STP instance value: %u", (_inst)) +__FOR_EXPORT int fal_plugin_stp_create(unsigned int bridge_ifindex, uint32_t attr_count, const struct fal_attribute_t *attr_list, @@ -636,6 +773,7 @@ int fal_plugin_stp_create(unsigned int bridge_ifindex, return 0; } +__FOR_EXPORT int fal_plugin_stp_delete(fal_object_t obj) { dp_test_fail_unless(obj != 0, "missing STP object"); @@ -643,6 
+781,7 @@ int fal_plugin_stp_delete(fal_object_t obj) return 0; } +__FOR_EXPORT int fal_plugin_stp_set_attribute(fal_object_t obj, const struct fal_attribute_t *attr_list) { @@ -657,6 +796,7 @@ int fal_plugin_stp_set_attribute(fal_object_t obj, return 0; } +__FOR_EXPORT int fal_plugin_stp_get_attribute(fal_object_t obj, uint32_t attr_count, struct fal_attribute_t *attr_list) { @@ -665,6 +805,7 @@ int fal_plugin_stp_get_attribute(fal_object_t obj, uint32_t attr_count, return 0; } +__FOR_EXPORT int fal_plugin_stp_set_port_attribute(unsigned int child_ifindex, uint32_t attr_count, const struct fal_attribute_t *attr_list) @@ -702,6 +843,7 @@ int fal_plugin_stp_set_port_attribute(unsigned int child_ifindex, return 0; } +__FOR_EXPORT int fal_plugin_stp_get_port_attribute(unsigned int child_ifindex, uint32_t attr_count, struct fal_attribute_t *attr_list) @@ -711,16 +853,21 @@ int fal_plugin_stp_get_port_attribute(unsigned int child_ifindex, return 0; } +__FOR_EXPORT void fal_plugin_cleanup(void) { DEBUG("%s\n", __func__); + + zhash_destroy(&fal_test_brports); } +__FOR_EXPORT void fal_plugin_command(FILE *f, int argc, char **argv) { DEBUG("%s\n", __func__); } +__FOR_EXPORT int fal_plugin_vlan_feature_create(uint32_t attr_count, const struct fal_attribute_t *attr_list, fal_object_t *obj) @@ -728,8 +875,10 @@ int fal_plugin_vlan_feature_create(uint32_t attr_count, struct vlan_feat *vf; uint i; - vf = calloc(1, sizeof(*vf)); + /* explicitly test fal_malloc function */ + vf = fal_malloc(sizeof(*vf)); assert(vf); + memset(vf, 0, sizeof(*vf)); DEBUG("%s start\n", __func__); for (i = 0; i < attr_count; i++) { @@ -760,6 +909,15 @@ int fal_plugin_vlan_feature_create(uint32_t attr_count, DEBUG("%s attr: MCAST: %p\n", __func__, vf->policer[FAL_TRAFFIC_MCAST]); break; + case FAL_VLAN_FEATURE_ATTR_MAC_LIMIT: + vf->mac_limit = attr_list[i].value.u32; + DEBUG("%s attr: MAC_LIMIT: %d\n", __func__, + vf->mac_limit); + break; + case FAL_VLAN_FEATURE_ATTR_QOS_INGRESS_MAP_ID: + vf->map_obj = 
attr_list[i].value.objid; + break; + } } @@ -768,15 +926,17 @@ int fal_plugin_vlan_feature_create(uint32_t attr_count, return 0; } +__FOR_EXPORT int fal_plugin_vlan_feature_delete(fal_object_t obj) { struct vlan_feat *vf = (struct vlan_feat *)obj; DEBUG("%s %p\n", __func__, (void *)obj); - free(vf); + fal_free_deferred(vf); return 0; } +__FOR_EXPORT int fal_plugin_vlan_feature_set_attr(fal_object_t obj, const struct fal_attribute_t *attr) { @@ -805,12 +965,46 @@ int fal_plugin_vlan_feature_set_attr(fal_object_t obj, DEBUG("%s attr: MCAST: %p\n", __func__, vf->policer[FAL_TRAFFIC_MCAST]); break; + case FAL_VLAN_FEATURE_ATTR_MAC_LIMIT: + vf->mac_limit = attr->value.u32; + DEBUG("%s attr: MAC_LIMIT: %d\n", __func__, vf->mac_limit); + break; + } + return 0; +} + +__FOR_EXPORT +int +fal_plugin_vlan_feature_get_attr(fal_object_t obj, uint32_t attr_count, + struct fal_attribute_t *attr_list) +{ + uint32_t i; + struct vlan_feat *vf = (struct vlan_feat *)obj; + + for (i = 0; i < attr_count; i++) { + switch (attr_list[i].id) { + case FAL_VLAN_FEATURE_ATTR_QOS_INGRESS_MAP_ID: + attr_list[i].value.objid = vf->map_obj; + break; + + case FAL_VLAN_FEATURE_ATTR_MAC_COUNT: + attr_list[i].value.u32 = 0; + break; + + default: + ERROR("%s: Unhandled attribute %d\n", + __func__, attr_list[i].id); + break; + } } + return 0; } + static bool vlan_stats_cleared; +__FOR_EXPORT int fal_plugin_vlan_get_stats(uint16_t vlan, uint32_t num_cntrs, const enum fal_vlan_stat_type *cntr_ids, uint64_t *cntrs) @@ -835,6 +1029,7 @@ int fal_plugin_vlan_get_stats(uint16_t vlan, uint32_t num_cntrs, return 0; } +__FOR_EXPORT int fal_plugin_vlan_clear_stats(uint16_t vlan, uint32_t num_cntrs, const enum fal_vlan_stat_type *cntr_ids) { @@ -845,3 +1040,34 @@ int fal_plugin_vlan_clear_stats(uint16_t vlan, uint32_t num_cntrs, vlan_stats_cleared = true; return 0; } + +__FOR_EXPORT +int fal_plugin_create_router_interface(uint32_t attr_count, + struct fal_attribute_t *attr_list, + fal_object_t *obj) +{ + 
DEBUG("%s(attr_count %d, ...)\n", + __func__, attr_count); + + *obj = fal_test_plugin_next_obj++; + + return 0; +} + +__FOR_EXPORT +int fal_plugin_delete_router_interface(fal_object_t obj) +{ + DEBUG("%s(0x%lx)\n", __func__, obj); + + return 0; +} + +__FOR_EXPORT +int +fal_plugin_set_router_interface_attr(fal_object_t obj, + const struct fal_attribute_t *attr_list) +{ + DEBUG("%s(0x%lx, ...)\n", __func__, obj); + + return 0; +} diff --git a/tests/whole_dp/src/fal_plugin_test.h b/tests/whole_dp/src/fal_plugin_test.h index 8420114b..47a620dd 100644 --- a/tests/whole_dp/src/fal_plugin_test.h +++ b/tests/whole_dp/src/fal_plugin_test.h @@ -1,8 +1,16 @@ +/* + * Copyright (c) 2017-2020, AT&T Intellectual Property. All rights reserved. + * + * SPDX-License-Identifier: LGPL-2.1-only + */ + #ifndef FAL_PLUGIN_TEST_H #define FAL_PLUGIN_TEST_H #include extern bool dp_test_fal_plugin_called; +extern uint32_t dp_test_fal_plugin_state; +extern void *dp_test_fal_plugin_ptr; struct fal_policer { uint32_t meter; /* always packets */ @@ -16,6 +24,8 @@ struct fal_policer { struct vlan_feat { int ifindex; uint16_t vlan; + uint32_t mac_limit; + fal_object_t map_obj; struct fal_policer *policer[FAL_TRAFFIC_MAX]; }; diff --git a/tests/whole_dp/src/fal_plugin_test.sym b/tests/whole_dp/src/fal_plugin_test.sym deleted file mode 100644 index c726ec9d..00000000 --- a/tests/whole_dp/src/fal_plugin_test.sym +++ /dev/null @@ -1,92 +0,0 @@ -fal_plugin_init -fal_plugin_init_log -fal_plugin_setup_interfaces -fal_plugin_l2_new_port -fal_plugin_l2_get_attrs -fal_plugin_l2_upd_port -fal_plugin_l2_del_port -fal_plugin_l2_new_addr -fal_plugin_l2_upd_addr -fal_plugin_l2_del_addr -fal_plugin_br_new_port -fal_plugin_br_upd_port -fal_plugin_br_del_port -fal_plugin_br_new_neigh -fal_plugin_br_upd_neigh -fal_plugin_br_del_neigh -fal_plugin_br_flush_neigh -fal_plugin_stp_create -fal_plugin_stp_delete -fal_plugin_stp_set_attribute -fal_plugin_stp_get_attribute -fal_plugin_stp_set_port_attribute 
-fal_plugin_stp_get_port_attribute -fal_plugin_ip_new_neigh -fal_plugin_ip_upd_neigh -fal_plugin_ip_del_neigh -fal_plugin_ip_new_addr -fal_plugin_ip_upd_addr -fal_plugin_ip_del_addr -fal_plugin_ip_new_route -fal_plugin_ip_upd_route -fal_plugin_ip_del_route -fal_plugin_ip_new_next_hop_group -fal_plugin_ip_upd_next_hop_group -fal_plugin_ip_del_next_hop_group -fal_plugin_ip_new_next_hops -fal_plugin_ip_upd_next_hop -fal_plugin_ip_del_next_hops -fal_plugin_cleanup -fal_plugin_command -fal_plugin_add_ut_framer_hdr -fal_plugin_queue_rx_direct -fal_plugin_ut_enable_rx_framer -fal_plugin_backplane_from_sw_port -fal_plugin_qos_new_map -fal_plugin_qos_del_map -fal_plugin_qos_upd_map -fal_plugin_qos_get_map_attrs -fal_plugin_qos_new_queue -fal_plugin_qos_del_queue -fal_plugin_qos_upd_queue -fal_plugin_qos_get_queue_attrs -fal_plugin_qos_get_queue_stats -fal_plugin_qos_clear_queue_stats -fal_plugin_qos_new_scheduler -fal_plugin_qos_del_scheduler -fal_plugin_qos_upd_scheduler -fal_plugin_qos_get_scheduler_attrs -fal_plugin_qos_new_sched_group -fal_plugin_qos_del_sched_group -fal_plugin_qos_upd_sched_group -fal_plugin_qos_get_sched_group_attrs -fal_plugin_qos_new_wred -fal_plugin_qos_del_wred -fal_plugin_qos_upd_wred -fal_plugin_qos_get_wred_attrs -fal_plugin_mirror_session_create -fal_plugin_mirror_session_deletete -fal_plugin_mirror_session_set_attr -fal_plugin_mirror_session_get_attr -fal_plugin_policer_create -fal_plugin_policer_delete -fal_plugin_policer_set_attr -fal_plugin_policer_get_attr -fal_plugin_policer_get_stats_ext -fal_plugin_vlan_feature_create -fal_plugin_vlan_feature_delete -fal_plugin_vlan_feature_update -fal_plugin_vlan_feature_set_attr -fal_plugin_vlan_clear_stats -fal_plugin_vlan_get_stats -fal_plugin_create_cpp_limiter -fal_plugin_remove_cpp_limiter -fal_plugin_get_cpp_limiter_attribute -fal_plugin_set_switch_attribute -fal_plugin_create_ptp_clock -fal_plugin_delete_ptp_clock -fal_plugin_dump_ptp_clock -fal_plugin_create_ptp_port 
-fal_plugin_delete_ptp_port -fal_plugin_create_ptp_peer -fal_plugin_delete_ptp_peer diff --git a/tests/whole_dp/src/platform.conf b/tests/whole_dp/src/platform.conf new file mode 100644 index 00000000..34482dea --- /dev/null +++ b/tests/whole_dp/src/platform.conf @@ -0,0 +1,2 @@ +[dataplane] +fal_plugin = /usr/lib/x86_64-linux-gnu/vyatta-dataplane/fal_plugin_test.so diff --git a/tests/whole_dp/src/qos-cli-to-dataplane-commands.py b/tests/whole_dp/src/qos-cli-to-dataplane-commands.py index 8d17870d..3b5b10c1 100644 --- a/tests/whole_dp/src/qos-cli-to-dataplane-commands.py +++ b/tests/whole_dp/src/qos-cli-to-dataplane-commands.py @@ -1,5 +1,9 @@ #!/usr/bin/env python # +# Copyright (c) 2018,2020, AT&T Intellectual Property. All rights reserved. +# +# SPDX-License-Identifier: LGPL-2.1-only +# # This python script is run on a Vyatta Router with QoS configured on it. # The output of the script contains the current QoS CLI configuration # commands, and the QoS configuration commands that are sent down to the diff --git a/tests/whole_dp/src/qos-cli-to-dataplane-commands.sh b/tests/whole_dp/src/qos-cli-to-dataplane-commands.sh index 666d4586..f62df065 100644 --- a/tests/whole_dp/src/qos-cli-to-dataplane-commands.sh +++ b/tests/whole_dp/src/qos-cli-to-dataplane-commands.sh @@ -1,5 +1,9 @@ #!/bin/vcli -f # +# Copyright (c) 2018,2020, AT&T Intellectual Property. All rights reserved. +# +# SPDX-License-Identifier: LGPL-2.1-only +# # This bash/vcli script is run on a Vyatta Router with QoS configured on it. # The output of the script contains the current QoS CLI configuration # commands, and the QoS configuration commands that are sent down to the diff --git a/tests/whole_dp/valgrind_suppressions b/tests/whole_dp/valgrind_suppressions index d59ec746..70db9fad 100644 --- a/tests/whole_dp/valgrind_suppressions +++ b/tests/whole_dp/valgrind_suppressions @@ -48,27 +48,16 @@ Memcheck:Value8 fun:g2_eval4 - fun:npf_ruleset_inspect - fun:npf_hook_notrack - ... 
- fun:pipeline_fused_no_dyn_feats_ether_in ... - fun:ether_input* - fun:process_burst - fun:poll_receive_queues + fun:npf_ruleset_inspect ... } { Memcheck:Value8 fun:g2_eval6 - fun:npf_ruleset_inspect - ... - fun:pipeline_fused_no_dyn_feats_ether_in ... - fun:ether_input* - fun:process_burst - fun:poll_receive_queues + fun:npf_ruleset_inspect ... } { @@ -137,3 +126,24 @@ fun:mpls_oam_pool_init ... } +{ + + Memcheck:Leak + match-leak-kinds: possible + fun:calloc + fun:allocate_dtv + fun:_dl_allocate_tls + fun:allocate_stack + fun:pthread_create@@GLIBC_2.2.5 + fun:urcu_workqueue_create + ... +} +{ + + Memcheck:Cond + fun:rte_cpu_get_flag_enabled + ... + fun:rte_vdev_init + fun:crypto_rte_create_pmd + ... +} \ No newline at end of file diff --git a/tools/irq-remap b/tools/irq-remap index 2ea5e766..1c7a32a3 100755 --- a/tools/irq-remap +++ b/tools/irq-remap @@ -1,5 +1,6 @@ #! /usr/bin/perl +# Copyright (c) 2019, AT&T Intellectual Property. All rights reserved. # Copyright (c) 2015-2016, Brocade Communications Systems, Inc. # All rights reserved. # @@ -90,9 +91,9 @@ sub dataplane_cpus { $isolated = $online; } - my $master = first_set_bit($isolated); - vec( $isolated, $master, 1 ) = 0 - if ( $master >= 0 ); + my $main = first_set_bit($isolated); + vec( $isolated, $main, 1 ) = 0 + if ( $main >= 0 ); # never ban CPU 0 vec( $isolated, 0, 1 ) = 0; diff --git a/tools/prestart.d/vplane-mlx-setup b/tools/prestart.d/vplane-mlx-setup new file mode 100755 index 00000000..4cbcb0b4 --- /dev/null +++ b/tools/prestart.d/vplane-mlx-setup @@ -0,0 +1,63 @@ +#! /usr/bin/perl + +# Copyright (c) 2021 AT&T Intellectual Property. All rights reserved. +# All rights reserved. +# +# SPDX-License-Identifier: LGPL-2.1-only + +use strict; +use warnings; + +use lib '/opt/vyatta/share/perl5'; +use Vyatta::Vplane; + +# +# As per the performance tuning guide from Mellanox, the following parameters +# need to be set to achieve the best performance from the NIC: +# 1. 
PCI maximum read request size needs to be set to 1024 bytes. The value is +# set using a PCI write to the upper nibble of word 68. The mapping from the +# upper nibble to the request size is as follows: +# 0 - 128B, 1 - 256B, 2 - 512B, 3 - 1024B, 4 - 2048B and 5 - 4096B +# +# 2. Compression of Completion Queue Events (CQE) - This is used to reduce PCIe +# bandwidth usage resulting in better performance +# +# References: +# 1. https://community.mellanox.com/s/article/understanding-pcie-configuration-for-maximum-performance +# 2. https://fast.dpdk.org/doc/perf/DPDK_18_11_Mellanox_NIC_performance_report.pdf +# 3. https://community.mellanox.com/s/article/understanding-mlx5-ethtool-counters +# +sub mlx_setup { + my @mlx_pcis = `lspci -n -d 0x15b3:`; + + foreach my $line (@mlx_pcis) { + chop($line); + (my $pci, undef, my $vendor_device) = split(' ', $line); + (my $vendor, my $device) = split(':', $vendor_device); + $vendor = hex($vendor); + $device = hex($device); + + next unless is_supported_ib_device($vendor, $device); + + my $cur_val = `setpci -s $pci 68.w`; + $cur_val =~ s/\n//; + + my $new_val = sprintf( "0x%04x", 3 << 12 | hex($cur_val) & 0xfff ); + + system("setpci -s $pci 68.w=$new_val"); + my $val = + `lspci -vvv -s $pci | grep -o "MaxReadReq.*" | cut -d ' ' -f 2`; + $val =~ s/\n//; + system( + "logger PCI read request size for device $pci set to $val bytes"); + + # + # Enable compression of completion queue events + # + system("mstconfig -d $pci set CQE_COMPRESSION=1") + } +} + +mlx_setup(); + +1; diff --git a/tools/vplane-hugepages b/tools/vplane-hugepages index 65cf56bb..ca16721f 100755 --- a/tools/vplane-hugepages +++ b/tools/vplane-hugepages @@ -54,6 +54,10 @@ set_defaults() { HUGEPAGES=$(( ( M / 4 ) / HUGE_SZ )) elif [[ $MB -le 8192 ]]; then HUGEPAGES=$(( ( M / 2 ) / HUGE_SZ )) + # MemTotal does not account for memory allocated for kernel binary code and + # bits. For systems >=64GB, check conservatively to account for that. 
+ elif [[ $MB -ge 61440 ]]; then + HUGEPAGES=$(( 16777216 / HUGE_SZ )) fi DPDK_ARG=$(( ( HUGEPAGES * HUGE_SZ ) / 1024 )) # MB OVERCOMMIT_HUGEPAGES=0 diff --git a/tools/vplane-uio b/tools/vplane-uio index caef9368..287c46a9 100755 --- a/tools/vplane-uio +++ b/tools/vplane-uio @@ -1,6 +1,6 @@ #! /usr/bin/perl -# Copyright (c) 2017-2019, AT&T Intellectual Property. All rights reserved. +# Copyright (c) 2017-2021, AT&T Intellectual Property. All rights reserved. # Copyright (c) 2015-2016, Brocade Communications Systems, Inc. # All rights reserved. # @@ -20,277 +20,15 @@ use File::Basename; use File::Slurp; use File::Spec; +use lib '/opt/vyatta/share/perl5'; +use Vyatta::Vplane; + my $DEBUG = 0; my $DATAPLANE_CFG = "/etc/vyatta/dataplane.conf"; my $VMBUS_NETWORK_DEVICE = 'f8615163-df3e-46c5-913f-f2d2f965ed0e'; my $PCI_BASE_CLASS_BRIDGE = 0x06; -# -# List of supported PCI device Id's generated from rte_pci_dev_ids.h in DPDK -my @pci_devices = ( - - # Intel E1000 - { vendor => 0x8086, device => 0x100e }, - { vendor => 0x8086, device => 0x100f }, - { vendor => 0x8086, device => 0x1011 }, - { vendor => 0x8086, device => 0x1010 }, - { vendor => 0x8086, device => 0x1012 }, - { vendor => 0x8086, device => 0x101d }, - { vendor => 0x8086, device => 0x105e }, - { vendor => 0x8086, device => 0x105f }, - { vendor => 0x8086, device => 0x1060 }, - { vendor => 0x8086, device => 0x10d9 }, - { vendor => 0x8086, device => 0x10da }, - { vendor => 0x8086, device => 0x10a4 }, - { vendor => 0x8086, device => 0x10d5 }, - { vendor => 0x8086, device => 0x10a5 }, - { vendor => 0x8086, device => 0x10bc }, - { vendor => 0x8086, device => 0x107d }, - { vendor => 0x8086, device => 0x107e }, - { vendor => 0x8086, device => 0x107f }, - { vendor => 0x8086, device => 0x10b9 }, - { vendor => 0x8086, device => 0x109a }, - { vendor => 0x8086, device => 0x10d3 }, - { vendor => 0x8086, device => 0x10f6 }, - { vendor => 0x8086, device => 0x150c }, - { vendor => 0x8086, device => 0x153a }, - { vendor => 
0x8086, device => 0x153b }, - { vendor => 0x8086, device => 0x155a }, - { vendor => 0x8086, device => 0x1559 }, - { vendor => 0x8086, device => 0x15a0 }, - { vendor => 0x8086, device => 0x15a1 }, - { vendor => 0x8086, device => 0x15a2 }, - { vendor => 0x8086, device => 0x15a3 }, - - # Intel IGB - { vendor => 0x8086, device => 0x10c9 }, - { vendor => 0x8086, device => 0x10e6 }, - { vendor => 0x8086, device => 0x10e7 }, - { vendor => 0x8086, device => 0x10e8 }, - { vendor => 0x8086, device => 0x1526 }, - { vendor => 0x8086, device => 0x150a }, - { vendor => 0x8086, device => 0x1518 }, - { vendor => 0x8086, device => 0x150d }, - { vendor => 0x8086, device => 0x10a7 }, - { vendor => 0x8086, device => 0x10a9 }, - { vendor => 0x8086, device => 0x10d6 }, - { vendor => 0x8086, device => 0x150e }, - { vendor => 0x8086, device => 0x150f }, - { vendor => 0x8086, device => 0x1510 }, - { vendor => 0x8086, device => 0x1511 }, - { vendor => 0x8086, device => 0x1516 }, - { vendor => 0x8086, device => 0x1527 }, - { vendor => 0x8086, device => 0x1521 }, - { vendor => 0x8086, device => 0x1522 }, - { vendor => 0x8086, device => 0x1523 }, - { vendor => 0x8086, device => 0x1524 }, - { vendor => 0x8086, device => 0x1546 }, - { vendor => 0x8086, device => 0x1533 }, - { vendor => 0x8086, device => 0x1534 }, - { vendor => 0x8086, device => 0x1535 }, - { vendor => 0x8086, device => 0x1536 }, - { vendor => 0x8086, device => 0x1537 }, - { vendor => 0x8086, device => 0x1538 }, - { vendor => 0x8086, device => 0x1539 }, - { vendor => 0x8086, device => 0x1f40 }, - { vendor => 0x8086, device => 0x1f41 }, - { vendor => 0x8086, device => 0x1f45 }, - { vendor => 0x8086, device => 0x0438 }, - { vendor => 0x8086, device => 0x043a }, - { vendor => 0x8086, device => 0x043c }, - { vendor => 0x8086, device => 0x0440 }, - - # Intel IXGBE - { vendor => 0x8086, device => 0x10b6 }, - { vendor => 0x8086, device => 0x1508 }, - { vendor => 0x8086, device => 0x10c6 }, - { vendor => 0x8086, device => 0x10c7 }, - { 
vendor => 0x8086, device => 0x10c8 }, - { vendor => 0x8086, device => 0x150b }, - { vendor => 0x8086, device => 0x10db }, - { vendor => 0x8086, device => 0x10dd }, - { vendor => 0x8086, device => 0x10ec }, - { vendor => 0x8086, device => 0x10f1 }, - { vendor => 0x8086, device => 0x10e1 }, - { vendor => 0x8086, device => 0x10f4 }, - { vendor => 0x8086, device => 0x10f7 }, - { vendor => 0x8086, device => 0x1514 }, - { vendor => 0x8086, device => 0x1517 }, - { vendor => 0x8086, device => 0x10f8 }, - { vendor => 0x8086, device => 0x000c }, - { vendor => 0x8086, device => 0x10f9 }, - { vendor => 0x8086, device => 0x10fb }, - { vendor => 0x8086, device => 0x11a9 }, - { vendor => 0x8086, device => 0x1f72 }, - { vendor => 0x8086, device => 0x17d0 }, - { vendor => 0x8086, device => 0x0470 }, - { vendor => 0x8086, device => 0x152a }, - { vendor => 0x8086, device => 0x1529 }, - { vendor => 0x8086, device => 0x1507 }, - { vendor => 0x8086, device => 0x154d }, - { vendor => 0x8086, device => 0x154a }, - { vendor => 0x8086, device => 0x1558 }, - { vendor => 0x8086, device => 0x1557 }, - { vendor => 0x8086, device => 0x10fc }, - { vendor => 0x8086, device => 0x151c }, - { vendor => 0x8086, device => 0x154f }, - { vendor => 0x8086, device => 0x1528 }, - { vendor => 0x8086, device => 0x1560 }, - { vendor => 0x8086, device => 0x15ac }, - { vendor => 0x8086, device => 0x15ad }, - { vendor => 0x8086, device => 0x15ae }, - { vendor => 0x8086, device => 0x1563 }, - { vendor => 0x8086, device => 0x15aa }, - { vendor => 0x8086, device => 0x15ab }, - { vendor => 0x8086, device => 0x15b4 }, - { vendor => 0x8086, device => 0x15c2 }, - { vendor => 0x8086, device => 0x15c3 }, - { vendor => 0x8086, device => 0x15c4 }, - { vendor => 0x8086, device => 0x15c5 }, - { vendor => 0x8086, device => 0x15c6 }, - { vendor => 0x8086, device => 0x15c7 }, - { vendor => 0x8086, device => 0x15c8 }, - { vendor => 0x8086, device => 0x15ca }, - { vendor => 0x8086, device => 0x15cc }, - { vendor => 0x8086, device 
=> 0x15ce }, - { vendor => 0x8086, device => 0x15e4 }, - { vendor => 0x8086, device => 0x15e5 }, - - # Intel I40E (Fortville) - { vendor => 0x8086, device => 0x1572 }, - { vendor => 0x8086, device => 0x1574 }, - { vendor => 0x8086, device => 0x157f }, - { vendor => 0x8086, device => 0x1580 }, - { vendor => 0x8086, device => 0x1581 }, - { vendor => 0x8086, device => 0x1583 }, - { vendor => 0x8086, device => 0x1584 }, - { vendor => 0x8086, device => 0x1585 }, - { vendor => 0x8086, device => 0x1586 }, - { vendor => 0x8086, device => 0x1587 }, - { vendor => 0x8086, device => 0x1588 }, - { vendor => 0x8086, device => 0x1589 }, - { vendor => 0x8086, device => 0x158a }, - { vendor => 0x8086, device => 0x158b }, - { vendor => 0x8086, device => 0x374c }, - { vendor => 0x8086, device => 0x37d0 }, - { vendor => 0x8086, device => 0x37d1 }, - { vendor => 0x8086, device => 0x37d2 }, - { vendor => 0x8086, device => 0x37d3 }, - - # Intel FM10K (Red Rock Canyon) - { vendor => 0x8086, device => 0x15a4 }, - { vendor => 0x8086, device => 0x15d0 }, - - # Intel IGB VF - { vendor => 0x8086, device => 0x10ca }, - { vendor => 0x8086, device => 0x152d }, - { vendor => 0x8086, device => 0x1520 }, - { vendor => 0x8086, device => 0x152f }, - - # Intel IXGBE VF - { vendor => 0x8086, device => 0x10ed }, - { vendor => 0x8086, device => 0x152e }, - { vendor => 0x8086, device => 0x1515 }, - { vendor => 0x8086, device => 0x1530 }, - { vendor => 0x8086, device => 0x1564 }, - { vendor => 0x8086, device => 0x1565 }, - { vendor => 0x8086, device => 0x15a8 }, - { vendor => 0x8086, device => 0x15a9 }, - - # Intel I40E VF - { vendor => 0x8086, device => 0x154c }, - { vendor => 0x8086, device => 0x1571 }, - { vendor => 0x8086, device => 0x37cd }, - { vendor => 0x8086, device => 0x37d9 }, - - # Intel FM10K VF - { vendor => 0x8086, device => 0x15a5 }, - - # Broadcom/Qlogic BNX2X - { vendor => 0x14e4, device => 0x168a }, - { vendor => 0x14e4, device => 0x16a9 }, - { vendor => 0x14e4, device => 0x164f }, - { 
vendor => 0x14e4, device => 0x168e }, - { vendor => 0x14e4, device => 0x16af }, - { vendor => 0x14e4, device => 0x163d }, - { vendor => 0x14e4, device => 0x163f }, - { vendor => 0x14e4, device => 0x168d }, - { vendor => 0x14e4, device => 0x16a1 }, - { vendor => 0x14e4, device => 0x16a2 }, - { vendor => 0x14e4, device => 0x16ad }, - - # Broadcom BNXT - { vendor => 0x14e4, device => 0x1614 }, - { vendor => 0x14e4, device => 0x16c1 }, - { vendor => 0x14e4, device => 0x16c8 }, - { vendor => 0x14e4, device => 0x16c9 }, - { vendor => 0x14e4, device => 0x16ca }, - { vendor => 0x14e4, device => 0x16cb }, - { vendor => 0x14e4, device => 0x16cc }, - { vendor => 0x14e4, device => 0x16cd }, - { vendor => 0x14e4, device => 0x16ce }, - { vendor => 0x14e4, device => 0x16cf }, - { vendor => 0x14e4, device => 0x16d0 }, - { vendor => 0x14e4, device => 0x16d1 }, - { vendor => 0x14e4, device => 0x16d2 }, - { vendor => 0x14e4, device => 0x16d3 }, - { vendor => 0x14e4, device => 0x16d4 }, - { vendor => 0x14e4, device => 0x16d5 }, - { vendor => 0x14e4, device => 0x16d6 }, - { vendor => 0x14e4, device => 0x16d7 }, - { vendor => 0x14e4, device => 0x16d8 }, - { vendor => 0x14e4, device => 0x16d9 }, - { vendor => 0x14e4, device => 0x16dc }, - { vendor => 0x14e4, device => 0x16de }, - { vendor => 0x14e4, device => 0x16df }, - { vendor => 0x14e4, device => 0x16e0 }, - { vendor => 0x14e4, device => 0x16e1 }, - { vendor => 0x14e4, device => 0x16e2 }, - { vendor => 0x14e4, device => 0x16e3 }, - { vendor => 0x14e4, device => 0x16e4 }, - { vendor => 0x14e4, device => 0x16e7 }, - { vendor => 0x14e4, device => 0x16e8 }, - { vendor => 0x14e4, device => 0x16e9 }, - { vendor => 0x14e4, device => 0x16ea }, - { vendor => 0x14e4, device => 0x16ec }, - { vendor => 0x14e4, device => 0x16ee }, - - # Virtio - { vendor => 0x1af4, device => 0x1000 }, - - # Windriver Accelerated Virtual Port - { vendor => 0x1af4, device => 0x1110 }, - - # VMXNET3 - { vendor => 0x15ad, device => 0x07b0 }, - - # Cavium ThunderNic - 
{ vendor => 0x177d, device => 0xa034 }, - { vendor => 0x177d, device => 0x0011 }, -); - -# -## List of Mellanox IB device. -my @ib_devices = ( - # Mellanox mlx4 Nic - { vendor => 0x15b3, device => 0x1003 }, - { vendor => 0x15b3, device => 0x1004 }, - { vendor => 0x15b3, device => 0x1007 }, - - # Mellanox mlx5 Nic - { vendor => 0x15b3, device => 0x1013 }, - { vendor => 0x15b3, device => 0x1014 }, - { vendor => 0x15b3, device => 0x1015 }, - { vendor => 0x15b3, device => 0x1016 }, - { vendor => 0x15b3, device => 0x1017 }, - { vendor => 0x15b3, device => 0x1018 }, - { vendor => 0x15b3, device => 0x1019 }, - { vendor => 0x15b3, device => 0x101a }, - { vendor => 0x15b3, device => 0xa2d2 }, - { vendor => 0x15b3, device => 0xa2d3 }, -); - sub debug { print @_ if $DEBUG; } @@ -300,9 +38,7 @@ sub pci_match { my $vendor = hex( read_value("$path/vendor") ); my $device = hex( read_value("$path/device") ); - return - unless grep { $_->{vendor} == $vendor && $_->{device} == $device } - @pci_devices; + return is_supported_pci_device($vendor, $device); } # how to identify network device in Hyper-v @@ -313,29 +49,20 @@ sub vmbus_match { return $class eq "{$VMBUS_NETWORK_DEVICE}"; } -# how to identify network device in xen -sub xen_match { - my $id = shift; - - return $id =~ /^vif-\d+$/; -} - sub ib_match { my ( $id, $path ) = @_; my $vendor = hex( read_value("$path/vendor") ); my $device = hex( read_value("$path/device") ); - return - unless grep { $_->{vendor} == $vendor && $_->{device} == $device } - @ib_devices; + return is_supported_ib_device($vendor, $device); } -my @blacklisted_devices; +my @excluded_devices; -# extract list of blacklisted network devices +# extract list of excluded network devices # then convert them into absolute device paths # and return a hash -sub get_blacklist { +sub get_excluded_devices { my $ini = Config::Tiny->read($DATAPLANE_CFG); die "Can't read $DATAPLANE_CFG: $!\n" @@ -344,17 +71,18 @@ sub get_blacklist { my $cfg = $ini->{'Dataplane'}; die "Can't 
find Dataplane section in $DATAPLANE_CFG\n" unless defined($cfg); - my $blacklist = $cfg->{'blacklist'}; - return unless defined($blacklist); + my $excluded = $cfg->{'exclude-interfaces'}; + $excluded = $cfg->{'blacklist'} unless defined($excluded); + return unless defined($excluded); - foreach my $ifname ( split /,/, $blacklist ) { + foreach my $ifname ( split /,/, $excluded ) { my $ifdev = "/sys/class/net/$ifname/device"; # skip non-existent devices (or pseudo) next unless -l $ifdev; # "/sys/devices/pci0000:00/0000:00:03.0/" - push @blacklisted_devices, abs_path($ifdev) . '/'; + push @excluded_devices, abs_path($ifdev) . '/'; } } @@ -402,7 +130,7 @@ sub scan_bus { # check if network device is a leaf of the bus device my $devpath = abs_path($path) . '/'; - next if grep { $_ =~ /^$devpath/ } @blacklisted_devices; + next if grep { $_ =~ /^$devpath/ } @excluded_devices; push @devices, $dev_id; my $group = get_iommu_group($devpath); if ($group) { @@ -555,11 +283,6 @@ my @buses = ( type => 'vmbus', match => \&vmbus_match, }, - { - driver => 'xen_uio', - type => 'xen', - match => \&xen_match, - }, { driver => 'net_mlx', type => 'ib', @@ -568,7 +291,7 @@ my @buses = ( ); # main -get_blacklist(); +get_excluded_devices(); foreach my $bus (@buses) { my $type = $bus->{type};